From 59850d0874429601812bc13408cb1f776649027c Mon Sep 17 00:00:00 2001 From: Roman Divacky Date: Wed, 14 Oct 2009 17:57:32 +0000 Subject: Update llvm to r84119. --- CMakeLists.txt | 101 +- CREDITS.TXT | 29 + LICENSE.TXT | 2 +- Makefile | 22 +- Makefile.config.in | 14 +- Makefile.rules | 192 +- README.txt | 4 +- Xcode/LLVM.xcodeproj/project.pbxproj | 63 - autoconf/AutoRegen.sh | 42 +- autoconf/config.guess | 799 +- autoconf/config.sub | 262 +- autoconf/configure.ac | 282 +- autoconf/m4/config_makefile.m4 | 2 +- autoconf/m4/linux_mixed_64_32.m4 | 17 + bindings/ada/analysis/llvm_analysis-binding.ads | 32 + bindings/ada/analysis/llvm_analysis.ads | 30 + bindings/ada/analysis/llvm_analysis_wrap.cxx | 369 + bindings/ada/bitreader/llvm_bit_reader-binding.ads | 52 + bindings/ada/bitreader/llvm_bit_reader.ads | 6 + bindings/ada/bitreader/llvm_bitreader_wrap.cxx | 423 + bindings/ada/bitwriter/llvm_bit_writer-binding.ads | 28 + bindings/ada/bitwriter/llvm_bit_writer.ads | 6 + bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx | 335 + .../llvm_execution_engine-binding.ads | 192 + .../ada/executionengine/llvm_execution_engine.ads | 90 + .../executionengine/llvm_executionengine_wrap.cxx | 924 ++ bindings/ada/llvm.gpr | 34 + bindings/ada/llvm/llvm-binding.ads | 1974 +++++ bindings/ada/llvm/llvm.ads | 493 ++ .../ada/llvm/llvm_link_time_optimizer-binding.ads | 207 + bindings/ada/llvm/llvm_link_time_optimizer.ads | 184 + bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx | 923 ++ bindings/ada/llvm/llvm_wrap.cxx | 8817 ++++++++++++++++++++ bindings/ada/target/llvm_target-binding.ads | 138 + bindings/ada/target/llvm_target.ads | 72 + bindings/ada/target/llvm_target_wrap.cxx | 720 ++ .../ada/transforms/llvm_transforms-binding.ads | 206 + bindings/ada/transforms/llvm_transforms.ads | 6 + bindings/ada/transforms/llvm_transforms_wrap.cxx | 828 ++ bindings/ocaml/Makefile.ocaml | 4 + bindings/ocaml/bitreader/bitreader_ocaml.c | 14 +- bindings/ocaml/bitreader/llvm_bitreader.ml | 6 +- bindings/ocaml/bitreader/llvm_bitreader.mli | 22 +- .../ocaml/executionengine/executionengine_ocaml.c | 8 +- .../ocaml/executionengine/llvm_executionengine.ml | 3 + .../ocaml/executionengine/llvm_executionengine.mli | 11 +- bindings/ocaml/llvm/Makefile | 2 +- bindings/ocaml/llvm/llvm.ml | 175 +- bindings/ocaml/llvm/llvm.mli | 462 +- bindings/ocaml/llvm/llvm_ocaml.c | 374 +- cmake/config-ix.cmake | 85 +- cmake/modules/AddLLVM.cmake | 12 +- cmake/modules/CheckAtomic.cmake | 18 + cmake/modules/GetTargetTriple.cmake | 8 +- cmake/modules/LLVMConfig.cmake | 147 +- cmake/modules/LLVMLibDeps.cmake | 68 + cmake/modules/TableGen.cmake | 15 +- configure | 1228 ++- docs/BitCodeFormat.html | 14 +- docs/Bugpoint.html | 13 +- docs/CMake.html | 48 +- docs/CodeGenerator.html | 29 +- docs/CodingStandards.html | 621 +- docs/CommandGuide/FileCheck.pod | 65 + docs/CommandGuide/Makefile | 10 +- docs/CommandGuide/index.html | 4 +- docs/CommandGuide/lit.pod | 222 + docs/CommandGuide/llc.pod | 22 +- docs/CommandGuide/llvm-as.pod | 6 +- docs/CommandGuide/llvm-dis.pod | 6 +- docs/CommandGuide/llvm-extract.pod | 10 +- docs/CommandGuide/llvm-ld.pod | 6 + docs/CommandGuide/llvm-link.pod | 9 +- docs/CommandGuide/llvmc.pod | 6 + docs/CommandGuide/opt.pod | 29 +- docs/CommandLine.html | 38 +- docs/CompilerDriver.html | 38 +- docs/DebuggingJITedCode.html | 171 + docs/DeveloperPolicy.html | 27 +- docs/ExceptionHandling.html | 508 +- docs/FAQ.html | 6 +- docs/GCCFEBuildInstrs.html | 3 +- docs/GarbageCollection.html | 8 +- docs/GetElementPtr.html | 19 +- 
docs/GettingStarted.html | 37 +- docs/GettingStartedVS.html | 8 +- docs/GoldPlugin.html | 3 +- docs/HistoricalNotes/2007-OriginalClangReadme.txt | 178 + docs/HowToReleaseLLVM.html | 608 +- docs/HowToSubmitABug.html | 18 +- docs/LangRef.html | 6273 +++++++------- docs/LinkTimeOptimization.html | 6 +- docs/MakefileGuide.html | 9 +- docs/Passes.html | 123 +- docs/ProgrammersManual.html | 226 +- docs/Projects.html | 6 +- docs/ReleaseNotes.html | 1045 ++- docs/SourceLevelDebugging.html | 963 +-- docs/SystemLibrary.html | 37 +- docs/TableGenFundamentals.html | 38 +- docs/TestingGuide.html | 263 +- docs/UsingLibraries.html | 12 +- docs/WritingAnLLVMBackend.html | 64 +- docs/WritingAnLLVMPass.html | 21 +- docs/index.html | 9 +- docs/re_format.7 | 756 ++ docs/tutorial/JITTutorial1.html | 6 +- docs/tutorial/JITTutorial2.html | 10 +- docs/tutorial/LangImpl2.html | 27 +- docs/tutorial/LangImpl3.html | 81 +- docs/tutorial/LangImpl4.html | 165 +- docs/tutorial/LangImpl5.html | 153 +- docs/tutorial/LangImpl6.html | 143 +- docs/tutorial/LangImpl7.html | 149 +- docs/tutorial/OCamlLangImpl3.html | 13 +- docs/tutorial/OCamlLangImpl4.html | 13 +- docs/tutorial/OCamlLangImpl5.html | 13 +- docs/tutorial/OCamlLangImpl6.html | 9 +- docs/tutorial/OCamlLangImpl7.html | 11 +- examples/BrainF/BrainF.cpp | 76 +- examples/BrainF/BrainF.h | 3 +- examples/BrainF/BrainFDriver.cpp | 53 +- examples/Fibonacci/fibonacci.cpp | 26 +- examples/HowToUseJIT/HowToUseJIT.cpp | 21 +- examples/Kaleidoscope/CMakeLists.txt | 11 +- examples/Kaleidoscope/Chapter2/CMakeLists.txt | 3 + examples/Kaleidoscope/Chapter2/Makefile | 13 + examples/Kaleidoscope/Chapter2/toy.cpp | 398 + examples/Kaleidoscope/Chapter3/CMakeLists.txt | 5 + examples/Kaleidoscope/Chapter3/Makefile | 15 + examples/Kaleidoscope/Chapter3/toy.cpp | 563 ++ examples/Kaleidoscope/Chapter4/CMakeLists.txt | 5 + examples/Kaleidoscope/Chapter4/Makefile | 15 + examples/Kaleidoscope/Chapter4/toy.cpp | 610 ++ examples/Kaleidoscope/Chapter5/CMakeLists.txt | 5 + examples/Kaleidoscope/Chapter5/Makefile | 15 + examples/Kaleidoscope/Chapter5/toy.cpp | 855 ++ examples/Kaleidoscope/Chapter6/CMakeLists.txt | 5 + examples/Kaleidoscope/Chapter6/Makefile | 15 + examples/Kaleidoscope/Chapter6/toy.cpp | 973 +++ examples/Kaleidoscope/Chapter7/CMakeLists.txt | 5 + examples/Kaleidoscope/Chapter7/Makefile | 15 + examples/Kaleidoscope/Chapter7/toy.cpp | 1139 +++ examples/Kaleidoscope/Makefile | 8 +- examples/ModuleMaker/ModuleMaker.cpp | 15 +- examples/ParallelJIT/CMakeLists.txt | 4 +- examples/ParallelJIT/ParallelJIT.cpp | 33 +- include/llvm-c/BitReader.h | 8 +- include/llvm-c/Core.h | 230 +- include/llvm-c/ExecutionEngine.h | 2 +- include/llvm-c/Target.h | 15 +- include/llvm-c/lto.h | 1 - include/llvm/ADT/APFloat.h | 12 +- include/llvm/ADT/APInt.h | 38 +- include/llvm/ADT/DenseMap.h | 132 +- include/llvm/ADT/DenseMapInfo.h | 135 + include/llvm/ADT/DepthFirstIterator.h | 79 +- include/llvm/ADT/EquivalenceClasses.h | 7 +- include/llvm/ADT/FoldingSet.h | 20 +- include/llvm/ADT/ImmutableMap.h | 17 +- include/llvm/ADT/ImmutableSet.h | 233 +- include/llvm/ADT/IndexedMap.h | 2 +- include/llvm/ADT/PointerIntPair.h | 9 +- include/llvm/ADT/PointerUnion.h | 113 +- include/llvm/ADT/PostOrderIterator.h | 11 +- include/llvm/ADT/SCCIterator.h | 15 +- include/llvm/ADT/STLExtras.h | 9 +- include/llvm/ADT/SmallPtrSet.h | 8 + include/llvm/ADT/SmallSet.h | 2 +- include/llvm/ADT/SmallString.h | 66 +- include/llvm/ADT/SmallVector.h | 23 +- include/llvm/ADT/SparseBitVector.h | 13 +- include/llvm/ADT/StringExtras.h | 4 - 
include/llvm/ADT/StringMap.h | 96 +- include/llvm/ADT/StringRef.h | 335 + include/llvm/ADT/Trie.h | 4 +- include/llvm/ADT/Triple.h | 122 +- include/llvm/ADT/Twine.h | 422 + include/llvm/ADT/ilist.h | 24 +- include/llvm/ADT/ilist_node.h | 27 +- include/llvm/AbstractTypeUser.h | 9 +- include/llvm/Analysis/AliasAnalysis.h | 17 +- include/llvm/Analysis/AliasSetTracker.h | 40 +- include/llvm/Analysis/CallGraph.h | 99 +- include/llvm/Analysis/ConstantFolding.h | 18 +- include/llvm/Analysis/ConstantsScanner.h | 4 +- include/llvm/Analysis/DebugInfo.h | 473 +- include/llvm/Analysis/Dominators.h | 188 +- include/llvm/Analysis/FindUsedTypes.h | 3 +- include/llvm/Analysis/IVUsers.h | 22 +- include/llvm/Analysis/InlineCost.h | 180 + include/llvm/Analysis/Interval.h | 5 +- include/llvm/Analysis/IntervalIterator.h | 3 +- include/llvm/Analysis/IntervalPartition.h | 5 +- include/llvm/Analysis/LibCallAliasAnalysis.h | 2 +- include/llvm/Analysis/LoopDependenceAnalysis.h | 109 +- include/llvm/Analysis/LoopInfo.h | 632 +- include/llvm/Analysis/LoopPass.h | 6 +- include/llvm/Analysis/MallocHelper.h | 86 + include/llvm/Analysis/MemoryDependenceAnalysis.h | 2 +- include/llvm/Analysis/Passes.h | 22 + include/llvm/Analysis/PointerTracking.h | 131 + include/llvm/Analysis/PostDominators.h | 14 +- include/llvm/Analysis/ProfileInfo.h | 95 +- include/llvm/Analysis/ProfileInfoLoader.h | 52 +- include/llvm/Analysis/ProfileInfoTypes.h | 3 +- include/llvm/Analysis/ScalarEvolution.h | 354 +- include/llvm/Analysis/ScalarEvolutionExpander.h | 21 +- include/llvm/Analysis/ScalarEvolutionExpressions.h | 314 +- include/llvm/Analysis/SparsePropagation.h | 19 +- include/llvm/Analysis/Trace.h | 7 +- include/llvm/Analysis/ValueTracking.h | 23 +- include/llvm/Argument.h | 3 +- include/llvm/Assembly/Parser.h | 12 + include/llvm/Assembly/Writer.h | 3 - include/llvm/Attributes.h | 4 +- include/llvm/AutoUpgrade.h | 4 + include/llvm/BasicBlock.h | 21 +- include/llvm/Bitcode/Archive.h | 1 - include/llvm/Bitcode/BitstreamReader.h | 14 +- include/llvm/Bitcode/BitstreamWriter.h | 22 +- include/llvm/Bitcode/LLVMBitCodes.h | 33 +- include/llvm/Bitcode/ReaderWriter.h | 52 +- include/llvm/CallGraphSCCPass.h | 6 +- include/llvm/CodeGen/AsmPrinter.h | 178 +- include/llvm/CodeGen/BinaryObject.h | 140 +- include/llvm/CodeGen/CallingConvLower.h | 114 +- include/llvm/CodeGen/DwarfWriter.h | 22 +- include/llvm/CodeGen/FastISel.h | 48 +- include/llvm/CodeGen/FileWriters.h | 10 +- include/llvm/CodeGen/GCMetadata.h | 2 +- include/llvm/CodeGen/GCMetadataPrinter.h | 4 +- include/llvm/CodeGen/JITCodeEmitter.h | 37 +- include/llvm/CodeGen/LinkAllCodegenComponents.h | 2 - include/llvm/CodeGen/LiveInterval.h | 518 +- include/llvm/CodeGen/LiveIntervalAnalysis.h | 170 +- include/llvm/CodeGen/LiveStackAnalysis.h | 5 +- include/llvm/CodeGen/LiveVariables.h | 17 +- include/llvm/CodeGen/MachineBasicBlock.h | 9 +- include/llvm/CodeGen/MachineCodeEmitter.h | 36 +- include/llvm/CodeGen/MachineConstantPool.h | 24 +- include/llvm/CodeGen/MachineDominators.h | 8 +- include/llvm/CodeGen/MachineFrameInfo.h | 44 +- include/llvm/CodeGen/MachineFunction.h | 77 +- include/llvm/CodeGen/MachineFunctionAnalysis.h | 49 + include/llvm/CodeGen/MachineFunctionPass.h | 23 +- include/llvm/CodeGen/MachineInstr.h | 71 +- include/llvm/CodeGen/MachineInstrBuilder.h | 12 +- include/llvm/CodeGen/MachineJumpTableInfo.h | 7 +- include/llvm/CodeGen/MachineLoopInfo.h | 49 +- include/llvm/CodeGen/MachineMemOperand.h | 36 +- include/llvm/CodeGen/MachineModuleInfo.h | 74 +- 
include/llvm/CodeGen/MachineModuleInfoImpls.h | 79 + include/llvm/CodeGen/MachineOperand.h | 39 +- include/llvm/CodeGen/MachineRegisterInfo.h | 9 +- include/llvm/CodeGen/ObjectCodeEmitter.h | 178 + include/llvm/CodeGen/Passes.h | 32 +- include/llvm/CodeGen/PseudoSourceValue.h | 32 +- include/llvm/CodeGen/RegAllocRegistry.h | 4 +- include/llvm/CodeGen/RegisterCoalescer.h | 16 +- include/llvm/CodeGen/RegisterScavenging.h | 94 +- include/llvm/CodeGen/RuntimeLibcalls.h | 17 +- include/llvm/CodeGen/ScheduleDAG.h | 55 +- include/llvm/CodeGen/ScheduleHazardRecognizer.h | 5 + include/llvm/CodeGen/SelectionDAG.h | 353 +- include/llvm/CodeGen/SelectionDAGISel.h | 7 +- include/llvm/CodeGen/SelectionDAGNodes.h | 470 +- include/llvm/CodeGen/ValueTypes.h | 610 +- include/llvm/CodeGen/ValueTypes.td | 31 +- include/llvm/CompilerDriver/BuiltinOptions.h | 1 + include/llvm/CompilerDriver/Common.td | 21 +- include/llvm/CompilerDriver/CompilationGraph.h | 4 +- include/llvm/CompilerDriver/ForceLinkage.h | 40 + include/llvm/Config/AsmParsers.def.in | 29 + include/llvm/Config/config.h.cmake | 40 +- include/llvm/Config/config.h.in | 33 +- include/llvm/Constant.h | 73 +- include/llvm/Constants.h | 358 +- include/llvm/DerivedTypes.h | 42 +- include/llvm/ExecutionEngine/ExecutionEngine.h | 197 +- include/llvm/ExecutionEngine/JITEventListener.h | 17 + include/llvm/ExecutionEngine/JITMemoryManager.h | 84 +- include/llvm/Function.h | 26 +- include/llvm/GlobalAlias.h | 4 +- include/llvm/GlobalValue.h | 24 +- include/llvm/GlobalVariable.h | 18 +- include/llvm/InlineAsm.h | 18 +- include/llvm/InstrTypes.h | 190 +- include/llvm/Instruction.def | 4 +- include/llvm/Instruction.h | 38 +- include/llvm/Instructions.h | 805 +- include/llvm/IntrinsicInst.h | 26 +- include/llvm/Intrinsics.h | 6 +- include/llvm/Intrinsics.td | 81 +- include/llvm/IntrinsicsARM.td | 171 +- include/llvm/IntrinsicsBlackfin.td | 34 + include/llvm/IntrinsicsX86.td | 108 +- include/llvm/LLVMContext.h | 195 +- include/llvm/LinkAllPasses.h | 12 +- include/llvm/LinkAllVMCore.h | 2 +- include/llvm/Linker.h | 36 +- include/llvm/MC/MCAsmInfo.h | 472 ++ include/llvm/MC/MCAsmInfoCOFF.h | 24 + include/llvm/MC/MCAsmInfoDarwin.h | 32 + include/llvm/MC/MCAsmLexer.h | 141 + include/llvm/MC/MCAsmParser.h | 79 + include/llvm/MC/MCAssembler.h | 661 ++ include/llvm/MC/MCCodeEmitter.h | 34 + include/llvm/MC/MCContext.h | 44 +- include/llvm/MC/MCDisassembler.h | 50 + include/llvm/MC/MCExpr.h | 328 + include/llvm/MC/MCInst.h | 78 +- include/llvm/MC/MCInstPrinter.h | 37 + include/llvm/MC/MCSection.h | 46 +- include/llvm/MC/MCSectionELF.h | 191 + include/llvm/MC/MCSectionMachO.h | 175 + include/llvm/MC/MCStreamer.h | 93 +- include/llvm/MC/MCSymbol.h | 85 +- include/llvm/MC/MCValue.h | 23 +- include/llvm/MC/SectionKind.h | 221 + include/llvm/Metadata.h | 377 + include/llvm/Module.h | 138 +- include/llvm/OperandTraits.h | 7 +- include/llvm/Operator.h | 306 + include/llvm/Pass.h | 35 +- include/llvm/PassAnalysisSupport.h | 17 + include/llvm/PassManagers.h | 33 +- include/llvm/PassSupport.h | 11 +- include/llvm/Support/Allocator.h | 97 +- include/llvm/Support/CFG.h | 19 +- include/llvm/Support/CallSite.h | 7 +- include/llvm/Support/Casting.h | 4 +- include/llvm/Support/CommandLine.h | 104 +- include/llvm/Support/Compiler.h | 8 +- include/llvm/Support/ConstantFolder.h | 42 +- include/llvm/Support/ConstantRange.h | 61 +- include/llvm/Support/DataTypes.h.cmake | 79 +- include/llvm/Support/DataTypes.h.in | 69 +- include/llvm/Support/Debug.h | 52 +- include/llvm/Support/DebugLoc.h | 33 
+- include/llvm/Support/Dwarf.h | 1 + include/llvm/Support/ErrorHandling.h | 87 + include/llvm/Support/Format.h | 61 +- include/llvm/Support/FormattedStream.h | 150 + include/llvm/Support/GetElementPtrTypeIterator.h | 5 +- include/llvm/Support/GraphWriter.h | 107 +- include/llvm/Support/IRBuilder.h | 541 +- include/llvm/Support/IRReader.h | 115 + include/llvm/Support/InstVisitor.h | 22 +- include/llvm/Support/LeakDetector.h | 6 +- include/llvm/Support/ManagedStatic.h | 18 +- include/llvm/Support/Mangler.h | 74 +- include/llvm/Support/MathExtras.h | 21 +- include/llvm/Support/MemoryBuffer.h | 5 + include/llvm/Support/MemoryObject.h | 70 + include/llvm/Support/NoFolder.h | 27 +- include/llvm/Support/PassNameParser.h | 5 +- include/llvm/Support/PatternMatch.h | 26 +- include/llvm/Support/PointerLikeTypeTraits.h | 10 +- include/llvm/Support/PrettyStackTrace.h | 6 + include/llvm/Support/Recycler.h | 3 +- include/llvm/Support/Regex.h | 63 + include/llvm/Support/Registry.h | 3 - include/llvm/Support/SourceMgr.h | 17 +- include/llvm/Support/StandardPasses.h | 130 +- include/llvm/Support/StringPool.h | 13 +- include/llvm/Support/SystemUtils.h | 18 +- include/llvm/Support/TargetFolder.h | 33 +- include/llvm/Support/Timer.h | 6 +- include/llvm/Support/TypeBuilder.h | 259 +- include/llvm/Support/ValueHandle.h | 155 +- include/llvm/Support/raw_os_ostream.h | 42 + include/llvm/Support/raw_ostream.h | 277 +- include/llvm/Support/type_traits.h | 40 +- include/llvm/SymbolTableListTraits.h | 3 +- include/llvm/System/Alarm.h | 3 +- include/llvm/System/Disassembler.h | 2 +- include/llvm/System/DynamicLibrary.h | 102 +- include/llvm/System/Memory.h | 9 +- include/llvm/System/Mutex.h | 44 +- include/llvm/System/Path.h | 21 +- include/llvm/System/Process.h | 5 + include/llvm/System/Program.h | 176 +- include/llvm/System/RWMutex.h | 18 +- include/llvm/System/TimeValue.h | 4 +- include/llvm/Target/SubtargetFeature.h | 7 +- include/llvm/Target/Target.td | 124 +- include/llvm/Target/TargetAsmParser.h | 65 + include/llvm/Target/TargetCallingConv.td | 10 + include/llvm/Target/TargetData.h | 15 +- include/llvm/Target/TargetELFWriterInfo.h | 23 +- include/llvm/Target/TargetFrameInfo.h | 31 +- include/llvm/Target/TargetInstrDesc.h | 45 +- include/llvm/Target/TargetInstrInfo.h | 141 +- include/llvm/Target/TargetInstrItineraries.h | 142 +- include/llvm/Target/TargetLowering.h | 460 +- include/llvm/Target/TargetLoweringObjectFile.h | 361 + include/llvm/Target/TargetMachine.h | 139 +- include/llvm/Target/TargetOptions.h | 30 +- include/llvm/Target/TargetRegisterInfo.h | 161 +- include/llvm/Target/TargetRegistry.h | 560 ++ include/llvm/Target/TargetSchedule.td | 25 +- include/llvm/Target/TargetSelect.h | 51 +- include/llvm/Target/TargetSelectionDAG.td | 8 +- include/llvm/Target/TargetSubtarget.h | 12 + include/llvm/Transforms/IPO.h | 5 +- include/llvm/Transforms/IPO/InlinerPass.h | 18 +- include/llvm/Transforms/Instrumentation.h | 3 + include/llvm/Transforms/Scalar.h | 33 +- include/llvm/Transforms/Utils/AddrModeMatcher.h | 8 +- include/llvm/Transforms/Utils/BasicBlockUtils.h | 21 +- include/llvm/Transforms/Utils/BasicInliner.h | 2 +- include/llvm/Transforms/Utils/Cloning.h | 42 +- include/llvm/Transforms/Utils/FunctionUtils.h | 2 +- include/llvm/Transforms/Utils/Local.h | 6 +- include/llvm/Transforms/Utils/PromoteMemToReg.h | 2 + include/llvm/Transforms/Utils/SSAUpdater.h | 108 + include/llvm/Transforms/Utils/SSI.h | 32 +- include/llvm/Transforms/Utils/UnrollLoop.h | 4 +- include/llvm/Transforms/Utils/ValueMapper.h | 3 +- 
include/llvm/Type.h | 86 +- include/llvm/TypeSymbolTable.h | 18 +- include/llvm/Use.h | 7 +- include/llvm/User.h | 1 - include/llvm/Value.h | 98 +- include/llvm/ValueSymbolTable.h | 12 +- lib/Analysis/AliasAnalysis.cpp | 23 +- lib/Analysis/AliasAnalysisCounter.cpp | 66 +- lib/Analysis/AliasAnalysisEvaluator.cpp | 108 +- lib/Analysis/AliasSetTracker.cpp | 53 +- lib/Analysis/Analysis.cpp | 1 - lib/Analysis/BasicAliasAnalysis.cpp | 411 +- lib/Analysis/CFGPrinter.cpp | 68 +- lib/Analysis/CMakeLists.txt | 11 +- lib/Analysis/CaptureTracking.cpp | 2 +- lib/Analysis/ConstantFolding.cpp | 463 +- lib/Analysis/DbgInfoPrinter.cpp | 9 +- lib/Analysis/DebugInfo.cpp | 1199 +-- lib/Analysis/IPA/Andersens.cpp | 168 +- lib/Analysis/IPA/CallGraph.cpp | 102 +- lib/Analysis/IPA/CallGraphSCCPass.cpp | 295 +- lib/Analysis/IPA/FindUsedTypes.cpp | 11 +- lib/Analysis/IPA/GlobalsModRef.cpp | 9 +- lib/Analysis/IVUsers.cpp | 57 +- lib/Analysis/InlineCost.cpp | 338 + lib/Analysis/InstCount.cpp | 9 +- lib/Analysis/Interval.cpp | 21 +- lib/Analysis/IntervalPartition.cpp | 2 +- lib/Analysis/LibCallAliasAnalysis.cpp | 2 - lib/Analysis/LibCallSemantics.cpp | 5 +- lib/Analysis/LoopDependenceAnalysis.cpp | 279 +- lib/Analysis/LoopInfo.cpp | 366 +- lib/Analysis/LoopPass.cpp | 60 +- lib/Analysis/MallocHelper.cpp | 230 + lib/Analysis/MemoryDependenceAnalysis.cpp | 125 +- lib/Analysis/PointerTracking.cpp | 265 + lib/Analysis/PostDominators.cpp | 10 +- lib/Analysis/ProfileEstimatorPass.cpp | 310 + lib/Analysis/ProfileInfo.cpp | 166 +- lib/Analysis/ProfileInfoLoader.cpp | 193 +- lib/Analysis/ProfileInfoLoaderPass.cpp | 237 +- lib/Analysis/ProfileVerifierPass.cpp | 343 + lib/Analysis/README.txt | 18 + lib/Analysis/ScalarEvolution.cpp | 2109 +++-- lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 133 + lib/Analysis/ScalarEvolutionExpander.cpp | 482 +- lib/Analysis/SparsePropagation.cpp | 30 +- lib/Analysis/Trace.cpp | 10 +- lib/Analysis/ValueTracking.cpp | 167 +- lib/Archive/Archive.cpp | 21 +- lib/Archive/ArchiveReader.cpp | 16 +- lib/Archive/ArchiveWriter.cpp | 19 +- lib/AsmParser/LLLexer.cpp | 59 +- lib/AsmParser/LLLexer.h | 6 +- lib/AsmParser/LLParser.cpp | 1334 +-- lib/AsmParser/LLParser.h | 16 +- lib/AsmParser/LLToken.h | 16 +- lib/AsmParser/Parser.cpp | 39 +- lib/Bitcode/Reader/BitReader.cpp | 12 +- lib/Bitcode/Reader/BitcodeReader.cpp | 844 +- lib/Bitcode/Reader/BitcodeReader.h | 50 +- lib/Bitcode/Reader/Deserialize.cpp | 14 +- lib/Bitcode/Writer/BitWriter.cpp | 28 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 571 +- lib/Bitcode/Writer/BitcodeWriterPass.cpp | 21 +- lib/Bitcode/Writer/Serialize.cpp | 9 +- lib/Bitcode/Writer/ValueEnumerator.cpp | 167 +- lib/Bitcode/Writer/ValueEnumerator.h | 23 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1194 +-- lib/CodeGen/AsmPrinter/DIE.cpp | 57 +- lib/CodeGen/AsmPrinter/DIE.h | 34 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1161 ++- lib/CodeGen/AsmPrinter/DwarfDebug.h | 115 +- lib/CodeGen/AsmPrinter/DwarfException.cpp | 917 +- lib/CodeGen/AsmPrinter/DwarfException.h | 60 +- lib/CodeGen/AsmPrinter/DwarfLabel.cpp | 7 +- lib/CodeGen/AsmPrinter/DwarfLabel.h | 10 +- lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 40 +- lib/CodeGen/AsmPrinter/DwarfPrinter.h | 10 +- lib/CodeGen/AsmPrinter/DwarfWriter.cpp | 34 +- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 70 +- lib/CodeGen/BranchFolding.cpp | 175 +- lib/CodeGen/BranchFolding.h | 84 + lib/CodeGen/CMakeLists.txt | 10 +- lib/CodeGen/CodePlacementOpt.cpp | 4 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 9 +- lib/CodeGen/DwarfEHPrepare.cpp | 55 +- 
lib/CodeGen/ELF.h | 232 +- lib/CodeGen/ELFCodeEmitter.cpp | 155 +- lib/CodeGen/ELFCodeEmitter.h | 102 +- lib/CodeGen/ELFWriter.cpp | 903 +- lib/CodeGen/ELFWriter.h | 191 +- lib/CodeGen/ExactHazardRecognizer.cpp | 160 + lib/CodeGen/ExactHazardRecognizer.h | 61 + lib/CodeGen/GCMetadata.cpp | 34 +- lib/CodeGen/GCMetadataPrinter.cpp | 4 +- lib/CodeGen/GCStrategy.cpp | 23 +- lib/CodeGen/IfConversion.cpp | 64 +- lib/CodeGen/IntrinsicLowering.cpp | 490 +- lib/CodeGen/LLVMTargetMachine.cpp | 136 +- lib/CodeGen/LiveInterval.cpp | 148 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 1002 ++- lib/CodeGen/LiveStackAnalysis.cpp | 12 +- lib/CodeGen/LiveVariables.cpp | 280 +- lib/CodeGen/LowerSubregs.cpp | 146 +- lib/CodeGen/MachO.h | 119 +- lib/CodeGen/MachOCodeEmitter.cpp | 84 +- lib/CodeGen/MachOCodeEmitter.h | 88 +- lib/CodeGen/MachOWriter.cpp | 222 +- lib/CodeGen/MachOWriter.h | 161 +- lib/CodeGen/MachineBasicBlock.cpp | 90 +- lib/CodeGen/MachineDominators.cpp | 4 + lib/CodeGen/MachineFunction.cpp | 313 +- lib/CodeGen/MachineFunctionAnalysis.cpp | 50 + lib/CodeGen/MachineFunctionPass.cpp | 50 + lib/CodeGen/MachineInstr.cpp | 268 +- lib/CodeGen/MachineLICM.cpp | 76 +- lib/CodeGen/MachineLoopInfo.cpp | 9 +- lib/CodeGen/MachineModuleInfo.cpp | 81 +- lib/CodeGen/MachineModuleInfoImpls.cpp | 45 + lib/CodeGen/MachineRegisterInfo.cpp | 8 +- lib/CodeGen/MachineSink.cpp | 59 +- lib/CodeGen/MachineVerifier.cpp | 382 +- lib/CodeGen/ObjectCodeEmitter.cpp | 141 + lib/CodeGen/PBQP/AnnotatedGraph.h | 184 + lib/CodeGen/PBQP/ExhaustiveSolver.h | 110 + lib/CodeGen/PBQP/GraphBase.h | 582 ++ lib/CodeGen/PBQP/HeuristicSolver.h | 789 ++ lib/CodeGen/PBQP/Heuristics/Briggs.h | 383 + lib/CodeGen/PBQP/PBQPMath.h | 288 + lib/CodeGen/PBQP/SimpleGraph.h | 100 + lib/CodeGen/PBQP/Solution.h | 88 + lib/CodeGen/PBQP/Solver.h | 31 + lib/CodeGen/PHIElimination.cpp | 113 +- lib/CodeGen/PHIElimination.h | 125 + lib/CodeGen/PostRASchedulerList.cpp | 659 +- lib/CodeGen/PreAllocSplitting.cpp | 228 +- lib/CodeGen/PrologEpilogInserter.cpp | 362 +- lib/CodeGen/PrologEpilogInserter.h | 14 + lib/CodeGen/PseudoSourceValue.cpp | 20 +- lib/CodeGen/README.txt | 67 +- lib/CodeGen/RegAllocLinearScan.cpp | 188 +- lib/CodeGen/RegAllocLocal.cpp | 92 +- lib/CodeGen/RegAllocPBQP.cpp | 241 +- lib/CodeGen/RegisterScavenging.cpp | 495 +- lib/CodeGen/ScheduleDAG.cpp | 103 +- lib/CodeGen/ScheduleDAGEmit.cpp | 4 - lib/CodeGen/ScheduleDAGInstrs.cpp | 135 +- lib/CodeGen/ScheduleDAGInstrs.h | 15 +- lib/CodeGen/ScheduleDAGPrinter.cpp | 14 +- lib/CodeGen/SelectionDAG/CMakeLists.txt | 6 +- lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 121 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 654 +- lib/CodeGen/SelectionDAG/FastISel.cpp | 183 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 693 ++ lib/CodeGen/SelectionDAG/InstrEmitter.h | 119 + lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 448 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 154 +- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 318 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 130 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 45 +- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 68 +- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 24 +- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 589 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 81 +- lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 26 +- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 73 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 113 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 69 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
| 1245 +-- lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 1269 +-- lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 34 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 423 +- lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 38 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 836 +- lib/CodeGen/ShadowStackGC.cpp | 87 +- lib/CodeGen/ShrinkWrapping.cpp | 223 +- lib/CodeGen/SimpleHazardRecognizer.h | 89 + lib/CodeGen/SimpleRegisterCoalescing.cpp | 1176 ++- lib/CodeGen/SimpleRegisterCoalescing.h | 88 +- lib/CodeGen/SjLjEHPrepare.cpp | 520 ++ lib/CodeGen/Spiller.cpp | 95 +- lib/CodeGen/StackProtector.cpp | 13 +- lib/CodeGen/StackSlotColoring.cpp | 75 +- lib/CodeGen/StrongPHIElimination.cpp | 71 +- lib/CodeGen/TargetInstrInfoImpl.cpp | 159 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 459 +- lib/CodeGen/UnreachableBlockElim.cpp | 25 +- lib/CodeGen/VirtRegMap.cpp | 5 +- lib/CodeGen/VirtRegMap.h | 22 +- lib/CodeGen/VirtRegRewriter.cpp | 470 +- lib/CodeGen/VirtRegRewriter.h | 18 - lib/CompilerDriver/BuiltinOptions.cpp | 2 + lib/CompilerDriver/CompilationGraph.cpp | 6 +- lib/CompilerDriver/Main.cpp | 28 +- lib/CompilerDriver/Plugin.cpp | 8 +- lib/CompilerDriver/Tool.cpp | 2 +- lib/ExecutionEngine/ExecutionEngine.cpp | 321 +- lib/ExecutionEngine/ExecutionEngineBindings.cpp | 25 +- lib/ExecutionEngine/Interpreter/Execution.cpp | 188 +- .../Interpreter/ExternalFunctions.cpp | 108 +- lib/ExecutionEngine/Interpreter/Interpreter.cpp | 4 +- lib/ExecutionEngine/Interpreter/Interpreter.h | 18 +- lib/ExecutionEngine/JIT/CMakeLists.txt | 2 + lib/ExecutionEngine/JIT/Intercept.cpp | 11 +- lib/ExecutionEngine/JIT/JIT.cpp | 172 +- lib/ExecutionEngine/JIT/JIT.h | 47 +- lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp | 208 + lib/ExecutionEngine/JIT/JITDebugRegisterer.h | 116 + lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 182 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 4 +- lib/ExecutionEngine/JIT/JITEmitter.cpp | 292 +- lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 387 +- lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp | 5 +- .../JIT/OProfileJITEventListener.cpp | 178 + lib/ExecutionEngine/JIT/TargetSelect.cpp | 85 +- lib/Linker/LinkArchives.cpp | 11 +- lib/Linker/LinkItems.cpp | 33 +- lib/Linker/LinkModules.cpp | 121 +- lib/Linker/Linker.cpp | 45 +- lib/MC/CMakeLists.txt | 19 + lib/MC/MCAsmInfo.cpp | 107 + lib/MC/MCAsmInfoCOFF.cpp | 37 + lib/MC/MCAsmInfoDarwin.cpp | 52 + lib/MC/MCAsmLexer.cpp | 23 + lib/MC/MCAsmParser.cpp | 18 + lib/MC/MCAsmStreamer.cpp | 311 +- lib/MC/MCAssembler.cpp | 1190 +++ lib/MC/MCCodeEmitter.cpp | 18 + lib/MC/MCContext.cpp | 33 +- lib/MC/MCDisassembler.cpp | 14 + lib/MC/MCExpr.cpp | 286 + lib/MC/MCInst.cpp | 50 + lib/MC/MCInstPrinter.cpp | 14 + lib/MC/MCMachOStreamer.cpp | 379 + lib/MC/MCNullStreamer.cpp | 70 + lib/MC/MCSection.cpp | 45 + lib/MC/MCSectionELF.cpp | 138 + lib/MC/MCSectionMachO.cpp | 271 + lib/MC/MCStreamer.cpp | 2 +- lib/MC/MCSymbol.cpp | 110 + lib/MC/MCValue.cpp | 34 + lib/MC/TargetAsmParser.cpp | 19 + lib/Makefile | 2 +- lib/Support/APFloat.cpp | 301 +- lib/Support/APInt.cpp | 455 +- lib/Support/Allocator.cpp | 242 +- lib/Support/CMakeLists.txt | 15 +- lib/Support/COPYRIGHT.regex | 54 + lib/Support/CommandLine.cpp | 696 +- lib/Support/ConstantRange.cpp | 330 +- lib/Support/Debug.cpp | 56 +- lib/Support/Dwarf.cpp | 43 +- lib/Support/ErrorHandling.cpp | 73 + lib/Support/FoldingSet.cpp | 17 +- lib/Support/FormattedStream.cpp | 93 + lib/Support/GraphWriter.cpp | 128 +- lib/Support/MemoryObject.cpp | 34 + lib/Support/PluginLoader.cpp | 13 +- 
lib/Support/PrettyStackTrace.cpp | 11 +- lib/Support/Regex.cpp | 92 + lib/Support/SlowOperationInformer.cpp | 11 +- lib/Support/SourceMgr.cpp | 40 + lib/Support/Statistic.cpp | 16 +- lib/Support/StringMap.cpp | 26 +- lib/Support/StringPool.cpp | 8 +- lib/Support/StringRef.cpp | 188 + lib/Support/SystemUtils.cpp | 41 +- lib/Support/TargetRegistry.cpp | 92 + lib/Support/Timer.cpp | 92 +- lib/Support/Triple.cpp | 352 +- lib/Support/Twine.cpp | 133 + lib/Support/raw_os_ostream.cpp | 30 + lib/Support/raw_ostream.cpp | 355 +- lib/Support/regcclass.h | 70 + lib/Support/regcname.h | 139 + lib/Support/regcomp.c | 1525 ++++ lib/Support/regengine.inc | 1027 +++ lib/Support/regerror.c | 135 + lib/Support/regex2.h | 157 + lib/Support/regex_impl.h | 108 + lib/Support/regexec.c | 161 + lib/Support/regfree.c | 72 + lib/Support/regstrlcpy.c | 52 + lib/Support/regutils.h | 53 + lib/System/CMakeLists.txt | 25 +- lib/System/Disassembler.cpp | 2 +- lib/System/DynamicLibrary.cpp | 79 +- lib/System/Errno.cpp | 5 +- lib/System/Makefile | 6 + lib/System/Memory.cpp | 19 +- lib/System/Mutex.cpp | 9 +- lib/System/Path.cpp | 22 +- lib/System/Program.cpp | 27 + lib/System/RWMutex.cpp | 12 +- lib/System/Threading.cpp | 1 + lib/System/Unix/Alarm.inc | 2 +- lib/System/Unix/Host.inc | 46 +- lib/System/Unix/Memory.inc | 7 +- lib/System/Unix/Path.inc | 73 +- lib/System/Unix/Process.inc | 29 +- lib/System/Unix/Program.inc | 131 +- lib/System/Unix/Signals.inc | 14 + lib/System/Unix/TimeValue.inc | 2 +- lib/System/Win32/DynamicLibrary.inc | 44 +- lib/System/Win32/Memory.inc | 7 +- lib/System/Win32/Path.inc | 30 +- lib/System/Win32/Process.inc | 10 +- lib/System/Win32/Program.inc | 130 +- lib/System/Win32/Signals.inc | 53 +- lib/System/Win32/TimeValue.inc | 2 +- lib/Target/ARM/ARM.h | 23 +- lib/Target/ARM/ARM.td | 33 +- lib/Target/ARM/ARMAddressingModes.h | 109 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 1060 +++ lib/Target/ARM/ARMBaseInstrInfo.h | 333 + lib/Target/ARM/ARMBaseRegisterInfo.cpp | 1360 +++ lib/Target/ARM/ARMBaseRegisterInfo.h | 148 + lib/Target/ARM/ARMCallingConv.td | 2 + lib/Target/ARM/ARMCodeEmitter.cpp | 211 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 775 +- lib/Target/ARM/ARMConstantPoolValue.cpp | 36 +- lib/Target/ARM/ARMConstantPoolValue.h | 33 +- lib/Target/ARM/ARMFrameInfo.h | 4 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 1196 ++- lib/Target/ARM/ARMISelLowering.cpp | 1523 +++- lib/Target/ARM/ARMISelLowering.h | 91 +- lib/Target/ARM/ARMInstrFormats.td | 906 +- lib/Target/ARM/ARMInstrInfo.cpp | 856 +- lib/Target/ARM/ARMInstrInfo.h | 243 +- lib/Target/ARM/ARMInstrInfo.td | 682 +- lib/Target/ARM/ARMInstrNEON.td | 2127 ++++- lib/Target/ARM/ARMInstrThumb.td | 662 +- lib/Target/ARM/ARMInstrThumb2.td | 826 +- lib/Target/ARM/ARMInstrVFP.td | 195 +- lib/Target/ARM/ARMJITInfo.cpp | 68 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 562 +- lib/Target/ARM/ARMMCAsmInfo.cpp | 72 + lib/Target/ARM/ARMMCAsmInfo.h | 31 + lib/Target/ARM/ARMMachineFunctionInfo.h | 19 +- lib/Target/ARM/ARMPerfectShuffle.h | 6586 +++++++++++++++ lib/Target/ARM/ARMRegisterInfo.cpp | 1367 +-- lib/Target/ARM/ARMRegisterInfo.h | 121 +- lib/Target/ARM/ARMRegisterInfo.td | 99 +- lib/Target/ARM/ARMSchedule.td | 149 +- lib/Target/ARM/ARMScheduleV6.td | 12 +- lib/Target/ARM/ARMScheduleV7.td | 587 ++ lib/Target/ARM/ARMSubtarget.cpp | 77 +- lib/Target/ARM/ARMSubtarget.h | 31 +- lib/Target/ARM/ARMTargetMachine.cpp | 218 +- lib/Target/ARM/ARMTargetMachine.h | 50 +- lib/Target/ARM/ARMTargetObjectFile.h | 39 + lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 618 ++ 
lib/Target/ARM/AsmParser/CMakeLists.txt | 6 + lib/Target/ARM/AsmParser/Makefile | 15 + lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 691 +- lib/Target/ARM/AsmPrinter/Makefile | 2 +- lib/Target/ARM/CMakeLists.txt | 7 +- lib/Target/ARM/Makefile | 2 +- lib/Target/ARM/NEONPreAllocPass.cpp | 394 + lib/Target/ARM/README-Thumb.txt | 28 + lib/Target/ARM/README-Thumb2.txt | 6 + lib/Target/ARM/README.txt | 63 + lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 23 + lib/Target/ARM/TargetInfo/CMakeLists.txt | 7 + lib/Target/ARM/TargetInfo/Makefile | 15 + lib/Target/ARM/Thumb1InstrInfo.cpp | 185 +- lib/Target/ARM/Thumb1InstrInfo.h | 27 +- lib/Target/ARM/Thumb1RegisterInfo.cpp | 323 +- lib/Target/ARM/Thumb1RegisterInfo.h | 39 +- lib/Target/ARM/Thumb2ITBlockPass.cpp | 158 + lib/Target/ARM/Thumb2InstrInfo.cpp | 635 +- lib/Target/ARM/Thumb2InstrInfo.h | 74 +- lib/Target/ARM/Thumb2RegisterInfo.cpp | 724 +- lib/Target/ARM/Thumb2RegisterInfo.h | 29 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 685 ++ lib/Target/Alpha/Alpha.h | 12 +- lib/Target/Alpha/Alpha.td | 6 + lib/Target/Alpha/AlphaBranchSelector.cpp | 2 +- lib/Target/Alpha/AlphaCallingConv.td | 37 + lib/Target/Alpha/AlphaCodeEmitter.cpp | 36 +- lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 168 +- lib/Target/Alpha/AlphaISelLowering.cpp | 514 +- lib/Target/Alpha/AlphaISelLowering.h | 42 +- lib/Target/Alpha/AlphaInstrInfo.cpp | 50 +- lib/Target/Alpha/AlphaInstrInfo.h | 10 - lib/Target/Alpha/AlphaInstrInfo.td | 12 +- lib/Target/Alpha/AlphaJITInfo.cpp | 46 +- lib/Target/Alpha/AlphaMCAsmInfo.cpp | 22 + lib/Target/Alpha/AlphaMCAsmInfo.h | 29 + lib/Target/Alpha/AlphaRegisterInfo.cpp | 39 +- lib/Target/Alpha/AlphaRegisterInfo.h | 5 +- lib/Target/Alpha/AlphaSubtarget.cpp | 2 +- lib/Target/Alpha/AlphaSubtarget.h | 5 +- lib/Target/Alpha/AlphaTargetMachine.cpp | 99 +- lib/Target/Alpha/AlphaTargetMachine.h | 34 +- lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 122 +- lib/Target/Alpha/AsmPrinter/Makefile | 2 +- lib/Target/Alpha/CMakeLists.txt | 3 +- lib/Target/Alpha/Makefile | 4 +- lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp | 20 + lib/Target/Alpha/TargetInfo/CMakeLists.txt | 7 + lib/Target/Alpha/TargetInfo/Makefile | 15 + .../Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp | 242 + lib/Target/Blackfin/AsmPrinter/CMakeLists.txt | 6 + lib/Target/Blackfin/AsmPrinter/Makefile | 16 + lib/Target/Blackfin/Blackfin.h | 38 + lib/Target/Blackfin/Blackfin.td | 201 + lib/Target/Blackfin/BlackfinCallingConv.td | 30 + lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 191 + lib/Target/Blackfin/BlackfinISelLowering.cpp | 614 ++ lib/Target/Blackfin/BlackfinISelLowering.h | 81 + lib/Target/Blackfin/BlackfinInstrFormats.td | 34 + lib/Target/Blackfin/BlackfinInstrInfo.cpp | 280 + lib/Target/Blackfin/BlackfinInstrInfo.h | 80 + lib/Target/Blackfin/BlackfinInstrInfo.td | 873 ++ lib/Target/Blackfin/BlackfinMCAsmInfo.cpp | 21 + lib/Target/Blackfin/BlackfinMCAsmInfo.h | 29 + lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 472 ++ lib/Target/Blackfin/BlackfinRegisterInfo.h | 104 + lib/Target/Blackfin/BlackfinRegisterInfo.td | 385 + lib/Target/Blackfin/BlackfinSubtarget.cpp | 36 + lib/Target/Blackfin/BlackfinSubtarget.h | 45 + lib/Target/Blackfin/BlackfinTargetMachine.cpp | 42 + lib/Target/Blackfin/BlackfinTargetMachine.h | 54 + lib/Target/Blackfin/CMakeLists.txt | 21 + lib/Target/Blackfin/Makefile | 23 + lib/Target/Blackfin/README.txt | 244 + .../Blackfin/TargetInfo/BlackfinTargetInfo.cpp | 21 + lib/Target/Blackfin/TargetInfo/CMakeLists.txt | 7 + lib/Target/Blackfin/TargetInfo/Makefile | 15 + 
lib/Target/CBackend/CBackend.cpp | 383 +- lib/Target/CBackend/CTargetMachine.h | 17 +- lib/Target/CBackend/Makefile | 3 + .../CBackend/TargetInfo/CBackendTargetInfo.cpp | 19 + lib/Target/CBackend/TargetInfo/CMakeLists.txt | 6 + lib/Target/CBackend/TargetInfo/Makefile | 15 + lib/Target/CMakeLists.txt | 8 +- lib/Target/CellSPU/AsmPrinter/CMakeLists.txt | 2 +- lib/Target/CellSPU/AsmPrinter/Makefile | 2 +- lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 160 +- lib/Target/CellSPU/CMakeLists.txt | 2 +- lib/Target/CellSPU/Makefile | 2 +- lib/Target/CellSPU/SPU.h | 8 +- lib/Target/CellSPU/SPUHazardRecognizers.cpp | 7 +- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 317 +- lib/Target/CellSPU/SPUISelLowering.cpp | 615 +- lib/Target/CellSPU/SPUISelLowering.h | 43 +- lib/Target/CellSPU/SPUInstrInfo.cpp | 103 +- lib/Target/CellSPU/SPUInstrInfo.h | 12 - lib/Target/CellSPU/SPUInstrInfo.td | 7 - lib/Target/CellSPU/SPUMCAsmInfo.cpp | 40 + lib/Target/CellSPU/SPUMCAsmInfo.h | 28 + lib/Target/CellSPU/SPUNodes.td | 6 +- lib/Target/CellSPU/SPURegisterInfo.cpp | 28 +- lib/Target/CellSPU/SPURegisterInfo.h | 8 +- lib/Target/CellSPU/SPUSubtarget.cpp | 6 +- lib/Target/CellSPU/SPUSubtarget.h | 9 +- lib/Target/CellSPU/SPUTargetMachine.cpp | 65 +- lib/Target/CellSPU/SPUTargetMachine.h | 27 +- lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 7 + .../CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20 + lib/Target/CellSPU/TargetInfo/Makefile | 15 + lib/Target/CppBackend/CPPBackend.cpp | 130 +- lib/Target/CppBackend/CPPTargetMachine.h | 19 +- lib/Target/CppBackend/Makefile | 3 + lib/Target/CppBackend/TargetInfo/CMakeLists.txt | 6 + .../CppBackend/TargetInfo/CppBackendTargetInfo.cpp | 26 + lib/Target/CppBackend/TargetInfo/Makefile | 15 + lib/Target/MSIL/MSILWriter.cpp | 184 +- lib/Target/MSIL/MSILWriter.h | 29 +- lib/Target/MSIL/Makefile | 3 + lib/Target/MSIL/TargetInfo/CMakeLists.txt | 6 + lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp | 26 + lib/Target/MSIL/TargetInfo/Makefile | 15 + lib/Target/MSP430/AsmPrinter/CMakeLists.txt | 6 + lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp | 281 + lib/Target/MSP430/AsmPrinter/Makefile | 15 + lib/Target/MSP430/CMakeLists.txt | 8 +- lib/Target/MSP430/MSP430.h | 8 +- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 189 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 262 +- lib/Target/MSP430/MSP430ISelLowering.h | 60 +- lib/Target/MSP430/MSP430InstrInfo.cpp | 7 +- lib/Target/MSP430/MSP430InstrInfo.td | 82 +- lib/Target/MSP430/MSP430MCAsmInfo.cpp | 20 + lib/Target/MSP430/MSP430MCAsmInfo.h | 28 + lib/Target/MSP430/MSP430RegisterInfo.cpp | 35 +- lib/Target/MSP430/MSP430RegisterInfo.h | 7 +- lib/Target/MSP430/MSP430Subtarget.cpp | 4 +- lib/Target/MSP430/MSP430Subtarget.h | 7 +- lib/Target/MSP430/MSP430TargetMachine.cpp | 57 +- lib/Target/MSP430/MSP430TargetMachine.h | 10 +- lib/Target/MSP430/Makefile | 4 +- lib/Target/MSP430/TargetInfo/CMakeLists.txt | 7 + lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp | 20 + lib/Target/MSP430/TargetInfo/Makefile | 15 + lib/Target/Mips/AsmPrinter/CMakeLists.txt | 2 +- lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 236 +- lib/Target/Mips/CMakeLists.txt | 3 +- lib/Target/Mips/Makefile | 2 +- lib/Target/Mips/Mips.h | 9 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 82 +- lib/Target/Mips/MipsISelLowering.cpp | 302 +- lib/Target/Mips/MipsISelLowering.h | 48 +- lib/Target/Mips/MipsInstrInfo.cpp | 50 +- lib/Target/Mips/MipsInstrInfo.h | 45 +- lib/Target/Mips/MipsMCAsmInfo.cpp | 27 + lib/Target/Mips/MipsMCAsmInfo.h | 30 + lib/Target/Mips/MipsMachineFunction.h | 4 +- 
lib/Target/Mips/MipsRegisterInfo.cpp | 45 +- lib/Target/Mips/MipsRegisterInfo.h | 7 +- lib/Target/Mips/MipsSubtarget.cpp | 36 +- lib/Target/Mips/MipsSubtarget.h | 20 +- lib/Target/Mips/MipsTargetMachine.cpp | 101 +- lib/Target/Mips/MipsTargetMachine.h | 31 +- lib/Target/Mips/MipsTargetObjectFile.cpp | 93 + lib/Target/Mips/MipsTargetObjectFile.h | 41 + lib/Target/Mips/TargetInfo/CMakeLists.txt | 7 + lib/Target/Mips/TargetInfo/Makefile | 15 + lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 21 + lib/Target/PIC16/AsmPrinter/CMakeLists.txt | 9 + lib/Target/PIC16/AsmPrinter/Makefile | 15 + lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp | 484 ++ lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h | 80 + lib/Target/PIC16/CMakeLists.txt | 4 +- lib/Target/PIC16/MCSectionPIC16.h | 88 + lib/Target/PIC16/Makefile | 4 +- lib/Target/PIC16/PIC16.h | 50 +- lib/Target/PIC16/PIC16DebugInfo.cpp | 197 +- lib/Target/PIC16/PIC16DebugInfo.h | 16 +- lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 2 + lib/Target/PIC16/PIC16ISelDAGToDAG.h | 2 +- lib/Target/PIC16/PIC16ISelLowering.cpp | 431 +- lib/Target/PIC16/PIC16ISelLowering.h | 65 +- lib/Target/PIC16/PIC16InstrInfo.cpp | 5 +- lib/Target/PIC16/PIC16InstrInfo.td | 11 +- lib/Target/PIC16/PIC16MCAsmInfo.cpp | 58 + lib/Target/PIC16/PIC16MCAsmInfo.h | 35 + lib/Target/PIC16/PIC16MemSelOpt.cpp | 2 +- lib/Target/PIC16/PIC16RegisterInfo.cpp | 17 +- lib/Target/PIC16/PIC16RegisterInfo.h | 5 +- lib/Target/PIC16/PIC16Subtarget.cpp | 2 +- lib/Target/PIC16/PIC16Subtarget.h | 5 +- lib/Target/PIC16/PIC16TargetMachine.cpp | 57 +- lib/Target/PIC16/PIC16TargetMachine.h | 18 +- lib/Target/PIC16/PIC16TargetObjectFile.cpp | 440 + lib/Target/PIC16/PIC16TargetObjectFile.h | 120 + lib/Target/PIC16/TargetInfo/CMakeLists.txt | 7 + lib/Target/PIC16/TargetInfo/Makefile | 15 + lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp | 21 + lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 565 +- lib/Target/PowerPC/CMakeLists.txt | 2 +- lib/Target/PowerPC/Makefile | 2 +- lib/Target/PowerPC/PPC.h | 13 +- lib/Target/PowerPC/PPCCodeEmitter.cpp | 37 +- lib/Target/PowerPC/PPCFrameInfo.h | 285 +- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 10 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 271 +- lib/Target/PowerPC/PPCISelLowering.cpp | 836 +- lib/Target/PowerPC/PPCISelLowering.h | 142 +- lib/Target/PowerPC/PPCInstr64Bit.td | 50 +- lib/Target/PowerPC/PPCInstrBuilder.h | 2 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 86 +- lib/Target/PowerPC/PPCInstrInfo.h | 10 - lib/Target/PowerPC/PPCInstrInfo.td | 63 +- lib/Target/PowerPC/PPCJITInfo.cpp | 10 +- lib/Target/PowerPC/PPCMCAsmInfo.cpp | 58 + lib/Target/PowerPC/PPCMCAsmInfo.h | 31 + lib/Target/PowerPC/PPCMachOWriterInfo.cpp | 5 +- lib/Target/PowerPC/PPCPredicates.cpp | 3 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 149 +- lib/Target/PowerPC/PPCRegisterInfo.h | 7 +- lib/Target/PowerPC/PPCRegisterInfo.td | 18 +- lib/Target/PowerPC/PPCSubtarget.cpp | 28 +- lib/Target/PowerPC/PPCSubtarget.h | 25 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 177 +- lib/Target/PowerPC/PPCTargetMachine.h | 45 +- lib/Target/PowerPC/README.txt | 2 +- lib/Target/PowerPC/TargetInfo/CMakeLists.txt | 7 + lib/Target/PowerPC/TargetInfo/Makefile | 15 + .../PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 23 + lib/Target/README.txt | 125 +- lib/Target/Sparc/AsmPrinter/Makefile | 2 +- lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 203 +- lib/Target/Sparc/CMakeLists.txt | 2 +- lib/Target/Sparc/FPMover.cpp | 10 +- lib/Target/Sparc/Makefile | 2 +- lib/Target/Sparc/Sparc.h | 10 +- lib/Target/Sparc/SparcISelDAGToDAG.cpp | 33 
+- lib/Target/Sparc/SparcISelLowering.cpp | 257 +- lib/Target/Sparc/SparcISelLowering.h | 39 +- lib/Target/Sparc/SparcInstrInfo.cpp | 73 +- lib/Target/Sparc/SparcInstrInfo.h | 12 +- lib/Target/Sparc/SparcInstrInfo.td | 65 +- lib/Target/Sparc/SparcMCAsmInfo.cpp | 38 + lib/Target/Sparc/SparcMCAsmInfo.h | 28 + lib/Target/Sparc/SparcMachineFunctionInfo.h | 32 + lib/Target/Sparc/SparcRegisterInfo.cpp | 21 +- lib/Target/Sparc/SparcRegisterInfo.h | 5 +- lib/Target/Sparc/SparcRegisterInfo.td | 12 +- lib/Target/Sparc/SparcSubtarget.cpp | 2 +- lib/Target/Sparc/SparcSubtarget.h | 5 +- lib/Target/Sparc/SparcTargetMachine.cpp | 66 +- lib/Target/Sparc/SparcTargetMachine.h | 24 +- lib/Target/Sparc/TargetInfo/CMakeLists.txt | 7 + lib/Target/Sparc/TargetInfo/Makefile | 15 + lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 19 + lib/Target/SubtargetFeature.cpp | 45 +- lib/Target/SystemZ/AsmPrinter/CMakeLists.txt | 6 + lib/Target/SystemZ/AsmPrinter/Makefile | 15 + .../SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 391 + lib/Target/SystemZ/CMakeLists.txt | 23 + lib/Target/SystemZ/Makefile | 22 + lib/Target/SystemZ/SystemZ.h | 61 + lib/Target/SystemZ/SystemZ.td | 61 + lib/Target/SystemZ/SystemZCallingConv.td | 46 + lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 829 ++ lib/Target/SystemZ/SystemZISelLowering.cpp | 843 ++ lib/Target/SystemZ/SystemZISelLowering.h | 141 + lib/Target/SystemZ/SystemZInstrBuilder.h | 128 + lib/Target/SystemZ/SystemZInstrFP.td | 340 + lib/Target/SystemZ/SystemZInstrFormats.td | 133 + lib/Target/SystemZ/SystemZInstrInfo.cpp | 648 ++ lib/Target/SystemZ/SystemZInstrInfo.h | 119 + lib/Target/SystemZ/SystemZInstrInfo.td | 1155 +++ lib/Target/SystemZ/SystemZMCAsmInfo.cpp | 26 + lib/Target/SystemZ/SystemZMCAsmInfo.h | 29 + lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 50 + lib/Target/SystemZ/SystemZOperands.td | 306 + lib/Target/SystemZ/SystemZRegisterInfo.cpp | 343 + lib/Target/SystemZ/SystemZRegisterInfo.h | 82 + lib/Target/SystemZ/SystemZRegisterInfo.td | 490 ++ lib/Target/SystemZ/SystemZSubtarget.cpp | 47 + lib/Target/SystemZ/SystemZSubtarget.h | 45 + lib/Target/SystemZ/SystemZTargetMachine.cpp | 44 + lib/Target/SystemZ/SystemZTargetMachine.h | 61 + lib/Target/SystemZ/TargetInfo/CMakeLists.txt | 7 + lib/Target/SystemZ/TargetInfo/Makefile | 15 + .../SystemZ/TargetInfo/SystemZTargetInfo.cpp | 19 + lib/Target/Target.cpp | 2 +- lib/Target/TargetData.cpp | 90 +- lib/Target/TargetInstrInfo.cpp | 69 +- lib/Target/TargetLoweringObjectFile.cpp | 1089 +++ lib/Target/TargetMachine.cpp | 39 +- lib/Target/TargetRegisterInfo.cpp | 10 +- lib/Target/X86/AsmParser/CMakeLists.txt | 6 + lib/Target/X86/AsmParser/Makefile | 15 + lib/Target/X86/AsmParser/X86AsmParser.cpp | 479 ++ lib/Target/X86/AsmPrinter/CMakeLists.txt | 6 +- lib/Target/X86/AsmPrinter/Makefile | 2 +- lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 84 +- lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 86 + lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 947 ++- lib/Target/X86/AsmPrinter/X86AsmPrinter.h | 150 + lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 131 + lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 99 + lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 485 ++ lib/Target/X86/AsmPrinter/X86MCInstLower.h | 54 + lib/Target/X86/CMakeLists.txt | 14 +- lib/Target/X86/Makefile | 4 +- lib/Target/X86/README-X86-64.txt | 49 + lib/Target/X86/README.txt | 20 + lib/Target/X86/TargetInfo/CMakeLists.txt | 7 + lib/Target/X86/TargetInfo/Makefile | 15 + lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 23 + lib/Target/X86/X86.h | 17 +- 
lib/Target/X86/X86.td | 32 +- lib/Target/X86/X86COFFMachineModuleInfo.cpp | 123 + lib/Target/X86/X86COFFMachineModuleInfo.h | 67 + lib/Target/X86/X86CallingConv.td | 45 +- lib/Target/X86/X86CodeEmitter.cpp | 672 +- lib/Target/X86/X86CompilationCallback_Win64.asm | 31 +- lib/Target/X86/X86ELFWriterInfo.cpp | 93 +- lib/Target/X86/X86ELFWriterInfo.h | 23 +- lib/Target/X86/X86FastISel.cpp | 415 +- lib/Target/X86/X86FloatingPoint.cpp | 45 +- lib/Target/X86/X86FloatingPointRegKill.cpp | 6 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 1124 ++- lib/Target/X86/X86ISelLowering.cpp | 2359 ++++-- lib/Target/X86/X86ISelLowering.h | 188 +- lib/Target/X86/X86Instr64bit.td | 468 +- lib/Target/X86/X86InstrBuilder.h | 52 +- lib/Target/X86/X86InstrFPStack.td | 25 + lib/Target/X86/X86InstrFormats.td | 30 +- lib/Target/X86/X86InstrInfo.cpp | 1336 +-- lib/Target/X86/X86InstrInfo.h | 122 +- lib/Target/X86/X86InstrInfo.td | 802 +- lib/Target/X86/X86InstrMMX.td | 121 +- lib/Target/X86/X86InstrSSE.td | 560 +- lib/Target/X86/X86JITInfo.cpp | 81 +- lib/Target/X86/X86JITInfo.h | 7 +- lib/Target/X86/X86MCAsmInfo.cpp | 123 + lib/Target/X86/X86MCAsmInfo.h | 42 + lib/Target/X86/X86RegisterInfo.cpp | 616 +- lib/Target/X86/X86RegisterInfo.h | 27 +- lib/Target/X86/X86RegisterInfo.td | 344 +- lib/Target/X86/X86Relocations.h | 30 +- lib/Target/X86/X86Subtarget.cpp | 232 +- lib/Target/X86/X86Subtarget.h | 107 +- lib/Target/X86/X86TargetMachine.cpp | 279 +- lib/Target/X86/X86TargetMachine.h | 50 +- lib/Target/X86/X86TargetObjectFile.cpp | 65 + lib/Target/X86/X86TargetObjectFile.h | 40 + lib/Target/XCore/AsmPrinter/CMakeLists.txt | 6 + lib/Target/XCore/AsmPrinter/Makefile | 16 + lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 374 + lib/Target/XCore/CMakeLists.txt | 5 +- lib/Target/XCore/MCSectionXCore.cpp | 35 + lib/Target/XCore/MCSectionXCore.h | 54 + lib/Target/XCore/Makefile | 4 +- lib/Target/XCore/TargetInfo/CMakeLists.txt | 7 + lib/Target/XCore/TargetInfo/Makefile | 15 + lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp | 19 + lib/Target/XCore/XCore.h | 8 +- lib/Target/XCore/XCore.td | 17 +- lib/Target/XCore/XCoreISelDAGToDAG.cpp | 64 +- lib/Target/XCore/XCoreISelLowering.cpp | 606 +- lib/Target/XCore/XCoreISelLowering.h | 57 +- lib/Target/XCore/XCoreInstrInfo.cpp | 47 +- lib/Target/XCore/XCoreInstrInfo.h | 14 +- lib/Target/XCore/XCoreInstrInfo.td | 68 +- lib/Target/XCore/XCoreMCAsmInfo.cpp | 31 + lib/Target/XCore/XCoreMCAsmInfo.h | 29 + lib/Target/XCore/XCoreRegisterInfo.cpp | 97 +- lib/Target/XCore/XCoreRegisterInfo.h | 5 +- lib/Target/XCore/XCoreSubtarget.cpp | 10 +- lib/Target/XCore/XCoreSubtarget.h | 11 +- lib/Target/XCore/XCoreTargetMachine.cpp | 50 +- lib/Target/XCore/XCoreTargetMachine.h | 13 +- lib/Target/XCore/XCoreTargetObjectFile.cpp | 67 + lib/Target/XCore/XCoreTargetObjectFile.h | 26 + lib/Transforms/Hello/Hello.cpp | 6 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 122 +- lib/Transforms/IPO/CMakeLists.txt | 7 +- lib/Transforms/IPO/ConstantMerge.cpp | 2 +- lib/Transforms/IPO/DeadArgumentElimination.cpp | 62 +- lib/Transforms/IPO/ExtractGV.cpp | 9 +- lib/Transforms/IPO/FunctionAttrs.cpp | 38 +- lib/Transforms/IPO/GlobalDCE.cpp | 4 + lib/Transforms/IPO/GlobalOpt.cpp | 838 +- lib/Transforms/IPO/IPConstantPropagation.cpp | 10 +- lib/Transforms/IPO/IndMemRemoval.cpp | 19 +- lib/Transforms/IPO/InlineAlways.cpp | 2 +- lib/Transforms/IPO/InlineSimple.cpp | 4 +- lib/Transforms/IPO/Inliner.cpp | 441 +- lib/Transforms/IPO/Internalize.cpp | 13 +- lib/Transforms/IPO/LoopExtractor.cpp | 108 +- lib/Transforms/IPO/LowerSetJmp.cpp | 
62 +- lib/Transforms/IPO/MergeFunctions.cpp | 45 +- lib/Transforms/IPO/PartialInlining.cpp | 3 +- lib/Transforms/IPO/PruneEH.cpp | 17 +- lib/Transforms/IPO/RaiseAllocations.cpp | 45 +- lib/Transforms/IPO/StripSymbols.cpp | 180 +- lib/Transforms/IPO/StructRetPromotion.cpp | 105 +- lib/Transforms/Instrumentation/BlockProfiling.cpp | 31 +- lib/Transforms/Instrumentation/CMakeLists.txt | 1 + lib/Transforms/Instrumentation/EdgeProfiling.cpp | 36 +- .../Instrumentation/MaximumSpanningTree.h | 95 + .../Instrumentation/OptimalEdgeProfiling.cpp | 219 + lib/Transforms/Instrumentation/ProfilingUtils.cpp | 39 +- lib/Transforms/Instrumentation/RSProfiling.cpp | 68 +- lib/Transforms/Makefile | 2 +- lib/Transforms/Scalar/ADCE.cpp | 4 +- lib/Transforms/Scalar/BasicBlockPlacement.cpp | 7 +- lib/Transforms/Scalar/CMakeLists.txt | 3 +- lib/Transforms/Scalar/CodeGenLICM.cpp | 112 + lib/Transforms/Scalar/CodeGenPrepare.cpp | 84 +- lib/Transforms/Scalar/CondPropagate.cpp | 12 +- lib/Transforms/Scalar/ConstantProp.cpp | 5 +- lib/Transforms/Scalar/DCE.cpp | 3 +- lib/Transforms/Scalar/DeadStoreElimination.cpp | 67 +- lib/Transforms/Scalar/GVN.cpp | 1386 +-- lib/Transforms/Scalar/IndVarSimplify.cpp | 137 +- lib/Transforms/Scalar/InstructionCombining.cpp | 4048 +++++---- lib/Transforms/Scalar/JumpThreading.cpp | 550 +- lib/Transforms/Scalar/LICM.cpp | 51 +- lib/Transforms/Scalar/LoopDeletion.cpp | 70 +- lib/Transforms/Scalar/LoopIndexSplit.cpp | 38 +- lib/Transforms/Scalar/LoopRotation.cpp | 72 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 349 +- lib/Transforms/Scalar/LoopUnroll.cpp | 58 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 227 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 230 +- lib/Transforms/Scalar/Reassociate.cpp | 83 +- lib/Transforms/Scalar/Reg2Mem.cpp | 133 +- lib/Transforms/Scalar/SCCP.cpp | 148 +- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 199 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 13 +- lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 15 +- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 1044 +-- lib/Transforms/Scalar/TailDuplication.cpp | 20 +- lib/Transforms/Scalar/TailRecursionElimination.cpp | 5 +- lib/Transforms/Utils/AddrModeMatcher.cpp | 15 +- lib/Transforms/Utils/BasicBlockUtils.cpp | 118 +- lib/Transforms/Utils/BasicInliner.cpp | 20 +- lib/Transforms/Utils/BreakCriticalEdges.cpp | 118 +- lib/Transforms/Utils/CMakeLists.txt | 7 +- lib/Transforms/Utils/CloneFunction.cpp | 33 +- lib/Transforms/Utils/CloneModule.cpp | 12 +- lib/Transforms/Utils/CodeExtractor.cpp | 139 +- lib/Transforms/Utils/DemoteRegToStack.cpp | 6 +- lib/Transforms/Utils/InlineFunction.cpp | 314 +- lib/Transforms/Utils/InstructionNamer.cpp | 4 +- lib/Transforms/Utils/LCSSA.cpp | 305 +- lib/Transforms/Utils/Local.cpp | 16 +- lib/Transforms/Utils/LoopSimplify.cpp | 218 +- lib/Transforms/Utils/LowerAllocations.cpp | 73 +- lib/Transforms/Utils/LowerInvoke.cpp | 107 +- lib/Transforms/Utils/LowerSwitch.cpp | 48 +- lib/Transforms/Utils/Mem2Reg.cpp | 2 +- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 54 +- lib/Transforms/Utils/SSAUpdater.cpp | 335 + lib/Transforms/Utils/SSI.cpp | 332 +- lib/Transforms/Utils/SimplifyCFG.cpp | 241 +- lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 18 +- lib/Transforms/Utils/UnrollLoop.cpp | 27 +- lib/Transforms/Utils/ValueMapper.cpp | 54 +- lib/VMCore/AsmWriter.cpp | 870 +- lib/VMCore/Attributes.cpp | 30 +- lib/VMCore/AutoUpgrade.cpp | 188 +- lib/VMCore/BasicBlock.cpp | 15 +- lib/VMCore/CMakeLists.txt | 3 +- lib/VMCore/ConstantFold.cpp | 1096 ++- lib/VMCore/ConstantFold.h | 
53 +- lib/VMCore/Constants.cpp | 2284 ++--- lib/VMCore/ConstantsContext.h | 787 ++ lib/VMCore/Core.cpp | 725 +- lib/VMCore/Dominators.cpp | 104 +- lib/VMCore/Function.cpp | 31 +- lib/VMCore/Globals.cpp | 33 +- lib/VMCore/InlineAsm.cpp | 34 +- lib/VMCore/Instruction.cpp | 104 +- lib/VMCore/Instructions.cpp | 1051 ++- lib/VMCore/IntrinsicInst.cpp | 10 +- lib/VMCore/LLVMContext.cpp | 507 +- lib/VMCore/LLVMContextImpl.h | 202 +- lib/VMCore/LeakDetector.cpp | 119 +- lib/VMCore/LeaksContext.h | 89 + lib/VMCore/Mangler.cpp | 172 +- lib/VMCore/Metadata.cpp | 433 + lib/VMCore/Module.cpp | 79 +- lib/VMCore/Pass.cpp | 40 +- lib/VMCore/PassManager.cpp | 244 +- lib/VMCore/Type.cpp | 876 +- lib/VMCore/TypeSymbolTable.cpp | 60 +- lib/VMCore/TypesContext.h | 424 + lib/VMCore/Use.cpp | 2 +- lib/VMCore/Value.cpp | 337 +- lib/VMCore/ValueSymbolTable.cpp | 68 +- lib/VMCore/ValueTypes.cpp | 148 +- lib/VMCore/Verifier.cpp | 396 +- projects/sample/configure | 10 +- runtime/Makefile | 16 +- runtime/libprofile/Makefile | 5 +- runtime/libprofile/OptimalEdgeProfiling.c | 45 + runtime/libprofile/exported_symbols.lst | 2 + test/Analysis/Andersens/2007-11-19-InlineAsm.ll | 2 +- test/Analysis/Andersens/2008-03-19-External.ll | 2 +- test/Analysis/Andersens/2008-04-07-Memcpy.ll | 2 +- .../Andersens/2008-12-27-BuiltinWrongType.ll | 2 +- test/Analysis/Andersens/basictest.ll | 2 +- test/Analysis/Andersens/external.ll | 2 +- test/Analysis/Andersens/modreftest.ll | 2 +- test/Analysis/Andersens/modreftest2.ll | 2 +- test/Analysis/Andersens/trivialtest.ll | 2 +- test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll | 2 +- test/Analysis/BasicAA/2003-03-04-GEPCrash.ll | 2 +- test/Analysis/BasicAA/2003-04-22-GEPProblem.ll | 2 +- test/Analysis/BasicAA/2003-04-25-GEPCrash.ll | 2 +- test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll | 2 +- test/Analysis/BasicAA/2003-06-01-AliasCrash.ll | 2 +- test/Analysis/BasicAA/2003-07-03-BasicAACrash.ll | 2 +- test/Analysis/BasicAA/2003-09-19-LocalArgument.ll | 2 +- test/Analysis/BasicAA/2003-11-04-SimpleCases.ll | 2 +- test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll | 2 +- test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll | 2 +- test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll | 2 +- test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll | 2 +- test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll | 2 +- .../BasicAA/2006-03-03-BadArraySubscript.ll | 2 +- .../BasicAA/2006-11-03-BasicAAVectorCrash.ll | 2 +- .../BasicAA/2007-01-13-BasePointerBadNoAlias.ll | 4 +- .../Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll | 2 +- test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll | 6 +- .../BasicAA/2007-08-05-GetOverloadedModRef.ll | 2 +- .../BasicAA/2007-10-24-ArgumentsGlobals.ll | 2 +- test/Analysis/BasicAA/2007-11-05-SizeCrash.ll | 2 +- .../BasicAA/2007-12-08-OutOfBoundsCrash.ll | 2 +- test/Analysis/BasicAA/2008-04-15-Byval.ll | 2 +- test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll | 2 +- test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll | 2 +- .../BasicAA/2008-12-09-GEP-IndicesAlias.ll | 2 +- test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll | 2 +- test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll | 16 + .../Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll | 30 + test/Analysis/BasicAA/byval.ll | 2 +- test/Analysis/BasicAA/cas.ll | 7 +- test/Analysis/BasicAA/constant-over-index.ll | 2 +- test/Analysis/BasicAA/featuretest.ll | 2 +- test/Analysis/BasicAA/gcsetest.ll | 4 +- test/Analysis/BasicAA/global-size.ll | 2 +- test/Analysis/BasicAA/modref.ll | 2 +- test/Analysis/BasicAA/no-escape-call.ll | 2 +- 
test/Analysis/BasicAA/nocapture.ll | 2 +- test/Analysis/BasicAA/phi-aa.ll | 29 + test/Analysis/BasicAA/pure-const-dce.ll | 6 +- test/Analysis/BasicAA/store-promote.ll | 53 + test/Analysis/BasicAA/tailcall-modref.ll | 2 +- test/Analysis/CallGraph/2008-09-09-DirectCall.ll | 2 +- test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll | 2 +- .../Dominators/2006-10-02-BreakCritEdges.ll | 2 +- .../Dominators/2007-01-14-BreakCritEdges.ll | 2 +- test/Analysis/Dominators/2007-07-11-SplitBlock.ll | 2 +- test/Analysis/Dominators/2007-07-12-SplitBlock.ll | 2 +- .../GlobalsModRef/2008-09-03-ReadGlobals.ll | 2 +- test/Analysis/GlobalsModRef/aliastest.ll | 2 +- test/Analysis/GlobalsModRef/chaining-analysis.ll | 2 +- test/Analysis/GlobalsModRef/indirect-global.ll | 2 +- test/Analysis/GlobalsModRef/modreftest.ll | 2 +- test/Analysis/GlobalsModRef/purecse.ll | 2 +- test/Analysis/LoopDependenceAnalysis/alias.ll | 44 + test/Analysis/LoopDependenceAnalysis/siv-strong.ll | 110 + .../LoopDependenceAnalysis/siv-weak-crossing.ll | 118 + .../LoopDependenceAnalysis/siv-weak-zero.ll | 56 + test/Analysis/LoopDependenceAnalysis/ziv.ll | 63 + .../Analysis/LoopInfo/2003-05-15-NestingProblem.ll | 2 +- test/Analysis/PointerTracking/dg.exp | 3 + test/Analysis/PointerTracking/sizes.ll | 84 + .../2006-09-26-PostDominanceFrontier.ll | 2 +- .../2007-04-17-PostDominanceFrontier.ll | 2 +- .../PostDominators/2007-04-20-PostDom-Reset.ll | 2 +- test/Analysis/PostDominators/pr1098.ll | 2 +- test/Analysis/Profiling/dg.exp | 4 + test/Analysis/Profiling/edge-profiling.ll | 139 + test/Analysis/Profiling/profiling-tool-chain.ll | 212 + .../ScalarEvolution/2007-07-15-NegativeStride.ll | 2 +- .../2007-08-06-MisinterpretBranch.ll | 2 +- .../ScalarEvolution/2007-08-06-Unsigned.ll | 2 +- .../ScalarEvolution/2007-09-27-LargeStepping.ll | 2 +- .../ScalarEvolution/2007-11-14-SignedAddRec.ll | 2 +- .../ScalarEvolution/2007-11-18-OrInstruction.ll | 2 +- .../2008-02-11-ReversedCondition.ll | 2 +- .../ScalarEvolution/2008-02-12-SMAXTripCount.ll | 2 +- test/Analysis/ScalarEvolution/2008-02-15-UMax.ll | 2 +- .../2008-05-25-NegativeStepToZero.ll | 2 +- .../ScalarEvolution/2008-06-12-BinomialInt64.ll | 2 +- .../ScalarEvolution/2008-07-12-UnneededSelect1.ll | 2 +- .../ScalarEvolution/2008-07-12-UnneededSelect2.ll | 2 +- .../ScalarEvolution/2008-07-19-InfiniteLoop.ll | 2 +- .../ScalarEvolution/2008-07-19-WrappingIV.ll | 2 +- .../ScalarEvolution/2008-07-29-SGTTripCount.ll | 2 +- .../ScalarEvolution/2008-07-29-SMinExpr.ll | 2 +- .../ScalarEvolution/2008-08-04-IVOverflow.ll | 2 +- .../ScalarEvolution/2008-08-04-LongAddRec.ll | 2 +- .../ScalarEvolution/2008-11-02-QuadraticCrash.ll | 2 +- .../ScalarEvolution/2008-11-15-CubicOOM.ll | 2 +- .../ScalarEvolution/2008-11-18-LessThanOrEqual.ll | 2 +- .../Analysis/ScalarEvolution/2008-11-18-Stride1.ll | 2 +- .../Analysis/ScalarEvolution/2008-11-18-Stride2.ll | 2 +- .../ScalarEvolution/2008-12-08-FiniteSGE.ll | 2 +- .../ScalarEvolution/2008-12-11-SMaxOverflow.ll | 2 +- .../ScalarEvolution/2008-12-14-StrideAndSigned.ll | 2 +- .../ScalarEvolution/2008-12-15-DontUseSDiv.ll | 2 +- .../2009-01-02-SignedNegativeStride.ll | 2 +- .../ScalarEvolution/2009-04-22-TruncCast.ll | 2 +- .../ScalarEvolution/2009-05-09-PointerEdgeCount.ll | 2 +- .../2009-07-04-GroupConstantsWidthMismatch.ll | 16 + .../ScalarEvolution/SolveQuadraticEquation.ll | 2 +- test/Analysis/ScalarEvolution/and-xor.ll | 2 +- .../ScalarEvolution/avoid-infinite-recursion-0.ll | 30 + .../ScalarEvolution/avoid-infinite-recursion-1.ll | 354 + 
test/Analysis/ScalarEvolution/avoid-smax-0.ll | 2 +-
test/Analysis/ScalarEvolution/avoid-smax-1.ll | 4 +-
test/Analysis/ScalarEvolution/div-overflow.ll | 2 +-
test/Analysis/ScalarEvolution/do-loop.ll | 2 +-
test/Analysis/ScalarEvolution/max-trip-count.ll | 6 +-
test/Analysis/ScalarEvolution/nsw-offset.ll | 76 +
test/Analysis/ScalarEvolution/nsw.ll | 40 +
test/Analysis/ScalarEvolution/pointer-sign-bits.ll | 2 +-
test/Analysis/ScalarEvolution/pr3909.ll | 2 +-
test/Analysis/ScalarEvolution/scev-aa.ll | 194 +
test/Analysis/ScalarEvolution/sext-inreg.ll | 2 +-
test/Analysis/ScalarEvolution/sext-iv-0.ll | 2 +-
test/Analysis/ScalarEvolution/sext-iv-1.ll | 2 +-
test/Analysis/ScalarEvolution/sext-iv-2.ll | 74 +
test/Analysis/ScalarEvolution/smax.ll | 4 +-
test/Analysis/ScalarEvolution/trip-count.ll | 2 +-
test/Analysis/ScalarEvolution/trip-count2.ll | 2 +-
test/Analysis/ScalarEvolution/trip-count3.ll | 2 +-
test/Analysis/ScalarEvolution/trip-count4.ll | 2 +-
test/Analysis/ScalarEvolution/trip-count5.ll | 2 +-
test/Analysis/ScalarEvolution/trip-count6.ll | 2 +-
test/Analysis/ScalarEvolution/trip-count7.ll | 2 +-
test/Analysis/ScalarEvolution/trip-count8.ll | 37 +
test/Analysis/ScalarEvolution/xor-and.ll | 2 +-
test/Analysis/ScalarEvolution/zext-wrap.ll | 24 +
test/Archive/extract.ll | 16 +
test/Assembler/2002-01-24-BadSymbolTableAssert.ll | 2 +-
test/Assembler/2002-01-24-ValueRefineAbsType.ll | 2 +-
test/Assembler/2002-02-19-TypeParsing.ll | 2 +-
test/Assembler/2002-03-08-NameCollision.ll | 2 +-
test/Assembler/2002-03-08-NameCollision2.ll | 2 +-
test/Assembler/2002-04-04-PureVirtMethCall.ll | 2 +-
test/Assembler/2002-04-04-PureVirtMethCall2.ll | 2 +-
test/Assembler/2002-04-05-TypeParsing.ll | 2 +-
test/Assembler/2002-04-07-HexFloatConstants.ll | 2 +-
test/Assembler/2002-04-29-NameBinding.ll | 2 +-
test/Assembler/2002-05-02-InvalidForwardRef.ll | 2 +-
test/Assembler/2002-05-02-ParseError.ll | 2 +-
.../Assembler/2002-07-08-HugePerformanceProblem.ll | 2 +-
.../Assembler/2002-07-25-ParserAssertionFailure.ll | 2 +-
test/Assembler/2002-08-15-CastAmbiguity.ll | 2 +-
test/Assembler/2002-08-15-ConstantExprProblem.ll | 2 +-
.../2002-08-15-UnresolvedGlobalReference.ll | 2 +-
test/Assembler/2002-08-19-BytecodeReader.ll | 2 +-
test/Assembler/2002-08-22-DominanceProblem.ll | 2 +-
test/Assembler/2002-10-08-LargeArrayPerformance.ll | 2 +-
test/Assembler/2002-10-15-NameClash.ll | 2 +-
test/Assembler/2002-12-15-GlobalResolve.ll | 2 +-
test/Assembler/2003-01-30-UnsignedString.ll | 2 +-
.../2003-04-25-UnresolvedGlobalReference.ll | 2 +-
test/Assembler/2003-05-15-AssemblerProblem.ll | 2 +-
test/Assembler/2003-05-15-SwitchBug.ll | 2 +-
test/Assembler/2003-05-21-ConstantShiftExpr.ll | 2 +-
test/Assembler/2003-05-21-EmptyStructTest.ll | 2 +-
test/Assembler/2003-06-30-RecursiveTypeProblem.ll | 2 +-
test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll | 2 +-
.../2003-10-04-NotMergingGlobalConstants.ll | 2 +-
test/Assembler/2003-12-30-TypeMapInvalidMemory.ll | 2 +-
test/Assembler/2004-02-27-SelfUseAssertError.ll | 2 +-
.../2004-04-04-GetElementPtrIndexTypes.ll | 2 +-
test/Assembler/2004-10-22-BCWriterUndefBug.ll | 2 +-
test/Assembler/2004-11-28-InvalidTypeCrash.ll | 2 +-
.../2005-01-31-CallingAggregateFunction.ll | 2 +-
test/Assembler/2007-01-02-Undefined-Arg-Type.ll | 2 +-
test/Assembler/2007-01-05-Cmp-ConstExpr.ll | 2 +-
test/Assembler/2007-01-16-CrashOnBadCast.ll | 2 +-
test/Assembler/2007-01-16-CrashOnBadCast2.ll | 2 +-
test/Assembler/2007-03-18-InvalidNumberedVar.ll | 2 +-
test/Assembler/2008-02-20-MultipleReturnValue.ll | 2 +-
test/Assembler/2008-09-02-FunctionNotes2.ll | 2 +-
test/Assembler/2009-02-28-StripOpaqueName.ll | 2 +-
test/Assembler/2009-07-24-ZeroArgGEP.ll | 5 +
test/Assembler/ConstantExprFold.ll | 1 +
test/Assembler/anon-functions.ll | 2 +-
test/Assembler/flags.ll | 212 +
test/Assembler/getelementptr.ll | 12 +-
test/Assembler/insertextractvalue.ll | 6 +
test/Assembler/msasm.ll | 36 +
test/Assembler/select.ll | 2 +-
test/Assembler/unnamed.ll | 51 +
test/Assembler/vector-cmp.ll | 12 +-
test/Bindings/Ocaml/analysis.ml | 10 +-
test/Bindings/Ocaml/bitreader.ml | 14 +-
test/Bindings/Ocaml/bitwriter.ml | 8 +-
test/Bindings/Ocaml/dg.exp | 5 +
test/Bindings/Ocaml/executionengine.ml | 16 +-
test/Bindings/Ocaml/scalar_opts.ml | 10 +-
test/Bindings/Ocaml/target.ml | 11 +-
test/Bindings/Ocaml/vmcore.ml | 168 +-
test/Bitcode/extractelement.ll | 2 +-
test/Bitcode/memcpy.ll | 2 +-
test/Bitcode/metadata-2.ll | 87 +
test/Bitcode/metadata.ll | 6 +
test/BugPoint/crash-narrowfunctiontest.ll | 2 +-
test/BugPoint/remove_arguments_test.ll | 2 +-
test/CMakeLists.txt | 31 +
test/CodeGen/ARM/2006-11-10-CycleInDAG.ll | 2 +-
test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 2 +-
test/CodeGen/ARM/2007-03-07-CombinerCrash.ll | 2 +-
test/CodeGen/ARM/2007-03-13-InstrSched.ll | 4 +-
test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll | 2 +-
test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2007-04-03-PEIBug.ll | 2 +-
test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll | 2 +-
test/CodeGen/ARM/2007-04-30-CombinerCrash.ll | 2 +-
test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll | 2 +-
test/CodeGen/ARM/2007-05-07-jumptoentry.ll | 2 +-
test/CodeGen/ARM/2007-05-07-tailmerge-1.ll | 8 +-
test/CodeGen/ARM/2007-05-09-tailmerge-2.ll | 8 +-
test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll | 2 +-
test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2007-05-22-tailmerge-3.ll | 16 +-
test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll | 2 +-
.../ARM/2007-05-31-RegScavengerInfiniteLoop.ll | 2 +-
test/CodeGen/ARM/2007-08-15-ReuseBug.ll | 2 +-
test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll | 2 +-
test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll | 2 +-
test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll | 2 +-
test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll | 2 +-
test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll | 2 +-
test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2008-07-17-Fdiv.ll | 2 +-
test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll | 2 +-
test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll | 2 +-
test/CodeGen/ARM/2008-09-14-CoalescerBug.ll | 2 +-
test/CodeGen/ARM/2008-09-17-CoalescerBug.ll | 2 +-
test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2009-02-16-SpillerBug.ll | 2 +-
test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll | 2 +-
test/CodeGen/ARM/2009-02-27-SpillerBug.ll | 2 +-
test/CodeGen/ARM/2009-03-07-SpillerBug.ll | 2 +-
test/CodeGen/ARM/2009-03-09-AddrModeBug.ll | 2 +-
test/CodeGen/ARM/2009-04-06-AsmModifier.ll | 2 +-
test/CodeGen/ARM/2009-04-08-AggregateAddr.ll | 2 +-
test/CodeGen/ARM/2009-04-08-FREM.ll | 2 +-
test/CodeGen/ARM/2009-04-08-FloatUndef.ll | 2 +-
test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll | 2 +-
test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll | 2 +-
test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll | 2 +-
test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll | 2 +-
test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll | 4 +-
test/CodeGen/ARM/2009-06-02-ISelCrash.ll | 2 +-
test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll | 2 +-
test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2009-06-22-CoalescerBug.ll | 2 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll | 2 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll | 2 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll | 2 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll | 2 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll | 2 +-
test/CodeGen/ARM/2009-07-01-CommuteBug.ll | 2 +-
test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll | 7 +
test/CodeGen/ARM/2009-07-18-RewriterBug.ll | 1323 +++
test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll | 94 +
test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll | 95 +
test/CodeGen/ARM/2009-07-29-VFP3Registers.ll | 108 +
.../ARM/2009-08-02-RegScavengerAssert-Neon.ll | 29 +
.../CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll | 33 +
test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll | 25 +
.../ARM/2009-08-15-RegScavenger-EarlyClobber.ll | 42 +
test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll | 10 +
test/CodeGen/ARM/2009-08-21-PostRAKill.ll | 40 +
test/CodeGen/ARM/2009-08-21-PostRAKill2.ll | 38 +
test/CodeGen/ARM/2009-08-21-PostRAKill3.ll | 31 +
test/CodeGen/ARM/2009-08-21-PostRAKill4.ll | 26 +
test/CodeGen/ARM/2009-08-23-linkerprivate.ll | 8 +
test/CodeGen/ARM/2009-08-26-ScalarToVector.ll | 27 +
test/CodeGen/ARM/2009-08-27-ScalarToVector.ll | 35 +
test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll | 25 +
test/CodeGen/ARM/2009-08-29-TooLongSplat.ll | 23 +
test/CodeGen/ARM/2009-08-31-LSDA-Name.ll | 103 +
test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll | 9 +
test/CodeGen/ARM/2009-09-01-PostRAProlog.ll | 106 +
test/CodeGen/ARM/2009-09-09-AllOnes.ll | 10 +
test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll | 18 +
test/CodeGen/ARM/2009-09-10-postdec.ll | 11 +
test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll | 61 +
test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll | 41 +
test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll | 34 +
test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll | 14 +
test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll | 23 +
test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll | 21 +
test/CodeGen/ARM/2009-09-24-spill-align.ll | 17 +
test/CodeGen/ARM/2009-09-27-CoalescerBug.ll | 24 +
test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll | 19 +
test/CodeGen/ARM/addrmode.ll | 2 +-
test/CodeGen/ARM/aliases.ll | 3 +-
test/CodeGen/ARM/align.ll | 8 +-
test/CodeGen/ARM/alloca.ll | 4 +-
test/CodeGen/ARM/argaddr.ll | 2 +-
test/CodeGen/ARM/arguments-nosplit-double.ll | 2 +-
test/CodeGen/ARM/arguments-nosplit-i64.ll | 2 +-
test/CodeGen/ARM/arguments.ll | 4 +-
test/CodeGen/ARM/arguments2.ll | 4 +-
test/CodeGen/ARM/arguments3.ll | 4 +-
test/CodeGen/ARM/arguments4.ll | 4 +-
test/CodeGen/ARM/arguments5.ll | 4 +-
test/CodeGen/ARM/arguments6.ll | 4 +-
test/CodeGen/ARM/arguments7.ll | 4 +-
test/CodeGen/ARM/arguments8.ll | 4 +-
test/CodeGen/ARM/arguments_f64_backfill.ll | 2 +-
test/CodeGen/ARM/arm-asm.ll | 2 +-
test/CodeGen/ARM/arm-frameaddr.ll | 4 +-
test/CodeGen/ARM/arm-negative-stride.ll | 2 +-
test/CodeGen/ARM/bfc.ll | 19 +
test/CodeGen/ARM/bic.ll | 2 +-
test/CodeGen/ARM/bits.ll | 2 +-
test/CodeGen/ARM/bx_fold.ll | 4 +-
test/CodeGen/ARM/call.ll | 6 +-
test/CodeGen/ARM/call_nolink.ll | 2 +-
test/CodeGen/ARM/carry.ll | 6 +-
test/CodeGen/ARM/clz.ll | 2 +-
test/CodeGen/ARM/compare-call.ll | 2 +-
test/CodeGen/ARM/constants.ll | 14 +-
test/CodeGen/ARM/cse-libcalls.ll | 2 +-
test/CodeGen/ARM/ctors_dtors.ll | 24 +-
test/CodeGen/ARM/div.ll | 2 +-
test/CodeGen/ARM/dyn-stackalloc.ll | 2 +-
test/CodeGen/ARM/extloadi1.ll | 2 +-
test/CodeGen/ARM/fabss.ll | 15 +
test/CodeGen/ARM/fadds.ll | 12 +
test/CodeGen/ARM/fcopysign.ll | 4 +-
test/CodeGen/ARM/fdivs.ll | 12 +
test/CodeGen/ARM/fixunsdfdi.ll | 4 +-
test/CodeGen/ARM/fmacs.ll | 13 +
test/CodeGen/ARM/fmdrr-fmrrd.ll | 4 +-
test/CodeGen/ARM/fmscs.ll | 13 +
test/CodeGen/ARM/fmuls.ll | 12 +
test/CodeGen/ARM/fnegs.ll | 25 +
test/CodeGen/ARM/fnmacs.ll | 13 +
test/CodeGen/ARM/fnmscs.ll | 24 +
test/CodeGen/ARM/fnmul.ll | 4 +-
test/CodeGen/ARM/fnmuls.ll | 23 +
test/CodeGen/ARM/formal.ll | 2 +-
test/CodeGen/ARM/fp.ll | 38 +-
test/CodeGen/ARM/fp_convert.ll | 49 +
test/CodeGen/ARM/fparith.ll | 34 +-
test/CodeGen/ARM/fpcmp.ll | 30 +-
test/CodeGen/ARM/fpcmp_ueq.ll | 4 +-
test/CodeGen/ARM/fpconv.ll | 64 +-
test/CodeGen/ARM/fpmem.ll | 6 +-
test/CodeGen/ARM/fpow.ll | 2 +-
test/CodeGen/ARM/fpowi.ll | 2 +-
test/CodeGen/ARM/fptoint.ll | 4 +-
test/CodeGen/ARM/fsubs.ll | 10 +
test/CodeGen/ARM/hardfloat_neon.ll | 13 +
test/CodeGen/ARM/hello.ll | 8 +-
test/CodeGen/ARM/hidden-vis-2.ll | 5 +-
test/CodeGen/ARM/hidden-vis-3.ll | 9 +-
test/CodeGen/ARM/hidden-vis.ll | 19 +-
test/CodeGen/ARM/iabs.ll | 2 +-
test/CodeGen/ARM/ifcvt1.ll | 4 +-
test/CodeGen/ARM/ifcvt2.ll | 8 +-
test/CodeGen/ARM/ifcvt3.ll | 6 +-
test/CodeGen/ARM/ifcvt4.ll | 6 +-
test/CodeGen/ARM/ifcvt5.ll | 5 +-
test/CodeGen/ARM/ifcvt6.ll | 8 +-
test/CodeGen/ARM/ifcvt7.ll | 11 +-
test/CodeGen/ARM/ifcvt8.ll | 5 +-
test/CodeGen/ARM/ifcvt9.ll | 2 +-
test/CodeGen/ARM/illegal-vector-bitcast.ll | 3 +-
test/CodeGen/ARM/imm.ll | 2 +-
test/CodeGen/ARM/inlineasm-imm-arm.ll | 2 +-
test/CodeGen/ARM/inlineasm.ll | 2 +-
test/CodeGen/ARM/inlineasm2.ll | 2 +-
test/CodeGen/ARM/insn-sched1.ll | 4 +-
test/CodeGen/ARM/ispositive.ll | 2 +-
test/CodeGen/ARM/large-stack.ll | 2 +-
test/CodeGen/ARM/ldm.ll | 6 +-
test/CodeGen/ARM/ldr.ll | 10 +-
test/CodeGen/ARM/ldr_ext.ll | 31 +-
test/CodeGen/ARM/ldr_frame.ll | 2 +-
test/CodeGen/ARM/ldr_post.ll | 2 +-
test/CodeGen/ARM/ldr_pre.ll | 2 +-
test/CodeGen/ARM/ldrd.ll | 14 +-
test/CodeGen/ARM/load-global.ll | 12 +-
test/CodeGen/ARM/load.ll | 2 +-
test/CodeGen/ARM/long-setcc.ll | 2 +-
test/CodeGen/ARM/long.ll | 16 +-
test/CodeGen/ARM/long_shift.ll | 2 +-
test/CodeGen/ARM/lsr-code-insertion.ll | 4 +-
test/CodeGen/ARM/lsr-scale-addr-mode.ll | 2 +-
test/CodeGen/ARM/mem.ll | 4 +-
test/CodeGen/ARM/memcpy-inline.ll | 8 +-
test/CodeGen/ARM/memfunc.ll | 2 +-
test/CodeGen/ARM/mls.ll | 14 +
test/CodeGen/ARM/mul.ll | 4 +-
test/CodeGen/ARM/mul_const.ll | 17 +
test/CodeGen/ARM/mulhi.ll | 6 +-
test/CodeGen/ARM/mvn.ll | 2 +-
test/CodeGen/ARM/neon_arith1.ll | 2 +-
test/CodeGen/ARM/neon_ld1.ll | 6 +-
test/CodeGen/ARM/neon_ld2.ll | 6 +-
test/CodeGen/ARM/pack.ll | 4 +-
test/CodeGen/ARM/pr3502.ll | 2 +-
test/CodeGen/ARM/private.ll | 2 +-
test/CodeGen/ARM/remat.ll | 4 +-
test/CodeGen/ARM/ret0.ll | 2 +-
test/CodeGen/ARM/ret_arg1.ll | 2 +-
test/CodeGen/ARM/ret_arg2.ll | 2 +-
test/CodeGen/ARM/ret_arg3.ll | 2 +-
test/CodeGen/ARM/ret_arg4.ll | 2 +-
test/CodeGen/ARM/ret_arg5.ll | 2 +-
test/CodeGen/ARM/ret_f32_arg2.ll | 2 +-
test/CodeGen/ARM/ret_f32_arg5.ll | 2 +-
test/CodeGen/ARM/ret_f64_arg2.ll | 2 +-
test/CodeGen/ARM/ret_f64_arg_reg_split.ll | 2 +-
test/CodeGen/ARM/ret_f64_arg_split.ll | 2 +-
test/CodeGen/ARM/ret_f64_arg_stack.ll | 2 +-
test/CodeGen/ARM/ret_i128_arg2.ll | 2 +-
test/CodeGen/ARM/ret_i64_arg2.ll | 2 +-
test/CodeGen/ARM/ret_i64_arg3.ll | 2 +-
test/CodeGen/ARM/ret_i64_arg_split.ll | 2 +-
test/CodeGen/ARM/ret_void.ll | 2 +-
test/CodeGen/ARM/rev.ll | 4 +-
test/CodeGen/ARM/sbfx.ll | 37 +
test/CodeGen/ARM/section.ll | 4 +-
test/CodeGen/ARM/select.ll | 27 +-
test/CodeGen/ARM/select_xform.ll | 2 +-
test/CodeGen/ARM/shifter_operand.ll | 4 +-
test/CodeGen/ARM/smul.ll | 10 +-
test/CodeGen/ARM/spill-q.ll | 57 +
test/CodeGen/ARM/stack-frame.ll | 4 +-
test/CodeGen/ARM/stm.ll | 2 +-
test/CodeGen/ARM/str_post.ll | 4 +-
test/CodeGen/ARM/str_pre-2.ll | 4 +-
test/CodeGen/ARM/str_pre.ll | 2 +-
test/CodeGen/ARM/str_trunc.ll | 4 +-
test/CodeGen/ARM/sxt_rot.ll | 6 +-
test/CodeGen/ARM/t2-imm.ll | 9 +
test/CodeGen/ARM/thread_pointer.ll | 2 +-
test/CodeGen/ARM/tls1.ll | 6 +-
test/CodeGen/ARM/tls2.ll | 6 +-
test/CodeGen/ARM/tls3.ll | 2 +-
test/CodeGen/ARM/trunc_ldr.ll | 4 +-
test/CodeGen/ARM/truncstore-dag-combine.ll | 4 +-
test/CodeGen/ARM/tst_teq.ll | 4 +-
test/CodeGen/ARM/uint64tof64.ll | 2 +-
test/CodeGen/ARM/unaligned_load_store.ll | 39 +-
test/CodeGen/ARM/unord.ll | 4 +-
test/CodeGen/ARM/uxt_rot.ll | 6 +-
test/CodeGen/ARM/uxtb.ll | 2 +-
test/CodeGen/ARM/vaba.ll | 100 +-
test/CodeGen/ARM/vabd.ll | 107 +-
test/CodeGen/ARM/vabs.ll | 85 +-
test/CodeGen/ARM/vadd.ll | 213 +-
test/CodeGen/ARM/vargs.ll | 2 +-
test/CodeGen/ARM/vargs_align.ll | 10 +-
test/CodeGen/ARM/vbits.ll | 507 ++
test/CodeGen/ARM/vbsl.ll | 20 +-
test/CodeGen/ARM/vceq.ll | 62 +-
test/CodeGen/ARM/vcge.ll | 126 +-
test/CodeGen/ARM/vcgt.ll | 126 +-
test/CodeGen/ARM/vcnt.ll | 119 +-
test/CodeGen/ARM/vcombine.ll | 36 +
test/CodeGen/ARM/vcvt.ll | 97 +-
test/CodeGen/ARM/vdup.ll | 143 +-
test/CodeGen/ARM/vext.ll | 56 +
test/CodeGen/ARM/vfcmp.ll | 101 +-
test/CodeGen/ARM/vfp.ll | 43 +-
test/CodeGen/ARM/vget_lane.ll | 146 +-
test/CodeGen/ARM/vhadd.ll | 156 +-
test/CodeGen/ARM/vhsub.ll | 32 +-
test/CodeGen/ARM/vicmp.ll | 88 +-
test/CodeGen/ARM/vld1.ll | 83 +
test/CodeGen/ARM/vld2.ll | 113 +
test/CodeGen/ARM/vld3.ll | 117 +
test/CodeGen/ARM/vld4.ll | 117 +
test/CodeGen/ARM/vldlane.ll | 328 +
test/CodeGen/ARM/vminmax.ll | 293 +
test/CodeGen/ARM/vmla.ll | 126 +-
test/CodeGen/ARM/vmls.ll | 126 +-
test/CodeGen/ARM/vmov.ll | 214 +-
test/CodeGen/ARM/vmul.ll | 190 +-
test/CodeGen/ARM/vneg.ll | 78 +-
test/CodeGen/ARM/vpadal.ll | 32 +-
test/CodeGen/ARM/vpadd.ll | 142 +-
test/CodeGen/ARM/vpminmax.ll | 147 +
test/CodeGen/ARM/vqadd.ll | 42 +-
test/CodeGen/ARM/vqdmul.ll | 281 +
test/CodeGen/ARM/vqshl.ll | 266 +-
test/CodeGen/ARM/vqshrn.ll | 113 +-
test/CodeGen/ARM/vqsub.ll | 42 +-
test/CodeGen/ARM/vrec.ll | 119 +
test/CodeGen/ARM/vrev.ll | 113 +
test/CodeGen/ARM/vshift.ll | 145 +-
test/CodeGen/ARM/vshiftins.ll | 42 +-
test/CodeGen/ARM/vshl.ll | 394 +-
test/CodeGen/ARM/vshll.ll | 29 +-
test/CodeGen/ARM/vshrn.ll | 39 +-
test/CodeGen/ARM/vsra.ll | 82 +-
test/CodeGen/ARM/vst1.ll | 93 +
test/CodeGen/ARM/vst2.ll | 84 +
test/CodeGen/ARM/vst3.ll | 88 +
test/CodeGen/ARM/vst4.ll | 88 +
test/CodeGen/ARM/vstlane.ll | 197 +
test/CodeGen/ARM/vsub.ll | 213 +-
test/CodeGen/ARM/vtbl.ll | 109 +
test/CodeGen/ARM/vtrn.ll | 97 +
test/CodeGen/ARM/vuzp.ll | 75 +
test/CodeGen/ARM/vzip.ll | 75 +
test/CodeGen/ARM/weak.ll | 4 +-
test/CodeGen/ARM/weak2.ll | 2 +-
test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll | 2 +-
test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll | 2 +-
test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll | 2 +-
test/CodeGen/Alpha/2006-01-26-VaargBreak.ll | 2 +-
test/CodeGen/Alpha/2006-04-04-zextload.ll | 2 +-
test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll | 2 +-
test/CodeGen/Alpha/2006-11-01-vastart.ll | 2 +-
test/CodeGen/Alpha/2007-11-27-mulneg3.ll | 2 +-
test/CodeGen/Alpha/2008-11-10-smul_lohi.ll | 2 +-
test/CodeGen/Alpha/2008-11-12-Add128.ll | 2 +-
.../Alpha/2009-07-16-PromoteFloatCompare.ll | 6 +
test/CodeGen/Alpha/add.ll | 2 +-
test/CodeGen/Alpha/add128.ll | 2 +-
test/CodeGen/Alpha/bic.ll | 2 +-
test/CodeGen/Alpha/bsr.ll | 2 +-
test/CodeGen/Alpha/call_adj.ll | 2 +-
test/CodeGen/Alpha/cmov.ll | 4 +-
test/CodeGen/Alpha/cmpbge.ll | 2 +-
test/CodeGen/Alpha/ctlz.ll | 8 +-
test/CodeGen/Alpha/ctlz_e.ll | 2 +-
test/CodeGen/Alpha/ctpop.ll | 8 +-
test/CodeGen/Alpha/eqv.ll | 2 +-
test/CodeGen/Alpha/i32_sub_1.ll | 2 +-
test/CodeGen/Alpha/illegal-element-type.ll | 2 +-
test/CodeGen/Alpha/jmp_table.ll | 8 +-
test/CodeGen/Alpha/mb.ll | 2 +-
test/CodeGen/Alpha/mul128.ll | 2 +-
test/CodeGen/Alpha/mul5.ll | 2 +-
test/CodeGen/Alpha/neg1.ll | 2 +-
test/CodeGen/Alpha/not.ll | 2 +-
test/CodeGen/Alpha/ornot.ll | 2 +-
test/CodeGen/Alpha/private.ll | 2 +-
test/CodeGen/Alpha/rpcc.ll | 2 +-
test/CodeGen/Alpha/srl_and.ll | 2 +-
test/CodeGen/Alpha/sub128.ll | 2 +-
test/CodeGen/Alpha/weak.ll | 4 +-
test/CodeGen/Alpha/wmb.ll | 2 +-
test/CodeGen/Alpha/zapnot.ll | 2 +-
test/CodeGen/Alpha/zapnot2.ll | 2 +-
test/CodeGen/Alpha/zapnot3.ll | 2 +-
test/CodeGen/Alpha/zapnot4.ll | 2 +-
.../Blackfin/2009-08-04-LowerExtract-Live.ll | 15 +
.../Blackfin/2009-08-11-RegScavenger-CSR.ll | 17 +
test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll | 19 +
test/CodeGen/Blackfin/2009-08-15-MissingDead.ll | 25 +
test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll | 17 +
test/CodeGen/Blackfin/add-overflow.ll | 18 +
test/CodeGen/Blackfin/add.ll | 5 +
test/CodeGen/Blackfin/addsub-i128.ll | 42 +
test/CodeGen/Blackfin/basic-i1.ll | 51 +
test/CodeGen/Blackfin/basic-i16.ll | 36 +
test/CodeGen/Blackfin/basic-i32.ll | 51 +
test/CodeGen/Blackfin/basic-i64.ll | 51 +
test/CodeGen/Blackfin/basic-i8.ll | 51 +
test/CodeGen/Blackfin/basictest.ll | 19 +
test/CodeGen/Blackfin/burg.ll | 19 +
test/CodeGen/Blackfin/cmp-small-imm.ll | 6 +
test/CodeGen/Blackfin/cmp64.ll | 17 +
test/CodeGen/Blackfin/ct32.ll | 20 +
test/CodeGen/Blackfin/ct64.ll | 20 +
test/CodeGen/Blackfin/ctlz16.ll | 18 +
test/CodeGen/Blackfin/ctlz64.ll | 15 +
test/CodeGen/Blackfin/ctpop16.ll | 18 +
test/CodeGen/Blackfin/cttz16.ll | 18 +
test/CodeGen/Blackfin/cycles.ll | 17 +
test/CodeGen/Blackfin/dg.exp | 5 +
test/CodeGen/Blackfin/double-cast.ll | 8 +
test/CodeGen/Blackfin/frameindex.ll | 10 +
test/CodeGen/Blackfin/i17mem.ll | 9 +
test/CodeGen/Blackfin/i1mem.ll | 9 +
test/CodeGen/Blackfin/i1ops.ll | 10 +
test/CodeGen/Blackfin/i216mem.ll | 9 +
test/CodeGen/Blackfin/i248mem.ll | 9 +
test/CodeGen/Blackfin/i256mem.ll | 9 +
test/CodeGen/Blackfin/i256param.ll | 7 +
test/CodeGen/Blackfin/i56param.ll | 8 +
test/CodeGen/Blackfin/i8mem.ll | 10 +
test/CodeGen/Blackfin/inline-asm.ll | 38 +
test/CodeGen/Blackfin/int-setcc.ll | 80 +
test/CodeGen/Blackfin/invalid-apint.ll | 15 +
test/CodeGen/Blackfin/jumptable.ll | 53 +
test/CodeGen/Blackfin/large-switch.ll | 187 +
test/CodeGen/Blackfin/load-i16.ll | 13 +
test/CodeGen/Blackfin/logic-i16.ll | 16 +
test/CodeGen/Blackfin/many-args.ll | 23 +
test/CodeGen/Blackfin/mulhu.ll | 106 +
test/CodeGen/Blackfin/printf.ll | 10 +
test/CodeGen/Blackfin/printf2.ll | 8 +
test/CodeGen/Blackfin/promote-logic.ll | 42 +
test/CodeGen/Blackfin/promote-setcc.ll | 37 +
test/CodeGen/Blackfin/sdiv.ll | 5 +
test/CodeGen/Blackfin/simple-select.ll | 11 +
test/CodeGen/Blackfin/switch.ll | 18 +
test/CodeGen/Blackfin/switch2.ll | 16 +
test/CodeGen/Blackfin/sync-intr.ll | 13 +
test/CodeGen/CBackend/2002-05-16-NameCollide.ll | 2 +-
test/CodeGen/CBackend/2002-05-21-MissingReturn.ll | 2 +-
.../CodeGen/CBackend/2002-08-19-ConstPointerRef.ll | 2 +-
test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll | 2 +-
test/CodeGen/CBackend/2002-08-19-DataPointer.ll | 2 +-
.../CodeGen/CBackend/2002-08-19-FunctionPointer.ll | 2 +-
.../CBackend/2002-08-19-HardConstantExpr.ll | 2 +-
test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll | 2 +-
.../CodeGen/CBackend/2002-08-20-UnnamedArgument.ll | 2 +-
.../CBackend/2002-08-26-IndirectCallTest.ll | 2 +-
.../CBackend/2002-08-30-StructureOrderingTest.ll | 2 +-
.../CBackend/2002-09-20-ArrayTypeFailure.ll | 2 +-
.../CBackend/2002-09-20-VarArgPrototypes.ll | 2 +-
.../CBackend/2002-10-15-OpaqueTypeProblem.ll | 2 +-
test/CodeGen/CBackend/2002-10-16-External.ll | 2 +-
.../CBackend/2002-10-30-FunctionPointerAlloca.ll | 2 +-
test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll | 2 +-
.../CBackend/2003-05-12-IntegerSizeWarning.ll | 2 +-
test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll | 2 +-
.../CBackend/2003-05-31-MissingStructName.ll | 2 +-
.../CodeGen/CBackend/2003-06-01-NullPointerType.ll | 2 +-
test/CodeGen/CBackend/2003-06-11-HexConstant.ll | 2 +-
.../CBackend/2003-06-11-LiteralStringProblem.ll | 2 +-
test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll | 2 +-
.../CBackend/2003-06-28-LinkOnceGlobalVars.ll | 2 +-
test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll | 2 +-
test/CodeGen/CBackend/2003-10-23-UnusedType.ll | 2 +-
.../CBackend/2003-10-28-CastToPtrToStruct.ll | 2 +-
.../CBackend/2003-11-21-ConstantShiftExpr.ll | 2 +-
.../CBackend/2004-02-13-FrameReturnAddress.ll | 2 +-
.../CBackend/2004-02-15-PreexistingExternals.ll | 2 +-
.../CBackend/2004-02-26-FPNotPrintableConstants.ll | 2 +-
.../CBackend/2004-02-26-LinkOnceFunctions.ll | 2 +-
test/CodeGen/CBackend/2004-08-09-va-end-null.ll | 2 +-
.../CBackend/2004-11-13-FunctionPointerCast.ll | 2 +-
test/CodeGen/CBackend/2004-12-03-ExternStatics.ll | 2 +-
.../CBackend/2004-12-28-LogicalConstantExprs.ll | 2 +-
.../CBackend/2005-02-14-VolatileOperations.ll | 2 +-
.../CBackend/2005-03-08-RecursiveTypeCrash.ll | 2 +-
.../CBackend/2005-07-14-NegationToMinusMinus.ll | 2 +-
test/CodeGen/CBackend/2005-08-23-Fmod.ll | 2 +-
.../CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll | 2 +-
test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll | 2 +-
test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll | 2 +-
test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll | 2 +-
.../CBackend/2007-01-17-StackSaveNRestore.ll | 4 +-
test/CodeGen/CBackend/2007-02-05-memset.ll | 2 +-
test/CodeGen/CBackend/2007-02-23-NameConflicts.ll | 6 +-
test/CodeGen/CBackend/2007-07-11-PackedStruct.ll | 2 +-
.../CBackend/2008-02-01-UnalignedLoadStore.ll | 2 +-
test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll | 2 +-
test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll | 2 +-
test/CodeGen/CBackend/2008-06-04-IndirectMem.ll | 2 +-
.../CBackend/2008-10-21-PPCLongDoubleConstant.ll | 2 +-
test/CodeGen/CBackend/fneg.ll | 2 +-
test/CodeGen/CBackend/pr2408.ll | 2 +-
test/CodeGen/CBackend/vectors.ll | 2 +-
test/CodeGen/CPP/2007-06-16-Funcname.ll | 2 +-
test/CodeGen/CPP/2009-05-01-Long-Double.ll | 2 +-
test/CodeGen/CPP/2009-05-04-CondBr.ll | 2 +-
test/CodeGen/CPP/llvm2cpp.ll | 2 +-
test/CodeGen/CellSPU/2009-01-01-BrCond.ll | 2 +-
test/CodeGen/CellSPU/and_ops.ll | 2 +-
test/CodeGen/CellSPU/call.ll | 2 +-
test/CodeGen/CellSPU/call_indirect.ll | 4 +-
test/CodeGen/CellSPU/ctpop.ll | 2 +-
test/CodeGen/CellSPU/dp_farith.ll | 2 +-
test/CodeGen/CellSPU/eqv.ll | 2 +-
test/CodeGen/CellSPU/extract_elt.ll | 2 +-
test/CodeGen/CellSPU/fcmp32.ll | 2 +-
test/CodeGen/CellSPU/fcmp64.ll | 2 +-
test/CodeGen/CellSPU/fdiv.ll | 2 +-
test/CodeGen/CellSPU/fneg-fabs.ll | 2 +-
test/CodeGen/CellSPU/i64ops.ll | 2 +-
test/CodeGen/CellSPU/i8ops.ll | 2 +-
test/CodeGen/CellSPU/icmp16.ll | 2 +-
test/CodeGen/CellSPU/icmp32.ll | 2 +-
test/CodeGen/CellSPU/icmp64.ll | 2 +-
test/CodeGen/CellSPU/icmp8.ll | 2 +-
test/CodeGen/CellSPU/immed16.ll | 2 +-
test/CodeGen/CellSPU/immed32.ll | 2 +-
test/CodeGen/CellSPU/immed64.ll | 2 +-
test/CodeGen/CellSPU/int2fp.ll | 2 +-
test/CodeGen/CellSPU/intrinsics_branch.ll | 2 +-
test/CodeGen/CellSPU/intrinsics_float.ll | 2 +-
test/CodeGen/CellSPU/intrinsics_logical.ll | 2 +-
test/CodeGen/CellSPU/loads.ll | 10 +-
test/CodeGen/CellSPU/mul-with-overflow.ll | 2 +-
test/CodeGen/CellSPU/mul_ops.ll | 2 +-
test/CodeGen/CellSPU/nand.ll | 2 +-
test/CodeGen/CellSPU/or_ops.ll | 2 +-
test/CodeGen/CellSPU/private.ll | 2 +-
test/CodeGen/CellSPU/rotate_ops.ll | 2 +-
test/CodeGen/CellSPU/select_bits.ll | 2 +-
test/CodeGen/CellSPU/sext128.ll | 47 +
test/CodeGen/CellSPU/shift_ops.ll | 2 +-
test/CodeGen/CellSPU/sp_farith.ll | 2 +-
test/CodeGen/CellSPU/stores.ll | 2 +-
test/CodeGen/CellSPU/struct_1.ll | 4 +-
test/CodeGen/CellSPU/trunc.ll | 2 +-
test/CodeGen/CellSPU/vec_const.ll | 4 +-
test/CodeGen/CellSPU/vecinsert.ll | 2 +-
.../Generic/2002-04-14-UnexpectedUnsignedType.ll | 2 +-
.../Generic/2002-04-16-StackFrameSizeAlignment.ll | 2 +-
test/CodeGen/Generic/2003-05-27-phifcmpd.ll | 2 +-
.../CodeGen/Generic/2003-05-27-useboolinotherbb.ll | 2 +-
test/CodeGen/Generic/2003-05-27-usefsubasbool.ll | 2 +-
test/CodeGen/Generic/2003-05-28-ManyArgs.ll | 2 +-
test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll | 2 +-
test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll | 2 +-
test/CodeGen/Generic/2003-07-06-BadIntCmp.ll | 2 +-
test/CodeGen/Generic/2003-07-07-BadLongConst.ll | 2 +-
test/CodeGen/Generic/2003-07-08-BadCastToBool.ll | 2 +-
test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll | 2 +-
test/CodeGen/Generic/2004-02-08-UnwindSupport.ll | 2 +-
.../Generic/2004-05-09-LiveVarPartialRegister.ll | 2 +-
test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll | 2 +-
test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll | 2 +-
.../Generic/2005-07-12-memcpy-i64-length.ll | 2 +-
.../Generic/2005-10-18-ZeroSizeStackObject.ll | 2 +-
test/CodeGen/Generic/2005-10-21-longlonggtu.ll | 2 +-
test/CodeGen/Generic/2005-12-01-Crash.ll | 2 +-
test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll | 2 +-
test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll | 2 +-
.../2006-01-18-InvalidBranchOpcodeAssert.ll | 2 +-
test/CodeGen/Generic/2006-02-12-InsertLibcall.ll | 2 +-
.../Generic/2006-03-01-dagcombineinfloop.ll | 2 +-
test/CodeGen/Generic/2006-04-11-vecload.ll | 2 +-
test/CodeGen/Generic/2006-04-26-SetCCAnd.ll | 2 +-
.../CodeGen/Generic/2006-04-28-Sign-extend-bool.ll | 2 +-
.../Generic/2006-05-06-GEP-Cast-Sink-Crash.ll | 2 +-
.../CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll | 2 +-
.../Generic/2006-06-13-ComputeMaskedBitsCrash.ll | 2 +-
.../Generic/2006-06-28-SimplifySetCCCrash.ll | 2 +-
test/CodeGen/Generic/2006-07-03-schedulers.ll | 6 +-
test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll | 2 +-
test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll | 2 +-
test/CodeGen/Generic/2006-09-06-SwitchLowering.ll | 2 +-
test/CodeGen/Generic/2006-10-27-CondFolding.ll | 2 +-
test/CodeGen/Generic/2006-10-29-Crash.ll | 2 +-
.../Generic/2006-11-06-MemIntrinsicExpand.ll | 2 +-
test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll | 2 +-
test/CodeGen/Generic/2006-12-16-InlineAsmCrash.ll | 2 +-
test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll | 2 +-
test/CodeGen/Generic/2007-02-16-BranchFold.ll | 2 +-
.../Generic/2007-02-23-DAGCombine-Miscompile.ll | 2 +-
test/CodeGen/Generic/2007-02-25-invoke.ll | 2 +-
.../Generic/2007-04-08-MultipleFrameIndices.ll | 2 +-
.../Generic/2007-04-13-SwitchLowerBadPhi.ll | 2 +-
test/CodeGen/Generic/2007-04-14-BitTestsBadMask.ll | 4 +-
test/CodeGen/Generic/2007-04-17-lsr-crash.ll | 2 +-
test/CodeGen/Generic/2007-04-27-BitTestsBadMask.ll | 4 +-
.../CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll | 2 +-
test/CodeGen/Generic/2007-04-27-LargeMemObject.ll | 2 +-
.../Generic/2007-04-30-LandingPadBranchFolding.ll | 2 +-
test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll | 2 +-
test/CodeGen/Generic/2007-05-05-Personality.ll | 2 +-
.../Generic/2007-05-15-InfiniteRecursion.ll | 2 +-
.../Generic/2007-06-06-CriticalEdgeLandingPad.ll | 2 +-
.../Generic/2007-11-21-UndeadIllegalNode.ll | 2 +-
test/CodeGen/Generic/2007-12-17-InvokeAsm.ll | 2 +-
test/CodeGen/Generic/2007-12-31-UnusedSelector.ll | 2 +-
test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll | 2 +-
test/CodeGen/Generic/2008-01-30-LoadCrash.ll | 2 +-
test/CodeGen/Generic/2008-02-04-Ctlz.ll | 2 +-
.../CodeGen/Generic/2008-02-04-ExtractSubvector.ll | 2 +-
test/CodeGen/Generic/2008-02-20-MatchingMem.ll | 2 +-
test/CodeGen/Generic/2008-02-25-NegateZero.ll | 2 +-
test/CodeGen/Generic/2008-02-26-NegatableCrash.ll | 2 +-
.../Generic/2008-08-07-PtrToInt-SmallerInt.ll | 2 +-
test/CodeGen/Generic/2009-03-17-LSR-APInt.ll | 2 +-
.../Generic/2009-03-29-SoftFloatVectorExtract.ll | 2 +-
test/CodeGen/Generic/2009-04-10-SinkCrash.ll | 2 +-
test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll | 2 +-
.../Generic/2009-06-03-UnreachableSplitPad.ll | 2 +-
test/CodeGen/Generic/APIntLoadStore.ll | 2 +-
test/CodeGen/Generic/APIntParam.ll | 2 +-
test/CodeGen/Generic/APIntSextParam.ll | 2 +-
test/CodeGen/Generic/APIntZextParam.ll | 2 +-
test/CodeGen/Generic/BasicInstrs.ll | 2 +-
test/CodeGen/Generic/BurgBadRegAlloc.ll | 2 +-
test/CodeGen/Generic/ConstantExprLowering.ll | 2 +-
test/CodeGen/Generic/GC/alloc_loop.ll | 2 +-
test/CodeGen/Generic/GC/argpromotion.ll | 2 +-
test/CodeGen/Generic/GC/deadargelim.ll | 2 +-
test/CodeGen/Generic/GC/frame_size.ll | 2 +-
test/CodeGen/Generic/GC/inline.ll | 2 +-
test/CodeGen/Generic/GC/inline2.ll | 4 +-
test/CodeGen/Generic/GC/lower_gcroot.ll | 2 +-
test/CodeGen/Generic/GC/redundant_init.ll | 2 +-
test/CodeGen/Generic/GC/simple_ocaml.ll | 4 +-
test/CodeGen/Generic/Makefile | 4 +-
test/CodeGen/Generic/SwitchLowering.ll | 2 +-
test/CodeGen/Generic/add-with-overflow-24.ll | 2 +-
test/CodeGen/Generic/add-with-overflow.ll | 4 +-
test/CodeGen/Generic/addc-fold2.ll | 4 +-
test/CodeGen/Generic/asm-large-immediate.ll | 2 +-
test/CodeGen/Generic/badCallArgLRLLVM.ll | 2 +-
test/CodeGen/Generic/badFoldGEP.ll | 2 +-
test/CodeGen/Generic/badarg6.ll | 2 +-
test/CodeGen/Generic/badlive.ll | 2 +-
test/CodeGen/Generic/bool-to-double.ll | 2 +-
test/CodeGen/Generic/bool-vector.ll | 2 +-
test/CodeGen/Generic/call-ret0.ll | 2 +-
test/CodeGen/Generic/call-ret42.ll | 2 +-
test/CodeGen/Generic/call-void.ll | 2 +-
test/CodeGen/Generic/call2-ret0.ll | 2 +-
test/CodeGen/Generic/cast-fp.ll | 2 +-
test/CodeGen/Generic/constindices.ll | 2 +-
test/CodeGen/Generic/debug-info.ll | 2 +-
test/CodeGen/Generic/div-neg-power-2.ll | 2 +-
test/CodeGen/Generic/empty-load-store.ll | 2 +-
test/CodeGen/Generic/externally_available.ll | 2 +-
test/CodeGen/Generic/fastcall.ll | 2 +-
test/CodeGen/Generic/fneg-fabs.ll | 2 +-
test/CodeGen/Generic/fp-to-int-invalid.ll | 2 +-
test/CodeGen/Generic/fp_to_int.ll | 2 +-
test/CodeGen/Generic/fpowi-promote.ll | 4 +-
test/CodeGen/Generic/fwdtwice.ll | 2 +-
test/CodeGen/Generic/getresult-undef.ll | 2 +-
test/CodeGen/Generic/global-ret0.ll | 2 +-
test/CodeGen/Generic/hello.ll | 2 +-
test/CodeGen/Generic/i128-addsub.ll | 2 +-
test/CodeGen/Generic/i128-arith.ll | 2 +-
test/CodeGen/Generic/inline-asm-special-strings.ll | 2 +-
test/CodeGen/Generic/intrinsics.ll | 2 +-
test/CodeGen/Generic/invalid-memcpy.ll | 2 +-
test/CodeGen/Generic/isunord.ll | 3 +-
test/CodeGen/Generic/llvm-ct-intrinsics.ll | 2 +-
...ltiple-return-values-cross-block-with-invoke.ll | 2 +-
test/CodeGen/Generic/negintconst.ll | 2 +-
test/CodeGen/Generic/nested-select.ll | 2 +-
test/CodeGen/Generic/phi-immediate-factoring.ll | 2 +-
test/CodeGen/Generic/pr2625.ll | 2 +-
test/CodeGen/Generic/pr3288.ll | 2 +-
test/CodeGen/Generic/print-add.ll | 2 +-
test/CodeGen/Generic/print-arith-fp.ll | 2 +-
test/CodeGen/Generic/print-arith-int.ll | 2 +-
test/CodeGen/Generic/print-int.ll | 2 +-
test/CodeGen/Generic/print-mul-exp.ll | 2 +-
test/CodeGen/Generic/print-mul.ll | 2 +-
test/CodeGen/Generic/print-shift.ll | 2 +-
test/CodeGen/Generic/ret0.ll | 2 +-
test/CodeGen/Generic/ret42.ll | 2 +-
test/CodeGen/Generic/select-cc.ll | 2 +-
test/CodeGen/Generic/select.ll | 2 +-
test/CodeGen/Generic/shift-int64.ll | 2 +-
test/CodeGen/Generic/spillccr.ll | 2 +-
test/CodeGen/Generic/stack-protector.ll | 4 +-
test/CodeGen/Generic/stacksave-restore.ll | 2 +-
test/CodeGen/Generic/storetrunc-fp.ll | 2 +-
test/CodeGen/Generic/switch-crit-edge-constant.ll | 2 +-
test/CodeGen/Generic/switch-lower-feature-2.ll | 2 +-
test/CodeGen/Generic/switch-lower-feature.ll | 10 +-
test/CodeGen/Generic/switch-lower.ll | 2 +-
test/CodeGen/Generic/trap.ll | 2 +-
test/CodeGen/Generic/v-split.ll | 2 +-
test/CodeGen/Generic/vector-casts.ll | 2 +-
test/CodeGen/Generic/vector-constantexpr.ll | 2 +-
test/CodeGen/Generic/vector-identity-shuffle.ll | 2 +-
test/CodeGen/Generic/vector.ll | 2 +-
test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll | 2 +-
test/CodeGen/MSP430/2009-05-17-Rot.ll | 2 +-
test/CodeGen/MSP430/2009-05-17-Shift.ll | 2 +-
test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll | 2 +-
.../CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll | 30 +
test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll | 22 +
test/CodeGen/MSP430/2009-10-10-OrImpDef.ll | 14 +
test/CodeGen/MSP430/Inst16mi.ll | 48 +
test/CodeGen/MSP430/Inst16mm.ll | 54 +
test/CodeGen/MSP430/Inst16mr.ll | 48 +
test/CodeGen/MSP430/Inst16rm.ll | 38 +
test/CodeGen/MSP430/Inst16rr.ll | 37 +
test/CodeGen/MSP430/Inst8mi.ll | 48 +
test/CodeGen/MSP430/Inst8mm.ll | 55 +
test/CodeGen/MSP430/Inst8mr.ll | 48 +
test/CodeGen/MSP430/Inst8rm.ll | 38 +
test/CodeGen/MSP430/Inst8rr.ll | 38 +
test/CodeGen/MSP430/inline-asm.ll | 25 +
test/CodeGen/Mips/2008-06-05-Carry.ll | 2 +-
test/CodeGen/Mips/2008-07-03-SRet.ll | 2 +-
test/CodeGen/Mips/2008-07-05-ByVal.ll | 2 +-
test/CodeGen/Mips/2008-07-06-fadd64.ll | 2 +-
test/CodeGen/Mips/2008-07-07-FPExtend.ll | 2 +-
test/CodeGen/Mips/2008-07-07-Float2Int.ll | 2 +-
.../Mips/2008-07-07-IntDoubleConvertions.ll | 2 +-
test/CodeGen/Mips/2008-07-15-InternalConstant.ll | 2 +-
test/CodeGen/Mips/2008-07-15-SmallSection.ll | 4 +-
test/CodeGen/Mips/2008-07-16-SignExtInReg.ll | 2 +-
test/CodeGen/Mips/2008-07-22-Cstpool.ll | 2 +-
test/CodeGen/Mips/2008-07-23-fpcmp.ll | 2 +-
test/CodeGen/Mips/2008-07-29-icmp.ll | 2 +-
test/CodeGen/Mips/2008-07-31-fcopysign.ll | 2 +-
test/CodeGen/Mips/2008-08-01-AsmInline.ll | 2 +-
test/CodeGen/Mips/2008-08-03-ReturnDouble.ll | 2 +-
test/CodeGen/Mips/2008-08-03-fabs64.ll | 2 +-
test/CodeGen/Mips/2008-08-04-Bitconvert.ll | 2 +-
test/CodeGen/Mips/2008-08-06-Alloca.ll | 2 +-
test/CodeGen/Mips/2008-08-07-CC.ll | 2 +-
test/CodeGen/Mips/2008-08-07-FPRound.ll | 2 +-
test/CodeGen/Mips/2008-08-08-bswap.ll | 2 +-
test/CodeGen/Mips/2008-08-08-ctlz.ll | 2 +-
test/CodeGen/Mips/2008-10-13-LegalizerBug.ll | 2 +-
test/CodeGen/Mips/2008-11-10-xint_to_fp.ll | 2 +-
test/CodeGen/Mips/private.ll | 2 +-
test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll | 29 +
test/CodeGen/PIC16/dg.exp | 5 +
test/CodeGen/PIC16/global-in-user-section.ll | 5 +
test/CodeGen/PIC16/globals.ll | 15 +
test/CodeGen/PIC16/sext.ll | 10 +
test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll | 2 +-
test/CodeGen/PowerPC/2004-11-30-shift-crash.ll | 2 +-
test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll | 2 +-
test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll | 2 +-
test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll | 2 +-
test/CodeGen/PowerPC/2005-01-14-UndefLong.ll | 2 +-
test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll | 2 +-
.../PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll | 2 +-
.../CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll | 2 +-
test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll | 2 +-
.../PowerPC/2006-01-11-darwin-fp-argument.ll | 2 +-
test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll | 2 +-
.../PowerPC/2006-04-01-FloatDoubleExtend.ll | 2 +-
test/CodeGen/PowerPC/2006-04-05-splat-ish.ll | 3 +-
test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll | 2 +-
test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll | 2 +-
.../PowerPC/2006-07-07-ComputeMaskedBits.ll | 2 +-
test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll | 4 +-
test/CodeGen/PowerPC/2006-08-11-RetVector.ll | 4 +-
test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll | 2 +-
test/CodeGen/PowerPC/2006-09-28-shift_64.ll | 2 +-
.../PowerPC/2006-10-11-combiner-aa-regression.ll | 3 +-
test/CodeGen/PowerPC/2006-10-13-Miscompile.ll | 2 +-
test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll | 2 +-
test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll | 2 +-
.../PowerPC/2006-11-10-DAGCombineMiscompile.ll | 2 +-
test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll | 2 +-
test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll | 6 +-
test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll | 6 +-
test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll | 4 +-
test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll | 4 +-
test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll | 4 +-
.../PowerPC/2007-01-31-InlineAsmAddrMode.ll | 4 +-
test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll | 2 +-
.../PowerPC/2007-02-16-InlineAsmNConstraint.ll | 2 +-
test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll | 2 +-
test/CodeGen/PowerPC/2007-03-24-cntlzd.ll | 2 +-
test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll | 2 +-
.../PowerPC/2007-04-24-InlineAsm-I-Modifier.ll | 4 +-
.../PowerPC/2007-04-30-InlineAsmEarlyClobber.ll | 8 +-
.../PowerPC/2007-05-03-InlineAsm-S-Constraint.ll | 2 +-
.../PowerPC/2007-05-14-InlineAsmSelectCrash.ll | 2 +-
test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll | 8 +-
.../PowerPC/2007-05-30-dagcombine-miscomp.ll | 2 +-
test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll | 2 +-
test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll | 2 +-
test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll | 2 +-
.../PowerPC/2007-09-07-LoadStoreIdxForms.ll | 2 +-
test/CodeGen/PowerPC/2007-09-08-unaligned.ll | 8 +-
.../PowerPC/2007-09-11-RegCoalescerAssert.ll | 2 +-
.../PowerPC/2007-09-12-LiveIntervalsAssert.ll | 2 +-
.../PowerPC/2007-10-16-InlineAsmFrameOffset.ll | 2 +-
test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll | 2 +-
.../PowerPC/2007-10-21-LocalRegAllocAssert.ll | 2 +-
.../PowerPC/2007-10-21-LocalRegAllocAssert2.ll | 2 +-
test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll | 2 +-
.../CodeGen/PowerPC/2007-11-16-landingpad-split.ll | 2 +-
test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll | 6 +-
test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll | 2 +-
.../PowerPC/2008-02-05-LiveIntervalsAssert.ll | 2 +-
.../PowerPC/2008-02-09-LocalRegAllocAssert.ll | 2 +-
.../PowerPC/2008-03-05-RegScavengerAssert.ll | 2 +-
test/CodeGen/PowerPC/2008-03-06-KillInfo.ll | 2 +-
.../PowerPC/2008-03-17-RegScavengerCrash.ll | 2 +-
.../PowerPC/2008-03-18-RegScavengerAssert.ll | 2 +-
test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll | 2 +-
test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll | 2 +-
test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll | 2 +-
.../PowerPC/2008-04-10-LiveIntervalCrash.ll | 2 +-
test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll | 2 +-
test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll | 2 +-
test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll | 2 +-
test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll | 2 +-
test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll | 2 +-
.../PowerPC/2008-06-23-LiveVariablesCrash.ll | 2 +-
test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll | 4 +-
test/CodeGen/PowerPC/2008-07-15-Bswap.ll | 2 +-
test/CodeGen/PowerPC/2008-07-15-Fabs.ll | 2 +-
test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll | 2 +-
test/CodeGen/PowerPC/2008-07-17-Fneg.ll | 2 +-
test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll | 2 +-
test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll | 2 +-
.../PowerPC/2008-10-17-AsmMatchingOperands.ll | 2 +-
test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll | 2 +-
test/CodeGen/PowerPC/2008-10-28-f128-i32.ll | 2 +-
test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll | 2 +-
test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll | 2 +-
.../PowerPC/2008-12-02-LegalizeTypeAssert.ll | 2 +-
test/CodeGen/PowerPC/2008-12-12-EH.ll | 4 +-
test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll | 2 +-
test/CodeGen/PowerPC/2009-03-17-LSRBug.ll | 2 +-
test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll | 2 +-
.../PowerPC/2009-07-16-InlineAsm-M-Operand.ll | 16 +
.../2009-08-17-inline-asm-addr-mode-breakage.ll | 25 +
test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll | 8 +
test/CodeGen/PowerPC/2009-09-18-carrybit.ll | 62 +
test/CodeGen/PowerPC/Atomics-32.ll | 2 +-
test/CodeGen/PowerPC/Atomics-64.ll | 2 +-
test/CodeGen/PowerPC/Frames-alloca.ll | 57 +-
test/CodeGen/PowerPC/Frames-large.ll | 119 +-
test/CodeGen/PowerPC/Frames-leaf.ll | 32 +-
test/CodeGen/PowerPC/Frames-small.ll | 18 +-
test/CodeGen/PowerPC/LargeAbsoluteAddr.ll | 6 +-
test/CodeGen/PowerPC/addc.ll | 2 +-
test/CodeGen/PowerPC/addi-reassoc.ll | 2 +-
test/CodeGen/PowerPC/align.ll | 6 +-
test/CodeGen/PowerPC/and-branch.ll | 2 +-
test/CodeGen/PowerPC/and-elim.ll | 2 +-
test/CodeGen/PowerPC/and-imm.ll | 2 +-
test/CodeGen/PowerPC/and_add.ll | 2 +-
test/CodeGen/PowerPC/and_sext.ll | 4 +-
test/CodeGen/PowerPC/and_sra.ll | 2 +-
test/CodeGen/PowerPC/atomic-1.ll | 4 +-
test/CodeGen/PowerPC/atomic-2.ll | 4 +-
test/CodeGen/PowerPC/available-externally.ll | 116 +-
test/CodeGen/PowerPC/big-endian-actual-args.ll | 4 +-
test/CodeGen/PowerPC/big-endian-call-result.ll | 4 +-
test/CodeGen/PowerPC/big-endian-formal-args.ll | 8 +-
test/CodeGen/PowerPC/branch-opt.ll | 2 +-
test/CodeGen/PowerPC/bswap-load-store.ll | 12 +-
test/CodeGen/PowerPC/buildvec_canonicalize.ll | 6 +-
test/CodeGen/PowerPC/calls.ll | 6 +-
test/CodeGen/PowerPC/cmp-cmp.ll | 2 +-
test/CodeGen/PowerPC/compare-duplicate.ll | 2 +-
test/CodeGen/PowerPC/compare-simm.ll | 2 +-
test/CodeGen/PowerPC/constants.ll | 6 +-
test/CodeGen/PowerPC/cr_spilling.ll | 2 +-
test/CodeGen/PowerPC/cttz.ll | 2 +-
test/CodeGen/PowerPC/darwin-labels.ll | 2 +-
test/CodeGen/PowerPC/delete-node.ll | 2 +-
test/CodeGen/PowerPC/div-2.ll | 4 +-
test/CodeGen/PowerPC/eqv-andc-orc-nor.ll | 10 +-
test/CodeGen/PowerPC/extsh.ll | 2 +-
test/CodeGen/PowerPC/fabs.ll | 2 +-
test/CodeGen/PowerPC/fma.ll | 2 +-
test/CodeGen/PowerPC/fnabs.ll | 2 +-
test/CodeGen/PowerPC/fneg.ll | 2 +-
test/CodeGen/PowerPC/fold-li.ll | 2 +-
test/CodeGen/PowerPC/fp-branch.ll | 2 +-
test/CodeGen/PowerPC/fp-int-fp.ll | 2 +-
test/CodeGen/PowerPC/fp_to_uint.ll | 2 +-
test/CodeGen/PowerPC/fpcopy.ll | 2 +-
test/CodeGen/PowerPC/frounds.ll | 2 +-
test/CodeGen/PowerPC/fsqrt.ll | 14 +-
test/CodeGen/PowerPC/hello.ll | 4 +-
test/CodeGen/PowerPC/hidden-vis-2.ll | 2 +-
test/CodeGen/PowerPC/hidden-vis.ll | 2 +-
test/CodeGen/PowerPC/i128-and-beyond.ll | 2 +-
test/CodeGen/PowerPC/i64_fp.ll | 16 +-
test/CodeGen/PowerPC/iabs.ll | 2 +-
test/CodeGen/PowerPC/illegal-element-type.ll | 2 +-
test/CodeGen/PowerPC/inlineasm-copy.ll | 2 +-
test/CodeGen/PowerPC/int-fp-conv-0.ll | 2 +-
test/CodeGen/PowerPC/int-fp-conv-1.ll | 2 +-
test/CodeGen/PowerPC/invalid-memcpy.ll | 4 +-
test/CodeGen/PowerPC/inverted-bool-compares.ll | 2 +-
test/CodeGen/PowerPC/ispositive.ll | 2 +-
test/CodeGen/PowerPC/itofp128.ll | 2 +-
test/CodeGen/PowerPC/lha.ll | 2 +-
test/CodeGen/PowerPC/load-constant-addr.ll | 4 +-
test/CodeGen/PowerPC/long-compare.ll | 8 +-
test/CodeGen/PowerPC/longdbl-truncate.ll | 2 +-
test/CodeGen/PowerPC/mask64.ll | 2 +-
test/CodeGen/PowerPC/mem-rr-addr-mode.ll | 4 +-
test/CodeGen/PowerPC/mem_update.ll | 4 +-
test/CodeGen/PowerPC/mul-neg-power-2.ll | 2 +-
test/CodeGen/PowerPC/mul-with-overflow.ll | 2 +-
test/CodeGen/PowerPC/mulhs.ll | 2 +-
test/CodeGen/PowerPC/multiple-return-values.ll | 4 +-
test/CodeGen/PowerPC/neg.ll | 2 +-
test/CodeGen/PowerPC/no-dead-strip.ll | 2 +-
test/CodeGen/PowerPC/or-addressing-mode.ll | 4 +-
test/CodeGen/PowerPC/ppcf128-1-opt.ll | 2 +-
test/CodeGen/PowerPC/ppcf128-1.ll | 2 +-
test/CodeGen/PowerPC/ppcf128-2.ll | 2 +-
test/CodeGen/PowerPC/ppcf128-3.ll | 2 +-
test/CodeGen/PowerPC/ppcf128-4.ll | 2 +-
test/CodeGen/PowerPC/pr3711_widen_bit.ll | 2 +-
test/CodeGen/PowerPC/private.ll | 10 +-
test/CodeGen/PowerPC/reg-coalesce-simple.ll | 2 +-
test/CodeGen/PowerPC/retaddr.ll | 6 +-
test/CodeGen/PowerPC/return-val-i128.ll | 2 +-
test/CodeGen/PowerPC/rlwimi-commute.ll | 4 +-
test/CodeGen/PowerPC/rlwimi.ll | 4 +-
test/CodeGen/PowerPC/rlwimi2.ll | 2 +-
test/CodeGen/PowerPC/rlwimi3.ll | 2 +-
test/CodeGen/PowerPC/rlwinm.ll | 2 +-
test/CodeGen/PowerPC/rlwinm2.ll | 2 +-
test/CodeGen/PowerPC/rotl-2.ll | 6 +-
test/CodeGen/PowerPC/rotl-64.ll | 4 +-
test/CodeGen/PowerPC/rotl.ll | 4 +-
test/CodeGen/PowerPC/sections.ll | 8 +
test/CodeGen/PowerPC/select-cc.ll | 2 +-
test/CodeGen/PowerPC/select_lt0.ll | 2 +-
test/CodeGen/PowerPC/setcc_no_zext.ll | 2 +-
test/CodeGen/PowerPC/seteq-0.ll | 3 +-
test/CodeGen/PowerPC/shift128.ll | 2 +-
test/CodeGen/PowerPC/shl_elim.ll | 2 +-
test/CodeGen/PowerPC/shl_sext.ll | 2 +-
test/CodeGen/PowerPC/sign_ext_inreg1.ll | 4 +-
test/CodeGen/PowerPC/small-arguments.ll | 2 +-
test/CodeGen/PowerPC/stfiwx-2.ll | 4 +-
test/CodeGen/PowerPC/stfiwx.ll | 8 +-
test/CodeGen/PowerPC/store-load-fwd.ll | 2 +-
test/CodeGen/PowerPC/subc.ll | 2 +-
test/CodeGen/PowerPC/tailcall1-64.ll | 2 +-
test/CodeGen/PowerPC/tailcall1.ll | 2 +-
test/CodeGen/PowerPC/tailcallpic1.ll | 2 +-
test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll | 583 ++
test/CodeGen/PowerPC/trampoline.ll | 2 +-
test/CodeGen/PowerPC/unsafe-math.ll | 4 +-
test/CodeGen/PowerPC/vcmp-fold.ll | 2 +-
test/CodeGen/PowerPC/vec_br_cmp.ll | 2 +-
test/CodeGen/PowerPC/vec_call.ll | 2 +-
test/CodeGen/PowerPC/vec_constants.ll | 2 +-
test/CodeGen/PowerPC/vec_fneg.ll | 2 +-
test/CodeGen/PowerPC/vec_insert.ll | 2 +-
test/CodeGen/PowerPC/vec_misaligned.ll | 2 +-
test/CodeGen/PowerPC/vec_mul.ll | 4 +-
test/CodeGen/PowerPC/vec_perf_shuffle.ll | 2 +-
test/CodeGen/PowerPC/vec_shift.ll | 2 +-
test/CodeGen/PowerPC/vec_shuffle.ll | 4 +-
test/CodeGen/PowerPC/vec_splat.ll | 4 +-
test/CodeGen/PowerPC/vec_vrsave.ll | 2 +-
test/CodeGen/PowerPC/vec_zero.ll | 2 +-
test/CodeGen/PowerPC/vector-identity-shuffle.ll | 4 +-
test/CodeGen/PowerPC/vector.ll | 4 +-
.../CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll | 2 +-
test/CodeGen/SPARC/2007-05-09-JumpTables.ll | 2 +-
.../CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll | 2 +-
.../SPARC/2008-10-10-InlineAsmMemoryOperand.ll | 2 +-
.../SPARC/2008-10-10-InlineAsmRegOperand.ll | 2 +-
test/CodeGen/SPARC/2009-08-28-PIC.ll | 9 +
test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll | 6 +
test/CodeGen/SPARC/basictest.ll | 2 +-
test/CodeGen/SPARC/ctpop.ll | 8 +-
test/CodeGen/SPARC/private.ll | 2 +-
test/CodeGen/SPARC/xnor.ll | 2 +-
test/CodeGen/SystemZ/00-RetVoid.ll | 6 +
test/CodeGen/SystemZ/01-RetArg.ll | 6 +
test/CodeGen/SystemZ/01-RetImm.ll | 49 +
test/CodeGen/SystemZ/02-MemArith.ll | 133 +
test/CodeGen/SystemZ/02-RetAdd.ll | 6 +
test/CodeGen/SystemZ/02-RetAddImm.ll | 6 +
test/CodeGen/SystemZ/02-RetAnd.ll | 7 +
test/CodeGen/SystemZ/02-RetAndImm.ll | 28 +
test/CodeGen/SystemZ/02-RetNeg.ll | 7 +
test/CodeGen/SystemZ/02-RetOr.ll | 6 +
test/CodeGen/SystemZ/02-RetOrImm.ll | 28 +
test/CodeGen/SystemZ/02-RetSub.ll | 7 +
test/CodeGen/SystemZ/02-RetSubImm.ll | 7 +
test/CodeGen/SystemZ/02-RetXor.ll | 6 +
test/CodeGen/SystemZ/02-RetXorImm.ll | 6 +
test/CodeGen/SystemZ/03-RetAddImmSubreg.ll | 42 +
test/CodeGen/SystemZ/03-RetAddSubreg.ll | 22 +
test/CodeGen/SystemZ/03-RetAndImmSubreg.ll | 38 +
test/CodeGen/SystemZ/03-RetAndSubreg.ll | 21 +
test/CodeGen/SystemZ/03-RetArgSubreg.ll | 19 +
test/CodeGen/SystemZ/03-RetImmSubreg.ll | 42 +
test/CodeGen/SystemZ/03-RetNegImmSubreg.ll | 8 +
test/CodeGen/SystemZ/03-RetOrImmSubreg.ll | 60 +
test/CodeGen/SystemZ/03-RetOrSubreg.ll | 23 +
test/CodeGen/SystemZ/03-RetSubImmSubreg.ll | 42 +
test/CodeGen/SystemZ/03-RetSubSubreg.ll | 22 +
test/CodeGen/SystemZ/03-RetXorImmSubreg.ll | 58 +
test/CodeGen/SystemZ/03-RetXorSubreg.ll | 23 +
test/CodeGen/SystemZ/04-RetShifts.ll | 121 +
test/CodeGen/SystemZ/05-LoadAddr.ll | 11 +
test/CodeGen/SystemZ/05-MemImmStores.ll | 50 +
test/CodeGen/SystemZ/05-MemLoadsStores.ll | 44 +
test/CodeGen/SystemZ/05-MemLoadsStores16.ll | 85 +
test/CodeGen/SystemZ/05-MemRegLoads.ll | 75 +
test/CodeGen/SystemZ/05-MemRegStores.ll | 79 +
test/CodeGen/SystemZ/06-CallViaStack.ll | 17 +
test/CodeGen/SystemZ/06-FrameIdxLoad.ll | 16 +
test/CodeGen/SystemZ/06-LocalFrame.ll | 13 +
test/CodeGen/SystemZ/06-SimpleCall.ll | 12 +
test/CodeGen/SystemZ/07-BrCond.ll | 141 +
test/CodeGen/SystemZ/07-BrCond32.ll | 142 +
test/CodeGen/SystemZ/07-BrUnCond.ll | 18 +
test/CodeGen/SystemZ/07-CmpImm.ll | 137 +
test/CodeGen/SystemZ/07-CmpImm32.ll | 139 +
test/CodeGen/SystemZ/07-SelectCC.ll | 11 +
test/CodeGen/SystemZ/08-DivRem.ll | 55 +
test/CodeGen/SystemZ/08-DivRemMemOp.ll | 64 +
test/CodeGen/SystemZ/08-SimpleMuls.ll | 29 +
test/CodeGen/SystemZ/09-DynamicAlloca.ll | 14 +
test/CodeGen/SystemZ/09-Globals.ll | 23 +
test/CodeGen/SystemZ/09-Switches.ll | 39 +
test/CodeGen/SystemZ/10-FuncsPic.ll | 27 +
test/CodeGen/SystemZ/10-GlobalsPic.ll | 29 +
test/CodeGen/SystemZ/11-BSwap.ll | 74 +
.../CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll | 12 +
test/CodeGen/SystemZ/2009-06-02-And32Imm.ll | 14 +
test/CodeGen/SystemZ/2009-06-02-Rotate.ll | 13 +
test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll | 19 +
test/CodeGen/SystemZ/2009-07-04-Shl32.ll | 27 +
test/CodeGen/SystemZ/2009-07-05-Shifts.ll | 25 +
.../SystemZ/2009-07-10-BadIncomingArgOffset.ll | 22 +
test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll | 16 +
test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll | 32 +
.../SystemZ/2009-08-21-InlineAsmRConstraint.ll | 21 +
test/CodeGen/SystemZ/2009-08-22-FCopySign.ll | 22 +
test/CodeGen/SystemZ/dg.exp | 5 +
test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll | 2 +-
.../CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll | 2 +-
test/CodeGen/Thumb/2007-03-06-AddR7.ll | 4 +-
test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll | 2 +-
test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll | 2 +-
test/CodeGen/Thumb/2009-07-19-SPDecBug.ll | 33 +
test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll | 11 +
test/CodeGen/Thumb/2009-07-27-PEIAssert.ll | 26 +
test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll | 737 ++
test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll | 40 +
test/CodeGen/Thumb/2009-08-20-ISelBug.ll | 66 +
test/CodeGen/Thumb/asmprinter-bug.ll | 288 +
test/CodeGen/Thumb/dyn-stackalloc.ll | 6 +-
test/CodeGen/Thumb/fpconv.ll | 2 +-
test/CodeGen/Thumb/fpow.ll | 2 +-
test/CodeGen/Thumb/frame_thumb.ll | 4 +-
test/CodeGen/Thumb/iabs.ll | 2 +-
test/CodeGen/Thumb/inlineasm-imm-thumb.ll | 2 +-
test/CodeGen/Thumb/ispositive.ll | 4 +-
test/CodeGen/Thumb/large-stack.ll | 2 +-
test/CodeGen/Thumb/ldr_ext.ll | 51 +-
test/CodeGen/Thumb/ldr_frame.ll | 12 +-
test/CodeGen/Thumb/long-setcc.ll | 2 +-
test/CodeGen/Thumb/long.ll | 8 +-
test/CodeGen/Thumb/long_shift.ll | 26 +
test/CodeGen/Thumb/mul.ll | 22 +
test/CodeGen/Thumb/pop.ll | 13 +
test/CodeGen/Thumb/push.ll | 10 +
test/CodeGen/Thumb/select.ll | 14 +-
test/CodeGen/Thumb/stack-frame.ll | 4 +-
test/CodeGen/Thumb/thumb-imm.ll | 2 +-
test/CodeGen/Thumb/tst_teq.ll | 2 +-
test/CodeGen/Thumb/unord.ll | 4 +-
test/CodeGen/Thumb/vargs.ll | 6 +-
.../CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll | 35 +
test/CodeGen/Thumb2/2009-07-21-ISelBug.ll | 36 +
test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll | 22 +
test/CodeGen/Thumb2/2009-07-30-PEICrash.ll | 193 +
test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll | 85 +
test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll | 46 +
test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll | 29 +
test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll | 153 +
test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll | 508 ++
.../CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll | 34 +
.../Thumb2/2009-08-04-SubregLoweringBug2.ll | 42 +
.../Thumb2/2009-08-04-SubregLoweringBug3.ll | 54 +
test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll | 24 +
test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll | 16 +
test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll | 80 +
test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll | 20 +
test/CodeGen/Thumb2/2009-08-10-ISelBug.ll | 15 +
test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll | 154 +
test/CodeGen/Thumb2/carry.ll | 12 +-
test/CodeGen/Thumb2/frameless.ll | 6 +
test/CodeGen/Thumb2/frameless2.ll | 12 +
test/CodeGen/Thumb2/large-stack.ll | 28 +
test/CodeGen/Thumb2/load-global.ll | 28 +-
test/CodeGen/Thumb2/mul_const.ll | 18 +
test/CodeGen/Thumb2/pic-load.ll | 21 +
test/CodeGen/Thumb2/thumb2-adc.ll | 18 +-
test/CodeGen/Thumb2/thumb2-add.ll | 16 +-
test/CodeGen/Thumb2/thumb2-add2.ll | 12 +-
test/CodeGen/Thumb2/thumb2-add3.ll | 2 +-
test/CodeGen/Thumb2/thumb2-add4.ll | 17 +-
test/CodeGen/Thumb2/thumb2-add5.ll | 16 +-
test/CodeGen/Thumb2/thumb2-add6.ll | 5 +-
test/CodeGen/Thumb2/thumb2-and.ll | 16 +-
test/CodeGen/Thumb2/thumb2-and2.ll | 2 +-
test/CodeGen/Thumb2/thumb2-asr.ll | 4 +-
test/CodeGen/Thumb2/thumb2-asr2.ll | 4 +-
test/CodeGen/Thumb2/thumb2-bcc.ll | 19 +
test/CodeGen/Thumb2/thumb2-bfc.ll | 8 +-
test/CodeGen/Thumb2/thumb2-bic.ll | 56 +-
test/CodeGen/Thumb2/thumb2-branch.ll | 61 +
test/CodeGen/Thumb2/thumb2-call.ll | 27 +
test/CodeGen/Thumb2/thumb2-clz.ll | 2 +-
test/CodeGen/Thumb2/thumb2-cmn.ll | 10 +-
test/CodeGen/Thumb2/thumb2-cmn2.ll | 2 +-
test/CodeGen/Thumb2/thumb2-cmp.ll | 12 +-
test/CodeGen/Thumb2/thumb2-cmp2.ll | 18 +-
test/CodeGen/Thumb2/thumb2-eor.ll | 18 +-
test/CodeGen/Thumb2/thumb2-eor2.ll | 2 +-
test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 84 +
test/CodeGen/Thumb2/thumb2-ifcvt2.ll | 93 +
test/CodeGen/Thumb2/thumb2-ifcvt3.ll | 32 +
test/CodeGen/Thumb2/thumb2-jtb.ll | 120 +
test/CodeGen/Thumb2/thumb2-ldm.ll | 40 +
test/CodeGen/Thumb2/thumb2-ldr.ll | 23 +-
test/CodeGen/Thumb2/thumb2-ldr_ext.ll | 8 +-
test/CodeGen/Thumb2/thumb2-ldr_post.ll | 2 +-
test/CodeGen/Thumb2/thumb2-ldr_pre.ll | 4 +-
test/CodeGen/Thumb2/thumb2-ldrb.ll | 22 +-
test/CodeGen/Thumb2/thumb2-ldrd.ll | 12 +
test/CodeGen/Thumb2/thumb2-ldrh.ll | 22 +-
test/CodeGen/Thumb2/thumb2-lsl.ll | 4 +-
test/CodeGen/Thumb2/thumb2-lsl2.ll | 4 +-
test/CodeGen/Thumb2/thumb2-lsr.ll | 4 +-
test/CodeGen/Thumb2/thumb2-lsr2.ll | 4 +-
test/CodeGen/Thumb2/thumb2-lsr3.ll | 19 +
test/CodeGen/Thumb2/thumb2-mla.ll | 2 +-
test/CodeGen/Thumb2/thumb2-mls.ll | 2 +-
test/CodeGen/Thumb2/thumb2-mov.ll | 62 +-
test/CodeGen/Thumb2/thumb2-mov2.ll | 32 +-
test/CodeGen/Thumb2/thumb2-mov3.ll | 12 +-
test/CodeGen/Thumb2/thumb2-mov4.ll | 2 +-
test/CodeGen/Thumb2/thumb2-mul.ll | 4 +-
test/CodeGen/Thumb2/thumb2-mulhi.ll | 20 +
test/CodeGen/Thumb2/thumb2-mvn.ll | 12 +-
test/CodeGen/Thumb2/thumb2-mvn2.ll | 18 +-
test/CodeGen/Thumb2/thumb2-neg.ll | 4 +-
test/CodeGen/Thumb2/thumb2-orn.ll | 10 +-
test/CodeGen/Thumb2/thumb2-orn2.ll | 3 +-
test/CodeGen/Thumb2/thumb2-orr.ll | 16 +-
test/CodeGen/Thumb2/thumb2-orr2.ll | 2 +-
test/CodeGen/Thumb2/thumb2-pack.ll | 73 +
test/CodeGen/Thumb2/thumb2-rev.ll | 17 +-
test/CodeGen/Thumb2/thumb2-rev16.ll | 32 +
test/CodeGen/Thumb2/thumb2-ror.ll | 2 +-
test/CodeGen/Thumb2/thumb2-ror2.ll | 4 +-
test/CodeGen/Thumb2/thumb2-rsb.ll | 8 +-
test/CodeGen/Thumb2/thumb2-rsb2.ll | 2 +-
test/CodeGen/Thumb2/thumb2-sbc.ll | 8 +
test/CodeGen/Thumb2/thumb2-select.ll | 98 +
test/CodeGen/Thumb2/thumb2-select_xform.ll | 24 +
test/CodeGen/Thumb2/thumb2-shifter.ll | 10 +-
test/CodeGen/Thumb2/thumb2-smla.ll | 10 +
test/CodeGen/Thumb2/thumb2-smul.ll | 23 +
test/CodeGen/Thumb2/thumb2-spill-q.ll | 57 +
test/CodeGen/Thumb2/thumb2-str.ll | 25 +-
test/CodeGen/Thumb2/thumb2-str_post.ll | 4 +-
test/CodeGen/Thumb2/thumb2-str_pre.ll | 2 +-
test/CodeGen/Thumb2/thumb2-strb.ll | 25 +-
test/CodeGen/Thumb2/thumb2-strh.ll | 25 +-
test/CodeGen/Thumb2/thumb2-sub.ll | 20 +-
test/CodeGen/Thumb2/thumb2-sub2.ll | 2 +-
test/CodeGen/Thumb2/thumb2-sub4.ll | 16 +-
test/CodeGen/Thumb2/thumb2-sub5.ll | 5 +-
test/CodeGen/Thumb2/thumb2-sxt_rot.ll | 6 +-
test/CodeGen/Thumb2/thumb2-tbb.ll | 57 +
test/CodeGen/Thumb2/thumb2-tbh.ll | 90 +
test/CodeGen/Thumb2/thumb2-teq.ll | 3 +-
test/CodeGen/Thumb2/thumb2-teq2.ll | 10 +-
test/CodeGen/Thumb2/thumb2-tst.ll | 3 +-
test/CodeGen/Thumb2/thumb2-tst2.ll | 22 +-
test/CodeGen/Thumb2/thumb2-uxt_rot.ll | 6 +-
test/CodeGen/Thumb2/thumb2-uxtb.ll | 2 +-
test/CodeGen/Thumb2/tls1.ll | 6 +-
test/CodeGen/Thumb2/tls2.ll | 22 +-
test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll | 2 +-
test/CodeGen/X86/2003-08-23-DeadBlockTest.ll | 2 +-
test/CodeGen/X86/2003-11-03-GlobalBool.ll | 2 +-
test/CodeGen/X86/2004-02-12-Memcpy.ll | 2 +-
test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll | 4 +-
.../X86/2004-02-14-InefficientStackPointer.ll | 2 +-
test/CodeGen/X86/2004-02-22-Casts.ll | 2 +-
test/CodeGen/X86/2004-03-30-Select-Max.ll | 2 +-
test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll | 2 +-
test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll | 2 +-
test/CodeGen/X86/2004-06-10-StackifierCrash.ll | 2 +-
test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll | 2 +-
test/CodeGen/X86/2005-01-17-CycleInDAG.ll | 2 +-
test/CodeGen/X86/2005-02-14-IllegalAssembler.ll | 2 +-
test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll | 2 +-
test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll | 2 +-
test/CodeGen/X86/2006-03-01-InstrSchedBug.ll | 2 +-
test/CodeGen/X86/2006-03-02-InstrSchedBug.ll | 2 +-
test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll | 2 +-
test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll | 3 +-
test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll | 2 +-
test/CodeGen/X86/2006-05-02-InstrSched1.ll | 3 +-
test/CodeGen/X86/2006-05-02-InstrSched2.ll | 2 +-
test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll | 2 +-
test/CodeGen/X86/2006-05-08-InstrSched.ll | 3 +-
test/CodeGen/X86/2006-05-11-InstrSched.ll | 2 +-
test/CodeGen/X86/2006-05-17-VectorArg.ll | 2 +-
test/CodeGen/X86/2006-05-22-FPSetEQ.ll | 4 +-
test/CodeGen/X86/2006-05-25-CycleInDAG.ll | 2 +-
.../CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll | 2 +-
.../CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll | 2 +-
test/CodeGen/X86/2006-07-19-ATTAsm.ll | 2 +-
test/CodeGen/X86/2006-07-20-InlineAsm.ll | 2 +-
.../X86/2006-07-28-AsmPrint-Long-As-Pointer.ll | 2 +-
test/CodeGen/X86/2006-07-31-SingleRegClass.ll | 2 +-
test/CodeGen/X86/2006-08-07-CycleInDAG.ll | 2 +-
test/CodeGen/X86/2006-08-16-CycleInDAG.ll | 2 +-
test/CodeGen/X86/2006-08-21-ExtraMovInst.ll | 2 +-
test/CodeGen/X86/2006-09-01-CycleInDAG.ll | 2 +-
test/CodeGen/X86/2006-10-02-BoolRetCrash.ll | 2 +-
test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll | 2 +-
test/CodeGen/X86/2006-10-09-CycleInDAG.ll | 2 +-
.../X86/2006-10-10-FindModifiedNodeSlotBug.ll | 2 +-
test/CodeGen/X86/2006-10-12-CycleInDAG.ll | 2 +-
test/CodeGen/X86/2006-10-13-CycleInDAG.ll | 2 +-
.../X86/2006-10-19-SwitchUnnecessaryBranching.ll | 9 +-
test/CodeGen/X86/2006-11-12-CSRetCC.ll | 2 +-
test/CodeGen/X86/2006-11-17-IllegalMove.ll | 4 +-
test/CodeGen/X86/2006-11-27-SelectLegalize.ll | 2 +-
test/CodeGen/X86/2006-11-28-Memcpy.ll | 6 +-
test/CodeGen/X86/2006-12-19-IntelSyntax.ll | 2 +-
test/CodeGen/X86/2007-01-08-InstrSched.ll | 11 +-
test/CodeGen/X86/2007-01-13-StackPtrIndex.ll | 2 +-
test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll | 2 +-
test/CodeGen/X86/2007-02-04-OrAddrMode.ll | 4 +-
test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll | 2 +-
test/CodeGen/X86/2007-02-25-FastCCStack.ll | 2 +-
test/CodeGen/X86/2007-03-01-SpillerCrash.ll | 4 +-
test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll | 2 +-
test/CodeGen/X86/2007-03-16-InlineAsm.ll | 2 +-
test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll | 2 +- .../X86/2007-03-24-InlineAsmMultiRegConstraint.ll | 2 +- test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll | 2 +- test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll | 2 +- .../CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll | 2 +- test/CodeGen/X86/2007-03-26-CoalescerBug.ll | 2 +- test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll | 4 +- .../X86/2007-04-11-InlineAsmVectorResult.ll | 2 +- test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll | 2 +- test/CodeGen/X86/2007-04-24-Huge-Stack.ll | 2 +- test/CodeGen/X86/2007-04-24-VectorCrash.ll | 2 +- test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll | 4 +- .../X86/2007-04-27-InlineAsm-IntMemInput.ll | 2 +- test/CodeGen/X86/2007-05-05-VecCastExpand.ll | 2 +- test/CodeGen/X86/2007-05-07-InvokeSRet.ll | 2 +- test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll | 2 +- test/CodeGen/X86/2007-05-15-maskmovq.ll | 2 +- test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll | 4 +- test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll | 4 +- test/CodeGen/X86/2007-06-04-tailmerge4.ll | 2 +- test/CodeGen/X86/2007-06-05-LSR-Dominator.ll | 2 +- test/CodeGen/X86/2007-06-14-branchfold.ll | 2 +- test/CodeGen/X86/2007-06-15-IntToMMX.ll | 2 +- test/CodeGen/X86/2007-06-28-X86-64-isel.ll | 2 +- test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll | 2 +- test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll | 2 +- test/CodeGen/X86/2007-07-03-GR64ToVR64.ll | 6 +- test/CodeGen/X86/2007-07-10-StackerAssert.ll | 2 +- test/CodeGen/X86/2007-07-18-Vector-Extract.ll | 4 +- test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll | 2 +- test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll | 2 +- test/CodeGen/X86/2007-08-10-SignExtSubreg.ll | 2 +- test/CodeGen/X86/2007-08-13-AppendingLinkage.ll | 2 +- test/CodeGen/X86/2007-08-13-SpillerReuse.ll | 2 +- test/CodeGen/X86/2007-09-05-InvalidAsm.ll | 2 +- test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll | 2 +- test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll | 2 +- test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll | 2 +- test/CodeGen/X86/2007-09-27-LDIntrinsics.ll | 4 +- test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll | 2 +- test/CodeGen/X86/2007-10-05-3AddrConvert.ll | 2 +- test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll | 2 +- test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll | 2 +- test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll | 2 +- test/CodeGen/X86/2007-10-14-CoalescerCrash.ll | 2 +- test/CodeGen/X86/2007-10-15-CoalescerCrash.ll | 2 +- test/CodeGen/X86/2007-10-16-CoalescerCrash.ll | 2 +- test/CodeGen/X86/2007-10-16-IllegalAsm.ll | 2 +- test/CodeGen/X86/2007-10-16-fp80_select.ll | 2 +- test/CodeGen/X86/2007-10-17-IllegalAsm.ll | 4 +- test/CodeGen/X86/2007-10-19-SpillerUnfold.ll | 2 +- .../CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll | 2 +- test/CodeGen/X86/2007-10-29-ExtendSetCC.ll | 2 +- test/CodeGen/X86/2007-10-30-LSRCrash.ll | 2 +- test/CodeGen/X86/2007-10-31-extractelement-i64.ll | 2 +- test/CodeGen/X86/2007-11-01-ISelCrash.ll | 2 +- test/CodeGen/X86/2007-11-02-BadAsm.ll | 2 +- test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll | 2 +- test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll | 2 +- test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll | 2 +- .../X86/2007-11-04-rip-immediate-constant.ll | 2 +- test/CodeGen/X86/2007-11-06-InstrSched.ll | 2 +- test/CodeGen/X86/2007-11-07-MulBy4.ll | 2 +- test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll | 5 +- test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 2 +- test/CodeGen/X86/2007-11-30-TestLoadFolding.ll | 4 +- test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll | 2 +- test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll | 2 +-
test/CodeGen/X86/2007-12-18-LoadCSEBug.ll | 2 +- test/CodeGen/X86/2008-01-08-IllegalCMP.ll | 2 +- test/CodeGen/X86/2008-01-08-SchedulerCrash.ll | 2 +- test/CodeGen/X86/2008-01-09-LongDoubleSin.ll | 2 +- test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll | 2 +- .../X86/2008-01-16-InvalidDAGCombineXform.ll | 2 +- test/CodeGen/X86/2008-01-16-Trampoline.ll | 4 +- test/CodeGen/X86/2008-01-25-EmptyFunction.ll | 2 +- test/CodeGen/X86/2008-02-05-ISelCrash.ll | 2 +- test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll | 2 +- test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll | 2 +- test/CodeGen/X86/2008-02-14-BitMiscompile.ll | 2 +- test/CodeGen/X86/2008-02-18-TailMergingBug.ll | 2 +- test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll | 4 +- test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll | 2 +- test/CodeGen/X86/2008-02-22-ReMatBug.ll | 3 +- test/CodeGen/X86/2008-02-25-InlineAsmBug.ll | 2 +- test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll | 2 +- test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll | 2 +- test/CodeGen/X86/2008-02-27-PEICrash.ll | 2 +- test/CodeGen/X86/2008-03-06-frem-fpstack.ll | 2 +- test/CodeGen/X86/2008-03-07-APIntBug.ll | 2 +- test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll | 4 +- test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll | 2 +- test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll | 2 +- test/CodeGen/X86/2008-03-14-SpillerCrash.ll | 2 +- test/CodeGen/X86/2008-03-18-CoalescerBug.ll | 4 +- test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll | 2 +- test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll | 2 +- test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll | 2 +- test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll | 2 +- test/CodeGen/X86/2008-04-02-unnamedEH.ll | 3 +- test/CodeGen/X86/2008-04-08-CoalescerCrash.ll | 2 +- test/CodeGen/X86/2008-04-09-BranchFolding.ll | 2 +- test/CodeGen/X86/2008-04-15-LiveVariableBug.ll | 4 +- test/CodeGen/X86/2008-04-16-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-04-16-ReMatBug.ll | 2 +- test/CodeGen/X86/2008-04-17-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-04-24-MemCpyBug.ll | 2 +- test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll | 2 +- test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll | 2 +- test/CodeGen/X86/2008-04-28-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll | 2 +- test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll | 2 +- test/CodeGen/X86/2008-05-09-PHIElimBug.ll | 2 +- test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll | 2 +- test/CodeGen/X86/2008-05-12-tailmerge-5.ll | 2 +- test/CodeGen/X86/2008-05-21-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll | 2 +- test/CodeGen/X86/2008-05-28-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll | 2 +- test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll | 2 +- .../CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll | 6 +- test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll | 4 +- test/CodeGen/X86/2008-06-16-SubregsBug.ll | 2 +- test/CodeGen/X86/2008-06-18-BadShuffle.ll | 2 +- test/CodeGen/X86/2008-06-25-VecISelBug.ll | 2 +- test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll | 2 +- .../CodeGen/X86/2008-07-09-ELFSectionAttributes.ll | 2 +- test/CodeGen/X86/2008-07-11-SHLBy1.ll | 2 +- test/CodeGen/X86/2008-07-11-SpillerBug.ll | 9 +- test/CodeGen/X86/2008-07-16-CoalescerCrash.ll | 2 +- test/CodeGen/X86/2008-07-19-movups-spills.ll | 4 +- test/CodeGen/X86/2008-07-22-CombinerCrash.ll | 2 +- test/CodeGen/X86/2008-07-23-VSetCC.ll | 6 +- test/CodeGen/X86/2008-08-05-SpillerBug.ll | 2 +- test/CodeGen/X86/2008-08-06-RewriterBug.ll | 2 +-
test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll | 2 +- test/CodeGen/X86/2008-08-19-SubAndFetch.ll | 5 +- test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll | 2 +- test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll | 5 +- test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll | 4 +- test/CodeGen/X86/2008-08-31-EH_RETURN32.ll | 4 +- test/CodeGen/X86/2008-08-31-EH_RETURN64.ll | 4 +- test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll | 4 +- test/CodeGen/X86/2008-09-09-LinearScanBug.ll | 2 +- test/CodeGen/X86/2008-09-11-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-09-11-CoalescerBug2.ll | 2 +- test/CodeGen/X86/2008-09-17-inline-asm-1.ll | 16 +- test/CodeGen/X86/2008-09-18-inline-asm-2.ll | 4 +- test/CodeGen/X86/2008-09-19-RegAllocBug.ll | 2 +- test/CodeGen/X86/2008-09-25-sseregparm-1.ll | 4 +- test/CodeGen/X86/2008-09-26-FrameAddrBug.ll | 2 +- test/CodeGen/X86/2008-09-29-ReMatBug.ll | 2 +- test/CodeGen/X86/2008-09-29-VolatileBug.ll | 2 +- test/CodeGen/X86/2008-10-02-Atomics32-2.ll | 2 +- test/CodeGen/X86/2008-10-06-MMXISelBug.ll | 2 +- test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll | 2 +- test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll | 2 +- test/CodeGen/X86/2008-10-07-SSEISelBug.ll | 2 +- test/CodeGen/X86/2008-10-11-CallCrash.ll | 2 +- test/CodeGen/X86/2008-10-13-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-10-16-SpillerBug.ll | 2 +- test/CodeGen/X86/2008-10-16-VecUnaryOp.ll | 2 +- test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll | 4 +- test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll | 4 +- test/CodeGen/X86/2008-10-24-FlippedCompare.ll | 2 +- test/CodeGen/X86/2008-10-27-CoalescerBug.ll | 2 +- test/CodeGen/X86/2008-10-27-StackRealignment.ll | 4 +- test/CodeGen/X86/2008-10-29-ExpandVAARG.ll | 2 +- test/CodeGen/X86/2008-11-03-F80VAARG.ll | 2 +- test/CodeGen/X86/2008-11-06-testb.ll | 2 +- test/CodeGen/X86/2008-11-13-inlineasm-3.ll | 2 +- test/CodeGen/X86/2008-11-29-DivideConstant16bit.ll | 3 +- .../X86/2008-11-29-DivideConstant16bitSigned.ll | 3 +- test/CodeGen/X86/2008-11-29-ULT-Sign.ll | 2 +- test/CodeGen/X86/2008-12-01-SpillerAssert.ll | 2 +- .../X86/2008-12-01-loop-iv-used-outside-loop.ll | 2 +- test/CodeGen/X86/2008-12-02-IllegalResultType.ll | 2 +- test/CodeGen/X86/2008-12-02-dagcombine-1.ll | 2 +- test/CodeGen/X86/2008-12-02-dagcombine-2.ll | 2 +- test/CodeGen/X86/2008-12-02-dagcombine-3.ll | 4 +- test/CodeGen/X86/2008-12-05-SpillerCrash.ll | 2 +- test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll | 12 + test/CodeGen/X86/2008-12-16-BadShift.ll | 2 +- test/CodeGen/X86/2008-12-16-dagcombine-4.ll | 2 +- test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll | 7 +- test/CodeGen/X86/2008-12-22-dagcombine-5.ll | 2 +- test/CodeGen/X86/2008-12-23-crazy-address.ll | 2 +- test/CodeGen/X86/2008-12-23-dagcombine-6.ll | 2 +- test/CodeGen/X86/2009-01-12-CoalescerBug.ll | 2 +- test/CodeGen/X86/2009-01-13-DoubleUpdate.ll | 2 +- test/CodeGen/X86/2009-01-16-SchedulerBug.ll | 2 +- test/CodeGen/X86/2009-01-16-UIntToFP.ll | 2 +- test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll | 2 +- test/CodeGen/X86/2009-01-25-NoSSE.ll | 2 +- test/CodeGen/X86/2009-01-26-WrongCheck.ll | 2 +- test/CodeGen/X86/2009-01-27-NullStrings.ll | 39 +- test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll | 2 +- test/CodeGen/X86/2009-01-31-BigShift.ll | 2 +- test/CodeGen/X86/2009-01-31-BigShift2.ll | 2 +- test/CodeGen/X86/2009-01-31-BigShift3.ll | 2 +- test/CodeGen/X86/2009-02-01-LargeMask.ll | 2 +- test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll | 2 +- test/CodeGen/X86/2009-02-04-sext-i64-gep.ll | 2 +- test/CodeGen/X86/2009-02-05-CoalescerBug.ll | 4 +- 
test/CodeGen/X86/2009-02-07-CoalescerBug.ll | 2 +- test/CodeGen/X86/2009-02-08-CoalescerBug.ll | 2 +- .../CodeGen/X86/2009-02-11-codegenprepare-reuse.ll | 2 +- test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll | 4 +- .../X86/2009-02-12-InlineAsm-nieZ-constraints.ll | 4 +- test/CodeGen/X86/2009-02-12-SpillerBug.ll | 2 +- test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll | 2 +- test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll | 2 +- test/CodeGen/X86/2009-02-25-CommuteBug.ll | 2 +- test/CodeGen/X86/2009-02-26-MachineLICMBug.ll | 2 +- test/CodeGen/X86/2009-03-03-BTHang.ll | 2 +- test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll | 2 +- test/CodeGen/X86/2009-03-05-burr-list-crash.ll | 2 +- test/CodeGen/X86/2009-03-07-FPConstSelect.ll | 2 +- test/CodeGen/X86/2009-03-09-APIntCrash.ll | 2 +- test/CodeGen/X86/2009-03-09-SpillerBug.ll | 2 +- test/CodeGen/X86/2009-03-10-CoalescerBug.ll | 2 +- test/CodeGen/X86/2009-03-11-CoalescerBug.ll | 2 +- test/CodeGen/X86/2009-03-12-CPAlignBug.ll | 2 +- test/CodeGen/X86/2009-03-13-PHIElimBug.ll | 2 +- test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll | 2 +- test/CodeGen/X86/2009-03-16-SpillerBug.ll | 2 +- test/CodeGen/X86/2009-03-23-LinearScanBug.ll | 2 +- test/CodeGen/X86/2009-03-23-MultiUseSched.ll | 2 +- test/CodeGen/X86/2009-03-23-i80-fp80.ll | 4 +- test/CodeGen/X86/2009-03-25-TestBug.ll | 2 +- test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll | 4 +- test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll | 2 +- .../X86/2009-04-12-FastIselOverflowCrash.ll | 2 +- test/CodeGen/X86/2009-04-12-picrel.ll | 2 +- test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll | 2 +- test/CodeGen/X86/2009-04-13-2AddrAssert.ll | 2 +- test/CodeGen/X86/2009-04-14-IllegalRegs.ll | 2 +- test/CodeGen/X86/2009-04-16-SpillerUnfold.ll | 2 +- test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 2 +- test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll | 10 +- test/CodeGen/X86/2009-04-24.ll | 4 +- test/CodeGen/X86/2009-04-25-CoalescerBug.ll | 2 +- test/CodeGen/X86/2009-04-27-CoalescerAssert.ll | 2 +- test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll | 2 +- .../CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll | 2 +- test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll | 2 +- .../CodeGen/X86/2009-04-29-IndirectDestOperands.ll | 2 +- test/CodeGen/X86/2009-04-29-LinearScanBug.ll | 2 +- test/CodeGen/X86/2009-04-29-RegAllocAssert.ll | 2 +- test/CodeGen/X86/2009-04-scale.ll | 2 +- test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll | 2 +- test/CodeGen/X86/2009-05-11-tailmerge-crash.ll | 2 +- .../X86/2009-05-19-SingleElementExtractElement.ll | 2 +- .../CodeGen/X86/2009-05-23-available_externally.ll | 2 +- test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll | 2 +- test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll | 2 +- test/CodeGen/X86/2009-05-30-ISelBug.ll | 2 +- test/CodeGen/X86/2009-06-02-RewriterBug.ll | 2 +- test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll | 2 +- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 8 +- test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll | 2 +- .../X86/2009-06-05-ScalarToVectorByteMMX.ll | 2 +- test/CodeGen/X86/2009-06-05-VZextByteShort.ll | 2 +- test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll | 2 +- test/CodeGen/X86/2009-06-05-sitofpCrash.ll | 2 +- test/CodeGen/X86/2009-06-06-ConcatVectors.ll | 2 +- test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll | 2 +- ...-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll | 4 +- test/CodeGen/X86/2009-06-15-not-a-tail-call.ll | 2 +- .../X86/2009-06-18-movlp-shuffle-register.ll | 2 +- test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll | 137 + test/CodeGen/X86/2009-07-07-SplitICmp.ll | 8 + 
.../X86/2009-07-09-ExtractBoolFromVector.ll | 11 + test/CodeGen/X86/2009-07-15-CoalescerBug.ll | 958 +++ test/CodeGen/X86/2009-07-16-CoalescerBug.ll | 210 + test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll | 102 + test/CodeGen/X86/2009-07-17-StackColoringBug.ll | 55 + test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll | 11 + test/CodeGen/X86/2009-07-20-CoalescerBug.ll | 165 + test/CodeGen/X86/2009-07-20-DAGCombineBug.ll | 29 + .../CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll | 10 + test/CodeGen/X86/2009-08-06-branchfolder-crash.ll | 142 + test/CodeGen/X86/2009-08-06-inlineasm.ll | 26 + test/CodeGen/X86/2009-08-08-CastError.ll | 9 + test/CodeGen/X86/2009-08-12-badswitch.ll | 176 + .../X86/2009-08-14-Win64MemoryIndirectArg.ll | 57 + .../X86/2009-08-19-LoadNarrowingMiscompile.ll | 15 + test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll | 69 + test/CodeGen/X86/2009-08-23-linkerprivate.ll | 8 + test/CodeGen/X86/2009-09-07-CoalescerBug.ll | 48 + test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll | 48 + test/CodeGen/X86/2009-09-16-CoalescerBug.ll | 64 + .../X86/2009-09-19-SchedCustomLoweringBug.ll | 30 + test/CodeGen/X86/2009-09-19-earlyclobber.ll | 15 + test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll | 36 + test/CodeGen/X86/2009-09-22-CoalescerBug.ll | 124 + test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll | 91 + test/CodeGen/X86/2009-10-08-MachineLICMBug.ll | 264 + test/CodeGen/X86/20090313-signext.ll | 2 +- test/CodeGen/X86/Atomics-32.ll | 2 +- test/CodeGen/X86/Atomics-64.ll | 2 +- test/CodeGen/X86/abi-isel.ll | 8710 ++++++++++++++++++- test/CodeGen/X86/add-trick32.ll | 2 +- test/CodeGen/X86/add-trick64.ll | 2 +- test/CodeGen/X86/add-with-overflow.ll | 8 +- test/CodeGen/X86/aliases.ll | 3 +- test/CodeGen/X86/aligned-comm.ll | 8 +- test/CodeGen/X86/all-ones-vector.ll | 2 +- test/CodeGen/X86/alloca-align-rounding.ll | 4 +- test/CodeGen/X86/and-or-fold.ll | 2 +- test/CodeGen/X86/and-su.ll | 2 +- test/CodeGen/X86/anyext-uses.ll | 2 +- test/CodeGen/X86/anyext.ll | 18 + test/CodeGen/X86/arg-cast.ll | 6 +- test/CodeGen/X86/asm-block-labels.ll | 2 +- test/CodeGen/X86/asm-global-imm.ll | 4 +- test/CodeGen/X86/asm-indirect-mem.ll | 2 +- test/CodeGen/X86/asm-modifier-P.ll | 79 + test/CodeGen/X86/asm-modifier.ll | 41 + test/CodeGen/X86/atomic_add.ll | 217 + test/CodeGen/X86/atomic_op.ll | 2 +- test/CodeGen/X86/attribute-sections.ll | 18 + test/CodeGen/X86/avoid-lea-scale2.ll | 8 + test/CodeGen/X86/avoid-loop-align-2.ll | 2 +- test/CodeGen/X86/avoid-loop-align.ll | 2 +- test/CodeGen/X86/bitcast-int-to-vector.ll | 2 +- test/CodeGen/X86/bitcast.ll | 4 +- test/CodeGen/X86/bitcast2.ll | 4 +- test/CodeGen/X86/break-anti-dependencies.ll | 4 +- test/CodeGen/X86/bss_pagealigned.ll | 21 + test/CodeGen/X86/bswap-inline-asm.ll | 2 +- test/CodeGen/X86/bswap.ll | 4 +- test/CodeGen/X86/bt.ll | 6 +- test/CodeGen/X86/byval.ll | 4 +- test/CodeGen/X86/byval2.ll | 4 +- test/CodeGen/X86/byval3.ll | 4 +- test/CodeGen/X86/byval4.ll | 4 +- test/CodeGen/X86/byval5.ll | 4 +- test/CodeGen/X86/byval6.ll | 2 +- test/CodeGen/X86/byval7.ll | 2 +- test/CodeGen/X86/call-imm.ll | 8 +- test/CodeGen/X86/call-push.ll | 2 +- test/CodeGen/X86/change-compare-stride-0.ll | 4 +- test/CodeGen/X86/change-compare-stride-1.ll | 2 +- test/CodeGen/X86/clz.ll | 6 +- test/CodeGen/X86/cmov.ll | 157 + test/CodeGen/X86/cmp-test.ll | 4 +- test/CodeGen/X86/cmp0.ll | 2 +- test/CodeGen/X86/cmp1.ll | 2 +- test/CodeGen/X86/cmp2.ll | 2 +- test/CodeGen/X86/coalesce-esp.ll | 36 + test/CodeGen/X86/coalescer-commute1.ll | 2 +- test/CodeGen/X86/coalescer-commute2.ll | 4 +- 
test/CodeGen/X86/coalescer-commute3.ll | 2 +- test/CodeGen/X86/coalescer-commute4.ll | 2 +- test/CodeGen/X86/coalescer-commute5.ll | 2 +- test/CodeGen/X86/coalescer-cross.ll | 41 + test/CodeGen/X86/coalescer-remat.ll | 2 +- test/CodeGen/X86/code_placement.ll | 4 +- test/CodeGen/X86/codegen-prepare-cast.ll | 2 +- test/CodeGen/X86/codemodel.ll | 67 + test/CodeGen/X86/combine-lds.ll | 2 +- test/CodeGen/X86/combiner-aa-0.ll | 20 + test/CodeGen/X86/combiner-aa-1.ll | 23 + test/CodeGen/X86/commute-intrinsic.ll | 2 +- test/CodeGen/X86/commute-two-addr.ll | 2 +- test/CodeGen/X86/compare-add.ll | 2 +- test/CodeGen/X86/compare-inf.ll | 76 + test/CodeGen/X86/compare_folding.ll | 4 +- test/CodeGen/X86/compiler_used.ll | 9 + test/CodeGen/X86/complex-fca.ll | 2 +- test/CodeGen/X86/const-select.ll | 4 +- test/CodeGen/X86/constant-pool-remat-0.ll | 8 +- test/CodeGen/X86/constpool.ll | 8 +- test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll | 2 +- test/CodeGen/X86/copysign-zero.ll | 4 +- test/CodeGen/X86/critical-edge-split.ll | 2 +- test/CodeGen/X86/cstring.ll | 2 +- test/CodeGen/X86/dag-rauw-cse.ll | 2 +- test/CodeGen/X86/dagcombine-buildvector.ll | 2 +- test/CodeGen/X86/dagcombine-cse.ll | 2 +- test/CodeGen/X86/darwin-bzero.ll | 2 +- test/CodeGen/X86/darwin-no-dead-strip.ll | 2 +- test/CodeGen/X86/darwin-quote.ll | 15 + test/CodeGen/X86/darwin-stub.ll | 4 +- test/CodeGen/X86/div_const.ll | 2 +- test/CodeGen/X86/divrem.ll | 2 +- test/CodeGen/X86/dll-linkage.ll | 9 + test/CodeGen/X86/dollar-name.ll | 7 +- test/CodeGen/X86/dyn-stackalloc.ll | 8 +- test/CodeGen/X86/empty-struct-return-type.ll | 15 + test/CodeGen/X86/epilogue.ll | 4 +- test/CodeGen/X86/extend.ll | 4 +- test/CodeGen/X86/extern_weak.ll | 2 +- test/CodeGen/X86/extmul128.ll | 2 +- test/CodeGen/X86/extmul64.ll | 2 +- test/CodeGen/X86/extract-combine.ll | 2 +- test/CodeGen/X86/extract-extract.ll | 24 + test/CodeGen/X86/extractelement-from-arg.ll | 2 +- test/CodeGen/X86/extractelement-load.ll | 4 +- test/CodeGen/X86/extractelement-shuffle.ll | 2 +- test/CodeGen/X86/extractps.ll | 2 +- test/CodeGen/X86/fabs.ll | 5 +- test/CodeGen/X86/fast-cc-callee-pops.ll | 3 +- test/CodeGen/X86/fast-cc-merge-stack-adj.ll | 2 +- test/CodeGen/X86/fast-cc-pass-in-regs.ll | 2 +- test/CodeGen/X86/fast-isel-bail.ll | 2 +- test/CodeGen/X86/fast-isel-bc.ll | 19 + test/CodeGen/X86/fast-isel-call.ll | 2 +- test/CodeGen/X86/fast-isel-constpool.ll | 2 +- test/CodeGen/X86/fast-isel-fneg.ll | 16 + test/CodeGen/X86/fast-isel-gep.ll | 53 + test/CodeGen/X86/fast-isel-gv.ll | 2 +- test/CodeGen/X86/fast-isel-i1.ll | 2 +- test/CodeGen/X86/fast-isel-mem.ll | 4 +- test/CodeGen/X86/fast-isel-phys.ll | 2 +- test/CodeGen/X86/fast-isel-shift-imm.ll | 2 +- test/CodeGen/X86/fast-isel-tailcall.ll | 2 +- test/CodeGen/X86/fast-isel-tls.ll | 2 +- test/CodeGen/X86/fast-isel-trunc.ll | 4 +- test/CodeGen/X86/fast-isel.ll | 11 +- test/CodeGen/X86/fastcall-correct-mangling.ll | 2 +- test/CodeGen/X86/fastcc-2.ll | 4 +- test/CodeGen/X86/fastcc-byval.ll | 2 +- test/CodeGen/X86/fastcc-sret.ll | 2 +- test/CodeGen/X86/fastcc.ll | 5 +- test/CodeGen/X86/field-extract-use-trunc.ll | 4 +- test/CodeGen/X86/fildll.ll | 2 +- test/CodeGen/X86/fmul-zero.ll | 4 +- test/CodeGen/X86/fold-add.ll | 2 +- test/CodeGen/X86/fold-and-shift.ll | 2 +- test/CodeGen/X86/fold-call-2.ll | 2 +- test/CodeGen/X86/fold-call-3.ll | 2 +- test/CodeGen/X86/fold-call.ll | 4 +- test/CodeGen/X86/fold-imm.ll | 4 +- test/CodeGen/X86/fold-load.ll | 2 +- test/CodeGen/X86/fold-mul-lohi.ll | 4 +- test/CodeGen/X86/fold-pcmpeqd-0.ll | 6 +- 
test/CodeGen/X86/fold-pcmpeqd-1.ll | 2 +- test/CodeGen/X86/fold-pcmpeqd-2.ll | 4 +- test/CodeGen/X86/fold-sext-trunc.ll | 2 +- test/CodeGen/X86/fp-immediate-shorten.ll | 2 +- test/CodeGen/X86/fp-in-intregs.ll | 2 +- test/CodeGen/X86/fp-stack-2results.ll | 4 +- test/CodeGen/X86/fp-stack-O0-crash.ll | 30 + test/CodeGen/X86/fp-stack-compare.ll | 2 +- test/CodeGen/X86/fp-stack-direct-ret.ll | 4 +- test/CodeGen/X86/fp-stack-ret-conv.ll | 6 +- test/CodeGen/X86/fp-stack-ret-store.ll | 2 +- test/CodeGen/X86/fp-stack-ret.ll | 3 +- test/CodeGen/X86/fp-stack-retcopy.ll | 2 +- test/CodeGen/X86/fp-stack-set-st1.ll | 2 +- test/CodeGen/X86/fp2sint.ll | 2 +- test/CodeGen/X86/fp_constant_op.ll | 2 +- test/CodeGen/X86/fp_load_cast_fold.ll | 2 +- test/CodeGen/X86/fp_load_fold.ll | 2 +- test/CodeGen/X86/fsxor-alignment.ll | 2 +- test/CodeGen/X86/full-lsr.ll | 2 +- test/CodeGen/X86/ga-offset.ll | 4 +- test/CodeGen/X86/global-sections-tls.ll | 14 + test/CodeGen/X86/global-sections.ll | 123 + test/CodeGen/X86/h-register-addressing-32.ll | 2 +- test/CodeGen/X86/h-register-addressing-64.ll | 2 +- test/CodeGen/X86/h-register-store.ll | 4 +- test/CodeGen/X86/h-registers-0.ll | 4 +- test/CodeGen/X86/h-registers-1.ll | 2 +- test/CodeGen/X86/h-registers-2.ll | 2 +- test/CodeGen/X86/h-registers-3.ll | 4 +- test/CodeGen/X86/hidden-vis-2.ll | 4 +- test/CodeGen/X86/hidden-vis-3.ll | 12 +- test/CodeGen/X86/hidden-vis-4.ll | 7 +- test/CodeGen/X86/hidden-vis.ll | 20 +- test/CodeGen/X86/i128-and-beyond.ll | 2 +- test/CodeGen/X86/i128-immediate.ll | 2 +- test/CodeGen/X86/i128-mul.ll | 2 +- test/CodeGen/X86/i128-ret.ll | 4 +- test/CodeGen/X86/i256-add.ll | 2 +- test/CodeGen/X86/i2k.ll | 2 +- test/CodeGen/X86/i64-mem-copy.ll | 4 +- test/CodeGen/X86/iabs.ll | 2 +- test/CodeGen/X86/illegal-asm.ll | 4 +- test/CodeGen/X86/illegal-insert.ll | 2 +- test/CodeGen/X86/illegal-vector-args-return.ll | 8 +- test/CodeGen/X86/imp-def-copies.ll | 2 +- test/CodeGen/X86/imul-lea-2.ll | 6 +- test/CodeGen/X86/imul-lea.ll | 2 +- test/CodeGen/X86/inline-asm-2addr.ll | 2 +- test/CodeGen/X86/inline-asm-R-constraint.ll | 18 + test/CodeGen/X86/inline-asm-flag-clobber.ll | 4 +- test/CodeGen/X86/inline-asm-fpstack.ll | 2 +- test/CodeGen/X86/inline-asm-fpstack2.ll | 2 +- test/CodeGen/X86/inline-asm-fpstack3.ll | 2 +- test/CodeGen/X86/inline-asm-fpstack4.ll | 2 +- test/CodeGen/X86/inline-asm-fpstack5.ll | 2 +- test/CodeGen/X86/inline-asm-modifier-n.ll | 2 +- test/CodeGen/X86/inline-asm-mrv.ll | 8 +- test/CodeGen/X86/inline-asm-out-regs.ll | 2 +- test/CodeGen/X86/inline-asm-pic.ll | 4 +- test/CodeGen/X86/inline-asm-q-regs.ll | 10 + test/CodeGen/X86/inline-asm-tied.ll | 2 +- test/CodeGen/X86/inline-asm-x-scalar.ll | 2 +- test/CodeGen/X86/inline-asm.ll | 2 +- test/CodeGen/X86/ins_subreg_coalesce-1.ll | 2 +- test/CodeGen/X86/ins_subreg_coalesce-2.ll | 2 +- test/CodeGen/X86/ins_subreg_coalesce-3.ll | 4 +- test/CodeGen/X86/insertelement-copytoregs.ll | 2 +- test/CodeGen/X86/insertelement-legalize.ll | 2 +- test/CodeGen/X86/invalid-shift-immediate.ll | 2 +- test/CodeGen/X86/isel-sink.ll | 4 +- test/CodeGen/X86/isel-sink2.ll | 2 +- test/CodeGen/X86/isel-sink3.ll | 4 +- test/CodeGen/X86/isint.ll | 2 +- test/CodeGen/X86/isnan.ll | 2 +- test/CodeGen/X86/isnan2.ll | 2 +- test/CodeGen/X86/ispositive.ll | 2 +- test/CodeGen/X86/iv-users-in-other-loops.ll | 6 +- test/CodeGen/X86/jump_sign.ll | 2 +- test/CodeGen/X86/ldzero.ll | 2 +- test/CodeGen/X86/lea-2.ll | 4 +- test/CodeGen/X86/lea-3.ll | 4 +- test/CodeGen/X86/lea-4.ll | 2 +- test/CodeGen/X86/lea-recursion.ll | 2 +- 
test/CodeGen/X86/lea.ll | 35 +- test/CodeGen/X86/legalizedag_vec.ll | 6 +- test/CodeGen/X86/lfence.ll | 2 +- test/CodeGen/X86/limited-prec.ll | 6 +- test/CodeGen/X86/live-out-reg-info.ll | 2 +- test/CodeGen/X86/local-liveness.ll | 2 +- test/CodeGen/X86/long-setcc.ll | 6 +- test/CodeGen/X86/longlong-deadload.ll | 2 +- test/CodeGen/X86/loop-hoist.ll | 14 +- test/CodeGen/X86/loop-strength-reduce-2.ll | 4 +- test/CodeGen/X86/loop-strength-reduce-3.ll | 2 +- test/CodeGen/X86/loop-strength-reduce.ll | 2 +- test/CodeGen/X86/loop-strength-reduce2.ll | 2 +- test/CodeGen/X86/loop-strength-reduce3.ll | 4 +- test/CodeGen/X86/loop-strength-reduce4.ll | 4 +- test/CodeGen/X86/loop-strength-reduce5.ll | 2 +- test/CodeGen/X86/loop-strength-reduce6.ll | 2 +- test/CodeGen/X86/loop-strength-reduce7.ll | 2 +- test/CodeGen/X86/loop-strength-reduce8.ll | 2 +- test/CodeGen/X86/lsr-loop-exit-cond.ll | 5 +- test/CodeGen/X86/lsr-negative-stride.ll | 2 +- test/CodeGen/X86/lsr-sort.ll | 2 +- test/CodeGen/X86/masked-iv-safe.ll | 2 +- test/CodeGen/X86/masked-iv-unsafe.ll | 2 +- test/CodeGen/X86/maskmovdqu.ll | 4 +- test/CodeGen/X86/memcpy-2.ll | 4 +- test/CodeGen/X86/memcpy.ll | 2 +- test/CodeGen/X86/memmove-0.ll | 2 +- test/CodeGen/X86/memmove-1.ll | 2 +- test/CodeGen/X86/memmove-2.ll | 2 +- test/CodeGen/X86/memmove-3.ll | 2 +- test/CodeGen/X86/memmove-4.ll | 2 +- test/CodeGen/X86/memset-2.ll | 6 +- test/CodeGen/X86/memset.ll | 4 +- test/CodeGen/X86/memset64-on-x86-32.ll | 4 +- test/CodeGen/X86/mfence.ll | 6 +- test/CodeGen/X86/mingw-alloca.ll | 19 +- test/CodeGen/X86/mmx-arg-passing.ll | 10 +- test/CodeGen/X86/mmx-arg-passing2.ll | 4 +- test/CodeGen/X86/mmx-arith.ll | 2 +- test/CodeGen/X86/mmx-bitcast-to-i64.ll | 2 +- test/CodeGen/X86/mmx-copy-gprs.ll | 6 +- test/CodeGen/X86/mmx-emms.ll | 2 +- test/CodeGen/X86/mmx-insert-element.ll | 4 +- test/CodeGen/X86/mmx-pinsrw.ll | 2 +- test/CodeGen/X86/mmx-punpckhdq.ll | 2 +- test/CodeGen/X86/mmx-s2v.ll | 2 +- test/CodeGen/X86/mmx-shift.ll | 8 +- test/CodeGen/X86/mmx-shuffle.ll | 2 +- test/CodeGen/X86/mmx-vzmovl-2.ll | 4 +- test/CodeGen/X86/mmx-vzmovl.ll | 4 +- test/CodeGen/X86/movfs.ll | 2 +- test/CodeGen/X86/movgs.ll | 2 +- test/CodeGen/X86/mul-legalize.ll | 2 +- test/CodeGen/X86/mul-remat.ll | 2 +- test/CodeGen/X86/mul-shift-reassoc.ll | 4 +- test/CodeGen/X86/mul128.ll | 2 +- test/CodeGen/X86/mul64.ll | 2 +- .../X86/multiple-return-values-cross-block.ll | 2 +- test/CodeGen/X86/multiple-return-values.ll | 2 +- test/CodeGen/X86/nancvt.ll | 2 +- test/CodeGen/X86/narrow_op-1.ll | 8 +- test/CodeGen/X86/narrow_op-2.ll | 8 +- test/CodeGen/X86/neg_fp.ll | 4 +- test/CodeGen/X86/negate-add-zero.ll | 2 +- test/CodeGen/X86/negative-sin.ll | 4 +- test/CodeGen/X86/negative-subscript.ll | 2 +- test/CodeGen/X86/negative_zero.ll | 2 +- test/CodeGen/X86/nobt.ll | 2 +- test/CodeGen/X86/nofence.ll | 2 +- test/CodeGen/X86/omit-label.ll | 38 +- test/CodeGen/X86/opt-ext-uses.ll | 2 +- test/CodeGen/X86/optimize-max-0.ll | 2 +- test/CodeGen/X86/optimize-max-1.ll | 2 +- test/CodeGen/X86/optimize-max-2.ll | 2 +- test/CodeGen/X86/or-branch.ll | 2 +- test/CodeGen/X86/overlap-shift.ll | 2 +- test/CodeGen/X86/packed_struct.ll | 6 +- test/CodeGen/X86/peep-test-0.ll | 2 +- test/CodeGen/X86/peep-test-1.ll | 2 +- test/CodeGen/X86/peep-test-2.ll | 2 +- test/CodeGen/X86/peep-test-3.ll | 89 + test/CodeGen/X86/peep-vector-extract-concat.ll | 2 +- test/CodeGen/X86/peep-vector-extract-insert.ll | 2 +- test/CodeGen/X86/personality.ll | 50 + test/CodeGen/X86/phys_subreg_coalesce-2.ll | 2 +- 
test/CodeGen/X86/phys_subreg_coalesce.ll | 2 +- test/CodeGen/X86/pic-load-remat.ll | 2 +- test/CodeGen/X86/pic.ll | 208 + test/CodeGen/X86/pic_jumptable.ll | 8 +- test/CodeGen/X86/pmul.ll | 2 +- test/CodeGen/X86/postalloc-coalescing.ll | 2 +- test/CodeGen/X86/pr1462.ll | 2 +- test/CodeGen/X86/pr1489.ll | 14 +- test/CodeGen/X86/pr1505.ll | 2 +- test/CodeGen/X86/pr1505b.ll | 4 +- test/CodeGen/X86/pr2177.ll | 2 +- test/CodeGen/X86/pr2182.ll | 2 +- test/CodeGen/X86/pr2326.ll | 2 +- test/CodeGen/X86/pr2623.ll | 2 +- test/CodeGen/X86/pr2656.ll | 2 +- test/CodeGen/X86/pr2659.ll | 2 +- test/CodeGen/X86/pr2849.ll | 2 +- test/CodeGen/X86/pr2924.ll | 2 +- test/CodeGen/X86/pr2982.ll | 2 +- test/CodeGen/X86/pr3154.ll | 4 +- test/CodeGen/X86/pr3216.ll | 2 +- test/CodeGen/X86/pr3241.ll | 2 +- test/CodeGen/X86/pr3243.ll | 2 +- test/CodeGen/X86/pr3244.ll | 2 +- test/CodeGen/X86/pr3250.ll | 2 +- test/CodeGen/X86/pr3317.ll | 2 +- test/CodeGen/X86/pr3366.ll | 2 +- test/CodeGen/X86/pr3457.ll | 2 +- test/CodeGen/X86/pr3495-2.ll | 2 +- test/CodeGen/X86/pr3495.ll | 6 +- test/CodeGen/X86/pr3522.ll | 2 +- test/CodeGen/X86/pre-split1.ll | 2 +- test/CodeGen/X86/pre-split10.ll | 2 +- test/CodeGen/X86/pre-split11.ll | 34 + test/CodeGen/X86/pre-split2.ll | 2 +- test/CodeGen/X86/pre-split3.ll | 2 +- test/CodeGen/X86/pre-split4.ll | 2 +- test/CodeGen/X86/pre-split5.ll | 2 +- test/CodeGen/X86/pre-split6.ll | 2 +- test/CodeGen/X86/pre-split7.ll | 2 +- test/CodeGen/X86/pre-split8.ll | 2 +- test/CodeGen/X86/pre-split9.ll | 2 +- test/CodeGen/X86/prefetch.ll | 2 +- test/CodeGen/X86/private-2.ll | 2 +- test/CodeGen/X86/private.ll | 8 +- test/CodeGen/X86/ptrtoint-constexpr.ll | 8 + test/CodeGen/X86/rdtsc.ll | 4 +- test/CodeGen/X86/red-zone.ll | 26 +- test/CodeGen/X86/red-zone2.ll | 2 +- test/CodeGen/X86/regpressure.ll | 2 +- test/CodeGen/X86/rem-2.ll | 2 +- test/CodeGen/X86/rem.ll | 2 +- test/CodeGen/X86/remat-constant.ll | 2 +- test/CodeGen/X86/remat-mov-1.ll | 2 +- test/CodeGen/X86/remat-scalar-zero.ll | 95 + test/CodeGen/X86/ret-addr.ll | 4 +- test/CodeGen/X86/ret-i64-0.ll | 2 +- test/CodeGen/X86/ret-mmx.ll | 2 +- test/CodeGen/X86/rip-rel-address.ll | 9 +- test/CodeGen/X86/rodata-relocs.ll | 14 +- test/CodeGen/X86/rot16.ll | 2 +- test/CodeGen/X86/rot32.ll | 2 +- test/CodeGen/X86/rot64.ll | 2 +- test/CodeGen/X86/rotate.ll | 2 +- test/CodeGen/X86/rotate2.ll | 2 +- test/CodeGen/X86/scalar-extract.ll | 2 +- test/CodeGen/X86/scalar-min-max-fill-operand.ll | 12 +- test/CodeGen/X86/scalar_sse_minmax.ll | 4 +- test/CodeGen/X86/scalarize-bitcast.ll | 2 +- test/CodeGen/X86/scev-interchange.ll | 52 +- test/CodeGen/X86/select-zero-one.ll | 6 +- test/CodeGen/X86/select.ll | 6 +- test/CodeGen/X86/setoeq.ll | 4 +- test/CodeGen/X86/setuge.ll | 2 +- test/CodeGen/X86/sext-load.ll | 2 +- test/CodeGen/X86/sext-ret-val.ll | 2 +- test/CodeGen/X86/sext-select.ll | 2 +- test/CodeGen/X86/sext-trunc.ll | 2 +- test/CodeGen/X86/sfence.ll | 2 +- test/CodeGen/X86/shift-and.ll | 4 +- test/CodeGen/X86/shift-coalesce.ll | 4 +- test/CodeGen/X86/shift-codegen.ll | 2 +- test/CodeGen/X86/shift-combine.ll | 2 +- test/CodeGen/X86/shift-double.ll | 2 +- test/CodeGen/X86/shift-folding.ll | 2 +- test/CodeGen/X86/shift-i128.ll | 4 +- test/CodeGen/X86/shift-i256.ll | 4 +- test/CodeGen/X86/shift-one.ll | 2 +- test/CodeGen/X86/shift-parts.ll | 22 + test/CodeGen/X86/shl_elim.ll | 6 +- test/CodeGen/X86/shrink-fp-const1.ll | 2 +- test/CodeGen/X86/shrink-fp-const2.ll | 2 +- test/CodeGen/X86/sincos.ll | 30 +- test/CodeGen/X86/sink-hoist.ll | 43 + 
test/CodeGen/X86/small-byval-memcpy.ll | 2 +- test/CodeGen/X86/smul-with-overflow-2.ll | 4 +- test/CodeGen/X86/smul-with-overflow-3.ll | 2 +- test/CodeGen/X86/smul-with-overflow.ll | 2 +- test/CodeGen/X86/soft-fp.ll | 4 +- test/CodeGen/X86/split-eh-lpad-edges.ll | 2 +- test/CodeGen/X86/split-select.ll | 2 +- test/CodeGen/X86/split-vector-rem.ll | 4 +- test/CodeGen/X86/sret.ll | 2 +- test/CodeGen/X86/sse-align-0.ll | 2 +- test/CodeGen/X86/sse-align-1.ll | 2 +- test/CodeGen/X86/sse-align-10.ll | 2 +- test/CodeGen/X86/sse-align-11.ll | 4 +- test/CodeGen/X86/sse-align-12.ll | 2 +- test/CodeGen/X86/sse-align-2.ll | 2 +- test/CodeGen/X86/sse-align-3.ll | 2 +- test/CodeGen/X86/sse-align-4.ll | 2 +- test/CodeGen/X86/sse-align-5.ll | 2 +- test/CodeGen/X86/sse-align-6.ll | 2 +- test/CodeGen/X86/sse-align-7.ll | 2 +- test/CodeGen/X86/sse-align-8.ll | 2 +- test/CodeGen/X86/sse-align-9.ll | 2 +- test/CodeGen/X86/sse-fcopysign.ll | 2 +- test/CodeGen/X86/sse-load-ret.ll | 6 +- test/CodeGen/X86/sse-minmax.ll | 392 + test/CodeGen/X86/sse-varargs.ll | 2 +- test/CodeGen/X86/sse2.ll | 34 + test/CodeGen/X86/sse3.ll | 273 + test/CodeGen/X86/sse41.ll | 226 + test/CodeGen/X86/sse42.ll | 38 + test/CodeGen/X86/sse_reload_fold.ll | 3 +- test/CodeGen/X86/stack-align.ll | 2 +- test/CodeGen/X86/stack-color-with-reg-2.ll | 2 +- test/CodeGen/X86/stack-color-with-reg.ll | 6 +- test/CodeGen/X86/stdarg.ll | 20 + test/CodeGen/X86/store-empty-member.ll | 14 + test/CodeGen/X86/store-fp-constant.ll | 4 +- test/CodeGen/X86/store-global-address.ll | 2 +- test/CodeGen/X86/store_op_load_fold.ll | 2 +- test/CodeGen/X86/store_op_load_fold2.ll | 2 +- test/CodeGen/X86/storetrunc-fp.ll | 2 +- test/CodeGen/X86/stride-nine-with-base-reg.ll | 4 +- test/CodeGen/X86/stride-reuse.ll | 4 +- test/CodeGen/X86/sub-with-overflow.ll | 4 +- test/CodeGen/X86/subreg-to-reg-0.ll | 2 +- test/CodeGen/X86/subreg-to-reg-1.ll | 2 +- test/CodeGen/X86/subreg-to-reg-2.ll | 2 +- test/CodeGen/X86/subreg-to-reg-3.ll | 2 +- test/CodeGen/X86/subreg-to-reg-4.ll | 2 +- test/CodeGen/X86/subreg-to-reg-5.ll | 2 +- test/CodeGen/X86/subreg-to-reg-6.ll | 2 +- test/CodeGen/X86/switch-zextload.ll | 2 +- test/CodeGen/X86/swizzle.ll | 6 +- test/CodeGen/X86/tailcall-i1.ll | 2 +- test/CodeGen/X86/tailcall-stackalign.ll | 2 +- test/CodeGen/X86/tailcall-structret.ll | 2 +- test/CodeGen/X86/tailcall-void.ll | 2 +- test/CodeGen/X86/tailcall1.ll | 2 +- test/CodeGen/X86/tailcallbyval.ll | 4 +- test/CodeGen/X86/tailcallbyval64.ll | 8 +- test/CodeGen/X86/tailcallfp.ll | 2 +- test/CodeGen/X86/tailcallfp2.ll | 2 +- test/CodeGen/X86/tailcallpic1.ll | 2 +- test/CodeGen/X86/tailcallpic2.ll | 2 +- test/CodeGen/X86/tailcallstack64.ll | 19 +- test/CodeGen/X86/test-nofold.ll | 2 +- test/CodeGen/X86/test-shrink-bug.ll | 23 + test/CodeGen/X86/test-shrink.ll | 158 + test/CodeGen/X86/testl-commute.ll | 2 +- test/CodeGen/X86/tls-pic.ll | 67 + test/CodeGen/X86/tls1.ll | 4 +- test/CodeGen/X86/tls10.ll | 4 +- test/CodeGen/X86/tls11.ll | 4 +- test/CodeGen/X86/tls12.ll | 4 +- test/CodeGen/X86/tls13.ll | 4 +- test/CodeGen/X86/tls14.ll | 4 +- test/CodeGen/X86/tls15.ll | 4 +- test/CodeGen/X86/tls2.ll | 4 +- test/CodeGen/X86/tls3.ll | 4 +- test/CodeGen/X86/tls4.ll | 4 +- test/CodeGen/X86/tls5.ll | 4 +- test/CodeGen/X86/tls6.ll | 4 +- test/CodeGen/X86/tls7.ll | 4 +- test/CodeGen/X86/tls8.ll | 4 +- test/CodeGen/X86/tls9.ll | 4 +- test/CodeGen/X86/trap.ll | 2 +- test/CodeGen/X86/trunc-to-bool.ll | 2 +- test/CodeGen/X86/twoaddr-coalesce-2.ll | 2 +- test/CodeGen/X86/twoaddr-coalesce.ll | 2 +- 
test/CodeGen/X86/twoaddr-delete.ll | 2 +- test/CodeGen/X86/twoaddr-pass-sink.ll | 2 +- test/CodeGen/X86/twoaddr-remat.ll | 2 +- test/CodeGen/X86/uint_to_fp-2.ll | 2 +- test/CodeGen/X86/uint_to_fp.ll | 4 +- test/CodeGen/X86/umul-with-carry.ll | 2 +- test/CodeGen/X86/umul-with-overflow.ll | 2 +- test/CodeGen/X86/urem-i8-constant.ll | 2 +- test/CodeGen/X86/v4f32-immediate.ll | 2 +- test/CodeGen/X86/variable-sized-darwin-bzero.ll | 2 +- test/CodeGen/X86/variadic-node-pic.ll | 2 +- test/CodeGen/X86/vec_add.ll | 2 +- test/CodeGen/X86/vec_align.ll | 2 +- test/CodeGen/X86/vec_call.ll | 4 +- test/CodeGen/X86/vec_clear.ll | 2 +- test/CodeGen/X86/vec_compare.ll | 43 + test/CodeGen/X86/vec_ctbits.ll | 2 +- test/CodeGen/X86/vec_extract-sse4.ll | 2 +- test/CodeGen/X86/vec_extract.ll | 2 +- test/CodeGen/X86/vec_fneg.ll | 2 +- test/CodeGen/X86/vec_i64.ll | 2 +- test/CodeGen/X86/vec_ins_extract-1.ll | 2 +- test/CodeGen/X86/vec_ins_extract.ll | 2 +- test/CodeGen/X86/vec_insert-2.ll | 10 +- test/CodeGen/X86/vec_insert-3.ll | 2 +- test/CodeGen/X86/vec_insert-5.ll | 2 +- test/CodeGen/X86/vec_insert-6.ll | 4 +- test/CodeGen/X86/vec_insert-7.ll | 2 +- test/CodeGen/X86/vec_insert-8.ll | 2 +- test/CodeGen/X86/vec_insert.ll | 4 +- test/CodeGen/X86/vec_insert_4.ll | 2 +- test/CodeGen/X86/vec_loadsingles.ll | 2 +- test/CodeGen/X86/vec_logical.ll | 2 +- test/CodeGen/X86/vec_return.ll | 2 +- test/CodeGen/X86/vec_select.ll | 2 +- test/CodeGen/X86/vec_set-2.ll | 4 +- test/CodeGen/X86/vec_set-3.ll | 2 +- test/CodeGen/X86/vec_set-4.ll | 2 +- test/CodeGen/X86/vec_set-5.ll | 2 +- test/CodeGen/X86/vec_set-6.ll | 2 +- test/CodeGen/X86/vec_set-7.ll | 2 +- test/CodeGen/X86/vec_set-8.ll | 4 +- test/CodeGen/X86/vec_set-9.ll | 4 +- test/CodeGen/X86/vec_set-A.ll | 2 +- test/CodeGen/X86/vec_set-B.ll | 4 +- test/CodeGen/X86/vec_set-C.ll | 6 +- test/CodeGen/X86/vec_set-D.ll | 2 +- test/CodeGen/X86/vec_set-E.ll | 2 +- test/CodeGen/X86/vec_set-F.ll | 6 +- test/CodeGen/X86/vec_set-G.ll | 2 +- test/CodeGen/X86/vec_set-H.ll | 2 +- test/CodeGen/X86/vec_set-I.ll | 4 +- test/CodeGen/X86/vec_set-J.ll | 2 +- test/CodeGen/X86/vec_set.ll | 2 +- test/CodeGen/X86/vec_shift.ll | 6 +- test/CodeGen/X86/vec_shift2.ll | 2 +- test/CodeGen/X86/vec_shift3.ll | 6 +- test/CodeGen/X86/vec_shuffle-10.ll | 2 +- test/CodeGen/X86/vec_shuffle-11.ll | 4 +- test/CodeGen/X86/vec_shuffle-14.ll | 10 +- test/CodeGen/X86/vec_shuffle-15.ll | 2 +- test/CodeGen/X86/vec_shuffle-16.ll | 4 +- test/CodeGen/X86/vec_shuffle-17.ll | 4 +- test/CodeGen/X86/vec_shuffle-18.ll | 2 +- test/CodeGen/X86/vec_shuffle-19.ll | 2 +- test/CodeGen/X86/vec_shuffle-20.ll | 2 +- test/CodeGen/X86/vec_shuffle-22.ll | 4 +- test/CodeGen/X86/vec_shuffle-23.ll | 4 +- test/CodeGen/X86/vec_shuffle-24.ll | 2 +- test/CodeGen/X86/vec_shuffle-25.ll | 2 +- test/CodeGen/X86/vec_shuffle-26.ll | 2 +- test/CodeGen/X86/vec_shuffle-27.ll | 2 +- test/CodeGen/X86/vec_shuffle-28.ll | 6 +- test/CodeGen/X86/vec_shuffle-3.ll | 2 +- test/CodeGen/X86/vec_shuffle-30.ll | 8 +- test/CodeGen/X86/vec_shuffle-31.ll | 7 +- test/CodeGen/X86/vec_shuffle-34.ll | 8 +- test/CodeGen/X86/vec_shuffle-35.ll | 4 +- test/CodeGen/X86/vec_shuffle-36.ll | 2 +- test/CodeGen/X86/vec_shuffle-4.ll | 2 +- test/CodeGen/X86/vec_shuffle-5.ll | 2 +- test/CodeGen/X86/vec_shuffle-6.ll | 2 +- test/CodeGen/X86/vec_shuffle-7.ll | 2 +- test/CodeGen/X86/vec_shuffle-8.ll | 2 +- test/CodeGen/X86/vec_shuffle-9.ll | 2 +- test/CodeGen/X86/vec_shuffle.ll | 2 +- test/CodeGen/X86/vec_splat-2.ll | 2 +- test/CodeGen/X86/vec_splat-3.ll | 2 +- 
test/CodeGen/X86/vec_splat-4.ll | 2 +- test/CodeGen/X86/vec_splat.ll | 4 +- test/CodeGen/X86/vec_ss_load_fold.ll | 2 +- test/CodeGen/X86/vec_zero-2.ll | 2 +- test/CodeGen/X86/vec_zero.ll | 2 +- test/CodeGen/X86/vec_zero_cse.ll | 6 +- test/CodeGen/X86/vector-intrinsics.ll | 2 +- test/CodeGen/X86/vector-rem.ll | 4 +- test/CodeGen/X86/vector-variable-idx.ll | 2 +- test/CodeGen/X86/vector.ll | 4 +- test/CodeGen/X86/vfcmp.ll | 8 +- test/CodeGen/X86/volatile.ll | 4 +- test/CodeGen/X86/vortex-bug.ll | 2 +- test/CodeGen/X86/vshift-1.ll | 22 +- test/CodeGen/X86/vshift-2.ll | 24 +- test/CodeGen/X86/vshift-3.ll | 21 +- test/CodeGen/X86/vshift-4.ll | 22 +- test/CodeGen/X86/vshift-5.ll | 56 + test/CodeGen/X86/vshift_scalar.ll | 2 +- test/CodeGen/X86/vshift_split.ll | 4 +- test/CodeGen/X86/vshift_split2.ll | 2 +- test/CodeGen/X86/weak.ll | 2 +- test/CodeGen/X86/wide-integer-fold.ll | 12 + test/CodeGen/X86/widen_arith-1.ll | 2 +- test/CodeGen/X86/widen_arith-2.ll | 2 +- test/CodeGen/X86/widen_arith-3.ll | 2 +- test/CodeGen/X86/widen_arith-4.ll | 2 +- test/CodeGen/X86/widen_arith-5.ll | 2 +- test/CodeGen/X86/widen_arith-6.ll | 2 +- test/CodeGen/X86/widen_cast-1.ll | 2 +- test/CodeGen/X86/widen_cast-2.ll | 2 +- test/CodeGen/X86/widen_cast-3.ll | 2 +- test/CodeGen/X86/widen_cast-4.ll | 2 +- test/CodeGen/X86/widen_cast-5.ll | 2 +- test/CodeGen/X86/widen_cast-6.ll | 2 +- test/CodeGen/X86/widen_conv-1.ll | 2 +- test/CodeGen/X86/widen_conv-2.ll | 2 +- test/CodeGen/X86/widen_conv-3.ll | 2 +- test/CodeGen/X86/widen_conv-4.ll | 2 +- test/CodeGen/X86/widen_load-0.ll | 21 + test/CodeGen/X86/widen_load-1.ll | 45 + test/CodeGen/X86/widen_select-1.ll | 2 +- test/CodeGen/X86/widen_shuffle-1.ll | 2 +- test/CodeGen/X86/widen_shuffle-2.ll | 2 +- test/CodeGen/X86/x86-64-and-mask.ll | 2 +- test/CodeGen/X86/x86-64-arg.ll | 2 +- test/CodeGen/X86/x86-64-asm.ll | 2 +- test/CodeGen/X86/x86-64-dead-stack-adjust.ll | 4 +- test/CodeGen/X86/x86-64-disp.ll | 2 +- test/CodeGen/X86/x86-64-frameaddr.ll | 2 +- test/CodeGen/X86/x86-64-gv-offset.ll | 2 +- test/CodeGen/X86/x86-64-malloc.ll | 2 +- test/CodeGen/X86/x86-64-mem.ll | 5 +- test/CodeGen/X86/x86-64-pic-1.ll | 3 +- test/CodeGen/X86/x86-64-pic-10.ll | 3 +- test/CodeGen/X86/x86-64-pic-11.ll | 5 +- test/CodeGen/X86/x86-64-pic-2.ll | 3 +- test/CodeGen/X86/x86-64-pic-3.ll | 3 +- test/CodeGen/X86/x86-64-pic-4.ll | 3 +- test/CodeGen/X86/x86-64-pic-5.ll | 3 +- test/CodeGen/X86/x86-64-pic-6.ll | 3 +- test/CodeGen/X86/x86-64-pic-7.ll | 3 +- test/CodeGen/X86/x86-64-pic-8.ll | 3 +- test/CodeGen/X86/x86-64-pic-9.ll | 3 +- test/CodeGen/X86/x86-64-ret0.ll | 2 +- test/CodeGen/X86/x86-64-shortint.ll | 2 +- test/CodeGen/X86/x86-64-sret-return.ll | 11 +- test/CodeGen/X86/x86-64-varargs.ll | 2 +- test/CodeGen/X86/x86-frameaddr.ll | 2 +- test/CodeGen/X86/x86-frameaddr2.ll | 2 +- test/CodeGen/X86/x86-store-gv-addr.ll | 4 +- test/CodeGen/X86/xmm-r64.ll | 2 +- test/CodeGen/X86/xor.ll | 133 + test/CodeGen/X86/zero-remat.ll | 32 +- test/CodeGen/X86/zext-inreg-0.ll | 4 +- test/CodeGen/X86/zext-inreg-1.ll | 2 +- test/CodeGen/XCore/2008-11-17-Shl64.ll | 2 +- test/CodeGen/XCore/2009-01-08-Crash.ll | 2 +- test/CodeGen/XCore/2009-01-14-Remat-Crash.ll | 2 +- test/CodeGen/XCore/2009-03-27-v2f64-param.ll | 2 +- test/CodeGen/XCore/2009-07-15-store192.ll | 7 + test/CodeGen/XCore/addsub64.ll | 2 +- test/CodeGen/XCore/ashr.ll | 76 + test/CodeGen/XCore/basictest.ll | 2 +- test/CodeGen/XCore/bitrev.ll | 2 +- test/CodeGen/XCore/constants.ll | 11 + test/CodeGen/XCore/cos.ll | 2 +- test/CodeGen/XCore/exp.ll | 2 +- 
test/CodeGen/XCore/exp2.ll | 2 +- test/CodeGen/XCore/fneg.ll | 3 +- test/CodeGen/XCore/getid.ll | 2 +- test/CodeGen/XCore/globals.ll | 92 + test/CodeGen/XCore/load.ll | 39 + test/CodeGen/XCore/log.ll | 2 +- test/CodeGen/XCore/log10.ll | 2 +- test/CodeGen/XCore/log2.ll | 2 +- test/CodeGen/XCore/pow.ll | 2 +- test/CodeGen/XCore/powi.ll | 2 +- test/CodeGen/XCore/private.ll | 2 +- test/CodeGen/XCore/sext.ll | 32 + test/CodeGen/XCore/sin.ll | 2 +- test/CodeGen/XCore/sqrt.ll | 2 +- test/CodeGen/XCore/store.ll | 35 + test/CodeGen/XCore/tls.ll | 20 + test/CodeGen/XCore/trap.ll | 2 +- test/CodeGen/XCore/unaligned_load.ll | 31 + test/CodeGen/XCore/unaligned_store.ll | 18 + test/CodeGen/XCore/unaligned_store_combine.ll | 12 + test/DebugInfo/2008-10-17-C++DebugCrash.ll | 2 +- test/DebugInfo/2008-11-05-InlinedFuncStart.ll | 4 +- test/DebugInfo/2009-01-15-RecordVariableCrash.ll | 2 +- test/DebugInfo/2009-01-15-dbg_declare.ll | 3 +- test/DebugInfo/2009-01-15-member.ll | 4 +- test/DebugInfo/2009-02-18-DefaultScope-Crash.ll | 2 +- test/DebugInfo/2009-02-27-licm.ll | 2 +- test/DebugInfo/2009-03-03-cheapdse.ll | 2 +- test/DebugInfo/2009-03-03-deadstore.ll | 2 +- test/DebugInfo/2009-03-03-store-to-load-forward.ll | 2 +- test/DebugInfo/2009-03-05-gvn.ll | 2 +- test/DebugInfo/2009-06-12-Inline.ll | 2 +- test/DebugInfo/2009-06-12-InlineFuncStart.ll | 4 +- test/DebugInfo/2009-06-15-InlineFuncStart.ll | 4 +- test/DebugInfo/2009-06-15-abstract_origin.ll | 2 +- .../2009-10-08-DebugInfo-NullGlobalVariable.ll | 72 + test/DebugInfo/deaddebuglabel.ll | 2 +- test/DebugInfo/funccall.ll | 2 +- test/DebugInfo/globalGetElementPtr.ll | 2 +- test/DebugInfo/printdbginfo2.ll | 129 +- test/ExecutionEngine/2002-12-16-ArgTest.ll | 2 +- test/ExecutionEngine/2003-01-04-ArgumentBug.ll | 2 +- test/ExecutionEngine/2003-01-04-LoopTest.ll | 2 +- test/ExecutionEngine/2003-01-04-PhiTest.ll | 2 +- test/ExecutionEngine/2003-01-09-SARTest.ll | 2 +- test/ExecutionEngine/2003-01-10-FUCOM.ll | 2 +- test/ExecutionEngine/2003-01-15-AlignmentTest.ll | 2 +- test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll | 2 +- test/ExecutionEngine/2003-06-04-bzip2-bug.ll | 2 +- test/ExecutionEngine/2003-06-05-PHIBug.ll | 2 +- test/ExecutionEngine/2003-08-15-AllocaAssertion.ll | 2 +- test/ExecutionEngine/2003-08-21-EnvironmentTest.ll | 2 +- .../2003-08-23-RegisterAllocatePhysReg.ll | 2 +- ...-10-18-PHINode-ConstantExpr-CondCode-Failure.ll | 2 +- test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll | 2 +- test/ExecutionEngine/hello.ll | 2 +- test/ExecutionEngine/hello2.ll | 2 +- test/ExecutionEngine/simplesttest.ll | 2 +- test/ExecutionEngine/simpletest.ll | 2 +- test/ExecutionEngine/stubs.ll | 35 + test/ExecutionEngine/test-arith.ll | 2 +- test/ExecutionEngine/test-branch.ll | 2 +- test/ExecutionEngine/test-call.ll | 2 +- test/ExecutionEngine/test-cast.ll | 2 +- test/ExecutionEngine/test-constantexpr.ll | 2 +- test/ExecutionEngine/test-fp.ll | 2 +- test/ExecutionEngine/test-loadstore.ll | 2 +- test/ExecutionEngine/test-logical.ll | 2 +- test/ExecutionEngine/test-loop.ll | 2 +- test/ExecutionEngine/test-malloc.ll | 2 +- test/ExecutionEngine/test-phi.ll | 2 +- test/ExecutionEngine/test-ret.ll | 2 +- test/ExecutionEngine/test-setcond-fp.ll | 2 +- test/ExecutionEngine/test-setcond-int.ll | 2 +- test/ExecutionEngine/test-shift.ll | 2 +- test/Feature/NamedMDNode.ll | 6 + test/Feature/NamedMDNode2.ll | 7 + test/Feature/float.ll | 6 +- test/Feature/globalredefinition3.ll | 2 +- test/Feature/inlineasm.ll | 6 +- test/Feature/load_module.ll | 4 +- 
test/Feature/md_on_instruction.ll | 23 + test/Feature/md_on_instruction2.ll | 22 + test/Feature/memorymarkers.ll | 36 + test/Feature/weak_constant.ll | 2 +- test/FrontendC++/2003-08-20-ExceptionFail.cpp | 2 +- test/FrontendC++/2003-08-21-EmptyClass.cpp | 2 +- test/FrontendC++/2003-08-27-TypeNamespaces.cpp | 2 +- test/FrontendC++/2003-08-28-ForwardType.cpp | 2 +- test/FrontendC++/2003-08-28-SaveExprBug.cpp | 2 +- test/FrontendC++/2003-08-31-StructLayout.cpp | 2 +- test/FrontendC++/2003-09-22-CompositeExprValue.cpp | 2 +- .../2003-09-29-ArgumentNumberMismatch.cpp | 2 +- test/FrontendC++/2003-09-30-CommaExprBug.cpp | 2 +- .../FrontendC++/2003-09-30-ForIncrementExprBug.cpp | 2 +- .../2003-09-30-ForIncrementExprBug2.cpp | 2 +- test/FrontendC++/2003-09-30-NestedFunctionDecl.cpp | 2 +- test/FrontendC++/2003-10-17-BoolBitfields.cpp | 2 +- .../2003-10-27-VirtualBaseClassCrash.cpp | 2 +- test/FrontendC++/2003-11-04-ArrayConstructors.cpp | 2 +- test/FrontendC++/2003-11-04-CatchLabelName.cpp | 2 +- test/FrontendC++/2003-11-18-EnumArray.cpp | 2 +- .../2003-11-18-PtrMemConstantInitializer.cpp | 2 +- .../2003-11-25-ReturningOpaqueByValue.cpp | 2 +- .../2003-11-27-MultipleInheritanceThunk.cpp | 2 +- .../2003-11-29-DuplicatedCleanupTest.cpp | 2 +- .../2003-12-08-ArrayOfPtrToMemberFunc.cpp | 2 +- .../FrontendC++/2004-03-08-ReinterpretCastCopy.cpp | 2 +- test/FrontendC++/2004-03-15-CleanupsAndGotos.cpp | 2 +- .../2004-06-08-LateTemplateInstantiation.cpp | 2 +- test/FrontendC++/2004-09-27-CompilerCrash.cpp | 2 +- test/FrontendC++/2006-11-06-StackTrace.cpp | 7 +- test/FrontendC++/2006-11-30-NoCompileUnit.cpp | 4 +- test/FrontendC++/2006-11-30-Pubnames.cpp | 4 +- test/FrontendC++/2007-04-05-PackedBitFields-1.cpp | 2 +- .../2007-04-05-PackedBitFieldsOverlap-2.cpp | 2 +- .../2007-04-05-PackedBitFieldsOverlap.cpp | 2 +- .../2007-04-05-PackedBitFieldsSmall.cpp | 2 +- .../2007-04-05-StructPackedFieldUnpacked.cpp | 2 +- test/FrontendC++/2009-04-21-DtorNames-dbg.cpp | 2 +- test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp | 4 +- test/FrontendC++/2009-06-30-ByrefBlock.cpp | 5 +- test/FrontendC++/2009-07-15-LineNumbers.cpp | 28 + .../2009-07-16-PrivateCopyConstructor.cpp | 15 + test/FrontendC++/2009-07-16-Using.cpp | 8 + test/FrontendC++/2009-08-03-Varargs.cpp | 5 + test/FrontendC++/2009-08-05-ZeroInitWidth.cpp | 12 + test/FrontendC++/2009-08-11-VectorRetTy.cpp | 13 + test/FrontendC++/2009-09-04-modify-crash.cpp | 7 + test/FrontendC++/2009-09-09-packed-layout.cpp | 18 + test/FrontendC++/member-alignment.cpp | 20 + test/FrontendC++/msasm.cpp | 23 + test/FrontendC/2002-01-23-LoadQISIReloadFailure.c | 2 +- test/FrontendC/2002-01-24-ComplexSpaceInType.c | 2 +- test/FrontendC/2002-01-24-HandleCallInsnSEGV.c | 2 +- test/FrontendC/2002-02-13-ConditionalInCall.c | 2 +- test/FrontendC/2002-02-13-ReloadProblem.c | 2 +- test/FrontendC/2002-02-13-TypeVarNameCollision.c | 2 +- test/FrontendC/2002-02-13-UnnamedLocal.c | 2 +- test/FrontendC/2002-02-14-EntryNodePreds.c | 2 +- test/FrontendC/2002-02-16-RenamingTest.c | 2 +- test/FrontendC/2002-02-17-ArgumentAddress.c | 2 +- test/FrontendC/2002-02-18-64bitConstant.c | 2 +- test/FrontendC/2002-02-18-StaticData.c | 2 +- test/FrontendC/2002-03-11-LargeCharInString.c | 2 +- test/FrontendC/2002-03-12-ArrayInitialization.c | 2 +- test/FrontendC/2002-03-12-StructInitialize.c | 2 +- test/FrontendC/2002-03-12-StructInitializer.c | 2 +- test/FrontendC/2002-03-14-BrokenPHINode.c | 2 +- test/FrontendC/2002-03-14-BrokenSSA.c | 2 +- test/FrontendC/2002-03-14-QuotesInStrConst.c | 2 +- 
test/FrontendC/2002-04-07-SwitchStmt.c | 2 +- test/FrontendC/2002-04-08-LocalArray.c | 2 +- test/FrontendC/2002-04-09-StructRetVal.c | 2 +- test/FrontendC/2002-04-10-StructParameters.c | 2 +- test/FrontendC/2002-05-23-StaticValues.c | 2 +- test/FrontendC/2002-05-23-TypeNameCollision.c | 2 +- test/FrontendC/2002-05-24-Alloca.c | 2 +- test/FrontendC/2002-06-25-FWriteInterfaceFailure.c | 2 +- test/FrontendC/2002-07-14-MiscListTests.c | 2 +- test/FrontendC/2002-07-14-MiscTests.c | 2 +- test/FrontendC/2002-07-14-MiscTests2.c | 2 +- test/FrontendC/2002-07-14-MiscTests3.c | 2 +- test/FrontendC/2002-07-16-HardStringInit.c | 2 +- test/FrontendC/2002-07-17-StringConstant.c | 2 +- test/FrontendC/2002-07-29-Casts.c | 2 +- test/FrontendC/2002-07-30-SubregSetAssertion.c | 2 +- test/FrontendC/2002-07-30-UnionTest.c | 2 +- test/FrontendC/2002-07-30-VarArgsCallFailure.c | 2 +- test/FrontendC/2002-07-31-BadAssert.c | 2 +- test/FrontendC/2002-07-31-SubregFailure.c | 2 +- test/FrontendC/2002-08-02-UnionTest.c | 2 +- test/FrontendC/2002-08-19-RecursiveLocals.c | 2 +- test/FrontendC/2002-09-08-PointerShifts.c | 2 +- test/FrontendC/2002-09-18-UnionProblem.c | 2 +- test/FrontendC/2002-09-19-StarInLabel.c | 2 +- test/FrontendC/2002-10-12-TooManyArguments.c | 2 +- test/FrontendC/2002-12-15-GlobalBoolTest.c | 2 +- test/FrontendC/2002-12-15-GlobalConstantTest.c | 2 +- test/FrontendC/2002-12-15-GlobalRedefinition.c | 2 +- test/FrontendC/2002-12-15-StructParameters.c | 2 +- test/FrontendC/2003-03-03-DeferredType.c | 2 +- test/FrontendC/2003-06-22-UnionCrash.c | 2 +- .../2003-06-23-GCC-fold-infinite-recursion.c | 2 +- test/FrontendC/2003-06-26-CFECrash.c | 2 +- .../2003-06-29-MultipleFunctionDefinition.c | 2 +- test/FrontendC/2003-08-18-SigSetJmp.c | 2 +- test/FrontendC/2003-08-18-StructAsValue.c | 2 +- test/FrontendC/2003-08-20-BadBitfieldRef.c | 2 +- test/FrontendC/2003-08-20-PrototypeMismatch.c | 2 +- test/FrontendC/2003-08-20-vfork-bug.c | 2 +- test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c | 2 +- test/FrontendC/2003-08-21-StmtExpr.c | 2 +- test/FrontendC/2003-08-21-WideString.c | 2 +- test/FrontendC/2003-08-23-LocalUnionTest.c | 2 +- test/FrontendC/2003-08-29-BitFieldStruct.c | 2 +- test/FrontendC/2003-08-29-HugeCharConst.c | 2 +- test/FrontendC/2003-08-29-StructLayoutBug.c | 2 +- .../2003-08-30-LargeIntegerBitfieldMember.c | 2 +- test/FrontendC/2003-09-18-BitfieldTests.c | 2 +- test/FrontendC/2003-09-30-StructLayout.c | 2 +- test/FrontendC/2003-10-02-UnionLValueError.c | 2 +- test/FrontendC/2003-10-06-NegateExprType.c | 2 +- test/FrontendC/2003-10-09-UnionInitializerBug.c | 2 +- test/FrontendC/2003-10-28-ident.c | 2 +- test/FrontendC/2003-10-29-AsmRename.c | 2 +- test/FrontendC/2003-11-01-C99-CompoundLiteral.c | 2 +- test/FrontendC/2003-11-01-EmptyStructCrash.c | 2 +- test/FrontendC/2003-11-01-GlobalUnionInit.c | 2 +- test/FrontendC/2003-11-04-EmptyStruct.c | 2 +- test/FrontendC/2003-11-04-OutOfMemory.c | 2 +- test/FrontendC/2003-11-12-VoidString.c | 2 +- test/FrontendC/2003-11-16-StaticArrayInit.c | 2 +- test/FrontendC/2003-11-18-CondExprLValue.c | 2 +- test/FrontendC/2003-11-19-BitFieldArray.c | 2 +- test/FrontendC/2003-11-20-Bitfields.c | 2 +- test/FrontendC/2003-11-20-ComplexDivision.c | 2 +- test/FrontendC/2003-11-20-UnionBitfield.c | 2 +- test/FrontendC/2003-11-26-PointerShift.c | 2 +- test/FrontendC/2003-11-27-ConstructorCast.c | 2 +- .../FrontendC/2003-11-27-UnionCtorInitialization.c | 2 +- test/FrontendC/2004-01-08-ExternInlineRedefine.c | 2 +- test/FrontendC/2004-03-07-ComplexDivEquals.c | 2 +- 
test/FrontendC/2004-03-09-LargeArrayInitializers.c | 2 +- test/FrontendC/2004-03-15-SimpleIndirectGoto.c | 2 +- test/FrontendC/2004-03-16-AsmRegisterCrash.c | 6 +- test/FrontendC/2004-05-07-VarArrays.c | 2 +- test/FrontendC/2004-05-21-IncompleteEnum.c | 2 +- test/FrontendC/2004-06-08-OpaqueStructArg.c | 2 +- test/FrontendC/2004-06-17-UnorderedBuiltins.c | 2 +- .../2004-06-18-VariableLengthArrayOfStructures.c | 2 +- test/FrontendC/2004-07-06-FunctionCast.c | 2 +- test/FrontendC/2004-08-06-LargeStructTest.c | 2 +- test/FrontendC/2005-02-27-MarkGlobalConstant.c | 2 +- test/FrontendC/2005-07-20-SqrtNoErrno.c | 8 +- test/FrontendC/2005-09-20-ComplexConstants.c | 2 +- test/FrontendC/2005-12-04-DeclarationLineNumbers.c | 2 +- test/FrontendC/2006-01-13-Includes.c | 3 +- test/FrontendC/2007-02-16-WritableStrings.c | 5 +- test/FrontendC/2007-03-27-VarLengthArray.c | 2 +- test/FrontendC/2008-03-05-syncPtr.c | 2 +- test/FrontendC/2008-03-24-BitField-And-Alloca.c | 2 +- test/FrontendC/2008-07-29-EHLabel.ll | 2 +- test/FrontendC/2009-02-17-BitField-dbg.c | 2 +- test/FrontendC/2009-03-09-WeakDeclarations-1.c | 2 +- test/FrontendC/2009-07-14-VoidPtr.c | 6 + test/FrontendC/2009-07-15-pad-wchar_t-array.c | 7 + test/FrontendC/2009-07-17-VoidParameter.c | 4 + test/FrontendC/2009-07-22-StructLayout.c | 34 + .../2009-08-11-AsmBlocksComplexJumpTarget.c | 10 + test/FrontendC/2009-09-24-SqrtErrno.c | 12 + test/FrontendC/Atomics-no64bit.c | 2 +- test/FrontendC/Atomics.c | 2 +- test/FrontendC/func-aligned.c | 7 + test/FrontendC/msasm.c | 23 + test/FrontendC/ptr-rotate.c | 7 + test/FrontendC/redef-ext-inline.c | 6 + test/FrontendC/wchar-const.c | 9 + test/FrontendObjC/2007-10-17-SJLJExceptions.m | 2 +- test/FrontendObjC/2009-04-14-AsmSection.m | 2 +- test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m | 2 +- test/FrontendObjC/2009-08-05-utf16.m | 5 + test/FrontendObjC/2009-08-17-DebugInfo.m | 28 + test/Integer/a15.ll.out | 39 +- test/Integer/a17.ll.out | 37 +- test/Integer/a31.ll.out | 37 +- test/Integer/a33.ll.out | 37 +- test/Integer/a63.ll.out | 37 +- test/Integer/a7.ll.out | 47 +- test/Integer/a9.ll.out | 35 +- test/Integer/varargs_bt.ll | 2 +- test/LLVMC/C++/dash-x.cpp | 9 + test/LLVMC/C++/dg.exp | 5 + test/LLVMC/C++/hello.cpp | 8 + test/LLVMC/C++/together.cpp | 9 + test/LLVMC/C/dg.exp | 5 + test/LLVMC/C/emit-llvm.c | 4 + test/LLVMC/C/hello.c | 12 + test/LLVMC/C/include.c | 9 + test/LLVMC/C/opt-test.c | 12 + test/LLVMC/C/sink.c | 12 + test/LLVMC/C/wall.c | 12 + test/LLVMC/EmptyCompilationGraph.td | 2 +- test/LLVMC/EnvParentheses.td | 2 +- test/LLVMC/ExternOptions.td | 4 +- test/LLVMC/ForwardAs.td | 4 +- test/LLVMC/HookWithArguments.td | 2 +- test/LLVMC/MultiValuedOption.td | 4 +- test/LLVMC/MultipleCompilationGraphs.td | 2 +- test/LLVMC/NoActions.td | 4 +- test/LLVMC/NoCompilationGraph.td | 2 +- test/LLVMC/ObjC++/dg.exp | 5 + test/LLVMC/ObjC++/hello.mm | 8 + test/LLVMC/ObjC/dg.exp | 5 + test/LLVMC/ObjC/hello.m | 12 + test/LLVMC/OneOrMore.td | 4 +- test/LLVMC/TestWarnings.td | 2 +- test/LLVMC/dg.exp | 3 + test/LLVMC/test_data/false.c | 10 + test/Linker/2003-01-30-LinkerRename.ll | 4 +- test/Linker/2003-01-30-LinkerTypeRename.ll | 2 +- test/Linker/2003-04-21-Linkage.ll | 2 +- test/Linker/2003-04-23-LinkOnceLost.ll | 6 +- test/Linker/2003-04-26-NullPtrLinkProblem.ll | 2 +- test/Linker/2003-05-15-TypeProblem.ll | 6 +- test/Linker/2003-05-31-LinkerRename.ll | 2 +- test/Linker/2003-08-20-OpaqueTypeResolve.ll | 8 +- test/Linker/2003-08-23-GlobalVarLinking.ll | 2 +- .../2003-08-23-RecursiveOpaqueTypeResolve.ll | 4 +- 
test/Linker/2003-08-28-TypeResolvesGlobal.ll | 6 +- test/Linker/2003-08-28-TypeResolvesGlobal2.ll | 8 +- test/Linker/2003-08-28-TypeResolvesGlobal3.ll | 8 +- .../Linker/2003-10-21-ConflictingTypesTolerance.ll | 6 +- test/Linker/2004-02-17-WeakStrongLinkage.ll | 2 +- test/Linker/2004-05-07-TypeResolution1.ll | 6 +- test/Linker/2004-12-03-DisagreeingType.ll | 2 +- test/Linker/2005-02-12-ConstantGlobals-2.ll | 2 +- test/Linker/2005-02-12-ConstantGlobals.ll | 2 +- .../Linker/2005-12-06-AppendingZeroLengthArrays.ll | 2 +- test/Linker/2006-01-19-ConstantPacked.ll | 4 +- test/Linker/2006-06-15-GlobalVarAlignment.ll | 2 +- test/Linker/2008-03-05-AliasReference.ll | 6 +- test/Linker/2008-06-13-LinkOnceRedefinition.ll | 10 +- test/Linker/2008-06-26-AddressSpace.ll | 8 +- test/Linker/2008-07-06-AliasFnDecl.ll | 6 +- test/Linker/2008-07-06-AliasWeakDest.ll | 8 +- test/Linker/2009-09-03-mdnode.ll | 30 + test/Linker/2009-09-03-mdnode2.ll | 25 + test/Linker/AppendingLinkage.ll | 2 +- test/Linker/AppendingLinkage2.ll | 2 +- test/Linker/ConstantGlobals1.ll | 2 +- test/Linker/ConstantGlobals2.ll | 2 +- test/Linker/ConstantGlobals3.ll | 2 +- test/Linker/LinkOnce.ll | 2 +- test/Linker/basiclink.ll | 8 +- test/Linker/link-archive.ll | 4 +- test/Linker/link-global-to-func.ll | 8 +- test/Linker/link-messages.ll | 4 +- test/Linker/linkmdnode.ll | 12 + test/Linker/linkmdnode2.ll | 12 + test/Linker/linknamedmdnode.ll | 6 + test/Linker/linknamedmdnode2.ll | 6 + test/Linker/partial-type-refinement-link.ll | 20 + test/Linker/partial-type-refinement.ll | 24 + test/Linker/redefinition.ll | 6 +- test/Linker/weakextern.ll | 2 +- test/MC/AsmParser/ARM/arm_word_directive.s | 6 + test/MC/AsmParser/ARM/dg.exp | 5 + test/MC/AsmParser/X86/dg.exp | 5 + test/MC/AsmParser/X86/x86_instructions.s | 58 + test/MC/AsmParser/X86/x86_operands.s | 58 + test/MC/AsmParser/X86/x86_word_directive.s | 6 + test/MC/AsmParser/assignment.s | 8 +- test/MC/AsmParser/conditional_asm.s | 12 + test/MC/AsmParser/dg.exp | 1 + test/MC/AsmParser/directive_abort.s | 6 + test/MC/AsmParser/directive_align.s | 14 +- test/MC/AsmParser/directive_ascii.s | 50 +- test/MC/AsmParser/directive_comm.s | 8 + test/MC/AsmParser/directive_darwin_section.s | 4 + test/MC/AsmParser/directive_desc.s | 8 + test/MC/AsmParser/directive_file.s | 5 + test/MC/AsmParser/directive_fill.s | 16 +- test/MC/AsmParser/directive_include.s | 9 + test/MC/AsmParser/directive_lcomm.s | 10 + test/MC/AsmParser/directive_line.s | 5 + test/MC/AsmParser/directive_loc.s | 8 + test/MC/AsmParser/directive_lsym.s | 13 + test/MC/AsmParser/directive_org.s | 10 +- test/MC/AsmParser/directive_set.s | 8 +- test/MC/AsmParser/directive_space.s | 11 +- .../AsmParser/directive_subsections_via_symbols.s | 6 + test/MC/AsmParser/directive_symbol_attrs.s | 8 +- test/MC/AsmParser/directive_values.s | 18 +- test/MC/AsmParser/directive_zerofill.s | 10 + test/MC/AsmParser/exprs-invalid.s | 13 + test/MC/AsmParser/exprs.s | 6 +- test/MC/AsmParser/hello.s | 28 + test/MC/AsmParser/labels.s | 59 + test/MC/MachO/comm-1.s | 114 + test/MC/MachO/data.s | 67 + test/MC/MachO/dg.exp | 6 + test/MC/MachO/lcomm-attributes.s | 136 + test/MC/MachO/reloc.s | 227 + test/MC/MachO/section-align-1.s | 87 + test/MC/MachO/section-align-2.s | 137 + test/MC/MachO/sections.s | 540 ++ test/MC/MachO/symbol-flags.s | 254 + test/MC/MachO/symbol-indirect.s | 268 + test/MC/MachO/symbols-1.s | 161 + test/MC/MachO/values.s | 135 + test/MC/MachO/zerofill-1.s | 121 + test/MC/MachO/zerofill-2.s | 103 + test/MC/MachO/zerofill-3.s | 141 + test/Makefile | 94 
test/Makefile.tests | 2 +-
test/Other/2002-01-31-CallGraph.ll | 2 +-
test/Other/2002-02-24-InlineBrokePHINodes.ll | 2 +-
test/Other/2002-03-11-ConstPropCrash.ll | 2 +-
test/Other/2003-02-19-LoopInfoNestingBug.ll | 2 +-
test/Other/2006-02-05-PassManager.ll | 2 +-
.../2007-04-24-eliminate-mostly-empty-blocks.ll | 2 +-
test/Other/2007-06-05-PassID.ll | 2 +-
test/Other/2007-06-28-PassManager.ll | 8 +-
test/Other/2007-09-10-PassManager.ll | 2 +-
test/Other/2008-02-14-PassManager.ll | 4 +-
test/Other/2008-03-19-PassManager.ll | 2 +-
test/Other/2008-06-04-FieldSizeInPacked.ll | 2 +-
test/Other/2008-08-14-PassManager.ll | 4 +-
test/Other/2008-10-06-RemoveDeadPass.ll | 2 +-
test/Other/2009-03-31-CallGraph.ll | 2 +-
test/Other/2009-06-05-no-implicit-float.ll | 2 +-
test/Other/2009-09-14-function-elements.ll | 6 +
test/Scripts/macho-dump | 256 +
test/Transforms/ADCE/2002-01-31-UseStuckAround.ll | 2 +-
test/Transforms/ADCE/2002-05-22-PHITest.ll | 2 +-
test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll | 2 +-
test/Transforms/ADCE/2002-05-28-Crash-distilled.ll | 2 +-
test/Transforms/ADCE/2002-05-28-Crash.ll | 2 +-
.../Transforms/ADCE/2002-07-17-AssertionFailure.ll | 2 +-
test/Transforms/ADCE/2002-07-17-PHIAssertion.ll | 2 +-
test/Transforms/ADCE/2002-07-29-Segfault.ll | 2 +-
.../ADCE/2003-01-22-PredecessorProblem.ll | 2 +-
.../ADCE/2003-04-25-PHIPostDominateProblem.ll | 2 +-
test/Transforms/ADCE/2003-06-11-InvalidCFG.ll | 2 +-
test/Transforms/ADCE/2003-06-24-BadSuccessor.ll | 2 +-
.../ADCE/2003-06-24-BasicFunctionality.ll | 2 +-
test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll | 2 +-
test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll | 2 +-
.../ADCE/2003-11-16-MissingPostDominanceInfo.ll | 2 +-
.../Transforms/ADCE/2004-05-04-UnreachableBlock.ll | 2 +-
.../Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll | 2 +-
test/Transforms/ADCE/basictest.ll | 2 +-
test/Transforms/ADCE/basictest1.ll | 2 +-
test/Transforms/ADCE/basictest2.ll | 2 +-
test/Transforms/ADCE/dce_pure_call.ll | 2 +-
test/Transforms/ADCE/dce_pure_invoke.ll | 2 +-
test/Transforms/ADCE/unreachable-function.ll | 2 +-
.../ArgumentPromotion/2008-02-01-ReturnAttrs.ll | 2 +-
.../ArgumentPromotion/2008-07-02-array-indexing.ll | 2 +-
.../ArgumentPromotion/2008-09-07-CGUpdate.ll | 2 +-
.../2008-09-08-CGUpdateSelfEdge.ll | 2 +-
.../ArgumentPromotion/aggregate-promote.ll | 2 +-
test/Transforms/ArgumentPromotion/attrs.ll | 2 +-
test/Transforms/ArgumentPromotion/basictest.ll | 2 +-
test/Transforms/ArgumentPromotion/byval-2.ll | 2 +-
test/Transforms/ArgumentPromotion/byval.ll | 2 +-
.../ArgumentPromotion/callgraph-update.ll | 23 +
test/Transforms/ArgumentPromotion/chained.ll | 2 +-
test/Transforms/ArgumentPromotion/control-flow.ll | 2 +-
test/Transforms/ArgumentPromotion/control-flow2.ll | 2 +-
test/Transforms/ArgumentPromotion/pr3085.ll | 2 +-
test/Transforms/BlockPlacement/basictest.ll | 2 +-
.../2007-10-19-InlineAsmDirectives.ll | 2 +-
.../CodeExtractor/2004-03-13-LoopExtractorCrash.ll | 2 +-
.../CodeExtractor/2004-03-14-DominanceProblem.ll | 2 +-
.../CodeExtractor/2004-03-14-NoSwitchSupport.ll | 2 +-
.../CodeExtractor/2004-03-17-MissedLiveIns.ll | 2 +-
.../CodeExtractor/2004-03-17-OutputMismatch.ll | 2 +-
.../2004-03-17-UpdatePHIsOutsideRegion.ll | 2 +-
.../CodeExtractor/2004-03-18-InvokeHandling.ll | 2 +-
.../CodeExtractor/2004-08-12-BlockExtractPHI.ll | 2 +-
.../CodeExtractor/2004-11-12-InvokeExtract.ll | 2 +-
.../CodeGenPrepare/2008-11-24-RAUW-Self.ll | 2 +-
.../CondProp/2006-08-14-SingleEntryPhiCrash.ll | 2 +-
.../Transforms/CondProp/2006-11-01-PhiNodeCrash.ll | 2 +-
test/Transforms/CondProp/2007-08-01-InvalidRead.ll | 2 +-
.../CondProp/2009-01-25-SingleEntryPHI.ll | 2 +-
test/Transforms/CondProp/basictest-dbg.ll | 2 +-
test/Transforms/CondProp/basictest.ll | 4 +-
test/Transforms/CondProp/phisimplify.ll | 2 +-
test/Transforms/CondProp/phisimplify2.ll | 2 +-
test/Transforms/CondProp/phisimplify3.ll | 2 +-
.../ConstProp/2002-05-03-DivideByZeroException.ll | 2 +-
.../Transforms/ConstProp/2002-05-03-NotOperator.ll | 2 +-
.../Transforms/ConstProp/2002-09-03-SetCC-Bools.ll | 2 +-
.../Transforms/ConstProp/2003-05-12-DivideError.ll | 2 +-
test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll | 2 +-
.../Transforms/ConstProp/2006-11-30-vector-cast.ll | 4 +-
.../ConstProp/2006-12-01-TruncBoolBug.ll | 2 +-
test/Transforms/ConstProp/2006-12-01-bool-casts.ll | 4 +-
test/Transforms/ConstProp/2007-02-05-BitCast.ll | 2 +-
test/Transforms/ConstProp/2007-11-23-cttz.ll | 2 +-
.../ConstProp/2008-07-07-VectorCompare.ll | 26 +-
test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll | 24 +
.../ConstProp/2009-09-19-ConstFold-i1-ConstExpr.ll | 41 +
test/Transforms/ConstProp/basictest.ll | 19 +-
test/Transforms/ConstProp/bitcast2.ll | 2 +-
test/Transforms/ConstProp/bswap.ll | 2 +-
test/Transforms/ConstProp/calls.ll | 2 +-
test/Transforms/ConstProp/div-zero.ll | 2 +-
test/Transforms/ConstProp/float-to-ptr-cast.ll | 2 +-
test/Transforms/ConstProp/logicaltest.ll | 2 +-
test/Transforms/ConstProp/nottest.ll | 2 +-
test/Transforms/ConstProp/overflow-ops.ll | 172 +
test/Transforms/ConstProp/phi.ll | 2 +-
test/Transforms/ConstProp/remtest.ll | 2 +-
.../ConstantMerge/2002-09-23-CPR-Update.ll | 2 +-
.../2003-10-28-MergeExternalConstants.ll | 4 +-
.../2006-03-07-DontMergeDiffSections.ll | 4 +-
.../DeadArgElim/2006-06-27-struct-ret.ll | 2 +-
.../DeadArgElim/2007-02-07-FuncRename.ll | 4 +-
.../DeadArgElim/2007-10-18-VarargsReturn.ll | 2 +-
.../DeadArgElim/2007-12-20-ParamAttrs.ll | 2 +-
.../DeadArgElim/2008-01-16-VarargsParamAttrs.ll | 2 +-
.../DeadArgElim/2008-06-23-DeadAfterLive.ll | 2 +-
.../DeadArgElim/2009-03-17-MRE-Invoke.ll | 2 +-
test/Transforms/DeadArgElim/basictest.ll | 2 +-
test/Transforms/DeadArgElim/canon.ll | 2 +-
test/Transforms/DeadArgElim/dead_vaargs.ll | 4 +-
test/Transforms/DeadArgElim/deadretval.ll | 2 +-
test/Transforms/DeadArgElim/deadretval2.ll | 2 +-
test/Transforms/DeadArgElim/keepalive.ll | 2 +-
test/Transforms/DeadArgElim/multdeadretval.ll | 2 +-
.../2004-11-28-LiveStoreDeleted.ll | 2 +-
.../2004-12-28-PartialStore.ll | 2 +-
.../DeadStoreElimination/2005-11-30-vaarg.ll | 2 +-
.../DeadStoreElimination/2006-06-27-AST-Remove.ll | 2 +-
.../DeadStoreElimination/2008-07-28-load-store.ll | 2 +-
.../2008-11-28-MemDepUpdate.ll | 2 +-
.../2008-11-29-OffEndOfBlock.ll | 2 +-
.../DeadStoreElimination/PartialStore.ll | 2 +-
test/Transforms/DeadStoreElimination/alloca.ll | 2 +-
test/Transforms/DeadStoreElimination/byval.ll | 2 +-
.../DeadStoreElimination/context-sensitive.ll | 2 +-
test/Transforms/DeadStoreElimination/crash.ll | 26 +
test/Transforms/DeadStoreElimination/free.ll | 2 +-
test/Transforms/DeadStoreElimination/memcpy.ll | 2 +-
test/Transforms/DeadStoreElimination/simple.ll | 2 +-
.../DeadStoreElimination/volatile-load.ll | 2 +-
test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll | 2 +-
.../FunctionAttrs/2008-09-03-ReadNone.ll | 2 +-
.../FunctionAttrs/2008-09-03-ReadOnly.ll | 2 +-
.../FunctionAttrs/2008-09-13-VolatileRead.ll | 2 +-
.../FunctionAttrs/2008-10-04-LocalMemory.ll | 2 +-
.../FunctionAttrs/2008-12-29-Constant.ll | 2 +-
.../FunctionAttrs/2008-12-31-NoCapture.ll | 4 +-
.../FunctionAttrs/2009-01-02-LocalStores.ll | 4 +-
test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll | 2 +-
test/Transforms/GVN/2007-07-25-DominatedLoop.ll | 2 +-
test/Transforms/GVN/2007-07-25-InfiniteLoop.ll | 2 +-
test/Transforms/GVN/2007-07-25-Loop.ll | 2 +-
test/Transforms/GVN/2007-07-25-NestedLoop.ll | 2 +-
.../Transforms/GVN/2007-07-25-SinglePredecessor.ll | 2 +-
.../Transforms/GVN/2007-07-26-InterlockingLoops.ll | 4 +-
test/Transforms/GVN/2007-07-26-NonRedundant.ll | 2 +-
test/Transforms/GVN/2007-07-26-PhiErasure.ll | 4 +-
test/Transforms/GVN/2007-07-30-PredIDom.ll | 2 +-
test/Transforms/GVN/2007-07-31-NoDomInherit.ll | 2 +-
test/Transforms/GVN/2007-07-31-RedundantPhi.ll | 2 +-
test/Transforms/GVN/2008-02-12-UndefLoad.ll | 2 +-
test/Transforms/GVN/2008-02-13-NewPHI.ll | 2 +-
.../GVN/2008-02-24-NonDominatedMemcpy.ll | 2 +-
test/Transforms/GVN/2008-02-26-MemCpySize.ll | 2 +-
test/Transforms/GVN/2008-07-02-Unreachable.ll | 2 +-
test/Transforms/GVN/2008-12-09-SelfRemove.ll | 2 +-
test/Transforms/GVN/2008-12-12-RLE-Crash.ll | 2 +-
test/Transforms/GVN/2008-12-14-rle-reanalyze.ll | 2 +-
test/Transforms/GVN/2008-12-15-CacheVisited.ll | 2 +-
test/Transforms/GVN/2009-01-21-SortInvalidation.ll | 2 +-
test/Transforms/GVN/2009-01-22-SortInvalidation.ll | 2 +-
test/Transforms/GVN/2009-02-17-LoadPRECrash.ll | 2 +-
test/Transforms/GVN/2009-03-05-dbg.ll | 2 +-
test/Transforms/GVN/2009-03-10-PREOnVoid.ll | 2 +-
test/Transforms/GVN/2009-06-17-InvalidPRE.ll | 2 +-
test/Transforms/GVN/2009-07-13-MemDepSortFail.ll | 67 +
.../GVN/2009-11-12-MemDepMallocBitCast.ll | 15 +
test/Transforms/GVN/basic.ll | 2 +-
test/Transforms/GVN/bitcast-of-call.ll | 2 +-
test/Transforms/GVN/calls-nonlocal.ll | 2 +-
test/Transforms/GVN/calls-readonly.ll | 2 +-
test/Transforms/GVN/condprop.ll | 2 +-
test/Transforms/GVN/load-constant-mem.ll | 2 +-
test/Transforms/GVN/local-pre.ll | 2 +-
test/Transforms/GVN/lpre-basic.ll | 2 +-
test/Transforms/GVN/lpre-call-wrap-2.ll | 7 +-
test/Transforms/GVN/lpre-call-wrap.ll | 7 +-
test/Transforms/GVN/mixed.ll | 4 +-
test/Transforms/GVN/pre-basic-add.ll | 2 +-
test/Transforms/GVN/pre-single-pred.ll | 2 +-
test/Transforms/GVN/rle-dominated.ll | 2 +-
test/Transforms/GVN/rle-must-alias.ll | 2 +-
test/Transforms/GVN/rle-no-phi-translate.ll | 2 +-
test/Transforms/GVN/rle-nonlocal.ll | 2 +-
test/Transforms/GVN/rle-phi-translate.ll | 4 +-
test/Transforms/GVN/rle-semidominated.ll | 2 +-
test/Transforms/GVN/rle.ll | 282 +
test/Transforms/GlobalDCE/2002-07-17-CastRef.ll | 2 +-
.../Transforms/GlobalDCE/2002-07-17-ConstantRef.ll | 2 +-
.../Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll | 2 +-
.../GlobalDCE/2002-08-17-WorkListTest.ll | 2 +-
test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll | 2 +-
.../GlobalDCE/2003-07-01-SelfReference.ll | 2 +-
.../GlobalDCE/2003-10-09-PreserveWeakGlobals.ll | 2 +-
.../Transforms/GlobalDCE/2009-01-05-DeadAliases.ll | 4 +-
.../GlobalDCE/2009-02-17-AliasUsesAliasee.ll | 2 +-
test/Transforms/GlobalDCE/2009-09-03-MDNode.ll | 264 +
test/Transforms/GlobalDCE/basicvariabletest.ll | 2 +-
test/Transforms/GlobalDCE/externally_available.ll | 2 +-
.../GlobalOpt/2004-10-10-CastStoreOnce.ll | 2 +-
.../GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll | 2 +-
test/Transforms/GlobalOpt/2005-09-27-Crash.ll | 2 +-
.../GlobalOpt/2006-07-07-InlineAsmCrash.ll | 2 +-
.../GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll | 2 +-
test/Transforms/GlobalOpt/2007-04-05-Crash.ll | 2 +-
test/Transforms/GlobalOpt/2007-05-13-Crash.ll | 2 +-
.../GlobalOpt/2007-06-04-PackedStruct.ll | 2 +-
.../GlobalOpt/2007-11-09-GEP-GEP-Crash.ll | 2 +-
test/Transforms/GlobalOpt/2008-01-03-Crash.ll | 2 +-
.../GlobalOpt/2008-01-13-OutOfRangeSROA.ll | 2 +-
.../GlobalOpt/2008-01-29-VolatileGlobal.ll | 2 +-
test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll | 2 +-
.../GlobalOpt/2008-04-26-SROA-Global-Align.ll | 6 +-
test/Transforms/GlobalOpt/2008-07-17-addrspace.ll | 2 +-
.../GlobalOpt/2008-12-16-HeapSRACrash-2.ll | 2 +-
.../GlobalOpt/2008-12-16-HeapSRACrash.ll | 2 +-
test/Transforms/GlobalOpt/2009-01-13-phi-user.ll | 2 +-
.../GlobalOpt/2009-02-15-BitcastAlias.ll | 2 +-
.../GlobalOpt/2009-02-15-ResolveAlias.ll | 2 +-
test/Transforms/GlobalOpt/2009-03-03-dbg.ll | 2 +-
test/Transforms/GlobalOpt/2009-03-05-dbg.ll | 2 +-
test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll | 2 +-
.../GlobalOpt/2009-03-07-PromotePtrToBool.ll | 2 +-
.../GlobalOpt/2009-06-01-RecursivePHI.ll | 2 +-
test/Transforms/GlobalOpt/alias-resolve.ll | 2 +-
test/Transforms/GlobalOpt/basictest.ll | 2 +-
test/Transforms/GlobalOpt/constantexpr-dangle.ll | 2 +-
test/Transforms/GlobalOpt/ctor-list-opt-dbg.ll | 6 +-
.../Transforms/GlobalOpt/ctor-list-opt-inbounds.ll | 23 +
test/Transforms/GlobalOpt/ctor-list-opt.ll | 6 +-
test/Transforms/GlobalOpt/deadglobal-2.ll | 2 +-
test/Transforms/GlobalOpt/deadglobal.ll | 2 +-
test/Transforms/GlobalOpt/globalsra-partial.ll | 2 +-
.../GlobalOpt/globalsra-unknown-index.ll | 41 +
test/Transforms/GlobalOpt/globalsra.ll | 2 +-
test/Transforms/GlobalOpt/heap-sra-1.ll | 4 +-
test/Transforms/GlobalOpt/heap-sra-2.ll | 4 +-
test/Transforms/GlobalOpt/heap-sra-phi.ll | 4 +-
test/Transforms/GlobalOpt/integer-bool.ll | 2 +-
test/Transforms/GlobalOpt/iterate.ll | 2 +-
test/Transforms/GlobalOpt/load-store-global.ll | 2 +-
test/Transforms/GlobalOpt/malloc-promote-1.ll | 2 +-
test/Transforms/GlobalOpt/malloc-promote-2.ll | 2 +-
test/Transforms/GlobalOpt/malloc-promote-3.ll | 2 +-
test/Transforms/GlobalOpt/memcpy.ll | 2 +-
test/Transforms/GlobalOpt/memset.ll | 2 +-
test/Transforms/GlobalOpt/phi-select.ll | 2 +-
test/Transforms/GlobalOpt/storepointer-compare.ll | 2 +-
test/Transforms/GlobalOpt/storepointer.ll | 2 +-
test/Transforms/GlobalOpt/trivialstore.ll | 2 +-
test/Transforms/GlobalOpt/undef-init.ll | 2 +-
.../IPConstantProp/2008-06-09-WeakProp.ll | 2 +-
.../IPConstantProp/2009-09-24-byval-ptr.ll | 40 +
test/Transforms/IPConstantProp/deadarg.ll | 2 +-
test/Transforms/IPConstantProp/recursion.ll | 2 +-
test/Transforms/IPConstantProp/return-argument.ll | 2 +-
test/Transforms/IPConstantProp/return-constant.ll | 2 +-
test/Transforms/IPConstantProp/return-constants.ll | 2 +-
test/Transforms/IndMemRem/2009-01-24-Noalias.ll | 2 +-
.../IndVarSimplify/2002-09-09-PointerIndVar.ll | 2 +-
.../IndVarSimplify/2003-04-16-ExprAnalysis.ll | 2 +-
.../IndVarSimplify/2003-09-12-MultiplePred.ll | 2 +-
.../IndVarSimplify/2003-09-23-NotAtTop.ll | 4 +-
.../IndVarSimplify/2003-12-10-IndVarDeadCode.ll | 2 +-
.../IndVarSimplify/2003-12-10-RemoveInstrCrash.ll | 2 +-
test/Transforms/IndVarSimplify/2003-12-15-Crash.ll | 2 +-
.../IndVarSimplify/2003-12-21-IndVarSize.ll | 2 +-
.../IndVarSimplify/2004-03-10-PHIInsertionBug.ll | 2 +-
.../IndVarSimplify/2004-04-05-InvokeCastCrash.ll | 2 +-
.../2004-04-07-ScalarEvolutionCrash.ll | 2 +-
.../IndVarSimplify/2005-02-11-InvokeCrash.ll | 2 +-
.../IndVarSimplify/2005-02-17-TruncateExprCrash.ll | 2 +-
.../IndVarSimplify/2005-02-26-ExitValueCompute.ll | 2 +-
.../IndVarSimplify/2005-06-15-InstMoveCrash.ll | 2 +-
test/Transforms/IndVarSimplify/2005-11-18-Crash.ll | 2 +-
.../IndVarSimplify/2006-03-31-NegativeStride.ll | 2 +-
.../2006-06-16-Indvar-LCSSA-Crash.ll | 2 +-
.../IndVarSimplify/2006-09-20-LFTR-Crash.ll | 2 +-
.../IndVarSimplify/2006-12-10-BitCast.ll | 2 +-
.../IndVarSimplify/2007-01-06-TripCount.ll | 2 +-
.../IndVarSimplify/2007-01-08-X86-64-Pointer.ll | 4 +-
.../IndVarSimplify/2007-06-06-DeleteDanglesPtr.ll | 2 +-
.../IndVarSimplify/2007-11-23-BitcastCrash.ll | 2 +-
.../IndVarSimplify/2008-06-15-SCEVExpanderBug.ll | 2 +-
.../Transforms/IndVarSimplify/2008-09-02-IVType.ll | 2 +-
.../IndVarSimplify/2008-10-03-CouldNotCompute.ll | 2 +-
.../IndVarSimplify/2008-11-03-Floating.ll | 2 +-
.../IndVarSimplify/2008-11-17-Floating.ll | 6 +-
.../IndVarSimplify/2008-11-25-APFloatAssert.ll | 2 +-
.../IndVarSimplify/2009-04-14-shorten_iv_vars.ll | 2 +-
.../IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll | 2 +-
.../IndVarSimplify/2009-04-22-IndvarCrash.ll | 2 +-
.../IndVarSimplify/2009-04-27-Floating.ll | 2 +-
.../IndVarSimplify/2009-05-24-useafterfree.ll | 2 +-
test/Transforms/IndVarSimplify/ada-loops.ll | 2 +-
test/Transforms/IndVarSimplify/addrec-gep.ll | 2 +-
test/Transforms/IndVarSimplify/ashr-tripcount.ll | 2 +-
test/Transforms/IndVarSimplify/avoid-i0.ll | 2 +-
test/Transforms/IndVarSimplify/casted-argument.ll | 2 +-
test/Transforms/IndVarSimplify/complex-scev.ll | 2 +-
test/Transforms/IndVarSimplify/divide-pointer.ll | 2 +-
test/Transforms/IndVarSimplify/exit_value_tests.ll | 2 +-
.../Transforms/IndVarSimplify/gep-with-mul-base.ll | 2 +-
.../IndVarSimplify/interesting-invoke-use.ll | 2 +-
.../IndVarSimplify/iterationCount_zext_or_trunc.ll | 2 +-
test/Transforms/IndVarSimplify/iv-sext.ll | 143 +
test/Transforms/IndVarSimplify/iv-zext.ll | 2 +-
test/Transforms/IndVarSimplify/lftr-other-uses.ll | 2 +-
test/Transforms/IndVarSimplify/lftr-promote.ll | 38 +
test/Transforms/IndVarSimplify/lftr_simple.ll | 2 +-
.../IndVarSimplify/loop-invariant-step.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate10.ll | 47 +
test/Transforms/IndVarSimplify/loop_evaluate11.ll | 36 +
test/Transforms/IndVarSimplify/loop_evaluate7.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate8.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate9.ll | 6 +-
test/Transforms/IndVarSimplify/loop_evaluate_1.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate_2.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate_3.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate_4.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate_5.ll | 2 +-
test/Transforms/IndVarSimplify/loop_evaluate_6.ll | 2 +-
test/Transforms/IndVarSimplify/masked-iv.ll | 2 +-
test/Transforms/IndVarSimplify/max-pointer.ll | 39 +
.../phi-uses-value-multiple-times.ll | 2 +-
test/Transforms/IndVarSimplify/pointer-indvars.ll | 2 +-
test/Transforms/IndVarSimplify/pointer.ll | 2 +-
.../Transforms/IndVarSimplify/polynomial-expand.ll | 38 +
.../IndVarSimplify/preserve-gep-loop-variant.ll | 41 +
.../IndVarSimplify/preserve-gep-nested.ll | 75 +
.../IndVarSimplify/preserve-gep-remainder.ll | 2 +-
test/Transforms/IndVarSimplify/preserve-gep.ll | 2 +-
.../IndVarSimplify/preserve-signed-wrap.ll | 2 +-
.../promote-iv-to-eliminate-casts.ll | 2 +-
test/Transforms/IndVarSimplify/shrunk-constant.ll | 2 +-
.../Transforms/IndVarSimplify/signed-trip-count.ll | 2 +-
.../IndVarSimplify/single-element-range.ll | 27 +
test/Transforms/IndVarSimplify/sink-alloca.ll | 31 +
test/Transforms/IndVarSimplify/sink-trapping.ll | 19 +
test/Transforms/IndVarSimplify/subtract.ll | 2 +-
.../Transforms/IndVarSimplify/tripcount_compute.ll | 2 +-
.../IndVarSimplify/tripcount_infinite.ll | 2 +-
.../IndVarSimplify/variable-stride-ivs-0.ll | 2 +-
.../IndVarSimplify/variable-stride-ivs-1.ll | 2 +-
test/Transforms/Inline/2003-09-14-InlineValue.ll | 2 +-
.../Inline/2003-09-22-PHINodeInlineFail.ll | 2 +-
.../Inline/2003-09-22-PHINodesInExceptionDest.ll | 2 +-
.../2003-09-22-PHINodesInNormalInvokeDest.ll | 2 +-
.../Inline/2003-10-13-AllocaDominanceProblem.ll | 2 +-
.../2003-10-26-InlineInvokeExceptionDestPhi.ll | 2 +-
.../Inline/2004-04-15-InlineDeletesCall.ll | 2 +-
.../Transforms/Inline/2004-04-20-InlineLinkOnce.ll | 2 +-
.../2004-10-17-InlineFunctionWithoutReturn.ll | 2 +-
.../Inline/2006-01-14-CallGraphUpdate.ll | 2 +-
.../Inline/2006-07-12-InlinePruneCGUpdate.ll | 2 +-
.../Inline/2006-11-09-InlineCGUpdate-2.ll | 2 +-
.../Transforms/Inline/2006-11-09-InlineCGUpdate.ll | 2 +-
test/Transforms/Inline/2007-04-15-InlineEH.ll | 2 +-
test/Transforms/Inline/2007-06-06-NoInline.ll | 2 +-
test/Transforms/Inline/2007-06-25-WeakInline.ll | 2 +-
.../Transforms/Inline/2007-12-19-InlineNoUnwind.ll | 4 +-
test/Transforms/Inline/2008-03-04-StructRet.ll | 2 +-
test/Transforms/Inline/2008-03-07-Inline-2.ll | 2 +-
test/Transforms/Inline/2008-03-07-Inline.ll | 2 +-
test/Transforms/Inline/2008-09-02-AlwaysInline.ll | 2 +-
test/Transforms/Inline/2008-09-02-NoInline.ll | 2 +-
test/Transforms/Inline/2008-10-30-AlwaysInline.ll | 4 +-
test/Transforms/Inline/2008-11-04-AlwaysInline.ll | 2 +-
.../Inline/2009-01-08-NoInlineDynamicAlloca.ll | 2 +-
.../Inline/2009-01-12-RecursiveInline.ll | 2 +-
.../Inline/2009-01-13-RecursiveInlineCrash.ll | 2 +-
.../Inline/2009-05-07-CallUsingSelfCrash.ll | 2 +-
test/Transforms/Inline/PR4909.ll | 15 +
test/Transforms/Inline/alloca-in-scc.ll | 31 +
test/Transforms/Inline/alloca_test.ll | 4 +-
test/Transforms/Inline/always_inline_dyn_alloca.ll | 2 +-
test/Transforms/Inline/array_merge.ll | 26 +
test/Transforms/Inline/basictest.ll | 2 +-
test/Transforms/Inline/byval.ll | 2 +-
test/Transforms/Inline/byval2.ll | 2 +-
test/Transforms/Inline/callgraph-update.ll | 33 +
test/Transforms/Inline/casts.ll | 2 +-
test/Transforms/Inline/cfg_preserve_test.ll | 2 +-
test/Transforms/Inline/crash.ll | 57 +
test/Transforms/Inline/dynamic_alloca_test.ll | 4 +-
test/Transforms/Inline/externally_available.ll | 2 +-
test/Transforms/Inline/indirect_resolve.ll | 16 +
test/Transforms/Inline/inline-invoke-tail.ll | 2 +-
test/Transforms/Inline/inline-tail.ll | 2 +-
test/Transforms/Inline/inline_cleanup.ll | 4 +-
test/Transforms/Inline/inline_constprop.ll | 4 +-
test/Transforms/Inline/inline_dce.ll | 2 +-
test/Transforms/Inline/inline_prune.ll | 4 +-
test/Transforms/Inline/invoke_test-1.ll | 2 +-
test/Transforms/Inline/invoke_test-2.ll | 2 +-
test/Transforms/Inline/invoke_test-3.ll | 2 +-
test/Transforms/Inline/nested-inline.ll | 111 +
.../InstCombine/2002-03-11-InstCombineHang.ll | 2 +-
.../InstCombine/2002-05-14-SubFailure.ll | 2 +-
test/Transforms/InstCombine/2002-08-02-CastTest.ll | 2 +-
.../InstCombine/2002-12-05-MissedConstProp.ll | 2 +-
.../InstCombine/2003-05-26-CastMiscompile.ll | 2 +-
.../InstCombine/2003-05-27-ConstExprCrash.ll | 2 +-
.../InstCombine/2003-06-05-BranchInvertInfLoop.ll | 2 +-
.../InstCombine/2003-07-21-ExternalConstant.ll | 2 +-
.../InstCombine/2003-08-12-AllocaNonNull.ll | 2 +-
.../InstCombine/2003-09-09-VolatileLoadElim.ll | 2 +-
.../InstCombine/2003-10-29-CallSiteResolve.ll | 2 +-
.../InstCombine/2003-11-03-VarargsCallBug.ll | 2 +-
.../InstCombine/2003-11-13-ConstExprCastCall.ll | 5 +-
.../InstCombine/2004-01-13-InstCombineInvokePHI.ll | 2 +-
.../InstCombine/2004-02-23-ShiftShiftOverflow.ll | 2 +-
.../InstCombine/2004-03-13-InstCombineInfLoop.ll | 2 +-
.../2004-04-04-InstCombineReplaceAllUsesWith.ll | 2 +-
.../InstCombine/2004-05-07-UnsizedCastLoad.ll | 2 +-
.../InstCombine/2004-07-27-ConstantExprMul.ll | 2 +-
.../InstCombine/2004-08-09-RemInfLoop.ll | 2 +-
.../Transforms/InstCombine/2004-08-10-BoolSetCC.ll | 2 +-
.../InstCombine/2004-09-20-BadLoadCombine.ll | 2 +-
.../InstCombine/2004-09-20-BadLoadCombine2.ll | 2 +-
.../InstCombine/2004-09-28-BadShiftAndSetCC.ll | 2 +-
.../InstCombine/2004-11-22-Missed-and-fold.ll | 2 +-
.../2004-11-27-SetCCForCastLargerAndConstant.ll | 32 +-
.../InstCombine/2004-12-08-RemInfiniteLoop.ll | 2 +-
.../InstCombine/2005-03-04-ShiftOverflow.ll | 2 +-
.../InstCombine/2005-04-07-UDivSelectCrash.ll | 2 +-
.../InstCombine/2005-06-15-DivSelectCrash.ll | 2 +-
.../InstCombine/2005-06-15-ShiftSetCCCrash.ll | 2 +-
.../InstCombine/2005-06-16-RangeCrash.ll | 2 +-
.../2005-06-16-SetCCOrSetCCMiscompile.ll | 2 +-
.../InstCombine/2005-07-07-DeadPHILoop.ll | 2 +-
.../InstCombine/2006-02-13-DemandedMiscompile.ll | 2 +-
test/Transforms/InstCombine/2006-02-28-Crash.ll | 2 +-
.../InstCombine/2006-03-30-ExtractElement.ll | 2 +-
.../InstCombine/2006-04-28-ShiftShiftLongLong.ll | 6 +-
.../InstCombine/2006-05-04-DemandedBitCrash.ll | 2 +-
.../InstCombine/2006-09-15-CastToBool.ll | 2 +-
.../2006-10-19-SignedToUnsignedCastAndConst-2.ll | 2 +-
.../2006-10-19-SignedToUnsignedCastAndConst.ll | 2 +-
test/Transforms/InstCombine/2006-10-20-mask.ll | 2 +-
.../InstCombine/2006-10-26-VectorReassoc.ll | 2 +-
.../Transforms/InstCombine/2006-11-03-Memmove64.ll | 2 +-
.../InstCombine/2006-11-10-ashr-miscompile.ll | 2 +-
.../InstCombine/2006-12-01-BadFPVectorXform.ll | 4 +-
.../InstCombine/2006-12-05-fp-to-int-ext.ll | 2 +-
.../InstCombine/2006-12-08-ICmp-Combining.ll | 2 +-
.../InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll | 2 +-
.../InstCombine/2006-12-08-Select-ICmp.ll | 2 +-
.../InstCombine/2006-12-15-Range-Test.ll | 4 +-
.../InstCombine/2006-12-23-Select-Cmp-Cmp.ll | 2 +-
.../InstCombine/2007-01-13-ExtCompareMiscompile.ll | 2 +-
test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll | 2 +-
.../InstCombine/2007-01-18-VectorInfLoop.ll | 2 +-
test/Transforms/InstCombine/2007-01-27-AndICmp.ll | 2 +-
.../InstCombine/2007-02-01-LoadSinkAlloca.ll | 4 +-
.../InstCombine/2007-02-07-PointerCast.ll | 2 +-
.../InstCombine/2007-02-23-PhiFoldInfLoop.ll | 2 +-
.../InstCombine/2007-03-13-CompareMerge.ll | 2 +-
.../InstCombine/2007-03-19-BadTruncChangePR1261.ll | 2 +-
.../InstCombine/2007-03-21-SignedRangeTest.ll | 2 +-
.../InstCombine/2007-03-25-BadShiftMask.ll | 2 +-
.../InstCombine/2007-03-25-DoubleShift.ll | 2 +-
.../InstCombine/2007-03-26-BadShiftMask.ll | 2 +-
test/Transforms/InstCombine/2007-03-27-PR1280.ll | 2 +-
.../2007-04-04-BadFoldBitcastIntoMalloc.ll | 2 +-
.../InstCombine/2007-04-08-SingleEltVectorCrash.ll | 2 +-
test/Transforms/InstCombine/2007-05-04-Crash.ll | 2 +-
test/Transforms/InstCombine/2007-05-10-icmp-or.ll | 2 +-
test/Transforms/InstCombine/2007-05-14-Crash.ll | 2 +-
.../InstCombine/2007-05-18-CastFoldBug.ll | 2 +-
.../InstCombine/2007-06-06-AshrSignBit.ll | 2 +-
.../InstCombine/2007-06-21-DivCompareMiscomp.ll | 2 +-
.../InstCombine/2007-08-02-InfiniteLoop.ll | 2 +-
.../InstCombine/2007-09-10-AliasConstFold.ll | 2 +-
.../InstCombine/2007-09-11-Trampoline.ll | 2 +-
.../InstCombine/2007-09-17-AliasConstFold2.ll | 2 +-
.../InstCombine/2007-10-10-EliminateMemCpy.ll | 4 +-
test/Transforms/InstCombine/2007-10-12-Crash.ll | 2 +-
.../Transforms/InstCombine/2007-10-28-stacksave.ll | 2 +-
.../InstCombine/2007-10-31-RangeCrash.ll | 2 +-
.../InstCombine/2007-10-31-StringCrash.ll | 2 +-
.../InstCombine/2007-11-07-OpaqueAlignCrash.ll | 2 +-
.../InstCombine/2007-11-15-CompareMiscomp.ll | 2 +-
.../Transforms/InstCombine/2007-11-22-IcmpCrash.ll | 2 +-
.../InstCombine/2007-11-25-CompatibleAttributes.ll | 2 +-
.../InstCombine/2007-12-10-ConstFoldCompare.ll | 2 +-
test/Transforms/InstCombine/2007-12-12-GEPScale.ll | 2 +-
.../InstCombine/2007-12-16-AsmNoUnwind.ll | 2 +-
.../InstCombine/2007-12-18-AddSelCmpSub.ll | 2 +-
test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll | 2 +-
.../InstCombine/2008-01-06-BitCastAttributes.ll | 2 +-
.../Transforms/InstCombine/2008-01-06-CastCrash.ll | 2 +-
test/Transforms/InstCombine/2008-01-06-VoidCast.ll | 2 +-
.../Transforms/InstCombine/2008-01-13-AndCmpCmp.ll | 2 +-
.../InstCombine/2008-01-13-NoBitCastAttributes.ll | 2 +-
.../InstCombine/2008-01-14-DoubleNest.ll | 2 +-
.../InstCombine/2008-01-14-VarArgTrampoline.ll | 2 +-
.../2008-01-21-MismatchedCastAndCompare.ll | 7 +-
test/Transforms/InstCombine/2008-01-21-MulTrunc.ll | 6 +-
.../InstCombine/2008-01-27-FloatSelect.ll | 2 +-
test/Transforms/InstCombine/2008-01-29-AddICmp.ll | 2 +-
test/Transforms/InstCombine/2008-02-13-MulURem.ll | 2 +-
.../InstCombine/2008-02-16-SDivOverflow.ll | 2 +-
.../InstCombine/2008-02-16-SDivOverflow2.ll | 2 +-
test/Transforms/InstCombine/2008-02-23-MulSub.ll | 2 +-
.../InstCombine/2008-02-28-OrFCmpCrash.ll | 2 +-
test/Transforms/InstCombine/2008-03-13-IntToPtr.ll | 2 +-
.../InstCombine/2008-04-22-ByValBitcast.ll | 2 +-
.../InstCombine/2008-04-28-VolatileStore.ll | 2 +-
.../2008-04-29-VolatileLoadDontMerge.ll | 2 +-
.../InstCombine/2008-04-29-VolatileLoadMerge.ll | 2 +-
.../InstCombine/2008-05-08-LiveStoreDelete.ll | 2 +-
.../InstCombine/2008-05-08-StrLenSink.ll | 6 +-
.../InstCombine/2008-05-09-SinkOfInvoke.ll | 2 +-
test/Transforms/InstCombine/2008-05-17-InfLoop.ll | 2 +-
.../InstCombine/2008-05-18-FoldIntToPtr.ll | 2 +-
.../InstCombine/2008-05-22-IDivVector.ll | 2 +-
.../InstCombine/2008-05-22-NegValVector.ll | 2 +-
.../InstCombine/2008-05-23-CompareFold.ll | 2 +-
test/Transforms/InstCombine/2008-05-31-AddBool.ll | 2 +-
test/Transforms/InstCombine/2008-05-31-Bools.ll | 2 +-
.../InstCombine/2008-06-05-ashr-crash.ll | 2 +-
test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll | 2 +-
.../InstCombine/2008-06-13-InfiniteLoopStore.ll | 2 +-
.../InstCombine/2008-06-13-ReadOnlyCallStore.ll | 2 +-
.../InstCombine/2008-06-19-UncondLoad.ll | 2 +-
.../InstCombine/2008-06-21-CompareMiscomp.ll | 2 +-
.../InstCombine/2008-06-24-StackRestore.ll | 2 +-
test/Transforms/InstCombine/2008-07-08-AndICmp.ll | 2 +-
.../InstCombine/2008-07-08-ShiftOneAndOne.ll | 2 +-
test/Transforms/InstCombine/2008-07-08-SubAnd.ll | 2 +-
.../InstCombine/2008-07-08-VolatileLoadMerge.ll | 2 +-
.../InstCombine/2008-07-09-SubAndError.ll | 2 +-
.../InstCombine/2008-07-10-CastSextBool.ll | 4 +-
.../Transforms/InstCombine/2008-07-10-ICmpBinOp.ll | 4 +-
test/Transforms/InstCombine/2008-07-11-RemAnd.ll | 2 +-
test/Transforms/InstCombine/2008-07-13-DivZero.ll | 4 +-
test/Transforms/InstCombine/2008-07-16-fsub.ll | 2 +-
.../InstCombine/2008-07-16-sse2_storel_dq.ll | 2 +-
test/Transforms/InstCombine/2008-08-05-And.ll | 2 +-
.../InstCombine/2008-08-17-ICmpXorSignbit.ll | 2 +-
.../InstCombine/2008-09-02-VectorCrash.ll | 2 +-
.../Transforms/InstCombine/2008-09-29-FoldingOr.ll | 2 +-
.../InstCombine/2008-10-11-DivCompareFold.ll | 2 +-
.../InstCombine/2008-10-23-ConstFoldWithoutMask.ll | 2 +-
.../InstCombine/2008-11-01-SRemDemandedBits.ll | 2 +-
test/Transforms/InstCombine/2008-11-08-FCmp.ll | 13 +-
.../Transforms/InstCombine/2008-11-20-DivMulRem.ll | 2 +-
.../InstCombine/2008-11-27-IDivVector.ll | 2 +-
.../InstCombine/2008-11-27-MultiplyIntVec.ll | 2 +-
.../InstCombine/2008-11-27-UDivNegative.ll | 2 +-
.../InstCombine/2008-12-17-SRemNegConstVec.ll | 2 +-
.../InstCombine/2009-01-05-i128-crash.ll | 2 +-
.../InstCombine/2009-01-08-AlignAlloca.ll | 2 +-
.../InstCombine/2009-01-16-PointerAddrSpace.ll | 2 +-
.../2009-01-19-fmod-constant-float-specials.ll | 8 +-
.../InstCombine/2009-01-19-fmod-constant-float.ll | 4 +-
.../InstCombine/2009-01-24-EmptyStruct.ll | 2 +-
.../InstCombine/2009-01-31-InfIterate.ll | 2 +-
test/Transforms/InstCombine/2009-01-31-Pressure.ll | 2 +-
.../Transforms/InstCombine/2009-02-04-FPBitcast.ll | 2 +-
.../InstCombine/2009-02-20-InstCombine-SROA.ll | 2 +-
test/Transforms/InstCombine/2009-02-21-LoadCST.ll | 2 +-
.../InstCombine/2009-02-25-CrashZeroSizeArray.ll | 2 +-
.../InstCombine/2009-03-18-vector-ashr-crash.ll | 2 +-
.../InstCombine/2009-03-20-AShrOverShift.ll | 2 +-
test/Transforms/InstCombine/2009-03-24-InfLoop.ll | 2 +-
.../InstCombine/2009-04-07-MulPromoteToI96.ll | 2 +-
.../InstCombine/2009-05-23-FCmpToICmp.ll | 2 +-
.../InstCombine/2009-06-11-StoreAddrSpace.ll | 2 +-
.../InstCombine/2009-06-16-SRemDemandedBits.ll | 2 +-
.../InstCombine/2009-07-02-MaskedIntVector.ll | 2 +-
test/Transforms/InstCombine/CPP_min_max.ll | 2 +-
test/Transforms/InstCombine/IntPtrCast.ll | 3 +-
test/Transforms/InstCombine/JavaCompare.ll | 2 +-
test/Transforms/InstCombine/add-shrink.ll | 4 +-
test/Transforms/InstCombine/add-sitofp.ll | 2 +-
test/Transforms/InstCombine/add.ll | 2 +-
test/Transforms/InstCombine/add2.ll | 24 +-
test/Transforms/InstCombine/add3.ll | 21 +
test/Transforms/InstCombine/addnegneg.ll | 2 +-
test/Transforms/InstCombine/adjust-for-sminmax.ll | 2 +-
test/Transforms/InstCombine/align-2d-gep.ll | 2 +-
test/Transforms/InstCombine/align-addr.ll | 2 +-
test/Transforms/InstCombine/align-external.ll | 22 +
test/Transforms/InstCombine/align-inc.ll | 4 +-
test/Transforms/InstCombine/alloca.ll | 2 +-
test/Transforms/InstCombine/and-compare.ll | 2 +-
test/Transforms/InstCombine/and-fcmp.ll | 4 +-
test/Transforms/InstCombine/and-not-or.ll | 4 +-
test/Transforms/InstCombine/and-or-and.ll | 2 +-
test/Transforms/InstCombine/and-or-not.ll | 6 +-
test/Transforms/InstCombine/and-or.ll | 4 +-
test/Transforms/InstCombine/and-xor-merge.ll | 4 +-
test/Transforms/InstCombine/and.ll | 2 +-
test/Transforms/InstCombine/and2.ll | 2 +-
test/Transforms/InstCombine/apint-add1.ll | 2 +-
test/Transforms/InstCombine/apint-add2.ll | 2 +-
test/Transforms/InstCombine/apint-and-compare.ll | 2 +-
test/Transforms/InstCombine/apint-and-or-and.ll | 2 +-
test/Transforms/InstCombine/apint-and-xor-merge.ll | 4 +-
test/Transforms/InstCombine/apint-and1.ll | 2 +-
test/Transforms/InstCombine/apint-and2.ll | 2 +-
.../InstCombine/apint-call-cast-target.ll | 2 +-
test/Transforms/InstCombine/apint-cast-and-cast.ll | 2 +-
.../InstCombine/apint-cast-cast-to-and.ll | 2 +-
test/Transforms/InstCombine/apint-cast.ll | 10 +-
test/Transforms/InstCombine/apint-div1.ll | 2 +-
test/Transforms/InstCombine/apint-div2.ll | 2 +-
.../InstCombine/apint-elim-logicalops.ll | 2 +-
test/Transforms/InstCombine/apint-mul1.ll | 2 +-
test/Transforms/InstCombine/apint-mul2.ll | 2 +-
test/Transforms/InstCombine/apint-not.ll | 2 +-
test/Transforms/InstCombine/apint-or1.ll | 2 +-
test/Transforms/InstCombine/apint-or2.ll | 2 +-
test/Transforms/InstCombine/apint-rem1.ll | 2 +-
test/Transforms/InstCombine/apint-rem2.ll | 2 +-
test/Transforms/InstCombine/apint-select.ll | 2 +-
.../Transforms/InstCombine/apint-shift-simplify.ll | 2 +-
test/Transforms/InstCombine/apint-shift.ll | 2 +-
test/Transforms/InstCombine/apint-shl-trunc.ll | 2 +-
test/Transforms/InstCombine/apint-sub.ll | 2 +-
test/Transforms/InstCombine/apint-xor1.ll | 2 +-
test/Transforms/InstCombine/apint-xor2.ll | 2 +-
test/Transforms/InstCombine/apint-zext1.ll | 4 +-
test/Transforms/InstCombine/apint-zext2.ll | 4 +-
test/Transforms/InstCombine/ashr-nop.ll | 2 +-
test/Transforms/InstCombine/badmalloc.ll | 19 +
test/Transforms/InstCombine/binop-cast.ll | 4 +-
test/Transforms/InstCombine/bit-tracking.ll | 2 +-
.../InstCombine/bitcast-scalar-to-vector.ll | 2 +-
test/Transforms/InstCombine/bitcast-vec-canon.ll | 22 +
test/Transforms/InstCombine/bitcast-vector-fold.ll | 2 +-
test/Transforms/InstCombine/bitcount.ll | 4 +-
test/Transforms/InstCombine/bittest.ll | 2 +-
test/Transforms/InstCombine/bswap-fold.ll | 4 +-
test/Transforms/InstCombine/bswap.ll | 2 +-
test/Transforms/InstCombine/call-cast-target.ll | 2 +-
test/Transforms/InstCombine/call-intrinsics.ll | 2 +-
test/Transforms/InstCombine/call.ll | 25 +-
test/Transforms/InstCombine/call2.ll | 2 +-
test/Transforms/InstCombine/canonicalize_branch.ll | 2 +-
test/Transforms/InstCombine/cast-and-cast.ll | 2 +-
test/Transforms/InstCombine/cast-cast-to-and.ll | 2 +-
test/Transforms/InstCombine/cast-load-gep.ll | 2 +-
test/Transforms/InstCombine/cast-malloc.ll | 2 +-
test/Transforms/InstCombine/cast-mul-select.ll | 12 +-
test/Transforms/InstCombine/cast-propagate.ll | 2 +-
test/Transforms/InstCombine/cast-set.ll | 14 +-
test/Transforms/InstCombine/cast-sext-zext.ll | 2 +-
test/Transforms/InstCombine/cast.ll | 77 +-
test/Transforms/InstCombine/cast2.ll | 9 +-
test/Transforms/InstCombine/cast3.ll | 35 +
test/Transforms/InstCombine/cast_ld_addr_space.ll | 2 +-
test/Transforms/InstCombine/cast_ptr.ll | 12 +-
test/Transforms/InstCombine/constant-fold-gep.ll | 54 +
.../InstCombine/constant-fold-ptr-casts.ll | 2 +-
test/Transforms/InstCombine/crash.ll | 46 +
test/Transforms/InstCombine/dce-iterate.ll | 2 +-
test/Transforms/InstCombine/deadcode.ll | 4 +-
test/Transforms/InstCombine/div-cmp-overflow.ll | 2 +-
test/Transforms/InstCombine/div.ll | 2 +-
.../InstCombine/enforce-known-alignment.ll | 2 +-
test/Transforms/InstCombine/exact-sdiv.ll | 52 +
test/Transforms/InstCombine/extractvalue.ll | 2 +-
test/Transforms/InstCombine/fold-bin-operand.ll | 13 +
test/Transforms/InstCombine/fold-vector-zero.ll | 2 +-
test/Transforms/InstCombine/fp-ret-bitcast.ll | 2 +-
test/Transforms/InstCombine/fpcast.ll | 6 +-
test/Transforms/InstCombine/fpextend.ll | 2 +-
test/Transforms/InstCombine/fsub-fsub.ll | 2 +-
test/Transforms/InstCombine/getelementptr.ll | 459 +-
test/Transforms/InstCombine/hoist_instr.ll | 5 +-
test/Transforms/InstCombine/icmp.ll | 16 +-
test/Transforms/InstCombine/known_align.ll | 2 +-
test/Transforms/InstCombine/load.ll | 2 +-
test/Transforms/InstCombine/load2.ll | 2 +-
test/Transforms/InstCombine/load3.ll | 2 +-
test/Transforms/InstCombine/loadstore-alignment.ll | 2 +-
test/Transforms/InstCombine/logical-select.ll | 2 +-
test/Transforms/InstCombine/lshr-phi.ll | 2 +-
test/Transforms/InstCombine/malloc-free-delete.ll | 4 +-
test/Transforms/InstCombine/malloc.ll | 2 +-
test/Transforms/InstCombine/malloc2.ll | 4 +-
test/Transforms/InstCombine/malloc3.ll | 2 +-
test/Transforms/InstCombine/memcpy-to-load.ll | 2 +-
test/Transforms/InstCombine/memmove.ll | 2 +-
test/Transforms/InstCombine/memset.ll | 2 +-
test/Transforms/InstCombine/mul-masked-bits.ll | 2 +-
test/Transforms/InstCombine/mul.ll | 33 +-
test/Transforms/InstCombine/multi-use-or.ll | 2 +-
test/Transforms/InstCombine/narrow.ll | 2 +-
test/Transforms/InstCombine/no-negzero.ll | 33 +
test/Transforms/InstCombine/not-fcmp.ll | 2 +-
test/Transforms/InstCombine/not.ll | 2 +-
test/Transforms/InstCombine/nothrow.ll | 2 +-
test/Transforms/InstCombine/nsw.ll | 20 +
test/Transforms/InstCombine/odr-linkage.ll | 2 +-
test/Transforms/InstCombine/or-fcmp.ll | 4 +-
test/Transforms/InstCombine/or-to-xor.ll | 4 +-
test/Transforms/InstCombine/or.ll | 2 +-
test/Transforms/InstCombine/or2.ll | 2 +-
test/Transforms/InstCombine/phi-merge-gep.ll | 102 +
test/Transforms/InstCombine/phi-merge.ll | 2 +-
test/Transforms/InstCombine/phi.ll | 2 +-
test/Transforms/InstCombine/pr2645-0.ll | 2 +-
test/Transforms/InstCombine/pr2645-1.ll | 2 +-
test/Transforms/InstCombine/pr2996.ll | 2 +-
test/Transforms/InstCombine/preserve-sminmax.ll | 2 +-
test/Transforms/InstCombine/ptr-int-cast.ll | 2 +-
test/Transforms/InstCombine/rem.ll | 2 +-
test/Transforms/InstCombine/sdiv-1.ll | 2 +-
test/Transforms/InstCombine/sdiv-2.ll | 2 +-
test/Transforms/InstCombine/sdiv-shift.ll | 9 +
test/Transforms/InstCombine/select-2.ll | 2 +-
test/Transforms/InstCombine/select-load-call.ll | 2 +-
test/Transforms/InstCombine/select.ll | 63 +-
test/Transforms/InstCombine/set.ll | 2 +-
test/Transforms/InstCombine/setcc-cast-cast.ll | 2 +-
.../InstCombine/setcc-strength-reduce.ll | 2 +-
test/Transforms/InstCombine/sext-misc.ll | 2 +-
test/Transforms/InstCombine/shift-simplify.ll | 2 +-
test/Transforms/InstCombine/shift-sra.ll | 4 +-
test/Transforms/InstCombine/shift-trunc-shift.ll | 2 +-
test/Transforms/InstCombine/shift.ll | 139 +-
test/Transforms/InstCombine/shufflemask-undef.ll | 2 +-
test/Transforms/InstCombine/shufflevec-constant.ll | 2 +-
test/Transforms/InstCombine/signed-comparison.ll | 2 +-
test/Transforms/InstCombine/signext.ll | 2 +-
.../InstCombine/simplify-demanded-bits-pointer.ll | 84 +
test/Transforms/InstCombine/sink_instruction.ll | 43 +-
test/Transforms/InstCombine/sitofp.ll | 2 +-
test/Transforms/InstCombine/srem-simplify-bug.ll | 2 +-
test/Transforms/InstCombine/srem.ll | 2 +-
test/Transforms/InstCombine/srem1.ll | 2 +-
test/Transforms/InstCombine/stack-overalign.ll | 2 +-
test/Transforms/InstCombine/stacksaverestore.ll | 2 +-
test/Transforms/InstCombine/store-merge.ll | 2 +-
test/Transforms/InstCombine/store.ll | 2 +-
test/Transforms/InstCombine/sub.ll | 2 +-
test/Transforms/InstCombine/trunc-mask-ext.ll | 2 +-
test/Transforms/InstCombine/udiv-simplify-bug-0.ll | 2 +-
test/Transforms/InstCombine/udiv-simplify-bug-1.ll | 2 +-
.../InstCombine/udiv_select_to_select_shift.ll | 2 +-
.../Transforms/InstCombine/udivrem-change-width.ll | 19 +
test/Transforms/InstCombine/urem-simplify-bug.ll | 2 +-
test/Transforms/InstCombine/urem.ll | 2 +-
test/Transforms/InstCombine/vec_demanded_elts-2.ll | 2 +-
test/Transforms/InstCombine/vec_demanded_elts-3.ll | 2 +-
test/Transforms/InstCombine/vec_demanded_elts.ll | 10 +-
test/Transforms/InstCombine/vec_extract_elt.ll | 2 +-
test/Transforms/InstCombine/vec_insertelt.ll | 2 +-
test/Transforms/InstCombine/vec_narrow.ll | 2 +-
test/Transforms/InstCombine/vec_shuffle.ll | 46 +-
test/Transforms/InstCombine/vector-casts.ll | 107 +
test/Transforms/InstCombine/vector-srem.ll | 2 +-
test/Transforms/InstCombine/volatile_store.ll | 4 +-
test/Transforms/InstCombine/xor-demorgans.ll | 2 +-
test/Transforms/InstCombine/xor-undef.ll | 2 +-
test/Transforms/InstCombine/xor.ll | 2 +-
test/Transforms/InstCombine/xor2.ll | 28 +-
test/Transforms/InstCombine/zero-point-zero-add.ll | 2 +-
test/Transforms/InstCombine/zeroext-and-reduce.ll | 2 +-
test/Transforms/InstCombine/zext-bool-add-sub.ll | 29 +
test/Transforms/InstCombine/zext-fold.ll | 2 +-
test/Transforms/InstCombine/zext-or-icmp.ll | 2 +-
test/Transforms/InstCombine/zext.ll | 30 +-
.../Internalize/2008-05-09-AllButMain.ll | 10 +-
.../Internalize/2009-01-05-InternalizeAliases.ll | 2 +-
.../JumpThreading/2008-11-27-EntryMunge.ll | 2 +-
.../Transforms/JumpThreading/2008-11-28-InfLoop.ll | 2 +-
.../JumpThreading/2009-01-08-DeadLoopRepl.ll | 2 +-
.../JumpThreading/2009-01-19-InfSwitchLoop.ll | 2 +-
test/Transforms/JumpThreading/and-and-cond.ll | 4 +-
test/Transforms/JumpThreading/and-cond.ll | 4 +-
test/Transforms/JumpThreading/basic.ll | 129 +-
test/Transforms/JumpThreading/branch-no-const.ll | 2 +-
test/Transforms/JumpThreading/compare.ll | 2 +-
test/Transforms/JumpThreading/crash.ll | 56 +
.../JumpThreading/no-irreducible-loops.ll | 2 +-
test/Transforms/JumpThreading/thread-loads.ll | 2 +-
.../LCSSA/2006-06-03-IncorrectIDFPhis.ll | 8 +-
.../LCSSA/2006-06-12-MultipleExitsSameBlock.ll | 4 +-
test/Transforms/LCSSA/2006-07-09-NoDominator.ll | 2 +-
.../LCSSA/2006-10-31-UnreachableBlock-2.ll | 2 +-
.../LCSSA/2006-10-31-UnreachableBlock.ll | 2 +-
test/Transforms/LCSSA/2007-07-12-LICM-2.ll | 2 +-
test/Transforms/LCSSA/2007-07-12-LICM-3.ll | 2 +-
test/Transforms/LCSSA/2007-07-12-LICM.ll | 2 +-
test/Transforms/LCSSA/basictest.ll | 4 +-
test/Transforms/LCSSA/invoke-dest.ll | 2 +-
.../LICM/2003-02-26-LoopExitNotDominated.ll | 2 +-
.../LICM/2003-02-27-NestedLoopExitBlocks.ll | 2 +-
.../LICM/2003-02-27-PreheaderExitNodeUpdate.ll | 2 +-
.../Transforms/LICM/2003-02-27-PreheaderProblem.ll | 2 +-
test/Transforms/LICM/2003-02-27-StoreSinkPHIs.ll | 2 +-
.../LICM/2003-02-28-PromoteDifferentType.ll | 2 +-
test/Transforms/LICM/2003-05-02-LoadHoist.ll | 2 +-
test/Transforms/LICM/2003-12-11-SinkingToPHI.ll | 2 +-
.../LICM/2004-09-14-AliasAnalysisInvalidate.ll | 2 +-
test/Transforms/LICM/2004-11-17-UndefIndexCrash.ll | 2 +-
.../LICM/2005-03-24-LICM-Aggregate-Crash.ll | 2 +-
.../LICM/2006-09-12-DeadUserOfSunkInstr.ll | 2 +-
test/Transforms/LICM/2007-05-22-VolatileSink.ll | 2 +-
test/Transforms/LICM/2007-07-30-AliasSet.ll | 2 +-
test/Transforms/LICM/2007-09-17-PromoteValue.ll | 2 +-
.../Transforms/LICM/2007-09-24-PromoteNullValue.ll | 2 +-
.../Transforms/LICM/2007-10-01-PromoteSafeValue.ll | 2 +-
test/Transforms/LICM/2008-05-20-AliasSetVAArg.ll | 2 +-
.../LICM/2008-07-22-LoadGlobalConstant.ll | 2 +-
test/Transforms/LICM/2009-03-25-AliasSetTracker.ll | 2 +-
test/Transforms/LICM/Preserve-LCSSA.ll | 2 +-
test/Transforms/LICM/basictest.ll | 2 +-
test/Transforms/LICM/hoisting.ll | 50 +
test/Transforms/LICM/licm_preserve_dbginfo.ll | 55 +
test/Transforms/LICM/no-preheader-test.ll | 2 +-
test/Transforms/LICM/scalar_promote.ll | 52 +-
test/Transforms/LICM/sinking.ll | 235 +
.../LoopDeletion/2007-07-23-InfiniteLoop.ll | 2 +-
test/Transforms/LoopDeletion/2008-05-06-Phi.ll | 2 +-
test/Transforms/LoopDeletion/dcetest.ll | 2 +-
.../LoopIndexSplit/2007-09-21-LoopBound.ll | 2 +-
.../2007-09-24-UpdateIterationSpace.ll | 2 +-
.../2007-09-25-UpdateIterationSpace-2.ll | 2 +-
.../LoopIndexSplit/2008-01-28-IndDecrement.ll | 2 +-
test/Transforms/LoopIndexSplit/2008-02-08-Crash.ll | 2 +-
.../LoopIndexSplit/2008-02-13-ExitValueNum.ll | 2 +-
.../LoopIndexSplit/2008-02-13-LoopLatch.ll | 2 +-
.../LoopIndexSplit/2008-02-13-LoopLatchPHI.ll | 2 +-
test/Transforms/LoopIndexSplit/2008-02-14-Crash.ll | 2 +-
.../LoopIndexSplit/2008-03-24-ExitPhi.ll | 2 +-
.../Transforms/LoopIndexSplit/2008-05-19-IndVar.ll | 2 +-
.../LoopIndexSplit/2008-06-03-DomFrontier.ll | 2 +-
.../LoopIndexSplit/2008-07-08-MisCompilation.ll | 2 +-
test/Transforms/LoopIndexSplit/2008-09-17-IVUse.ll | 2 +-
test/Transforms/LoopIndexSplit/2008-09-20-Crash.ll | 2 +-
test/Transforms/LoopIndexSplit/2008-10-06-Crash.ll | 2 +-
.../LoopIndexSplit/2008-10-10-OneIteration.ll | 2 +-
test/Transforms/LoopIndexSplit/2008-11-10-Sign.ll | 2 +-
.../2009-03-02-UpdateIterationSpace-crash.ll | 2 +-
test/Transforms/LoopIndexSplit/2009-03-30-undef.ll | 2 +-
test/Transforms/LoopIndexSplit/Crash-2007-08-17.ll | 2 +-
test/Transforms/LoopIndexSplit/Crash-2007-12-03.ll | 2 +-
.../Transforms/LoopIndexSplit/Crash2-2007-08-17.ll | 2 +-
.../LoopIndexSplit/ExitCondition-2007-09-10.ll | 2 +-
.../LoopIndexSplit/OneIterLoop-2007-08-17.ll | 2 +-
.../LoopIndexSplit/OneIterLoop2-2007-08-17.ll | 2 +-
.../LoopIndexSplit/OneIterLoop3-2007-08-17.ll | 2 +-
test/Transforms/LoopIndexSplit/PR3913.ll | 24 +
.../LoopIndexSplit/SaveLastValue-2007-08-17.ll | 2 +-
.../LoopIndexSplit/SplitValue-2007-08-24-dbg.ll | 2 +-
.../LoopIndexSplit/SplitValue-2007-08-24.ll | 2 +-
.../LoopIndexSplit/UpperBound-2007-08-24.ll | 2 +-
.../LoopIndexSplit/non-iv-cmp-operand.ll | 2 +-
.../LoopRotate/2009-01-25-SingleEntryPhi.ll | 2 +-
test/Transforms/LoopRotate/LRCrash-1.ll | 2 +-
test/Transforms/LoopRotate/LRCrash-2.ll | 2 +-
test/Transforms/LoopRotate/LRCrash-3.ll | 2 +-
test/Transforms/LoopRotate/LRCrash-4.ll | 2 +-
test/Transforms/LoopRotate/LRCrash-5.ll | 2 +-
test/Transforms/LoopRotate/PhiRename-1.ll | 2 +-
test/Transforms/LoopRotate/PhiSelfRefernce-1.ll | 2 +-
test/Transforms/LoopRotate/pr2639.ll | 2 +-
test/Transforms/LoopRotate/preserve-scev.ll | 47 +
.../LoopSimplify/2003-04-25-AssertFail.ll | 2 +-
.../2003-05-12-PreheaderExitOfChild.ll | 2 +-
.../LoopSimplify/2003-08-15-PreheadersFail.ll | 2 +-
.../LoopSimplify/2003-12-10-ExitBlocksProblem.ll | 2 +-
.../2004-02-05-DominatorInfoCorruption.ll | 2 +-
.../LoopSimplify/2004-03-15-IncorrectDomUpdate.ll | 2 +-
.../LoopSimplify/2004-04-01-IncorrectDomUpdate.ll | 2 +-
.../2004-04-12-LoopSimplify-SwitchBackedges.ll | 2 +-
.../2004-04-13-LoopSimplifyUpdateDomFrontier.ll | 2 +-
.../LoopSimplify/2007-10-28-InvokeCrash.ll | 2 +-
test/Transforms/LoopSimplify/basictest.ll | 2 +-
test/Transforms/LoopSimplify/hardertest.ll | 2 +-
test/Transforms/LoopSimplify/merge-exits.ll | 3 +-
test/Transforms/LoopSimplify/phi-node-simplify.ll | 2 +-
test/Transforms/LoopSimplify/single-backedge.ll | 2 +-
.../LoopStrengthReduce/2005-08-15-AddRecIV.ll | 2 +-
.../2005-08-17-OutOfLoopVariant.ll | 2 +-
.../2005-09-12-UsesOutOutsideOfLoop.ll | 2 +-
.../LoopStrengthReduce/2007-04-23-UseIterator.ll | 2 +-
.../LoopStrengthReduce/2008-08-06-CmpStride.ll | 2 +-
.../LoopStrengthReduce/2008-08-13-CmpStride.ll | 2 +-
.../LoopStrengthReduce/2008-08-14-ShadowIV.ll | 2 +-
.../LoopStrengthReduce/2008-09-09-Overflow.ll | 2 +-
.../2009-01-13-nonconstant-stride-outside-loop.ll | 4 +-
.../2009-02-09-ivs-different-sizes.ll | 2 +-
.../LoopStrengthReduce/2009-04-28-no-reduce-mul.ll | 2 +-
.../change-compare-stride-trickiness-0.ll | 2 +-
.../change-compare-stride-trickiness-1.ll | 2 +-
.../change-compare-stride-trickiness-2.ll | 2 +-
test/Transforms/LoopStrengthReduce/dead-phi.ll | 2 +-
.../LoopStrengthReduce/different-type-ivs.ll | 2 +-
.../dont-hoist-simple-loop-constants.ll | 2 +-
.../dont_insert_redundant_ops.ll | 2 +-
.../LoopStrengthReduce/dont_reduce_bytes.ll | 2 +-
test/Transforms/LoopStrengthReduce/dont_reverse.ll | 2 +-
.../LoopStrengthReduce/exit_compare_live_range.ll | 5 +-
.../LoopStrengthReduce/invariant_value_first.ll | 2 +-
.../invariant_value_first_arg.ll | 2 +-
.../Transforms/LoopStrengthReduce/nested-reduce.ll | 2 +-
.../LoopStrengthReduce/ops_after_indvar.ll | 2 +-
.../phi_node_update_multiple_preds.ll | 2 +-
test/Transforms/LoopStrengthReduce/pr2537.ll | 2 +-
test/Transforms/LoopStrengthReduce/pr2570.ll | 2 +-
test/Transforms/LoopStrengthReduce/pr3086.ll | 4 +-
test/Transforms/LoopStrengthReduce/pr3399.ll | 2 +-
test/Transforms/LoopStrengthReduce/pr3571.ll | 2 +-
.../LoopStrengthReduce/quadradic-exit-value.ll | 2 +-
.../LoopStrengthReduce/related_indvars.ll | 2 +-
.../Transforms/LoopStrengthReduce/remove_indvar.ll | 2 +-
.../LoopStrengthReduce/share_code_in_preheader.ll | 2 +-
test/Transforms/LoopStrengthReduce/share_ivs.ll | 2 +-
.../use_postinc_value_outside_loop.ll | 2 +-
.../var_stride_used_by_compare.ll | 4 +-
.../LoopStrengthReduce/variable_stride.ll | 2 +-
.../LoopUnroll/2004-05-13-DontUnrollTooMuch.ll | 2 +-
.../LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll | 2 +-
.../LoopUnroll/2006-08-24-MultiBlockLoop.ll | 2 +-
test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll | 2 +-
.../LoopUnroll/2007-05-05-UnrollMiscomp.ll | 2 +-
.../LoopUnroll/2007-05-09-UnknownTripCount.ll | 2 +-
test/Transforms/LoopUnroll/2007-11-05-Crash.ll | 2 +-
.../LoopUnswitch/2006-02-14-LoopSimplifyCrash.ll | 2 +-
.../LoopUnswitch/2006-02-22-UnswitchCrash.ll | 2 +-
.../LoopUnswitch/2006-06-13-SingleEntryPHI.ll | 2 +-
.../LoopUnswitch/2006-06-27-DeadSwitchCase.ll | 2 +-
.../LoopUnswitch/2007-05-09-Unreachable.ll | 2 +-
test/Transforms/LoopUnswitch/2007-05-09-tl.ll | 2 +-
.../LoopUnswitch/2007-07-12-ExitDomInfo.ll | 2 +-
test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll | 2 +-
test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll | 2 +-
test/Transforms/LoopUnswitch/2007-08-01-Dom.ll | 2 +-
test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll | 2 +-
.../LoopUnswitch/2007-10-04-DomFrontier.ll | 2 +-
test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll | 2 +-
.../LoopUnswitch/2008-06-17-DomFrontier.ll | 2 +-
.../LoopUnswitch/2008-11-03-Invariant.ll | 2 +-
test/Transforms/LoopUnswitch/basictest.ll | 2 +-
test/Transforms/LoopUnswitch/preserve-analyses.ll | 129 +
test/Transforms/LowerInvoke/2003-12-10-Crash.ll | 2 +-
test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll | 2 +-
.../LowerInvoke/2005-08-03-InvokeWithPHI.ll | 2 +-
.../LowerInvoke/2005-08-03-InvokeWithPHIUse.ll | 2 +-
.../LowerInvoke/2008-02-14-CritEdgePhiCrash.ll | 2 +-
test/Transforms/LowerInvoke/basictest.ll | 2 +-
.../LowerSetJmp/2003-11-05-DominanceProperties.ll | 2 +-
test/Transforms/LowerSetJmp/simpletest.ll | 2 +-
.../LowerSwitch/2003-05-01-PHIProblem.ll | 2 +-
.../LowerSwitch/2003-08-23-EmptySwitch.ll | 2 +-
.../LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll | 2 +-
test/Transforms/LowerSwitch/feature.ll | 2 +-
.../Mem2Reg/2002-03-28-UninitializedVal.ll | 2 +-
.../2002-05-01-ShouldNotPromoteThisAlloca.ll | 2 +-
test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll | 2 +-
.../Mem2Reg/2003-04-18-DeadBlockProblem.ll | 2 +-
.../2003-04-24-MultipleIdenticalSuccessors.ll | 2 +-
.../Mem2Reg/2003-06-26-IterativePromote.ll | 2 +-
.../Mem2Reg/2003-10-05-DeadPHIInsertion.ll | 2 +-
.../Mem2Reg/2005-06-30-ReadBeforeWrite.ll | 2 +-
test/Transforms/Mem2Reg/2005-11-28-Crash.ll | 2 +-
.../Mem2Reg/2007-08-27-VolatileLoadsStores.ll | 2 +-
test/Transforms/Mem2Reg/PromoteMemToRegister.ll | 2 +-
test/Transforms/Mem2Reg/UndefValuesMerge.ll | 2 +-
test/Transforms/Mem2Reg/crash.ll | 24 +
.../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 2 +-
.../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 2 +-
.../Transforms/MemCpyOpt/2008-04-29-SRetRemoval.ll | 2 +-
test/Transforms/MemCpyOpt/align.ll | 18 +
test/Transforms/MemCpyOpt/crash.ll | 45 +
test/Transforms/MemCpyOpt/form-memset.ll | 4 +-
test/Transforms/MemCpyOpt/form-memset2.ll | 4 +-
test/Transforms/MemCpyOpt/memcpy.ll | 2 +-
test/Transforms/MemCpyOpt/memmove.ll | 37 +
test/Transforms/MemCpyOpt/sret.ll | 2 +-
test/Transforms/MergeFunc/fold-weak.ll | 2 +-
test/Transforms/MergeFunc/phi-speculation1.ll | 2 +-
test/Transforms/MergeFunc/phi-speculation2.ll | 2 +-
test/Transforms/PruneEH/2003-09-14-ExternalCall.ll | 2 +-
test/Transforms/PruneEH/2003-11-21-PHIUpdate.ll | 2 +-
test/Transforms/PruneEH/2008-06-02-Weak.ll | 2 +-
test/Transforms/PruneEH/2008-09-05-CGUpdate.ll | 2 +-
test/Transforms/PruneEH/recursivetest.ll | 2 +-
test/Transforms/PruneEH/simplenoreturntest.ll | 2 +-
test/Transforms/PruneEH/simpletest.ll | 2 +-
.../RaiseAllocations/2004-11-08-FreeUseCrash.ll | 2 +-
.../RaiseAllocations/2007-10-17-InvokeFree.ll | 2 +-
.../RaiseAllocations/FreeCastConstantExpr.ll | 2 +-
.../Reassociate/2002-05-15-AgressiveSubMove.ll | 2 +-
.../Reassociate/2002-05-15-MissedTree.ll | 2 +-
.../Reassociate/2002-05-15-SubReassociate.ll | 2 +-
.../Reassociate/2002-05-15-SubReassociate2.ll | 2 +-
.../Reassociate/2002-07-09-DominanceProblem.ll | 2 +-
.../Reassociate/2003-08-12-InfiniteLoop.ll | 2 +-
test/Transforms/Reassociate/2005-08-24-Crash.ll | 2 +-
.../Reassociate/2005-09-01-ArrayOutOfBounds.ll | 2 +-
.../Reassociate/2006-04-27-ReassociateVector.ll | 2 +-
test/Transforms/Reassociate/basictest.ll | 2 +-
test/Transforms/Reassociate/basictest2.ll | 2 +-
test/Transforms/Reassociate/basictest3.ll | 2 +-
test/Transforms/Reassociate/basictest4.ll | 2 +-
test/Transforms/Reassociate/inverses.ll | 2 +-
test/Transforms/Reassociate/looptest.ll | 2 +-
test/Transforms/Reassociate/mul-factor3.ll | 3 +-
test/Transforms/Reassociate/mul-neg-add.ll | 2 +-
test/Transforms/Reassociate/mulfactor.ll | 2 +-
test/Transforms/Reassociate/mulfactor2.ll | 3 +-
test/Transforms/Reassociate/negation.ll | 2 +-
test/Transforms/Reassociate/otherops.ll | 2 +-
test/Transforms/Reassociate/shift-factor.ll | 3 +-
test/Transforms/Reassociate/shifttest.ll | 2 +-
test/Transforms/Reassociate/subtest.ll | 2 +-
test/Transforms/Reassociate/subtest2.ll | 2 +-
test/Transforms/SCCP/2002-05-02-EdgeFailure.ll | 2 +-
test/Transforms/SCCP/2002-05-02-MissSecondInst.ll | 2 +-
.../SCCP/2002-05-20-MissedIncomingValue.ll | 2 +-
test/Transforms/SCCP/2002-05-21-InvalidSimplify.ll | 2 +-
.../SCCP/2002-08-30-GetElementPtrTest.ll | 2 +-
.../SCCP/2003-06-24-OverdefinedPHIValue.ll | 2 +-
test/Transforms/SCCP/2003-08-26-InvokeHandling.ll | 2 +-
test/Transforms/SCCP/2004-11-16-DeadInvoke.ll | 2 +-
test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll | 2 +-
test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll | 2 +-
test/Transforms/SCCP/2006-12-04-PackedType.ll | 2 +-
test/Transforms/SCCP/2006-12-19-UndefBug.ll | 2 +-
test/Transforms/SCCP/2007-05-16-InvokeCrash.ll | 4 +-
test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll | 2 +-
test/Transforms/SCCP/2008-03-10-sret.ll | 2 +-
.../SCCP/2008-04-22-multiple-ret-sccp.ll | 2 +-
test/Transforms/SCCP/2008-05-23-UndefCallFold.ll | 2 +-
test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll | 4 +-
.../SCCP/2009-05-27-VectorOperandZero.ll | 2 +-
test/Transforms/SCCP/apint-array.ll | 2 +-
test/Transforms/SCCP/apint-basictest.ll | 2 +-
test/Transforms/SCCP/apint-basictest2.ll | 4 +-
test/Transforms/SCCP/apint-basictest3.ll | 4 +-
test/Transforms/SCCP/apint-basictest4.ll | 6 +-
test/Transforms/SCCP/apint-bigarray.ll | 2 +-
test/Transforms/SCCP/apint-bigint.ll | 2 +-
test/Transforms/SCCP/apint-bigint2.ll | 2 +-
test/Transforms/SCCP/apint-ipsccp1.ll | 2 +-
test/Transforms/SCCP/apint-ipsccp2.ll | 2 +-
test/Transforms/SCCP/apint-ipsccp3.ll | 2 +-
test/Transforms/SCCP/apint-ipsccp4.ll | 6 +-
test/Transforms/SCCP/apint-load.ll | 4 +-
test/Transforms/SCCP/apint-phi.ll | 2 +-
test/Transforms/SCCP/apint-select.ll | 2 +-
test/Transforms/SCCP/basictest.ll | 2 +-
test/Transforms/SCCP/calltest.ll | 3 +-
test/Transforms/SCCP/ipsccp-basic.ll | 2 +-
test/Transforms/SCCP/ipsccp-conditional.ll | 2 +-
test/Transforms/SCCP/ipsccp-gvar.ll | 2 +-
test/Transforms/SCCP/loadtest.ll | 2 +-
test/Transforms/SCCP/logical-nuke.ll | 2 +-
test/Transforms/SCCP/phitest.ll | 3 +-
test/Transforms/SCCP/sccptest.ll | 2 +-
test/Transforms/SCCP/select.ll | 2 +-
.../SRETPromotion/2008-03-11-attributes.ll | 2 +-
.../2008-06-04-function-pointer-passing.ll | 2 +-
.../SRETPromotion/2008-06-05-non-call-use.ll | 2 +-
test/Transforms/SRETPromotion/basictest.ll | 2 +-
test/Transforms/SSI/2009-07-09-Invoke.ll | 71 +
test/Transforms/SSI/2009-08-15-UnreachableBB.ll | 19 +
test/Transforms/SSI/2009-08-17-CritEdge.ll | 15 +
test/Transforms/SSI/2009-08-19-UnreachableBB2.ll | 22 +
test/Transforms/SSI/dg.exp | 3 +
test/Transforms/SSI/ssiphi.ll | 22 +
test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll | 2 +-
.../ScalarRepl/2003-05-30-InvalidIndices.ll | 2 +-
.../Transforms/ScalarRepl/2003-05-30-MultiLevel.ll | 2 +-
.../ScalarRepl/2003-09-12-IncorrectPromote.ll | 2 +-
.../ScalarRepl/2003-10-29-ArrayProblem.ll | 2 +-
.../ScalarRepl/2005-12-14-UnionPromoteCrash.ll | 2 +-
.../2006-01-24-IllegalUnionPromoteCrash.ll | 2 +-
.../ScalarRepl/2006-04-20-PromoteCrash.ll | 2 +-
.../ScalarRepl/2006-10-23-PointerUnionCrash.ll | 2 +-
.../ScalarRepl/2006-11-07-InvalidArrayPromote.ll | 2 +-
.../Transforms/ScalarRepl/2006-12-11-SROA-Crash.ll | 2 +-
.../ScalarRepl/2007-03-19-CanonicalizeMemcpy.ll | 2 +-
.../ScalarRepl/2007-05-24-LargeAggregate.ll | 2 +-
.../ScalarRepl/2007-05-29-MemcpyPreserve.ll | 2 +-
.../ScalarRepl/2007-11-03-bigendian_apint.ll | 2 +-
.../Transforms/ScalarRepl/2008-01-29-PromoteBug.ll | 2 +-
.../2008-02-28-SubElementExtractCrash.ll | 2 +-
.../ScalarRepl/2008-06-05-loadstore-agg.ll | 2 +-
.../Transforms/ScalarRepl/2008-06-22-LargeArray.ll | 2 +-
.../2008-08-22-out-of-range-array-promote.ll | 2 +-
.../Transforms/ScalarRepl/2008-09-22-vector-gep.ll | 2 +-
.../ScalarRepl/2009-01-09-scalarrepl-empty.ll | 2 +-
.../2009-02-02-ScalarPromoteOutOfRange.ll | 2 +-
test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll | 2 +-
.../ScalarRepl/2009-03-04-MemCpyAlign.ll | 2 +-
.../ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll | 2 +-
test/Transforms/ScalarRepl/2009-03-17-CleanUp.ll | 2 +-
.../ScalarRepl/2009-04-21-ZeroLengthMemSet.ll | 2 +-
test/Transforms/ScalarRepl/2009-05-08-I1Crash.ll | 2 +-
.../ScalarRepl/2009-06-01-BitcastIntPadding.ll | 2 +-
test/Transforms/ScalarRepl/2009-08-16-VLA.ll | 23 +
test/Transforms/ScalarRepl/AggregatePromote.ll | 2 +-
test/Transforms/ScalarRepl/DifferingTypes.ll | 2 +-
test/Transforms/ScalarRepl/arraytest.ll | 2 +-
test/Transforms/ScalarRepl/badarray.ll | 2 +-
test/Transforms/ScalarRepl/basictest.ll | 2 +-
test/Transforms/ScalarRepl/bitfield-sroa.ll | 2 +-
test/Transforms/ScalarRepl/copy-aggregate.ll | 2 +-
test/Transforms/ScalarRepl/debuginfo.ll | 2 +-
test/Transforms/ScalarRepl/load-store-aggregate.ll | 2 +-
test/Transforms/ScalarRepl/memcpy-from-global.ll | 2 +-
.../ScalarRepl/memset-aggregate-byte-leader.ll | 4 +-
test/Transforms/ScalarRepl/memset-aggregate.ll | 6 +-
test/Transforms/ScalarRepl/not-a-vector.ll | 6 +-
test/Transforms/ScalarRepl/phinodepromote.ll | 2 +-
test/Transforms/ScalarRepl/select_promote.ll | 2 +-
test/Transforms/ScalarRepl/sroa-fca.ll | 2 +-
test/Transforms/ScalarRepl/sroa_two.ll | 2 +-
test/Transforms/ScalarRepl/union-fp-int.ll | 4 +-
test/Transforms/ScalarRepl/union-packed.ll | 4 +-
test/Transforms/ScalarRepl/union-pointer.ll | 4 +-
test/Transforms/ScalarRepl/vector_memcpy.ll | 2 +-
test/Transforms/ScalarRepl/vector_promote.ll | 4 +-
test/Transforms/ScalarRepl/volatile.ll | 4 +-
.../SimplifyCFG/2002-05-05-EmptyBlockMerge.ll | 2 +-
.../SimplifyCFG/2002-05-21-PHIElimination.ll | 2 +-
test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll | 2 +-
.../SimplifyCFG/2002-09-24-PHIAssertion.ll | 2 +-
.../SimplifyCFG/2003-03-07-DominateProblem.ll | 2 +-
.../SimplifyCFG/2003-08-05-InvokeCrash.ll | 2 +-
.../SimplifyCFG/2003-08-05-MishandleInvoke.ll | 2 +-
.../SimplifyCFG/2003-08-17-BranchFold.ll | 2 +-
.../SimplifyCFG/2003-08-17-BranchFoldOrdering.ll | 2 +-
.../SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll | 2 +-
.../SimplifyCFG/2003-08-17-FoldSwitch.ll | 2 +-
.../SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll | 2 +-
test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll | 2 +-
.../SimplifyCFG/2005-08-01-PHIUpdateFail.ll | 2 +-
.../SimplifyCFG/2005-08-03-PHIFactorCrash.ll | 2 +-
.../SimplifyCFG/2005-10-02-InvokeSimplify.ll | 2 +-
.../SimplifyCFG/2005-12-03-IncorrectPHIFold.ll | 2 +-
.../SimplifyCFG/2006-02-17-InfiniteUnroll.ll | 2 +-
test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll | 2 +-
test/Transforms/SimplifyCFG/2006-08-03-Crash.ll | 2 +-
.../Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll | 2 +-
.../SimplifyCFG/2006-10-29-InvokeCrash.ll | 2 +-
.../SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll | 2 +-
.../SimplifyCFG/2007-11-22-InvokeNoUnwind.ll | 2 +-
test/Transforms/SimplifyCFG/2007-12-21-Crash.ll | 2 +-
.../SimplifyCFG/2008-01-02-hoist-fp-add.ll | 2 +-
.../2008-04-23-MergeMultipleResultRet.ll | 2 +-
.../SimplifyCFG/2008-04-27-MultipleReturnCrash.ll | 2 +-
.../SimplifyCFG/2008-05-16-PHIBlockMerge.ll | 2 +-
.../SimplifyCFG/2008-07-13-InfLoopMiscompile.ll | 2 +-
.../SimplifyCFG/2008-09-08-MultiplePred.ll | 2 +-
.../SimplifyCFG/2008-09-17-SpeculativeHoist.ll | 2 +-
.../2008-10-03-SpeculativelyExecuteBeforePHI.ll | 2 +-
.../SimplifyCFG/2008-12-06-SingleEntryPhi.ll | 2 +-
test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll | 2 +-
.../SimplifyCFG/2009-01-18-PHIPropCrash.ll | 3 +-
...2009-01-19-UnconditionalTrappingConstantExpr.ll | 2 +-
.../2009-03-05-Speculative-Hoist-Dbg.ll | 2 +-
.../SimplifyCFG/2009-05-12-externweak.ll | 2 +-
.../SimplifyCFG/2009-06-15-InvokeCrash.ll | 2 +-
test/Transforms/SimplifyCFG/BrUnwind.ll | 2 +-
test/Transforms/SimplifyCFG/DeadSetCC.ll | 2 +-
.../SimplifyCFG/EqualPHIEdgeBlockMerge.ll | 2 +-
test/Transforms/SimplifyCFG/HoistCode.ll | 2 +-
test/Transforms/SimplifyCFG/PhiBlockMerge.ll | 2 +-
test/Transforms/SimplifyCFG/PhiBlockMerge2.ll | 2 +-
test/Transforms/SimplifyCFG/PhiEliminate.ll | 2 +-
test/Transforms/SimplifyCFG/PhiEliminate2.ll | 2 +-
| 2 +- test/Transforms/SimplifyCFG/PhiNoEliminate.ll | 2 +- test/Transforms/SimplifyCFG/SpeculativeExec.ll | 4 +- .../Transforms/SimplifyCFG/UncondBranchToReturn.ll | 2 +- .../Transforms/SimplifyCFG/UnreachableEliminate.ll | 2 +- test/Transforms/SimplifyCFG/basictest.ll | 2 +- .../SimplifyCFG/branch-branch-dbginfo.ll | 2 +- test/Transforms/SimplifyCFG/branch-cond-merge.ll | 4 +- test/Transforms/SimplifyCFG/branch-cond-prop.ll | 2 +- test/Transforms/SimplifyCFG/branch-fold-test.ll | 2 +- test/Transforms/SimplifyCFG/branch-fold.ll | 2 +- test/Transforms/SimplifyCFG/branch-phi-thread.ll | 2 +- test/Transforms/SimplifyCFG/branch_fold_dbg.ll | 2 +- test/Transforms/SimplifyCFG/dbginfo.ll | 4 +- .../SimplifyCFG/hoist-common-code.dbg.ll | 2 +- test/Transforms/SimplifyCFG/hoist-common-code.ll | 2 +- test/Transforms/SimplifyCFG/invoke_unwind.ll | 33 + test/Transforms/SimplifyCFG/iterative-simplify.ll | 2 +- test/Transforms/SimplifyCFG/noreturn-call.ll | 2 +- test/Transforms/SimplifyCFG/return-merge.ll | 2 +- .../SimplifyCFG/switch-simplify-crash.ll | 2 +- test/Transforms/SimplifyCFG/switch_create.ll | 2 +- .../Transforms/SimplifyCFG/switch_formation.dbg.ll | 3 +- test/Transforms/SimplifyCFG/switch_formation.ll | 3 +- test/Transforms/SimplifyCFG/switch_switch_fold.ll | 2 +- .../SimplifyCFG/switch_switch_fold_dbginfo.ll | 2 +- test/Transforms/SimplifyCFG/switch_thread.ll | 2 +- .../SimplifyCFG/trapping-load-unreachable.ll | 2 +- .../SimplifyCFG/two-entry-phi-return.dbg.ll | 2 +- .../Transforms/SimplifyCFG/two-entry-phi-return.ll | 2 +- .../SimplifyLibCalls/2005-05-20-sprintf-crash.ll | 2 +- .../2007-04-06-strchr-miscompile.ll | 2 +- .../SimplifyLibCalls/2008-05-19-memcmp.ll | 2 +- .../SimplifyLibCalls/2009-01-04-Annotate.ll | 2 +- .../SimplifyLibCalls/2009-02-11-NotInitialized.ll | 2 +- .../SimplifyLibCalls/2009-02-12-StrTo.ll | 2 +- .../SimplifyLibCalls/2009-05-30-memcmp-byte.ll | 2 +- .../Transforms/SimplifyLibCalls/2009-07-28-Exit.ll | 22 + .../SimplifyLibCalls/2009-07-29-Exit2.ll | 24 + test/Transforms/SimplifyLibCalls/FFS.ll | 2 +- test/Transforms/SimplifyLibCalls/FPrintF.ll | 7 +- test/Transforms/SimplifyLibCalls/IsDigit.ll | 2 +- test/Transforms/SimplifyLibCalls/MemCpy.ll | 2 +- test/Transforms/SimplifyLibCalls/Printf.ll | 4 +- test/Transforms/SimplifyLibCalls/Puts.ll | 7 +- test/Transforms/SimplifyLibCalls/SPrintF.ll | 6 +- test/Transforms/SimplifyLibCalls/StrCat.ll | 8 +- test/Transforms/SimplifyLibCalls/StrChr.ll | 6 +- test/Transforms/SimplifyLibCalls/StrCmp.ll | 2 +- test/Transforms/SimplifyLibCalls/StrCpy.ll | 6 +- test/Transforms/SimplifyLibCalls/StrLen.ll | 2 +- test/Transforms/SimplifyLibCalls/StrNCat.ll | 8 +- test/Transforms/SimplifyLibCalls/StrNCmp.ll | 2 +- test/Transforms/SimplifyLibCalls/StrNCpy.ll | 6 +- test/Transforms/SimplifyLibCalls/ToAscii.ll | 2 +- test/Transforms/SimplifyLibCalls/abs.ll | 2 +- test/Transforms/SimplifyLibCalls/exp2.ll | 2 +- test/Transforms/SimplifyLibCalls/floor.ll | 2 +- test/Transforms/SimplifyLibCalls/half-powr.ll | 7 +- test/Transforms/SimplifyLibCalls/memcmp.ll | 2 +- test/Transforms/SimplifyLibCalls/memmove.ll | 2 +- test/Transforms/SimplifyLibCalls/memset-64.ll | 12 + test/Transforms/SimplifyLibCalls/memset.ll | 2 +- test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll | 33 + test/Transforms/SimplifyLibCalls/pow2.ll | 2 +- test/Transforms/SimplifyLibCalls/weak-symbols.ll | 26 + .../StripSymbols/2007-01-15-llvm.used.ll | 4 +- test/Transforms/TailCallElim/accum_recursion.ll | 2 +- .../TailCallElim/accum_recursion_constant_arg.ll | 2 +- 
test/Transforms/TailCallElim/ackermann.ll | 3 +- .../TailCallElim/dont-tce-tail-marked-call.ll | 2 +- test/Transforms/TailCallElim/dont_reorder_load.ll | 2 +- test/Transforms/TailCallElim/inf-recursion.ll | 2 +- test/Transforms/TailCallElim/intervening-inst.ll | 2 +- .../TailCallElim/move_alloca_for_tail_call.ll | 6 +- test/Transforms/TailCallElim/reorder_load.ll | 2 +- test/Transforms/TailCallElim/return_constant.ll | 2 +- .../TailCallElim/trivial_codegen_tailcall.ll | 2 +- test/Transforms/TailDup/2003-06-24-Simpleloop.ll | 2 +- test/Transforms/TailDup/2003-07-22-InfiniteLoop.ll | 2 +- .../TailDup/2003-08-23-InvalidatedPointers.ll | 2 +- .../TailDup/2003-08-31-UnreachableBlocks.ll | 2 +- .../TailDup/2004-04-01-DemoteRegToStack.ll | 2 +- test/Transforms/TailDup/2008-05-13-InfiniteLoop.ll | 2 +- .../TailDup/2008-06-11-AvoidDupLoopHeader.ll | 2 +- test/Transforms/TailDup/2009-07-31-phicrash.ll | 14 + test/Transforms/TailDup/MergeTest.ll | 2 +- test/Transforms/TailDup/PHIUpdateTest.ll | 2 +- test/Transforms/TailDup/basictest.ll | 2 +- test/Transforms/TailDup/basictest2.ll | 2 +- test/Transforms/TailDup/if-tail-dup.ll | 4 +- test/Unit/lit.cfg | 65 + test/Verifier/2008-03-01-AllocaSized.ll | 2 +- test/Verifier/2008-08-22-MemCpyAlignment.ll | 2 +- test/Verifier/SelfReferential.ll | 2 +- test/Verifier/aliasing-chain.ll | 2 +- test/Verifier/byval-4.ll | 2 +- test/Verifier/invoke-2.ll | 2 +- test/lib/llvm.exp | 26 +- test/lib/llvm2cpp.exp | 6 +- test/lit.cfg | 155 + test/lit.site.cfg.in | 9 + test/site.exp.in | 27 + tools/CMakeLists.txt | 17 +- tools/Makefile | 4 +- tools/bugpoint/BugDriver.cpp | 83 +- tools/bugpoint/BugDriver.h | 6 +- tools/bugpoint/CrashDebugger.cpp | 78 +- tools/bugpoint/ExecutionDriver.cpp | 120 +- tools/bugpoint/ExtractFunction.cpp | 61 +- tools/bugpoint/FindBugs.cpp | 36 +- tools/bugpoint/ListReducer.h | 14 +- tools/bugpoint/Miscompilation.cpp | 327 +- tools/bugpoint/OptimizerDriver.cpp | 127 +- tools/bugpoint/ToolRunner.cpp | 301 +- tools/bugpoint/ToolRunner.h | 21 +- tools/bugpoint/bugpoint.cpp | 75 +- tools/gold/Makefile | 5 +- tools/gold/gold-plugin.cpp | 5 +- tools/llc/CMakeLists.txt | 2 +- tools/llc/Makefile | 2 +- tools/llc/llc.cpp | 204 +- tools/lli/lli.cpp | 53 +- tools/llvm-ar/llvm-ar.cpp | 39 +- tools/llvm-as/llvm-as.cpp | 129 +- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 370 +- tools/llvm-config/CMakeLists.txt | 36 +- tools/llvm-config/llvm-config.in.in | 31 +- tools/llvm-dis/llvm-dis.cpp | 144 +- tools/llvm-extract/CMakeLists.txt | 2 +- tools/llvm-extract/Makefile | 2 +- tools/llvm-extract/llvm-extract.cpp | 67 +- tools/llvm-ld/Optimize.cpp | 6 +- tools/llvm-ld/llvm-ld.cpp | 103 +- tools/llvm-link/CMakeLists.txt | 2 +- tools/llvm-link/Makefile | 2 +- tools/llvm-link/llvm-link.cpp | 100 +- tools/llvm-mc/AsmCond.h | 40 + tools/llvm-mc/AsmLexer.cpp | 185 +- tools/llvm-mc/AsmLexer.h | 95 +- tools/llvm-mc/AsmParser.cpp | 1477 +++- tools/llvm-mc/AsmParser.h | 162 +- tools/llvm-mc/CMakeLists.txt | 4 +- tools/llvm-mc/Makefile | 11 +- tools/llvm-mc/llvm-mc.cpp | 235 +- tools/llvm-nm/llvm-nm.cpp | 48 +- tools/llvm-prof/llvm-prof.cpp | 329 +- tools/llvm-ranlib/llvm-ranlib.cpp | 9 +- tools/llvm-stub/llvm-stub.c | 5 +- tools/llvmc/doc/LLVMC-Reference.rst | 42 +- tools/llvmc/example/Hello/Hello.cpp | 5 +- tools/llvmc/example/mcc16/driver/Main.cpp | 27 +- .../example/mcc16/plugins/PIC16Base/PIC16Base.td | 6 +- .../example/mcc16/plugins/PIC16Base/PluginMain.cpp | 18 +- tools/llvmc/plugins/Base/Base.td.in | 32 +- tools/lto/LTOCodeGenerator.cpp | 136 +- 
tools/lto/LTOCodeGenerator.h | 4 +- tools/lto/LTOModule.cpp | 129 +- tools/lto/Makefile | 2 +- tools/lto/lto.cpp | 8 - tools/opt/AnalysisWrappers.cpp | 45 +- tools/opt/CMakeLists.txt | 2 +- tools/opt/GraphPrinters.cpp | 8 +- tools/opt/Makefile | 2 +- tools/opt/PrintSCC.cpp | 28 +- tools/opt/opt.cpp | 201 +- unittests/ADT/APFloatTest.cpp | 517 ++ unittests/ADT/APIntTest.cpp | 183 +- unittests/ADT/SmallStringTest.cpp | 48 + unittests/ADT/SmallVectorTest.cpp | 27 +- unittests/ADT/SparseBitVectorTest.cpp | 36 + unittests/ADT/StringMapTest.cpp | 24 +- unittests/ADT/StringRefTest.cpp | 155 + unittests/ADT/TripleTest.cpp | 84 +- unittests/ADT/TwineTest.cpp | 75 + unittests/ExecutionEngine/ExecutionEngineTest.cpp | 129 + .../ExecutionEngine/JIT/JITEventListenerTest.cpp | 14 +- .../ExecutionEngine/JIT/JITMemoryManagerTest.cpp | 277 + unittests/ExecutionEngine/JIT/JITTest.cpp | 277 + unittests/ExecutionEngine/Makefile | 7 +- unittests/Makefile | 9 +- unittests/Makefile.unittest | 6 +- unittests/Support/AllocatorTest.cpp | 143 + unittests/Support/CommandLineTest.cpp | 60 + unittests/Support/ConstantRangeTest.cpp | 351 + unittests/Support/MathExtrasTest.cpp | 2 +- unittests/Support/RegexTest.cpp | 65 + unittests/Support/TypeBuilderTest.cpp | 281 +- unittests/Support/ValueHandleTest.cpp | 118 +- unittests/Support/raw_ostream_test.cpp | 45 + unittests/Transforms/Makefile | 17 + unittests/Transforms/Utils/Cloning.cpp | 87 + unittests/Transforms/Utils/Makefile | 15 + unittests/VMCore/ConstantsTest.cpp | 5 +- unittests/VMCore/MetadataTest.cpp | 112 +- unittests/VMCore/PassManagerTest.cpp | 36 +- utils/FileCheck/CMakeLists.txt | 11 + utils/FileCheck/FileCheck.cpp | 624 ++ utils/FileCheck/Makefile | 21 + utils/FileUpdate/CMakeLists.txt | 11 + utils/FileUpdate/FileUpdate.cpp | 86 + utils/FileUpdate/Makefile | 21 + utils/Makefile | 2 +- utils/NewNightlyTest.pl | 21 +- utils/PerfectShuffle/PerfectShuffle.cpp | 164 +- utils/TableGen/AsmMatcherEmitter.cpp | 1545 ++++ utils/TableGen/AsmMatcherEmitter.h | 33 + utils/TableGen/AsmWriterEmitter.cpp | 298 +- utils/TableGen/AsmWriterEmitter.h | 3 + utils/TableGen/CMakeLists.txt | 1 + utils/TableGen/CallingConvEmitter.cpp | 16 +- utils/TableGen/CodeEmitterGen.cpp | 12 +- utils/TableGen/CodeGenDAGPatterns.cpp | 198 +- utils/TableGen/CodeGenDAGPatterns.h | 57 +- utils/TableGen/CodeGenInstruction.cpp | 6 +- utils/TableGen/CodeGenInstruction.h | 2 + utils/TableGen/CodeGenTarget.cpp | 84 +- utils/TableGen/CodeGenTarget.h | 4 + utils/TableGen/DAGISelEmitter.cpp | 100 +- utils/TableGen/FastISelEmitter.cpp | 15 +- utils/TableGen/InstrInfoEmitter.cpp | 10 +- utils/TableGen/IntrinsicEmitter.cpp | 47 +- utils/TableGen/LLVMCConfigurationEmitter.cpp | 502 +- utils/TableGen/Record.cpp | 62 +- utils/TableGen/Record.h | 51 +- utils/TableGen/RegisterInfoEmitter.cpp | 17 +- utils/TableGen/StringToOffsetTable.h | 76 + utils/TableGen/SubtargetEmitter.cpp | 154 +- utils/TableGen/SubtargetEmitter.h | 8 +- utils/TableGen/TGParser.cpp | 2 +- utils/TableGen/TGValueTypes.cpp | 45 +- utils/TableGen/TableGen.cpp | 24 +- utils/UpdateCMakeLists.pl | 118 + utils/bugpoint/RemoteRunSafely.sh | 105 + utils/buildit/GNUmakefile | 1 + utils/buildit/build_llvm | 34 +- utils/count/CMakeLists.txt | 3 + utils/count/Makefile | 20 + utils/count/count.c | 48 + utils/crosstool/ARM/build-install-linux.sh | 53 +- utils/crosstool/create-snapshots.sh | 28 +- utils/emacs/emacs.el | 7 +- utils/lit/LitConfig.py | 71 + utils/lit/LitFormats.py | 2 + utils/lit/ProgressBar.py | 267 + utils/lit/ShCommands.py | 85 + 
utils/lit/ShUtil.py | 346 + utils/lit/TODO | 19 + utils/lit/TclUtil.py | 322 + utils/lit/Test.py | 71 + utils/lit/TestFormats.py | 144 + utils/lit/TestRunner.py | 505 ++ utils/lit/TestingConfig.py | 96 + utils/lit/Util.py | 124 + utils/lit/lit.py | 531 ++ utils/llvm.grm | 19 +- utils/llvmdo | 3 +- utils/llvmgrep | 2 +- utils/not/CMakeLists.txt | 11 + utils/not/Makefile | 21 + utils/not/not.cpp | 17 + utils/unittest/Makefile | 2 +- utils/unittest/UnitTestMain/Makefile | 21 + utils/unittest/UnitTestMain/TestMain.cpp | 15 + utils/unittest/googletest/Makefile | 6 +- utils/unittest/googletest/README.LLVM | 5 + .../include/gtest/internal/gtest-internal.h | 22 +- .../googletest/include/gtest/internal/gtest-port.h | 4 +- utils/valgrind/x86_64-pc-linux-gnu_gcc-4.3.3.supp | 23 + utils/vim/llvm.vim | 8 +- win32/unistd.h | 2 +- 5530 files changed, 217439 insertions(+), 66128 deletions(-) create mode 100644 autoconf/m4/linux_mixed_64_32.m4 create mode 100644 bindings/ada/analysis/llvm_analysis-binding.ads create mode 100644 bindings/ada/analysis/llvm_analysis.ads create mode 100644 bindings/ada/analysis/llvm_analysis_wrap.cxx create mode 100644 bindings/ada/bitreader/llvm_bit_reader-binding.ads create mode 100644 bindings/ada/bitreader/llvm_bit_reader.ads create mode 100644 bindings/ada/bitreader/llvm_bitreader_wrap.cxx create mode 100644 bindings/ada/bitwriter/llvm_bit_writer-binding.ads create mode 100644 bindings/ada/bitwriter/llvm_bit_writer.ads create mode 100644 bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx create mode 100644 bindings/ada/executionengine/llvm_execution_engine-binding.ads create mode 100644 bindings/ada/executionengine/llvm_execution_engine.ads create mode 100644 bindings/ada/executionengine/llvm_executionengine_wrap.cxx create mode 100644 bindings/ada/llvm.gpr create mode 100644 bindings/ada/llvm/llvm-binding.ads create mode 100644 bindings/ada/llvm/llvm.ads create mode 100644 bindings/ada/llvm/llvm_link_time_optimizer-binding.ads create mode 100644 bindings/ada/llvm/llvm_link_time_optimizer.ads create mode 100644 bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx create mode 100644 bindings/ada/llvm/llvm_wrap.cxx create mode 100644 bindings/ada/target/llvm_target-binding.ads create mode 100644 bindings/ada/target/llvm_target.ads create mode 100644 bindings/ada/target/llvm_target_wrap.cxx create mode 100644 bindings/ada/transforms/llvm_transforms-binding.ads create mode 100644 bindings/ada/transforms/llvm_transforms.ads create mode 100644 bindings/ada/transforms/llvm_transforms_wrap.cxx create mode 100644 cmake/modules/CheckAtomic.cmake create mode 100644 cmake/modules/LLVMLibDeps.cmake create mode 100644 docs/CommandGuide/FileCheck.pod create mode 100644 docs/CommandGuide/lit.pod create mode 100644 docs/DebuggingJITedCode.html create mode 100644 docs/HistoricalNotes/2007-OriginalClangReadme.txt create mode 100644 docs/re_format.7 create mode 100644 examples/Kaleidoscope/Chapter2/CMakeLists.txt create mode 100644 examples/Kaleidoscope/Chapter2/Makefile create mode 100644 examples/Kaleidoscope/Chapter2/toy.cpp create mode 100644 examples/Kaleidoscope/Chapter3/CMakeLists.txt create mode 100644 examples/Kaleidoscope/Chapter3/Makefile create mode 100644 examples/Kaleidoscope/Chapter3/toy.cpp create mode 100644 examples/Kaleidoscope/Chapter4/CMakeLists.txt create mode 100644 examples/Kaleidoscope/Chapter4/Makefile create mode 100644 examples/Kaleidoscope/Chapter4/toy.cpp create mode 100644 examples/Kaleidoscope/Chapter5/CMakeLists.txt create mode 100644 
examples/Kaleidoscope/Chapter5/Makefile create mode 100644 examples/Kaleidoscope/Chapter5/toy.cpp create mode 100644 examples/Kaleidoscope/Chapter6/CMakeLists.txt create mode 100644 examples/Kaleidoscope/Chapter6/Makefile create mode 100644 examples/Kaleidoscope/Chapter6/toy.cpp create mode 100644 examples/Kaleidoscope/Chapter7/CMakeLists.txt create mode 100644 examples/Kaleidoscope/Chapter7/Makefile create mode 100644 examples/Kaleidoscope/Chapter7/toy.cpp create mode 100644 include/llvm/ADT/DenseMapInfo.h create mode 100644 include/llvm/ADT/StringRef.h create mode 100644 include/llvm/ADT/Twine.h create mode 100644 include/llvm/Analysis/InlineCost.h create mode 100644 include/llvm/Analysis/MallocHelper.h create mode 100644 include/llvm/Analysis/PointerTracking.h create mode 100644 include/llvm/CodeGen/MachineFunctionAnalysis.h create mode 100644 include/llvm/CodeGen/MachineModuleInfoImpls.h create mode 100644 include/llvm/CodeGen/ObjectCodeEmitter.h create mode 100644 include/llvm/Config/AsmParsers.def.in create mode 100644 include/llvm/IntrinsicsBlackfin.td create mode 100644 include/llvm/MC/MCAsmInfo.h create mode 100644 include/llvm/MC/MCAsmInfoCOFF.h create mode 100644 include/llvm/MC/MCAsmInfoDarwin.h create mode 100644 include/llvm/MC/MCAsmLexer.h create mode 100644 include/llvm/MC/MCAsmParser.h create mode 100644 include/llvm/MC/MCAssembler.h create mode 100644 include/llvm/MC/MCCodeEmitter.h create mode 100644 include/llvm/MC/MCDisassembler.h create mode 100644 include/llvm/MC/MCExpr.h create mode 100644 include/llvm/MC/MCInstPrinter.h create mode 100644 include/llvm/MC/MCSectionELF.h create mode 100644 include/llvm/MC/MCSectionMachO.h create mode 100644 include/llvm/MC/SectionKind.h create mode 100644 include/llvm/Metadata.h create mode 100644 include/llvm/Operator.h create mode 100644 include/llvm/Support/ErrorHandling.h create mode 100644 include/llvm/Support/FormattedStream.h create mode 100644 include/llvm/Support/IRReader.h create mode 100644 include/llvm/Support/MemoryObject.h create mode 100644 include/llvm/Support/Regex.h create mode 100644 include/llvm/Support/raw_os_ostream.h create mode 100644 include/llvm/Target/TargetAsmParser.h create mode 100644 include/llvm/Target/TargetLoweringObjectFile.h create mode 100644 include/llvm/Target/TargetRegistry.h create mode 100644 include/llvm/Transforms/Utils/SSAUpdater.h create mode 100644 lib/Analysis/InlineCost.cpp create mode 100644 lib/Analysis/MallocHelper.cpp create mode 100644 lib/Analysis/PointerTracking.cpp create mode 100644 lib/Analysis/ProfileEstimatorPass.cpp create mode 100644 lib/Analysis/ProfileVerifierPass.cpp create mode 100644 lib/Analysis/README.txt create mode 100644 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp create mode 100644 lib/CodeGen/BranchFolding.h create mode 100644 lib/CodeGen/ExactHazardRecognizer.cpp create mode 100644 lib/CodeGen/ExactHazardRecognizer.h create mode 100644 lib/CodeGen/MachineFunctionAnalysis.cpp create mode 100644 lib/CodeGen/MachineFunctionPass.cpp create mode 100644 lib/CodeGen/MachineModuleInfoImpls.cpp create mode 100644 lib/CodeGen/ObjectCodeEmitter.cpp create mode 100644 lib/CodeGen/PBQP/AnnotatedGraph.h create mode 100644 lib/CodeGen/PBQP/ExhaustiveSolver.h create mode 100644 lib/CodeGen/PBQP/GraphBase.h create mode 100644 lib/CodeGen/PBQP/HeuristicSolver.h create mode 100644 lib/CodeGen/PBQP/Heuristics/Briggs.h create mode 100644 lib/CodeGen/PBQP/PBQPMath.h create mode 100644 lib/CodeGen/PBQP/SimpleGraph.h create mode 100644 lib/CodeGen/PBQP/Solution.h create mode 
100644 lib/CodeGen/PBQP/Solver.h create mode 100644 lib/CodeGen/PHIElimination.h create mode 100644 lib/CodeGen/SelectionDAG/InstrEmitter.cpp create mode 100644 lib/CodeGen/SelectionDAG/InstrEmitter.h create mode 100644 lib/CodeGen/SimpleHazardRecognizer.h create mode 100644 lib/CodeGen/SjLjEHPrepare.cpp create mode 100644 lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp create mode 100644 lib/ExecutionEngine/JIT/JITDebugRegisterer.h create mode 100644 lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp create mode 100644 lib/MC/MCAsmInfo.cpp create mode 100644 lib/MC/MCAsmInfoCOFF.cpp create mode 100644 lib/MC/MCAsmInfoDarwin.cpp create mode 100644 lib/MC/MCAsmLexer.cpp create mode 100644 lib/MC/MCAsmParser.cpp create mode 100644 lib/MC/MCAssembler.cpp create mode 100644 lib/MC/MCCodeEmitter.cpp create mode 100644 lib/MC/MCDisassembler.cpp create mode 100644 lib/MC/MCExpr.cpp create mode 100644 lib/MC/MCInst.cpp create mode 100644 lib/MC/MCInstPrinter.cpp create mode 100644 lib/MC/MCMachOStreamer.cpp create mode 100644 lib/MC/MCNullStreamer.cpp create mode 100644 lib/MC/MCSection.cpp create mode 100644 lib/MC/MCSectionELF.cpp create mode 100644 lib/MC/MCSectionMachO.cpp create mode 100644 lib/MC/MCSymbol.cpp create mode 100644 lib/MC/MCValue.cpp create mode 100644 lib/MC/TargetAsmParser.cpp create mode 100644 lib/Support/COPYRIGHT.regex create mode 100644 lib/Support/ErrorHandling.cpp create mode 100644 lib/Support/FormattedStream.cpp create mode 100644 lib/Support/MemoryObject.cpp create mode 100644 lib/Support/Regex.cpp create mode 100644 lib/Support/StringRef.cpp create mode 100644 lib/Support/TargetRegistry.cpp create mode 100644 lib/Support/Twine.cpp create mode 100644 lib/Support/raw_os_ostream.cpp create mode 100644 lib/Support/regcclass.h create mode 100644 lib/Support/regcname.h create mode 100644 lib/Support/regcomp.c create mode 100644 lib/Support/regengine.inc create mode 100644 lib/Support/regerror.c create mode 100644 lib/Support/regex2.h create mode 100644 lib/Support/regex_impl.h create mode 100644 lib/Support/regexec.c create mode 100644 lib/Support/regfree.c create mode 100644 lib/Support/regstrlcpy.c create mode 100644 lib/Support/regutils.h create mode 100644 lib/Target/ARM/ARMBaseInstrInfo.cpp create mode 100644 lib/Target/ARM/ARMBaseInstrInfo.h create mode 100644 lib/Target/ARM/ARMBaseRegisterInfo.cpp create mode 100644 lib/Target/ARM/ARMBaseRegisterInfo.h create mode 100644 lib/Target/ARM/ARMMCAsmInfo.cpp create mode 100644 lib/Target/ARM/ARMMCAsmInfo.h create mode 100644 lib/Target/ARM/ARMPerfectShuffle.h create mode 100644 lib/Target/ARM/ARMScheduleV7.td create mode 100644 lib/Target/ARM/ARMTargetObjectFile.h create mode 100644 lib/Target/ARM/AsmParser/ARMAsmParser.cpp create mode 100644 lib/Target/ARM/AsmParser/CMakeLists.txt create mode 100644 lib/Target/ARM/AsmParser/Makefile create mode 100644 lib/Target/ARM/NEONPreAllocPass.cpp create mode 100644 lib/Target/ARM/README-Thumb2.txt create mode 100644 lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp create mode 100644 lib/Target/ARM/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/ARM/TargetInfo/Makefile create mode 100644 lib/Target/ARM/Thumb2ITBlockPass.cpp create mode 100644 lib/Target/ARM/Thumb2SizeReduction.cpp create mode 100644 lib/Target/Alpha/AlphaCallingConv.td create mode 100644 lib/Target/Alpha/AlphaMCAsmInfo.cpp create mode 100644 lib/Target/Alpha/AlphaMCAsmInfo.h create mode 100644 lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp create mode 100644 lib/Target/Alpha/TargetInfo/CMakeLists.txt create 
mode 100644 lib/Target/Alpha/TargetInfo/Makefile create mode 100644 lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp create mode 100644 lib/Target/Blackfin/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/Blackfin/AsmPrinter/Makefile create mode 100644 lib/Target/Blackfin/Blackfin.h create mode 100644 lib/Target/Blackfin/Blackfin.td create mode 100644 lib/Target/Blackfin/BlackfinCallingConv.td create mode 100644 lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp create mode 100644 lib/Target/Blackfin/BlackfinISelLowering.cpp create mode 100644 lib/Target/Blackfin/BlackfinISelLowering.h create mode 100644 lib/Target/Blackfin/BlackfinInstrFormats.td create mode 100644 lib/Target/Blackfin/BlackfinInstrInfo.cpp create mode 100644 lib/Target/Blackfin/BlackfinInstrInfo.h create mode 100644 lib/Target/Blackfin/BlackfinInstrInfo.td create mode 100644 lib/Target/Blackfin/BlackfinMCAsmInfo.cpp create mode 100644 lib/Target/Blackfin/BlackfinMCAsmInfo.h create mode 100644 lib/Target/Blackfin/BlackfinRegisterInfo.cpp create mode 100644 lib/Target/Blackfin/BlackfinRegisterInfo.h create mode 100644 lib/Target/Blackfin/BlackfinRegisterInfo.td create mode 100644 lib/Target/Blackfin/BlackfinSubtarget.cpp create mode 100644 lib/Target/Blackfin/BlackfinSubtarget.h create mode 100644 lib/Target/Blackfin/BlackfinTargetMachine.cpp create mode 100644 lib/Target/Blackfin/BlackfinTargetMachine.h create mode 100644 lib/Target/Blackfin/CMakeLists.txt create mode 100644 lib/Target/Blackfin/Makefile create mode 100644 lib/Target/Blackfin/README.txt create mode 100644 lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp create mode 100644 lib/Target/Blackfin/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/Blackfin/TargetInfo/Makefile create mode 100644 lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp create mode 100644 lib/Target/CBackend/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/CBackend/TargetInfo/Makefile create mode 100644 lib/Target/CellSPU/SPUMCAsmInfo.cpp create mode 100644 lib/Target/CellSPU/SPUMCAsmInfo.h create mode 100644 lib/Target/CellSPU/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp create mode 100644 lib/Target/CellSPU/TargetInfo/Makefile create mode 100644 lib/Target/CppBackend/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp create mode 100644 lib/Target/CppBackend/TargetInfo/Makefile create mode 100644 lib/Target/MSIL/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp create mode 100644 lib/Target/MSIL/TargetInfo/Makefile create mode 100644 lib/Target/MSP430/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp create mode 100644 lib/Target/MSP430/AsmPrinter/Makefile create mode 100644 lib/Target/MSP430/MSP430MCAsmInfo.cpp create mode 100644 lib/Target/MSP430/MSP430MCAsmInfo.h create mode 100644 lib/Target/MSP430/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp create mode 100644 lib/Target/MSP430/TargetInfo/Makefile create mode 100644 lib/Target/Mips/MipsMCAsmInfo.cpp create mode 100644 lib/Target/Mips/MipsMCAsmInfo.h create mode 100644 lib/Target/Mips/MipsTargetObjectFile.cpp create mode 100644 lib/Target/Mips/MipsTargetObjectFile.h create mode 100644 lib/Target/Mips/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/Mips/TargetInfo/Makefile create mode 100644 lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp create mode 100644 
lib/Target/PIC16/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/PIC16/AsmPrinter/Makefile create mode 100644 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp create mode 100644 lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h create mode 100644 lib/Target/PIC16/MCSectionPIC16.h create mode 100644 lib/Target/PIC16/PIC16MCAsmInfo.cpp create mode 100644 lib/Target/PIC16/PIC16MCAsmInfo.h create mode 100644 lib/Target/PIC16/PIC16TargetObjectFile.cpp create mode 100644 lib/Target/PIC16/PIC16TargetObjectFile.h create mode 100644 lib/Target/PIC16/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/PIC16/TargetInfo/Makefile create mode 100644 lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp create mode 100644 lib/Target/PowerPC/PPCMCAsmInfo.cpp create mode 100644 lib/Target/PowerPC/PPCMCAsmInfo.h create mode 100644 lib/Target/PowerPC/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/PowerPC/TargetInfo/Makefile create mode 100644 lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp create mode 100644 lib/Target/Sparc/SparcMCAsmInfo.cpp create mode 100644 lib/Target/Sparc/SparcMCAsmInfo.h create mode 100644 lib/Target/Sparc/SparcMachineFunctionInfo.h create mode 100644 lib/Target/Sparc/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/Sparc/TargetInfo/Makefile create mode 100644 lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp create mode 100644 lib/Target/SystemZ/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/SystemZ/AsmPrinter/Makefile create mode 100644 lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp create mode 100644 lib/Target/SystemZ/CMakeLists.txt create mode 100644 lib/Target/SystemZ/Makefile create mode 100644 lib/Target/SystemZ/SystemZ.h create mode 100644 lib/Target/SystemZ/SystemZ.td create mode 100644 lib/Target/SystemZ/SystemZCallingConv.td create mode 100644 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp create mode 100644 lib/Target/SystemZ/SystemZISelLowering.cpp create mode 100644 lib/Target/SystemZ/SystemZISelLowering.h create mode 100644 lib/Target/SystemZ/SystemZInstrBuilder.h create mode 100644 lib/Target/SystemZ/SystemZInstrFP.td create mode 100644 lib/Target/SystemZ/SystemZInstrFormats.td create mode 100644 lib/Target/SystemZ/SystemZInstrInfo.cpp create mode 100644 lib/Target/SystemZ/SystemZInstrInfo.h create mode 100644 lib/Target/SystemZ/SystemZInstrInfo.td create mode 100644 lib/Target/SystemZ/SystemZMCAsmInfo.cpp create mode 100644 lib/Target/SystemZ/SystemZMCAsmInfo.h create mode 100644 lib/Target/SystemZ/SystemZMachineFunctionInfo.h create mode 100644 lib/Target/SystemZ/SystemZOperands.td create mode 100644 lib/Target/SystemZ/SystemZRegisterInfo.cpp create mode 100644 lib/Target/SystemZ/SystemZRegisterInfo.h create mode 100644 lib/Target/SystemZ/SystemZRegisterInfo.td create mode 100644 lib/Target/SystemZ/SystemZSubtarget.cpp create mode 100644 lib/Target/SystemZ/SystemZSubtarget.h create mode 100644 lib/Target/SystemZ/SystemZTargetMachine.cpp create mode 100644 lib/Target/SystemZ/SystemZTargetMachine.h create mode 100644 lib/Target/SystemZ/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/SystemZ/TargetInfo/Makefile create mode 100644 lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp create mode 100644 lib/Target/TargetLoweringObjectFile.cpp create mode 100644 lib/Target/X86/AsmParser/CMakeLists.txt create mode 100644 lib/Target/X86/AsmParser/Makefile create mode 100644 lib/Target/X86/AsmParser/X86AsmParser.cpp create mode 100644 lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h create mode 100644 lib/Target/X86/AsmPrinter/X86AsmPrinter.h 
create mode 100644 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp create mode 100644 lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h create mode 100644 lib/Target/X86/AsmPrinter/X86MCInstLower.cpp create mode 100644 lib/Target/X86/AsmPrinter/X86MCInstLower.h create mode 100644 lib/Target/X86/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/X86/TargetInfo/Makefile create mode 100644 lib/Target/X86/TargetInfo/X86TargetInfo.cpp create mode 100644 lib/Target/X86/X86COFFMachineModuleInfo.cpp create mode 100644 lib/Target/X86/X86COFFMachineModuleInfo.h create mode 100644 lib/Target/X86/X86MCAsmInfo.cpp create mode 100644 lib/Target/X86/X86MCAsmInfo.h create mode 100644 lib/Target/X86/X86TargetObjectFile.cpp create mode 100644 lib/Target/X86/X86TargetObjectFile.h create mode 100644 lib/Target/XCore/AsmPrinter/CMakeLists.txt create mode 100644 lib/Target/XCore/AsmPrinter/Makefile create mode 100644 lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp create mode 100644 lib/Target/XCore/MCSectionXCore.cpp create mode 100644 lib/Target/XCore/MCSectionXCore.h create mode 100644 lib/Target/XCore/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/XCore/TargetInfo/Makefile create mode 100644 lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp create mode 100644 lib/Target/XCore/XCoreMCAsmInfo.cpp create mode 100644 lib/Target/XCore/XCoreMCAsmInfo.h create mode 100644 lib/Target/XCore/XCoreTargetObjectFile.cpp create mode 100644 lib/Target/XCore/XCoreTargetObjectFile.h create mode 100644 lib/Transforms/Instrumentation/MaximumSpanningTree.h create mode 100644 lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp create mode 100644 lib/Transforms/Scalar/CodeGenLICM.cpp create mode 100644 lib/Transforms/Utils/SSAUpdater.cpp create mode 100644 lib/VMCore/ConstantsContext.h create mode 100644 lib/VMCore/LeaksContext.h create mode 100644 lib/VMCore/Metadata.cpp create mode 100644 lib/VMCore/TypesContext.h create mode 100644 runtime/libprofile/OptimalEdgeProfiling.c create mode 100644 test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll create mode 100644 test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll create mode 100644 test/Analysis/BasicAA/phi-aa.ll create mode 100644 test/Analysis/BasicAA/store-promote.ll create mode 100644 test/Analysis/LoopDependenceAnalysis/alias.ll create mode 100644 test/Analysis/LoopDependenceAnalysis/siv-strong.ll create mode 100644 test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll create mode 100644 test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll create mode 100644 test/Analysis/LoopDependenceAnalysis/ziv.ll create mode 100644 test/Analysis/PointerTracking/dg.exp create mode 100644 test/Analysis/PointerTracking/sizes.ll create mode 100644 test/Analysis/Profiling/dg.exp create mode 100644 test/Analysis/Profiling/edge-profiling.ll create mode 100644 test/Analysis/Profiling/profiling-tool-chain.ll create mode 100644 test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll create mode 100644 test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll create mode 100644 test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll create mode 100644 test/Analysis/ScalarEvolution/nsw-offset.ll create mode 100644 test/Analysis/ScalarEvolution/nsw.ll create mode 100644 test/Analysis/ScalarEvolution/scev-aa.ll create mode 100644 test/Analysis/ScalarEvolution/sext-iv-2.ll create mode 100644 test/Analysis/ScalarEvolution/trip-count8.ll create mode 100644 test/Analysis/ScalarEvolution/zext-wrap.ll create mode 100644 test/Archive/extract.ll create mode 100644 
test/Assembler/2009-07-24-ZeroArgGEP.ll create mode 100644 test/Assembler/flags.ll create mode 100644 test/Assembler/msasm.ll create mode 100644 test/Assembler/unnamed.ll create mode 100644 test/Bindings/Ocaml/dg.exp create mode 100644 test/Bitcode/metadata-2.ll create mode 100644 test/Bitcode/metadata.ll create mode 100644 test/CMakeLists.txt create mode 100644 test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll create mode 100644 test/CodeGen/ARM/2009-07-18-RewriterBug.ll create mode 100644 test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll create mode 100644 test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll create mode 100644 test/CodeGen/ARM/2009-07-29-VFP3Registers.ll create mode 100644 test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll create mode 100644 test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll create mode 100644 test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll create mode 100644 test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll create mode 100644 test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll create mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill.ll create mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill2.ll create mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill3.ll create mode 100644 test/CodeGen/ARM/2009-08-21-PostRAKill4.ll create mode 100644 test/CodeGen/ARM/2009-08-23-linkerprivate.ll create mode 100644 test/CodeGen/ARM/2009-08-26-ScalarToVector.ll create mode 100644 test/CodeGen/ARM/2009-08-27-ScalarToVector.ll create mode 100644 test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll create mode 100644 test/CodeGen/ARM/2009-08-29-TooLongSplat.ll create mode 100644 test/CodeGen/ARM/2009-08-31-LSDA-Name.ll create mode 100644 test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll create mode 100644 test/CodeGen/ARM/2009-09-01-PostRAProlog.ll create mode 100644 test/CodeGen/ARM/2009-09-09-AllOnes.ll create mode 100644 test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll create mode 100644 test/CodeGen/ARM/2009-09-10-postdec.ll create mode 100644 test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll create mode 100644 test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll create mode 100644 test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll create mode 100644 test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll create mode 100644 test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll create mode 100644 test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll create mode 100644 test/CodeGen/ARM/2009-09-24-spill-align.ll create mode 100644 test/CodeGen/ARM/2009-09-27-CoalescerBug.ll create mode 100644 test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll create mode 100644 test/CodeGen/ARM/bfc.ll create mode 100644 test/CodeGen/ARM/fabss.ll create mode 100644 test/CodeGen/ARM/fadds.ll create mode 100644 test/CodeGen/ARM/fdivs.ll create mode 100644 test/CodeGen/ARM/fmacs.ll create mode 100644 test/CodeGen/ARM/fmscs.ll create mode 100644 test/CodeGen/ARM/fmuls.ll create mode 100644 test/CodeGen/ARM/fnegs.ll create mode 100644 test/CodeGen/ARM/fnmacs.ll create mode 100644 test/CodeGen/ARM/fnmscs.ll create mode 100644 test/CodeGen/ARM/fnmuls.ll create mode 100644 test/CodeGen/ARM/fp_convert.ll create mode 100644 test/CodeGen/ARM/fsubs.ll create mode 100644 test/CodeGen/ARM/hardfloat_neon.ll create mode 100644 test/CodeGen/ARM/mls.ll create mode 100644 test/CodeGen/ARM/mul_const.ll create mode 100644 test/CodeGen/ARM/sbfx.ll create mode 100644 test/CodeGen/ARM/spill-q.ll create mode 100644 test/CodeGen/ARM/t2-imm.ll create mode 100644 test/CodeGen/ARM/vbits.ll create mode 100644 test/CodeGen/ARM/vcombine.ll create mode 100644 
test/CodeGen/ARM/vext.ll create mode 100644 test/CodeGen/ARM/vld1.ll create mode 100644 test/CodeGen/ARM/vld2.ll create mode 100644 test/CodeGen/ARM/vld3.ll create mode 100644 test/CodeGen/ARM/vld4.ll create mode 100644 test/CodeGen/ARM/vldlane.ll create mode 100644 test/CodeGen/ARM/vminmax.ll create mode 100644 test/CodeGen/ARM/vpminmax.ll create mode 100644 test/CodeGen/ARM/vqdmul.ll create mode 100644 test/CodeGen/ARM/vrec.ll create mode 100644 test/CodeGen/ARM/vrev.ll create mode 100644 test/CodeGen/ARM/vst1.ll create mode 100644 test/CodeGen/ARM/vst2.ll create mode 100644 test/CodeGen/ARM/vst3.ll create mode 100644 test/CodeGen/ARM/vst4.ll create mode 100644 test/CodeGen/ARM/vstlane.ll create mode 100644 test/CodeGen/ARM/vtbl.ll create mode 100644 test/CodeGen/ARM/vtrn.ll create mode 100644 test/CodeGen/ARM/vuzp.ll create mode 100644 test/CodeGen/ARM/vzip.ll create mode 100644 test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll create mode 100644 test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll create mode 100644 test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll create mode 100644 test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll create mode 100644 test/CodeGen/Blackfin/2009-08-15-MissingDead.ll create mode 100644 test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll create mode 100644 test/CodeGen/Blackfin/add-overflow.ll create mode 100644 test/CodeGen/Blackfin/add.ll create mode 100644 test/CodeGen/Blackfin/addsub-i128.ll create mode 100644 test/CodeGen/Blackfin/basic-i1.ll create mode 100644 test/CodeGen/Blackfin/basic-i16.ll create mode 100644 test/CodeGen/Blackfin/basic-i32.ll create mode 100644 test/CodeGen/Blackfin/basic-i64.ll create mode 100644 test/CodeGen/Blackfin/basic-i8.ll create mode 100644 test/CodeGen/Blackfin/basictest.ll create mode 100644 test/CodeGen/Blackfin/burg.ll create mode 100644 test/CodeGen/Blackfin/cmp-small-imm.ll create mode 100644 test/CodeGen/Blackfin/cmp64.ll create mode 100644 test/CodeGen/Blackfin/ct32.ll create mode 100644 test/CodeGen/Blackfin/ct64.ll create mode 100644 test/CodeGen/Blackfin/ctlz16.ll create mode 100644 test/CodeGen/Blackfin/ctlz64.ll create mode 100644 test/CodeGen/Blackfin/ctpop16.ll create mode 100644 test/CodeGen/Blackfin/cttz16.ll create mode 100644 test/CodeGen/Blackfin/cycles.ll create mode 100644 test/CodeGen/Blackfin/dg.exp create mode 100644 test/CodeGen/Blackfin/double-cast.ll create mode 100644 test/CodeGen/Blackfin/frameindex.ll create mode 100644 test/CodeGen/Blackfin/i17mem.ll create mode 100644 test/CodeGen/Blackfin/i1mem.ll create mode 100644 test/CodeGen/Blackfin/i1ops.ll create mode 100644 test/CodeGen/Blackfin/i216mem.ll create mode 100644 test/CodeGen/Blackfin/i248mem.ll create mode 100644 test/CodeGen/Blackfin/i256mem.ll create mode 100644 test/CodeGen/Blackfin/i256param.ll create mode 100644 test/CodeGen/Blackfin/i56param.ll create mode 100644 test/CodeGen/Blackfin/i8mem.ll create mode 100644 test/CodeGen/Blackfin/inline-asm.ll create mode 100644 test/CodeGen/Blackfin/int-setcc.ll create mode 100644 test/CodeGen/Blackfin/invalid-apint.ll create mode 100644 test/CodeGen/Blackfin/jumptable.ll create mode 100644 test/CodeGen/Blackfin/large-switch.ll create mode 100644 test/CodeGen/Blackfin/load-i16.ll create mode 100644 test/CodeGen/Blackfin/logic-i16.ll create mode 100644 test/CodeGen/Blackfin/many-args.ll create mode 100644 test/CodeGen/Blackfin/mulhu.ll create mode 100644 test/CodeGen/Blackfin/printf.ll create mode 100644 test/CodeGen/Blackfin/printf2.ll create mode 100644 
test/CodeGen/Blackfin/promote-logic.ll create mode 100644 test/CodeGen/Blackfin/promote-setcc.ll create mode 100644 test/CodeGen/Blackfin/sdiv.ll create mode 100644 test/CodeGen/Blackfin/simple-select.ll create mode 100644 test/CodeGen/Blackfin/switch.ll create mode 100644 test/CodeGen/Blackfin/switch2.ll create mode 100644 test/CodeGen/Blackfin/sync-intr.ll create mode 100644 test/CodeGen/CellSPU/sext128.ll create mode 100644 test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll create mode 100644 test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll create mode 100644 test/CodeGen/MSP430/2009-10-10-OrImpDef.ll create mode 100644 test/CodeGen/MSP430/Inst16mi.ll create mode 100644 test/CodeGen/MSP430/Inst16mm.ll create mode 100644 test/CodeGen/MSP430/Inst16mr.ll create mode 100644 test/CodeGen/MSP430/Inst16rm.ll create mode 100644 test/CodeGen/MSP430/Inst16rr.ll create mode 100644 test/CodeGen/MSP430/Inst8mi.ll create mode 100644 test/CodeGen/MSP430/Inst8mm.ll create mode 100644 test/CodeGen/MSP430/Inst8mr.ll create mode 100644 test/CodeGen/MSP430/Inst8rm.ll create mode 100644 test/CodeGen/MSP430/Inst8rr.ll create mode 100644 test/CodeGen/MSP430/inline-asm.ll create mode 100644 test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll create mode 100644 test/CodeGen/PIC16/dg.exp create mode 100644 test/CodeGen/PIC16/global-in-user-section.ll create mode 100644 test/CodeGen/PIC16/globals.ll create mode 100644 test/CodeGen/PIC16/sext.ll create mode 100644 test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll create mode 100644 test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll create mode 100644 test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll create mode 100644 test/CodeGen/PowerPC/2009-09-18-carrybit.ll create mode 100644 test/CodeGen/PowerPC/sections.ll create mode 100644 test/CodeGen/PowerPC/tango.net.ftp.FtpClient.ll create mode 100644 test/CodeGen/SPARC/2009-08-28-PIC.ll create mode 100644 test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll create mode 100644 test/CodeGen/SystemZ/00-RetVoid.ll create mode 100644 test/CodeGen/SystemZ/01-RetArg.ll create mode 100644 test/CodeGen/SystemZ/01-RetImm.ll create mode 100644 test/CodeGen/SystemZ/02-MemArith.ll create mode 100644 test/CodeGen/SystemZ/02-RetAdd.ll create mode 100644 test/CodeGen/SystemZ/02-RetAddImm.ll create mode 100644 test/CodeGen/SystemZ/02-RetAnd.ll create mode 100644 test/CodeGen/SystemZ/02-RetAndImm.ll create mode 100644 test/CodeGen/SystemZ/02-RetNeg.ll create mode 100644 test/CodeGen/SystemZ/02-RetOr.ll create mode 100644 test/CodeGen/SystemZ/02-RetOrImm.ll create mode 100644 test/CodeGen/SystemZ/02-RetSub.ll create mode 100644 test/CodeGen/SystemZ/02-RetSubImm.ll create mode 100644 test/CodeGen/SystemZ/02-RetXor.ll create mode 100644 test/CodeGen/SystemZ/02-RetXorImm.ll create mode 100644 test/CodeGen/SystemZ/03-RetAddImmSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetAddSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetAndImmSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetAndSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetArgSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetImmSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetNegImmSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetOrImmSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetOrSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetSubImmSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetSubSubreg.ll create mode 100644 test/CodeGen/SystemZ/03-RetXorImmSubreg.ll create mode 100644 
test/CodeGen/SystemZ/03-RetXorSubreg.ll create mode 100644 test/CodeGen/SystemZ/04-RetShifts.ll create mode 100644 test/CodeGen/SystemZ/05-LoadAddr.ll create mode 100644 test/CodeGen/SystemZ/05-MemImmStores.ll create mode 100644 test/CodeGen/SystemZ/05-MemLoadsStores.ll create mode 100644 test/CodeGen/SystemZ/05-MemLoadsStores16.ll create mode 100644 test/CodeGen/SystemZ/05-MemRegLoads.ll create mode 100644 test/CodeGen/SystemZ/05-MemRegStores.ll create mode 100644 test/CodeGen/SystemZ/06-CallViaStack.ll create mode 100644 test/CodeGen/SystemZ/06-FrameIdxLoad.ll create mode 100644 test/CodeGen/SystemZ/06-LocalFrame.ll create mode 100644 test/CodeGen/SystemZ/06-SimpleCall.ll create mode 100644 test/CodeGen/SystemZ/07-BrCond.ll create mode 100644 test/CodeGen/SystemZ/07-BrCond32.ll create mode 100644 test/CodeGen/SystemZ/07-BrUnCond.ll create mode 100644 test/CodeGen/SystemZ/07-CmpImm.ll create mode 100644 test/CodeGen/SystemZ/07-CmpImm32.ll create mode 100644 test/CodeGen/SystemZ/07-SelectCC.ll create mode 100644 test/CodeGen/SystemZ/08-DivRem.ll create mode 100644 test/CodeGen/SystemZ/08-DivRemMemOp.ll create mode 100644 test/CodeGen/SystemZ/08-SimpleMuls.ll create mode 100644 test/CodeGen/SystemZ/09-DynamicAlloca.ll create mode 100644 test/CodeGen/SystemZ/09-Globals.ll create mode 100644 test/CodeGen/SystemZ/09-Switches.ll create mode 100644 test/CodeGen/SystemZ/10-FuncsPic.ll create mode 100644 test/CodeGen/SystemZ/10-GlobalsPic.ll create mode 100644 test/CodeGen/SystemZ/11-BSwap.ll create mode 100644 test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll create mode 100644 test/CodeGen/SystemZ/2009-06-02-And32Imm.ll create mode 100644 test/CodeGen/SystemZ/2009-06-02-Rotate.ll create mode 100644 test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll create mode 100644 test/CodeGen/SystemZ/2009-07-04-Shl32.ll create mode 100644 test/CodeGen/SystemZ/2009-07-05-Shifts.ll create mode 100644 test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll create mode 100644 test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll create mode 100644 test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll create mode 100644 test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll create mode 100644 test/CodeGen/SystemZ/2009-08-22-FCopySign.ll create mode 100644 test/CodeGen/SystemZ/dg.exp create mode 100644 test/CodeGen/Thumb/2009-07-19-SPDecBug.ll create mode 100644 test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll create mode 100644 test/CodeGen/Thumb/2009-07-27-PEIAssert.ll create mode 100644 test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll create mode 100644 test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll create mode 100644 test/CodeGen/Thumb/2009-08-20-ISelBug.ll create mode 100644 test/CodeGen/Thumb/asmprinter-bug.ll create mode 100644 test/CodeGen/Thumb/long_shift.ll create mode 100644 test/CodeGen/Thumb/mul.ll create mode 100644 test/CodeGen/Thumb/pop.ll create mode 100644 test/CodeGen/Thumb/push.ll create mode 100644 test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll create mode 100644 test/CodeGen/Thumb2/2009-07-21-ISelBug.ll create mode 100644 test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll create mode 100644 test/CodeGen/Thumb2/2009-07-30-PEICrash.ll create mode 100644 test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll create mode 100644 test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll create mode 100644 test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll create mode 100644 test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll create mode 100644 test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll create mode 100644 
test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll create mode 100644 test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll create mode 100644 test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll create mode 100644 test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll create mode 100644 test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll create mode 100644 test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll create mode 100644 test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll create mode 100644 test/CodeGen/Thumb2/2009-08-10-ISelBug.ll create mode 100644 test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll create mode 100644 test/CodeGen/Thumb2/frameless.ll create mode 100644 test/CodeGen/Thumb2/frameless2.ll create mode 100644 test/CodeGen/Thumb2/large-stack.ll create mode 100644 test/CodeGen/Thumb2/mul_const.ll create mode 100644 test/CodeGen/Thumb2/pic-load.ll create mode 100644 test/CodeGen/Thumb2/thumb2-bcc.ll create mode 100644 test/CodeGen/Thumb2/thumb2-branch.ll create mode 100644 test/CodeGen/Thumb2/thumb2-call.ll create mode 100644 test/CodeGen/Thumb2/thumb2-ifcvt1.ll create mode 100644 test/CodeGen/Thumb2/thumb2-ifcvt2.ll create mode 100644 test/CodeGen/Thumb2/thumb2-ifcvt3.ll create mode 100644 test/CodeGen/Thumb2/thumb2-jtb.ll create mode 100644 test/CodeGen/Thumb2/thumb2-ldm.ll create mode 100644 test/CodeGen/Thumb2/thumb2-ldrd.ll create mode 100644 test/CodeGen/Thumb2/thumb2-lsr3.ll create mode 100644 test/CodeGen/Thumb2/thumb2-mulhi.ll create mode 100644 test/CodeGen/Thumb2/thumb2-pack.ll create mode 100644 test/CodeGen/Thumb2/thumb2-rev16.ll create mode 100644 test/CodeGen/Thumb2/thumb2-sbc.ll create mode 100644 test/CodeGen/Thumb2/thumb2-select.ll create mode 100644 test/CodeGen/Thumb2/thumb2-select_xform.ll create mode 100644 test/CodeGen/Thumb2/thumb2-smla.ll create mode 100644 test/CodeGen/Thumb2/thumb2-smul.ll create mode 100644 test/CodeGen/Thumb2/thumb2-spill-q.ll create mode 100644 test/CodeGen/Thumb2/thumb2-tbb.ll create mode 100644 test/CodeGen/Thumb2/thumb2-tbh.ll create mode 100644 test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll create mode 100644 test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll create mode 100644 test/CodeGen/X86/2009-07-07-SplitICmp.ll create mode 100644 test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll create mode 100644 test/CodeGen/X86/2009-07-15-CoalescerBug.ll create mode 100644 test/CodeGen/X86/2009-07-16-CoalescerBug.ll create mode 100644 test/CodeGen/X86/2009-07-16-LoadFoldingBug.ll create mode 100644 test/CodeGen/X86/2009-07-17-StackColoringBug.ll create mode 100644 test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll create mode 100644 test/CodeGen/X86/2009-07-20-CoalescerBug.ll create mode 100644 test/CodeGen/X86/2009-07-20-DAGCombineBug.ll create mode 100644 test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll create mode 100644 test/CodeGen/X86/2009-08-06-branchfolder-crash.ll create mode 100644 test/CodeGen/X86/2009-08-06-inlineasm.ll create mode 100644 test/CodeGen/X86/2009-08-08-CastError.ll create mode 100644 test/CodeGen/X86/2009-08-12-badswitch.ll create mode 100644 test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll create mode 100644 test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll create mode 100644 test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll create mode 100644 test/CodeGen/X86/2009-08-23-linkerprivate.ll create mode 100644 test/CodeGen/X86/2009-09-07-CoalescerBug.ll create mode 100644 test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll create mode 100644 test/CodeGen/X86/2009-09-16-CoalescerBug.ll create mode 100644 
test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll create mode 100644 test/CodeGen/X86/2009-09-19-earlyclobber.ll create mode 100644 test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll create mode 100644 test/CodeGen/X86/2009-09-22-CoalescerBug.ll create mode 100644 test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll create mode 100644 test/CodeGen/X86/2009-10-08-MachineLICMBug.ll create mode 100644 test/CodeGen/X86/anyext.ll create mode 100644 test/CodeGen/X86/asm-modifier-P.ll create mode 100644 test/CodeGen/X86/asm-modifier.ll create mode 100644 test/CodeGen/X86/atomic_add.ll create mode 100644 test/CodeGen/X86/attribute-sections.ll create mode 100644 test/CodeGen/X86/avoid-lea-scale2.ll create mode 100644 test/CodeGen/X86/bss_pagealigned.ll create mode 100644 test/CodeGen/X86/cmov.ll create mode 100644 test/CodeGen/X86/coalesce-esp.ll create mode 100644 test/CodeGen/X86/coalescer-cross.ll create mode 100644 test/CodeGen/X86/codemodel.ll create mode 100644 test/CodeGen/X86/combiner-aa-0.ll create mode 100644 test/CodeGen/X86/combiner-aa-1.ll create mode 100644 test/CodeGen/X86/compare-inf.ll create mode 100644 test/CodeGen/X86/compiler_used.ll create mode 100644 test/CodeGen/X86/darwin-quote.ll create mode 100644 test/CodeGen/X86/dll-linkage.ll create mode 100644 test/CodeGen/X86/empty-struct-return-type.ll create mode 100644 test/CodeGen/X86/extract-extract.ll create mode 100644 test/CodeGen/X86/fast-isel-bc.ll create mode 100644 test/CodeGen/X86/fast-isel-fneg.ll create mode 100644 test/CodeGen/X86/fast-isel-gep.ll create mode 100644 test/CodeGen/X86/fp-stack-O0-crash.ll create mode 100644 test/CodeGen/X86/global-sections-tls.ll create mode 100644 test/CodeGen/X86/global-sections.ll create mode 100644 test/CodeGen/X86/inline-asm-R-constraint.ll create mode 100644 test/CodeGen/X86/inline-asm-q-regs.ll create mode 100644 test/CodeGen/X86/peep-test-3.ll create mode 100644 test/CodeGen/X86/personality.ll create mode 100644 test/CodeGen/X86/pic.ll create mode 100644 test/CodeGen/X86/pre-split11.ll create mode 100644 test/CodeGen/X86/ptrtoint-constexpr.ll create mode 100644 test/CodeGen/X86/remat-scalar-zero.ll create mode 100644 test/CodeGen/X86/shift-parts.ll create mode 100644 test/CodeGen/X86/sink-hoist.ll create mode 100644 test/CodeGen/X86/sse-minmax.ll create mode 100644 test/CodeGen/X86/sse2.ll create mode 100644 test/CodeGen/X86/sse3.ll create mode 100644 test/CodeGen/X86/sse41.ll create mode 100644 test/CodeGen/X86/sse42.ll create mode 100644 test/CodeGen/X86/stdarg.ll create mode 100644 test/CodeGen/X86/store-empty-member.ll create mode 100644 test/CodeGen/X86/test-shrink-bug.ll create mode 100644 test/CodeGen/X86/test-shrink.ll create mode 100644 test/CodeGen/X86/tls-pic.ll create mode 100644 test/CodeGen/X86/vec_compare.ll create mode 100644 test/CodeGen/X86/vshift-5.ll create mode 100644 test/CodeGen/X86/wide-integer-fold.ll create mode 100644 test/CodeGen/X86/widen_load-0.ll create mode 100644 test/CodeGen/X86/widen_load-1.ll create mode 100644 test/CodeGen/X86/xor.ll create mode 100644 test/CodeGen/XCore/2009-07-15-store192.ll create mode 100644 test/CodeGen/XCore/ashr.ll create mode 100644 test/CodeGen/XCore/constants.ll create mode 100644 test/CodeGen/XCore/globals.ll create mode 100644 test/CodeGen/XCore/load.ll create mode 100644 test/CodeGen/XCore/sext.ll create mode 100644 test/CodeGen/XCore/store.ll create mode 100644 test/CodeGen/XCore/tls.ll create mode 100644 test/CodeGen/XCore/unaligned_load.ll create mode 100644 test/CodeGen/XCore/unaligned_store.ll create mode 100644 
test/CodeGen/XCore/unaligned_store_combine.ll create mode 100644 test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll create mode 100644 test/ExecutionEngine/stubs.ll create mode 100644 test/Feature/NamedMDNode.ll create mode 100644 test/Feature/NamedMDNode2.ll create mode 100644 test/Feature/md_on_instruction.ll create mode 100644 test/Feature/md_on_instruction2.ll create mode 100644 test/Feature/memorymarkers.ll create mode 100644 test/FrontendC++/2009-07-15-LineNumbers.cpp create mode 100644 test/FrontendC++/2009-07-16-PrivateCopyConstructor.cpp create mode 100644 test/FrontendC++/2009-07-16-Using.cpp create mode 100644 test/FrontendC++/2009-08-03-Varargs.cpp create mode 100644 test/FrontendC++/2009-08-05-ZeroInitWidth.cpp create mode 100644 test/FrontendC++/2009-08-11-VectorRetTy.cpp create mode 100644 test/FrontendC++/2009-09-04-modify-crash.cpp create mode 100644 test/FrontendC++/2009-09-09-packed-layout.cpp create mode 100644 test/FrontendC++/member-alignment.cpp create mode 100644 test/FrontendC++/msasm.cpp create mode 100644 test/FrontendC/2009-07-14-VoidPtr.c create mode 100644 test/FrontendC/2009-07-15-pad-wchar_t-array.c create mode 100644 test/FrontendC/2009-07-17-VoidParameter.c create mode 100644 test/FrontendC/2009-07-22-StructLayout.c create mode 100644 test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c create mode 100644 test/FrontendC/2009-09-24-SqrtErrno.c create mode 100644 test/FrontendC/func-aligned.c create mode 100644 test/FrontendC/msasm.c create mode 100644 test/FrontendC/ptr-rotate.c create mode 100644 test/FrontendC/redef-ext-inline.c create mode 100644 test/FrontendC/wchar-const.c create mode 100644 test/FrontendObjC/2009-08-05-utf16.m create mode 100644 test/FrontendObjC/2009-08-17-DebugInfo.m create mode 100644 test/LLVMC/C++/dash-x.cpp create mode 100644 test/LLVMC/C++/dg.exp create mode 100644 test/LLVMC/C++/hello.cpp create mode 100644 test/LLVMC/C++/together.cpp create mode 100644 test/LLVMC/C/dg.exp create mode 100644 test/LLVMC/C/emit-llvm.c create mode 100644 test/LLVMC/C/hello.c create mode 100644 test/LLVMC/C/include.c create mode 100644 test/LLVMC/C/opt-test.c create mode 100644 test/LLVMC/C/sink.c create mode 100644 test/LLVMC/C/wall.c create mode 100644 test/LLVMC/ObjC++/dg.exp create mode 100644 test/LLVMC/ObjC++/hello.mm create mode 100644 test/LLVMC/ObjC/dg.exp create mode 100644 test/LLVMC/ObjC/hello.m create mode 100644 test/LLVMC/dg.exp create mode 100644 test/LLVMC/test_data/false.c create mode 100644 test/Linker/2009-09-03-mdnode.ll create mode 100644 test/Linker/2009-09-03-mdnode2.ll create mode 100644 test/Linker/linkmdnode.ll create mode 100644 test/Linker/linkmdnode2.ll create mode 100644 test/Linker/linknamedmdnode.ll create mode 100644 test/Linker/linknamedmdnode2.ll create mode 100644 test/Linker/partial-type-refinement-link.ll create mode 100644 test/Linker/partial-type-refinement.ll create mode 100644 test/MC/AsmParser/ARM/arm_word_directive.s create mode 100644 test/MC/AsmParser/ARM/dg.exp create mode 100644 test/MC/AsmParser/X86/dg.exp create mode 100644 test/MC/AsmParser/X86/x86_instructions.s create mode 100644 test/MC/AsmParser/X86/x86_operands.s create mode 100644 test/MC/AsmParser/X86/x86_word_directive.s create mode 100644 test/MC/AsmParser/conditional_asm.s create mode 100644 test/MC/AsmParser/directive_abort.s create mode 100644 test/MC/AsmParser/directive_comm.s create mode 100644 test/MC/AsmParser/directive_darwin_section.s create mode 100644 test/MC/AsmParser/directive_desc.s create mode 100644 
test/MC/AsmParser/directive_file.s create mode 100644 test/MC/AsmParser/directive_include.s create mode 100644 test/MC/AsmParser/directive_lcomm.s create mode 100644 test/MC/AsmParser/directive_line.s create mode 100644 test/MC/AsmParser/directive_loc.s create mode 100644 test/MC/AsmParser/directive_lsym.s create mode 100644 test/MC/AsmParser/directive_subsections_via_symbols.s create mode 100644 test/MC/AsmParser/directive_zerofill.s create mode 100644 test/MC/AsmParser/exprs-invalid.s create mode 100644 test/MC/AsmParser/hello.s create mode 100644 test/MC/AsmParser/labels.s create mode 100644 test/MC/MachO/comm-1.s create mode 100644 test/MC/MachO/data.s create mode 100644 test/MC/MachO/dg.exp create mode 100644 test/MC/MachO/lcomm-attributes.s create mode 100644 test/MC/MachO/reloc.s create mode 100644 test/MC/MachO/section-align-1.s create mode 100644 test/MC/MachO/section-align-2.s create mode 100644 test/MC/MachO/sections.s create mode 100644 test/MC/MachO/symbol-flags.s create mode 100644 test/MC/MachO/symbol-indirect.s create mode 100644 test/MC/MachO/symbols-1.s create mode 100644 test/MC/MachO/values.s create mode 100644 test/MC/MachO/zerofill-1.s create mode 100644 test/MC/MachO/zerofill-2.s create mode 100644 test/MC/MachO/zerofill-3.s create mode 100644 test/Other/2009-09-14-function-elements.ll create mode 100755 test/Scripts/macho-dump create mode 100644 test/Transforms/ArgumentPromotion/callgraph-update.ll create mode 100644 test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll create mode 100644 test/Transforms/ConstProp/2009-09-19-ConstFold-i1-ConstExpr.ll create mode 100644 test/Transforms/ConstProp/overflow-ops.ll create mode 100644 test/Transforms/DeadStoreElimination/crash.ll create mode 100644 test/Transforms/GVN/2009-07-13-MemDepSortFail.ll create mode 100644 test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll create mode 100644 test/Transforms/GVN/rle.ll create mode 100644 test/Transforms/GlobalDCE/2009-09-03-MDNode.ll create mode 100644 test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll create mode 100644 test/Transforms/GlobalOpt/globalsra-unknown-index.ll create mode 100644 test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll create mode 100644 test/Transforms/IndVarSimplify/iv-sext.ll create mode 100644 test/Transforms/IndVarSimplify/lftr-promote.ll create mode 100644 test/Transforms/IndVarSimplify/loop_evaluate10.ll create mode 100644 test/Transforms/IndVarSimplify/loop_evaluate11.ll create mode 100644 test/Transforms/IndVarSimplify/max-pointer.ll create mode 100644 test/Transforms/IndVarSimplify/polynomial-expand.ll create mode 100644 test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll create mode 100644 test/Transforms/IndVarSimplify/preserve-gep-nested.ll create mode 100644 test/Transforms/IndVarSimplify/single-element-range.ll create mode 100644 test/Transforms/IndVarSimplify/sink-alloca.ll create mode 100644 test/Transforms/IndVarSimplify/sink-trapping.ll create mode 100644 test/Transforms/Inline/PR4909.ll create mode 100644 test/Transforms/Inline/alloca-in-scc.ll create mode 100644 test/Transforms/Inline/array_merge.ll create mode 100644 test/Transforms/Inline/callgraph-update.ll create mode 100644 test/Transforms/Inline/crash.ll create mode 100644 test/Transforms/Inline/indirect_resolve.ll create mode 100644 test/Transforms/Inline/nested-inline.ll create mode 100644 test/Transforms/InstCombine/add3.ll create mode 100644 test/Transforms/InstCombine/align-external.ll create mode 100644 test/Transforms/InstCombine/badmalloc.ll create mode 100644 
test/Transforms/InstCombine/bitcast-vec-canon.ll create mode 100644 test/Transforms/InstCombine/cast3.ll create mode 100644 test/Transforms/InstCombine/constant-fold-gep.ll create mode 100644 test/Transforms/InstCombine/crash.ll create mode 100644 test/Transforms/InstCombine/exact-sdiv.ll create mode 100644 test/Transforms/InstCombine/fold-bin-operand.ll create mode 100644 test/Transforms/InstCombine/no-negzero.ll create mode 100644 test/Transforms/InstCombine/nsw.ll create mode 100644 test/Transforms/InstCombine/phi-merge-gep.ll create mode 100644 test/Transforms/InstCombine/sdiv-shift.ll create mode 100644 test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll create mode 100644 test/Transforms/InstCombine/udivrem-change-width.ll create mode 100644 test/Transforms/InstCombine/vector-casts.ll create mode 100644 test/Transforms/InstCombine/zext-bool-add-sub.ll create mode 100644 test/Transforms/JumpThreading/crash.ll create mode 100644 test/Transforms/LICM/hoisting.ll create mode 100644 test/Transforms/LICM/licm_preserve_dbginfo.ll create mode 100644 test/Transforms/LICM/sinking.ll create mode 100644 test/Transforms/LoopIndexSplit/PR3913.ll create mode 100644 test/Transforms/LoopRotate/preserve-scev.ll create mode 100644 test/Transforms/LoopUnswitch/preserve-analyses.ll create mode 100644 test/Transforms/Mem2Reg/crash.ll create mode 100644 test/Transforms/MemCpyOpt/align.ll create mode 100644 test/Transforms/MemCpyOpt/crash.ll create mode 100644 test/Transforms/MemCpyOpt/memmove.ll create mode 100644 test/Transforms/SSI/2009-07-09-Invoke.ll create mode 100644 test/Transforms/SSI/2009-08-15-UnreachableBB.ll create mode 100644 test/Transforms/SSI/2009-08-17-CritEdge.ll create mode 100644 test/Transforms/SSI/2009-08-19-UnreachableBB2.ll create mode 100644 test/Transforms/SSI/dg.exp create mode 100644 test/Transforms/SSI/ssiphi.ll create mode 100644 test/Transforms/ScalarRepl/2009-08-16-VLA.ll create mode 100644 test/Transforms/SimplifyCFG/invoke_unwind.ll create mode 100644 test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll create mode 100644 test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll create mode 100644 test/Transforms/SimplifyLibCalls/memset-64.ll create mode 100644 test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll create mode 100644 test/Transforms/SimplifyLibCalls/weak-symbols.ll create mode 100644 test/Transforms/TailDup/2009-07-31-phicrash.ll create mode 100644 test/Unit/lit.cfg create mode 100644 test/lit.cfg create mode 100644 test/lit.site.cfg.in create mode 100644 test/site.exp.in create mode 100644 tools/llvm-mc/AsmCond.h create mode 100644 unittests/ADT/APFloatTest.cpp create mode 100644 unittests/ADT/SmallStringTest.cpp create mode 100644 unittests/ADT/SparseBitVectorTest.cpp create mode 100644 unittests/ADT/StringRefTest.cpp create mode 100644 unittests/ADT/TwineTest.cpp create mode 100644 unittests/ExecutionEngine/ExecutionEngineTest.cpp create mode 100644 unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp create mode 100644 unittests/ExecutionEngine/JIT/JITTest.cpp create mode 100644 unittests/Support/AllocatorTest.cpp create mode 100644 unittests/Support/CommandLineTest.cpp create mode 100644 unittests/Support/ConstantRangeTest.cpp create mode 100644 unittests/Support/RegexTest.cpp create mode 100644 unittests/Transforms/Makefile create mode 100644 unittests/Transforms/Utils/Cloning.cpp create mode 100644 unittests/Transforms/Utils/Makefile create mode 100644 utils/FileCheck/CMakeLists.txt create mode 100644 utils/FileCheck/FileCheck.cpp create mode 100644 
utils/FileCheck/Makefile create mode 100644 utils/FileUpdate/CMakeLists.txt create mode 100644 utils/FileUpdate/FileUpdate.cpp create mode 100644 utils/FileUpdate/Makefile create mode 100644 utils/TableGen/AsmMatcherEmitter.cpp create mode 100644 utils/TableGen/AsmMatcherEmitter.h create mode 100644 utils/TableGen/StringToOffsetTable.h create mode 100755 utils/UpdateCMakeLists.pl create mode 100644 utils/bugpoint/RemoteRunSafely.sh create mode 100644 utils/count/CMakeLists.txt create mode 100644 utils/count/Makefile create mode 100644 utils/count/count.c create mode 100644 utils/lit/LitConfig.py create mode 100644 utils/lit/LitFormats.py create mode 100644 utils/lit/ProgressBar.py create mode 100644 utils/lit/ShCommands.py create mode 100644 utils/lit/ShUtil.py create mode 100644 utils/lit/TODO create mode 100644 utils/lit/TclUtil.py create mode 100644 utils/lit/Test.py create mode 100644 utils/lit/TestFormats.py create mode 100644 utils/lit/TestRunner.py create mode 100644 utils/lit/TestingConfig.py create mode 100644 utils/lit/Util.py create mode 100755 utils/lit/lit.py create mode 100644 utils/not/CMakeLists.txt create mode 100644 utils/not/Makefile create mode 100644 utils/not/not.cpp create mode 100644 utils/unittest/UnitTestMain/Makefile create mode 100644 utils/unittest/UnitTestMain/TestMain.cpp create mode 100644 utils/valgrind/x86_64-pc-linux-gnu_gcc-4.3.3.supp diff --git a/CMakeLists.txt b/CMakeLists.txt index 56f9355d8eb4c..f7126584ba78a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(LLVM) cmake_minimum_required(VERSION 2.6.1) set(PACKAGE_NAME llvm) -set(PACKAGE_VERSION 2.6svn) +set(PACKAGE_VERSION 2.7svn) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") set(PACKAGE_BUGREPORT "llvmbugs@cs.uiuc.edu") @@ -19,8 +19,6 @@ endif() string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) -include(FindPerl) - set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include) set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) @@ -28,26 +26,42 @@ set(LLVM_TOOLS_BINARY_DIR ${LLVM_BINARY_DIR}/bin) set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) +if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR ) + file(GLOB_RECURSE + tablegenned_files_on_include_dir + "${LLVM_MAIN_SRC_DIR}/include/llvm/*.gen") + file(GLOB_RECURSE + tablegenned_files_on_lib_dir + "${LLVM_MAIN_SRC_DIR}/lib/Target/*.inc") + if( tablegenned_files_on_include_dir OR tablegenned_files_on_lib_dir) + message(FATAL_ERROR "Apparently there is a previous in-source build, +probably as the result of running `configure' and `make' on +${LLVM_MAIN_SRC_DIR}. +This may cause problems. The suspicious files are: +${tablegenned_files_on_lib_dir} +${tablegenned_files_on_include_dir} +Please clean the source directory.") + endif() +endif() + set(LLVM_ALL_TARGETS Alpha ARM + Blackfin CBackend CellSPU CppBackend - IA64 Mips MSIL + MSP430 PIC16 PowerPC Sparc + SystemZ X86 XCore ) -# List of targets whose asmprinters need to be forced to link -# into executables on some platforms (i.e. 
Windows): -set(LLVM_ASMPRINTERS_FORCE_LINK X86 PowerPC) - if( MSVC ) set(LLVM_TARGETS_TO_BUILD X86 CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") @@ -56,6 +70,9 @@ else( MSVC ) CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") endif( MSVC ) +set(LLVM_TARGET_ARCH "host" + CACHE STRING "Set target to use for LLVM JIT or use \"host\" for automatic detection.") + option(LLVM_ENABLE_THREADS "Use threads if available." ON) if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" ) @@ -65,7 +82,10 @@ else() endif() if( LLVM_ENABLE_ASSERTIONS ) - add_definitions( -D_DEBUG ) + # MSVC doesn't like _DEBUG on release builds. See PR 4379. + if( NOT MSVC ) + add_definitions( -D_DEBUG ) + endif() # On Release builds cmake automatically defines NDEBUG, so we # explicitly undefine it: if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" ) @@ -85,7 +105,7 @@ set(LLVM_ENUM_TARGETS "") foreach(c ${LLVM_TARGETS_TO_BUILD}) list(FIND LLVM_ALL_TARGETS ${c} idx) if( idx LESS 0 ) - message(FATAL_ERROR "The target `${c}' does not exists. + message(FATAL_ERROR "The target `${c}' does not exist. It should be one of\n${LLVM_ALL_TARGETS}") else() set(LLVM_ENUM_TARGETS "${LLVM_ENUM_TARGETS}LLVM_TARGET(${c})\n") @@ -125,7 +145,11 @@ else(WIN32) if(UNIX) set(LLVM_ON_WIN32 0) set(LLVM_ON_UNIX 1) - set(LTDL_SHLIB_EXT ".so") + if(APPLE) + set(LTDL_SHLIB_EXT ".dylib") + else(APPLE) + set(LTDL_SHLIB_EXT ".so") + endif(APPLE) set(EXEEXT "") # FIXME: Maximum path length is currently set to 'safe' fixed value set(MAXPATHLEN 2024) @@ -134,13 +158,9 @@ else(WIN32) endif(UNIX) endif(WIN32) -if( EXISTS ${LLVM_TOOLS_BINARY_DIR}/llvm-config ) - set(HAVE_LLVM_CONFIG 1) -endif( EXISTS ${LLVM_TOOLS_BINARY_DIR}/llvm-config ) - include(config-ix) -option(LLVM_ENABLE_PIC "Build Position-Independent Code" OFF) +option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON) set(ENABLE_PIC 0) if( LLVM_ENABLE_PIC ) @@ -170,7 +190,6 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) add_llvm_definitions( -m32 ) list(APPEND CMAKE_EXE_LINKER_FLAGS -m32) list(APPEND CMAKE_SHARED_LINKER_FLAGS -m32) - set( LLVM_PLO_FLAGS -melf_i386 ${LLVM_PLO_FLAGS} ) endif( LLVM_BUILD_32_BITS ) endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) @@ -187,6 +206,9 @@ if( MSVC ) add_llvm_definitions( -wd4146 -wd4503 -wd4996 -wd4800 -wd4244 -wd4624 ) add_llvm_definitions( -wd4355 -wd4715 -wd4180 -wd4345 -wd4224 ) + # Suppress 'new behavior: elements of array 'array' will be default initialized' + add_llvm_definitions( -wd4351 ) + if (NOT ${LLVM_USE_CRT} STREQUAL "") list(FIND MSVC_CRT ${LLVM_USE_CRT} idx) if (idx LESS 0) @@ -199,6 +221,10 @@ endif( MSVC ) include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR}) +if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) + SET(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-include llvm/System/Solaris.h") +endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) + include(AddLLVM) include(TableGen) @@ -237,16 +263,29 @@ add_subdirectory(lib/Linker) add_subdirectory(lib/Analysis) add_subdirectory(lib/Analysis/IPA) add_subdirectory(lib/MC) +add_subdirectory(test) - set(LLVM_ENUM_ASM_PRINTERS "") - foreach(t ${LLVM_TARGETS_TO_BUILD}) +add_subdirectory(utils/FileCheck) +add_subdirectory(utils/count) +add_subdirectory(utils/not) + +set(LLVM_ENUM_ASM_PRINTERS "") +set(LLVM_ENUM_ASM_PARSERS "") +foreach(t ${LLVM_TARGETS_TO_BUILD}) message(STATUS "Targeting ${t}") add_subdirectory(lib/Target/${t}) + add_subdirectory(lib/Target/${t}/TargetInfo) if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt ) 
- add_subdirectory(lib/Target/${t}/AsmPrinter) + add_subdirectory(lib/Target/${t}/AsmPrinter) set(LLVM_ENUM_ASM_PRINTERS - "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n") - endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt ) + "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n") + endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmPrinter/CMakeLists.txt ) + if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmParser/CMakeLists.txt ) + add_subdirectory(lib/Target/${t}/AsmParser) + set(LLVM_ENUM_ASM_PARSERS + "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n") + endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmParser/CMakeLists.txt ) + set(CURRENT_LLVM_TARGET) endforeach(t) # Produce llvm/Config/AsmPrinters.def @@ -255,19 +294,28 @@ configure_file( ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def ) +# Produce llvm/Config/AsmParsers.def +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in + ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def + ) + add_subdirectory(lib/ExecutionEngine) add_subdirectory(lib/ExecutionEngine/Interpreter) add_subdirectory(lib/ExecutionEngine/JIT) add_subdirectory(lib/Target) add_subdirectory(lib/AsmParser) -add_subdirectory(lib/Debugger) add_subdirectory(lib/Archive) add_subdirectory(projects) -add_subdirectory(tools) -option(LLVM_EXAMPLES "Build LLVM example programs." OFF) -if (LLVM_EXAMPLES) +option(LLVM_BUILD_TOOLS "Build LLVM tool programs." ON) +if(LLVM_BUILD_TOOLS) + add_subdirectory(tools) +endif() + +option(LLVM_BUILD_EXAMPLES "Build LLVM example programs." ON) +if(LLVM_BUILD_EXAMPLES) add_subdirectory(examples) endif () @@ -276,6 +324,7 @@ install(DIRECTORY include PATTERN ".svn" EXCLUDE PATTERN "*.cmake" EXCLUDE PATTERN "*.in" EXCLUDE + PATTERN "*.tmp" EXCLUDE ) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include diff --git a/CREDITS.TXT b/CREDITS.TXT index e1bad67c0cd79..f6467abfc0387 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -90,6 +90,10 @@ N: Alkis Evlogimenos E: alkis@evlogimenos.com D: Linear scan register allocator, many codegen improvements, Java frontend +N: Ryan Flynn +E: pizza@parseerror.com +D: Miscellaneous bug fixes + N: Brian Gaeke E: gaeke@uiuc.edu W: http://www.students.uiuc.edu/~gaeke/ @@ -117,6 +121,10 @@ N: Dan Gohman E: gohman@apple.com D: Miscellaneous bug fixes +N: David Goodwin +E: david@goodwinz.net +D: Thumb-2 code generator + N: David Greene E: greened@obbligato.org D: Miscellaneous bug fixes @@ -160,6 +168,10 @@ N: Brad Jones E: kungfoomaster@nondot.org D: Support for packed types +N: Rod Kay +E: rkay@auroraux.org +D: Author of LLVM Ada bindings + N: Eric Kidd W: http://randomhacks.net/ D: llvm-config script @@ -174,6 +186,10 @@ N: Sumant Kowshik E: kowshik@uiuc.edu D: Author of the original C backend +N: Benjamin Kramer +E: benny.kra@gmail.com +D: Miscellaneous bug fixes + N: Christopher Lamb E: christopher.lamb@gmail.com D: aligned load/store support, parts of noalias and restrict support @@ -242,6 +258,11 @@ N: Morten Ofstad E: morten@hue.no D: Visual C++ compatibility fixes +N: Jakob Stoklund Olesen +E: stoklund@2pi.dk +D: Machine code verifier +D: Blackfin backend + N: Richard Osborne E: richard@xmos.com D: XCore backend @@ -252,6 +273,10 @@ D: LTO tool, PassManager rewrite, Loop Pass Manager, Loop Rotate D: GCC PCH Integration (llvm-gcc), llvm-gcc improvements D: Optimizer improvements, Loop Index Split +N: Sandeep Patel +E: deeppatel1987@gmail.com +D: ARM calling conventions rewrite, hard float support + N: Vladimir Prus W: 
http://vladimir_prus.blogspot.com E: ghost@cs.msu.su @@ -299,6 +324,10 @@ E: lauro.venancio@indt.org.br D: ARM backend improvements D: Thread Local Storage implementation +N: Xerxes Ranby +E: xerxes@zafena.se +D: CMake dependency chain and various bug fixes + N: Bill Wendling E: isanbard@gmail.com D: Bunches of stuff diff --git a/LICENSE.TXT b/LICENSE.TXT index 0dca8ce7bd8e7..fd49172664b5a 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -61,9 +61,9 @@ licenses, and/or restrictions: Program Directory ------- --------- -System Library llvm/lib/System Autoconf llvm/autoconf llvm/projects/ModuleMaker/autoconf llvm/projects/sample/autoconf CellSPU backend llvm/lib/Target/CellSPU/README.txt Google Test llvm/utils/unittest/googletest +OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} diff --git a/Makefile b/Makefile index e750889ae45c6..f3bf3f2345eb8 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ ifeq ($(BUILD_DIRS_ONLY),1) OPTIONAL_DIRS := else DIRS := lib/System lib/Support utils lib/VMCore lib tools/llvm-config \ - tools runtime docs + tools runtime docs unittests OPTIONAL_DIRS := examples projects bindings endif @@ -36,7 +36,7 @@ include $(LEVEL)/Makefile.config # FIXME: Remove runtime entirely once we have an understanding of where # libprofile etc should go. #ifeq ($(LLVMGCC_MAJVERS),4) - DIRS := $(filter-out runtime, $(DIRS)) +# DIRS := $(filter-out runtime, $(DIRS)) #endif ifeq ($(MAKECMDGOALS),libs-only) @@ -62,7 +62,7 @@ ifeq ($(MAKECMDGOALS),install-clang) endif ifeq ($(MAKECMDGOALS),clang-only) - DIRS := $(filter-out tools runtime docs, $(DIRS)) tools/clang + DIRS := $(filter-out tools runtime docs unittests, $(DIRS)) tools/clang OPTIONAL_DIRS := endif @@ -88,10 +88,19 @@ cross-compile-build-tools: $(Verb) if [ ! -f BuildTools/Makefile ]; then \ $(MKDIR) BuildTools; \ cd BuildTools ; \ - $(PROJ_SRC_DIR)/configure ; \ + $(PROJ_SRC_DIR)/configure --build=$(BUILD_TRIPLE) \ + --host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE); \ cd .. ; \ fi; \ - ($(MAKE) -C BuildTools BUILD_DIRS_ONLY=1 ) || exit 1; + ($(MAKE) -C BuildTools \ + BUILD_DIRS_ONLY=1 \ + UNIVERSAL= \ + ENABLE_OPTIMIZED=$(ENABLE_OPTIMIZED) \ + ENABLE_PROFILING=$(ENABLE_PROFILING) \ + ENABLE_COVERAGE=$(ENABLE_COVERAGE) \ + DISABLE_ASSERTIONS=$(DISABLE_ASSERTIONS) \ + ENABLE_EXPENSIVE_CHECKS=$(ENABLE_EXPENSIVE_CHECKS) \ + ) || exit 1; endif # Include the main makefile machinery. @@ -117,7 +126,6 @@ debug-opt-prof: dist-hook:: $(Echo) Eliminating files constructed by configure $(Verb) $(RM) -f \ - $(TopDistDir)/include/llvm/ADT/iterator.h \ $(TopDistDir)/include/llvm/Config/config.h \ $(TopDistDir)/include/llvm/Support/DataTypes.h \ $(TopDistDir)/include/llvm/Support/ThreadSupport.h @@ -137,7 +145,7 @@ FilesToConfig := \ include/llvm/Config/Targets.def \ include/llvm/Config/AsmPrinters.def \ include/llvm/Support/DataTypes.h \ - include/llvm/ADT/iterator.h + tools/llvmc/plugins/Base/Base.td FilesToConfigPATH := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig)) all-local:: $(FilesToConfigPATH) diff --git a/Makefile.config.in b/Makefile.config.in index e2d2c57b4d5fd..fc84c0bcb1b43 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -89,8 +89,11 @@ PROJ_mandir := $(DESTDIR)$(PROJ_prefix)/share/man LLVM_ON_UNIX:=@LLVM_ON_UNIX@ LLVM_ON_WIN32:=@LLVM_ON_WIN32@ -# Target operating system for which LLVM will be compiled. +# Host operating system on which LLVM will run. OS=@OS@ +HOST_OS=@HOST_OS@ +# Target operating system for which LLVM will compile. 
+TARGET_OS=@TARGET_OS@ # Target hardware architecture ARCH=@ARCH@ @@ -107,6 +110,9 @@ BUILD_EXEEXT=@BUILD_EXEEXT@ BUILD_CC=@BUILD_CC@ BUILD_CXX=@BUILD_CXX@ +# Triple for configuring build tools when cross-compiling +BUILD_TRIPLE=@build@ + # Target triple (cpu-vendor-os) for which we should generate code TARGET_TRIPLE=@target@ @@ -128,6 +134,7 @@ LDFLAGS+=@LDFLAGS@ # Path to the library archiver program. AR_PATH = @AR@ +AR = @AR@ # Path to the nm program NM_PATH = @NM@ @@ -238,6 +245,11 @@ RDYNAMIC := @RDYNAMIC@ #DEBUG_RUNTIME = 1 @DEBUG_RUNTIME@ +# When DEBUG_SYMBOLS is enabled, the compiler libraries will retain debug +# symbols. +#DEBUG_SYMBOLS = 1 +@DEBUG_SYMBOLS@ + # When ENABLE_PROFILING is enabled, the llvm source base is built with profile # information to allow gprof to be used to get execution frequencies. #ENABLE_PROFILING = 1 diff --git a/Makefile.rules b/Makefile.rules index 3ae2db8916de8..e3f388d543488 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -19,10 +19,11 @@ #-------------------------------------------------------------------- # Define the various target sets #-------------------------------------------------------------------- -RecursiveTargets := all clean clean-all install uninstall install-bytecode +RecursiveTargets := all clean clean-all install uninstall install-bytecode \ + unitcheck LocalTargets := all-local clean-local clean-all-local check-local \ install-local printvars uninstall-local \ - install-bytecode-local unittests + install-bytecode-local TopLevelTargets := check dist dist-check dist-clean dist-gzip dist-bzip2 \ dist-zip unittests UserTargets := $(RecursiveTargets) $(LocalTargets) $(TopLevelTargets) @@ -128,8 +129,11 @@ reconfigure: $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \ $(ConfigStatusScript) +# FIXME: The {PIC16,MSP430}/AsmPrinter line here is a hack to force a reconfigure to pick +# up AsmPrinter changes. Remove it after a reasonable delay from 2009-08-13. 
+ .PRECIOUS: $(ConfigStatusScript) -$(ConfigStatusScript): $(ConfigureScript) +$(ConfigStatusScript): $(ConfigureScript) $(LLVM_SRC_ROOT)/lib/Target/PIC16/AsmPrinter/Makefile $(LLVM_SRC_ROOT)/lib/Target/MSP430/AsmPrinter/Makefile $(Echo) Reconfiguring with $< $(Verb) cd $(PROJ_OBJ_ROOT) && \ if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \ @@ -242,6 +246,12 @@ LLVMC_BUILTIN_PLUGIN_2 = $(word 2, $(LLVMC_BUILTIN_PLUGINS)) LLVMC_BUILTIN_PLUGIN_3 = $(word 3, $(LLVMC_BUILTIN_PLUGINS)) LLVMC_BUILTIN_PLUGIN_4 = $(word 4, $(LLVMC_BUILTIN_PLUGINS)) LLVMC_BUILTIN_PLUGIN_5 = $(word 5, $(LLVMC_BUILTIN_PLUGINS)) +LLVMC_BUILTIN_PLUGIN_6 = $(word 6, $(LLVMC_BUILTIN_PLUGINS)) +LLVMC_BUILTIN_PLUGIN_7 = $(word 7, $(LLVMC_BUILTIN_PLUGINS)) +LLVMC_BUILTIN_PLUGIN_8 = $(word 8, $(LLVMC_BUILTIN_PLUGINS)) +LLVMC_BUILTIN_PLUGIN_9 = $(word 9, $(LLVMC_BUILTIN_PLUGINS)) +LLVMC_BUILTIN_PLUGIN_10 = $(word 10, $(LLVMC_BUILTIN_PLUGINS)) + ifneq ($(LLVMC_BUILTIN_PLUGIN_1),) CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_1=$(LLVMC_BUILTIN_PLUGIN_1) @@ -263,6 +273,27 @@ ifneq ($(LLVMC_BUILTIN_PLUGIN_5),) CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_5) endif +ifneq ($(LLVMC_BUILTIN_PLUGIN_6),) +CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_6=$(LLVMC_BUILTIN_PLUGIN_6) +endif + +ifneq ($(LLVMC_BUILTIN_PLUGIN_7),) +CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_7=$(LLVMC_BUILTIN_PLUGIN_7) +endif + +ifneq ($(LLVMC_BUILTIN_PLUGIN_8),) +CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_8=$(LLVMC_BUILTIN_PLUGIN_8) +endif + +ifneq ($(LLVMC_BUILTIN_PLUGIN_9),) +CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_9=$(LLVMC_BUILTIN_PLUGIN_9) +endif + +ifneq ($(LLVMC_BUILTIN_PLUGIN_10),) +CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_10=$(LLVMC_BUILTIN_PLUGIN_10) +endif + + endif endif # LLVMC_BASED_DRIVER @@ -284,7 +315,7 @@ CPP.Defines := # OPTIMIZE_OPTION - The optimization level option we want to build LLVM with # this can be overridden on the make command line. ifndef OPTIMIZE_OPTION - ifneq ($(OS),MingW) + ifneq ($(HOST_OS),MingW) OPTIMIZE_OPTION := -O3 else OPTIMIZE_OPTION := -O2 @@ -294,8 +325,8 @@ endif ifeq ($(ENABLE_OPTIMIZED),1) BuildMode := Release # Don't use -fomit-frame-pointer on Darwin or FreeBSD. - ifneq ($(OS),FreeBSD) - ifneq ($(OS),Darwin) + ifneq ($(HOST_OS),FreeBSD) + ifneq ($(HOST_OS),Darwin) OmitFramePointer := -fomit-frame-pointer endif endif @@ -303,12 +334,19 @@ ifeq ($(ENABLE_OPTIMIZED),1) # Darwin requires -fstrict-aliasing to be explicitly enabled. # Avoid -fstrict-aliasing on Darwin for now, there are unresolved issues # with -fstrict-aliasing and ipa-type-escape radr://6756684 - #ifeq ($(OS),Darwin) + #ifeq ($(HOST_OS),Darwin) # EXTRA_OPTIONS += -fstrict-aliasing -Wstrict-aliasing #endif CXX.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer) C.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer) LD.Flags += $(OPTIMIZE_OPTION) + ifdef DEBUG_SYMBOLS + BuildMode := $(BuildMode)+Debug + CXX.Flags += -g + C.Flags += -g + LD.Flags += -g + KEEP_SYMBOLS := 1 + endif else BuildMode := Debug CXX.Flags += -g @@ -334,9 +372,16 @@ ifndef REQUIRES_EH CXX.Flags += -fno-exceptions endif -# IF REQUIRES_RTTI=1 is specified then don't disable run-time type id -ifndef REQUIRES_RTTI -# CXX.Flags += -fno-rtti +ifdef REQUIRES_FRAME_POINTER + CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags)) + C.Flags := $(filter-out -fomit-frame-pointer,$(C.Flags)) + LD.Flags := $(filter-out -fomit-frame-pointer,$(LD.Flags)) +endif + +# If REQUIRES_RTTI=1 is specified then don't disable run-time type id. 
+ifeq ($(REQUIRES_RTTI), 1) + CXX.Flags := $(filter-out -fno-rtti,$(CXX.Flags)) + CXXFLAGS := $(filter-out -fno-rtti,$(CXXFLAGS)) endif ifdef ENABLE_COVERAGE @@ -376,10 +421,10 @@ ifdef SHARED_LIBRARY endif ifeq ($(ENABLE_PIC),1) - ifeq ($(OS), $(filter $(OS), Cygwin MingW)) + ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW)) # Nothing. Win32 defaults to PIC and warns when given -fPIC else - ifeq ($(OS),Darwin) + ifeq ($(HOST_OS),Darwin) # Common symbols not allowed in dylib files CXX.Flags += -fno-common C.Flags += -fno-common @@ -390,17 +435,14 @@ ifeq ($(ENABLE_PIC),1) endif endif else - ifeq ($(OS),Darwin) + ifeq ($(HOST_OS),Darwin) CXX.Flags += -mdynamic-no-pic C.Flags += -mdynamic-no-pic endif endif -CXX.Flags += $(CXXFLAGS) -Woverloaded-virtual -C.Flags += $(CFLAGS) -CPP.Defines += $(CPPFLAGS) +CXX.Flags += -Woverloaded-virtual CPP.BaseFlags += $(CPP.Defines) -LD.Flags += $(LDFLAGS) AR.Flags := cru # Make Floating point IEEE compliant on Alpha. @@ -417,7 +459,7 @@ ifeq ($(ARCH),Alpha) LD.Flags += -Wl,--no-relax endif -ifeq ($(OS),MingW) +ifeq ($(HOST_OS),MingW) ifeq ($(LLVM_CROSS_COMPILING),1) # Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=525016 ifdef TOOLNAME @@ -430,6 +472,7 @@ ifdef ENABLE_EXPENSIVE_CHECKS # GNU libstdc++ uses RTTI if you define _GLIBCXX_DEBUG, which we did above. # See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40160 CXX.Flags := $(filter-out -fno-rtti,$(CXX.Flags)) + CXXFLAGS := $(filter-out -fno-rtti,$(CXXFLAGS)) endif #-------------------------------------------------------------------- @@ -499,7 +542,7 @@ endif # Adjust to user's request #-------------------------------------------------------------------- -ifeq ($(OS),Darwin) +ifeq ($(HOST_OS),Darwin) DARWIN_VERSION := `sw_vers -productVersion` # Strip a number like 10.4.7 to 10.4 DARWIN_VERSION := $(shell echo $(DARWIN_VERSION)| sed -E 's/(10.[0-9]).*/\1/') @@ -507,10 +550,12 @@ ifeq ($(OS),Darwin) DARWIN_MAJVERS := $(shell echo $(DARWIN_VERSION)| sed -E 's/10.([0-9]).*/\1/') SharedLinkOptions=-Wl,-flat_namespace -Wl,-undefined -Wl,suppress \ - -dynamiclib -mmacosx-version-min=$(DARWIN_VERSION) - TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION) + -dynamiclib + ifneq ($(ARCH),ARM) + SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION) + endif else - ifeq ($(OS),Cygwin) + ifeq ($(HOST_OS),Cygwin) SharedLinkOptions=-shared -nostdlib -Wl,--export-all-symbols \ -Wl,--enable-auto-import -Wl,--enable-auto-image-base else @@ -518,6 +563,12 @@ else endif endif +ifeq ($(TARGET_OS),Darwin) + ifneq ($(ARCH),ARM) + TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION) + endif +endif + # Adjust LD.Flags depending on the kind of library that is to be built. Note # that if LOADABLE_MODULE is specified then the resulting shared library can # be opened with dlopen. @@ -555,7 +606,7 @@ ifndef KEEP_SYMBOLS endif # Adjust linker flags for building an executable -ifneq ($(OS),Darwin) +ifneq ($(HOST_OS),Darwin) ifneq ($(DARWIN_MAJVERS),4) ifdef TOOLNAME ifdef EXAMPLE_TOOL @@ -577,7 +628,7 @@ endif CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \ $(EXTRA_OPTIONS) -ifeq ($(OS),HP-UX) +ifeq ($(HOST_OS),HP-UX) CompileCommonOpts := -D_REENTRANT -D_HPUX_SOURCE endif @@ -605,7 +656,7 @@ ifdef UNIVERSAL # Building universal cannot compute dependencies automatically. 
DISABLE_AUTO_DEPENDENCIES=1 else - ifeq ($(OS),Darwin) + ifeq ($(TARGET_OS),Darwin) ifeq ($(ARCH),x86_64) TargetCommonOpts = -m64 else @@ -616,10 +667,14 @@ else endif endif -ifeq ($(OS),SunOS) +ifeq ($(HOST_OS),SunOS) CPP.BaseFlags += -include llvm/System/Solaris.h endif +ifeq ($(HOST_OS),AuroraUX) +CPP.BaseFlags += -include llvm/System/Solaris.h +endif # !HOST_OS - AuroraUX. + LD.Flags += -L$(LibDir) -L$(LLVMLibDir) CPP.BaseFlags += -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS # All -I flags should go here, so that they don't confuse llvm-config. @@ -630,31 +685,35 @@ CPP.Flags += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \ $(CPP.BaseFlags) ifeq ($(BUILD_COMPONENT), 1) - Compile.C = $(BUILD_CC) $(CPP.Flags) $(C.Flags) \ + Compile.C = $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Compile.CXX = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) \ + Compile.CXX = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \ + $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(TargetCommonOpts) \ + Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \ $(CompileCommonOpts) $(CXX.Flags) -E - Link = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) \ + Link = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \ + $(LDFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip) else - Compile.C = $(CC) $(CPP.Flags) $(C.Flags) \ + Compile.C = $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Compile.CXX = $(CXX) $(CPP.Flags) $(CXX.Flags) \ + Compile.CXX = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) \ + Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \ $(CompileCommonOpts) $(CXX.Flags) -E - Link = $(CXX) $(CPP.Flags) $(CXX.Flags) \ + Link = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LDFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip) endif -BCCompile.C = $(LLVMGCCWITHPATH) $(CPP.Flags) $(C.Flags) \ +BCCompile.C = $(LLVMGCCWITHPATH) $(CPP.Flags) $(C.Flags) $(CFLAGS) \ + $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -Preprocess.C = $(CC) $(CPP.Flags) $(C.Flags) \ +Preprocess.C = $(CC) $(CPP.Flags) $(C.Flags) $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -E -BCCompile.CXX = $(LLVMGXXWITHPATH) $(CPP.Flags) $(CXX.Flags) \ +BCCompile.CXX = $(LLVMGXXWITHPATH) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \ + $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) ProgInstall = $(INSTALL) $(Install.StripFlag) -m 0755 @@ -780,6 +839,7 @@ clean-all:: $(addsuffix /.makeclean-all,$(PARALLEL_DIRS)) install :: $(addsuffix /.makeinstall ,$(PARALLEL_DIRS)) uninstall:: $(addsuffix /.makeuninstall,$(PARALLEL_DIRS)) install-bytecode :: $(addsuffix /.makeinstall-bytecode,$(PARALLEL_DIRS)) +unitcheck:: $(addsuffix /.makeunitcheck,$(PARALLEL_DIRS)) ParallelTargets := $(foreach T,$(RecursiveTargets),%/.make$(T)) @@ -888,7 +948,7 @@ $(ToolDir)/$(strip $(TOOLNAME))$(EXEEXT): $(LLVM_CONFIG) LLVMLibsOptions += $(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS)) LLVMLibsPaths += $(LLVM_CONFIG) \ - $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS)) + $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS)) endif endif @@ -960,7 +1020,7 @@ endif # if we're building a library ... 
ifdef LIBRARYNAME -# Make sure there isn't any extranous whitespace on the LIBRARYNAME option +# Make sure there isn't any extraneous whitespace on the LIBRARYNAME option LIBRARYNAME := $(strip $(LIBRARYNAME)) ifdef LOADABLE_MODULE LibName.A := $(LibDir)/$(LIBRARYNAME).a @@ -1045,9 +1105,9 @@ $(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir $(LLVMLD) \ $(LLVMToolDir)/llvm-ar $(Echo) Building $(BuildMode) Bytecode Archive $(notdir $@) \ "(internalize)" - $(Verb) $(BCLinkLib) -o $(ObjDir)/$(LIBRARYNAME).o $(ObjectsBC) + $(Verb) $(BCLinkLib) -o $(ObjDir)/$(LIBRARYNAME).internalize $(ObjectsBC) $(Verb) $(RM) -f $@ - $(Verb) $(LArchive) $@ $(ObjDir)/$(LIBRARYNAME).o + $(Verb) $(LArchive) $@ $(ObjDir)/$(LIBRARYNAME).internalize.bc else $(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir \ $(LLVMToolDir)/llvm-ar @@ -1169,7 +1229,7 @@ endif # not exporting all of the weak symbols from the binary. This reduces dyld # startup time by 4x on darwin in some cases. ifdef TOOL_NO_EXPORTS -ifeq ($(OS),Darwin) +ifeq ($(HOST_OS),Darwin) # Tiger tools don't support this. ifneq ($(DARWIN_MAJVERS),4) @@ -1177,7 +1237,7 @@ LD.Flags += -Wl,-exported_symbol -Wl,_main endif endif -ifeq ($(OS), $(filter $(OS), Linux NetBSD FreeBSD)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD)) LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map endif endif @@ -1212,7 +1272,7 @@ install-local:: uninstall-local:: $(Echo) Uninstall circumvented with NO_INSTALL else -DestTool = $(PROJ_bindir)/$(TOOLNAME) +DestTool = $(PROJ_bindir)/$(TOOLNAME)$(EXEEXT) install-local:: $(DestTool) @@ -1231,7 +1291,7 @@ endif ############################################################################### # FIXME: This should be checking for "if not GCC or ICC", not for "if HP-UX" -ifeq ($(OS),HP-UX) +ifeq ($(HOST_OS),HP-UX) DISABLE_AUTO_DEPENDENCIES=1 endif @@ -1251,7 +1311,7 @@ DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.d.tmp" "$(ObjDir)/$*.d"; \ else $(RM) "$(ObjDir)/$*.d.tmp"; exit 1; fi $(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) - $(Echo) "Compiling $*.cpp for $(BuildMode) build " $(PIC_FLAG) + $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG) $(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \ $(DEPEND_MOVEFILE) @@ -1354,14 +1414,13 @@ $(ObjDir)/%.s: %.c $(ObjDir)/.dir $(BUILT_SOURCES) # make the C and C++ compilers strip debug info out of bytecode libraries. 
ifdef DEBUG_RUNTIME -$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LLVMAS) $(LOPT) +$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT) $(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)" - $(Verb) $(LLVMAS) $< -o - | $(LOPT) -std-compile-opts -o $@ -f + $(Verb) $(LOPT) $< -std-compile-opts -o $@ else -$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LLVMAS) $(LOPT) +$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT) $(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)" - $(Verb) $(LLVMAS) $< -o - | \ - $(LOPT) -std-compile-opts -strip-debug -o $@ -f + $(Verb) $(LOPT) $< -std-compile-opts -strip-debug -o $@ endif @@ -1450,6 +1509,11 @@ $(ObjDir)/%GenAsmWriter1.inc.tmp : %.td $(ObjDir)/.dir $(Echo) "Building $(&2 - exit 1 +#!/bin/bash + +die() { + echo "$@" 1>&2 + exit 1 +} + +clean() { + echo $1 | sed -e 's/\\//g' } -### NOTE: ############################################################" -### The below two variables specify the auto* versions -### periods should be escaped with backslash, for use by grep +### NOTE: ############################################################ +### These variables specify the tool versions we want to use. +### Periods should be escaped with backslash for use by grep. +### +### If you update these, please also update docs/GettingStarted.html want_autoconf_version='2\.60' want_autoheader_version=$want_autoconf_version -### END NOTE #########################################################" - +want_aclocal_version='1\.9\.6' +want_libtool_version='1\.5\.22' +### END NOTE ######################################################### outfile=configure configfile=configure.ac -want_autoconf_version_clean=`echo $want_autoconf_version | sed -e 's/\\\\//g'` -want_autoheader_version_clean=`echo $want_autoheader_version | sed -e 's/\\\\//g'` +want_autoconf_version_clean=$(clean $want_autoconf_version) +want_autoheader_version_clean=$(clean $want_autoheader_version) +want_aclocal_version_clean=$(clean $want_aclocal_version) +want_libtool_version_clean=$(clean $want_libtool_version) test -d autoconf && test -f autoconf/$configfile && cd autoconf test -f $configfile || die "Can't find 'autoconf' dir; please cd into it first" autoconf --version | grep $want_autoconf_version > /dev/null test $? -eq 0 || die "Your autoconf was not detected as being $want_autoconf_version_clean" -aclocal --version | grep '^aclocal.*1\.9\.6' > /dev/null -test $? -eq 0 || die "Your aclocal was not detected as being 1.9.6" +aclocal --version | grep '^aclocal.*'$want_aclocal_version > /dev/null +test $? -eq 0 || die "Your aclocal was not detected as being $want_aclocal_version_clean" autoheader --version | grep '^autoheader.*'$want_autoheader_version > /dev/null test $? -eq 0 || die "Your autoheader was not detected as being $want_autoheader_version_clean" -libtool --version | grep '1\.5\.22' > /dev/null -test $? -eq 0 || die "Your libtool was not detected as being 1.5.22" +libtool --version | grep $want_libtool_version > /dev/null +test $? -eq 0 || die "Your libtool was not detected as being $want_libtool_version_clean" echo "" echo "### NOTE: ############################################################" echo "### If you get *any* warnings from autoconf below you MUST fix the" @@ -37,7 +47,7 @@ echo "### commit any configure script that was generated with warnings" echo "### present. You should get just three 'Regenerating..' lines." 
echo "######################################################################" echo "" -echo "Regenerating aclocal.m4 with aclocal 1.9.6" +echo "Regenerating aclocal.m4 with aclocal $want_aclocal_version_clean" cwd=`pwd` aclocal --force -I $cwd/m4 || die "aclocal failed" echo "Regenerating configure with autoconf $want_autoconf_version_clean" diff --git a/autoconf/config.guess b/autoconf/config.guess index 7d0185e019ed6..e792aac60807b 100755 --- a/autoconf/config.guess +++ b/autoconf/config.guess @@ -1,9 +1,10 @@ #! /bin/sh # Attempt to guess a canonical system name. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 +# Free Software Foundation, Inc. -timestamp='2004-09-07' +timestamp='2009-09-18' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -17,23 +18,25 @@ timestamp='2004-09-07' # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. -# Originally written by Per Bothner . -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. + +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` @@ -53,8 +56,8 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 -Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -66,11 +69,11 @@ Try \`$me --help' for more information." while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; + echo "$timestamp" ; exit ;; --version | -v ) - echo "$version" ; exit 0 ;; + echo "$version" ; exit ;; --help | --h* | -h ) - echo "$usage"; exit 0 ;; + echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. 
@@ -104,7 +107,7 @@ set_cc_for_build=' trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; : ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; @@ -123,7 +126,7 @@ case $CC_FOR_BUILD,$HOST_CC,$CC in ;; ,,*) CC_FOR_BUILD=$CC ;; ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ;' +esac ; set_cc_for_build= ;' # This is needed to find uname on a Pyramid OSx when run in the BSD universe. # (ghazi@noc.rutgers.edu 1994-08-24) @@ -158,6 +161,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched @@ -166,7 +170,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null + | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? @@ -196,55 +200,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
echo "${machine}-${os}${release}" - exit 0 ;; - amd64:OpenBSD:*:*) - echo x86_64-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - cats:OpenBSD:*:*) - echo arm-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - luna88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - macppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvmeppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mips64-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sun3:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + exit ;; *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} - exit 0 ;; + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; macppc:MirBSD:*:*) - echo powerppc-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -297,37 +269,43 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit 0 ;; + exit ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead # of the specific Alpha model? echo alpha-pc-interix - exit 0 ;; + exit ;; 21064:Windows_NT:50:3) echo alpha-dec-winnt3.5 - exit 0 ;; + exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 - exit 0;; + exit ;; *:[Aa]miga[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-amigaos - exit 0 ;; + exit ;; *:[Mm]orph[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-morphos - exit 0 ;; + exit ;; *:OS/390:*:*) echo i370-ibm-openedition - exit 0 ;; + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; *:OS400:*:*) echo powerpc-ibm-os400 - exit 0 ;; + exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) echo hppa1.1-hitachi-hiuxmpp - exit 0;; + exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
if test "`(/bin/universe) 2>/dev/null`" = att ; then @@ -335,32 +313,48 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in else echo pyramid-pyramid-bsd fi - exit 0 ;; + exit ;; NILE*:*:*:dcosx) echo pyramid-pyramid-svr4 - exit 0 ;; + exit ;; DRS?6000:unix:4.0:6*) echo sparc-icl-nx6 - exit 0 ;; - DRS?6000:UNIX_SV:4.2*:7*) + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7 && exit 0 ;; + sparc) echo sparc-icl-nx7; exit ;; esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; sun4H:SunOS:5.*:*) echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in Series*|S4*) @@ -369,10 +363,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in esac # Japanese Language versions have a version number like `4.1.3-JL'. echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; + exit ;; sun3*:SunOS:*:*) echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; + exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 @@ -384,10 +378,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo sparc-sun-sunos${UNAME_RELEASE} ;; esac - exit 0 ;; + exit ;; aushp:SunOS:*:*) echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; + exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor @@ -398,40 +392,40 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) echo m68k-milan-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) echo m68k-hades-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) echo m68k-unknown-mint${UNAME_RELEASE} - exit 0 ;; + exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} - exit 0 ;; + exit ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; + exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 - exit 0 ;; + exit ;; RISC*:ULTRIX:*:*) echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; + exit ;; VAX*:ULTRIX*:*:*) echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; + exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) echo clipper-intergraph-clix${UNAME_RELEASE} - exit 0 ;; + exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c @@ -455,32 +449,33 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit (-1); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c \ - && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && exit 0 + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } echo mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; + exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax - exit 0 ;; + exit ;; Motorola:*:4.3:PL8-*) echo powerpc-harris-powermax - exit 0 ;; + exit ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) echo powerpc-harris-powermax - exit 0 ;; + exit ;; Night_Hawk:Power_UNIX:*:*) echo powerpc-harris-powerunix - exit 0 ;; + exit ;; m88k:CX/UX:7*:*) echo m88k-harris-cxux7 - exit 0 ;; + exit ;; m88k:*:4*:R4*) echo m88k-motorola-sysv4 - exit 0 ;; + exit ;; m88k:*:3*:R3*) echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures UNAME_PROCESSOR=`/usr/bin/uname -p` @@ -496,29 +491,29 @@ EOF else echo i586-dg-dgux${UNAME_RELEASE} fi - exit 0 ;; + exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 - exit 0 ;; + exit ;; M88*:*:R3*:*) # Delta 88k system running SVR3 echo m88k-motorola-sysv3 - exit 0 ;; + exit ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) echo m88k-tektronix-sysv3 - exit 0 ;; + exit ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) echo m68k-tektronix-bsd - exit 0 ;; + exit ;; *:IRIX*:*:*) echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; + exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix - exit 0 ;; + exit ;; ia64:AIX:*:*) if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` @@ -526,7 +521,7 @@ EOF IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit 0 ;; + exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then eval $set_cc_for_build @@ -541,15 +536,19 @@ EOF exit(0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 - echo rs6000-ibm-aix3.2.5 + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then echo rs6000-ibm-aix3.2.4 else echo rs6000-ibm-aix3.2 fi - exit 0 ;; - *:AIX:*:[45]) + exit ;; + *:AIX:*:[456]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 @@ -562,28 +561,28 @@ EOF IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; + exit ;; *:AIX:*:*) echo rs6000-ibm-aix - exit 0 ;; + exit ;; ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 - exit 0 ;; + exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 + exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx - exit 0 ;; + exit ;; DPX/2?00:B.O.S.:*:*) echo m68k-bull-sysv3 - exit 0 ;; + exit ;; 9000/[34]??:4.3bsd:1.*:*) echo m68k-hp-bsd - exit 0 ;; + exit ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) echo m68k-hp-bsd4.4 - exit 0 ;; + exit ;; 9000/[34678]??:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case "${UNAME_MACHINE}" in @@ -645,9 +644,19 @@ EOF esac if [ ${HP_ARCH} = "hppa2.0w" ] then - # avoid double evaluation of $set_cc_for_build - test -n "$CC_FOR_BUILD" || eval $set_cc_for_build - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep -q __LP64__ then HP_ARCH="hppa2.0w" else @@ -655,11 +664,11 @@ EOF fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; + exit ;; ia64:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` echo ia64-hp-hpux${HPUX_REV} - exit 0 ;; + exit ;; 3050*:HI-UX:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c @@ -687,216 +696,244 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 - exit 0 ;; + exit ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd - exit 0 ;; + exit ;; 9000/8??:4.3bsd:*:*) echo hppa1.0-hp-bsd - exit 0 ;; + exit ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix - exit 0 ;; + exit ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf - exit 0 ;; + exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf - exit 0 ;; + exit ;; i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then echo ${UNAME_MACHINE}-unknown-osf1mk else echo ${UNAME_MACHINE}-unknown-osf1 fi - exit 0 ;; + exit ;; parisc*:Lites*:*:*) echo hppa1.1-hp-lites - exit 0 ;; + exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd - exit 0 ;; + exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo c34-convex-bsd - exit 0 ;; + exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd - exit 0 ;; + exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd - exit 0 ;; + exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*[A-Z]90:*:*:*) echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*TS:*:*:*) echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*T3E:*:*:*) echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; CRAY*SV1:*:*:*) echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; *:UNICOS/mp:*:*) echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; + exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; + exit ;; 5000:UNIX_System_V:4.*:*) FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; + exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; sparc*:BSD/OS:*:*) echo sparc-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; *:BSD/OS:*:*) 
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; + exit ;; *:FreeBSD:*:*) - echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin - exit 0 ;; - i*:MINGW*:*) + exit ;; + *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 - exit 0 ;; + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 - exit 0 ;; - x86:Interix*:[34]*) - echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' - exit 0 ;; + exit ;; + *:Interix*:[3456]*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + EM64T | authenticamd | genuineintel) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks - exit 0 ;; + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we # UNAME_MACHINE based on the output of uname instead of i386? echo i586-pc-interix - exit 0 ;; + exit ;; i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin - exit 0 ;; + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; p*:CYGWIN*:*) echo powerpcle-unknown-cygwin - exit 0 ;; + exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; + exit ;; *:GNU:*:*) # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; + exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu - exit 0 ;; + exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix - exit 0 ;; + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + fi + exit ;; + avr32*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; cris:Linux:*:*) echo cris-axis-linux-gnu - exit 0 ;; + exit ;; crisv32:Linux:*:*) echo crisv32-axis-linux-gnu - exit 0 ;; + exit ;; frv:Linux:*:*) echo frv-unknown-linux-gnu - exit 0 ;; + exit ;; + i*86:Linux:*:*) + echo ${UNAME_MACHINE}-pc-linux-gnu + exit ;; ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; m68*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - mips:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips - #undef mipsel - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - mips64:Linux:*:*) + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #undef CPU - #undef mips64 - #undef mips64el + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el + CPU=${UNAME_MACHINE}el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 + CPU=${UNAME_MACHINE} #else CPU= #endif #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit 0 ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit 0 ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null - if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit 0 ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-gnu + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in @@ -904,87 +941,40 @@ EOF PA8*) echo hppa2.0-unknown-linux-gnu ;; *) echo hppa-unknown-linux-gnu ;; esac - exit 0 ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit 0 ;; + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux - exit 0 ;; + exit ;; sh64*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; x86_64:Linux:*:*) echo x86_64-unknown-linux-gnu - exit 0 ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. - ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit 0 ;; - coff-i386) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" - exit 0 ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit 0 ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #ifdef __INTEL_COMPILER - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0 - test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0 - ;; + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. echo i386-sequent-sysv4 - exit 0 ;; + exit ;; i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... @@ -992,27 +982,27 @@ EOF # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; + exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. 
echo ${UNAME_MACHINE}-pc-os2-emx - exit 0 ;; + exit ;; i*86:XTS-300:*:STOP) echo ${UNAME_MACHINE}-unknown-stop - exit 0 ;; + exit ;; i*86:atheos:*:*) echo ${UNAME_MACHINE}-unknown-atheos - exit 0 ;; - i*86:syllable:*:*) + exit ;; + i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable - exit 0 ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; i*86:*DOS:*:*) echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit 0 ;; + exit ;; i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then @@ -1020,15 +1010,16 @@ EOF else echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} fi - exit 0 ;; - i*86:*:5:[78]*) + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit 0 ;; + exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null 2>&1 ; then echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 fi - exit 0 ;; + exit ;; mini*:CTIX:SYS*5:*) # "miniframe" echo m68010-convergent-sysv - exit 0 ;; + exit ;; mc68k:UNIX:SYSTEM5:3.51m) echo m68k-convergent-sysv - exit 0 ;; + exit ;; M680?0:D-NIX:5.3:*) echo m68k-diab-dnix - exit 0 ;; + exit ;; M68*:*:R3V[5678]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) echo m68k-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 - exit 0 ;; + exit ;; TSUNAMI:LynxOS:2.*:*) echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | 
PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; + exit ;; SM[BE]S:UNIX_SV:*:*) echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; + exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 - exit 0 ;; + exit ;; RM*:SINIX-*:*:*) echo mips-sni-sysv4 - exit 0 ;; + exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` @@ -1120,69 +1124,94 @@ EOF else echo ns32k-sni-sysv fi - exit 0 ;; + exit ;; PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort # says echo i586-unisys-sysv4 - exit 0 ;; + exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm echo hppa1.1-stratus-sysv4 - exit 0 ;; + exit ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. echo i860-stratus-sysv4 - exit 0 ;; + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos - exit 0 ;; + exit ;; mc68*:A/UX:*:*) echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; + exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 - exit 0 ;; + exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then echo mips-nec-sysv${UNAME_RELEASE} else echo mips-unknown-sysv${UNAME_RELEASE} fi - exit 0 ;; + exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. echo powerpc-be-beos - exit 0 ;; + exit ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. echo powerpc-apple-beos - exit 0 ;; + exit ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. echo i586-pc-beos - exit 0 ;; + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. + echo i586-pc-haiku + exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; SX-5:SUPER-UX:*:*) echo sx5-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; SX-6:SUPER-UX:*:*) echo sx6-nec-superux${UNAME_RELEASE} - exit 0 ;; + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Rhapsody:*:*) echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown case $UNAME_PROCESSOR in - *86) UNAME_PROCESSOR=i686 ;; + i386) + eval $set_cc_for_build + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + UNAME_PROCESSOR="x86_64" + fi + fi ;; unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit 0 ;; + exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = "x86"; then @@ -1190,22 +1219,25 @@ EOF UNAME_MACHINE=pc fi echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit 0 ;; + exit ;; *:QNX:*:4*) echo i386-pc-qnx - exit 0 ;; + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} - exit 0 ;; + exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux - exit 0 ;; + exit ;; BS2000:POSIX*:*:*) echo bs2000-siemens-sysv - exit 0 ;; + exit ;; DS/*:UNIX_System_V:*:*) echo 
${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit 0 ;; + exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 @@ -1216,38 +1248,50 @@ EOF UNAME_MACHINE="$cputype" fi echo ${UNAME_MACHINE}-unknown-plan9 - exit 0 ;; + exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 - exit 0 ;; + exit ;; *:TENEX:*:*) echo pdp10-unknown-tenex - exit 0 ;; + exit ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) echo pdp10-dec-tops20 - exit 0 ;; + exit ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) echo pdp10-xkl-tops20 - exit 0 ;; + exit ;; *:TOPS-20:*:*) echo pdp10-unknown-tops20 - exit 0 ;; + exit ;; *:ITS:*:*) echo pdp10-unknown-its - exit 0 ;; + exit ;; SEI:*:*:SEIUX) echo mips-sei-seiux${UNAME_RELEASE} - exit 0 ;; + exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; + exit ;; *:*VMS:*:*) UNAME_MACHINE=`(uname -p) 2>/dev/null` case "${UNAME_MACHINE}" in - A*) echo alpha-dec-vms && exit 0 ;; - I*) echo ia64-dec-vms && exit 0 ;; - V*) echo vax-dec-vms && exit 0 ;; - esac + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; + i*86:AROS:*:*) + echo ${UNAME_MACHINE}-pc-aros + exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 @@ -1279,7 +1323,7 @@ main () #endif #if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); + printf ("arm-acorn-riscix\n"); exit (0); #endif #if defined (hp300) && !defined (hpux) @@ -1368,11 +1412,12 @@ main () } EOF -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0 +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } # Convex versions that predate uname can use getsysinfo(1) @@ -1381,22 +1426,22 @@ then case `getsysinfo -f cpu_type` in c1*) echo c1-convex-bsd - exit 0 ;; + exit ;; c2*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi - exit 0 ;; + exit ;; c34*) echo c34-convex-bsd - exit 0 ;; + exit ;; c38*) echo c38-convex-bsd - exit 0 ;; + exit ;; c4*) echo c4-convex-bsd - exit 0 ;; + exit ;; esac fi @@ -1407,7 +1452,9 @@ This script, last modified $timestamp, has failed to recognize the operating system you are using. It is advised that you download the most up to date version of the config scripts from - ftp://ftp.gnu.org/pub/gnu/config/ + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +and + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD If the version you run ($0) is already up to date, please send the following data and any information you think might be diff --git a/autoconf/config.sub b/autoconf/config.sub index edb6b663ca2b3..8ca084bf33401 100755 --- a/autoconf/config.sub +++ b/autoconf/config.sub @@ -1,9 +1,10 @@ #! /bin/sh # Configuration validation subroutine script. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. 
+# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 +# Free Software Foundation, Inc. -timestamp='2004-08-29' +timestamp='2009-08-19' # This file is (in principle) common to ALL GNU software. # The presence of a machine in this file suggests that SOME GNU software @@ -21,22 +22,26 @@ timestamp='2004-08-29' # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. + # Please send patches to <config-patches@gnu.org>. Submit a context -# diff and a properly formatted ChangeLog entry. +# diff and a properly formatted GNU ChangeLog entry. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. # If it is invalid, we print an error message on stderr and exit with code 1. # Otherwise, we print the canonical config type on stdout and succeed. +# You can get the latest version of this script from: + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD + # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases # that are meaningful with *any* GNU software. @@ -70,8 +75,8 @@ Report bugs and patches to <config-patches@gnu.org>." version="\ GNU config.sub ($timestamp) -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 -Free Software Foundation, Inc. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -83,11 +88,11 @@ Try \`$me --help' for more information." while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; + echo "$timestamp" ; exit ;; --version | -v ) - echo "$version" ; exit 0 ;; + echo "$version" ; exit ;; --help | --h* | -h ) - echo "$usage"; exit 0 ;; + echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. @@ -99,7 +104,7 @@ while test $# -gt 0 ; do *local*) # First pass through any local machine types. echo $1 - exit 0;; + exit ;; * ) break ;; @@ -118,8 +123,10 @@ esac # Here we must recognize all the valid KERNEL-OS combinations.
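# Illustrative note (not from the upstream script): the two sed expressions
# just below peel off the trailing KERNEL-OS pair -- the last two
# dash-separated fields. For example, $1 = x86_64-unknown-linux-gnu gives
# maybe_os=linux-gnu and basic_machine=x86_64-unknown.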
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in - nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \ - kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) + nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ + uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ + kopensolaris*-gnu* | \ + storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; @@ -145,10 +152,13 @@ case $os in -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis | -knuth | -cray) + -apple | -axis | -knuth | -cray | -microblaze) os= basic_machine=$1 ;; + -bluegene*) + os=-cnk + ;; -sim | -cisco | -oki | -wec | -winbond) os= basic_machine=$1 @@ -170,6 +180,10 @@ case $os in -hiux*) os=-hiuxwe2 ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco5) os=-sco3.2v5 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` @@ -186,6 +200,10 @@ case $os in # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; -sco*) os=-sco3.2v2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` @@ -230,22 +248,28 @@ case $basic_machine in | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | bfin \ | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ - | fr30 | frv \ + | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ - | m32r | m32rle | m68000 | m68k | m88k | mcore \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep | metag \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ - | mips64vr | mips64vrel \ + | mips64octeon | mips64octeonel \ | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ | mips64vr4100 | mips64vr4100el \ | mips64vr4300 | mips64vr4300el \ | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ | mipsisa64 | mipsisa64el \ @@ -254,21 +278,26 @@ case $basic_machine in | mipsisa64sr71k | mipsisa64sr71kel \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ + | moxie \ + | mt \ | msp430 \ + | nios | nios2 \ | ns16k | ns32k \ - | openrisc | or32 \ + | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ | pyramid \ - | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ - | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv8 | sparcv9 | sparcv9b \ - | strongarm \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu | strongarm \ | tahoe | thumb | 
tic4x | tic80 | tron \ | v850 | v850e \ | we32k \ - | x86 | xscale | xstormy16 | xtensa \ - | z8k) + | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k | z80) basic_machine=$basic_machine-unknown ;; m6811 | m68hc11 | m6812 | m68hc12) @@ -278,6 +307,9 @@ case $basic_machine in ;; m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) ;; + ms1) + basic_machine=mt-unknown + ;; # We use `pc' rather than `unknown' # because (1) that's what they normally are, and @@ -297,28 +329,32 @@ case $basic_machine in | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* \ - | bs2000-* \ + | avr-* | avr32-* \ + | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ - | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ - | m32r-* | m32rle-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | mcore-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ - | mips64vr-* | mips64vrel-* \ + | mips64octeon-* | mips64octeonel-* \ | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ | mips64vr4100-* | mips64vr4100el-* \ | mips64vr4300-* | mips64vr4300el-* \ | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ | mipsisa64-* | mipsisa64el-* \ @@ -327,26 +363,33 @@ case $basic_machine in | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipstx39-* | mipstx39el-* \ | mmix-* \ + | mt-* \ | msp430-* \ + | nios-* | nios2-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ | pyramid-* \ | romp-* | rs6000-* \ - | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ | tahoe-* | thumb-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \ | tron-* \ | v850-* | v850e-* | vax-* \ | we32k-* \ - | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ - | xtensa-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa*-* \ | ymp-* \ - | z8k-*) + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 
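# Illustrative sketch, not part of the patch: the alias entries added in the
# next hunk all share one shape -- rewrite a bare alias into a canonical
# CPU-VENDOR pair plus, where the alias implies one, a default OS, e.g.:
#
#   blackfin)
#       basic_machine=bfin-unknown
#       os=-linux
#       ;;
#
# so `sh ./config.sub blackfin` should print bfin-unknown-linux-gnu once the
# later OS table canonicalizes -linux to -linux-gnu.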
@@ -410,6 +453,10 @@ case $basic_machine in basic_machine=m68k-apollo os=-bsd ;; + aros) + basic_machine=i386-pc + os=-aros + ;; aux) basic_machine=m68k-apple os=-aux @@ -418,10 +465,26 @@ case $basic_machine in basic_machine=ns32k-sequent os=-dynix ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; c90) basic_machine=c90-cray os=-unicos ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; convex-c1) basic_machine=c1-convex os=-bsd @@ -450,8 +513,8 @@ case $basic_machine in basic_machine=craynv-cray os=-unicosmp ;; - cr16c) - basic_machine=cr16c-unknown + cr16) + basic_machine=cr16-unknown os=-elf ;; crds | unos) @@ -489,6 +552,14 @@ case $basic_machine in basic_machine=m88k-motorola os=-sysv3 ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx @@ -639,6 +710,14 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; m88k-omron*) basic_machine=m88k-omron ;; @@ -650,10 +729,17 @@ case $basic_machine in basic_machine=ns32k-utek os=-sysv ;; + microblaze) + basic_machine=microblaze-xilinx + ;; mingw32) basic_machine=i386-pc os=-mingw32 ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; miniframe) basic_machine=m68000-convergent ;; @@ -679,6 +765,9 @@ case $basic_machine in basic_machine=i386-pc os=-msdos ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; mvs) basic_machine=i370-ibm os=-mvs @@ -754,9 +843,8 @@ case $basic_machine in basic_machine=hppa1.1-oki os=-proelf ;; - or32 | or32-*) + openrisc | openrisc-*) basic_machine=or32-unknown - os=-coff ;; os400) basic_machine=powerpc-ibm @@ -778,6 +866,14 @@ case $basic_machine in basic_machine=i860-intel os=-osf ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; pbd) basic_machine=sparc-tti ;; @@ -787,6 +883,12 @@ case $basic_machine in pc532 | pc532-*) basic_machine=ns32k-pc532 ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; pentium | p5 | k5 | k6 | nexgen | viac3) basic_machine=i586-pc ;; @@ -843,6 +945,10 @@ case $basic_machine in basic_machine=i586-unknown os=-pw32 ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; rom68k) basic_machine=m68k-rom68k os=-coff @@ -869,6 +975,10 @@ case $basic_machine in sb1el) basic_machine=mipsisa64sb1el-unknown ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; sei) basic_machine=mips-sei os=-seiux @@ -880,6 +990,9 @@ case $basic_machine in basic_machine=sh-hitachi os=-hms ;; + sh5el) + basic_machine=sh5le-unknown + ;; sh64) basic_machine=sh64-unknown ;; @@ -969,6 +1082,10 @@ case $basic_machine in basic_machine=tic6x-unknown os=-coff ;; + tile*) + basic_machine=tile-unknown + os=-linux-gnu + ;; tx39) basic_machine=mipstx39-unknown ;; @@ -1029,6 +1146,10 @@ case $basic_machine in basic_machine=hppa1.1-winbond os=-proelf ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; xps | xps100) basic_machine=xps100-honeywell ;; @@ -1040,6 +1161,10 @@ case $basic_machine in basic_machine=z8k-unknown os=-sim ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; none) 
basic_machine=none-none os=-none @@ -1078,13 +1203,10 @@ case $basic_machine in we32k) basic_machine=we32k-att ;; - sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele) + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) basic_machine=sh-unknown ;; - sh64) - basic_machine=sh64-unknown - ;; - sparc | sparcv8 | sparcv9 | sparcv9b) + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) basic_machine=sparc-sun ;; cydra) @@ -1151,26 +1273,30 @@ case $os in # Each alternative MUST END IN A *, to match a version number. # -sysv* is not here because it comes later, after sysvr4. -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -kopensolaris* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ + | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* \ + | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*) + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1188,7 +1314,7 @@ case $os in os=`echo $os | sed -e 's|nto|nto-qnx|'` ;; -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) ;; -mac*) @@ -1297,6 +1423,12 @@ case $os in -kaos*) os=-kaos ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; -none) ;; *) @@ -1319,6 +1451,12 @@ else # system, and we'll never get to this point. case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; *-acorn) os=-riscix1.2 ;; @@ -1328,9 +1466,9 @@ case $basic_machine in arm*-semi) os=-aout ;; - c4x-* | tic4x-*) - os=-coff - ;; + c4x-* | tic4x-*) + os=-coff + ;; # This must come before the *-dec entry. 
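# (Clarifying note: case arms are tried top to bottom and the first match
# wins, so this more specific pdp10-* pattern has to run before the generic
# *-dec vendor default further down.)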
pdp10-*) os=-tops20 @@ -1356,6 +1494,9 @@ case $basic_machine in m68*-cisco) os=-aout ;; + mep-*) + os=-elf + ;; mips*-cisco) os=-elf ;; @@ -1374,6 +1515,9 @@ case $basic_machine in *-be) os=-beos ;; + *-haiku) + os=-haiku + ;; *-ibm) os=-aix ;; @@ -1482,7 +1626,7 @@ case $basic_machine in -sunos*) vendor=sun ;; - -aix*) + -cnk*|-aix*) vendor=ibm ;; -beos*) @@ -1545,7 +1689,7 @@ case $basic_machine in esac echo $basic_machine$os -exit 0 +exit # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 6b3c4caac0ca5..f1b060e9487a3 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -31,12 +31,12 @@ dnl=== dnl===-----------------------------------------------------------------------=== dnl Initialize autoconf and define the package name, version number and dnl email address for reporting bugs. -AC_INIT([[llvm]],[[2.6svn]],[llvmbugs@cs.uiuc.edu]) +AC_INIT([[llvm]],[[2.7svn]],[llvmbugs@cs.uiuc.edu]) dnl Provide a copyright substitution and ensure the copyright notice is included dnl in the output of --version option of the generated configure script. -AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign."]) -AC_COPYRIGHT([Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign.]) +AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign."]) +AC_COPYRIGHT([Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign.]) dnl Indicate that we require autoconf 2.59 or later. Ths is needed because we dnl use some autoconf macros only available in 2.59. @@ -165,6 +165,11 @@ AC_CACHE_CHECK([type of operating system we're going to host on], llvm_cv_no_link_all_option="-Wl,-z,defaultextract" llvm_cv_os_type="SunOS" llvm_cv_platform_type="Unix" ;; + *-*-auroraux*) + llvm_cv_link_all_option="-Wl,-z,allextract" + llvm_cv_link_all_option="-Wl,-z,defaultextract" + llvm_cv_os_type="AuroraUX" + llvm_cv_platform_type="Unix" ;; *-*-win32*) llvm_cv_link_all_option="-Wl,--whole-archive" llvm_cv_no_link_all_option="-Wl,--no-whole-archive" @@ -175,6 +180,21 @@ AC_CACHE_CHECK([type of operating system we're going to host on], llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="MingW" llvm_cv_platform_type="Win32" ;; + *-*-haiku*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Haiku" + llvm_cv_platform_type="Unix" ;; + *-unknown-eabi*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; + *-unknown-elf*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; *) llvm_cv_link_all_option="" llvm_cv_no_link_all_option="" @@ -182,6 +202,47 @@ AC_CACHE_CHECK([type of operating system we're going to host on], llvm_cv_platform_type="Unknown" ;; esac]) +AC_CACHE_CHECK([type of operating system we're going to target], + [llvm_cv_target_os_type], +[case $target in + *-*-aix*) + llvm_cv_target_os_type="AIX" ;; + *-*-irix*) + llvm_cv_target_os_type="IRIX" ;; + *-*-cygwin*) + llvm_cv_target_os_type="Cygwin" ;; + *-*-darwin*) + llvm_cv_target_os_type="Darwin" ;; + *-*-freebsd*) + llvm_cv_target_os_type="FreeBSD" ;; + *-*-openbsd*) + llvm_cv_target_os_type="OpenBSD" ;; + *-*-netbsd*) + llvm_cv_target_os_type="NetBSD" ;; + *-*-dragonfly*) + 
llvm_cv_target_os_type="DragonFly" ;; + *-*-hpux*) + llvm_cv_target_os_type="HP-UX" ;; + *-*-interix*) + llvm_cv_target_os_type="Interix" ;; + *-*-linux*) + llvm_cv_target_os_type="Linux" ;; + *-*-solaris*) + llvm_cv_target_os_type="SunOS" ;; + *-*-auroraux*) + llvm_cv_target_os_type="AuroraUX" ;; + *-*-win32*) + llvm_cv_target_os_type="Win32" ;; + *-*-mingw*) + llvm_cv_target_os_type="MingW" ;; + *-*-haiku*) + llvm_cv_target_os_type="Haiku" ;; + *-unknown-eabi*) + llvm_cv_target_os_type="Freestanding" ;; + *) + llvm_cv_target_os_type="Unknown" ;; +esac]) + dnl Make sure we aren't attempting to configure for an unknown system if test "$llvm_cv_os_type" = "Unknown" ; then AC_MSG_ERROR([Operating system is unknown, configure can't continue]) @@ -190,6 +251,8 @@ fi dnl Set the "OS" Makefile variable based on the platform type so the dnl makefile can configure itself to specific build hosts AC_SUBST(OS,$llvm_cv_os_type) +AC_SUBST(HOST_OS,$llvm_cv_os_type) +AC_SUBST(TARGET_OS,$llvm_cv_target_os_type) dnl Set the LINKALL and NOLINKALL Makefile variables based on the platform AC_SUBST(LINKALL,$llvm_cv_link_all_option) @@ -221,12 +284,13 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch], sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; alpha*-*) llvm_cv_target_arch="Alpha" ;; - ia64-*) llvm_cv_target_arch="IA64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; mips-*) llvm_cv_target_arch="Mips" ;; pic16-*) llvm_cv_target_arch="PIC16" ;; xcore-*) llvm_cv_target_arch="XCore" ;; msp430-*) llvm_cv_target_arch="MSP430" ;; + s390x-*) llvm_cv_target_arch="SystemZ" ;; + bfin-*) llvm_cv_target_arch="Blackfin" ;; *) llvm_cv_target_arch="Unknown" ;; esac]) @@ -331,6 +395,16 @@ else AC_SUBST(DEBUG_RUNTIME,[[DEBUG_RUNTIME=1]]) fi +dnl --enable-debug-symbols : should even optimized compiler libraries +dnl have debug symbols? 
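dnl Illustrative, hypothetical invocation (not part of the patch): the new
dnl option wired up just below lets a release build keep debug info in its
dnl optimized libraries, e.g.
dnl   ./configure --enable-optimized --enable-debug-symbols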
+AC_ARG_ENABLE(debug-symbols, + AS_HELP_STRING(--enable-debug-symbols,[Build compiler with debug symbols (default is NO if optimization is on and YES if it's off)]),,enableval=no) +if test ${enableval} = "no" ; then + AC_SUBST(DEBUG_SYMBOLS,[[]]) +else + AC_SUBST(DEBUG_SYMBOLS,[[DEBUG_SYMBOLS=1]]) +fi + dnl --enable-jit: check whether they want to enable the jit AC_ARG_ENABLE(jit, AS_HELP_STRING(--enable-jit, @@ -341,18 +415,19 @@ then AC_SUBST(JIT,[[]]) else case "$llvm_cv_target_arch" in - x86) AC_SUBST(TARGET_HAS_JIT,1) ;; - Sparc) AC_SUBST(TARGET_HAS_JIT,0) ;; - PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;; - x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;; - Alpha) AC_SUBST(TARGET_HAS_JIT,1) ;; - IA64) AC_SUBST(TARGET_HAS_JIT,0) ;; - ARM) AC_SUBST(TARGET_HAS_JIT,0) ;; - Mips) AC_SUBST(TARGET_HAS_JIT,0) ;; - PIC16) AC_SUBST(TARGET_HAS_JIT,0) ;; - XCore) AC_SUBST(TARGET_HAS_JIT,0) ;; - MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;; - *) AC_SUBST(TARGET_HAS_JIT,0) ;; + x86) AC_SUBST(TARGET_HAS_JIT,1) ;; + Sparc) AC_SUBST(TARGET_HAS_JIT,0) ;; + PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;; + x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;; + Alpha) AC_SUBST(TARGET_HAS_JIT,1) ;; + ARM) AC_SUBST(TARGET_HAS_JIT,1) ;; + Mips) AC_SUBST(TARGET_HAS_JIT,0) ;; + PIC16) AC_SUBST(TARGET_HAS_JIT,0) ;; + XCore) AC_SUBST(TARGET_HAS_JIT,0) ;; + MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;; + SystemZ) AC_SUBST(TARGET_HAS_JIT,0) ;; + Blackfin) AC_SUBST(TARGET_HAS_JIT,0) ;; + *) AC_SUBST(TARGET_HAS_JIT,0) ;; esac fi @@ -398,44 +473,49 @@ AC_DEFINE_UNQUOTED([ENABLE_PIC],$ENABLE_PIC, dnl Allow specific targets to be specified for building (or not) TARGETS_TO_BUILD="" AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets], - [Build specific host targets: all,host-only,{target-name} (default=all)]),, + [Build specific host targets: all or target1,target2,... 
Valid targets are: + host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu, pic16, + xcore, msp430, systemz, blackfin, cbe, msil, and cpp (default=all)]),, enableval=all) +if test "$enableval" = host-only ; then + enableval=host +fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha IA64 ARM Mips CellSPU PIC16 XCore MSP430 CBackend MSIL CppBackend" ;; - host-only) - case "$llvm_cv_target_arch" in - x86) TARGETS_TO_BUILD="X86" ;; - x86_64) TARGETS_TO_BUILD="X86" ;; - Sparc) TARGETS_TO_BUILD="Sparc" ;; - PowerPC) TARGETS_TO_BUILD="PowerPC" ;; - Alpha) TARGETS_TO_BUILD="Alpha" ;; - IA64) TARGETS_TO_BUILD="IA64" ;; - ARM) TARGETS_TO_BUILD="ARM" ;; - Mips) TARGETS_TO_BUILD="Mips" ;; - CellSPU|SPU) TARGETS_TO_BUILD="CellSPU" ;; - PIC16) TARGETS_TO_BUILD="PIC16" ;; - XCore) TARGETS_TO_BUILD="XCore" ;; - MSP430) TARGETS_TO_BUILD="MSP430" ;; - *) AC_MSG_ERROR([Can not set target to build]) ;; - esac - ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in - x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; - x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; - sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; - powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; - ia64) TARGETS_TO_BUILD="IA64 $TARGETS_TO_BUILD" ;; - arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; - mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; - spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; - pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; - xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; - msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; - cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;; - msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;; - cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; + x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; + powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; + arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; + mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; + spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; + pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; + xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; + msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; + systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; + cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;; + msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;; + cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; + host) case "$llvm_cv_target_arch" in + x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; + PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + Alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; + ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; + Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; + CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; + PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; + XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; + MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; + SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; 
+ Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; + *) AC_MSG_ERROR([Can not set target to build]) ;; + esac ;; *) AC_MSG_ERROR([Unrecognized target $a_target]) ;; esac done @@ -454,17 +534,22 @@ for a_target in $TARGETS_TO_BUILD; do done # Build the LLVM_TARGET and LLVM_ASM_PRINTER macro uses for -# Targets.def and AsmPrinters.def. +# Targets.def, AsmPrinters.def, and AsmParsers.def. LLVM_ENUM_TARGETS="" LLVM_ENUM_ASM_PRINTERS="" +LLVM_ENUM_ASM_PARSERS="" for target_to_build in $TARGETS_TO_BUILD; do LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS" if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS"; fi + if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then + LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS"; + fi done AC_SUBST(LLVM_ENUM_TARGETS) AC_SUBST(LLVM_ENUM_ASM_PRINTERS) +AC_SUBST(LLVM_ENUM_ASM_PARSERS) dnl Prevent the CBackend from using printf("%a") for floating point so older dnl C compilers that cannot deal with the 0x0p+0 hex floating point format @@ -673,6 +758,46 @@ if test "$DOT" != "echo dot" ; then AC_DEFINE_UNQUOTED([LLVM_PATH_DOT],"$DOT${EXEEXT}", [Define to path to dot program if found or 'echo dot' otherwise]) fi +AC_PATH_PROG(FDP, [fdp], [echo fdp]) +if test "$FDP" != "echo fdp" ; then + AC_DEFINE([HAVE_FDP],[1],[Define if the neat program is available]) + dnl If we're targeting for mingw we should emit windows paths, not msys + if test "$llvm_cv_os_type" = "MingW" ; then + FDP=`echo $FDP | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' ` + fi + AC_DEFINE_UNQUOTED([LLVM_PATH_FDP],"$FDP${EXEEXT}", + [Define to path to fdp program if found or 'echo fdp' otherwise]) +fi +AC_PATH_PROG(NEATO, [neato], [echo neato]) +if test "$NEATO" != "echo neato" ; then + AC_DEFINE([HAVE_NEATO],[1],[Define if the neat program is available]) + dnl If we're targeting for mingw we should emit windows paths, not msys + if test "$llvm_cv_os_type" = "MingW" ; then + NEATO=`echo $NEATO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' ` + fi + AC_DEFINE_UNQUOTED([LLVM_PATH_NEATO],"$NEATO${EXEEXT}", + [Define to path to neato program if found or 'echo neato' otherwise]) +fi +AC_PATH_PROG(TWOPI, [twopi], [echo twopi]) +if test "$TWOPI" != "echo twopi" ; then + AC_DEFINE([HAVE_TWOPI],[1],[Define if the neat program is available]) + dnl If we're targeting for mingw we should emit windows paths, not msys + if test "$llvm_cv_os_type" = "MingW" ; then + TWOPI=`echo $TWOPI | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' ` + fi + AC_DEFINE_UNQUOTED([LLVM_PATH_TWOPI],"$TWOPI${EXEEXT}", + [Define to path to twopi program if found or 'echo twopi' otherwise]) +fi +AC_PATH_PROG(CIRCO, [circo], [echo circo]) +if test "$CIRCO" != "echo circo" ; then + AC_DEFINE([HAVE_CIRCO],[1],[Define if the neat program is available]) + dnl If we're targeting for mingw we should emit windows paths, not msys + if test "$llvm_cv_os_type" = "MingW" ; then + CIRCO=`echo $CIRCO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' ` + fi + AC_DEFINE_UNQUOTED([LLVM_PATH_CIRCO],"$CIRCO${EXEEXT}", + [Define to path to circo program if found or 'echo circo' otherwise]) +fi AC_PATH_PROGS(GV, [gv gsview32], [echo gv]) if test "$GV" != "echo gv" ; then AC_DEFINE([HAVE_GV],[1],[Define if the gv program is available]) @@ -824,8 +949,6 @@ dnl=== SECTION 5: Check for libraries dnl=== dnl===-----------------------------------------------------------------------=== -dnl libelf is for sparc only; we 
can ignore it if we don't have it -AC_CHECK_LIB(elf, elf_begin) AC_CHECK_LIB(m,sin) if test "$llvm_cv_os_type" = "MingW" ; then AC_CHECK_LIB(imagehlp, main) @@ -851,7 +974,7 @@ AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1], dnl pthread locking functions are optional - but llvm will not be thread-safe dnl without locks. if test "$ENABLE_THREADS" -eq 1 ; then - AC_CHECK_LIB(pthread,pthread_mutex_init) + AC_CHECK_LIB(pthread, pthread_mutex_init) AC_SEARCH_LIBS(pthread_mutex_lock,pthread, AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1], [Have pthread_mutex_lock])) @@ -882,6 +1005,42 @@ AC_ARG_WITH(udis86, AC_DEFINE_UNQUOTED([USE_UDIS86],$USE_UDIS86, [Define if use udis86 library]) +dnl Allow OProfile support for JIT output. +AC_ARG_WITH(oprofile, + AS_HELP_STRING([--with-oprofile=], + [Tell OProfile >= 0.9.4 how to symbolize JIT output]), + [ + AC_SUBST(USE_OPROFILE, [1]) + case "$withval" in + /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; + no) llvm_cv_oppath= + AC_SUBST(USE_OPROFILE, [0]) ;; + *) llvm_cv_oppath="${withval}/lib/oprofile" + CPPFLAGS="-I${withval}/include";; + esac + if test -n "$llvm_cv_oppath" ; then + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" + dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744: + dnl libbfd is not included properly in libopagent in some Debian + dnl versions. If libbfd isn't found at all, we assume opagent works + dnl anyway. + AC_SEARCH_LIBS(bfd_init, bfd, [], []) + AC_SEARCH_LIBS(op_open_agent, opagent, [], [ + echo "Error! You need to have libopagent around." + exit -1 + ]) + AC_CHECK_HEADER([opagent.h], [], [ + echo "Error! You need to have opagent.h around." + exit -1 + ]) + fi + ], + [ + AC_SUBST(USE_OPROFILE, [0]) + ]) +AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE, + [Define if we have the oprofile JIT-support library]) + dnl===-----------------------------------------------------------------------=== dnl=== dnl=== SECTION 6: Check for header files @@ -944,10 +1103,11 @@ AC_CHECK_FUNCS([powf fmodf strtof round ]) AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ]) AC_CHECK_FUNCS([isatty mkdtemp mkstemp ]) AC_CHECK_FUNCS([mktemp realpath sbrk setrlimit strdup ]) -AC_CHECK_FUNCS([strerror strerror_r strerror_s ]) +AC_CHECK_FUNCS([strerror strerror_r strerror_s setenv ]) AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ]) AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp]) AC_C_PRINTF_A +dnl FIXME: This is no longer used, please remove (but test)!!! AC_FUNC_ALLOCA AC_FUNC_RAND48 @@ -1003,6 +1163,16 @@ dnl=== SECTION 9: Additional checks, variables, etc. dnl=== dnl===-----------------------------------------------------------------------=== +dnl Handle 32-bit linux systems running a 64-bit kernel. +dnl This has to come after section 4 because it invokes the compiler. +if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then + AC_IS_LINUX_MIXED + if test "$llvm_cv_linux_mixed" = "yes"; then + llvm_cv_target_arch="x86" + ARCH="x86" + fi +fi + dnl Check, whether __dso_handle is present AC_CHECK_FUNCS([__dso_handle]) @@ -1178,8 +1348,8 @@ dnl files can be updated automatically when their *.in sources change. 
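dnl (Descriptive note: under autoconf's default input naming, each
dnl AC_CONFIG_FILES/AC_CONFIG_HEADERS entry below names an output that
dnl configure regenerates from a matching .in template -- e.g. the new
dnl AsmParsers.def would come from an AsmParsers.def.in beside it.)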
AC_CONFIG_HEADERS([include/llvm/Config/config.h]) AC_CONFIG_FILES([include/llvm/Config/Targets.def]) AC_CONFIG_FILES([include/llvm/Config/AsmPrinters.def]) +AC_CONFIG_FILES([include/llvm/Config/AsmParsers.def]) AC_CONFIG_HEADERS([include/llvm/Support/DataTypes.h]) -AC_CONFIG_HEADERS([include/llvm/ADT/iterator.h]) dnl Configure the makefile's configuration data AC_CONFIG_FILES([Makefile.config]) diff --git a/autoconf/m4/config_makefile.m4 b/autoconf/m4/config_makefile.m4 index f21a25631edfe..b1eaffdcd85ce 100644 --- a/autoconf/m4/config_makefile.m4 +++ b/autoconf/m4/config_makefile.m4 @@ -5,5 +5,5 @@ AC_DEFUN([AC_CONFIG_MAKEFILE], [AC_CONFIG_COMMANDS($1, [${llvm_src}/autoconf/mkinstalldirs `dirname $1` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/$1 $1]) + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/$1 $1]) ]) diff --git a/autoconf/m4/linux_mixed_64_32.m4 b/autoconf/m4/linux_mixed_64_32.m4 new file mode 100644 index 0000000000000..123491f87e5e2 --- /dev/null +++ b/autoconf/m4/linux_mixed_64_32.m4 @@ -0,0 +1,17 @@ +# +# Some Linux machines run a 64-bit kernel with a 32-bit userspace. 'uname -m' +# shows these as x86_64. Ask the system 'gcc' what it thinks. +# +AC_DEFUN([AC_IS_LINUX_MIXED], +[AC_CACHE_CHECK(for 32-bit userspace on 64-bit system,llvm_cv_linux_mixed, +[ AC_LANG_PUSH([C]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM( + [[#ifndef __x86_64__ + error: Not x86-64 even if uname says so! + #endif + ]])], + [llvm_cv_linux_mixed=no], + [llvm_cv_linux_mixed=yes]) + AC_LANG_POP([C]) +]) +]) diff --git a/bindings/ada/analysis/llvm_analysis-binding.ads b/bindings/ada/analysis/llvm_analysis-binding.ads new file mode 100644 index 0000000000000..c51a50353f11d --- /dev/null +++ b/bindings/ada/analysis/llvm_analysis-binding.ads @@ -0,0 +1,32 @@ +-- This file is generated by SWIG. Do *not* modify by hand. +-- + +with llvm; +with Interfaces.C.Strings; + + +package LLVM_Analysis.Binding is + + function LLVMVerifyModule + (M : in llvm.LLVMModuleRef; + Action : in LLVM_Analysis.LLVMVerifierFailureAction; + OutMessage : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + function LLVMVerifyFunction + (Fn : in llvm.LLVMValueRef; + Action : in LLVM_Analysis.LLVMVerifierFailureAction) + return Interfaces.C.int; + + procedure LLVMViewFunctionCFG (Fn : in llvm.LLVMValueRef); + + procedure LLVMViewFunctionCFGOnly (Fn : in llvm.LLVMValueRef); + +private + + pragma Import (C, LLVMVerifyModule, "Ada_LLVMVerifyModule"); + pragma Import (C, LLVMVerifyFunction, "Ada_LLVMVerifyFunction"); + pragma Import (C, LLVMViewFunctionCFG, "Ada_LLVMViewFunctionCFG"); + pragma Import (C, LLVMViewFunctionCFGOnly, "Ada_LLVMViewFunctionCFGOnly"); + +end LLVM_Analysis.Binding; diff --git a/bindings/ada/analysis/llvm_analysis.ads b/bindings/ada/analysis/llvm_analysis.ads new file mode 100644 index 0000000000000..aa7b3f0e2e913 --- /dev/null +++ b/bindings/ada/analysis/llvm_analysis.ads @@ -0,0 +1,30 @@ +-- This file is generated by SWIG. Do *not* modify by hand. 
+-- + +with Interfaces.C; + + +package LLVM_Analysis is + + -- LLVMVerifierFailureAction + -- + type LLVMVerifierFailureAction is ( + LLVMAbortProcessAction, + LLVMPrintMessageAction, + LLVMReturnStatusAction); + + for LLVMVerifierFailureAction use + (LLVMAbortProcessAction => 0, + LLVMPrintMessageAction => 1, + LLVMReturnStatusAction => 2); + + pragma Convention (C, LLVMVerifierFailureAction); + + type LLVMVerifierFailureAction_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_Analysis.LLVMVerifierFailureAction; + + type LLVMVerifierFailureAction_view is access all + LLVM_Analysis.LLVMVerifierFailureAction; + +end LLVM_Analysis; diff --git a/bindings/ada/analysis/llvm_analysis_wrap.cxx b/bindings/ada/analysis/llvm_analysis_wrap.cxx new file mode 100644 index 0000000000000..f2a8637343de4 --- /dev/null +++ b/bindings/ada/analysis/llvm_analysis_wrap.cxx @@ -0,0 +1,369 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 1.3.36 + * + * This file is not intended to be easily readable and contains a number of + * coding conventions designed to improve portability and efficiency. Do not make + * changes to this file unless you know what you are doing--modify the SWIG + * interface file instead. + * ----------------------------------------------------------------------------- */ + + +#ifdef __cplusplus +template class SwigValueWrapper { + T *tt; +public: + SwigValueWrapper() : tt(0) { } + SwigValueWrapper(const SwigValueWrapper& rhs) : tt(new T(*rhs.tt)) { } + SwigValueWrapper(const T& t) : tt(new T(t)) { } + ~SwigValueWrapper() { delete tt; } + SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; } + operator T&() const { return *tt; } + T *operator&() { return tt; } +private: + SwigValueWrapper& operator=(const SwigValueWrapper& rhs); +}; + +template T SwigValueInit() { + return T(); +} +#endif + +/* ----------------------------------------------------------------------------- + * This section contains generic SWIG labels for method/variable + * declarations/attributes, and other compiler dependent labels. 
+ * ----------------------------------------------------------------------------- */ + +/* template workaround for compilers that cannot correctly implement the C++ standard */ +#ifndef SWIGTEMPLATEDISAMBIGUATOR +# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560) +# define SWIGTEMPLATEDISAMBIGUATOR template +# elif defined(__HP_aCC) +/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */ +/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */ +# define SWIGTEMPLATEDISAMBIGUATOR template +# else +# define SWIGTEMPLATEDISAMBIGUATOR +# endif +#endif + +/* inline attribute */ +#ifndef SWIGINLINE +# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__)) +# define SWIGINLINE inline +# else +# define SWIGINLINE +# endif +#endif + +/* attribute recognised by some compilers to avoid 'unused' warnings */ +#ifndef SWIGUNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +# elif defined(__ICC) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +#endif + +#ifndef SWIGUNUSEDPARM +# ifdef __cplusplus +# define SWIGUNUSEDPARM(p) +# else +# define SWIGUNUSEDPARM(p) p SWIGUNUSED +# endif +#endif + +/* internal SWIG method */ +#ifndef SWIGINTERN +# define SWIGINTERN static SWIGUNUSED +#endif + +/* internal inline SWIG method */ +#ifndef SWIGINTERNINLINE +# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE +#endif + +/* exporting methods */ +#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# ifndef GCC_HASCLASSVISIBILITY +# define GCC_HASCLASSVISIBILITY +# endif +#endif + +#ifndef SWIGEXPORT +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# if defined(STATIC_LINKED) +# define SWIGEXPORT +# else +# define SWIGEXPORT __declspec(dllexport) +# endif +# else +# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY) +# define SWIGEXPORT __attribute__ ((visibility("default"))) +# else +# define SWIGEXPORT +# endif +# endif +#endif + +/* calling conventions for Windows */ +#ifndef SWIGSTDCALL +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# define SWIGSTDCALL __stdcall +# else +# define SWIGSTDCALL +# endif +#endif + +/* Deal with Microsoft's attempt at deprecating C standard runtime functions */ +#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE) +# define _CRT_SECURE_NO_DEPRECATE +#endif + +/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */ +#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE) +# define _SCL_SECURE_NO_DEPRECATE +#endif + + + +#include +#include +#include +#if defined(_WIN32) || defined(__CYGWIN32__) +# define DllExport __declspec( dllexport ) +# define SWIGSTDCALL __stdcall +#else +# define DllExport +# define SWIGSTDCALL +#endif + + +#ifdef __cplusplus +# include +#endif + + + + +/* Support for throwing Ada exceptions from C/C++ */ + +typedef enum +{ + SWIG_AdaException, + SWIG_AdaOutOfMemoryException, + SWIG_AdaIndexOutOfRangeException, + SWIG_AdaDivideByZeroException, + SWIG_AdaArgumentOutOfRangeException, + SWIG_AdaNullReferenceException +} SWIG_AdaExceptionCodes; + + +typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *); + + +typedef struct +{ + SWIG_AdaExceptionCodes code; + 
SWIG_AdaExceptionCallback_t callback; +} + SWIG_AdaExceptions_t; + + +static +SWIG_AdaExceptions_t +SWIG_ada_exceptions[] = +{ + { SWIG_AdaException, NULL }, + { SWIG_AdaOutOfMemoryException, NULL }, + { SWIG_AdaIndexOutOfRangeException, NULL }, + { SWIG_AdaDivideByZeroException, NULL }, + { SWIG_AdaArgumentOutOfRangeException, NULL }, + { SWIG_AdaNullReferenceException, NULL } +}; + + +static +void +SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) +{ + SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback; + if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) { + callback = SWIG_ada_exceptions[code].callback; + } + callback(msg); +} + + + +#ifdef __cplusplus +extern "C" +#endif + +DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Analysis (SWIG_AdaExceptionCallback_t systemException, + SWIG_AdaExceptionCallback_t outOfMemory, + SWIG_AdaExceptionCallback_t indexOutOfRange, + SWIG_AdaExceptionCallback_t divideByZero, + SWIG_AdaExceptionCallback_t argumentOutOfRange, + SWIG_AdaExceptionCallback_t nullReference) +{ + SWIG_ada_exceptions [SWIG_AdaException].callback = systemException; + SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory; + SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange; + SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero; + SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange; + SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference; +} + + +/* Callback for returning strings to Ada without leaking memory */ + +typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *); +static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL; + + + +/* probably obsolete ... 
+#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Analysis(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +#include "llvm-c/Analysis.h" +//#include "llvm-c/BitReader.h" +//#include "llvm-c/BitWriter.h" +//#include "llvm-c/Core.h" +//#include "llvm-c/ExecutionEngine.h" +//#include "llvm-c/LinkTimeOptimizer.h" +//#include "llvm-c/lto.h" +//#include "llvm-c/Target.h" + + + +// struct LLVMCtxt; + + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport int SWIGSTDCALL Ada_LLVMVerifyModule ( + void * jarg1 + , + + int jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + LLVMVerifierFailureAction arg2 ; + char **arg3 = (char **) 0 ; + int result; + + arg1 = (LLVMModuleRef)jarg1; + + arg2 = (LLVMVerifierFailureAction) jarg2; + + arg3 = (char **)jarg3; + + result = (int)LLVMVerifyModule(arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMVerifyFunction ( + void * jarg1 + , + + int jarg2 + ) +{ + int jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + LLVMVerifierFailureAction arg2 ; + int result; + + arg1 = (LLVMValueRef)jarg1; + + arg2 = (LLVMVerifierFailureAction) jarg2; + + result = (int)LLVMVerifyFunction(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMViewFunctionCFG ( + void * jarg1 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + + arg1 = (LLVMValueRef)jarg1; + + LLVMViewFunctionCFG(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMViewFunctionCFGOnly ( + void * jarg1 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + + arg1 = (LLVMValueRef)jarg1; + + LLVMViewFunctionCFGOnly(arg1); + + +} + + + +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ada/bitreader/llvm_bit_reader-binding.ads b/bindings/ada/bitreader/llvm_bit_reader-binding.ads new file mode 100644 index 0000000000000..4fcdb4a84fcf9 --- /dev/null +++ b/bindings/ada/bitreader/llvm_bit_reader-binding.ads @@ -0,0 +1,52 @@ +-- This file is generated by SWIG. Do *not* modify by hand. 
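+--
+--  Usage sketch (illustrative only, not SWIG output): parsing a bitcode
+--  buffer through this binding.  "Buf" stands for an
+--  llvm.LLVMMemoryBufferRef obtained elsewhere, e.g. via the Core binding.
+--
+--     declare
+--        Module : aliased llvm.LLVMModuleRef;
+--        Err    : aliased Interfaces.C.Strings.chars_ptr;
+--     begin
+--        if LLVMParseBitcode (Buf, Module'Access, Err'Access) /= 0 then
+--           raise Program_Error;  --  Err then holds the parser's message
+--        end if;
+--     end;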
+--
+
+with llvm;
+with Interfaces.C.Strings;
+
+
+package LLVM_bit_Reader.Binding is
+
+   function LLVMParseBitcode
+     (MemBuf     : in llvm.LLVMMemoryBufferRef;
+      OutModule  : access llvm.LLVMModuleRef;
+      OutMessage : access Interfaces.C.Strings.chars_ptr)
+      return Interfaces.C.int;
+
+   function LLVMParseBitcodeInContext
+     (MemBuf     : in llvm.LLVMMemoryBufferRef;
+      ContextRef : in llvm.LLVMContextRef;
+      OutModule  : access llvm.LLVMModuleRef;
+      OutMessage : access Interfaces.C.Strings.chars_ptr)
+      return Interfaces.C.int;
+
+   function LLVMGetBitcodeModuleProvider
+     (MemBuf     : in llvm.LLVMMemoryBufferRef;
+      OutMP      : access llvm.LLVMModuleProviderRef;
+      OutMessage : access Interfaces.C.Strings.chars_ptr)
+      return Interfaces.C.int;
+
+   function LLVMGetBitcodeModuleProviderInContext
+     (MemBuf     : in llvm.LLVMMemoryBufferRef;
+      ContextRef : in llvm.LLVMContextRef;
+      OutMP      : access llvm.LLVMModuleProviderRef;
+      OutMessage : access Interfaces.C.Strings.chars_ptr)
+      return Interfaces.C.int;
+
+private
+
+   pragma Import (C, LLVMParseBitcode, "Ada_LLVMParseBitcode");
+   pragma Import
+     (C,
+      LLVMParseBitcodeInContext,
+      "Ada_LLVMParseBitcodeInContext");
+   pragma Import
+     (C,
+      LLVMGetBitcodeModuleProvider,
+      "Ada_LLVMGetBitcodeModuleProvider");
+   pragma Import
+     (C,
+      LLVMGetBitcodeModuleProviderInContext,
+      "Ada_LLVMGetBitcodeModuleProviderInContext");
+
+end LLVM_bit_Reader.Binding;
diff --git a/bindings/ada/bitreader/llvm_bit_reader.ads b/bindings/ada/bitreader/llvm_bit_reader.ads
new file mode 100644
index 0000000000000..7579dea2819d7
--- /dev/null
+++ b/bindings/ada/bitreader/llvm_bit_reader.ads
@@ -0,0 +1,6 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+package LLVM_bit_Reader is
+
+end LLVM_bit_Reader;
diff --git a/bindings/ada/bitreader/llvm_bitreader_wrap.cxx b/bindings/ada/bitreader/llvm_bitreader_wrap.cxx
new file mode 100644
index 0000000000000..b7ecbed355af5
--- /dev/null
+++ b/bindings/ada/bitreader/llvm_bitreader_wrap.cxx
@@ -0,0 +1,423 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+  T *tt;
+public:
+  SwigValueWrapper() : tt(0) { }
+  SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+  SwigValueWrapper(const T& t) : tt(new T(t)) { }
+  ~SwigValueWrapper() { delete tt; }
+  SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+  operator T&() const { return *tt; }
+  T *operator&() { return tt; }
+private:
+  SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+  return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+  SWIG_AdaException,
+  SWIG_AdaOutOfMemoryException,
+  SWIG_AdaIndexOutOfRangeException,
+  SWIG_AdaDivideByZeroException,
+  SWIG_AdaArgumentOutOfRangeException,
+  SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+  SWIG_AdaExceptionCodes code;
+
SWIG_AdaExceptionCallback_t callback; +} + SWIG_AdaExceptions_t; + + +static +SWIG_AdaExceptions_t +SWIG_ada_exceptions[] = +{ + { SWIG_AdaException, NULL }, + { SWIG_AdaOutOfMemoryException, NULL }, + { SWIG_AdaIndexOutOfRangeException, NULL }, + { SWIG_AdaDivideByZeroException, NULL }, + { SWIG_AdaArgumentOutOfRangeException, NULL }, + { SWIG_AdaNullReferenceException, NULL } +}; + + +static +void +SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) +{ + SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback; + if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) { + callback = SWIG_ada_exceptions[code].callback; + } + callback(msg); +} + + + +#ifdef __cplusplus +extern "C" +#endif + +DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_bit_Reader (SWIG_AdaExceptionCallback_t systemException, + SWIG_AdaExceptionCallback_t outOfMemory, + SWIG_AdaExceptionCallback_t indexOutOfRange, + SWIG_AdaExceptionCallback_t divideByZero, + SWIG_AdaExceptionCallback_t argumentOutOfRange, + SWIG_AdaExceptionCallback_t nullReference) +{ + SWIG_ada_exceptions [SWIG_AdaException].callback = systemException; + SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory; + SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange; + SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero; + SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange; + SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference; +} + + +/* Callback for returning strings to Ada without leaking memory */ + +typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *); +static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL; + + + +/* probably obsolete ... 
+#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_bit_Reader(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +//#include "llvm-c/Analysis.h" +#include "llvm-c/BitReader.h" +//#include "llvm-c/BitWriter.h" +//#include "llvm-c/Core.h" +//#include "llvm-c/ExecutionEngine.h" +//#include "llvm-c/LinkTimeOptimizer.h" +//#include "llvm-c/lto.h" +//#include "llvm-c/Target.h" + + + +// struct LLVMCtxt; + + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport int SWIGSTDCALL Ada_LLVMParseBitcode ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ; + LLVMModuleRef *arg2 = (LLVMModuleRef *) 0 ; + char **arg3 = (char **) 0 ; + int result; + + arg1 = (LLVMMemoryBufferRef)jarg1; + + arg2 = (LLVMModuleRef *)jarg2; + + arg3 = (char **)jarg3; + + result = (int)LLVMParseBitcode(arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMParseBitcodeInContext ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + void * jarg4 + ) +{ + int jresult ; + LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ; + LLVMContextRef arg2 = (LLVMContextRef) 0 ; + LLVMModuleRef *arg3 = (LLVMModuleRef *) 0 ; + char **arg4 = (char **) 0 ; + int result; + + arg1 = (LLVMMemoryBufferRef)jarg1; + + arg2 = (LLVMContextRef)jarg2; + + arg3 = (LLVMModuleRef *)jarg3; + + arg4 = (char **)jarg4; + + result = (int)LLVMParseBitcodeInContext(arg1,arg2,arg3,arg4); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMGetBitcodeModuleProvider ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ; + LLVMModuleProviderRef *arg2 = (LLVMModuleProviderRef *) 0 ; + char **arg3 = (char **) 0 ; + int result; + + arg1 = (LLVMMemoryBufferRef)jarg1; + + arg2 = (LLVMModuleProviderRef *)jarg2; + + arg3 = (char **)jarg3; + + result = (int)LLVMGetBitcodeModuleProvider(arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMGetBitcodeModuleProviderInContext ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + void * jarg4 + ) +{ + int jresult ; + LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ; + LLVMContextRef arg2 = (LLVMContextRef) 0 ; + LLVMModuleProviderRef *arg3 = (LLVMModuleProviderRef *) 0 ; + char **arg4 = (char **) 0 ; + int result; + + arg1 = (LLVMMemoryBufferRef)jarg1; + + arg2 = (LLVMContextRef)jarg2; + + arg3 = (LLVMModuleProviderRef *)jarg3; + + arg4 = (char **)jarg4; + + result = (int)LLVMGetBitcodeModuleProviderInContext(arg1,arg2,arg3,arg4); + jresult = result; + + + + return jresult; + +} + + + +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ada/bitwriter/llvm_bit_writer-binding.ads b/bindings/ada/bitwriter/llvm_bit_writer-binding.ads new file mode 100644 index 0000000000000..b5542df0e062a --- /dev/null +++ b/bindings/ada/bitwriter/llvm_bit_writer-binding.ads @@ -0,0 +1,28 @@ +-- This file is generated by SWIG. Do *not* modify by hand. 
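+--
+--  Usage sketch (illustrative only, not SWIG output): serializing a module.
+--  "Module" stands for a previously constructed llvm.LLVMModuleRef.
+--
+--     Path   : constant Interfaces.C.Strings.chars_ptr :=
+--                Interfaces.C.Strings.New_String ("out.bc");
+--     Status : constant Interfaces.C.int :=
+--                LLVMWriteBitcodeToFile (Module, Path);
+--     --  Status is 0 on success, following the llvm-c convention.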
+--
+
+with llvm;
+with Interfaces.C.Strings;
+
+
+package LLVM_bit_Writer.Binding is
+
+   function LLVMWriteBitcodeToFileHandle
+     (M      : in llvm.LLVMModuleRef;
+      Handle : in Interfaces.C.int)
+      return Interfaces.C.int;
+
+   function LLVMWriteBitcodeToFile
+     (M    : in llvm.LLVMModuleRef;
+      Path : in Interfaces.C.Strings.chars_ptr)
+      return Interfaces.C.int;
+
+private
+
+   pragma Import
+     (C,
+      LLVMWriteBitcodeToFileHandle,
+      "Ada_LLVMWriteBitcodeToFileHandle");
+   pragma Import (C, LLVMWriteBitcodeToFile, "Ada_LLVMWriteBitcodeToFile");
+
+end LLVM_bit_Writer.Binding;
diff --git a/bindings/ada/bitwriter/llvm_bit_writer.ads b/bindings/ada/bitwriter/llvm_bit_writer.ads
new file mode 100644
index 0000000000000..35b1f38aa9963
--- /dev/null
+++ b/bindings/ada/bitwriter/llvm_bit_writer.ads
@@ -0,0 +1,6 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
+--
+
+package LLVM_bit_Writer is
+
+end LLVM_bit_Writer;
diff --git a/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx b/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx
new file mode 100644
index 0000000000000..4abf44fffd5c8
--- /dev/null
+++ b/bindings/ada/bitwriter/llvm_bitwriter_wrap.cxx
@@ -0,0 +1,335 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+  T *tt;
+public:
+  SwigValueWrapper() : tt(0) { }
+  SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+  SwigValueWrapper(const T& t) : tt(new T(t)) { }
+  ~SwigValueWrapper() { delete tt; }
+  SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+  operator T&() const { return *tt; }
+  T *operator&() { return tt; }
+private:
+  SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+  return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+  SWIG_AdaException,
+  SWIG_AdaOutOfMemoryException,
+  SWIG_AdaIndexOutOfRangeException,
+  SWIG_AdaDivideByZeroException,
+  SWIG_AdaArgumentOutOfRangeException,
+  SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+  SWIG_AdaExceptionCodes code;
+
SWIG_AdaExceptionCallback_t callback; +} + SWIG_AdaExceptions_t; + + +static +SWIG_AdaExceptions_t +SWIG_ada_exceptions[] = +{ + { SWIG_AdaException, NULL }, + { SWIG_AdaOutOfMemoryException, NULL }, + { SWIG_AdaIndexOutOfRangeException, NULL }, + { SWIG_AdaDivideByZeroException, NULL }, + { SWIG_AdaArgumentOutOfRangeException, NULL }, + { SWIG_AdaNullReferenceException, NULL } +}; + + +static +void +SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) +{ + SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback; + if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) { + callback = SWIG_ada_exceptions[code].callback; + } + callback(msg); +} + + + +#ifdef __cplusplus +extern "C" +#endif + +DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_bit_Writer (SWIG_AdaExceptionCallback_t systemException, + SWIG_AdaExceptionCallback_t outOfMemory, + SWIG_AdaExceptionCallback_t indexOutOfRange, + SWIG_AdaExceptionCallback_t divideByZero, + SWIG_AdaExceptionCallback_t argumentOutOfRange, + SWIG_AdaExceptionCallback_t nullReference) +{ + SWIG_ada_exceptions [SWIG_AdaException].callback = systemException; + SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory; + SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange; + SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero; + SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange; + SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference; +} + + +/* Callback for returning strings to Ada without leaking memory */ + +typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *); +static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL; + + + +/* probably obsolete ... 
+#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_bit_Writer(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +#include "llvm-c/Analysis.h" +#include "llvm-c/BitReader.h" +#include "llvm-c/BitWriter.h" +#include "llvm-c/Core.h" +#include "llvm-c/ExecutionEngine.h" +#include "llvm-c/LinkTimeOptimizer.h" +#include "llvm-c/lto.h" +#include "llvm-c/Target.h" + + + +// struct LLVMCtxt; + + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport int SWIGSTDCALL Ada_LLVMWriteBitcodeToFileHandle ( + void * jarg1 + , + + int jarg2 + ) +{ + int jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + int arg2 ; + int result; + + arg1 = (LLVMModuleRef)jarg1; + + + arg2 = (int) jarg2; + + + result = (int)LLVMWriteBitcodeToFileHandle(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMWriteBitcodeToFile ( + void * jarg1 + , + + char * jarg2 + ) +{ + int jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *arg2 = (char *) 0 ; + int result; + + arg1 = (LLVMModuleRef)jarg1; + + arg2 = jarg2; + + result = (int)LLVMWriteBitcodeToFile(arg1,(char const *)arg2); + jresult = result; + + + + return jresult; + +} + + + +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ada/executionengine/llvm_execution_engine-binding.ads b/bindings/ada/executionengine/llvm_execution_engine-binding.ads new file mode 100644 index 0000000000000..a37c462cf3248 --- /dev/null +++ b/bindings/ada/executionengine/llvm_execution_engine-binding.ads @@ -0,0 +1,192 @@ +-- This file is generated by SWIG. Do *not* modify by hand. 
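+--
+--  Usage sketch (illustrative only, not SWIG output): bringing up the JIT.
+--  "MP" stands for an llvm.LLVMModuleProviderRef wrapping the module to run.
+--
+--     EE  : aliased LLVMExecutionEngineRef;
+--     Err : aliased Interfaces.C.Strings.chars_ptr;
+--  begin
+--     LLVMLinkInJIT;
+--     if LLVMCreateJITCompiler (EE'Access, MP, 2, Err'Access) /= 0 then
+--        raise Program_Error;  --  Err carries the failure reason
+--     end if;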
+-- + +with llvm; +with Interfaces.C.Strings; + + +package LLVM_execution_Engine.Binding is + + procedure LLVMLinkInJIT; + + procedure LLVMLinkInInterpreter; + + function LLVMCreateGenericValueOfInt + (Ty : in llvm.LLVMTypeRef; + N : in Interfaces.C.Extensions.unsigned_long_long; + IsSigned : in Interfaces.C.int) + return LLVM_execution_Engine.LLVMGenericValueRef; + + function LLVMCreateGenericValueOfPointer + (P : access Interfaces.C.Extensions.void) + return LLVM_execution_Engine.LLVMGenericValueRef; + + function LLVMCreateGenericValueOfFloat + (Ty : in llvm.LLVMTypeRef; + N : in Interfaces.C.double) + return LLVM_execution_Engine.LLVMGenericValueRef; + + function LLVMGenericValueIntWidth + (GenValRef : in LLVM_execution_Engine.LLVMGenericValueRef) + return Interfaces.C.unsigned; + + function LLVMGenericValueToInt + (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef; + IsSigned : in Interfaces.C.int) + return Interfaces.C.Extensions.unsigned_long_long; + + function LLVMGenericValueToPointer + (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef) + return access Interfaces.C.Extensions.void; + + function LLVMGenericValueToFloat + (TyRef : in llvm.LLVMTypeRef; + GenVal : in LLVM_execution_Engine.LLVMGenericValueRef) + return Interfaces.C.double; + + procedure LLVMDisposeGenericValue + (GenVal : in LLVM_execution_Engine.LLVMGenericValueRef); + + function LLVMCreateExecutionEngine + (OutEE : access LLVM_execution_Engine.LLVMExecutionEngineRef; + MP : in llvm.LLVMModuleProviderRef; + OutError : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + function LLVMCreateInterpreter + (OutInterp : access LLVM_execution_Engine.LLVMExecutionEngineRef; + MP : in llvm.LLVMModuleProviderRef; + OutError : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + function LLVMCreateJITCompiler + (OutJIT : access LLVM_execution_Engine.LLVMExecutionEngineRef; + MP : in llvm.LLVMModuleProviderRef; + OptLevel : in Interfaces.C.unsigned; + OutError : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + procedure LLVMDisposeExecutionEngine + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef); + + procedure LLVMRunStaticConstructors + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef); + + procedure LLVMRunStaticDestructors + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef); + + function LLVMRunFunctionAsMain + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + F : in llvm.LLVMValueRef; + ArgC : in Interfaces.C.unsigned; + ArgV : access Interfaces.C.Strings.chars_ptr; + EnvP : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + function LLVMRunFunction + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + F : in llvm.LLVMValueRef; + NumArgs : in Interfaces.C.unsigned; + Args : access LLVM_execution_Engine.LLVMGenericValueRef) + return LLVM_execution_Engine.LLVMGenericValueRef; + + procedure LLVMFreeMachineCodeForFunction + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + F : in llvm.LLVMValueRef); + + procedure LLVMAddModuleProvider + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + MP : in llvm.LLVMModuleProviderRef); + + function LLVMRemoveModuleProvider + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + MP : in llvm.LLVMModuleProviderRef; + OutMod : access llvm.LLVMModuleRef; + OutError : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + function LLVMFindFunction + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + Name : in Interfaces.C.Strings.chars_ptr; + OutFn : 
access llvm.LLVMValueRef) + return Interfaces.C.int; + + function LLVMGetExecutionEngineTargetData + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef) + return LLVM_execution_Engine.LLVMTargetDataRef; + + procedure LLVMAddGlobalMapping + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + Global : in llvm.LLVMValueRef; + Addr : access Interfaces.C.Extensions.void); + + function LLVMGetPointerToGlobal + (EE : in LLVM_execution_Engine.LLVMExecutionEngineRef; + Global : in llvm.LLVMValueRef) + return access Interfaces.C.Extensions.void; + +private + + pragma Import (C, LLVMLinkInJIT, "Ada_LLVMLinkInJIT"); + pragma Import (C, LLVMLinkInInterpreter, "Ada_LLVMLinkInInterpreter"); + pragma Import + (C, + LLVMCreateGenericValueOfInt, + "Ada_LLVMCreateGenericValueOfInt"); + pragma Import + (C, + LLVMCreateGenericValueOfPointer, + "Ada_LLVMCreateGenericValueOfPointer"); + pragma Import + (C, + LLVMCreateGenericValueOfFloat, + "Ada_LLVMCreateGenericValueOfFloat"); + pragma Import + (C, + LLVMGenericValueIntWidth, + "Ada_LLVMGenericValueIntWidth"); + pragma Import (C, LLVMGenericValueToInt, "Ada_LLVMGenericValueToInt"); + pragma Import + (C, + LLVMGenericValueToPointer, + "Ada_LLVMGenericValueToPointer"); + pragma Import (C, LLVMGenericValueToFloat, "Ada_LLVMGenericValueToFloat"); + pragma Import (C, LLVMDisposeGenericValue, "Ada_LLVMDisposeGenericValue"); + pragma Import + (C, + LLVMCreateExecutionEngine, + "Ada_LLVMCreateExecutionEngine"); + pragma Import (C, LLVMCreateInterpreter, "Ada_LLVMCreateInterpreter"); + pragma Import (C, LLVMCreateJITCompiler, "Ada_LLVMCreateJITCompiler"); + pragma Import + (C, + LLVMDisposeExecutionEngine, + "Ada_LLVMDisposeExecutionEngine"); + pragma Import + (C, + LLVMRunStaticConstructors, + "Ada_LLVMRunStaticConstructors"); + pragma Import + (C, + LLVMRunStaticDestructors, + "Ada_LLVMRunStaticDestructors"); + pragma Import (C, LLVMRunFunctionAsMain, "Ada_LLVMRunFunctionAsMain"); + pragma Import (C, LLVMRunFunction, "Ada_LLVMRunFunction"); + pragma Import + (C, + LLVMFreeMachineCodeForFunction, + "Ada_LLVMFreeMachineCodeForFunction"); + pragma Import (C, LLVMAddModuleProvider, "Ada_LLVMAddModuleProvider"); + pragma Import + (C, + LLVMRemoveModuleProvider, + "Ada_LLVMRemoveModuleProvider"); + pragma Import (C, LLVMFindFunction, "Ada_LLVMFindFunction"); + pragma Import + (C, + LLVMGetExecutionEngineTargetData, + "Ada_LLVMGetExecutionEngineTargetData"); + pragma Import (C, LLVMAddGlobalMapping, "Ada_LLVMAddGlobalMapping"); + pragma Import (C, LLVMGetPointerToGlobal, "Ada_LLVMGetPointerToGlobal"); + +end LLVM_execution_Engine.Binding; diff --git a/bindings/ada/executionengine/llvm_execution_engine.ads b/bindings/ada/executionengine/llvm_execution_engine.ads new file mode 100644 index 0000000000000..c7669920f7ac9 --- /dev/null +++ b/bindings/ada/executionengine/llvm_execution_engine.ads @@ -0,0 +1,90 @@ +-- This file is generated by SWIG. Do *not* modify by hand. 
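+--
+--  Every opaque llvm-c type below follows the same generated pattern: the
+--  type itself, a companion <name>_array indexed by Interfaces.C.size_t,
+--  and a <name>_view general access type.  For instance (illustrative
+--  only), a one-element argument vector for LLVMRunFunction can be built
+--  as:
+--
+--     Args : LLVMGenericValueRef_array (0 .. 0) := (0 => Arg);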
+-- + +with Interfaces.C.Extensions; + + +package LLVM_execution_Engine is + + -- LLVMOpaqueGenericValue + -- + type LLVMOpaqueGenericValue is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueGenericValue_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_execution_Engine.LLVMOpaqueGenericValue; + + type LLVMOpaqueGenericValue_view is access all + LLVM_execution_Engine.LLVMOpaqueGenericValue; + + -- LLVMGenericValueRef + -- + type LLVMGenericValueRef is access all + LLVM_execution_Engine.LLVMOpaqueGenericValue; + + type LLVMGenericValueRef_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_execution_Engine.LLVMGenericValueRef; + + type LLVMGenericValueRef_view is access all + LLVM_execution_Engine.LLVMGenericValueRef; + + -- LLVMOpaqueExecutionEngine + -- + type LLVMOpaqueExecutionEngine is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueExecutionEngine_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_execution_Engine.LLVMOpaqueExecutionEngine; + + type LLVMOpaqueExecutionEngine_view is access all + LLVM_execution_Engine.LLVMOpaqueExecutionEngine; + + -- LLVMExecutionEngineRef + -- + type LLVMExecutionEngineRef is access all + LLVM_execution_Engine.LLVMOpaqueExecutionEngine; + + type LLVMExecutionEngineRef_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_execution_Engine.LLVMExecutionEngineRef; + + type LLVMExecutionEngineRef_view is access all + LLVM_execution_Engine.LLVMExecutionEngineRef; + + -- LLVMTargetDataRef + -- + type LLVMTargetDataRef is new Interfaces.C.Extensions.opaque_structure_def; + + type LLVMTargetDataRef_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_execution_Engine.LLVMTargetDataRef; + + type LLVMTargetDataRef_view is access all + LLVM_execution_Engine.LLVMTargetDataRef; + + -- GenericValue + -- + type GenericValue is new Interfaces.C.Extensions.opaque_structure_def; + + type GenericValue_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_execution_Engine.GenericValue; + + type GenericValue_view is access all LLVM_execution_Engine.GenericValue; + + -- ExecutionEngine + -- + type ExecutionEngine is new Interfaces.C.Extensions.incomplete_class_def; + + type ExecutionEngine_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_execution_Engine.ExecutionEngine; + + type ExecutionEngine_view is access all + LLVM_execution_Engine.ExecutionEngine; + + +end LLVM_execution_Engine; diff --git a/bindings/ada/executionengine/llvm_executionengine_wrap.cxx b/bindings/ada/executionengine/llvm_executionengine_wrap.cxx new file mode 100644 index 0000000000000..b63acacb361f0 --- /dev/null +++ b/bindings/ada/executionengine/llvm_executionengine_wrap.cxx @@ -0,0 +1,924 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 1.3.36 + * + * This file is not intended to be easily readable and contains a number of + * coding conventions designed to improve portability and efficiency. Do not make + * changes to this file unless you know what you are doing--modify the SWIG + * interface file instead. 
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+  T *tt;
+public:
+  SwigValueWrapper() : tt(0) { }
+  SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+  SwigValueWrapper(const T& t) : tt(new T(t)) { }
+  ~SwigValueWrapper() { delete tt; }
+  SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+  operator T&() const { return *tt; }
+  T *operator&() { return tt; }
+private:
+  SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+  return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+  SWIG_AdaException,
+  SWIG_AdaOutOfMemoryException,
+  SWIG_AdaIndexOutOfRangeException,
+  SWIG_AdaDivideByZeroException,
+  SWIG_AdaArgumentOutOfRangeException,
+  SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+  SWIG_AdaExceptionCodes code;
+  SWIG_AdaExceptionCallback_t callback;
+}
+  SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+  { SWIG_AdaException, NULL },
+  { SWIG_AdaOutOfMemoryException, NULL },
+  { SWIG_AdaIndexOutOfRangeException, NULL },
+  { SWIG_AdaDivideByZeroException, NULL },
+  { SWIG_AdaArgumentOutOfRangeException, NULL },
+  { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+    callback = SWIG_ada_exceptions[code].callback;
+  }
+  callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_execution_Engine (SWIG_AdaExceptionCallback_t systemException,
+                                                                                 SWIG_AdaExceptionCallback_t outOfMemory,
+                                                                                 SWIG_AdaExceptionCallback_t indexOutOfRange,
+                                                                                 SWIG_AdaExceptionCallback_t divideByZero,
+                                                                                 SWIG_AdaExceptionCallback_t argumentOutOfRange,
+                                                                                 SWIG_AdaExceptionCallback_t nullReference)
+{
+  SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_execution_Engine(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +#include "llvm-c/ExecutionEngine.h" + + + +// struct LLVMCtxt; + + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport void SWIGSTDCALL Ada_LLVMLinkInJIT ( + ) +{ + LLVMLinkInJIT(); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMLinkInInterpreter ( + ) +{ + LLVMLinkInInterpreter(); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfInt ( + void * jarg1 + , + + unsigned long long jarg2 + , + + int jarg3 + ) +{ + void * jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + unsigned long long arg2 ; + int arg3 ; + LLVMGenericValueRef result; + + arg1 = (LLVMTypeRef)jarg1; + + + arg2 = (unsigned long long) jarg2; + + + + arg3 = (int) jarg3; + + + result = (LLVMGenericValueRef)LLVMCreateGenericValueOfInt(arg1,arg2,arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfPointer ( + void* jarg1 + ) +{ + void * jresult ; + void *arg1 = (void *) 0 ; + LLVMGenericValueRef result; + + arg1 = (void *)jarg1; + + result = (LLVMGenericValueRef)LLVMCreateGenericValueOfPointer(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreateGenericValueOfFloat ( + void * jarg1 + , + + double jarg2 + ) +{ + void * jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + double arg2 ; + LLVMGenericValueRef result; + + arg1 = (LLVMTypeRef)jarg1; + + + arg2 = (double) jarg2; + + + result = (LLVMGenericValueRef)LLVMCreateGenericValueOfFloat(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMGenericValueIntWidth ( + void * jarg1 + ) +{ + unsigned int jresult ; + LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ; + unsigned int result; + + arg1 = (LLVMGenericValueRef)jarg1; + + result = (unsigned int)LLVMGenericValueIntWidth(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned long long SWIGSTDCALL Ada_LLVMGenericValueToInt ( + void * jarg1 + , + + int jarg2 + ) +{ + unsigned long long jresult ; + LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ; + int arg2 ; + unsigned long long result; + + arg1 = (LLVMGenericValueRef)jarg1; + + + arg2 = (int) jarg2; + + + result = (unsigned long long)LLVMGenericValueToInt(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport void* SWIGSTDCALL Ada_LLVMGenericValueToPointer ( + void * jarg1 + ) +{ + void* jresult ; + LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ; + void *result = 0 ; + + arg1 = (LLVMGenericValueRef)jarg1; + + result = (void *)LLVMGenericValueToPointer(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport double SWIGSTDCALL Ada_LLVMGenericValueToFloat ( + void * jarg1 + , + + void * jarg2 + ) +{ + double jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + LLVMGenericValueRef arg2 = (LLVMGenericValueRef) 0 ; + double result; + + arg1 = (LLVMTypeRef)jarg1; + + arg2 = (LLVMGenericValueRef)jarg2; + + result = (double)LLVMGenericValueToFloat(arg1,arg2); + jresult = result; + + + + return jresult; + +} 
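+
+/* Note (explanatory comment, not SWIG output): every Ada_LLVM* thunk in
+ * this file follows the pattern visible above -- arguments arrive as void*
+ * or plain scalars, are cast to the corresponding llvm-c types, the real
+ * LLVM C API entry point is invoked, and the result is cast back for the
+ * Ada side, whose pragma Import clauses reference these Ada_* symbols. */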
+ + + +DllExport void SWIGSTDCALL Ada_LLVMDisposeGenericValue ( + void * jarg1 + ) +{ + LLVMGenericValueRef arg1 = (LLVMGenericValueRef) 0 ; + + arg1 = (LLVMGenericValueRef)jarg1; + + LLVMDisposeGenericValue(arg1); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMCreateExecutionEngine ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ; + LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ; + char **arg3 = (char **) 0 ; + int result; + + arg1 = (LLVMExecutionEngineRef *)jarg1; + + arg2 = (LLVMModuleProviderRef)jarg2; + + arg3 = (char **)jarg3; + + result = (int)LLVMCreateExecutionEngine(arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMCreateInterpreter ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ; + LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ; + char **arg3 = (char **) 0 ; + int result; + + arg1 = (LLVMExecutionEngineRef *)jarg1; + + arg2 = (LLVMModuleProviderRef)jarg2; + + arg3 = (char **)jarg3; + + result = (int)LLVMCreateInterpreter(arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMCreateJITCompiler ( + void * jarg1 + , + + void * jarg2 + , + + unsigned int jarg3 + , + + void * jarg4 + ) +{ + int jresult ; + LLVMExecutionEngineRef *arg1 = (LLVMExecutionEngineRef *) 0 ; + LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ; + unsigned int arg3 ; + char **arg4 = (char **) 0 ; + int result; + + arg1 = (LLVMExecutionEngineRef *)jarg1; + + arg2 = (LLVMModuleProviderRef)jarg2; + + + arg3 = (unsigned int) jarg3; + + + arg4 = (char **)jarg4; + + result = (int)LLVMCreateJITCompiler(arg1,arg2,arg3,arg4); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDisposeExecutionEngine ( + void * jarg1 + ) +{ + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + LLVMDisposeExecutionEngine(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMRunStaticConstructors ( + void * jarg1 + ) +{ + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + LLVMRunStaticConstructors(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMRunStaticDestructors ( + void * jarg1 + ) +{ + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + LLVMRunStaticDestructors(arg1); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMRunFunctionAsMain ( + void * jarg1 + , + + void * jarg2 + , + + unsigned int jarg3 + , + + void * jarg4 + , + + void * jarg5 + ) +{ + int jresult ; + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + unsigned int arg3 ; + char **arg4 = (char **) 0 ; + char **arg5 = (char **) 0 ; + int result; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + + arg3 = (unsigned int) jarg3; + + + arg4 = (char **)jarg4; + + arg5 = (char **)jarg5; + + result = (int)LLVMRunFunctionAsMain(arg1,arg2,arg3,(char const *const *)arg4,(char const *const *)arg5); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMRunFunction ( + void * jarg1 + , + + void * jarg2 + , + + unsigned int jarg3 + , + + void * jarg4 + ) +{ + void * jresult ; + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + 
LLVMValueRef arg2 = (LLVMValueRef) 0 ; + unsigned int arg3 ; + LLVMGenericValueRef *arg4 = (LLVMGenericValueRef *) 0 ; + LLVMGenericValueRef result; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + + arg3 = (unsigned int) jarg3; + + + arg4 = (LLVMGenericValueRef *)jarg4; + + result = (LLVMGenericValueRef)LLVMRunFunction(arg1,arg2,arg3,arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMFreeMachineCodeForFunction ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + LLVMFreeMachineCodeForFunction(arg1,arg2); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddModuleProvider ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = (LLVMModuleProviderRef)jarg2; + + LLVMAddModuleProvider(arg1,arg2); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMRemoveModuleProvider ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + void * jarg4 + ) +{ + int jresult ; + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + LLVMModuleProviderRef arg2 = (LLVMModuleProviderRef) 0 ; + LLVMModuleRef *arg3 = (LLVMModuleRef *) 0 ; + char **arg4 = (char **) 0 ; + int result; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = (LLVMModuleProviderRef)jarg2; + + arg3 = (LLVMModuleRef *)jarg3; + + arg4 = (char **)jarg4; + + result = (int)LLVMRemoveModuleProvider(arg1,arg2,arg3,arg4); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMFindFunction ( + void * jarg1 + , + + char * jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + char *arg2 = (char *) 0 ; + LLVMValueRef *arg3 = (LLVMValueRef *) 0 ; + int result; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = jarg2; + + arg3 = (LLVMValueRef *)jarg3; + + result = (int)LLVMFindFunction(arg1,(char const *)arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport LLVMTargetDataRef SWIGSTDCALL Ada_LLVMGetExecutionEngineTargetData ( + void * jarg1 + ) +{ + LLVMTargetDataRef jresult ; + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + LLVMTargetDataRef result; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + result = LLVMGetExecutionEngineTargetData(arg1); + + jresult = result; + //jresult = new LLVMTargetDataRef ((LLVMTargetDataRef &) result); + + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddGlobalMapping ( + void * jarg1 + , + + void * jarg2 + , + + void* jarg3 + ) +{ + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + void *arg3 = (void *) 0 ; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (void *)jarg3; + + LLVMAddGlobalMapping(arg1,arg2,arg3); + + +} + + + +DllExport void* SWIGSTDCALL Ada_LLVMGetPointerToGlobal ( + void * jarg1 + , + + void * jarg2 + ) +{ + void* jresult ; + LLVMExecutionEngineRef arg1 = (LLVMExecutionEngineRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + void *result = 0 ; + + arg1 = (LLVMExecutionEngineRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + result = (void *)LLVMGetPointerToGlobal(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +#ifdef 
__cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ada/llvm.gpr b/bindings/ada/llvm.gpr new file mode 100644 index 0000000000000..8e87af4fa12e6 --- /dev/null +++ b/bindings/ada/llvm.gpr @@ -0,0 +1,34 @@ +project LLVM is + + for Languages use ("Ada", "C++"); + for Source_Dirs use (".", "analysis", "bitreader", "bitwriter", "executionengine", "llvm", "target", "transforms"); + for Object_Dir use "build"; + for Exec_Dir use "."; + for Library_Name use "llvm_ada"; + for Library_Dir use "lib"; + for Library_Ali_Dir use "objects"; + + package Naming is + for Specification_Suffix ("c++") use ".h"; + for Implementation_Suffix ("c++") use ".cxx"; + end Naming; + + package Builder is + for Default_Switches ("ada") use ("-g"); + end Builder; + + package Compiler is + for Default_Switches ("ada") use ("-gnato", "-fstack-check", "-g", "-gnata", "-gnat05", "-I/usr/local/include"); + for Default_Switches ("c++") use ("-D__STDC_LIMIT_MACROS", "-D__STDC_CONSTANT_MACROS", "-I../../include", "-g"); + end Compiler; + + package Binder is + for Default_Switches ("ada") use ("-E"); + end Binder; + + package Linker is + for Default_Switches ("c++") use ("-g"); + end Linker; + +end LLVM; + diff --git a/bindings/ada/llvm/llvm-binding.ads b/bindings/ada/llvm/llvm-binding.ads new file mode 100644 index 0000000000000..c0e48a1b5bf35 --- /dev/null +++ b/bindings/ada/llvm/llvm-binding.ads @@ -0,0 +1,1974 @@ +-- This file is generated by SWIG. Do *not* modify by hand. +-- + +with Interfaces.C.Strings; + + +package llvm.Binding is + + procedure LLVMDisposeMessage + (Message : in Interfaces.C.Strings.chars_ptr); + + function LLVMContextCreate return llvm.LLVMContextRef; + + function LLVMGetGlobalContext return llvm.LLVMContextRef; + + procedure LLVMContextDispose (C : in llvm.LLVMContextRef); + + function LLVMModuleCreateWithName + (ModuleID : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMModuleRef; + + function LLVMModuleCreateWithNameInContext + (ModuleID : in Interfaces.C.Strings.chars_ptr; + C : in llvm.LLVMContextRef) + return llvm.LLVMModuleRef; + + procedure LLVMDisposeModule (M : in llvm.LLVMModuleRef); + + function LLVMGetDataLayout + (M : in llvm.LLVMModuleRef) + return Interfaces.C.Strings.chars_ptr; + + procedure LLVMSetDataLayout + (M : in llvm.LLVMModuleRef; + Triple : in Interfaces.C.Strings.chars_ptr); + + function LLVMGetTarget + (M : in llvm.LLVMModuleRef) + return Interfaces.C.Strings.chars_ptr; + + procedure LLVMSetTarget + (M : in llvm.LLVMModuleRef; + Triple : in Interfaces.C.Strings.chars_ptr); + + function LLVMAddTypeName + (M : in llvm.LLVMModuleRef; + Name : in Interfaces.C.Strings.chars_ptr; + Ty : in llvm.LLVMTypeRef) + return Interfaces.C.int; + + procedure LLVMDeleteTypeName + (M : in llvm.LLVMModuleRef; + Name : in Interfaces.C.Strings.chars_ptr); + + function LLVMGetTypeByName + (M : in llvm.LLVMModuleRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMTypeRef; + + procedure LLVMDumpModule (M : in llvm.LLVMModuleRef); + + function LLVMGetTypeKind + (Ty : in llvm.LLVMTypeRef) + return llvm.LLVMTypeKind; + + function LLVMInt1Type return llvm.LLVMTypeRef; + + function LLVMInt8Type return llvm.LLVMTypeRef; + + function LLVMInt16Type return llvm.LLVMTypeRef; + + function LLVMInt32Type return llvm.LLVMTypeRef; + + function LLVMInt64Type return llvm.LLVMTypeRef; + + function LLVMIntType + (NumBits : in Interfaces.C.unsigned) + return llvm.LLVMTypeRef; + + function LLVMGetIntTypeWidth + (IntegerTy : in 
llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + function LLVMFloatType return llvm.LLVMTypeRef; + + function LLVMDoubleType return llvm.LLVMTypeRef; + + function LLVMX86FP80Type return llvm.LLVMTypeRef; + + function LLVMFP128Type return llvm.LLVMTypeRef; + + function LLVMPPCFP128Type return llvm.LLVMTypeRef; + + function LLVMFunctionType + (ReturnType : in llvm.LLVMTypeRef; + ParamTypes : access llvm.LLVMTypeRef; + ParamCount : in Interfaces.C.unsigned; + IsVarArg : in Interfaces.C.int) + return llvm.LLVMTypeRef; + + function LLVMIsFunctionVarArg + (FunctionTy : in llvm.LLVMTypeRef) + return Interfaces.C.int; + + function LLVMGetReturnType + (FunctionTy : in llvm.LLVMTypeRef) + return llvm.LLVMTypeRef; + + function LLVMCountParamTypes + (FunctionTy : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + procedure LLVMGetParamTypes + (FunctionTy : in llvm.LLVMTypeRef; + Dest : access llvm.LLVMTypeRef); + + function LLVMStructType + (ElementTypes : access llvm.LLVMTypeRef; + ElementCount : in Interfaces.C.unsigned; + Packed : in Interfaces.C.int) + return llvm.LLVMTypeRef; + + function LLVMCountStructElementTypes + (StructTy : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + procedure LLVMGetStructElementTypes + (StructTy : in llvm.LLVMTypeRef; + Dest : access llvm.LLVMTypeRef); + + function LLVMIsPackedStruct + (StructTy : in llvm.LLVMTypeRef) + return Interfaces.C.int; + + function LLVMArrayType + (ElementType : in llvm.LLVMTypeRef; + ElementCount : in Interfaces.C.unsigned) + return llvm.LLVMTypeRef; + + function LLVMPointerType + (ElementType : in llvm.LLVMTypeRef; + AddressSpace : in Interfaces.C.unsigned) + return llvm.LLVMTypeRef; + + function LLVMVectorType + (ElementType : in llvm.LLVMTypeRef; + ElementCount : in Interfaces.C.unsigned) + return llvm.LLVMTypeRef; + + function LLVMGetElementType + (Ty : in llvm.LLVMTypeRef) + return llvm.LLVMTypeRef; + + function LLVMGetArrayLength + (ArrayTy : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + function LLVMGetPointerAddressSpace + (PointerTy : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + function LLVMGetVectorSize + (VectorTy : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + function LLVMVoidType return llvm.LLVMTypeRef; + + function LLVMLabelType return llvm.LLVMTypeRef; + + function LLVMOpaqueType return llvm.LLVMTypeRef; + + function LLVMCreateTypeHandle + (PotentiallyAbstractTy : in llvm.LLVMTypeRef) + return llvm.LLVMTypeHandleRef; + + procedure LLVMRefineType + (AbstractTy : in llvm.LLVMTypeRef; + ConcreteTy : in llvm.LLVMTypeRef); + + function LLVMResolveTypeHandle + (TypeHandle : in llvm.LLVMTypeHandleRef) + return llvm.LLVMTypeRef; + + procedure LLVMDisposeTypeHandle (TypeHandle : in llvm.LLVMTypeHandleRef); + + function LLVMTypeOf (Val : in llvm.LLVMValueRef) return llvm.LLVMTypeRef; + + function LLVMGetValueName + (Val : in llvm.LLVMValueRef) + return Interfaces.C.Strings.chars_ptr; + + procedure LLVMSetValueName + (Val : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr); + + procedure LLVMDumpValue (Val : in llvm.LLVMValueRef); + + function LLVMIsAArgument + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsABasicBlock + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAInlineAsm + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAUser + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstant + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + 
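--  Note: each LLVMIsA<Kind> function in this run mirrors LLVM's dyn_cast;
+   --  it returns its argument unchanged when the value has the queried kind
+   --  and a null LLVMValueRef otherwise, so a caller may test, e.g.,
+   --  "LLVMIsAConstantInt (Val) /= null" ("Val" being any value reference).
+
+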
function LLVMIsAConstantAggregateZero + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstantArray + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstantExpr + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstantFP + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstantInt + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstantPointerNull + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstantStruct + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAConstantVector + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAGlobalValue + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAFunction + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAGlobalAlias + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAGlobalVariable + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAUndefValue + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAInstruction + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsABinaryOperator + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsACallInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAIntrinsicInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsADbgInfoIntrinsic + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsADbgDeclareInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsADbgFuncStartInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsADbgRegionEndInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsADbgRegionStartInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsADbgStopPointInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAEHSelectorInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAMemIntrinsic + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAMemCpyInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAMemMoveInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAMemSetInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsACmpInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAFCmpInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAICmpInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAExtractElementInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAGetElementPtrInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAInsertElementInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAInsertValueInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAPHINode + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsASelectInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAShuffleVectorInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function 
LLVMIsAStoreInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsATerminatorInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsABranchInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAInvokeInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAReturnInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsASwitchInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAUnreachableInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAUnwindInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAUnaryInstruction + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAAllocationInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAAllocaInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAMallocInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsACastInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsABitCastInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAFPExtInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAFPToSIInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAFPToUIInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAFPTruncInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAIntToPtrInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAPtrToIntInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsASExtInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsASIToFPInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsATruncInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAUIToFPInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAZExtInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAExtractValueInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAFreeInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsALoadInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMIsAVAArgInst + (Val : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstNull + (Ty : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstAllOnes + (Ty : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMGetUndef + (Ty : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMIsConstant + (Val : in llvm.LLVMValueRef) + return Interfaces.C.int; + + function LLVMIsNull (Val : in llvm.LLVMValueRef) return Interfaces.C.int; + + function LLVMIsUndef + (Val : in llvm.LLVMValueRef) + return Interfaces.C.int; + + function LLVMConstPointerNull + (Ty : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstInt + (IntTy : in llvm.LLVMTypeRef; + N : in Interfaces.C.Extensions.unsigned_long_long; + SignExtend : in Interfaces.C.int) + return llvm.LLVMValueRef; + + function LLVMConstReal + (RealTy : in llvm.LLVMTypeRef; + N : in Interfaces.C.double) + return llvm.LLVMValueRef; + + function LLVMConstRealOfString + (RealTy : in 
llvm.LLVMTypeRef; + Text : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMConstString + (Str : in Interfaces.C.Strings.chars_ptr; + Length : in Interfaces.C.unsigned; + DontNullTerminate : in Interfaces.C.int) + return llvm.LLVMValueRef; + + function LLVMConstArray + (ElementTy : in llvm.LLVMTypeRef; + ConstantVals : access llvm.LLVMValueRef; + Length : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + function LLVMConstStruct + (ConstantVals : access llvm.LLVMValueRef; + Count : in Interfaces.C.unsigned; + packed : in Interfaces.C.int) + return llvm.LLVMValueRef; + + function LLVMConstVector + (ScalarConstantVals : access llvm.LLVMValueRef; + Size : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + function LLVMSizeOf (Ty : in llvm.LLVMTypeRef) return llvm.LLVMValueRef; + + function LLVMConstNeg + (ConstantVal : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstNot + (ConstantVal : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstAdd + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstSub + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstMul + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstUDiv + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstSDiv + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstFDiv + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstURem + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstSRem + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstFRem + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstAnd + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstOr + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstXor + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstICmp + (Predicate : in llvm.LLVMIntPredicate; + LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstFCmp + (Predicate : in llvm.LLVMRealPredicate; + LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstShl + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstLShr + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstAShr + (LHSConstant : in llvm.LLVMValueRef; + RHSConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstGEP + (ConstantVal : in llvm.LLVMValueRef; + ConstantIndices : access llvm.LLVMValueRef; + NumIndices : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + function LLVMConstTrunc + 
(ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstSExt + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstZExt + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstFPTrunc + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstFPExt + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstUIToFP + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstSIToFP + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstFPToUI + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstFPToSI + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstPtrToInt + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstIntToPtr + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstBitCast + (ConstantVal : in llvm.LLVMValueRef; + ToType : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMConstSelect + (ConstantCondition : in llvm.LLVMValueRef; + ConstantIfTrue : in llvm.LLVMValueRef; + ConstantIfFalse : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstExtractElement + (VectorConstant : in llvm.LLVMValueRef; + IndexConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstInsertElement + (VectorConstant : in llvm.LLVMValueRef; + ElementValueConstant : in llvm.LLVMValueRef; + IndexConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstShuffleVector + (VectorAConstant : in llvm.LLVMValueRef; + VectorBConstant : in llvm.LLVMValueRef; + MaskConstant : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMConstExtractValue + (AggConstant : in llvm.LLVMValueRef; + IdxList : access Interfaces.C.unsigned; + NumIdx : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + function LLVMConstInsertValue + (AggConstant : in llvm.LLVMValueRef; + ElementValueConstant : in llvm.LLVMValueRef; + IdxList : access Interfaces.C.unsigned; + NumIdx : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + function LLVMConstInlineAsm + (Ty : in llvm.LLVMTypeRef; + AsmString : in Interfaces.C.Strings.chars_ptr; + Constraints : in Interfaces.C.Strings.chars_ptr; + HasSideEffects : in Interfaces.C.int) + return llvm.LLVMValueRef; + + function LLVMGetGlobalParent + (Global : in llvm.LLVMValueRef) + return llvm.LLVMModuleRef; + + function LLVMIsDeclaration + (Global : in llvm.LLVMValueRef) + return Interfaces.C.int; + + function LLVMGetLinkage + (Global : in llvm.LLVMValueRef) + return llvm.LLVMLinkage; + + procedure LLVMSetLinkage + (Global : in llvm.LLVMValueRef; + Linkage : in llvm.LLVMLinkage); + + function LLVMGetSection + (Global : in llvm.LLVMValueRef) + return Interfaces.C.Strings.chars_ptr; + + procedure LLVMSetSection + (Global : in llvm.LLVMValueRef; + Section : in Interfaces.C.Strings.chars_ptr); + + function LLVMGetVisibility + (Global : in llvm.LLVMValueRef) + return llvm.LLVMVisibility; + + procedure 
LLVMSetVisibility + (Global : in llvm.LLVMValueRef; + Viz : in llvm.LLVMVisibility); + + function LLVMGetAlignment + (Global : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + procedure LLVMSetAlignment + (Global : in llvm.LLVMValueRef; + Bytes : in Interfaces.C.unsigned); + + function LLVMAddGlobal + (M : in llvm.LLVMModuleRef; + Ty : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMGetNamedGlobal + (M : in llvm.LLVMModuleRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMGetFirstGlobal + (M : in llvm.LLVMModuleRef) + return llvm.LLVMValueRef; + + function LLVMGetLastGlobal + (M : in llvm.LLVMModuleRef) + return llvm.LLVMValueRef; + + function LLVMGetNextGlobal + (GlobalVar : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMGetPreviousGlobal + (GlobalVar : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + procedure LLVMDeleteGlobal (GlobalVar : in llvm.LLVMValueRef); + + function LLVMGetInitializer + (GlobalVar : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + procedure LLVMSetInitializer + (GlobalVar : in llvm.LLVMValueRef; + ConstantVal : in llvm.LLVMValueRef); + + function LLVMIsThreadLocal + (GlobalVar : in llvm.LLVMValueRef) + return Interfaces.C.int; + + procedure LLVMSetThreadLocal + (GlobalVar : in llvm.LLVMValueRef; + IsThreadLocal : in Interfaces.C.int); + + function LLVMIsGlobalConstant + (GlobalVar : in llvm.LLVMValueRef) + return Interfaces.C.int; + + procedure LLVMSetGlobalConstant + (GlobalVar : in llvm.LLVMValueRef; + IsConstant : in Interfaces.C.int); + + function LLVMAddAlias + (M : in llvm.LLVMModuleRef; + Ty : in llvm.LLVMTypeRef; + Aliasee : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMAddFunction + (M : in llvm.LLVMModuleRef; + Name : in Interfaces.C.Strings.chars_ptr; + FunctionTy : in llvm.LLVMTypeRef) + return llvm.LLVMValueRef; + + function LLVMGetNamedFunction + (M : in llvm.LLVMModuleRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMGetFirstFunction + (M : in llvm.LLVMModuleRef) + return llvm.LLVMValueRef; + + function LLVMGetLastFunction + (M : in llvm.LLVMModuleRef) + return llvm.LLVMValueRef; + + function LLVMGetNextFunction + (Fn : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMGetPreviousFunction + (Fn : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + procedure LLVMDeleteFunction (Fn : in llvm.LLVMValueRef); + + function LLVMGetIntrinsicID + (Fn : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + function LLVMGetFunctionCallConv + (Fn : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + procedure LLVMSetFunctionCallConv + (Fn : in llvm.LLVMValueRef; + CC : in Interfaces.C.unsigned); + + function LLVMGetGC + (Fn : in llvm.LLVMValueRef) + return Interfaces.C.Strings.chars_ptr; + + procedure LLVMSetGC + (Fn : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr); + + procedure LLVMAddFunctionAttr + (Fn : in llvm.LLVMValueRef; + PA : in llvm.LLVMAttribute); + + procedure LLVMRemoveFunctionAttr + (Fn : in llvm.LLVMValueRef; + PA : in llvm.LLVMAttribute); + + function LLVMCountParams + (Fn : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + procedure LLVMGetParams + (Fn : in llvm.LLVMValueRef; + Params : access llvm.LLVMValueRef); + + function LLVMGetParam + (Fn : in llvm.LLVMValueRef; + Index : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + 
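Under the same assumptions, a sketch that exercises the global-variable entry points above: create a module, add a constant global, dump it, and dispose of it. LLVMModuleCreateWithName and LLVMDumpModule are declared earlier in this spec (their imports appear in the private part below); the exact parameter names of LLVMModuleCreateWithName are not shown here, so it is called positionally.

   with Interfaces.C.Strings; use Interfaces.C.Strings;
   with llvm.Binding;         use llvm.Binding;

   procedure Global_Sketch is
      Mod_Name : chars_ptr := New_String ("demo");
      Var_Name : chars_ptr := New_String ("answer");
      M : constant llvm.LLVMModuleRef := LLVMModuleCreateWithName (Mod_Name);
      G : llvm.LLVMValueRef;
   begin
      --  answer : constant i32 := 42
      G := LLVMAddGlobal (M, LLVMInt32Type, Var_Name);
      LLVMSetInitializer (G, LLVMConstInt (LLVMInt32Type, 42, SignExtend => 0));
      LLVMSetGlobalConstant (G, 1);
      LLVMDumpModule (M);   --  prints the textual IR
      LLVMDisposeModule (M);
      Free (Mod_Name);
      Free (Var_Name);
   end Global_Sketch;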
function LLVMGetParamParent + (Inst : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMGetFirstParam + (Fn : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMGetLastParam + (Fn : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMGetNextParam + (Arg : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMGetPreviousParam + (Arg : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + procedure LLVMAddAttribute + (Arg : in llvm.LLVMValueRef; + PA : in llvm.LLVMAttribute); + + procedure LLVMRemoveAttribute + (Arg : in llvm.LLVMValueRef; + PA : in llvm.LLVMAttribute); + + procedure LLVMSetParamAlignment + (Arg : in llvm.LLVMValueRef; + align : in Interfaces.C.unsigned); + + function LLVMBasicBlockAsValue + (BB : in llvm.LLVMBasicBlockRef) + return llvm.LLVMValueRef; + + function LLVMValueIsBasicBlock + (Val : in llvm.LLVMValueRef) + return Interfaces.C.int; + + function LLVMValueAsBasicBlock + (Val : in llvm.LLVMValueRef) + return llvm.LLVMBasicBlockRef; + + function LLVMGetBasicBlockParent + (BB : in llvm.LLVMBasicBlockRef) + return llvm.LLVMValueRef; + + function LLVMCountBasicBlocks + (Fn : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + procedure LLVMGetBasicBlocks + (Fn : in llvm.LLVMValueRef; + BasicBlocks : access llvm.LLVMBasicBlockRef); + + function LLVMGetFirstBasicBlock + (Fn : in llvm.LLVMValueRef) + return llvm.LLVMBasicBlockRef; + + function LLVMGetLastBasicBlock + (Fn : in llvm.LLVMValueRef) + return llvm.LLVMBasicBlockRef; + + function LLVMGetNextBasicBlock + (BB : in llvm.LLVMBasicBlockRef) + return llvm.LLVMBasicBlockRef; + + function LLVMGetPreviousBasicBlock + (BB : in llvm.LLVMBasicBlockRef) + return llvm.LLVMBasicBlockRef; + + function LLVMGetEntryBasicBlock + (Fn : in llvm.LLVMValueRef) + return llvm.LLVMBasicBlockRef; + + function LLVMAppendBasicBlock + (Fn : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMBasicBlockRef; + + function LLVMInsertBasicBlock + (InsertBeforeBB : in llvm.LLVMBasicBlockRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMBasicBlockRef; + + procedure LLVMDeleteBasicBlock (BB : in llvm.LLVMBasicBlockRef); + + function LLVMGetInstructionParent + (Inst : in llvm.LLVMValueRef) + return llvm.LLVMBasicBlockRef; + + function LLVMGetFirstInstruction + (BB : in llvm.LLVMBasicBlockRef) + return llvm.LLVMValueRef; + + function LLVMGetLastInstruction + (BB : in llvm.LLVMBasicBlockRef) + return llvm.LLVMValueRef; + + function LLVMGetNextInstruction + (Inst : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMGetPreviousInstruction + (Inst : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + procedure LLVMSetInstructionCallConv + (Instr : in llvm.LLVMValueRef; + CC : in Interfaces.C.unsigned); + + function LLVMGetInstructionCallConv + (Instr : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + procedure LLVMAddInstrAttribute + (Instr : in llvm.LLVMValueRef; + index : in Interfaces.C.unsigned; + arg_1 : in llvm.LLVMAttribute); + + procedure LLVMRemoveInstrAttribute + (Instr : in llvm.LLVMValueRef; + index : in Interfaces.C.unsigned; + arg_1 : in llvm.LLVMAttribute); + + procedure LLVMSetInstrParamAlignment + (Instr : in llvm.LLVMValueRef; + index : in Interfaces.C.unsigned; + align : in Interfaces.C.unsigned); + + function LLVMIsTailCall + (CallInst : in llvm.LLVMValueRef) + return Interfaces.C.int; + + procedure LLVMSetTailCall + (CallInst : in llvm.LLVMValueRef; + IsTailCall : in Interfaces.C.int); + + 
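One more sketch, same assumptions, combining LLVMAppendBasicBlock above with the builder subprograms declared just after this point to give a function an otherwise empty body:

   with Interfaces.C.Strings; use Interfaces.C.Strings;
   with llvm.Binding;         use llvm.Binding;

   procedure Body_Sketch (Fn : in llvm.LLVMValueRef) is
      Name : chars_ptr := New_String ("entry");
      B    : constant llvm.LLVMBuilderRef := LLVMCreateBuilder;
      BB   : llvm.LLVMBasicBlockRef;
      Ret  : llvm.LLVMValueRef;
   begin
      BB  := LLVMAppendBasicBlock (Fn, Name);
      LLVMPositionBuilderAtEnd (B, BB);
      Ret := LLVMBuildRetVoid (B);   --  every block needs a terminator
      LLVMDisposeBuilder (B);
      Free (Name);
   end Body_Sketch;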
procedure LLVMAddIncoming + (PhiNode : in llvm.LLVMValueRef; + IncomingValues : access llvm.LLVMValueRef; + IncomingBlocks : access llvm.LLVMBasicBlockRef; + Count : in Interfaces.C.unsigned); + + function LLVMCountIncoming + (PhiNode : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + function LLVMGetIncomingValue + (PhiNode : in llvm.LLVMValueRef; + Index : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + function LLVMGetIncomingBlock + (PhiNode : in llvm.LLVMValueRef; + Index : in Interfaces.C.unsigned) + return llvm.LLVMBasicBlockRef; + + function LLVMCreateBuilder return llvm.LLVMBuilderRef; + + procedure LLVMPositionBuilder + (Builder : in llvm.LLVMBuilderRef; + Block : in llvm.LLVMBasicBlockRef; + Instr : in llvm.LLVMValueRef); + + procedure LLVMPositionBuilderBefore + (Builder : in llvm.LLVMBuilderRef; + Instr : in llvm.LLVMValueRef); + + procedure LLVMPositionBuilderAtEnd + (Builder : in llvm.LLVMBuilderRef; + Block : in llvm.LLVMBasicBlockRef); + + function LLVMGetInsertBlock + (Builder : in llvm.LLVMBuilderRef) + return llvm.LLVMBasicBlockRef; + + procedure LLVMClearInsertionPosition (Builder : in llvm.LLVMBuilderRef); + + procedure LLVMInsertIntoBuilder + (Builder : in llvm.LLVMBuilderRef; + Instr : in llvm.LLVMValueRef); + + procedure LLVMDisposeBuilder (Builder : in llvm.LLVMBuilderRef); + + function LLVMBuildRetVoid + (arg_1 : in llvm.LLVMBuilderRef) + return llvm.LLVMValueRef; + + function LLVMBuildRet + (arg_1 : in llvm.LLVMBuilderRef; + V : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMBuildBr + (arg_1 : in llvm.LLVMBuilderRef; + Dest : in llvm.LLVMBasicBlockRef) + return llvm.LLVMValueRef; + + function LLVMBuildCondBr + (arg_1 : in llvm.LLVMBuilderRef; + the_If : in llvm.LLVMValueRef; + the_Then : in llvm.LLVMBasicBlockRef; + the_Else : in llvm.LLVMBasicBlockRef) + return llvm.LLVMValueRef; + + function LLVMBuildSwitch + (arg_1 : in llvm.LLVMBuilderRef; + V : in llvm.LLVMValueRef; + the_Else : in llvm.LLVMBasicBlockRef; + NumCases : in Interfaces.C.unsigned) + return llvm.LLVMValueRef; + + function LLVMBuildInvoke + (arg_1 : in llvm.LLVMBuilderRef; + Fn : in llvm.LLVMValueRef; + Args : access llvm.LLVMValueRef; + NumArgs : in Interfaces.C.unsigned; + the_Then : in llvm.LLVMBasicBlockRef; + Catch : in llvm.LLVMBasicBlockRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildUnwind + (arg_1 : in llvm.LLVMBuilderRef) + return llvm.LLVMValueRef; + + function LLVMBuildUnreachable + (arg_1 : in llvm.LLVMBuilderRef) + return llvm.LLVMValueRef; + + procedure LLVMAddCase + (Switch : in llvm.LLVMValueRef; + OnVal : in llvm.LLVMValueRef; + Dest : in llvm.LLVMBasicBlockRef); + + function LLVMBuildAdd + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildSub + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildMul + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildUDiv + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildSDiv + (arg_1 : in llvm.LLVMBuilderRef; + LHS : 
in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFDiv + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildURem + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildSRem + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFRem + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildShl + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildLShr + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildAShr + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildAnd + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildOr + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildXor + (arg_1 : in llvm.LLVMBuilderRef; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildNeg + (arg_1 : in llvm.LLVMBuilderRef; + V : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildNot + (arg_1 : in llvm.LLVMBuilderRef; + V : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildMalloc + (arg_1 : in llvm.LLVMBuilderRef; + Ty : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildArrayMalloc + (arg_1 : in llvm.LLVMBuilderRef; + Ty : in llvm.LLVMTypeRef; + Val : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildAlloca + (arg_1 : in llvm.LLVMBuilderRef; + Ty : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildArrayAlloca + (arg_1 : in llvm.LLVMBuilderRef; + Ty : in llvm.LLVMTypeRef; + Val : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFree + (arg_1 : in llvm.LLVMBuilderRef; + PointerVal : in llvm.LLVMValueRef) + return llvm.LLVMValueRef; + + function LLVMBuildLoad + (arg_1 : in llvm.LLVMBuilderRef; + PointerVal : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildStore + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + Ptr : in llvm.LLVMValueRef) + return 
llvm.LLVMValueRef; + + function LLVMBuildGEP + (B : in llvm.LLVMBuilderRef; + Pointer : in llvm.LLVMValueRef; + Indices : access llvm.LLVMValueRef; + NumIndices : in Interfaces.C.unsigned; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildTrunc + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildZExt + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildSExt + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFPToUI + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFPToSI + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildUIToFP + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildSIToFP + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFPTrunc + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFPExt + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildPtrToInt + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildIntToPtr + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildBitCast + (arg_1 : in llvm.LLVMBuilderRef; + Val : in llvm.LLVMValueRef; + DestTy : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildICmp + (arg_1 : in llvm.LLVMBuilderRef; + Op : in llvm.LLVMIntPredicate; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildFCmp + (arg_1 : in llvm.LLVMBuilderRef; + Op : in llvm.LLVMRealPredicate; + LHS : in llvm.LLVMValueRef; + RHS : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildPhi + (arg_1 : in llvm.LLVMBuilderRef; + Ty : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildCall + (arg_1 : in llvm.LLVMBuilderRef; + Fn : in llvm.LLVMValueRef; + Args : access llvm.LLVMValueRef; + NumArgs : in Interfaces.C.unsigned; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildSelect + (arg_1 : in llvm.LLVMBuilderRef; + 
the_If : in llvm.LLVMValueRef; + the_Then : in llvm.LLVMValueRef; + the_Else : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildVAArg + (arg_1 : in llvm.LLVMBuilderRef; + List : in llvm.LLVMValueRef; + Ty : in llvm.LLVMTypeRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildExtractElement + (arg_1 : in llvm.LLVMBuilderRef; + VecVal : in llvm.LLVMValueRef; + Index : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildInsertElement + (arg_1 : in llvm.LLVMBuilderRef; + VecVal : in llvm.LLVMValueRef; + EltVal : in llvm.LLVMValueRef; + Index : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildShuffleVector + (arg_1 : in llvm.LLVMBuilderRef; + V1 : in llvm.LLVMValueRef; + V2 : in llvm.LLVMValueRef; + Mask : in llvm.LLVMValueRef; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildExtractValue + (arg_1 : in llvm.LLVMBuilderRef; + AggVal : in llvm.LLVMValueRef; + Index : in Interfaces.C.unsigned; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMBuildInsertValue + (arg_1 : in llvm.LLVMBuilderRef; + AggVal : in llvm.LLVMValueRef; + EltVal : in llvm.LLVMValueRef; + Index : in Interfaces.C.unsigned; + Name : in Interfaces.C.Strings.chars_ptr) + return llvm.LLVMValueRef; + + function LLVMCreateModuleProviderForExistingModule + (M : in llvm.LLVMModuleRef) + return llvm.LLVMModuleProviderRef; + + procedure LLVMDisposeModuleProvider (MP : in llvm.LLVMModuleProviderRef); + + function LLVMCreateMemoryBufferWithContentsOfFile + (Path : in Interfaces.C.Strings.chars_ptr; + OutMemBuf : access llvm.LLVMMemoryBufferRef; + OutMessage : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + function LLVMCreateMemoryBufferWithSTDIN + (OutMemBuf : access llvm.LLVMMemoryBufferRef; + OutMessage : access Interfaces.C.Strings.chars_ptr) + return Interfaces.C.int; + + procedure LLVMDisposeMemoryBuffer (MemBuf : in llvm.LLVMMemoryBufferRef); + + function LLVMCreatePassManager return llvm.LLVMPassManagerRef; + + function LLVMCreateFunctionPassManager + (MP : in llvm.LLVMModuleProviderRef) + return llvm.LLVMPassManagerRef; + + function LLVMRunPassManager + (PM : in llvm.LLVMPassManagerRef; + M : in llvm.LLVMModuleRef) + return Interfaces.C.int; + + function LLVMInitializeFunctionPassManager + (FPM : in llvm.LLVMPassManagerRef) + return Interfaces.C.int; + + function LLVMRunFunctionPassManager + (FPM : in llvm.LLVMPassManagerRef; + F : in llvm.LLVMValueRef) + return Interfaces.C.int; + + function LLVMFinalizeFunctionPassManager + (FPM : in llvm.LLVMPassManagerRef) + return Interfaces.C.int; + + procedure LLVMDisposePassManager (PM : in llvm.LLVMPassManagerRef); + +private + + pragma Import (C, LLVMDisposeMessage, "Ada_LLVMDisposeMessage"); + pragma Import (C, LLVMContextCreate, "Ada_LLVMContextCreate"); + pragma Import (C, LLVMGetGlobalContext, "Ada_LLVMGetGlobalContext"); + pragma Import (C, LLVMContextDispose, "Ada_LLVMContextDispose"); + pragma Import + (C, + LLVMModuleCreateWithName, + "Ada_LLVMModuleCreateWithName"); + pragma Import + (C, + LLVMModuleCreateWithNameInContext, + "Ada_LLVMModuleCreateWithNameInContext"); + pragma Import (C, LLVMDisposeModule, "Ada_LLVMDisposeModule"); + pragma Import (C, LLVMGetDataLayout, "Ada_LLVMGetDataLayout"); + pragma Import (C, LLVMSetDataLayout, 
"Ada_LLVMSetDataLayout"); + pragma Import (C, LLVMGetTarget, "Ada_LLVMGetTarget"); + pragma Import (C, LLVMSetTarget, "Ada_LLVMSetTarget"); + pragma Import (C, LLVMAddTypeName, "Ada_LLVMAddTypeName"); + pragma Import (C, LLVMDeleteTypeName, "Ada_LLVMDeleteTypeName"); + pragma Import (C, LLVMGetTypeByName, "Ada_LLVMGetTypeByName"); + pragma Import (C, LLVMDumpModule, "Ada_LLVMDumpModule"); + pragma Import (C, LLVMGetTypeKind, "Ada_LLVMGetTypeKind"); + pragma Import (C, LLVMInt1Type, "Ada_LLVMInt1Type"); + pragma Import (C, LLVMInt8Type, "Ada_LLVMInt8Type"); + pragma Import (C, LLVMInt16Type, "Ada_LLVMInt16Type"); + pragma Import (C, LLVMInt32Type, "Ada_LLVMInt32Type"); + pragma Import (C, LLVMInt64Type, "Ada_LLVMInt64Type"); + pragma Import (C, LLVMIntType, "Ada_LLVMIntType"); + pragma Import (C, LLVMGetIntTypeWidth, "Ada_LLVMGetIntTypeWidth"); + pragma Import (C, LLVMFloatType, "Ada_LLVMFloatType"); + pragma Import (C, LLVMDoubleType, "Ada_LLVMDoubleType"); + pragma Import (C, LLVMX86FP80Type, "Ada_LLVMX86FP80Type"); + pragma Import (C, LLVMFP128Type, "Ada_LLVMFP128Type"); + pragma Import (C, LLVMPPCFP128Type, "Ada_LLVMPPCFP128Type"); + pragma Import (C, LLVMFunctionType, "Ada_LLVMFunctionType"); + pragma Import (C, LLVMIsFunctionVarArg, "Ada_LLVMIsFunctionVarArg"); + pragma Import (C, LLVMGetReturnType, "Ada_LLVMGetReturnType"); + pragma Import (C, LLVMCountParamTypes, "Ada_LLVMCountParamTypes"); + pragma Import (C, LLVMGetParamTypes, "Ada_LLVMGetParamTypes"); + pragma Import (C, LLVMStructType, "Ada_LLVMStructType"); + pragma Import + (C, + LLVMCountStructElementTypes, + "Ada_LLVMCountStructElementTypes"); + pragma Import + (C, + LLVMGetStructElementTypes, + "Ada_LLVMGetStructElementTypes"); + pragma Import (C, LLVMIsPackedStruct, "Ada_LLVMIsPackedStruct"); + pragma Import (C, LLVMArrayType, "Ada_LLVMArrayType"); + pragma Import (C, LLVMPointerType, "Ada_LLVMPointerType"); + pragma Import (C, LLVMVectorType, "Ada_LLVMVectorType"); + pragma Import (C, LLVMGetElementType, "Ada_LLVMGetElementType"); + pragma Import (C, LLVMGetArrayLength, "Ada_LLVMGetArrayLength"); + pragma Import + (C, + LLVMGetPointerAddressSpace, + "Ada_LLVMGetPointerAddressSpace"); + pragma Import (C, LLVMGetVectorSize, "Ada_LLVMGetVectorSize"); + pragma Import (C, LLVMVoidType, "Ada_LLVMVoidType"); + pragma Import (C, LLVMLabelType, "Ada_LLVMLabelType"); + pragma Import (C, LLVMOpaqueType, "Ada_LLVMOpaqueType"); + pragma Import (C, LLVMCreateTypeHandle, "Ada_LLVMCreateTypeHandle"); + pragma Import (C, LLVMRefineType, "Ada_LLVMRefineType"); + pragma Import (C, LLVMResolveTypeHandle, "Ada_LLVMResolveTypeHandle"); + pragma Import (C, LLVMDisposeTypeHandle, "Ada_LLVMDisposeTypeHandle"); + pragma Import (C, LLVMTypeOf, "Ada_LLVMTypeOf"); + pragma Import (C, LLVMGetValueName, "Ada_LLVMGetValueName"); + pragma Import (C, LLVMSetValueName, "Ada_LLVMSetValueName"); + pragma Import (C, LLVMDumpValue, "Ada_LLVMDumpValue"); + pragma Import (C, LLVMIsAArgument, "Ada_LLVMIsAArgument"); + pragma Import (C, LLVMIsABasicBlock, "Ada_LLVMIsABasicBlock"); + pragma Import (C, LLVMIsAInlineAsm, "Ada_LLVMIsAInlineAsm"); + pragma Import (C, LLVMIsAUser, "Ada_LLVMIsAUser"); + pragma Import (C, LLVMIsAConstant, "Ada_LLVMIsAConstant"); + pragma Import + (C, + LLVMIsAConstantAggregateZero, + "Ada_LLVMIsAConstantAggregateZero"); + pragma Import (C, LLVMIsAConstantArray, "Ada_LLVMIsAConstantArray"); + pragma Import (C, LLVMIsAConstantExpr, "Ada_LLVMIsAConstantExpr"); + pragma Import (C, LLVMIsAConstantFP, "Ada_LLVMIsAConstantFP"); + pragma Import 
(C, LLVMIsAConstantInt, "Ada_LLVMIsAConstantInt"); + pragma Import + (C, + LLVMIsAConstantPointerNull, + "Ada_LLVMIsAConstantPointerNull"); + pragma Import (C, LLVMIsAConstantStruct, "Ada_LLVMIsAConstantStruct"); + pragma Import (C, LLVMIsAConstantVector, "Ada_LLVMIsAConstantVector"); + pragma Import (C, LLVMIsAGlobalValue, "Ada_LLVMIsAGlobalValue"); + pragma Import (C, LLVMIsAFunction, "Ada_LLVMIsAFunction"); + pragma Import (C, LLVMIsAGlobalAlias, "Ada_LLVMIsAGlobalAlias"); + pragma Import (C, LLVMIsAGlobalVariable, "Ada_LLVMIsAGlobalVariable"); + pragma Import (C, LLVMIsAUndefValue, "Ada_LLVMIsAUndefValue"); + pragma Import (C, LLVMIsAInstruction, "Ada_LLVMIsAInstruction"); + pragma Import (C, LLVMIsABinaryOperator, "Ada_LLVMIsABinaryOperator"); + pragma Import (C, LLVMIsACallInst, "Ada_LLVMIsACallInst"); + pragma Import (C, LLVMIsAIntrinsicInst, "Ada_LLVMIsAIntrinsicInst"); + pragma Import (C, LLVMIsADbgInfoIntrinsic, "Ada_LLVMIsADbgInfoIntrinsic"); + pragma Import (C, LLVMIsADbgDeclareInst, "Ada_LLVMIsADbgDeclareInst"); + pragma Import (C, LLVMIsADbgFuncStartInst, "Ada_LLVMIsADbgFuncStartInst"); + pragma Import (C, LLVMIsADbgRegionEndInst, "Ada_LLVMIsADbgRegionEndInst"); + pragma Import + (C, + LLVMIsADbgRegionStartInst, + "Ada_LLVMIsADbgRegionStartInst"); + pragma Import (C, LLVMIsADbgStopPointInst, "Ada_LLVMIsADbgStopPointInst"); + pragma Import (C, LLVMIsAEHSelectorInst, "Ada_LLVMIsAEHSelectorInst"); + pragma Import (C, LLVMIsAMemIntrinsic, "Ada_LLVMIsAMemIntrinsic"); + pragma Import (C, LLVMIsAMemCpyInst, "Ada_LLVMIsAMemCpyInst"); + pragma Import (C, LLVMIsAMemMoveInst, "Ada_LLVMIsAMemMoveInst"); + pragma Import (C, LLVMIsAMemSetInst, "Ada_LLVMIsAMemSetInst"); + pragma Import (C, LLVMIsACmpInst, "Ada_LLVMIsACmpInst"); + pragma Import (C, LLVMIsAFCmpInst, "Ada_LLVMIsAFCmpInst"); + pragma Import (C, LLVMIsAICmpInst, "Ada_LLVMIsAICmpInst"); + pragma Import + (C, + LLVMIsAExtractElementInst, + "Ada_LLVMIsAExtractElementInst"); + pragma Import + (C, + LLVMIsAGetElementPtrInst, + "Ada_LLVMIsAGetElementPtrInst"); + pragma Import + (C, + LLVMIsAInsertElementInst, + "Ada_LLVMIsAInsertElementInst"); + pragma Import (C, LLVMIsAInsertValueInst, "Ada_LLVMIsAInsertValueInst"); + pragma Import (C, LLVMIsAPHINode, "Ada_LLVMIsAPHINode"); + pragma Import (C, LLVMIsASelectInst, "Ada_LLVMIsASelectInst"); + pragma Import + (C, + LLVMIsAShuffleVectorInst, + "Ada_LLVMIsAShuffleVectorInst"); + pragma Import (C, LLVMIsAStoreInst, "Ada_LLVMIsAStoreInst"); + pragma Import (C, LLVMIsATerminatorInst, "Ada_LLVMIsATerminatorInst"); + pragma Import (C, LLVMIsABranchInst, "Ada_LLVMIsABranchInst"); + pragma Import (C, LLVMIsAInvokeInst, "Ada_LLVMIsAInvokeInst"); + pragma Import (C, LLVMIsAReturnInst, "Ada_LLVMIsAReturnInst"); + pragma Import (C, LLVMIsASwitchInst, "Ada_LLVMIsASwitchInst"); + pragma Import (C, LLVMIsAUnreachableInst, "Ada_LLVMIsAUnreachableInst"); + pragma Import (C, LLVMIsAUnwindInst, "Ada_LLVMIsAUnwindInst"); + pragma Import (C, LLVMIsAUnaryInstruction, "Ada_LLVMIsAUnaryInstruction"); + pragma Import (C, LLVMIsAAllocationInst, "Ada_LLVMIsAAllocationInst"); + pragma Import (C, LLVMIsAAllocaInst, "Ada_LLVMIsAAllocaInst"); + pragma Import (C, LLVMIsAMallocInst, "Ada_LLVMIsAMallocInst"); + pragma Import (C, LLVMIsACastInst, "Ada_LLVMIsACastInst"); + pragma Import (C, LLVMIsABitCastInst, "Ada_LLVMIsABitCastInst"); + pragma Import (C, LLVMIsAFPExtInst, "Ada_LLVMIsAFPExtInst"); + pragma Import (C, LLVMIsAFPToSIInst, "Ada_LLVMIsAFPToSIInst"); + pragma Import (C, LLVMIsAFPToUIInst, 
"Ada_LLVMIsAFPToUIInst"); + pragma Import (C, LLVMIsAFPTruncInst, "Ada_LLVMIsAFPTruncInst"); + pragma Import (C, LLVMIsAIntToPtrInst, "Ada_LLVMIsAIntToPtrInst"); + pragma Import (C, LLVMIsAPtrToIntInst, "Ada_LLVMIsAPtrToIntInst"); + pragma Import (C, LLVMIsASExtInst, "Ada_LLVMIsASExtInst"); + pragma Import (C, LLVMIsASIToFPInst, "Ada_LLVMIsASIToFPInst"); + pragma Import (C, LLVMIsATruncInst, "Ada_LLVMIsATruncInst"); + pragma Import (C, LLVMIsAUIToFPInst, "Ada_LLVMIsAUIToFPInst"); + pragma Import (C, LLVMIsAZExtInst, "Ada_LLVMIsAZExtInst"); + pragma Import (C, LLVMIsAExtractValueInst, "Ada_LLVMIsAExtractValueInst"); + pragma Import (C, LLVMIsAFreeInst, "Ada_LLVMIsAFreeInst"); + pragma Import (C, LLVMIsALoadInst, "Ada_LLVMIsALoadInst"); + pragma Import (C, LLVMIsAVAArgInst, "Ada_LLVMIsAVAArgInst"); + pragma Import (C, LLVMConstNull, "Ada_LLVMConstNull"); + pragma Import (C, LLVMConstAllOnes, "Ada_LLVMConstAllOnes"); + pragma Import (C, LLVMGetUndef, "Ada_LLVMGetUndef"); + pragma Import (C, LLVMIsConstant, "Ada_LLVMIsConstant"); + pragma Import (C, LLVMIsNull, "Ada_LLVMIsNull"); + pragma Import (C, LLVMIsUndef, "Ada_LLVMIsUndef"); + pragma Import (C, LLVMConstPointerNull, "Ada_LLVMConstPointerNull"); + pragma Import (C, LLVMConstInt, "Ada_LLVMConstInt"); + pragma Import (C, LLVMConstReal, "Ada_LLVMConstReal"); + pragma Import (C, LLVMConstRealOfString, "Ada_LLVMConstRealOfString"); + pragma Import (C, LLVMConstString, "Ada_LLVMConstString"); + pragma Import (C, LLVMConstArray, "Ada_LLVMConstArray"); + pragma Import (C, LLVMConstStruct, "Ada_LLVMConstStruct"); + pragma Import (C, LLVMConstVector, "Ada_LLVMConstVector"); + pragma Import (C, LLVMSizeOf, "Ada_LLVMSizeOf"); + pragma Import (C, LLVMConstNeg, "Ada_LLVMConstNeg"); + pragma Import (C, LLVMConstNot, "Ada_LLVMConstNot"); + pragma Import (C, LLVMConstAdd, "Ada_LLVMConstAdd"); + pragma Import (C, LLVMConstSub, "Ada_LLVMConstSub"); + pragma Import (C, LLVMConstMul, "Ada_LLVMConstMul"); + pragma Import (C, LLVMConstUDiv, "Ada_LLVMConstUDiv"); + pragma Import (C, LLVMConstSDiv, "Ada_LLVMConstSDiv"); + pragma Import (C, LLVMConstFDiv, "Ada_LLVMConstFDiv"); + pragma Import (C, LLVMConstURem, "Ada_LLVMConstURem"); + pragma Import (C, LLVMConstSRem, "Ada_LLVMConstSRem"); + pragma Import (C, LLVMConstFRem, "Ada_LLVMConstFRem"); + pragma Import (C, LLVMConstAnd, "Ada_LLVMConstAnd"); + pragma Import (C, LLVMConstOr, "Ada_LLVMConstOr"); + pragma Import (C, LLVMConstXor, "Ada_LLVMConstXor"); + pragma Import (C, LLVMConstICmp, "Ada_LLVMConstICmp"); + pragma Import (C, LLVMConstFCmp, "Ada_LLVMConstFCmp"); + pragma Import (C, LLVMConstShl, "Ada_LLVMConstShl"); + pragma Import (C, LLVMConstLShr, "Ada_LLVMConstLShr"); + pragma Import (C, LLVMConstAShr, "Ada_LLVMConstAShr"); + pragma Import (C, LLVMConstGEP, "Ada_LLVMConstGEP"); + pragma Import (C, LLVMConstTrunc, "Ada_LLVMConstTrunc"); + pragma Import (C, LLVMConstSExt, "Ada_LLVMConstSExt"); + pragma Import (C, LLVMConstZExt, "Ada_LLVMConstZExt"); + pragma Import (C, LLVMConstFPTrunc, "Ada_LLVMConstFPTrunc"); + pragma Import (C, LLVMConstFPExt, "Ada_LLVMConstFPExt"); + pragma Import (C, LLVMConstUIToFP, "Ada_LLVMConstUIToFP"); + pragma Import (C, LLVMConstSIToFP, "Ada_LLVMConstSIToFP"); + pragma Import (C, LLVMConstFPToUI, "Ada_LLVMConstFPToUI"); + pragma Import (C, LLVMConstFPToSI, "Ada_LLVMConstFPToSI"); + pragma Import (C, LLVMConstPtrToInt, "Ada_LLVMConstPtrToInt"); + pragma Import (C, LLVMConstIntToPtr, "Ada_LLVMConstIntToPtr"); + pragma Import (C, LLVMConstBitCast, "Ada_LLVMConstBitCast"); + pragma 
Import (C, LLVMConstSelect, "Ada_LLVMConstSelect"); + pragma Import (C, LLVMConstExtractElement, "Ada_LLVMConstExtractElement"); + pragma Import (C, LLVMConstInsertElement, "Ada_LLVMConstInsertElement"); + pragma Import (C, LLVMConstShuffleVector, "Ada_LLVMConstShuffleVector"); + pragma Import (C, LLVMConstExtractValue, "Ada_LLVMConstExtractValue"); + pragma Import (C, LLVMConstInsertValue, "Ada_LLVMConstInsertValue"); + pragma Import (C, LLVMConstInlineAsm, "Ada_LLVMConstInlineAsm"); + pragma Import (C, LLVMGetGlobalParent, "Ada_LLVMGetGlobalParent"); + pragma Import (C, LLVMIsDeclaration, "Ada_LLVMIsDeclaration"); + pragma Import (C, LLVMGetLinkage, "Ada_LLVMGetLinkage"); + pragma Import (C, LLVMSetLinkage, "Ada_LLVMSetLinkage"); + pragma Import (C, LLVMGetSection, "Ada_LLVMGetSection"); + pragma Import (C, LLVMSetSection, "Ada_LLVMSetSection"); + pragma Import (C, LLVMGetVisibility, "Ada_LLVMGetVisibility"); + pragma Import (C, LLVMSetVisibility, "Ada_LLVMSetVisibility"); + pragma Import (C, LLVMGetAlignment, "Ada_LLVMGetAlignment"); + pragma Import (C, LLVMSetAlignment, "Ada_LLVMSetAlignment"); + pragma Import (C, LLVMAddGlobal, "Ada_LLVMAddGlobal"); + pragma Import (C, LLVMGetNamedGlobal, "Ada_LLVMGetNamedGlobal"); + pragma Import (C, LLVMGetFirstGlobal, "Ada_LLVMGetFirstGlobal"); + pragma Import (C, LLVMGetLastGlobal, "Ada_LLVMGetLastGlobal"); + pragma Import (C, LLVMGetNextGlobal, "Ada_LLVMGetNextGlobal"); + pragma Import (C, LLVMGetPreviousGlobal, "Ada_LLVMGetPreviousGlobal"); + pragma Import (C, LLVMDeleteGlobal, "Ada_LLVMDeleteGlobal"); + pragma Import (C, LLVMGetInitializer, "Ada_LLVMGetInitializer"); + pragma Import (C, LLVMSetInitializer, "Ada_LLVMSetInitializer"); + pragma Import (C, LLVMIsThreadLocal, "Ada_LLVMIsThreadLocal"); + pragma Import (C, LLVMSetThreadLocal, "Ada_LLVMSetThreadLocal"); + pragma Import (C, LLVMIsGlobalConstant, "Ada_LLVMIsGlobalConstant"); + pragma Import (C, LLVMSetGlobalConstant, "Ada_LLVMSetGlobalConstant"); + pragma Import (C, LLVMAddAlias, "Ada_LLVMAddAlias"); + pragma Import (C, LLVMAddFunction, "Ada_LLVMAddFunction"); + pragma Import (C, LLVMGetNamedFunction, "Ada_LLVMGetNamedFunction"); + pragma Import (C, LLVMGetFirstFunction, "Ada_LLVMGetFirstFunction"); + pragma Import (C, LLVMGetLastFunction, "Ada_LLVMGetLastFunction"); + pragma Import (C, LLVMGetNextFunction, "Ada_LLVMGetNextFunction"); + pragma Import (C, LLVMGetPreviousFunction, "Ada_LLVMGetPreviousFunction"); + pragma Import (C, LLVMDeleteFunction, "Ada_LLVMDeleteFunction"); + pragma Import (C, LLVMGetIntrinsicID, "Ada_LLVMGetIntrinsicID"); + pragma Import (C, LLVMGetFunctionCallConv, "Ada_LLVMGetFunctionCallConv"); + pragma Import (C, LLVMSetFunctionCallConv, "Ada_LLVMSetFunctionCallConv"); + pragma Import (C, LLVMGetGC, "Ada_LLVMGetGC"); + pragma Import (C, LLVMSetGC, "Ada_LLVMSetGC"); + pragma Import (C, LLVMAddFunctionAttr, "Ada_LLVMAddFunctionAttr"); + pragma Import (C, LLVMRemoveFunctionAttr, "Ada_LLVMRemoveFunctionAttr"); + pragma Import (C, LLVMCountParams, "Ada_LLVMCountParams"); + pragma Import (C, LLVMGetParams, "Ada_LLVMGetParams"); + pragma Import (C, LLVMGetParam, "Ada_LLVMGetParam"); + pragma Import (C, LLVMGetParamParent, "Ada_LLVMGetParamParent"); + pragma Import (C, LLVMGetFirstParam, "Ada_LLVMGetFirstParam"); + pragma Import (C, LLVMGetLastParam, "Ada_LLVMGetLastParam"); + pragma Import (C, LLVMGetNextParam, "Ada_LLVMGetNextParam"); + pragma Import (C, LLVMGetPreviousParam, "Ada_LLVMGetPreviousParam"); + pragma Import (C, LLVMAddAttribute, "Ada_LLVMAddAttribute"); + 
pragma Import (C, LLVMRemoveAttribute, "Ada_LLVMRemoveAttribute"); + pragma Import (C, LLVMSetParamAlignment, "Ada_LLVMSetParamAlignment"); + pragma Import (C, LLVMBasicBlockAsValue, "Ada_LLVMBasicBlockAsValue"); + pragma Import (C, LLVMValueIsBasicBlock, "Ada_LLVMValueIsBasicBlock"); + pragma Import (C, LLVMValueAsBasicBlock, "Ada_LLVMValueAsBasicBlock"); + pragma Import (C, LLVMGetBasicBlockParent, "Ada_LLVMGetBasicBlockParent"); + pragma Import (C, LLVMCountBasicBlocks, "Ada_LLVMCountBasicBlocks"); + pragma Import (C, LLVMGetBasicBlocks, "Ada_LLVMGetBasicBlocks"); + pragma Import (C, LLVMGetFirstBasicBlock, "Ada_LLVMGetFirstBasicBlock"); + pragma Import (C, LLVMGetLastBasicBlock, "Ada_LLVMGetLastBasicBlock"); + pragma Import (C, LLVMGetNextBasicBlock, "Ada_LLVMGetNextBasicBlock"); + pragma Import + (C, + LLVMGetPreviousBasicBlock, + "Ada_LLVMGetPreviousBasicBlock"); + pragma Import (C, LLVMGetEntryBasicBlock, "Ada_LLVMGetEntryBasicBlock"); + pragma Import (C, LLVMAppendBasicBlock, "Ada_LLVMAppendBasicBlock"); + pragma Import (C, LLVMInsertBasicBlock, "Ada_LLVMInsertBasicBlock"); + pragma Import (C, LLVMDeleteBasicBlock, "Ada_LLVMDeleteBasicBlock"); + pragma Import + (C, + LLVMGetInstructionParent, + "Ada_LLVMGetInstructionParent"); + pragma Import (C, LLVMGetFirstInstruction, "Ada_LLVMGetFirstInstruction"); + pragma Import (C, LLVMGetLastInstruction, "Ada_LLVMGetLastInstruction"); + pragma Import (C, LLVMGetNextInstruction, "Ada_LLVMGetNextInstruction"); + pragma Import + (C, + LLVMGetPreviousInstruction, + "Ada_LLVMGetPreviousInstruction"); + pragma Import + (C, + LLVMSetInstructionCallConv, + "Ada_LLVMSetInstructionCallConv"); + pragma Import + (C, + LLVMGetInstructionCallConv, + "Ada_LLVMGetInstructionCallConv"); + pragma Import (C, LLVMAddInstrAttribute, "Ada_LLVMAddInstrAttribute"); + pragma Import + (C, + LLVMRemoveInstrAttribute, + "Ada_LLVMRemoveInstrAttribute"); + pragma Import + (C, + LLVMSetInstrParamAlignment, + "Ada_LLVMSetInstrParamAlignment"); + pragma Import (C, LLVMIsTailCall, "Ada_LLVMIsTailCall"); + pragma Import (C, LLVMSetTailCall, "Ada_LLVMSetTailCall"); + pragma Import (C, LLVMAddIncoming, "Ada_LLVMAddIncoming"); + pragma Import (C, LLVMCountIncoming, "Ada_LLVMCountIncoming"); + pragma Import (C, LLVMGetIncomingValue, "Ada_LLVMGetIncomingValue"); + pragma Import (C, LLVMGetIncomingBlock, "Ada_LLVMGetIncomingBlock"); + pragma Import (C, LLVMCreateBuilder, "Ada_LLVMCreateBuilder"); + pragma Import (C, LLVMPositionBuilder, "Ada_LLVMPositionBuilder"); + pragma Import + (C, + LLVMPositionBuilderBefore, + "Ada_LLVMPositionBuilderBefore"); + pragma Import + (C, + LLVMPositionBuilderAtEnd, + "Ada_LLVMPositionBuilderAtEnd"); + pragma Import (C, LLVMGetInsertBlock, "Ada_LLVMGetInsertBlock"); + pragma Import + (C, + LLVMClearInsertionPosition, + "Ada_LLVMClearInsertionPosition"); + pragma Import (C, LLVMInsertIntoBuilder, "Ada_LLVMInsertIntoBuilder"); + pragma Import (C, LLVMDisposeBuilder, "Ada_LLVMDisposeBuilder"); + pragma Import (C, LLVMBuildRetVoid, "Ada_LLVMBuildRetVoid"); + pragma Import (C, LLVMBuildRet, "Ada_LLVMBuildRet"); + pragma Import (C, LLVMBuildBr, "Ada_LLVMBuildBr"); + pragma Import (C, LLVMBuildCondBr, "Ada_LLVMBuildCondBr"); + pragma Import (C, LLVMBuildSwitch, "Ada_LLVMBuildSwitch"); + pragma Import (C, LLVMBuildInvoke, "Ada_LLVMBuildInvoke"); + pragma Import (C, LLVMBuildUnwind, "Ada_LLVMBuildUnwind"); + pragma Import (C, LLVMBuildUnreachable, "Ada_LLVMBuildUnreachable"); + pragma Import (C, LLVMAddCase, "Ada_LLVMAddCase"); + pragma Import (C, 
LLVMBuildAdd, "Ada_LLVMBuildAdd"); + pragma Import (C, LLVMBuildSub, "Ada_LLVMBuildSub"); + pragma Import (C, LLVMBuildMul, "Ada_LLVMBuildMul"); + pragma Import (C, LLVMBuildUDiv, "Ada_LLVMBuildUDiv"); + pragma Import (C, LLVMBuildSDiv, "Ada_LLVMBuildSDiv"); + pragma Import (C, LLVMBuildFDiv, "Ada_LLVMBuildFDiv"); + pragma Import (C, LLVMBuildURem, "Ada_LLVMBuildURem"); + pragma Import (C, LLVMBuildSRem, "Ada_LLVMBuildSRem"); + pragma Import (C, LLVMBuildFRem, "Ada_LLVMBuildFRem"); + pragma Import (C, LLVMBuildShl, "Ada_LLVMBuildShl"); + pragma Import (C, LLVMBuildLShr, "Ada_LLVMBuildLShr"); + pragma Import (C, LLVMBuildAShr, "Ada_LLVMBuildAShr"); + pragma Import (C, LLVMBuildAnd, "Ada_LLVMBuildAnd"); + pragma Import (C, LLVMBuildOr, "Ada_LLVMBuildOr"); + pragma Import (C, LLVMBuildXor, "Ada_LLVMBuildXor"); + pragma Import (C, LLVMBuildNeg, "Ada_LLVMBuildNeg"); + pragma Import (C, LLVMBuildNot, "Ada_LLVMBuildNot"); + pragma Import (C, LLVMBuildMalloc, "Ada_LLVMBuildMalloc"); + pragma Import (C, LLVMBuildArrayMalloc, "Ada_LLVMBuildArrayMalloc"); + pragma Import (C, LLVMBuildAlloca, "Ada_LLVMBuildAlloca"); + pragma Import (C, LLVMBuildArrayAlloca, "Ada_LLVMBuildArrayAlloca"); + pragma Import (C, LLVMBuildFree, "Ada_LLVMBuildFree"); + pragma Import (C, LLVMBuildLoad, "Ada_LLVMBuildLoad"); + pragma Import (C, LLVMBuildStore, "Ada_LLVMBuildStore"); + pragma Import (C, LLVMBuildGEP, "Ada_LLVMBuildGEP"); + pragma Import (C, LLVMBuildTrunc, "Ada_LLVMBuildTrunc"); + pragma Import (C, LLVMBuildZExt, "Ada_LLVMBuildZExt"); + pragma Import (C, LLVMBuildSExt, "Ada_LLVMBuildSExt"); + pragma Import (C, LLVMBuildFPToUI, "Ada_LLVMBuildFPToUI"); + pragma Import (C, LLVMBuildFPToSI, "Ada_LLVMBuildFPToSI"); + pragma Import (C, LLVMBuildUIToFP, "Ada_LLVMBuildUIToFP"); + pragma Import (C, LLVMBuildSIToFP, "Ada_LLVMBuildSIToFP"); + pragma Import (C, LLVMBuildFPTrunc, "Ada_LLVMBuildFPTrunc"); + pragma Import (C, LLVMBuildFPExt, "Ada_LLVMBuildFPExt"); + pragma Import (C, LLVMBuildPtrToInt, "Ada_LLVMBuildPtrToInt"); + pragma Import (C, LLVMBuildIntToPtr, "Ada_LLVMBuildIntToPtr"); + pragma Import (C, LLVMBuildBitCast, "Ada_LLVMBuildBitCast"); + pragma Import (C, LLVMBuildICmp, "Ada_LLVMBuildICmp"); + pragma Import (C, LLVMBuildFCmp, "Ada_LLVMBuildFCmp"); + pragma Import (C, LLVMBuildPhi, "Ada_LLVMBuildPhi"); + pragma Import (C, LLVMBuildCall, "Ada_LLVMBuildCall"); + pragma Import (C, LLVMBuildSelect, "Ada_LLVMBuildSelect"); + pragma Import (C, LLVMBuildVAArg, "Ada_LLVMBuildVAArg"); + pragma Import (C, LLVMBuildExtractElement, "Ada_LLVMBuildExtractElement"); + pragma Import (C, LLVMBuildInsertElement, "Ada_LLVMBuildInsertElement"); + pragma Import (C, LLVMBuildShuffleVector, "Ada_LLVMBuildShuffleVector"); + pragma Import (C, LLVMBuildExtractValue, "Ada_LLVMBuildExtractValue"); + pragma Import (C, LLVMBuildInsertValue, "Ada_LLVMBuildInsertValue"); + pragma Import + (C, + LLVMCreateModuleProviderForExistingModule, + "Ada_LLVMCreateModuleProviderForExistingModule"); + pragma Import + (C, + LLVMDisposeModuleProvider, + "Ada_LLVMDisposeModuleProvider"); + pragma Import + (C, + LLVMCreateMemoryBufferWithContentsOfFile, + "Ada_LLVMCreateMemoryBufferWithContentsOfFile"); + pragma Import + (C, + LLVMCreateMemoryBufferWithSTDIN, + "Ada_LLVMCreateMemoryBufferWithSTDIN"); + pragma Import (C, LLVMDisposeMemoryBuffer, "Ada_LLVMDisposeMemoryBuffer"); + pragma Import (C, LLVMCreatePassManager, "Ada_LLVMCreatePassManager"); + pragma Import + (C, + LLVMCreateFunctionPassManager, + "Ada_LLVMCreateFunctionPassManager"); + pragma Import 
(C, LLVMRunPassManager, "Ada_LLVMRunPassManager"); + pragma Import + (C, + LLVMInitializeFunctionPassManager, + "Ada_LLVMInitializeFunctionPassManager"); + pragma Import + (C, + LLVMRunFunctionPassManager, + "Ada_LLVMRunFunctionPassManager"); + pragma Import + (C, + LLVMFinalizeFunctionPassManager, + "Ada_LLVMFinalizeFunctionPassManager"); + pragma Import (C, LLVMDisposePassManager, "Ada_LLVMDisposePassManager"); + +end llvm.Binding; diff --git a/bindings/ada/llvm/llvm.ads b/bindings/ada/llvm/llvm.ads new file mode 100644 index 0000000000000..d9820f1149104 --- /dev/null +++ b/bindings/ada/llvm/llvm.ads @@ -0,0 +1,493 @@ +-- This file is generated by SWIG. Do *not* modify by hand. +-- + +with Interfaces.C.Extensions; + + +package llvm is + + -- LLVMCtxt + -- + type LLVMCtxt is new Interfaces.C.Extensions.opaque_structure_def; + + type LLVMCtxt_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMCtxt; + + type LLVMCtxt_view is access all llvm.LLVMCtxt; + + -- LLVMContextRef + -- + type LLVMContextRef is access all llvm.LLVMCtxt; + + type LLVMContextRef_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMContextRef; + + type LLVMContextRef_view is access all llvm.LLVMContextRef; + + -- LLVMOpaqueModule + -- + type LLVMOpaqueModule is new Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueModule_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueModule; + + type LLVMOpaqueModule_view is access all llvm.LLVMOpaqueModule; + + -- LLVMModuleRef + -- + type LLVMModuleRef is access all llvm.LLVMOpaqueModule; + + type LLVMModuleRef_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMModuleRef; + + type LLVMModuleRef_view is access all llvm.LLVMModuleRef; + + -- LLVMOpaqueType + -- + type LLVMOpaqueType is new Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueType_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueType; + + type LLVMOpaqueType_view is access all llvm.LLVMOpaqueType; + + -- LLVMTypeRef + -- + type LLVMTypeRef is access all llvm.LLVMOpaqueType; + + type LLVMTypeRef_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMTypeRef; + + type LLVMTypeRef_view is access all llvm.LLVMTypeRef; + + -- LLVMOpaqueTypeHandle + -- + type LLVMOpaqueTypeHandle is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueTypeHandle_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMOpaqueTypeHandle; + + type LLVMOpaqueTypeHandle_view is access all llvm.LLVMOpaqueTypeHandle; + + -- LLVMTypeHandleRef + -- + type LLVMTypeHandleRef is access all llvm.LLVMOpaqueTypeHandle; + + type LLVMTypeHandleRef_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMTypeHandleRef; + + type LLVMTypeHandleRef_view is access all llvm.LLVMTypeHandleRef; + + -- LLVMOpaqueValue + -- + type LLVMOpaqueValue is new Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueValue_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMOpaqueValue; + + type LLVMOpaqueValue_view is access all llvm.LLVMOpaqueValue; + + -- LLVMValueRef + -- + type LLVMValueRef is access all llvm.LLVMOpaqueValue; + + type LLVMValueRef_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMValueRef; + + type LLVMValueRef_view is access all llvm.LLVMValueRef; + + -- LLVMOpaqueBasicBlock + -- + type LLVMOpaqueBasicBlock is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueBasicBlock_array is + array (Interfaces.C.size_t
range <>) + of aliased llvm.LLVMOpaqueBasicBlock; + + type LLVMOpaqueBasicBlock_view is access all llvm.LLVMOpaqueBasicBlock; + + -- LLVMBasicBlockRef + -- + type LLVMBasicBlockRef is access all llvm.LLVMOpaqueBasicBlock; + + type LLVMBasicBlockRef_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMBasicBlockRef; + + type LLVMBasicBlockRef_view is access all llvm.LLVMBasicBlockRef; + + -- LLVMOpaqueBuilder + -- + type LLVMOpaqueBuilder is new Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueBuilder_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMOpaqueBuilder; + + type LLVMOpaqueBuilder_view is access all llvm.LLVMOpaqueBuilder; + + -- LLVMBuilderRef + -- + type LLVMBuilderRef is access all llvm.LLVMOpaqueBuilder; + + type LLVMBuilderRef_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMBuilderRef; + + type LLVMBuilderRef_view is access all llvm.LLVMBuilderRef; + + -- LLVMOpaqueModuleProvider + -- + type LLVMOpaqueModuleProvider is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueModuleProvider_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMOpaqueModuleProvider; + + type LLVMOpaqueModuleProvider_view is access all + llvm.LLVMOpaqueModuleProvider; + + -- LLVMModuleProviderRef + -- + type LLVMModuleProviderRef is access all llvm.LLVMOpaqueModuleProvider; + + type LLVMModuleProviderRef_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMModuleProviderRef; + + type LLVMModuleProviderRef_view is access all llvm.LLVMModuleProviderRef; + + -- LLVMOpaqueMemoryBuffer + -- + type LLVMOpaqueMemoryBuffer is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueMemoryBuffer_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMOpaqueMemoryBuffer; + + type LLVMOpaqueMemoryBuffer_view is access all llvm.LLVMOpaqueMemoryBuffer; + + -- LLVMMemoryBufferRef + -- + type LLVMMemoryBufferRef is access all llvm.LLVMOpaqueMemoryBuffer; + + type LLVMMemoryBufferRef_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMMemoryBufferRef; + + type LLVMMemoryBufferRef_view is access all llvm.LLVMMemoryBufferRef; + + -- LLVMOpaquePassManager + -- + type LLVMOpaquePassManager is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaquePassManager_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMOpaquePassManager; + + type LLVMOpaquePassManager_view is access all llvm.LLVMOpaquePassManager; + + -- LLVMPassManagerRef + -- + type LLVMPassManagerRef is access all llvm.LLVMOpaquePassManager; + + type LLVMPassManagerRef_array is + array (Interfaces.C.size_t range <>) + of aliased llvm.LLVMPassManagerRef; + + type LLVMPassManagerRef_view is access all llvm.LLVMPassManagerRef; + + -- LLVMAttribute + -- + type LLVMAttribute is ( + LLVMZExtAttribute, + LLVMSExtAttribute, + LLVMNoReturnAttribute, + LLVMInRegAttribute, + LLVMStructRetAttribute, + LLVMNoUnwindAttribute, + LLVMNoAliasAttribute, + LLVMByValAttribute, + LLVMNestAttribute, + LLVMReadNoneAttribute, + LLVMReadOnlyAttribute, + LLVMNoInlineAttribute, + LLVMAlwaysInlineAttribute, + LLVMOptimizeForSizeAttribute, + LLVMStackProtectAttribute, + LLVMStackProtectReqAttribute, + LLVMNoCaptureAttribute, + LLVMNoRedZoneAttribute, + LLVMNoImplicitFloatAttribute, + LLVMNakedAttribute); + + for LLVMAttribute use + (LLVMZExtAttribute => 1, + LLVMSExtAttribute => 2, + LLVMNoReturnAttribute => 4, + LLVMInRegAttribute => 8, + LLVMStructRetAttribute => 16, + 
LLVMNoUnwindAttribute => 32, + LLVMNoAliasAttribute => 64, + LLVMByValAttribute => 128, + LLVMNestAttribute => 256, + LLVMReadNoneAttribute => 512, + LLVMReadOnlyAttribute => 1024, + LLVMNoInlineAttribute => 2048, + LLVMAlwaysInlineAttribute => 4096, + LLVMOptimizeForSizeAttribute => 8192, + LLVMStackProtectAttribute => 16384, + LLVMStackProtectReqAttribute => 32768, + LLVMNoCaptureAttribute => 2097152, + LLVMNoRedZoneAttribute => 4194304, + LLVMNoImplicitFloatAttribute => 8388608, + LLVMNakedAttribute => 16777216); + + pragma Convention (C, LLVMAttribute); + + type LLVMAttribute_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMAttribute; + + type LLVMAttribute_view is access all llvm.LLVMAttribute; + + -- LLVMTypeKind + -- + type LLVMTypeKind is ( + LLVMVoidTypeKind, + LLVMFloatTypeKind, + LLVMDoubleTypeKind, + LLVMX86_FP80TypeKind, + LLVMFP128TypeKind, + LLVMPPC_FP128TypeKind, + LLVMLabelTypeKind, + LLVMIntegerTypeKind, + LLVMFunctionTypeKind, + LLVMStructTypeKind, + LLVMArrayTypeKind, + LLVMPointerTypeKind, + LLVMOpaqueTypeKind, + LLVMVectorTypeKind, + LLVMMetadataTypeKind); + + for LLVMTypeKind use + (LLVMVoidTypeKind => 0, + LLVMFloatTypeKind => 1, + LLVMDoubleTypeKind => 2, + LLVMX86_FP80TypeKind => 3, + LLVMFP128TypeKind => 4, + LLVMPPC_FP128TypeKind => 5, + LLVMLabelTypeKind => 6, + LLVMIntegerTypeKind => 7, + LLVMFunctionTypeKind => 8, + LLVMStructTypeKind => 9, + LLVMArrayTypeKind => 10, + LLVMPointerTypeKind => 11, + LLVMOpaqueTypeKind => 12, + LLVMVectorTypeKind => 13, + LLVMMetadataTypeKind => 14); + + pragma Convention (C, LLVMTypeKind); + + type LLVMTypeKind_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMTypeKind; + + type LLVMTypeKind_view is access all llvm.LLVMTypeKind; + + -- LLVMLinkage + -- + type LLVMLinkage is ( + LLVMExternalLinkage, + LLVMAvailableExternallyLinkage, + LLVMLinkOnceAnyLinkage, + LLVMLinkOnceODRLinkage, + LLVMWeakAnyLinkage, + LLVMWeakODRLinkage, + LLVMAppendingLinkage, + LLVMInternalLinkage, + LLVMPrivateLinkage, + LLVMDLLImportLinkage, + LLVMDLLExportLinkage, + LLVMExternalWeakLinkage, + LLVMGhostLinkage, + LLVMCommonLinkage, + LLVMLinkerPrivateLinkage); + + for LLVMLinkage use + (LLVMExternalLinkage => 0, + LLVMAvailableExternallyLinkage => 1, + LLVMLinkOnceAnyLinkage => 2, + LLVMLinkOnceODRLinkage => 3, + LLVMWeakAnyLinkage => 4, + LLVMWeakODRLinkage => 5, + LLVMAppendingLinkage => 6, + LLVMInternalLinkage => 7, + LLVMPrivateLinkage => 8, + LLVMDLLImportLinkage => 9, + LLVMDLLExportLinkage => 10, + LLVMExternalWeakLinkage => 11, + LLVMGhostLinkage => 12, + LLVMCommonLinkage => 13, + LLVMLinkerPrivateLinkage => 14); + + pragma Convention (C, LLVMLinkage); + + type LLVMLinkage_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMLinkage; + + type LLVMLinkage_view is access all llvm.LLVMLinkage; + + -- LLVMVisibility + -- + type LLVMVisibility is ( + LLVMDefaultVisibility, + LLVMHiddenVisibility, + LLVMProtectedVisibility); + + for LLVMVisibility use + (LLVMDefaultVisibility => 0, + LLVMHiddenVisibility => 1, + LLVMProtectedVisibility => 2); + + pragma Convention (C, LLVMVisibility); + + type LLVMVisibility_array is + array (Interfaces.C.size_t range <>) of aliased llvm.LLVMVisibility; + + type LLVMVisibility_view is access all llvm.LLVMVisibility; + + -- LLVMCallConv + -- + type LLVMCallConv is ( + LLVMCCallConv, + LLVMFastCallConv, + LLVMColdCallConv, + LLVMX86StdcallCallConv, + LLVMX86FastcallCallConv); + + for LLVMCallConv use + (LLVMCCallConv => 0, + LLVMFastCallConv => 8, + LLVMColdCallConv 
=> 9,
+      LLVMX86StdcallCallConv  => 64,
+      LLVMX86FastcallCallConv => 65);
+
+   pragma Convention (C, LLVMCallConv);
+
+   type LLVMCallConv_array is
+     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMCallConv;
+
+   type LLVMCallConv_view is access all llvm.LLVMCallConv;
+
+   -- LLVMIntPredicate
+   --
+   type LLVMIntPredicate is (
+      LLVMIntEQ,
+      LLVMIntNE,
+      LLVMIntUGT,
+      LLVMIntUGE,
+      LLVMIntULT,
+      LLVMIntULE,
+      LLVMIntSGT,
+      LLVMIntSGE,
+      LLVMIntSLT,
+      LLVMIntSLE);
+
+   for LLVMIntPredicate use
+     (LLVMIntEQ  => 32,
+      LLVMIntNE  => 33,
+      LLVMIntUGT => 34,
+      LLVMIntUGE => 35,
+      LLVMIntULT => 36,
+      LLVMIntULE => 37,
+      LLVMIntSGT => 38,
+      LLVMIntSGE => 39,
+      LLVMIntSLT => 40,
+      LLVMIntSLE => 41);
+
+   pragma Convention (C, LLVMIntPredicate);
+
+   type LLVMIntPredicate_array is
+     array (Interfaces.C.size_t range <>) of aliased llvm.LLVMIntPredicate;
+
+   type LLVMIntPredicate_view is access all llvm.LLVMIntPredicate;
+
+   -- LLVMRealPredicate
+   --
+   type LLVMRealPredicate is (
+      LLVMRealPredicateFalse,
+      LLVMRealOEQ,
+      LLVMRealOGT,
+      LLVMRealOGE,
+      LLVMRealOLT,
+      LLVMRealOLE,
+      LLVMRealONE,
+      LLVMRealORD,
+      LLVMRealUNO,
+      LLVMRealUEQ,
+      LLVMRealUGT,
+      LLVMRealUGE,
+      LLVMRealULT,
+      LLVMRealULE,
+      LLVMRealUNE,
+      LLVMRealPredicateTrue);
+
+   for LLVMRealPredicate use
+     (LLVMRealPredicateFalse => 0,
+      LLVMRealOEQ            => 1,
+      LLVMRealOGT            => 2,
+      LLVMRealOGE            => 3,
+      LLVMRealOLT            => 4,
+      LLVMRealOLE            => 5,
+      LLVMRealONE            => 6,
+      LLVMRealORD            => 7,
+      LLVMRealUNO            => 8,
+      LLVMRealUEQ            => 9,
+      LLVMRealUGT            => 10,
+      LLVMRealUGE            => 11,
+      LLVMRealULT            => 12,
+      LLVMRealULE            => 13,
+      LLVMRealUNE            => 14,
+      LLVMRealPredicateTrue  => 15);
+
+   pragma Convention (C, LLVMRealPredicate);
+
+   type LLVMRealPredicate_array is
+     array (Interfaces.C.size_t range <>)
+     of aliased llvm.LLVMRealPredicate;
+
+   type LLVMRealPredicate_view is access all llvm.LLVMRealPredicate;
+
+   -- ModuleProvider
+   --
+   type ModuleProvider is new Interfaces.C.Extensions.incomplete_class_def;
+
+   type ModuleProvider_array is
+     array (Interfaces.C.size_t range <>) of aliased llvm.ModuleProvider;
+
+   type ModuleProvider_view is access all llvm.ModuleProvider;
+
+   -- MemoryBuffer
+   --
+   type MemoryBuffer is new Interfaces.C.Extensions.incomplete_class_def;
+
+   type MemoryBuffer_array is
+     array (Interfaces.C.size_t range <>) of aliased llvm.MemoryBuffer;
+
+   type MemoryBuffer_view is access all llvm.MemoryBuffer;
+
+   -- PassManagerBase
+   --
+   type PassManagerBase is new Interfaces.C.Extensions.incomplete_class_def;
+
+   type PassManagerBase_array is
+     array (Interfaces.C.size_t range <>) of aliased llvm.PassManagerBase;
+
+   type PassManagerBase_view is access all llvm.PassManagerBase;
+
+end llvm;
diff --git a/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads b/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads
new file mode 100644
index 0000000000000..7c0b086b4282e
--- /dev/null
+++ b/bindings/ada/llvm/llvm_link_time_optimizer-binding.ads
@@ -0,0 +1,207 @@
+-- This file is generated by SWIG. Do *not* modify by hand.
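(For orientation: the spec that follows is a one-to-one Ada view of the llvm-c
LTO interface, and each subprogram is backed by an Ada_lto_* shim in
llvm_linktimeoptimizer_wrap.cxx further down in this patch.  A minimal C++
sketch of the same entry points, assuming only llvm-c/lto.h from this revision
and a bitcode file named on the command line:

    // List the symbols of a bitcode file through the llvm-c LTO API that the
    // Ada spec below re-exports (lto_module_create and friends).
    #include "llvm-c/lto.h"
    #include <cstdio>

    int main(int argc, char **argv) {
      if (argc != 2) {
        std::fprintf(stderr, "usage: %s file.bc\n", argv[0]);
        return 1;
      }
      lto_module_t mod = lto_module_create(argv[1]);   // Ada: lto_module_create
      if (!mod) {                                      // NULL handle: creation failed
        std::fprintf(stderr, "error: %s\n", lto_get_error_message());
        return 1;
      }
      unsigned n = lto_module_get_num_symbols(mod);    // Ada: lto_module_get_num_symbols
      for (unsigned i = 0; i != n; ++i)
        std::printf("%s\n", lto_module_get_symbol_name(mod, i));
      lto_module_dispose(mod);                         // Ada: lto_module_dispose
      return 0;
    }

The Ada profiles keep these C signatures exactly; only C strings and booleans
are mapped onto Interfaces.C.Strings.chars_ptr and Interfaces.C.Extensions.bool.)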
+-- + +with Interfaces.C.Strings; + + +package LLVM_link_time_Optimizer.Binding is + + LTO_H : constant := 1; + LTO_API_VERSION : constant := 3; + + function lto_get_version return Interfaces.C.Strings.chars_ptr; + + function lto_get_error_message return Interfaces.C.Strings.chars_ptr; + + function lto_module_is_object_file + (path : in Interfaces.C.Strings.chars_ptr) + return Interfaces.C.Extensions.bool; + + function lto_module_is_object_file_for_target + (path : in Interfaces.C.Strings.chars_ptr; + target_triple_prefix : in Interfaces.C.Strings.chars_ptr) + return Interfaces.C.Extensions.bool; + + function lto_module_is_object_file_in_memory + (mem : access Interfaces.C.Extensions.void; + length : in Interfaces.C.size_t) + return Interfaces.C.Extensions.bool; + + function lto_module_is_object_file_in_memory_for_target + (mem : access Interfaces.C.Extensions.void; + length : in Interfaces.C.size_t; + target_triple_prefix : in Interfaces.C.Strings.chars_ptr) + return Interfaces.C.Extensions.bool; + + function lto_module_create + (path : in Interfaces.C.Strings.chars_ptr) + return LLVM_link_time_Optimizer.lto_module_t; + + function lto_module_create_from_memory + (mem : access Interfaces.C.Extensions.void; + length : in Interfaces.C.size_t) + return LLVM_link_time_Optimizer.lto_module_t; + + procedure lto_module_dispose + (the_mod : in LLVM_link_time_Optimizer.lto_module_t); + + function lto_module_get_target_triple + (the_mod : in LLVM_link_time_Optimizer.lto_module_t) + return Interfaces.C.Strings.chars_ptr; + + function lto_module_get_num_symbols + (the_mod : in LLVM_link_time_Optimizer.lto_module_t) + return Interfaces.C.unsigned; + + function lto_module_get_symbol_name + (the_mod : in LLVM_link_time_Optimizer.lto_module_t; + index : in Interfaces.C.unsigned) + return Interfaces.C.Strings.chars_ptr; + + function lto_module_get_symbol_attribute + (the_mod : in LLVM_link_time_Optimizer.lto_module_t; + index : in Interfaces.C.unsigned) + return LLVM_link_time_Optimizer.lto_symbol_attributes; + + function lto_codegen_create return LLVM_link_time_Optimizer.lto_code_gen_t; + + procedure lto_codegen_dispose + (arg_1 : in LLVM_link_time_Optimizer.lto_code_gen_t); + + function lto_codegen_add_module + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + the_mod : in LLVM_link_time_Optimizer.lto_module_t) + return Interfaces.C.Extensions.bool; + + function lto_codegen_set_debug_model + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + arg_1 : in LLVM_link_time_Optimizer.lto_debug_model) + return Interfaces.C.Extensions.bool; + + function lto_codegen_set_pic_model + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + arg_1 : in LLVM_link_time_Optimizer.lto_codegen_model) + return Interfaces.C.Extensions.bool; + + procedure lto_codegen_set_gcc_path + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + path : in Interfaces.C.Strings.chars_ptr); + + procedure lto_codegen_set_assembler_path + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + path : in Interfaces.C.Strings.chars_ptr); + + procedure lto_codegen_add_must_preserve_symbol + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + symbol : in Interfaces.C.Strings.chars_ptr); + + function lto_codegen_write_merged_modules + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + path : in Interfaces.C.Strings.chars_ptr) + return Interfaces.C.Extensions.bool; + + function lto_codegen_compile + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + length : access Interfaces.C.size_t) + return access Interfaces.C.Extensions.void; + + procedure 
lto_codegen_debug_options + (cg : in LLVM_link_time_Optimizer.lto_code_gen_t; + arg_1 : in Interfaces.C.Strings.chars_ptr); + + function llvm_create_optimizer return + LLVM_link_time_Optimizer.llvm_lto_t; + + procedure llvm_destroy_optimizer + (lto : in LLVM_link_time_Optimizer.llvm_lto_t); + + function llvm_read_object_file + (lto : in LLVM_link_time_Optimizer.llvm_lto_t; + input_filename : in Interfaces.C.Strings.chars_ptr) + return LLVM_link_time_Optimizer.llvm_lto_status_t; + + function llvm_optimize_modules + (lto : in LLVM_link_time_Optimizer.llvm_lto_t; + output_filename : in Interfaces.C.Strings.chars_ptr) + return LLVM_link_time_Optimizer.llvm_lto_status_t; + +private + + pragma Import (C, lto_get_version, "Ada_lto_get_version"); + pragma Import (C, lto_get_error_message, "Ada_lto_get_error_message"); + pragma Import + (C, + lto_module_is_object_file, + "Ada_lto_module_is_object_file"); + pragma Import + (C, + lto_module_is_object_file_for_target, + "Ada_lto_module_is_object_file_for_target"); + pragma Import + (C, + lto_module_is_object_file_in_memory, + "Ada_lto_module_is_object_file_in_memory"); + pragma Import + (C, + lto_module_is_object_file_in_memory_for_target, + "Ada_lto_module_is_object_file_in_memory_for_target"); + pragma Import (C, lto_module_create, "Ada_lto_module_create"); + pragma Import + (C, + lto_module_create_from_memory, + "Ada_lto_module_create_from_memory"); + pragma Import (C, lto_module_dispose, "Ada_lto_module_dispose"); + pragma Import + (C, + lto_module_get_target_triple, + "Ada_lto_module_get_target_triple"); + pragma Import + (C, + lto_module_get_num_symbols, + "Ada_lto_module_get_num_symbols"); + pragma Import + (C, + lto_module_get_symbol_name, + "Ada_lto_module_get_symbol_name"); + pragma Import + (C, + lto_module_get_symbol_attribute, + "Ada_lto_module_get_symbol_attribute"); + pragma Import (C, lto_codegen_create, "Ada_lto_codegen_create"); + pragma Import (C, lto_codegen_dispose, "Ada_lto_codegen_dispose"); + pragma Import (C, lto_codegen_add_module, "Ada_lto_codegen_add_module"); + pragma Import + (C, + lto_codegen_set_debug_model, + "Ada_lto_codegen_set_debug_model"); + pragma Import + (C, + lto_codegen_set_pic_model, + "Ada_lto_codegen_set_pic_model"); + pragma Import + (C, + lto_codegen_set_gcc_path, + "Ada_lto_codegen_set_gcc_path"); + pragma Import + (C, + lto_codegen_set_assembler_path, + "Ada_lto_codegen_set_assembler_path"); + pragma Import + (C, + lto_codegen_add_must_preserve_symbol, + "Ada_lto_codegen_add_must_preserve_symbol"); + pragma Import + (C, + lto_codegen_write_merged_modules, + "Ada_lto_codegen_write_merged_modules"); + pragma Import (C, lto_codegen_compile, "Ada_lto_codegen_compile"); + pragma Import + (C, + lto_codegen_debug_options, + "Ada_lto_codegen_debug_options"); + pragma Import (C, llvm_create_optimizer, "Ada_llvm_create_optimizer"); + pragma Import (C, llvm_destroy_optimizer, "Ada_llvm_destroy_optimizer"); + pragma Import (C, llvm_read_object_file, "Ada_llvm_read_object_file"); + pragma Import (C, llvm_optimize_modules, "Ada_llvm_optimize_modules"); + +end LLVM_link_time_Optimizer.Binding; diff --git a/bindings/ada/llvm/llvm_link_time_optimizer.ads b/bindings/ada/llvm/llvm_link_time_optimizer.ads new file mode 100644 index 0000000000000..c27f7c5893b61 --- /dev/null +++ b/bindings/ada/llvm/llvm_link_time_optimizer.ads @@ -0,0 +1,184 @@ +-- This file is generated by SWIG. Do *not* modify by hand. 
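(The representation clauses in this package pin every literal to its exact
lto.h value -- for instance LTO_SYMBOL_SCOPE_MASK => 14336 is 0x3800 -- so the
enumerations can cross the C ABI unchanged.  A sketch of the matching C++-side
codegen flow over the handle types declared below (lto_module_t,
lto_code_gen_t, lto_codegen_model), assuming llvm-c/lto.h from this revision;
the input paths are illustrative:

    // Merge bitcode inputs and produce one optimized native object in memory.
    #include "llvm-c/lto.h"
    #include <cstdio>

    int main(int argc, char **argv) {
      lto_code_gen_t cg = lto_codegen_create();
      for (int i = 1; i < argc; ++i) {
        lto_module_t m = lto_module_create(argv[i]);
        if (!m || lto_codegen_add_module(cg, m)) {     // true return = failure
          std::fprintf(stderr, "error: %s\n", lto_get_error_message());
          return 1;
        }
      }
      lto_codegen_set_pic_model(cg, LTO_CODEGEN_PIC_MODEL_DYNAMIC);
      size_t len = 0;
      const void *obj = lto_codegen_compile(cg, &len); // merged, optimized object
      std::printf("generated %lu bytes\n", obj ? (unsigned long)len : 0ul);
      lto_codegen_dispose(cg);                         // invalidates cg
      return 0;
    }

Per lto.h, a true result from the lto_codegen_* entry points signals failure
and lto_get_error_message retrieves the reason; llvm_lto_status_t below plays
the same role for the older llvm_read_object_file/llvm_optimize_modules path.)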
+-- + +with Interfaces.C.Extensions; + + +package LLVM_link_time_Optimizer is + + -- lto_symbol_attributes + -- + type lto_symbol_attributes is ( + LTO_SYMBOL_ALIGNMENT_MASK, + LTO_SYMBOL_PERMISSIONS_RODATA, + LTO_SYMBOL_PERMISSIONS_CODE, + LTO_SYMBOL_PERMISSIONS_DATA, + LTO_SYMBOL_PERMISSIONS_MASK, + LTO_SYMBOL_DEFINITION_REGULAR, + LTO_SYMBOL_DEFINITION_TENTATIVE, + LTO_SYMBOL_DEFINITION_WEAK, + LTO_SYMBOL_DEFINITION_UNDEFINED, + LTO_SYMBOL_DEFINITION_WEAKUNDEF, + LTO_SYMBOL_DEFINITION_MASK, + LTO_SYMBOL_SCOPE_INTERNAL, + LTO_SYMBOL_SCOPE_HIDDEN, + LTO_SYMBOL_SCOPE_DEFAULT, + LTO_SYMBOL_SCOPE_PROTECTED, + LTO_SYMBOL_SCOPE_MASK); + + for lto_symbol_attributes use + (LTO_SYMBOL_ALIGNMENT_MASK => 31, + LTO_SYMBOL_PERMISSIONS_RODATA => 128, + LTO_SYMBOL_PERMISSIONS_CODE => 160, + LTO_SYMBOL_PERMISSIONS_DATA => 192, + LTO_SYMBOL_PERMISSIONS_MASK => 224, + LTO_SYMBOL_DEFINITION_REGULAR => 256, + LTO_SYMBOL_DEFINITION_TENTATIVE => 512, + LTO_SYMBOL_DEFINITION_WEAK => 768, + LTO_SYMBOL_DEFINITION_UNDEFINED => 1024, + LTO_SYMBOL_DEFINITION_WEAKUNDEF => 1280, + LTO_SYMBOL_DEFINITION_MASK => 1792, + LTO_SYMBOL_SCOPE_INTERNAL => 2048, + LTO_SYMBOL_SCOPE_HIDDEN => 4096, + LTO_SYMBOL_SCOPE_DEFAULT => 6144, + LTO_SYMBOL_SCOPE_PROTECTED => 8192, + LTO_SYMBOL_SCOPE_MASK => 14336); + + pragma Convention (C, lto_symbol_attributes); + + type lto_symbol_attributes_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_link_time_Optimizer.lto_symbol_attributes; + + type lto_symbol_attributes_view is access all + LLVM_link_time_Optimizer.lto_symbol_attributes; + + -- lto_debug_model + -- + type lto_debug_model is (LTO_DEBUG_MODEL_NONE, LTO_DEBUG_MODEL_DWARF); + + for lto_debug_model use + (LTO_DEBUG_MODEL_NONE => 0, + LTO_DEBUG_MODEL_DWARF => 1); + + pragma Convention (C, lto_debug_model); + + type lto_debug_model_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_link_time_Optimizer.lto_debug_model; + + type lto_debug_model_view is access all + LLVM_link_time_Optimizer.lto_debug_model; + + -- lto_codegen_model + -- + type lto_codegen_model is ( + LTO_CODEGEN_PIC_MODEL_STATIC, + LTO_CODEGEN_PIC_MODEL_DYNAMIC, + LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC); + + for lto_codegen_model use + (LTO_CODEGEN_PIC_MODEL_STATIC => 0, + LTO_CODEGEN_PIC_MODEL_DYNAMIC => 1, + LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC => 2); + + pragma Convention (C, lto_codegen_model); + + type lto_codegen_model_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_link_time_Optimizer.lto_codegen_model; + + type lto_codegen_model_view is access all + LLVM_link_time_Optimizer.lto_codegen_model; + + -- LTOModule + -- + type LTOModule is new Interfaces.C.Extensions.opaque_structure_def; + + type LTOModule_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_link_time_Optimizer.LTOModule; + + type LTOModule_view is access all LLVM_link_time_Optimizer.LTOModule; + + -- lto_module_t + -- + type lto_module_t is access all LLVM_link_time_Optimizer.LTOModule; + + type lto_module_t_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_link_time_Optimizer.lto_module_t; + + type lto_module_t_view is access all LLVM_link_time_Optimizer.lto_module_t; + + -- LTOCodeGenerator + -- + type LTOCodeGenerator is new Interfaces.C.Extensions.opaque_structure_def; + + type LTOCodeGenerator_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_link_time_Optimizer.LTOCodeGenerator; + + type LTOCodeGenerator_view is access all + LLVM_link_time_Optimizer.LTOCodeGenerator; + + -- lto_code_gen_t + -- + type 
lto_code_gen_t is access all LLVM_link_time_Optimizer.LTOCodeGenerator;
+
+   type lto_code_gen_t_array is
+     array (Interfaces.C.size_t range <>)
+     of aliased LLVM_link_time_Optimizer.lto_code_gen_t;
+
+   type lto_code_gen_t_view is access all
+     LLVM_link_time_Optimizer.lto_code_gen_t;
+
+   -- llvm_lto_status_t
+   --
+   type llvm_lto_status_t is (
+      LLVM_LTO_UNKNOWN,
+      LLVM_LTO_OPT_SUCCESS,
+      LLVM_LTO_READ_SUCCESS,
+      LLVM_LTO_READ_FAILURE,
+      LLVM_LTO_WRITE_FAILURE,
+      LLVM_LTO_NO_TARGET,
+      LLVM_LTO_NO_WORK,
+      LLVM_LTO_MODULE_MERGE_FAILURE,
+      LLVM_LTO_ASM_FAILURE,
+      LLVM_LTO_NULL_OBJECT);
+
+   for llvm_lto_status_t use
+     (LLVM_LTO_UNKNOWN              => 0,
+      LLVM_LTO_OPT_SUCCESS          => 1,
+      LLVM_LTO_READ_SUCCESS         => 2,
+      LLVM_LTO_READ_FAILURE         => 3,
+      LLVM_LTO_WRITE_FAILURE        => 4,
+      LLVM_LTO_NO_TARGET            => 5,
+      LLVM_LTO_NO_WORK              => 6,
+      LLVM_LTO_MODULE_MERGE_FAILURE => 7,
+      LLVM_LTO_ASM_FAILURE          => 8,
+      LLVM_LTO_NULL_OBJECT          => 9);
+
+   pragma Convention (C, llvm_lto_status_t);
+
+   type llvm_lto_status_t_array is
+     array (Interfaces.C.size_t range <>)
+     of aliased LLVM_link_time_Optimizer.llvm_lto_status_t;
+
+   type llvm_lto_status_t_view is access all
+     LLVM_link_time_Optimizer.llvm_lto_status_t;
+
+
+   -- llvm_lto_t
+   --
+   type llvm_lto_t is access all Interfaces.C.Extensions.void;
+
+   type llvm_lto_t_array is
+     array (Interfaces.C.size_t range <>)
+     of aliased LLVM_link_time_Optimizer.llvm_lto_t;
+
+   type llvm_lto_t_view is access all
+     LLVM_link_time_Optimizer.llvm_lto_t;
+
+
+end LLVM_link_time_Optimizer;
diff --git a/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx b/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx
new file mode 100644
index 0000000000000..eb2e7ab15633e
--- /dev/null
+++ b/bindings/ada/llvm/llvm_linktimeoptimizer_wrap.cxx
@@ -0,0 +1,923 @@
+/* ----------------------------------------------------------------------------
+ * This file was automatically generated by SWIG (http://www.swig.org).
+ * Version 1.3.36
+ *
+ * This file is not intended to be easily readable and contains a number of
+ * coding conventions designed to improve portability and efficiency. Do not make
+ * changes to this file unless you know what you are doing--modify the SWIG
+ * interface file instead.
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+  T *tt;
+public:
+  SwigValueWrapper() : tt(0) { }
+  SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+  SwigValueWrapper(const T& t) : tt(new T(t)) { }
+  ~SwigValueWrapper() { delete tt; }
+  SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+  operator T&() const { return *tt; }
+  T *operator&() { return tt; }
+private:
+  SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+  return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+  SWIG_AdaException,
+  SWIG_AdaOutOfMemoryException,
+  SWIG_AdaIndexOutOfRangeException,
+  SWIG_AdaDivideByZeroException,
+  SWIG_AdaArgumentOutOfRangeException,
+  SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+  SWIG_AdaExceptionCodes code;
+
SWIG_AdaExceptionCallback_t callback; +} + SWIG_AdaExceptions_t; + + +static +SWIG_AdaExceptions_t +SWIG_ada_exceptions[] = +{ + { SWIG_AdaException, NULL }, + { SWIG_AdaOutOfMemoryException, NULL }, + { SWIG_AdaIndexOutOfRangeException, NULL }, + { SWIG_AdaDivideByZeroException, NULL }, + { SWIG_AdaArgumentOutOfRangeException, NULL }, + { SWIG_AdaNullReferenceException, NULL } +}; + + +static +void +SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) +{ + SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback; + if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) { + callback = SWIG_ada_exceptions[code].callback; + } + callback(msg); +} + + + +#ifdef __cplusplus +extern "C" +#endif + +DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_link_time_Optimizer (SWIG_AdaExceptionCallback_t systemException, + SWIG_AdaExceptionCallback_t outOfMemory, + SWIG_AdaExceptionCallback_t indexOutOfRange, + SWIG_AdaExceptionCallback_t divideByZero, + SWIG_AdaExceptionCallback_t argumentOutOfRange, + SWIG_AdaExceptionCallback_t nullReference) +{ + SWIG_ada_exceptions [SWIG_AdaException].callback = systemException; + SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory; + SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange; + SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero; + SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange; + SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference; +} + + +/* Callback for returning strings to Ada without leaking memory */ + +typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *); +static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL; + + + +/* probably obsolete ... 
+#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_link_time_Optimizer(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +#include "llvm-c/lto.h" +#include "llvm-c/LinkTimeOptimizer.h" + + + +// struct LLVMCtxt; + + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport char * SWIGSTDCALL Ada_lto_get_version ( + ) +{ + char * jresult ; + char *result = 0 ; + + result = (char *)lto_get_version(); + jresult = result; + + + + return jresult; + +} + + + +DllExport char * SWIGSTDCALL Ada_lto_get_error_message ( + ) +{ + char * jresult ; + char *result = 0 ; + + result = (char *)lto_get_error_message(); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file ( + char * jarg1 + ) +{ + unsigned int jresult ; + char *arg1 = (char *) 0 ; + bool result; + + arg1 = jarg1; + + result = (bool)lto_module_is_object_file((char const *)arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_for_target ( + char * jarg1 + , + + char * jarg2 + ) +{ + unsigned int jresult ; + char *arg1 = (char *) 0 ; + char *arg2 = (char *) 0 ; + bool result; + + arg1 = jarg1; + + arg2 = jarg2; + + result = (bool)lto_module_is_object_file_for_target((char const *)arg1,(char const *)arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_in_memory ( + void* jarg1 + , + + size_t jarg2 + ) +{ + unsigned int jresult ; + void *arg1 = (void *) 0 ; + size_t arg2 ; + bool result; + + arg1 = (void *)jarg1; + + + arg2 = (size_t) jarg2; + + + result = (bool)lto_module_is_object_file_in_memory((void const *)arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_module_is_object_file_in_memory_for_target ( + void* jarg1 + , + + size_t jarg2 + , + + char * jarg3 + ) +{ + unsigned int jresult ; + void *arg1 = (void *) 0 ; + size_t arg2 ; + char *arg3 = (char *) 0 ; + bool result; + + arg1 = (void *)jarg1; + + + arg2 = (size_t) jarg2; + + + arg3 = jarg3; + + result = (bool)lto_module_is_object_file_in_memory_for_target((void const *)arg1,arg2,(char const *)arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_lto_module_create ( + char * jarg1 + ) +{ + void * jresult ; + char *arg1 = (char *) 0 ; + lto_module_t result; + + arg1 = jarg1; + + result = (lto_module_t)lto_module_create((char const *)arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_lto_module_create_from_memory ( + void* jarg1 + , + + size_t jarg2 + ) +{ + void * jresult ; + void *arg1 = (void *) 0 ; + size_t arg2 ; + lto_module_t result; + + arg1 = (void *)jarg1; + + + arg2 = (size_t) jarg2; + + + result = (lto_module_t)lto_module_create_from_memory((void const *)arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_lto_module_dispose ( + void * jarg1 + ) +{ + lto_module_t arg1 = (lto_module_t) 0 ; + + arg1 = (lto_module_t)jarg1; + + lto_module_dispose(arg1); + + +} + + + +DllExport char * SWIGSTDCALL 
Ada_lto_module_get_target_triple ( + void * jarg1 + ) +{ + char * jresult ; + lto_module_t arg1 = (lto_module_t) 0 ; + char *result = 0 ; + + arg1 = (lto_module_t)jarg1; + + result = (char *)lto_module_get_target_triple(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_module_get_num_symbols ( + void * jarg1 + ) +{ + unsigned int jresult ; + lto_module_t arg1 = (lto_module_t) 0 ; + unsigned int result; + + arg1 = (lto_module_t)jarg1; + + result = (unsigned int)lto_module_get_num_symbols(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport char * SWIGSTDCALL Ada_lto_module_get_symbol_name ( + void * jarg1 + , + + unsigned int jarg2 + ) +{ + char * jresult ; + lto_module_t arg1 = (lto_module_t) 0 ; + unsigned int arg2 ; + char *result = 0 ; + + arg1 = (lto_module_t)jarg1; + + + arg2 = (unsigned int) jarg2; + + + result = (char *)lto_module_get_symbol_name(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_lto_module_get_symbol_attribute ( + void * jarg1 + , + + unsigned int jarg2 + ) +{ + int jresult ; + lto_module_t arg1 = (lto_module_t) 0 ; + unsigned int arg2 ; + lto_symbol_attributes result; + + arg1 = (lto_module_t)jarg1; + + + arg2 = (unsigned int) jarg2; + + + result = (lto_symbol_attributes)lto_module_get_symbol_attribute(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_lto_codegen_create ( + ) +{ + void * jresult ; + lto_code_gen_t result; + + result = (lto_code_gen_t)lto_codegen_create(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_lto_codegen_dispose ( + void * jarg1 + ) +{ + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + + arg1 = (lto_code_gen_t)jarg1; + + lto_codegen_dispose(arg1); + + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_add_module ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned int jresult ; + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + lto_module_t arg2 = (lto_module_t) 0 ; + bool result; + + arg1 = (lto_code_gen_t)jarg1; + + arg2 = (lto_module_t)jarg2; + + result = (bool)lto_codegen_add_module(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_set_debug_model ( + void * jarg1 + , + + int jarg2 + ) +{ + unsigned int jresult ; + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + lto_debug_model arg2 ; + bool result; + + arg1 = (lto_code_gen_t)jarg1; + + arg2 = (lto_debug_model) jarg2; + + result = (bool)lto_codegen_set_debug_model(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_set_pic_model ( + void * jarg1 + , + + int jarg2 + ) +{ + unsigned int jresult ; + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + lto_codegen_model arg2 ; + bool result; + + arg1 = (lto_code_gen_t)jarg1; + + arg2 = (lto_codegen_model) jarg2; + + result = (bool)lto_codegen_set_pic_model(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_lto_codegen_set_gcc_path ( + void * jarg1 + , + + char * jarg2 + ) +{ + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + char *arg2 = (char *) 0 ; + + arg1 = (lto_code_gen_t)jarg1; + + arg2 = jarg2; + + lto_codegen_set_gcc_path(arg1,(char const *)arg2); + + +} + + + +DllExport void SWIGSTDCALL Ada_lto_codegen_set_assembler_path ( + void * jarg1 + , + + char * jarg2 + ) +{ + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + char *arg2 = (char *) 0 ; + + 
arg1 = (lto_code_gen_t)jarg1; + + arg2 = jarg2; + + lto_codegen_set_assembler_path(arg1,(char const *)arg2); + + +} + + + +DllExport void SWIGSTDCALL Ada_lto_codegen_add_must_preserve_symbol ( + void * jarg1 + , + + char * jarg2 + ) +{ + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + char *arg2 = (char *) 0 ; + + arg1 = (lto_code_gen_t)jarg1; + + arg2 = jarg2; + + lto_codegen_add_must_preserve_symbol(arg1,(char const *)arg2); + + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_lto_codegen_write_merged_modules ( + void * jarg1 + , + + char * jarg2 + ) +{ + unsigned int jresult ; + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + char *arg2 = (char *) 0 ; + bool result; + + arg1 = (lto_code_gen_t)jarg1; + + arg2 = jarg2; + + result = (bool)lto_codegen_write_merged_modules(arg1,(char const *)arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport void* SWIGSTDCALL Ada_lto_codegen_compile ( + void * jarg1 + , + + size_t* jarg2 + ) +{ + void* jresult ; + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + size_t *arg2 = (size_t *) 0 ; + void *result = 0 ; + + arg1 = (lto_code_gen_t)jarg1; + + + arg2 = (size_t *) jarg2; + + + result = (void *)lto_codegen_compile(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_lto_codegen_debug_options ( + void * jarg1 + , + + char * jarg2 + ) +{ + lto_code_gen_t arg1 = (lto_code_gen_t) 0 ; + char *arg2 = (char *) 0 ; + + arg1 = (lto_code_gen_t)jarg1; + + arg2 = jarg2; + + lto_codegen_debug_options(arg1,(char const *)arg2); + + +} + + + +DllExport void* SWIGSTDCALL Ada_llvm_create_optimizer ( + ) +{ + void* jresult ; + llvm_lto_t result; + + result = (llvm_lto_t)llvm_create_optimizer(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_llvm_destroy_optimizer ( + void* jarg1 + ) +{ + llvm_lto_t arg1 = (llvm_lto_t) 0 ; + + arg1 = (llvm_lto_t)jarg1; + + llvm_destroy_optimizer(arg1); + + +} + + + +DllExport int SWIGSTDCALL Ada_llvm_read_object_file ( + void* jarg1 + , + + char * jarg2 + ) +{ + int jresult ; + llvm_lto_t arg1 = (llvm_lto_t) 0 ; + char *arg2 = (char *) 0 ; + llvm_lto_status_t result; + + arg1 = (llvm_lto_t)jarg1; + + arg2 = jarg2; + + result = (llvm_lto_status_t)llvm_read_object_file(arg1,(char const *)arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_llvm_optimize_modules ( + void* jarg1 + , + + char * jarg2 + ) +{ + int jresult ; + llvm_lto_t arg1 = (llvm_lto_t) 0 ; + char *arg2 = (char *) 0 ; + llvm_lto_status_t result; + + arg1 = (llvm_lto_t)jarg1; + + arg2 = jarg2; + + result = (llvm_lto_status_t)llvm_optimize_modules(arg1,(char const *)arg2); + jresult = result; + + + + return jresult; + +} + + + +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ada/llvm/llvm_wrap.cxx b/bindings/ada/llvm/llvm_wrap.cxx new file mode 100644 index 0000000000000..79b19ff4c0bbb --- /dev/null +++ b/bindings/ada/llvm/llvm_wrap.cxx @@ -0,0 +1,8817 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 1.3.36 + * + * This file is not intended to be easily readable and contains a number of + * coding conventions designed to improve portability and efficiency. Do not make + * changes to this file unless you know what you are doing--modify the SWIG + * interface file instead. 
+ * ----------------------------------------------------------------------------- */
+
+
+#ifdef __cplusplus
+template<typename T> class SwigValueWrapper {
+  T *tt;
+public:
+  SwigValueWrapper() : tt(0) { }
+  SwigValueWrapper(const SwigValueWrapper<T>& rhs) : tt(new T(*rhs.tt)) { }
+  SwigValueWrapper(const T& t) : tt(new T(t)) { }
+  ~SwigValueWrapper() { delete tt; }
+  SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; }
+  operator T&() const { return *tt; }
+  T *operator&() { return tt; }
+private:
+  SwigValueWrapper& operator=(const SwigValueWrapper<T>& rhs);
+};
+
+template <typename T> T SwigValueInit() {
+  return T();
+}
+#endif
+
+/* -----------------------------------------------------------------------------
+ * This section contains generic SWIG labels for method/variable
+ * declarations/attributes, and other compiler dependent labels.
+ * ----------------------------------------------------------------------------- */
+
+/* template workaround for compilers that cannot correctly implement the C++ standard */
+#ifndef SWIGTEMPLATEDISAMBIGUATOR
+# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560)
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# elif defined(__HP_aCC)
+/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */
+/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */
+# define SWIGTEMPLATEDISAMBIGUATOR template
+# else
+# define SWIGTEMPLATEDISAMBIGUATOR
+# endif
+#endif
+
+/* inline attribute */
+#ifndef SWIGINLINE
+# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__))
+# define SWIGINLINE inline
+# else
+# define SWIGINLINE
+# endif
+#endif
+
+/* attribute recognised by some compilers to avoid 'unused' warnings */
+#ifndef SWIGUNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+# elif defined(__ICC)
+# define SWIGUNUSED __attribute__ ((__unused__))
+# else
+# define SWIGUNUSED
+# endif
+#endif
+
+#ifndef SWIGUNUSEDPARM
+# ifdef __cplusplus
+# define SWIGUNUSEDPARM(p)
+# else
+# define SWIGUNUSEDPARM(p) p SWIGUNUSED
+# endif
+#endif
+
+/* internal SWIG method */
+#ifndef SWIGINTERN
+# define SWIGINTERN static SWIGUNUSED
+#endif
+
+/* internal inline SWIG method */
+#ifndef SWIGINTERNINLINE
+# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE
+#endif
+
+/* exporting methods */
+#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# ifndef GCC_HASCLASSVISIBILITY
+# define GCC_HASCLASSVISIBILITY
+# endif
+#endif
+
+#ifndef SWIGEXPORT
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# if defined(STATIC_LINKED)
+# define SWIGEXPORT
+# else
+# define SWIGEXPORT __declspec(dllexport)
+# endif
+# else
+# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY)
+# define SWIGEXPORT __attribute__ ((visibility("default")))
+# else
+# define SWIGEXPORT
+# endif
+# endif
+#endif
+
+/* calling conventions for Windows */
+#ifndef SWIGSTDCALL
+# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
+# define SWIGSTDCALL __stdcall
+# else
+# define SWIGSTDCALL
+# endif
+#endif
+
+/* Deal with Microsoft's attempt at deprecating C standard runtime functions */
+#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE)
+# define _CRT_SECURE_NO_DEPRECATE
+#endif
+
+/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */
+#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE)
+# define _SCL_SECURE_NO_DEPRECATE
+#endif
+
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#if defined(_WIN32) || defined(__CYGWIN32__)
+# define DllExport __declspec( dllexport )
+# define SWIGSTDCALL __stdcall
+#else
+# define DllExport
+# define SWIGSTDCALL
+#endif
+
+
+#ifdef __cplusplus
+# include <new>
+#endif
+
+
+
+
+/* Support for throwing Ada exceptions from C/C++ */
+
+typedef enum
+{
+  SWIG_AdaException,
+  SWIG_AdaOutOfMemoryException,
+  SWIG_AdaIndexOutOfRangeException,
+  SWIG_AdaDivideByZeroException,
+  SWIG_AdaArgumentOutOfRangeException,
+  SWIG_AdaNullReferenceException
+} SWIG_AdaExceptionCodes;
+
+
+typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *);
+
+
+typedef struct
+{
+  SWIG_AdaExceptionCodes code;
+  SWIG_AdaExceptionCallback_t callback;
+}
+  SWIG_AdaExceptions_t;
+
+
+static
+SWIG_AdaExceptions_t
+SWIG_ada_exceptions[] =
+{
+  { SWIG_AdaException, NULL },
+  { SWIG_AdaOutOfMemoryException, NULL },
+  { SWIG_AdaIndexOutOfRangeException, NULL },
+  { SWIG_AdaDivideByZeroException, NULL },
+  { SWIG_AdaArgumentOutOfRangeException, NULL },
+  { SWIG_AdaNullReferenceException, NULL }
+};
+
+
+static
+void
+SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg)
+{
+  SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback;
+  if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) {
+    callback = SWIG_ada_exceptions[code].callback;
+  }
+  callback(msg);
+}
+
+
+
+#ifdef __cplusplus
+extern "C"
+#endif
+
+DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_llvm (SWIG_AdaExceptionCallback_t systemException,
+                                                                SWIG_AdaExceptionCallback_t outOfMemory,
+                                                                SWIG_AdaExceptionCallback_t indexOutOfRange,
+                                                                SWIG_AdaExceptionCallback_t divideByZero,
+                                                                SWIG_AdaExceptionCallback_t argumentOutOfRange,
+                                                                SWIG_AdaExceptionCallback_t nullReference)
+{
+  SWIG_ada_exceptions [SWIG_AdaException].callback = systemException;
+  SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory;
+  SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange;
+  SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero;
+  SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange;
+  SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference;
+}
+
+
+/* Callback for returning strings to Ada without leaking memory */
+
+typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *);
+static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL;
+
+
+
+/* probably obsolete ...
+#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_llvm(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +//#include "llvm-c/Analysis.h" +//#include "llvm-c/BitReader.h" +//#include "llvm-c/BitWriter.h" +#include "llvm-c/Core.h" +//#include "llvm-c/ExecutionEngine.h" +//#include "llvm-c/LinkTimeOptimizer.h" +//#include "llvm-c/lto.h" +//#include "llvm-c/Target.h" + + + + struct LLVMCtxt; +// struct LLVMOpaqueType; +// struct LLVMOpaqueValue; + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport void SWIGSTDCALL Ada_LLVMDisposeMessage ( + char * jarg1 + ) +{ + char *arg1 = (char *) 0 ; + + arg1 = jarg1; + + LLVMDisposeMessage(arg1); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMContextCreate ( + ) +{ + void * jresult ; + LLVMContextRef result; + + result = (LLVMContextRef)LLVMContextCreate(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetGlobalContext ( + ) +{ + void * jresult ; + LLVMContextRef result; + + result = (LLVMContextRef)LLVMGetGlobalContext(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMContextDispose ( + void * jarg1 + ) +{ + LLVMContextRef arg1 = (LLVMContextRef) 0 ; + + arg1 = (LLVMContextRef)jarg1; + + LLVMContextDispose(arg1); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMModuleCreateWithName ( + char * jarg1 + ) +{ + void * jresult ; + char *arg1 = (char *) 0 ; + LLVMModuleRef result; + + arg1 = jarg1; + + result = (LLVMModuleRef)LLVMModuleCreateWithName((char const *)arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMModuleCreateWithNameInContext ( + char * jarg1 + , + + void * jarg2 + ) +{ + void * jresult ; + char *arg1 = (char *) 0 ; + LLVMContextRef arg2 = (LLVMContextRef) 0 ; + LLVMModuleRef result; + + arg1 = jarg1; + + arg2 = (LLVMContextRef)jarg2; + + result = (LLVMModuleRef)LLVMModuleCreateWithNameInContext((char const *)arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDisposeModule ( + void * jarg1 + ) +{ + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + + arg1 = (LLVMModuleRef)jarg1; + + LLVMDisposeModule(arg1); + + +} + + + +DllExport char * SWIGSTDCALL Ada_LLVMGetDataLayout ( + void * jarg1 + ) +{ + char * jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *result = 0 ; + + arg1 = (LLVMModuleRef)jarg1; + + result = (char *)LLVMGetDataLayout(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMSetDataLayout ( + void * jarg1 + , + + char * jarg2 + ) +{ + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *arg2 = (char *) 0 ; + + arg1 = (LLVMModuleRef)jarg1; + + arg2 = jarg2; + + LLVMSetDataLayout(arg1,(char const *)arg2); + + +} + + + +DllExport char * SWIGSTDCALL Ada_LLVMGetTarget ( + void * jarg1 + ) +{ + char * jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *result = 0 ; + + arg1 = (LLVMModuleRef)jarg1; + + result = (char *)LLVMGetTarget(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMSetTarget ( + void * jarg1 + , + + char * jarg2 + ) +{ + 
LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *arg2 = (char *) 0 ; + + arg1 = (LLVMModuleRef)jarg1; + + arg2 = jarg2; + + LLVMSetTarget(arg1,(char const *)arg2); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMAddTypeName ( + void * jarg1 + , + + char * jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *arg2 = (char *) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + int result; + + arg1 = (LLVMModuleRef)jarg1; + + arg2 = jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + result = (int)LLVMAddTypeName(arg1,(char const *)arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDeleteTypeName ( + void * jarg1 + , + + char * jarg2 + ) +{ + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *arg2 = (char *) 0 ; + + arg1 = (LLVMModuleRef)jarg1; + + arg2 = jarg2; + + LLVMDeleteTypeName(arg1,(char const *)arg2); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetTypeByName ( + void * jarg1 + , + + char * jarg2 + ) +{ + void * jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + char *arg2 = (char *) 0 ; + LLVMTypeRef result; + + arg1 = (LLVMModuleRef)jarg1; + + arg2 = jarg2; + + result = (LLVMTypeRef)LLVMGetTypeByName(arg1,(char const *)arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDumpModule ( + void * jarg1 + ) +{ + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + + arg1 = (LLVMModuleRef)jarg1; + + LLVMDumpModule(arg1); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMGetTypeKind ( + void * jarg1 + ) +{ + int jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + LLVMTypeKind result; + + arg1 = (LLVMTypeRef)jarg1; + + result = (LLVMTypeKind)LLVMGetTypeKind(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMInt1Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMInt1Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMInt8Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMInt8Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMInt16Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMInt16Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMInt32Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMInt32Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMInt64Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMInt64Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMIntType ( + unsigned int jarg1 + ) +{ + void * jresult ; + unsigned int arg1 ; + LLVMTypeRef result; + + + arg1 = (unsigned int) jarg1; + + + result = (LLVMTypeRef)LLVMIntType(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMGetIntTypeWidth ( + void * jarg1 + ) +{ + unsigned int jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + unsigned int result; + + arg1 = (LLVMTypeRef)jarg1; + + result = (unsigned int)LLVMGetIntTypeWidth(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMFloatType ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMFloatType(); + jresult = (void 
*) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMDoubleType ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMDoubleType(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMX86FP80Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMX86FP80Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMFP128Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMFP128Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMPPCFP128Type ( + ) +{ + void * jresult ; + LLVMTypeRef result; + + result = (LLVMTypeRef)LLVMPPCFP128Type(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMFunctionType ( + void * jarg1 + , + + void * jarg2 + , + + unsigned int jarg3 + , + + int jarg4 + ) +{ + void * jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ; + unsigned int arg3 ; + int arg4 ; + LLVMTypeRef result; + + arg1 = (LLVMTypeRef)jarg1; + + arg2 = (LLVMTypeRef *)jarg2; + + + arg3 = (unsigned int) jarg3; + + + + arg4 = (int) jarg4; + + + result = (LLVMTypeRef)LLVMFunctionType(arg1,arg2,arg3,arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMIsFunctionVarArg ( + void * jarg1 + ) +{ + int jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + int result; + + arg1 = (LLVMTypeRef)jarg1; + + result = (int)LLVMIsFunctionVarArg(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetReturnType ( + void * jarg1 + ) +{ + void * jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + LLVMTypeRef result; + + arg1 = (LLVMTypeRef)jarg1; + + result = (LLVMTypeRef)LLVMGetReturnType(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMCountParamTypes ( + void * jarg1 + ) +{ + unsigned int jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + unsigned int result; + + arg1 = (LLVMTypeRef)jarg1; + + result = (unsigned int)LLVMCountParamTypes(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMGetParamTypes ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + LLVMTypeRef *arg2 = (LLVMTypeRef *) 0 ; + + arg1 = (LLVMTypeRef)jarg1; + + arg2 = (LLVMTypeRef *)jarg2; + + LLVMGetParamTypes(arg1,arg2); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMStructType ( + void * jarg1 + , + + unsigned int jarg2 + , + + int jarg3 + ) +{ + void * jresult ; + LLVMTypeRef *arg1 = (LLVMTypeRef *) 0 ; + unsigned int arg2 ; + int arg3 ; + LLVMTypeRef result; + + arg1 = (LLVMTypeRef *)jarg1; + + + arg2 = (unsigned int) jarg2; + + + + arg3 = (int) jarg3; + + + result = (LLVMTypeRef)LLVMStructType(arg1,arg2,arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMCountStructElementTypes ( + void * jarg1 + ) +{ + unsigned int jresult ; + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + unsigned int result; + + arg1 = (LLVMTypeRef)jarg1; + + result = (unsigned int)LLVMCountStructElementTypes(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMGetStructElementTypes ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMTypeRef arg1 = (LLVMTypeRef) 0 ; + LLVMTypeRef 
+DllExport int SWIGSTDCALL Ada_LLVMIsPackedStruct (void * jarg1) { return (int)LLVMIsPackedStruct((LLVMTypeRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMArrayType (void * jarg1, unsigned int jarg2) { return (void *) LLVMArrayType((LLVMTypeRef)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMPointerType (void * jarg1, unsigned int jarg2) { return (void *) LLVMPointerType((LLVMTypeRef)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMVectorType (void * jarg1, unsigned int jarg2) { return (void *) LLVMVectorType((LLVMTypeRef)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetElementType (void * jarg1) { return (void *) LLVMGetElementType((LLVMTypeRef)jarg1); }
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetArrayLength (void * jarg1) { return LLVMGetArrayLength((LLVMTypeRef)jarg1); }
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetPointerAddressSpace (void * jarg1) { return LLVMGetPointerAddressSpace((LLVMTypeRef)jarg1); }
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetVectorSize (void * jarg1) { return LLVMGetVectorSize((LLVMTypeRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMVoidType () { return (void *) LLVMVoidType(); }
+DllExport void * SWIGSTDCALL Ada_LLVMLabelType () { return (void *) LLVMLabelType(); }
+DllExport void * SWIGSTDCALL Ada_LLVMOpaqueType () { return (void *) LLVMOpaqueType(); }
+DllExport void * SWIGSTDCALL Ada_LLVMCreateTypeHandle (void * jarg1) { return (void *) LLVMCreateTypeHandle((LLVMTypeRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMRefineType (void * jarg1, void * jarg2) { LLVMRefineType((LLVMTypeRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMResolveTypeHandle (void * jarg1) { return (void *) LLVMResolveTypeHandle((LLVMTypeHandleRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMDisposeTypeHandle (void * jarg1) { LLVMDisposeTypeHandle((LLVMTypeHandleRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMTypeOf (void * jarg1) { return (void *) LLVMTypeOf((LLVMValueRef)jarg1); }
+DllExport char * SWIGSTDCALL Ada_LLVMGetValueName (void * jarg1) { return (char *)LLVMGetValueName((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetValueName (void * jarg1, char * jarg2) { LLVMSetValueName((LLVMValueRef)jarg1, (char const *)jarg2); }
+DllExport void SWIGSTDCALL Ada_LLVMDumpValue (void * jarg1) { LLVMDumpValue((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAArgument (void * jarg1) { return (void *) LLVMIsAArgument((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsABasicBlock (void * jarg1) { return (void *) LLVMIsABasicBlock((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInlineAsm (void * jarg1) { return (void *) LLVMIsAInlineAsm((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUser (void * jarg1) { return (void *) LLVMIsAUser((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstant (void * jarg1) { return (void *) LLVMIsAConstant((LLVMValueRef)jarg1); }
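+/* The Ada_LLVMIsA* family (Ada_LLVMIsAArgument above through
+   Ada_LLVMIsAVAArgInst below) forwards to the LLVM-C dynamic-cast checks:
+   each call returns its argument when the value is of the named class and
+   a null pointer otherwise, so one call serves as both a class test and a
+   downcast. */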
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantAggregateZero (void * jarg1) { return (void *) LLVMIsAConstantAggregateZero((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantArray (void * jarg1) { return (void *) LLVMIsAConstantArray((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantExpr (void * jarg1) { return (void *) LLVMIsAConstantExpr((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantFP (void * jarg1) { return (void *) LLVMIsAConstantFP((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantInt (void * jarg1) { return (void *) LLVMIsAConstantInt((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantPointerNull (void * jarg1) { return (void *) LLVMIsAConstantPointerNull((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantStruct (void * jarg1) { return (void *) LLVMIsAConstantStruct((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAConstantVector (void * jarg1) { return (void *) LLVMIsAConstantVector((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalValue (void * jarg1) { return (void *) LLVMIsAGlobalValue((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFunction (void * jarg1) { return (void *) LLVMIsAFunction((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalAlias (void * jarg1) { return (void *) LLVMIsAGlobalAlias((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGlobalVariable (void * jarg1) { return (void *) LLVMIsAGlobalVariable((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUndefValue (void * jarg1) { return (void *) LLVMIsAUndefValue((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInstruction (void * jarg1) { return (void *) LLVMIsAInstruction((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsABinaryOperator (void * jarg1) { return (void *) LLVMIsABinaryOperator((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsACallInst (void * jarg1) { return (void *) LLVMIsACallInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAIntrinsicInst (void * jarg1) { return (void *) LLVMIsAIntrinsicInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgInfoIntrinsic (void * jarg1) { return (void *) LLVMIsADbgInfoIntrinsic((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgDeclareInst (void * jarg1) { return (void *) LLVMIsADbgDeclareInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgFuncStartInst (void * jarg1) { return (void *) LLVMIsADbgFuncStartInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgRegionEndInst (void * jarg1) { return (void *) LLVMIsADbgRegionEndInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgRegionStartInst (void * jarg1) { return (void *) LLVMIsADbgRegionStartInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsADbgStopPointInst (void * jarg1) { return (void *) LLVMIsADbgStopPointInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAEHSelectorInst (void * jarg1) { return (void *) LLVMIsAEHSelectorInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemIntrinsic (void * jarg1) { return (void *) LLVMIsAMemIntrinsic((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemCpyInst (void * jarg1) { return (void *) LLVMIsAMemCpyInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemMoveInst (void * jarg1) { return (void *) LLVMIsAMemMoveInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMemSetInst (void * jarg1) { return (void *) LLVMIsAMemSetInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsACmpInst (void * jarg1) { return (void *) LLVMIsACmpInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFCmpInst (void * jarg1) { return (void *) LLVMIsAFCmpInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAICmpInst (void * jarg1) { return (void *) LLVMIsAICmpInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAExtractElementInst (void * jarg1) { return (void *) LLVMIsAExtractElementInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAGetElementPtrInst (void * jarg1) { return (void *) LLVMIsAGetElementPtrInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInsertElementInst (void * jarg1) { return (void *) LLVMIsAInsertElementInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInsertValueInst (void * jarg1) { return (void *) LLVMIsAInsertValueInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAPHINode (void * jarg1) { return (void *) LLVMIsAPHINode((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsASelectInst (void * jarg1) { return (void *) LLVMIsASelectInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAShuffleVectorInst (void * jarg1) { return (void *) LLVMIsAShuffleVectorInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAStoreInst (void * jarg1) { return (void *) LLVMIsAStoreInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsATerminatorInst (void * jarg1) { return (void *) LLVMIsATerminatorInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsABranchInst (void * jarg1) { return (void *) LLVMIsABranchInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAInvokeInst (void * jarg1) { return (void *) LLVMIsAInvokeInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAReturnInst (void * jarg1) { return (void *) LLVMIsAReturnInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsASwitchInst (void * jarg1) { return (void *) LLVMIsASwitchInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUnreachableInst (void * jarg1) { return (void *) LLVMIsAUnreachableInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUnwindInst (void * jarg1) { return (void *) LLVMIsAUnwindInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUnaryInstruction (void * jarg1) { return (void *) LLVMIsAUnaryInstruction((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAAllocationInst (void * jarg1) { return (void *) LLVMIsAAllocationInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAAllocaInst (void * jarg1) { return (void *) LLVMIsAAllocaInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAMallocInst (void * jarg1) { return (void *) LLVMIsAMallocInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsACastInst (void * jarg1) { return (void *) LLVMIsACastInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsABitCastInst (void * jarg1) { return (void *) LLVMIsABitCastInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPExtInst (void * jarg1) { return (void *) LLVMIsAFPExtInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPToSIInst (void * jarg1) { return (void *) LLVMIsAFPToSIInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPToUIInst (void * jarg1) { return (void *) LLVMIsAFPToUIInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFPTruncInst (void * jarg1) { return (void *) LLVMIsAFPTruncInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAIntToPtrInst (void * jarg1) { return (void *) LLVMIsAIntToPtrInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAPtrToIntInst (void * jarg1) { return (void *) LLVMIsAPtrToIntInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsASExtInst (void * jarg1) { return (void *) LLVMIsASExtInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsASIToFPInst (void * jarg1) { return (void *) LLVMIsASIToFPInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsATruncInst (void * jarg1) { return (void *) LLVMIsATruncInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAUIToFPInst (void * jarg1) { return (void *) LLVMIsAUIToFPInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAZExtInst (void * jarg1) { return (void *) LLVMIsAZExtInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAExtractValueInst (void * jarg1) { return (void *) LLVMIsAExtractValueInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAFreeInst (void * jarg1) { return (void *) LLVMIsAFreeInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsALoadInst (void * jarg1) { return (void *) LLVMIsALoadInst((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMIsAVAArgInst (void * jarg1) { return (void *) LLVMIsAVAArgInst((LLVMValueRef)jarg1); }
+
+DllExport void * SWIGSTDCALL Ada_LLVMConstNull (void * jarg1) { return (void *) LLVMConstNull((LLVMTypeRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstAllOnes (void * jarg1) { return (void *) LLVMConstAllOnes((LLVMTypeRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetUndef (void * jarg1) { return (void *) LLVMGetUndef((LLVMTypeRef)jarg1); }
+DllExport int SWIGSTDCALL Ada_LLVMIsConstant (void * jarg1) { return (int)LLVMIsConstant((LLVMValueRef)jarg1); }
+DllExport int SWIGSTDCALL Ada_LLVMIsNull (void * jarg1) { return (int)LLVMIsNull((LLVMValueRef)jarg1); }
+DllExport int SWIGSTDCALL Ada_LLVMIsUndef (void * jarg1) { return (int)LLVMIsUndef((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstPointerNull (void * jarg1) { return (void *) LLVMConstPointerNull((LLVMTypeRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstInt (void * jarg1, unsigned long long jarg2, int jarg3) { return (void *) LLVMConstInt((LLVMTypeRef)jarg1, jarg2, jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstReal (void * jarg1, double jarg2) { return (void *) LLVMConstReal((LLVMTypeRef)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstRealOfString (void * jarg1, char * jarg2) { return (void *) LLVMConstRealOfString((LLVMTypeRef)jarg1, (char const *)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstString (char * jarg1, unsigned int jarg2, int jarg3) { return (void *) LLVMConstString((char const *)jarg1, jarg2, jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstArray (void * jarg1, void * jarg2, unsigned int jarg3) { return (void *) LLVMConstArray((LLVMTypeRef)jarg1, (LLVMValueRef *)jarg2, jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstStruct (void * jarg1, unsigned int jarg2, int jarg3) { return (void *) LLVMConstStruct((LLVMValueRef *)jarg1, jarg2, jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstVector (void * jarg1, unsigned int jarg2) { return (void *) LLVMConstVector((LLVMValueRef *)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMSizeOf (void * jarg1) { return (void *) LLVMSizeOf((LLVMTypeRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstNeg (void * jarg1) { return (void *) LLVMConstNeg((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstNot (void * jarg1) { return (void *) LLVMConstNot((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstAdd (void * jarg1, void * jarg2) { return (void *) LLVMConstAdd((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstSub (void * jarg1, void * jarg2) { return (void *) LLVMConstSub((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstMul (void * jarg1, void * jarg2) { return (void *) LLVMConstMul((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstUDiv (void * jarg1, void * jarg2) { return (void *) LLVMConstUDiv((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstSDiv (void * jarg1, void * jarg2) { return (void *) LLVMConstSDiv((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstFDiv (void * jarg1, void * jarg2) { return (void *) LLVMConstFDiv((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstURem (void * jarg1, void * jarg2) { return (void *) LLVMConstURem((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstSRem (void * jarg1, void * jarg2) { return (void *) LLVMConstSRem((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstFRem (void * jarg1, void * jarg2) { return (void *) LLVMConstFRem((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstAnd (void * jarg1, void * jarg2) { return (void *) LLVMConstAnd((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstOr (void * jarg1, void * jarg2) { return (void *) LLVMConstOr((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstXor (void * jarg1, void * jarg2) { return (void *) LLVMConstXor((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstICmp (int jarg1, void * jarg2, void * jarg3) { return (void *) LLVMConstICmp((LLVMIntPredicate)jarg1, (LLVMValueRef)jarg2, (LLVMValueRef)jarg3); }
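+/* Orientation note, with a hypothetical example rather than code from this
+   file: the LLVMConst* entry points wrapped above fold their operands into
+   constant expressions, usable e.g. as global initializers:
+
+     LLVMValueRef two = LLVMConstInt(LLVMInt32Type(), 2, 0);
+     LLVMValueRef ten = LLVMConstMul(two, LLVMConstInt(LLVMInt32Type(), 5, 0));
+
+   Ada_LLVMConstICmp takes its predicate as a plain int and casts it back to
+   LLVMIntPredicate before the call; Ada_LLVMConstFCmp below does the same
+   for LLVMRealPredicate. */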
+DllExport void * SWIGSTDCALL Ada_LLVMConstFCmp (int jarg1, void * jarg2, void * jarg3) { return (void *) LLVMConstFCmp((LLVMRealPredicate)jarg1, (LLVMValueRef)jarg2, (LLVMValueRef)jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstShl (void * jarg1, void * jarg2) { return (void *) LLVMConstShl((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstLShr (void * jarg1, void * jarg2) { return (void *) LLVMConstLShr((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstAShr (void * jarg1, void * jarg2) { return (void *) LLVMConstAShr((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstGEP (void * jarg1, void * jarg2, unsigned int jarg3) { return (void *) LLVMConstGEP((LLVMValueRef)jarg1, (LLVMValueRef *)jarg2, jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstTrunc (void * jarg1, void * jarg2) { return (void *) LLVMConstTrunc((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstSExt (void * jarg1, void * jarg2) { return (void *) LLVMConstSExt((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstZExt (void * jarg1, void * jarg2) { return (void *) LLVMConstZExt((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPTrunc (void * jarg1, void * jarg2) { return (void *) LLVMConstFPTrunc((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPExt (void * jarg1, void * jarg2) { return (void *) LLVMConstFPExt((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstUIToFP (void * jarg1, void * jarg2) { return (void *) LLVMConstUIToFP((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstSIToFP (void * jarg1, void * jarg2) { return (void *) LLVMConstSIToFP((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPToUI (void * jarg1, void * jarg2) { return (void *) LLVMConstFPToUI((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstFPToSI (void * jarg1, void * jarg2) { return (void *) LLVMConstFPToSI((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstPtrToInt (void * jarg1, void * jarg2) { return (void *) LLVMConstPtrToInt((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstIntToPtr (void * jarg1, void * jarg2) { return (void *) LLVMConstIntToPtr((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstBitCast (void * jarg1, void * jarg2) { return (void *) LLVMConstBitCast((LLVMValueRef)jarg1, (LLVMTypeRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstSelect (void * jarg1, void * jarg2, void * jarg3) { return (void *) LLVMConstSelect((LLVMValueRef)jarg1, (LLVMValueRef)jarg2, (LLVMValueRef)jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstExtractElement (void * jarg1, void * jarg2) { return (void *) LLVMConstExtractElement((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstInsertElement (void * jarg1, void * jarg2, void * jarg3) { return (void *) LLVMConstInsertElement((LLVMValueRef)jarg1, (LLVMValueRef)jarg2, (LLVMValueRef)jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstShuffleVector (void * jarg1, void * jarg2, void * jarg3) { return (void *) LLVMConstShuffleVector((LLVMValueRef)jarg1, (LLVMValueRef)jarg2, (LLVMValueRef)jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstExtractValue (void * jarg1, unsigned int * jarg2, unsigned int jarg3) { return (void *) LLVMConstExtractValue((LLVMValueRef)jarg1, jarg2, jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstInsertValue (void * jarg1, void * jarg2, unsigned int * jarg3, unsigned int jarg4) { return (void *) LLVMConstInsertValue((LLVMValueRef)jarg1, (LLVMValueRef)jarg2, jarg3, jarg4); }
+DllExport void * SWIGSTDCALL Ada_LLVMConstInlineAsm (void * jarg1, char * jarg2, char * jarg3, int jarg4) { return (void *) LLVMConstInlineAsm((LLVMTypeRef)jarg1, (char const *)jarg2, (char const *)jarg3, jarg4); }
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetGlobalParent (void * jarg1) { return (void *) LLVMGetGlobalParent((LLVMValueRef)jarg1); }
+DllExport int SWIGSTDCALL Ada_LLVMIsDeclaration (void * jarg1) { return (int)LLVMIsDeclaration((LLVMValueRef)jarg1); }
+DllExport int SWIGSTDCALL Ada_LLVMGetLinkage (void * jarg1) { return (int)LLVMGetLinkage((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetLinkage (void * jarg1, int jarg2) { LLVMSetLinkage((LLVMValueRef)jarg1, (LLVMLinkage)jarg2); }
+DllExport char * SWIGSTDCALL Ada_LLVMGetSection (void * jarg1) { return (char *)LLVMGetSection((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetSection (void * jarg1, char * jarg2) { LLVMSetSection((LLVMValueRef)jarg1, (char const *)jarg2); }
+DllExport int SWIGSTDCALL Ada_LLVMGetVisibility (void * jarg1) { return (int)LLVMGetVisibility((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetVisibility (void * jarg1, int jarg2) { LLVMSetVisibility((LLVMValueRef)jarg1, (LLVMVisibility)jarg2); }
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetAlignment (void * jarg1) { return LLVMGetAlignment((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetAlignment (void * jarg1, unsigned int jarg2) { LLVMSetAlignment((LLVMValueRef)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMAddGlobal (void * jarg1, void * jarg2, char * jarg3) { return (void *) LLVMAddGlobal((LLVMModuleRef)jarg1, (LLVMTypeRef)jarg2, (char const *)jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetNamedGlobal (void * jarg1, char * jarg2) { return (void *) LLVMGetNamedGlobal((LLVMModuleRef)jarg1, (char const *)jarg2); }
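+/* A minimal iteration sketch (M stands for a module handle obtained
+   elsewhere; illustrative, not code from this file): the First/Next
+   accessors below walk the module's global list,
+
+     for (LLVMValueRef g = LLVMGetFirstGlobal(M); g; g = LLVMGetNextGlobal(g))
+       LLVMDumpValue(g);
+
+   and the Last/Previous pair walks it in reverse. The function, parameter,
+   and basic-block accessors that follow use the same convention. */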
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstGlobal (void * jarg1) { return (void *) LLVMGetFirstGlobal((LLVMModuleRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastGlobal (void * jarg1) { return (void *) LLVMGetLastGlobal((LLVMModuleRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextGlobal (void * jarg1) { return (void *) LLVMGetNextGlobal((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousGlobal (void * jarg1) { return (void *) LLVMGetPreviousGlobal((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMDeleteGlobal (void * jarg1) { LLVMDeleteGlobal((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetInitializer (void * jarg1) { return (void *) LLVMGetInitializer((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetInitializer (void * jarg1, void * jarg2) { LLVMSetInitializer((LLVMValueRef)jarg1, (LLVMValueRef)jarg2); }
+DllExport int SWIGSTDCALL Ada_LLVMIsThreadLocal (void * jarg1) { return (int)LLVMIsThreadLocal((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetThreadLocal (void * jarg1, int jarg2) { LLVMSetThreadLocal((LLVMValueRef)jarg1, jarg2); }
+DllExport int SWIGSTDCALL Ada_LLVMIsGlobalConstant (void * jarg1) { return (int)LLVMIsGlobalConstant((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetGlobalConstant (void * jarg1, int jarg2) { LLVMSetGlobalConstant((LLVMValueRef)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMAddAlias (void * jarg1, void * jarg2, void * jarg3, char * jarg4) { return (void *) LLVMAddAlias((LLVMModuleRef)jarg1, (LLVMTypeRef)jarg2, (LLVMValueRef)jarg3, (char const *)jarg4); }
+
+DllExport void * SWIGSTDCALL Ada_LLVMAddFunction (void * jarg1, char * jarg2, void * jarg3) { return (void *) LLVMAddFunction((LLVMModuleRef)jarg1, (char const *)jarg2, (LLVMTypeRef)jarg3); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetNamedFunction (void * jarg1, char * jarg2) { return (void *) LLVMGetNamedFunction((LLVMModuleRef)jarg1, (char const *)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstFunction (void * jarg1) { return (void *) LLVMGetFirstFunction((LLVMModuleRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastFunction (void * jarg1) { return (void *) LLVMGetLastFunction((LLVMModuleRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextFunction (void * jarg1) { return (void *) LLVMGetNextFunction((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousFunction (void * jarg1) { return (void *) LLVMGetPreviousFunction((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMDeleteFunction (void * jarg1) { LLVMDeleteFunction((LLVMValueRef)jarg1); }
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetIntrinsicID (void * jarg1) { return LLVMGetIntrinsicID((LLVMValueRef)jarg1); }
+DllExport unsigned int SWIGSTDCALL Ada_LLVMGetFunctionCallConv (void * jarg1) { return LLVMGetFunctionCallConv((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetFunctionCallConv (void * jarg1, unsigned int jarg2) { LLVMSetFunctionCallConv((LLVMValueRef)jarg1, jarg2); }
+DllExport char * SWIGSTDCALL Ada_LLVMGetGC (void * jarg1) { return (char *)LLVMGetGC((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMSetGC (void * jarg1, char * jarg2) { LLVMSetGC((LLVMValueRef)jarg1, (char const *)jarg2); }
+DllExport void SWIGSTDCALL Ada_LLVMAddFunctionAttr (void * jarg1, int jarg2) { LLVMAddFunctionAttr((LLVMValueRef)jarg1, (LLVMAttribute)jarg2); }
+DllExport void SWIGSTDCALL Ada_LLVMRemoveFunctionAttr (void * jarg1, int jarg2) { LLVMRemoveFunctionAttr((LLVMValueRef)jarg1, (LLVMAttribute)jarg2); }
+
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCountParams (void * jarg1) { return LLVMCountParams((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMGetParams (void * jarg1, void * jarg2) { LLVMGetParams((LLVMValueRef)jarg1, (LLVMValueRef *)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetParam (void * jarg1, unsigned int jarg2) { return (void *) LLVMGetParam((LLVMValueRef)jarg1, jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetParamParent (void * jarg1) { return (void *) LLVMGetParamParent((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstParam (void * jarg1) { return (void *) LLVMGetFirstParam((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastParam (void * jarg1) { return (void *) LLVMGetLastParam((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextParam (void * jarg1) { return (void *) LLVMGetNextParam((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousParam (void * jarg1) { return (void *) LLVMGetPreviousParam((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMAddAttribute (void * jarg1, int jarg2) { LLVMAddAttribute((LLVMValueRef)jarg1, (LLVMAttribute)jarg2); }
+DllExport void SWIGSTDCALL Ada_LLVMRemoveAttribute (void * jarg1, int jarg2) { LLVMRemoveAttribute((LLVMValueRef)jarg1, (LLVMAttribute)jarg2); }
+DllExport void SWIGSTDCALL Ada_LLVMSetParamAlignment (void * jarg1, unsigned int jarg2) { LLVMSetParamAlignment((LLVMValueRef)jarg1, jarg2); }
+
+DllExport void * SWIGSTDCALL Ada_LLVMBasicBlockAsValue (void * jarg1) { return (void *) LLVMBasicBlockAsValue((LLVMBasicBlockRef)jarg1); }
+DllExport int SWIGSTDCALL Ada_LLVMValueIsBasicBlock (void * jarg1) { return (int)LLVMValueIsBasicBlock((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMValueAsBasicBlock (void * jarg1) { return (void *) LLVMValueAsBasicBlock((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetBasicBlockParent (void * jarg1) { return (void *) LLVMGetBasicBlockParent((LLVMBasicBlockRef)jarg1); }
+DllExport unsigned int SWIGSTDCALL Ada_LLVMCountBasicBlocks (void * jarg1) { return LLVMCountBasicBlocks((LLVMValueRef)jarg1); }
+DllExport void SWIGSTDCALL Ada_LLVMGetBasicBlocks (void * jarg1, void * jarg2) { LLVMGetBasicBlocks((LLVMValueRef)jarg1, (LLVMBasicBlockRef *)jarg2); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetFirstBasicBlock (void * jarg1) { return (void *) LLVMGetFirstBasicBlock((LLVMValueRef)jarg1); }
+DllExport void * SWIGSTDCALL Ada_LLVMGetLastBasicBlock (void * jarg1) { return (void *) LLVMGetLastBasicBlock((LLVMValueRef)jarg1); }
+
+DllExport void * SWIGSTDCALL Ada_LLVMGetNextBasicBlock (
+    void * jarg1
+    )
+{
+  void * jresult ;
+  LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef)
0 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMBasicBlockRef)jarg1; + + result = (LLVMBasicBlockRef)LLVMGetNextBasicBlock(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousBasicBlock ( + void * jarg1 + ) +{ + void * jresult ; + LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMBasicBlockRef)jarg1; + + result = (LLVMBasicBlockRef)LLVMGetPreviousBasicBlock(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetEntryBasicBlock ( + void * jarg1 + ) +{ + void * jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMValueRef)jarg1; + + result = (LLVMBasicBlockRef)LLVMGetEntryBasicBlock(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMAppendBasicBlock ( + void * jarg1 + , + + char * jarg2 + ) +{ + void * jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + char *arg2 = (char *) 0 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMValueRef)jarg1; + + arg2 = jarg2; + + result = (LLVMBasicBlockRef)LLVMAppendBasicBlock(arg1,(char const *)arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMInsertBasicBlock ( + void * jarg1 + , + + char * jarg2 + ) +{ + void * jresult ; + LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ; + char *arg2 = (char *) 0 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMBasicBlockRef)jarg1; + + arg2 = jarg2; + + result = (LLVMBasicBlockRef)LLVMInsertBasicBlock(arg1,(char const *)arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDeleteBasicBlock ( + void * jarg1 + ) +{ + LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ; + + arg1 = (LLVMBasicBlockRef)jarg1; + + LLVMDeleteBasicBlock(arg1); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetInstructionParent ( + void * jarg1 + ) +{ + void * jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMValueRef)jarg1; + + result = (LLVMBasicBlockRef)LLVMGetInstructionParent(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetFirstInstruction ( + void * jarg1 + ) +{ + void * jresult ; + LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBasicBlockRef)jarg1; + + result = (LLVMValueRef)LLVMGetFirstInstruction(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetLastInstruction ( + void * jarg1 + ) +{ + void * jresult ; + LLVMBasicBlockRef arg1 = (LLVMBasicBlockRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBasicBlockRef)jarg1; + + result = (LLVMValueRef)LLVMGetLastInstruction(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetNextInstruction ( + void * jarg1 + ) +{ + void * jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMValueRef)jarg1; + + result = (LLVMValueRef)LLVMGetNextInstruction(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetPreviousInstruction ( + void * jarg1 + ) +{ + void * jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMValueRef)jarg1; + + result = (LLVMValueRef)LLVMGetPreviousInstruction(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + 
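/* Editorial sketch, not part of the SWIG-generated patch: the thin wrappers
   above preserve the llvm-c iteration protocol (a null pointer terminates
   each walk), so client code can traverse a function entirely through the
   type-erased entry points.  Ada_ExampleCountInstructions is a hypothetical
   helper added only to illustrate that protocol. */
static unsigned int Ada_ExampleCountInstructions (void * fn)
{
  unsigned int n = 0;
  /* Walk the basic blocks of the function, then the instructions of each
     block, in the same first/next style the llvm-c API documents. */
  for (void * bb = Ada_LLVMGetFirstBasicBlock (fn); bb != 0;
       bb = Ada_LLVMGetNextBasicBlock (bb))
    for (void * insn = Ada_LLVMGetFirstInstruction (bb); insn != 0;
         insn = Ada_LLVMGetNextInstruction (insn))
      ++n;
  return n;
}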
+DllExport void SWIGSTDCALL Ada_LLVMSetInstructionCallConv ( + void * jarg1 + , + + unsigned int jarg2 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int arg2 ; + + arg1 = (LLVMValueRef)jarg1; + + + arg2 = (unsigned int) jarg2; + + + LLVMSetInstructionCallConv(arg1,arg2); + + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMGetInstructionCallConv ( + void * jarg1 + ) +{ + unsigned int jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int result; + + arg1 = (LLVMValueRef)jarg1; + + result = (unsigned int)LLVMGetInstructionCallConv(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddInstrAttribute ( + void * jarg1 + , + + unsigned int jarg2 + , + + int jarg3 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int arg2 ; + LLVMAttribute arg3 ; + + arg1 = (LLVMValueRef)jarg1; + + + arg2 = (unsigned int) jarg2; + + + arg3 = (LLVMAttribute) jarg3; + + LLVMAddInstrAttribute(arg1,arg2,arg3); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMRemoveInstrAttribute ( + void * jarg1 + , + + unsigned int jarg2 + , + + int jarg3 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int arg2 ; + LLVMAttribute arg3 ; + + arg1 = (LLVMValueRef)jarg1; + + + arg2 = (unsigned int) jarg2; + + + arg3 = (LLVMAttribute) jarg3; + + LLVMRemoveInstrAttribute(arg1,arg2,arg3); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMSetInstrParamAlignment ( + void * jarg1 + , + + unsigned int jarg2 + , + + unsigned int jarg3 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int arg2 ; + unsigned int arg3 ; + + arg1 = (LLVMValueRef)jarg1; + + + arg2 = (unsigned int) jarg2; + + + + arg3 = (unsigned int) jarg3; + + + LLVMSetInstrParamAlignment(arg1,arg2,arg3); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMIsTailCall ( + void * jarg1 + ) +{ + int jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + int result; + + arg1 = (LLVMValueRef)jarg1; + + result = (int)LLVMIsTailCall(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMSetTailCall ( + void * jarg1 + , + + int jarg2 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + int arg2 ; + + arg1 = (LLVMValueRef)jarg1; + + + arg2 = (int) jarg2; + + + LLVMSetTailCall(arg1,arg2); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddIncoming ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + unsigned int jarg4 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + LLVMValueRef *arg2 = (LLVMValueRef *) 0 ; + LLVMBasicBlockRef *arg3 = (LLVMBasicBlockRef *) 0 ; + unsigned int arg4 ; + + arg1 = (LLVMValueRef)jarg1; + + arg2 = (LLVMValueRef *)jarg2; + + arg3 = (LLVMBasicBlockRef *)jarg3; + + + arg4 = (unsigned int) jarg4; + + + LLVMAddIncoming(arg1,arg2,arg3,arg4); + + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMCountIncoming ( + void * jarg1 + ) +{ + unsigned int jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int result; + + arg1 = (LLVMValueRef)jarg1; + + result = (unsigned int)LLVMCountIncoming(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetIncomingValue ( + void * jarg1 + , + + unsigned int jarg2 + ) +{ + void * jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int arg2 ; + LLVMValueRef result; + + arg1 = (LLVMValueRef)jarg1; + + + arg2 = (unsigned int) jarg2; + + + result = (LLVMValueRef)LLVMGetIncomingValue(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetIncomingBlock ( + void * 
jarg1 + , + + unsigned int jarg2 + ) +{ + void * jresult ; + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + unsigned int arg2 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMValueRef)jarg1; + + + arg2 = (unsigned int) jarg2; + + + result = (LLVMBasicBlockRef)LLVMGetIncomingBlock(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreateBuilder ( + ) +{ + void * jresult ; + LLVMBuilderRef result; + + result = (LLVMBuilderRef)LLVMCreateBuilder(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMPositionBuilder ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMBasicBlockRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + LLVMPositionBuilder(arg1,arg2,arg3); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMPositionBuilderBefore ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + LLVMPositionBuilderBefore(arg1,arg2); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMPositionBuilderAtEnd ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMBasicBlockRef arg2 = (LLVMBasicBlockRef) 0 ; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMBasicBlockRef)jarg2; + + LLVMPositionBuilderAtEnd(arg1,arg2); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMGetInsertBlock ( + void * jarg1 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMBasicBlockRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + result = (LLVMBasicBlockRef)LLVMGetInsertBlock(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMClearInsertionPosition ( + void * jarg1 + ) +{ + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + + arg1 = (LLVMBuilderRef)jarg1; + + LLVMClearInsertionPosition(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMInsertIntoBuilder ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + LLVMInsertIntoBuilder(arg1,arg2); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDisposeBuilder ( + void * jarg1 + ) +{ + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + + arg1 = (LLVMBuilderRef)jarg1; + + LLVMDisposeBuilder(arg1); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildRetVoid ( + void * jarg1 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + result = (LLVMValueRef)LLVMBuildRetVoid(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildRet ( + void * jarg1 + , + + void * jarg2 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + result = (LLVMValueRef)LLVMBuildRet(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildBr ( + void * jarg1 + , + + void * jarg2 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMBasicBlockRef arg2 = 
(LLVMBasicBlockRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMBasicBlockRef)jarg2; + + result = (LLVMValueRef)LLVMBuildBr(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildCondBr ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + void * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ; + LLVMBasicBlockRef arg4 = (LLVMBasicBlockRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMBasicBlockRef)jarg3; + + arg4 = (LLVMBasicBlockRef)jarg4; + + result = (LLVMValueRef)LLVMBuildCondBr(arg1,arg2,arg3,arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildSwitch ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + unsigned int jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ; + unsigned int arg4 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMBasicBlockRef)jarg3; + + + arg4 = (unsigned int) jarg4; + + + result = (LLVMValueRef)LLVMBuildSwitch(arg1,arg2,arg3,arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildInvoke ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + unsigned int jarg4 + , + + void * jarg5 + , + + void * jarg6 + , + + char * jarg7 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef *arg3 = (LLVMValueRef *) 0 ; + unsigned int arg4 ; + LLVMBasicBlockRef arg5 = (LLVMBasicBlockRef) 0 ; + LLVMBasicBlockRef arg6 = (LLVMBasicBlockRef) 0 ; + char *arg7 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef *)jarg3; + + + arg4 = (unsigned int) jarg4; + + + arg5 = (LLVMBasicBlockRef)jarg5; + + arg6 = (LLVMBasicBlockRef)jarg6; + + arg7 = jarg7; + + result = (LLVMValueRef)LLVMBuildInvoke(arg1,arg2,arg3,arg4,arg5,arg6,(char const *)arg7); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildUnwind ( + void * jarg1 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + result = (LLVMValueRef)LLVMBuildUnwind(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildUnreachable ( + void * jarg1 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + result = (LLVMValueRef)LLVMBuildUnreachable(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddCase ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + LLVMValueRef arg1 = (LLVMValueRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMBasicBlockRef arg3 = (LLVMBasicBlockRef) 0 ; + + arg1 = (LLVMValueRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMBasicBlockRef)jarg3; + + LLVMAddCase(arg1,arg2,arg3); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildAdd ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + 
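/* Added note, not SWIG output: Ada_LLVMBuildAdd and the arithmetic, logical,
   and shift builders that follow are shape-identical; each unwraps the
   builder and two operand values, forwards the Ada-supplied instruction name
   as char const *, and returns the new value as void*. */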
void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildAdd(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildSub ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildSub(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildMul ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildMul(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildUDiv ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildUDiv(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildSDiv ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildSDiv(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFDiv ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildFDiv(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildURem ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + 
LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildURem(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildSRem ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildSRem(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFRem ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildFRem(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildShl ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildShl(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildLShr ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildLShr(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildAShr ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildAShr(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildAnd ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = 
(LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildAnd(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildOr ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildOr(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildXor ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildXor(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildNeg ( + void * jarg1 + , + + void * jarg2 + , + + char * jarg3 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + char *arg3 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = jarg3; + + result = (LLVMValueRef)LLVMBuildNeg(arg1,arg2,(char const *)arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildNot ( + void * jarg1 + , + + void * jarg2 + , + + char * jarg3 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + char *arg3 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = jarg3; + + result = (LLVMValueRef)LLVMBuildNot(arg1,arg2,(char const *)arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildMalloc ( + void * jarg1 + , + + void * jarg2 + , + + char * jarg3 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + char *arg3 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + arg3 = jarg3; + + result = (LLVMValueRef)LLVMBuildMalloc(arg1,arg2,(char const *)arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildArrayMalloc ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = 
(LLVMValueRef)LLVMBuildArrayMalloc(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildAlloca ( + void * jarg1 + , + + void * jarg2 + , + + char * jarg3 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + char *arg3 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + arg3 = jarg3; + + result = (LLVMValueRef)LLVMBuildAlloca(arg1,arg2,(char const *)arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildArrayAlloca ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildArrayAlloca(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFree ( + void * jarg1 + , + + void * jarg2 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + result = (LLVMValueRef)LLVMBuildFree(arg1,arg2); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildLoad ( + void * jarg1 + , + + void * jarg2 + , + + char * jarg3 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + char *arg3 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = jarg3; + + result = (LLVMValueRef)LLVMBuildLoad(arg1,arg2,(char const *)arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildStore ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + result = (LLVMValueRef)LLVMBuildStore(arg1,arg2,arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildGEP ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + unsigned int jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef *arg3 = (LLVMValueRef *) 0 ; + unsigned int arg4 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef *)jarg3; + + + arg4 = (unsigned int) jarg4; + + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildGEP(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildTrunc ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + 
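/* Added note, not SWIG output: in the cast builders (Trunc, ZExt, SExt,
   FPToUI, FPToSI, UIToFP, SIToFP, FPTrunc, FPExt, PtrToInt, IntToPtr,
   BitCast) the third argument unwraps to the destination LLVMTypeRef rather
   than to a second operand value. */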
LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildTrunc(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildZExt ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildZExt(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildSExt ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildSExt(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFPToUI ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildFPToUI(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFPToSI ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildFPToSI(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildUIToFP ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildUIToFP(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildSIToFP ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = 
(LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildSIToFP(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFPTrunc ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildFPTrunc(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFPExt ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildFPExt(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildPtrToInt ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildPtrToInt(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildIntToPtr ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildIntToPtr(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildBitCast ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildBitCast(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildICmp ( + void * jarg1 + , + + int jarg2 + , + + void * jarg3 + , + + void * jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMIntPredicate arg2 ; + LLVMValueRef arg3 = 
(LLVMValueRef) 0 ; + LLVMValueRef arg4 = (LLVMValueRef) 0 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMIntPredicate) jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = (LLVMValueRef)jarg4; + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildICmp(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildFCmp ( + void * jarg1 + , + + int jarg2 + , + + void * jarg3 + , + + void * jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMRealPredicate arg2 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + LLVMValueRef arg4 = (LLVMValueRef) 0 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMRealPredicate) jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = (LLVMValueRef)jarg4; + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildFCmp(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildPhi ( + void * jarg1 + , + + void * jarg2 + , + + char * jarg3 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + char *arg3 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + arg3 = jarg3; + + result = (LLVMValueRef)LLVMBuildPhi(arg1,arg2,(char const *)arg3); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildCall ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + unsigned int jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef *arg3 = (LLVMValueRef *) 0 ; + unsigned int arg4 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef *)jarg3; + + + arg4 = (unsigned int) jarg4; + + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildCall(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildSelect ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + void * jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + LLVMValueRef arg4 = (LLVMValueRef) 0 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = (LLVMValueRef)jarg4; + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildSelect(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildVAArg ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMTypeRef arg3 = (LLVMTypeRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMTypeRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildVAArg(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * 
SWIGSTDCALL Ada_LLVMBuildExtractElement ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildExtractElement(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildInsertElement ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + void * jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + LLVMValueRef arg4 = (LLVMValueRef) 0 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = (LLVMValueRef)jarg4; + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildInsertElement(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildShuffleVector ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + void * jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + LLVMValueRef arg4 = (LLVMValueRef) 0 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + arg4 = (LLVMValueRef)jarg4; + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildShuffleVector(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildExtractValue ( + void * jarg1 + , + + void * jarg2 + , + + unsigned int jarg3 + , + + char * jarg4 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + unsigned int arg3 ; + char *arg4 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + + arg3 = (unsigned int) jarg3; + + + arg4 = jarg4; + + result = (LLVMValueRef)LLVMBuildExtractValue(arg1,arg2,arg3,(char const *)arg4); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMBuildInsertValue ( + void * jarg1 + , + + void * jarg2 + , + + void * jarg3 + , + + unsigned int jarg4 + , + + char * jarg5 + ) +{ + void * jresult ; + LLVMBuilderRef arg1 = (LLVMBuilderRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + LLVMValueRef arg3 = (LLVMValueRef) 0 ; + unsigned int arg4 ; + char *arg5 = (char *) 0 ; + LLVMValueRef result; + + arg1 = (LLVMBuilderRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + arg3 = (LLVMValueRef)jarg3; + + + arg4 = (unsigned int) jarg4; + + + arg5 = jarg5; + + result = (LLVMValueRef)LLVMBuildInsertValue(arg1,arg2,arg3,arg4,(char const *)arg5); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreateModuleProviderForExistingModule ( + void * jarg1 + ) +{ + void * jresult ; + LLVMModuleRef arg1 = (LLVMModuleRef) 0 ; + LLVMModuleProviderRef result; + + arg1 = (LLVMModuleRef)jarg1; + + result = 
(LLVMModuleProviderRef)LLVMCreateModuleProviderForExistingModule(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDisposeModuleProvider ( + void * jarg1 + ) +{ + LLVMModuleProviderRef arg1 = (LLVMModuleProviderRef) 0 ; + + arg1 = (LLVMModuleProviderRef)jarg1; + + LLVMDisposeModuleProvider(arg1); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMCreateMemoryBufferWithContentsOfFile ( + char * jarg1 + , + + void * jarg2 + , + + void * jarg3 + ) +{ + int jresult ; + char *arg1 = (char *) 0 ; + LLVMMemoryBufferRef *arg2 = (LLVMMemoryBufferRef *) 0 ; + char **arg3 = (char **) 0 ; + int result; + + arg1 = jarg1; + + arg2 = (LLVMMemoryBufferRef *)jarg2; + + arg3 = (char **)jarg3; + + result = (int)LLVMCreateMemoryBufferWithContentsOfFile((char const *)arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMCreateMemoryBufferWithSTDIN ( + void * jarg1 + , + + void * jarg2 + ) +{ + int jresult ; + LLVMMemoryBufferRef *arg1 = (LLVMMemoryBufferRef *) 0 ; + char **arg2 = (char **) 0 ; + int result; + + arg1 = (LLVMMemoryBufferRef *)jarg1; + + arg2 = (char **)jarg2; + + result = (int)LLVMCreateMemoryBufferWithSTDIN(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDisposeMemoryBuffer ( + void * jarg1 + ) +{ + LLVMMemoryBufferRef arg1 = (LLVMMemoryBufferRef) 0 ; + + arg1 = (LLVMMemoryBufferRef)jarg1; + + LLVMDisposeMemoryBuffer(arg1); + + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreatePassManager ( + ) +{ + void * jresult ; + LLVMPassManagerRef result; + + result = (LLVMPassManagerRef)LLVMCreatePassManager(); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreateFunctionPassManager ( + void * jarg1 + ) +{ + void * jresult ; + LLVMModuleProviderRef arg1 = (LLVMModuleProviderRef) 0 ; + LLVMPassManagerRef result; + + arg1 = (LLVMModuleProviderRef)jarg1; + + result = (LLVMPassManagerRef)LLVMCreateFunctionPassManager(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMRunPassManager ( + void * jarg1 + , + + void * jarg2 + ) +{ + int jresult ; + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + LLVMModuleRef arg2 = (LLVMModuleRef) 0 ; + int result; + + arg1 = (LLVMPassManagerRef)jarg1; + + arg2 = (LLVMModuleRef)jarg2; + + result = (int)LLVMRunPassManager(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMInitializeFunctionPassManager ( + void * jarg1 + ) +{ + int jresult ; + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + int result; + + arg1 = (LLVMPassManagerRef)jarg1; + + result = (int)LLVMInitializeFunctionPassManager(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMRunFunctionPassManager ( + void * jarg1 + , + + void * jarg2 + ) +{ + int jresult ; + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + int result; + + arg1 = (LLVMPassManagerRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + result = (int)LLVMRunFunctionPassManager(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMFinalizeFunctionPassManager ( + void * jarg1 + ) +{ + int jresult ; + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + int result; + + arg1 = (LLVMPassManagerRef)jarg1; + + result = (int)LLVMFinalizeFunctionPassManager(arg1); + jresult = result; + + + + return 
jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDisposePassManager ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMDisposePassManager(arg1); + + +} + + + +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ada/target/llvm_target-binding.ads b/bindings/ada/target/llvm_target-binding.ads new file mode 100644 index 0000000000000..61201c8d17532 --- /dev/null +++ b/bindings/ada/target/llvm_target-binding.ads @@ -0,0 +1,138 @@ +-- This file is generated by SWIG. Do *not* modify by hand. +-- + +with llvm; +with Interfaces.C.Strings; + + +package LLVM_Target.Binding is + + LLVMBigEndian : constant := 0; + LLVMLittleEndian : constant := 1; + + procedure LLVMInitializeAllTargets; + + function LLVMInitializeNativeTarget return Interfaces.C.int; + + function LLVMCreateTargetData + (StringRep : in Interfaces.C.Strings.chars_ptr) + return LLVM_Target.LLVMTargetDataRef; + + procedure LLVMAddTargetData + (arg_2_1 : in LLVM_Target.LLVMTargetDataRef; + arg_2_2 : in llvm.LLVMPassManagerRef); + + function LLVMCopyStringRepOfTargetData + (arg_1 : in LLVM_Target.LLVMTargetDataRef) + return Interfaces.C.Strings.chars_ptr; + + function LLVMByteOrder + (arg_1 : in LLVM_Target.LLVMTargetDataRef) + return LLVM_Target.LLVMByteOrdering; + + function LLVMPointerSize + (arg_1 : in LLVM_Target.LLVMTargetDataRef) + return Interfaces.C.unsigned; + + function LLVMIntPtrType + (arg_1 : in LLVM_Target.LLVMTargetDataRef) + return llvm.LLVMTypeRef; + + function LLVMSizeOfTypeInBits + (arg_2_1 : in LLVM_Target.LLVMTargetDataRef; + arg_2_2 : in llvm.LLVMTypeRef) + return Interfaces.C.Extensions.unsigned_long_long; + + function LLVMStoreSizeOfType + (arg_2_1 : in LLVM_Target.LLVMTargetDataRef; + arg_2_2 : in llvm.LLVMTypeRef) + return Interfaces.C.Extensions.unsigned_long_long; + + function LLVMABISizeOfType + (arg_2_1 : in LLVM_Target.LLVMTargetDataRef; + arg_2_2 : in llvm.LLVMTypeRef) + return Interfaces.C.Extensions.unsigned_long_long; + + function LLVMABIAlignmentOfType + (arg_2_1 : in LLVM_Target.LLVMTargetDataRef; + arg_2_2 : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + function LLVMCallFrameAlignmentOfType + (arg_2_1 : in LLVM_Target.LLVMTargetDataRef; + arg_2_2 : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + function LLVMPreferredAlignmentOfType + (arg_2_1 : in LLVM_Target.LLVMTargetDataRef; + arg_2_2 : in llvm.LLVMTypeRef) + return Interfaces.C.unsigned; + + function LLVMPreferredAlignmentOfGlobal + (arg_1 : in LLVM_Target.LLVMTargetDataRef; + GlobalVar : in llvm.LLVMValueRef) + return Interfaces.C.unsigned; + + function LLVMElementAtOffset + (arg_1 : in LLVM_Target.LLVMTargetDataRef; + StructTy : in llvm.LLVMTypeRef; + Offset : in Interfaces.C.Extensions.unsigned_long_long) + return Interfaces.C.unsigned; + + function LLVMOffsetOfElement + (arg_1 : in LLVM_Target.LLVMTargetDataRef; + StructTy : in llvm.LLVMTypeRef; + Element : in Interfaces.C.unsigned) + return Interfaces.C.Extensions.unsigned_long_long; + + procedure LLVMInvalidateStructLayout + (arg_1 : in LLVM_Target.LLVMTargetDataRef; + StructTy : in llvm.LLVMTypeRef); + + procedure LLVMDisposeTargetData + (arg_1 : in LLVM_Target.LLVMTargetDataRef); + +private + + pragma Import + (C, + LLVMInitializeAllTargets, + "Ada_LLVMInitializeAllTargets"); + pragma Import + (C, + LLVMInitializeNativeTarget, + "Ada_LLVMInitializeNativeTarget"); + pragma Import (C, LLVMCreateTargetData, 
"Ada_LLVMCreateTargetData"); + pragma Import (C, LLVMAddTargetData, "Ada_LLVMAddTargetData"); + pragma Import + (C, + LLVMCopyStringRepOfTargetData, + "Ada_LLVMCopyStringRepOfTargetData"); + pragma Import (C, LLVMByteOrder, "Ada_LLVMByteOrder"); + pragma Import (C, LLVMPointerSize, "Ada_LLVMPointerSize"); + pragma Import (C, LLVMIntPtrType, "Ada_LLVMIntPtrType"); + pragma Import (C, LLVMSizeOfTypeInBits, "Ada_LLVMSizeOfTypeInBits"); + pragma Import (C, LLVMStoreSizeOfType, "Ada_LLVMStoreSizeOfType"); + pragma Import (C, LLVMABISizeOfType, "Ada_LLVMABISizeOfType"); + pragma Import (C, LLVMABIAlignmentOfType, "Ada_LLVMABIAlignmentOfType"); + pragma Import + (C, + LLVMCallFrameAlignmentOfType, + "Ada_LLVMCallFrameAlignmentOfType"); + pragma Import + (C, + LLVMPreferredAlignmentOfType, + "Ada_LLVMPreferredAlignmentOfType"); + pragma Import + (C, + LLVMPreferredAlignmentOfGlobal, + "Ada_LLVMPreferredAlignmentOfGlobal"); + pragma Import (C, LLVMElementAtOffset, "Ada_LLVMElementAtOffset"); + pragma Import (C, LLVMOffsetOfElement, "Ada_LLVMOffsetOfElement"); + pragma Import + (C, + LLVMInvalidateStructLayout, + "Ada_LLVMInvalidateStructLayout"); + pragma Import (C, LLVMDisposeTargetData, "Ada_LLVMDisposeTargetData"); + +end LLVM_Target.Binding; diff --git a/bindings/ada/target/llvm_target.ads b/bindings/ada/target/llvm_target.ads new file mode 100644 index 0000000000000..11cb05d55b35d --- /dev/null +++ b/bindings/ada/target/llvm_target.ads @@ -0,0 +1,72 @@ +-- This file is generated by SWIG. Do *not* modify by hand. +-- + +with Interfaces.C.Extensions; + + +package LLVM_Target is + + -- LLVMOpaqueTargetData + -- + type LLVMOpaqueTargetData is new + Interfaces.C.Extensions.opaque_structure_def; + + type LLVMOpaqueTargetData_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_Target.LLVMOpaqueTargetData; + + type LLVMOpaqueTargetData_view is access all + LLVM_Target.LLVMOpaqueTargetData; + + -- LLVMTargetDataRef + -- + type LLVMTargetDataRef is access all LLVM_Target.LLVMOpaqueTargetData; + + type LLVMTargetDataRef_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_Target.LLVMTargetDataRef; + + type LLVMTargetDataRef_view is access all LLVM_Target.LLVMTargetDataRef; + + -- LLVMStructLayout + -- + type LLVMStructLayout is new Interfaces.C.Extensions.opaque_structure_def; + + type LLVMStructLayout_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_Target.LLVMStructLayout; + + type LLVMStructLayout_view is access all LLVM_Target.LLVMStructLayout; + + -- LLVMStructLayoutRef + -- + type LLVMStructLayoutRef is access all LLVM_Target.LLVMStructLayout; + + type LLVMStructLayoutRef_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_Target.LLVMStructLayoutRef; + + type LLVMStructLayoutRef_view is access all LLVM_Target.LLVMStructLayoutRef; + + -- TargetData + -- + type TargetData is new Interfaces.C.Extensions.incomplete_class_def; + + type TargetData_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_Target.TargetData; + + type TargetData_view is access all LLVM_Target.TargetData; + + -- LLVMByteOrdering + -- + type LLVMByteOrdering is new Interfaces.C.int; + + type LLVMByteOrdering_array is + array (Interfaces.C.size_t range <>) + of aliased LLVM_Target.LLVMByteOrdering; + + type LLVMByteOrdering_view is access all LLVM_Target.LLVMByteOrdering; + + +end LLVM_Target; diff --git a/bindings/ada/target/llvm_target_wrap.cxx b/bindings/ada/target/llvm_target_wrap.cxx new file mode 100644 index 0000000000000..16aca8a4379a8 --- /dev/null +++ 
b/bindings/ada/target/llvm_target_wrap.cxx @@ -0,0 +1,720 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 1.3.36 + * + * This file is not intended to be easily readable and contains a number of + * coding conventions designed to improve portability and efficiency. Do not make + * changes to this file unless you know what you are doing--modify the SWIG + * interface file instead. + * ----------------------------------------------------------------------------- */ + + +#ifdef __cplusplus +template <typename T> class SwigValueWrapper { + T *tt; +public: + SwigValueWrapper() : tt(0) { } + SwigValueWrapper(const SwigValueWrapper& rhs) : tt(new T(*rhs.tt)) { } + SwigValueWrapper(const T& t) : tt(new T(t)) { } + ~SwigValueWrapper() { delete tt; } + SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; } + operator T&() const { return *tt; } + T *operator&() { return tt; } +private: + SwigValueWrapper& operator=(const SwigValueWrapper& rhs); +}; + +template <typename T> T SwigValueInit() { + return T(); +} +#endif + +/* ----------------------------------------------------------------------------- + * This section contains generic SWIG labels for method/variable + * declarations/attributes, and other compiler dependent labels. + * ----------------------------------------------------------------------------- */ + +/* template workaround for compilers that cannot correctly implement the C++ standard */ +#ifndef SWIGTEMPLATEDISAMBIGUATOR +# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560) +# define SWIGTEMPLATEDISAMBIGUATOR template +# elif defined(__HP_aCC) +/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */ +/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */ +# define SWIGTEMPLATEDISAMBIGUATOR template +# else +# define SWIGTEMPLATEDISAMBIGUATOR +# endif +#endif + +/* inline attribute */ +#ifndef SWIGINLINE +# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__)) +# define SWIGINLINE inline +# else +# define SWIGINLINE +# endif +#endif + +/* attribute recognised by some compilers to avoid 'unused' warnings */ +#ifndef SWIGUNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +# elif defined(__ICC) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +#endif + +#ifndef SWIGUNUSEDPARM +# ifdef __cplusplus +# define SWIGUNUSEDPARM(p) +# else +# define SWIGUNUSEDPARM(p) p SWIGUNUSED +# endif +#endif + +/* internal SWIG method */ +#ifndef SWIGINTERN +# define SWIGINTERN static SWIGUNUSED +#endif + +/* internal inline SWIG method */ +#ifndef SWIGINTERNINLINE +# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE +#endif + +/* exporting methods */ +#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# ifndef GCC_HASCLASSVISIBILITY +# define GCC_HASCLASSVISIBILITY +# endif +#endif + +#ifndef SWIGEXPORT +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# if defined(STATIC_LINKED) +# define SWIGEXPORT +# else +# define SWIGEXPORT __declspec(dllexport) +# endif +# else +# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY) +# define SWIGEXPORT __attribute__ ((visibility("default"))) +# else +# define SWIGEXPORT +# endif +# endif +#endif + +/* calling conventions for
Windows */ +#ifndef SWIGSTDCALL +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# define SWIGSTDCALL __stdcall +# else +# define SWIGSTDCALL +# endif +#endif + +/* Deal with Microsoft's attempt at deprecating C standard runtime functions */ +#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE) +# define _CRT_SECURE_NO_DEPRECATE +#endif + +/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */ +#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE) +# define _SCL_SECURE_NO_DEPRECATE +#endif + + + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#if defined(_WIN32) || defined(__CYGWIN32__) +# define DllExport __declspec( dllexport ) +# define SWIGSTDCALL __stdcall +#else +# define DllExport +# define SWIGSTDCALL +#endif + + +#ifdef __cplusplus +# include <new> +#endif + + + + +/* Support for throwing Ada exceptions from C/C++ */ + +typedef enum +{ + SWIG_AdaException, + SWIG_AdaOutOfMemoryException, + SWIG_AdaIndexOutOfRangeException, + SWIG_AdaDivideByZeroException, + SWIG_AdaArgumentOutOfRangeException, + SWIG_AdaNullReferenceException +} SWIG_AdaExceptionCodes; + + +typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *); + + +typedef struct +{ + SWIG_AdaExceptionCodes code; + SWIG_AdaExceptionCallback_t callback; +} + SWIG_AdaExceptions_t; + + +static +SWIG_AdaExceptions_t +SWIG_ada_exceptions[] = +{ + { SWIG_AdaException, NULL }, + { SWIG_AdaOutOfMemoryException, NULL }, + { SWIG_AdaIndexOutOfRangeException, NULL }, + { SWIG_AdaDivideByZeroException, NULL }, + { SWIG_AdaArgumentOutOfRangeException, NULL }, + { SWIG_AdaNullReferenceException, NULL } +}; + + +static +void +SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) +{ + SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback; + if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) { + callback = SWIG_ada_exceptions[code].callback; + } + callback(msg); +} + + + +#ifdef __cplusplus +extern "C" +#endif + +DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Target (SWIG_AdaExceptionCallback_t systemException, + SWIG_AdaExceptionCallback_t outOfMemory, + SWIG_AdaExceptionCallback_t indexOutOfRange, + SWIG_AdaExceptionCallback_t divideByZero, + SWIG_AdaExceptionCallback_t argumentOutOfRange, + SWIG_AdaExceptionCallback_t nullReference) +{ + SWIG_ada_exceptions [SWIG_AdaException].callback = systemException; + SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory; + SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange; + SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero; + SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange; + SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference; +} + + +/* Callback for returning strings to Ada without leaking memory */ + +typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *); +static SWIG_AdaStringHelperCallback SWIG_ada_string_callback = NULL; + + + +/* probably obsolete ...
+#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Target(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +#include "llvm-c/Target.h" + + + +// struct LLVMCtxt; + + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport void SWIGSTDCALL Ada_LLVMInitializeAllTargets ( + ) +{ + LLVMInitializeAllTargets(); + + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMInitializeNativeTarget ( + ) +{ + int jresult ; + int result; + + result = (int)LLVMInitializeNativeTarget(); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMCreateTargetData ( + char * jarg1 + ) +{ + void * jresult ; + char *arg1 = (char *) 0 ; + LLVMTargetDataRef result; + + arg1 = jarg1; + + result = (LLVMTargetDataRef)LLVMCreateTargetData((char const *)arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddTargetData ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMPassManagerRef arg2 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMPassManagerRef)jarg2; + + LLVMAddTargetData(arg1,arg2); + + +} + + + +DllExport char * SWIGSTDCALL Ada_LLVMCopyStringRepOfTargetData ( + void * jarg1 + ) +{ + char * jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + char *result = 0 ; + + arg1 = (LLVMTargetDataRef)jarg1; + + result = (char *)LLVMCopyStringRepOfTargetData(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport int SWIGSTDCALL Ada_LLVMByteOrder ( + void * jarg1 + ) +{ + int jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMByteOrdering result; + + arg1 = (LLVMTargetDataRef)jarg1; + + result = (LLVMByteOrdering)LLVMByteOrder(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMPointerSize ( + void * jarg1 + ) +{ + unsigned int jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + unsigned int result; + + arg1 = (LLVMTargetDataRef)jarg1; + + result = (unsigned int)LLVMPointerSize(arg1); + jresult = result; + + + + return jresult; + +} + + + +DllExport void * SWIGSTDCALL Ada_LLVMIntPtrType ( + void * jarg1 + ) +{ + void * jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef result; + + arg1 = (LLVMTargetDataRef)jarg1; + + result = (LLVMTypeRef)LLVMIntPtrType(arg1); + jresult = (void *) result; + + + + return jresult; + +} + + + +DllExport unsigned long long SWIGSTDCALL Ada_LLVMSizeOfTypeInBits ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned long long jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned long long result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + result = (unsigned long long)LLVMSizeOfTypeInBits(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned long long SWIGSTDCALL Ada_LLVMStoreSizeOfType ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned long long jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned long long result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = 
(LLVMTypeRef)jarg2; + + result = (unsigned long long)LLVMStoreSizeOfType(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned long long SWIGSTDCALL Ada_LLVMABISizeOfType ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned long long jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned long long result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + result = (unsigned long long)LLVMABISizeOfType(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMABIAlignmentOfType ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned int jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned int result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + result = (unsigned int)LLVMABIAlignmentOfType(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMCallFrameAlignmentOfType ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned int jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned int result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + result = (unsigned int)LLVMCallFrameAlignmentOfType(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMPreferredAlignmentOfType ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned int jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned int result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + result = (unsigned int)LLVMPreferredAlignmentOfType(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMPreferredAlignmentOfGlobal ( + void * jarg1 + , + + void * jarg2 + ) +{ + unsigned int jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMValueRef arg2 = (LLVMValueRef) 0 ; + unsigned int result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMValueRef)jarg2; + + result = (unsigned int)LLVMPreferredAlignmentOfGlobal(arg1,arg2); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned int SWIGSTDCALL Ada_LLVMElementAtOffset ( + void * jarg1 + , + + void * jarg2 + , + + unsigned long long jarg3 + ) +{ + unsigned int jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned long long arg3 ; + unsigned int result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + + arg3 = (unsigned long long) jarg3; + + + result = (unsigned int)LLVMElementAtOffset(arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport unsigned long long SWIGSTDCALL Ada_LLVMOffsetOfElement ( + void * jarg1 + , + + void * jarg2 + , + + unsigned int jarg3 + ) +{ + unsigned long long jresult ; + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + unsigned int arg3 ; + unsigned long long result; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + + arg3 = (unsigned int) jarg3; + + + result = (unsigned long long)LLVMOffsetOfElement(arg1,arg2,arg3); + jresult = result; + + + + return jresult; + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMInvalidateStructLayout ( + void * jarg1 + , + + void * jarg2 + ) +{ + LLVMTargetDataRef arg1 
= (LLVMTargetDataRef) 0 ; + LLVMTypeRef arg2 = (LLVMTypeRef) 0 ; + + arg1 = (LLVMTargetDataRef)jarg1; + + arg2 = (LLVMTypeRef)jarg2; + + LLVMInvalidateStructLayout(arg1,arg2); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMDisposeTargetData ( + void * jarg1 + ) +{ + LLVMTargetDataRef arg1 = (LLVMTargetDataRef) 0 ; + + arg1 = (LLVMTargetDataRef)jarg1; + + LLVMDisposeTargetData(arg1); + + +} + + + +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ada/transforms/llvm_transforms-binding.ads b/bindings/ada/transforms/llvm_transforms-binding.ads new file mode 100644 index 0000000000000..2254b6eec2c3a --- /dev/null +++ b/bindings/ada/transforms/llvm_transforms-binding.ads @@ -0,0 +1,206 @@ +-- This file is generated by SWIG. Do *not* modify by hand. +-- + +with llvm; + + +package LLVM_Transforms.Binding is + + procedure LLVMAddArgumentPromotionPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddConstantMergePass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddDeadArgEliminationPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddDeadTypeEliminationPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddFunctionAttrsPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddFunctionInliningPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddGlobalDCEPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddGlobalOptimizerPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddIPConstantPropagationPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddLowerSetJmpPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddPruneEHPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddRaiseAllocationsPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddStripDeadPrototypesPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddStripSymbolsPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddAggressiveDCEPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddCFGSimplificationPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddCondPropagationPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddDeadStoreEliminationPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddGVNPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddIndVarSimplifyPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddInstructionCombiningPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddJumpThreadingPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddLICMPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddLoopDeletionPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddLoopIndexSplitPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddLoopRotatePass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddLoopUnrollPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddLoopUnswitchPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddMemCpyOptPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddPromoteMemoryToRegisterPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddReassociatePass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddSCCPPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddScalarReplAggregatesPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddSimplifyLibCallsPass (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddTailCallEliminationPass + (PM : in llvm.LLVMPassManagerRef); + + procedure 
LLVMAddConstantPropagationPass + (PM : in llvm.LLVMPassManagerRef); + + procedure LLVMAddDemoteMemoryToRegisterPass + (PM : in llvm.LLVMPassManagerRef); + +private + + pragma Import + (C, + LLVMAddArgumentPromotionPass, + "Ada_LLVMAddArgumentPromotionPass"); + pragma Import + (C, + LLVMAddConstantMergePass, + "Ada_LLVMAddConstantMergePass"); + pragma Import + (C, + LLVMAddDeadArgEliminationPass, + "Ada_LLVMAddDeadArgEliminationPass"); + pragma Import + (C, + LLVMAddDeadTypeEliminationPass, + "Ada_LLVMAddDeadTypeEliminationPass"); + pragma Import + (C, + LLVMAddFunctionAttrsPass, + "Ada_LLVMAddFunctionAttrsPass"); + pragma Import + (C, + LLVMAddFunctionInliningPass, + "Ada_LLVMAddFunctionInliningPass"); + pragma Import (C, LLVMAddGlobalDCEPass, "Ada_LLVMAddGlobalDCEPass"); + pragma Import + (C, + LLVMAddGlobalOptimizerPass, + "Ada_LLVMAddGlobalOptimizerPass"); + pragma Import + (C, + LLVMAddIPConstantPropagationPass, + "Ada_LLVMAddIPConstantPropagationPass"); + pragma Import (C, LLVMAddLowerSetJmpPass, "Ada_LLVMAddLowerSetJmpPass"); + pragma Import (C, LLVMAddPruneEHPass, "Ada_LLVMAddPruneEHPass"); + pragma Import + (C, + LLVMAddRaiseAllocationsPass, + "Ada_LLVMAddRaiseAllocationsPass"); + pragma Import + (C, + LLVMAddStripDeadPrototypesPass, + "Ada_LLVMAddStripDeadPrototypesPass"); + pragma Import (C, LLVMAddStripSymbolsPass, "Ada_LLVMAddStripSymbolsPass"); + pragma Import + (C, + LLVMAddAggressiveDCEPass, + "Ada_LLVMAddAggressiveDCEPass"); + pragma Import + (C, + LLVMAddCFGSimplificationPass, + "Ada_LLVMAddCFGSimplificationPass"); + pragma Import + (C, + LLVMAddCondPropagationPass, + "Ada_LLVMAddCondPropagationPass"); + pragma Import + (C, + LLVMAddDeadStoreEliminationPass, + "Ada_LLVMAddDeadStoreEliminationPass"); + pragma Import (C, LLVMAddGVNPass, "Ada_LLVMAddGVNPass"); + pragma Import + (C, + LLVMAddIndVarSimplifyPass, + "Ada_LLVMAddIndVarSimplifyPass"); + pragma Import + (C, + LLVMAddInstructionCombiningPass, + "Ada_LLVMAddInstructionCombiningPass"); + pragma Import + (C, + LLVMAddJumpThreadingPass, + "Ada_LLVMAddJumpThreadingPass"); + pragma Import (C, LLVMAddLICMPass, "Ada_LLVMAddLICMPass"); + pragma Import (C, LLVMAddLoopDeletionPass, "Ada_LLVMAddLoopDeletionPass"); + pragma Import + (C, + LLVMAddLoopIndexSplitPass, + "Ada_LLVMAddLoopIndexSplitPass"); + pragma Import (C, LLVMAddLoopRotatePass, "Ada_LLVMAddLoopRotatePass"); + pragma Import (C, LLVMAddLoopUnrollPass, "Ada_LLVMAddLoopUnrollPass"); + pragma Import (C, LLVMAddLoopUnswitchPass, "Ada_LLVMAddLoopUnswitchPass"); + pragma Import (C, LLVMAddMemCpyOptPass, "Ada_LLVMAddMemCpyOptPass"); + pragma Import + (C, + LLVMAddPromoteMemoryToRegisterPass, + "Ada_LLVMAddPromoteMemoryToRegisterPass"); + pragma Import (C, LLVMAddReassociatePass, "Ada_LLVMAddReassociatePass"); + pragma Import (C, LLVMAddSCCPPass, "Ada_LLVMAddSCCPPass"); + pragma Import + (C, + LLVMAddScalarReplAggregatesPass, + "Ada_LLVMAddScalarReplAggregatesPass"); + pragma Import + (C, + LLVMAddSimplifyLibCallsPass, + "Ada_LLVMAddSimplifyLibCallsPass"); + pragma Import + (C, + LLVMAddTailCallEliminationPass, + "Ada_LLVMAddTailCallEliminationPass"); + pragma Import + (C, + LLVMAddConstantPropagationPass, + "Ada_LLVMAddConstantPropagationPass"); + pragma Import + (C, + LLVMAddDemoteMemoryToRegisterPass, + "Ada_LLVMAddDemoteMemoryToRegisterPass"); + +end LLVM_Transforms.Binding; diff --git a/bindings/ada/transforms/llvm_transforms.ads b/bindings/ada/transforms/llvm_transforms.ads new file mode 100644 index 0000000000000..4f37aafe805c6 --- /dev/null +++ 
b/bindings/ada/transforms/llvm_transforms.ads @@ -0,0 +1,6 @@ +-- This file is generated by SWIG. Do *not* modify by hand. +-- + +package LLVM_Transforms is + +end LLVM_Transforms; diff --git a/bindings/ada/transforms/llvm_transforms_wrap.cxx b/bindings/ada/transforms/llvm_transforms_wrap.cxx new file mode 100644 index 0000000000000..8cb04db791aa8 --- /dev/null +++ b/bindings/ada/transforms/llvm_transforms_wrap.cxx @@ -0,0 +1,828 @@ +/* ---------------------------------------------------------------------------- + * This file was automatically generated by SWIG (http://www.swig.org). + * Version 1.3.36 + * + * This file is not intended to be easily readable and contains a number of + * coding conventions designed to improve portability and efficiency. Do not make + * changes to this file unless you know what you are doing--modify the SWIG + * interface file instead. + * ----------------------------------------------------------------------------- */ + + +#ifdef __cplusplus +template<typename T> class SwigValueWrapper { + T *tt; +public: + SwigValueWrapper() : tt(0) { } + SwigValueWrapper(const SwigValueWrapper& rhs) : tt(new T(*rhs.tt)) { } + SwigValueWrapper(const T& t) : tt(new T(t)) { } + ~SwigValueWrapper() { delete tt; } + SwigValueWrapper& operator=(const T& t) { delete tt; tt = new T(t); return *this; } + operator T&() const { return *tt; } + T *operator&() { return tt; } +private: + SwigValueWrapper& operator=(const SwigValueWrapper& rhs); +}; + +template <typename T> T SwigValueInit() { + return T(); +} +#endif + +/* ----------------------------------------------------------------------------- + * This section contains generic SWIG labels for method/variable + * declarations/attributes, and other compiler dependent labels. + * ----------------------------------------------------------------------------- */ + +/* template workaround for compilers that cannot correctly implement the C++ standard */ +#ifndef SWIGTEMPLATEDISAMBIGUATOR +# if defined(__SUNPRO_CC) && (__SUNPRO_CC <= 0x560) +# define SWIGTEMPLATEDISAMBIGUATOR template +# elif defined(__HP_aCC) +/* Needed even with `aCC -AA' when `aCC -V' reports HP ANSI C++ B3910B A.03.55 */ +/* If we find a maximum version that requires this, the test would be __HP_aCC <= 35500 for A.03.55 */ +# define SWIGTEMPLATEDISAMBIGUATOR template +# else +# define SWIGTEMPLATEDISAMBIGUATOR +# endif +#endif + +/* inline attribute */ +#ifndef SWIGINLINE +# if defined(__cplusplus) || (defined(__GNUC__) && !defined(__STRICT_ANSI__)) +# define SWIGINLINE inline +# else +# define SWIGINLINE +# endif +#endif + +/* attribute recognised by some compilers to avoid 'unused' warnings */ +#ifndef SWIGUNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +# elif defined(__ICC) +# define SWIGUNUSED __attribute__ ((__unused__)) +# else +# define SWIGUNUSED +# endif +#endif + +#ifndef SWIGUNUSEDPARM +# ifdef __cplusplus +# define SWIGUNUSEDPARM(p) +# else +# define SWIGUNUSEDPARM(p) p SWIGUNUSED +# endif +#endif + +/* internal SWIG method */ +#ifndef SWIGINTERN +# define SWIGINTERN static SWIGUNUSED +#endif + +/* internal inline SWIG method */ +#ifndef SWIGINTERNINLINE +# define SWIGINTERNINLINE SWIGINTERN SWIGINLINE +#endif + +/* exporting methods */ +#if (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# ifndef GCC_HASCLASSVISIBILITY +# define GCC_HASCLASSVISIBILITY +# endif +#endif + +#ifndef SWIGEXPORT +# if
defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# if defined(STATIC_LINKED) +# define SWIGEXPORT +# else +# define SWIGEXPORT __declspec(dllexport) +# endif +# else +# if defined(__GNUC__) && defined(GCC_HASCLASSVISIBILITY) +# define SWIGEXPORT __attribute__ ((visibility("default"))) +# else +# define SWIGEXPORT +# endif +# endif +#endif + +/* calling conventions for Windows */ +#ifndef SWIGSTDCALL +# if defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__) +# define SWIGSTDCALL __stdcall +# else +# define SWIGSTDCALL +# endif +#endif + +/* Deal with Microsoft's attempt at deprecating C standard runtime functions */ +#if !defined(SWIG_NO_CRT_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_CRT_SECURE_NO_DEPRECATE) +# define _CRT_SECURE_NO_DEPRECATE +#endif + +/* Deal with Microsoft's attempt at deprecating methods in the standard C++ library */ +#if !defined(SWIG_NO_SCL_SECURE_NO_DEPRECATE) && defined(_MSC_VER) && !defined(_SCL_SECURE_NO_DEPRECATE) +# define _SCL_SECURE_NO_DEPRECATE +#endif + + + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#if defined(_WIN32) || defined(__CYGWIN32__) +# define DllExport __declspec( dllexport ) +# define SWIGSTDCALL __stdcall +#else +# define DllExport +# define SWIGSTDCALL +#endif + + +#ifdef __cplusplus +# include <new> +#endif + + + + +/* Support for throwing Ada exceptions from C/C++ */ + +typedef enum +{ + SWIG_AdaException, + SWIG_AdaOutOfMemoryException, + SWIG_AdaIndexOutOfRangeException, + SWIG_AdaDivideByZeroException, + SWIG_AdaArgumentOutOfRangeException, + SWIG_AdaNullReferenceException +} SWIG_AdaExceptionCodes; + + +typedef void (SWIGSTDCALL* SWIG_AdaExceptionCallback_t)(const char *); + + +typedef struct +{ + SWIG_AdaExceptionCodes code; + SWIG_AdaExceptionCallback_t callback; +} + SWIG_AdaExceptions_t; + + +static +SWIG_AdaExceptions_t +SWIG_ada_exceptions[] = +{ + { SWIG_AdaException, NULL }, + { SWIG_AdaOutOfMemoryException, NULL }, + { SWIG_AdaIndexOutOfRangeException, NULL }, + { SWIG_AdaDivideByZeroException, NULL }, + { SWIG_AdaArgumentOutOfRangeException, NULL }, + { SWIG_AdaNullReferenceException, NULL } +}; + + +static +void +SWIG_AdaThrowException (SWIG_AdaExceptionCodes code, const char *msg) +{ + SWIG_AdaExceptionCallback_t callback = SWIG_ada_exceptions[SWIG_AdaException].callback; + if (code >=0 && (size_t)code < sizeof(SWIG_ada_exceptions)/sizeof(SWIG_AdaExceptions_t)) { + callback = SWIG_ada_exceptions[code].callback; + } + callback(msg); +} + + + +#ifdef __cplusplus +extern "C" +#endif + +DllExport void SWIGSTDCALL SWIGRegisterExceptionCallbacks_LLVM_Transforms (SWIG_AdaExceptionCallback_t systemException, + SWIG_AdaExceptionCallback_t outOfMemory, + SWIG_AdaExceptionCallback_t indexOutOfRange, + SWIG_AdaExceptionCallback_t divideByZero, + SWIG_AdaExceptionCallback_t argumentOutOfRange, + SWIG_AdaExceptionCallback_t nullReference) +{ + SWIG_ada_exceptions [SWIG_AdaException].callback = systemException; + SWIG_ada_exceptions [SWIG_AdaOutOfMemoryException].callback = outOfMemory; + SWIG_ada_exceptions [SWIG_AdaIndexOutOfRangeException].callback = indexOutOfRange; + SWIG_ada_exceptions [SWIG_AdaDivideByZeroException].callback = divideByZero; + SWIG_ada_exceptions [SWIG_AdaArgumentOutOfRangeException].callback = argumentOutOfRange; + SWIG_ada_exceptions [SWIG_AdaNullReferenceException].callback = nullReference; +} + + +/* Callback for returning strings to Ada without leaking memory */ + +typedef char * (SWIGSTDCALL* SWIG_AdaStringHelperCallback)(const char *); +static SWIG_AdaStringHelperCallback
SWIG_ada_string_callback = NULL; + + + +/* probably obsolete ... +#ifdef __cplusplus +extern "C" +#endif +DllExport void SWIGSTDCALL SWIGRegisterStringCallback_LLVM_Transforms(SWIG_AdaStringHelperCallback callback) { + SWIG_ada_string_callback = callback; +} +*/ + + + +/* Contract support */ + +#define SWIG_contract_assert(nullreturn, expr, msg) if (!(expr)) {SWIG_AdaThrowException(SWIG_AdaArgumentOutOfRangeException, msg); return nullreturn; } else + + +#define protected public +#define private public + +#include "llvm-c/Transforms/IPO.h" +#include "llvm-c/Transforms/Scalar.h" + + + +// struct LLVMCtxt; + + +#undef protected +#undef private +#ifdef __cplusplus +extern "C" { +#endif +DllExport void SWIGSTDCALL Ada_LLVMAddArgumentPromotionPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddArgumentPromotionPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddConstantMergePass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddConstantMergePass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddDeadArgEliminationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddDeadArgEliminationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddDeadTypeEliminationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddDeadTypeEliminationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddFunctionAttrsPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddFunctionAttrsPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddFunctionInliningPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddFunctionInliningPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddGlobalDCEPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddGlobalDCEPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddGlobalOptimizerPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddGlobalOptimizerPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddIPConstantPropagationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddIPConstantPropagationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddLowerSetJmpPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddLowerSetJmpPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddPruneEHPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddPruneEHPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddRaiseAllocationsPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddRaiseAllocationsPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddStripDeadPrototypesPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = 
(LLVMPassManagerRef)jarg1; + + LLVMAddStripDeadPrototypesPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddStripSymbolsPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddStripSymbolsPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddAggressiveDCEPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddAggressiveDCEPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddCFGSimplificationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddCFGSimplificationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddCondPropagationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddCondPropagationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddDeadStoreEliminationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddDeadStoreEliminationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddGVNPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddGVNPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddIndVarSimplifyPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddIndVarSimplifyPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddInstructionCombiningPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddInstructionCombiningPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddJumpThreadingPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddJumpThreadingPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddLICMPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddLICMPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddLoopDeletionPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddLoopDeletionPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddLoopIndexSplitPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddLoopIndexSplitPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddLoopRotatePass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddLoopRotatePass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddLoopUnrollPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddLoopUnrollPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddLoopUnswitchPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddLoopUnswitchPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddMemCpyOptPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + 
+ LLVMAddMemCpyOptPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddPromoteMemoryToRegisterPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddPromoteMemoryToRegisterPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddReassociatePass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddReassociatePass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddSCCPPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddSCCPPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddScalarReplAggregatesPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddScalarReplAggregatesPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddSimplifyLibCallsPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddSimplifyLibCallsPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddTailCallEliminationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddTailCallEliminationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddConstantPropagationPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddConstantPropagationPass(arg1); + + +} + + + +DllExport void SWIGSTDCALL Ada_LLVMAddDemoteMemoryToRegisterPass ( + void * jarg1 + ) +{ + LLVMPassManagerRef arg1 = (LLVMPassManagerRef) 0 ; + + arg1 = (LLVMPassManagerRef)jarg1; + + LLVMAddDemoteMemoryToRegisterPass(arg1); + + +} + + + +#ifdef __cplusplus +} +#endif +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif + diff --git a/bindings/ocaml/Makefile.ocaml b/bindings/ocaml/Makefile.ocaml index d98a489f7c424..c46f6027cf634 100644 --- a/bindings/ocaml/Makefile.ocaml +++ b/bindings/ocaml/Makefile.ocaml @@ -31,13 +31,16 @@ PROJ_libocamldir := $(DESTDIR)$(OCAML_LIBDIR) OcamlDir := $(LibDir)/ocaml # Info from llvm-config and similar +ifndef IS_CLEANING_TARGET ifdef UsedComponents UsedLibs = $(shell $(LLVM_CONFIG) --libs $(UsedComponents)) UsedLibNames = $(shell $(LLVM_CONFIG) --libnames $(UsedComponents)) endif +endif # Tools OCAMLCFLAGS += -I $(ObjDir) -I $(OcamlDir) +ifndef IS_CLEANING_TARGET ifneq ($(ObjectsO),) OCAMLAFLAGS += $(patsubst %,-cclib %, \ $(filter-out -L$(LibDir),-l$(LIBRARYNAME) \ @@ -48,6 +51,7 @@ OCAMLAFLAGS += $(patsubst %,-cclib %, \ $(filter-out -L$(LibDir),$(shell $(LLVM_CONFIG) --ldflags)) \ $(UsedLibs)) endif +endif # -g was introduced in 3.10.0. 
#ifneq ($(ENABLE_OPTIMIZED),1) diff --git a/bindings/ocaml/bitreader/bitreader_ocaml.c b/bindings/ocaml/bitreader/bitreader_ocaml.c index 0fd484f123435..5fd9f854d9da2 100644 --- a/bindings/ocaml/bitreader/bitreader_ocaml.c +++ b/bindings/ocaml/bitreader/bitreader_ocaml.c @@ -45,27 +45,29 @@ static void llvm_raise(value Prototype, char *Message) { /*===-- Modules -----------------------------------------------------------===*/ -/* Llvm.llmemorybuffer -> Llvm.module */ -CAMLprim value llvm_get_module_provider(LLVMMemoryBufferRef MemBuf) { +/* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ +CAMLprim value llvm_get_module_provider(LLVMContextRef C, + LLVMMemoryBufferRef MemBuf) { CAMLparam0(); CAMLlocal2(Variant, MessageVal); char *Message; LLVMModuleProviderRef MP; - if (LLVMGetBitcodeModuleProvider(MemBuf, &MP, &Message)) + if (LLVMGetBitcodeModuleProviderInContext(C, MemBuf, &MP, &Message)) llvm_raise(llvm_bitreader_error_exn, Message); CAMLreturn((value) MemBuf); } -/* Llvm.llmemorybuffer -> Llvm.llmodule */ -CAMLprim value llvm_parse_bitcode(LLVMMemoryBufferRef MemBuf) { +/* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ +CAMLprim value llvm_parse_bitcode(LLVMContextRef C, + LLVMMemoryBufferRef MemBuf) { CAMLparam0(); CAMLlocal2(Variant, MessageVal); LLVMModuleRef M; char *Message; - if (LLVMParseBitcode(MemBuf, &M, &Message)) + if (LLVMParseBitcodeInContext(C, MemBuf, &M, &Message)) llvm_raise(llvm_bitreader_error_exn, Message); CAMLreturn((value) M); diff --git a/bindings/ocaml/bitreader/llvm_bitreader.ml b/bindings/ocaml/bitreader/llvm_bitreader.ml index 816e1565526c3..88587cbe1ef94 100644 --- a/bindings/ocaml/bitreader/llvm_bitreader.ml +++ b/bindings/ocaml/bitreader/llvm_bitreader.ml @@ -13,7 +13,9 @@ exception Error of string external register_exns : exn -> unit = "llvm_register_bitreader_exns" let _ = register_exns (Error "") -external get_module_provider : Llvm.llmemorybuffer -> Llvm.llmoduleprovider +external get_module_provider : Llvm.llcontext -> Llvm.llmemorybuffer -> + Llvm.llmoduleprovider = "llvm_get_module_provider" -external parse_bitcode : Llvm.llmemorybuffer -> Llvm.llmodule + +external parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule = "llvm_parse_bitcode" diff --git a/bindings/ocaml/bitreader/llvm_bitreader.mli b/bindings/ocaml/bitreader/llvm_bitreader.mli index 15b389bb83c75..5648b35fee223 100644 --- a/bindings/ocaml/bitreader/llvm_bitreader.mli +++ b/bindings/ocaml/bitreader/llvm_bitreader.mli @@ -14,16 +14,18 @@ exception Error of string -(** [read_bitcode_file path] reads the bitcode for a new module [m] from the - file at [path]. Returns [Success m] if successful, and [Failure msg] - otherwise, where [msg] is a description of the error encountered. - See the function [llvm::getBitcodeModuleProvider]. *) -external get_module_provider : Llvm.llmemorybuffer -> Llvm.llmoduleprovider +(** [get_module_provider context mb] reads the bitcode for a new + module provider [m] from the memory buffer [mb] in the context [context]. + Returns [m] if successful, or raises [Error msg] otherwise, where [msg] is a + description of the error encountered. See the function + [llvm::getBitcodeModuleProvider]. *) +external get_module_provider : Llvm.llcontext -> Llvm.llmemorybuffer -> + Llvm.llmoduleprovider = "llvm_get_module_provider" -(** [parse_bitcode mb] parses the bitcode for a new module [m] from the memory - buffer [mb]. 
Returns [Success m] if successful, and [Failure msg] otherwise, - where [msg] is a description of the error encountered. - See the function [llvm::ParseBitcodeFile]. *) -external parse_bitcode : Llvm.llmemorybuffer -> Llvm.llmodule +(** [parse_bitcode context mb] parses the bitcode for a new module [m] from the + memory buffer [mb] in the context [context]. Returns [m] if successful, or + raises [Error msg] otherwise, where [msg] is a description of the error + encountered. See the function [llvm::ParseBitcodeFile]. *) +external parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule = "llvm_parse_bitcode" diff --git a/bindings/ocaml/executionengine/executionengine_ocaml.c b/bindings/ocaml/executionengine/executionengine_ocaml.c index 647759fb07442..072d583bf8fb6 100644 --- a/bindings/ocaml/executionengine/executionengine_ocaml.c +++ b/bindings/ocaml/executionengine/executionengine_ocaml.c @@ -24,11 +24,15 @@ #include <string.h> #include <assert.h> -/* Force the LLVM interpreter, JIT, and native target to be linked in. */ +/* Force the LLVM interpreter and JIT to be linked in. */ void llvm_initialize(void) { LLVMLinkInInterpreter(); LLVMLinkInJIT(); - LLVMInitializeNativeTarget(); +} + +/* unit -> bool */ +CAMLprim value llvm_initialize_native_target(value Unit) { + return Val_bool(LLVMInitializeNativeTarget()); } /* Can't use the recommended caml_named_value mechanism for backwards diff --git a/bindings/ocaml/executionengine/llvm_executionengine.ml b/bindings/ocaml/executionengine/llvm_executionengine.ml index cf9acc7cb6b82..c9e8f18b22409 100644 --- a/bindings/ocaml/executionengine/llvm_executionengine.ml +++ b/bindings/ocaml/executionengine/llvm_executionengine.ml @@ -109,3 +109,6 @@ module ExecutionEngine = struct *) end + +external initialize_native_target : unit -> bool + = "llvm_initialize_native_target" diff --git a/bindings/ocaml/executionengine/llvm_executionengine.mli b/bindings/ocaml/executionengine/llvm_executionengine.mli index 17da1dffe556b..6c2fdfb7868c4 100644 --- a/bindings/ocaml/executionengine/llvm_executionengine.mli +++ b/bindings/ocaml/executionengine/llvm_executionengine.mli @@ -89,14 +89,14 @@ module ExecutionEngine: sig module provider [mp] if successful. Creates a JIT if possible, else falls back to an interpreter. Raises [Error msg] if an error occurs. The execution engine is not garbage collected and must be destroyed with - [dispose ee]. See the function [llvm::ExecutionEngine::create]. *) + [dispose ee]. See the function [llvm::EngineBuilder::create]. *) val create: Llvm.llmoduleprovider -> t (** [create_interpreter mp] creates a new interpreter, taking ownership of the module provider [mp] if successful. Raises [Error msg] if an error occurs. The execution engine is not garbage collected and must be destroyed with [dispose ee]. - See the function [llvm::ExecutionEngine::create]. *) + See the function [llvm::EngineBuilder::create]. *) val create_interpreter: Llvm.llmoduleprovider -> t (** [create_jit mp] creates a new JIT (just-in-time compiler), taking @@ -104,7 +104,7 @@ module ExecutionEngine: sig a JIT which favors code quality over compilation speed. Raises [Error msg] if an error occurs. The execution engine is not garbage collected and must be destroyed with [dispose ee]. - See the function [llvm::ExecutionEngine::create]. *) + See the function [llvm::EngineBuilder::create].
*) val create_jit: Llvm.llmoduleprovider -> t (** [create_fast_jit mp] creates a new JIT (just-in-time compiler) which @@ -112,7 +112,7 @@ module provider [mp] if successful. Raises [Error msg] if an error occurs. The execution engine is not garbage collected and must be destroyed with [dispose ee]. - See the function [llvm::ExecutionEngine::create]. *) + See the function [llvm::EngineBuilder::create]. *) val create_fast_jit: Llvm.llmoduleprovider -> t (** [dispose ee] releases the memory used by the execution engine and must be @@ -161,3 +161,6 @@ [ee]. *) val target_data: t -> Llvm_target.TargetData.t end + +external initialize_native_target : unit -> bool + = "llvm_initialize_native_target" diff --git a/bindings/ocaml/llvm/Makefile b/bindings/ocaml/llvm/Makefile index cd974d482011e..99e347bc13129 100644 --- a/bindings/ocaml/llvm/Makefile +++ b/bindings/ocaml/llvm/Makefile @@ -1,4 +1,4 @@ -##===- bindings/ocaml/bitwriter/Makefile -------------------*- Makefile -*-===## +##===- bindings/ocaml/llvm/Makefile ------------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml index 49975650a592b..37d0fd7c85f44 100644 --- a/bindings/ocaml/llvm/llvm.ml +++ b/bindings/ocaml/llvm/llvm.ml @@ -8,6 +8,7 @@ *===----------------------------------------------------------------------===*) +type llcontext type llmodule type lltype type lltypehandle @@ -26,7 +27,6 @@ module TypeKind = struct | Fp128 | Ppc_fp128 | Label - | Metadata | Integer | Function | Struct @@ -34,6 +34,7 @@ module TypeKind = struct | Pointer | Opaque | Vector + | Metadata end module Linkage = struct @@ -41,13 +42,18 @@ External | Available_externally | Link_once + | Link_once_odr | Weak + | Weak_odr | Appending | Internal + | Private | Dllimport | Dllexport | External_weak | Ghost + | Common + | Linker_private end module Visibility = struct @@ -78,6 +84,16 @@ module Attribute = struct | Nest | Readnone | Readonly + | Noinline + | Alwaysinline + | Optsize + | Ssp + | Sspreq + | Nocapture + | Noredzone + | Noimplicitfloat + | Naked + | Inlinehint end module Icmp = struct @@ -127,10 +143,13 @@ type ('a, 'b) llrev_pos = | At_start of 'a | After of 'b +(*===-- Contexts ----------------------------------------------------------===*) +external create_context : unit -> llcontext = "llvm_create_context" +external dispose_context : unit -> llcontext = "llvm_dispose_context" +external global_context : unit -> llcontext = "llvm_global_context" (*===-- Modules -----------------------------------------------------------===*) - -external create_module : string -> llmodule = "llvm_create_module" +external create_module : llcontext -> string -> llmodule = "llvm_create_module" external dispose_module : llmodule -> unit = "llvm_dispose_module" external target_triple: llmodule -> string = "llvm_target_triple" @@ -147,37 +166,25 @@ external delete_type_name : string -> llmodule -> unit external dump_module : llmodule -> unit = "llvm_dump_module" (*===-- Types -------------------------------------------------------------===*) - external classify_type : lltype -> TypeKind.t = "llvm_classify_type" +external type_context : lltype -> llcontext = "llvm_type_context" (*--...
Operations on integer types ........................................--*) -external _i1_type : unit -> lltype = "llvm_i1_type" -external _i8_type : unit -> lltype = "llvm_i8_type" -external _i16_type : unit -> lltype = "llvm_i16_type" -external _i32_type : unit -> lltype = "llvm_i32_type" -external _i64_type : unit -> lltype = "llvm_i64_type" - -let i1_type = _i1_type () -let i8_type = _i8_type () -let i16_type = _i16_type () -let i32_type = _i32_type () -let i64_type = _i64_type () - -external integer_type : int -> lltype = "llvm_integer_type" +external i1_type : llcontext -> lltype = "llvm_i1_type" +external i8_type : llcontext -> lltype = "llvm_i8_type" +external i16_type : llcontext -> lltype = "llvm_i16_type" +external i32_type : llcontext -> lltype = "llvm_i32_type" +external i64_type : llcontext -> lltype = "llvm_i64_type" + +external integer_type : llcontext -> int -> lltype = "llvm_integer_type" external integer_bitwidth : lltype -> int = "llvm_integer_bitwidth" (*--... Operations on real types ...........................................--*) -external _float_type : unit -> lltype = "llvm_float_type" -external _double_type : unit -> lltype = "llvm_double_type" -external _x86fp80_type : unit -> lltype = "llvm_x86fp80_type" -external _fp128_type : unit -> lltype = "llvm_fp128_type" -external _ppc_fp128_type : unit -> lltype = "llvm_ppc_fp128_type" - -let float_type = _float_type () -let double_type = _double_type () -let x86fp80_type = _x86fp80_type () -let fp128_type = _fp128_type () -let ppc_fp128_type = _ppc_fp128_type () +external float_type : llcontext -> lltype = "llvm_float_type" +external double_type : llcontext -> lltype = "llvm_double_type" +external x86fp80_type : llcontext -> lltype = "llvm_x86fp80_type" +external fp128_type : llcontext -> lltype = "llvm_fp128_type" +external ppc_fp128_type : llcontext -> lltype = "llvm_ppc_fp128_type" (*--... Operations on function types .......................................--*) external function_type : lltype -> lltype array -> lltype = "llvm_function_type" @@ -188,8 +195,9 @@ external return_type : lltype -> lltype = "LLVMGetReturnType" external param_types : lltype -> lltype array = "llvm_param_types" (*--... Operations on struct types .........................................--*) -external struct_type : lltype array -> lltype = "llvm_struct_type" -external packed_struct_type : lltype array -> lltype = "llvm_packed_struct_type" +external struct_type : llcontext -> lltype array -> lltype = "llvm_struct_type" +external packed_struct_type : llcontext -> lltype array -> lltype + = "llvm_packed_struct_type" external element_types : lltype -> lltype array = "llvm_element_types" external is_packed : lltype -> bool = "llvm_is_packed" @@ -206,12 +214,9 @@ external address_space : lltype -> int = "llvm_address_space" external vector_size : lltype -> int = "llvm_vector_size" (*--... Operations on other types ..........................................--*) -external opaque_type : unit -> lltype = "llvm_opaque_type" -external _void_type : unit -> lltype = "llvm_void_type" -external _label_type : unit -> lltype = "llvm_label_type" - -let void_type = _void_type () -let label_type = _label_type () +external opaque_type : llcontext -> lltype = "llvm_opaque_type" +external void_type : llcontext -> lltype = "llvm_void_type" +external label_type : llcontext -> lltype = "llvm_label_type" (*--... 
Operations on type handles .........................................--*) external handle_to_type : lltype -> lltypehandle = "llvm_handle_to_type" @@ -220,7 +225,6 @@ external refine_type : lltype -> lltype -> unit = "llvm_refine_type" (*===-- Values ------------------------------------------------------------===*) - external type_of : llvalue -> lltype = "llvm_type_of" external value_name : llvalue -> string = "llvm_value_name" external set_value_name : string -> llvalue -> unit = "llvm_set_value_name" @@ -238,14 +242,19 @@ external is_undef : llvalue -> bool = "llvm_is_undef" external const_int : lltype -> int -> llvalue = "llvm_const_int" external const_of_int64 : lltype -> Int64.t -> bool -> llvalue = "llvm_const_of_int64" +external const_int_of_string : lltype -> string -> int -> llvalue + = "llvm_const_int_of_string" external const_float : lltype -> float -> llvalue = "llvm_const_float" +external const_float_of_string : lltype -> string -> llvalue + = "llvm_const_float_of_string" (*--... Operations on composite constants ..................................--*) -external const_string : string -> llvalue = "llvm_const_string" -external const_stringz : string -> llvalue = "llvm_const_stringz" +external const_string : llcontext -> string -> llvalue = "llvm_const_string" +external const_stringz : llcontext -> string -> llvalue = "llvm_const_stringz" external const_array : lltype -> llvalue array -> llvalue = "llvm_const_array" -external const_struct : llvalue array -> llvalue = "llvm_const_struct" -external const_packed_struct : llvalue array -> llvalue +external const_struct : llcontext -> llvalue array -> llvalue + = "llvm_const_struct" +external const_packed_struct : llcontext -> llvalue array -> llvalue = "llvm_const_packed_struct" external const_vector : llvalue array -> llvalue = "llvm_const_vector" @@ -253,12 +262,18 @@ external const_vector : llvalue array -> llvalue = "llvm_const_vector" external align_of : lltype -> llvalue = "LLVMAlignOf" external size_of : lltype -> llvalue = "LLVMSizeOf" external const_neg : llvalue -> llvalue = "LLVMConstNeg" +external const_fneg : llvalue -> llvalue = "LLVMConstFNeg" external const_not : llvalue -> llvalue = "LLVMConstNot" external const_add : llvalue -> llvalue -> llvalue = "LLVMConstAdd" +external const_nsw_add : llvalue -> llvalue -> llvalue = "LLVMConstNSWAdd" +external const_fadd : llvalue -> llvalue -> llvalue = "LLVMConstFAdd" external const_sub : llvalue -> llvalue -> llvalue = "LLVMConstSub" +external const_fsub : llvalue -> llvalue -> llvalue = "LLVMConstFSub" external const_mul : llvalue -> llvalue -> llvalue = "LLVMConstMul" +external const_fmul : llvalue -> llvalue -> llvalue = "LLVMConstFMul" external const_udiv : llvalue -> llvalue -> llvalue = "LLVMConstUDiv" external const_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstSDiv" +external const_exact_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstExactSDiv" external const_fdiv : llvalue -> llvalue -> llvalue = "LLVMConstFDiv" external const_urem : llvalue -> llvalue -> llvalue = "LLVMConstURem" external const_srem : llvalue -> llvalue -> llvalue = "LLVMConstSRem" @@ -274,6 +289,8 @@ external const_shl : llvalue -> llvalue -> llvalue = "LLVMConstShl" external const_lshr : llvalue -> llvalue -> llvalue = "LLVMConstLShr" external const_ashr : llvalue -> llvalue -> llvalue = "LLVMConstAShr" external const_gep : llvalue -> llvalue array -> llvalue = "llvm_const_gep" +external const_in_bounds_gep : llvalue -> llvalue array -> llvalue + = "llvm_const_in_bounds_gep" external 
const_trunc : llvalue -> lltype -> llvalue = "LLVMConstTrunc" external const_sext : llvalue -> lltype -> llvalue = "LLVMConstSExt" external const_zext : llvalue -> lltype -> llvalue = "LLVMConstZExt" @@ -286,6 +303,16 @@ external const_fptosi : llvalue -> lltype -> llvalue = "LLVMConstFPToSI" external const_ptrtoint : llvalue -> lltype -> llvalue = "LLVMConstPtrToInt" external const_inttoptr : llvalue -> lltype -> llvalue = "LLVMConstIntToPtr" external const_bitcast : llvalue -> lltype -> llvalue = "LLVMConstBitCast" +external const_zext_or_bitcast : llvalue -> lltype -> llvalue + = "LLVMConstZExtOrBitCast" +external const_sext_or_bitcast : llvalue -> lltype -> llvalue + = "LLVMConstSExtOrBitCast" +external const_trunc_or_bitcast : llvalue -> lltype -> llvalue + = "LLVMConstTruncOrBitCast" +external const_pointercast : llvalue -> lltype -> llvalue + = "LLVMConstPointerCast" +external const_intcast : llvalue -> lltype -> llvalue = "LLVMConstIntCast" +external const_fpcast : llvalue -> lltype -> llvalue = "LLVMConstFPCast" external const_select : llvalue -> llvalue -> llvalue -> llvalue = "LLVMConstSelect" external const_extractelement : llvalue -> llvalue -> llvalue @@ -294,6 +321,10 @@ external const_insertelement : llvalue -> llvalue -> llvalue -> llvalue = "LLVMConstInsertElement" external const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue = "LLVMConstShuffleVector" +external const_extractvalue : llvalue -> int array -> llvalue + = "llvm_const_extractvalue" +external const_insertvalue : llvalue -> llvalue -> int array -> llvalue + = "llvm_const_insertvalue" (*--... Operations on global variables, functions, and aliases (globals) ...--*) external global_parent : llvalue -> llmodule = "LLVMGetGlobalParent" @@ -504,8 +535,9 @@ external block_parent : llbasicblock -> llvalue = "LLVMGetBasicBlockParent" external basic_blocks : llvalue -> llbasicblock array = "llvm_basic_blocks" external entry_block : llvalue -> llbasicblock = "LLVMGetEntryBasicBlock" external delete_block : llbasicblock -> unit = "llvm_delete_block" -external append_block : string -> llvalue -> llbasicblock = "llvm_append_block" -external insert_block : string -> llbasicblock -> llbasicblock +external append_block : llcontext -> string -> llvalue -> llbasicblock + = "llvm_append_block" +external insert_block : llcontext -> string -> llbasicblock -> llbasicblock = "llvm_insert_block" external block_begin : llvalue -> (llvalue, llbasicblock) llpos = "llvm_block_begin" @@ -629,18 +661,20 @@ external incoming : llvalue -> (llvalue * llbasicblock) list = "llvm_incoming" (*===-- Instruction builders ----------------------------------------------===*) -external builder : unit -> llbuilder = "llvm_builder" +external builder : llcontext -> llbuilder = "llvm_builder" external position_builder : (llbasicblock, llvalue) llpos -> llbuilder -> unit = "llvm_position_builder" external insertion_block : llbuilder -> llbasicblock = "llvm_insertion_block" +external insert_into_builder : llvalue -> string -> llbuilder -> unit + = "llvm_insert_into_builder" -let builder_at ip = - let b = builder () in +let builder_at context ip = + let b = builder context in position_builder ip b; b -let builder_before i = builder_at (Before i) -let builder_at_end bb = builder_at (At_end bb) +let builder_before context i = builder_at context (Before i) +let builder_at_end context bb = builder_at context (At_end bb) let position_before i = position_builder (Before i) let position_at_end bb = position_builder (At_end bb) @@ -649,6 +683,8 @@ let 
position_at_end bb = position_builder (At_end bb) (*--... Terminators ........................................................--*) external build_ret_void : llbuilder -> llvalue = "llvm_build_ret_void" external build_ret : llvalue -> llbuilder -> llvalue = "llvm_build_ret" +external build_aggregate_ret : llvalue array -> llbuilder -> llvalue + = "llvm_build_aggregate_ret" external build_br : llbasicblock -> llbuilder -> llvalue = "llvm_build_br" external build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llbuilder -> llvalue = "llvm_build_cond_br" @@ -665,14 +701,24 @@ external build_unreachable : llbuilder -> llvalue = "llvm_build_unreachable" (*--... Arithmetic .........................................................--*) external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_add" +external build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nsw_add" +external build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fadd" external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_sub" +external build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fsub" external build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_mul" +external build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fmul" external build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_udiv" external build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_sdiv" +external build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_exact_sdiv" external build_fdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_fdiv" external build_urem : llvalue -> llvalue -> string -> llbuilder -> llvalue @@ -714,6 +760,15 @@ external build_store : llvalue -> llvalue -> llbuilder -> llvalue = "llvm_build_store" external build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue = "llvm_build_gep" +external build_in_bounds_gep : llvalue -> llvalue array -> string -> + llbuilder -> llvalue = "llvm_build_in_bounds_gep" +external build_struct_gep : llvalue -> int -> string -> llbuilder -> llvalue + = "llvm_build_struct_gep" + +external build_global_string : string -> string -> llbuilder -> llvalue + = "llvm_build_global_string" +external build_global_stringptr : string -> string -> llbuilder -> llvalue + = "llvm_build_global_stringptr" (*--... 
Casts ..............................................................--*) external build_trunc : llvalue -> lltype -> string -> llbuilder -> llvalue @@ -740,6 +795,18 @@ external build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue = "llvm_build_inttoptr" external build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue = "llvm_build_bitcast" +external build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder -> + llvalue = "llvm_build_zext_or_bitcast" +external build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder -> + llvalue = "llvm_build_sext_or_bitcast" +external build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder -> + llvalue = "llvm_build_trunc_or_bitcast" +external build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_pointercast" +external build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_intcast" +external build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_fpcast" (*--... Comparisons ........................................................--*) external build_icmp : Icmp.t -> llvalue -> llvalue -> string -> @@ -762,7 +829,17 @@ external build_insertelement : llvalue -> llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_insertelement" external build_shufflevector : llvalue -> llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_shufflevector" - +external build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue + = "llvm_build_extractvalue" +external build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder -> + llvalue = "llvm_build_insertvalue" + +external build_is_null : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_is_null" +external build_is_not_null : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_is_not_null" +external build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_ptrdiff" (*===-- Module providers --------------------------------------------------===*) diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli index 35c218a752644..a7c2bcfd719b7 100644 --- a/bindings/ocaml/llvm/llvm.mli +++ b/bindings/ocaml/llvm/llvm.mli @@ -17,6 +17,10 @@ These abstract types correlate directly to the LLVM VMCore classes. *) +(** The top-level container for all LLVM global data. See the + [llvm::LLVMContext] class. *) +type llcontext + (** The top-level container for all other LLVM Intermediate Representation (IR) objects. See the [llvm::Module] class. *) type llmodule @@ -61,7 +65,6 @@ module TypeKind : sig | Fp128 | Ppc_fp128 | Label - | Metadata | Integer | Function | Struct @@ -69,6 +72,7 @@ module TypeKind : sig | Pointer | Opaque | Vector + | Metadata end (** The linkage of a global value, accessed with {!linkage} and @@ -78,13 +82,18 @@ module Linkage : sig External | Available_externally | Link_once + | Link_once_odr | Weak + | Weak_odr | Appending | Internal + | Private | Dllimport | Dllexport | External_weak | Ghost + | Common + | Linker_private end (** The linker visibility of a global value, accessed with {!visibility} and @@ -125,6 +134,16 @@ module Attribute : sig | Nest | Readnone | Readonly + | Noinline + | Alwaysinline + | Optsize + | Ssp + | Sspreq + | Nocapture + | Noredzone + | Noimplicitfloat + | Naked + | Inlinehint end (** The predicate for an integer comparison ([icmp]) instruction. 
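(* Illustration (not part of the patch): a hedged sketch of the
   context-threading API the following hunks introduce. It assumes these
   bindings are linked in as the [Llvm] module and that the pre-existing
   [function_type] and [define_function] keep their signatures. *)
let _context_demo () =
  let ctx = Llvm.create_context () in
  let m = Llvm.create_module ctx "demo" in      (* was: create_module "demo" *)
  let i32 = Llvm.i32_type ctx in                (* was: the global i32_type *)
  ignore (Llvm.define_function "f" (Llvm.function_type i32 [| i32 |]) m);
  Llvm.dispose_module m;
  Llvm.dispose_context ctx                      (* frees everything ctx owns *)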
@@ -188,12 +207,27 @@ type ('a, 'b) llrev_pos =
 exception IoError of string
 
+(** {6 Contexts} *)
+
+(** [create_context ()] creates a context for storing the "global" state in
+    LLVM. See the constructor [llvm::LLVMContext]. *)
+external create_context : unit -> llcontext = "llvm_create_context"
+
+(** [dispose_context c] destroys the context [c]. See the destructor
+    [llvm::LLVMContext::~LLVMContext]. *)
+external dispose_context : llcontext -> unit = "llvm_dispose_context"
+
+(** See the function [llvm::getGlobalContext]. *)
+external global_context : unit -> llcontext = "llvm_global_context"
+
+
 (** {6 Modules} *)
 
-(** [create_module id] creates a module with the supplied module ID. Modules are
-    not garbage collected; it is mandatory to call {!dispose_module} to free
-    memory. See the constructor [llvm::Module::Module]. *)
-external create_module : string -> llmodule = "llvm_create_module"
+(** [create_module context id] creates a module with the supplied module ID in
+    the context [context]. Modules are not garbage collected; it is mandatory
+    to call {!dispose_module} to free memory. See the constructor
+    [llvm::Module::Module]. *)
+external create_module : llcontext -> string -> llmodule = "llvm_create_module"
 
 (** [dispose_module m] destroys a module [m] and all of the IR objects it
     contained. All references to subordinate objects are invalidated;
@@ -245,51 +279,65 @@ external dump_module : llmodule -> unit = "llvm_dump_module"
     See the method [llvm::Type::getTypeID]. *)
 external classify_type : lltype -> TypeKind.t = "llvm_classify_type"
 
+(** [type_context ty] returns the {!llcontext} corresponding to the type [ty].
+    See the method [llvm::Type::getContext]. *)
+external type_context : lltype -> llcontext = "llvm_type_context"
+
 (** [string_of_lltype ty] returns a string describing the type [ty]. *)
 val string_of_lltype : lltype -> string
 
 (** {7 Operations on integer types} *)
 
-(** The 1-bit integer type. See [llvm::Type::Int1Ty]. *)
-val i1_type : lltype
+(** [i1_type c] returns an integer type of bitwidth 1 in the context [c]. See
+    [llvm::Type::Int1Ty]. *)
+external i1_type : llcontext -> lltype = "llvm_i1_type"
 
-(** The 8-bit integer type. See [llvm::Type::Int8Ty]. *)
-val i8_type : lltype
+(** [i8_type c] returns an integer type of bitwidth 8 in the context [c]. See
+    [llvm::Type::Int8Ty]. *)
+external i8_type : llcontext -> lltype = "llvm_i8_type"
 
-(** The 16-bit integer type. See [llvm::Type::Int16Ty]. *)
-val i16_type : lltype
+(** [i16_type c] returns an integer type of bitwidth 16 in the context [c]. See
+    [llvm::Type::Int16Ty]. *)
+external i16_type : llcontext -> lltype = "llvm_i16_type"
 
-(** The 32-bit integer type. See [llvm::Type::Int32Ty]. *)
-val i32_type : lltype
+(** [i32_type c] returns an integer type of bitwidth 32 in the context [c]. See
+    [llvm::Type::Int32Ty]. *)
+external i32_type : llcontext -> lltype = "llvm_i32_type"
 
-(** The 64-bit integer type. See [llvm::Type::Int64Ty]. *)
-val i64_type : lltype
+(** [i64_type c] returns an integer type of bitwidth 64 in the context [c]. See
+    [llvm::Type::Int64Ty]. *)
+external i64_type : llcontext -> lltype = "llvm_i64_type"
 
-(** [integer_type n] returns an integer type of bitwidth [n].
-    See the method [llvm::IntegerType::get]. *)
-external integer_type : int -> lltype = "llvm_integer_type"
+(** [integer_type c n] returns an integer type of bitwidth [n] in the context
+    [c]. See the method [llvm::IntegerType::get]. *)
+external integer_type : llcontext -> int -> lltype = "llvm_integer_type"
 
 (** [integer_bitwidth ty] returns the number of bits in the integer type [ty].
     See the method [llvm::IntegerType::getBitWidth]. *)
 external integer_bitwidth : lltype -> int = "llvm_integer_bitwidth"
 
 (** {7 Operations on real types} *)
 
-(** The IEEE 32-bit floating point type. See [llvm::Type::FloatTy]. *)
-val float_type : lltype
+(** [float_type c] returns the IEEE 32-bit floating point type in the context
+    [c]. See [llvm::Type::FloatTy]. *)
+external float_type : llcontext -> lltype = "llvm_float_type"
 
-(** The IEEE 64-bit floating point type. See [llvm::Type::DoubleTy]. *)
-val double_type : lltype
+(** [double_type c] returns the IEEE 64-bit floating point type in the context
+    [c]. See [llvm::Type::DoubleTy]. *)
+external double_type : llcontext -> lltype = "llvm_double_type"
 
-(** The x87 80-bit floating point type. See [llvm::Type::X86_FP80Ty]. *)
-val x86fp80_type : lltype
+(** [x86fp80_type c] returns the x87 80-bit floating point type in the context
+    [c]. See [llvm::Type::X86_FP80Ty]. *)
+external x86fp80_type : llcontext -> lltype = "llvm_x86fp80_type"
 
-(** The IEEE 128-bit floating point type. See [llvm::Type::FP128Ty]. *)
-val fp128_type : lltype
+(** [fp128_type c] returns the IEEE 128-bit floating point type in the context
+    [c]. See [llvm::Type::FP128Ty]. *)
+external fp128_type : llcontext -> lltype = "llvm_fp128_type"
 
-(** The PowerPC 128-bit floating point type. See [llvm::Type::PPC_FP128Ty]. *)
-val ppc_fp128_type : lltype
+(** [ppc_fp128_type c] returns the PowerPC 128-bit floating point type in the
    context [c]. See [llvm::Type::PPC_FP128Ty]. *)
+external ppc_fp128_type : llcontext -> lltype = "llvm_ppc_fp128_type"
 
 (** {7 Operations on function types} *)
 
@@ -321,13 +369,17 @@ external param_types : lltype -> lltype array = "llvm_param_types"
 
 (** {7 Operations on struct types} *)
 
-(** [struct_type tys] returns the structure type containing in the types in the
-    array [tys]. See the method [llvm::StructType::get]. *)
-external struct_type : lltype array -> lltype = "llvm_struct_type"
+(** [struct_type context tys] returns the structure type in the context
+    [context] containing the types in the array [tys]. See the method
+    [llvm::StructType::get]. *)
+external struct_type : llcontext -> lltype array -> lltype
+                     = "llvm_struct_type"
 
-(** [struct_type tys] returns the packed structure type containing in the types
-    in the array [tys]. See the method [llvm::StructType::get]. *)
-external packed_struct_type : lltype array -> lltype = "llvm_packed_struct_type"
+(** [packed_struct_type context tys] returns the packed structure type in the
+    context [context] containing the types in the array [tys]. See the method
+    [llvm::StructType::get]. *)
+external packed_struct_type : llcontext -> lltype array -> lltype
+                            = "llvm_packed_struct_type"
 
 (** [element_types sty] returns the constituent types of the struct type [sty].
     See the method [llvm::StructType::getElementType]. *)
@@ -378,18 +430,18 @@ external vector_size : lltype -> int = "llvm_vector_size"
 
 (** {7 Operations on other types} *)
 
-(** [opaque_type ()] creates a new opaque type distinct from any other.
-    Opaque types are useful for building recursive types in combination with
-    {!refine_type}.
-    See [llvm::OpaqueType::get]. *)
-external opaque_type : unit -> lltype = "llvm_opaque_type"
+(** [opaque_type c] creates a new opaque type distinct from any other in the
+    context [c]. Opaque types are useful for building recursive types in
+    combination with {!refine_type}. See [llvm::OpaqueType::get]. *)
+external opaque_type : llcontext -> lltype = "llvm_opaque_type"
 
-(** [void_type] is the type of a function which does not return any value.
-    See [llvm::Type::VoidTy]. *)
-val void_type : lltype
+(** [void_type c] creates the type of a function which does not return any
+    value, in the context [c]. See [llvm::Type::VoidTy]. *)
+external void_type : llcontext -> lltype = "llvm_void_type"
 
-(** [label_type] is the type of a basic block. See [llvm::Type::LabelTy]. *)
-val label_type : lltype
+(** [label_type c] creates the type of a basic block in the context [c]. See
+    [llvm::Type::LabelTy]. *)
+external label_type : llcontext -> lltype = "llvm_label_type"
 
 (** {7 Operations on type handles} *)
 
@@ -469,24 +521,35 @@ external const_int : lltype -> int -> llvalue = "llvm_const_int"
 external const_of_int64 : lltype -> Int64.t -> bool -> llvalue
                         = "llvm_const_of_int64"
 
+(** [const_int_of_string ty s r] returns the integer constant of type [ty] and
+    value [s], with the radix [r]. See the method [llvm::ConstantInt::get]. *)
+external const_int_of_string : lltype -> string -> int -> llvalue
+                             = "llvm_const_int_of_string"
+
 (** [const_float ty n] returns the floating point constant of type [ty] and
-    value [n]. See the method [llvm::ConstantInt::get]. *)
+    value [n]. See the method [llvm::ConstantFP::get]. *)
 external const_float : lltype -> float -> llvalue = "llvm_const_float"
 
+(** [const_float_of_string ty s] returns the floating point constant of type
+    [ty] and value [s]. See the method [llvm::ConstantFP::get]. *)
+external const_float_of_string : lltype -> string -> llvalue
+                               = "llvm_const_float_of_string"
+
 
 (** {7 Operations on composite constants} *)
 
-(** [const_string s] returns the constant [i8] array with the values of the
-    characters in the string [s]. The array is not null-terminated (but see
-    {!const_stringz}). This value can in turn be used as the initializer for a
-    global variable. See the method [llvm::ConstantArray::get]. *)
-external const_string : string -> llvalue = "llvm_const_string"
+(** [const_string c s] returns the constant [i8] array with the values of the
+    characters in the string [s] in the context [c]. The array is not
+    null-terminated (but see {!const_stringz}). This value can in turn be used
+    as the initializer for a global variable. See the method
+    [llvm::ConstantArray::get]. *)
+external const_string : llcontext -> string -> llvalue = "llvm_const_string"
 
-(** [const_stringz s] returns the constant [i8] array with the values of the
-    characters in the string [s] and a null terminator. This value can in turn
-    be used as the initializer for a global variable.
+(** [const_stringz c s] returns the constant [i8] array with the values of the
+    characters in the string [s] and a null terminator in the context [c]. This
+    value can in turn be used as the initializer for a global variable.
     See the method [llvm::ConstantArray::get]. *)
-external const_stringz : string -> llvalue = "llvm_const_stringz"
+external const_stringz : llcontext -> string -> llvalue = "llvm_const_stringz"
 
 (** [const_array ty elts] returns the constant array of type
     [array_type ty (Array.length elts)] and containing the values [elts].
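(* Illustration (not part of the patch): a sketch of the context-aware
   constant constructors from the surrounding hunks, assuming the [Llvm]
   module linkage. *)
let _constant_demo () =
  let ctx = Llvm.global_context () in
  let i64 = Llvm.i64_type ctx in
  let n = Llvm.const_int_of_string i64 "2a" 16 in  (* "2a" in radix 16 = 42 *)
  let s = Llvm.const_stringz ctx "hello" in        (* NUL-terminated i8 array *)
  Llvm.const_struct ctx [| n; s |]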
@@ -494,17 +557,19 @@ external const_stringz : string -> llvalue = "llvm_const_stringz" See the method [llvm::ConstantArray::get]. *) external const_array : lltype -> llvalue array -> llvalue = "llvm_const_array" -(** [const_struct elts] returns the structured constant of type - [struct_type (Array.map type_of elts)] and containing the values [elts]. - This value can in turn be used as the initializer for a global variable. - See the method [llvm::ConstantStruct::get]. *) -external const_struct : llvalue array -> llvalue = "llvm_const_struct" - -(** [const_packed_struct elts] returns the structured constant of type - {!packed_struct_type} [(Array.map type_of elts)] and containing the values - [elts]. This value can in turn be used as the initializer for a global - variable. See the method [llvm::ConstantStruct::get]. *) -external const_packed_struct : llvalue array -> llvalue +(** [const_struct context elts] returns the structured constant of type + [struct_type (Array.map type_of elts)] and containing the values [elts] + in the context [context]. This value can in turn be used as the initializer + for a global variable. See the method [llvm::ConstantStruct::get]. *) +external const_struct : llcontext -> llvalue array -> llvalue + = "llvm_const_struct" + +(** [const_packed_struct context elts] returns the structured constant of + type {!packed_struct_type} [(Array.map type_of elts)] and containing the + values [elts] in the context [context]. This value can in turn be used as + the initializer for a global variable. See the method + [llvm::ConstantStruct::get]. *) +external const_packed_struct : llcontext -> llvalue array -> llvalue = "llvm_const_packed_struct" (** [const_vector elts] returns the vector constant of type @@ -531,6 +596,10 @@ external size_of : lltype -> llvalue = "LLVMSizeOf" See the method [llvm::ConstantExpr::getNeg]. *) external const_neg : llvalue -> llvalue = "LLVMConstNeg" +(** [const_fneg c] returns the arithmetic negation of the constant float [c]. + See the method [llvm::ConstantExpr::getFNeg]. *) +external const_fneg : llvalue -> llvalue = "LLVMConstFNeg" + (** [const_not c] returns the bitwise inverse of the constant [c]. See the method [llvm::ConstantExpr::getNot]. *) external const_not : llvalue -> llvalue = "LLVMConstNot" @@ -539,14 +608,31 @@ external const_not : llvalue -> llvalue = "LLVMConstNot" See the method [llvm::ConstantExpr::getAdd]. *) external const_add : llvalue -> llvalue -> llvalue = "LLVMConstAdd" +(** [const_nsw_add c1 c2] returns the constant sum of two constants with no + signed wrapping. The result is undefined if the sum overflows. + See the method [llvm::ConstantExpr::getNSWAdd]. *) +external const_nsw_add : llvalue -> llvalue -> llvalue = "LLVMConstNSWAdd" + +(** [const_fadd c1 c2] returns the constant sum of two constant floats. + See the method [llvm::ConstantExpr::getFAdd]. *) +external const_fadd : llvalue -> llvalue -> llvalue = "LLVMConstFAdd" + (** [const_sub c1 c2] returns the constant difference, [c1 - c2], of two constants. See the method [llvm::ConstantExpr::getSub]. *) external const_sub : llvalue -> llvalue -> llvalue = "LLVMConstSub" +(** [const_fsub c1 c2] returns the constant difference, [c1 - c2], of two + constant floats. See the method [llvm::ConstantExpr::getFSub]. *) +external const_fsub : llvalue -> llvalue -> llvalue = "LLVMConstFSub" + (** [const_mul c1 c2] returns the constant product of two constants. See the method [llvm::ConstantExpr::getMul]. 
*) external const_mul : llvalue -> llvalue -> llvalue = "LLVMConstMul" +(** [const_fmul c1 c2] returns the constant product of two constants floats. + See the method [llvm::ConstantExpr::getFMul]. *) +external const_fmul : llvalue -> llvalue -> llvalue = "LLVMConstFMul" + (** [const_udiv c1 c2] returns the constant quotient [c1 / c2] of two unsigned integer constants. See the method [llvm::ConstantExpr::getUDiv]. *) @@ -554,20 +640,25 @@ external const_udiv : llvalue -> llvalue -> llvalue = "LLVMConstUDiv" (** [const_sdiv c1 c2] returns the constant quotient [c1 / c2] of two signed integer constants. - See the method [llvm::ConstantExpr::]. *) + See the method [llvm::ConstantExpr::getSDiv]. *) external const_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstSDiv" +(** [const_exact_sdiv c1 c2] returns the constant quotient [c1 / c2] of two + signed integer constants. The result is undefined if the result is rounded + or overflows. See the method [llvm::ConstantExpr::getExactSDiv]. *) +external const_exact_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstExactSDiv" + (** [const_fdiv c1 c2] returns the constant quotient [c1 / c2] of two floating point constants. See the method [llvm::ConstantExpr::getFDiv]. *) external const_fdiv : llvalue -> llvalue -> llvalue = "LLVMConstFDiv" -(** [const_udiv c1 c2] returns the constant remainder [c1 MOD c2] of two +(** [const_urem c1 c2] returns the constant remainder [c1 MOD c2] of two unsigned integer constants. See the method [llvm::ConstantExpr::getURem]. *) external const_urem : llvalue -> llvalue -> llvalue = "LLVMConstURem" -(** [const_sdiv c1 c2] returns the constant remainder [c1 MOD c2] of two +(** [const_srem c1 c2] returns the constant remainder [c1 MOD c2] of two signed integer constants. See the method [llvm::ConstantExpr::getSRem]. *) external const_srem : llvalue -> llvalue -> llvalue = "LLVMConstSRem" @@ -624,6 +715,12 @@ external const_ashr : llvalue -> llvalue -> llvalue = "LLVMConstAShr" See the method [llvm::ConstantExpr::getGetElementPtr]. *) external const_gep : llvalue -> llvalue array -> llvalue = "llvm_const_gep" +(** [const_in_bounds_gep pc indices] returns the constant [getElementPtr] of [p1] + with the constant integers indices from the array [indices]. + See the method [llvm::ConstantExpr::getInBoundsGetElementPtr]. *) +external const_in_bounds_gep : llvalue -> llvalue array -> llvalue + = "llvm_const_in_bounds_gep" + (** [const_trunc c ty] returns the constant truncation of integer constant [c] to the smaller integer type [ty]. See the method [llvm::ConstantExpr::getTrunc]. *) @@ -684,6 +781,42 @@ external const_inttoptr : llvalue -> lltype -> llvalue = "LLVMConstIntToPtr" See the method [llvm::ConstantExpr::getBitCast]. *) external const_bitcast : llvalue -> lltype -> llvalue = "LLVMConstBitCast" +(** [const_zext_or_bitcast c ty] returns a constant zext or bitwise cast + conversion of constant [c] to type [ty]. + See the method [llvm::ConstantExpr::getZExtOrBitCast]. *) +external const_zext_or_bitcast : llvalue -> lltype -> llvalue + = "LLVMConstZExtOrBitCast" + +(** [const_sext_or_bitcast c ty] returns a constant sext or bitwise cast + conversion of constant [c] to type [ty]. + See the method [llvm::ConstantExpr::getSExtOrBitCast]. *) +external const_sext_or_bitcast : llvalue -> lltype -> llvalue + = "LLVMConstSExtOrBitCast" + +(** [const_trunc_or_bitcast c ty] returns a constant trunc or bitwise cast + conversion of constant [c] to type [ty]. + See the method [llvm::ConstantExpr::getTruncOrBitCast]. 
*)
+external const_trunc_or_bitcast : llvalue -> lltype -> llvalue
+                                = "LLVMConstTruncOrBitCast"
+
+(** [const_pointercast c ty] returns a constant bitcast or a pointer-to-int
+    cast conversion of constant [c] to type [ty] of equal size.
+    See the method [llvm::ConstantExpr::getPointerCast]. *)
+external const_pointercast : llvalue -> lltype -> llvalue
+                           = "LLVMConstPointerCast"
+
+(** [const_intcast c ty] returns a constant zext, bitcast, or trunc for integer
+    -> integer casts of constant [c] to type [ty].
+    See the method [llvm::ConstantExpr::getIntCast]. *)
+external const_intcast : llvalue -> lltype -> llvalue
+                       = "LLVMConstIntCast"
+
+(** [const_fpcast c ty] returns a constant fpext, bitcast, or fptrunc for fp ->
+    fp casts of constant [c] to type [ty].
+    See the method [llvm::ConstantExpr::getFPCast]. *)
+external const_fpcast : llvalue -> lltype -> llvalue
+                      = "LLVMConstFPCast"
+
 (** [const_select cond t f] returns the constant conditional which returns value
     [t] if the boolean constant [cond] is true and the value [f] otherwise.
     See the method [llvm::ConstantExpr::getSelect]. *)
@@ -713,6 +846,18 @@ external const_insertelement : llvalue -> llvalue -> llvalue -> llvalue
 external const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue
                              = "LLVMConstShuffleVector"
 
+(** [const_extractvalue agg idxs] returns the constant value of the constant
+    aggregate [agg] at the indices [idxs]. Each index in [idxs] must be less
+    than the size of the aggregate.
+    See the method [llvm::ConstantExpr::getExtractValue]. *)
+external const_extractvalue : llvalue -> int array -> llvalue
+                            = "llvm_const_extractvalue"
+
+(** [const_insertvalue agg val idxs] inserts the value [val] at the specified
+    indices [idxs] in the aggregate [agg]. Each index in [idxs] must be less
+    than the size of the aggregate.
+    See the method [llvm::ConstantExpr::getInsertValue]. *)
+external const_insertvalue : llvalue -> llvalue -> int array -> llvalue
+                           = "llvm_const_insertvalue"
+
 
 (** {7 Operations on global variables, functions, and aliases (globals)} *)
 
@@ -1040,15 +1185,16 @@ external entry_block : llvalue -> llbasicblock = "LLVMGetEntryBasicBlock"
     See the method [llvm::BasicBlock::eraseFromParent]. *)
 external delete_block : llbasicblock -> unit = "llvm_delete_block"
 
-(** [append_block name f] creates a new basic block named [name] at the end of
-    function [f].
+(** [append_block c name f] creates a new basic block named [name] at the end of
+    function [f] in the context [c].
     See the constructor of [llvm::BasicBlock]. *)
-external append_block : string -> llvalue -> llbasicblock = "llvm_append_block"
+external append_block : llcontext -> string -> llvalue -> llbasicblock
+                      = "llvm_append_block"
 
-(** [insert_block name bb] creates a new basic block named [name] before the
-    basic block [bb].
+(** [insert_block c name bb] creates a new basic block named [name] before the
+    basic block [bb] in the context [c].
     See the constructor of [llvm::BasicBlock]. *)
-external insert_block : string -> llbasicblock -> llbasicblock
+external insert_block : llcontext -> string -> llbasicblock -> llbasicblock
                       = "llvm_insert_block"
 
 (** [block_parent bb] returns the parent function that owns the basic block.
@@ -1207,22 +1353,23 @@ external incoming : llvalue -> (llvalue * llbasicblock) list = "llvm_incoming"
 
 (** {6 Instruction builders} *)
 
-(** [builder ()] creates an instruction builder with no position. It is invalid
-    to use this builder until its position is set with {!position_before} or
-    {!position_at_end}. See the constructor for [llvm::LLVMBuilder].
*) -external builder : unit -> llbuilder = "llvm_builder" +(** [builder context] creates an instruction builder with no position in + the context [context]. It is invalid to use this builder until its position + is set with {!position_before} or {!position_at_end}. See the constructor + for [llvm::LLVMBuilder]. *) +external builder : llcontext -> llbuilder = "llvm_builder" (** [builder_at ip] creates an instruction builder positioned at [ip]. See the constructor for [llvm::LLVMBuilder]. *) -val builder_at : (llbasicblock, llvalue) llpos -> llbuilder +val builder_at : llcontext -> (llbasicblock, llvalue) llpos -> llbuilder (** [builder_before ins] creates an instruction builder positioned before the instruction [isn]. See the constructor for [llvm::LLVMBuilder]. *) -val builder_before : llvalue -> llbuilder +val builder_before : llcontext -> llvalue -> llbuilder (** [builder_at_end bb] creates an instruction builder positioned at the end of the basic block [bb]. See the constructor for [llvm::LLVMBuilder]. *) -val builder_at_end : llbasicblock -> llbuilder +val builder_at_end : llcontext -> llbasicblock -> llbuilder (** [position_builder ip bb] moves the instruction builder [bb] to the position [ip]. @@ -1244,6 +1391,12 @@ val position_at_end : llbasicblock -> llbuilder -> unit See the method [llvm::LLVMBuilder::GetInsertBlock]. *) external insertion_block : llbuilder -> llbasicblock = "llvm_insertion_block" +(** [insert_into_builder i name b] inserts the specified instruction [i] at the + position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::Insert]. *) +external insert_into_builder : llvalue -> string -> llbuilder -> unit + = "llvm_insert_into_builder" + (** {7 Terminators} *) @@ -1259,6 +1412,13 @@ external build_ret_void : llbuilder -> llvalue = "llvm_build_ret_void" See the method [llvm::LLVMBuilder::CreateRet]. *) external build_ret : llvalue -> llbuilder -> llvalue = "llvm_build_ret" +(** [build_aggregate_ret vs b] creates a + [ret {...} { %v1, %v2, ... } ] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateAggregateRet]. *) +external build_aggregate_ret : llvalue array -> llbuilder -> llvalue + = "llvm_build_aggregate_ret" + (** [build_br bb b] creates a [b %bb] instruction at the position specified by the instruction builder [b]. @@ -1316,6 +1476,20 @@ external build_unreachable : llbuilder -> llvalue = "llvm_build_unreachable" external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_add" +(** [build_nswadd x y name b] creates a + [%name = nsw add %x, %y] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateNSWAdd]. *) +external build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nsw_add" + +(** [build_fadd x y name b] creates a + [%name = fadd %x, %y] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateFAdd]. *) +external build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fadd" + (** [build_sub x y name b] creates a [%name = sub %x, %y] instruction at the position specified by the instruction builder [b]. 
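(* Illustration (not part of the patch): a sketch of the updated builder
   workflow, which now threads an llcontext through basic-block and builder
   creation. Assumes the [Llvm] module and the pre-existing [declare_function]
   and [param]. *)
let _build_identity ctx m =
  let i32 = Llvm.i32_type ctx in
  let fn = Llvm.declare_function "id" (Llvm.function_type i32 [| i32 |]) m in
  let bb = Llvm.append_block ctx "entry" fn in   (* context argument is new *)
  let b = Llvm.builder_at_end ctx bb in          (* likewise *)
  Llvm.build_ret (Llvm.param fn 0) b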
@@ -1323,6 +1497,13 @@ external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
 external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
                    = "llvm_build_sub"
 
+(** [build_fsub x y name b] creates a
+    [%name = fsub %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFSub]. *)
+external build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+                    = "llvm_build_fsub"
+
 (** [build_mul x y name b] creates a
     [%name = mul %x, %y]
     instruction at the position specified by the instruction builder [b].
@@ -1330,6 +1511,13 @@ external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
 external build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
                    = "llvm_build_mul"
 
+(** [build_fmul x y name b] creates a
+    [%name = fmul %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFMul]. *)
+external build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+                    = "llvm_build_fmul"
+
 (** [build_udiv x y name b] creates a
     [%name = udiv %x, %y]
     instruction at the position specified by the instruction builder [b].
@@ -1344,6 +1532,13 @@ external build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
 external build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
                    = "llvm_build_sdiv"
 
+(** [build_exact_sdiv x y name b] creates a
+    [%name = exact sdiv %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateExactSDiv]. *)
+external build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+                          = "llvm_build_exact_sdiv"
+
 (** [build_fdiv x y name b] creates a
     [%name = fdiv %x, %y]
     instruction at the position specified by the instruction builder [b].
@@ -1482,12 +1677,39 @@ external build_store : llvalue -> llvalue -> llbuilder -> llvalue
                      = "llvm_build_store"
 
 (** [build_gep p indices name b] creates a
-    [%name = gep %p, indices...]
+    [%name = getelementptr %p, indices...]
     instruction at the position specified by the instruction builder [b].
     See the method [llvm::LLVMBuilder::CreateGetElementPtr]. *)
 external build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue
                    = "llvm_build_gep"
 
+(** [build_in_bounds_gep p indices name b] creates a
+    [%name = getelementptr inbounds %p, indices...]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateInBoundsGetElementPtr]. *)
+external build_in_bounds_gep : llvalue -> llvalue array -> string -> llbuilder ->
+                               llvalue = "llvm_build_in_bounds_gep"
+
+(** [build_struct_gep p idx name b] creates a
+    [%name = getelementptr %p, 0, idx]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateStructGetElementPtr]. *)
+external build_struct_gep : llvalue -> int -> string -> llbuilder ->
+                            llvalue = "llvm_build_struct_gep"
+
+(** [build_global_string str name b] creates a series of instructions that adds
+    a global string at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateGlobalString]. *)
+external build_global_string : string -> string -> llbuilder -> llvalue
+                             = "llvm_build_global_string"
+
+(** [build_global_stringptr str name b] creates a series of instructions that
+    adds a global string pointer at the position specified by the instruction
+    builder [b].
+    See the method [llvm::LLVMBuilder::CreateGlobalStringPtr]. *)
+external build_global_stringptr : string -> string -> llbuilder -> llvalue
+                                = "llvm_build_global_stringptr"
+
 
 (** {7 Casts} *)
 
@@ -1571,10 +1793,46 @@ external build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue
 
 (** [build_bitcast v ty name b] creates a
     [%name = bitcast %v to %ty]
     instruction at the position specified by the instruction builder [b].
-    See the method [llvm::LLVMBuilder::CreateBitcast]. *)
+    See the method [llvm::LLVMBuilder::CreateBitCast]. *)
 external build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue
                        = "llvm_build_bitcast"
 
+(** [build_zext_or_bitcast v ty name b] creates a zext or bitcast
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateZExtOrBitCast]. *)
+external build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                                 llvalue = "llvm_build_zext_or_bitcast"
+
+(** [build_sext_or_bitcast v ty name b] creates a sext or bitcast
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSExtOrBitCast]. *)
+external build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                                 llvalue = "llvm_build_sext_or_bitcast"
+
+(** [build_trunc_or_bitcast v ty name b] creates a trunc or bitcast
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateTruncOrBitCast]. *)
+external build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                                  llvalue = "llvm_build_trunc_or_bitcast"
+
+(** [build_pointercast v ty name b] creates a bitcast or pointer-to-int
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreatePointerCast]. *)
+external build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue
+                           = "llvm_build_pointercast"
+
+(** [build_intcast v ty name b] creates a zext, bitcast, or trunc
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateIntCast]. *)
+external build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+                       = "llvm_build_intcast"
+
+(** [build_fpcast v ty name b] creates a fpext, bitcast, or fptrunc
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFPCast]. *)
+external build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+                      = "llvm_build_fpcast"
+
 
 (** {7 Comparisons} *)
 
@@ -1645,6 +1903,40 @@ external build_insertelement : llvalue -> llvalue -> llvalue -> string ->
 external build_shufflevector : llvalue -> llvalue -> llvalue -> string ->
                                llbuilder -> llvalue = "llvm_build_shufflevector"
 
+(** [build_extractvalue agg idx name b] creates a
+    [%name = extractvalue %agg, %idx]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateExtractValue]. *)
+external build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue
+                            = "llvm_build_extractvalue"
+
+(** [build_insertvalue agg val idx name b] creates a
+    [%name = insertvalue %agg, %val, %idx]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateInsertValue].
*) +external build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder -> + llvalue = "llvm_build_insertvalue" + +(** [build_is_null val name b] creates a + [%name = icmp eq %val, null] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateIsNull]. *) +external build_is_null : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_is_null" + +(** [build_is_not_null val name b] creates a + [%name = icmp ne %val, null] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateIsNotNull]. *) +external build_is_not_null : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_is_not_null" + +(** [build_ptrdiff lhs rhs name b] creates a series of instructions that measure + the difference between two pointer values at the position specified by the + instruction builder [b]. + See the method [llvm::LLVMBuilder::CreatePtrDiff]. *) +external build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_ptrdiff" (** {6 Module providers} *) diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index c4eba13db0fdc..8868d07ffc7a3 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -92,6 +92,24 @@ static value alloc_variant(int tag, void *Value) { } +/*===-- Contexts ----------------------------------------------------------===*/ + +/* unit -> llcontext */ +CAMLprim LLVMContextRef llvm_create_context(value Unit) { + return LLVMContextCreate(); +} + +/* llcontext -> unit */ +CAMLprim value llvm_dispose_context(LLVMContextRef C) { + LLVMContextDispose(C); + return Val_unit; +} + +/* unit -> llcontext */ +CAMLprim LLVMContextRef llvm_global_context(value Unit) { + return LLVMGetGlobalContext(); +} + /*===-- Modules -----------------------------------------------------------===*/ /* string -> llmodule */ @@ -153,18 +171,41 @@ CAMLprim value llvm_classify_type(LLVMTypeRef Ty) { return Val_int(LLVMGetTypeKind(Ty)); } +/* lltype -> llcontext */ +CAMLprim LLVMContextRef llvm_type_context(LLVMTypeRef Ty) { + return LLVMGetTypeContext(Ty); +} + /*--... 
Operations on integer types ........................................--*/ -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_i1_type (value Unit) { return LLVMInt1Type(); } -CAMLprim LLVMTypeRef llvm_i8_type (value Unit) { return LLVMInt8Type(); } -CAMLprim LLVMTypeRef llvm_i16_type(value Unit) { return LLVMInt16Type(); } -CAMLprim LLVMTypeRef llvm_i32_type(value Unit) { return LLVMInt32Type(); } -CAMLprim LLVMTypeRef llvm_i64_type(value Unit) { return LLVMInt64Type(); } +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i1_type (LLVMContextRef Context) { + return LLVMInt1TypeInContext(Context); +} -/* int -> lltype */ -CAMLprim LLVMTypeRef llvm_integer_type(value Width) { - return LLVMIntType(Int_val(Width)); +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i8_type (LLVMContextRef Context) { + return LLVMInt8TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i16_type (LLVMContextRef Context) { + return LLVMInt16TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i32_type (LLVMContextRef Context) { + return LLVMInt32TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i64_type (LLVMContextRef Context) { + return LLVMInt64TypeInContext(Context); +} + +/* llcontext -> int -> lltype */ +CAMLprim LLVMTypeRef llvm_integer_type(LLVMContextRef Context, value Width) { + return LLVMIntTypeInContext(Context, Int_val(Width)); } /* lltype -> int */ @@ -174,29 +215,29 @@ CAMLprim value llvm_integer_bitwidth(LLVMTypeRef IntegerTy) { /*--... Operations on real types ...........................................--*/ -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_float_type(value Unit) { - return LLVMFloatType(); +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_float_type(LLVMContextRef Context) { + return LLVMFloatTypeInContext(Context); } -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_double_type(value Unit) { - return LLVMDoubleType(); +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_double_type(LLVMContextRef Context) { + return LLVMDoubleTypeInContext(Context); } -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_x86fp80_type(value Unit) { - return LLVMX86FP80Type(); +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_x86fp80_type(LLVMContextRef Context) { + return LLVMX86FP80TypeInContext(Context); } -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_fp128_type(value Unit) { - return LLVMFP128Type(); +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_fp128_type(LLVMContextRef Context) { + return LLVMFP128TypeInContext(Context); } -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_ppc_fp128_type(value Unit) { - return LLVMPPCFP128Type(); +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_ppc_fp128_type(LLVMContextRef Context) { + return LLVMPPCFP128TypeInContext(Context); } /*--... Operations on function types .......................................--*/ @@ -228,16 +269,17 @@ CAMLprim value llvm_param_types(LLVMTypeRef FunTy) { /*--... 
Operations on struct types .........................................--*/ -/* lltype array -> lltype */ -CAMLprim LLVMTypeRef llvm_struct_type(value ElementTypes) { - return LLVMStructType((LLVMTypeRef *) ElementTypes, - Wosize_val(ElementTypes), 0); +/* llcontext -> lltype array -> lltype */ +CAMLprim LLVMTypeRef llvm_struct_type(LLVMContextRef C, value ElementTypes) { + return LLVMStructTypeInContext(C, (LLVMTypeRef *) ElementTypes, + Wosize_val(ElementTypes), 0); } -/* lltype array -> lltype */ -CAMLprim LLVMTypeRef llvm_packed_struct_type(value ElementTypes) { - return LLVMStructType((LLVMTypeRef *) ElementTypes, - Wosize_val(ElementTypes), 1); +/* llcontext -> lltype array -> lltype */ +CAMLprim LLVMTypeRef llvm_packed_struct_type(LLVMContextRef C, + value ElementTypes) { + return LLVMStructTypeInContext(C, (LLVMTypeRef *) ElementTypes, + Wosize_val(ElementTypes), 1); } /* lltype -> lltype array */ @@ -292,13 +334,19 @@ CAMLprim value llvm_vector_size(LLVMTypeRef VectorTy) { /*--... Operations on other types ..........................................--*/ -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_void_type (value Unit) { return LLVMVoidType(); } -CAMLprim LLVMTypeRef llvm_label_type(value Unit) { return LLVMLabelType(); } +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_void_type (LLVMContextRef Context) { + return LLVMVoidTypeInContext(Context); +} -/* unit -> lltype */ -CAMLprim LLVMTypeRef llvm_opaque_type(value Unit) { - return LLVMOpaqueType(); +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_label_type(LLVMContextRef Context) { + return LLVMLabelTypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_opaque_type(LLVMContextRef Context) { + return LLVMOpaqueTypeInContext(Context); } /*--... Operations on type handles .........................................--*/ @@ -388,21 +436,38 @@ CAMLprim LLVMValueRef llvm_const_of_int64(LLVMTypeRef IntTy, value N, return LLVMConstInt(IntTy, Int64_val(N), Bool_val(SExt)); } +/* lltype -> string -> int -> llvalue */ +CAMLprim LLVMValueRef llvm_const_int_of_string(LLVMTypeRef IntTy, value S, + value Radix) { + return LLVMConstIntOfStringAndSize(IntTy, String_val(S), caml_string_length(S), + Int_val(Radix)); +} + /* lltype -> float -> llvalue */ CAMLprim LLVMValueRef llvm_const_float(LLVMTypeRef RealTy, value N) { return LLVMConstReal(RealTy, Double_val(N)); } +/* lltype -> string -> llvalue */ +CAMLprim LLVMValueRef llvm_const_float_of_string(LLVMTypeRef RealTy, value S) { + return LLVMConstRealOfStringAndSize(RealTy, String_val(S), + caml_string_length(S)); +} + /*--... 
Operations on composite constants ..................................--*/ -/* string -> llvalue */ -CAMLprim LLVMValueRef llvm_const_string(value Str, value NullTerminate) { - return LLVMConstString(String_val(Str), string_length(Str), 1); +/* llcontext -> string -> llvalue */ +CAMLprim LLVMValueRef llvm_const_string(LLVMContextRef Context, value Str, + value NullTerminate) { + return LLVMConstStringInContext(Context, String_val(Str), string_length(Str), + 1); } -/* string -> llvalue */ -CAMLprim LLVMValueRef llvm_const_stringz(value Str, value NullTerminate) { - return LLVMConstString(String_val(Str), string_length(Str), 0); +/* llcontext -> string -> llvalue */ +CAMLprim LLVMValueRef llvm_const_stringz(LLVMContextRef Context, value Str, + value NullTerminate) { + return LLVMConstStringInContext(Context, String_val(Str), string_length(Str), + 0); } /* lltype -> llvalue array -> llvalue */ @@ -412,16 +477,17 @@ CAMLprim LLVMValueRef llvm_const_array(LLVMTypeRef ElementTy, Wosize_val(ElementVals)); } -/* llvalue array -> llvalue */ -CAMLprim LLVMValueRef llvm_const_struct(value ElementVals) { - return LLVMConstStruct((LLVMValueRef *) Op_val(ElementVals), - Wosize_val(ElementVals), 0); +/* llcontext -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_struct(LLVMContextRef C, value ElementVals) { + return LLVMConstStructInContext(C, (LLVMValueRef *) Op_val(ElementVals), + Wosize_val(ElementVals), 0); } -/* llvalue array -> llvalue */ -CAMLprim LLVMValueRef llvm_const_packed_struct(value ElementVals) { - return LLVMConstStruct((LLVMValueRef *) Op_val(ElementVals), - Wosize_val(ElementVals), 1); +/* llcontext -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_packed_struct(LLVMContextRef C, + value ElementVals) { + return LLVMConstStructInContext(C, (LLVMValueRef *) Op_val(ElementVals), + Wosize_val(ElementVals), 1); } /* llvalue array -> llvalue */ @@ -452,6 +518,49 @@ CAMLprim LLVMValueRef llvm_const_gep(LLVMValueRef ConstantVal, value Indices) { Wosize_val(Indices)); } +/* llvalue -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_in_bounds_gep(LLVMValueRef ConstantVal, + value Indices) { + return LLVMConstInBoundsGEP(ConstantVal, (LLVMValueRef*) Op_val(Indices), + Wosize_val(Indices)); +} + +/* llvalue -> int array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_extractvalue(LLVMValueRef Aggregate, + value Indices) { + CAMLparam1(Indices); + int size = Wosize_val(Indices); + int i; + LLVMValueRef result; + + unsigned* idxs = (unsigned*)malloc(size * sizeof(unsigned)); + for (i = 0; i < size; i++) { + idxs[i] = Int_val(Field(Indices, i)); + } + + result = LLVMConstExtractValue(Aggregate, idxs, size); + free(idxs); + CAMLreturnT(LLVMValueRef, result); +} + +/* llvalue -> llvalue -> int array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_insertvalue(LLVMValueRef Aggregate, + LLVMValueRef Val, value Indices) { + CAMLparam1(Indices); + int size = Wosize_val(Indices); + int i; + LLVMValueRef result; + + unsigned* idxs = (unsigned*)malloc(size * sizeof(unsigned)); + for (i = 0; i < size; i++) { + idxs[i] = Int_val(Field(Indices, i)); + } + + result = LLVMConstInsertValue(Aggregate, Val, idxs, size); + free(idxs); + CAMLreturnT(LLVMValueRef, result); +} + /*--... 
Operations on global variables, functions, and aliases (globals) ...--*/ /* llvalue -> bool */ @@ -616,7 +725,7 @@ CAMLprim value llvm_lookup_function(value Name, LLVMModuleRef M) { CAMLprim LLVMValueRef llvm_define_function(value Name, LLVMTypeRef Ty, LLVMModuleRef M) { LLVMValueRef Fn = LLVMAddFunction(M, String_val(Name), Ty); - LLVMAppendBasicBlock(Fn, "entry"); + LLVMAppendBasicBlockInContext(LLVMGetTypeContext(Ty), Fn, "entry"); return Fn; } @@ -729,13 +838,15 @@ CAMLprim value llvm_delete_block(LLVMBasicBlockRef BB) { } /* string -> llvalue -> llbasicblock */ -CAMLprim LLVMBasicBlockRef llvm_append_block(value Name, LLVMValueRef Fn) { - return LLVMAppendBasicBlock(Fn, String_val(Name)); +CAMLprim LLVMBasicBlockRef llvm_append_block(LLVMContextRef Context, value Name, + LLVMValueRef Fn) { + return LLVMAppendBasicBlockInContext(Context, Fn, String_val(Name)); } /* string -> llbasicblock -> llbasicblock */ -CAMLprim LLVMBasicBlockRef llvm_insert_block(value Name, LLVMBasicBlockRef BB) { - return LLVMInsertBasicBlock(BB, String_val(Name)); +CAMLprim LLVMBasicBlockRef llvm_insert_block(LLVMContextRef Context, value Name, + LLVMBasicBlockRef BB) { + return LLVMInsertBasicBlockInContext(Context, BB, String_val(Name)); } /* llvalue -> bool */ @@ -849,9 +960,9 @@ static value alloc_builder(LLVMBuilderRef B) { return V; } -/* unit-> llbuilder */ -CAMLprim value llvm_builder(value Unit) { - return alloc_builder(LLVMCreateBuilder()); +/* llcontext -> llbuilder */ +CAMLprim value llvm_builder(LLVMContextRef C) { + return alloc_builder(LLVMCreateBuilderInContext(C)); } /* (llbasicblock, llvalue) llpos -> llbuilder -> unit */ @@ -874,6 +985,13 @@ CAMLprim LLVMBasicBlockRef llvm_insertion_block(LLVMBuilderRef B) { return InsertBlock; } +/* llvalue -> string -> llbuilder -> unit */ +CAMLprim value llvm_insert_into_builder(LLVMValueRef I, value Name, + LLVMBuilderRef B) { + LLVMInsertIntoBuilderWithName(B, I, String_val(Name)); + return Val_unit; +} + /*--... 
Terminators ........................................................--*/
 
 /* llbuilder -> llvalue */
@@ -886,6 +1004,12 @@ CAMLprim LLVMValueRef llvm_build_ret(LLVMValueRef Val, value B) {
   return LLVMBuildRet(Builder_val(B), Val);
 }
 
+/* llvalue array -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_aggregate_ret(value RetVals, value B) {
+  return LLVMBuildAggregateRet(Builder_val(B), (LLVMValueRef *) Op_val(RetVals),
+                               Wosize_val(RetVals));
+}
+
 /* llbasicblock -> llbuilder -> llvalue */
 CAMLprim LLVMValueRef llvm_build_br(LLVMBasicBlockRef BB, value B) {
   return LLVMBuildBr(Builder_val(B), BB);
@@ -951,18 +1075,42 @@ CAMLprim LLVMValueRef llvm_build_add(LLVMValueRef LHS, LLVMValueRef RHS,
   return LLVMBuildAdd(Builder_val(B), LHS, RHS, String_val(Name));
 }
 
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_nsw_add(LLVMValueRef LHS, LLVMValueRef RHS,
+                                         value Name, value B) {
+  return LLVMBuildNSWAdd(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fadd(LLVMValueRef LHS, LLVMValueRef RHS,
+                                      value Name, value B) {
+  return LLVMBuildFAdd(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
 /* llvalue -> llvalue -> string -> llbuilder -> llvalue */
 CAMLprim LLVMValueRef llvm_build_sub(LLVMValueRef LHS, LLVMValueRef RHS,
                                      value Name, value B) {
   return LLVMBuildSub(Builder_val(B), LHS, RHS, String_val(Name));
 }
 
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fsub(LLVMValueRef LHS, LLVMValueRef RHS,
+                                      value Name, value B) {
+  return LLVMBuildFSub(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
 /* llvalue -> llvalue -> string -> llbuilder -> llvalue */
 CAMLprim LLVMValueRef llvm_build_mul(LLVMValueRef LHS, LLVMValueRef RHS,
                                      value Name, value B) {
   return LLVMBuildMul(Builder_val(B), LHS, RHS, String_val(Name));
 }
 
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fmul(LLVMValueRef LHS, LLVMValueRef RHS,
+                                      value Name, value B) {
+  return LLVMBuildFMul(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
 /* llvalue -> llvalue -> string -> llbuilder -> llvalue */
 CAMLprim LLVMValueRef llvm_build_udiv(LLVMValueRef LHS, LLVMValueRef RHS,
                                       value Name, value B) {
@@ -975,6 +1123,12 @@ CAMLprim LLVMValueRef llvm_build_sdiv(LLVMValueRef LHS, LLVMValueRef RHS,
   return LLVMBuildSDiv(Builder_val(B), LHS, RHS, String_val(Name));
 }
 
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_exact_sdiv(LLVMValueRef LHS, LLVMValueRef RHS,
+                                            value Name, value B) {
+  return LLVMBuildExactSDiv(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
 /* llvalue -> llvalue -> string -> llbuilder -> llvalue */
 CAMLprim LLVMValueRef llvm_build_fdiv(LLVMValueRef LHS, LLVMValueRef RHS,
                                       value Name, value B) {
@@ -1098,6 +1252,37 @@ CAMLprim LLVMValueRef llvm_build_gep(LLVMValueRef Pointer, value Indices,
                       String_val(Name));
 }
 
+/* llvalue -> llvalue array -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_in_bounds_gep(LLVMValueRef Pointer,
+                                               value Indices, value Name,
+                                               value B) {
+  return LLVMBuildInBoundsGEP(Builder_val(B), Pointer,
+                              (LLVMValueRef *) Op_val(Indices),
+                              Wosize_val(Indices), String_val(Name));
+}
+
+/* llvalue -> int -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_struct_gep(LLVMValueRef Pointer,
+                                            value Idx, value Name,
+                                            value B) {
+  return LLVMBuildStructGEP(Builder_val(B), Pointer,
+                            Int_val(Idx), String_val(Name));
+}
+
+/* string -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_global_string(value Str, value Name, value B) {
+  return LLVMBuildGlobalString(Builder_val(B), String_val(Str),
+                               String_val(Name));
+}
+
+/* string -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_global_stringptr(value Str, value Name,
+                                                  value B) {
+  return LLVMBuildGlobalStringPtr(Builder_val(B), String_val(Str),
+                                  String_val(Name));
+}
+
 /*--... Casts ..............................................................--*/
 
 /* llvalue -> lltype -> string -> llbuilder -> llvalue */
@@ -1172,6 +1357,43 @@ CAMLprim LLVMValueRef llvm_build_bitcast(LLVMValueRef X, LLVMTypeRef Ty,
   return LLVMBuildBitCast(Builder_val(B), X, Ty, String_val(Name));
 }
 
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_zext_or_bitcast(LLVMValueRef X, LLVMTypeRef Ty,
+                                                 value Name, value B) {
+  return LLVMBuildZExtOrBitCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_sext_or_bitcast(LLVMValueRef X, LLVMTypeRef Ty,
+                                                 value Name, value B) {
+  return LLVMBuildSExtOrBitCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_trunc_or_bitcast(LLVMValueRef X,
+                                                  LLVMTypeRef Ty, value Name,
+                                                  value B) {
+  return LLVMBuildTruncOrBitCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_pointercast(LLVMValueRef X, LLVMTypeRef Ty,
+                                             value Name, value B) {
+  return LLVMBuildPointerCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_intcast(LLVMValueRef X, LLVMTypeRef Ty,
+                                         value Name, value B) {
+  return LLVMBuildIntCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
+/* llvalue -> lltype -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_fpcast(LLVMValueRef X, LLVMTypeRef Ty,
+                                        value Name, value B) {
+  return LLVMBuildFPCast(Builder_val(B), X, Ty, String_val(Name));
+}
+
 /*--... 
Comparisons ........................................................--*/ /* Icmp.t -> llvalue -> llvalue -> string -> llbuilder -> llvalue */ @@ -1256,6 +1478,38 @@ CAMLprim LLVMValueRef llvm_build_shufflevector(LLVMValueRef V1, LLVMValueRef V2, return LLVMBuildShuffleVector(Builder_val(B), V1, V2, Mask, String_val(Name)); } +/* llvalue -> int -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_extractvalue(LLVMValueRef Aggregate, + value Idx, value Name, value B) { + return LLVMBuildExtractValue(Builder_val(B), Aggregate, Int_val(Idx), + String_val(Name)); +} + +/* llvalue -> llvalue -> int -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_insertvalue(LLVMValueRef Aggregate, + LLVMValueRef Val, value Idx, + value Name, value B) { + return LLVMBuildInsertValue(Builder_val(B), Aggregate, Val, Int_val(Idx), + String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_is_null(LLVMValueRef Val, value Name, + value B) { + return LLVMBuildIsNull(Builder_val(B), Val, String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_is_not_null(LLVMValueRef Val, value Name, + value B) { + return LLVMBuildIsNotNull(Builder_val(B), Val, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_ptrdiff(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildPtrDiff(Builder_val(B), LHS, RHS, String_val(Name)); +} /*===-- Module Providers --------------------------------------------------===*/ diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 731071ef85b26..320335cf0fe03 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -1,7 +1,28 @@ +include(CheckIncludeFile) +include(CheckLibraryExists) +include(CheckSymbolExists) +include(CheckFunctionExists) +include(CheckCXXSourceCompiles) + +# Helper macros and functions +macro(add_cxx_include result files) + set(${result} "") + foreach (file_name ${files}) + set(${result} "${${result}}#include<${file_name}>\n") + endforeach() +endmacro(add_cxx_include files result) + +function(check_type_exists type files variable) + add_cxx_include(includes "${files}") + CHECK_CXX_SOURCE_COMPILES(" + ${includes} ${type} typeVar; + int main() { + return 0; + } + " ${variable}) +endfunction() # include checks -include(CheckIncludeFile) -check_include_file(alloca.h HAVE_ALLOCA_H) check_include_file(argz.h HAVE_ARGZ_H) check_include_file(assert.h HAVE_ASSERT_H) check_include_file(dirent.h HAVE_DIRENT_H) @@ -42,22 +63,19 @@ check_include_file(utime.h HAVE_UTIME_H) check_include_file(windows.h HAVE_WINDOWS_H) # library checks -include(CheckLibraryExists) check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC) check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT) check_library_exists(dl dlopen "" HAVE_LIBDL) # function checks -include(CheckSymbolExists) -include(CheckFunctionExists) -check_symbol_exists(alloca alloca.h HAVE_ALLOCA) check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE) check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE) check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT) check_function_exists(isatty HAVE_ISATTY) check_symbol_exists(isinf cmath HAVE_ISINF_IN_CMATH) check_symbol_exists(isinf math.h HAVE_ISINF_IN_MATH_H) +check_symbol_exists(finite ieeefp.h HAVE_FINITE_IN_IEEEFP_H) 
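# Illustration (not part of the patch): the check_type_exists helper defined
# at the top of this file expands its arguments into a small probe, so a call
# such as
#   check_type_exists(uint64_t "stdint.h" HAVE_UINT64_T)
# compiles roughly
#   #include<stdint.h>
#   uint64_t typeVar;
#   int main() { return 0; }
# and defines HAVE_UINT64_T when that source builds.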
check_symbol_exists(isnan cmath HAVE_ISNAN_IN_CMATH) check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H) check_symbol_exists(ceilf math.h HAVE_CEILF) @@ -65,32 +83,70 @@ check_symbol_exists(floorf math.h HAVE_FLOORF) check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO) check_symbol_exists(malloc_zone_statistics malloc/malloc.h HAVE_MALLOC_ZONE_STATISTICS) +check_symbol_exists(mkdtemp unistd.h HAVE_MKDTEMP) +check_symbol_exists(mkstemp unistd.h HAVE_MKSTEMP) +check_symbol_exists(mktemp unistd.h HAVE_MKTEMP) check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK) +check_symbol_exists(sbrk unistd.h HAVE_SBRK) check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL) check_symbol_exists(strerror string.h HAVE_STRERROR) check_symbol_exists(strerror_r string.h HAVE_STRERROR_R) check_symbol_exists(strerror_s string.h HAVE_STRERROR_S) +check_symbol_exists(setenv stdlib.h HAVE_SETENV) check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC) if( LLVM_USING_GLIBC ) add_llvm_definitions( -D_GNU_SOURCE ) endif() +# Type checks +check_type_exists(std::bidirectional_iterator "iterator;iostream" HAVE_BI_ITERATOR) +check_type_exists(std::iterator iterator HAVE_STD_ITERATOR) +check_type_exists(std::forward_iterator iterator HAVE_FWD_ITERATOR) + +set(headers "") +if (HAVE_SYS_TYPES_H) + set(headers ${headers} "sys/types.h") +endif() + +if (HAVE_INTTYPES_H) + set(headers ${headers} "inttypes.h") +endif() + +if (HAVE_STDINT_H) + set(headers ${headers} "stdint.h") +endif() + +check_type_exists(uint64_t "${headers}" HAVE_UINT64_T) +check_type_exists(u_int64_t "${headers}" HAVE_U_INT64_T) + +# Define LLVM_MULTITHREADED if gcc atomic builtins exist. +include(CheckAtomic) + include(CheckCXXCompilerFlag) -check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG) +# On Windows all code is position-independent and MinGW warns if -fPIC +# is on the command line. +if( NOT WIN32 ) + check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG) +endif() include(GetTargetTriple) get_target_triple(LLVM_HOSTTRIPLE) message(STATUS "LLVM_HOSTTRIPLE: ${LLVM_HOSTTRIPLE}") # Determine the native architecture. -# FIXME: this will have to change for cross-compiling.
-string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE}) +string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH) +if( LLVM_NATIVE_ARCH STREQUAL "host" ) + string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE}) +endif () + if (LLVM_NATIVE_ARCH MATCHES "i[2-6]86") set(LLVM_NATIVE_ARCH X86) -elseif (LLVM_NATIVE_ARCH STREQUAL amd64) +elseif (LLVM_NATIVE_ARCH STREQUAL "x86") + set(LLVM_NATIVE_ARCH X86) +elseif (LLVM_NATIVE_ARCH STREQUAL "amd64") set(LLVM_NATIVE_ARCH X86) -elseif (LLVM_NATIVE_ARCH STREQUAL x86_64) +elseif (LLVM_NATIVE_ARCH STREQUAL "x86_64") set(LLVM_NATIVE_ARCH X86) elseif (LLVM_NATIVE_ARCH MATCHES "sparc") set(LLVM_NATIVE_ARCH Sparc) @@ -98,8 +154,6 @@ elseif (LLVM_NATIVE_ARCH MATCHES "powerpc") set(LLVM_NATIVE_ARCH PowerPC) elseif (LLVM_NATIVE_ARCH MATCHES "alpha") set(LLVM_NATIVE_ARCH Alpha) -elseif (LLVM_NATIVE_ARCH MATCHES "ia64") - set(LLVM_NATIVE_ARCH IA64) elseif (LLVM_NATIVE_ARCH MATCHES "arm") set(LLVM_NATIVE_ARCH ARM) elseif (LLVM_NATIVE_ARCH MATCHES "mips") @@ -174,11 +228,6 @@ configure_file( ${LLVM_BINARY_DIR}/include/llvm/Config/config.h ) -configure_file( - ${LLVM_MAIN_INCLUDE_DIR}/llvm/ADT/iterator.cmake - ${LLVM_BINARY_DIR}/include/llvm/ADT/iterator.h - ) - configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake ${LLVM_BINARY_DIR}/include/llvm/Support/DataTypes.h diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 660bd70003eec..205ddb7663993 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -12,6 +12,13 @@ macro(add_llvm_library name) install(TARGETS ${name} LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}) + # The LLVM Target library must be built before its sublibraries + # (asmprinter, etc.) because those may use tablegenned files whose + # generation is triggered by the main LLVM target library. Necessary + # for parallel builds: + if( CURRENT_LLVM_TARGET ) + add_dependencies(${name} ${CURRENT_LLVM_TARGET}) + endif() endmacro(add_llvm_library name) @@ -26,11 +33,13 @@ macro(add_llvm_executable name) if( LLVM_LINK_COMPONENTS ) llvm_config(${name} ${LLVM_LINK_COMPONENTS}) endif( LLVM_LINK_COMPONENTS ) - target_link_libraries(${name} ${llvm_libs}) get_system_libs(llvm_system_libs) if( llvm_system_libs ) target_link_libraries(${name} ${llvm_system_libs}) endif() + if( LLVM_COMMON_DEPENDS ) + add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) + endif( LLVM_COMMON_DEPENDS ) endmacro(add_llvm_executable name) @@ -61,4 +70,5 @@ macro(add_llvm_target target_name) if ( TABLEGEN_OUTPUT ) add_dependencies(LLVM${target_name} ${target_name}Table_gen) endif (TABLEGEN_OUTPUT) + set(CURRENT_LLVM_TARGET LLVM${target_name} PARENT_SCOPE) endmacro(add_llvm_target) diff --git a/cmake/modules/CheckAtomic.cmake b/cmake/modules/CheckAtomic.cmake new file mode 100644 index 0000000000000..27bbaba6998c9 --- /dev/null +++ b/cmake/modules/CheckAtomic.cmake @@ -0,0 +1,18 @@ +# atomic builtins are required for threading support.
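As a sketch of how the CURRENT_LLVM_TARGET ordering added to AddLLVM.cmake above is meant to be used; the Foo target name and source files are illustrative, not part of this patch:

    # lib/Target/Foo/CMakeLists.txt: add_llvm_target() builds LLVMFooCodeGen
    # and records it in CURRENT_LLVM_TARGET for the enclosing scope.
    add_llvm_target(FooCodeGen FooISelLowering.cpp)
    # lib/Target/Foo/AsmPrinter/CMakeLists.txt: add_llvm_library() then makes
    # the sublibrary depend on LLVMFooCodeGen, so the tablegenned headers
    # exist before these sources are compiled in a parallel build.
    add_llvm_library(LLVMFooAsmPrinter FooAsmPrinter.cpp)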
+ +INCLUDE(CheckCXXSourceCompiles) + +CHECK_CXX_SOURCE_COMPILES(" +int main() { + volatile unsigned long val = 1; + __sync_synchronize(); + __sync_val_compare_and_swap(&val, 1, 0); + __sync_add_and_fetch(&val, 1); + __sync_sub_and_fetch(&val, 1); + return 0; + } +" LLVM_MULTITHREADED) + +if( NOT LLVM_MULTITHREADED ) + message(STATUS "Warning: LLVM will be built thread-unsafe because atomic builtins are missing") +endif() diff --git a/cmake/modules/GetTargetTriple.cmake b/cmake/modules/GetTargetTriple.cmake index c915a9a6ae607..87262add59d3a 100644 --- a/cmake/modules/GetTargetTriple.cmake +++ b/cmake/modules/GetTargetTriple.cmake @@ -3,7 +3,13 @@ function( get_target_triple var ) if( MSVC ) - set( ${var} "i686-pc-win32" PARENT_SCOPE ) + if( CMAKE_CL_64 ) + set( ${var} "x86_64-pc-win32" PARENT_SCOPE ) + else() + set( ${var} "i686-pc-win32" PARENT_SCOPE ) + endif() + elseif( MINGW AND NOT MSYS ) + set( ${var} "i686-pc-mingw32" PARENT_SCOPE ) else( MSVC ) set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess) execute_process(COMMAND sh ${config_guess} diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVMConfig.cmake index 5fa08a39d6a8a..d1c297c158614 100755 --- a/cmake/modules/LLVMConfig.cmake +++ b/cmake/modules/LLVMConfig.cmake @@ -1,6 +1,3 @@ -include(FindPerl) - - function(get_system_libs return_var) # Returns in `return_var' a list of system libraries used by LLVM. if( NOT MSVC ) @@ -27,43 +24,15 @@ endmacro(llvm_config) function(explicit_llvm_config executable) set( link_components ${ARGN} ) - set(lfgs) - if (MSVC) - if( CMAKE_CL_64 ) - set(include_lflag "/INCLUDE:") - else( CMAKE_CL_64 ) - set(include_lflag "/INCLUDE:_") - endif() - foreach(c ${link_components}) - if( c STREQUAL "jit" ) - set(lfgs "${lfgs} ${include_lflag}X86TargetMachineModule") - endif( c STREQUAL "jit" ) - list(FIND LLVM_TARGETS_TO_BUILD ${c} idx) - if( NOT idx LESS 0 ) - set(lfgs "${lfgs} ${include_lflag}${c}TargetMachineModule") - list(FIND LLVM_ASMPRINTERS_FORCE_LINK ${c} idx) - if( NOT idx LESS 0 ) - set(lfgs "${lfgs} ${include_lflag}${c}AsmPrinterForceLink") - endif() - endif() - endforeach(c) - endif () - explicit_map_components_to_libraries(LIBRARIES ${link_components}) target_link_libraries(${executable} ${LIBRARIES}) - - if( lfgs ) - set_target_properties(${executable} - PROPERTIES - LINK_FLAGS ${lfgs}) - endif() endfunction(explicit_llvm_config) function(explicit_map_components_to_libraries out_libs) set( link_components ${ARGN} ) foreach(c ${link_components}) - # add codegen/asmprinter + # add codegen, asmprinter, asmparser list(FIND LLVM_TARGETS_TO_BUILD ${c} idx) if( NOT idx LESS 0 ) list(FIND llvm_libs "LLVM${c}CodeGen" idx) @@ -81,12 +50,18 @@ function(explicit_map_components_to_libraries out_libs) if( NOT asmidx LESS 0 ) list(APPEND expanded_components "LLVM${c}AsmPrinter") endif() + list(FIND llvm_libs "LLVM${c}AsmParser" asmidx) + if( NOT asmidx LESS 0 ) + list(APPEND expanded_components "LLVM${c}AsmParser") + endif() + list(FIND llvm_libs "LLVM${c}Info" asmidx) + if( NOT asmidx LESS 0 ) + list(APPEND expanded_components "LLVM${c}Info") + endif() elseif( c STREQUAL "native" ) - # TODO: we assume ARCH is X86. In this case, we must use nativecodegen - # component instead. Do nothing, as in llvm-config script. + list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen") elseif( c STREQUAL "nativecodegen" ) - # TODO: we assume ARCH is X86. 
- list(APPEND expanded_components "LLVMX86CodeGen") + list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen") elseif( c STREQUAL "backend" ) # same case as in `native'. elseif( c STREQUAL "engine" ) @@ -101,94 +76,50 @@ function(explicit_map_components_to_libraries out_libs) # We must match capitalization. string(TOUPPER "${llvm_libs}" capitalized_libs) list(REMOVE_DUPLICATES expanded_components) - set(curr_idx 0) list(LENGTH expanded_components lst_size) - while( ${curr_idx} LESS ${lst_size} ) - list(GET expanded_components ${curr_idx} c) + set(result "") + while( 0 LESS ${lst_size} ) + list(GET expanded_components 0 c) string(TOUPPER "${c}" capitalized) list(FIND capitalized_libs ${capitalized} idx) if( idx LESS 0 ) message(FATAL_ERROR "Library ${c} not found in list of llvm libraries.") endif( idx LESS 0 ) list(GET llvm_libs ${idx} canonical_lib) + list(REMOVE_ITEM result ${canonical_lib}) list(APPEND result ${canonical_lib}) - list(APPEND result ${MSVC_LIB_DEPS_${canonical_lib}}) + foreach(c ${MSVC_LIB_DEPS_${canonical_lib}}) + list(REMOVE_ITEM expanded_components ${c}) + endforeach() list(APPEND expanded_components ${MSVC_LIB_DEPS_${canonical_lib}}) - list(REMOVE_DUPLICATES expanded_components) + list(REMOVE_AT expanded_components 0) list(LENGTH expanded_components lst_size) - math(EXPR curr_idx "${curr_idx} + 1") - endwhile( ${curr_idx} LESS ${lst_size} ) - list(REMOVE_DUPLICATES result) + endwhile( 0 LESS ${lst_size} ) set(${out_libs} ${result} PARENT_SCOPE) endfunction(explicit_map_components_to_libraries) -# This data is used to establish executable/library -# dependencies. Comes from the llvm-config script, which is built and -# installed on the bin directory for MinGW or Linux. At the end of the -# script, you'll see lines like this: + +# The library dependency data is contained in the file +# LLVMLibDeps.cmake in this directory. It is automatically generated +# by tools/llvm-config/CMakeLists.txt when the build comprises all the +# targets and we are in an environment POSIX enough to build the +# llvm-config script. This, in practice, just excludes MSVC. + +# When you remove or rename a library from the build, be sure to +# remove its file from lib/ as well, or the GenLibDeps.pl script will +# include it in its analysis! + +# The format generated by GenLibDeps.pl # LLVMARMAsmPrinter.o: LLVMARMCodeGen.o libLLVMAsmPrinter.a libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMTarget.a -# This is translated to: +# is translated to: # set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget) -# It is necessary to remove the `lib' prefix, the `.a' and `.o' -# suffixes. Watch out for this line: - -# LLVMExecutionEngine.o LLVMJIT.o: libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMSystem.a libLLVMTarget.a - -# See how there are two elements before the colon. This must be -# translated as if it were: - -# LLVMExecutionEngine.o: libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMSystem.a libLLVMTarget.a -# LLVMJIT.o: libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMSystem.a libLLVMTarget.a - -# TODO: do this transformations on cmake. - -# It is very important that the LLVM built for extracting this data -# must contain all targets, not just X86.
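A minimal usage sketch of the component mapping above; the tool name and the component list are illustrative only:

    explicit_map_components_to_libraries(LIBS bitreader nativecodegen)
    target_link_libraries(my-tool ${LIBS})  # my-tool is a hypothetical target
    # The rewritten loop re-appends a library behind everything that pulls it
    # in through MSVC_LIB_DEPS_*, so each library ends up after its dependents
    # and before its own dependencies in the final static link order.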
- - -set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa) -set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMIA64 LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMInterpreter LLVMExecutionEngine LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa) -set(MSVC_LIB_DEPS_LLVMMips LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMPowerPCCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMSparcAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMX86CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMXCore LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport) -set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport) -set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) -set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMDebugger LLVMAnalysis LLVMBitReader LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMHello LLVMCore LLVMSupport) -set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMCore LLVMScalarOpts LLVMSupport LLVMTransformUtils) -set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMTransformUtils) -set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) -set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem) -set(MSVC_LIB_DEPS_LLVMSystem 
) -set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMSupport) -set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMipa) -set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport) -set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa) +# It is necessary to remove the `lib' prefix and the `.a' and `.o' suffixes. + +# This 'sed' script should do the trick: +# sed -e 's#\.a##g' -e 's#\.o##g' -e 's#libLLVM#LLVM#g' -e 's#: # #' -e 's#\(.*\)#set(MSVC_LIB_DEPS_\1)#' ~/llvm/tools/llvm-config/LibDeps.txt + +include(LLVMLibDeps) diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake new file mode 100644 index 0000000000000..fba999ee77293 --- /dev/null +++ b/cmake/modules/LLVMLibDeps.cmake @@ -0,0 +1,68 @@ +set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC) +set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa) +set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMCellSPUAsmPrinter LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMDebugger LLVMAnalysis LLVMBitReader LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMHello LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMSystem LLVMTarget)
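For reference, a regeneration sketch under stated assumptions: only the sed stage comes from the comments above, while the GenLibDeps.pl invocation and the LibDeps.txt location are assumed here.

    # Run from an LLVM objdir whose lib/ contains every target's libraries:
    #   perl utils/GenLibDeps.pl -flat lib > LibDeps.txt
    #   sed -e 's#\.a##g' -e 's#\.o##g' -e 's#libLLVM#LLVM#g' -e 's#: # #' \
    #       -e 's#\(.*\)#set(MSVC_LIB_DEPS_\1)#' LibDeps.txt \
    #       > cmake/modules/LLVMLibDeps.cmake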
+set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa) +set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport) +set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMSystem ) +set(MSVC_LIB_DEPS_LLVMSystemZAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMSystemZInfo LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport LLVMSystem) +set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMipa) +set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMX86Info) +set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMX86CodeGen LLVMX86Info) +set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86Info) +set(MSVC_LIB_DEPS_LLVMX86Info LLVMSupport) +set(MSVC_LIB_DEPS_LLVMXCore LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo) +set(MSVC_LIB_DEPS_LLVMXCoreAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget LLVMXCoreInfo) +set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMSupport) +set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport LLVMSystem) 
+set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index 0a96b5514db6f..f6da1b83cddb1 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -3,20 +3,15 @@ # Adds the name of the generated file to TABLEGEN_OUTPUT. macro(tablegen ofn) - file(GLOB all_tds "*.td") + file(GLOB local_tds "*.td") + file(GLOB_RECURSE global_tds "${LLVM_MAIN_SRC_DIR}/include/llvm/*.td") - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} COMMAND ${LLVM_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} -I ${LLVM_MAIN_SRC_DIR}/lib/Target -I ${LLVM_MAIN_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS} - -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp - DEPENDS tblgen ${all_tds} - COMMENT "Building ${ofn}.tmp..." - ) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} - COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp ${CMAKE_CURRENT_BINARY_DIR}/${ofn} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn} + DEPENDS tblgen ${local_tds} ${global_tds} COMMENT "Building ${ofn}..." ) set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${ofn}) diff --git a/configure b/configure index a38067bc2d0e0..102d3f033a38b 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for llvm 2.6svn. +# Generated by GNU Autoconf 2.60 for llvm 2.7svn. # # Report bugs to . # @@ -9,7 +9,7 @@ # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # -# Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign. +# Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign. ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## @@ -715,8 +715,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='llvm' PACKAGE_TARNAME='-llvm-' -PACKAGE_VERSION='2.6svn' -PACKAGE_STRING='llvm 2.6svn' +PACKAGE_VERSION='2.7svn' +PACKAGE_STRING='llvm 2.7svn' PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu' ac_unique_file="lib/VMCore/Module.cpp" @@ -808,6 +808,8 @@ target_cpu target_vendor target_os OS +HOST_OS +TARGET_OS LINKALL NOLINKALL LLVM_ON_UNIX @@ -835,6 +837,7 @@ DISABLE_ASSERTIONS ENABLE_EXPENSIVE_CHECKS EXPENSIVE_CHECKS DEBUG_RUNTIME +DEBUG_SYMBOLS JIT TARGET_HAS_JIT ENABLE_DOXYGEN @@ -843,9 +846,12 @@ ENABLE_PIC TARGETS_TO_BUILD LLVM_ENUM_TARGETS LLVM_ENUM_ASM_PRINTERS +LLVM_ENUM_ASM_PARSERS ENABLE_CBE_PRINTF_A EXTRA_OPTIONS BINUTILS_INCDIR +ENABLE_LLVMC_DYNAMIC +ENABLE_LLVMC_DYNAMIC_PLUGINS CXX CXXFLAGS ac_ct_CXX @@ -865,6 +871,10 @@ TAR BINPWD GRAPHVIZ DOT +FDP +NEATO +TWOPI +CIRCO GV DOTTY PERL @@ -904,6 +914,7 @@ LLVMGXXCOMMAND LLVMGCC LLVMGXX USE_UDIS86 +USE_OPROFILE HAVE_PTHREAD HUGE_VAL_SANITY ALLOCA @@ -1462,7 +1473,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures llvm 2.6svn to adapt to many kinds of systems. +\`configure' configures llvm 2.7svn to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
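To make the rebuilt tablegen() macro above concrete, a typical caller looks roughly like this; the Foo names and the tblgen flag are illustrative of lib/Target conventions, not taken from this patch:

    set(LLVM_TARGET_DEFINITIONS Foo.td)
    tablegen(FooGenRegisterInfo.inc -gen-register-desc)
    # add_llvm_target() expects a ${target_name}Table_gen custom target:
    add_custom_target(FooTable_gen DEPENDS ${TABLEGEN_OUTPUT})
    # With the change above, a touched .td anywhere under include/llvm/ now
    # re-runs tblgen straight into ${ofn}; the copy_if_different stage is gone.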
@@ -1528,7 +1539,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of llvm 2.6svn:";; + short | recursive ) echo "Configuration of llvm 2.7svn:";; esac cat <<\_ACEOF @@ -1544,18 +1555,27 @@ Optional Features: is NO) --enable-debug-runtime Build runtime libs with debug symbols (default is NO) + --enable-debug-symbols Build compiler with debug symbols (default is NO if + optimization is on and YES if it's off) --enable-jit Enable Just In Time Compiling (default is YES) --enable-doxygen Build doxygen documentation (default is NO) --enable-threads Use threads if available (default is YES) --enable-pic Build LLVM with Position Independent Code (default is YES) - --enable-targets Build specific host targets: - all,host-only,{target-name} (default=all) + --enable-targets Build specific host targets: all or + target1,target2,... Valid targets are: host, x86, + x86_64, sparc, powerpc, alpha, arm, mips, spu, + pic16, xcore, msp430, systemz, blackfin, cbe, msil, + and cpp (default=all) --enable-cbe-printf-a Enable C Backend output with hex floating point via %a (default is YES) --enable-bindings Build specific language bindings: all,auto,none,{binding-name} (default=auto) --enable-libffi Check for the presence of libffi (default is YES) + --enable-llvmc-dynamic Link LLVMC dynamically (default is NO, unless on + Win32) + --enable-llvmc-dynamic-plugins + Enable dynamic LLVMC plugins (default is YES) --enable-ltdl-install install libltdl --enable-shared[=PKGS] build shared libraries [default=yes] @@ -1586,6 +1606,8 @@ Optional Packages: both] --with-tags[=TAGS] include additional configurations [automatic] --with-udis86= Use udis86 external x86 disassembler library + --with-oprofile= + Tell OProfile >= 0.9.4 how to symbolize JIT output Some influential environment variables: CC C compiler command @@ -1665,7 +1687,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -llvm configure 2.6svn +llvm configure 2.7svn generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1673,7 +1695,7 @@ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. -Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign. +Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign. _ACEOF exit fi @@ -1681,7 +1703,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by llvm $as_me 2.6svn, which was +It was created by llvm $as_me 2.7svn, which was generated by GNU Autoconf 2.60. Invocation command line was $ $0 $@ @@ -2035,7 +2057,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -LLVM_COPYRIGHT="Copyright (c) 2003-2008 University of Illinois at Urbana-Champaign." +LLVM_COPYRIGHT="Copyright (c) 2003-2009 University of Illinois at Urbana-Champaign." 
@@ -2316,6 +2338,11 @@ else llvm_cv_no_link_all_option="-Wl,-z,defaultextract" llvm_cv_os_type="SunOS" llvm_cv_platform_type="Unix" ;; + *-*-auroraux*) + llvm_cv_link_all_option="-Wl,-z,allextract" + llvm_cv_no_link_all_option="-Wl,-z,defaultextract" + llvm_cv_os_type="AuroraUX" + llvm_cv_platform_type="Unix" ;; *-*-win32*) llvm_cv_link_all_option="-Wl,--whole-archive" llvm_cv_no_link_all_option="-Wl,--no-whole-archive" @@ -2326,6 +2353,21 @@ else llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="MingW" llvm_cv_platform_type="Win32" ;; + *-*-haiku*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Haiku" + llvm_cv_platform_type="Unix" ;; + *-unknown-eabi*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; + *-unknown-elf*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; *) llvm_cv_link_all_option="" llvm_cv_no_link_all_option="" @@ -2336,6 +2378,53 @@ fi { echo "$as_me:$LINENO: result: $llvm_cv_os_type" >&5 echo "${ECHO_T}$llvm_cv_os_type" >&6; } +{ echo "$as_me:$LINENO: checking type of operating system we're going to target" >&5 +echo $ECHO_N "checking type of operating system we're going to target... $ECHO_C" >&6; } +if test "${llvm_cv_target_os_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $target in + *-*-aix*) + llvm_cv_target_os_type="AIX" ;; + *-*-irix*) + llvm_cv_target_os_type="IRIX" ;; + *-*-cygwin*) + llvm_cv_target_os_type="Cygwin" ;; + *-*-darwin*) + llvm_cv_target_os_type="Darwin" ;; + *-*-freebsd*) + llvm_cv_target_os_type="FreeBSD" ;; + *-*-openbsd*) + llvm_cv_target_os_type="OpenBSD" ;; + *-*-netbsd*) + llvm_cv_target_os_type="NetBSD" ;; + *-*-dragonfly*) + llvm_cv_target_os_type="DragonFly" ;; + *-*-hpux*) + llvm_cv_target_os_type="HP-UX" ;; + *-*-interix*) + llvm_cv_target_os_type="Interix" ;; + *-*-linux*) + llvm_cv_target_os_type="Linux" ;; + *-*-solaris*) + llvm_cv_target_os_type="SunOS" ;; + *-*-auroraux*) + llvm_cv_target_os_type="AuroraUX" ;; + *-*-win32*) + llvm_cv_target_os_type="Win32" ;; + *-*-mingw*) + llvm_cv_target_os_type="MingW" ;; + *-*-haiku*) + llvm_cv_target_os_type="Haiku" ;; + *-unknown-eabi*) + llvm_cv_target_os_type="Freestanding" ;; + *) + llvm_cv_target_os_type="Unknown" ;; +esac +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_target_os_type" >&5 +echo "${ECHO_T}$llvm_cv_target_os_type" >&6; } + if test "$llvm_cv_os_type" = "Unknown" ; then { { echo "$as_me:$LINENO: error: Operating system is unknown, configure can't continue" >&5 echo "$as_me: error: Operating system is unknown, configure can't continue" >&2;} @@ -2344,6 +2433,10 @@ fi OS=$llvm_cv_os_type +HOST_OS=$llvm_cv_os_type + +TARGET_OS=$llvm_cv_target_os_type + LINKALL=$llvm_cv_link_all_option @@ -2386,12 +2479,13 @@ else sparc*-*) llvm_cv_target_arch="Sparc" ;; powerpc*-*) llvm_cv_target_arch="PowerPC" ;; alpha*-*) llvm_cv_target_arch="Alpha" ;; - ia64-*) llvm_cv_target_arch="IA64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; mips-*) llvm_cv_target_arch="Mips" ;; pic16-*) llvm_cv_target_arch="PIC16" ;; xcore-*) llvm_cv_target_arch="XCore" ;; msp430-*) llvm_cv_target_arch="MSP430" ;; + s390x-*) llvm_cv_target_arch="SystemZ" ;; + bfin-*) llvm_cv_target_arch="Blackfin" ;; *) llvm_cv_target_arch="Unknown" ;; esac fi @@ -4805,6 +4899,21 @@ else fi
+# Check whether --enable-debug-symbols was given. +if test "${enable_debug_symbols+set}" = set; then + enableval=$enable_debug_symbols; +else + enableval=no +fi + +if test ${enableval} = "no" ; then + DEBUG_SYMBOLS= + +else + DEBUG_SYMBOLS=DEBUG_SYMBOLS=1 + +fi + # Check whether --enable-jit was given. if test "${enable_jit+set}" = set; then enableval=$enable_jit; @@ -4818,29 +4927,31 @@ then else case "$llvm_cv_target_arch" in - x86) TARGET_HAS_JIT=1 + x86) TARGET_HAS_JIT=1 + ;; + Sparc) TARGET_HAS_JIT=0 ;; - Sparc) TARGET_HAS_JIT=0 + PowerPC) TARGET_HAS_JIT=1 ;; - PowerPC) TARGET_HAS_JIT=1 + x86_64) TARGET_HAS_JIT=1 ;; - x86_64) TARGET_HAS_JIT=1 + Alpha) TARGET_HAS_JIT=1 ;; - Alpha) TARGET_HAS_JIT=1 + ARM) TARGET_HAS_JIT=1 ;; - IA64) TARGET_HAS_JIT=0 + Mips) TARGET_HAS_JIT=0 ;; - ARM) TARGET_HAS_JIT=0 + PIC16) TARGET_HAS_JIT=0 ;; - Mips) TARGET_HAS_JIT=0 + XCore) TARGET_HAS_JIT=0 ;; - PIC16) TARGET_HAS_JIT=0 + MSP430) TARGET_HAS_JIT=0 ;; - XCore) TARGET_HAS_JIT=0 + SystemZ) TARGET_HAS_JIT=0 ;; - MSP430) TARGET_HAS_JIT=0 + Blackfin) TARGET_HAS_JIT=0 ;; - *) TARGET_HAS_JIT=0 + *) TARGET_HAS_JIT=0 ;; esac fi @@ -4920,44 +5031,47 @@ else enableval=all fi +if test "$enableval" = host-only ; then + enableval=host +fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha IA64 ARM Mips CellSPU PIC16 XCore MSP430 CBackend MSIL CppBackend" ;; - host-only) - case "$llvm_cv_target_arch" in - x86) TARGETS_TO_BUILD="X86" ;; - x86_64) TARGETS_TO_BUILD="X86" ;; - Sparc) TARGETS_TO_BUILD="Sparc" ;; - PowerPC) TARGETS_TO_BUILD="PowerPC" ;; - Alpha) TARGETS_TO_BUILD="Alpha" ;; - IA64) TARGETS_TO_BUILD="IA64" ;; - ARM) TARGETS_TO_BUILD="ARM" ;; - Mips) TARGETS_TO_BUILD="Mips" ;; - CellSPU|SPU) TARGETS_TO_BUILD="CellSPU" ;; - PIC16) TARGETS_TO_BUILD="PIC16" ;; - XCore) TARGETS_TO_BUILD="XCore" ;; - MSP430) TARGETS_TO_BUILD="MSP430" ;; - *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5 -echo "$as_me: error: Can not set target to build" >&2;} - { (exit 1); exit 1; }; } ;; - esac - ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in - x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; - x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; - sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; - powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; - alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; - ia64) TARGETS_TO_BUILD="IA64 $TARGETS_TO_BUILD" ;; - arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; - mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; - spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; - pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; - xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; - msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; - cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;; - msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;; - cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; + x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; + powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; + arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; + mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; + spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; + pic16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; + 
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; + msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; + systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; + cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;; + msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;; + cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; + host) case "$llvm_cv_target_arch" in + x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; + PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + Alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; + ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; + Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; + CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; + PIC16) TARGETS_TO_BUILD="PIC16 $TARGETS_TO_BUILD" ;; + XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; + MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; + SystemZ) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; + *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5 +echo "$as_me: error: Can not set target to build" >&2;} + { (exit 1); exit 1; }; } ;; + esac ;; *) { { echo "$as_me:$LINENO: error: Unrecognized target $a_target" >&5 echo "$as_me: error: Unrecognized target $a_target" >&2;} { (exit 1); exit 1; }; } ;; @@ -4982,18 +5096,23 @@ _ACEOF done # Build the LLVM_TARGET and LLVM_ASM_PRINTER macro uses for -# Targets.def and AsmPrinters.def. +# Targets.def, AsmPrinters.def, and AsmParsers.def. LLVM_ENUM_TARGETS="" LLVM_ENUM_ASM_PRINTERS="" +LLVM_ENUM_ASM_PARSERS="" for target_to_build in $TARGETS_TO_BUILD; do LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS" if test -f ${srcdir}/lib/Target/${target_to_build}/AsmPrinter/Makefile ; then LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS"; fi + if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then + LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS"; + fi done + # Check whether --enable-cbe-printf-a was given. if test "${enable_cbe_printf_a+set}" = set; then enableval=$enable_cbe_printf_a; @@ -5165,6 +5284,42 @@ echo "$as_me: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" { (exit 1); exit 1; }; } ;; esac +if test "$llvm_cv_os_type" = "Win32" ; then + llvmc_dynamic="yes" +else + llvmc_dynamic="no" +fi + +# Check whether --enable-llvmc-dynamic was given. +if test "${enable_llvmc_dynamic+set}" = set; then + enableval=$enable_llvmc_dynamic; +else + enableval=$llvmc_dynamic +fi + +if test ${enableval} = "yes" && test "$ENABLE_PIC" -eq 1 ; then + ENABLE_LLVMC_DYNAMIC=ENABLE_LLVMC_DYNAMIC=1 + +else + ENABLE_LLVMC_DYNAMIC= + +fi + +# Check whether --enable-llvmc-dynamic-plugins was given. +if test "${enable_llvmc_dynamic_plugins+set}" = set; then + enableval=$enable_llvmc_dynamic_plugins; +else + enableval=yes +fi + +if test ${enableval} = "yes" ; then + ENABLE_LLVMC_DYNAMIC_PLUGINS=ENABLE_LLVMC_DYNAMIC_PLUGINS=1 + +else + ENABLE_LLVMC_DYNAMIC_PLUGINS= + +fi + ac_ext=c ac_cpp='$CPP $CPPFLAGS' @@ -7173,6 +7328,230 @@ cat >>confdefs.h <<_ACEOF #define LLVM_PATH_DOT "$DOT${EXEEXT}" _ACEOF +fi +# Extract the first word of "fdp", so it can be a program name with args. 
+set dummy fdp; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_FDP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $FDP in + [\\/]* | ?:[\\/]*) + ac_cv_path_FDP="$FDP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_FDP="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_FDP" && ac_cv_path_FDP="echo fdp" + ;; +esac +fi +FDP=$ac_cv_path_FDP +if test -n "$FDP"; then + { echo "$as_me:$LINENO: result: $FDP" >&5 +echo "${ECHO_T}$FDP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$FDP" != "echo fdp" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FDP 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + FDP=`echo $FDP | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_FDP "$FDP${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "neato", so it can be a program name with args. +set dummy neato; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_NEATO+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $NEATO in + [\\/]* | ?:[\\/]*) + ac_cv_path_NEATO="$NEATO" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_NEATO="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NEATO" && ac_cv_path_NEATO="echo neato" + ;; +esac +fi +NEATO=$ac_cv_path_NEATO +if test -n "$NEATO"; then + { echo "$as_me:$LINENO: result: $NEATO" >&5 +echo "${ECHO_T}$NEATO" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$NEATO" != "echo neato" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NEATO 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + NEATO=`echo $NEATO | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_NEATO "$NEATO${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "twopi", so it can be a program name with args. +set dummy twopi; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_TWOPI+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $TWOPI in + [\\/]* | ?:[\\/]*) + ac_cv_path_TWOPI="$TWOPI" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_TWOPI="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_TWOPI" && ac_cv_path_TWOPI="echo twopi" + ;; +esac +fi +TWOPI=$ac_cv_path_TWOPI +if test -n "$TWOPI"; then + { echo "$as_me:$LINENO: result: $TWOPI" >&5 +echo "${ECHO_T}$TWOPI" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$TWOPI" != "echo twopi" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_TWOPI 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + TWOPI=`echo $TWOPI | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_TWOPI "$TWOPI${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "circo", so it can be a program name with args. +set dummy circo; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_CIRCO+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $CIRCO in + [\\/]* | ?:[\\/]*) + ac_cv_path_CIRCO="$CIRCO" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_CIRCO="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_CIRCO" && ac_cv_path_CIRCO="echo circo" + ;; +esac +fi +CIRCO=$ac_cv_path_CIRCO +if test -n "$CIRCO"; then + { echo "$as_me:$LINENO: result: $CIRCO" >&5 +echo "${ECHO_T}$CIRCO" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$CIRCO" != "echo circo" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_CIRCO 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + CIRCO=`echo $CIRCO | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_CIRCO "$CIRCO${EXEEXT}" +_ACEOF + fi for ac_prog in gv gsview32 do @@ -10629,7 +11008,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < conftest.$ac_ext + echo '#line 13155 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -14491,11 +14870,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14494: $lt_compile\"" >&5) + (eval echo "\"\$as_me:14873: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:14498: \$? = $ac_status" >&5 + echo "$as_me:14877: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. 
@@ -14759,11 +15138,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14762: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15141: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:14766: \$? = $ac_status" >&5 + echo "$as_me:15145: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -14863,11 +15242,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14866: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15245: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:14870: \$? = $ac_status" >&5 + echo "$as_me:15249: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -17315,7 +17694,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < conftest.$ac_ext <&5) + (eval echo "\"\$as_me:20165: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:19790: \$? = $ac_status" >&5 + echo "$as_me:20169: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -19887,11 +20266,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:19890: $lt_compile\"" >&5) + (eval echo "\"\$as_me:20269: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:19894: \$? = $ac_status" >&5 + echo "$as_me:20273: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -21457,11 +21836,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:21460: $lt_compile\"" >&5) + (eval echo "\"\$as_me:21839: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:21464: \$? = $ac_status" >&5 + echo "$as_me:21843: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -21561,11 +21940,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:21564: $lt_compile\"" >&5) + (eval echo "\"\$as_me:21943: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:21568: \$? = $ac_status" >&5 + echo "$as_me:21947: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -23796,11 +24175,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:23799: $lt_compile\"" >&5) + (eval echo "\"\$as_me:24178: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:23803: \$? = $ac_status" >&5 + echo "$as_me:24182: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -24064,11 +24443,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:24067: $lt_compile\"" >&5) + (eval echo "\"\$as_me:24446: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:24071: \$? = $ac_status" >&5 + echo "$as_me:24450: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -24168,11 +24547,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:24171: $lt_compile\"" >&5) + (eval echo "\"\$as_me:24550: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:24175: \$? = $ac_status" >&5 + echo "$as_me:24554: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -27082,13 +27461,13 @@ echo "${ECHO_T}ok" >&6; } -{ echo "$as_me:$LINENO: checking for elf_begin in -lelf" >&5 -echo $ECHO_N "checking for elf_begin in -lelf... $ECHO_C" >&6; } -if test "${ac_cv_lib_elf_elf_begin+set}" = set; then +{ echo "$as_me:$LINENO: checking for sin in -lm" >&5 +echo $ECHO_N "checking for sin in -lm... $ECHO_C" >&6; } +if test "${ac_cv_lib_m_sin+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lelf $LIBS" +LIBS="-lm $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -27102,11 +27481,11 @@ cat >>conftest.$ac_ext <<_ACEOF #ifdef __cplusplus extern "C" #endif -char elf_begin (); +char sin (); int main () { -return elf_begin (); +return sin (); ; return 0; } @@ -27145,37 +27524,38 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then - ac_cv_lib_elf_elf_begin=yes + ac_cv_lib_m_sin=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - ac_cv_lib_elf_elf_begin=no + ac_cv_lib_m_sin=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ echo "$as_me:$LINENO: result: $ac_cv_lib_elf_elf_begin" >&5 -echo "${ECHO_T}$ac_cv_lib_elf_elf_begin" >&6; } -if test $ac_cv_lib_elf_elf_begin = yes; then +{ echo "$as_me:$LINENO: result: $ac_cv_lib_m_sin" >&5 +echo "${ECHO_T}$ac_cv_lib_m_sin" >&6; } +if test $ac_cv_lib_m_sin = yes; then cat >>confdefs.h <<_ACEOF -#define HAVE_LIBELF 1 +#define HAVE_LIBM 1 _ACEOF - LIBS="-lelf $LIBS" + LIBS="-lm $LIBS" fi +if test "$llvm_cv_os_type" = "MingW" ; then -{ echo "$as_me:$LINENO: checking for sin in -lm" >&5 -echo $ECHO_N "checking for sin in -lm... $ECHO_C" >&6; } -if test "${ac_cv_lib_m_sin+set}" = set; then +{ echo "$as_me:$LINENO: checking for main in -limagehlp" >&5 +echo $ECHO_N "checking for main in -limagehlp... $ECHO_C" >&6; } +if test "${ac_cv_lib_imagehlp_main+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS -LIBS="-lm $LIBS" +LIBS="-limagehlp $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -27183,99 +27563,11 @@ cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. */ -#ifdef __cplusplus -extern "C" -#endif -char sin (); + int main () { -return sin (); - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext conftest$ac_exeext -if { (ac_try="$ac_link" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_link") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest$ac_exeext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_lib_m_sin=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cv_lib_m_sin=no -fi - -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ echo "$as_me:$LINENO: result: $ac_cv_lib_m_sin" >&5 -echo "${ECHO_T}$ac_cv_lib_m_sin" >&6; } -if test $ac_cv_lib_m_sin = yes; then - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBM 1 -_ACEOF - - LIBS="-lm $LIBS" - -fi - -if test "$llvm_cv_os_type" = "MingW" ; then - -{ echo "$as_me:$LINENO: checking for main in -limagehlp" >&5 -echo $ECHO_N "checking for main in -limagehlp... 
$ECHO_C" >&6; } -if test "${ac_cv_lib_imagehlp_main+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-limagehlp $LIBS" -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ - - -int -main () -{ -return main (); +return main (); ; return 0; } @@ -28254,6 +28546,404 @@ _ACEOF +# Check whether --with-oprofile was given. +if test "${with_oprofile+set}" = set; then + withval=$with_oprofile; + USE_OPROFILE=1 + + case "$withval" in + /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; + no) llvm_cv_oppath= + USE_OPROFILE=0 + ;; + *) llvm_cv_oppath="${withval}/lib/oprofile" + CPPFLAGS="-I${withval}/include";; + esac + if test -n "$llvm_cv_oppath" ; then + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" + { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5 +echo $ECHO_N "checking for library containing bfd_init... $ECHO_C" >&6; } +if test "${ac_cv_search_bfd_init+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char bfd_init (); +int +main () +{ +return bfd_init (); + ; + return 0; +} +_ACEOF +for ac_lib in '' bfd; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_bfd_init=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_bfd_init+set}" = set; then + break +fi +done +if test "${ac_cv_search_bfd_init+set}" = set; then + : +else + ac_cv_search_bfd_init=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_bfd_init" >&5 +echo "${ECHO_T}$ac_cv_search_bfd_init" >&6; } +ac_res=$ac_cv_search_bfd_init +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 +echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; } +if test "${ac_cv_search_op_open_agent+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char op_open_agent (); +int +main () +{ +return op_open_agent (); + ; + return 0; +} +_ACEOF +for ac_lib in '' opagent; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_op_open_agent=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_op_open_agent+set}" = set; then + break +fi +done +if test "${ac_cv_search_op_open_agent+set}" = set; then + : +else + ac_cv_search_op_open_agent=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_op_open_agent" >&5 +echo "${ECHO_T}$ac_cv_search_op_open_agent" >&6; } +ac_res=$ac_cv_search_op_open_agent +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else + + echo "Error! You need to have libopagent around." 
+ exit -1 + +fi + + if test "${ac_cv_header_opagent_h+set}" = set; then + { echo "$as_me:$LINENO: checking for opagent.h" >&5 +echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; } +if test "${ac_cv_header_opagent_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5 +echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking opagent.h usability" >&5 +echo $ECHO_N "checking opagent.h usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking opagent.h presence" >&5 +echo $ECHO_N "checking opagent.h presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" 
>&5 +echo "$as_me: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: opagent.h: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: opagent.h: present but cannot be compiled" >&5 +echo "$as_me: WARNING: opagent.h: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: opagent.h: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: opagent.h: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: opagent.h: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: opagent.h: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: opagent.h: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: opagent.h: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: opagent.h: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for opagent.h" >&5 +echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; } +if test "${ac_cv_header_opagent_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_header_opagent_h=$ac_header_preproc +fi +{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5 +echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; } + +fi +if test $ac_cv_header_opagent_h = yes; then + : +else + + echo "Error! You need to have opagent.h around." + exit -1 + +fi + + + fi + +else + + USE_OPROFILE=0 + + +fi + + +cat >>confdefs.h <<_ACEOF +#define USE_OPROFILE $USE_OPROFILE +_ACEOF + + + @@ -31468,7 +32158,8 @@ done -for ac_func in strerror strerror_r strerror_s + +for ac_func in strerror strerror_r strerror_s setenv do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` { echo "$as_me:$LINENO: checking for $ac_func" >&5 @@ -34060,6 +34751,96 @@ rm -f core conftest.err conftest.$ac_objext \ +if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then + { echo "$as_me:$LINENO: checking for 32-bit userspace on 64-bit system" >&5 +echo $ECHO_N "checking for 32-bit userspace on 64-bit system... $ECHO_C" >&6; } +if test "${llvm_cv_linux_mixed+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifndef __x86_64__ + error: Not x86-64 even if uname says so! 
+ #endif + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + llvm_cv_linux_mixed=no +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + llvm_cv_linux_mixed=yes +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_linux_mixed" >&5 +echo "${ECHO_T}$llvm_cv_linux_mixed" >&6; } + + if test "$llvm_cv_linux_mixed" = "yes"; then + llvm_cv_target_arch="x86" + ARCH="x86" + fi +fi + for ac_func in __dso_handle do @@ -34477,9 +35258,9 @@ ac_config_files="$ac_config_files include/llvm/Config/Targets.def" ac_config_files="$ac_config_files include/llvm/Config/AsmPrinters.def" -ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h" +ac_config_files="$ac_config_files include/llvm/Config/AsmParsers.def" -ac_config_headers="$ac_config_headers include/llvm/ADT/iterator.h" +ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h" ac_config_files="$ac_config_files Makefile.config" @@ -34936,7 +35717,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by llvm $as_me 2.6svn, which was +This file was extended by llvm $as_me 2.7svn, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -34989,7 +35770,7 @@ Report bugs to ." 
_ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -llvm config.status 2.6svn +llvm config.status 2.7svn configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" @@ -35105,8 +35886,8 @@ do "include/llvm/Config/config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Config/config.h" ;; "include/llvm/Config/Targets.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Targets.def" ;; "include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;; + "include/llvm/Config/AsmParsers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmParsers.def" ;; "include/llvm/Support/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Support/DataTypes.h" ;; - "include/llvm/ADT/iterator.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/ADT/iterator.h" ;; "Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;; "llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;; "docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxygen.cfg" ;; @@ -35240,6 +36021,8 @@ target_cpu!$target_cpu$ac_delim target_vendor!$target_vendor$ac_delim target_os!$target_os$ac_delim OS!$OS$ac_delim +HOST_OS!$HOST_OS$ac_delim +TARGET_OS!$TARGET_OS$ac_delim LINKALL!$LINKALL$ac_delim NOLINKALL!$NOLINKALL$ac_delim LLVM_ON_UNIX!$LLVM_ON_UNIX$ac_delim @@ -35267,6 +36050,7 @@ DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim +DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim JIT!$JIT$ac_delim TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim @@ -35275,16 +36059,13 @@ ENABLE_PIC!$ENABLE_PIC$ac_delim TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim +LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim +ENABLE_LLVMC_DYNAMIC!$ENABLE_LLVMC_DYNAMIC$ac_delim +ENABLE_LLVMC_DYNAMIC_PLUGINS!$ENABLE_LLVMC_DYNAMIC_PLUGINS$ac_delim CXX!$CXX$ac_delim -CXXFLAGS!$CXXFLAGS$ac_delim -ac_ct_CXX!$ac_ct_CXX$ac_delim -NM!$NM$ac_delim -ifGNUmake!$ifGNUmake$ac_delim -LN_S!$LN_S$ac_delim -CMP!$CMP$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -35326,6 +36107,12 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +CXXFLAGS!$CXXFLAGS$ac_delim +ac_ct_CXX!$ac_ct_CXX$ac_delim +NM!$NM$ac_delim +ifGNUmake!$ifGNUmake$ac_delim +LN_S!$LN_S$ac_delim +CMP!$CMP$ac_delim CP!$CP$ac_delim DATE!$DATE$ac_delim FIND!$FIND$ac_delim @@ -35338,6 +36125,10 @@ TAR!$TAR$ac_delim BINPWD!$BINPWD$ac_delim GRAPHVIZ!$GRAPHVIZ$ac_delim DOT!$DOT$ac_delim +FDP!$FDP$ac_delim +NEATO!$NEATO$ac_delim +TWOPI!$TWOPI$ac_delim +CIRCO!$CIRCO$ac_delim GV!$GV$ac_delim DOTTY!$DOTTY$ac_delim PERL!$PERL$ac_delim @@ -35377,6 +36168,7 @@ LLVMGXXCOMMAND!$LLVMGXXCOMMAND$ac_delim LLVMGCC!$LLVMGCC$ac_delim LLVMGXX!$LLVMGXX$ac_delim USE_UDIS86!$USE_UDIS86$ac_delim +USE_OPROFILE!$USE_OPROFILE$ac_delim HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim ALLOCA!$ALLOCA$ac_delim @@ -35409,7 +36201,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 81; then + if test `sed -n 
"s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 92; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 @@ -35813,31 +36605,31 @@ echo "$as_me: executing $ac_file commands" >&6;} case $ac_file$ac_mode in "Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/Makefile Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile Makefile ;; "Makefile.common":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile.common` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/Makefile.common Makefile.common ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile.common Makefile.common ;; "examples/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname examples/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/examples/Makefile examples/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/examples/Makefile examples/Makefile ;; "lib/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/lib/Makefile lib/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/Makefile lib/Makefile ;; "runtime/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname runtime/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/runtime/Makefile runtime/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/runtime/Makefile runtime/Makefile ;; "test/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname test/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/test/Makefile test/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/test/Makefile test/Makefile ;; "test/Makefile.tests":C) ${llvm_src}/autoconf/mkinstalldirs `dirname test/Makefile.tests` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/test/Makefile.tests test/Makefile.tests ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/test/Makefile.tests test/Makefile.tests ;; "unittests/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname unittests/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/unittests/Makefile unittests/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/unittests/Makefile unittests/Makefile ;; "tools/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/tools/Makefile tools/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/Makefile tools/Makefile ;; "utils/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname utils/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/utils/Makefile utils/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/utils/Makefile utils/Makefile ;; "projects/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname projects/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/projects/Makefile projects/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/projects/Makefile projects/Makefile ;; "bindings/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname bindings/Makefile` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/bindings/Makefile bindings/Makefile ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/bindings/Makefile bindings/Makefile ;; 
"bindings/ocaml/Makefile.ocaml":C) ${llvm_src}/autoconf/mkinstalldirs `dirname bindings/ocaml/Makefile.ocaml` - ${SHELL} ${llvm_src}/autoconf/install-sh -c ${srcdir}/bindings/ocaml/Makefile.ocaml bindings/ocaml/Makefile.ocaml ;; + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/bindings/ocaml/Makefile.ocaml bindings/ocaml/Makefile.ocaml ;; esac done # for ac_tag diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html index df0a74d798547..e75887b739025 100644 --- a/docs/BitCodeFormat.html +++ b/docs/BitCodeFormat.html @@ -254,7 +254,7 @@ Blocks in a bitstream denote nested regions of the stream, and are identified by a content-specific id number (for example, LLVM IR uses an ID of 12 to represent function bodies). Block IDs 0-7 are reserved for standard blocks whose meaning is defined by Bitcode; block IDs 8 and greater are -application specific. Nested blocks capture the hierachical structure of the data +application specific. Nested blocks capture the hierarchical structure of the data encoded in it, and various properties are associated with blocks as the file is parsed. Block definitions allow the reader to efficiently skip blocks in constant time if the reader wants a summary of blocks, or if it wants to @@ -462,23 +462,23 @@ emitted as their code, followed by the extra data.

The possible operand encodings are:

1. Fixed: The field should be emitted as a fixed-width value, whose width is specified by the operand's extra data.
2. VBR: The field should be emitted as a variable-width value, whose width is specified by the operand's extra data.
3. Array: This field is an array of values. The array operand has no extra data, but expects another operand to follow it which indicates the element type of the array. When reading an array in an abbreviated record, the first integer is a vbr6 that indicates the array length, followed by the encoded elements of the array. An array may only occur as the last operand of an abbreviation (except for the one final operand that gives the array's type).
4. Char6: This field should be emitted as a char6-encoded value. This operand type takes no extra data.
5. Blob: This field is emitted as a vbr6, followed by padding to a 32-bit boundary (for alignment) and an array of 8-bit objects. The array of bytes is further followed by tail padding to ensure that its total length is a multiple of 4 bytes. This makes it very efficient for the reader to
@@ -755,7 +755,7 @@ LLVM IR is defined with the following blocks:
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
    The LLVM Compiler Infrastructure
    -Last modified: $Date: 2009-04-27 00:21:57 +0200 (Mon, 27 Apr 2009) $ +Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html index 7b2679689534e..c89b4c5826a73 100644 --- a/docs/Bugpoint.html +++ b/docs/Bugpoint.html @@ -216,6 +216,17 @@ non-obvious ways. Here are some hints and tips:

    the list of specified optimizations to be randomized and applied to the program. This process will repeat until a bug is found or the user kills bugpoint.

bugpoint does not understand the -O option that is used to specify the optimization level to opt. You can use, e.g.,

    opt -O2 -debug-pass=Arguments foo.bc -disable-output

to get a list of the passes that are used with -O2 and then pass this list to bugpoint.

@@ -232,7 +243,7 @@ non-obvious ways. Here are some hints and tips:

Chris Lattner
LLVM Compiler Infrastructure
- Last modified: $Date: 2008-12-11 18:34:48 +0100 (Thu, 11 Dec 2008) $ + Last modified: $Date: 2009-10-12 20:12:47 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/CMake.html b/docs/CMake.html index 1f50d397f006d..25f471081f59d 100644 --- a/docs/CMake.html +++ b/docs/CMake.html @@ -67,7 +67,7 @@

1. Download and install CMake. Version 2.6.2 is the minimum required.

2. Open a shell. Your development tools must be reachable from this
@@ -180,7 +180,7 @@

    Variables are stored on the CMake cache. This is a file - named CMakeCache.txt on the root of the build + named CMakeCache.txt on the root of the build directory. Do not hand-edit it.

    Variables are listed here appending its type after a colon. It is @@ -250,6 +250,12 @@ to X86. On the other cases defaults to all. Example: -DLLVM_TARGETS_TO_BUILD="X86;PowerPC;Alpha". +

    LLVM_BUILD_TOOLS:BOOL
    +
    Build LLVM tools. Defaults to ON.
    + +
    LLVM_BUILD_EXAMPLES:BOOL
    +
    Build LLVM examples. Defaults to ON.
    +
    LLVM_ENABLE_THREADS:BOOL
    Build with threads support, if available. Defaults to ON.
    @@ -258,18 +264,21 @@ CMAKE_BUILD_TYPE is Release.
    LLVM_ENABLE_PIC:BOOL
    -
    Add the -fPIC flag to the compiler command-line, if the - compiler supports this flag. Some systems, like Windows, does not - need this flag. Defaults to OFF.
    +
    Add the -fPIC flag for the compiler command-line, if the + compiler supports this flag. Some systems, like Windows, do not + need this flag. Defaults to ON.
    LLVM_BUILD_32_BITS:BOOL
    Build 32-bits executables and libraries on 64-bits systems. This option is available only on some 64-bits unix systems. Defaults to OFF.
    -
    LLVM_PLO_FLAGS:STRING
    -
    Extra flags for creating partially linked objects. Visual C++ - does not use this.
    +
    LLVM_TARGET_ARCH:STRING
    +
    LLVM target to use for native code generation. This is required + for JIT generation. It defaults to "host", meaning that it shall + pick the architecture of the machine where LLVM is being built. If + you are cross-compiling, set it to the target architecture + name.
    LLVM_TABLEGEN:STRING
    Full path to a native TableGen executable (usually @@ -309,6 +318,9 @@ this section for a quick solution.

    +

    Also see the LLVM-specific variables + section for variables used when cross-compiling.

@@ -337,26 +349,6 @@
-For linking the JIT into your executable, add
-
-/INCLUDE:_X86TargetMachineModule
-
-to your linker options. This is required for adding the relevant LLVM object code to the executable. Not doing this will result on some methods returning NULL (ExecutionEngine::create, for instance).
    diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html index 25101fc31bf63..d39de19ec6a51 100644 --- a/docs/CodeGenerator.html +++ b/docs/CodeGenerator.html @@ -1380,9 +1380,9 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf, for RegisterClass, the last parameter of which is a list of registers. Just commenting some out is one simple way to avoid them being used. A more polite way is to explicitly exclude some registers from - the allocation order. See the definition of the GR register - class in lib/Target/IA64/IA64RegisterInfo.td for an example of this - (e.g., numReservedRegs registers are hidden.)

    + the allocation order. See the definition of the GR8 register + class in lib/Target/X86/X86RegisterInfo.td for an example of this. +

    Virtual registers are also denoted by integer numbers. Contrary to physical registers, different virtual registers never share the same number. The @@ -1616,9 +1616,9 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf,

    -$ llc -f -regalloc=simple file.bc -o sp.s;
    -$ llc -f -regalloc=local file.bc -o lc.s;
    -$ llc -f -regalloc=linearscan file.bc -o ln.s;
    +$ llc -regalloc=simple file.bc -o sp.s;
    +$ llc -regalloc=local file.bc -o lc.s;
    +$ llc -regalloc=linearscan file.bc -o ln.s;
     
    @@ -1812,24 +1812,27 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
    -Base + [1,2,4,8] * IndexReg + Disp32
    +SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32
     
-In order to represent this, LLVM tracks no less than 4 operands for each
+In order to represent this, LLVM tracks no less than 5 operands for each
 memory operand of this form. This means that the "load" form of 'mov' has the following MachineOperands in this order:

    -Index:        0     |    1        2       3           4
    -Meaning:   DestReg, | BaseReg,  Scale, IndexReg, Displacement
    -OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,   SignExtImm
    +Index:        0     |    1        2       3           4          5
    +Meaning:   DestReg, | BaseReg,  Scale, IndexReg, Displacement Segment
    +OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,   SignExtImm  PhysReg
     

    Stores, and all other instructions, treat the five memory operands in the - same way and in the same order.

    + same way and in the same order. If the segment register is unspecified + (regno = 0), then no segment override is generated. "Lea" operations do not + have a segment register specified, so they only have 4 operands for their + memory reference.
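To make the five-operand layout concrete, here is a hedged sketch (not code from this patch; the opcode and register numbers are placeholders, though BuildMI and the addReg/addImm builders are the real MachineInstrBuilder interface) of how a backend pass might emit such a load:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/Target/TargetInstrInfo.h"
  using namespace llvm;

  // Sketch only: emit a load whose address is Base + 1*Index + 0,
  // with no segment override (segment register number 0).
  static void emitLoadSketch(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator IP,
                             const TargetInstrInfo &TII, unsigned LoadOpc,
                             unsigned DestReg, unsigned BaseReg,
                             unsigned IndexReg) {
    BuildMI(MBB, IP, DebugLoc::getUnknownLoc(), TII.get(LoadOpc), DestReg)
        .addReg(BaseReg)   // operand 1: base register
        .addImm(1)         // operand 2: scale (1, 2, 4 or 8)
        .addReg(IndexReg)  // operand 3: index register
        .addImm(0)         // operand 4: displacement
        .addReg(0);        // operand 5: segment (regno 0 = no override)
  }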

    @@ -2118,7 +2121,7 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-06-15 12:17:44 +0000 (Mon, 15 Jun 2009) $ + Last modified: $Date: 2009-10-10 23:30:55 +0200 (Sat, 10 Oct 2009) $ diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html index cf9111071f56e..f93e1eac5de80 100644 --- a/docs/CodingStandards.html +++ b/docs/CodingStandards.html @@ -41,8 +41,12 @@
 #include as Little as Possible
 Keep "internal" Headers Private
-#include <iostream> is forbidden
+Use Early Exits and 'continue' to Simplify Code
+Don't use "else" after a return
+Turn Predicate Loops into Predicate Functions
 The Low Level Issues
@@ -52,16 +56,27 @@ classes in headers
 Don't evaluate end() every time through a loop
-Prefer Preincrement
+#include <iostream> is forbidden
 Avoid std::endl
+Use raw_ostream
+Microscopic Details:
+  Spaces Before Parentheses
+  Prefer Preincrement
+  Namespace Indentation
+  Anonymous Namespaces
 See Also

-Written by Chris Lattner and Bill Wendling
+Written by Chris Lattner

    @@ -118,7 +133,9 @@ href="mailto:sabre@nondot.org">Chris.

    Comments are one critical part of readability and maintainability. Everyone -knows they should comment, so should you. Although we all should probably +knows they should comment, so should you. When writing comments, write them as +English prose, which means they should use proper capitalization, punctuation, +etc. Although we all should probably comment our code more than we do, there are a few very critical places that documentation is very useful:

    @@ -286,7 +303,7 @@ for debate.

    In all cases, prefer spaces to tabs in source files. People have different -prefered indentation levels, and different styles of indentation that they +preferred indentation levels, and different styles of indentation that they like... this is fine. What isn't is that different editors/viewers expand tabs out to different tab stops. This can cause your code to look completely unreadable, and it is not worth dealing with.

    @@ -402,7 +419,8 @@ different symbols based on whether class or struct was used to declare the symbol. This can lead to problems at link time.

    So, the rule for LLVM is to always use the class keyword, unless -all members are public, in which case struct is allowed.

    +all members are public and the type is a C++ "POD" type, in which case +struct is allowed.

    @@ -417,6 +435,7 @@ declare the symbol. This can lead to problems at link time.

    + @@ -472,7 +491,7 @@ most cases, you simply don't need the definition of a class... and not must include all of the header files that you are using -- you can include them either directly or indirectly (through another header file). To make sure that you don't -accidently forget to include a header file in your module header, make sure to +accidentally forget to include a header file in your module header, make sure to include your module header first in the implementation file (as mentioned above). This way there won't be any hidden dependencies that you'll find out about later...
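As a small illustration of that rule (the file names here are hypothetical, invented for the example):

  // Widget.cpp -- include this module's own header first, so that any
  // header that Widget.h forgot to include surfaces as an error here,
  // not in some unrelated file that includes Widget.h later.
  #include "Widget.h"

  // Everything else comes after the module's own header.
  #include "llvm/ADT/SmallVector.h"
  #include <cassert>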

    @@ -502,34 +521,256 @@ class itself... just make them private (or protected), and all is well.

    -

    The use of #include <iostream> in library files is -hereby forbidden. The primary reason for doing this is to -support clients using LLVM libraries as part of larger systems. In particular, -we statically link LLVM into some dynamic libraries. Even if LLVM isn't used, -the static c'tors are run whenever an application start up that uses the dynamic -library. There are two problems with this:

    +

    When reading code, keep in mind how much state and how many previous +decisions have to be remembered by the reader to understand a block of code. +Aim to reduce indentation where possible when it doesn't make it more difficult +to understand the code. One great way to do this is by making use of early +exits and the 'continue' keyword in long loops. As an example of using an early +exit from a function, consider this "bad" code:

    -
      -
    1. The time to run the static c'tors impacts startup time of - applications—a critical time for GUI apps.
    2. -
    3. The static c'tors cause the app to pull many extra pages of memory off the - disk: both the code for the static c'tors in each .o file and the - small amount of data that gets touched. In addition, touched/dirty pages - put more pressure on the VM system on low-memory machines.
    4. -
    +
    +
    +Value *DoSomething(Instruction *I) {
    +  if (!isa<TerminatorInst>(I) &&
    +      I->hasOneUse() && SomeOtherThing(I)) {
    +    ... some long code ....
    +  }
    +  
    +  return 0;
    +}
    +
    +
    -

    Note that using the other stream headers (<sstream> for -example) is allowed normally, it is just <iostream> that is -causing problems.

    +

    This code has several problems if the body of the 'if' is large. When you're +looking at the top of the function, it isn't immediately clear that this +only does interesting things with non-terminator instructions, and only +applies to things with the other predicates. Second, it is relatively difficult +to describe (in comments) why these predicates are important because the if +statement makes it difficult to lay out the comments. Third, when you're deep +within the body of the code, it is indented an extra level. Finally, when +reading the top of the function, it isn't clear what the result is if the +predicate isn't true, you have to read to the end of the function to know that +it returns null.

    + +

    It is much preferred to format the code like this:

    + +
    +
    +Value *DoSomething(Instruction *I) {
    +  // Terminators never need 'something' done to them because, ... 
    +  if (isa<TerminatorInst>(I))
    +    return 0;
    +
    +  // We conservatively avoid transforming instructions with multiple uses
    +  // because goats like cheese.
    +  if (!I->hasOneUse())
    +    return 0;
    +
    +  // This is really just here for example.
    +  if (!SomeOtherThing(I))
    +    return 0;
    +    
    +  ... some long code ....
    +}
    +
    +
    + +

    This fixes these problems. A similar problem frequently happens in for +loops. A silly example is something like this:

    + +
    +
    +  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
    +    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(II)) {
    +      Value *LHS = BO->getOperand(0);
    +      Value *RHS = BO->getOperand(1);
    +      if (LHS != RHS) {
    +        ...
    +      }
    +    }
    +  }
    +
    +
    -

    The preferred replacement for stream functionality is the -llvm::raw_ostream class (for writing to output streams of various -sorts) and the llvm::MemoryBuffer API (for reading in files).

    +

    When you have very very small loops, this sort of structure is fine, but if +it exceeds more than 10-15 lines, it becomes difficult for people to read and +understand at a glance. +The problem with this sort of code is that it gets very nested very quickly, +meaning that the reader of the code has to keep a lot of context in their brain +to remember what is going immediately on in the loop, because they don't know +if/when the if conditions will have elses etc. It is strongly preferred to +structure the loop like this:

    + +
    +
    +  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
    +    BinaryOperator *BO = dyn_cast<BinaryOperator>(II);
    +    if (!BO) continue;
    +    
    +    Value *LHS = BO->getOperand(0);
    +    Value *RHS = BO->getOperand(1);
    +    if (LHS == RHS) continue;
    +  }
    +
    +
    + +

    This has all the benefits of using early exits from functions: it reduces +nesting of the loop, it makes it easier to describe why the conditions are true, +and it makes it obvious to the reader that there is no "else" coming up that +they have to push context into their brain for. If a loop is large, this can +be a big understandability win.

    + +
    + + + + +
    + +

    For similar reasons above (reduction of indentation and easier reading), + please do not use "else" or "else if" after something that interrupts + control flow like return, break, continue, goto, etc. For example, this is + "bad":

    + +
    +
    +  case 'J': {
    +    if (Signed) {
    +      Type = Context.getsigjmp_bufType();
    +      if (Type.isNull()) {
    +        Error = ASTContext::GE_Missing_sigjmp_buf;
    +        return QualType();
    +      } else {
    +        break;
    +      }
    +    } else {
    +      Type = Context.getjmp_bufType();
    +      if (Type.isNull()) {
    +        Error = ASTContext::GE_Missing_jmp_buf;
    +        return QualType();
    +      } else {
    +        break;
    +      }
    +    }
    +  }
    +  }
    +
    +
    + +

    It is better to write this something like:

    + +
    +
    +  case 'J':
    +    if (Signed) {
    +      Type = Context.getsigjmp_bufType();
    +      if (Type.isNull()) {
    +        Error = ASTContext::GE_Missing_sigjmp_buf;
    +        return QualType();
    +      }
    +    } else {
    +      Type = Context.getjmp_bufType();
    +      if (Type.isNull()) {
    +        Error = ASTContext::GE_Missing_jmp_buf;
    +        return QualType();
    +      }
    +    }
    +    break;
    +
    +
    + +

    Or better yet (in this case), as:

    + +
    +
    +  case 'J':
    +    if (Signed)
    +      Type = Context.getsigjmp_bufType();
    +    else
    +      Type = Context.getjmp_bufType();
    +    
    +    if (Type.isNull()) {
    +      Error = Signed ? ASTContext::GE_Missing_sigjmp_buf :
    +                       ASTContext::GE_Missing_jmp_buf;
    +      return QualType();
    +    }
    +    break;
    +
    +
    + +

    The idea is to reduce indentation and the amount of code you have to keep + track of when reading the code.

    + +
    + + + + +
    + +

    It is very common to write small loops that just compute a boolean + value. There are a number of ways that people commonly write these, but an + example of this sort of thing is:

    + +
    +
    +  bool FoundFoo = false;
    +  for (unsigned i = 0, e = BarList.size(); i != e; ++i)
    +    if (BarList[i]->isFoo()) {
    +      FoundFoo = true;
    +      break;
    +    }
    +    
    +  if (FoundFoo) {
    +    ...
    +  }
    +
    +
    + +

    This sort of code is awkward to write, and is almost always a bad sign. +Instead of this sort of loop, we strongly prefer to use a predicate function +(which may be static) that uses +early exits to compute the predicate. We prefer +the code to be structured like this: +

    + + +
    +
    +/// ListContainsFoo - Return true if the specified list has an element that is
    +/// a foo.
    +static bool ListContainsFoo(const std::vector<Bar*> &List) {
    +  for (unsigned i = 0, e = List.size(); i != e; ++i)
    +    if (List[i]->isFoo())
    +      return true;
    +  return false;
    +}
    +...
    +
    +  if (ListContainsFoo(BarList)) {
    +    ...
    +  }
    +
    +
    + +

    There are many reasons for doing this: it reduces indentation and factors out +code which can often be shared by other code that checks for the same predicate. +More importantly, it forces you to pick a name for the function, and +forces you to write a comment for it. In this silly example, this doesn't add +much value. However, if the condition is complex, this can make it a lot easier +for the reader to understand the code that queries for this predicate. Instead +of being faced with the in-line details of how we check to see if the BarList +contains a foo, we can trust the function name and continue reading with better +locality.

    @@ -538,6 +779,7 @@ sorts) and the llvm::MemoryBuffer API (for reading in files).

    + @@ -548,7 +790,7 @@ sorts) and the llvm::MemoryBuffer API (for reading in files).

    Use the "assert" function to its fullest. Check all of your -preconditions and assumptions, you never know when a bug (not neccesarily even +preconditions and assumptions, you never know when a bug (not necessarily even yours) might be caught early by an assertion, which reduces debugging time dramatically. The "<cassert>" header file is probably already included by the header files you are using, so it doesn't cost anything to use @@ -724,10 +966,156 @@ prefer it.

    + + + +
    + +

    The use of #include <iostream> in library files is +hereby forbidden. The primary reason for doing this is to +support clients using LLVM libraries as part of larger systems. In particular, +we statically link LLVM into some dynamic libraries. Even if LLVM isn't used, +the static c'tors are run whenever an application starts up that uses the dynamic +library. There are two problems with this:

    + +
      +
    1. The time to run the static c'tors impacts startup time of + applications—a critical time for GUI apps.
    2. +
    3. The static c'tors cause the app to pull many extra pages of memory off the + disk: both the code for the static c'tors in each .o file and the + small amount of data that gets touched. In addition, touched/dirty pages + put more pressure on the VM system on low-memory machines.
    4. +
    + +

    Note that using the other stream headers (<sstream> for +example) is not problematic in this regard (just <iostream>). +However, raw_ostream provides various APIs that are better performing for almost +every use than std::ostream style APIs, so you should just use it for new +code.

    + +

    New code should always +use raw_ostream for writing, or +the llvm::MemoryBuffer API for reading files.
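For the reading side, here is a minimal sketch of the MemoryBuffer API; the getFile signature used is an assumption based on the headers of this era:

  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>

  // Read a whole file into memory and dump it; returns false on failure.
  static bool dumpFile(const char *Path) {
    std::string Error;
    llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getFile(Path, &Error);
    if (!Buf) {
      llvm::errs() << "could not open " << Path << ": " << Error << "\n";
      return false;
    }
    llvm::outs() << Buf->getBuffer(); // entire contents as one StringRef
    delete Buf;                       // caller owns the buffer
    return true;
  }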

    + +
    + + + + + +
    + +

    The std::endl modifier, when used with iostreams outputs a newline +to the output stream specified. In addition to doing this, however, it also +flushes the output stream. In other words, these are equivalent:

    + +
    +
    +std::cout << std::endl;
    +std::cout << '\n' << std::flush;
    +
    +
    + +

    Most of the time, you probably have no reason to flush the output stream, so +it's better to use a literal '\n'.

    + +
    + + + + + +
    + +

    LLVM includes a lightweight, simple, and efficient stream implementation +in llvm/Support/raw_ostream.h which provides all of the common features +of std::ostream. All new code should use raw_ostream instead +of ostream.

    + +

    Unlike std::ostream, raw_ostream is not a template and can +be forward declared as class raw_ostream. Public headers should +generally not include the raw_ostream header, but use forward +declarations and constant references to raw_ostream instances.
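A minimal sketch of that pattern (the Widget class is invented for the example):

  // Widget.h -- public header: forward declare raw_ostream rather than
  // including llvm/Support/raw_ostream.h.
  namespace llvm {
    class raw_ostream;
  }

  class Widget {
  public:
    void print(llvm::raw_ostream &OS) const;
  };

  // Widget.cpp -- the implementation file includes the real header.
  #include "llvm/Support/raw_ostream.h"

  void Widget::print(llvm::raw_ostream &OS) const {
    OS << "widget\n"; // raw_ostream supports the usual << operators
  }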

    + +
    + + + + + + +

    This section describes preferred low-level formatting guidelines along with +reasoning on why we prefer them.

    + + + + +
    + +

    We prefer to put a space before an open parenthesis only in control flow +statements, but not in normal function call expressions and function-like +macros. For example, this is good:

    + +
    +
    +  if (x) ...
    +  for (i = 0; i != 100; ++i) ...
    +  while (llvm_rocks) ...
    +
    +  somefunc(42);
    +  assert(3 != 4 && "laws of math are failing me");
    +  
    +  a = foo(42, 92) + bar(x);
    +  
    +
    + +

    ... and this is bad:

    + +
    +
    +  if(x) ...
    +  for(i = 0; i != 100; ++i) ...
    +  while(llvm_rocks) ...
    +
    +  somefunc (42);
    +  assert (3 != 4 && "laws of math are failing me");
    +  
    +  a = foo (42, 92) + bar (x);
    +
    +
    + +

    The reason for doing this is not completely arbitrary. This style makes + control flow operators stand out more, and makes expressions flow better. The + function call operator binds very tightly as a postfix operator. Putting + a space after a function name (as in the last example) makes it appear that + the code might bind the arguments of the left-hand-side of a binary operator + with the argument list of a function and the name of the right side. More + specifically, it is easy to misread the "a" example as:

    + +
    +
    +  a = foo ((42, 92) + bar) (x);
    +
    +
    + +

    ... when skimming through the code. By avoiding a space in a function, we +avoid this misinterpretation.

    + +
    @@ -747,27 +1135,178 @@ get in the habit of always using preincrement, and you won't have a problem.

    -

    The std::endl modifier, when used with iostreams outputs a newline -to the output stream specified. In addition to doing this, however, it also -flushes the output stream. In other words, these are equivalent:

    +

    +In general, we strive to reduce indentation wherever possible. This is useful +because we want code to fit into 80 columns without +wrapping horribly, but also because it makes it easier to understand the code. +Namespaces are a funny thing: they are often large, and we often desire to put +lots of stuff into them (so they can be large). Other times they are tiny, +because they just hold an enum or something similar. In order to balance this, +we use different approaches for small versus large namespaces. +

    + +

    +If a namespace definition is small and easily fits on a screen (say, +less than 35 lines of code), then you should indent its body. Here's an +example: +

    -std::cout << std::endl;
    -std::cout << '\n' << std::flush;
    +namespace llvm {
    +  namespace X86 {
    +    /// RelocationType - An enum for the x86 relocation codes. Note that
    +    /// the terminology here doesn't follow x86 convention - word means
    +    /// 32-bit and dword means 64-bit.
    +    enum RelocationType {
    +      /// reloc_pcrel_word - PC relative relocation, add the relocated value to
    +      /// the value already in memory, after we adjust it for where the PC is.
    +      reloc_pcrel_word = 0,
    +
    +      /// reloc_picrel_word - PIC base relative relocation, add the relocated
    +      /// value to the value already in memory, after we adjust it for where the
    +      /// PIC base is.
    +      reloc_picrel_word = 1,
    +      
    +      /// reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
    +      /// add the relocated value to the value already in memory.
    +      reloc_absolute_word = 2,
    +      reloc_absolute_dword = 3
    +    };
    +  }
    +}
     
    -

    Most of the time, you probably have no reason to flush the output stream, so -it's better to use a literal '\n'.

    +

    Since the body is small, indenting adds value because it makes it very clear +where the namespace starts and ends, and it is easy to take the whole thing in +in one "gulp" when reading the code. If the blob of code in the namespace is +larger (as it typically is in a header in the llvm or clang namespaces), do not +indent the code, and add a comment indicating what namespace is being closed. +For example:

    +
    +
    +namespace llvm {
    +namespace knowledge {
    +
    +/// Grokable - This class represents things that Smith can have an intimate
    +/// understanding of and contains the data associated with it.
    +class Grokable {
    +...
    +public:
    +  explicit Grokable() { ... }
    +  virtual ~Grokable() = 0;
    +  
    +  ...
    +
    +};
    +
    +} // end namespace knowledge
    +} // end namespace llvm
    +
    +

    Because the class is large, we don't expect that the reader can easily +understand the entire concept in a glance, and the end of the file (where the +namespaces end) may be a long ways away from the place they open. As such, +indenting the contents of the namespace doesn't add any value, and detracts from +the readability of the class. In these cases it is best to not indent +the contents of the namespace.

    + +
    + + + + +
    + +

    After talking about namespaces in general, you may be wondering about +anonymous namespaces in particular. +Anonymous namespaces are a great language feature that tells the C++ compiler +that the contents of the namespace are only visible within the current +translation unit, allowing more aggressive optimization and eliminating the +possibility of symbol name collisions. Anonymous namespaces are to C++ as +"static" is to C functions and global variables. While "static" is available +in C++, anonymous namespaces are more general: they can make entire classes +private to a file.

    + +

    The problem with anonymous namespaces is that they naturally want to +encourage indentation of their body, and they reduce locality of reference: if +you see a random function definition in a C++ file, it is easy to see if it is +marked static, but seeing if it is in an anonymous namespace requires scanning +a big chunk of the file.

    + +

    Because of this, we have a simple guideline: make anonymous namespaces as +small as possible, and only use them for class declarations. For example, this +is good:

    + +
    +
    +namespace {
    +  class StringSort {
    +  ...
    +  public:
    +    StringSort(...)
    +    bool operator<(const char *RHS) const;
    +  };
    +} // end anonymous namespace
    +
    +static void Helper() { 
    +  ... 
    +}
    +
    +bool StringSort::operator<(const char *RHS) const {
    +  ...
    +}
    +
    +
    +
    + +

    This is bad:

    + + +
    +
    +namespace {
    +class StringSort {
    +...
    +public:
    +  StringSort(...)
    +  bool operator<(const char *RHS) const;
    +};
    +
    +void Helper() { 
    +  ... 
    +}
    +
    +bool StringSort::operator<(const char *RHS) const {
    +  ...
    +}
    +
    +} // end anonymous namespace
    +
    +
    +
    + + +

    This is bad specifically because if you're looking at "Helper" in the middle +of a large C++ file, you have no immediate way to tell if it is local to +the file. When it is marked static explicitly, this is immediately obvious. +Also, there is no reason to enclose the definition of "operator<" in the +namespace just because it was declared there. +

    + +
    + +
    @@ -807,7 +1346,7 @@ something.

    Chris Lattner
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-06-30 08:27:54 +0200 (Tue, 30 Jun 2009) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/CommandGuide/FileCheck.pod b/docs/CommandGuide/FileCheck.pod new file mode 100644 index 0000000000000..539f66fea5457 --- /dev/null +++ b/docs/CommandGuide/FileCheck.pod @@ -0,0 +1,65 @@ + +=pod + +=head1 NAME + +FileCheck - Flexible pattern matching file verifier + +=head1 SYNOPSIS + +B I [I<--check-prefix=XXX>] [I<--strict-whitespace>] + +=head1 DESCRIPTION + +B reads two files (one from standard input, and one specified on the +command line) and uses one to verify the other. This behavior is particularly +useful for the testsuite, which wants to verify that the output of some tool +(e.g. llc) contains the expected information (for example, a movsd from esp or +whatever is interesting). This is similar to using grep, but it is optimized +for matching multiple different inputs in one file in a specific order. + +The I file specifies the file that contains the patterns to +match. The file to verify is always read from standard input. + +The input and output of B is beyond the scope of this short +introduction. Please see the I page in the LLVM documentation. + +=head1 OPTIONS + +=over + +=item B<--help> + +Print a summary of command line options. + +=item B<--check-prefix> I + +FileCheck searches the contents of I for patterns to match. By +default, these patterns are prefixed with "CHECK:". If you'd like to use a +different prefix (e.g. because the same input file is checking multiple +different tool or options), the B<--check-prefix> argument allows you to specify +a specific prefix to match. + +=item B<--strict-whitespace> + +By default, FileCheck canonicalizes input horizontal whitespace (spaces and +tabs) which causes it to ignore these differences (a space will match a tab). +The --strict-whitespace argument disables this behavior. + +=item B<-version> + +Show the version number of this program. + +=back + +=head1 EXIT STATUS + +If B verifies that the file matches the expected contents, it exits +with 0. Otherwise, if not, or if an error occurs, it will exit with a non-zero +value. + +=head1 AUTHORS + +Maintained by The LLVM Team (L). 
+ +=cut diff --git a/docs/CommandGuide/Makefile b/docs/CommandGuide/Makefile index cf77e6a33db06..3b65183107263 100644 --- a/docs/CommandGuide/Makefile +++ b/docs/CommandGuide/Makefile @@ -48,6 +48,12 @@ HTML := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_HTML_DIR)%.html, $(POD)) MAN := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_MAN_DIR)%.1, $(POD)) PS := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_PS_DIR)%.ps, $(POD)) +# The set of man pages we will not install +NO_INSTALL_MANS = $(DST_MAN_DIR)FileCheck.1 + +# The set of man pages that we will install +INSTALL_MANS = $(filter-out $(NO_INSTALL_MANS), $(MAN)) + .SUFFIXES: .SUFFIXES: .html .pod .1 .ps @@ -75,7 +81,7 @@ HTML_DIR := $(PROJ_docsdir)/html/CommandGuide MAN_DIR := $(PROJ_mandir)/man1 PS_DIR := $(PROJ_docsdir)/ps -install-local:: $(HTML) $(MAN) $(PS) +install-local:: $(HTML) $(INSTALL_MANS) $(PS) $(Echo) Installing HTML CommandGuide Documentation $(Verb) $(MKDIR) $(HTML_DIR) $(Verb) $(DataInstall) $(HTML) $(HTML_DIR) @@ -83,7 +89,7 @@ install-local:: $(HTML) $(MAN) $(PS) $(Verb) $(DataInstall) $(PROJ_SRC_DIR)/manpage.css $(HTML_DIR) $(Echo) Installing MAN CommandGuide Documentation $(Verb) $(MKDIR) $(MAN_DIR) - $(Verb) $(DataInstall) $(MAN) $(MAN_DIR) + $(Verb) $(DataInstall) $(INSTALL_MANS) $(MAN_DIR) $(Echo) Installing PS CommandGuide Documentation $(Verb) $(MKDIR) $(PS_DIR) $(Verb) $(DataInstall) $(PS) $(PS_DIR) diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html index f05260bfebb35..f1046fac44809 100644 --- a/docs/CommandGuide/index.html +++ b/docs/CommandGuide/index.html @@ -128,6 +128,8 @@ options) arguments to the tool you are interested in.

      +
    • FileCheck - + Flexible file verifier used extensively by the testing harness
    • tblgen - target description reader and generator
    • @@ -144,7 +146,7 @@ options) arguments to the tool you are interested in.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
      - Last modified: $Date: 2008-12-11 18:12:52 +0100 (Thu, 11 Dec 2008) $ + Last modified: $Date: 2009-08-15 17:38:11 +0200 (Sat, 15 Aug 2009) $ diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod new file mode 100644 index 0000000000000..a818302c242aa --- /dev/null +++ b/docs/CommandGuide/lit.pod @@ -0,0 +1,222 @@ +=pod + +=head1 NAME + +lit - LLVM Integrated Tester + +=head1 SYNOPSIS + +B [I] [I] + +=head1 DESCRIPTION + +B is a portable tool for executing LLVM and Clang style test suites, +summarizing their results, and providing indication of failures. B is +designed to be a lightweight testing tool with as simple a user interface as +possible. + +B should be run with one or more I to run specified on the command +line. Tests can be either individual test files or directories to search for +tests (see L<"TEST DISCOVERY">). + +Each specified test will be executed (potentially in parallel) and once all +tests have been run B will print summary information on the number of tests +which passed or failed (see L<"TEST STATUS RESULTS">). The B program will +execute with a non-zero exit code if any tests fail. + +By default B will use a succinct progress display and will only print +summary information for test failures. See L<"OUTPUT OPTIONS"> for options +controlling the B progress display and output. + +B also includes a number of options for controlling how tests are exected +(specific features may depend on the particular test format). See L<"EXECUTION +OPTIONS"> for more information. + +Finally, B also supports additional options for only running a subset of +the options specified on the command line, see L<"SELECTION OPTIONS"> for +more information. + +=head1 GENERAL OPTIONS + +=over + +=item B<-h>, B<--help> + +Show the B help message. + +=item B<-j> I, B<--threads>=I + +Run I tests in parallel. By default, this is automatically chose to match the +number of detected available CPUs. + +=back + +=head1 OUTPUT OPTIONS + +=over + +=item B<-q>, B<--quiet> + +Suppress any output except for test failures. + +=item B<-s>, B<--succinct> + +Show less output, for example don't show information on tests that pass. + +=item B<-v>, B<--verbose> + +Show more information on test failures, for example the entire test output +instead of just the test result. + +=item B<--no-progress-bar> + +Do not use curses based progress bar. + +=back + +=head1 EXECUTION OPTIONS + +=over + +=item B<--path>=I + +Specify an addition I to use when searching for executables in tests. + +=item B<--vg> + +Run individual tests under valgrind (using the memcheck tool). The +I<--error-exitcode> argument for valgrind is used so that valgrind failures will +cause the program to exit with a non-zero status. + +=item B<--vg-arg>=I + +When I<--vg> is used, specify an additional argument to pass to valgrind itself. + +=item B<--time-tests> + +Track the wall time individual tests take to execute and includes the results in +the summary output. This is useful for determining which tests in a test suite +take the most time to execute. Note that this option is most useful with I<-j +1>. + +=back + +=head1 SELECTION OPTIONS + +=over + +=item B<--max-tests>=I + +Run at most I tests and then terminate. + +=item B<--max-time>=I + +Spend at most I seconds (approximately) running tests and then terminate. + +=item B<--shuffle> + +Run the tests in a random order. + +=back + +=head1 ADDITIONAL OPTIONS + +=over + +=item B<--debug> + +Run B in debug mode, for debugging configuration issues and B itself. 
+
+=item B<--show-suites>
+
+List the discovered test suites as part of the standard output.
+
+=item B<--no-tcl-as-sh>
+
+Run Tcl scripts internally (instead of converting to shell scripts).
+
+=back
+
+=head1 EXIT STATUS
+
+B<lit> will exit with an exit code of 1 if there are any FAIL or XPASS
+results. Otherwise, it will exit with status 0. Other exit codes are used for
+non-test related failures (for example a user error or an internal program
+error).
+
+=head1 TEST DISCOVERY
+
+The inputs passed to B<lit> can be either individual tests, or entire
+directories or hierarchies of tests to run. When B<lit> starts up, the first
+thing it does is convert the inputs into a complete list of tests to run as
+part of the test run.
+
+In the B<lit> model, every test must exist inside some I<test suite>. B<lit>
+resolves the inputs specified on the command line to test suites by searching
+upwards from the input path until it finds a I<lit.cfg> or I<lit.site.cfg>
+file. These files serve as both a marker of test suites and as configuration
+files which B<lit> loads in order to understand how to find and run the tests
+inside the test suite.
+
+Once B<lit> has mapped the inputs into test suites it traverses the list of
+inputs adding tests for individual files and recursively searching for tests in
+directories.
+
+This behavior makes it easy to specify a subset of tests to run, while still
+allowing the test suite configuration to control exactly how tests are
+interpreted. In addition, B<lit> always identifies tests by the test suite they
+are in, and their relative path inside the test suite. For appropriately
+configured projects, this allows B<lit> to provide convenient and flexible
+support for out-of-tree builds.
+
+=head1 TEST STATUS RESULTS
+
+Each test ultimately produces one of the following six results:
+
+=over
+
+=item B<PASS>
+
+The test succeeded.
+
+=item B<XFAIL>
+
+The test failed, but that is expected. This is used for test formats which allow
+specifying that a test does not currently work but should remain in the test
+suite.
+
+=item B<XPASS>
+
+The test succeeded, but it was expected to fail. This is used for tests which
+were specified as expected to fail, but are now succeeding (generally because
+the feature they test was broken and has been fixed).
+
+=item B<FAIL>
+
+The test failed.
+
+=item B<UNRESOLVED>
+
+The test result could not be determined. For example, this occurs when the test
+could not be run, the test itself is invalid, or the test was interrupted.
+
+=item B<UNSUPPORTED>
+
+The test is not supported in this environment. This is used by test formats
+which can report unsupported tests.
+
+=back
+
+Depending on the test format tests may produce additional information about
+their status (generally only for failures). See the L<"OUTPUT OPTIONS">
+section for more information.
+
+=head1 SEE ALSO
+
+L
+
+=head1 AUTHOR
+
+Written by Daniel Dunbar and maintained by the LLVM Team (L<http://llvm.org/>).
+
+=cut
diff --git a/docs/CommandGuide/llc.pod b/docs/CommandGuide/llc.pod
index eba7859e28827..8adfb682be01b 100644
--- a/docs/CommandGuide/llc.pod
+++ b/docs/CommandGuide/llc.pod
@@ -10,18 +10,19 @@ B<llc> [I<options>] [I<filename>]
 
 =head1 DESCRIPTION
 
-The B<llc> command compiles LLVM bitcode into assembly language for a
+The B<llc> command compiles LLVM source inputs into assembly language for a
 specified architecture. The assembly language output can then be passed through
 a native assembler and linker to generate a native executable.
 
 The choice of architecture for the output assembly code is automatically
-determined from the input bitcode file, unless the B<-march> option is used to
-override the default.
+determined from the input file, unless the B<-march> option is used to override
+the default.
 
 =head1 OPTIONS
 
-If I<filename> is - or omitted, B<llc> reads LLVM bitcode from standard input.
-Otherwise, it will read LLVM bitcode from I<filename>.
+If I<filename> is - or omitted, B<llc> reads from standard input. Otherwise, it
+will read from I<filename>. Inputs can be in either the LLVM assembly language
+format (.ll) or the LLVM bitcode format (.bc).
 
 If the B<-o> option is omitted, then B<llc> will send its output to standard
 output if the input is from standard input. If the B<-o> option specifies -,
@@ -47,20 +48,15 @@
 Generate code at different optimization levels. These correspond to the I<-O0>,
 I<-O1>, I<-O2>, I<-O3>, and I<-O4> optimization levels used by B<llvm-gcc> and
 B<clang>.
 
-=item B<-f>
-
-Overwrite output files. By default, B<llc> will refuse to overwrite
-an output file which already exists.
-
 =item B<-mtriple>=I<target triple>
 
-Override the target triple specified in the input bitcode file with the
-specified string.
+Override the target triple specified in the input file with the specified
+string.
 
 =item B<-march>=I<arch>
 
 Specify the architecture for which to generate assembly, overriding the target
-encoded in the bitcode file. See the output of B<llc --version> for a list of
+encoded in the input file. See the output of B<llc --version> for a list of
 valid architectures. By default this is inferred from the target triple or
 autodetected to the current architecture.
diff --git a/docs/CommandGuide/llvm-as.pod b/docs/CommandGuide/llvm-as.pod
index 2befed13ae009..045a9245b6097 100644
--- a/docs/CommandGuide/llvm-as.pod
+++ b/docs/CommandGuide/llvm-as.pod
@@ -46,9 +46,9 @@ suffix is appended.
 
 =item B<-f>
 
-Force overwrite. Normally, B<llvm-as> will refuse to overwrite an
-output file that already exists. With this option, B<llvm-as>
-will overwrite the output file and replace it with new bitcode.
+Enable binary output on terminals. Normally, B<llvm-as> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-as> will write raw bitcode regardless of the output device.
 
 =item B<--help>
diff --git a/docs/CommandGuide/llvm-dis.pod b/docs/CommandGuide/llvm-dis.pod
index 8df382d2e9c00..2b83290c9b2e2 100644
--- a/docs/CommandGuide/llvm-dis.pod
+++ b/docs/CommandGuide/llvm-dis.pod
@@ -29,9 +29,9 @@ B<-o> option.
 
 =item B<-f>
 
-Force overwrite. Normally, B<llvm-dis> will refuse to overwrite
-an output file that already exists. With this option, B<llvm-dis>
-will overwrite the output file.
+Enable binary output on terminals. Normally, B<llvm-dis> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-dis> will write raw bitcode regardless of the output device.
 
 =item B<--help>
diff --git a/docs/CommandGuide/llvm-extract.pod b/docs/CommandGuide/llvm-extract.pod
index d916612ec5c65..b62e8ae312bf4 100644
--- a/docs/CommandGuide/llvm-extract.pod
+++ b/docs/CommandGuide/llvm-extract.pod
@@ -28,9 +28,9 @@ unless the B<-o> option is specified (see below).
 
 =item B<-f>
 
-Force overwrite. Normally, B<llvm-extract> will refuse to overwrite an
-output file that already exists. With this option, B<llvm-extract>
-will overwrite the output file and replace it with new bitcode.
+Enable binary output on terminals. Normally, B<llvm-extract> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-extract> will write raw bitcode regardless of the output device.
 
 =item B<--func> I<function-name>
 
@@ -45,6 +45,10 @@ Print a summary of command line options.
 
 Specify the output filename. If filename is "-" (the default), then
 B<llvm-extract> sends its output to standard output.
 
+=item B<-S>
+
+Write output in LLVM intermediate language (instead of bitcode).
+
 =back
 
 =head1 EXIT STATUS
diff --git a/docs/CommandGuide/llvm-ld.pod b/docs/CommandGuide/llvm-ld.pod
index 224939c77c94e..536ab0fa43d5f 100644
--- a/docs/CommandGuide/llvm-ld.pod
+++ b/docs/CommandGuide/llvm-ld.pod
@@ -104,6 +104,12 @@ should be generated by the linker. By default, B<llvm-ld> generates a
 file named F<a.out> for compatibility with B<ld>. The output will be written to
 F<filename>.
 
+=item B<-b> F<filename>
+
+This option can be used to override the output bitcode file name. By default,
+the name of the bitcode output file is the name specified by the B<-o> option
+with an additional ".bc" suffix.
+
 =item B<-l>F<name>
 
 This option specifies the F<name> of a library to search when resolving symbols
diff --git a/docs/CommandGuide/llvm-link.pod b/docs/CommandGuide/llvm-link.pod
index 5f4dcb6e354d0..e1a1267c52ec1 100644
--- a/docs/CommandGuide/llvm-link.pod
+++ b/docs/CommandGuide/llvm-link.pod
@@ -33,14 +33,19 @@ the order in which they were specified on the command line.
 
 =item B<-f>
 
-Overwrite output files. By default, B<llvm-link> will not overwrite an output
-file if it already exists.
+Enable binary output on terminals. Normally, B<llvm-link> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<llvm-link> will write raw bitcode regardless of the output device.
 
 =item B<-o> F<filename>
 
 Specify the output file name. If F<filename> is C<->, then B<llvm-link> will
 write its output to standard output.
 
+=item B<-S>
+
+Write output in LLVM intermediate language (instead of bitcode).
+
 =item B<-d>
 
 If specified, B<llvm-link> prints a human-readable version of the output
diff --git a/docs/CommandGuide/llvmc.pod b/docs/CommandGuide/llvmc.pod
index 97445edf41272..e3031e123d383 100644
--- a/docs/CommandGuide/llvmc.pod
+++ b/docs/CommandGuide/llvmc.pod
@@ -71,6 +71,12 @@ write files into the directory specified with the I<-o> option. The
 I<--save-temps=cwd> and I<--save-temps> switches are both synonyms for the
 default behaviour.
 
+=item B<--temp-dir> I<directory>
+
+Store temporary files in the given directory. This directory is deleted on exit
+unless I<--save-temps> is specified. If I<--save-temps=obj> is also specified,
+I<--temp-dir> takes precedence.
+
 =item B<--help>
 
 Print a summary of command-line options and exit.
diff --git a/docs/CommandGuide/opt.pod b/docs/CommandGuide/opt.pod
index 75b7eddd4cf13..d1d1db5ef67ed 100644
--- a/docs/CommandGuide/opt.pod
+++ b/docs/CommandGuide/opt.pod
@@ -11,24 +11,25 @@ B<opt> [I<options>] [I<filename>]
 
 =head1 DESCRIPTION
 
 The B<opt> command is the modular LLVM optimizer and analyzer. It takes LLVM
-bitcode as input, runs the specified optimizations or analyses on it, and then
-outputs the optimized LLVM bitcode or the analysis results. The function of
+source files as input, runs the specified optimizations or analyses on them,
+and then outputs the optimized file or the analysis results. The function of
 B<opt> depends on whether the B<-analyze> option is given.
 
-When B<-analyze> is specified, B<opt> performs various analyses of LLVM
-bitcode. It will usually print the results on standard output, but in a few
-cases, it will print output to standard error or generate a file with the
-analysis output, which is usually done when the output is meant for another
-program.
+When B<-analyze> is specified, B<opt> performs various analyses of the input
+source. It will usually print the results on standard output, but in a few
+cases, it will print output to standard error or generate a file with the
+analysis output, which is usually done when the output is meant for another
+program.
 
 While B<-analyze> is I<not> given, B<opt> attempts to produce an optimized
-bitcode file. The optimizations available via B<opt> depend upon what
+output file. The optimizations available via B<opt> depend upon what
 libraries were linked into it as well as any additional libraries that have
 been loaded with the B<-load> option. Use the B<-help> option to determine
 what optimizations you can use.
 
 If I<filename> is omitted from the command line or is I<->, B<opt> reads its
-input from standard input. The input must be an LLVM bitcode file.
+input from standard input. Inputs can be in either the LLVM assembly language
+format (.ll) or the LLVM bitcode format (.bc).
 
 If an output filename is not specified with the B<-o> option, B<opt>
 writes its output to the standard output.
@@ -39,9 +40,9 @@ writes its output to the standard output.
 
 =item B<-f>
 
-Force overwrite. Normally, B<opt> will refuse to overwrite an
-output file that already exists. With this option, B<opt> will
-overwrite the output file and replace it with new bitcode.
+Enable binary output on terminals. Normally, B<opt> will refuse to
+write raw bitcode output if the output stream is a terminal. With this option,
+B<opt> will write raw bitcode regardless of the output device.
 
 =item B<-help>
 
@@ -51,6 +52,10 @@ Print a summary of command line options.
 
 Specify the output filename.
 
+=item B<-S>
+
+Write output in LLVM intermediate language (instead of bitcode).
+
 =item B<-{passname}>
 
 B<opt> provides the ability to run any of LLVM's optimization or analysis passes
diff --git a/docs/CommandLine.html b/docs/CommandLine.html
index d6cf48ce5111c..7e6e2f2e13d34 100644
--- a/docs/CommandLine.html
+++ b/docs/CommandLine.html
@@ -331,13 +331,13 @@ OPTIONS:

      In addition to input and output filenames, we would like the compiler example -to support three boolean flags: "-f" to force overwriting of the output -file, "--quiet" to enable quiet mode, and "-q" for backwards -compatibility with some of our users. We can support these by declaring options -of boolean type like this:

      +to support three boolean flags: "-f" to force writing binary output to +a terminal, "--quiet" to enable quiet mode, and "-q" for +backwards compatibility with some of our users. We can support these by +declaring options of boolean type like this:

      -cl::opt<bool> Force ("f", cl::desc("Overwrite output files"));
      +cl::opt<bool> Force ("f", cl::desc("Enable binary output on terminals"));
       cl::opt<bool> Quiet ("quiet", cl::desc("Don't print informational messages"));
       cl::opt<bool> Quiet2("q", cl::desc("Don't print informational messages"), cl::Hidden);
       
      @@ -378,7 +378,7 @@ library calls to parse the string value into the specified data type.

      USAGE: compiler [options] <input file> OPTIONS: - -f - Overwrite output files + -f - Enable binary output on terminals -o - Override output filename -quiet - Don't print informational messages -help - display available options (--help-hidden for more) @@ -390,7 +390,7 @@ OPTIONS: USAGE: compiler [options] <input file> OPTIONS: - -f - Overwrite output files + -f - Enable binary output on terminals -o - Override output filename -q - Don't print informational messages -quiet - Don't print informational messages @@ -530,7 +530,7 @@ OPTIONS: -O1 - Enable trivial optimizations -O2 - Enable default optimizations -O3 - Enable expensive optimizations - -f - Overwrite output files + -f - Enable binary output on terminals -help - display available options (--help-hidden for more) -o <filename> - Specify output filename -quiet - Don't print informational messages @@ -614,7 +614,7 @@ OPTIONS: =none - disable debug information =quick - enable quick debug information =detailed - enable detailed debug information - -f - Overwrite output files + -f - Enable binary output on terminals -help - display available options (--help-hidden for more) -o <filename> - Specify output filename -quiet - Don't print informational messages @@ -1022,7 +1022,7 @@ files that use them. This is called the internal storage model.

code from the storage of the value parsed. For example, let's say that we have a '-debug' option that we would like to use to enable debug information across the entire body of our program. In this case, the boolean value -controlling the debug code should be globally accessable (in a header file, for +controlling the debug code should be globally accessible (in a header file, for example) yet the command line option processing code should not be exposed to all of these clients (requiring lots of .cpp files to #include CommandLine.h).
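As a concrete sketch of this storage model (using the LLVM CommandLine API; the option and variable names are illustrative rather than taken from the LLVM sources):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    bool DebugFlag;  // the global value, declared in a header for all clients

    // The second template argument ("true") requests external storage, and
    // cl::location says where that storage lives.
    static cl::opt<bool, true>
    Debug("debug", cl::desc("Enable debugging output"), cl::Hidden,
          cl::location(DebugFlag));

    // An ordinary option with internal storage; cl::init supplies its default
    // (note the warning below about combining cl::init with cl::location).
    static cl::opt<int>
    Verbosity("verbosity", cl::desc("Verbosity level"), cl::init(0));

Client code simply tests DebugFlag; only this one .cpp file needs to include CommandLine.h.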

      @@ -1107,7 +1107,7 @@ a command line option. Look here for an example.
    • The cl::init attribute specifies an -inital value for a scalar option. If this attribute is +initial value for a scalar option. If this attribute is not specified then the command line option value defaults to the value created by the default constructor for the type. Warning: If you specify both cl::init and cl::location for an option, @@ -1178,7 +1178,7 @@ href="#cl::list">cl::list. These modifiers give you the ability to tweak how options are parsed and how --help output is generated to fit your application well.

      -

      These options fall into five main catagories:

      +

      These options fall into five main categories:

      1. Hiding an option from --help output
      2. @@ -1190,9 +1190,9 @@ your application well.

      3. Miscellaneous option modifiers
      -

      It is not possible to specify two options from the same catagory (you'll get +

      It is not possible to specify two options from the same category (you'll get a runtime error) to a single option, except for options in the miscellaneous -catagory. The CommandLine library specifies defaults for all of these settings +category. The CommandLine library specifies defaults for all of these settings that are the most useful in practice and the most common, which mean that you usually shouldn't have to worry about these.

      @@ -1441,9 +1441,9 @@ string "-pos1 -foo -bar baz -pos2 -bork" would cause the "-foo -bar
    • The cl::Sink modifier is used to handle unknown options. If there is at least one option with -cl::Sink modifier specified, the parser passes +cl::Sink modifier specified, the parser passes unrecognized option strings to it as values instead of signaling an -error. As with cl::CommaSeparated, this modifier +error. As with cl::CommaSeparated, this modifier only makes sense with a cl::list option.
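A minimal sketch of that pattern (the list's variable name is invented for illustration):

    #include "llvm/Support/CommandLine.h"
    #include <string>
    using namespace llvm;

    // Any option string the parser does not recognize is appended here as a
    // value instead of being reported as an error.
    static cl::list<std::string>
    UnknownOptions(cl::Sink, cl::desc("<unrecognized options>"));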
    @@ -1536,7 +1536,7 @@ not be available, it can't just look in argv[0]), the name of the environment variable to examine, the optional additional extra text to emit when the --help option is invoked, and the boolean -switch that controls whether reponse files +switch that controls whether response files should be read.

    cl::ParseEnvironmentOptions will break the environment @@ -1883,7 +1883,7 @@ our example, we implement parse as:

    default: // Print an error message if unrecognized character! - return O.error(": '" + Arg + "' value invalid for file size argument!"); + return O.error("'" + Arg + "' value invalid for file size argument!"); } } } @@ -1972,7 +1972,7 @@ tutorial.

    Chris Lattner
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-06-17 03:09:39 +0000 (Wed, 17 Jun 2009) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html index 6b531c4e47f0c..7a40a4d83a558 100644 --- a/docs/CompilerDriver.html +++ b/docs/CompilerDriver.html @@ -114,6 +114,10 @@ delete them on exit. This option can also take an argument: the --save-temps=obj switch will write files into the directory specified with the -o option. The --save-temps=cwd and --save-temps switches are both synonyms for the default behaviour. +
• --temp-dir DIRECTORY - Store temporary files in the given directory. This +directory is deleted on exit unless --save-temps is specified. If +--save-temps=obj is also specified, --temp-dir takes +precedence.
  • --check-graph - Check the compilation for common errors like mismatched output/input language names, multiple default edges and cycles. Because of plugins, these checks can't be performed at compile-time. Exit with code zero @@ -303,13 +307,13 @@ separate option groups syntactically.

    -std=c99. It is also allowed to use spaces instead of the equality sign: -std c99. At most one occurrence is allowed.
  • parameter_list_option - same as the above, but more than one option -occurence is allowed.
  • +occurrence is allowed.
  • prefix_option - same as the parameter_option, but the option name and argument do not have to be separated. Example: -ofile. This can be also specified as -o file; however, -o=file will be parsed incorrectly (=file will be interpreted as option value). At most one occurrence is allowed.
  • -
  • prefix_list_option - same as the above, but more than one occurence of +
  • prefix_list_option - same as the above, but more than one occurrence of the option is allowed; example: -lm -lpthread.
  • alias_option - a special option type for creating aliases. Unlike other option types, aliases are not allowed to have any properties besides the @@ -341,6 +345,11 @@ output.
  • special cases). Usage example: (parameter_list_option "foo", (multi_val 3)). Only list options can have this attribute; you can, however, use the one_or_more and zero_or_one properties. +
  • init - this option has a default value, either a string (if it is a +parameter), or a boolean (if it is a switch; boolean constants are called +true and false). List options can't have this attribute. Usage +examples: (switch_option "foo", (init true)); (prefix_option "bar", +(init "baz")).
  • extern - this option is defined in some other plugin, see below.
  • @@ -358,7 +367,8 @@ for. Example:

    (switch_option "E", (extern)) ... -

    See also the section on plugin priorities.

    +

    If an external option has additional attributes besides 'extern', they are +ignored. See also the section on plugin priorities.

    @@ -428,15 +438,21 @@ user. Example: (not_empty "o").
  • empty - The opposite of not_empty. Equivalent to (not (not_empty X)). Provided for convenience.
  • +
  • single_input_file - Returns true if there was only one input file +provided on the command-line. Used without arguments: +(single_input_file).
  • +
  • multiple_input_files - Equivalent to (not (single_input_file)) (the +case of zero input files is considered an error).
  • default - Always evaluates to true. Should always be the last test in the case expression.
  • -
  • and - A standard logical combinator that returns true iff all -of its arguments return true. Used like this: (and (test1), -(test2), ... (testN)). Nesting of and and or is allowed, -but not encouraged.
  • -
  • or - Another logical combinator that returns true only if any -one of its arguments returns true. Example: (or (test1), -(test2), ... (testN)).
  • +
  • and - A standard binary logical combinator that returns true iff all of +its arguments return true. Used like this: (and (test1), (test2), +... (testN)). Nesting of and and or is allowed, but not +encouraged.
  • +
  • or - A binary logical combinator that returns true iff any of its +arguments returns true. Example: (or (test1), (test2), ... (testN)).
  • +
  • not - Standard unary logical combinator that negates its +argument. Example: (not (or (test1), (test2), ... (testN))).
  • @@ -666,7 +682,7 @@ the Base plugin behav Mikhail Glushenkov
    LLVM Compiler Infrastructure
    -Last modified: $Date: 2009-06-30 02:16:43 +0200 (Tue, 30 Jun 2009) $ +Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
    diff --git a/docs/DebuggingJITedCode.html b/docs/DebuggingJITedCode.html new file mode 100644 index 0000000000000..92570f454c929 --- /dev/null +++ b/docs/DebuggingJITedCode.html @@ -0,0 +1,171 @@ + + + + Debugging JITed Code With GDB + + + + +
    Debugging JITed Code With GDB
    +
      +
1. Introduction
2. Quickstart
3. Example with clang and lli
    +
    Written by Reid Kleckner
    + + + + +
    + +

    Without special runtime support, debugging dynamically generated code with +GDB (as well as most debuggers) can be quite painful. Debuggers generally read +debug information from the object file of the code, but for JITed code, there is +no such file to look for. +

    + +

Depending on the architecture, this can impact the debugging experience in +different ways. For example, on most 32-bit x86 architectures, you can simply +compile with -fno-omit-frame-pointer for GCC and -fdisable-fp-elim for LLVM. +When GDB creates a backtrace, it can properly unwind the stack, but the stack +frames owned by JITed code have ??'s instead of the appropriate symbol name. +However, on Linux x86_64 in particular, GDB relies on the DWARF CFA debug +information to unwind the stack, so even if you compile your program to leave +the frame pointer untouched, GDB will usually be unable to unwind the stack past +any JITed code stack frames.

    + +

    In order to communicate the necessary debug info to GDB, an interface for +registering JITed code with debuggers has been designed and implemented for +GDB and LLVM. At a high level, whenever LLVM generates new machine code, it +also generates an object file in memory containing the debug information. LLVM +then adds the object file to the global list of object files and calls a special +function (__jit_debug_register_code) marked noinline that GDB knows about. When +GDB attaches to a process, it puts a breakpoint in this function and loads all +of the object files in the global list. When LLVM calls the registration +function, GDB catches the breakpoint signal, loads the new object file from +LLVM's memory, and resumes the execution. In this way, GDB can get the +necessary debug information. +
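The registration hook itself is a small set of plain-C declarations shared between the JIT and the debugger. A sketch of their shape, following the structure GDB documents for this interface (the descriptor name __jit_debug_descriptor and the field layout come from the GDB side of the interface and are assumed here to match what LLVM emits):

    #include <stdint.h>

    // One registered in-memory object file.
    struct jit_code_entry {
      struct jit_code_entry *next_entry;
      struct jit_code_entry *prev_entry;
      const char *symfile_addr;  // start of the object file image
      uint64_t symfile_size;
    };

    // The global list head and action flag that GDB inspects on the breakpoint.
    struct jit_descriptor {
      uint32_t version;
      uint32_t action_flag;  // register or unregister request
      struct jit_code_entry *relevant_entry;
      struct jit_code_entry *first_entry;
    };

    // GDB plants a breakpoint in this deliberately empty, noinline function;
    // the JIT calls it after linking a new entry into the list.
    extern "C" void __jit_debug_register_code() {}

    extern "C" struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };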

    + +

    At the time of this writing, LLVM only supports architectures that use ELF +object files and it only generates symbols and DWARF CFA information. However, +it would be easy to add more information to the object file, so we don't need to +coordinate with GDB to get better debug information. +

    +
    + + + + +
    + +

In order to debug code JITed by LLVM, you need to install a recent version +of GDB. The interface was added on 2009-08-19, so you need a snapshot of GDB +more recent than that. Either download a snapshot of GDB or check out CVS as +instructed here. Here +are the commands for doing a checkout and building the code:

    + +
    +$ cvs -z 3 -d :pserver:anoncvs@sourceware.org:/cvs/src co gdb
    +$ mv src gdb   # You probably don't want this checkout called "src".
    +$ cd gdb
    +$ ./configure --prefix="$GDB_INSTALL"
    +$ make
    +$ make install
    +
    + +

    You can then use -jit-emit-debug in the LLVM command line arguments to enable +the interface. +

    +
    + + + + +
    + +

For example, consider debugging lli as it runs the following C code in +foo.c:

    + +
    +#include <stdio.h>
    +
    +void foo() {
    +    printf("%d\n", *(int*)NULL);  // Crash here
    +}
    +
    +void bar() {
    +    foo();
    +}
    +
    +void baz() {
    +    bar();
    +}
    +
    +int main(int argc, char **argv) {
    +    baz();
    +}
    +
    + +

    Here are the commands to run that application under GDB and print the stack +trace at the crash: +

    + +
    +# Compile foo.c to bitcode.  You can use either clang or llvm-gcc with this
    +# command line.  Both require -fexceptions, or the calls are all marked
    +# 'nounwind' which disables DWARF CFA info.
    +$ clang foo.c -fexceptions -emit-llvm -c -o foo.bc
    +
    +# Run foo.bc under lli with -jit-emit-debug.  If you built lli in debug mode,
    +# -jit-emit-debug defaults to true.
    +$ $GDB_INSTALL/gdb --args lli -jit-emit-debug foo.bc
    +...
    +
    +# Run the code.
    +(gdb) run
    +Starting program: /tmp/gdb/lli -jit-emit-debug foo.bc
    +[Thread debugging using libthread_db enabled]
    +
    +Program received signal SIGSEGV, Segmentation fault.
    +0x00007ffff7f55164 in foo ()
    +
    +# Print the backtrace, this time with symbols instead of ??.
    +(gdb) bt
    +#0  0x00007ffff7f55164 in foo ()
    +#1  0x00007ffff7f550f9 in bar ()
    +#2  0x00007ffff7f55099 in baz ()
    +#3  0x00007ffff7f5502a in main ()
    +#4  0x00000000007c0225 in llvm::JIT::runFunction(llvm::Function*,
    +    std::vector<llvm::GenericValue,
    +    std::allocator<llvm::GenericValue> > const&) ()
    +#5  0x00000000007d6d98 in
    +    llvm::ExecutionEngine::runFunctionAsMain(llvm::Function*,
    +    std::vector<std::string,
    +    std::allocator<std::string> > const&, char const* const*) ()
    +#6  0x00000000004dab76 in main ()
    +
    +
    + +

    As you can see, GDB can correctly unwind the stack and has the appropriate +function names. +

    + + +
    +
    + Valid CSS + Valid HTML 4.01 + Reid Kleckner
    + The LLVM Compiler Infrastructure
    + Last modified: $Date: 2009-01-01 23:10:51 -0800 (Thu, 01 Jan 2009) $ +
    + + diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html index 13a908e5a9d39..49866061c9f6d 100644 --- a/docs/DeveloperPolicy.html +++ b/docs/DeveloperPolicy.html @@ -99,7 +99,9 @@
    1. Make your patch against the Subversion trunk, not a branch, and not an old - version of LLVM. This makes it easy to apply the patch.
    2. + version of LLVM. This makes it easy to apply the patch. For information + on how to check out SVN trunk, please see the Getting Started Guide.
    3. Similarly, patches should be submitted soon after they are generated. Old patches may not apply correctly if the underlying code changes between the @@ -185,14 +187,18 @@ svn diff else. The current code owners are:

        +
Evan Cheng: Code generator and all targets.
+ Doug Gregor: Clang Basic, Lex, Parse, and Sema Libraries.
  Anton Korobeynikov: Exception handling, debug information, and Windows codegen.
- Duncan Sands: llvm-gcc 4.2.
- Evan Cheng: Code generator and all targets.
  Ted Kremenek: Clang Static Analyzer.
- Chris Lattner: Everything else.
+ Chris Lattner: Everything not covered by someone else.
+ Duncan Sands: llvm-gcc 4.2.

      Note that code ownership is completely different than reviewers: anyone can @@ -287,9 +293,12 @@ svn diff

      We prefer for this to be handled before submission but understand that it - isn't possible to test all of this for every submission. Our nightly testing - infrastructure normally finds these problems. A good rule of thumb is to - check the nightly testers for regressions the day after your change.

      + isn't possible to test all of this for every submission. Our build bots and + nightly testing infrastructure normally finds these problems. A good rule of + thumb is to check the nightly testers for regressions the day after your + change. Build bots will directly email you if a group of commits that + included yours caused a failure. You are expected to check the build bot + messages to see if they are your fault and, if so, fix the breakage.

      Commits that violate these quality standards (e.g. are very broken) may be reverted. This is necessary when the change blocks other developers from @@ -592,7 +601,7 @@ Changes Written by the LLVM Oversight Group
      The LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-06-15 04:18:54 +0000 (Mon, 15 Jun 2009) $ + Last modified: $Date: 2009-10-10 23:37:16 +0200 (Sat, 10 Oct 2009) $ diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html index a03568f7e3675..0ca702f477bfd 100644 --- a/docs/ExceptionHandling.html +++ b/docs/ExceptionHandling.html @@ -3,8 +3,12 @@ Exception Handling in LLVM + + +

      Exception Handling in LLVM
      @@ -16,6 +20,7 @@
Introduction
  Itanium ABI Zero-cost Exception Handling
+ Setjmp/Longjmp Exception Handling
Overview
LLVM Code Generation @@ -33,6 +38,7 @@
llvm.eh.typeid.for
llvm.eh.sjlj.setjmp
llvm.eh.sjlj.longjmp
+ llvm.eh.sjlj.lsda
• Asm Table Formats
      @@ -50,17 +56,17 @@ - +

      This document is the central repository for all information pertaining to -exception handling in LLVM. It describes the format that LLVM exception -handling information takes, which is useful for those interested in creating -front-ends or dealing directly with the information. Further, this document -provides specific examples of what exception handling information is used for -C/C++.

      + exception handling in LLVM. It describes the format that LLVM exception + handling information takes, which is useful for those interested in creating + front-ends or dealing directly with the information. Further, this document + provides specific examples of what exception handling information is used for + in C/C++.

      @@ -72,30 +78,63 @@ C/C++.

      Exception handling for most programming languages is designed to recover from -conditions that rarely occur during general use of an application. To that end, -exception handling should not interfere with the main flow of an -application's algorithm by performing checkpointing tasks such as saving -the current pc or register state.

      + conditions that rarely occur during general use of an application. To that + end, exception handling should not interfere with the main flow of an + application's algorithm by performing checkpointing tasks, such as saving the + current pc or register state.

      The Itanium ABI Exception Handling Specification defines a methodology for -providing outlying data in the form of exception tables without inlining -speculative exception handling code in the flow of an application's main -algorithm. Thus, the specification is said to add "zero-cost" to the normal -execution of an application.

      + providing outlying data in the form of exception tables without inlining + speculative exception handling code in the flow of an application's main + algorithm. Thus, the specification is said to add "zero-cost" to the normal + execution of an application.

      A more complete description of the Itanium ABI exception handling runtime -support of can be found at Itanium C++ ABI: -Exception Handling. A description of the exception frame format can be found -at Exception Frames, with details of the Dwarf -specification at Dwarf 3 -Standard. A description for the C++ exception table formats can be found at -Exception Handling -Tables.

      + support of can be found at + Itanium C++ ABI: + Exception Handling. A description of the exception frame format can be + found at + Exception + Frames, with details of the DWARF 3 specification at + DWARF 3 Standard. + A description for the C++ exception table formats can be found at + Exception Handling + Tables.

      + + + +
      + +

      Setjmp/Longjmp (SJLJ) based exception handling uses LLVM intrinsics + llvm.eh.sjlj.setjmp and + llvm.eh.sjlj.longjmp to + handle control flow for exception handling.

      + +

      For each function which does exception processing, be it try/catch blocks + or cleanups, that function registers itself on a global frame list. When + exceptions are being unwound, the runtime uses this list to identify which + functions need processing.

      + +

      Landing pad selection is encoded in the call site entry of the function + context. The runtime returns to the function via + llvm.eh.sjlj.longjmp, where + a switch table transfers control to the appropriate landing pad based on + the index stored in the function context.

      + +

      In contrast to DWARF exception handling, which encodes exception regions + and frame information in out-of-line tables, SJLJ exception handling + builds and removes the unwind frame context at runtime. This results in + faster exception handling at the expense of slower execution when no + exceptions are thrown. As exceptions are, by their nature, intended for + uncommon code paths, DWARF exception handling is generally preferred to + SJLJ.

      +
      +
      Overview @@ -103,41 +142,44 @@ Tables.

      -

      When an exception is thrown in llvm code, the runtime does a best effort to -find a handler suited to process the circumstance.

      +

      When an exception is thrown in LLVM code, the runtime does its best to find a + handler suited to processing the circumstance.

      The runtime first attempts to find an exception frame corresponding to -the function where the exception was thrown. If the programming language (ex. -C++) supports exception handling, the exception frame contains a reference to an -exception table describing how to process the exception. If the language (ex. -C) does not support exception handling or if the exception needs to be forwarded -to a prior activation, the exception frame contains information about how to -unwind the current activation and restore the state of the prior activation. -This process is repeated until the exception is handled. If the exception is -not handled and no activations remain, then the application is terminated with -an appropriate error message.

      - -

      Since different programming languages have different behaviors when handling -exceptions, the exception handling ABI provides a mechanism for supplying -personalities. An exception handling personality is defined by way of a -personality function (ex. for C++ __gxx_personality_v0) which -receives the context of the exception, an exception structure containing -the exception object type and value, and a reference to the exception table for -the current function. The personality function for the current compile unit is -specified in a common exception frame.

      + the function where the exception was thrown. If the programming language + (e.g. C++) supports exception handling, the exception frame contains a + reference to an exception table describing how to process the exception. If + the language (e.g. C) does not support exception handling, or if the + exception needs to be forwarded to a prior activation, the exception frame + contains information about how to unwind the current activation and restore + the state of the prior activation. This process is repeated until the + exception is handled. If the exception is not handled and no activations + remain, then the application is terminated with an appropriate error + message.

      + +

      Because different programming languages have different behaviors when + handling exceptions, the exception handling ABI provides a mechanism for + supplying personalities. An exception handling personality is defined + by way of a personality function (e.g. __gxx_personality_v0 + in C++), which receives the context of the exception, an exception + structure containing the exception object type and value, and a reference + to the exception table for the current function. The personality function + for the current compile unit is specified in a common exception + frame.

      The organization of an exception table is language dependent. For C++, an -exception table is organized as a series of code ranges defining what to do if -an exception occurs in that range. Typically, the information associated with a -range defines which types of exception objects (using C++ type info) that -are handled in that range, and an associated action that should take place. -Actions typically pass control to a landing pad.

      + exception table is organized as a series of code ranges defining what to do + if an exception occurs in that range. Typically, the information associated + with a range defines which types of exception objects (using C++ type + info) that are handled in that range, and an associated action that + should take place. Actions typically pass control to a landing + pad.

      -

      A landing pad corresponds to the code found in the catch portion of a -try/catch sequence. When execution resumes at a landing pad, it receives the -exception structure and a selector corresponding to the type of exception -thrown. The selector is then used to determine which catch should actually -process the exception.

      +

      A landing pad corresponds to the code found in the catch portion of + a try/catch sequence. When execution resumes at a landing + pad, it receives the exception structure and a selector corresponding to + the type of exception thrown. The selector is then used to determine + which catch should actually process the exception.

      @@ -149,12 +191,12 @@ process the exception.

      At the time of this writing, only C++ exception handling support is available -in LLVM. So the remainder of this document will be somewhat C++-centric.

      + in LLVM. So the remainder of this document will be somewhat C++-centric.

      From the C++ developers perspective, exceptions are defined in terms of the -throw and try/catch statements. In this section we will -describe the implementation of llvm exception handling in terms of C++ -examples.

      + throw and try/catch statements. In this section + we will describe the implementation of LLVM exception handling in terms of + C++ examples.

      @@ -166,17 +208,17 @@ examples.

      Languages that support exception handling typically provide a throw -operation to initiate the exception process. Internally, a throw operation -breaks down into two steps. First, a request is made to allocate exception -space for an exception structure. This structure needs to survive beyond the -current activation. This structure will contain the type and value of the -object being thrown. Second, a call is made to the runtime to raise the -exception, passing the exception structure as an argument.

      + operation to initiate the exception process. Internally, a throw operation + breaks down into two steps. First, a request is made to allocate exception + space for an exception structure. This structure needs to survive beyond the + current activation. This structure will contain the type and value of the + object being thrown. Second, a call is made to the runtime to raise the + exception, passing the exception structure as an argument.

      -

      In C++, the allocation of the exception structure is done by the -__cxa_allocate_exception runtime function. The exception raising is -handled by __cxa_throw. The type of the exception is represented using -a C++ RTTI type info structure.

      +

      In C++, the allocation of the exception structure is done by + the __cxa_allocate_exception runtime function. The exception + raising is handled by __cxa_throw. The type of the exception is + represented using a C++ RTTI structure.
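As a rough illustration of those two steps, a C++ throw 42; lowers to calls along these lines (a hand-written sketch of the Itanium ABI entry points named above, not actual front-end output):

    #include <cstddef>
    #include <typeinfo>

    extern "C" void *__cxa_allocate_exception(std::size_t thrown_size);
    extern "C" void __cxa_throw(void *thrown_exception, std::type_info *tinfo,
                                void (*dest)(void *));

    void throw_int_42() {
      // Step 1: allocate exception space that survives this activation.
      void *exc = __cxa_allocate_exception(sizeof(int));
      *static_cast<int *>(exc) = 42;
      // Step 2: raise the exception; this call does not return normally.
      __cxa_throw(exc, const_cast<std::type_info *>(&typeid(int)),
                  /*destructor=*/0);
    }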

      @@ -187,67 +229,84 @@ a C++ RTTI type info structure.

      -

      A call within the scope of a try statement can potentially raise an exception. -In those circumstances, the LLVM C++ front-end replaces the call with an -invoke instruction. Unlike a call, the invoke has two potential -continuation points; where to continue when the call succeeds as per normal, and -where to continue if the call raises an exception, either by a throw or the -unwinding of a throw.

      - -

      The term used to define a the place where an invoke continues after an -exception is called a landing pad. LLVM landing pads are conceptually -alternative function entry points where a exception structure reference and a type -info index are passed in as arguments. The landing pad saves the exception -structure reference and then proceeds to select the catch block that corresponds -to the type info of the exception object.

      +

      A call within the scope of a try statement can potentially raise an + exception. In those circumstances, the LLVM C++ front-end replaces the call + with an invoke instruction. Unlike a call, the invoke has + two potential continuation points: where to continue when the call succeeds + as per normal; and where to continue if the call raises an exception, either + by a throw or the unwinding of a throw.

      -

      Two llvm intrinsic functions are used convey information about the landing -pad to the back end.

      +

The term used to define the place where an invoke continues after + an exception is called a landing pad. LLVM landing pads are + conceptually alternative function entry points where an exception structure + reference and a type info index are passed in as arguments. The landing pad + saves the exception structure reference and then proceeds to select the catch + block that corresponds to the type info of the exception object.

      -

      llvm.eh.exception takes no -arguments and returns a pointer to the exception structure. This only returns a -sensible value if called after an invoke has branched to a landing pad. Due to -codegen limitations, it must currently be called in the landing pad itself.

      +

      Two LLVM intrinsic functions are used to convey information about the landing + pad to the back end.

      -

      llvm.eh.selector takes a minimum of -three arguments. The first argument is the reference to the exception -structure. The second argument is a reference to the personality function to be -used for this try catch sequence. Each of the remaining arguments is either a -reference to the type info for a catch statement, -a filter expression, -or the number zero representing a cleanup. -The exception is tested against the arguments sequentially from first to last. -The result of the llvm.eh.selector is a -positive number if the exception matched a type info, a negative number if it matched -a filter, and zero if it matched a cleanup. If nothing is matched, the behaviour of -the program is undefined. -This only returns a sensible value if called after an invoke has branched to a -landing pad. Due to codegen limitations, it must currently be called in the -landing pad itself. -If a type info matched then the selector value is the index of the type info in -the exception table, which can be obtained using the -llvm.eh.typeid.for intrinsic.

      +
        +
      1. llvm.eh.exception takes no + arguments and returns a pointer to the exception structure. This only + returns a sensible value if called after an invoke has branched + to a landing pad. Due to code generation limitations, it must currently + be called in the landing pad itself.
      2. + +
      3. llvm.eh.selector takes a minimum + of three arguments. The first argument is the reference to the exception + structure. The second argument is a reference to the personality function + to be used for this try/catch sequence. Each of the + remaining arguments is either a reference to the type info for + a catch statement, a filter + expression, or the number zero (0) representing + a cleanup. The exception is tested against the + arguments sequentially from first to last. The result of + the llvm.eh.selector is a + positive number if the exception matched a type info, a negative number if + it matched a filter, and zero if it matched a cleanup. If nothing is + matched, the behaviour of the program + is undefined. This only returns a sensible + value if called after an invoke has branched to a landing pad. + Due to codegen limitations, it must currently be called in the landing pad + itself. If a type info matched, then the selector value is the index of + the type info in the exception table, which can be obtained using the + llvm.eh.typeid.for + intrinsic.
      4. +

      Once the landing pad has the type info selector, the code branches to the -code for the first catch. The catch then checks the value of the type info -selector against the index of type info for that catch. Since the type info -index is not known until all the type info have been gathered in the backend, -the catch code will call the llvm.eh.typeid.for intrinsic to -determine the index for a given type info. If the catch fails to match the -selector then control is passed on to the next catch. Note: Since the landing -pad will not be used if there is no match in the list of type info on the call -to llvm.eh.selector, then neither the -last catch nor catch all need to perform the the check against the -selector.

      - -

      Finally, the entry and exit of catch code is bracketed with calls to -__cxa_begin_catch and __cxa_end_catch. -__cxa_begin_catch takes a exception structure reference as an argument -and returns the value of the exception object. __cxa_end_catch -takes a exception structure reference as an argument. This function clears the -exception from the exception space. Note: a rethrow from within the catch may -replace this call with a __cxa_rethrow.

      + code for the first catch. The catch then checks the value of the type info + selector against the index of type info for that catch. Since the type info + index is not known until all the type info have been gathered in the backend, + the catch code will call the + llvm.eh.typeid.for intrinsic + to determine the index for a given type info. If the catch fails to match + the selector then control is passed on to the next catch. Note: Since the + landing pad will not be used if there is no match in the list of type info on + the call to llvm.eh.selector, then + neither the last catch nor catch all need to perform the check + against the selector.

      + +

      Finally, the entry and exit of catch code is bracketed with calls + to __cxa_begin_catch and __cxa_end_catch.

      + +
        +
• __cxa_begin_catch takes an exception structure reference as an + argument and returns the value of the exception object (see the sketch + after this list).
      • + +
      • __cxa_end_catch takes no arguments. This function:

        +
          +
        1. Locates the most recently caught exception and decrements its handler + count,
        2. +
        3. Removes the exception from the "caught" stack if the handler count + goes to zero, and
        4. +
        5. Destroys the exception if the handler count goes to zero, and the + exception was not re-thrown by throw.
        6. +
        +

        Note: a rethrow from within the catch may replace this call with + a __cxa_rethrow.
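A sketch of that bracketing written out as direct runtime calls (illustrative only; the function name and the assumption that the catch handles an int are invented for the example):

    extern "C" void *__cxa_begin_catch(void *exc_structure);
    extern "C" void __cxa_end_catch();

    // Conceptual shape of a lowered "catch (int x)" body.
    void catch_clause(void *exc_structure) {
      int x = *static_cast<int *>(__cxa_begin_catch(exc_structure));
      (void)x;            // the user code of the catch block runs here
      __cxa_end_catch();  // decrements the handler count as described above
    }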

      • +
      @@ -258,16 +317,15 @@ replace this call with a __cxa_rethrow.

      -

      To handle destructors and cleanups in try code, control may not run directly -from a landing pad to the first catch. Control may actually flow from the -landing pad to clean up code and then to the first catch. Since the required -clean up for each invoke in a try may be different (ex., intervening -constructor), there may be several landing pads for a given try. If cleanups -need to be run, the number zero should be passed as the last -llvm.eh.selector argument. -However for C++ a null i8* must be passed -instead. -

      +

To handle destructors and cleanups in try code, control may not run + directly from a landing pad to the first catch. Control may actually flow + from the landing pad to clean up code and then to the first catch. Since the + required clean up for each invoke in a try may be different + (e.g. intervening constructor), there may be several landing pads for a given + try. If cleanups need to be run, an i32 0 should be passed as the + last llvm.eh.selector argument. + However, when using DWARF exception handling with C++, an i8* null + must be passed instead.

      @@ -278,23 +336,23 @@ instead.
      -

      C++ allows the specification of which exception types can be thrown from -a function. To represent this a top level landing pad may exist to filter out -invalid types. To express this in LLVM code the landing pad will call llvm.eh.selector. The arguments are a -reference to the exception structure, a reference to the personality function, -the length of the filter expression (the number of type infos plus one), -followed by the type infos themselves. -llvm.eh.selector will return a negative -value if the exception does not match any of the type infos. If no match is -found then a call to __cxa_call_unexpected should be made, otherwise -_Unwind_Resume. Each of these functions requires a reference to the -exception structure. Note that the most general form of an -llvm.eh.selector call can contain -any number of type infos, filter expressions and cleanups (though having more -than one cleanup is pointless). The LLVM C++ front-end can generate such -llvm.eh.selector calls due to inlining -creating nested exception handling scopes.

      +

      C++ allows the specification of which exception types can be thrown from a + function. To represent this a top level landing pad may exist to filter out + invalid types. To express this in LLVM code the landing pad will + call llvm.eh.selector. The + arguments are a reference to the exception structure, a reference to the + personality function, the length of the filter expression (the number of type + infos plus one), followed by the type infos themselves. + llvm.eh.selector will return a + negative value if the exception does not match any of the type infos. If no + match is found then a call to __cxa_call_unexpected should be made, + otherwise _Unwind_Resume. Each of these functions requires a + reference to the exception structure. Note that the most general form of an + llvm.eh.selector call can contain + any number of type infos, filter expressions and cleanups (though having more + than one cleanup is pointless). The LLVM C++ front-end can generate such + llvm.eh.selector calls due to + inlining creating nested exception handling scopes.

      @@ -306,23 +364,21 @@ creating nested exception handling scopes.

      The semantics of the invoke instruction require that any exception that -unwinds through an invoke call should result in a branch to the invoke's unwind -label. However such a branch will only happen if the -llvm.eh.selector matches. -Thus in order to ensure correct operation, the front-end must only generate -llvm.eh.selector calls that are -guaranteed to always match whatever exception unwinds through the invoke. -For most languages it is enough to pass zero, indicating the presence of -a cleanup, as the last -llvm.eh.selector argument. -However for C++ this is not sufficient, because the C++ personality function -will terminate the program if it detects that unwinding the exception only -results in matches with cleanups. For C++ a null i8* should -be passed as the last -llvm.eh.selector argument instead. -This is interpreted as a catch-all by the C++ personality function, and will -always match. -

      + unwinds through an invoke call should result in a branch to the invoke's + unwind label. However such a branch will only happen if the + llvm.eh.selector matches. Thus in + order to ensure correct operation, the front-end must only generate + llvm.eh.selector calls that are + guaranteed to always match whatever exception unwinds through the invoke. + For most languages it is enough to pass zero, indicating the presence of + a cleanup, as the + last llvm.eh.selector argument. + However for C++ this is not sufficient, because the C++ personality function + will terminate the program if it detects that unwinding the exception only + results in matches with cleanups. For C++ a null i8* should be + passed as the last llvm.eh.selector + argument instead. This is interpreted as a catch-all by the C++ personality + function, and will always match.

      @@ -334,7 +390,8 @@ always match.

      LLVM uses several intrinsic functions (name prefixed with "llvm.eh") to -provide exception handling information at various points in generated code.

      + provide exception handling information at various points in generated + code.

      @@ -344,6 +401,7 @@ provide exception handling information at various points in generated code.

      +
         i8* %llvm.eh.exception( )
       
      @@ -358,29 +416,29 @@ provide exception handling information at various points in generated code.

      +
      -  i32 %llvm.eh.selector.i32(i8*, i8*, i8*, ...)
      -  i64 %llvm.eh.selector.i64(i8*, i8*, i8*, ...)
      +  i32 %llvm.eh.selector(i8*, i8*, i8*, ...)
       

      This intrinsic is used to compare the exception with the given type infos, -filters and cleanups.

      + filters and cleanups.

      llvm.eh.selector takes a minimum of -three arguments. The first argument is the reference to the exception -structure. The second argument is a reference to the personality function to be -used for this try catch sequence. Each of the remaining arguments is either a -reference to the type info for a catch statement, -a filter expression, -or the number zero representing a cleanup. -The exception is tested against the arguments sequentially from first to last. -The result of the llvm.eh.selector is a -positive number if the exception matched a type info, a negative number if it matched -a filter, and zero if it matched a cleanup. If nothing is matched, the behaviour of -the program is undefined. -If a type info matched then the selector value is the index of the type info in -the exception table, which can be obtained using the -llvm.eh.typeid.for intrinsic.

      + three arguments. The first argument is the reference to the exception + structure. The second argument is a reference to the personality function to + be used for this try catch sequence. Each of the remaining arguments is + either a reference to the type info for a catch statement, + a filter expression, or the number zero + representing a cleanup. The exception is tested + against the arguments sequentially from first to last. The result of + the llvm.eh.selector is a positive + number if the exception matched a type info, a negative number if it matched + a filter, and zero if it matched a cleanup. If nothing is matched, the + behaviour of the program is undefined. If a type + info matched then the selector value is the index of the type info in the + exception table, which can be obtained using the + llvm.eh.typeid.for intrinsic.

      @@ -390,15 +448,15 @@ the exception table, which can be obtained using the
      +
      -  i32 %llvm.eh.typeid.for.i32(i8*)
      -  i64 %llvm.eh.typeid.for.i64(i8*)
      +  i32 %llvm.eh.typeid.for(i8*)
       

      This intrinsic returns the type info index in the exception table of the -current function. This value can be used to compare against the result of llvm.eh.selector. The single argument is -a reference to a type info.

      + current function. This value can be used to compare against the result + of llvm.eh.selector. The single + argument is a reference to a type info.
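As a hedged sketch of how the two intrinsics combine (the typeinfo name @_ZTIi and the helper function are assumed), a front end can test whether the selector value picked out a particular catch clause like this:

declare i32 @llvm.eh.typeid.for(i8*)
@_ZTIi = external constant i8*        ; typeinfo for int (assumed)

; %sel is a value previously returned by llvm.eh.selector.
define i1 @matches_int(i32 %sel) {
entry:
  %tid = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
  %ok = icmp eq i32 %sel, %tid        ; positive selector == typeinfo index
  ret i1 %ok
}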

      @@ -408,23 +466,47 @@ a reference to a type info.

      +
         i32 %llvm.eh.sjlj.setjmp(i8*)
       
      -

      The SJLJ exception handling uses this intrinsic to force register saving -for the current function and to store the address of the following instruction -for use as a destination address by -llvm.eh.sjlj.longjmp. The buffer format and the overall functioning -of this intrinsic is compatible with the GCC __builtin_setjmp -implementation, allowing code built with the two compilers to interoperate.

      +

The SJLJ exception handling uses this intrinsic to force register saving for + the current function and to store the address of the following instruction + for use as a destination address by + llvm.eh.sjlj.longjmp. The buffer format and the overall + functioning of this intrinsic are compatible with the GCC + __builtin_setjmp implementation, allowing code built with the + two compilers to interoperate.


      The single parameter is a pointer to a five word buffer in which the calling + context is saved. The front end places the frame pointer in the first word, + and the target implementation of this intrinsic should place the destination + address for a + llvm.eh.sjlj.longjmp in the + second word. The following three words are available for use in a + target-specific manner.
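A minimal sketch of the buffer layout described above (illustrative only; the demo function and its use of the result are assumptions):

declare i32 @llvm.eh.sjlj.setjmp(i8*)

define i32 @demo_setjmp() {
entry:
  %buf = alloca [5 x i8*]                    ; five-word jump buffer
  ; word 0: frame pointer (filled in by the front end)
  ; word 1: destination address for llvm.eh.sjlj.longjmp (target-filled)
  ; words 2-4: available for target-specific use
  %p = bitcast [5 x i8*]* %buf to i8*
  %r = call i32 @llvm.eh.sjlj.setjmp(i8* %p)
  ret i32 %r
}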

      +  i8* %llvm.eh.sjlj.lsda( )
      +

      Used for SJLJ based exception handling, the + llvm.eh.sjlj.lsda intrinsic returns the address of the Language + Specific Data Area (LSDA) for the current function. The SJLJ front-end code + stores this address in the exception handling function context for use by the + runtime.
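A tiny sketch (function name assumed) of retrieving that address:

declare i8* @llvm.eh.sjlj.lsda()

define i8* @current_lsda() {
entry:
  %lsda = call i8* @llvm.eh.sjlj.lsda()   ; LSDA of the enclosing function
  ret i8* %lsda
}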

      -

      The single parameter is a pointer to a five word buffer in which the -calling context is saved. The front end places the frame pointer in the -first word, and the target implementation of this intrinsic should place the -destination address for a -llvm.eh.sjlj.longjmp in the second word. The following three words -are available for use in a target-specific manner.

      +
      @@ -434,7 +516,7 @@ are available for use in a target-specific manner.

      There are two tables that are used by the exception handling runtime to -determine which actions should take place when an exception is thrown.

      + determine which actions should take place when an exception is thrown.

      @@ -446,11 +528,11 @@ determine which actions should take place when an exception is thrown.

      An exception handling frame eh_frame is very similar to the unwind -frame used by dwarf debug info. The frame contains all the information -necessary to tear down the current frame and restore the state of the prior -frame. There is an exception handling frame for each function in a compile -unit, plus a common exception handling frame that defines information common to -all functions in the unit.

      + frame used by dwarf debug info. The frame contains all the information + necessary to tear down the current frame and restore the state of the prior + frame. There is an exception handling frame for each function in a compile + unit, plus a common exception handling frame that defines information common + to all functions in the unit.

      Todo - Table details here.

      @@ -464,9 +546,9 @@ all functions in the unit.

      An exception table contains information about what actions to take when an -exception is thrown in a particular part of a function's code. There is -one exception table per function except leaf routines and functions that have -only calls to non-throwing functions will not need an exception table.

+ exception is thrown in a particular part of a function's code. There is one + exception table per function, except that leaf routines and functions that + make only calls to non-throwing functions do not need an exception table.

      Todo - Table details here.

      @@ -481,7 +563,7 @@ only calls to non-throwing functions will not need an exception table.

        -
      1. Testing/Testing/Testing.

      2. +
      3. Testing/Testing/Testing.
      @@ -498,7 +580,7 @@ only calls to non-throwing functions will not need an exception table.

      Chris Lattner
      LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-05-14 17:44:15 +0200 (Thu, 14 May 2009) $ + Last modified: $Date: 2009-10-14 18:11:37 +0200 (Wed, 14 Oct 2009) $ diff --git a/docs/FAQ.html b/docs/FAQ.html index 9fd89288f54c3..00746381f7b9c 100644 --- a/docs/FAQ.html +++ b/docs/FAQ.html @@ -685,7 +685,7 @@ Stop.

      Also, there are a number of other limitations of the C backend that cause it to produce code that does not fully conform to the C++ ABI on most platforms. Some of the C++ programs in LLVM's test suite are known to fail - when compiled with the C back end because of ABI incompatiblities with + when compiled with the C back end because of ABI incompatibilities with standard C++ libraries.

      @@ -700,7 +700,7 @@ Stop. portable is by using the preprocessor to include platform-specific code. In practice, information about other platforms is lost after preprocessing, so the result is inherently dependent on the platform that the preprocessing was - targetting.

      + targeting.

      Another example is sizeof. It's common for sizeof(long) to vary between platforms. In most C front-ends, sizeof is expanded to @@ -931,7 +931,7 @@ F.i: src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-06-30 19:10:19 +0200 (Tue, 30 Jun 2009) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html index 9d2243f589861..873faa6c53734 100644 --- a/docs/GCCFEBuildInstrs.html +++ b/docs/GCCFEBuildInstrs.html @@ -88,6 +88,7 @@ top-level README.LLVM file, adding ",ada" to EXTRALANGS, for example: are gcc-4.2 and the 2005, 2006 and 2007 versions of the GNAT GPL Edition. + GNAT GPL 2008, gcc-4.3 and later will not work. The LLVM parts of llvm-gcc are written in C++ so a C++ compiler is needed to build them. The rest of gcc is written in C. Some linux distributions provide a version of gcc that supports all @@ -271,7 +272,7 @@ More information is available in the FAQ. src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-04-27 13:21:35 +0200 (Mon, 27 Apr 2009) $ + Last modified: $Date: 2009-07-05 14:01:44 +0200 (Sun, 05 Jul 2009) $ diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html index 562025f13eae0..a372f697f9b28 100644 --- a/docs/GarbageCollection.html +++ b/docs/GarbageCollection.html @@ -334,11 +334,11 @@ void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) { // For roots [0, NumMeta), the metadata pointer is in the FrameMap. for (unsigned e = R->Map->NumMeta; i != e; ++i) - Visitor(&R->Roots[i], R->Map->Meta[i]); + Visitor(&R->Roots[i], R->Map->Meta[i]); // For roots [NumMeta, NumRoots), the metadata pointer is null. for (unsigned e = R->Map->NumRoots; i != e; ++i) - Visitor(&R->Roots[i], NULL); + Visitor(&R->Roots[i], NULL); } }

      @@ -398,7 +398,7 @@ program.

      - define ty @name(...) gc "name" { ... + define ty @name(...) gc "name" { ...
      @@ -1380,7 +1380,7 @@ Fergus Henderson. International Symposium on Memory Management 2002.

      Chris Lattner
      LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-05-13 20:02:09 +0200 (Wed, 13 May 2009) $ + Last modified: $Date: 2009-08-05 17:42:44 +0200 (Wed, 05 Aug 2009) $ diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html index f4b096a10de5f..d5863e8a52b7d 100644 --- a/docs/GetElementPtr.html +++ b/docs/GetElementPtr.html @@ -40,7 +40,7 @@

This document seeks to dispel the mystery and confusion surrounding LLVM's GetElementPtr (GEP) instruction. Questions about the wily GEP instruction are - probably the most frequently occuring questions once a developer gets down to + probably the most frequently occurring questions once a developer gets down to coding with LLVM. Here we lay out the sources of confusion and show that the GEP instruction is really quite simple.

      @@ -303,13 +303,14 @@ idx3 = (char*) &MyVar + 8

      In this example, idx1 computes the address of the second integer - in the array that is in the structure in %MyVar, that is MyVar+4. The - type of idx1 is i32*. However, idx2 computes the - address of the next structure after %MyVar. The type of - idx2 is { [10 x i32] }* and its value is equivalent - to MyVar + 40 because it indexes past the ten 4-byte integers - in MyVar. Obviously, in such a situation, the pointers don't - alias.

      + in the array that is in the structure in %MyVar, that is + MyVar+4. The type of idx1 is i32*. However, + idx2 computes the address of the next structure after + %MyVar. The type of idx2 is { [10 x i32] }* and its + value is equivalent to MyVar + 40 because it indexes past the ten + 4-byte integers in MyVar. Obviously, in such a situation, the + pointers don't alias.
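A sketch of the two GEPs the paragraph is describing, under the assumption that %MyVar has the type named in the text, { [10 x i32] }:

%MyStruct = type { [10 x i32] }
@MyVar = external global %MyStruct

define void @geps() {
entry:
  ; second i32 of the array inside @MyVar: MyVar + 4, type i32*
  %idx1 = getelementptr %MyStruct* @MyVar, i64 0, i32 0, i64 1
  ; one whole %MyStruct past @MyVar: MyVar + 40, type { [10 x i32] }*
  %idx2 = getelementptr %MyStruct* @MyVar, i64 1
  ret void
}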

      +
      @@ -364,7 +365,7 @@ idx3 = (char*) &MyVar + 8 Valid HTML 4.01 The LLVM Compiler Infrastructure
      - Last modified: $Date: 2008-12-11 19:23:24 +0100 (Thu, 11 Dec 2008) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html index 26a46885082df..8a8bce377d139 100644 --- a/docs/GettingStarted.html +++ b/docs/GettingStarted.html @@ -215,11 +215,21 @@ software you will need.

  OS              Arch               Compilers
+ AuroraUX        x86 (1)            GCC
  Linux           x86 (1)            GCC
+ Linux           amd64              GCC
  Solaris         V9 (Ultrasparc)
@@ -239,7 +249,6 @@ software you will need.

  MacOS X (2,9)   x86                GCC
-
  Cygwin/Win32
@@ -248,14 +257,10 @@ software you will need.

  MinGW/Win32
- x86 (1,6,8)
+ x86 (1,6,8,10)
  GCC 3.4.X, binutils 2.15
- Linux           amd64              GCC

      LLVM has partial support for the following platforms:

      @@ -321,6 +326,11 @@ up levels greater than 0 (i.e., "-O1" and higher). Add OPTIMIZE_OPTION="-O0" to the build command line if compiling for LLVM Release or bootstrapping the LLVM toolchain. +
1. For MSYS/MinGW on Windows, be sure to install the MSYS + version of the perl package, and be sure it appears in your path + before any Windows-based versions such as Strawberry Perl and + ActivePerl, as these have Windows-specific behavior that will cause the + build to fail.
    @@ -410,19 +420,19 @@ href="GCCFEBuildInstrs.html">try to compile it on your platform.

GNU Autoconf    - 2.59     + 2.60      Configuration script builder (4)
GNU Automake    - 1.9.2    + 1.9.6     aclocal macro generator (4)
libtool         - 1.5.10   + 1.5.22    Shared library manager (4)
@@ -548,7 +558,10 @@ as the previous one. It appears to work with ENABLE_OPTIMIZED=0 (the default).
Cygwin GCC 4.3.2 20080827 (beta) (2): Users reported various problems related to link errors when using this GCC version.


    Debian GCC 4.3.2 on X86: Crashes building some files in LLVM 2.6.

    +

    GCC 4.3.3 (Debian 4.3.3-10) on ARM: Miscompiles parts of LLVM 2.6 +when optimizations are turned on. The symptom is an infinite loop in +FoldingSetImpl::RemoveNode while running the code generator.

    GNU ld 2.16.X. Some 2.16.X versions of the ld linker will produce very long warning messages complaining that some ".gnu.linkonce.t.*" symbol was defined in a discarded section. You can safely ignore these messages as they are @@ -1622,7 +1635,7 @@ out:

    Chris Lattner
    Reid Spencer
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-05-04 12:24:46 +0200 (Mon, 04 May 2009) $ + Last modified: $Date: 2009-09-27 06:56:27 +0200 (Sun, 27 Sep 2009) $ diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html index 809fda0a6f35b..c0024506e360c 100644 --- a/docs/GettingStartedVS.html +++ b/docs/GettingStartedVS.html @@ -140,15 +140,15 @@
If you used CMake, then the directory in which you created the project files will have an llvm.sln file at its root; just double-click on that to open Visual Studio.
  • - +
  • Build the LLVM Suite: -
      +
      • Simply build the solution.
      • The Fibonacci project is a sample program that uses the JIT. Modify the project's debugging properties to provide a numeric command line argument. The program will print the corresponding fibonacci value.
      • -
  • + @@ -411,7 +411,7 @@ out:

    Jeff Cohen
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-04-03 22:32:13 +0200 (Fri, 03 Apr 2009) $ + Last modified: $Date: 2009-08-05 17:42:44 +0200 (Wed, 05 Aug 2009) $ diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html index b5148ab3312ed..77a417f5710d5 100644 --- a/docs/GoldPlugin.html +++ b/docs/GoldPlugin.html @@ -127,8 +127,9 @@ void foo4(void) { --- command lines --- $ llvm-gcc -flto a.c -c -o a.o # <-- a.o is LLVM bitcode file +$ ar q a.a a.o # <-- a.a is an archive with LLVM bitcode $ llvm-gcc b.c -c -o b.o # <-- b.o is native object file -$ llvm-gcc -use-gold-plugin a.o b.o -o main # <-- link with LLVMgold plugin +$ llvm-gcc -use-gold-plugin a.a b.o -o main # <-- link with LLVMgold plugin

    Gold informs the plugin that foo3 is never referenced outside the IR, leading LLVM to delete that function. However, unlike in the diff --git a/docs/HistoricalNotes/2007-OriginalClangReadme.txt b/docs/HistoricalNotes/2007-OriginalClangReadme.txt new file mode 100644 index 0000000000000..611dc9d2c01c1 --- /dev/null +++ b/docs/HistoricalNotes/2007-OriginalClangReadme.txt @@ -0,0 +1,178 @@ +//===----------------------------------------------------------------------===// +// C Language Family Front-end +//===----------------------------------------------------------------------===// + Chris Lattner + +I. Introduction: + + clang: noun + 1. A loud, resonant, metallic sound. + 2. The strident call of a crane or goose. + 3. C-language family front-end toolkit. + + The world needs better compiler tools, tools which are built as libraries. This + design point allows reuse of the tools in new and novel ways. However, building + the tools as libraries isn't enough: they must have clean APIs, be as + decoupled from each other as possible, and be easy to modify/extend. This + requires clean layering, decent design, and avoiding tying the libraries to a + specific use. Oh yeah, did I mention that we want the resultant libraries to + be as fast as possible? :) + + This front-end is built as a component of the LLVM toolkit that can be used + with the LLVM backend or independently of it. In this spirit, the API has been + carefully designed as the following components: + + libsupport - Basic support library, reused from LLVM. + + libsystem - System abstraction library, reused from LLVM. + + libbasic - Diagnostics, SourceLocations, SourceBuffer abstraction, + file system caching for input source files. This depends on + libsupport and libsystem. + + libast - Provides classes to represent the C AST, the C type system, + builtin functions, and various helpers for analyzing and + manipulating the AST (visitors, pretty printers, etc). This + library depends on libbasic. + + + liblex - C/C++/ObjC lexing and preprocessing, identifier hash table, + pragma handling, tokens, and macros. This depends on libbasic. + + libparse - C (for now) parsing and local semantic analysis. This library + invokes coarse-grained 'Actions' provided by the client to do + stuff (e.g. libsema builds ASTs). This depends on liblex. + + libsema - Provides a set of parser actions to build a standardized AST + for programs. AST's are 'streamed' out a top-level declaration + at a time, allowing clients to use decl-at-a-time processing, + build up entire translation units, or even build 'whole + program' ASTs depending on how they use the APIs. This depends + on libast and libparse. + + librewrite - Fast, scalable rewriting of source code. This operates on + the raw syntactic text of source code, allowing a client + to insert and delete text in very large source files using + the same source location information embedded in ASTs. This + is intended to be a low-level API that is useful for + higher-level clients and libraries such as code refactoring. + + libanalysis - Source-level dataflow analysis useful for performing analyses + such as computing live variables. It also includes a + path-sensitive "graph-reachability" engine for writing + analyses that reason about different possible paths of + execution through source code. This is currently being + employed to write a set of checks for finding bugs in software. + + libcodegen - Lower the AST to LLVM IR for optimization & codegen. Depends + on libast. 
+ + clang - An example driver, client of the libraries at various levels. + This depends on all these libraries, and on LLVM VMCore. + + This front-end has been intentionally built as a DAG of libraries, making it + easy to reuse individual parts or replace pieces if desired. For example, to + build a preprocessor, you take the Basic and Lexer libraries. If you want an + indexer, you take those plus the Parser library and provide some actions for + indexing. If you want a refactoring, static analysis, or source-to-source + compiler tool, it makes sense to take those plus the AST building and semantic + analyzer library. Finally, if you want to use this with the LLVM backend, + you'd take these components plus the AST to LLVM lowering code. + + In the future I hope this toolkit will grow to include new and interesting + components, including a C++ front-end, ObjC support, and a whole lot of other + things. + + Finally, it should be pointed out that the goal here is to build something that + is high-quality and industrial-strength: all the obnoxious features of the C + family must be correctly supported (trigraphs, preprocessor arcana, K&R-style + prototypes, GCC/MS extensions, etc). It cannot be used if it is not 'real'. + + +II. Usage of clang driver: + + * Basic Command-Line Options: + - Help: clang --help + - Standard GCC options accepted: -E, -I*, -i*, -pedantic, -std=c90, etc. + - To make diagnostics more gcc-like: -fno-caret-diagnostics -fno-show-column + - Enable metric printing: -stats + + * -fsyntax-only is currently the default mode. + + * -E mode works the same way as GCC. + + * -Eonly mode does all preprocessing, but does not print the output, + useful for timing the preprocessor. + + * -fsyntax-only is currently partially implemented, lacking some + semantic analysis (some errors and warnings are not produced). + + * -parse-noop parses code without building an AST. This is useful + for timing the cost of the parser without including AST building + time. + + * -parse-ast builds ASTs, but doesn't print them. This is most + useful for timing AST building vs -parse-noop. + + * -parse-ast-print pretty prints most expression and statements nodes. + + * -parse-ast-check checks that diagnostic messages that are expected + are reported and that those which are reported are expected. + + * -dump-cfg builds ASTs and then CFGs. CFGs are then pretty-printed. + + * -view-cfg builds ASTs and then CFGs. CFGs are then visualized by + invoking Graphviz. + + For more information on getting Graphviz to work with clang/LLVM, + see: http://llvm.org/docs/ProgrammersManual.html#ViewGraph + + +III. Current advantages over GCC: + + * Column numbers are fully tracked (no 256 col limit, no GCC-style pruning). + * All diagnostics have column numbers, includes 'caret diagnostics', and they + highlight regions of interesting code (e.g. the LHS and RHS of a binop). + * Full diagnostic customization by client (can format diagnostics however they + like, e.g. in an IDE or refactoring tool) through DiagnosticClient interface. + * Built as a framework, can be reused by multiple tools. + * All languages supported linked into same library (no cc1,cc1obj, ...). + * mmap's code in read-only, does not dirty the pages like GCC (mem footprint). + * LLVM License, can be linked into non-GPL projects. + * Full diagnostic control, per diagnostic. Diagnostics are identified by ID. + * Significantly faster than GCC at semantic analysis, parsing, preprocessing + and lexing. 
+ * Defers exposing platform-specific stuff to as late as possible, tracks use of + platform-specific features (e.g. #ifdef PPC) to allow 'portable bytecodes'. + * The lexer doesn't rely on the "lexer hack": it has no notion of scope and + does not categorize identifiers as types or variables -- this is up to the + parser to decide. + +Potential Future Features: + + * Fine grained diag control within the source (#pragma enable/disable warning). + * Better token tracking within macros? (Token came from this line, which is + a macro argument instantiated here, recursively instantiated here). + * Fast #import with a module system. + * Dependency tracking: change to header file doesn't recompile every function + that textually depends on it: recompile only those functions that need it. + This is aka 'incremental parsing'. + + +IV. Missing Functionality / Improvements + +Lexer: + * Source character mapping. GCC supports ASCII and UTF-8. + See GCC options: -ftarget-charset and -ftarget-wide-charset. + * Universal character support. Experimental in GCC, enabled with + -fextended-identifiers. + * -fpreprocessed mode. + +Preprocessor: + * #assert/#unassert + * MSExtension: "L#param" stringizes to a wide string literal. + * Add support for -M* + +Traditional Preprocessor: + * Currently, we have none. :) + diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html index 53945a533ef58..d6ef416e166c2 100644 --- a/docs/HowToReleaseLLVM.html +++ b/docs/HowToReleaseLLVM.html @@ -10,14 +10,15 @@

    How To Release LLVM To The Public
    1. Introduction
    2. +
    3. Qualification Criteria
    4. Release Timeline
    5. Release Process
    6. -
    7. Distribution Targets
    @@ -26,32 +27,23 @@

    - This document collects information about successfully releasing LLVM to the - public. It is the release manager's guide to ensuring that a high quality + This document collects information about successfully releasing LLVM + (including subprojects llvm-gcc and Clang) to the public. + It is the release manager's responsibility to ensure that a high quality build of LLVM is released.

    - -

    - The following is the basic criteria for releasing LLVM: -

    - -
      -
    1. Successful configure and build.
    2. -
    3. Clean 'make check'.
    4. -
    5. No regressions in the testsuite from the previous release. This may - include performance regressions for major benchmarks.
    6. -
    -The release manager should attempt to have a release every 3-4 months because LLVM -does time based releases (instead of feature based). The release schedule should -be roughly as follows: +

LLVM is released on a time-based schedule (currently every 6 months). We + do not have dot releases because of the nature of LLVM's incremental + development philosophy. The release schedule is roughly as follows: +

      -
    1. Set code freeze and branch creation date for 3 months after last release +
    2. Set code freeze and branch creation date for 6 months after last code freeze date. Announce release schedule to the LLVM community and update the website.
    3. Create release branch and begin release process.
    4. Send out pre-release for first round of testing. Testing will last 7-10 days. @@ -71,44 +63,76 @@ pre-release testing.
    5. - - - + + +
      +This section describes a few administrative tasks that need to be done for the +release process to begin. Specifically, it involves creating the release branch, + resetting version numbers, and creating the release tarballs for the release + team to begin testing. +
      + + +

      Branch the Subversion HEAD using the following procedure:

      1. -

        Verify that the current Subversion HEAD is in decent shape by examining nightly - tester results.

      2. +

        Verify that the current Subversion HEAD is in decent shape by examining + nightly tester or buildbot results.

      3. Request all developers to refrain from committing. Offenders get commit rights taken away (temporarily).

      4. -

        Create the release branch for llvm, llvm-gcc4.2, and - the test-suite. The branch name will be release_XX, - where XX is the major and minor release numbers. These branches can - be created without checking out anything from subversion. +

Create the release branch for llvm, llvm-gcc4.2, + clang, and the test-suite. The branch name will be + release_XX, where XX is the major and minor release numbers. + Clang will have a different release number than + llvm/llvm-gcc4 since its first release was years later + (still deciding if this will be true or not). These branches + can be created without checking out anything from subversion.

        @@ -119,6 +143,8 @@ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk \ https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \ https://llvm.org/svn/llvm-project/test-suite/branches/release_XX +svn copy https://llvm.org/svn/llvm-project/cfe/trunk \ + https://llvm.org/svn/llvm-project/cfe/branches/release_XX
        @@ -135,34 +161,36 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XX svn co https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XX +svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XX
      -
    - +

After creating the LLVM release branch, update the release branches' autoconf/configure.ac version from X.Xsvn to just X.X. Update it on mainline as well to be the next version (X.X+1svn). Regenerate the configure script - for both. This must be done for both llvm and the test-suite. + for both. This must be done for both llvm and the + test-suite.

    +

    FIXME: Add a note about clang.

    In addition, the version number of all the Bugzilla components must be updated for the next release.

    - +

    - Create source distributions for LLVM, LLVM GCC, and the LLVM Test Suite by - exporting the source from Subversion and archiving it. This can be done with - the following commands: + Create source distributions for LLVM, LLVM-GCC, + clang, and the llvm test-suite by exporting the source from + Subversion and archiving it. This can be done with the following commands:

@@ -170,25 +198,43 @@ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XX svn export https://llvm.org/svn/llvm-project/llvm/branches/release_XX llvm-X.X svn export https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX llvm-gcc4.2-X.X.source svn export https://llvm.org/svn/llvm-project/test-suite/branches/release_XX llvm-test-X.X -tar -cvf - llvm-X.X | gzip > llvm-X.X.tar.gz -tar -cvf - llvm-test-X.X | gzip > llvm-test-X.X.tar.gz -tar -cvf - llvm-gcc4.2-X.X.source | gzip > llvm-gcc-4.2-X.X.source.tar.gz +svn export https://llvm.org/svn/llvm-project/cfe/branches/release_XX clang-X.X +tar -czvf llvm-X.X.tar.gz llvm-X.X +tar -czvf llvm-test-X.X.tar.gz llvm-test-X.X +tar -czvf llvm-gcc-4.2-X.X.source.tar.gz llvm-gcc4.2-X.X.source +tar -czvf clang-X.X.tar.gz clang-X.X
    - + + +
+The build of llvm, llvm-gcc, and clang must be free +of errors and warnings in debug, release, and release-asserts builds. +If all builds are clean, then the release passes build qualification. + +
      +
    1. debug: ENABLE_OPTIMIZED=0
    2. +
    3. release: ENABLE_OPTIMIZED=1
    4. +
    5. release-asserts: ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
    6. +
    +
    + + +

- Build both debug and release (optimized) versions of LLVM on all - platforms. Ensure the build is warning and error free on each platform. - Note that when building the LLVM GCC Binary, use a release build of LLVM. + Build debug, release (optimized), and release-asserts versions of + LLVM on all supported platforms. Directions for building llvm are + here.

    - +

    Creating the LLVM GCC binary distribution (release/optimized) requires @@ -198,128 +244,202 @@ tar -cvf - llvm-gcc4.2-X.X.source | gzip > llvm-gcc-4.2-X.X.source.tar.gz

    1. Build the LLVM GCC front-end by following the directions in the README.LLVM - file. Be sure to build with LLVM_VERSION_INFO=X.X, where X is the major and + file. The frontend must be compiled with c, c++, objc (mac only), + objc++ (mac only) and fortran support.
    2. +
3. Please bootstrap as well.
    4. +
    5. Be sure to build with LLVM_VERSION_INFO=X.X, where X is the major and minor release numbers.
    6. Copy the installation directory to a directory named for the specific target. For example on Red Hat Enterprise Linux, the directory would be named - llvm-gcc4.0-2.1-x86-linux-RHEL4. Archive and compress the new directory. + llvm-gcc4.2-2.6-x86-linux-RHEL4. Archive and compress the new directory.
    - +

    - Using the newly built llvm-gcc and llvm, reconfigure llvm to locate llvm-gcc. - Run make check and ensure there are no unexpected failures. If there - are, resolve the failures or file a bug. If there is a fix commited to mainline, - merge back into the release branch, and restart testing by - re-building LLVM and llvm-gcc. If no - fix will be made, XFAIL the test and commit back to the release branch. + Creating the Clang binary distribution (debug/release/release-asserts) requires + performing the following steps for each supported platform:

    +
      +
    1. + Build clang according to the directions + here. +
    2. + +
    3. Build both a debug and release version of clang, but the binary + will be a release build.
    4. + +
    5. + Package clang (details to follow). +
    6. +
    +
    + + + + +

- Ensure that 'make check' passes on all platforms for all targets. The - test suite must complete with "0 unexpected failures" before sending out the - pre-releases for testing. + The table below specifies which compilers are used for each arch/OS + combination when qualifying the build of llvm, llvm-gcc, and clang. +

    + +

    + + + + + + + + + + +
Architecture    OS              compiler
x86-32          Mac OS 10.5     gcc 4.0.1
x86-32          Linux           gcc 4.2.X, gcc 4.3.X
x86-32          FreeBSD         gcc 4.2.X
x86-32          mingw           gcc 3.4.5
x86-64          Mac OS 10.5     gcc 4.0.1
x86-64          Linux           gcc 4.2.X, gcc 4.3.X
x86-64          FreeBSD         gcc 4.2.X

    +
    + + + + +
+ A release is qualified when it has no regressions from the previous + release (or baseline). Regressions are related to correctness only, not + performance, at this time. Regressions are new failures in the set of tests + that are used to qualify each product, and only include things on the list. + Ultimately, there is no end to the number of possible bugs in a release. We + need very concrete and definitive release criteria that ensure we have + monotonically improving quality on some metric. The metric we use is + described below. This doesn't mean that we don't care about other things, + but these are things that must be satisfied before a release can go out. +
    + + - +

    - Run the llvm-test suite and ensure there are no unacceptable - failures. Unacceptable failures are regression from the previous release - and (optionally) major performance regressions from the previous release. - If a regression is found a bug is filled, but the pre-releases may still go - out.

    + LLVM is qualified when it has a clean dejagnu test run without a frontend and + it has no regressions when using either llvm-gcc or clang + with the test-suite from the previous release. +

    - +

    - You can, optionally, create source and binary RPM packages for LLVM. These may - make it easier to get LLVM into a distribution. This can be done with the - following commands: -

    + LLVM-GCC is qualified when front-end specific tests in the + llvm dejagnu test suite all pass and there are no regressions in + the test-suite.

    +

    We do not use the gcc dejagnu test suite as release criteria.

    +
    -
    -
    -make dist        # Build the distribution source tarball
    -make dist-check  # Check that the source tarball can build itself.
    -cp llvm-M.m.tar.gz /usr/src/redhat/SOURCES  # Required by rpmbuild
    -make srpm # for source rpm
    -make rpm  # for binary rpm
    -
    + + +
    + Clang is qualified when front-end specific tests in the + llvm dejagnu test suite all pass, clang's own test suite passes + cleanly, and there are no regressions in the test-suite.

    -

    - First, use make dist to simply build the distribution. Any failures - need to be corrected (on the branch). Once make dist can be - successful, do make dist-check. This target will do the same thing as - the 'dist' target but also test that distribution to make sure it can build - itself and runs make check as well. This ensures that needed files - are not missing and that the src tarball can be successfully unpacked, built, - installed, and cleaned. Once you have a reliable tarball, you need to copy it - to the /usr/src/redhat/SOURCES directory which is a requirement of - the rpmbuild tool. The last two make invocations just run rpmbuild to - build either a source (srpm) or binary (rpm) RPM package. -

    + + +
    +

    + + + + + + + + +
Architecture    OS              llvm-gcc baseline    clang baseline    tests
x86-32          Mac OS 10.5     last release         none              llvm dejagnu, clang tests, test-suite (including spec)
x86-32          Linux           last release         none              llvm dejagnu, clang tests, test-suite (including spec)
x86-32          FreeBSD         none                 none              llvm dejagnu, clang tests, test-suite
x86-32          mingw           last release         none              QT
x86-64          Mac OS 10.5     last release         none              llvm dejagnu, clang tests, test-suite (including spec)
x86-64          Linux           last release         none              llvm dejagnu, clang tests, test-suite (including spec)
x86-64          FreeBSD         none                 none              llvm dejagnu, clang tests, test-suite

    - +

Once all testing has been completed and appropriate bugs filed, the pre-release tarballs may be put on the website and the LLVM community is notified. Ask that all LLVM developers test the release in two ways:

      -
    1. Download llvm-X.X, llvm-test-X.X, and the appropriate llvm-gcc4 binary. - Run "make check" and the full llvm-test suite (make TEST=nightly report).
    2. -
    3. Download llvm-X.X, llvm-test-X.X, and the llvm-gcc4 source. Compile - everything. Run "make check" and the full llvm-test suite (make TEST=nightly +
    4. Download llvm-X.X, llvm-test-X.X, and the appropriate llvm-gcc4 + and/or clang binary. Build LLVM. + Run "make check" and the full llvm-test suite (make TEST=nightly report).
    5. +
    6. Download llvm-X.X, llvm-test-X.X, and the llvm-gcc4 and/or clang source. + Compile everything. Run "make check" and the full llvm-test suite (make TEST=nightly report).

    Ask LLVM developers to submit the report and make check results to the list. - Verify that there are no regressions from the previous release. For - unsupported targets, verify that make check at least is clean.

    + Attempt to verify that there are no regressions from the previous release. + The results are not used to qualify a release, but to spot other potential + problems. For unsupported targets, verify that make check at least is + clean.

    -

    The first round of pre-release testing will be the longest. During this time, - all regressions must be fixed before the second pre-release is created (repeat - steps 4-8).

    +

During the first round of testing, + all regressions must be fixed before the second pre-release is created.

    -

    If this is the second round of testing, this is only to ensure the bug fixes - previously merged in have not created new major problems. This is not the time - to solve additional and unrelated bugs. If no patches are merged in, the release - is determined to be ready and the release manager may move onto the next step.

    +

    If this is the second round of testing, this is only to ensure the bug + fixes previously merged in have not created new major problems. This is not + the time to solve additional and unrelated bugs. If no patches are merged in, + the release is determined to be ready and the release manager may move onto + the next step. +

    - - +
    -

    Tag the release branch using the following procedure:

    -
    -
    -svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XX \
    -         https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XX
    -svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX \
    -         https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XX
    -svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
    -         https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XX
    -
    +

    + Below are the rules regarding patching the release branch.

    +

    +

  • Patches applied to the release branch are only applied by the release + manager.
  • +
  • During the first round of testing, patches that fix regressions or that + are small and relatively risk free (verified by the appropriate code owner) + are applied to the branch. Code owners are asked to be very conservative in + approving patches for the branch and we reserve the right to reject any patch + that does not fix a regression as previously defined.
  • +
  • During the remaining rounds of testing, only patches that fix regressions + may be applied.
  • + +

    + + + + +
    +

+ The final stages of the release process involve tagging the release + branch, updating documentation that refers to the release, and updating the + demo page.

    +

FIXME: Add a note if anything needs to be done to the clang website. + Hopefully the websites will eventually be merged.

    + - +

    Review the documentation and ensure that it is up to date. The Release Notes @@ -331,6 +451,24 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \

    + + +
    +

    Tag the release branch using the following procedure:

    +
    +
    +svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XX \
    +         https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XX
    +svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX \
    +         https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XX
    +svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
    +         https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XX
    +
    +
    +
    + + +
    @@ -341,7 +479,7 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
    - +

    The website must be updated before the release announcement is sent out. Here is @@ -349,7 +487,8 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \

    1. Check out the website module from CVS.
    2. Create a new subdirectory X.X in the releases directory.
    3. -
    4. Commit the llvm, test-suite, llvm-gcc source, +
    5. Commit the llvm, test-suite, llvm-gcc source, + clang source, clang binaries, and llvm-gcc binaries in this new directory.
    6. Copy and commit the llvm/docs and LICENSE.txt files into this new directory. The docs should be built with BUILD_FOR_WEBSITE=1.
    7. @@ -360,231 +499,16 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \ release documentation.
    8. Finally, update the main page (index.html and sidebar) to point to the new release and release announcement. Make sure this all gets - commited back into Subversion.
    9. + committed back into Subversion.
    - +

    Have Chris send out the release announcement when everything is finished.

    - - - - - -
    Overview
    -
    -

    - The first thing you need to understand is that there are multiple make targets - to support this feature. Here's an overview, we'll delve into the details - later. -

    - -
      -
    • distdir - builds the distribution directory from which the - distribution will be packaged
    • -
    • dist - builds each of the distribution tarballs (tar.gz, - tar.bzip2, .zip). These can be built individually as well, with separate - targets.
    • -
    • dist-check - this is identical to dist but includes a - check on the distribution that ensures the tarball can: unpack - successfully, compile correctly, pass 'make check', and pass - 'make clean'.
    • -
    • dist-clean- this just does a normal clean but also cleans up the - stuff generated by the other three dist targets (above).
    • -
    - -

    - Okay, that's the basic functionality. When making a release, we want to ensure - that the tree you build the distribution from passes - dist-check. Beyond fixing the usual bugs, there is generally one - impediment to making the release in this fashion: missing files. The - dist-check process guards against that possibility. It will either - fail and that failure will indicate what's missing, or it will succeed meaning - that it has proved that the tarballs can actually succeed in building LLVM - correctly and that it passes make check. -

    -
    - - - -
    distdir
    -
    -

    - This target builds the distribution directory which is the directory from - which the tarballs are generated. The distribution directory has the same - name as the release, e.g. LLVM-1.7). This target goes through the following - process: -

    - -
      -
    1. First, if there was an old distribution directory (for the current - release), it is removed in its entirety and you see Removing old - LLVM-1.7
    2. -
3. Second, it issues a make all ENABLE_OPTIMIZED=1 to ensure - that everything in your tree can be built in release mode. Often - times there are discrepancies in building between debug and release - modes so it enforces release mode first. If that fails, the - distdir target fails too. This is preceded by the message - Making 'all' to verify build.
    4. -
    5. Next, it traverses your source tree and copies it to a new directory - that has the name of the release (LLVM-M.m in our current - case). This is the directory that will get tar'd. It contains all the - software that needs to be in the distribution. During the copying - process, it omits generated files, SVN directories, and any other - "cruft" that's in your build tree. This is done to eliminate the - possibility of huge distribution tarballs that include useless or - irrelevant stuff in them. This is the trickiest part of making the - distribution. Done manually you will either include stuff that - shouldn't be in the distribution or exclude stuff that should. This - step is preceded by the message Building Distribution Directory - LLVM-1.7
    6. -
    7. The distribution directory is then traversed and all CVS or - .svn directories are removed. You see: Eliminating CVS/.svn - directories from distribution
    8. -
    9. The recursive dist-hook target is executed. This gives each - directory a chance to modify the distribution in some way (more on this - below).
    10. -
    11. The distribution directory is traversed and the correct file - permissions and modes are set based on the type of file.
    12. -
    - -

    - To control the process of making the distribution directory correctly, each - Makefile can utilize two features: -

    - -
      -
    1. EXTRA_DIST - this make variable specifies which files - it should distribute. By default, all source files are automatically - included for distribution as well as certain well known files - (see DistAlways variable in Makefile.rules for details). Each Makefile - specifies, via the EXTRA_DIST variable, which additional files - need to be distributed. Only those files that are needed to build LLVM - should be added to EXTRA_DIST. EXTRA_DIST contains a - list of file or directory names that should be distributed. For example, - the top level Makefile contains EXTRA_DIST := test llvm.spec - include. This means that in addition to regular things that are - distributed at the top level (CREDITS.txt, LICENSE.txt, etc.) - the distribution should contain the entire test and - include directories as well as the llvm.spec file.
    2. -
    3. dist-hook - this make target can be used to alter the - content of the distribution directory. For example, in the top level - Makefile there is some logic to eliminate files in the include - subtree that are generated by the configure script. These should not be - distributed. Similarly, any dist-hook target found in any - directory can add or remove or modify things just before it gets - packaged. Any transformation is permitted. Generally, not much is - needed.
    4. -
    - -

    - You will see various messages if things go wrong: -

    - -
      -
    1. During the copying process, any files that are missing will be flagged - with: ===== WARNING: Distribution Source 'dir/file' Not Found! - These must be corrected by either adding the file or removing it from - EXTRA_DIST.
    2. -
    3. If you build the distribution with VERBOSE=1, then you might - also see: Skipping non-existent 'dir/file' in certain cases - where it's okay to skip the file.
    4. -
    5. The target can fail if any of the things it does fail. Error messages - should indicate what went wrong.
    6. -
    -
    - - -
    dist
    -
    -

    - This target does exactly what distdir target does, but also includes - assembling the tarballs. There are actually four related targets here: -

    - -
      -
    • dist-gzip: package the gzipped distribution tar - file. The distribution directory is packaged into a single file ending - in .tar.gz which is gzip compressed.
    • -
    • dist-bzip2: package the bzip2 distribution tar file. - The distribution directory is packaged into a single file ending in - .tar.bzip2 which is bzip2 compressed.
    • -
    • dist-zip: package the zip distribution file. The - distribution directory is packaged into a single file ending in - .zip which is zip compressed.
    • -
    • dist: does all three, dist-gzip, dist-bzip2, - dist-zip
    • -
    -
    - - -
    dist-check
    -
    -

    - This target checks the distribution. The basic idea is that it unpacks the - distribution tarball and ensures that it can build. It takes the following - actions: -

    - -
      -
    1. It depends on the dist-gzip target which, if it hasn't already - been built, builds the gzip tar bundle (see dist and distdir - above).
    2. -
    3. removes any pre-existing _distcheckdir at the top level.
    4. -
    5. creates a new _distcheckdir directory at the top level.
    6. -
    7. creates a build subdirectory and an install - subdirectory under _distcheckdir.
    8. -
    9. unzips and untars the release tarball into _distcheckdir, - creating LLVM-1.7 directory (from the tarball).
    10. -
    11. in the build subdirectory, it configures with appropriate options to - build from the unpacked source tarball into the build directory - with installation in the install directory.
    12. -
    13. runs make all
    14. -
    15. runs make check
    16. -
    17. runs make install
    18. -
    19. runs make uninstall
    20. -
    21. runs make dist
    22. -
    23. runs make clean
    24. -
    25. runs make dist-clean
    26. -
    - -

- If it can pass all that, the distribution will be deemed distribution worthy - and you will see: -

    - -
    ===== LLVM-1.7.tar.gz Ready For Distribution =====
    - -

    - This means the tarball should then be tested on other platforms and have the - nightly test run against it. If those all pass, THEN it is ready for - distribution. -

    - -

    - A note about disk space: using dist-check will easily triple the - amount of disk space your build tree is using. You might want to check - available space before you begin. -

    -
    - - -
    dist-clean
    -
    -

    - In addition to doing a normal clean, this target will clean up the - files and directories created by the distribution targets. In particular the - distribution directory (LLVM-X.X), check directory - (_distcheckdir), and the various tarballs will be removed. You do - this after the release has shipped and you no longer need this stuff in your - build tree. -

    -
    -
    @@ -594,7 +518,7 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> The LLVM Compiler Infrastructure
    - Last modified: $Date: 2008-12-11 19:23:24 +0100 (Thu, 11 Dec 2008) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
    diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html index 2e6cc4f9da73d..2ac4575396288 100644 --- a/docs/HowToSubmitABug.html +++ b/docs/HowToSubmitABug.html @@ -60,7 +60,7 @@ more easily.

    Once you have a reduced test-case, go to the LLVM Bug Tracking System and fill out the form with the necessary details (note that you don't -need to pick a catagory, just use the "new-bugs" catagory if you're not sure). +need to pick a category, just use the "new-bugs" category if you're not sure). The bug description should contain the following information:

    @@ -183,12 +183,12 @@ to llvm-gcc (in addition to the options you already pass). Once your have foo.bc, one of the following commands should fail:

      -
    1. llc foo.bc -f
    2. -
    3. llc foo.bc -f -relocation-model=pic
    4. -
    5. llc foo.bc -f -relocation-model=static
    6. -
    7. llc foo.bc -f -enable-eh
    8. -
    9. llc foo.bc -f -relocation-model=pic -enable-eh
    10. -
    11. llc foo.bc -f -relocation-model=static -enable-eh
    12. +
    13. llc foo.bc
    14. +
    15. llc foo.bc -relocation-model=pic
    16. +
    17. llc foo.bc -relocation-model=static
    18. +
    19. llc foo.bc -enable-eh
    20. +
    21. llc foo.bc -relocation-model=pic -enable-eh
    22. +
    23. llc foo.bc -relocation-model=static -enable-eh

    If none of these crash, please follow the instructions for a @@ -320,7 +320,7 @@ the following:

    -llc test.bc -o test.s -f
    +llc test.bc -o test.s
    gcc test.s safe.so -o test.llc
    ./test.llc [program options]

    @@ -348,7 +348,7 @@ the following:

    Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-04-05 02:41:19 +0200 (Sun, 05 Apr 2009) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/LangRef.html b/docs/LangRef.html index f229150ea300c..21e41d5fa6b6e 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -20,7 +20,24 @@
  • High Level Structure
    1. Module Structure
    2. -
    3. Linkage Types
    4. +
    5. Linkage Types +
        +
      1. 'private' Linkage
      2. +
      3. 'linker_private' Linkage
      4. +
      5. 'internal' Linkage
      6. +
      7. 'available_externally' Linkage
      8. +
      9. 'linkonce' Linkage
      10. +
      11. 'common' Linkage
      12. +
      13. 'weak' Linkage
      14. +
      15. 'appending' Linkage
      16. +
      17. 'extern_weak' Linkage
      18. +
      19. 'linkonce_odr' Linkage
      20. +
      21. 'weak_odr' Linkage
      22. +
      23. 'externally visible' Linkage
      24. +
      25. 'dllimport' Linkage
      26. +
      27. 'dllexport' Linkage
      28. +
      +
    6. Calling Conventions
    7. Named Types
    8. Global Variables
    9. @@ -31,6 +48,7 @@
    10. Garbage Collector Names
    11. Module-Level Inline Assembly
    12. Data Layout
    13. +
    14. Pointer Aliasing Rules
  • Type System @@ -38,6 +56,7 @@
  • Type Classifications
  • Primitive Types
      +
    1. Integer Type
    2. Floating Point Types
    3. Void Type
    4. Label Type
    5. @@ -46,7 +65,6 @@
    6. Derived Types
        -
      1. Integer Type
      2. Array Type
      3. Function Type
      4. Pointer Type
      5. @@ -74,6 +92,17 @@
      6. Inline Assembler Expressions
    7. +
    8. Intrinsic Global Variables +
        +
      1. The 'llvm.used' Global Variable
      2. +
      3. The 'llvm.compiler.used' + Global Variable
      4. +
      5. The 'llvm.global_ctors' + Global Variable
      6. +
      7. The 'llvm.global_dtors' + Global Variable
      8. +
      +
    9. Instruction Reference
      1. Terminator Instructions @@ -155,8 +184,6 @@
        1. 'icmp' Instruction
        2. 'fcmp' Instruction
        3. -
        4. 'vicmp' Instruction
        5. -
        6. 'vfcmp' Instruction
        7. 'phi' Instruction
        8. 'select' Instruction
        9. 'call' Instruction
        10. @@ -210,8 +237,6 @@
        11. 'llvm.ctpop.*' Intrinsic
        12. 'llvm.ctlz.*' Intrinsic
        13. 'llvm.cttz.*' Intrinsic
        14. -
        15. 'llvm.part.select.*' Intrinsic
        16. -
        17. 'llvm.part.set.*' Intrinsic
      2. Arithmetic with Overflow Intrinsics @@ -248,6 +273,14 @@
      3. llvm.atomic.load.umin
    10. +
    11. Memory Use Markers +
        +
      1. llvm.lifetime.start
      2. +
      3. llvm.lifetime.end
      4. +
      5. llvm.invariant.start
      6. +
      7. llvm.invariant.end
      8. +
      +
    12. General intrinsics
      1. @@ -274,12 +307,13 @@
        -

        This document is a reference manual for the LLVM assembly language. -LLVM is a Static Single Assignment (SSA) based representation that provides -type safety, low-level operations, flexibility, and the capability of -representing 'all' high-level languages cleanly. It is the common code -representation used throughout all phases of the LLVM compilation -strategy.

        + +

        This document is a reference manual for the LLVM assembly language. LLVM is + a Static Single Assignment (SSA) based representation that provides type + safety, low-level operations, flexibility, and the capability of representing + 'all' high-level languages cleanly. It is the common code representation + used throughout all phases of the LLVM compilation strategy.

        +
        @@ -288,26 +322,24 @@ strategy.

        -

        The LLVM code representation is designed to be used in three -different forms: as an in-memory compiler IR, as an on-disk bitcode -representation (suitable for fast loading by a Just-In-Time compiler), -and as a human readable assembly language representation. This allows -LLVM to provide a powerful intermediate representation for efficient -compiler transformations and analysis, while providing a natural means -to debug and visualize the transformations. The three different forms -of LLVM are all equivalent. This document describes the human readable -representation and notation.

        +

        The LLVM code representation is designed to be used in three different forms: + as an in-memory compiler IR, as an on-disk bitcode representation (suitable + for fast loading by a Just-In-Time compiler), and as a human readable + assembly language representation. This allows LLVM to provide a powerful + intermediate representation for efficient compiler transformations and + analysis, while providing a natural means to debug and visualize the + transformations. The three different forms of LLVM are all equivalent. This + document describes the human readable representation and notation.

        -

        The LLVM representation aims to be light-weight and low-level -while being expressive, typed, and extensible at the same time. It -aims to be a "universal IR" of sorts, by being at a low enough level -that high-level ideas may be cleanly mapped to it (similar to how -microprocessors are "universal IR's", allowing many source languages to -be mapped to them). By providing type information, LLVM can be used as -the target of optimizations: for example, through pointer analysis, it -can be proven that a C automatic variable is never accessed outside of -the current function... allowing it to be promoted to a simple SSA -value instead of a memory location.

        +

        The LLVM representation aims to be light-weight and low-level while being + expressive, typed, and extensible at the same time. It aims to be a + "universal IR" of sorts, by being at a low enough level that high-level ideas + may be cleanly mapped to it (similar to how microprocessors are "universal + IR's", allowing many source languages to be mapped to them). By providing + type information, LLVM can be used as the target of optimizations: for + example, through pointer analysis, it can be proven that a C automatic + variable is never accessed outside of the current function... allowing it to + be promoted to a simple SSA value instead of a memory location.
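For example (a hedged sketch, not taken from the text): a C automatic variable that provably never escapes can be rewritten from a memory slot into a plain SSA value:

define i32 @before_promotion() {
entry:
  %x = alloca i32            ; the C automatic variable lives in memory
  store i32 7, i32* %x
  %v = load i32* %x
  ret i32 %v
}

define i32 @after_promotion() {
entry:
  ret i32 7                  ; the same result as a simple SSA value
}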

It is important to note that this document describes 'well formed' LLVM assembly language. There is a difference between what the parser accepts and what is considered 'well formed'. For example, the following instruction is syntactically okay, but not well formed:
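    %x = add i32 1, %x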

...because the definition of %x does not dominate all of its uses. The LLVM infrastructure provides a verification pass that may be used to verify that an LLVM module is well formed. This pass is automatically run by the parser after parsing input assembly and by the optimizer before it outputs bitcode. The violations pointed out by the verifier pass indicate bugs in transformation passes or input to the parser.

LLVM identifiers come in two basic types: global and local. Global identifiers (functions, global variables) begin with the '@' character. Local identifiers (register names, types) begin with the '%' character. Additionally, there are three different formats for identifiers, for different purposes:

1. Named values are represented as a string of characters with their prefix. For example, %foo, @DivisionByZero, %a.really.long.identifier. The actual regular expression used is '[%@][a-zA-Z$._][a-zA-Z$._0-9]*'. Identifiers which require other characters in their names can be surrounded with quotes. Special characters may be escaped using "\xx" where xx is the ASCII code for the character in hexadecimal. In this way, any character can be used in a name value, even quotes themselves.
2. Unnamed values are represented as an unsigned numeric value with their prefix. For example, %12, @2, %44.
3. Constants, which are described in a section about constants, below.

LLVM requires that values start with a prefix for two reasons: Compilers don't need to worry about name clashes with reserved words, and the set of reserved words may be expanded in the future without penalty. Additionally, unnamed identifiers allow a compiler to quickly come up with a temporary variable without having to avoid symbol table conflicts.

Reserved words in LLVM are very similar to reserved words in other languages. There are keywords for different opcodes ('add', 'bitcast', 'ret', etc...), for primitive type names ('void', 'i32', etc...), and others. These reserved words cannot conflict with variable names, because none of them start with a prefix character ('%' or '@').

Here is an example of LLVM code to multiply the integer variable '%X' by 8:

        The easy way:
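    %result = mul i32 %X, 8

After strength reduction:

    %result = shl i32 %X, 3

And the hard way:

    %0 = add i32 %X, %X           ; yields {i32}:%0
    %1 = add i32 %0, %0           ; yields {i32}:%1
    %result = add i32 %1, %1      ; yields {i32}:%result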

This last way of multiplying %X by 8 illustrates several important lexical features of LLVM:

1. Comments are delimited with a ';' and go until the end of line.
2. Unnamed temporaries are created when the result of a computation is not assigned to a named value.
3. Unnamed temporaries are numbered sequentially.

...and it also shows a convention that we follow in this document. When demonstrating instructions, we will follow an instruction with a comment that defines the type and name of value produced. Comments are shown in italic text.

LLVM programs are composed of "Module"s, each of which is a translation unit of the input programs. Each module consists of functions, global variables, and symbol table entries. Modules may be combined together with the LLVM linker, which merges function (and global variable) definitions, resolves forward declarations, and merges symbol table entries. Here is an example of the "hello world" module:

    ; Declare the string constant as a global constant...
    @.LC0 = internal constant [13 x i8] c"hello world\0A\00"      ; [13 x i8]*

    ; External declaration of the puts function
    declare i32 @puts(i8 *)                                       ; i32(i8 *)*

    ; Definition of main function
    define i32 @main() {                                          ; i32()*
      ; Convert [13 x i8]* to i8 *...
      %cast210 = getelementptr [13 x i8]* @.LC0, i64 0, i64 0     ; i8 *

      ; Call puts function to write out the string to stdout...
      call i32 @puts(i8 * %cast210)                               ; i32
      ret i32 0
    }

This example is made up of a global variable named ".LC0", an external declaration of the "puts" function, and a function definition for "main".

In general, a module is made up of a list of global values, where both functions and global variables are global values. Global values are represented by a pointer to a memory location (in this case, a pointer to an array of char, and a pointer to a function), and have one of the following linkage types.

All Global Variables and Functions have one of the following types of linkage:

private:
Global values with private linkage are only directly accessible by objects in the current module. In particular, linking code into a module with a private global value may cause the private to be renamed as necessary to avoid collisions. Because the symbol is private to the module, all references can be updated. This doesn't show up in any symbol table in the object file.

linker_private:
Similar to private, but the symbol is passed through the assembler and removed by the linker after evaluation. Note that (unlike private symbols) linker_private symbols are subject to coalescing by the linker: weak symbols get merged and redefinitions are rejected. However, unlike normal strong symbols, they are removed by the linker from the final linked image (executable or dynamic library).

internal:
Similar to private, but the value shows as a local symbol (STB_LOCAL in the case of ELF) in the object file. This corresponds to the notion of the 'static' keyword in C.

available_externally:
Globals with "available_externally" linkage are never emitted into the object file corresponding to the LLVM module. They exist to allow inlining and other optimizations to take place given knowledge of the definition of the global, which is known to be somewhere outside the module. Globals with available_externally linkage are allowed to be discarded at will, and are otherwise the same as linkonce_odr. This linkage type is only allowed on definitions, not declarations.

linkonce:
Globals with "linkonce" linkage are merged with other globals of the same name when linkage occurs. This is typically used to implement inline functions, templates, or other code which must be generated in each translation unit that uses it. Unreferenced linkonce globals are allowed to be discarded.

weak:
"weak" linkage has the same merging semantics as linkonce linkage, except that unreferenced globals with weak linkage may not be discarded. This is used for globals that are declared "weak" in C source code.

common:
"common" linkage is most similar to "weak" linkage, but they are used for tentative definitions in C, such as "int X;" at global scope. Symbols with "common" linkage are merged in the same way as weak symbols, and they may not be deleted if unreferenced. common symbols may not have an explicit section, must have a zero initializer, and may not be marked 'constant'. Functions and aliases may not have common linkage.

appending:
"appending" linkage may only be applied to global variables of pointer to array type. When two global variables with appending linkage are linked together, the two global arrays are appended together. This is the LLVM, typesafe, equivalent of having the system linker append together "sections" with identical names when .o files are linked.

extern_weak:
The semantics of this linkage follow the ELF object file model: the symbol is weak until linked; if not linked, the symbol becomes null instead of being an undefined reference.

linkonce_odr:
weak_odr:
Some languages allow differing globals to be merged, such as two functions with different semantics. Other languages, such as C++, ensure that only equivalent globals are ever merged (the "one definition rule" - "ODR"). Such languages can use the linkonce_odr and weak_odr linkage types to indicate that the global will only be merged with equivalent globals. These linkage types are otherwise the same as their non-odr versions.

externally visible:
If none of the above identifiers are used, the global is externally visible, meaning that it participates in linkage and can be used to resolve external symbol references.
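For illustration, here is a short sketch of globals carrying a few of these linkage types (the names are invented for this example):

    @counter = internal global i32 0          ; visible only as a local symbol, like C 'static'
    @tmpl = linkonce_odr constant i32 42      ; merged with equivalent definitions elsewhere
    @tentative = common global i32 0          ; like the C tentative definition "int tentative;"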

The next two types of linkage are targeted for the Microsoft Windows platform only. They are designed to support importing (exporting) symbols from (to) DLLs (Dynamic Link Libraries).

dllimport:
"dllimport" linkage causes the compiler to reference a function or variable via a global pointer to a pointer that is set up by the DLL exporting the symbol. On Microsoft Windows targets, the pointer name is formed by combining __imp_ and the function or variable name.

dllexport:
"dllexport" linkage causes the compiler to provide a global pointer to a pointer in a DLL, so that it can be referenced with the dllimport attribute. On Microsoft Windows targets, the pointer name is formed by combining __imp_ and the function or variable name.

For example, since the ".LC0" variable is defined to be internal, if another module defined a ".LC0" variable and was linked with this one, one of the two would be renamed, preventing a collision. Since "main" and "puts" are external (i.e., lacking any linkage declarations), they are accessible outside of the current module.

It is illegal for a function declaration to have any linkage type other than "externally visible", dllimport or extern_weak.

Aliases can have only external, internal, weak or weak_odr linkages.

LLVM functions, calls and invokes can all have an optional calling convention specified for the call. The calling convention of any pair of dynamic caller/callee must match, or the behavior of the program is undefined. The following calling conventions are supported by LLVM, and more may be added in the future:

    "ccc" - The C calling convention:
    -
    This calling convention (the default if no other calling convention is - specified) matches the target C calling conventions. This calling convention - supports varargs function calls and tolerates some mismatch in the declared - prototype and implemented declaration of the function (as does normal C). -
    + specified) matches the target C calling conventions. This calling + convention supports varargs function calls and tolerates some mismatch in + the declared prototype and implemented declaration of the function (as + does normal C).
    "fastcc" - The fast calling convention:
    -
    This calling convention attempts to make calls as fast as possible - (e.g. by passing things in registers). This calling convention allows the - target to use whatever tricks it wants to produce fast code for the target, - without having to conform to an externally specified ABI (Application Binary - Interface). Implementations of this convention should allow arbitrary - tail call optimization to be - supported. This calling convention does not support varargs and requires the - prototype of all callees to exactly match the prototype of the function - definition. -
    + (e.g. by passing things in registers). This calling convention allows the + target to use whatever tricks it wants to produce fast code for the + target, without having to conform to an externally specified ABI + (Application Binary Interface). Implementations of this convention should + allow arbitrary tail call + optimization to be supported. This calling convention does not + support varargs and requires the prototype of all callees to exactly match + the prototype of the function definition.
    "coldcc" - The cold calling convention:
    -
    This calling convention attempts to make code in the caller as efficient - as possible under the assumption that the call is not commonly executed. As - such, these calls often preserve all registers so that the call does not break - any live ranges in the caller side. This calling convention does not support - varargs and requires the prototype of all callees to exactly match the - prototype of the function definition. -
    + as possible under the assumption that the call is not commonly executed. + As such, these calls often preserve all registers so that the call does + not break any live ranges in the caller side. This calling convention + does not support varargs and requires the prototype of all callees to + exactly match the prototype of the function definition.
    "cc <n>" - Numbered convention:
    -
    Any calling convention may be specified by number, allowing - target-specific calling conventions to be used. Target specific calling - conventions start at 64. -
    + target-specific calling conventions to be used. Target specific calling + conventions start at 64.

More calling conventions can be added/defined on an as-needed basis, to support Pascal conventions or any other well-known target-independent convention.

All Global Variables and Functions have one of the following visibility styles:

    "default" - Default style:
    -
    On targets that use the ELF object file format, default visibility means - that the declaration is visible to other - modules and, in shared libraries, means that the declared entity may be - overridden. On Darwin, default visibility means that the declaration is - visible to other modules. Default visibility corresponds to "external - linkage" in the language. -
    + that the declaration is visible to other modules and, in shared libraries, + means that the declared entity may be overridden. On Darwin, default + visibility means that the declaration is visible to other modules. Default + visibility corresponds to "external linkage" in the language.
    "hidden" - Hidden style:
    -
    Two declarations of an object with hidden visibility refer to the same - object if they are in the same shared object. Usually, hidden visibility - indicates that the symbol will not be placed into the dynamic symbol table, - so no other module (executable or shared library) can reference it - directly. -
    + object if they are in the same shared object. Usually, hidden visibility + indicates that the symbol will not be placed into the dynamic symbol + table, so no other module (executable or shared library) can reference it + directly.
    "protected" - Protected style:
    -
    On ELF, protected visibility indicates that the symbol will be placed in - the dynamic symbol table, but that references within the defining module will - bind to the local symbol. That is, the symbol cannot be overridden by another - module. -
    + the dynamic symbol table, but that references within the defining module + will bind to the local symbol. That is, the symbol cannot be overridden by + another module.
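As a sketch, the visibility style is written before the rest of the definition (names invented for illustration; omitting the style gives default visibility):

    @normal = global i32 0                     ; default visibility
    @local_only = hidden global i32 0
    @not_overridable = protected global i32 0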
LLVM IR allows you to specify name aliases for certain types. This can make it easier to read the IR and make the IR more condensed (particularly when recursive types are involved). An example of a name specification is:
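    %mytype = type { %mytype*, i32 }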

You may give a name to any type except "void". Type name aliases may be used anywhere a type is expected with the syntax "%mytype".

Note that type names are aliases for the structural type that they indicate, and that you can therefore specify multiple names for the same type. This often leads to confusing behavior when dumping out a .ll file. Since LLVM IR uses structural typing, the name is not part of the type. When printing out LLVM IR, the printer will pick one name to render all types of a particular shape. This means that if you have code where two different source types end up having the same LLVM type, the dumper will sometimes print the "wrong" or unexpected type. This is an important design point and isn't going to change.

Global variables define regions of memory allocated at compilation time instead of run-time. Global variables may optionally be initialized, may have an explicit section to be placed in, and may have an optional explicit alignment specified. A variable may be defined as "thread_local", which means that it will not be shared by threads (each thread will have a separate copy of the variable). A variable may be defined as a global "constant", which indicates that the contents of the variable will never be modified (enabling better optimization, allowing the global data to be placed in the read-only section of an executable, etc). Note that variables that need runtime initialization cannot be marked "constant" as there is a store to the variable.

LLVM explicitly allows declarations of global variables to be marked constant, even if the final definition of the global is not. This capability can be used to enable slightly better optimization of the program, but requires the language definition to guarantee that optimizations based on the 'constantness' are valid for the translation units that do not include the definition.

As SSA values, global variables define pointer values that are in scope (i.e. they dominate) all basic blocks in the program. Global variables always define a pointer to their "content" type because they describe a region of memory, and all memory objects in LLVM are accessed through pointers.

A global variable may be declared to reside in a target-specific numbered address space. For targets that support them, address spaces may affect how optimizations are performed and/or what target instructions are used to access the variable. The default address space is zero. The address space qualifier must precede any other attributes.

LLVM allows an explicit section to be specified for globals. If the target supports it, it will emit globals to the section specified.

An explicit alignment may be specified for a global. If not present, or if the alignment is set to zero, the alignment of the global is set by the target to whatever it feels convenient. If an explicit alignment is specified, the global is forced to have at least that much alignment. All alignments must be a power of 2.

For example, the following defines a global in a numbered address space with an initializer, section, and alignment:
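    @G = addrspace(5) constant float 1.0, section "foo", align 4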

LLVM function definitions consist of the "define" keyword, an optional linkage type, an optional visibility style, an optional calling convention, a return type, an optional parameter attribute for the return type, a function name, a (possibly empty) argument list (each with optional parameter attributes), optional function attributes, an optional section, an optional alignment, an optional garbage collector name, an opening curly brace, a list of basic blocks, and a closing curly brace.

LLVM function declarations consist of the "declare" keyword, an optional linkage type, an optional visibility style, an optional calling convention, a return type, an optional parameter attribute for the return type, a function name, a possibly empty list of arguments, an optional alignment, and an optional garbage collector name.

A function definition contains a list of basic blocks, forming the CFG (Control Flow Graph) for the function. Each basic block may optionally start with a label (giving the basic block a symbol table entry), contains a list of instructions, and ends with a terminator instruction (such as a branch or function return).

The first basic block in a function is special in two ways: it is immediately executed on entrance to the function, and it is not allowed to have predecessor basic blocks (i.e. there can not be any branches to the entry block of a function). Because the block can have no predecessors, it also cannot have any PHI nodes.
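For example, the following invented function shows an entry block ending in a conditional branch, a second block ending in an unconditional branch, and a PHI node that is legal only because it is not in the entry block (a sketch; @abs and the labels are made-up names):

    define i32 @abs(i32 %x) {
    entry:
      %isneg = icmp slt i32 %x, 0
      br i1 %isneg, label %flip, label %done
    flip:
      %neg = sub i32 0, %x
      br label %done
    done:
      %r = phi i32 [ %neg, %flip ], [ %x, %entry ]
      ret i32 %r
    }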

LLVM allows an explicit section to be specified for functions. If the target supports it, it will emit functions to the section specified.

An explicit alignment may be specified for a function. If not present, or if the alignment is set to zero, the alignment of the function is set by the target to whatever it feels convenient. If an explicit alignment is specified, the function is forced to have at least that much alignment. All alignments must be a power of 2.

Syntax:
    define [linkage] [visibility]
           [cconv] [ret attrs]
           <ResultType> @<FunctionName> ([argument list])
           [fn Attrs] [section "name"] [align N]
           [gc] { ... }

Aliases act as a "second name" for the aliasee value (which can be either a function, global variable, another alias or bitcast of global value). Aliases may have an optional linkage type, and an optional visibility style.

Syntax:

    @<Name> = alias [Linkage] [Visibility] <AliaseeTy> @<Aliasee>
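For example, an alias giving a second name to a function might look like this (an invented sketch; @sqrt_impl is a made-up name):

    @fast_sqrt = alias float (float)* @sqrt_impl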

The return type and each parameter of a function type may have a set of parameter attributes associated with them. Parameter attributes are used to communicate additional information about the result or parameters of a function. Parameter attributes are considered to be part of the function, not of the function type, so functions with different parameter attributes can have the same function type.

Parameter attributes are simple keywords that follow the type specified. If multiple parameter attributes are needed, they are space separated. For example:
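    declare i32 @printf(i8* noalias nocapture, ...)
    declare i32 @atoi(i8 zeroext)
    declare signext i8 @returns_signed_char()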

Note that any attributes for the function result (nounwind, readonly) come immediately after the argument list.

Currently, only the following parameter attributes are defined:

zeroext
This indicates to the code generator that the parameter or return value should be zero-extended to a 32-bit value by the caller (for a parameter) or the callee (for a return value).

signext
This indicates to the code generator that the parameter or return value should be sign-extended to a 32-bit value by the caller (for a parameter) or the callee (for a return value).

inreg
This indicates that this parameter or return value should be treated in a special target-dependent fashion while emitting code for a function call or return (usually, by putting it in a register as opposed to memory, though some targets use it to distinguish between two different kinds of registers). Use of this attribute is target-specific.

byval
This indicates that the pointer parameter should really be passed by value to the function. The attribute implies that a hidden copy of the pointee is made between the caller and the callee, so the callee is unable to modify the value in the callee. This attribute is only valid on LLVM pointer arguments. It is generally used to pass structs and arrays by value, but is also valid on pointers to scalars. The copy is considered to belong to the caller not the callee (for example, readonly functions should not write to byval parameters). This is not a valid attribute for return values. The byval attribute also supports specifying an alignment with the align attribute. This has a target-specific effect on the code generator that usually indicates a desired alignment for the synthesized stack slot.

sret
This indicates that the pointer parameter specifies the address of a structure that is the return value of the function in the source program. This pointer must be guaranteed by the caller to be valid: loads and stores to the structure may be assumed by the callee not to trap. This may only be applied to the first parameter. This is not a valid attribute for return values.

noalias
This indicates that the pointer does not alias any global or any other parameter. The caller is responsible for ensuring that this is the case. On a function return value, noalias additionally indicates that the pointer does not alias any other pointers visible to the caller. For further details, please see the discussion of the NoAlias response in alias analysis.

nocapture
This indicates that the callee does not make any copies of the pointer that outlive the callee itself. This is not a valid attribute for return values.

nest
This indicates that the pointer parameter can be excised using the trampoline intrinsics. This is not a valid attribute for return values.
Each function may specify a garbage collector name, which is simply a string:

    define void @f() gc "name" { ... }

The compiler declares the supported values of name. Specifying a collector will cause the compiler to alter its output in order to support the named garbage collection algorithm.
Function attributes are set to communicate additional information about a function. Function attributes are considered to be part of the function, not of the function type, so functions with different function attributes can have the same function type.

Function attributes are simple keywords that follow the type specified. If multiple attributes are needed, they are space separated. For example:
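    define void @f() noinline { ... }
    define void @f() alwaysinline { ... }
    define void @f() alwaysinline optsize { ... }
    define void @f() optsize { ... }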

alwaysinline
This attribute indicates that the inliner should attempt to inline this function into callers whenever possible, ignoring any active inlining size threshold for this caller.

inlinehint
This attribute indicates that the source code contained a hint that inlining this function is desirable (such as the "inline" keyword in C/C++). It is just a hint; it imposes no requirements on the inliner.

noinline
This attribute indicates that the inliner should never inline this function in any situation. This attribute may not be used together with the alwaysinline attribute.

optsize
This attribute suggests that optimization passes and code generator passes make choices that keep the code size of this function low, and otherwise do optimizations specifically to reduce code size.

noreturn
This function attribute indicates that the function never returns normally. This produces undefined behavior at runtime if the function ever does dynamically return.

nounwind
This function attribute indicates that the function never returns with an unwind or exceptional control flow. If the function does unwind, its runtime behavior is undefined.

readnone
This attribute indicates that the function computes its result (or decides to unwind an exception) based strictly on its arguments, without dereferencing any pointer arguments or otherwise accessing any mutable state (e.g. memory, control registers, etc) visible to caller functions. It does not write through any pointer arguments (including byval arguments) and never changes any state visible to callers. This means that it cannot unwind exceptions by calling the C++ exception throwing methods, but could use the unwind instruction.

readonly
This attribute indicates that the function does not write through any pointer arguments (including byval arguments) or otherwise modify any state (e.g. memory, control registers, etc) visible to caller functions. It may dereference pointer arguments and read state that may be set in the caller. A readonly function always returns the same value (or unwinds an exception identically) when called with the same set of arguments and global state. It cannot unwind an exception by calling the C++ exception throwing methods, but may use the unwind instruction.

ssp
This attribute indicates that the function should emit a stack smashing protector. It is in the form of a "canary", a random value placed on the stack before the local variables, which is checked upon return from the function to see if it has been overwritten. A heuristic is used to determine if a function needs stack protectors or not.
If a function that has an ssp attribute is inlined into a function that doesn't have an ssp attribute, then the resulting function will have an ssp attribute.

sspreq
This attribute indicates that the function should always emit a stack smashing protector. This overrides the ssp function attribute.
If a function that has an sspreq attribute is inlined into a function that doesn't have an sspreq attribute or which has an ssp attribute, then the resulting function will have an sspreq attribute.

noredzone
This attribute indicates that the code generator should not use a red zone, even if the target-specific ABI normally permits it.

noimplicitfloat
This attribute disables implicit floating point instructions.

naked
This attribute disables prologue / epilogue emission for the function. This can have very system-specific consequences.
Modules may contain "module-level inline asm" blocks, which correspond to GCC "file scope inline asm" blocks. These blocks are internally concatenated by LLVM and treated as a single unit, but may be separated in the .ll file if desired. The syntax is very simple:
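    module asm "inline asm code goes here"
    module asm "more can go here"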

The strings can contain any character by escaping non-printable characters. The escape sequence used is simply "\xx" where "xx" is the two digit hex code for the number.

The inline asm code is simply printed to the machine code .s file when assembly code is generated.

A module may specify a target specific data layout string that specifies how data is to be laid out in memory. The syntax for the data layout is simply:

    target datalayout = "layout specification"

The layout specification consists of a list of specifications separated by the minus sign character ('-'). Each specification starts with a letter and may include other information after the letter to define some aspect of the data layout. The specifications accepted are as follows:

E
Specifies that the target lays out data in big-endian form. That is, the bits with the most significance have the lowest address location.

e
Specifies that the target lays out data in little-endian form. That is, the bits with the least significance have the lowest address location.

p:<size>:<abi>:<pref>
This specifies the size of a pointer and its abi and preferred alignments. All sizes are in bits. Specifying the pref alignment is optional. If omitted, the preceding ':' should be omitted too.

i<size>:<abi>:<pref>
This specifies the alignment for an integer type of a given bit size. The value of size must be in the range [1,2^23).

v<size>:<abi>:<pref>
This specifies the alignment for a vector type of a given bit size.

f<size>:<abi>:<pref>
This specifies the alignment for a floating point type of a given bit size. The value of size must be either 32 (float) or 64 (double).

a<size>:<abi>:<pref>
This specifies the alignment for an aggregate type of a given bit size.

s<size>:<abi>:<pref>
This specifies the alignment for a stack object of a given bit size.

When constructing the data layout for a given target, LLVM starts with a default set of specifications which are then (possibly) overridden by the specifications in the datalayout keyword. The default specifications are given in this list:

• E - big endian
• p:32:64:64 - 32-bit pointers with 64-bit alignment
• i1:8:8 - i1 is 8-bit (byte) aligned
• i8:8:8 - i8 is 8-bit (byte) aligned
• i16:16:16 - i16 is 16-bit aligned
• i32:32:32 - i32 is 32-bit aligned
• i64:32:64 - i64 has ABI alignment of 32-bits but preferred alignment of 64-bits
• f32:32:32 - float is 32-bit aligned
• f64:64:64 - double is 64-bit aligned
• v64:64:64 - 64-bit vector is 64-bit aligned
• v128:128:128 - 128-bit vector is 128-bit aligned
• a0:0:1 - aggregates are 8-bit aligned
• s0:64:64 - stack objects are 64-bit aligned

When LLVM is determining the alignment for a given type, it uses the following rules:

    +
    1. If the type sought is an exact match for one of the specifications, that - specification is used.
    2. + specification is used. +
    3. If no match is found, and the type sought is an integer type, then the - smallest integer type that is larger than the bitwidth of the sought type is - used. If none of the specifications are larger than the bitwidth then the the - largest integer type is used. For example, given the default specifications - above, the i7 type will use the alignment of i8 (next largest) while both - i65 and i256 will use the alignment of i64 (largest specified).
    4. + smallest integer type that is larger than the bitwidth of the sought type + is used. If none of the specifications are larger than the bitwidth then + the the largest integer type is used. For example, given the default + specifications above, the i7 type will use the alignment of i8 (next + largest) while both i65 and i256 will use the alignment of i64 (largest + specified). +
    5. If no match is found, and the type sought is a vector type, then the - largest vector type that is smaller than the sought vector type will be used - as a fall back. This happens because <128 x double> can be implemented - in terms of 64 <2 x double>, for example.
    6. + largest vector type that is smaller than the sought vector type will be + used as a fall back. This happens because <128 x double> can be + implemented in terms of 64 <2 x double>, for example.

Pointer Aliasing Rules

Any memory access must be done through a pointer value associated with an
address range of the memory access, otherwise the behavior is undefined.
Pointer values are associated with address ranges according to the
following rules:

• A pointer value formed from a getelementptr instruction is associated
  with the addresses associated with the first operand of the
  getelementptr.
• An address of a global variable is associated with the address range of
  the variable's storage.
• The result value of an allocation instruction is associated with the
  address range of the allocated storage.
• A null pointer in the default address-space is associated with no
  address.
• A pointer value formed by an inttoptr is associated with all address
  ranges of all pointer values that contribute (directly or indirectly) to
  the computation of the pointer's value.
• The result value of a bitcast is associated with all addresses associated
  with the operand of the bitcast.
• An integer constant other than zero or a pointer value returned from a
  function not defined within LLVM may be associated with address ranges
  allocated through mechanisms other than those provided by LLVM. Such
  ranges shall not overlap with any ranges of addresses allocated by
  mechanisms provided by LLVM.
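
As a small sketch of the first rule (the @buf global and the function name
are illustrative only), a load through a pointer derived from a global by
getelementptr is associated with that global's storage, so the access below
is well defined:

  @buf = global [4 x i32] zeroinitializer

  define i32 @read_elem() {
  entry:
    %p = getelementptr [4 x i32]* @buf, i32 0, i32 2  ; associated with @buf
    %v = load i32* %p
    ret i32 %v
  }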

LLVM IR does not associate types with memory. The result type of a load
merely indicates the size and alignment of the memory from which to load,
as well as the interpretation of the value. The first operand of a store
similarly only indicates the size and alignment of the store.

Consequently, type-based alias analysis, aka TBAA, aka -fstrict-aliasing,
is not applicable to general unadorned LLVM IR. Metadata may be used to
encode additional information which specialized optimization passes may use
to implement type-based alias analysis.

The LLVM type system is one of the most important features of the
intermediate representation. Being typed enables a number of optimizations
to be performed on the intermediate representation directly, without having
to do extra analyses on the side before the transformation. A strong type
system makes it easier to read the generated code and enables novel
analyses and transformations that are not feasible to perform on normal
three address code representations.

The types fall into a few useful classifications:

…

The first class types are perhaps the most important. Values of these types
are the only ones which can be produced by instructions.

The primitive types are the fundamental building blocks of the LLVM system.

Integer Type
    Overview:
The integer type is a very simple type that simply specifies an arbitrary
bit width for the integer type desired. Any bit width from 1 bit to 2^23-1
(about 8 million) can be specified.

Syntax:
    iN

The number of bits the integer will occupy is specified by the N value.

Examples:
    i1          a single-bit integer.
    i32         a 32-bit integer.
    i1942652    a really big integer of over 1 million bits.

Note that the code generator does not yet support large integer types to be
used as function return types. The specific limit on how large a return
type the code generator can currently handle is target-dependent; currently
it's often 64 bits for 32-bit targets and 128 bits for 64-bit targets.

Floating Point Types

Type        Description
float       32-bit floating point value
double      64-bit floating point value
fp128       128-bit floating point value (112-bit mantissa)
x86_fp80    80-bit floating point value (X87)
ppc_fp128   128-bit floating point value (two 64-bits)

Void Type
    Overview:

    The void type does not represent any value and has no size.

    Syntax:
    void

Label Type
    Overview:

    The label type represents code labels.

    Syntax:
    label

Metadata Type
    Overview:
The metadata type represents embedded metadata. No derived types may be
created from metadata except for function arguments.

    Syntax:
    metadata

Derived Types
The real power in LLVM comes from the derived types in the system. This is
what allows a programmer to represent arrays, functions, pointers, and
other useful types. Each of these types contains one or more element types
which may be a primitive type, or another derived type. For example, it is
possible to have a two dimensional array, using an array as the element
type of another array.

Array Type

    Overview:
The array type is a very simple derived type that arranges elements
sequentially in memory. The array type requires a size (number of elements)
and an underlying data type.

    Syntax:
    [<# elements> x <elementtype>]

The number of elements is a constant integer value; elementtype may be any
type with a size.

    Examples:
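A few illustrative array types (consistent with the syntax above):

    [40 x i32]          Array of 40 32-bit integer values.
    [4 x i8]            Array of 4 8-bit integer values.
    [3 x [4 x i32]]     3x4 array of 32-bit integer values.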

Note that 'variable sized arrays' can be implemented in LLVM with a zero
length array. Normally, accesses past the end of an array are undefined in
LLVM (e.g. it is illegal to access the 5th element of a 3 element array).
As a special case, however, zero length arrays are recognized to be
variable length. This allows implementation of 'pascal style arrays' with
the LLVM type "{ i32, [0 x float]}", for example.

Note that the code generator does not yet support large aggregate types to
be used as function return types. The specific limit on how large an
aggregate return type the code generator can currently handle is
target-dependent, and also dependent on the aggregate element types.

Function Type
    Overview:
The function type can be thought of as a function signature. It consists of
a return type and a list of formal parameter types. The return type of a
function type is a scalar type, a void type, or a struct type. If the
return type is a struct type then all struct elements must be of first
class types, and the struct must have at least one element.

    Syntax:
    <returntype> (<parameter list>)

...where '<parameter list>' is a comma-separated list of type specifiers.
Optionally, the parameter list may include a type ..., which indicates that
the function takes a variable number of arguments. Variable argument
functions can access their arguments with the variable argument handling
intrinsic functions. '<returntype>' is any type except label.

    Examples:
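For instance (illustrative signatures):

    i32 (i32)           A function taking an i32, returning an i32.
    i32 (i8*, ...)      A vararg function taking at least one pointer to
                        i8 (char in C), returning an integer; this is the
                        signature for printf in LLVM.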
    {i32, i32} (i32)    A function taking an i32, returning a structure
                        containing two i32 values.

Structure Type
    Overview:
The structure type is used to represent a collection of data members
together in memory. The packing of the field types is defined to match the
ABI of the underlying processor. The elements of a structure may be any
type that has a size.

Structures are accessed using 'load' and 'store' by getting a pointer to a
field with the 'getelementptr' instruction.

    Syntax:
    { <type list> }
    Examples:
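For instance (illustrative):

    { i32, i32, i32 }       A triple of three i32 values.
    { float, i32 (i32)* }   A pair, where the first element is a float and
                            the second element is a pointer to a function
                            that takes an i32, returning an i32.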

Note that the code generator does not yet support large aggregate types to
be used as function return types. The specific limit on how large an
aggregate return type the code generator can currently handle is
target-dependent, and also dependent on the aggregate element types.

Packed Structure Type
    Overview:

The packed structure type is used to represent a collection of data members
together in memory. There is no padding between fields. Further, the
alignment of a packed structure is 1 byte. The elements of a packed
structure may be any type that has a size.

Structures are accessed using 'load' and 'store' by getting a pointer to a
field with the 'getelementptr' instruction.

    Syntax:
    < { <type list> } >
    Examples:
    < { i32, i32, i32 } >       A triple of three i32 values (packed).
    < { float, i32 (i32)* } >   A pair, where the first element is a float
                                and the second element is a pointer to a
                                function that takes an i32, returning
                                an i32.

Pointer Type
    Overview:
As in many languages, the pointer type represents a pointer or reference to
another object, which must live in memory. Pointer types may have an
optional address space attribute defining the target-specific numbered
address space where the pointed-to object resides. The default address
space is zero.

Note that LLVM does not permit pointers to void (void*) nor does it permit
pointers to labels (label*). Use i8* instead.

    Syntax:
    <type> *
    Examples:
    [4 x i32]*          A pointer to an array of four i32 values.
    i32 (i32*) *        A pointer to a function that takes an i32*,
                        returning an i32.
    i32 addrspace(5)*   A pointer to an i32 value that resides in address
                        space #5.

Vector Type
    Overview:
A vector type is a simple derived type that represents a vector of
elements. Vector types are used when multiple primitive data are operated
in parallel using a single instruction (SIMD). A vector type requires a
size (number of elements) and an underlying primitive data type. Vectors
must have a power of two length (1, 2, 4, 8, 16 ...). Vector types are
considered first class.

    Syntax:
    < <# elements> x <elementtype> >

The number of elements is a constant integer value; elementtype may be any
integer or floating point type.

    Examples:
    <4 x i32>      Vector of 4 32-bit integer values.
    <8 x float>    Vector of 8 32-bit floating-point values.
    <2 x i64>      Vector of 2 64-bit integer values.

Note that the code generator does not yet support large vector types to be
used as function return types. The specific limit on how large a vector
return type codegen can currently handle is target-dependent; currently
it's often a few times longer than a hardware vector register.

Opaque Type

    Overview:
Opaque types are used to represent unknown types in the system. This
corresponds (for example) to the C notion of a forward declared structure
type. In LLVM, opaque types can eventually be resolved to any type (not
just a structure type).

    Syntax:
    opaque

Examples:
    opaque    An opaque type.

Type Up-references
    Overview:
An "up reference" allows you to refer to a lexically enclosing type without
requiring it to have a name. For instance, a structure declaration may
contain a pointer to any of the types it is lexically a member of. Examples
of up references (with their equivalent as named type declarations)
include:

        { \2 * }                %x = type { %x* }
    { \2 }*                 %y = type { %y }*
    \1*                     %z = type %z*
    -

    -An up reference is needed by the asmprinter for printing out cyclic types when -there is no declared name for a type in the cycle. Because the asmprinter does -not want to print out an infinite type string, it needs a syntax to handle -recursive types that have no names (all names are optional in llvm IR). -

    +

    An up reference is needed by the asmprinter for printing out cyclic types + when there is no declared name for a type in the cycle. Because the + asmprinter does not want to print out an infinite type string, it needs a + syntax to handle recursive types that have no names (all names are optional + in llvm IR).

    Syntax:
        \<level>
     
The level is the count of the lexical type that is being referred to.

    Examples:
    \1*             Self-referential pointer.
    { { \3* } }     Recursive structure where the upref refers to the
                    out-most structure.

Constants
    @@ -1783,7 +1886,7 @@ The level is the count of the lexical type that is being referred to.

LLVM has several different basic types of constants. This section describes
them all and their syntax.

Simple Constants

    Boolean constants
    The two strings 'true' and 'false' are both valid constants of the i1
    type.
    Integer constants
    Standard integers (such as '4') are constants of the integer type.
    Negative numbers may be used with integer types.
    Floating point constants
    Floating point constants use standard decimal notation (e.g. 123.421),
    exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal
    notation (see below). The assembler requires the exact decimal value of
    a floating-point constant. For example, the assembler accepts 1.25 but
    rejects 1.3 because 1.3 is a repeating decimal in binary. Floating
    point constants must have a floating point type.
    Null pointer constants
    The identifier 'null' is recognized as a null pointer constant and must
    be of pointer type.
The one non-intuitive notation for constants is the hexadecimal form of
floating point constants. For example, the form 'double
0x432ff973cafa8000' is equivalent to (but harder to read than) 'double
4.5e+15'. The only time hexadecimal floating point constants are required
(and the only time that they are generated by the disassembler) is when a
floating point constant must be emitted but it cannot be represented as a
decimal floating point number in a reasonable number of digits. For
example, NaN's, infinities, and other special values are represented in
their IEEE hexadecimal format so that assembly and disassembly do not cause
any bits to change in the constants.

When using the hexadecimal form, constants of types float and double are
represented using the 16-digit form shown above (which matches the IEEE754
representation for double); float values must, however, be exactly
representable as IEEE754 single precision. Hexadecimal format is always
used for long double, and there are three forms of long double. The 80-bit
format used by x86 is represented as 0xK followed by 20 hexadecimal digits.
The 128-bit format used by PowerPC (two adjacent doubles) is represented by
0xM followed by 32 hexadecimal digits. The IEEE 128-bit format is
represented by 0xL followed by 32 hexadecimal digits; no currently
supported target uses this format. Long doubles will only work if they
match the long double format on your target. All hexadecimal formats are
big-endian (sign bit at the left).
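
A small illustration (the globals are hypothetical):

    @d  = global double 0x432ff973cafa8000           ; exactly 4.5e+15
    @ld = global x86_fp80 0xK4001E000000000000000    ; 80-bit x87 value 7.0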

Complex Constants

Complex constants are a (potentially recursive) combination of simple
constants and smaller complex constants.

    Structure constants
    Structure constants are represented with notation similar to structure
    type definitions (a comma separated list of elements, surrounded by
    braces ({})). For example: "{ i32 4, float 17.0, i32* @G }", where
    "@G" is declared as "@G = external global i32". Structure constants
    must have structure type, and the number and types of elements must
    match those specified by the type.
    Array constants
    Array constants are represented with notation similar to array type
    definitions (a comma separated list of elements, surrounded by square
    brackets ([])). For example: "[ i32 42, i32 11, i32 74 ]". Array
    constants must have array type, and the number and types of elements
    must match those specified by the type.
    Vector constants
    Vector constants are represented with notation similar to vector type
    definitions (a comma separated list of elements, surrounded by
    less-than/greater-than's (<>)). For example: "< i32 42, i32 11, i32
    74, i32 100 >". Vector constants must have vector type, and the number
    and types of elements must match those specified by the type.
    Zero initialization
    The string 'zeroinitializer' can be used to zero initialize a value to
    zero of any type, including scalar and aggregate types. This is often
    used to avoid having to print large zero initializers (e.g. for large
    arrays) and is always exactly equivalent to using explicit zero
    initializers.
    Metadata node
    A metadata node is a structure-like constant with metadata type. For
    example: "metadata !{ i32 0, metadata !"test" }". Unlike other
    constants that are meant to be interpreted as part of the instruction
    stream, metadata is a place to attach additional information such as
    debug info.
    @@ -1917,12 +2005,12 @@ constants and smaller complex constants.

    -

    The addresses of global variables and functions are always implicitly valid (link-time) -constants. These constants are explicitly referenced when the identifier for the global is used and always have pointer type. For example, the following is a legal LLVM -file:

    +

    The addresses of global variables + and functions are always implicitly valid + (link-time) constants. These constants are explicitly referenced when + the identifier for the global is used and always + have pointer type. For example, the following is a + legal LLVM file:

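For instance (a minimal illustrative module):

  @X = global i32 17
  @Y = global i32 42
  @Z = global [2 x i32*] [ i32* @X, i32* @Y ]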

Undefined Values

The string 'undef' can be used anywhere a constant is expected, and
indicates that the user of the value may receive an unspecified
bit-pattern. Undefined values may be of any type (other than label or
void) and be used anywhere a constant is permitted.

Undefined values are useful because they indicate to the compiler that the
program is well defined no matter what value is used. This gives the
compiler more freedom to optimize. Here are some examples of (potentially
surprising) transformations that are valid (in pseudo IR):

  %A = add %X, undef
  %B = sub %X, undef
  %C = xor %X, undef
Safe:
  %A = undef
  %B = undef
  %C = undef

This is safe because all of the output bits are affected by the undef bits.
Any output bit can have a zero or one depending on the input bits.

  %A = or %X, undef
  %B = and %X, undef
Safe:
  %A = -1
  %B = 0
Unsafe:
  %A = undef
  %B = undef

These logical operations have bits that are not always affected by the
input. For example, if "%X" has a zero bit, then the output of the 'and'
operation will always be a zero, no matter what the corresponding bit from
the undef is. As such, it is unsafe to optimize or assume that the result
of the and is undef. However, it is safe to assume that all bits of the
undef could be 0, and optimize the and to 0. Likewise, it is safe to
assume that all the bits of the undef operand to the or could be set,
allowing the or to be folded to -1.

  %A = select undef, %X, %Y
  %B = select undef, 42, %Y
  %C = select %X, %Y, undef
Safe:
  %A = %X     (or %Y)
  %B = 42     (or %Y)
  %C = %Y
Unsafe:
  %A = undef
  %B = undef
  %C = undef

This set of examples shows that undefined select (and conditional branch)
conditions can go "either way", but they have to come from one of the two
operands. In the %A example, if %X and %Y were both known to have a clear
low bit, then %A would have to have a cleared low bit. However, in the %C
example, the optimizer is allowed to assume that the undef operand could be
the same as %Y, allowing the whole select to be eliminated.

  %A = xor undef, undef

  %B = undef
  %C = xor %B, %B

  %D = undef
  %E = icmp lt %D, 4
  %F = icmp gte %D, 4

Safe:
  %A = undef
  %B = undef
  %C = undef
  %D = undef
  %E = undef
  %F = undef

This example points out that two undef operands are not necessarily the
same. This can be surprising to people (and also matches C semantics)
where they assume that "X^X" is always zero, even if X is undef. This
isn't true for a number of reasons, but the short answer is that an undef
"variable" can arbitrarily change its value over its "live range". This is
true because the "variable" doesn't actually have a live range. Instead,
the value is logically read from arbitrary registers that happen to be
around when needed, so the value is not necessarily consistent over time.
In fact, %A and %C need to have the same semantics or the core LLVM
"replace all uses with" concept would not hold.

  %A = fdiv undef, %X
  %B = fdiv %X, undef
Safe:
  %A = undef
b: unreachable

These examples show the crucial difference between an undefined value and
undefined behavior. An undefined value (like undef) is allowed to have an
arbitrary bit-pattern. This means that the %A operation can be constant
folded to undef because the undef could be an SNaN, and fdiv is not
(currently) defined on SNaN's. However, in the second example, we can make
a more aggressive assumption: because the undef is allowed to be an
arbitrary value, we are allowed to assume that it could be zero. Since a
divide by zero has undefined behavior, we are allowed to assume that the
operation does not execute at all. This allows us to delete the divide and
all code after it: since the undefined operation "can't happen", the
optimizer can assume that it occurs in dead code.

a:  store undef -> %X
b:  store %X -> undef
Safe:
a: <deleted>
b: unreachable

These examples reiterate the fdiv example: a store "of" an undefined value
can be assumed to not have any effect: we can assume that the value is
overwritten with bits that happen to match what was already there.
However, a store "to" an undefined location could clobber arbitrary
memory, therefore, it has undefined behavior.

Constant Expressions

Constant expressions are used to allow expressions involving other
constants to be used as constants. Constant expressions may be of any
first class type and may involve any LLVM operation that does not have
side effects (e.g. load and call are not supported). The following is the
syntax for constant expressions:
    trunc ( CST to TYPE )
    -
    Truncate a constant to another type. The bit size of CST must be larger - than the bit size of TYPE. Both types must be integers.
    +
    Truncate a constant to another type. The bit size of CST must be larger + than the bit size of TYPE. Both types must be integers.
    zext ( CST to TYPE )
    -
    Zero extend a constant to another type. The bit size of CST must be - smaller or equal to the bit size of TYPE. Both types must be integers.
    +
    Zero extend a constant to another type. The bit size of CST must be + smaller or equal to the bit size of TYPE. Both types must be + integers.
    sext ( CST to TYPE )
    -
    Sign extend a constant to another type. The bit size of CST must be - smaller or equal to the bit size of TYPE. Both types must be integers.
    +
    Sign extend a constant to another type. The bit size of CST must be + smaller or equal to the bit size of TYPE. Both types must be + integers.
    fptrunc ( CST to TYPE )
    -
    Truncate a floating point constant to another floating point type. The - size of CST must be larger than the size of TYPE. Both types must be - floating point.
    +
    Truncate a floating point constant to another floating point type. The + size of CST must be larger than the size of TYPE. Both types must be + floating point.
    fpext ( CST to TYPE )
    -
    Floating point extend a constant to another type. The size of CST must be - smaller or equal to the size of TYPE. Both types must be floating point.
    +
    Floating point extend a constant to another type. The size of CST must be + smaller or equal to the size of TYPE. Both types must be floating + point.
    fptoui ( CST to TYPE )
    Convert a floating point constant to the corresponding unsigned integer - constant. TYPE must be a scalar or vector integer type. CST must be of scalar - or vector floating point type. Both CST and TYPE must be scalars, or vectors - of the same number of elements. If the value won't fit in the integer type, - the results are undefined.
    + constant. TYPE must be a scalar or vector integer type. CST must be of + scalar or vector floating point type. Both CST and TYPE must be scalars, + or vectors of the same number of elements. If the value won't fit in the + integer type, the results are undefined.
    fptosi ( CST to TYPE )
    Convert a floating point constant to the corresponding signed integer - constant. TYPE must be a scalar or vector integer type. CST must be of scalar - or vector floating point type. Both CST and TYPE must be scalars, or vectors - of the same number of elements. If the value won't fit in the integer type, - the results are undefined.
    + constant. TYPE must be a scalar or vector integer type. CST must be of + scalar or vector floating point type. Both CST and TYPE must be scalars, + or vectors of the same number of elements. If the value won't fit in the + integer type, the results are undefined.
    uitofp ( CST to TYPE )
    Convert an unsigned integer constant to the corresponding floating point - constant. TYPE must be a scalar or vector floating point type. CST must be of - scalar or vector integer type. Both CST and TYPE must be scalars, or vectors - of the same number of elements. If the value won't fit in the floating point - type, the results are undefined.
    + constant. TYPE must be a scalar or vector floating point type. CST must be + of scalar or vector integer type. Both CST and TYPE must be scalars, or + vectors of the same number of elements. If the value won't fit in the + floating point type, the results are undefined.
    sitofp ( CST to TYPE )
    Convert a signed integer constant to the corresponding floating point - constant. TYPE must be a scalar or vector floating point type. CST must be of - scalar or vector integer type. Both CST and TYPE must be scalars, or vectors - of the same number of elements. If the value won't fit in the floating point - type, the results are undefined.
    + constant. TYPE must be a scalar or vector floating point type. CST must be + of scalar or vector integer type. Both CST and TYPE must be scalars, or + vectors of the same number of elements. If the value won't fit in the + floating point type, the results are undefined.
    ptrtoint ( CST to TYPE )
    Convert a pointer typed constant to the corresponding integer constant - TYPE must be an integer type. CST must be of pointer type. The CST value is - zero extended, truncated, or unchanged to make it fit in TYPE.
    + TYPE must be an integer type. CST must be of pointer + type. The CST value is zero extended, truncated, or unchanged to + make it fit in TYPE.
    inttoptr ( CST to TYPE )
    -
    Convert a integer constant to a pointer constant. TYPE must be a - pointer type. CST must be of integer type. The CST value is zero extended, - truncated, or unchanged to make it fit in a pointer size. This one is - really dangerous!
    +
    Convert a integer constant to a pointer constant. TYPE must be a pointer + type. CST must be of integer type. The CST value is zero extended, + truncated, or unchanged to make it fit in a pointer size. This one is + really dangerous!
    bitcast ( CST to TYPE )
    Convert a constant, CST, to another TYPE. The constraints of the operands @@ -2025,16 +2254,14 @@ following is the syntax for constant expressions:

    instruction.
    getelementptr ( CSTPTR, IDX0, IDX1, ... )
    getelementptr inbounds ( CSTPTR, IDX0, IDX1, ... )
    Perform the getelementptr operation on constants. As with the
    getelementptr instruction, the index list may have zero or more
    indexes, which are required to make sense for the type of "CSTPTR".
    select ( COND, VAL1, VAL2 )
    Perform the select operation on constants.
    icmp COND ( VAL1, VAL2 )
    Performs the icmp operation on constants.

    fcmp COND ( VAL1, VAL2 )
    Performs the fcmp operation on constants.
    extractelement ( VAL, IDX )
    Perform the extractelement operation on constants.
    insertelement ( VAL, ELT, IDX )
    Perform the insertelement operation on constants.
    shufflevector ( VEC1, VEC2, IDXMASK )
    Perform the shufflevector operation on constants.
    OPCODE ( LHS, RHS )
    Perform the specified operation of the LHS and RHS constants. OPCODE
    may be any of the binary or bitwise binary operations. The constraints
    on operands are the same as those for the corresponding instruction
    (e.g. no bitwise operations on floating point values are allowed).

Embedded Metadata

Embedded metadata provides a way to attach arbitrary data to the
instruction stream without affecting the behaviour of the program. There
are two metadata primitives, strings and nodes. All metadata has the
metadata type and is identified in syntax by a preceding exclamation point
('!').

A metadata string is a string surrounded by double quotes. It can contain
any character by escaping non-printable characters with "\xx" where "xx"
is the two digit hex code. For example: "!"test\00"".

Metadata nodes are represented with notation similar to structure constants
(a comma separated list of elements, surrounded by braces and preceded by
an exclamation point). For example: "!{ metadata !"test\00", i32 10}".

A metadata node will attempt to track changes to the values it holds. In
the event that a value is deleted, it will be replaced with a typeless
"null", such as "metadata !{null, i32 10}".

Optimizations may rely on metadata to provide additional information about
the program that isn't available in the instructions, or that isn't easily
computable. Similarly, the code generator may expect a certain metadata
format to be used to express debugging information.

Inline Assembler Expressions
    -

    -LLVM supports inline assembler expressions (as opposed to -Module-Level Inline Assembly) through the use of a special value. This -value represents the inline assembler as a string (containing the instructions -to emit), a list of operand constraints (stored as a string), and a flag that -indicates whether or not the inline asm expression has side effects. An example -inline assembler expression is: -

    +

    LLVM supports inline assembler expressions (as opposed + to Module-Level Inline Assembly) through the use of + a special value. This value represents the inline assembler as a string + (containing the instructions to emit), a list of operand constraints (stored + as a string), a flag that indicates whether or not the inline asm + expression has side effects, and a flag indicating whether the asm came + originally from an asm block. An example inline assembler + expression is:

    i32 (i32) asm "bswap $0", "=r,r"
     
Inline assembler expressions may only be used as the callee operand of a
call instruction. Thus, typically we have:

    %X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
     
Inline asms with side effects not visible in the constraint list must be
marked as having side effects. This is done through the use of the
'sideeffect' keyword, like so:

    call void asm sideeffect "eieio", ""()
     
Inline asms derived from asm blocks are similarly marked with the 'msasm'
keyword:

    call void asm msasm "eieio", ""()

If both keywords appear, the 'sideeffect' keyword must come first.


TODO: The format of the asm and constraints string still needs to be
documented here. Constraints on what can be done (e.g. duplication,
moving, etc.) need to be documented. This is probably best done by
reference to another document that covers inline asm from a holistic
perspective.

Intrinsic Global Variables

LLVM has a number of "magic" global variables that contain data that affect
code generation or other IR semantics. These are documented here. All
globals of this sort should have a section specified as "llvm.metadata".
This section and all globals that start with "llvm." are reserved for use
by LLVM.

The '@llvm.used' Global Variable

The @llvm.used global is an array with i8* element type which has appending
linkage. This array contains a list of pointers to global variables and
functions which may optionally have a pointer cast formed of bitcast or
getelementptr. For example, a legal use of it is:

  @X = global i8 4
  @Y = global i32 123

  @llvm.used = appending global [2 x i8*] [
     i8* @X,
     i8* bitcast (i32* @Y to i8*)
  ], section "llvm.metadata"

If a global variable appears in the @llvm.used list, then the compiler,
assembler, and linker are required to treat the symbol as if there is a
reference to the global that it cannot see. For example, if a variable has
internal linkage and no references other than that from the @llvm.used
list, it cannot be deleted. This is commonly used to represent references
from inline asms and other things the compiler cannot "see", and
corresponds to "__attribute__((used))" in GNU C.


On some targets, the code generator must emit a directive to the assembler
or object file to prevent the assembler and linker from molesting the
symbol.

The '@llvm.compiler.used' Global Variable

The @llvm.compiler.used directive is the same as the @llvm.used directive,
except that it only prevents the compiler from touching the symbol. On
targets that support it, this allows an intelligent linker to optimize
references to the symbol without being impeded as it would be by
@llvm.used.


This is a rare construct that should only be used in rare circumstances,
and should not be exposed to source languages.
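
A minimal sketch, following the same shape as @llvm.used (the @Z global is
illustrative):

  @Z = internal global i32 0

  @llvm.compiler.used = appending global [1 x i8*] [
     i8* bitcast (i32* @Z to i8*)
  ], section "llvm.metadata"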

The 'llvm.global_ctors' Global Variable
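
Its declaration has this shape (the @ctor function is illustrative):

  %0 = type { i32, void ()* }
  @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]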

    TODO: Describe this.

The 'llvm.global_dtors' Global Variable
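
Its declaration mirrors llvm.global_ctors (the @dtor function is
illustrative):

  %0 = type { i32, void ()* }
  @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]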

    TODO: Describe this.

Instruction Reference
    The LLVM instruction set consists of several different -classifications of instructions: terminator -instructions, binary instructions, -bitwise binary instructions, memory instructions, and other -instructions.

    +

    The LLVM instruction set consists of several different classifications of + instructions: terminator + instructions, binary instructions, + bitwise binary instructions, + memory instructions, and + other instructions.

Terminator Instructions

As mentioned previously, every basic block in a program ends with a
"Terminator" instruction, which indicates which block should be executed
after the current block is finished. These terminator instructions
typically yield a 'void' value: they produce control flow, not values (the
one exception being the 'invoke' instruction).

There are six different terminator instructions: the 'ret' instruction,
the 'br' instruction, the 'switch' instruction, the 'invoke' instruction,
the 'unwind' instruction, and the 'unreachable' instruction.

'ret' Instruction
    Syntax:
       ret <type> <value>       ; Return a value from a non-void function
   ret void                        ; Return from a void function
    Overview:
The 'ret' instruction is used to return control flow (and optionally a
value) from a function back to the caller.

There are two forms of the 'ret' instruction: one that returns a value and
then causes control flow, and one that just causes control flow to occur.

    Arguments:
The 'ret' instruction optionally accepts a single argument, the return
value. The type of the return value must be a 'first class' type.

A function is not well formed if it has a non-void return type and
contains a 'ret' instruction with no return value or a return value with a
type that does not match its type, or if it has a void return type and
contains a 'ret' instruction with a return value.

    Semantics:
When the 'ret' instruction is executed, control flow returns back to the
calling function's context. If the caller is a "call" instruction,
execution continues at the instruction after the call. If the caller was
an "invoke" instruction, execution continues at the beginning of the
"normal" destination block. If the instruction returns a value, that value
shall set the call or invoke instruction's return value.

    Example:
       ret i32 5                       ; Return an integer value of 5
       ret void                        ; Return from a void function
   ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2

'br' Instruction
    Syntax:
  br i1 <cond>, label <iftrue>, label <iffalse>
  br label <dest>          ; Unconditional branch
    Overview:
The 'br' instruction is used to cause control flow to transfer to a
different basic block in the current function. There are two forms of this
instruction, corresponding to a conditional branch and an unconditional
branch.

    Arguments:
The conditional branch form of the 'br' instruction takes a single 'i1'
value and two 'label' values. The unconditional form of the 'br'
instruction takes a single 'label' value as a target.

    Semantics:

Upon execution of a conditional 'br' instruction, the 'i1' argument is
evaluated. If the value is true, control flows to the 'iftrue' label
argument. If "cond" is false, control flows to the 'iffalse' label
argument.

    Example:
Test:
  %cond = icmp eq i32 %a, %b
  br i1 %cond, label %IfEqual, label %IfUnequal
IfEqual:
  ret i32 1
IfUnequal:
  ret i32 0

'switch' Instruction
    +
    Syntax:
       switch <intty> <value>, label <defaultdest> [ <intty> <val>, label <dest> ... ]
     
    Overview:

The 'switch' instruction is used to transfer control flow to one of
several different places. It is a generalization of the 'br' instruction,
allowing a branch to occur to one of many possible destinations.

    Arguments:
The 'switch' instruction uses three parameters: an integer comparison
value 'value', a default 'label' destination, and an array of pairs of
comparison value constants and 'label's. The table is not allowed to
contain duplicate constant entries.

Semantics:

The switch instruction specifies a table of values and destinations. When the
'switch' instruction is executed, this table is searched for the given value.
If the value is found, control flow is transferred to the corresponding
destination; otherwise, control flow is transferred to the default
destination.

Implementation:

Depending on properties of the target machine and the particular switch
instruction, this instruction may be code generated in different ways. For
example, it could be generated as a series of chained conditional branches or
with a lookup table.

Example:

  ; Emulate a conditional br instruction
  %Val = zext i1 %value to i32
  switch i32 %Val, label %truedest [ i32 0, label %falsedest ]

  ; Emulate an unconditional br instruction
  switch i32 0, label %dest [ ]

  ; Implement a jump table:
  switch i32 %val, label %otherwise [ i32 0, label %onzero
                                      i32 1, label %onone
                                      i32 2, label %ontwo ]
'invoke' Instruction

Syntax:
       <result> = invoke [cconv] [ret attrs] <ptr to function ty> <function ptr val>(<function args>) [fn attrs]
                     to label <normal label> unwind label <exception label>
     
Overview:

The 'invoke' instruction causes control to transfer to a specified function,
with the possibility of control flow transfer to either the 'normal' label or
the 'exception' label. If the callee function returns with the "ret"
instruction, control flow will return to the "normal" label. If the callee
(or any indirect callees) returns with the "unwind" instruction, control is
interrupted and continued at the dynamically nearest "exception" label.

Arguments:

This instruction requires several arguments:

1. The optional "cconv" marker indicates which calling convention the call
   should use. If none is specified, the call defaults to using C calling
   conventions.
2. The optional Parameter Attributes list for return values. Only 'zeroext',
   'signext', and 'inreg' attributes are valid here.
3. 'ptr to function ty': shall be the signature of the pointer to function
   value being invoked. In most cases, this is a direct function invocation,
   but indirect invokes are just as possible, branching off an arbitrary
   pointer to function value.
4. 'function ptr val': An LLVM value containing a pointer to a function to be
   invoked.
5. 'function args': argument list whose types match the function signature
   argument types. If the function signature indicates the function accepts a
   variable number of arguments, the extra arguments can be specified.
6. 'normal label': the label reached when the called function executes a
   'ret' instruction.
7. 'exception label': the label reached when a callee returns with the unwind
   instruction.
8. The optional function attributes list. Only 'noreturn', 'nounwind',
   'readonly' and 'readnone' attributes are valid here.
Semantics:

This instruction is designed to operate as a standard 'call' instruction in
most regards. The primary difference is that it establishes an association
with a label, which is used by the runtime library to unwind the stack.

This instruction is used in languages with destructors to ensure that proper
cleanup is performed in the case of either a longjmp or a thrown exception.
Additionally, this is important for implementation of 'catch' clauses in
high-level languages that support them.

For the purposes of the SSA form, the definition of the value returned by the
'invoke' instruction is deemed to occur on the edge from the current block to
the "normal" label. If the callee unwinds then no return value is available.

    Example:

  %retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue
              unwind label %TestCleanup              ; {i32}:retval set
'unwind' Instruction

Overview:

The 'unwind' instruction unwinds the stack, continuing control flow at the
first callee in the dynamic call stack which used an invoke instruction to
perform the call. This is primarily used to implement exception handling.

Semantics:

The 'unwind' instruction causes execution of the current function to
immediately halt. The dynamic call stack is then searched for the first
invoke instruction on the call stack. Once found, execution continues at the
"exceptional" destination block specified by the invoke instruction. If there
is no invoke instruction in the dynamic call chain, undefined behavior
results.
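For illustration, a minimal sketch of how unwind pairs with invoke (the
function and label names here are hypothetical, not from this document):

  define void @thrower() {
    unwind                              ; halts @thrower, resumes at %Catch
  }

  define i32 @caller() {
    invoke void @thrower()
        to label %Normal unwind label %Catch
  Normal:
    ret i32 0                           ; reached only if the callee ret'd
  Catch:
    ret i32 1                           ; reached via the unwind above
  }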

'unreachable' Instruction

Overview:

The 'unreachable' instruction has no defined semantics. This instruction is
used to inform the optimizer that a particular portion of the code is not
reachable. This can be used to indicate that the code after a no-return
function cannot be reached, and other facts.

Semantics:

The 'unreachable' instruction has no defined semantics.
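As a brief illustration (a hypothetical snippet, not an example from this
document), 'unreachable' commonly follows a call to a function known not to
return:

  call void @abort() noreturn     ; @abort never returns
  unreachable                     ; so this point can never be reached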

Binary Operations

Binary operators are used to do most of the computation in a program. They
require two operands of the same type, execute an operation on them, and
produce a single value. The operands might represent multiple data, as is the
case with the vector data type. The result value has the same type as its
operands.

There are several different binary operators:

'add' Instruction

Syntax:

  <result> = add <ty> <op1>, <op2>          ; yields {ty}:result
  <result> = add nuw <ty> <op1>, <op2>      ; yields {ty}:result
  <result> = add nsw <ty> <op1>, <op2>      ; yields {ty}:result
  <result> = add nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
Overview:

The 'add' instruction returns the sum of its two operands.

Arguments:

The two arguments to the 'add' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The value produced is the integer sum of the two operands.

If the sum has unsigned overflow, the result returned is the mathematical
result modulo 2^n, where n is the bit width of the result.

Because LLVM integers use a two's complement representation, this instruction
is appropriate for both signed and unsigned integers.

nuw and nsw stand for "No Unsigned Wrap" and "No Signed Wrap", respectively.
If the nuw and/or nsw keywords are present, the result value of the add is
undefined if unsigned and/or signed overflow, respectively, occurs.

Example:

  <result> = add i32 4, %var          ; yields {i32}:result = 4 + %var
    +
'fadd' Instruction

Syntax:

  <result> = fadd <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'fadd' instruction returns the sum of its two operands.

Arguments:

The two arguments to the 'fadd' instruction must be floating point or vector
of floating point values. Both arguments must have identical types.

Semantics:

The value produced is the floating point sum of the two operands.

Example:

  <result> = fadd float 4.0, %var          ; yields {float}:result = 4.0 + %var
'sub' Instruction

Syntax:

  <result> = sub <ty> <op1>, <op2>          ; yields {ty}:result
  <result> = sub nuw <ty> <op1>, <op2>      ; yields {ty}:result
  <result> = sub nsw <ty> <op1>, <op2>      ; yields {ty}:result
  <result> = sub nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
Overview:

The 'sub' instruction returns the difference of its two operands.

Note that the 'sub' instruction is used to represent the 'neg' instruction
present in most other intermediate representations.

Arguments:

The two arguments to the 'sub' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The value produced is the integer difference of the two operands.

If the difference has unsigned overflow, the result returned is the
mathematical result modulo 2^n, where n is the bit width of the result.

Because LLVM integers use a two's complement representation, this instruction
is appropriate for both signed and unsigned integers.

nuw and nsw stand for "No Unsigned Wrap" and "No Signed Wrap", respectively.
If the nuw and/or nsw keywords are present, the result value of the sub is
undefined if unsigned and/or signed overflow, respectively, occurs.

    Example:
       <result> = sub i32 4, %var          ; yields {i32}:result = 4 - %var
   <result> = sub i32 0, %val          ; yields {i32}:result = -%val
     
'fsub' Instruction

Syntax:

  <result> = fsub <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'fsub' instruction returns the difference of its two operands.

Note that the 'fsub' instruction is used to represent the 'fneg' instruction
present in most other intermediate representations.

Arguments:

The two arguments to the 'fsub' instruction must be floating point or vector
of floating point values. Both arguments must have identical types.

Semantics:

The value produced is the floating point difference of the two operands.

Example:

  <result> = fsub float 4.0, %var           ; yields {float}:result = 4.0 - %var
  <result> = fsub float -0.0, %val          ; yields {float}:result = -%val
'mul' Instruction

Syntax:

  <result> = mul <ty> <op1>, <op2>          ; yields {ty}:result
  <result> = mul nuw <ty> <op1>, <op2>      ; yields {ty}:result
  <result> = mul nsw <ty> <op1>, <op2>      ; yields {ty}:result
  <result> = mul nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
Overview:

The 'mul' instruction returns the product of its two operands.

Arguments:

The two arguments to the 'mul' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The value produced is the integer product of the two operands.

If the result of the multiplication has unsigned overflow, the result
returned is the mathematical result modulo 2^n, where n is the bit width of
the result.

Because LLVM integers use a two's complement representation, and the result
is the same width as the operands, this instruction returns the correct
result for both signed and unsigned integers. If a full product
(e.g. i32 x i32 -> i64) is needed, the operands should be sign-extended or
zero-extended as appropriate to the width of the full product.
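For instance, a signed full product might be formed like this (an
illustrative sketch, not from this document):

  %a64  = sext i32 %a to i64
  %b64  = sext i32 %b to i64
  %prod = mul i64 %a64, %b64     ; full 32x32->64 signed product, cannot wrap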

nuw and nsw stand for "No Unsigned Wrap" and "No Signed Wrap", respectively.
If the nuw and/or nsw keywords are present, the result value of the mul is
undefined if unsigned and/or signed overflow, respectively, occurs.

Example:

  <result> = mul i32 4, %var          ; yields {i32}:result = 4 * %var
'fmul' Instruction

Syntax:

  <result> = fmul <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'fmul' instruction returns the product of its two operands.

Arguments:

The two arguments to the 'fmul' instruction must be floating point or vector
of floating point values. Both arguments must have identical types.

Semantics:

The value produced is the floating point product of the two operands.

Example:

  <result> = fmul float 4.0, %var          ; yields {float}:result = 4.0 * %var

'udiv' Instruction
Syntax:

  <result> = udiv <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'udiv' instruction returns the quotient of its two operands.

Arguments:

The two arguments to the 'udiv' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The value produced is the unsigned integer quotient of the two operands.

Note that unsigned integer division and signed integer division are distinct
operations; for signed integer division, use 'sdiv'.

Division by zero leads to undefined behavior.

Example:

  <result> = udiv i32 4, %var          ; yields {i32}:result = 4 / %var

'sdiv' Instruction
Syntax:

  <result> = sdiv <ty> <op1>, <op2>         ; yields {ty}:result
  <result> = sdiv exact <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'sdiv' instruction returns the quotient of its two operands.

Arguments:

The two arguments to the 'sdiv' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The value produced is the signed integer quotient of the two operands rounded
towards zero.

Note that signed integer division and unsigned integer division are distinct
operations; for unsigned integer division, use 'udiv'.

Division by zero leads to undefined behavior. Overflow also leads to
undefined behavior; this is a rare case, but can occur, for example, by doing
a 32-bit division of -2147483648 by -1.

If the exact keyword is present, the result value of the sdiv is undefined if
the result would be rounded or if overflow would occur.

Example:

  <result> = sdiv i32 4, %var          ; yields {i32}:result = 4 / %var
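A hypothetical use of the exact flag (assumed, not from this document):

  <result> = sdiv exact i32 %x, 4      ; undefined unless %x is a multiple of 4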
'fdiv' Instruction

Syntax:

  <result> = fdiv <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'fdiv' instruction returns the quotient of its two operands.

Arguments:

The two arguments to the 'fdiv' instruction must be floating point or vector
of floating point values. Both arguments must have identical types.

Semantics:

The value produced is the floating point quotient of the two operands.

Example:

  <result> = fdiv float 4.0, %var          ; yields {float}:result = 4.0 / %var

'urem' Instruction
Syntax:

  <result> = urem <ty> <op1>, <op2>   ; yields {ty}:result
    +
Overview:

The 'urem' instruction returns the remainder from the unsigned division of
its two arguments.

Arguments:

The two arguments to the 'urem' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

This instruction returns the unsigned integer remainder of a division. This
instruction always performs an unsigned division to get the remainder.

Note that unsigned integer remainder and signed integer remainder are
distinct operations; for signed integer remainder, use 'srem'.

Taking the remainder of a division by zero leads to undefined behavior.

Example:

  <result> = urem i32 4, %var          ; yields {i32}:result = 4 % %var
'srem' Instruction

Syntax:

  <result> = srem <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'srem' instruction returns the remainder from the signed division of its
two operands. This instruction can also take vector versions of the values,
in which case the elements must be integers.

Arguments:

The two arguments to the 'srem' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

This instruction returns the remainder of a division (where the result has
the same sign as the dividend, op1), not the modulo operator (where the
result has the same sign as the divisor, op2) of a value. For more
information about the difference, see The Math Forum. For a table of how this
is implemented in various languages, please see Wikipedia: modulo operation.

Note that signed integer remainder and unsigned integer remainder are
distinct operations; for unsigned integer remainder, use 'urem'.

Taking the remainder of a division by zero leads to undefined behavior.
Overflow also leads to undefined behavior; this is a rare case, but can
occur, for example, by taking the remainder of a 32-bit division of
-2147483648 by -1. (The remainder doesn't actually overflow, but this rule
lets srem be implemented using instructions that return both the result of
the division and the remainder.)

Example:

  <result> = srem i32 4, %var          ; yields {i32}:result = 4 % %var

'frem' Instruction

Syntax:

  <result> = frem <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'frem' instruction returns the remainder from the division of its two
operands.

Arguments:

The two arguments to the 'frem' instruction must be floating point or vector
of floating point values. Both arguments must have identical types.

Semantics:

This instruction returns the remainder of a division. The remainder has the
same sign as the dividend.

Example:

  <result> = frem float 4.0, %var          ; yields {float}:result = 4.0 % %var
Bitwise Binary Operations

Bitwise binary operators are used to do various forms of bit-twiddling in a
program. They are generally very efficient instructions and can commonly be
strength reduced from other instructions. They require two operands of the
same type, execute an operation on them, and produce a single value. The
resulting value is the same type as its operands.

'shl' Instruction

Syntax:

  <result> = shl <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'shl' instruction returns the first operand shifted to the left a
specified number of bits.

Arguments:

Both arguments to the 'shl' instruction must be the same integer or vector of
integer type. 'op2' is treated as an unsigned value.

Semantics:

The value produced is op1 * 2^op2 mod 2^n, where n is the width of the
result. If op2 is (statically or dynamically) negative or equal to or larger
than the number of bits in op1, the result is undefined. If the arguments are
vectors, each vector element of op1 is shifted by the corresponding shift
amount in op2.
Example:

       <result> = shl i32 4, %var   ; yields {i32}: 4 << %var
       <result> = shl i32 4, 2      ; yields {i32}: 16
       <result> = shl i32 1, 10     ; yields {i32}: 1024
       <result> = shl i32 1, 32     ; undefined
       <result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 2, i32 4>
     
'lshr' Instruction

Syntax:

  <result> = lshr <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'lshr' instruction (logical shift right) returns the first operand
shifted to the right a specified number of bits with zero fill.

Arguments:

Both arguments to the 'lshr' instruction must be the same integer or vector
of integer type. 'op2' is treated as an unsigned value.

Semantics:

This instruction always performs a logical shift right operation. The most
significant bits of the result will be filled with zero bits after the shift.
If op2 is (statically or dynamically) equal to or larger than the number of
bits in op1, the result is undefined. If the arguments are vectors, each
vector element of op1 is shifted by the corresponding shift amount in op2.

Example:

  <result> = lshr i32 1, 32  ; undefined
  <result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>
'ashr' Instruction

Syntax:

  <result> = ashr <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'ashr' instruction (arithmetic shift right) returns the first operand
shifted to the right a specified number of bits with sign extension.

Arguments:

Both arguments to the 'ashr' instruction must be the same integer or vector
of integer type. 'op2' is treated as an unsigned value.

Semantics:

This instruction always performs an arithmetic shift right operation. The
most significant bits of the result will be filled with the sign bit of op1.
If op2 is (statically or dynamically) equal to or larger than the number of
bits in op1, the result is undefined. If the arguments are vectors, each
vector element of op1 is shifted by the corresponding shift amount in op2.

Example:

  <result> = ashr i32 1, 32  ; undefined
  <result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3>   ; yields: result=<2 x i32> < i32 -1, i32 0>

'and' Instruction
Syntax:

  <result> = and <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'and' instruction returns the bitwise logical and of its two operands.

Arguments:

The two arguments to the 'and' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The truth table used for the 'and' instruction is:

  In0  In1  Out
   0    0    0
   0    1    0
   1    0    0
   1    1    1
Example:

  <result> = and i32 4, %var         ; yields {i32}:result = 4 & %var

'or' Instruction

Syntax:

  <result> = or <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'or' instruction returns the bitwise logical inclusive or of its two
operands.

Arguments:

The two arguments to the 'or' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The truth table used for the 'or' instruction is:

  In0  In1  Out
   0    0    0
   0    1    1
   1    0    1
   1    1    1
Example:

  <result> = or i32 4, %var         ; yields {i32}:result = 4 | %var
  <result> = or i32 15, 40          ; yields {i32}:result = 47
  <result> = or i32 4, 8            ; yields {i32}:result = 12
'xor' Instruction

Syntax:

  <result> = xor <ty> <op1>, <op2>   ; yields {ty}:result
Overview:

The 'xor' instruction returns the bitwise logical exclusive or of its two
operands. The xor is used to implement the "one's complement" operation,
which is the "~" operator in C.

Arguments:

The two arguments to the 'xor' instruction must be integer or vector of
integer values. Both arguments must have identical types.

Semantics:

The truth table used for the 'xor' instruction is:

  In0  In1  Out
   0    0    0
   0    1    1
   1    0    1
   1    1    0
Example:

  <result> = xor i32 4, %var         ; yields {i32}:result = 4 ^ %var
  <result> = xor i32 15, 40          ; yields {i32}:result = 39
  <result> = xor i32 4, 8            ; yields {i32}:result = 12
  <result> = xor i32 %V, -1          ; yields {i32}:result = ~%V
Vector Operations

LLVM supports several instructions to represent vector operations in a
target-independent manner. These instructions cover the element-access and
vector-specific operations needed to process vectors effectively. While LLVM
does directly support these vector operations, many sophisticated algorithms
will want to use target-specific intrinsics to take full advantage of a
specific target.

'extractelement' Instruction

Syntax:

  <result> = extractelement <n x <ty>> <val>, i32 <idx>    ; yields <ty>
Overview:

The 'extractelement' instruction extracts a single scalar element from a
vector at a specified index.

Arguments:

The first operand of an 'extractelement' instruction is a value of vector
type. The second operand is an index indicating the position from which to
extract the element. The index may be a variable.

Semantics:

The result is a scalar of the same type as the element type of val. Its value
is the value at position idx of val. If idx exceeds the length of val, the
results are undefined.

Example:

  %result = extractelement <4 x i32> %vec, i32 0    ; yields i32
'insertelement' Instruction

Syntax:

  <result> = insertelement <n x <ty>> <val>, <ty> <elt>, i32 <idx>    ; yields <n x <ty>>
Overview:

The 'insertelement' instruction inserts a scalar element into a vector at a
specified index.

Arguments:

The first operand of an 'insertelement' instruction is a value of vector
type. The second operand is a scalar value whose type must equal the element
type of the first operand. The third operand is an index indicating the
position at which to insert the value. The index may be a variable.

Semantics:

The result is a vector of the same type as val. Its element values are those
of val except at position idx, where it gets the value elt. If idx exceeds
the length of val, the results are undefined.

Example:

  %result = insertelement <4 x i32> %vec, i32 1, i32 0    ; yields <4 x i32>

'shufflevector' Instruction
Syntax:

  <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>    ; yields <m x <ty>>
Overview:

The 'shufflevector' instruction constructs a permutation of elements from two
input vectors, returning a vector with the same element type as the input and
length that is the same as the shuffle mask.

Arguments:

The first two operands of a 'shufflevector' instruction are vectors with
types that match each other. The third argument is a shuffle mask whose
element type is always 'i32'. The result of the instruction is a vector whose
length is the same as the shuffle mask and whose element type is the same as
the element type of the first two operands.

The shuffle mask operand is required to be a constant vector with either
constant integer or undef values.

Semantics:

The elements of the two input vectors are numbered from left to right across
both of the vectors. The shuffle mask operand specifies, for each element of
the result vector, which element of the two input vectors the result element
gets. The element selector may be undef (meaning "don't care") and the second
operand may be undef if performing a shuffle from only one vector.

Example:

  %result = shufflevector <4 x i32> %v1, <4 x i32> %v2,
                          <4 x i32> <i32 0, i32 4, i32 1, i32 5>  ; yields <4 x i32>
  %result = shufflevector <4 x i32> %v1, <4 x i32> %v2,
                          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >  ; yields <8 x i32>
Aggregate Operations

LLVM supports several instructions for working with aggregate values.

'extractvalue' Instruction

Syntax:

  <result> = extractvalue <aggregate type> <val>, <idx>{, <idx>}*
Overview:

The 'extractvalue' instruction extracts the value of a struct field or array
element from an aggregate value.

Arguments:

The first operand of an 'extractvalue' instruction is a value of struct or
array type. The operands are constant indices to specify which value to
extract in a similar manner as indices in a 'getelementptr' instruction.

Semantics:

The result is the value at the position in the aggregate specified by the
index operands.

Example:

  %result = extractvalue {i32, float} %agg, 0    ; yields i32
'insertvalue' Instruction

Syntax:

  <result> = insertvalue <aggregate type> <val>, <ty> <val>, <idx>    ; yields <aggregate type>
Overview:

The 'insertvalue' instruction inserts a value into a struct field or array
element in an aggregate.

Arguments:

The first operand of an 'insertvalue' instruction is a value of struct or
array type. The second operand is a first-class value to insert. The
following operands are constant indices indicating the position at which to
insert the value in a similar manner as indices in a 'getelementptr'
instruction. The value to insert must have the same type as the value
identified by the indices.

Semantics:

The result is an aggregate of the same type as val. Its value is that of val
except that the value at the position specified by the indices is that of
elt.

Example:

  %result = insertvalue {i32, float} %agg, i32 1, 0    ; yields {i32, float}
Memory Access and Addressing Operations

A key design point of an SSA-based representation is how it represents
memory. In LLVM, no memory locations are in SSA form, which makes things very
simple. This section describes how to read, write, allocate, and free memory
in LLVM.

'malloc' Instruction

Syntax:

  <result> = malloc <type>[, i32 <NumElements>][, align <alignment>]     ; yields {type*}:result
Overview:

The 'malloc' instruction allocates memory from the system heap and returns a
pointer to it. The object is always allocated in the generic address space
(address space zero).

Arguments:

The 'malloc' instruction allocates sizeof(<type>)*NumElements bytes of memory
from the operating system and returns a pointer of the appropriate type to
the program. If "NumElements" is specified, it is the number of elements
allocated, otherwise "NumElements" is defaulted to be one. If a constant
alignment is specified, the value result of the allocation is guaranteed to
be aligned to at least that boundary. If not specified, or if zero, the
target can choose to align the allocation on any convenient boundary
compatible with the type.

    'type' must be a sized type.

Semantics:

Memory is allocated using the system "malloc" function, and a pointer is
returned. The result of a zero byte allocation is undefined. The result is
null if there is insufficient memory available.

Example:

  %array  = malloc [4 x i8]                     ; yields {[4 x i8]*}:array
  %array4 = malloc i32, align 1024              ; yields {i32*}:array4

Note that the code generator does not yet respect the alignment value.

'free' Instruction

Syntax:

  free <type> <value>                           ; yields {void}
Overview:

The 'free' instruction returns memory back to the unused memory heap to be
reallocated in the future.

Arguments:

'value' shall be a pointer value that points to a value that was allocated
with the 'malloc' instruction.

Semantics:

Access to the memory pointed to by the pointer is no longer defined after
this instruction executes. If the pointer is null, the operation is a noop.

Example:

  %array  = malloc [4 x i8]                     ; yields {[4 x i8]*}:array
            free   [4 x i8]* %array
'alloca' Instruction

Syntax:

  <result> = alloca <type>[, i32 <NumElements>][, align <alignment>]     ; yields {type*}:result
Overview:

The 'alloca' instruction allocates memory on the stack frame of the currently
executing function, to be automatically released when this function returns
to its caller. The object is always allocated in the generic address space
(address space zero).

Arguments:

The 'alloca' instruction allocates sizeof(<type>)*NumElements bytes of memory
on the runtime stack, returning a pointer of the appropriate type to the
program. If "NumElements" is specified, it is the number of elements
allocated, otherwise "NumElements" is defaulted to be one. If a constant
alignment is specified, the value result of the allocation is guaranteed to
be aligned to at least that boundary. If not specified, or if zero, the
target can choose to align the allocation on any convenient boundary
compatible with the type.

    'type' may be any sized type.

Semantics:

Memory is allocated; a pointer is returned. The operation is undefined if
there is insufficient stack space for the allocation. 'alloca'd memory is
automatically released when the function returns. The 'alloca' instruction is
commonly used to represent automatic variables that must have an address
available. When the function returns (either with the ret or unwind
instructions), the memory is reclaimed. Allocating zero bytes is legal, but
the result is undefined.

Example:

       %ptr = alloca i32                             ; yields {i32*}:ptr
       %ptr = alloca i32, i32 4                      ; yields {i32*}:ptr
       %ptr = alloca i32, i32 4, align 1024          ; yields {i32*}:ptr
       %ptr = alloca i32, align 1024                 ; yields {i32*}:ptr
     
'load' Instruction

Syntax:

  <result> = load <ty>* <pointer>[, align <alignment>]
  <result> = volatile load <ty>* <pointer>[, align <alignment>]
    Overview:

    The 'load' instruction is used to read from memory.

Arguments:

The argument to the 'load' instruction specifies the memory address from
which to load. The pointer must point to a first class type. If the load is
marked as volatile, then the optimizer is not allowed to modify the number or
order of execution of this load with other volatile load and store
instructions.

The optional constant "align" argument specifies the alignment of the
operation (that is, the alignment of the memory address). A value of 0 or an
omitted "align" argument means that the operation has the preferential
alignment for the target. It is the responsibility of the code emitter to
ensure that the alignment information is correct. Overestimating the
alignment results in undefined behavior. Underestimating the alignment may
produce less efficient code. An alignment of 1 is always safe.

Semantics:

The location of memory pointed to is loaded. If the value being loaded is of
scalar type then the number of bytes read does not exceed the minimum number
of bytes needed to hold all bits of the type. For example, loading an i24
reads at most three bytes. When loading a value of a type like i20 with a
size that is not an integral number of bytes, the result is undefined if the
value was not originally written using a store of the same type.

Examples:

  %ptr = alloca i32                               ; yields {i32*}:ptr
  store i32 3, i32* %ptr                          ; yields {void}
  %val = load i32* %ptr                           ; yields {i32}:val = i32 3
     
Syntax:

  store <ty> <value>, <ty>* <pointer>[, align <alignment>]                   ; yields {void}
  volatile store <ty> <value>, <ty>* <pointer>[, align <alignment>]          ; yields {void}
     
    Overview:

    The 'store' instruction is used to write to memory.

Arguments:

There are two arguments to the 'store' instruction: a value to store and an address at which to store it. The type of the '<pointer>' operand must be a pointer to the first class type of the '<value>' operand. If the store is marked as volatile, then the optimizer is not allowed to modify the number or order of execution of this store with other volatile load and store instructions.

The optional constant "align" argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 or an omitted "align" argument means that the operation has the preferential alignment for the target. It is the responsibility of the code emitter to ensure that the alignment information is correct. Overestimating the alignment results in undefined behavior. Underestimating the alignment may produce less efficient code. An alignment of 1 is always safe.

Semantics:

The contents of memory are updated to contain '<value>' at the location specified by the '<pointer>' operand. If '<value>' is of scalar type then the number of bytes written does not exceed the minimum number of bytes needed to hold all bits of the type. For example, storing an i24 writes at most three bytes. When writing a value of a type like i20 with a size that is not an integral number of bytes, it is unspecified what happens to the extra bits that do not belong to the type, but they will typically be overwritten.

Example:

  %ptr = alloca i32                               ; yields {i32*}:ptr
       store i32 3, i32* %ptr                          ; yields {void}
       %val = load i32* %ptr                           ; yields {i32}:val = i32 3
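
Loads and stores may also carry the optional "align" argument described above; a minimal sketch (not part of the patch, names hypothetical):

  %p = alloca i32, align 4                        ; yields {i32*}:p
  store i32 7, i32* %p, align 4                   ; yields {void}
  %v = load i32* %p, align 4                      ; yields {i32}:v = i32 7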
     

Syntax:

  <result> = getelementptr <pty>* <ptrval>{, <ty> <idx>}*
  <result> = getelementptr inbounds <pty>* <ptrval>{, <ty> <idx>}*
     
    Overview:

The 'getelementptr' instruction is used to get the address of a subelement of an aggregate data structure. It performs address calculation only and does not access memory.

    Arguments:

The first argument is always a pointer, and forms the basis of the calculation. The remaining arguments are indices that indicate which of the elements of the aggregate object are indexed. The interpretation of each index is dependent on the type being indexed into. The first index always indexes the pointer value given as the first argument, the second index indexes a value of the type pointed to (not necessarily the value directly pointed to, since the first index can be non-zero), etc. The first type indexed into must be a pointer value; subsequent types can be arrays, vectors and structs. Note that subsequent types being indexed into can never be pointers, since that would require loading the pointer before continuing the calculation.

The type of each index argument depends on the type it is indexing into. When indexing into an (optionally packed) structure, only i32 integer constants are allowed. When indexing into an array, pointer or vector, integers of any width are allowed, and they are not required to be constant.


For example, let's consider a C code fragment and how it gets compiled to LLVM:


struct RT {
  char A;
  int  B[10][20];
  char C;
};
struct ST {
  int         X;
  double      Y;
  struct RT   Z;
};

int *foo(struct ST *s) {
  return &s[1].Z.B[5][13];
}

The LLVM code generated by the GCC frontend is:
     %RT = type { i8 , [10 x [20 x i32]], i8  }
     %ST = type { i32, double, %RT }
     
define i32* @foo(%ST* %s) {
     entry:
       %reg = getelementptr %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13
       ret i32* %reg
}
     
    Semantics:

In the example above, the first index is indexing into the '%ST*' type, which is a pointer, yielding a '%ST' = '{ i32, double, %RT }' type, a structure. The second index indexes into the third element of the structure, yielding a '%RT' = '{ i8 , [10 x [20 x i32]], i8 }' type, another structure. The third index indexes into the second element of the structure, yielding a '[10 x [20 x i32]]' type, an array. The two dimensions of the array are subscripted into, yielding an 'i32' type. The 'getelementptr' instruction returns a pointer to this element, thus computing a value of 'i32*' type.


Note that it is perfectly legal to index partially through a structure, returning a pointer to an inner element. Because of this, the LLVM code for the given testcase is equivalent to:

  define i32* @foo(%ST* %s) {
         %t1 = getelementptr %ST* %s, i32 1                        ; yields %ST*:%t1
         %t2 = getelementptr %ST* %t1, i32 0, i32 2                ; yields %RT*:%t2
         %t3 = getelementptr %RT* %t2, i32 0, i32 1                ; yields [10 x [20 x i32]]*:%t3
    %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5  ; yields [20 x i32]*:%t4
    %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13        ; yields i32*:%t5
    ret i32* %t5

    }

If the inbounds keyword is present, the result value of the getelementptr is undefined if the base pointer is not an in bounds address of an allocated object, or if any of the addresses that would be formed by successive addition of the offsets implied by the indices to the base address with infinitely precise arithmetic are not an in bounds address of that allocated object. The in bounds addresses for an allocated object are all the addresses that point into the object, plus the address one byte past the end.


If the inbounds keyword is not present, the offsets are added to the base address with silently-wrapping two's complement arithmetic, and the result value of the getelementptr may be outside the object pointed to by the base pointer. The result value may not necessarily be used to access memory though, even if it happens to point into allocated storage. See the Pointer Aliasing Rules section for more information.


The getelementptr instruction is often confusing. For some more insight into how it works, see the getelementptr FAQ.

Example:
         ; yields [12 x i8]*:aptr
         %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1

    ; yields i32*:iptr
    %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
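
With the inbounds keyword, the same address arithmetic additionally asserts the in-bounds condition described above; a minimal sketch (not from the patch, names hypothetical):

    ; result undefined unless the address stays within %saptr's allocated object
    %bptr = getelementptr inbounds {i32, [12 x i8]}* %saptr, i64 0, i32 1     ; yields [12 x i8]*:bptr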

The instructions in this category are the conversion instructions (casting) which all take a single operand and a type. They perform various bit conversions on the operand.

'trunc .. to' Instruction

Syntax:

  <result> = trunc <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'trunc' instruction truncates its operand to the type ty2.

    Arguments:

The 'trunc' instruction takes a value to truncate, which must be an integer type, and a type that specifies the size and type of the result, which must be an integer type. The bit size of value must be larger than the bit size of ty2. Equal sized types are not allowed.

    Semantics:

The 'trunc' instruction truncates the high order bits in value and converts the remaining bits to ty2. Since the source size must be larger than the destination size, trunc cannot be a no-op cast. It will always truncate bits.

    Example:

  %X = trunc i32 257 to i8              ; yields i8:1
  %Y = trunc i32 123 to i1              ; yields i1:true
  %Z = trunc i32 122 to i1              ; yields i1:false

'zext .. to' Instruction

Syntax:

  <result> = zext <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'zext' instruction zero extends its operand to type ty2.

    Arguments:

The 'zext' instruction takes a value to cast, which must be of integer type, and a type to cast it to, which must also be of integer type. The bit size of the value must be smaller than the bit size of the destination type, ty2.

    Semantics:

The zext fills the high order bits of the value with zero bits until it reaches the size of the destination type, ty2.

    When zero extending from i1, the result will always be either 0 or 1.


Example:

  %X = zext i32 257 to i64              ; yields i64:257
  %Y = zext i1 true to i32              ; yields i32:1

'sext .. to' Instruction

Syntax:

  <result> = sext <ty> <value> to <ty2>             ; yields ty2

Overview:

    The 'sext' sign extends value to the type ty2.

    Arguments:

The 'sext' instruction takes a value to cast, which must be of integer type, and a type to cast it to, which must also be of integer type. The bit size of the value must be smaller than the bit size of the destination type, ty2.

    Semantics:

The 'sext' instruction performs a sign extension by copying the sign bit (highest order bit) of the value until it reaches the bit size of the type ty2.

    When sign extending from i1, the extension always results in -1 or 0.


Example:

  %X = sext i8 -1 to i16                ; yields i16:65535
  %Y = sext i1 true to i32              ; yields i32:-1

'fptrunc .. to' Instruction

    Syntax:
       <result> = fptrunc <ty> <value> to <ty2>             ; yields ty2
     
    Overview:

The 'fptrunc' instruction truncates value to type ty2.

    Arguments:

The 'fptrunc' instruction takes a floating point value to cast and a floating point type to cast it to. The size of value must be larger than the size of ty2. This implies that fptrunc cannot be used to make a no-op cast.

    Semantics:

The 'fptrunc' instruction truncates a value from a larger floating point type to a smaller floating point type. If the value cannot fit within the destination type, ty2, then the results are undefined.

    Example:
       %X = fptrunc double 123.0 to float         ; yields float:123.0
       %Y = fptrunc double 1.0E+300 to float      ; yields undefined
     

'fpext .. to' Instruction

Syntax:

  <result> = fpext <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'fpext' extends a floating point value to a larger floating point value.

    Arguments:

The 'fpext' instruction takes a floating point value to cast, and a floating point type to cast it to. The source type must be smaller than the destination type.

    Semantics:

The 'fpext' instruction extends the value from a smaller floating point type to a larger floating point type. The fpext cannot be used to make a no-op cast because it always changes bits. Use bitcast to make a no-op cast for a floating point cast.

    Example:
       %X = fpext float 3.1415 to double        ; yields double:3.1415
       %Y = fpext float 1.0 to float            ; yields float:1.0 (no-op)
     

'fptoui .. to' Instruction

Syntax:

  <result> = fptoui <ty> <value> to <ty2>             ; yields ty2
    Overview:

The 'fptoui' converts a floating point value to its unsigned integer equivalent of type ty2.

    Arguments:

The 'fptoui' instruction takes a value to cast, which must be a scalar or vector floating point value, and a type to cast it to, ty2, which must be an integer type. If ty is a vector floating point type, ty2 must be a vector integer type with the same number of elements as ty.

    Semantics:

The 'fptoui' instruction converts its floating point operand into the nearest (rounding towards zero) unsigned integer value. If the value cannot fit in ty2, the results are undefined.

    Example:

  %X = fptoui double 123.0 to i32      ; yields i32:123
  %Y = fptoui float 1.0E+300 to i1     ; yields undefined:1
  %Z = fptoui float 1.04E+17 to i8     ; yields undefined:1

'fptosi .. to' Instruction

Syntax:

  <result> = fptosi <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'fptosi' instruction converts a floating point value to type ty2.

    Arguments:

The 'fptosi' instruction takes a value to cast, which must be a scalar or vector floating point value, and a type to cast it to, ty2, which must be an integer type. If ty is a vector floating point type, ty2 must be a vector integer type with the same number of elements as ty.

    Semantics:

The 'fptosi' instruction converts its floating point operand into the nearest (rounding towards zero) signed integer value. If the value cannot fit in ty2, the results are undefined.

    Example:

  %X = fptosi double -123.0 to i32     ; yields i32:-123
  %Y = fptosi float 1.0E-247 to i1     ; yields undefined:1
  %Z = fptosi float 1.04E+17 to i8     ; yields undefined:1

'uitofp .. to' Instruction

Syntax:

  <result> = uitofp <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'uitofp' instruction regards value as an unsigned integer and converts that value to the ty2 type.

    Arguments:

The 'uitofp' instruction takes a value to cast, which must be a scalar or vector integer value, and a type to cast it to, ty2, which must be a floating point type. If ty is a vector integer type, ty2 must be a vector floating point type with the same number of elements as ty.

    Semantics:

The 'uitofp' instruction interprets its operand as an unsigned integer quantity and converts it to the corresponding floating point value. If the value cannot fit in the floating point value, the results are undefined.

    Example:
       %X = uitofp i32 257 to float         ; yields float:257.0
       %Y = uitofp i8 -1 to double          ; yields double:255.0
     

'sitofp .. to' Instruction

Syntax:

  <result> = sitofp <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'sitofp' instruction regards value as a signed integer and converts that value to the ty2 type.

    Arguments:

The 'sitofp' instruction takes a value to cast, which must be a scalar or vector integer value, and a type to cast it to, ty2, which must be a floating point type. If ty is a vector integer type, ty2 must be a vector floating point type with the same number of elements as ty.

    Semantics:

The 'sitofp' instruction interprets its operand as a signed integer quantity and converts it to the corresponding floating point value. If the value cannot fit in the floating point value, the results are undefined.

    Example:
       %X = sitofp i32 257 to float         ; yields float:257.0
       %Y = sitofp i8 -1 to double          ; yields double:-1.0
     

'ptrtoint .. to' Instruction

Syntax:

  <result> = ptrtoint <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'ptrtoint' instruction converts the pointer value to the integer type ty2.

    Arguments:

The 'ptrtoint' instruction takes a value to cast, which must be a pointer value, and a type to cast it to, ty2, which must be an integer type.

    Semantics:

The 'ptrtoint' instruction converts value to integer type ty2 by interpreting the pointer value as an integer and either truncating or zero extending that value to the size of the integer type. If value is smaller than ty2 then a zero extension is done. If value is larger than ty2 then a truncation is done. If they are the same size, then nothing is done (no-op cast) other than a type change.

    Example:
  %X = ptrtoint i32* %x to i8           ; yields truncation on 32-bit architecture
  %Y = ptrtoint i32* %x to i64          ; yields zero extension on 32-bit architecture
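
As a sketch (not from the patch), combining this with the inttoptr instruction described next: converting through an integer of exactly pointer width is a pair of no-op casts on a 32-bit target:

  %i = ptrtoint i32* %x to i32          ; no-op on 32-bit architecture
  %q = inttoptr i32 %i to i32*          ; %q is identical to %x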
     

'inttoptr .. to' Instruction

Syntax:

  <result> = inttoptr <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'inttoptr' instruction converts an integer value to a pointer type, ty2.

    Arguments:

The 'inttoptr' instruction takes an integer value to cast, and a type to cast it to, which must be a pointer type.

    Semantics:

The 'inttoptr' instruction converts value to type ty2 by applying either a zero extension or a truncation depending on the size of the integer value. If value is larger than the size of a pointer then a truncation is done. If value is smaller than the size of a pointer then a zero extension is done. If they are the same size, nothing is done (no-op cast).

    Example:

  %X = inttoptr i32 255 to i32*         ; yields no-op on 32-bit architecture
  %Y = inttoptr i64 0 to i32*           ; yields truncation on 32-bit architecture

'bitcast .. to' Instruction

Syntax:

  <result> = bitcast <ty> <value> to <ty2>             ; yields ty2

    Overview:

The 'bitcast' instruction converts value to type ty2 without changing any bits.

    Arguments:

The 'bitcast' instruction takes a value to cast, which must be a non-aggregate first class value, and a type to cast it to, which must also be a non-aggregate first class type. The bit sizes of value and the destination type, ty2, must be identical. If the source type is a pointer, the destination type must also be a pointer. This instruction supports bitwise conversion of vectors to integers and to vectors of other types (as long as they have the same size).

    Semantics:

The 'bitcast' instruction converts value to type ty2. It is always a no-op cast because no bits change with this conversion. The conversion is done as if the value had been stored to memory and read back as type ty2. Pointer types may only be converted to other pointer types with this instruction. To convert pointers to other types, use the inttoptr or ptrtoint instructions first.

    Example:

  %X = bitcast i8 255 to i8              ; yields i8:-1
  %Y = bitcast i32* %x to float*         ; yields float*:%x
  %Z = bitcast <2 x i32> %V to i64       ; yields i64: %V
     

Other Operations

The instructions in this category are the "miscellaneous" instructions, which defy better classification.


'icmp' Instruction

Syntax:

  <result> = icmp <cond> <ty> <op1>, <op2>   ; yields {i1} or {<N x i1>}:result
     

Overview:

The 'icmp' instruction returns a boolean value or a vector of boolean values based on comparison of its two integer, integer vector, or pointer operands.


Arguments:

The 'icmp' instruction takes three operands. The first operand is the condition code indicating the kind of comparison to perform. It is not a value, just a keyword. The possible condition codes are:


  1. eq: equal
  2. ne: not equal
  3. ugt: unsigned greater than
  4. uge: unsigned greater or equal
  5. ult: unsigned less than
  6. ule: unsigned less or equal
  7. sgt: signed greater than
  8. sge: signed greater or equal
  9. slt: signed less than
 10. sle: signed less or equal

The remaining two arguments must be of integer, pointer or integer vector type. They must also be of identical types.
    Semantics:

The 'icmp' compares op1 and op2 according to the condition code given as cond. The comparison performed always yields either an i1 or vector of i1 result, as follows:


  1. eq: yields true if the operands are equal, false otherwise. No sign interpretation is necessary or performed.
  2. ne: yields true if the operands are unequal, false otherwise. No sign interpretation is necessary or performed.
  3. ugt: interprets the operands as unsigned values and yields true if op1 is greater than op2.
  4. uge: interprets the operands as unsigned values and yields true if op1 is greater than or equal to op2.
  5. ult: interprets the operands as unsigned values and yields true if op1 is less than op2.
  6. ule: interprets the operands as unsigned values and yields true if op1 is less than or equal to op2.
  7. sgt: interprets the operands as signed values and yields true if op1 is greater than op2.
  8. sge: interprets the operands as signed values and yields true if op1 is greater than or equal to op2.
  9. slt: interprets the operands as signed values and yields true if op1 is less than op2.
 10. sle: interprets the operands as signed values and yields true if op1 is less than or equal to op2.

If the operands are pointer typed, the pointer values are compared as if they were integers.

If the operands are integer vectors, then they are compared element by element. The result is an i1 vector with the same number of elements as the values being compared. Otherwise, the result is an i1.

    Example:

  <result> = icmp eq i32 4, 5          ; yields: result=false
       <result> = icmp ne float* %X, %X     ; yields: result=false
       <result> = icmp ult i16  4, 5        ; yields: result=true
       <result> = icmp sgt i16  4, 5        ; yields: result=false
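
Since the operands may also be integer vectors, the comparison can be element-wise; a minimal sketch (not from the patch):

  ; yields: result=<2 x i1> <i1 true, i1 false>
  <result> = icmp eq <2 x i32> <i32 4, i32 4>, <i32 4, i32 5>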

'fcmp' Instruction
    Syntax:

  <result> = fcmp <cond> <ty> <op1>, <op2>     ; yields {i1} or {<N x i1>}:result
     

Overview:

The 'fcmp' instruction returns a boolean value or vector of boolean values based on comparison of its operands.

If the operands are floating point scalars, then the result type is a boolean (i1).

If the operands are floating point vectors, then the result type is a vector of boolean with the same number of elements as the operands being compared.


Arguments:

The 'fcmp' instruction takes three operands. The first operand is the condition code indicating the kind of comparison to perform. It is not a value, just a keyword. The possible condition codes are:


  1. false: no comparison, always returns false
  2. oeq: ordered and equal
  3. ogt: ordered and greater than
  4. oge: ordered and greater than or equal
  5. olt: ordered and less than
  6. ole: ordered and less than or equal
  7. one: ordered and not equal
  8. ord: ordered (no nans)
  9. ueq: unordered or equal
 10. ugt: unordered or greater than
 11. uge: unordered or greater than or equal
 12. ult: unordered or less than
 13. ule: unordered or less than or equal
 14. une: unordered or not equal
 15. uno: unordered (either nans)
 16. true: no comparison, always returns true

Ordered means that neither operand is a QNAN while unordered means that either operand may be a QNAN.

The val1 and val2 arguments must each be either a floating point type or a vector of floating point type. They must have identical types.


Semantics:

The 'fcmp' instruction compares op1 and op2 according to the condition code given as cond. If the operands are vectors, then the vectors are compared element by element. Each comparison performed always yields an i1 result, as follows:


  1. false: always yields false, regardless of operands.
  2. oeq: yields true if both operands are not a QNAN and op1 is equal to op2.
  3. ogt: yields true if both operands are not a QNAN and op1 is greater than op2.
  4. oge: yields true if both operands are not a QNAN and op1 is greater than or equal to op2.
  5. olt: yields true if both operands are not a QNAN and op1 is less than op2.
  6. ole: yields true if both operands are not a QNAN and op1 is less than or equal to op2.
  7. one: yields true if both operands are not a QNAN and op1 is not equal to op2.
  8. ord: yields true if both operands are not a QNAN.
  9. ueq: yields true if either operand is a QNAN or op1 is equal to op2.
 10. ugt: yields true if either operand is a QNAN or op1 is greater than op2.
 11. uge: yields true if either operand is a QNAN or op1 is greater than or equal to op2.
 12. ult: yields true if either operand is a QNAN or op1 is less than op2.
 13. ule: yields true if either operand is a QNAN or op1 is less than or equal to op2.
 14. une: yields true if either operand is a QNAN or op1 is not equal to op2.
 15. uno: yields true if either operand is a QNAN.
 16. true: always yields true, regardless of operands.
    Example:

  <result> = fcmp oeq float 4.0, 5.0    ; yields: result=false
       <result> = fcmp one float 4.0, 5.0    ; yields: result=true
       <result> = fcmp olt float 4.0, 5.0    ; yields: result=true
  <result> = fcmp ueq double 1.0, 2.0   ; yields: result=false
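
An element-wise form is also accepted at the IR level; a minimal sketch (not from the patch; note the code generator caveat below):

  ; yields: result=<2 x i1> <i1 true, i1 false>
  <result> = fcmp olt <2 x float> <float 1.0, float 5.0>, <float 4.0, float 4.0>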
     

Note that the code generator does not yet support vector types with the fcmp instruction.


'phi' Instruction
    Syntax:

  <result> = phi <ty> [ <val0>, <label0>], ...
    Overview:

The 'phi' instruction is used to implement the φ node in the SSA graph representing the function.

Arguments:


The type of the incoming values is specified with the first type field. After this, the 'phi' instruction takes a list of pairs as arguments, with one pair for each predecessor basic block of the current block. Only values of first class type may be used as the value arguments to the PHI node. Only labels may be used as the label arguments.

There must be no non-phi instructions between the start of a basic block and the PHI instructions: i.e. PHI instructions must be first in a basic block.

For the purposes of the SSA form, the use of each incoming value is deemed to occur on the edge from the corresponding predecessor block to the current block (but after any definition of an 'invoke' instruction's return value on the same edge).

    Semantics:

At runtime, the 'phi' instruction logically takes on the value specified by the pair corresponding to the predecessor basic block that executed just prior to the current block.

    Example:

Loop:       ; Infinite loop that counts from 0 on up...
  %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
       %nextindvar = add i32 %indvar, 1
       br label %Loop
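
As a further sketch (hypothetical function, not from the patch), a phi merging values from two predecessor blocks:

define i32 @max(i32 %a, i32 %b) {
entry:
  %cmp = icmp sgt i32 %a, %b
  br i1 %cmp, label %use_a, label %use_b
use_a:
  br label %done
use_b:
  br label %done
done:
  %m = phi i32 [ %a, %use_a ], [ %b, %use_b ]   ; value depends on the edge taken
  ret i32 %m
}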
     

'select' Instruction

Syntax:
       <result> = select selty <cond>, <ty> <val1>, <ty> <val2>             ; yields ty
     
     
    Overview:

The 'select' instruction is used to choose one value based on a condition, without branching.

    Arguments:

The 'select' instruction requires an 'i1' value or a vector of 'i1' values indicating the condition, and two values of the same first class type. If the val1/val2 are vectors and the condition is a scalar, then entire vectors are selected, not individual elements.

    Semantics:

If the condition is an i1 and it evaluates to 1, the instruction returns the first value argument; otherwise, it returns the second value argument.

If the condition is a vector of i1, then the value arguments must be vectors of the same size, and the selection is done element by element.

    Example:
       %X = select i1 true, i8 17, i8 42          ; yields i8:17
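
For the vector-condition case, a sketch (not from the patch) selecting element by element:

   %V = select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 17, i8 17>, <2 x i8> <i8 42, i8 42>     ; yields <2 x i8>:<i8 17, i8 42>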
     

'call' Instruction

Syntax:

  <result> = [tail] call [cconv] [ret attrs] <ty> [<fnty>*] <fnptrval>(<function args>) [fn attrs]
    Overview:

The 'call' instruction represents a simple function call.

    Arguments:

    This instruction requires several arguments:


  1. The optional "tail" marker indicates whether the callee function accesses any allocas or varargs in the caller. If the "tail" marker is present, the function call is eligible for tail call optimization. Note that calls may be marked "tail" even if they do not occur before a ret instruction.
  2. The optional "cconv" marker indicates which calling convention the call should use. If none is specified, the call defaults to using C calling conventions.
  3. The optional Parameter Attributes list for return values. Only 'zeroext', 'signext', and 'inreg' attributes are valid here.
  4. 'ty': the type of the call instruction itself which is also the type of the return value. Functions that return no value are marked void.
  5. 'fnty': shall be the signature of the pointer to function value being invoked. The argument types must match the types implied by this signature. This type can be omitted if the function is not varargs and if the function type does not return a pointer to a function.
  6. 'fnptrval': An LLVM value containing a pointer to a function to be invoked. In most cases, this is a direct function invocation, but indirect calls are just as possible, calling an arbitrary pointer to function value.
  7. 'function args': argument list whose types match the function signature argument types. All arguments must be of first class type. If the function signature indicates the function accepts a variable number of arguments, the extra arguments can be specified.
  8. The optional function attributes list. Only 'noreturn', 'nounwind', 'readonly' and 'readnone' attributes are valid here.
    Semantics:

The 'call' instruction is used to cause control flow to transfer to a specified function, with its incoming arguments bound to the specified values. Upon a 'ret' instruction in the called function, control flow continues with the instruction after the function call, and the return value of the function is bound to the result argument.

    Example:
       %retval = call i32 @test(i32 %argc)
       call i32 (i8 *, ...)* @printf(i8 * %msg, i32 12, i8 42)      ; yields i32

    %ZZ = call zeroext i32 @bar() ; Return value is %zero extended
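
A couple of additional forms as a sketch (callee names hypothetical, not from the patch), exercising the optional markers described above:

  %X = tail call i32 @foo()                                    ; yields i32
  %Y = tail call fastcc i32 @foo()                             ; yields i32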

LLVM treats calls to some functions with names and arguments that match the standard C99 library as being the C99 library functions, and may perform optimizations or generate code for them under that assumption. This is something we'd like to change in the future to provide better support for freestanding environments and non-C-based languages.


'va_arg' Instruction

    Syntax:
       <resultval> = va_arg <va_list*> <arglist>, <argty>
     
    Overview:

The 'va_arg' instruction is used to access arguments passed through the "variable argument" area of a function call. It is used to implement the va_arg macro in C.

    Arguments:

This instruction takes a va_list* value and the type of the argument. It returns a value of the specified argument type and increments the va_list to point to the next argument. The actual type of va_list is target specific.

    Semantics:

The 'va_arg' instruction loads an argument of the specified type from the specified va_list and causes the va_list to point to the next argument. For more information, see the variable argument handling Intrinsic Functions.

It is legal for this instruction to be called in a function which does not take a variable number of arguments, for example, the vfprintf function.

va_arg is an LLVM instruction instead of an intrinsic function because it takes a type as an argument.

    Example:

See the variable argument processing section.

Note that the code generator does not yet fully support va_arg on many targets. Also, it does not currently support va_arg with aggregate types on any target.
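
As a minimal sketch (hypothetical, not from the patch) of the instruction in use with the variable argument intrinsics:

  %ap = alloca i8*
  %ap2 = bitcast i8** %ap to i8*
  call void @llvm.va_start(i8* %ap2)
  %tmp = va_arg i8** %ap, i32        ; read the next vararg as an i32
  call void @llvm.va_end(i8* %ap2)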


Intrinsic Functions

LLVM supports the notion of an "intrinsic function". These functions have well known names and semantics and are required to follow certain restrictions. Overall, these intrinsics represent an extension mechanism for the LLVM language that does not require changing all of the transformations in LLVM when adding to the language (or the bitcode reader/writer, the parser, etc...).

Intrinsic function names must all start with an "llvm." prefix. This prefix is reserved in LLVM for intrinsic names; thus, function names may not begin with this prefix. Intrinsic functions must always be external functions: you cannot define the body of intrinsic functions. Intrinsic functions may only be used in call or invoke instructions: it is illegal to take the address of an intrinsic function. Additionally, because intrinsic functions are part of the LLVM language, it is required if any are added that they be documented here.

Some intrinsic functions can be overloaded, i.e., the intrinsic represents a family of functions that perform the same operation but on different data types. Because LLVM can represent over 8 million different integer types, overloading is used commonly to allow an intrinsic function to operate on any integer type. One or more of the argument types or the result type can be overloaded to accept any integer type. Argument types may also be defined as exactly matching a previous argument's type or the result type. This allows an intrinsic function which accepts multiple arguments, but needs all of them to be of the same type, to only be overloaded with respect to a single argument or the result.

Overloaded intrinsics will have the names of their overloaded argument types encoded into their function name, each preceded by a period. Only those types which are overloaded result in a name suffix. Arguments whose type is matched against another type do not. For example, the llvm.ctpop function can take an integer of any width and returns an integer of exactly the same integer width. This leads to a family of functions such as i8 @llvm.ctpop.i8(i8 %val) and i29 @llvm.ctpop.i29(i29 %val). Only one type, the return type, is overloaded, and only one type suffix is required. Because the argument's type is matched against the return type, it does not require its own name suffix.

    To learn how to add an intrinsic function, please see the -Extending LLVM Guide. -

    + Extending LLVM Guide.

    @@ -4918,20 +5179,19 @@ does not require its own name suffix.

    -

Variable argument support is defined in LLVM with the va_arg instruction and
these three intrinsic functions. These functions are related to the similarly
named macros defined in the <stdarg.h> header file.

All of these functions operate on arguments that use a target-specific value
type "va_list". The LLVM assembly language reference manual does not define
what this type is, so all transformations should be prepared to handle these
functions regardless of the type used.

This example shows how the va_arg instruction and the variable argument
handling intrinsic functions are used.
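The following is a minimal sketch (the function and value names are
illustrative):

  define i32 @test(i32 %X, ...) {
    ; Allocate and initialize the va_list.
    %ap = alloca i8*
    %ap2 = bitcast i8** %ap to i8*
    call void @llvm.va_start(i8* %ap2)
    ; Read a single i32 variable argument, then clean up.
    %tmp = va_arg i8** %ap, i32
    call void @llvm.va_end(i8* %ap2)
    ret i32 %tmp
  }
  declare void @llvm.va_start(i8*)
  declare void @llvm.va_end(i8*)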
'llvm.va_start' Intrinsic

Syntax:

  declare void @llvm.va_start(i8* <arglist>)

Overview:

The 'llvm.va_start' intrinsic initializes *<arglist> for subsequent use by
va_arg.

Arguments:

The argument is a pointer to a va_list element to initialize.

Semantics:

The 'llvm.va_start' intrinsic works just like the va_start macro available
in C. In a target-dependent way, it initializes the va_list element to which
the argument points, so that the next call to va_arg will produce the first
variable argument passed to the function. Unlike the C va_start macro, this
intrinsic does not need to know the last argument of the function as the
compiler can figure that out.

'llvm.va_end' Intrinsic

Syntax:

  declare void @llvm.va_end(i8* <arglist>)

Overview:

The 'llvm.va_end' intrinsic destroys *<arglist>, which has been initialized
previously with llvm.va_start or llvm.va_copy.

Arguments:

The argument is a pointer to a va_list to destroy.

Semantics:

The 'llvm.va_end' intrinsic works just like the va_end macro available in C.
In a target-dependent way, it destroys the va_list element to which the
argument points. Calls to llvm.va_start and llvm.va_copy must be matched
exactly with calls to llvm.va_end.

LLVM support for Accurate Garbage Collection (GC) requires the implementation
and generation of these intrinsics. These intrinsics allow identification of
GC roots on the stack, as well as garbage collector implementations that
require read and write barriers. Front-ends for type-safe garbage collected
languages should generate these intrinsics to make use of the LLVM garbage
collectors. For more details, see Accurate Garbage Collection with LLVM.

The garbage collection intrinsics only operate on objects in the generic
address space (address space zero).
'llvm.gcroot' Intrinsic

Syntax:

  declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)

Overview:

The 'llvm.gcroot' intrinsic declares the existence of a GC root to the code
generator, and allows some metadata to be associated with it.

Arguments:

The first argument specifies the address of a stack object that contains the
root pointer. The second pointer (which must be either a constant or a
global value address) contains the meta-data to be associated with the root.

Semantics:

At runtime, a call to this intrinsic stores a null pointer into the "ptrloc"
location. At compile-time, the code generator generates information to allow
the runtime to find the pointer at GC safe points. The 'llvm.gcroot'
intrinsic may only be used in a function which specifies a GC algorithm.
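As a sketch, a front-end for a garbage collected language might emit the
following; the collector name "example" is hypothetical:

  declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)

  define void @frob(i8* %obj) gc "example" {
  entry:
    %root = alloca i8*                            ; stack slot holding a root
    call void @llvm.gcroot(i8** %root, i8* null)  ; declare the slot to the code generator
    store i8* %obj, i8** %root                    ; the runtime can now find %obj here
    ret void
  }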

'llvm.gcread' Intrinsic

Syntax:

  declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)

Overview:

The 'llvm.gcread' intrinsic identifies reads of references from heap
locations, allowing garbage collector implementations that require read
barriers.

Arguments:

The second argument is the address to read from, which should be an address
allocated from the garbage collector. The first object is a pointer to the
start of the referenced object, if needed by the language runtime (otherwise
null).

Semantics:

The 'llvm.gcread' intrinsic has the same semantics as a load instruction,
but may be replaced with substantially more complex code by the garbage
collector runtime, as needed. The 'llvm.gcread' intrinsic may only be used
in a function which specifies a GC algorithm.

'llvm.gcwrite' Intrinsic

Syntax:

  declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)

Overview:

The 'llvm.gcwrite' intrinsic identifies writes of references to heap
locations, allowing garbage collector implementations that require write
barriers (such as generational or reference counting collectors).

Arguments:

The first argument is the reference to store, the second is the start of the
object to store it to, and the third is the address of the field of Obj to
store to. If the runtime does not require a pointer to the object, Obj may
be null.

Semantics:

The 'llvm.gcwrite' intrinsic has the same semantics as a store instruction,
but may be replaced with substantially more complex code by the garbage
collector runtime, as needed. The 'llvm.gcwrite' intrinsic may only be used
in a function which specifies a GC algorithm.

These intrinsics are provided by LLVM to expose special features that may
only be implemented with code generator support.
'llvm.returnaddress' Intrinsic

Overview:

The 'llvm.returnaddress' intrinsic attempts to compute a target-specific
value indicating the return address of the current function or one of its
callers.

Arguments:

The argument to this intrinsic indicates which function to return the
address for. Zero indicates the calling function, one indicates its caller,
etc. The argument is required to be a constant integer value.

Semantics:

The 'llvm.returnaddress' intrinsic either returns a pointer indicating the
return address of the specified call frame, or zero if it cannot be
identified. The value returned by this intrinsic is likely to be incorrect
or 0 for arguments other than zero, so it should only be used for debugging
purposes.

Note that calling this intrinsic does not prevent function inlining or other
aggressive transformations, so the value returned may not be that of the
obvious source-language caller.
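For illustration, a sketch that asks for the current function's own return
address (argument zero):

  declare i8* @llvm.returnaddress(i32)

  %ra = call i8* @llvm.returnaddress(i32 0)   ; return address of the current frame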

'llvm.frameaddress' Intrinsic

Overview:

The 'llvm.frameaddress' intrinsic attempts to return the target-specific
frame pointer value for the specified stack frame.

Arguments:

The argument to this intrinsic indicates which function to return the frame
pointer for. Zero indicates the calling function, one indicates its caller,
etc. The argument is required to be a constant integer value.

Semantics:

The 'llvm.frameaddress' intrinsic either returns a pointer indicating the
frame address of the specified call frame, or zero if it cannot be
identified. The value returned by this intrinsic is likely to be incorrect
or 0 for arguments other than zero, so it should only be used for debugging
purposes.

Note that calling this intrinsic does not prevent function inlining or other
aggressive transformations, so the value returned may not be that of the
obvious source-language caller.

'llvm.stacksave' Intrinsic

Overview:

The 'llvm.stacksave' intrinsic is used to remember the current state of the
function stack, for use with llvm.stackrestore. This is useful for
implementing language features like scoped automatic variable sized arrays
in C99.

Semantics:

This intrinsic returns an opaque pointer value that can be passed to
llvm.stackrestore. When an llvm.stackrestore intrinsic is executed with a
value saved from llvm.stacksave, it effectively restores the state of the
stack to the state it was in when the llvm.stacksave intrinsic executed. In
practice, this pops any alloca blocks from the stack that were allocated
after the llvm.stacksave was executed.

'llvm.stackrestore' Intrinsic

Overview:

The 'llvm.stackrestore' intrinsic is used to restore the state of the
function stack to the state it was in when the corresponding llvm.stacksave
intrinsic executed. This is useful for implementing language features like
scoped automatic variable sized arrays in C99.

Semantics:

See the description for llvm.stacksave.
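A short sketch of the intended pairing, bracketing a dynamically sized
alloca (the names are illustrative):

  declare i8* @llvm.stacksave()
  declare void @llvm.stackrestore(i8* %ptr)

  define void @scoped(i32 %n) {
  entry:
    %sp = call i8* @llvm.stacksave()        ; remember the current stack state
    %buf = alloca i8, i32 %n                ; scoped variable-sized buffer
    call void @llvm.stackrestore(i8* %sp)   ; pops %buf from the stack
    ret void
  }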

'llvm.prefetch' Intrinsic

Overview:

The 'llvm.prefetch' intrinsic is a hint to the code generator to insert a
prefetch instruction if supported; otherwise, it is a no-op. Prefetches have
no effect on the behavior of the program but can change its performance
characteristics.

Arguments:

address is the address to be prefetched, rw is the specifier determining if
the fetch should be for a read (0) or write (1), and locality is a temporal
locality specifier ranging from (0) - no locality, to (3) - extremely local,
keep in cache. The rw and locality arguments must be constant integers.

Semantics:

This intrinsic does not modify the behavior of the program. In particular,
prefetches cannot trap and do not produce a value. On targets that support
this intrinsic, the prefetch can provide hints to the processor cache for
better performance.
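For example, a read prefetch with maximal temporal locality could look like
this sketch (%ptr is an assumed i8* value):

  declare void @llvm.prefetch(i8*, i32, i32)

  call void @llvm.prefetch(i8* %ptr, i32 0, i32 3)   ; read (0), keep in cache (3)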

'llvm.pcmarker' Intrinsic

Overview:

The 'llvm.pcmarker' intrinsic is a method to export a Program Counter (PC)
in a region of code to simulators and other tools. The method is target
specific, but it is expected that the marker will use exported symbols to
transmit the PC of the marker. The marker makes no guarantees that it will
remain with any specific instruction after optimizations. It is possible
that the presence of a marker will inhibit optimizations. The intended use
is to be inserted after optimizations to allow correlations of simulation
runs.

Arguments:

id is a numerical id identifying the marker.

Semantics:

This intrinsic does not modify the behavior of the program. Backends that do
not support this intrinsic may ignore it.
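A call simply supplies a constant marker id; the value 1 below is arbitrary:

  declare void @llvm.pcmarker(i32)

  call void @llvm.pcmarker(i32 1)   ; export the PC at this point as marker 1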

'llvm.readcyclecounter' Intrinsic

Overview:

The 'llvm.readcyclecounter' intrinsic provides access to the cycle counter
register (or similar low latency, high accuracy clocks) on those targets
that support it. On X86, it should map to RDTSC. On Alpha, it should map to
RPCC. As the backing counters overflow quickly (on the order of 9 seconds on
Alpha), this should only be used for small timings.

Semantics:

When directly supported, reading the cycle counter should not modify any
memory. Implementations are allowed to either return an application-specific
value or a system-wide value. On backends without support, this is lowered
to a constant 0.
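A sketch of a small timing measurement built from two counter reads:

  declare i64 @llvm.readcyclecounter()

  %t0 = call i64 @llvm.readcyclecounter()   ; counter before the work
  %t1 = call i64 @llvm.readcyclecounter()   ; counter after the work
  %dt = sub i64 %t1, %t0                    ; elapsed cycles (approximate)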

LLVM provides intrinsics for a few important standard C library functions.
These intrinsics allow source-language front-ends to pass information about
the alignment of the pointer arguments to the code generator, providing
opportunity for more efficient code generation.

'llvm.memcpy' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.memcpy on any integer bit
width. Not all targets support all bit widths however.

  declare void @llvm.memcpy.i8(i8 * <dest>, i8 * <src>,
                               i8 <len>, i32 <align>)
  declare void @llvm.memcpy.i16(i8 * <dest>, i8 * <src>,
                                i16 <len>, i32 <align>)
  declare void @llvm.memcpy.i32(i8 * <dest>, i8 * <src>,
                                i32 <len>, i32 <align>)

Overview:

The 'llvm.memcpy.*' intrinsics copy a block of memory from the source
location to the destination location.

Note that, unlike the standard libc function, the llvm.memcpy.* intrinsics
do not return a value, and take an extra alignment argument.

Arguments:

The first argument is a pointer to the destination, the second is a pointer
to the source. The third argument is an integer argument specifying the
number of bytes to copy, and the fourth argument is the alignment of the
source and destination locations.

If the call to this intrinsic has an alignment value that is not 0 or 1,
then the caller guarantees that both the source and destination pointers are
aligned to that boundary.

Semantics:

The 'llvm.memcpy.*' intrinsics copy a block of memory from the source
location to the destination location, which are not allowed to overlap. It
copies "len" bytes of memory over. If the argument is known to be aligned to
some boundary, this can be specified as the fourth argument, otherwise it
should be set to 0 or 1.
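As an illustrative sketch, copying 16 bytes between two buffers that the
caller knows are 4-byte aligned (%dst and %src are assumed i8* values):

  call void @llvm.memcpy.i32(i8* %dst, i8* %src,
                             i32 16, i32 4)   ; 16 bytes, 4-byte aligned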
'llvm.memmove' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.memmove on any integer bit
width. Not all targets support all bit widths however.

  declare void @llvm.memmove.i8(i8 * <dest>, i8 * <src>,
                                i8 <len>, i32 <align>)
  declare void @llvm.memmove.i16(i8 * <dest>, i8 * <src>,
                                 i16 <len>, i32 <align>)
  declare void @llvm.memmove.i32(i8 * <dest>, i8 * <src>,
                                 i32 <len>, i32 <align>)

Overview:

The 'llvm.memmove.*' intrinsics move a block of memory from the source
location to the destination location. It is similar to the 'llvm.memcpy'
intrinsic but allows the two memory locations to overlap.

Note that, unlike the standard libc function, the llvm.memmove.* intrinsics
do not return a value, and take an extra alignment argument.

Arguments:

The first argument is a pointer to the destination, the second is a pointer
to the source. The third argument is an integer argument specifying the
number of bytes to copy, and the fourth argument is the alignment of the
source and destination locations.

If the call to this intrinsic has an alignment value that is not 0 or 1,
then the caller guarantees that the source and destination pointers are
aligned to that boundary.

Semantics:

The 'llvm.memmove.*' intrinsics copy a block of memory from the source
location to the destination location, which may overlap. It copies "len"
bytes of memory over. If the argument is known to be aligned to some
boundary, this can be specified as the fourth argument, otherwise it should
be set to 0 or 1.
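For instance, this sketch shifts a region forward by one byte within a
single buffer, which is legal precisely because the two locations may
overlap (%p is an assumed i8* into the buffer):

  %q = getelementptr i8* %p, i32 1                            ; overlapping destination
  call void @llvm.memmove.i32(i8* %q, i8* %p, i32 15, i32 1)  ; unaligned move of 15 bytes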
'llvm.memset.*' Intrinsics

Syntax:

This is an overloaded intrinsic. You can use llvm.memset on any integer bit
width. Not all targets support all bit widths however.

  declare void @llvm.memset.i8(i8 * <dest>, i8 <val>,
                               i8 <len>, i32 <align>)
  declare void @llvm.memset.i16(i8 * <dest>, i8 <val>,
                                i16 <len>, i32 <align>)
  declare void @llvm.memset.i32(i8 * <dest>, i8 <val>,
                                i32 <len>, i32 <align>)

Overview:

The 'llvm.memset.*' intrinsics fill a block of memory with a particular byte
value.

Note that, unlike the standard libc function, the llvm.memset intrinsic does
not return a value, and takes an extra alignment argument.

Arguments:

The first argument is a pointer to the destination to fill, the second is
the byte value to fill it with, the third argument is an integer argument
specifying the number of bytes to fill, and the fourth argument is the known
alignment of the destination location.

If the call to this intrinsic has an alignment value that is not 0 or 1,
then the caller guarantees that the destination pointer is aligned to that
boundary.

Semantics:

The 'llvm.memset.*' intrinsics fill "len" bytes of memory starting at the
destination location. If the argument is known to be aligned to some
boundary, this can be specified as the fourth argument, otherwise it should
be set to 0 or 1.
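For example, zeroing a 32-byte buffer (a sketch; %buf is an assumed i8*):

  call void @llvm.memset.i32(i8* %buf, i8 0,
                             i32 32, i32 4)   ; fill 32 bytes with 0, 4-byte aligned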
'llvm.sqrt.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.sqrt on any floating point
or vector of floating point type. Not all targets support all types however.

  declare float     @llvm.sqrt.f32(float %Val)
  declare double    @llvm.sqrt.f64(double %Val)

Overview:

The 'llvm.sqrt' intrinsics return the sqrt of the specified operand,
returning the same value as the libm 'sqrt' functions would. Unlike sqrt in
libm, however, llvm.sqrt has undefined behavior for negative numbers other
than -0.0 (which allows for better optimization, because there is no need to
worry about errno being set). llvm.sqrt(-0.0) is defined to return -0.0 like
IEEE sqrt.

Arguments:

The argument and return value are floating point numbers of the same type.

Semantics:

This function returns the sqrt of the specified operand if it is a
nonnegative floating point number.
'llvm.powi.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.powi on any floating point
or vector of floating point type. Not all targets support all types however.

  declare float     @llvm.powi.f32(float  %Val, i32 %power)
  declare double    @llvm.powi.f64(double %Val, i32 %power)

Overview:

The 'llvm.powi.*' intrinsics return the first operand raised to the
specified (positive or negative) power. The order of evaluation of
multiplications is not defined. When a vector of floating point type is
used, the second argument remains a scalar integer value.

Arguments:

The second argument is an integer power, and the first is a value to raise
to that power.

Semantics:

This function returns the first value raised to the second power with an
unspecified sequence of rounding operations.
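As a sketch of the vector form, note that the power remains a scalar i32
even though the value operand is a vector:

  declare <4 x float> @llvm.powi.v4f32(<4 x float> %Val, i32 %power)

  %r = call <4 x float> @llvm.powi.v4f32(<4 x float> %v, i32 3)   ; each element cubed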

'llvm.sin.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.sin on any floating point
or vector of floating point type. Not all targets support all types however.

  declare float     @llvm.sin.f32(float  %Val)
  declare double    @llvm.sin.f64(double %Val)

Overview:

The 'llvm.sin.*' intrinsics return the sine of the operand.

Arguments:

The argument and return value are floating point numbers of the same type.

Semantics:

This function returns the sine of the specified operand, returning the same
values as the libm sin functions would, and handles error conditions in the
same way.

'llvm.cos.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.cos on any floating point
or vector of floating point type. Not all targets support all types however.

  declare float     @llvm.cos.f32(float  %Val)
  declare double    @llvm.cos.f64(double %Val)

Overview:

The 'llvm.cos.*' intrinsics return the cosine of the operand.

Arguments:

The argument and return value are floating point numbers of the same type.

Semantics:

This function returns the cosine of the specified operand, returning the
same values as the libm cos functions would, and handles error conditions in
the same way.

'llvm.pow.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.pow on any floating point
or vector of floating point type. Not all targets support all types however.

  declare float     @llvm.pow.f32(float  %Val, float %Power)
  declare double    @llvm.pow.f64(double %Val, double %Power)

Overview:

The 'llvm.pow.*' intrinsics return the first operand raised to the specified
(positive or negative) power.

Arguments:

The second argument is a floating point power, and the first is a value to
raise to that power.

Semantics:

This function returns the first value raised to the second power, returning
the same values as the libm pow functions would, and handles error
conditions in the same way.

LLVM provides intrinsics for a few important bit manipulation operations.
These allow efficient code generation for some algorithms.
'llvm.bswap.*' Intrinsics

Syntax:

This is an overloaded intrinsic function. You can use bswap on any integer
type that is an even number of bytes (i.e. BitWidth % 16 == 0).

  declare i16 @llvm.bswap.i16(i16 <id>)
  declare i32 @llvm.bswap.i32(i32 <id>)

Overview:

The 'llvm.bswap' family of intrinsics is used to byte swap integer values
with an even number of bytes (positive multiple of 16 bits). These are
useful for performing operations on data that is not in the target's native
byte order.

Semantics:

The llvm.bswap.i16 intrinsic returns an i16 value that has the high and low
byte of the input i16 swapped. Similarly, the llvm.bswap.i32 intrinsic
returns an i32 value that has the four bytes of the input i32 swapped, so
that if the input bytes are numbered 0, 1, 2, 3 then the returned i32 will
have its bytes in 3, 2, 1, 0 order. The llvm.bswap.i48, llvm.bswap.i64 and
other intrinsics extend this concept to additional even-byte lengths (6
bytes, 8 bytes and more, respectively).
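For example (the constant is 0x12345678 written in decimal):

  declare i32 @llvm.bswap.i32(i32)

  %swapped = call i32 @llvm.bswap.i32(i32 305419896)   ; 0x12345678 -> 0x78563412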
'llvm.ctpop.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
width. Not all targets support all bit widths however.

  declare i8  @llvm.ctpop.i8(i8  <src>)
  declare i16 @llvm.ctpop.i16(i16 <src>)

Overview:

The 'llvm.ctpop' family of intrinsics counts the number of bits set in a
value.

Arguments:

The only argument is the value to be counted. The argument may be of any
integer type. The return type must match the argument type.

Semantics:

The 'llvm.ctpop' intrinsic counts the 1's in a variable.
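A small illustrative call:

  declare i32 @llvm.ctpop.i32(i32)

  %count = call i32 @llvm.ctpop.i32(i32 255)   ; yields 8: 255 has eight set bits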
'llvm.ctlz.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.ctlz on any integer bit
width. Not all targets support all bit widths however.

  declare i8  @llvm.ctlz.i8 (i8  <src>)
  declare i16 @llvm.ctlz.i16(i16 <src>)

Overview:

The 'llvm.ctlz' family of intrinsic functions counts the number of leading
zeros in a variable.

Arguments:

The only argument is the value to be counted. The argument may be of any
integer type. The return type must match the argument type.

Semantics:

The 'llvm.ctlz' intrinsic counts the leading (most significant) zeros in a
variable. If the src == 0 then the result is the size in bits of the type of
src. For example, llvm.ctlz(i32 2) = 30.
'llvm.cttz.*' Intrinsic

Syntax:

This is an overloaded intrinsic. You can use llvm.cttz on any integer bit
width. Not all targets support all bit widths however.

  declare i8  @llvm.cttz.i8 (i8  <src>)
  declare i16 @llvm.cttz.i16(i16 <src>)

Overview:

The 'llvm.cttz' family of intrinsic functions counts the number of trailing
zeros.

Arguments:

The only argument is the value to be counted. The argument may be of any
integer type. The return type must match the argument type.

Semantics:

The 'llvm.cttz' intrinsic counts the trailing (least significant) zeros in a
variable. If the src == 0 then the result is the size in bits of the type of
src. For example, llvm.cttz(2) = 1.

LLVM provides intrinsics for some arithmetic with overflow operations.
'llvm.sadd.with.overflow.*' Intrinsics

Syntax:

This is an overloaded intrinsic. You can use llvm.sadd.with.overflow on any
integer bit width.

  declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)

Overview:

The 'llvm.sadd.with.overflow' family of intrinsic functions perform a signed
addition of the two arguments, and indicate whether an overflow occurred
during the signed summation.

Arguments:

The arguments (%a and %b) and the first element of the result structure may
be of integer types of any bit width, but they must have the same bit width.
The second element of the result structure must be of type i1. %a and %b are
the two values that will undergo signed addition.

Semantics:

The 'llvm.sadd.with.overflow' family of intrinsic functions perform a signed
addition of the two variables. They return a structure — the first element
of which is the signed summation, and the second element of which is a bit
specifying if the signed summation resulted in an overflow.

Examples:
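A representative use, branching on the overflow bit (value and label names
are illustrative):

  %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %sum = extractvalue {i32, i1} %res, 0          ; the signed sum
  %obit = extractvalue {i32, i1} %res, 1         ; did it overflow?
  br i1 %obit, label %overflow, label %normal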

'llvm.uadd.with.overflow.*' Intrinsics

Syntax:

This is an overloaded intrinsic. You can use llvm.uadd.with.overflow on any
integer bit width.

  declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)

Overview:

The 'llvm.uadd.with.overflow' family of intrinsic functions perform an
unsigned addition of the two arguments, and indicate whether a carry
occurred during the unsigned summation.

Arguments:

The arguments (%a and %b) and the first element of the result structure may
be of integer types of any bit width, but they must have the same bit width.
The second element of the result structure must be of type i1. %a and %b are
the two values that will undergo unsigned addition.

Semantics:

The 'llvm.uadd.with.overflow' family of intrinsic functions perform an
unsigned addition of the two arguments. They return a structure — the first
element of which is the sum, and the second element of which is a bit
specifying if the unsigned summation resulted in a carry.

Examples:
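A representative use, branching on the carry bit (names are illustrative):

  %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %sum = extractvalue {i32, i1} %res, 0          ; the unsigned sum
  %carry = extractvalue {i32, i1} %res, 1        ; did the addition carry?
  br i1 %carry, label %carry_set, label %normal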

'llvm.ssub.with.overflow.*' Intrinsics

Syntax:

This is an overloaded intrinsic. You can use llvm.ssub.with.overflow on any
integer bit width.

  declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)

Overview:

The 'llvm.ssub.with.overflow' family of intrinsic functions perform a signed
subtraction of the two arguments, and indicate whether an overflow occurred
during the signed subtraction.

Arguments:

The arguments (%a and %b) and the first element of the result structure may
be of integer types of any bit width, but they must have the same bit width.
The second element of the result structure must be of type i1. %a and %b are
the two values that will undergo signed subtraction.

Semantics:

The 'llvm.ssub.with.overflow' family of intrinsic functions perform a signed
subtraction of the two arguments. They return a structure — the first
element of which is the subtraction, and the second element of which is a
bit specifying if the signed subtraction resulted in an overflow.

Examples:
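A representative use (names are illustrative):

  %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %diff = extractvalue {i32, i1} %res, 0         ; the signed difference
  %obit = extractvalue {i32, i1} %res, 1         ; did it overflow?
  br i1 %obit, label %overflow, label %normal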

    Syntax:
    -

    This is an overloaded intrinsic. You can use llvm.usub.with.overflow -on any integer bit width.

    + on any integer bit width.

       declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
    @@ -6333,24 +6309,23 @@ on any integer bit width.

    Overview:
    -

    The 'llvm.usub.with.overflow' family of intrinsic functions perform -an unsigned subtraction of the two arguments, and indicate whether an overflow -occurred during the unsigned subtraction.

    + an unsigned subtraction of the two arguments, and indicate whether an + overflow occurred during the unsigned subtraction.

    Arguments:
    -

    The arguments (%a and %b) and the first element of the result structure may -be of integer types of any bit width, but they must have the same bit width. The -second element of the result structure must be of type i1. %a -and %b are the two values that will undergo unsigned subtraction.

    + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo unsigned subtraction.

    Semantics:
    -

    The 'llvm.usub.with.overflow' family of intrinsic functions perform -an unsigned subtraction of the two arguments. They return a structure — the -first element of which is the subtraction, and the second element of which is a bit -specifying if the unsigned subtraction resulted in an overflow.

    + an unsigned subtraction of the two arguments. They return a structure — + the first element of which is the subtraction, and the second element of + which is a bit specifying if the unsigned subtraction resulted in an + overflow.
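    The unsigned variant is used the same way; a minimal sketch with hypothetical
    names:

      %res  = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
      %diff = extractvalue {i32, i1} %res, 0    ; the i32 difference
      %obit = extractvalue {i32, i1} %res, 1    ; the i1 overflow bit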

    Examples:
    @@ -6370,9 +6345,8 @@ specifying if the unsigned subtraction resulted in an overflow.

    Syntax:
    -

    This is an overloaded intrinsic. You can use llvm.smul.with.overflow -on any integer bit width.

    + on any integer bit width.

       declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
    @@ -6383,23 +6357,22 @@ on any integer bit width.

    Overview:

    The 'llvm.smul.with.overflow' family of intrinsic functions perform -a signed multiplication of the two arguments, and indicate whether an overflow -occurred during the signed multiplication.

    + a signed multiplication of the two arguments, and indicate whether an + overflow occurred during the signed multiplication.

    Arguments:
    -

    The arguments (%a and %b) and the first element of the result structure may -be of integer types of any bit width, but they must have the same bit width. The -second element of the result structure must be of type i1. %a -and %b are the two values that will undergo signed multiplication.

    + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo signed multiplication.

    Semantics:
    -

    The 'llvm.smul.with.overflow' family of intrinsic functions perform -a signed multiplication of the two arguments. They return a structure — -the first element of which is the multiplication, and the second element of -which is a bit specifying if the signed multiplication resulted in an -overflow.

    + a signed multiplication of the two arguments. They return a structure — + the first element of which is the multiplication, and the second element of + which is a bit specifying if the signed multiplication resulted in an + overflow.
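    A sketch analogous to the subtraction intrinsics above (hypothetical names):

      %res  = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
      %prod = extractvalue {i32, i1} %res, 0    ; the i32 product
      %obit = extractvalue {i32, i1} %res, 1    ; the i1 overflow bit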

    Examples:
    @@ -6419,9 +6392,8 @@ overflow.

    Syntax:
    -

    This is an overloaded intrinsic. You can use llvm.umul.with.overflow -on any integer bit width.

    + on any integer bit width.

       declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
    @@ -6430,26 +6402,23 @@ on any integer bit width.

    Overview:
    -

    The 'llvm.umul.with.overflow' family of intrinsic functions perform -a unsigned multiplication of the two arguments, and indicate whether an overflow -occurred during the unsigned multiplication.

    + an unsigned multiplication of the two arguments, and indicate whether an + overflow occurred during the unsigned multiplication.

    Arguments:
    -

    The arguments (%a and %b) and the first element of the result structure may -be of integer types of any bit width, but they must have the same bit width. The -second element of the result structure must be of type i1. %a -and %b are the two values that will undergo unsigned -multiplication.

    + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo unsigned multiplication.

    Semantics:
    -

    The 'llvm.umul.with.overflow' family of intrinsic functions perform -an unsigned multiplication of the two arguments. They return a structure — -the first element of which is the multiplication, and the second element of -which is a bit specifying if the unsigned multiplication resulted in an -overflow.

    + an unsigned multiplication of the two arguments. They return a structure + — the first element of which is the multiplication, and the second + element of which is a bit specifying if the unsigned multiplication resulted + in an overflow.
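    Again as a sketch with hypothetical names, the unsigned variant is used
    identically:

      %res  = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
      %prod = extractvalue {i32, i1} %res, 0    ; the i32 product
      %obit = extractvalue {i32, i1} %res, 1    ; the i1 overflow bit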

    Examples:
    @@ -6467,14 +6436,13 @@ overflow.

    -

    -The LLVM debugger intrinsics (which all start with llvm.dbg. prefix), -are described in the LLVM Source Level -Debugging document. -

    -
    +

    The LLVM debugger intrinsics (which all start with the llvm.dbg. + prefix) are described in + the LLVM Source + Level Debugging document.

    + +
    @@ -6482,10 +6450,12 @@ Debugging document.
    -

    The LLVM exception handling intrinsics (which all start with -llvm.eh. prefix), are described in the LLVM Exception -Handling document.

    + +

    The LLVM exception handling intrinsics (which all start with + the llvm.eh. prefix) are described in + the LLVM Exception + Handling document.

    +
    @@ -6494,70 +6464,74 @@ Handling document.

    -

    - This intrinsic makes it possible to excise one parameter, marked with - the nest attribute, from a function. The result is a callable - function pointer lacking the nest parameter - the caller does not need - to provide a value for it. Instead, the value to use is stored in - advance in a "trampoline", a block of memory usually allocated - on the stack, which also contains code to splice the nest value into the - argument list. This is used to implement the GCC nested function address - extension. -

    -

    - For example, if the function is - i32 f(i8* nest %c, i32 %x, i32 %y) then the resulting function - pointer has signature i32 (i32, i32)*. It can be created as follows:

    + +

    This intrinsic makes it possible to excise one parameter, marked with + the nest attribute, from a function. The result is a callable + function pointer lacking the nest parameter - the caller does not need to + provide a value for it. Instead, the value to use is stored in advance in a + "trampoline", a block of memory usually allocated on the stack, which also + contains code to splice the nest value into the argument list. This is used + to implement the GCC nested function address extension.

    + +

    For example, if the function is + i32 f(i8* nest %c, i32 %x, i32 %y) then the resulting function + pointer has signature i32 (i32, i32)*. It can be created as + follows:

    + +
       %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
       %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
       %p = call i8* @llvm.init.trampoline( i8* %tramp1, i8* bitcast (i32 (i8* nest , i32, i32)* @f to i8*), i8* %nval )
       %fp = bitcast i8* %p to i32 (i32, i32)*
     
    -

    The call %val = call i32 %fp( i32 %x, i32 %y ) is then equivalent - to %val = call i32 %f( i8* %nval, i32 %x, i32 %y ).

    +
    + +

    The call %val = call i32 %fp( i32 %x, i32 %y ) is then equivalent + to %val = call i32 @f( i8* %nval, i32 %x, i32 %y ).

    +
    +
    +
    Syntax:
    -declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
    +  declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
     
    +
    Overview:
    -

    - This fills the memory pointed to by tramp with code - and returns a function pointer suitable for executing it. -

    +

    This fills the memory pointed to by tramp with code and returns a + function pointer suitable for executing it.

    +
    Arguments:
    -

    - The llvm.init.trampoline intrinsic takes three arguments, all - pointers. The tramp argument must point to a sufficiently large - and sufficiently aligned block of memory; this memory is written to by the - intrinsic. Note that the size and the alignment are target-specific - LLVM - currently provides no portable way of determining them, so a front-end that - generates this intrinsic needs to have some target-specific knowledge. - The func argument must hold a function bitcast to an i8*. -

    +

    The llvm.init.trampoline intrinsic takes three arguments, all + pointers. The tramp argument must point to a sufficiently large and + sufficiently aligned block of memory; this memory is written to by the + intrinsic. Note that the size and the alignment are target-specific - LLVM + currently provides no portable way of determining them, so a front-end that + generates this intrinsic needs to have some target-specific knowledge. + The func argument must hold a function bitcast to + an i8*.

    +
    Semantics:
    -

    - The block of memory pointed to by tramp is filled with target - dependent code, turning it into a function. A pointer to this function is - returned, but needs to be bitcast to an - appropriate function pointer type - before being called. The new function's signature is the same as that of - func with any arguments marked with the nest attribute - removed. At most one such nest argument is allowed, and it must be - of pointer type. Calling the new function is equivalent to calling - func with the same argument list, but with nval used for the - missing nest argument. If, after calling - llvm.init.trampoline, the memory pointed to by tramp is - modified, then the effect of any later call to the returned function pointer is - undefined. -

    +

    The block of memory pointed to by tramp is filled with target + dependent code, turning it into a function. A pointer to this function is + returned, but needs to be bitcast to an appropriate + function pointer type before being called. The new function's signature + is the same as that of func with any arguments marked with + the nest attribute removed. At most one such nest argument + is allowed, and it must be of pointer type. Calling the new function is + equivalent to calling func with the same argument list, but + with nval used for the missing nest argument. If, after + calling llvm.init.trampoline, the memory pointed to + by tramp is modified, then the effect of any later call to the + returned function pointer is undefined.

    +
    @@ -6566,27 +6540,25 @@ declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <n
    -

    - These intrinsic functions expand the "universal IR" of LLVM to represent - hardware constructs for atomic operations and memory synchronization. This - provides an interface to the hardware, not an interface to the programmer. It - is aimed at a low enough level to allow any programming models or APIs - (Application Programming Interfaces) which - need atomic behaviors to map cleanly onto it. It is also modeled primarily on - hardware behavior. Just as hardware provides a "universal IR" for source - languages, it also provides a starting point for developing a "universal" - atomic operation and synchronization IR. -

    -

    - These do not form an API such as high-level threading libraries, - software transaction memory systems, atomic primitives, and intrinsic - functions as found in BSD, GNU libc, atomic_ops, APR, and other system and - application libraries. The hardware interface provided by LLVM should allow - a clean implementation of all of these APIs and parallel programming models. - No one model or paradigm should be selected above others unless the hardware - itself ubiquitously does so. -

    +

    These intrinsic functions expand the "universal IR" of LLVM to represent + hardware constructs for atomic operations and memory synchronization. This + provides an interface to the hardware, not an interface to the programmer. It + is aimed at a low enough level to allow any programming models or APIs + (Application Programming Interfaces) which need atomic behaviors to map + cleanly onto it. It is also modeled primarily on hardware behavior. Just as + hardware provides a "universal IR" for source languages, it also provides a + starting point for developing a "universal" atomic operation and + synchronization IR.

    + +

    These do not form an API such as high-level threading libraries, + software transactional memory systems, atomic primitives, and intrinsic + functions as found in BSD, GNU libc, atomic_ops, APR, and other system and + application libraries. The hardware interface provided by LLVM should allow + a clean implementation of all of these APIs and parallel programming models. + No one model or paradigm should be selected above others unless the hardware + itself ubiquitously does so.

    +
    @@ -6596,59 +6568,56 @@ declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <n
    Syntax:
    -declare void @llvm.memory.barrier( i1 <ll>, i1 <ls>, i1 <sl>, i1 <ss>, 
    -i1 <device> )
    -
    +  declare void @llvm.memory.barrier( i1 <ll>, i1 <ls>, i1 <sl>, i1 <ss>, i1 <device> )
     
    +
    Overview:
    -

    - The llvm.memory.barrier intrinsic guarantees ordering between - specific pairs of memory access types. -

    +

    The llvm.memory.barrier intrinsic guarantees ordering between + specific pairs of memory access types.

    +
    Arguments:
    -

    - The llvm.memory.barrier intrinsic requires five boolean arguments. - The first four arguments enables a specific barrier as listed below. The fith - argument specifies that the barrier applies to io or device or uncached memory. +

    The llvm.memory.barrier intrinsic requires five boolean arguments. + The first four arguments enable a specific barrier as listed below. The + fifth argument specifies that the barrier applies to I/O, device, or uncached + memory.

    + +
      +
    • ll: load-load barrier
    • +
    • ls: load-store barrier
    • +
    • sl: store-load barrier
    • +
    • ss: store-store barrier
    • +
    • device: barrier applies to device and uncached memory also.
    • +
    -

    -
      -
    • ll: load-load barrier
    • -
    • ls: load-store barrier
    • -
    • sl: store-load barrier
    • -
    • ss: store-store barrier
    • -
    • device: barrier applies to device and uncached memory also.
    • -
    Semantics:
    -

    - This intrinsic causes the system to enforce some ordering constraints upon - the loads and stores of the program. This barrier does not indicate - when any events will occur, it only enforces an order in - which they occur. For any of the specified pairs of load and store operations - (f.ex. load-load, or store-load), all of the first operations preceding the - barrier will complete before any of the second operations succeeding the - barrier begin. Specifically the semantics for each pairing is as follows: -

    -
      -
    • ll: All loads before the barrier must complete before any load - after the barrier begins.
    • - -
    • ls: All loads before the barrier must complete before any - store after the barrier begins.
    • -
    • ss: All stores before the barrier must complete before any - store after the barrier begins.
    • -
    • sl: All stores before the barrier must complete before any - load after the barrier begins.
    • -
    -

    - These semantics are applied with a logical "and" behavior when more than one - is enabled in a single memory barrier intrinsic. -

    -

    - Backends may implement stronger barriers than those requested when they do not - support as fine grained a barrier as requested. Some architectures do not - need all types of barriers and on such architectures, these become noops. -

    +

    This intrinsic causes the system to enforce some ordering constraints upon + the loads and stores of the program. This barrier does not + indicate when any events will occur; it only enforces + an order in which they occur. For any of the specified pairs of load + and store operations (e.g. load-load, or store-load), all of the first + operations preceding the barrier will complete before any of the second + operations succeeding the barrier begin. Specifically, the semantics for each + pairing are as follows:

    + +
      +
    • ll: All loads before the barrier must complete before any load + after the barrier begins.
    • +
    • ls: All loads before the barrier must complete before any + store after the barrier begins.
    • +
    • ss: All stores before the barrier must complete before any + store after the barrier begins.
    • +
    • sl: All stores before the barrier must complete before any + load after the barrier begins.
    • +
    + +

    These semantics are applied with a logical "and" behavior when more than one + is enabled in a single memory barrier intrinsic.

    + +

    Backends may implement stronger barriers than those requested when they do + not support as fine-grained a barrier as requested. Some architectures do + not need all types of barriers; on such architectures, these become + no-ops.

    +
    Example:
     %ptr      = malloc i32
    @@ -6659,50 +6628,48 @@ i1 <device> )
                                     ; guarantee the above finishes
                 store i32 8, %ptr   ; before this begins
     
    +
    +
    +
    Syntax:
    -

    - This is an overloaded intrinsic. You can use llvm.atomic.cmp.swap on - any integer bit width and for different address spaces. Not all targets - support all bit widths however.

    +

    This is an overloaded intrinsic. You can use llvm.atomic.cmp.swap on + any integer bit width and for different address spaces. Not all targets + support all bit widths however.

    -declare i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* <ptr>, i8 <cmp>, i8 <val> )
    -declare i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* <ptr>, i16 <cmp>, i16 <val> )
    -declare i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* <ptr>, i32 <cmp>, i32 <val> )
    -declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* <ptr>, i64 <cmp>, i64 <val> )
    -
    +  declare i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* <ptr>, i8 <cmp>, i8 <val> )
    +  declare i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* <ptr>, i16 <cmp>, i16 <val> )
    +  declare i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* <ptr>, i32 <cmp>, i32 <val> )
    +  declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* <ptr>, i64 <cmp>, i64 <val> )
     
    +
    Overview:
    -

    - This loads a value in memory and compares it to a given value. If they are - equal, it stores a new value into the memory. -

    +

    This loads a value in memory and compares it to a given value. If they are + equal, it stores a new value into the memory.

    +
    Arguments:
    -

    - The llvm.atomic.cmp.swap intrinsic takes three arguments. The result as - well as both cmp and val must be integer values with the - same bit width. The ptr argument must be a pointer to a value of - this integer type. While any bit width integer may be used, targets may only - lower representations they support in hardware. +

    The llvm.atomic.cmp.swap intrinsic takes three arguments. The result + as well as both cmp and val must be integer values with the + same bit width. The ptr argument must be a pointer to a value of + this integer type. While any bit width integer may be used, targets may only + lower representations they support in hardware.

    -

    Semantics:
    -

    - This entire intrinsic must be executed atomically. It first loads the value - in memory pointed to by ptr and compares it with the value - cmp. If they are equal, val is stored into the memory. The - loaded value is yielded in all cases. This provides the equivalent of an - atomic compare-and-swap operation within the SSA framework. -

    -
    Examples:
    +

    This entire intrinsic must be executed atomically. It first loads the value + in memory pointed to by ptr and compares it with the + value cmp. If they are equal, val is stored into the + memory. The loaded value is yielded in all cases. This provides the + equivalent of an atomic compare-and-swap operation within the SSA + framework.

    +
    Examples:
     %ptr      = malloc i32
                 store i32 4, %ptr
    @@ -6720,6 +6687,7 @@ declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* <ptr>, i64 <cmp>,
     
     %memval2  = load i32* %ptr                ; yields {i32}:memval2 = 8
     
    +
    @@ -6729,38 +6697,33 @@ declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* <ptr>, i64 <cmp>,
    Syntax:
    -

    - This is an overloaded intrinsic. You can use llvm.atomic.swap on any - integer bit width. Not all targets support all bit widths however.

    -
    -declare i8 @llvm.atomic.swap.i8.p0i8( i8* <ptr>, i8 <val> )
    -declare i16 @llvm.atomic.swap.i16.p0i16( i16* <ptr>, i16 <val> )
    -declare i32 @llvm.atomic.swap.i32.p0i32( i32* <ptr>, i32 <val> )
    -declare i64 @llvm.atomic.swap.i64.p0i64( i64* <ptr>, i64 <val> )
    +

    This is an overloaded intrinsic. You can use llvm.atomic.swap on any + integer bit width. Not all targets support all bit widths however.

    +
    +  declare i8 @llvm.atomic.swap.i8.p0i8( i8* <ptr>, i8 <val> )
    +  declare i16 @llvm.atomic.swap.i16.p0i16( i16* <ptr>, i16 <val> )
    +  declare i32 @llvm.atomic.swap.i32.p0i32( i32* <ptr>, i32 <val> )
    +  declare i64 @llvm.atomic.swap.i64.p0i64( i64* <ptr>, i64 <val> )
     
    +
    Overview:
    -

    - This intrinsic loads the value stored in memory at ptr and yields - the value from memory. It then stores the value in val in the memory - at ptr. -

    +

    This intrinsic loads the value stored in memory at ptr and yields + the value from memory. It then stores the value in val in the memory + at ptr.

    +
    Arguments:
    +

    The llvm.atomic.swap intrinsic takes two arguments. Both + the val argument and the result must be integers of the same bit + width. The first argument, ptr, must be a pointer to a value of this + integer type. The targets may only lower integer representations they + support.

    -

    - The llvm.atomic.swap intrinsic takes two arguments. Both the - val argument and the result must be integers of the same bit width. - The first argument, ptr, must be a pointer to a value of this - integer type. The targets may only lower integer representations they - support. -

    Semantics:
    -

    - This intrinsic loads the value pointed to by ptr, yields it, and - stores val back into ptr atomically. This provides the - equivalent of an atomic swap operation within the SSA framework. +

    This intrinsic loads the value pointed to by ptr, yields it, and + stores val back into ptr atomically. This provides the + equivalent of an atomic swap operation within the SSA framework.

    -

    Examples:
     %ptr      = malloc i32
    @@ -6779,6 +6742,7 @@ declare i64 @llvm.atomic.swap.i64.p0i64( i64* <ptr>, i64 <val> )
     %stored2  = icmp eq i32 %result2, 8     ; yields {i1}:stored2 = true
     %memval2  = load i32* %ptr              ; yields {i32}:memval2 = 2
     
    +
    @@ -6786,37 +6750,34 @@ declare i64 @llvm.atomic.swap.i64.p0i64( i64* <ptr>, i64 <val> ) 'llvm.atomic.load.add.*' Intrinsic
    +
    +
    Syntax:
    -

    - This is an overloaded intrinsic. You can use llvm.atomic.load.add on any - integer bit width. Not all targets support all bit widths however.

    -
    -declare i8 @llvm.atomic.load.add.i8..p0i8( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.add.i16..p0i16( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.add.i32..p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.add.i64..p0i64( i64* <ptr>, i64 <delta> )
    +

    This is an overloaded intrinsic. You can use llvm.atomic.load.add on + any integer bit width. Not all targets support all bit widths however.

    +
    +  declare i8 @llvm.atomic.load.add.i8..p0i8( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.add.i16..p0i16( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.add.i32..p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.add.i64..p0i64( i64* <ptr>, i64 <delta> )
     
    +
    Overview:
    -

    - This intrinsic adds delta to the value stored in memory at - ptr. It yields the original value at ptr. -

    +

    This intrinsic adds delta to the value stored in memory + at ptr. It yields the original value at ptr.

    +
    Arguments:
    -

    +

    The intrinsic takes two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

    - The intrinsic takes two arguments, the first a pointer to an integer value - and the second an integer value. The result is also an integer value. These - integer types can have any bit width, but they must all have the same bit - width. The targets may only lower integer representations they support. -

    Semantics:
    -

    - This intrinsic does a series of operations atomically. It first loads the - value stored at ptr. It then adds delta, stores the result - to ptr. It yields the original value stored at ptr. -

    +

    This intrinsic does a series of operations atomically. It first loads the + value stored at ptr. It then adds delta, stores the result + to ptr. It yields the original value stored at ptr.

    Examples:
    @@ -6830,6 +6791,7 @@ declare i64 @llvm.atomic.load.add.i64..p0i64( i64* <ptr>, i64 <delta>
                                     ; yields {i32}:result3 = 10
     %memval1  = load i32* %ptr      ; yields {i32}:memval1 = 15
     
    +
    @@ -6837,38 +6799,36 @@ declare i64 @llvm.atomic.load.add.i64..p0i64( i64* <ptr>, i64 <delta> 'llvm.atomic.load.sub.*' Intrinsic
    +
    +
    Syntax:
    -

    - This is an overloaded intrinsic. You can use llvm.atomic.load.sub on - any integer bit width and for different address spaces. Not all targets - support all bit widths however.

    -
    -declare i8 @llvm.atomic.load.sub.i8.p0i32( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.sub.i16.p0i32( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.sub.i32.p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* <ptr>, i64 <delta> )
    +

    This is an overloaded intrinsic. You can use llvm.atomic.load.sub on + any integer bit width and for different address spaces. Not all targets + support all bit widths however.

    +
    +  declare i8 @llvm.atomic.load.sub.i8.p0i32( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.sub.i16.p0i32( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.sub.i32.p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* <ptr>, i64 <delta> )
     
    +
    Overview:
    -

    - This intrinsic subtracts delta to the value stored in memory at - ptr. It yields the original value at ptr. -

    +

    This intrinsic subtracts delta from the value stored in memory at + ptr. It yields the original value at ptr.

    +
    Arguments:
    -

    +

    The intrinsic takes two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

    - The intrinsic takes two arguments, the first a pointer to an integer value - and the second an integer value. The result is also an integer value. These - integer types can have any bit width, but they must all have the same bit - width. The targets may only lower integer representations they support. -

    Semantics:
    -

    - This intrinsic does a series of operations atomically. It first loads the - value stored at ptr. It then subtracts delta, stores the - result to ptr. It yields the original value stored at ptr. -

    +

    This intrinsic does a series of operations atomically. It first loads the + value stored at ptr. It then subtracts delta, stores the + result to ptr. It yields the original value stored + at ptr.

    Examples:
    @@ -6882,6 +6842,7 @@ declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* <ptr>, i64 <delta>
                                     ; yields {i32}:result3 = 2
     %memval1  = load i32* %ptr      ; yields {i32}:memval1 = -3
     
    +
    @@ -6890,67 +6851,61 @@ declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* <ptr>, i64 <delta> 'llvm.atomic.load.nand.*' Intrinsic
    'llvm.atomic.load.or.*' Intrinsic
    'llvm.atomic.load.xor.*' Intrinsic
    - +
    +
    Syntax:
    -

    - These are overloaded intrinsics. You can use llvm.atomic.load_and, - llvm.atomic.load_nand, llvm.atomic.load_or, and - llvm.atomic.load_xor on any integer bit width and for different - address spaces. Not all targets support all bit widths however.

    -
    -declare i8 @llvm.atomic.load.and.i8.p0i8( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.and.i16.p0i16( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.and.i32.p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.and.i64.p0i64( i64* <ptr>, i64 <delta> )
    +

    These are overloaded intrinsics. You can + use llvm.atomic.load.and, llvm.atomic.load.nand, + llvm.atomic.load.or, and llvm.atomic.load.xor on any integer + bit width and for different address spaces. Not all targets support all bit + widths however.

    +
    +  declare i8 @llvm.atomic.load.and.i8.p0i8( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.and.i16.p0i16( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.and.i32.p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.and.i64.p0i64( i64* <ptr>, i64 <delta> )
     
    -declare i8 @llvm.atomic.load.or.i8.p0i8( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.or.i16.p0i16( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.or.i32.p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.or.i64.p0i64( i64* <ptr>, i64 <delta> )
    -
    +  declare i8 @llvm.atomic.load.or.i8.p0i8( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.or.i16.p0i16( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.or.i32.p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.or.i64.p0i64( i64* <ptr>, i64 <delta> )
     
    -declare i8 @llvm.atomic.load.nand.i8.p0i32( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.nand.i16.p0i32( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.nand.i32.p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.nand.i64.p0i32( i64* <ptr>, i64 <delta> )
    -
    +  declare i8 @llvm.atomic.load.nand.i8.p0i32( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.nand.i16.p0i32( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.nand.i32.p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.nand.i64.p0i32( i64* <ptr>, i64 <delta> )
     
    -declare i8 @llvm.atomic.load.xor.i8.p0i32( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.xor.i16.p0i32( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.xor.i32.p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* <ptr>, i64 <delta> )
    -
    +  declare i8 @llvm.atomic.load.xor.i8.p0i32( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.xor.i16.p0i32( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.xor.i32.p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* <ptr>, i64 <delta> )
     
    +
    Overview:
    -

    - These intrinsics bitwise the operation (and, nand, or, xor) delta to - the value stored in memory at ptr. It yields the original value - at ptr. -

    +

    These intrinsics combine delta with the value stored in memory + at ptr using the given bitwise operation (and, nand, or, xor). + They yield the original value at ptr.

    +
    Arguments:
    -

    +

    These intrinsics take two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

    - These intrinsics take two arguments, the first a pointer to an integer value - and the second an integer value. The result is also an integer value. These - integer types can have any bit width, but they must all have the same bit - width. The targets may only lower integer representations they support. -

    Semantics:
    -

    - These intrinsics does a series of operations atomically. They first load the - value stored at ptr. They then do the bitwise operation - delta, store the result to ptr. They yield the original - value stored at ptr. -

    +

    These intrinsics perform a series of operations atomically. They first load + the value stored at ptr. They then apply the bitwise operation + with delta and store the result to ptr. They yield the + original value stored at ptr.

    Examples:
    @@ -6966,8 +6921,8 @@ declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* <ptr>, i64 <delta>
                                     ; yields {i32}:result3 = FF
     %memval1  = load i32* %ptr      ; yields {i32}:memval1 = F0
     
    -
    +
    @@ -6975,68 +6930,60 @@ declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* <ptr>, i64 <delta> 'llvm.atomic.load.min.*' Intrinsic
    'llvm.atomic.load.umax.*' Intrinsic
    'llvm.atomic.load.umin.*' Intrinsic
    -
    +
    +
    Syntax:
    -

    - These are overloaded intrinsics. You can use llvm.atomic.load_max, - llvm.atomic.load_min, llvm.atomic.load_umax, and - llvm.atomic.load_umin on any integer bit width and for different - address spaces. Not all targets - support all bit widths however.

    -
    -declare i8 @llvm.atomic.load.max.i8.p0i8( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.max.i16.p0i16( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.max.i32.p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.max.i64.p0i64( i64* <ptr>, i64 <delta> )
    +

    These are overloaded intrinsics. You can use llvm.atomic.load.max, + llvm.atomic.load.min, llvm.atomic.load.umax, and + llvm.atomic.load.umin on any integer bit width and for different + address spaces. Not all targets support all bit widths however.

    +
    +  declare i8 @llvm.atomic.load.max.i8.p0i8( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.max.i16.p0i16( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.max.i32.p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.max.i64.p0i64( i64* <ptr>, i64 <delta> )
     
    -declare i8 @llvm.atomic.load.min.i8.p0i8( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.min.i16.p0i16( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.min.i32..p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.min.i64..p0i64( i64* <ptr>, i64 <delta> )
    -
    +  declare i8 @llvm.atomic.load.min.i8.p0i8( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.min.i16.p0i16( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.min.i32..p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.min.i64..p0i64( i64* <ptr>, i64 <delta> )
     
    -declare i8 @llvm.atomic.load.umax.i8.p0i8( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.umax.i16.p0i16( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.umax.i32.p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.umax.i64.p0i64( i64* <ptr>, i64 <delta> )
    -
    +  declare i8 @llvm.atomic.load.umax.i8.p0i8( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.umax.i16.p0i16( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.umax.i32.p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.umax.i64.p0i64( i64* <ptr>, i64 <delta> )
     
    -declare i8 @llvm.atomic.load.umin.i8..p0i8( i8* <ptr>, i8 <delta> )
    -declare i16 @llvm.atomic.load.umin.i16.p0i16( i16* <ptr>, i16 <delta> )
    -declare i32 @llvm.atomic.load.umin.i32..p0i32( i32* <ptr>, i32 <delta> )
    -declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* <ptr>, i64 <delta> )
    -
    +  declare i8 @llvm.atomic.load.umin.i8..p0i8( i8* <ptr>, i8 <delta> )
    +  declare i16 @llvm.atomic.load.umin.i16.p0i16( i16* <ptr>, i16 <delta> )
    +  declare i32 @llvm.atomic.load.umin.i32..p0i32( i32* <ptr>, i32 <delta> )
    +  declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* <ptr>, i64 <delta> )
     
    +
    Overview:
    -

    - These intrinsics takes the signed or unsigned minimum or maximum of - delta and the value stored in memory at ptr. It yields the - original value at ptr. -

    +

    These intrinsics take the signed or unsigned minimum or maximum of + delta and the value stored in memory at ptr. They yield the + original value at ptr.

    +
    Arguments:
    -

    +

    These intrinsics take two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

    - These intrinsics take two arguments, the first a pointer to an integer value - and the second an integer value. The result is also an integer value. These - integer types can have any bit width, but they must all have the same bit - width. The targets may only lower integer representations they support. -

    Semantics:
    -

    - These intrinsics does a series of operations atomically. They first load the - value stored at ptr. They then do the signed or unsigned min or max - delta and the value, store the result to ptr. They yield - the original value stored at ptr. -

    +

    These intrinsics perform a series of operations atomically. They first load + the value stored at ptr. They then take the signed or unsigned min or + max of delta and the loaded value, and store the result + to ptr. They yield the original value stored at ptr.

    Examples:
    @@ -7052,6 +6999,134 @@ declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* <ptr>, i64 <delta&g
                                     ; yields {i32}:result3 = 8
     %memval1  = load i32* %ptr      ; yields {i32}:memval1 = 30
     
    + +
    + + + + + +
    + +

    This class of intrinsics exists to provide information about the lifetime of + memory objects and ranges where variables are immutable.

    + +
    + + + + +
    + +
    Syntax:
    +
    +  declare void @llvm.lifetime.start(i64 <size>, i8* nocapture <ptr>)
    +
    + +
    Overview:
    +

    The 'llvm.lifetime.start' intrinsic specifies the start of a memory + object's lifetime.

    + +
    Arguments:
    +

    The first argument is a constant integer representing the size of the + object, or -1 if it is variable sized. The second argument is a pointer to + the object.

    + +
    Semantics:
    +

    This intrinsic indicates that before this point in the code, the value of the + memory pointed to by ptr is dead. This means that it is known to + never be used and has an undefined value. A load from the pointer that is + preceded by this intrinsic can be replaced with + 'undef'.

    + +
    + + + + +
    + +
    Syntax:
    +
    +  declare void @llvm.lifetime.end(i64 <size>, i8* nocapture <ptr>)
    +
    + +
    Overview:
    +

    The 'llvm.lifetime.end' intrinsic specifies the end of a memory + object's lifetime.

    + +
    Arguments:
    +

    The first argument is a constant integer representing the size of the + object, or -1 if it is variable sized. The second argument is a pointer to + the object.

    + +
    Semantics:
    +

    This intrinsic indicates that after this point in the code, the value of the + memory pointed to by ptr is dead. This means that it is known to + never be used and has an undefined value. Any stores into the memory object + following this intrinsic may be removed as dead. + +
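    As a hedged sketch of how the two lifetime markers bracket a stack object's
    live range (names hypothetical):

      %buf = alloca [32 x i8]
      %p   = bitcast [32 x i8]* %buf to i8*
      call void @llvm.lifetime.start(i64 32, i8* %p)
      ; ... %buf is live and may be read or written here ...
      call void @llvm.lifetime.end(i64 32, i8* %p)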

    + + + + +
    + +
    Syntax:
    +
    +  declare {}* @llvm.invariant.start(i64 <size>, i8* nocapture <ptr>) readonly
    +
    + +
    Overview:
    +

    The 'llvm.invariant.start' intrinsic specifies that the contents of + a memory object will not change.

    + +
    Arguments:
    +

    The first argument is a constant integer representing the size of the + object, or -1 if it is variable sized. The second argument is a pointer to + the object.

    + +
    Semantics:
    +

    This intrinsic indicates that until an llvm.invariant.end that uses + the return value, the referenced memory location is constant and + unchanging.

    + +
    + + + + +
    + +
    Syntax:
    +
    +  declare void @llvm.invariant.end({}* <start>, i64 <size>, i8* nocapture <ptr>)
    +
    + +
    Overview:
    +

    The 'llvm.invariant.end' intrinsic specifies that the contents of + a memory object are mutable.

    + +
    Arguments:
    +

    The first argument is the matching llvm.invariant.start intrinsic. + The second argument is a constant integer representing the size of the + object, or -1 if it is variable sized, and the third argument is a pointer + to the object.

    + +
    Semantics:
    +

    This intrinsic indicates that the memory is mutable again.
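    A minimal sketch of the start/end pairing, assuming %p points to a 4-byte
    object (names hypothetical):

      %inv = call {}* @llvm.invariant.start(i64 4, i8* %p)
      ; within this region the 4 bytes at %p may be assumed not to change
      call void @llvm.invariant.end({}* %inv, i64 4, i8* %p)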

    +
    @@ -7060,8 +7135,10 @@ declare i64 @llvm.atomic.load.umin.i64..p0i64( i64* <ptr>, i64 <delta&g
    -

    This class of intrinsics is designed to be generic and has -no specific purpose.

    + +

    This class of intrinsics is designed to be generic and has no specific + purpose.

    +
    @@ -7077,27 +7154,19 @@ no specific purpose.

    Overview:
    - -

    -The 'llvm.var.annotation' intrinsic -

    +

    The 'llvm.var.annotation' intrinsic.

    Arguments:
    - -

    -The first argument is a pointer to a value, the second is a pointer to a -global string, the third is a pointer to a global string which is the source -file name, and the last argument is the line number. -

    +

    The first argument is a pointer to a value, the second is a pointer to a + global string, the third is a pointer to a global string which is the source + file name, and the last argument is the line number.

    Semantics:
    +

    This intrinsic allows annotation of local variables with arbitrary strings. + This can be useful for special-purpose optimizations that want to look for + these annotations. These have no other defined use; they are ignored by code + generation and optimization.

    -

    -This intrinsic allows annotation of local variables with arbitrary strings. -This can be useful for special purpose optimizations that want to look for these -annotations. These have no other defined use, they are ignored by code -generation and optimization. -
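    As an illustrative sketch only (the globals and their contents are
    hypothetical), a call might look like:

      @.str  = private constant [4 x i8] c"foo\00"
      @.file = private constant [4 x i8] c"t.c\00"
      ; inside a function:
      %x  = alloca i32
      %xp = bitcast i32* %x to i8*
      call void @llvm.var.annotation(i8* %xp,
                 i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0),
                 i8* getelementptr ([4 x i8]* @.file, i32 0, i32 0), i32 42)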

    @@ -7108,9 +7177,9 @@ generation and optimization.
    Syntax:
    -

    This is an overloaded intrinsic. You can use 'llvm.annotation' on -any integer bit width. -

    +

    This is an overloaded intrinsic. You can use 'llvm.annotation' on + any integer bit width.

    +
       declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32  <int> )
       declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32  <int> )
    @@ -7120,28 +7189,20 @@ any integer bit width.
     
    Overview:
    - -

    -The 'llvm.annotation' intrinsic. -

    +

    The 'llvm.annotation' intrinsic.

    Arguments:
    - -

    -The first argument is an integer value (result of some expression), -the second is a pointer to a global string, the third is a pointer to a global -string which is the source file name, and the last argument is the line number. -It returns the value of the first argument. -

    +

    The first argument is an integer value (result of some expression), the + second is a pointer to a global string, the third is a pointer to a global + string which is the source file name, and the last argument is the line + number. It returns the value of the first argument.

    Semantics:
    +

    This intrinsic allows annotations to be put on arbitrary expressions with + arbitrary strings. This can be useful for special-purpose optimizations that + want to look for these annotations. These have no other defined use; they + are ignored by code generation and optimization.

    -

    -This intrinsic allows annotations to be put on arbitrary expressions -with arbitrary strings. This can be useful for special purpose optimizations -that want to look for these annotations. These have no other defined use, they -are ignored by code generation and optimization. -
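    A hedged usage sketch (hypothetical names); note that the annotated value is
    passed through unchanged:

      %v = call i32 @llvm.annotation.i32(i32 %val, i8* %str, i8* %file, i32 10)
      ; %v is equal to %val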

    @@ -7157,58 +7218,50 @@ are ignored by code generation and optimization.
    Overview:
    - -

    -The 'llvm.trap' intrinsic -

    +

    The 'llvm.trap' intrinsic.

    Arguments:
    - -

    -None -

    +

    None.

    Semantics:
    +

    This intrinsic is lowered to the target-dependent trap instruction. If the + target does not have a trap instruction, this intrinsic will be lowered to + a call of the abort() function.

    -

    -This intrinsics is lowered to the target dependent trap instruction. If the -target does not have a trap instruction, this intrinsic will be lowered to the -call of the abort() function. -
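    A common pattern, shown as a sketch: because the trap does not return, the
    call is usually followed by unreachable.

      call void @llvm.trap()
      unreachable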

    +
    +
    Syntax:
    -declare void @llvm.stackprotector( i8* <guard>, i8** <slot> )
    -
    +  declare void @llvm.stackprotector( i8* <guard>, i8** <slot> )
     
    +
    Overview:
    -

    - The llvm.stackprotector intrinsic takes the guard and stores - it onto the stack at slot. The stack slot is adjusted to ensure that - it is placed on the stack before local variables. -

    +

    The llvm.stackprotector intrinsic takes the guard and + stores it onto the stack at slot. The stack slot is adjusted to + ensure that it is placed on the stack before local variables.

    +
    Arguments:
    -

    - The llvm.stackprotector intrinsic requires two pointer arguments. The - first argument is the value loaded from the stack guard - @__stack_chk_guard. The second variable is an alloca that - has enough space to hold the value of the guard. -

    +

    The llvm.stackprotector intrinsic requires two pointer + arguments. The first argument is the value loaded from the stack + guard @__stack_chk_guard. The second argument is an alloca + that has enough space to hold the value of the guard.

    +
    Semantics:
    -

    - This intrinsic causes the prologue/epilogue inserter to force the position of - the AllocaInst stack slot to be before local variables on the - stack. This is to ensure that if a local variable on the stack is overwritten, - it will destroy the value of the guard. When the function exits, the guard on - the stack is checked against the original guard. If they're different, then - the program aborts by calling the __stack_chk_fail() function. -

    +

    This intrinsic causes the prologue/epilogue inserter to force the position of + the AllocaInst stack slot to be before local variables on the + stack. This is to ensure that if a local variable on the stack is + overwritten, it will destroy the value of the guard. When the function exits, + the guard on the stack is checked against the original guard. If they're + different, then the program aborts by calling the __stack_chk_fail() + function.
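    As a hedged sketch of what a front-end might emit in a function prologue
    (names hypothetical):

      @__stack_chk_guard = external global i8*
      ; in the prologue:
      %slot  = alloca i8*
      %guard = load i8** @__stack_chk_guard
      call void @llvm.stackprotector(i8* %guard, i8** %slot)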

    +
    @@ -7221,7 +7274,7 @@ declare void @llvm.stackprotector( i8* <guard>, i8** <slot> ) Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-06-20 13:26:06 +0000 (Sat, 20 Jun 2009) $ + Last modified: $Date: 2009-10-13 23:56:55 +0200 (Tue, 13 Oct 2009) $ diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html index c9d1e190eab0c..0934b47cbc96f 100644 --- a/docs/LinkTimeOptimization.html +++ b/docs/LinkTimeOptimization.html @@ -166,7 +166,7 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific provided by the linker on various platform are not unique. This means, this new tool needs to support all such features and platforms in one super tool or a separate tool per platform is required. This increases - maintance cost for link time optimizer significantly, which is not + maintenance cost for link time optimizer significantly, which is not necessary. This approach also requires staying synchronized with linker developements on various platforms, which is not the main focus of the link time optimizer. Finally, this approach increases end user's build time due @@ -189,7 +189,7 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific user-supplied information, such as a list of exported symbols. LLVM optimizer collects control flow information, data flow information and knows much more about program structure from the optimizer's point of view. - Our goal is to take advantage of tight intergration between the linker and + Our goal is to take advantage of tight integration between the linker and the optimizer by sharing this information during various linking phases.

    @@ -382,7 +382,7 @@ of the native object files.

    Devang Patel and Nick Kledzik
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2008-12-16 04:07:49 +0100 (Tue, 16 Dec 2008) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html index 39a04f7c84d1d..36a4725edec33 100644 --- a/docs/MakefileGuide.html +++ b/docs/MakefileGuide.html @@ -261,7 +261,7 @@
    -

    In some situations, it is desireable to build a single bitcode module from +

    In some situations, it is desirable to build a single bitcode module from a variety of sources, instead of an archive, shared library, or bitcode library. Bitcode modules can be specified in addition to any of the other types of libraries by defining the MODULE_NAME @@ -626,6 +626,11 @@

    If set to any value, causes a bitcode library (.bc) to be built.
    CONFIG_FILES
    Specifies a set of configuration files to be installed.
    +
    DEBUG_SYMBOLS
    +
    If set to any value, causes the build to include debugging + symbols even in optimized objects, libraries and executables. This + alters the flags specified to the compilers and linkers. Debugging + isn't fun in an optimized build, but it is possible.
    DIRS
    Specifies a set of directories, usually children of the current directory, that should also be made using the same goal. These directories @@ -1016,7 +1021,7 @@ Reid Spencer
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-06-16 23:00:42 +0000 (Tue, 16 Jun 2009) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/Passes.html b/docs/Passes.html index 5406be5e2df61..48f5adf62ef85 100644 --- a/docs/Passes.html +++ b/docs/Passes.html @@ -78,7 +78,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! -anders-aaAndersen's Interprocedural Alias Analysis -basicaaBasic Alias Analysis (default AA impl) -basiccgBasic CallGraph Construction --basicvnBasic Value Numbering (default GVN impl) -codegenprepareOptimize for code generation -count-aaCount Alias Analysis Query Responses -debug-aaAA use debugger @@ -90,7 +89,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! -globalsmodref-aaSimple mod/ref analysis for globals -instcountCounts the various types of Instructions -intervalsInterval Partition Construction --load-vnLoad Value Numbering -loopsNatural Loop Construction -memdepMemory Dependence Analysis -no-aaNo Alias Analysis (always returns 'may' alias) @@ -125,11 +123,9 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! -deadtypeelimDead Type Elimination -dieDead Instruction Elimination -dseDead Store Elimination --gcseGlobal Common Subexpression Elimination -globaldceDead Global Elimination -globaloptGlobal Variable Optimizer -gvnGlobal Value Numbering --gvnpreGlobal Value Numbering/Partial Redundancy Elimination -indmemremIndirect Malloc and Free Removal -indvarsCanonicalize Induction Variables -inlineFunction Integration/Inlining @@ -161,7 +157,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! -mem2regPromote Memory to Register -memcpyoptOptimize use of memcpy and friends -mergereturnUnify function exit nodes --predsimplifyPredicate Simplifier -prune-ehRemove unused exception handling info -raiseallocsRaise allocations from calls to instructions -reassociateReassociate expressions @@ -274,6 +269,12 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! located at getNode(F) + CallReturnPos. The arguments start at getNode(F) + CallArgPos.

    + +

    + Please keep in mind that the current Andersen's pass has many known + problems and bugs. It should be considered "research quality". +

    +
    @@ -296,25 +297,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if !

    Yet to be written.

    - - -
    -

    - This is the default implementation of the ValueNumbering - interface. It walks the SSA def-use chains to trivially identify - lexically identical expressions. This does not require any ahead of time - analysis, so it is a very fast default implementation. -

    -

    - The ValueNumbering analysis passes are mostly deprecated. They are only used - by the Global Common Subexpression Elimination pass, which - is deprecated by the Global Value Numbering pass (which - does its value numbering on its own). -

    -
    -
    Optimize for code generation @@ -453,28 +435,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if !

    - - -
    -

    - This pass value numbers load and call instructions. To do this, it finds - lexically identical load instructions, and uses alias analysis to determine - which loads are guaranteed to produce the same value. To value number call - instructions, it looks for calls to functions that do not write to memory - which do not have intervening instructions that clobber the memory that is - read from. -

    - -

    - This pass builds off of another value numbering pass to implement value - numbering for non-load and non-call instructions. It uses Alias Analysis so - that it can disambiguate the load instructions. The more powerful these base - analyses are, the more powerful the resultant value numbering will be. -

    -
    -
    Natural Loop Construction @@ -857,23 +817,6 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if !

    - - -
    -

    - This pass is designed to be a very quick global transformation that - eliminates global common subexpressions from a function. It does this by - using an existing value numbering analysis pass to identify the common - subexpressions, eliminating them when possible. -

    -

    - This pass is deprecated by the Global Value Numbering pass - (which does a better job with its own value numbering). -

    -
    -
    Dead Global Elimination @@ -906,35 +849,11 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if !

    - This pass performs global value numbering to eliminate fully redundant - instructions. It also performs simple dead load elimination. -

    -

    - Note that this pass does the value numbering itself, it does not use the - ValueNumbering analysis passes. + This pass performs global value numbering to eliminate fully and partially + redundant instructions. It also performs redundant load elimination.

    - - -
    -

    - This pass performs a hybrid of global value numbering and partial redundancy - elimination, known as GVN-PRE. It performs partial redundancy elimination on - values, rather than lexical expressions, allowing a more comprehensive view - the optimization. It replaces redundant values with uses of earlier - occurences of the same value. While this is beneficial in that it eliminates - unneeded computation, it also increases register pressure by creating large - live ranges, and should be used with caution on platforms that are very - sensitive to register pressure. -

    -

    - Note that this pass does the value numbering itself, it does not use the - ValueNumbering analysis passes. -

    -
    @@ -1570,28 +1489,6 @@ if (X < 3) {

    - - -
    -

    - Path-sensitive optimizer. In a branch where x == y, replace uses of - x with y. Permits further optimization, such as the - elimination of the unreachable call: -

    - -
    void test(int *p, int *q)
    -{
    -  if (p != q)
    -    return;
    -
    -  if (*p != *q)
    -    foo(); // unreachable
    -}
    -
    -
    Remove unused exception handling info @@ -1647,7 +1544,7 @@ if (X < 3) {

    This file demotes all registers to memory references. It is intended to be the inverse of -mem2reg. By converting to - load instructions, the only values live accross basic blocks are + load instructions, the only values live across basic blocks are alloca instructions and load instructions before phi nodes. It is intended that this should make CFG hacking much easier. To make later hacking easier, the entry block is split into two, such @@ -1963,7 +1860,7 @@ if (X < 3) { Reid Spencer
    LLVM Compiler Infrastructure
-  Last modified: $Date: 2008-12-11 18:34:48 +0100 (Thu, 11 Dec 2008) $
+  Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index b45a60b7611d9..3234554f7cae5 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -29,6 +29,13 @@

+Passing strings (the StringRef and Twine classes)
+

+Although LLVM generally does not do much string manipulation, we do have
+several important APIs which take strings.  Two important examples are the
+Value class -- which has names for instructions, functions, etc. -- and the
+StringMap class which is used extensively in LLVM and Clang.


+These are generic classes, and they need to be able to accept strings which
+may have embedded null characters.  Therefore, they cannot simply take
+a const char *, and taking a const std::string& requires
+clients to perform a heap allocation which is usually unnecessary.  Instead,
+many LLVM APIs use a const StringRef& or a const Twine& for
+passing strings efficiently.

+
+The StringRef class
+

+The StringRef data type represents a reference to a constant string
+(a character array and a length) and supports the common operations available
+on std::string, but does not require heap allocation.


+It can be implicitly constructed using a C style null-terminated string,
+an std::string, or explicitly with a character pointer and length.
+For example, the StringRef find function is declared as:

+  iterator find(const StringRef &Key);

+and clients can call it using any one of:

    +  Map.find("foo");                 // Lookup "foo"
    +  Map.find(std::string("bar"));    // Lookup "bar"
    +  Map.find(StringRef("\0baz", 4)); // Lookup "\0baz"

+Similarly, APIs which need to return a string may return a StringRef
+instance, which can be used directly or converted to an std::string
+using the str member function.  See "llvm/ADT/StringRef.h" for more
+information.

    + +

+You should rarely use the StringRef class directly; because it contains
+pointers to external memory, it is not generally safe to store an instance
+of the class (unless you know that the external storage will not be freed).

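As a quick illustration of the pattern above, here is a minimal sketch that
is not part of the original manual; countNamed and its lookup table are
hypothetical names:

  #include <string>
  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringMap.h"

  using namespace llvm;

  static StringMap<unsigned> NameCounts;   // hypothetical lookup table

  // Taking a StringRef is cheap (a pointer and a length); callers may pass a
  // string literal, an std::string, or an explicit StringRef(ptr, len).
  unsigned countNamed(const StringRef &Key) {
    StringMap<unsigned>::iterator I = NameCounts.find(Key);
    return I == NameCounts.end() ? 0 : I->getValue();
  }

Note that the function does not retain Key, in keeping with the storage
caveat above.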
+
+The Twine class
+
    + +

+The Twine class is an efficient way for APIs to accept concatenated
+strings.  For example, a common LLVM paradigm is to name one instruction
+based on the name of another instruction with a suffix:

+    New = CmpInst::Create(..., SO->getName() + ".cmp");
    + +

+The Twine class is effectively a lightweight rope which points to
+temporary (stack allocated) objects.  Twines can be implicitly constructed
+as the result of the plus operator applied to strings (i.e., a C string, an
+std::string, or a StringRef).  The twine delays the actual concatenation
+of strings until it is actually required, at which point it can be
+efficiently rendered directly into a character array.  This avoids
+unnecessary heap allocation involved in constructing the temporary results
+of string concatenation.  See "llvm/ADT/Twine.h" for more information.


+As with a StringRef, Twine objects point to external memory
+and should almost never be stored or mentioned directly.  They are intended
+solely for use when defining a function which should be able to efficiently
+accept concatenated strings.

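As a minimal sketch (not part of the original manual; createTmpName is a
hypothetical helper) of a function that accepts concatenated strings via
const Twine&:

  #include <string>
  #include "llvm/ADT/Twine.h"

  using namespace llvm;

  // Callers may pass "x", Name + ".cmp", etc.; the concatenation is rendered
  // here exactly once, into a single heap-allocated std::string.
  std::string createTmpName(const Twine &Prefix) {
    return (Prefix + ".tmp").str();
  }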
     The DEBUG() macro and -debug option
@@ -448,7 +561,7 @@ tool) is run with the '-debug' command line argument:

    -DOUT << "I am here!\n";
    +DEBUG(errs() << "I am here!\n");
     
    @@ -493,16 +606,16 @@ option as follows:

    -DOUT << "No debug type\n";
     #undef  DEBUG_TYPE
    +DEBUG(errs() << "No debug type\n");
     #define DEBUG_TYPE "foo"
    -DOUT << "'foo' debug type\n";
    +DEBUG(errs() << "'foo' debug type\n");
     #undef  DEBUG_TYPE
     #define DEBUG_TYPE "bar"
    -DOUT << "'bar' debug type\n";
    +DEBUG(errs() << "'bar' debug type\n"));
     #undef  DEBUG_TYPE
     #define DEBUG_TYPE ""
    -DOUT << "No debug type (2)\n";
    +DEBUG(errs() << "No debug type (2)\n");
     
@@ -534,6 +647,21 @@
 on when the name is specified.  This allows, for example, all debug
 information for instruction scheduling to be enabled with
 -debug-type=InstrSched, even if the source lives in multiple files.

    +

+The DEBUG_WITH_TYPE macro is also available for situations where you
+would like to set DEBUG_TYPE, but only for one specific DEBUG
+statement.  It takes an additional first parameter, which is the type to
+use.  For example, the preceding example could be written as:

    +
    +DEBUG_WITH_TYPE("", errs() << "No debug type\n");
    +DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n");
    +DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n"));
    +DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n");
    +
    +
    +
    @@ -726,6 +854,10 @@ access the container. Based on that, you should use:

 iteration, but do not support efficient look-up based on a key.
+• a string container is a specialized sequential container or reference
+  structure that is used for character or byte arrays.
 • a bit container provides an efficient way to store and perform set
   operations on sets of numeric id's, while automatically eliminating
   duplicates.  Bit containers require a maximum of 1 bit for each
@@ -1397,6 +1529,20 @@ always better.

+
+String-like containers
+

+TODO: const char* vs stringref vs smallstring vs std::string.  Describe
+twine, xref to #string_apis.
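Until that section is written, here is a minimal sketch (not part of the
original manual; makeLabel is a hypothetical helper) of the SmallString class
it mentions, which keeps small strings on the stack and spills to the heap
only when they grow too large:

  #include "llvm/ADT/SmallString.h"
  #include "llvm/ADT/StringRef.h"

  using namespace llvm;

  // Builds "<base>!" in caller-provided stack storage; no heap allocation
  // occurs unless the result outgrows the 64-byte inline buffer.
  StringRef makeLabel(StringRef Base, SmallString<64> &Storage) {
    Storage.append(Base.begin(), Base.end());
    Storage.push_back('!');
    return StringRef(Storage.begin(), Storage.size());
  }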

    +
     Bit storage containers (BitVector, SparseBitVector)
@@ -1508,7 +1654,7 @@ an example that prints the name of a BasicBlock and the number of
 for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
   // Print out the name of the basic block if it has one, and then the
   // number of instructions that it contains
-  llvm::cerr << "Basic block (name=" << i->getName() << ") has "
+  errs() << "Basic block (name=" << i->getName() << ") has "
             << i->size() << " instructions.\n";
    @@ -1541,14 +1687,14 @@ a BasicBlock:

 for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
   // The next statement works since operator<<(ostream&,...)
   // is overloaded for Instruction&
-  llvm::cerr << *i << "\n";
+  errs() << *i << "\n";

 However, this isn't really the best way to print out the contents of a
 BasicBlock!  Since the ostream operators are overloaded for virtually
 anything you'll care about, you could have just invoked the print routine on
-the basic block itself: llvm::cerr << *blk << "\n";.
+the basic block itself: errs() << *blk << "\n";.

@@ -1574,7 +1720,7 @@ small example that shows how to dump all instructions in a function
 // F is a pointer to a Function instance
 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-  llvm::cerr << *I << "\n";
+  errs() << *I << "\n";
@@ -1653,7 +1799,7 @@ without actually obtaining it via iteration over some structure:

 void printNextInstruction(Instruction* inst) {
   BasicBlock::iterator it(inst);
   ++it; // After this line, it refers to the instruction after *inst
-  if (it != inst->getParent()->end()) llvm::cerr << *it << "\n";
+  if (it != inst->getParent()->end()) errs() << *it << "\n";
 }
@@ -1771,8 +1917,8 @@
 Function *F = ...;

 for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i)
   if (Instruction *Inst = dyn_cast<Instruction>(*i)) {
-    llvm::cerr << "F is used in instruction:\n";
-    llvm::cerr << *Inst << "\n";
+    errs() << "F is used in instruction:\n";
+    errs() << *Inst << "\n";
   }
@@ -2257,6 +2403,50 @@ and only if you know what you're doing!

+The LLVMContext class
+

+LLVMContext is an opaque class in the LLVM API which clients can use
+to operate multiple, isolated instances of LLVM concurrently within the same
+address space.  For instance, in a hypothetical compile-server, the
+compilation of an individual translation unit is conceptually independent
+from all the others, and it would be desirable to be able to compile
+incoming translation units concurrently on independent server threads.
+Fortunately, LLVMContext exists to enable just this kind of scenario!


+Conceptually, LLVMContext provides isolation.  Every LLVM entity
+(Modules, Values, Types, Constants, etc.) in LLVM's in-memory IR
+belongs to an LLVMContext.  Entities in different contexts cannot
+interact with each other: Modules in different contexts cannot be linked
+together, Functions cannot be added to Modules in different contexts,
+etc.  What this means is that it is safe to compile on multiple threads
+simultaneously, as long as no two threads operate on entities within the
+same context.


+In practice, very few places in the API require the explicit specification
+of an LLVMContext, other than the Type creation/lookup APIs.
+Because every Type carries a reference to its owning context, most
+other entities can determine what context they belong to by looking at their
+own Type.  If you are adding new entities to LLVM IR, please try to
+maintain this interface design.


+For clients that do not require the benefits of isolation, LLVM
+provides a convenience API getGlobalContext().  This returns a global,
+lazily initialized LLVMContext that may be used in situations where
+isolation is not a concern.

    +
    +
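A minimal sketch (not part of the original manual; the module names are
hypothetical, assuming the 2.6-era header layout) of the isolation described
above:

  #include "llvm/LLVMContext.h"
  #include "llvm/Module.h"

  using namespace llvm;

  void compileIndependently() {
    LLVMContext CtxA, CtxB;                 // one context per "server thread"
    Module *MA = new Module("unit_a", CtxA);
    Module *MB = new Module("unit_b", CtxB);
    // MA and MB may now be compiled on separate threads; entities from CtxA
    // must never be mixed with entities from CtxB.
    delete MA;
    delete MB;
  }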
     Advanced Topics
@@ -2793,7 +2983,7 @@ the lib/VMCore directory.

    VectorType
 Subclass of SequentialType for vector types.  A vector type is similar to an
 ArrayType but is distinguished because it is
-a first class type wherease ArrayType is not.  Vector types are used for
+a first class type whereas ArrayType is not.  Vector types are used for
 vector operations and are usually small vectors of an integer or floating
 point type.
    StructType
    @@ -3353,7 +3543,7 @@ Superclasses: GlobalValue, Value

 The Function class represents a single procedure in LLVM.  It is
-actually one of the more complex classes in the LLVM heirarchy because it must
+actually one of the more complex classes in the LLVM hierarchy because it must
 keep track of a large amount of data.  The Function class keeps track
 of a list of BasicBlocks, a list of formal Arguments, and a
@@ -3362,7 +3552,7 @@ of a list of BasicBlocks, a list of formal

 The list of BasicBlocks is the most commonly used part of Function
 objects.  The list imposes an implicit ordering of the blocks in the
 function, which indicate how the code will be
-layed out by the backend.  Additionally, the first
+laid out by the backend.  Additionally, the first
 BasicBlock is the implicit entry node for the Function.  It is not legal
 in LLVM to explicitly branch to this initial block.  There are no implicit
 exit nodes, and in fact there may be multiple exit
@@ -3492,7 +3682,7 @@ Superclasses: GlobalValue, User, Value

-Global variables are represented with the (suprise suprise)
+Global variables are represented with the (surprise surprise)
 GlobalVariable class.  Like functions, GlobalVariables are also
 subclasses of GlobalValue, and as such are always referenced by their
 address (global values must live in memory, so their
@@ -3542,7 +3732,7 @@ never change at runtime).

• Constant *getInitializer()
-Returns the intial value for a GlobalVariable.  It is not legal
+Returns the initial value for a GlobalVariable.  It is not legal
 to call this method if there is no initializer.

@@ -3664,7 +3854,7 @@ arguments.  An argument has a pointer to the parent Function.

    Dinakar Dhurjati and Chris Lattner
    The LLVM Compiler Infrastructure
-  Last modified: $Date: 2009-06-17 21:12:26 +0000 (Wed, 17 Jun 2009) $
+  Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $
diff --git a/docs/Projects.html b/docs/Projects.html
index 882ce2eb234dc..582c4e2cbf3f4 100644
--- a/docs/Projects.html
+++ b/docs/Projects.html
@@ -121,8 +121,8 @@ configure script with these commands:
 % ./AutoRegen.sh

-You must be using Autoconf version 2.59 or later and your aclocal version
-should 1.9 or later.
+You must be using Autoconf version 2.59 or later and your aclocal version
+should be 1.9 or later.

• Run configure in the directory in which you want to place object code.
  Use the following options to tell your project where it
@@ -453,7 +453,7 @@ Mailing List.

    John Criswell
    The LLVM Compiler Infrastructure
-  Last modified: $Date: 2009-01-12 22:29:24 +0100 (Mon, 12 Jan 2009) $
+  Last modified: $Date: 2009-08-13 22:08:52 +0200 (Thu, 13 Aug 2009) $
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index b0165b05cf0a2..870705224c282 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -4,17 +4,17 @@
-  LLVM 2.5 Release Notes
+  LLVM 2.6 Release Notes
-LLVM 2.5 Release Notes
+LLVM 2.6 Release Notes
 Introduction
 Sub-project Status Update
-External Projects Using LLVM 2.5
-What's New in LLVM 2.5?
+External Projects Using LLVM 2.6
+What's New in LLVM 2.6?
 Installation Instructions
 Portability and Supported Platforms
 Known Problems
@@ -34,7 +34,7 @@

 This document contains the release notes for the LLVM Compiler
-Infrastructure, release 2.5.  Here we describe the status of LLVM, including
+Infrastructure, release 2.6.  Here we describe the status of LLVM, including
 major improvements from the previous release and significant known problems.
 All LLVM releases may be downloaded from the LLVM releases web site.

      @@ -51,25 +51,37 @@ current one. To see the release notes for a specific release, please see the releases page.


-The LLVM 2.5 distribution currently consists of code from the core LLVM
-repository —which roughly includes the LLVM optimizers, code generators
-and supporting tools — and the llvm-gcc repository.  In addition to this
-code, the LLVM Project includes other sub-projects that are in development.
-The two which are the most actively developed are the Clang Project and the
-VMKit Project.
+The LLVM 2.6 distribution currently consists of code from the core LLVM
+repository (which roughly includes the LLVM optimizers, code generators
+and supporting tools), the Clang repository and the llvm-gcc repository.  In
+addition to this code, the LLVM Project includes other sub-projects that are
+in development.  Here we include updates on these subprojects.

      @@ -99,37 +110,30 @@ Project and the VMKit Project.

 The Clang project is an effort to build
-a set of new 'LLVM native' front-end technologies for the LLVM optimizer and
-code generator.  While Clang is not included in the LLVM 2.5 release, it is
-continuing to make major strides forward in all areas.  Its C and Objective-C
-parsing and code generation support is now very solid.  For example, it is
-capable of successfully building many real-world applications for X86-32 and
-X86-64, including the FreeBSD kernel and gcc 4.2.  C++ is also making
-incredible progress, and work on templates has recently started.  If you are
-interested in fast compiles and good diagnostics, we encourage you to try it
-out by building from mainline and reporting any issues you hit to the
+a set of new 'LLVM native' front-end technologies for the LLVM optimizer
+and code generator.  If you are interested in fast compiles and
+good diagnostics, we
+encourage you to try it out.  Clang currently compiles typical Objective-C
+code 3x faster than GCC and compiles C code about 30% faster than GCC at
+-O0 -g (which is when the most pressure is on the frontend).


+In addition to supporting these languages, C++ support is also well under
+way, and mainline Clang is able to parse the libstdc++ 4.2 headers and even
+codegen simple apps.  If you are interested in Clang C++ support or any
+other Clang feature, we strongly encourage you to get involved on the Clang
+front-end mailing list.

-In the LLVM 2.5 time-frame, the Clang team has made many improvements:
+In the LLVM 2.6 time-frame, the Clang team has made many improvements:

-• Clang now has a new driver, which is focused on providing a GCC-compatible
-  interface.
-• The X86-64 ABI is now supported, including support for the Apple 64-bit
-  Objective-C runtime and zero cost exception handling.
-• Precompiled header support is now implemented.
-• Objective-C support is significantly improved beyond LLVM 2.4, supporting
-  many features, such as Objective-C Garbage Collection.
-• Variable length arrays are now fully supported.
-• C99 designated initializers are now fully supported.
-• Clang now includes all major compiler headers, including a redesigned
-  tgmath.h and several more intrinsic headers.
-• Many many bugs are fixed and many features have been added.
+• C and Objective-C support are now considered production quality.
+• AuroraUX, FreeBSD and OpenBSD are now supported.
+• Most of Objective-C 2.0 is now supported with the GNU runtime.
+• Many many bugs are fixed and lots of features have been added.
      @@ -140,19 +144,18 @@ list.

-Previously announced in the last LLVM release, the Clang project also
+Previously announced in the 2.4 and 2.5 LLVM releases, the Clang project also
 includes an early stage static source code analysis tool for automatically
 finding bugs in C and Objective-C programs.
-The tool performs a growing set of checks to find
+The tool performs checks to find
 bugs that occur on a specific path within a program.

-In the LLVM 2.5 time-frame there have been many significant improvements to
-the analyzer's core path simulation engine and machinery for generating
-path-based bug reports to end-users.  Particularly noteworthy improvements
-include experimental support for full field-sensitivity and reasoning about
-heap objects as well as an improved value-constraints subengine that does a
-much better job of reasoning about inequality relationships (e.g., x > 2)
-between variables and constants.
+In the LLVM 2.6 time-frame, the analyzer core has undergone several
+important improvements and cleanups and now includes a new Checker
+interface that is intended to eventually serve as a basis for
+domain-specific checks.  Further, in addition to generating HTML files for
+reporting analysis results, the analyzer can now also emit bug reports in a
+structured XML format that is intended to be easily readable by other
+programs.

 The set of checks performed by the static analyzer continues to expand, and
 future plans for the tool include full source-level inter-procedural analysis
@@ -170,44 +173,191 @@ this project is encouraged to get involved!

 The VMKit project is an implementation of
-a JVM and a CLI Virtual Machines (Microsoft .NET is an
-implementation of the CLI) using the Just-In-Time compiler of LLVM.
+a JVM and a CLI Virtual Machine (Microsoft .NET is an
+implementation of the CLI) using LLVM for static and just-in-time
+compilation.

-Following LLVM 2.5, VMKit has its second release that you can find on its
-webpage.  The release includes
+VMKit version 0.26 builds with LLVM 2.6 and you can find it on its
+web page.  The release includes
 bug fixes, cleanup and new features.  The major changes are:

-• Ahead of Time compiler: compiles .class files to llvm .bc.  VMKit uses
-  this functionality to native compile the standard classes (e.g.
-  java.lang.String).  Users can compile AoT .class files into dynamic
-  libraries and run them with the help of VMKit.
-• New exception model: the dwarf exception model is very slow for
-  exception-intensive applications, so the JVM has had a new implementation
-  of exceptions which check at each function call if an exception happened.
-  There is a low performance penalty on applications without exceptions, but
-  it is a big gain for exception-intensive applications.  For example the
-  jack benchmark in Spec JVM98 is 6x faster (performance gain of 83%).
-• User-level management of thread stacks, so that thread local data access
-  at runtime is fast and portable.
-• Implementation of biased locking for faster object synchronizations at
-  runtime.
-• New support for OSX/X64, Linux/X64 (with the Boehm GC) and Linux/ppc32.
+• A new llcj tool to generate shared libraries or executables of Java files.
+• Cooperative garbage collection.
+• Fast subtype checking (paper from Click et al [JGI'02]).
+• Implementation of a two-word header for Java objects instead of the
+  original three-word header.
+• Better Java specification-compliance: division by zero checks, stack
+  overflow checks, finalization and references support.
+The new LLVM compiler-rt project
+is a simple library that provides an implementation of the low-level
+target-specific hooks required by code generation and other runtime
+components.  For example, when compiling for a 32-bit target, converting a
+double to a 64-bit unsigned integer is compiled into a runtime call to the
+"__fixunsdfdi" function.  The compiler-rt library provides highly optimized
+implementations of this and other low-level routines (some are 3x faster
+than the equivalent libgcc routines).


+All of the code in the compiler-rt project is available under the standard
+LLVM License, a "BSD-style" license.


+The new LLVM KLEE project is a symbolic
+execution framework for programs in LLVM bitcode form.  KLEE tries to
+symbolically evaluate "all" paths through the application and records state
+transitions that lead to fault states.  This allows it to construct
+testcases that lead to faults and can even be used to verify algorithms.
+For more details, please see the OSDI 2008 paper about KLEE.


+The goal of DragonEgg is to make
+gcc-4.5 act like llvm-gcc without requiring any gcc modifications
+whatsoever.  DragonEgg is a shared library (llvm.so)
+that is loaded by gcc at runtime.  It uses the new gcc plugin architecture
+to disable the GCC optimizers and code generators, and schedule the LLVM
+optimizers and code generators (or direct output of LLVM IR) instead.
+Currently only Linux and Darwin are supported, and only on x86-32 and
+x86-64.  It should be easy to add additional unix-like architectures and
+other processor families.  In theory it should be possible to use
+DragonEgg with any language supported by gcc, however only C and Fortran
+work well for the moment.  Ada and C++ work to some extent, while Java,
+Obj-C and Obj-C++ are so far entirely untested.  Since gcc-4.5 has not yet
+been released, neither has DragonEgg.  To build DragonEgg you will need to
+check out the development versions of gcc, llvm and DragonEgg from their
+respective subversion repositories, and follow the instructions in the
+DragonEgg README.


+The LLVM Machine Code (MC) Toolkit project is a (very early) effort to
+build better tools for dealing with machine code, object file formats, etc.
+The idea is to be able to generate most of the target specific details of
+assemblers and disassemblers from existing LLVM target .td files (with
+suitable enhancements), and to build infrastructure for reading and writing
+common object file formats.  One of the first deliverables is to build a
+full assembler and integrate it into the compiler, which is predicted to
+substantially reduce compile time in some scenarios.


+In the LLVM 2.6 timeframe, the MC framework has grown to the point where it
+can reliably parse and pretty print (with some encoding information) a
+darwin/x86 .s file successfully, and has the very early phases of a Mach-O
+assembler in progress.  Beyond the MC framework itself, major refactoring
+of the LLVM code generator has started.  The idea is to make the code
+generator reason about the code it is producing in a much more semantic
+way, rather than a textual way.  For example, the code generator now uses
+MCSection objects to represent section assignments, instead of text strings
+that print to .section directives.


+MC is an early and ongoing project that will hopefully continue to lead to
+many improvements in the code generator and build infrastructure useful for
+many other situations.


 An exciting aspect of LLVM is that it is used as an enabling technology for
 a lot of other language and tools projects.  This section lists some of the
 projects that have already been updated to work with LLVM 2.6.

      +
      + + + + + +
      +

+Rubinius is an environment
+for running Ruby code which strives to write as much of the core class
+implementation in Ruby as possible.  Combined with a bytecode interpreting
+VM, it uses LLVM to optimize and compile ruby code down to machine code.
+Techniques such as type feedback, method inlining, and uncommon traps are
+all used to remove dynamism from ruby execution and increase performance.


+Since LLVM 2.5, Rubinius has made several major leaps forward, implementing
+a counter based JIT, type feedback and speculative method inlining.


+MacRuby is an implementation of Ruby on top of
+core Mac OS X technologies, such as the Objective-C common runtime and
+garbage collector and the CoreFoundation framework.  It is principally
+developed by Apple and aims at enabling the creation of full-fledged Mac
+OS X applications.


+MacRuby uses LLVM for optimization passes, JIT and AOT compilation of Ruby
+expressions.  It also uses zero-cost DWARF exceptions to implement Ruby
+exception handling.

     Pure
@@ -224,12 +374,8 @@
 built-in list and matrix support (including list and matrix comprehensions)
 and an easy-to-use C interface.  The interpreter uses LLVM as a backend to
 JIT-compile Pure programs to fast native code.

-In addition to the usual algebraic data structures, Pure also has
-MATLAB-style matrices in order to support numeric computations and signal
-processing in an efficient way.  Pure is mainly aimed at mathematical
-applications right now, but it has been designed as a general purpose
-language.  The dynamic interpreter environment and the C interface make it
-possible to use it as a kind of functional scripting language for many
-application areas.
+Pure versions 0.31 and later have been tested and are known to work with
+LLVM 2.6 (and continue to work with older LLVM releases >= 2.3 as well).

      @@ -243,11 +389,11 @@ it as a kind of functional scripting language for many application areas.

 LDC is an implementation of the D Programming Language using the LLVM
 optimizer and code generator.
-The LDC project works great with the LLVM 2.5 release.  General improvements
+The LDC project works great with the LLVM 2.6 release.  General improvements
 in this cycle have included new inline asm constraint handling, better debug
-info support, general bugfixes, and better x86-64 support.  This has allowed
-some major improvements in LDC, getting us much closer to being as
+info support, general bug fixes and better x86-64 support.  This has allowed
+some major improvements in LDC, getting it much closer to being as
 fully featured as the original DMD compiler from DigitalMars.

      @@ -258,142 +404,160 @@ fully featured as the original DMD compiler from DigitalMars.
-Roadsend PHP (rphp) is an open
+Roadsend PHP (rphp) is an open
 source implementation of the PHP programming
-language that uses LLVM for its optimizer, JIT, and static compiler.  This
+language that uses LLVM for its optimizer, JIT and static compiler.  This
 is a reimplementation of an earlier project that is now based on LLVM.

-    What's New in LLVM 2.5?

-This release includes a huge number of bug fixes, performance tweaks, and
-minor improvements.  Some of the major improvements and new features are
-listed in this section.

      +

+Unladen Swallow is a
+branch of Python intended to be fully
+compatible and significantly faster.  It uses LLVM's optimization passes
+and JIT compiler.

      +

+LLVM-Lua uses LLVM to add JIT
+and static compiling support to the Lua VM.  Lua bytecode is analyzed to
+remove type checks, then LLVM is used to compile the bytecode down to
+machine code.

      +
      -

-LLVM 2.5 includes several major new capabilities:

        -
-• LLVM 2.5 includes a brand new XCore backend.
-• llvm-gcc now generally supports the GFortran front-end, and the
-  precompiled release binaries now support Fortran, even on Mac OS/X.
-• CMake is now used by the LLVM build process on Windows.  It automatically
-  generates Visual Studio project files (and more) from a set of simple text
-  files.  This makes it much easier to maintain.  In time, we'd like to
-  standardize on CMake for everything.
-• LLVM 2.5 now uses (and includes) Google Test for unit testing.
-• The LLVM native code generator now supports arbitrary precision integers.
-  Types like i33 have long been valid in the LLVM IR, but were previously
-  only supported by the interpreter.  Note that the C backend still does not
-  support these.
-• LLVM 2.5 no longer uses 'bison,' so it is easier to build on Windows.
+IcedTea provides a
+harness to build OpenJDK using only free software build tools and to
+provide replacements for the not-yet free parts of OpenJDK.  One of the
+extensions that IcedTea provides is a new JIT compiler named Shark which
+uses LLVM to provide native code generation without introducing
+processor-dependent code.

+This release includes a huge number of bug fixes, performance tweaks and
+minor improvements.  Some of the major improvements and new features are
+listed in this section.

-LLVM fully supports the llvm-gcc 4.2 front-end, which marries the GCC
-front-ends and driver with the LLVM optimizer and code generator.  It
-currently includes support for the C, C++, Objective-C, Ada, and Fortran
-front-ends.

      +

+LLVM 2.6 includes several major new capabilities:

        -
-• In this release, the GCC inliner is completely disabled.  Previously the
-  GCC inliner was used to handle always-inline functions and other cases.
-  This caused problems with code size growth, and it is completely disabled
-  in this release.
-• llvm-gcc (and LLVM in general) now support code generation for stack
-  canaries, which is an effective form of buffer overflow protection.
-  llvm-gcc supports this with the -fstack-protector command line option
-  (just like GCC).  In LLVM IR, you can request code generation for stack
-  canaries with function attributes.
+• New compiler-rt, KLEE and machine code toolkit sub-projects.
+• Debug information now includes line numbers when optimizations are
+  enabled.  This allows statistical sampling tools like OProfile and Shark
+  to map samples back to source lines.
+• LLVM now includes new experimental backends to support the MSP430,
+  SystemZ and BlackFin architectures.
+• LLVM supports a new Gold Linker Plugin which enables support for
+  transparent link-time optimization on ELF targets when used with the Gold
+  binutils linker.
+• LLVM now supports doing optimization and code generation on multiple
+  threads.  Please see the LLVM Programmer's Manual for more information.
+• LLVM now has experimental support for embedded metadata in LLVM IR,
+  though the implementation is not guaranteed to be final and the .bc file
+  format may change in future releases.  Debug info does not yet use this
+  format in LLVM 2.6.
      -
      -

      LLVM IR has several new features that are used by our existing front-ends and -can be useful if you are writing a front-end for LLVM:

      +

      LLVM IR has several new features for better support of new targets and that +expose new optimization opportunities:

-• The shufflevector instruction has been generalized to allow different
-  shuffle mask width than its input vectors.  This allows you to use
-  shufflevector to combine two "<4 x float>" vectors into a
-  "<8 x float>" for example.
-• LLVM IR now supports new intrinsics for computing and acting on overflow
-  of integer operations.  This allows efficient code generation for
-  languages that must trap or throw an exception on overflow.  While these
-  intrinsics work on all targets, they only generate efficient code on X86
-  so far.
-• LLVM IR now supports a new private linkage type to produce labels that
-  are stripped by the assembler before it produces a .o file (thus they are
-  invisible to the linker).
-• LLVM IR supports two new attributes for better alias analysis.  The
-  noalias attribute can now be used on the return value of a function to
-  indicate that it returns new memory (e.g. 'malloc', 'calloc', etc).  The
-  new nocapture attribute can be used on pointer arguments to indicate that
-  the function does not return the pointer, store it in an object that
-  outlives the call, or let the value of the pointer escape from the
-  function in any other way.  Note that it is the pointer itself that must
-  not escape, not the value it points to: loading a value out of the
-  pointer is perfectly fine.  Many standard library functions (e.g.
-  'strlen', 'memcpy') have this property.
-• The parser for ".ll" files in lib/AsmParser is now completely rewritten
-  as a recursive descent parser.  This parser produces better error
-  messages (including caret diagnostics), is less fragile (less likely to
-  crash on strange things), does not leak memory, is more efficient, and
-  eliminates LLVM's last use of the 'bison' tool.
-• Debug information representation and manipulation internals have been
-  consolidated to use a new set of classes in llvm/Analysis/DebugInfo.h.
-  These routines are more efficient, robust, and extensible and replace the
-  older mechanisms.  llvm-gcc, clang, and the code generator now use them
-  to create and process debug information.
+• The add, sub and mul instructions have been split into integer and
+  floating point versions (like divide and remainder), introducing new
+  fadd, fsub, and fmul instructions.
+• The add, sub and mul instructions now support optional "nsw" and "nuw"
+  bits which indicate that the operation is guaranteed to not overflow (in
+  the signed or unsigned case, respectively).  This gives the optimizer
+  more information and can be used for things like C signed integer values,
+  which are undefined on overflow.
+• The sdiv instruction now supports an optional "exact" flag which
+  indicates that the result of the division is guaranteed to have a
+  remainder of zero.  This is useful for optimizing pointer subtraction
+  in C.
+• The getelementptr instruction now supports arbitrary integer index values
+  for array/pointer indices.  This allows for better code generation on
+  16-bit pointer targets like PIC16.
+• The getelementptr instruction now supports an "inbounds" optimization
+  hint that tells the optimizer that the pointer is guaranteed to be within
+  its allocated object.
+• LLVM now supports a series of new linkage types for global values which
+  allow for better optimization and new capabilities:
+  • linkonce_odr and weak_odr have the same linkage semantics as the
+    non-"odr" linkage types.  The difference is that these linkage types
+    indicate that all definitions of the specified function are guaranteed
+    to have the same semantics.  This allows inlining template functions in
+    C++ but not inlining weak functions in C, which previously both got the
+    same linkage type.
+  • available_externally is a new linkage type that gives the optimizer
+    visibility into the definition of a function (allowing inlining and
+    side effect analysis) but that does not cause code to be generated.
+    This allows better optimization of "GNU inline" functions, extern
+    templates, etc.
+  • linker_private is a new linkage type (which is only useful on Mac OS X)
+    that is used for some metadata generation and other obscure things.
+• Finally, target-specific intrinsics can now return multiple values, which
+  is useful for modeling target operations with multiple results.
      -

-In addition to a large array of bug fixes and minor performance tweaks, this
+In addition to a large array of minor performance tweaks and bug fixes, this
 release includes a few major enhancements and additions to the optimizers:

-• The loop optimizer now improves floating point induction variables in
-  several ways, including adding shadow induction variables to avoid
-  "integer <-> floating point" conversions in loops when safe.
-• The "-mem2reg" pass is now much faster on code with large basic blocks.
+• The Scalar Replacement of Aggregates pass has many improvements that
+  allow it to better promote vector unions, variables which are memset, and
+  much more strange code that can happen to do bitfield accesses to
+  register operations.  An interesting change is that it now produces
+  "unusual" integer sizes (like i1704) in some cases and lets other
+  optimizers clean things up.
+• The Loop Strength Reduction pass now promotes small integer induction
+  variables to 64-bit on 64-bit targets, which provides a major performance
+  boost for much numerical code.  It also promotes shorts to int on 32-bit
+  hosts, etc.  LSR now also analyzes pointer expressions (e.g.
+  getelementptrs), as well as integers.
+• The GVN pass now eliminates partial redundancies of loads in simple
+  cases.
+• The Inliner now reuses stack space when inlining similar arrays from
+  multiple callees into one caller.
+• LLVM includes a new experimental Static Single Information (SSI)
+  construction pass.
-• The "-jump-threading" pass is more powerful: it is iterative and handles
-  threading based on values with fully and partially redundant loads.
-• The "-memdep" memory dependence analysis pass (used by GVN and memcpyopt)
-  is both faster and more aggressive.
-• The "-scalarrepl" scalar replacement of aggregates pass is more
-  aggressive about promoting unions to registers.
        +
      • LLVM has a new "EngineBuilder" class which makes it more obvious how to + set up and configure an ExecutionEngine (a JIT or interpreter).
      • +
      • The JIT now supports generating more than 16M of code.
      • +
      • When configured with --with-oprofile, the JIT can now inform + OProfile about JIT'd code, allowing OProfile to get line number and function + name information for JIT'd functions.
      • +
      • When "libffi" is available, the LLVM interpreter now uses it, which supports + calling almost arbitrary external (natively compiled) functions.
      • +
      • Clients of the JIT can now register a 'JITEventListener' object to receive + callbacks when the JIT emits or frees machine code. The OProfile support + uses this mechanism.
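A minimal sketch (not from the release notes; makeJIT is a hypothetical
helper, assuming the 2.6-era headers) of the EngineBuilder item above:

  #include <string>
  #include "llvm/Module.h"
  #include "llvm/ExecutionEngine/ExecutionEngine.h"

  using namespace llvm;

  ExecutionEngine *makeJIT(Module *M, std::string &Err) {
    // Request a JIT explicitly; on failure, Err describes what went wrong.
    return EngineBuilder(M).setEngineKind(EngineKind::JIT)
                           .setErrorStr(&Err)
                           .create();
  }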
@@ -442,33 +632,55 @@
 infrastructure, which allows us to implement more aggressive algorithms and
 make it run faster:

-• The Writing an LLVM Compiler Backend document has been greatly expanded
-  and is substantially more complete.
-• The SelectionDAG type legalization logic has been completely rewritten,
-  is now more powerful (it supports arbitrary precision integer types for
-  example), and is more correct in several corner cases.  The type
-  legalizer converts operations on types that are not natively supported by
-  the target machine into equivalent code sequences that only use natively
-  supported types.  The old type legalizer is still available (for now) and
-  will be used if -disable-legalize-types is passed to the code generator.
-• The code generator now supports widening illegal vectors to larger legal
-  ones (for example, converting operations on <3 x float> to work on
-  <4 x float>) which is very important for common graphics applications.
-• The assembly printers for each target are now split out into their own
-  libraries that are separate from the main code generation logic.  This
-  reduces the code size of JIT compilers by not requiring them to be
-  linked in.
-• The 'fast' instruction selection path (used at -O0 and for fast JIT
-  compilers) now supports accelerating codegen for code that uses exception
-  handling constructs.
-• The optional PBQP register allocator now supports register coalescing.
+• The llc -asm-verbose option (exposed from llvm-gcc as -dA and clang as
+  -fverbose-asm or -dA) now adds a lot of useful information in comments to
+  the generated .s file.  This information includes location information
+  (if built with -g) and loop nest information.
+• The code generator now supports a new MachineVerifier pass which is
+  useful for finding bugs in targets and codegen passes.
+• The Machine LICM pass is now enabled by default.  It hoists instructions
+  out of loops (such as constant pool loads, loads from read-only stubs,
+  vector constant synthesization code, etc.) and is currently configured to
+  only do so when the hoisted operation can be rematerialized.
+• The Machine Sinking pass is now enabled by default.  This pass moves
+  side-effect free operations down the CFG so that they are executed on
+  fewer paths through a function.
+• The code generator now performs "stack slot coloring" of register spills,
+  which allows spill slots to be reused.  This leads to smaller stack
+  frames in cases where there are lots of register spills.
+• The register allocator has many improvements to take better advantage of
+  commutable operations, various spiller peephole optimizations, and can
+  now coalesce cross-register-class copies.
+• Tblgen now supports multiclass inheritance and a number of new string and
+  list operations like !(subst), !(foreach), !car, !cdr, !null, !if,
+  !cast.  These make the .td files more expressive and allow more
+  aggressive factoring of duplication across instruction patterns.
+• Target-specific intrinsics can now be added without having to hack VMCore
+  to add them.  This makes it easier to maintain out-of-tree targets.
+• The instruction selector is better at propagating information about
+  values (such as whether they are sign/zero extended etc.) across basic
+  block boundaries.
+• The SelectionDAG datastructure has new nodes for representing buildvector
+  and vector shuffle operations.  This makes operations and pattern
+  matching more efficient and easier to get right.
+• The Prolog/Epilog Insertion Pass now has experimental support for
+  performing the "shrink wrapping" optimization, which moves spills and
+  reloads around in the CFG to avoid doing saves on paths that don't need
+  them.
+• LLVM includes new experimental support for writing ELF .o files directly
+  from the compiler.  It works well for many simple C testcases, but
+  doesn't support exception handling, debug info, inline assembly, etc.
+• Targets can now specify register allocation hints through
+  MachineRegisterInfo::setRegAllocationHint.  A regalloc hint consists of
+  hint type and physical register number.  A hint type of zero specifies a
+  register allocation preference.  Other hint type values are target
+  specific which are resolved by TargetRegisterInfo::ResolveRegAllocHint.
+  An example is the ARM target which uses register hints to request that
+  the register allocator provide an even / odd register pair to two virtual
+  registers.
      @@ -482,37 +694,33 @@ the code size of JIT compilers by not requiring them to be linked in.

-• The llvm.returnaddress intrinsic (which is used to implement
-  __builtin_return_address) now supports non-zero stack depths on X86.
-• The X86 backend now supports code generation of vector shift operations
-  using SSE instructions.
-• X86-64 code generation now takes advantage of red zone, unless the
-  -mno-red-zone option is specified.
-• The X86 backend now supports using address space #256 in LLVM IR as a way
-  of performing memory references off the GS segment register.  This allows
-  a front-end to take advantage of very low-level programming techniques
-  when targeting X86 CPUs.  See test/CodeGen/X86/movgs.ll for a simple
-  example.
-• The X86 backend now supports a -disable-mmx command line option to
-  prevent use of MMX even on chips that support it.  This is important for
-  cases where code does not contain the proper llvm.x86.mmx.emms
-  intrinsics.
-• The X86 JIT now detects the new Intel Core i7 and Atom chips and
-  auto-configures itself appropriately for the features of these chips.
-• The JIT now supports exception handling constructs on Linux/X86-64 and
-  Darwin/x86-64.
-• The JIT supports Thread Local Storage (TLS) on Linux/X86-32 but not yet
-  on X86-64.
+• SSE 4.2 builtins are now supported.
+• GCC-compatible soft float modes are now supported, which are typically
+  used by OS kernels.
+• X86-64 now models implicit zero extensions better, which allows the code
+  generator to remove a lot of redundant zexts.  It also models the 8-bit
+  "H" registers as subregs, which allows them to be used in some tricky
+  situations.
+• X86-64 now supports the "local exec" and "initial exec" thread local
+  storage model.
+• The vector forms of the icmp and fcmp instructions now select to
+  efficient SSE operations.
+• Support for the win64 calling conventions has improved.  The primary
+  missing feature is support for varargs function definitions.  It seems to
+  work well for many win64 JIT purposes.
+• The X86 backend has preliminary support for mapping address spaces to
+  segment register references.  This allows you to write GS or FS relative
+  memory accesses directly in LLVM IR for cases where you know exactly what
+  you're doing (such as in an OS kernel).  There are some known problems
+  with this support, but it works in simple cases.
+• The X86 code generator has been refactored to move all global variable
+  reference logic to one place (X86Subtarget::ClassifyGlobalReference)
+  which makes it easier to reason about.
      @@ -527,70 +735,156 @@ example.

-• Both direct and indirect load/stores work now.
-• Logical, bitwise and conditional operations now work for integer data
-  types.
-• Function calls involving basic types work now.
-• Support for integer arrays.
-• The compiler can now emit libcalls for operations not supported by m/c
-  instructions.
-• Support for both data and ROM address spaces.
+• Support for floating-point, indirect function calls, and
+  passing/returning aggregate types to functions.
+• The code generator is able to generate debug info into output COFF
+  files.
+• Support for placing an object into a specific section or at a specific
+  address in memory.

      Things not yet supported:

-• Floating point.
-• Passing/returning aggregate types to and from functions.
 • Variable arguments.
-• Indirect function calls.
 • Interrupts/programs.
-• Debug info.
      +

+New features of the ARM target include:

+• Preliminary support for processors, such as the Cortex-A8 and Cortex-A9,
+  that implement version v7-A of the ARM architecture.  The ARM backend now
+  supports both the Thumb2 and Advanced SIMD (Neon) instruction sets.
+• The AAPCS-VFP "hard float" calling conventions are also supported with
+  the -float-abi=hard flag.
+• The ARM calling convention code is now tblgen generated instead of
+  resorting to C++ code.

+These features are still somewhat experimental and subject to change.  The
+Neon intrinsics, in particular, may change in future releases of LLVM.
+ARMv7 support has progressed a lot on top of tree since 2.6 branched.

      + + +
-New features include:
+New features of other targets include:

        -
-• Beginning with LLVM 2.5, llvmc2 is known as just llvmc.  The old llvmc
-  driver was removed.
+• Mips now supports the O32 calling convention.
+• Many improvements to the 32-bit PowerPC SVR4 ABI (used on powerpc-linux)
+  support, lots of bugs fixed.
+• Added support for the 64-bit PowerPC SVR4 ABI (used on powerpc64-linux).
+  Needs more testing.
      + + + -
-• The Clang plugin was substantially improved and is now enabled by
-  default.  The command llvmc --clang can now be used as a synonym to ccc.
-• There is now a --check-graph option, which is supposed to catch common
-  errors like multiple default edges, mismatched output/input language
-  names and cycles.  In general, these checks can't be done at compile-time
-  because of the need to support plugins.
-• Plugins are now more flexible and can refer to compilation graph nodes
-  and options defined in other plugins.  To manage dependencies, a
-  priority-sorting mechanism was introduced.  This change affects the
-  TableGen file syntax.  See the documentation for details.
+This release includes a number of new APIs that are used internally, which
+may also be useful for external clients.
        +
+• New PrettyStackTrace class allows crashes of llvm tools (and applications
+  that integrate them) to provide more detailed indication of what the
+  compiler was doing at the time of the crash (e.g. running a pass).  At
+  the top level for each LLVM tool, it includes the command line arguments.
+• New StringRef and Twine classes make operations on character ranges and
+  string concatenation more efficient.  StringRef is just a const char*
+  with a length, Twine is a lightweight rope.
+• LLVM has new WeakVH, AssertingVH and CallbackVH classes, which make it
+  easier to write LLVM IR transformations.  WeakVH automatically drops to
+  null when the referenced Value is deleted, and is updated across a
+  replaceAllUsesWith operation (see the sketch after this list).
+  AssertingVH aborts the program if the referenced value is destroyed while
+  it is being referenced.  CallbackVH is a customizable class for handling
+  value references.  See ValueHandle.h for more information.
+• The new 'Triple' class centralizes a lot of logic that reasons about
+  target triples.
+• The new 'llvm_report_error()' set of APIs allows tools to embed the LLVM
+  optimizer and backend and recover from previously unrecoverable errors.
+• LLVM has new abstractions for atomic operations and reader/writer locks.
+• LLVM has new SourceMgr and SMLoc classes which implement caret
+  diagnostics and basic include stack processing for simple parsers.  It is
+  used by tablegen, llvm-mc, the .ll parser and FileCheck.
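A minimal sketch (not from the release notes; replaceAndTrack is a
hypothetical helper) of the WeakVH behavior described above:

  #include "llvm/Value.h"
  #include "llvm/Support/ValueHandle.h"

  using namespace llvm;

  void replaceAndTrack(Value *Old, Value *New) {
    WeakVH Handle(Old);               // Handle currently refers to Old.
    Old->replaceAllUsesWith(New);     // Handle is updated to refer to New.
    // Had Old simply been deleted instead, Handle would have dropped to
    // null rather than dangle.
  }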
-• Hooks can now be provided with arguments.  The syntax is "$CALL(MyHook,
-  'Arg1', 'Arg2', 'Arg3')".
-• A new option type: multi-valued option, for options that take more than
-  one argument (for example, "-foo a b c").
-• New option properties: 'one_or_more', 'zero_or_more', 'hidden' and
-  'really_hidden'.
-• The 'case' expression gained an 'error' action and an 'empty' test
-  (equivalent to "(not (not_empty ...))").
      +

      Other miscellaneous features include:

-• Documentation now looks more consistent to the rest of the LLVM docs.
-  There is also a man page now.
        +
+• LLVM now includes a new internal 'FileCheck' tool which allows writing
+  much more accurate regression tests that run faster.  Please see the
+  FileCheck section of the Testing Guide for more information.
+• LLVM profile information support has been significantly improved to
+  produce correct use counts, and has support for edge profiling with
+  reduced runtime overhead.  Combined, the generated profile information is
+  both more correct and imposes about half as much overhead (e.g. from 12%
+  to 6% overhead on SPEC CPU2000).
+• The C bindings (in the llvm/include/llvm-c directory) include many newly
+  supported APIs.
+• LLVM 2.6 includes brand new experimental LLVM bindings for the Ada2005
+  programming language.
+• The LLVMC driver has several new features:
+  • Dynamic plugins now work on Windows.
+  • New option property: init.  Makes it possible to provide default values
+    for options defined in plugins (interface to cl::init).
+  • New example: Skeleton, shows how to create a standalone LLVMC-based
+    driver.
+  • New example: mcc16, a driver for the PIC16 toolchain.
      @@ -605,13 +899,24 @@ instructions.

 If you're already an LLVM user or developer with out-of-tree changes based
-on LLVM 2.4, this section lists some "gotchas" that you may run into
+on LLVM 2.5, this section lists some "gotchas" that you may run into
 upgrading from the previous release.

        - -
-• llvm-gcc defaults to -fno-math-errno on all X86 targets.
+• The Itanium (IA64) backend has been removed.  It was not actively
+  supported and had bitrotted.
+• The BigBlock register allocator has been removed; it had also bitrotted.
+• The C Backend (-march=c) is no longer considered part of the LLVM release
+  criteria.  We still want it to work, but no one is maintaining it and it
+  lacks support for arbitrary precision integers and other important IR
+  features.
+• All LLVM tools now default to overwriting their output file, behaving
+  more like standard unix tools.  Previously, this only happened with the
+  '-f' option.
+• The LLVM build now builds all libraries as .a files instead of some
+  libraries as relinked .o files.  This requires some APIs like
+  InitializeAllTargets.h.
      @@ -619,8 +924,82 @@ from the previous release.

      API changes are:

        -
      • Some deprecated interfaces to create Instruction subclasses, that - were spelled with lower case "create," have been removed.
      • +
      • All uses of hash_set and hash_map have been removed from + the LLVM tree and the wrapper headers have been removed.
      • +
      • The llvm/Streams.h and DOUT member of Debug.h have been removed. The + llvm::Ostream class has been completely removed and replaced with + uses of raw_ostream.
      • +
      • LLVM's global uniquing tables for Types and Constants have + been privatized into members of an LLVMContext. A number of APIs + now take an LLVMContext as a parameter. To smooth the transition + for clients that will only ever use a single context, the new + getGlobalContext() API can be used to access a default global + context which can be passed in any and all cases where a context is + required. +
      • The getABITypeSize methods are now called getAllocSize.
      • +
      • The Add, Sub and Mul operators are no longer + overloaded for floating-point types. Floating-point addition, subtraction + and multiplication are now represented with new operators FAdd, + FSub and FMul. In the IRBuilder API, + CreateAdd, CreateSub, CreateMul and + CreateNeg should only be used for integer arithmetic now; + CreateFAdd, CreateFSub, CreateFMul and + CreateFNeg should now be used for floating-point arithmetic.
      • +
      • The DynamicLibrary class can no longer be constructed, its functionality has + moved to static member functions.
      • +
• raw_fd_ostream's constructor for opening a given filename now + takes an extra Force argument. If Force is set to + false, an error will be reported if a file with the given name + already exists. If Force is set to true, the file will + be silently truncated (which is the behavior before this flag was + added). A sketch follows this list.
      • +
• SCEVHandle no longer exists, because reference counting is no + longer done for SCEV* objects; instead, const SCEV* + should be used.
      • + +
• Many APIs, notably llvm::Value, now use the StringRef +and Twine classes instead of passing const char* +or std::string, as described in +the Programmer's Manual. Most +clients should be unaffected by this transition, unless they are used to +Value::getName() returning a string. Here are some tips on updating to +2.6 (a sketch also follows this list): +
          +
• getNameStr() is still available, and matches the old + behavior. Replacing getName() calls with this is a safe option, + although more efficient alternatives are now possible.
        • + +
        • If you were just relying on getName() being able to be sent to + a std::ostream, consider migrating + to llvm::raw_ostream.
        • + +
        • If you were using getName().c_str() to get a const + char* pointer to the name, you can use getName().data(). + Note that this string (as before), may not be the entire name if the + name contains embedded null characters.
        • + +
        • If you were using operator + on the result of getName() and + treating the result as an std::string, you can either + use Twine::str to get the result as an std::string, or + could move to a Twine based design.
        • + +
        • isName() should be replaced with comparison + against getName() (this is now efficient). +
        +
      • + +
• The registration interfaces for backend Targets have changed (what was +previously TargetMachineRegistry). For backend authors, see the Writing An LLVM Backend +guide. For clients, the notable API changes are (a sketch follows this list): +
          +
        • TargetMachineRegistry has been renamed + to TargetRegistry.
        • + +
        • Clients should move to using the TargetRegistry::lookupTarget() + function to find targets.
        • +
        +
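To make the LLVMContext and floating-point operator changes concrete, here is a minimal C++ sketch. It assumes a single-context client and the llvm/LLVMContext.h and llvm/Support/IRBuilder.h header layout of this release; treat it as an illustration rather than a definitive recipe:

  #include "llvm/LLVMContext.h"
  #include "llvm/Module.h"
  #include "llvm/Support/IRBuilder.h"
  using namespace llvm;

  // Single-context clients can pass the default global context everywhere
  // a context is now required.
  Module *makeModule() {
    LLVMContext &Context = getGlobalContext();
    return new Module("example", Context);
  }

  // Floating-point addition is now spelled CreateFAdd; CreateAdd is for
  // integer arithmetic only.
  Value *emitFloatAdd(IRBuilder<> &Builder, Value *L, Value *R) {
    return Builder.CreateFAdd(L, R, "sum");
  }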
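A minimal sketch of the new raw_fd_ostream usage, assuming a (Filename, Binary, Force, ErrorInfo) constructor shape for this release; check raw_ostream.h in your tree for the exact parameter list:

  #include "llvm/Support/raw_ostream.h"
  #include <string>
  using namespace llvm;

  bool writeGreeting(const char *Path) {
    std::string ErrorInfo;
    // Force=true silently truncates an existing file (the old behavior);
    // Force=false makes the constructor report an error in ErrorInfo if a
    // file named Path already exists.
    raw_fd_ostream Out(Path, /*Binary=*/false, /*Force=*/true, ErrorInfo);
    if (!ErrorInfo.empty())
      return false;
    Out << "hello\n";
    return true;
  }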
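A sketch of the getName() migration following the tips above; other equally valid migrations are possible:

  #include "llvm/Value.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Twine.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>
  using namespace llvm;

  void printName(const Value *V) {
    // getName() now returns a StringRef, printable directly on a
    // raw_ostream with no std::string copy.
    outs() << V->getName() << "\n";

    // Code that still needs a std::string can keep the old behavior.
    std::string Copy = V->getNameStr();

    // Concatenation goes through Twine and is materialized on demand.
    std::string Tmp = (V->getName() + ".tmp").str();

    // isName("foo") becomes a direct comparison, which is now efficient.
    if (V->getName() == "foo")
      outs() << Copy << " " << Tmp << "\n";
  }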
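A sketch of the new target lookup flow, assuming the TargetSelect.h initialization entry points of this release:

  #include "llvm/Target/TargetRegistry.h"
  #include "llvm/Target/TargetSelect.h"
  #include <string>
  using namespace llvm;

  const Target *findTarget(const std::string &TripleStr, std::string &Error) {
    // Register every configured target once, up front.
    InitializeAllTargetInfos();
    // Clients now go through TargetRegistry instead of the old
    // TargetMachineRegistry.
    return TargetRegistry::lookupTarget(TripleStr, Error);
  }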
      @@ -639,15 +1018,15 @@ API changes are:

      • Intel and AMD machines (IA32, X86-64, AMD64, EMT-64) running Red Hat -Linux, Fedora Core and FreeBSD (and probably other unix-like systems).
      • + Linux, Fedora Core, FreeBSD and AuroraUX (and probably other unix-like + systems).
      • PowerPC and X86-based Mac OS X systems, running 10.3 and above in 32-bit -and 64-bit modes.
      • + and 64-bit modes.
      • Intel and AMD machines running on Win32 using MinGW libraries (native).
      • Intel and AMD machines running on Win32 with the Cygwin libraries (limited support is available for native builds with Visual C++).
      • Sun UltraSPARC workstations running Solaris 10.
      • Alpha-based machines running Debian GNU/Linux.
      • -
      • Itanium-based (IA64) machines running Linux and HP-UX.

      The core LLVM infrastructure uses GNU autoconf to adapt itself @@ -670,6 +1049,21 @@ listed by component. If you run into a problem, please check the LLVM bug database and submit a bug if there isn't already one.

      +
        +
      • The llvm-gcc bootstrap will fail with some versions of binutils (e.g. 2.15) + with a message of "Error: can not do 8 + byte pc-relative relocation" when building C++ code. We intend to + fix this on mainline, but a workaround for 2.6 is to upgrade to binutils + 2.17 or later.
      • + +
• LLVM will not correctly compile on Solaris and/or OpenSolaris +using the stock GCC 3.x.x series 'out of the box'. +See: Broken versions of GCC and other tools. +However, A Modern GCC Build +for x86/x86-64 has been made available from the third party AuroraUX Project +that has been meticulously tested for bootstrapping LLVM & Clang.
      • +
      +
      @@ -687,9 +1081,11 @@ components, please contact us on the LLVMdev list.

        -
      • The MSIL, IA64, Alpha, SPU, MIPS, and PIC16 backends are experimental.
      • +
      • The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430 and SystemZ backends are + experimental.
      • The llc "-filetype=asm" (the default) is the only - supported value for this option.
      • + supported value for this option. The ELF writer is experimental. +
      • The implementation of Andersen's Alias Analysis has many known bugs.
      @@ -744,14 +1140,14 @@ compilation, and lacks support for debug information.
        +
      • Support for the Advanced SIMD (Neon) instruction set is still incomplete +and not well tested. Some features may not work at all, and the code quality +may be poor in some cases.
      • Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6 processors, thumb programs can crash or produce wrong results (PR1388).
      • Compilation for ARM Linux OABI (old ABI) is supported but not fully tested.
      • -
      • There is a bug in QEMU-ARM (<= 0.9.0) which causes it to incorrectly - execute -programs compiled with LLVM. Please use more recent versions of QEMU.
      @@ -778,7 +1174,6 @@ programs compiled with LLVM. Please use more recent versions of QEMU.
        -
      • The O32 ABI is not fully supported.
      • 64-bit MIPS targets are not supported yet.
      @@ -799,21 +1194,6 @@ appropriate nops inserted to ensure restartability.
      - - - -
      - -
        -
      • The Itanium backend is highly experimental and has a number of known - issues. We are looking for a maintainer for the Itanium backend. If you - are interested, please contact the LLVMdev mailing list.
      • -
      - -
      -
      Known problems with the C back-end @@ -841,10 +1221,6 @@ appropriate nops inserted to ensure restartability.
      -

      llvm-gcc does not currently support Link-Time -Optimization on most platforms "out-of-the-box". Please inquire on the -LLVMdev mailing list if you are interested.

      -

      The only major language feature of GCC not supported by llvm-gcc is the __builtin_apply family of builtins. However, some extensions are only supported on some targets. For example, trampolines are only @@ -882,7 +1258,8 @@ itself, Qt, Mozilla, etc.

      • Fortran support generally works, but there are still several unresolved bugs - in Bugzilla. Please see the tools/gfortran component for details.
      • + in Bugzilla. Please see the + tools/gfortran component for details.
      @@ -902,16 +1279,16 @@ which does support trampolines.
    43. The Ada front-end fails to bootstrap. This is due to lack of LLVM support for setjmp/longjmp style exception handling, which is used internally by the compiler. -Workaround: configure with --disable-bootstrap.
    44. +Workaround: configure with --disable-bootstrap.
    45. The c380004, c393010 and cxg2021 ACATS tests fail (c380004 also fails with gcc-4.2 mainline). If the compiler is built with checks disabled then c393010 causes the compiler to go into an infinite loop, using up all system memory.
    46. Some GCC specific Ada tests continue to crash the compiler.
    47. -
    48. The -E binder option (exception backtraces) +
    49. The -E binder option (exception backtraces) does not work and will result in programs -crashing if an exception is raised. Workaround: do not use -E.
    50. +crashing if an exception is raised. Workaround: do not use -E.
    51. Only discrete types are allowed to start or finish at a non-byte offset in a record. Workaround: do not pack records or use representation clauses that result in a field of a non-discrete type @@ -925,6 +1302,20 @@ ignored.
    52. + + + +
      + +

      The Llvm.Linkage module is broken, and has incorrect values. Only +Llvm.Linkage.External, Llvm.Linkage.Available_externally, and +Llvm.Linkage.Link_once will be correct. If you need any of the other linkage +modes, you'll have to write an external C library in order to expose the +functionality. This has been fixed in the trunk.
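A rough C++ sketch of the suggested workaround: a tiny shim over the llvm-c API that sets a linkage mode directly. The my_set_linkage_internal name is hypothetical, and the OCaml external declaration and build glue are not shown:

  #include "llvm-c/Core.h"

  // Hypothetical shim: expose a linkage setter that the broken OCaml enum
  // cannot drive reliably.  Built as a small native library and bound from
  // OCaml via an external declaration.
  extern "C" void my_set_linkage_internal(LLVMValueRef Global) {
    LLVMSetLinkage(Global, LLVMInternalLinkage);
  }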

      +
      +
      Additional Information @@ -957,7 +1348,7 @@ lists.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-03-02 21:08:12 +0100 (Mon, 02 Mar 2009) $ + Last modified: $Date: 2009-10-13 19:48:04 +0200 (Tue, 13 Oct 2009) $ diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html index e940e93d82faf..277b1e334f9f8 100644 --- a/docs/SourceLevelDebugging.html +++ b/docs/SourceLevelDebugging.html @@ -80,7 +80,7 @@ height="369"> debug information in LLVM. It describes the actual format that the LLVM debug information takes, which is useful for those interested in creating front-ends or dealing directly with the information. - Further, this document provides specifc examples of what debug information + Further, this document provides specific examples of what debug information for C/C++.

      @@ -122,8 +122,8 @@ height="369">

      The approach used by the LLVM implementation is to use a small set of intrinsic functions to define a mapping between LLVM program objects and the source-level objects. The - description of the source-level program is maintained in LLVM global - variables in an implementation-defined format + description of the source-level program is maintained in LLVM metadata + in an implementation-defined format (the C/C++ front-end currently uses working draft 7 of the DWARF 3 standard).

      @@ -240,31 +240,21 @@ height="369">

      LLVM debugging information has been carefully designed to make it possible for the optimizer to optimize the program and debugging information without necessarily having to know anything about debugging information. In - particular, the global constant merging pass automatically eliminates - duplicated debugging information (often caused by header files), the global - dead code elimination pass automatically deletes debugging information for a - function if it decides to delete the function, and the linker eliminates - debug information when it merges linkonce functions.

+ particular, the use of metadata avoids duplicated debugging information from + the beginning, and the global dead code elimination pass automatically + deletes debugging information for a function if it decides to delete the + function.

      To do this, most of the debugging information (descriptors for types, variables, functions, source files, etc) is inserted by the language - front-end in the form of LLVM global variables. These LLVM global variables - are no different from any other global variables, except that they have a web - of LLVM intrinsic functions that point to them. If the last references to a - particular piece of debugging information are deleted (for example, by the - -globaldce pass), the extraneous debug information will - automatically become dead and be removed by the optimizer.

      + front-end in the form of LLVM metadata.

      Debug information is designed to be agnostic about the target debugger and debugging information representation (e.g. DWARF/Stabs/etc). It uses a - generic machine debug information pass to decode the information that - represents variables, types, functions, namespaces, etc: this allows for - arbitrary source-language semantics and type-systems to be used, as long as - there is a module written for the target debugger to interpret the - information. In addition, debug global variables are declared in - the "llvm.metadata" section. All values declared in this section - are stripped away after target debug information is constructed and before - the program object is emitted.

      + generic pass to decode the information that represents variables, types, + functions, namespaces, etc: this allows for arbitrary source-language + semantics and type-systems to be used, as long as there is a module + written for the target debugger to interpret the information.

      To provide basic functionality, the LLVM debugger does have to make some assumptions about the source-level language being debugged, though it keeps @@ -288,9 +278,7 @@ height="369">

      In consideration of the complexity and volume of debug information, LLVM - provides a specification for well formed debug global variables. The - constant value of each of these globals is one of a limited set of - structures, known as debug descriptors.

      + provides a specification for well formed debug descriptors.

Consumers of LLVM debug information expect the descriptors for program objects to start in a canonical format, but the descriptors can include @@ -300,20 +288,17 @@ way. Also, all debugging information objects start with a tag to indicate what type of object it is. The source-language is allowed to define its own objects by using unreserved tag numbers. We recommend using tags in - the range 0x1000 thru 0x2000 (there is a defined enum DW_TAG_user_base = + the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base = 0x1000.)

      -

      The fields of debug descriptors used internally by LLVM (MachineModuleInfo) +

      The fields of debug descriptors used internally by LLVM are restricted to only the simple data types int, uint, - bool, float, double, i8* and - { }*. References to arbitrary values are handled using a - { }* and a cast to { }* expression; typically - references to other field descriptors, arrays of descriptors or global - variables.

      + bool, float, double, mdstring and + mdnode.

      -%llvm.dbg.object.type = type {
      +!1 = metadata !{
         uint,   ;; A tag
         ...
       }
      @@ -326,8 +311,8 @@ height="369">
          of tags are loosely bound to the tag values of DWARF information entries.
          However, that does not restrict the use of the information supplied to DWARF
          targets.  To facilitate versioning of debug information, the tag is augmented
      -   with the current debug version (LLVMDebugVersion = 4 << 16 or 0x40000 or
      -   262144.)

      + with the current debug version (LLVMDebugVersion = 7 << 16 or 0x70000 or + 458752.)
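As a quick cross-check of this arithmetic, the tag constants that appear in the examples below fall out of 7 << 16 directly; a small stand-alone C++ sketch:

  #include <cstdio>

  int main() {
    const unsigned LLVMDebugVersion = 7 << 16;   // 0x70000 == 458752
    std::printf("%u\n", 17 + LLVMDebugVersion);  // 458769, DW_TAG_compile_unit
    std::printf("%u\n", 36 + LLVMDebugVersion);  // 458788, DW_TAG_base_type
    std::printf("%u\n", 46 + LLVMDebugVersion);  // 458798, DW_TAG_subprogram
    std::printf("%u\n", 52 + LLVMDebugVersion);  // 458804, DW_TAG_variable
    return 0;
  }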

      The details of the various descriptors follow.

      @@ -342,17 +327,18 @@ height="369">
      -%llvm.dbg.compile_unit.type = type {
      -  i32,    ;; Tag = 17 + LLVMDebugVersion (DW_TAG_compile_unit)
      -  {  }*,  ;; Compile unit anchor = cast = (%llvm.dbg.anchor.type* %llvm.dbg.compile_units to {  }*)
      -  i32,    ;; DWARF language identifier (ex. DW_LANG_C89) 
      -  i8*,    ;; Source file name
      -  i8*,    ;; Source file directory (includes trailing slash)
      -  i8*     ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
      -  i1,     ;; True if this is a main compile unit. 
      -  i1,     ;; True if this is optimized.
      -  i8*,    ;; Flags
      -  i32     ;; Runtime version
      +!0 = metadata !{
      +  i32,       ;; Tag = 17 + LLVMDebugVersion 
      +             ;; (DW_TAG_compile_unit)
      +  i32,       ;; Unused field. 
      +  i32,       ;; DWARF language identifier (ex. DW_LANG_C89) 
      +  metadata,  ;; Source file name
      +  metadata,  ;; Source file directory (includes trailing slash)
      +  metadata   ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
      +  i1,        ;; True if this is a main compile unit. 
      +  i1,        ;; True if this is optimized.
      +  metadata,  ;; Flags
      +  i32        ;; Runtime version
       }
       
      @@ -388,19 +374,20 @@ height="369">
      -%llvm.dbg.global_variable.type = type {
      -  i32,    ;; Tag = 52 + LLVMDebugVersion (DW_TAG_variable)
      -  {  }*,  ;; Global variable anchor = cast (%llvm.dbg.anchor.type* %llvm.dbg.global_variables to {  }*),  
      -  {  }*,  ;; Reference to context descriptor
      -  i8*,    ;; Name
      -  i8*,    ;; Display name (fully qualified C++ name)
      -  i8*,    ;; MIPS linkage name (for C++)
      -  {  }*,  ;; Reference to compile unit where defined
      -  i32,    ;; Line number where defined
      -  {  }*,  ;; Reference to type descriptor
      -  i1,     ;; True if the global is local to compile unit (static)
      -  i1,     ;; True if the global is defined in the compile unit (not extern)
      -  {  }*   ;; Reference to the global variable
      +!1 = metadata !{
      +  i32,      ;; Tag = 52 + LLVMDebugVersion 
      +            ;; (DW_TAG_variable)
      +  i32,      ;; Unused field.
      +  metadata, ;; Reference to context descriptor
      +  metadata, ;; Name
      +  metadata, ;; Display name (fully qualified C++ name)
      +  metadata, ;; MIPS linkage name (for C++)
      +  metadata, ;; Reference to compile unit where defined
      +  i32,      ;; Line number where defined
      +  metadata, ;; Reference to type descriptor
      +  i1,       ;; True if the global is local to compile unit (static)
      +  i1,       ;; True if the global is defined in the compile unit (not extern)
      +  {  }*     ;; Reference to the global variable
       }
       
      @@ -419,18 +406,19 @@ provide details such as name, type and where the variable is defined.

      -%llvm.dbg.subprogram.type = type {
      -  i32,    ;; Tag = 46 + LLVMDebugVersion (DW_TAG_subprogram)
      -  {  }*,  ;; Subprogram anchor = cast (%llvm.dbg.anchor.type* %llvm.dbg.subprograms to {  }*),  
      -  {  }*,  ;; Reference to context descriptor
      -  i8*,    ;; Name
      -  i8*,    ;; Display name (fully qualified C++ name)
      -  i8*,    ;; MIPS linkage name (for C++)
      -  {  }*,  ;; Reference to compile unit where defined
      -  i32,    ;; Line number where defined
      -  {  }*,  ;; Reference to type descriptor
      -  i1,     ;; True if the global is local to compile unit (static)
      -  i1      ;; True if the global is defined in the compile unit (not extern)
      +!2 = metadata !{
      +  i32,      ;; Tag = 46 + LLVMDebugVersion
      +            ;; (DW_TAG_subprogram)
      +  i32,      ;; Unused field.
      +  metadata, ;; Reference to context descriptor
      +  metadata, ;; Name
      +  metadata, ;; Display name (fully qualified C++ name)
      +  metadata, ;; MIPS linkage name (for C++)
      +  metadata, ;; Reference to compile unit where defined
      +  i32,      ;; Line number where defined
      +  metadata, ;; Reference to type descriptor
      +  i1,       ;; True if the global is local to compile unit (static)
      +  i1        ;; True if the global is defined in the compile unit (not extern)
       }
       
      @@ -450,9 +438,9 @@ provide details such as name, type and where the variable is defined.

      -%llvm.dbg.block = type {
      -  i32,    ;; Tag = 13 + LLVMDebugVersion (DW_TAG_lexical_block)
      -  {  }*   ;; Reference to context descriptor
      +!3 = metadata !{
      +  i32,     ;; Tag = 13 + LLVMDebugVersion (DW_TAG_lexical_block)
      +  metadata ;; Reference to context descriptor
       }
       
      @@ -472,17 +460,18 @@ provide details such as name, type and where the variable is defined.

      -%llvm.dbg.basictype.type = type {
      -  i32,    ;; Tag = 36 + LLVMDebugVersion (DW_TAG_base_type)
      -  {  }*,  ;; Reference to context (typically a compile unit)
      -  i8*,    ;; Name (may be "" for anonymous types)
      -  {  }*,  ;; Reference to compile unit where defined (may be NULL)
      -  i32,    ;; Line number where defined (may be 0)
      -  i64,    ;; Size in bits
      -  i64,    ;; Alignment in bits
      -  i64,    ;; Offset in bits
      -  i32,    ;; Flags
      -  i32     ;; DWARF type encoding
      +!4 = metadata !{
      +  i32,      ;; Tag = 36 + LLVMDebugVersion 
      +            ;; (DW_TAG_base_type)
      +  metadata, ;; Reference to context (typically a compile unit)
      +  metadata, ;; Name (may be "" for anonymous types)
      +  metadata, ;; Reference to compile unit where defined (may be NULL)
      +  i32,      ;; Line number where defined (may be 0)
      +  i64,      ;; Size in bits
      +  i64,      ;; Alignment in bits
      +  i64,      ;; Offset in bits
      +  i32,      ;; Flags
      +  i32       ;; DWARF type encoding
       }
       
      @@ -523,16 +512,16 @@ DW_ATE_unsigned_char = 8
      -%llvm.dbg.derivedtype.type = type {
      -  i32,    ;; Tag (see below)
      -  {  }*,  ;; Reference to context
      -  i8*,    ;; Name (may be "" for anonymous types)
      -  {  }*,  ;; Reference to compile unit where defined (may be NULL)
      -  i32,    ;; Line number where defined (may be 0)
      -  i32,    ;; Size in bits
      -  i32,    ;; Alignment in bits
      -  i32,    ;; Offset in bits
      -  {  }*   ;; Reference to type derived from
      +!5 = metadata !{
      +  i32,      ;; Tag (see below)
      +  metadata, ;; Reference to context
      +  metadata, ;; Name (may be "" for anonymous types)
      +  metadata, ;; Reference to compile unit where defined (may be NULL)
      +  i32,      ;; Line number where defined (may be 0)
      +  i32,      ;; Size in bits
      +  i32,      ;; Alignment in bits
      +  i32,      ;; Offset in bits
      +  metadata  ;; Reference to type derived from
       }
       
      @@ -591,19 +580,19 @@ DW_TAG_restrict_type = 55
      -%llvm.dbg.compositetype.type = type {
      -  i32,    ;; Tag (see below)
      -  {  }*,  ;; Reference to context
      -  i8*,    ;; Name (may be "" for anonymous types)
      -  {  }*,  ;; Reference to compile unit where defined (may be NULL)
      -  i32,    ;; Line number where defined (may be 0)
      -  i64,    ;; Size in bits
      -  i64,    ;; Alignment in bits
      -  i64,    ;; Offset in bits
      -  i32,    ;; Flags
      -  {  }*,  ;; Reference to type derived from
      -  {  }*,  ;; Reference to array of member descriptors
      -  i32     ;; Runtime languages
      +!6 = metadata !{
      +  i32,      ;; Tag (see below)
      +  metadata, ;; Reference to context
      +  metadata, ;; Name (may be "" for anonymous types)
      +  metadata, ;; Reference to compile unit where defined (may be NULL)
      +  i32,      ;; Line number where defined (may be 0)
      +  i64,      ;; Size in bits
      +  i64,      ;; Alignment in bits
      +  i64,      ;; Offset in bits
      +  i32,      ;; Flags
      +  metadata, ;; Reference to type derived from
      +  metadata, ;; Reference to array of member descriptors
      +  i32       ;; Runtime languages
       }
       
      @@ -702,10 +691,11 @@ DW_TAG_inheritance = 28
      -%llvm.dbg.enumerator.type = type {
      -  i32,    ;; Tag = 40 + LLVMDebugVersion (DW_TAG_enumerator)
      -  i8*,    ;; Name
      -  i64     ;; Value
      +!6 = metadata !{
      +  i32,      ;; Tag = 40 + LLVMDebugVersion 
      +            ;; (DW_TAG_enumerator)
      +  metadata, ;; Name
      +  i64       ;; Value
       }
       
      @@ -725,13 +715,13 @@ DW_TAG_inheritance = 28
      -%llvm.dbg.variable.type = type {
      -  i32,     ;; Tag (see below)
      -  {  }*,   ;; Context
      -  i8*,     ;; Name
      -  {  }*,   ;; Reference to compile unit where defined
      -  i32,     ;; Line number where defined
      -  {  }*    ;; Type descriptor
      +!7 = metadata !{
      +  i32,      ;; Tag (see below)
      +  metadata, ;; Context
      +  metadata, ;; Name
      +  metadata, ;; Reference to compile unit where defined
      +  i32,      ;; Line number where defined
      +  metadata  ;; Type descriptor
       }
       
      @@ -778,14 +768,14 @@ DW_TAG_return_variable = 258
      -  void %llvm.dbg.stoppoint( uint, uint, { }* )
      +  void %llvm.dbg.stoppoint( uint, uint, metadata)
       

This intrinsic is used to provide correspondence between the source file and the generated code. The first argument is the line number (base 1), second argument is the column number (0 if unknown) and the third argument is the - source %llvm.dbg.compile_unit* - cast to a { }*. + source %llvm.dbg.compile_unit. + Code following a call to this intrinsic will have been defined in close proximity of the line, column and file. This information holds until the next call to %llvm.dbg.stoppoint.

      @@ -799,7 +789,7 @@ DW_TAG_return_variable = 258
      -  void %llvm.dbg.func.start( { }* )
      +  void %llvm.dbg.func.start( metadata )
       

      This intrinsic is used to link the debug information @@ -823,7 +813,7 @@ DW_TAG_return_variable = 258

      -  void %llvm.dbg.region.start( { }* )
      +  void %llvm.dbg.region.start( metadata )
       

      This intrinsic is used to define the beginning of a declarative scope (ex. @@ -843,7 +833,7 @@ DW_TAG_return_variable = 258

      -  void %llvm.dbg.region.end( { }* )
      +  void %llvm.dbg.region.end( metadata )
       

      This intrinsic is used to define the end of a declarative scope (ex. block) @@ -864,14 +854,14 @@ DW_TAG_return_variable = 258

      -  void %llvm.dbg.declare( { } *, { }* )
      +  void %llvm.dbg.declare( { } *, metadata )
       

      This intrinsic provides information about a local element (ex. variable.) The first argument is the alloca for the variable, cast to a { }*. The second argument is the %llvm.dbg.variable containing - the description of the variable, also cast to a { }*.

      + the description of the variable.

@@ -955,29 +945,29 @@
 entry:
 ...
-  call void @llvm.dbg.func.start( %llvm.dbg.subprogram.type* @llvm.dbg.subprogram )
+  call void @llvm.dbg.func.start( metadata !0)

-  call void @llvm.dbg.stoppoint( uint 2, uint 2, %llvm.dbg.compile_unit* @llvm.dbg.compile_unit )
+  call void @llvm.dbg.stoppoint( uint 2, uint 2, metadata !1)

   call void @llvm.dbg.declare({}* %X, ...)
   call void @llvm.dbg.declare({}* %Y, ...)

   ;; Evaluate expression on line 2, assigning to X.
-  call void @llvm.dbg.stoppoint( uint 3, uint 2, %llvm.dbg.compile_unit* @llvm.dbg.compile_unit )
+  call void @llvm.dbg.stoppoint( uint 3, uint 2, metadata !1)

   ;; Evaluate expression on line 3, assigning to Y.

   call void @llvm.region.start()
-  call void @llvm.dbg.stoppoint( uint 5, uint 4, %llvm.dbg.compile_unit* @llvm.dbg.compile_unit )
+  call void @llvm.dbg.stoppoint( uint 5, uint 4, metadata !1)
   call void @llvm.dbg.declare({}* %X, ...)

   ;; Evaluate expression on line 5, assigning to Z.
-  call void @llvm.dbg.stoppoint( uint 7, uint 2, %llvm.dbg.compile_unit* @llvm.dbg.compile_unit )
+  call void @llvm.dbg.stoppoint( uint 7, uint 2, metadata !1)
   call void @llvm.region.end()

-  call void @llvm.dbg.stoppoint( uint 9, uint 2, %llvm.dbg.compile_unit* @llvm.dbg.compile_unit )
+  call void @llvm.dbg.stoppoint( uint 9, uint 2, metadata !1)
   call void @llvm.region.end()

@@ -1096,51 +1086,36 @@ int main(int argc, char *argv[]) {
       ...
      -;;
      -;; Define types used.  In this case we need one for compile unit anchors and one
      -;; for compile units.
      -;;
      -%llvm.dbg.anchor.type = type { uint, uint }
      -%llvm.dbg.compile_unit.type = type { uint, {  }*, uint, uint, i8*, i8*, i8* }
      -...
      -;;
      -;; Define the anchor for compile units.  Note that the second field of the
      -;; anchor is 17, which is the same as the tag for compile units
      -;; (17 = DW_TAG_compile_unit.)
      -;;
      -%llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { uint 0, uint 17 }, section "llvm.metadata"
      -
       ;;
       ;; Define the compile unit for the source file "/Users/mine/sources/MySource.cpp".
       ;;
      -%llvm.dbg.compile_unit1 = internal constant %llvm.dbg.compile_unit.type {
      -    uint add(uint 17, uint 262144), 
      -    {  }* cast (%llvm.dbg.anchor.type* %llvm.dbg.compile_units to {  }*), 
      -    uint 1, 
      -    uint 1, 
      -    i8* getelementptr ([13 x i8]* %str1, i32 0, i32 0), 
      -    i8* getelementptr ([21 x i8]* %str2, i32 0, i32 0), 
      -    i8* getelementptr ([33 x i8]* %str3, i32 0, i32 0) }, section "llvm.metadata"
      -    
      +!3 = metadata !{
      +  i32 458769,    ;; Tag
      +  i32 0,         ;; Unused
      +  i32 4,         ;; Language Id
      +  metadata !"MySource.cpp", 
      +  metadata !"/Users/mine/sources", 
      +  metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)", 
      +  i1 true,       ;; Main Compile Unit
      +  i1 false,      ;; Optimized compile unit
      +  metadata !"",  ;; Compiler flags
      +  i32 0}         ;; Runtime version
      +
       ;;
       ;; Define the compile unit for the header file "/Users/mine/sources/MyHeader.h".
       ;;
      -%llvm.dbg.compile_unit2 = internal constant %llvm.dbg.compile_unit.type {
      -    uint add(uint 17, uint 262144), 
      -    {  }* cast (%llvm.dbg.anchor.type* %llvm.dbg.compile_units to {  }*), 
      -    uint 1, 
      -    uint 1, 
      -    i8* getelementptr ([11 x i8]* %str4, int 0, int 0), 
      -    i8* getelementptr ([21 x i8]* %str2, int 0, int 0), 
      -    i8* getelementptr ([33 x i8]* %str3, int 0, int 0) }, section "llvm.metadata"
      +!1 = metadata !{
      +  i32 458769,    ;; Tag
      +  i32 0,         ;; Unused
      +  i32 4,         ;; Language Id
      +  metadata !"MyHeader.h", 
      +  metadata !"/Users/mine/sources", 
      +  metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)", 
      +  i1 false,      ;; Main Compile Unit
      +  i1 false,      ;; Optimized compile unit
      +  metadata !"",  ;; Compiler flags
      +  i32 0}         ;; Runtime version
       
      -;;
      -;; Define each of the strings used in the compile units.
      -;;
      -%str1 = internal constant [13 x i8] c"MySource.cpp\00", section "llvm.metadata";
      -%str2 = internal constant [21 x i8] c"/Users/mine/sources/\00", section "llvm.metadata";
      -%str3 = internal constant [33 x i8] c"4.0.1 LLVM (LLVM research group)\00", section "llvm.metadata";
      -%str4 = internal constant [11 x i8] c"MyHeader.h\00", section "llvm.metadata";
       ...
       
      @@ -1167,65 +1142,51 @@ int MyGlobal = 100;
       ;;
      -;; Define types used. One for global variable anchors, one for the global
      -;; variable descriptor, one for the global's basic type and one for the global's
      -;; compile unit.
      -;;
      -%llvm.dbg.anchor.type = type { uint, uint }
      -%llvm.dbg.global_variable.type = type { uint, {  }*, {  }*, i8*, {  }*, uint, {  }*, bool, bool, {  }*, uint }
      -%llvm.dbg.basictype.type = type { uint, {  }*, i8*, {  }*, int, uint, uint, uint, uint }
      -%llvm.dbg.compile_unit.type = ...
      -...
      -;;
       ;; Define the global itself.
       ;;
       %MyGlobal = global int 100
       ...
       ;;
      -;; Define the anchor for global variables.  Note that the second field of the
      -;; anchor is 52, which is the same as the tag for global variables
      -;; (52 = DW_TAG_variable.)
      +;; List of debug info of globals
       ;;
      -%llvm.dbg.global_variables = linkonce constant %llvm.dbg.anchor.type { uint 0, uint 52 }, section "llvm.metadata"
      +!llvm.dbg.gv = !{!0}
       
       ;;
       ;; Define the global variable descriptor.  Note the reference to the global
       ;; variable anchor and the global variable itself.
       ;;
      -%llvm.dbg.global_variable = internal constant %llvm.dbg.global_variable.type {
      -    uint add(uint 52, uint 262144), 
      -    {  }* cast (%llvm.dbg.anchor.type* %llvm.dbg.global_variables to {  }*), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([9 x i8]* %str1, int 0, int 0), 
      -    i8* getelementptr ([1 x i8]* %str2, int 0, int 0), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    uint 1,
      -    {  }* cast (%llvm.dbg.basictype.type* %llvm.dbg.basictype to {  }*), 
      -    bool false, 
      -    bool true, 
      -    {  }* cast (int* %MyGlobal to {  }*) }, section "llvm.metadata"
      -    
      +!0 = metadata !{
      +  i32 458804,              ;; Tag
      +  i32 0,                   ;; Unused
      +  metadata !1,             ;; Context
      +  metadata !"MyGlobal",    ;; Name
      +  metadata !"MyGlobal",    ;; Display Name
      +  metadata !"MyGlobal",    ;; Linkage Name
      +  metadata !1,             ;; Compile Unit
      +  i32 1,                   ;; Line Number
      +  metadata !2,             ;; Type
      +  i1 false,                ;; Is a local variable
      +  i1 true,                 ;; Is this a definition
      +  i32* @MyGlobal           ;; The global variable
      +}
      +
       ;;
       ;; Define the basic type of 32 bit signed integer.  Note that since int is an
       ;; intrinsic type the source file is NULL and line 0.
       ;;    
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([4 x i8]* %str3, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 32, 
      -    uint 32, 
      -    uint 0, 
      -    uint 5 }, section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,              ;; Tag
      +  metadata !1,             ;; Context
      +  metadata !"int",         ;; Name
      +  metadata !1,             ;; Compile Unit
      +  i32 0,                   ;; Line number
      +  i64 32,                  ;; Size in Bits
      +  i64 32,                  ;; Align in Bits
      +  i64 0,                   ;; Offset in Bits
      +  i32 0,                   ;; Flags
      +  i32 5                    ;; Encoding
      +}
       
      -;;
      -;; Define the names of the global variable and basic type.
      -;;
      -%str1 = internal constant [9 x i8] c"MyGlobal\00", section "llvm.metadata"
      -%str2 = internal constant [1 x i8] c"\00", section "llvm.metadata"
      -%str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata"
       
      @@ -1252,47 +1213,28 @@ int main(int argc, char *argv[]) {
      -;;
      -;; Define types used. One for subprogram anchors, one for the subprogram
      -;; descriptor, one for the global's basic type and one for the subprogram's
      -;; compile unit.
      -;;
      -%llvm.dbg.subprogram.type = type { uint, {  }*, {  }*, i8*, {  }*, bool, bool }
      -%llvm.dbg.anchor.type = type { uint, uint }
      -%llvm.dbg.compile_unit.type = ...
      -	
       ;;
       ;; Define the anchor for subprograms.  Note that the second field of the
       ;; anchor is 46, which is the same as the tag for subprograms
       ;; (46 = DW_TAG_subprogram.)
       ;;
      -%llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { uint 0, uint 46 }, section "llvm.metadata"
      -
      -;;
      -;; Define the descriptor for the subprogram.  TODO - more details.
      -;;
      -%llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type {
      -    uint add(uint 46, uint 262144), 
      -    {  }* cast (%llvm.dbg.anchor.type* %llvm.dbg.subprograms to {  }*), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([5 x i8]* %str1, int 0, int 0), 
      -    i8* getelementptr ([1 x i8]* %str2, int 0, int 0), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
      -    uint 1,
      -    {  }* null, 
      -    bool false, 
      -    bool true }, section "llvm.metadata"
      -
      -;;
      -;; Define the name of the subprogram.
      -;;
      -%str1 = internal constant [5 x i8] c"main\00", section "llvm.metadata"
      -%str2 = internal constant [1 x i8] c"\00", section "llvm.metadata"
      -
      +!0 = metadata !{
      +  i32 458798,        ;; Tag
      +  i32 0,             ;; Unused
      +  metadata !1,       ;; Context
      +  metadata !"main",  ;; Name
      +  metadata !"main",  ;; Display name
      +  metadata !"main",  ;; Linkage name
      +  metadata !1,       ;; Compile unit
      +  i32 1,             ;; Line number
      +  metadata !2,       ;; Type
      +  i1 false,          ;; Is local 
      +  i1 true            ;; Is definition
      +}
       ;;
       ;; Define the subprogram itself.
       ;;
      -int %main(int %argc, i8** %argv) {
      +define i32 @main(i32 %argc, i8** %argv) {
       ...
       }
       
      @@ -1320,17 +1262,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([5 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 32, 
      -    uint 32, 
      -    uint 0, 
      -    uint 2 }, section "llvm.metadata"
      -%str1 = internal constant [5 x i8] c"bool\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"bool",  ;; Name
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 8,             ;; Size in Bits
      +  i64 8,             ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 2              ;; Encoding
      +}
       
      @@ -1345,17 +1288,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([5 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 8, 
      -    uint 8, 
      -    uint 0, 
      -    uint 6 }, section "llvm.metadata"
      -%str1 = internal constant [5 x i8] c"char\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"char",  ;; Name
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 8,             ;; Size in Bits
      +  i64 8,             ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 6              ;; Encoding
      +}
       
      @@ -1370,17 +1314,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([14 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 8, 
      -    uint 8, 
      -    uint 0, 
      -    uint 8 }, section "llvm.metadata"
      -%str1 = internal constant [14 x i8] c"unsigned char\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"unsigned char", 
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 8,             ;; Size in Bits
      +  i64 8,             ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 8              ;; Encoding
      +}
       
      @@ -1395,17 +1340,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([10 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 16, 
      -    uint 16, 
      -    uint 0, 
      -    uint 5 }, section "llvm.metadata"
      -%str1 = internal constant [10 x i8] c"short int\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"short int",
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 16,            ;; Size in Bits
      +  i64 16,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 5              ;; Encoding
      +}
       
      @@ -1420,17 +1366,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([19 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 16, 
      -    uint 16, 
      -    uint 0, 
      -    uint 7 }, section "llvm.metadata"
      -%str1 = internal constant [19 x i8] c"short unsigned int\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"short unsigned int",
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 16,            ;; Size in Bits
      +  i64 16,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 7              ;; Encoding
      +}
       
      @@ -1445,17 +1392,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([4 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 32, 
      -    uint 32, 
      -    uint 0, 
      -    uint 5 }, section "llvm.metadata"
      -%str1 = internal constant [4 x i8] c"int\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"int",   ;; Name
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 32,            ;; Size in Bits
      +  i64 32,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 5              ;; Encoding
      +}
       
      @@ -1469,17 +1417,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([13 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 32, 
      -    uint 32, 
      -    uint 0, 
      -    uint 7 }, section "llvm.metadata"
      -%str1 = internal constant [13 x i8] c"unsigned int\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"unsigned int",
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 32,            ;; Size in Bits
      +  i64 32,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 7              ;; Encoding
      +}
       
      @@ -1494,17 +1443,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([14 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 64, 
      -    uint 64, 
      -    uint 0, 
      -    uint 5 }, section "llvm.metadata"
      -%str1 = internal constant [14 x i8] c"long long int\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"long long int",
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 64,            ;; Size in Bits
      +  i64 64,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 5              ;; Encoding
      +}
       
      @@ -1519,17 +1469,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([23 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 64, 
      -    uint 64, 
      -    uint 0, 
      -    uint 7 }, section "llvm.metadata"
      -%str1 = internal constant [23 x 8] c"long long unsigned int\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"long long unsigned int",
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 64,            ;; Size in Bits
      +  i64 64,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 7              ;; Encoding
      +}
       
      @@ -1544,17 +1495,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    i8* getelementptr ([6 x i8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 32, 
      -    uint 32, 
      -    uint 0, 
      -    uint 4 }, section "llvm.metadata"
      -%str1 = internal constant [6 x i8] c"float\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"float",
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 32,            ;; Size in Bits
      +  i64 32,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 4              ;; Encoding
      +}
       
      @@ -1569,17 +1521,18 @@ int %main(int %argc, i8** %argv) {
      -%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
      -    uint add(uint 36, uint 262144), 
      -    {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*), 
      -    8* getelementptr ([7 x 8]* %str1, int 0, int 0), 
      -    {  }* null, 
      -    int 0, 
      -    uint 64, 
      -    uint 64, 
      -    uint 0, 
      -    uint 4 }, section "llvm.metadata"
      -%str1 = internal constant [7 x 8] c"double\00", section "llvm.metadata"
      +!2 = metadata !{
      +  i32 458788,        ;; Tag
      +  metadata !1,       ;; Context
      +  metadata !"double",;; Name
      +  metadata !1,       ;; Compile Unit
      +  i32 0,             ;; Line number
      +  i64 64,            ;; Size in Bits
      +  i64 64,            ;; Align in Bits
      +  i64 0,             ;; Offset in Bits
      +  i32 0,             ;; Flags
      +  i32 4              ;; Encoding
      +}
       
@@ -1607,60 +1560,64 @@ typedef const int *IntPtr;
;;
;; Define the typedef "IntPtr".
;;
-%llvm.dbg.derivedtype1 = internal constant %llvm.dbg.derivedtype.type {
-  uint add(uint 22, uint 262144),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  i8* getelementptr ([7 x 8]* %str1, int 0, int 0),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  int 1,
-  uint 0,
-  uint 0,
-  uint 0,
-  {  }* cast (%llvm.dbg.derivedtype.type* %llvm.dbg.derivedtype2 to {  }*) }, section "llvm.metadata"
-%str1 = internal constant [7 x 8] c"IntPtr\00", section "llvm.metadata"
+!2 = metadata !{
+  i32 458774,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"IntPtr", ;; Name
+  metadata !3,        ;; Compile unit
+  i32 0,              ;; Line number
+  i64 0,              ;; Size in bits
+  i64 0,              ;; Align in bits
+  i64 0,              ;; Offset in bits
+  i32 0,              ;; Flags
+  metadata !4         ;; Derived From type
+}

;;
;; Define the pointer type.
;;
-%llvm.dbg.derivedtype2 = internal constant %llvm.dbg.derivedtype.type {
-  uint add(uint 15, uint 262144),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  i8* null,
-  {  }* null,
-  int 0,
-  uint 32,
-  uint 32,
-  uint 0,
-  {  }* cast (%llvm.dbg.derivedtype.type* %llvm.dbg.derivedtype3 to {  }*) }, section "llvm.metadata"
-
+!4 = metadata !{
+  i32 458767,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"",       ;; Name
+  metadata !1,        ;; Compile unit
+  i32 0,              ;; Line number
+  i64 64,             ;; Size in bits
+  i64 64,             ;; Align in bits
+  i64 0,              ;; Offset in bits
+  i32 0,              ;; Flags
+  metadata !5         ;; Derived From type
+}

;;
;; Define the const type.
;;
-%llvm.dbg.derivedtype3 = internal constant %llvm.dbg.derivedtype.type {
-  uint add(uint 38, uint 262144),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  i8* null,
-  {  }* null,
-  int 0,
-  uint 0,
-  uint 0,
-  uint 0,
-  {  }* cast (%llvm.dbg.basictype.type* %llvm.dbg.basictype1 to {  }*) }, section "llvm.metadata"
-
+!5 = metadata !{
+  i32 458790,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"",       ;; Name
+  metadata !1,        ;; Compile unit
+  i32 0,              ;; Line number
+  i64 32,             ;; Size in bits
+  i64 32,             ;; Align in bits
+  i64 0,              ;; Offset in bits
+  i32 0,              ;; Flags
+  metadata !6         ;; Derived From type
+}

;;
;; Define the int type.
;;
-%llvm.dbg.basictype1 = internal constant %llvm.dbg.basictype.type {
-  uint add(uint 36, uint 262144),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  8* getelementptr ([4 x 8]* %str2, int 0, int 0),
-  {  }* null,
-  int 0,
-  uint 32,
-  uint 32,
-  uint 0,
-  uint 5 }, section "llvm.metadata"
-%str2 = internal constant [4 x 8] c"int\00", section "llvm.metadata"
+!6 = metadata !{
+  i32 458788,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"int",    ;; Name
+  metadata !1,        ;; Compile unit
+  i32 0,              ;; Line number
+  i64 32,             ;; Size in bits
+  i64 32,             ;; Align in bits
+  i64 0,              ;; Offset in bits
+  i32 0,              ;; Flags
+  i32 5               ;; Encoding
+}
@@ -1692,86 +1649,88 @@ struct Color {
;;
;; Define basic type for unsigned int.
;;
-%llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type {
-  uint add(uint 36, uint 262144),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  i8* getelementptr ([13 x i8]* %str1, int 0, int 0),
-  {  }* null,
-  int 0,
-  uint 32,
-  uint 32,
-  uint 0,
-  uint 7 }, section "llvm.metadata"
-%str1 = internal constant [13 x i8] c"unsigned int\00", section "llvm.metadata"
-
+!5 = metadata !{
+  i32 458788,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"unsigned int",
+  metadata !1,        ;; Compile Unit
+  i32 0,              ;; Line number
+  i64 32,             ;; Size in Bits
+  i64 32,             ;; Align in Bits
+  i64 0,              ;; Offset in Bits
+  i32 0,              ;; Flags
+  i32 7               ;; Encoding
+}

;;
;; Define composite type for struct Color.
;;
-%llvm.dbg.compositetype = internal constant %llvm.dbg.compositetype.type {
-  uint add(uint 19, uint 262144),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  i8* getelementptr ([6 x i8]* %str2, int 0, int 0),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  int 1,
-  uint 96,
-  uint 32,
-  uint 0,
-  {  }* null,
-  {  }* cast ([3 x {  }*]* %llvm.dbg.array to {  }*) }, section "llvm.metadata"
-%str2 = internal constant [6 x i8] c"Color\00", section "llvm.metadata"
+!2 = metadata !{
+  i32 458771,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"Color",  ;; Name
+  metadata !1,        ;; Compile unit
+  i32 1,              ;; Line number
+  i64 96,             ;; Size in bits
+  i64 32,             ;; Align in bits
+  i64 0,              ;; Offset in bits
+  i32 0,              ;; Flags
+  null,               ;; Derived From
+  metadata !3,        ;; Elements
+  i32 0               ;; Runtime Language
+}

;;
;; Define the Red field.
;;
-%llvm.dbg.derivedtype1 = internal constant %llvm.dbg.derivedtype.type {
-  uint add(uint 13, uint 262144),
-  {  }* null,
-  i8* getelementptr ([4 x i8]* %str3, int 0, int 0),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  int 2,
-  uint 32,
-  uint 32,
-  uint 0,
-  {  }* cast (%llvm.dbg.basictype.type* %llvm.dbg.basictype to {  }*) }, section "llvm.metadata"
-%str3 = internal constant [4 x i8] c"Red\00", section "llvm.metadata"
+!4 = metadata !{
+  i32 458765,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"Red",    ;; Name
+  metadata !1,        ;; Compile Unit
+  i32 2,              ;; Line number
+  i64 32,             ;; Size in bits
+  i64 32,             ;; Align in bits
+  i64 0,              ;; Offset in bits
+  i32 0,              ;; Flags
+  metadata !5         ;; Derived From type
+}

;;
;; Define the Green field.
;;
-%llvm.dbg.derivedtype2 = internal constant %llvm.dbg.derivedtype.type {
-  uint add(uint 13, uint 262144),
-  {  }* null,
-  i8* getelementptr ([6 x i8]* %str4, int 0, int 0),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  int 3,
-  uint 32,
-  uint 32,
-  uint 32,
-  {  }* cast (%llvm.dbg.basictype.type* %llvm.dbg.basictype to {  }*) }, section "llvm.metadata"
-%str4 = internal constant [6 x i8] c"Green\00", section "llvm.metadata"
+!6 = metadata !{
+  i32 458765,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"Green",  ;; Name
+  metadata !1,        ;; Compile Unit
+  i32 3,              ;; Line number
+  i64 32,             ;; Size in bits
+  i64 32,             ;; Align in bits
+  i64 32,             ;; Offset in bits
+  i32 0,              ;; Flags
+  metadata !5         ;; Derived From type
+}

;;
;; Define the Blue field.
;;
-%llvm.dbg.derivedtype3 = internal constant %llvm.dbg.derivedtype.type {
-  uint add(uint 13, uint 262144),
-  {  }* null,
-  i8* getelementptr ([5 x i8]* %str5, int 0, int 0),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  int 4,
-  uint 32,
-  uint 32,
-  uint 64,
-  {  }* cast (%llvm.dbg.basictype.type* %llvm.dbg.basictype to {  }*) }, section "llvm.metadata"
-%str5 = internal constant [5 x 8] c"Blue\00", section "llvm.metadata"
+!7 = metadata !{
+  i32 458765,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"Blue",   ;; Name
+  metadata !1,        ;; Compile Unit
+  i32 4,              ;; Line number
+  i64 32,             ;; Size in bits
+  i64 32,             ;; Align in bits
+  i64 64,             ;; Offset in bits
+  i32 0,              ;; Flags
+  metadata !5         ;; Derived From type
+}

;;
;; Define the array of fields used by the composite type Color.
;;
-%llvm.dbg.array = internal constant [3 x {  }*] [
-  {  }* cast (%llvm.dbg.derivedtype.type* %llvm.dbg.derivedtype1 to {  }*),
-  {  }* cast (%llvm.dbg.derivedtype.type* %llvm.dbg.derivedtype2 to {  }*),
-  {  }* cast (%llvm.dbg.derivedtype.type* %llvm.dbg.derivedtype3 to {  }*) ], section "llvm.metadata"
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
@@ -1803,53 +1762,41 @@ enum Trees {
;;
;; Define composite type for enum Trees
;;
-%llvm.dbg.compositetype = internal constant %llvm.dbg.compositetype.type {
-  uint add(uint 4, uint 262144),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  i8* getelementptr ([6 x i8]* %str1, int 0, int 0),
-  {  }* cast (%llvm.dbg.compile_unit.type* %llvm.dbg.compile_unit to {  }*),
-  int 1,
-  uint 32,
-  uint 32,
-  uint 0,
-  {  }* null,
-  {  }* cast ([3 x {  }*]* %llvm.dbg.array to {  }*) }, section "llvm.metadata"
-%str1 = internal constant [6 x i8] c"Trees\00", section "llvm.metadata"
+!2 = metadata !{
+  i32 458756,         ;; Tag
+  metadata !1,        ;; Context
+  metadata !"Trees",  ;; Name
+  metadata !1,        ;; Compile unit
+  i32 1,              ;; Line number
+  i64 32,             ;; Size in bits
+  i64 32,             ;; Align in bits
+  i64 0,              ;; Offset in bits
+  i32 0,              ;; Flags
+  null,               ;; Derived From type
+  metadata !3,        ;; Elements
+  i32 0               ;; Runtime language
+}
+
+;;
+;; Define the array of enumerators used by composite type Trees.
+;;
+!3 = metadata !{metadata !4, metadata !5, metadata !6}

;;
;; Define Spruce enumerator.
;;
-%llvm.dbg.enumerator1 = internal constant %llvm.dbg.enumerator.type {
-  uint add(uint 40, uint 262144),
-  i8* getelementptr ([7 x i8]* %str2, int 0, int 0),
-  int 100 }, section "llvm.metadata"
-%str2 = internal constant [7 x i8] c"Spruce\00", section "llvm.metadata"
+!4 = metadata !{i32 458792, metadata !"Spruce", i64 100}

;;
;; Define Oak enumerator.
;;
-%llvm.dbg.enumerator2 = internal constant %llvm.dbg.enumerator.type {
-  uint add(uint 40, uint 262144),
-  i8* getelementptr ([4 x i8]* %str3, int 0, int 0),
-  int 200 }, section "llvm.metadata"
-%str3 = internal constant [4 x i8] c"Oak\00", section "llvm.metadata"
+!5 = metadata !{i32 458792, metadata !"Oak", i64 200}

;;
;; Define Maple enumerator.
;;
-%llvm.dbg.enumerator3 = internal constant %llvm.dbg.enumerator.type {
-  uint add(uint 40, uint 262144),
-  i8* getelementptr ([6 x i8]* %str4, int 0, int 0),
-  int 300 }, section "llvm.metadata"
-%str4 = internal constant [6 x i8] c"Maple\00", section "llvm.metadata"
+!6 = metadata !{i32 458792, metadata !"Maple", i64 300}

-;;
-;; Define the array of enumerators used by composite type Trees.
-;;
-%llvm.dbg.array = internal constant [3 x {  }*] [
-  {  }* cast (%llvm.dbg.enumerator.type* %llvm.dbg.enumerator1 to {  }*),
-  {  }* cast (%llvm.dbg.enumerator.type* %llvm.dbg.enumerator2 to {  }*),
-  {  }* cast (%llvm.dbg.enumerator.type* %llvm.dbg.enumerator3 to {  }*) ], section "llvm.metadata"
      @@ -1866,7 +1813,7 @@ enum Trees { Chris Lattner
      LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-06-26 03:49:18 +0200 (Fri, 26 Jun 2009) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html index c4e85d1948d59..aca7e5d5f8743 100644 --- a/docs/SystemLibrary.html +++ b/docs/SystemLibrary.html @@ -23,7 +23,6 @@
    53. No Unused Functionality
    54. No Virtual Methods
    55. Minimize Soft Errors
    56. -
    57. Throw Only std::string
    58. No throw() Specifications
    59. Code Organization
    60. Consistent Semantics
    61. @@ -76,7 +75,7 @@ -
      Don't Inlcude System Headers +

      Except in lib/System, no LLVM source code should directly @@ -211,8 +210,8 @@ "out of space", "bad disk sector", or "system call interrupted". We'll call the first group "soft" errors and the second group "hard" errors.

      -

      lib/System must always attempt to minimize soft errors and always just - throw a std::string on hard errors. This is a design requirement because the +

      lib/System must always attempt to minimize soft errors. + This is a design requirement because the minimization of soft errors can affect the granularity and the nature of the interface. In general, if you find that you're wanting to throw soft errors, you must review the granularity of the interface because it is likely you're @@ -239,31 +238,6 @@

    - - -
    -

    If an error occurs that lib/System cannot handle, the only action taken by - lib/System is to throw an instance of std:string. The contents of the string - must explain both what happened and the context in which it happened. The - format of the string should be a (possibly empty) list of contexts each - terminated with a : and a space, followed by the error message, optionally - followed by a reason, and optionally followed by a suggestion.

    -

    For example, failure to open a file named "foo" could result in a message - like:

    -
    • foo: Unable to open file because it doesn't exist."
    -

    The "foo:" part is the context. The "Unable to open file" part is the error - message. The "because it doesn't exist." part is the reason. This message has - no suggestion. Where possible, the implementation of lib/System should use - operating system specific facilities for converting the error code returned by - a system call into an error message. This will help to make the error message - more familiar to users of that type of operating system.

    -

    Note that this requirement precludes the throwing of any other exceptions. - For example, various C++ standard library functions can cause exceptions to be - thrown (e.g. out of memory situation). In all cases, if there is a possibility - that non-string exceptions could be thrown, the lib/System library must ensure - that the exceptions are translated to std::string form.

    -
    - @@ -273,7 +247,8 @@ compiler does not insert additional exception handling code into the interface functions. This is a performance consideration: lib/System functions are at the bottom of many call chains and as such can be frequently called. We - need them to be as efficient as possible.

    + need them to be as efficient as possible. However, no routines in the + system library should actually throw exceptions.

    @@ -338,7 +313,7 @@ Reid Spencer
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2008-12-11 18:34:48 +0100 (Thu, 11 Dec 2008) $ + Last modified: $Date: 2009-07-17 23:11:24 +0200 (Fri, 17 Jul 2009) $ diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html index 568b5728850fd..7ae1ca489e32e 100644 --- a/docs/TableGenFundamentals.html +++ b/docs/TableGenFundamentals.html @@ -371,8 +371,8 @@ supported include:

    string value
    [{ ... }]
    code fragment
    -
    [ X, Y, Z ]
    -
    list value. is the type of the list +
    [ X, Y, Z ]<type>
    +
    list value. <type> is the type of the list element and is usually optional. In rare cases, TableGen is unable to deduce the element type in which case the user must specify it explicitly.
    @@ -398,23 +398,16 @@ which case the user must specify it explicitly.
    a dag value. The first element is required to be a record definition, the remaining elements in the list may be arbitrary other values, including nested `dag' values.
    -
    (implicit a)
    -
    an implicitly defined physical register. This tells the dag instruction - selection emitter the input pattern's extra definitions matches implicit - physical register definitions.
    -
    (parallel (a), (b))
    -
    a list of dags specifying parallel operations which map to the same - instruction.
    !strconcat(a, b)
    A string value that is the result of concatenating the 'a' and 'b' strings.
    -
    !cast(a)
    +
    !cast<type>(a)
    A symbol of type type obtained by looking up the string 'a' in the symbol table. If the type of 'a' does not match type, TableGen -aborts with an error. !cast is a special case in that the argument must +aborts with an error. !cast<string> is a special case in that the argument must be an object defined by a 'def' construct.
    !nameconcat<type>(a, b)
    -
    Shorthand for !cast(!strconcat(a, b))
    +
    Shorthand for !cast<type>(!strconcat(a, b))
    !subst(a, b, c)
    If 'a' and 'b' are of string type or are symbol references, substitute 'b' for 'a' in 'c.' This operation is analogous to $(subst) in GNU make.
    @@ -759,6 +752,25 @@ opened, as in the case with the CALL* instructions above.

    + + + + +

Expressions used by the code generator to describe instructions and isel +patterns:

    + +
    + +
    (implicit a)
    +
an implicitly defined physical register. This tells the dag instruction + selection emitter that the input pattern's extra definitions match implicit + physical register definitions.
    +
    (parallel (a), (b))
    +
    a list of dags specifying parallel operations which map to the same + instruction.
    + +
    + @@ -782,7 +794,7 @@ This should highlight the APIs in TableGen/Record.h.

    Chris Lattner
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-06-29 22:05:29 +0200 (Mon, 29 Jun 2009) $ + Last modified: $Date: 2009-10-05 04:51:06 +0200 (Mon, 05 Oct 2009) $ diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html index 32b16cadf47ae..4f05d77daaed5 100644 --- a/docs/TestingGuide.html +++ b/docs/TestingGuide.html @@ -29,6 +29,7 @@
  • DejaGNU structure @@ -448,7 +449,257 @@ negatives).

    - + + + +
    + +

A powerful feature of the RUN: lines is that they allow arbitrary commands + to be executed as part of the test harness. While standard (portable) unix + tools like 'grep' work fine on run lines, as you see above, there are a lot + of caveats due to interaction with Tcl syntax, and we want to make sure the + run lines are portable to a wide range of systems. Another major problem is + that grep is not very good at verifying that the output of a tool + contains a series of different outputs in a specific order. The FileCheck + tool was designed to help with these problems.

    + +

FileCheck (whose basic command line arguments are described in the FileCheck man page) is + designed to read a file to check from standard input, and the set of things + to verify from a file specified as a command line argument. A simple example + of using FileCheck from a RUN line looks like this:

    + +
    +
    +; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
    +
    +
    + +

This syntax says to pipe the current file ("%s") into llvm-as, pipe that into +llc, then pipe the output of llc into FileCheck. This means that FileCheck will +be verifying its standard input (the llc output) against the filename argument +specified (the original .ll file specified by "%s"). To see how this works, +let's look at the rest of the .ll file (after the RUN line):

    + +
    +
    +define void @sub1(i32* %p, i32 %v) {
    +entry:
    +; CHECK: sub1:
    +; CHECK: subl
    +        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
    +        ret void
    +}
    +
    +define void @inc4(i64* %p) {
    +entry:
    +; CHECK: inc4:
    +; CHECK: incq
    +        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
    +        ret void
    +}
    +
    +
    + +

    Here you can see some "CHECK:" lines specified in comments. Now you can see +how the file is piped into llvm-as, then llc, and the machine code output is +what we are verifying. FileCheck checks the machine code output to verify that +it matches what the "CHECK:" lines specify.

    + +

The syntax of the CHECK: lines is very simple: they are fixed strings that +must occur in order. FileCheck defaults to ignoring horizontal whitespace +differences (e.g. a space is allowed to match a tab) but otherwise, the contents +of the CHECK: line are required to match something in the test file exactly.

    + +

    One nice thing about FileCheck (compared to grep) is that it allows merging +test cases together into logical groups. For example, because the test above +is checking for the "sub1:" and "inc4:" labels, it will not match unless there +is a "subl" in between those labels. If it existed somewhere else in the file, +that would not count: "grep subl" matches if subl exists anywhere in the +file.

    + +
    + + + + +
    + +

    The FileCheck -check-prefix option allows multiple test configurations to be +driven from one .ll file. This is useful in many circumstances, for example, +testing different architectural variants with llc. Here's a simple example:

    + +
    +
    +; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
    +; RUN:              | FileCheck %s -check-prefix=X32
    +; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \
    +; RUN:              | FileCheck %s -check-prefix=X64
    +
    +define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
    +        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
    +        ret <4 x i32> %tmp1
    +; X32: pinsrd_1:
    +; X32:    pinsrd $1, 4(%esp), %xmm0
    +
    +; X64: pinsrd_1:
    +; X64:    pinsrd $1, %edi, %xmm0
    +}
    +
    +
    + +

    In this case, we're testing that we get the expected code generation with +both 32-bit and 64-bit code generation.

    + +
    + + + + +
    + +

Sometimes you want to match lines and would like to verify that matches +happen on exactly consecutive lines with no other lines in between them. In +this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If +you specified a custom check prefix, just use "<PREFIX>-NEXT:". For +example, something like this works as you'd expect:

    + +
    +
    +define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) {
    +	%tmp3 = load <2 x double>* %A, align 16
    +	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
    +	%tmp9 = shufflevector <2 x double> %tmp3,
    +                              <2 x double> %tmp7,
    +                              <2 x i32> < i32 0, i32 2 >
    +	store <2 x double> %tmp9, <2 x double>* %r, align 16
    +	ret void
    +        
    +; CHECK: t2:
    +; CHECK: 	movl	8(%esp), %eax
    +; CHECK-NEXT: 	movapd	(%eax), %xmm0
    +; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
    +; CHECK-NEXT: 	movl	4(%esp), %eax
    +; CHECK-NEXT: 	movapd	%xmm0, (%eax)
    +; CHECK-NEXT: 	ret
    +}
    +
    +
    + +

CHECK-NEXT: directives reject the input unless there is exactly one newline +between it and the previous directive. A CHECK-NEXT cannot be the first +directive in a file.

    + +
    + + + + +
    + +

    The CHECK-NOT: directive is used to verify that a string doesn't occur +between two matches (or the first match and the beginning of the file). For +example, to verify that a load is removed by a transformation, a test like this +can be used:

    + +
    +
    +define i8 @coerce_offset0(i32 %V, i32* %P) {
    +  store i32 %V, i32* %P
    +   
    +  %P2 = bitcast i32* %P to i8*
    +  %P3 = getelementptr i8* %P2, i32 2
    +
    +  %A = load i8* %P3
    +  ret i8 %A
    +; CHECK: @coerce_offset0
    +; CHECK-NOT: load
    +; CHECK: ret i8
    +}
    +
    +
    + +
    + + + + +
    + +

    The CHECK: and CHECK-NOT: directives both take a pattern to match. For most +uses of FileCheck, fixed string matching is perfectly sufficient. For some +things, a more flexible form of matching is desired. To support this, FileCheck +allows you to specify regular expressions in matching strings, surrounded by +double braces: {{yourregex}}. Because we want to use fixed string +matching for a majority of what we do, FileCheck has been designed to support +mixing and matching fixed string matching with regular expressions. This allows +you to write things like this:

    + +
    +
    +; CHECK: movhpd	{{[0-9]+}}(%esp), {{%xmm[0-7]}}
    +
    +
    + +

    In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed.

    + +

    Because regular expressions are enclosed with double braces, they are +visually distinct, and you don't need to use escape characters within the double +braces like you would in C. In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +{{[{][{]}} as your pattern.

    + +
    + + + + +
    + +

    It is often useful to match a pattern and then verify that it occurs again +later in the file. For codegen tests, this can be useful to allow any register, +but verify that that register is used consistently later. To do this, FileCheck +allows named variables to be defined and substituted into patterns. Here is a +simple example:

    + +
    +
    +; CHECK: test5:
    +; CHECK:    notw	[[REGISTER:%[a-z]+]]
    +; CHECK:    andw	{{.*}}[[REGISTER]]
    +
    +
    + +

The first check line matches a regex (%[a-z]+) and captures it into +the variable "REGISTER". The second line verifies that whatever is in REGISTER +occurs later in the file after an "andw". FileCheck variable references are +always contained in [[ ]] pairs, and their names can be +formed with the regex "[a-zA-Z][a-zA-Z0-9]*". If a colon follows the +name, then it is a definition of the variable; if not, it is a use.

    + +

FileCheck variables can be defined multiple times, and uses always get the +latest value. Note that variables are all read at the start of a "CHECK" line +and are all defined at the end. This means that if you have something like +"CHECK: [[XYZ:.*]]x[[XYZ]]", the check line will read the previous +value of the XYZ variable and define a new one after the match is performed. If +you need to do something like this, you can probably take advantage of the fact +that FileCheck is not actually line-oriented when it matches; this allows you to +define two separate CHECK lines that match on the same line, as in the sketch below. +
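A minimal sketch of that trick (the mnemonic and register names here are purely illustrative, not from a real test): the first CHECK line captures an operand, and because matching resumes where the previous match ended, the second CHECK line can match the rest of the same input line:

    ; CHECK: movl [[SRC:%e[a-z]+]],
    ; CHECK: [[SRC]]

Against an input line such as "movl %eax, %eax", the first directive captures "%eax" from the first operand, and the second directive matches the second operand without ever advancing to another line.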

    + + + +
    + + +

    With a RUN line there are a number of substitutions that are permitted. In @@ -502,14 +753,6 @@ negatives).

    The target triplet that corresponds to the current host machine (the one running the test cases). This should probably be called "host".
    -
    prcontext (%prcontext)
    -
    Path to the prcontext tcl script that prints some context around a - line that matches a pattern. This isn't strictly necessary as the test suite - is run with its PATH altered to include the test/Scripts directory where - the prcontext script is located. Note that this script is similar to - grep -C but you should use the prcontext script because - not all platforms support grep -C.
    -
    llvmgcc (%llvmgcc)
    The full path to the llvm-gcc executable as specified in the configured LLVM environment
    @@ -974,7 +1217,7 @@ know. Thanks!

    John T. Criswell, Reid Spencer, and Tanya Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-06-26 07:44:53 +0200 (Fri, 26 Jun 2009) $ + Last modified: $Date: 2009-09-27 10:01:44 +0200 (Sun, 27 Sep 2009) $ diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html index 7458ecad9ec41..c6bcaf09a8bfd 100644 --- a/docs/UsingLibraries.html +++ b/docs/UsingLibraries.html @@ -128,8 +128,6 @@ Code generation for ARM architecture LLVMCBackend.o 'C' language code generator. - LLVMIA64.o - Code generation for IA64 architecture LLVMPowerPC.o Code generation for PowerPC architecture LLVMSparc.o @@ -356,14 +354,6 @@
  • libLLVMSystem.a
  • libLLVMTarget.a
  • -
    LLVMIA64.o
      -
    • libLLVMCodeGen.a
    • -
    • libLLVMCore.a
    • -
    • libLLVMSelectionDAG.a
    • -
    • libLLVMSupport.a
    • -
    • libLLVMSystem.a
    • -
    • libLLVMTarget.a
    • -
    LLVMInterpreter.o
    • LLVMExecutionEngine.o
    • libLLVMCodeGen.a
    • @@ -442,7 +432,7 @@ Reid Spencer The LLVM Compiler Infrastructure -
      Last modified: $Date: 2008-12-11 19:23:24 +0100 (Thu, 11 Dec 2008) $ +
      Last modified: $Date: 2009-07-24 02:30:09 +0200 (Fri, 24 Jul 2009) $ + + + +
      + +

+You must also register your target with the TargetRegistry, which is +what other LLVM tools use to look up and use your target at +runtime. The TargetRegistry can be used directly, but for most targets +there are helper templates which should take care of the work for you.

      + +

      +All targets should declare a global Target object which is used to +represent the target during registration. Then, in the target's TargetInfo +library, the target should define that object and use +the RegisterTarget template to register the target. For example, the Sparc registration code looks like this: +

      + +
      +
      +Target llvm::TheSparcTarget;
      +
      +extern "C" void LLVMInitializeSparcTargetInfo() { 
      +  RegisterTarget<Triple::sparc, /*HasJIT=*/false>
      +    X(TheSparcTarget, "sparc", "Sparc");
      +}
      +
      +
      +

      -You must also register your target using the RegisterTarget -template. (See the TargetMachineRegistry class.) For example, -in SparcTargetMachine.cpp, the target is registered with: +This allows the TargetRegistry to look up the target by name or by +target triple. In addition, most targets will also register additional features +which are available in separate libraries. These registration steps are +separate, because some clients may wish to only link in some parts of the target +-- the JIT code generator does not require the use of the assembler printer, for +example. Here is an example of registering the Sparc assembly printer:

      -namespace {
      -  // Register the target.
      -  RegisterTarget<SparcTargetMachine>X("sparc", "SPARC");
      +extern "C" void LLVMInitializeSparcAsmPrinter() { 
      +  RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
       }
       
      +

      +For more information, see +"llvm/Target/TargetRegistry.h". +

      +
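On the client side, a tool finds registered targets through the same registry. The following C++ fragment is a minimal sketch, assuming the r84119-era signatures TargetRegistry::lookupTarget(const std::string&, std::string&) and Target::createTargetMachine(const std::string &Triple, const std::string &Features); the Sparc triple is just an example:

    #include <string>
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/Target/TargetSelect.h"
    using namespace llvm;

    TargetMachine *createSparcTargetMachine() {
      // Run the LLVMInitialize*TargetInfo/*Target entry points for all
      // configured targets so the registry is populated.
      InitializeAllTargetInfos();
      InitializeAllTargets();

      // Look the target up by triple; on failure, Error says why (for
      // example, the TargetInfo library was never linked in).
      std::string Error;
      const Target *T =
          TargetRegistry::lookupTarget("sparc-unknown-linux-gnu", Error);
      if (!T)
        return 0;

      // An empty feature string selects the default subtarget.
      return T->createTargetMachine("sparc-unknown-linux-gnu", "");
    }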
      @@ -2038,8 +2080,8 @@ SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) {

The X86 assembly printer implementation (X86TargetAsmInfo) is an -example where the target specific TargetAsmInfo class uses overridden -methods: ExpandInlineAsm and PreferredEHDataFormat. +example where the target specific TargetAsmInfo class uses an +overridden method: ExpandInlineAsm.

      @@ -2122,9 +2164,7 @@ in XXXGenAsmWriter.inc contains an implementation of the The implementations of printDeclare, printImplicitDef, printInlineAsm, and printLabel in AsmPrinter.cpp are generally adequate for printing assembly and do not need to be -overridden. (printBasicBlockLabel is another method that is implemented -in AsmPrinter.cpp that may be directly used in an implementation of -XXXAsmPrinter.) +overridden.

      @@ -2523,7 +2563,7 @@ with assembler. Mason Woo and Misha Brukman
      The LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-04-05 02:44:06 +0200 (Sun, 05 Apr 2009) $ + Last modified: $Date: 2009-09-13 00:57:37 +0200 (Sun, 13 Sep 2009) $ diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html index dd8b41d121b62..218f8ef90d435 100644 --- a/docs/WritingAnLLVMPass.html +++ b/docs/WritingAnLLVMPass.html @@ -179,7 +179,7 @@ source tree in the lib/Transforms/Hello directory.

       # Makefile for hello pass
       
      -# Path to top level of LLVM heirarchy
      +# Path to top level of LLVM hierarchy
       LEVEL = ../../..
       
       # Name of the library to build
      @@ -223,12 +223,14 @@ Start out with:

       #include "llvm/Pass.h"
       #include "llvm/Function.h"
      +#include "llvm/Support/raw_ostream.h"
       

      Which are needed because we are writing a Pass, and +href="http://llvm.org/doxygen/classllvm_1_1Pass.html">Pass, we are operating on Function's.

      +href="http://llvm.org/doxygen/classllvm_1_1Function.html">Function's, +and we will be doing some printing.

      Next we have:

      @@ -273,7 +275,7 @@ avoid using expensive C++ runtime information.

           virtual bool runOnFunction(Function &F) {
      -      llvm::cerr << "Hello: " << F.getName() << "\n";
      +      errs() << "Hello: " << F.getName() << "\n";
             return false;
           }
         };  // end of struct Hello
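For completeness, a pass of this vintage also defines its pass ID and registers itself so that tools such as opt can find it by name. A sketch matching the era's RegisterPass template (the "hello" name and description are the conventional ones; the two boolean flags are the third and fourth arguments that the surrounding text discusses):

    char Hello::ID = 0;
    static RegisterPass<Hello>
    X("hello", "Hello World Pass",
      false /* Only looks at CFG */,
      false /* Analysis Pass */);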
      @@ -312,6 +314,7 @@ is supplied as fourth argument. 

       #include "llvm/Pass.h"
       #include "llvm/Function.h"
      +#include "llvm/Support/raw_ostream.h"
       
       using namespace llvm;
       
      @@ -322,7 +325,7 @@ is supplied as fourth argument. 

      Hello() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - llvm::cerr << "Hello: " << F.getName() << "\n"; + errs() << "Hello: " << F.getName() << "\n"; return false; } }; @@ -450,7 +453,7 @@ available, from the most general to the most specific.

      When choosing a superclass for your Pass, you should choose the most specific class possible, while still being able to meet the requirements listed. This gives the LLVM Pass Infrastructure information necessary to -optimize how passes are run, so that the resultant compiler isn't unneccesarily +optimize how passes are run, so that the resultant compiler isn't unnecessarily slow.

      @@ -489,7 +492,7 @@ invalidated, and are never "run".

      href="http://llvm.org/doxygen/classllvm_1_1ModulePass.html">ModulePass" class is the most general of all superclasses that you can use. Deriving from ModulePass indicates that your pass uses the entire program as a unit, -refering to function bodies in no predictable order, or adding and removing +referring to function bodies in no predictable order, or adding and removing functions. Because nothing is known about the behavior of ModulePass subclasses, no optimization can be done for their execution.

      @@ -497,7 +500,7 @@ subclasses, no optimization can be done for their execution.

      the getAnalysis interface getAnalysis<DominatorTree>(llvm::Function *) to provide the function to retrieve analysis result for, if the function pass does not require -any module passes. Note that this can only be done for functions for which the +any module or immutable passes. Note that this can only be done for functions for which the analysis ran, e.g. in the case of dominators you should only ask for the DominatorTree for function definitions, not declarations.
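A minimal C++ sketch of that usage, assuming the era's Pass::getAnalysis<AnalysisType>(Function &) overload; the pass itself (DomWalker) is hypothetical:

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Module.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
      // Hypothetical module pass that inspects the DominatorTree of
      // every function definition in the module.
      struct DomWalker : public ModulePass {
        static char ID;
        DomWalker() : ModulePass(&ID) {}

        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<DominatorTree>();
          AU.setPreservesAll();
        }

        virtual bool runOnModule(Module &M) {
          for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
            if (!F->isDeclaration()) {
              // Only ask for definitions; the analysis never ran on
              // declarations.
              DominatorTree &DT = getAnalysis<DominatorTree>(*F);
              (void)DT; // ... use DT here ...
            }
          return false;
        }
      };
    }

    char DomWalker::ID = 0;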

      @@ -1826,7 +1829,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.

      Chris Lattner
      The LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-07-02 01:38:44 +0200 (Thu, 02 Jul 2009) $ + Last modified: $Date: 2009-10-12 16:46:08 +0200 (Mon, 12 Oct 2009) $ diff --git a/docs/index.html b/docs/index.html index 00d48ae37c57d..36ed0e2d9f860 100644 --- a/docs/index.html +++ b/docs/index.html @@ -41,13 +41,13 @@ @@ -233,6 +233,9 @@ the linker and its design
    • The LLVM gold plugin - How to build your programs with link-time optimization on Linux.
    • + +
    • The GDB JIT interface - How to debug +JITed code with GDB.
    @@ -278,7 +281,7 @@ times each day, making it a high volume list. src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-03-02 00:42:51 +0100 (Mon, 02 Mar 2009) $ + Last modified: $Date: 2009-09-21 04:34:59 +0200 (Mon, 21 Sep 2009) $ diff --git a/docs/re_format.7 b/docs/re_format.7 new file mode 100644 index 0000000000000..0c0928716f49c --- /dev/null +++ b/docs/re_format.7 @@ -0,0 +1,756 @@ +.\" $OpenBSD: re_format.7,v 1.14 2007/05/31 19:19:30 jmc Exp $ +.\" +.\" Copyright (c) 1997, Phillip F Knaack. All rights reserved. +.\" +.\" Copyright (c) 1992, 1993, 1994 Henry Spencer. +.\" Copyright (c) 1992, 1993, 1994 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Henry Spencer. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)re_format.7 8.3 (Berkeley) 3/20/94 +.\" +.Dd $Mdocdate: May 31 2007 $ +.Dt RE_FORMAT 7 +.Os +.Sh NAME +.Nm re_format +.Nd POSIX regular expressions +.Sh DESCRIPTION +Regular expressions (REs), +as defined in +.St -p1003.1-2004 , +come in two forms: +basic regular expressions +(BREs) +and extended regular expressions +(EREs). +Both forms of regular expressions are supported +by the interfaces described in +.Xr regex 3 . +Applications dealing with regular expressions +may use one or the other form +(or indeed both). +For example, +.Xr ed 1 +uses BREs, +whilst +.Xr egrep 1 +talks EREs. +Consult the manual page for the specific application to find out which +it uses. +.Pp +POSIX leaves some aspects of RE syntax and semantics open; +.Sq ** +marks decisions on these aspects that +may not be fully portable to other POSIX implementations. +.Pp +This manual page first describes regular expressions in general, +specifically extended regular expressions, +and then discusses differences between them and basic regular expressions. +.Sh EXTENDED REGULAR EXPRESSIONS +An ERE is one** or more non-empty** +.Em branches , +separated by +.Sq \*(Ba . +It matches anything that matches one of the branches. +.Pp +A branch is one** or more +.Em pieces , +concatenated. 
+It matches a match for the first, followed by a match for the second, etc. +.Pp +A piece is an +.Em atom +possibly followed by a single** +.Sq * , +.Sq + , +.Sq ?\& , +or +.Em bound . +An atom followed by +.Sq * +matches a sequence of 0 or more matches of the atom. +An atom followed by +.Sq + +matches a sequence of 1 or more matches of the atom. +An atom followed by +.Sq ?\& +matches a sequence of 0 or 1 matches of the atom. +.Pp +A bound is +.Sq { +followed by an unsigned decimal integer, +possibly followed by +.Sq ,\& +possibly followed by another unsigned decimal integer, +always followed by +.Sq } . +The integers must lie between 0 and +.Dv RE_DUP_MAX +(255**) inclusive, +and if there are two of them, the first may not exceed the second. +An atom followed by a bound containing one integer +.Ar i +and no comma matches +a sequence of exactly +.Ar i +matches of the atom. +An atom followed by a bound +containing one integer +.Ar i +and a comma matches +a sequence of +.Ar i +or more matches of the atom. +An atom followed by a bound +containing two integers +.Ar i +and +.Ar j +matches a sequence of +.Ar i +through +.Ar j +(inclusive) matches of the atom. +.Pp +An atom is a regular expression enclosed in +.Sq () +(matching a part of the regular expression), +an empty set of +.Sq () +(matching the null string)**, +a +.Em bracket expression +(see below), +.Sq .\& +(matching any single character), +.Sq ^ +(matching the null string at the beginning of a line), +.Sq $ +(matching the null string at the end of a line), +a +.Sq \e +followed by one of the characters +.Sq ^.[$()|*+?{\e +(matching that character taken as an ordinary character), +a +.Sq \e +followed by any other character** +(matching that character taken as an ordinary character, +as if the +.Sq \e +had not been present**), +or a single character with no other significance (matching that character). +A +.Sq { +followed by a character other than a digit is an ordinary character, +not the beginning of a bound**. +It is illegal to end an RE with +.Sq \e . +.Pp +A bracket expression is a list of characters enclosed in +.Sq [] . +It normally matches any single character from the list (but see below). +If the list begins with +.Sq ^ , +it matches any single character +.Em not +from the rest of the list +(but see below). +If two characters in the list are separated by +.Sq - , +this is shorthand for the full +.Em range +of characters between those two (inclusive) in the +collating sequence, e.g.\& +.Sq [0-9] +in ASCII matches any decimal digit. +It is illegal** for two ranges to share an endpoint, e.g.\& +.Sq a-c-e . +Ranges are very collating-sequence-dependent, +and portable programs should avoid relying on them. +.Pp +To include a literal +.Sq ]\& +in the list, make it the first character +(following a possible +.Sq ^ ) . +To include a literal +.Sq - , +make it the first or last character, +or the second endpoint of a range. +To use a literal +.Sq - +as the first endpoint of a range, +enclose it in +.Sq [. +and +.Sq .] +to make it a collating element (see below). +With the exception of these and some combinations using +.Sq [ +(see next paragraphs), +all other special characters, including +.Sq \e , +lose their special significance within a bracket expression. +.Pp +Within a bracket expression, a collating element +(a character, +a multi-character sequence that collates as if it were a single character, +or a collating-sequence name for either) +enclosed in +.Sq [. +and +.Sq .] 
+stands for the sequence of characters of that collating element. +The sequence is a single element of the bracket expression's list. +A bracket expression containing a multi-character collating element +can thus match more than one character, +e.g. if the collating sequence includes a +.Sq ch +collating element, +then the RE +.Sq [[.ch.]]*c +matches the first five characters of +.Sq chchcc . +.Pp +Within a bracket expression, a collating element enclosed in +.Sq [= +and +.Sq =] +is an equivalence class, standing for the sequences of characters +of all collating elements equivalent to that one, including itself. +(If there are no other equivalent collating elements, +the treatment is as if the enclosing delimiters were +.Sq [. +and +.Sq .] . ) +For example, if +.Sq x +and +.Sq y +are the members of an equivalence class, +then +.Sq [[=x=]] , +.Sq [[=y=]] , +and +.Sq [xy] +are all synonymous. +An equivalence class may not** be an endpoint of a range. +.Pp +Within a bracket expression, the name of a +.Em character class +enclosed +in +.Sq [: +and +.Sq :] +stands for the list of all characters belonging to that class. +Standard character class names are: +.Bd -literal -offset indent +alnum digit punct +alpha graph space +blank lower upper +cntrl print xdigit +.Ed +.Pp +These stand for the character classes defined in +.Xr ctype 3 . +A locale may provide others. +A character class may not be used as an endpoint of a range. +.Pp +There are two special cases** of bracket expressions: +the bracket expressions +.Sq [[:<:]] +and +.Sq [[:>:]] +match the null string at the beginning and end of a word, respectively. +A word is defined as a sequence of +characters starting and ending with a word character +which is neither preceded nor followed by +word characters. +A word character is an +.Em alnum +character (as defined by +.Xr ctype 3 ) +or an underscore. +This is an extension, +compatible with but not specified by POSIX, +and should be used with +caution in software intended to be portable to other systems. +.Pp +In the event that an RE could match more than one substring of a given +string, +the RE matches the one starting earliest in the string. +If the RE could match more than one substring starting at that point, +it matches the longest. +Subexpressions also match the longest possible substrings, subject to +the constraint that the whole match be as long as possible, +with subexpressions starting earlier in the RE taking priority over +ones starting later. +Note that higher-level subexpressions thus take priority over +their lower-level component subexpressions. +.Pp +Match lengths are measured in characters, not collating elements. +A null string is considered longer than no match at all. +For example, +.Sq bb* +matches the three middle characters of +.Sq abbbc ; +.Sq (wee|week)(knights|nights) +matches all ten characters of +.Sq weeknights ; +when +.Sq (.*).* +is matched against +.Sq abc , +the parenthesized subexpression matches all three characters; +and when +.Sq (a*)* +is matched against +.Sq bc , +both the whole RE and the parenthesized subexpression match the null string. +.Pp +If case-independent matching is specified, +the effect is much as if all case distinctions had vanished from the +alphabet. +When an alphabetic that exists in multiple cases appears as an +ordinary character outside a bracket expression, it is effectively +transformed into a bracket expression containing both cases, +e.g.\& +.Sq x +becomes +.Sq [xX] . 
+When it appears inside a bracket expression, +all case counterparts of it are added to the bracket expression, +so that, for example, +.Sq [x] +becomes +.Sq [xX] +and +.Sq [^x] +becomes +.Sq [^xX] . +.Pp +No particular limit is imposed on the length of REs**. +Programs intended to be portable should not employ REs longer +than 256 bytes, +as an implementation can refuse to accept such REs and remain +POSIX-compliant. +.Pp +The following is a list of extended regular expressions: +.Bl -tag -width Ds +.It Ar c +Any character +.Ar c +not listed below matches itself. +.It \e Ns Ar c +Any backslash-escaped character +.Ar c +matches itself. +.It \&. +Matches any single character that is not a newline +.Pq Sq \en . +.It Bq Ar char-class +Matches any single character in +.Ar char-class . +To include a +.Ql \&] +in +.Ar char-class , +it must be the first character. +A range of characters may be specified by separating the end characters +of the range with a +.Ql - ; +e.g.\& +.Ar a-z +specifies the lower case characters. +The following literal expressions can also be used in +.Ar char-class +to specify sets of characters: +.Bd -unfilled -offset indent +[:alnum:] [:cntrl:] [:lower:] [:space:] +[:alpha:] [:digit:] [:print:] [:upper:] +[:blank:] [:graph:] [:punct:] [:xdigit:] +.Ed +.Pp +If +.Ql - +appears as the first or last character of +.Ar char-class , +then it matches itself. +All other characters in +.Ar char-class +match themselves. +.Pp +Patterns in +.Ar char-class +of the form +.Eo [. +.Ar col-elm +.Ec .]\& +or +.Eo [= +.Ar col-elm +.Ec =]\& , +where +.Ar col-elm +is a collating element, are interpreted according to +.Xr setlocale 3 +.Pq not currently supported . +.It Bq ^ Ns Ar char-class +Matches any single character, other than newline, not in +.Ar char-class . +.Ar char-class +is defined as above. +.It ^ +If +.Sq ^ +is the first character of a regular expression, then it +anchors the regular expression to the beginning of a line. +Otherwise, it matches itself. +.It $ +If +.Sq $ +is the last character of a regular expression, +it anchors the regular expression to the end of a line. +Otherwise, it matches itself. +.It [[:<:]] +Anchors the single character regular expression or subexpression +immediately following it to the beginning of a word. +.It [[:>:]] +Anchors the single character regular expression or subexpression +immediately following it to the end of a word. +.It Pq Ar re +Defines a subexpression +.Ar re . +Any set of characters enclosed in parentheses +matches whatever the set of characters without parentheses matches +(that is a long-winded way of saying the constructs +.Sq (re) +and +.Sq re +match identically). +.It * +Matches the single character regular expression or subexpression +immediately preceding it zero or more times. +If +.Sq * +is the first character of a regular expression or subexpression, +then it matches itself. +The +.Sq * +operator sometimes yields unexpected results. +For example, the regular expression +.Ar b* +matches the beginning of the string +.Qq abbb +(as opposed to the substring +.Qq bbb ) , +since a null match is the only leftmost match. +.It + +Matches the singular character regular expression +or subexpression immediately preceding it +one or more times. +.It ? +Matches the singular character regular expression +or subexpression immediately preceding it +0 or 1 times. 
+.Sm off +.It Xo +.Pf { Ar n , m No }\ \& +.Pf { Ar n , No }\ \& +.Pf { Ar n No } +.Xc +.Sm on +Matches the single character regular expression or subexpression +immediately preceding it at least +.Ar n +and at most +.Ar m +times. +If +.Ar m +is omitted, then it matches at least +.Ar n +times. +If the comma is also omitted, then it matches exactly +.Ar n +times. +.It \*(Ba +Used to separate patterns. +For example, +the pattern +.Sq cat\*(Badog +matches either +.Sq cat +or +.Sq dog . +.El +.Sh BASIC REGULAR EXPRESSIONS +Basic regular expressions differ in several respects: +.Bl -bullet -offset 3n +.It +.Sq \*(Ba , +.Sq + , +and +.Sq ?\& +are ordinary characters and there is no equivalent +for their functionality. +.It +The delimiters for bounds are +.Sq \e{ +and +.Sq \e} , +with +.Sq { +and +.Sq } +by themselves ordinary characters. +.It +The parentheses for nested subexpressions are +.Sq \e( +and +.Sq \e) , +with +.Sq ( +and +.Sq )\& +by themselves ordinary characters. +.It +.Sq ^ +is an ordinary character except at the beginning of the +RE or** the beginning of a parenthesized subexpression. +.It +.Sq $ +is an ordinary character except at the end of the +RE or** the end of a parenthesized subexpression. +.It +.Sq * +is an ordinary character if it appears at the beginning of the +RE or the beginning of a parenthesized subexpression +(after a possible leading +.Sq ^ ) . +.It +Finally, there is one new type of atom, a +.Em back-reference : +.Sq \e +followed by a non-zero decimal digit +.Ar d +matches the same sequence of characters matched by the +.Ar d Ns th +parenthesized subexpression +(numbering subexpressions by the positions of their opening parentheses, +left to right), +so that, for example, +.Sq \e([bc]\e)\e1 +matches +.Sq bb\& +or +.Sq cc +but not +.Sq bc . +.El +.Pp +The following is a list of basic regular expressions: +.Bl -tag -width Ds +.It Ar c +Any character +.Ar c +not listed below matches itself. +.It \e Ns Ar c +Any backslash-escaped character +.Ar c , +except for +.Sq { , +.Sq } , +.Sq \&( , +and +.Sq \&) , +matches itself. +.It \&. +Matches any single character that is not a newline +.Pq Sq \en . +.It Bq Ar char-class +Matches any single character in +.Ar char-class . +To include a +.Ql \&] +in +.Ar char-class , +it must be the first character. +A range of characters may be specified by separating the end characters +of the range with a +.Ql - ; +e.g.\& +.Ar a-z +specifies the lower case characters. +The following literal expressions can also be used in +.Ar char-class +to specify sets of characters: +.Bd -unfilled -offset indent +[:alnum:] [:cntrl:] [:lower:] [:space:] +[:alpha:] [:digit:] [:print:] [:upper:] +[:blank:] [:graph:] [:punct:] [:xdigit:] +.Ed +.Pp +If +.Ql - +appears as the first or last character of +.Ar char-class , +then it matches itself. +All other characters in +.Ar char-class +match themselves. +.Pp +Patterns in +.Ar char-class +of the form +.Eo [. +.Ar col-elm +.Ec .]\& +or +.Eo [= +.Ar col-elm +.Ec =]\& , +where +.Ar col-elm +is a collating element, are interpreted according to +.Xr setlocale 3 +.Pq not currently supported . +.It Bq ^ Ns Ar char-class +Matches any single character, other than newline, not in +.Ar char-class . +.Ar char-class +is defined as above. +.It ^ +If +.Sq ^ +is the first character of a regular expression, then it +anchors the regular expression to the beginning of a line. +Otherwise, it matches itself. +.It $ +If +.Sq $ +is the last character of a regular expression, +it anchors the regular expression to the end of a line. 
+Otherwise, it matches itself. +.It [[:<:]] +Anchors the single character regular expression or subexpression +immediately following it to the beginning of a word. +.It [[:>:]] +Anchors the single character regular expression or subexpression +immediately following it to the end of a word. +.It \e( Ns Ar re Ns \e) +Defines a subexpression +.Ar re . +Subexpressions may be nested. +A subsequent backreference of the form +.Pf \e Ns Ar n , +where +.Ar n +is a number in the range [1,9], expands to the text matched by the +.Ar n Ns th +subexpression. +For example, the regular expression +.Ar \e(.*\e)\e1 +matches any string consisting of identical adjacent substrings. +Subexpressions are ordered relative to their left delimiter. +.It * +Matches the single character regular expression or subexpression +immediately preceding it zero or more times. +If +.Sq * +is the first character of a regular expression or subexpression, +then it matches itself. +The +.Sq * +operator sometimes yields unexpected results. +For example, the regular expression +.Ar b* +matches the beginning of the string +.Qq abbb +(as opposed to the substring +.Qq bbb ) , +since a null match is the only leftmost match. +.Sm off +.It Xo +.Pf \e{ Ar n , m No \e}\ \& +.Pf \e{ Ar n , No \e}\ \& +.Pf \e{ Ar n No \e} +.Xc +.Sm on +Matches the single character regular expression or subexpression +immediately preceding it at least +.Ar n +and at most +.Ar m +times. +If +.Ar m +is omitted, then it matches at least +.Ar n +times. +If the comma is also omitted, then it matches exactly +.Ar n +times. +.El +.Sh SEE ALSO +.Xr ctype 3 , +.Xr regex 3 +.Sh STANDARDS +.St -p1003.1-2004 : +Base Definitions, Chapter 9 (Regular Expressions). +.Sh BUGS +Having two kinds of REs is a botch. +.Pp +The current POSIX spec says that +.Sq )\& +is an ordinary character in the absence of an unmatched +.Sq ( ; +this was an unintentional result of a wording error, +and change is likely. +Avoid relying on it. +.Pp +Back-references are a dreadful botch, +posing major problems for efficient implementations. +They are also somewhat vaguely defined +(does +.Sq a\e(\e(b\e)*\e2\e)*d +match +.Sq abbbd ? ) . +Avoid using them. +.Pp +POSIX's specification of case-independent matching is vague. +The +.Dq one case implies all cases +definition given above +is the current consensus among implementors as to the right interpretation. +.Pp +The syntax for word boundaries is incredibly ugly. diff --git a/docs/tutorial/JITTutorial1.html b/docs/tutorial/JITTutorial1.html index ac3958e64273b..3b7b8dea1accb 100644 --- a/docs/tutorial/JITTutorial1.html +++ b/docs/tutorial/JITTutorial1.html @@ -107,7 +107,7 @@ first chunk of our makeLLVMModule():

     Module* makeLLVMModule() {
       // Module Construction
    -  Module* mod = new Module("test");
    +  Module* mod = new Module("test", getGlobalContext());
     
    @@ -153,7 +153,7 @@ function will interoperate properly with C code, which is a good thing.

    -  BasicBlock* block = BasicBlock::Create("entry", mul_add);
    +  BasicBlock* block = BasicBlock::Create(getGlobalContext(), "entry", mul_add);
       IRBuilder<> builder(block);
     
    @@ -200,7 +200,7 @@ function will interoperate properly with C code, which is a good thing.

    Owen Anderson
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2007-10-17 11:05:13 -0700 (Wed, 17 Oct 2007) $ + Last modified: $Date: 2009-07-21 11:05:13 -0700 (Tue, 21 Jul 2009) $ diff --git a/docs/tutorial/JITTutorial2.html b/docs/tutorial/JITTutorial2.html index c2483e4d01bc4..504d96597b00d 100644 --- a/docs/tutorial/JITTutorial2.html +++ b/docs/tutorial/JITTutorial2.html @@ -100,11 +100,11 @@ Module* makeLLVMModule() {
    -  BasicBlock* entry = BasicBlock::Create("entry", gcd);
    -  BasicBlock* ret = BasicBlock::Create("return", gcd);
    -  BasicBlock* cond_false = BasicBlock::Create("cond_false", gcd);
    -  BasicBlock* cond_true = BasicBlock::Create("cond_true", gcd);
    -  BasicBlock* cond_false_2 = BasicBlock::Create("cond_false", gcd);
+  BasicBlock* entry = BasicBlock::Create(getGlobalContext(), "entry", gcd);
+  BasicBlock* ret = BasicBlock::Create(getGlobalContext(), "return", gcd);
+  BasicBlock* cond_false = BasicBlock::Create(getGlobalContext(), "cond_false", gcd);
+  BasicBlock* cond_true = BasicBlock::Create(getGlobalContext(), "cond_true", gcd);
+  BasicBlock* cond_false_2 = BasicBlock::Create(getGlobalContext(), "cond_false", gcd);
     
    diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html index 018d0be76032f..5bcd0dd2c7ff2 100644 --- a/docs/tutorial/LangImpl2.html +++ b/docs/tutorial/LangImpl2.html @@ -84,7 +84,7 @@ public: class NumberExprAST : public ExprAST { double Val; public: - explicit NumberExprAST(double val) : Val(val) {} + NumberExprAST(double val) : Val(val) {} }; @@ -107,7 +107,7 @@ in the basic form of the Kaleidoscope language: class VariableExprAST : public ExprAST { std::string Name; public: - explicit VariableExprAST(const std::string &name) : Name(name) {} + VariableExprAST(const std::string &name) : Name(name) {} }; /// BinaryExprAST - Expression class for a binary operator. @@ -333,9 +333,9 @@ static ExprAST *ParseIdentifierExpr() { ExprAST *Arg = ParseExpression(); if (!Arg) return 0; Args.push_back(Arg); - + if (CurTok == ')') break; - + if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); getNextToken(); @@ -815,6 +815,7 @@ course.) To build this, just compile with:

     #include <cstdio>
    +#include <cstdlib>
     #include <string>
     #include <map>
     #include <vector>
    @@ -832,7 +833,7 @@ enum Token {
       tok_def = -2, tok_extern = -3,
     
       // primary
    -  tok_identifier = -4, tok_number = -5,
    +  tok_identifier = -4, tok_number = -5
     };
     
     static std::string IdentifierStr;  // Filled in if tok_identifier
    @@ -900,14 +901,14 @@ public:
     class NumberExprAST : public ExprAST {
       double Val;
     public:
    -  explicit NumberExprAST(double val) : Val(val) {}
    +  NumberExprAST(double val) : Val(val) {}
     };
     
     /// VariableExprAST - Expression class for referencing a variable, like "a".
     class VariableExprAST : public ExprAST {
       std::string Name;
     public:
    -  explicit VariableExprAST(const std::string &name) : Name(name) {}
    +  VariableExprAST(const std::string &name) : Name(name) {}
     };
     
     /// BinaryExprAST - Expression class for a binary operator.
    @@ -1003,9 +1004,9 @@ static ExprAST *ParseIdentifierExpr() {
           ExprAST *Arg = ParseExpression();
           if (!Arg) return 0;
           Args.push_back(Arg);
    -    
    +
           if (CurTok == ')') break;
    -    
    +
           if (CurTok != ',')
             return Error("Expected ')' or ',' in argument list");
           getNextToken();
    @@ -1149,7 +1150,7 @@ static PrototypeAST *ParseExtern() {
     //===----------------------------------------------------------------------===//
     
     static void HandleDefinition() {
    -  if (FunctionAST *F = ParseDefinition()) {
    +  if (ParseDefinition()) {
         fprintf(stderr, "Parsed a function definition.\n");
       } else {
         // Skip token for error recovery.
    @@ -1158,7 +1159,7 @@ static void HandleDefinition() {
     }
     
     static void HandleExtern() {
    -  if (PrototypeAST *P = ParseExtern()) {
    +  if (ParseExtern()) {
         fprintf(stderr, "Parsed an extern\n");
       } else {
         // Skip token for error recovery.
    @@ -1168,7 +1169,7 @@ static void HandleExtern() {
     
     static void HandleTopLevelExpression() {
       // Evaluate a top-level expression into an anonymous function.
    -  if (FunctionAST *F = ParseTopLevelExpr()) {
    +  if (ParseTopLevelExpr()) {
         fprintf(stderr, "Parsed a top-level expr\n");
       } else {
         // Skip token for error recovery.
    @@ -1206,7 +1207,9 @@ int main() {
       fprintf(stderr, "ready> ");
       getNextToken();
     
    +  // Run the main "interpreter loop" now.
       MainLoop();
    +
       return 0;
     }
     
    diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html index faf11d0592bed..e3d2117c4e686 100644 --- a/docs/tutorial/LangImpl3.html +++ b/docs/tutorial/LangImpl3.html @@ -79,7 +79,7 @@ public: class NumberExprAST : public ExprAST { double Val; public: - explicit NumberExprAST(double val) : Val(val) {} + NumberExprAST(double val) : Val(val) {} virtual Value *Codegen(); }; ... @@ -115,7 +115,7 @@ undeclared parameter):

    Value *ErrorV(const char *Str) { Error(Str); return 0; } static Module *TheModule; -static IRBuilder<> Builder; +static IRBuilder<> Builder(getGlobalContext()); static std::map<std::string, Value*> NamedValues;
    @@ -159,7 +159,7 @@ we'll do numeric literals:

     Value *NumberExprAST::Codegen() {
    -  return ConstantFP::get(APFloat(Val));
    +  return ConstantFP::get(getGlobalContext(), APFloat(Val));
     }
     
    @@ -170,7 +170,7 @@ internally (APFloat has the capability of holding floating point constants of Arbitrary Precision). This code basically just creates and returns a ConstantFP. Note that in the LLVM IR that constants are all uniqued together and shared. For this reason, the API -uses "the foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".

    +uses "the Context.get..." idiom instead of "new foo(..)" or "foo::Create(..)".

    @@ -183,7 +183,7 @@ Value *VariableExprAST::Codegen() {
     

    References to variables are also quite simple using LLVM. In the simple version -of Kaleidoscope, we assume that the variable has already been emited somewhere +of Kaleidoscope, we assume that the variable has already been emitted somewhere and its value is available. In practice, the only values that can be in the NamedValues map are function arguments. This code simply checks to see that the specified name is in the map (if not, an @@ -206,7 +206,8 @@ Value *BinaryExprAST::Codegen() { case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 - return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp"); + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); default: return ErrorV("invalid binary operator"); } } @@ -307,8 +308,10 @@ bodies and external function declarations. The code starts with:

     Function *PrototypeAST::Codegen() {
       // Make the function type:  double(double,double) etc.
    -  std::vector<const Type*> Doubles(Args.size(), Type::DoubleTy);
    -  FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
    +  std::vector<const Type*> Doubles(Args.size(),
    +                                   Type::getDoubleTy(getGlobalContext()));
    +  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
    +                                       Doubles, false);
       
       Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
     
    @@ -320,10 +323,10 @@ really talks about the external interface for a function (not the value computed by an expression), it makes sense for it to return the LLVM Function it corresponds to when codegen'd.

    -

    The call to FunctionType::get creates +

    The call to Context.get creates the FunctionType that should be used for a given Prototype. Since all function arguments in Kaleidoscope are of type double, the first line creates -a vector of "N" LLVM double types. It then uses the FunctionType::get +a vector of "N" LLVM double types. It then uses the Context.get method to create a function type that takes "N" doubles as arguments, returns one double as a result, and that is not vararg (the false parameter indicates this). Note that Types in LLVM are uniqued just like Constants are, so you @@ -359,7 +362,7 @@ definition of this function.

    first, we want to allow 'extern'ing a function more than once, as long as the prototypes for the externs match (since all arguments have the same type, we just have to check that the number of arguments match). Second, we want to -allow 'extern'ing a function and then definining a body for it. This is useful +allow 'extern'ing a function and then defining a body for it. This is useful when defining mutually recursive functions.

    In order to implement this, the code above first checks to see if there is @@ -439,7 +442,7 @@ is an LLVM Function object that is ready to go for us.

       // Create a new basic block to start insertion into.
    -  BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
    +  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);
       
       if (Value *RetVal = Body->Codegen()) {
    @@ -461,9 +464,10 @@ block at this point.  We'll fix this in Chapter 5 :
       if (Value *RetVal = Body->Codegen()) {
         // Finish off the function.
         Builder.CreateRet(RetVal);
    -    
    +
         // Validate the generated code, checking for consistency.
         verifyFunction(*TheFunction);
    +
         return TheFunction;
       }
     
    @@ -682,6 +686,7 @@ our makefile/command line about which options to use:

    // See example below. #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Support/IRBuilder.h" @@ -704,7 +709,7 @@ enum Token { tok_def = -2, tok_extern = -3, // primary - tok_identifier = -4, tok_number = -5, + tok_identifier = -4, tok_number = -5 }; static std::string IdentifierStr; // Filled in if tok_identifier @@ -773,7 +778,7 @@ public: class NumberExprAST : public ExprAST { double Val; public: - explicit NumberExprAST(double val) : Val(val) {} + NumberExprAST(double val) : Val(val) {} virtual Value *Codegen(); }; @@ -781,7 +786,7 @@ public: class VariableExprAST : public ExprAST { std::string Name; public: - explicit VariableExprAST(const std::string &name) : Name(name) {} + VariableExprAST(const std::string &name) : Name(name) {} virtual Value *Codegen(); }; @@ -806,7 +811,8 @@ public: }; /// PrototypeAST - This class represents the "prototype" for a function, -/// which captures its argument names as well as if it is an operator. +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). class PrototypeAST { std::string Name; std::vector<std::string> Args; @@ -833,7 +839,7 @@ public: //===----------------------------------------------------------------------===// /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current -/// token the parser it looking at. getNextToken reads another token from the +/// token the parser is looking at. getNextToken reads another token from the /// lexer and updates CurTok with its results. static int CurTok; static int getNextToken() { @@ -881,9 +887,9 @@ static ExprAST *ParseIdentifierExpr() { ExprAST *Arg = ParseExpression(); if (!Arg) return 0; Args.push_back(Arg); - + if (CurTok == ')') break; - + if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); getNextToken(); @@ -1027,13 +1033,13 @@ static PrototypeAST *ParseExtern() { //===----------------------------------------------------------------------===// static Module *TheModule; -static IRBuilder<> Builder; +static IRBuilder<> Builder(getGlobalContext()); static std::map<std::string, Value*> NamedValues; Value *ErrorV(const char *Str) { Error(Str); return 0; } Value *NumberExprAST::Codegen() { - return ConstantFP::get(APFloat(Val)); + return ConstantFP::get(getGlobalContext(), APFloat(Val)); } Value *VariableExprAST::Codegen() { @@ -1054,7 +1060,8 @@ Value *BinaryExprAST::Codegen() { case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 - return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp"); + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); default: return ErrorV("invalid binary operator"); } } @@ -1080,8 +1087,10 @@ Value *CallExprAST::Codegen() { Function *PrototypeAST::Codegen() { // Make the function type: double(double,double) etc. - std::vector<const Type*> Doubles(Args.size(), Type::DoubleTy); - FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false); + std::vector<const Type*> Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); @@ -1126,15 +1135,16 @@ Function *FunctionAST::Codegen() { return 0; // Create a new basic block to start insertion into. 
-  BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
   Builder.SetInsertPoint(BB);
 
   if (Value *RetVal = Body->Codegen()) {
     // Finish off the function.
     Builder.CreateRet(RetVal);
-    
+
     // Validate the generated code, checking for consistency.
     verifyFunction(*TheFunction);
+
     return TheFunction;
   }
@@ -1172,7 +1182,7 @@ static void HandleExtern() {
 }
 
 static void HandleTopLevelExpression() {
-  // Evaluate a top level expression into an anonymous function.
+  // Evaluate a top-level expression into an anonymous function.
   if (FunctionAST *F = ParseTopLevelExpr()) {
     if (Function *LF = F->Codegen()) {
       fprintf(stderr, "Read top-level expression:");
@@ -1190,7 +1200,7 @@ static void MainLoop() {
     fprintf(stderr, "ready> ");
     switch (CurTok) {
     case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top level semicolons.
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
     case tok_def:    HandleDefinition(); break;
     case tok_extern: HandleExtern(); break;
     default:         HandleTopLevelExpression(); break;
@@ -1198,8 +1208,6 @@ static void MainLoop() {
   }
 }
 
-
-
 //===----------------------------------------------------------------------===//
 // "Library" functions that can be "extern'd" from user code.
 //===----------------------------------------------------------------------===//
@@ -1216,7 +1224,7 @@ double putchard(double X) {
 //===----------------------------------------------------------------------===//
 
 int main() {
-  TheModule = new Module("my cool jit");
+  LLVMContext &Context = getGlobalContext();
 
   // Install standard binary operators.
   // 1 is lowest precedence.
@@ -1229,8 +1237,15 @@ int main() {
   fprintf(stderr, "ready> ");
   getNextToken();
 
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  // Run the main "interpreter loop" now.
   MainLoop();
+
+  // Print out all of the generated code.
   TheModule->dump();
+
   return 0;
 }
 
@@ -1248,7 +1263,7 @@ int main() {
   Chris Lattner
   The LLVM Compiler Infrastructure
-  Last modified: $Date: 2007-10-17 11:05:13 -0700 (Wed, 17 Oct 2007) $
+  Last modified: $Date: 2009-07-21 11:05:13 -0700 (Tue, 21 Jul 2009) $
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index 9a3bfd21471e7..3188135384e0e 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -171,26 +171,30 @@ add a set of optimizations to run. The code looks like this:

    -    ExistingModuleProvider OurModuleProvider(TheModule);
    -    FunctionPassManager OurFPM(&OurModuleProvider);
    -      
    -    // Set up the optimizer pipeline.  Start with registering info about how the
    -    // target lays out data structures.
    -    OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    -    // Do simple "peephole" optimizations and bit-twiddling optzns.
    -    OurFPM.add(createInstructionCombiningPass());
    -    // Reassociate expressions.
    -    OurFPM.add(createReassociatePass());
    -    // Eliminate Common SubExpressions.
    -    OurFPM.add(createGVNPass());
    -    // Simplify the control flow graph (deleting unreachable blocks, etc).
    -    OurFPM.add(createCFGSimplificationPass());
    -
    -    // Set the global so the code gen can use this.
    -    TheFPM = &OurFPM;
    -
    -    // Run the main "interpreter loop" now.
    -    MainLoop();
    +  ExistingModuleProvider *OurModuleProvider =
    +      new ExistingModuleProvider(TheModule);
    +
    +  FunctionPassManager OurFPM(OurModuleProvider);
    +
    +  // Set up the optimizer pipeline.  Start with registering info about how the
    +  // target lays out data structures.
    +  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    +  // Do simple "peephole" optimizations and bit-twiddling optzns.
    +  OurFPM.add(createInstructionCombiningPass());
    +  // Reassociate expressions.
    +  OurFPM.add(createReassociatePass());
    +  // Eliminate Common SubExpressions.
    +  OurFPM.add(createGVNPass());
    +  // Simplify the control flow graph (deleting unreachable blocks, etc).
    +  OurFPM.add(createCFGSimplificationPass());
    +
    +  OurFPM.doInitialization();
    +
    +  // Set the global so the code gen can use this.
    +  TheFPM = &OurFPM;
    +
    +  // Run the main "interpreter loop" now.
    +  MainLoop();
     
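Once the pipeline above is assembled, each function is pushed through it right after codegen. A minimal sketch of that step, assuming the tutorial's TheFPM global and the 2.6-era FunctionPassManager API (OptimizeFunction is a hypothetical wrapper; the tutorial itself calls TheFPM->run(*TheFunction) directly inside FunctionAST::Codegen()):

    #include "llvm/Function.h"
    #include "llvm/PassManager.h"

    static llvm::FunctionPassManager *TheFPM;  // points at OurFPM, as above

    void OptimizeFunction(llvm::Function *F) {
      // Runs instcombine, reassociate, GVN and simplifycfg over just this
      // function; returns true if any pass changed the IR.
      bool Changed = TheFPM->run(*F);
      (void)Changed;
    }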
@@ -205,7 +209,7 @@ requires a pointer to the Module (through the ModuleProvider) to
 construct itself.  Once it is set up, we use a series of "add" calls to add
 a bunch of LLVM passes.  The first pass is basically boilerplate, it adds a pass
 so that later optimizations know how the data structures in the program are
-layed out.  The "TheExecutionEngine" variable is related to the JIT,
+laid out.  The "TheExecutionEngine" variable is related to the JIT,
 which we will get to in the next section.

 In this case, we choose to add 4 optimization passes.  The passes we chose
@@ -298,8 +302,8 @@ by adding a global variable and a call in main:

 ...
 int main() {
   ..
-  // Create the JIT.
-  TheExecutionEngine = ExecutionEngine::create(TheModule);
+  // Create the JIT.  This takes ownership of the module and module provider.
+  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
   ..
 }
@@ -320,7 +324,7 @@ top-level expression to look like this:

     static void HandleTopLevelExpression() {
    -  // Evaluate a top level expression into an anonymous function.
    +  // Evaluate a top-level expression into an anonymous function.
       if (FunctionAST *F = ParseTopLevelExpr()) {
         if (Function *LF = F->Codegen()) {
           LF->dump();  // Dump the function for exposition purposes.
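Two hunks up, ExecutionEngine::create gave way to EngineBuilder. A sketch of the same construction with error reporting attached; setErrorStr is assumed available in this LLVM revision, and OurModuleProvider comes from the surrounding code:

    #include <cstdio>
    #include <string>
    #include "llvm/ExecutionEngine/ExecutionEngine.h"

    std::string ErrStr;
    llvm::ExecutionEngine *TheExecutionEngine =
        llvm::EngineBuilder(OurModuleProvider).setErrorStr(&ErrStr).create();
    if (!TheExecutionEngine) {
      // No JIT was linked in (or target init failed); ErrStr says why.
      fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
    }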
    @@ -330,7 +334,7 @@ static void HandleTopLevelExpression() {
           
           // Cast it to the right type (takes no arguments, returns a double) so we
           // can call it as a native function.
    -      double (*FP)() = (double (*)())FPtr;
    +      double (*FP)() = (double (*)())(intptr_t)FPtr;
           fprintf(stderr, "Evaluated to %f\n", FP());
         }
     
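The extra (intptr_t) hop in the cast above is deliberate: getPointerToFunction() hands back a void*, and ISO C++ has no direct conversion from an object pointer to a function pointer, so the cast is routed through an integer type instead. In isolation:

    #include <stdint.h>

    void *FPtr = 0;  // in the tutorial: TheExecutionEngine->getPointerToFunction(LF)
    double (*FP)() = (double (*)())(intptr_t)FPtr;  // two-step, pedantic-clean cast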
    @@ -359,7 +363,7 @@ entry:

 Well this looks like it is basically working.  The dump of the function
 shows the "no argument function that always returns double" that we synthesize
-for each top level expression that is typed in.  This demonstrates very basic
+for each top-level expression that is typed in.  This demonstrates very basic
 functionality, but can we do more?

    @@ -495,7 +499,7 @@ LLVM JIT and optimizer. To build this example, use:
        # Compile
    -   g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
    +   g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit interpreter native` -O3 -o toy
        # Run
        ./toy
     
    @@ -512,11 +516,15 @@ at runtime.

     #include "llvm/DerivedTypes.h"
     #include "llvm/ExecutionEngine/ExecutionEngine.h"
    +#include "llvm/ExecutionEngine/Interpreter.h"
    +#include "llvm/ExecutionEngine/JIT.h"
    +#include "llvm/LLVMContext.h"
     #include "llvm/Module.h"
     #include "llvm/ModuleProvider.h"
     #include "llvm/PassManager.h"
     #include "llvm/Analysis/Verifier.h"
     #include "llvm/Target/TargetData.h"
    +#include "llvm/Target/TargetSelect.h"
     #include "llvm/Transforms/Scalar.h"
     #include "llvm/Support/IRBuilder.h"
     #include <cstdio>
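A note on the three new ExecutionEngine-related headers just added: JIT.h and Interpreter.h exist largely for their static constructors, which force the JIT and interpreter implementations to be linked into the binary; without at least one of them, EngineBuilder::create() can return null. TargetSelect.h supplies InitializeNativeTarget(), which must run before the JIT is created. Roughly (a sketch, not the full tutorial main):

    #include "llvm/ExecutionEngine/JIT.h"   // forces the JIT to be linked in
    #include "llvm/Target/TargetSelect.h"

    int main() {
      llvm::InitializeNativeTarget();       // register the host code generator
      // ... build the module, then EngineBuilder(...).create() ...
    }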
    @@ -538,7 +546,7 @@ enum Token {
       tok_def = -2, tok_extern = -3,
     
       // primary
    -  tok_identifier = -4, tok_number = -5,
    +  tok_identifier = -4, tok_number = -5
     };
     
     static std::string IdentifierStr;  // Filled in if tok_identifier
    @@ -640,7 +648,8 @@ public:
     };
     
     /// PrototypeAST - This class represents the "prototype" for a function,
    -/// which captures its argument names as well as if it is an operator.
    +/// which captures its name, and its argument names (thus implicitly the number
    +/// of arguments the function takes).
     class PrototypeAST {
       std::string Name;
       std::vector<std::string> Args;
    @@ -667,7 +676,7 @@ public:
     //===----------------------------------------------------------------------===//
     
     /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    -/// token the parser it looking at.  getNextToken reads another token from the
    +/// token the parser is looking at.  getNextToken reads another token from the
     /// lexer and updates CurTok with its results.
     static int CurTok;
     static int getNextToken() {
    @@ -715,9 +724,9 @@ static ExprAST *ParseIdentifierExpr() {
           ExprAST *Arg = ParseExpression();
           if (!Arg) return 0;
           Args.push_back(Arg);
    -    
    +
           if (CurTok == ')') break;
    -    
    +
           if (CurTok != ',')
             return Error("Expected ')' or ',' in argument list");
           getNextToken();
    @@ -861,14 +870,14 @@ static PrototypeAST *ParseExtern() {
     //===----------------------------------------------------------------------===//
     
     static Module *TheModule;
    -static IRBuilder<> Builder;
    +static IRBuilder<> Builder(getGlobalContext());
     static std::map<std::string, Value*> NamedValues;
     static FunctionPassManager *TheFPM;
     
     Value *ErrorV(const char *Str) { Error(Str); return 0; }
     
     Value *NumberExprAST::Codegen() {
    -  return ConstantFP::get(APFloat(Val));
    +  return ConstantFP::get(getGlobalContext(), APFloat(Val));
     }
     
     Value *VariableExprAST::Codegen() {
    @@ -889,7 +898,8 @@ Value *BinaryExprAST::Codegen() {
       case '<':
         L = Builder.CreateFCmpULT(L, R, "cmptmp");
         // Convert bool 0/1 to double 0.0 or 1.0
    -    return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
    +    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
    +                                "booltmp");
       default: return ErrorV("invalid binary operator");
       }
     }
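The two-instruction shape above is worth spelling out: the comparison yields an i1, and since Kaleidoscope's only type is double, the bit is immediately widened back. Note the predicate choice, ULT, meaning "unordered or less than"; it is true when either operand is NaN, which is the permissive choice for a toy language (CreateFCmpOLT would be the ordered variant):

    L = Builder.CreateFCmpULT(L, R, "cmptmp");               // i1
    L = Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
                             "booltmp");                     // back to 0.0/1.0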
    @@ -915,8 +925,10 @@ Value *CallExprAST::Codegen() {
     
     Function *PrototypeAST::Codegen() {
       // Make the function type:  double(double,double) etc.
    -  std::vector<const Type*> Doubles(Args.size(), Type::DoubleTy);
    -  FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
    +  std::vector<const Type*> Doubles(Args.size(),
    +                                   Type::getDoubleTy(getGlobalContext()));
    +  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
    +                                       Doubles, false);
       
       Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
       
    @@ -961,7 +973,7 @@ Function *FunctionAST::Codegen() {
         return 0;
       
       // Create a new basic block to start insertion into.
    -  BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
    +  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);
       
       if (Value *RetVal = Body->Codegen()) {
    @@ -1013,7 +1025,7 @@ static void HandleExtern() {
     }
     
     static void HandleTopLevelExpression() {
    -  // Evaluate a top level expression into an anonymous function.
    +  // Evaluate a top-level expression into an anonymous function.
       if (FunctionAST *F = ParseTopLevelExpr()) {
         if (Function *LF = F->Codegen()) {
           // JIT the function, returning a function pointer.
    @@ -1021,7 +1033,7 @@ static void HandleTopLevelExpression() {
           
           // Cast it to the right type (takes no arguments, returns a double) so we
           // can call it as a native function.
    -      double (*FP)() = (double (*)())FPtr;
    +      double (*FP)() = (double (*)())(intptr_t)FPtr;
           fprintf(stderr, "Evaluated to %f\n", FP());
         }
       } else {
    @@ -1036,7 +1048,7 @@ static void MainLoop() {
         fprintf(stderr, "ready> ");
         switch (CurTok) {
         case tok_eof:    return;
    -    case ';':        getNextToken(); break;  // ignore top level semicolons.
    +    case ';':        getNextToken(); break;  // ignore top-level semicolons.
         case tok_def:    HandleDefinition(); break;
         case tok_extern: HandleExtern(); break;
         default:         HandleTopLevelExpression(); break;
    @@ -1044,8 +1056,6 @@ static void MainLoop() {
       }
     }
     
    -
    -
     //===----------------------------------------------------------------------===//
     // "Library" functions that can be "extern'd" from user code.
     //===----------------------------------------------------------------------===//
    @@ -1062,6 +1072,9 @@ double putchard(double X) {
     //===----------------------------------------------------------------------===//
     
     int main() {
    +  InitializeNativeTarget();
    +  LLVMContext &Context = getGlobalContext();
    +
       // Install standard binary operators.
       // 1 is lowest precedence.
       BinopPrecedence['<'] = 10;
    @@ -1074,39 +1087,41 @@ int main() {
       getNextToken();
     
       // Make the module, which holds all the code.
    -  TheModule = new Module("my cool jit");
    -  
    -  // Create the JIT.
    -  TheExecutionEngine = ExecutionEngine::create(TheModule);
    +  TheModule = new Module("my cool jit", Context);
    +
    +  ExistingModuleProvider *OurModuleProvider =
    +      new ExistingModuleProvider(TheModule);
    +
    +  // Create the JIT.  This takes ownership of the module and module provider.
    +  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
    +
    +  FunctionPassManager OurFPM(OurModuleProvider);
    +
    +  // Set up the optimizer pipeline.  Start with registering info about how the
    +  // target lays out data structures.
    +  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    +  // Do simple "peephole" optimizations and bit-twiddling optzns.
    +  OurFPM.add(createInstructionCombiningPass());
    +  // Reassociate expressions.
    +  OurFPM.add(createReassociatePass());
    +  // Eliminate Common SubExpressions.
    +  OurFPM.add(createGVNPass());
    +  // Simplify the control flow graph (deleting unreachable blocks, etc).
    +  OurFPM.add(createCFGSimplificationPass());
    +
    +  OurFPM.doInitialization();
    +
    +  // Set the global so the code gen can use this.
    +  TheFPM = &OurFPM;
    +
    +  // Run the main "interpreter loop" now.
    +  MainLoop();
    +
    +  TheFPM = 0;
    +
    +  // Print out all of the generated code.
    +  TheModule->dump();
     
    -  {
    -    ExistingModuleProvider OurModuleProvider(TheModule);
    -    FunctionPassManager OurFPM(&OurModuleProvider);
    -      
    -    // Set up the optimizer pipeline.  Start with registering info about how the
    -    // target lays out data structures.
    -    OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    -    // Do simple "peephole" optimizations and bit-twiddling optzns.
    -    OurFPM.add(createInstructionCombiningPass());
    -    // Reassociate expressions.
    -    OurFPM.add(createReassociatePass());
    -    // Eliminate Common SubExpressions.
    -    OurFPM.add(createGVNPass());
    -    // Simplify the control flow graph (deleting unreachable blocks, etc).
    -    OurFPM.add(createCFGSimplificationPass());
    -
    -    // Set the global so the code gen can use this.
    -    TheFPM = &OurFPM;
    -
    -    // Run the main "interpreter loop" now.
    -    MainLoop();
    -    
    -    TheFPM = 0;
    -    
    -    // Print out all of the generated code.
    -    TheModule->dump();
    -  }  // Free module provider (and thus the module) and pass manager.
    -                                   
       return 0;
     }
     
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index bf96b460465fd..f93b59be0dcac 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -288,8 +288,8 @@ into "t.ll" and run "llvm-as < t.ll | opt -analyze -view-cfg", a window will pop
 up and you'll see this graph:

 [image: "Example CFG"; this hunk only touches the figure's HTML markup]

 Another way to get this is to call "F->viewCFG()" or
 "F->viewCFGOnly()" (where F is a "Function*") either by
@@ -364,7 +364,7 @@ Value *IfExprAST::Codegen() {
   // Convert condition to a bool by comparing equal to 0.0.
   CondV = Builder.CreateFCmpONE(CondV, 
-                                ConstantFP::get(APFloat(0.0)),
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                 "ifcond");
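The viewCFG calls mentioned here can also be wired into the program itself for ad-hoc debugging. A hypothetical helper; Function::viewCFG() and viewCFGOnly() are real methods that render through Graphviz when it is installed:

    #include "llvm/Function.h"

    void ShowCFG(llvm::Function *F) {
      F->viewCFG();      // full CFG, instructions included
      F->viewCFGOnly();  // block structure only, handy for large functions
    }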

    @@ -379,9 +379,9 @@ value as a 1-bit (bool) value.

   // Create blocks for the then and else cases.  Insert the 'then' block at the
   // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create("else");
-  BasicBlock *MergeBB = BasicBlock::Create("ifcont");
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
 
   Builder.CreateCondBr(CondV, ThenBB, ElseBB);
@@ -472,7 +472,8 @@ are emitted, we can finish up with the merge code:

   // Emit merge block.
   TheFunction->getBasicBlockList().push_back(MergeBB);
   Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
 
   PN->addIncoming(ThenV, ThenBB);
   PN->addIncoming(ElseV, ElseBB);
@@ -727,7 +728,7 @@ block, but remember that the body code itself could consist of multiple blocks
   // block.
   Function *TheFunction = Builder.GetInsertBlock()->getParent();
   BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-  BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
 
   // Insert an explicit fall through from the current block to the LoopBB.
   Builder.CreateBr(LoopBB);
@@ -745,7 +746,7 @@ create an unconditional branch for the fall-through between the two blocks.

   Builder.SetInsertPoint(LoopBB);
 
   // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::DoubleTy, VarName.c_str());
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
   Variable->addIncoming(StartVal, PreheaderBB);
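The PHI node is deliberately built in two steps: the backedge value does not exist yet when the node is created, so the second incoming edge is only added after the body and the increment have been emitted. Condensed, with names from the surrounding code:

    PHINode *Variable =
        Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
    Variable->addIncoming(StartVal, PreheaderBB);  // value when entering the loop
    // ... emit the loop body, compute NextVar ...
    Variable->addIncoming(NextVar, LoopEndBB);     // value flowing around the backedge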
    @@ -796,7 +797,7 @@ references to it will naturally find it in the symbol table.

     if (StepVal == 0) return 0;
   } else {
     // If not specified, use 1.0.
-    StepVal = ConstantFP::get(APFloat(1.0));
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
   }
 
   Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
@@ -815,7 +816,7 @@ will be the value of the loop variable on the next iteration of the loop.

   // Convert condition to a bool by comparing equal to 0.0.
   EndCond = Builder.CreateFCmpONE(EndCond, 
-                                  ConstantFP::get(APFloat(0.0)),
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                   "loopcond");
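With the end condition now an i1, closing the loop is a single conditional branch, after which the insertion point moves past the loop (again using this chapter's names):

    Builder.CreateCondBr(EndCond, LoopBB, AfterBB);  // iterate, or fall out
    Builder.SetInsertPoint(AfterBB);                 // later code lands after the loop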
    @@ -828,7 +829,7 @@ statement.

       // Create the "after loop" block and insert it.
       BasicBlock *LoopEndBB = Builder.GetInsertBlock();
    -  BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
    +  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
       
       // Insert the conditional branch into the end of LoopEndBB.
       Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
    @@ -856,7 +857,7 @@ the loop again and exiting the loop.  Any future code is emitted in the
         NamedValues.erase(VarName);
       
       // for expr always returns 0.0.
    -  return Constant::getNullValue(Type::DoubleTy);
    +  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
     }
     
    @@ -901,11 +902,15 @@ if/then/else and for expressions.. To build this example, use:
     #include "llvm/DerivedTypes.h"
     #include "llvm/ExecutionEngine/ExecutionEngine.h"
    +#include "llvm/ExecutionEngine/Interpreter.h"
    +#include "llvm/ExecutionEngine/JIT.h"
    +#include "llvm/LLVMContext.h"
     #include "llvm/Module.h"
     #include "llvm/ModuleProvider.h"
     #include "llvm/PassManager.h"
     #include "llvm/Analysis/Verifier.h"
     #include "llvm/Target/TargetData.h"
    +#include "llvm/Target/TargetSelect.h"
     #include "llvm/Transforms/Scalar.h"
     #include "llvm/Support/IRBuilder.h"
     #include <cstdio>
    @@ -1058,7 +1063,8 @@ public:
     };
     
     /// PrototypeAST - This class represents the "prototype" for a function,
    -/// which captures its argument names as well as if it is an operator.
    +/// which captures its name, and its argument names (thus implicitly the number
    +/// of arguments the function takes).
     class PrototypeAST {
       std::string Name;
       std::vector<std::string> Args;
    @@ -1085,7 +1091,7 @@ public:
     //===----------------------------------------------------------------------===//
     
     /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    -/// token the parser it looking at.  getNextToken reads another token from the
    +/// token the parser is looking at.  getNextToken reads another token from the
     /// lexer and updates CurTok with its results.
     static int CurTok;
     static int getNextToken() {
    @@ -1133,9 +1139,9 @@ static ExprAST *ParseIdentifierExpr() {
           ExprAST *Arg = ParseExpression();
           if (!Arg) return 0;
           Args.push_back(Arg);
    -      
    +
           if (CurTok == ')') break;
    -      
    +
           if (CurTok != ',')
             return Error("Expected ')' or ',' in argument list");
           getNextToken();
    @@ -1235,7 +1241,6 @@ static ExprAST *ParseForExpr() {
       return new ForExprAST(IdName, Start, End, Step, Body);
     }
     
    -
     /// primary
     ///   ::= identifierexpr
     ///   ::= numberexpr
    @@ -1352,14 +1357,14 @@ static PrototypeAST *ParseExtern() {
     //===----------------------------------------------------------------------===//
     
     static Module *TheModule;
    -static IRBuilder<> Builder;
    +static IRBuilder<> Builder(getGlobalContext());
     static std::map<std::string, Value*> NamedValues;
     static FunctionPassManager *TheFPM;
     
     Value *ErrorV(const char *Str) { Error(Str); return 0; }
     
     Value *NumberExprAST::Codegen() {
    -  return ConstantFP::get(APFloat(Val));
    +  return ConstantFP::get(getGlobalContext(), APFloat(Val));
     }
     
     Value *VariableExprAST::Codegen() {
    @@ -1380,7 +1385,8 @@ Value *BinaryExprAST::Codegen() {
       case '<':
         L = Builder.CreateFCmpULT(L, R, "cmptmp");
         // Convert bool 0/1 to double 0.0 or 1.0
    -    return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
    +    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
    +                                "booltmp");
       default: return ErrorV("invalid binary operator");
       }
     }
    @@ -1410,16 +1416,16 @@ Value *IfExprAST::Codegen() {
       
       // Convert condition to a bool by comparing equal to 0.0.
       CondV = Builder.CreateFCmpONE(CondV, 
    -                                ConstantFP::get(APFloat(0.0)),
    +                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                     "ifcond");
       
       Function *TheFunction = Builder.GetInsertBlock()->getParent();
       
       // Create blocks for the then and else cases.  Insert the 'then' block at the
       // end of the function.
    -  BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
    -  BasicBlock *ElseBB = BasicBlock::Create("else");
    -  BasicBlock *MergeBB = BasicBlock::Create("ifcont");
    +  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
    +  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
    +  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
       
       Builder.CreateCondBr(CondV, ThenBB, ElseBB);
       
    @@ -1447,7 +1453,8 @@ Value *IfExprAST::Codegen() {
       // Emit merge block.
       TheFunction->getBasicBlockList().push_back(MergeBB);
       Builder.SetInsertPoint(MergeBB);
    -  PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
    +  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
    +                                  "iftmp");
       
       PN->addIncoming(ThenV, ThenBB);
       PN->addIncoming(ElseV, ElseBB);
    @@ -1479,7 +1486,7 @@ Value *ForExprAST::Codegen() {
       // block.
       Function *TheFunction = Builder.GetInsertBlock()->getParent();
       BasicBlock *PreheaderBB = Builder.GetInsertBlock();
    -  BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
    +  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
       
       // Insert an explicit fall through from the current block to the LoopBB.
       Builder.CreateBr(LoopBB);
    @@ -1488,7 +1495,7 @@ Value *ForExprAST::Codegen() {
       Builder.SetInsertPoint(LoopBB);
       
       // Start the PHI node with an entry for Start.
    -  PHINode *Variable = Builder.CreatePHI(Type::DoubleTy, VarName.c_str());
    +  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
       Variable->addIncoming(StartVal, PreheaderBB);
       
       // Within the loop, the variable is defined equal to the PHI node.  If it
    @@ -1509,7 +1516,7 @@ Value *ForExprAST::Codegen() {
         if (StepVal == 0) return 0;
       } else {
         // If not specified, use 1.0.
    -    StepVal = ConstantFP::get(APFloat(1.0));
    +    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
       }
       
       Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
    @@ -1520,12 +1527,12 @@ Value *ForExprAST::Codegen() {
       
       // Convert condition to a bool by comparing equal to 0.0.
       EndCond = Builder.CreateFCmpONE(EndCond, 
    -                                  ConstantFP::get(APFloat(0.0)),
    +                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                       "loopcond");
       
       // Create the "after loop" block and insert it.
       BasicBlock *LoopEndBB = Builder.GetInsertBlock();
    -  BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
    +  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
       
       // Insert the conditional branch into the end of LoopEndBB.
       Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
    @@ -1544,13 +1551,15 @@ Value *ForExprAST::Codegen() {
     
       
       // for expr always returns 0.0.
    -  return Constant::getNullValue(Type::DoubleTy);
    +  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
     }
     
     Function *PrototypeAST::Codegen() {
       // Make the function type:  double(double,double) etc.
    -  std::vector<const Type*> Doubles(Args.size(), Type::DoubleTy);
    -  FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
    +  std::vector<const Type*> Doubles(Args.size(),
    +                                   Type::getDoubleTy(getGlobalContext()));
    +  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
    +                                       Doubles, false);
       
       Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
       
    @@ -1595,7 +1604,7 @@ Function *FunctionAST::Codegen() {
         return 0;
       
       // Create a new basic block to start insertion into.
    -  BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
    +  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);
       
       if (Value *RetVal = Body->Codegen()) {
    @@ -1647,7 +1656,7 @@ static void HandleExtern() {
     }
     
     static void HandleTopLevelExpression() {
    -  // Evaluate a top level expression into an anonymous function.
    +  // Evaluate a top-level expression into an anonymous function.
       if (FunctionAST *F = ParseTopLevelExpr()) {
         if (Function *LF = F->Codegen()) {
           // JIT the function, returning a function pointer.
    @@ -1655,7 +1664,7 @@ static void HandleTopLevelExpression() {
           
           // Cast it to the right type (takes no arguments, returns a double) so we
           // can call it as a native function.
    -      double (*FP)() = (double (*)())FPtr;
    +      double (*FP)() = (double (*)())(intptr_t)FPtr;
           fprintf(stderr, "Evaluated to %f\n", FP());
         }
       } else {
    @@ -1670,7 +1679,7 @@ static void MainLoop() {
         fprintf(stderr, "ready> ");
         switch (CurTok) {
         case tok_eof:    return;
    -    case ';':        getNextToken(); break;  // ignore top level semicolons.
    +    case ';':        getNextToken(); break;  // ignore top-level semicolons.
         case tok_def:    HandleDefinition(); break;
         case tok_extern: HandleExtern(); break;
         default:         HandleTopLevelExpression(); break;
    @@ -1678,8 +1687,6 @@ static void MainLoop() {
       }
     }
     
    -
    -
     //===----------------------------------------------------------------------===//
     // "Library" functions that can be "extern'd" from user code.
     //===----------------------------------------------------------------------===//
    @@ -1696,6 +1703,9 @@ double putchard(double X) {
     //===----------------------------------------------------------------------===//
     
     int main() {
    +  InitializeNativeTarget();
    +  LLVMContext &Context = getGlobalContext();
    +
       // Install standard binary operators.
       // 1 is lowest precedence.
       BinopPrecedence['<'] = 10;
    @@ -1708,38 +1718,41 @@ int main() {
       getNextToken();
     
       // Make the module, which holds all the code.
    -  TheModule = new Module("my cool jit");
    -  
    -  // Create the JIT.
    -  TheExecutionEngine = ExecutionEngine::create(TheModule);
    +  TheModule = new Module("my cool jit", Context);
     
    -  {
    -    ExistingModuleProvider OurModuleProvider(TheModule);
    -    FunctionPassManager OurFPM(&OurModuleProvider);
    -      
    -    // Set up the optimizer pipeline.  Start with registering info about how the
    -    // target lays out data structures.
    -    OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    -    // Do simple "peephole" optimizations and bit-twiddling optzns.
    -    OurFPM.add(createInstructionCombiningPass());
    -    // Reassociate expressions.
    -    OurFPM.add(createReassociatePass());
    -    // Eliminate Common SubExpressions.
    -    OurFPM.add(createGVNPass());
    -    // Simplify the control flow graph (deleting unreachable blocks, etc).
    -    OurFPM.add(createCFGSimplificationPass());
    -    // Set the global so the code gen can use this.
    -    TheFPM = &OurFPM;
    -
    -    // Run the main "interpreter loop" now.
    -    MainLoop();
    -    
    -    TheFPM = 0;
    +  ExistingModuleProvider *OurModuleProvider =
    +      new ExistingModuleProvider(TheModule);
    +
    +  // Create the JIT.  This takes ownership of the module and module provider.
    +  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
    +
    +  FunctionPassManager OurFPM(OurModuleProvider);
    +
    +  // Set up the optimizer pipeline.  Start with registering info about how the
    +  // target lays out data structures.
    +  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    +  // Do simple "peephole" optimizations and bit-twiddling optzns.
    +  OurFPM.add(createInstructionCombiningPass());
    +  // Reassociate expressions.
    +  OurFPM.add(createReassociatePass());
    +  // Eliminate Common SubExpressions.
    +  OurFPM.add(createGVNPass());
    +  // Simplify the control flow graph (deleting unreachable blocks, etc).
    +  OurFPM.add(createCFGSimplificationPass());
    +
    +  OurFPM.doInitialization();
    +
    +  // Set the global so the code gen can use this.
    +  TheFPM = &OurFPM;
    +
    +  // Run the main "interpreter loop" now.
    +  MainLoop();
    +
    +  TheFPM = 0;
    +
    +  // Print out all of the generated code.
    +  TheModule->dump();
     
    -    // Print out all of the generated code.
    -    TheModule->dump();
    -  }  // Free module provider (and thus the module) and pass manager.
    -                                   
       return 0;
     }
     
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index 44ad15b009655..f113e96651e99 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -207,7 +207,7 @@ the prototype for a user-defined operator, we need to parse it:

 static PrototypeAST *ParsePrototype() {
   std::string FnName;
 
-  int Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
   unsigned BinaryPrecedence = 30;
 
   switch (CurTok) {
@@ -283,7 +283,8 @@ Value *BinaryExprAST::Codegen() {
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
   default: break;
   }
@@ -305,7 +306,7 @@ function call to it. Since user-defined operators are just built as normal
 functions (because the "prototype" boils down to a function with the right
 name) everything falls into place.

-The final piece of code we are missing, is a bit of top level magic:
+The final piece of code we are missing, is a bit of top-level magic:

    @@ -321,7 +322,7 @@ Function *FunctionAST::Codegen() {
         BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
       
       // Create a new basic block to start insertion into.
    -  BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
    +  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);
       
       if (Value *RetVal = Body->Codegen()) {
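That "top-level magic" amounts to one line: when a user-defined binary operator is codegen'd, its precedence is installed into the same BinopPrecedence table the parser consults, so the operator becomes parseable the moment its definition is finished. The relevant step, as in the hunk above:

    // In FunctionAST::Codegen(), before emitting the body:
    if (Proto->isBinaryOp())
      BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();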
    @@ -438,7 +439,7 @@ with:

 static PrototypeAST *ParsePrototype() {
   std::string FnName;
 
-  int Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
   unsigned BinaryPrecedence = 30;
 
   switch (CurTok) {
@@ -794,7 +795,6 @@ add variable mutation without building SSA in your front-end.
 
-
@@ -821,11 +821,15 @@ if/then/else and for expressions.. To build this example, use:
     #include "llvm/DerivedTypes.h"
     #include "llvm/ExecutionEngine/ExecutionEngine.h"
    +#include "llvm/ExecutionEngine/Interpreter.h"
    +#include "llvm/ExecutionEngine/JIT.h"
    +#include "llvm/LLVMContext.h"
     #include "llvm/Module.h"
     #include "llvm/ModuleProvider.h"
     #include "llvm/PassManager.h"
     #include "llvm/Analysis/Verifier.h"
     #include "llvm/Target/TargetData.h"
    +#include "llvm/Target/TargetSelect.h"
     #include "llvm/Transforms/Scalar.h"
     #include "llvm/Support/IRBuilder.h"
     #include <cstdio>
    @@ -993,7 +997,8 @@ public:
     };
     
     /// PrototypeAST - This class represents the "prototype" for a function,
    -/// which captures its argument names as well as if it is an operator.
    +/// which captures its name, and its argument names (thus implicitly the number
    +/// of arguments the function takes), as well as if it is an operator.
     class PrototypeAST {
       std::string Name;
       std::vector<std::string> Args;
    @@ -1033,7 +1038,7 @@ public:
     //===----------------------------------------------------------------------===//
     
     /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    -/// token the parser it looking at.  getNextToken reads another token from the
    +/// token the parser is looking at.  getNextToken reads another token from the
     /// lexer and updates CurTok with its results.
     static int CurTok;
     static int getNextToken() {
    @@ -1081,9 +1086,9 @@ static ExprAST *ParseIdentifierExpr() {
           ExprAST *Arg = ParseExpression();
           if (!Arg) return 0;
           Args.push_back(Arg);
    -      
    +
           if (CurTok == ')') break;
    -      
    +
           if (CurTok != ',')
             return Error("Expected ')' or ',' in argument list");
           getNextToken();
    @@ -1183,7 +1188,6 @@ static ExprAST *ParseForExpr() {
       return new ForExprAST(IdName, Start, End, Step, Body);
     }
     
    -
     /// primary
     ///   ::= identifierexpr
     ///   ::= numberexpr
    @@ -1267,7 +1271,7 @@ static ExprAST *ParseExpression() {
     static PrototypeAST *ParsePrototype() {
       std::string FnName;
       
    -  int Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
    +  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
       unsigned BinaryPrecedence = 30;
       
       switch (CurTok) {
    @@ -1357,14 +1361,14 @@ static PrototypeAST *ParseExtern() {
     //===----------------------------------------------------------------------===//
     
     static Module *TheModule;
    -static IRBuilder<> Builder;
    +static IRBuilder<> Builder(getGlobalContext());
     static std::map<std::string, Value*> NamedValues;
     static FunctionPassManager *TheFPM;
     
     Value *ErrorV(const char *Str) { Error(Str); return 0; }
     
     Value *NumberExprAST::Codegen() {
    -  return ConstantFP::get(APFloat(Val));
    +  return ConstantFP::get(getGlobalContext(), APFloat(Val));
     }
     
     Value *VariableExprAST::Codegen() {
    @@ -1384,7 +1388,6 @@ Value *UnaryExprAST::Codegen() {
       return Builder.CreateCall(F, OperandV, "unop");
     }
     
    -
     Value *BinaryExprAST::Codegen() {
       Value *L = LHS->Codegen();
       Value *R = RHS->Codegen();
    @@ -1397,7 +1400,8 @@ Value *BinaryExprAST::Codegen() {
       case '<':
         L = Builder.CreateFCmpULT(L, R, "cmptmp");
         // Convert bool 0/1 to double 0.0 or 1.0
    -    return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
    +    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
    +                                "booltmp");
       default: break;
       }
       
    @@ -1435,16 +1439,16 @@ Value *IfExprAST::Codegen() {
       
       // Convert condition to a bool by comparing equal to 0.0.
       CondV = Builder.CreateFCmpONE(CondV, 
    -                                ConstantFP::get(APFloat(0.0)),
    +                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                     "ifcond");
       
       Function *TheFunction = Builder.GetInsertBlock()->getParent();
       
       // Create blocks for the then and else cases.  Insert the 'then' block at the
       // end of the function.
    -  BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
    -  BasicBlock *ElseBB = BasicBlock::Create("else");
    -  BasicBlock *MergeBB = BasicBlock::Create("ifcont");
    +  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
    +  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
    +  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
       
       Builder.CreateCondBr(CondV, ThenBB, ElseBB);
       
    @@ -1472,7 +1476,8 @@ Value *IfExprAST::Codegen() {
       // Emit merge block.
       TheFunction->getBasicBlockList().push_back(MergeBB);
       Builder.SetInsertPoint(MergeBB);
    -  PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
    +  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
    +                                  "iftmp");
       
       PN->addIncoming(ThenV, ThenBB);
       PN->addIncoming(ElseV, ElseBB);
    @@ -1504,7 +1509,7 @@ Value *ForExprAST::Codegen() {
       // block.
       Function *TheFunction = Builder.GetInsertBlock()->getParent();
       BasicBlock *PreheaderBB = Builder.GetInsertBlock();
    -  BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
    +  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
       
       // Insert an explicit fall through from the current block to the LoopBB.
       Builder.CreateBr(LoopBB);
    @@ -1513,7 +1518,7 @@ Value *ForExprAST::Codegen() {
       Builder.SetInsertPoint(LoopBB);
       
       // Start the PHI node with an entry for Start.
    -  PHINode *Variable = Builder.CreatePHI(Type::DoubleTy, VarName.c_str());
    +  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
       Variable->addIncoming(StartVal, PreheaderBB);
       
       // Within the loop, the variable is defined equal to the PHI node.  If it
    @@ -1534,7 +1539,7 @@ Value *ForExprAST::Codegen() {
         if (StepVal == 0) return 0;
       } else {
         // If not specified, use 1.0.
    -    StepVal = ConstantFP::get(APFloat(1.0));
    +    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
       }
       
       Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
    @@ -1545,12 +1550,12 @@ Value *ForExprAST::Codegen() {
       
       // Convert condition to a bool by comparing equal to 0.0.
       EndCond = Builder.CreateFCmpONE(EndCond, 
    -                                  ConstantFP::get(APFloat(0.0)),
    +                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                       "loopcond");
       
       // Create the "after loop" block and insert it.
       BasicBlock *LoopEndBB = Builder.GetInsertBlock();
    -  BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
    +  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
       
       // Insert the conditional branch into the end of LoopEndBB.
       Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
    @@ -1569,13 +1574,15 @@ Value *ForExprAST::Codegen() {
     
       
       // for expr always returns 0.0.
    -  return Constant::getNullValue(Type::DoubleTy);
    +  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
     }
     
     Function *PrototypeAST::Codegen() {
       // Make the function type:  double(double,double) etc.
    -  std::vector<const Type*> Doubles(Args.size(), Type::DoubleTy);
    -  FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
    +  std::vector<const Type*> Doubles(Args.size(),
    +                                   Type::getDoubleTy(getGlobalContext()));
    +  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
    +                                       Doubles, false);
       
       Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
       
    @@ -1624,7 +1631,7 @@ Function *FunctionAST::Codegen() {
         BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
       
       // Create a new basic block to start insertion into.
    -  BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
    +  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);
       
       if (Value *RetVal = Body->Codegen()) {
    @@ -1679,7 +1686,7 @@ static void HandleExtern() {
     }
     
     static void HandleTopLevelExpression() {
    -  // Evaluate a top level expression into an anonymous function.
    +  // Evaluate a top-level expression into an anonymous function.
       if (FunctionAST *F = ParseTopLevelExpr()) {
         if (Function *LF = F->Codegen()) {
           // JIT the function, returning a function pointer.
    @@ -1687,7 +1694,7 @@ static void HandleTopLevelExpression() {
           
           // Cast it to the right type (takes no arguments, returns a double) so we
           // can call it as a native function.
    -      double (*FP)() = (double (*)())FPtr;
    +      double (*FP)() = (double (*)())(intptr_t)FPtr;
           fprintf(stderr, "Evaluated to %f\n", FP());
         }
       } else {
    @@ -1702,7 +1709,7 @@ static void MainLoop() {
         fprintf(stderr, "ready> ");
         switch (CurTok) {
         case tok_eof:    return;
    -    case ';':        getNextToken(); break;  // ignore top level semicolons.
    +    case ';':        getNextToken(); break;  // ignore top-level semicolons.
         case tok_def:    HandleDefinition(); break;
         case tok_extern: HandleExtern(); break;
         default:         HandleTopLevelExpression(); break;
    @@ -1710,8 +1717,6 @@ static void MainLoop() {
       }
     }
     
    -
    -
     //===----------------------------------------------------------------------===//
     // "Library" functions that can be "extern'd" from user code.
     //===----------------------------------------------------------------------===//
    @@ -1735,6 +1740,9 @@ double printd(double X) {
     //===----------------------------------------------------------------------===//
     
     int main() {
    +  InitializeNativeTarget();
    +  LLVMContext &Context = getGlobalContext();
    +
       // Install standard binary operators.
       // 1 is lowest precedence.
       BinopPrecedence['<'] = 10;
    @@ -1747,38 +1755,41 @@ int main() {
       getNextToken();
     
       // Make the module, which holds all the code.
    -  TheModule = new Module("my cool jit");
    -  
    -  // Create the JIT.
    -  TheExecutionEngine = ExecutionEngine::create(TheModule);
    +  TheModule = new Module("my cool jit", Context);
    +
    +  ExistingModuleProvider *OurModuleProvider =
    +      new ExistingModuleProvider(TheModule);
    +
    +  // Create the JIT.  This takes ownership of the module and module provider.
    +  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
    +
    +  FunctionPassManager OurFPM(OurModuleProvider);
    +
    +  // Set up the optimizer pipeline.  Start with registering info about how the
    +  // target lays out data structures.
    +  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    +  // Do simple "peephole" optimizations and bit-twiddling optzns.
    +  OurFPM.add(createInstructionCombiningPass());
    +  // Reassociate expressions.
    +  OurFPM.add(createReassociatePass());
    +  // Eliminate Common SubExpressions.
    +  OurFPM.add(createGVNPass());
    +  // Simplify the control flow graph (deleting unreachable blocks, etc).
    +  OurFPM.add(createCFGSimplificationPass());
    +
    +  OurFPM.doInitialization();
    +
    +  // Set the global so the code gen can use this.
    +  TheFPM = &OurFPM;
    +
    +  // Run the main "interpreter loop" now.
    +  MainLoop();
    +
    +  TheFPM = 0;
    +
    +  // Print out all of the generated code.
    +  TheModule->dump();
     
    -  {
    -    ExistingModuleProvider OurModuleProvider(TheModule);
    -    FunctionPassManager OurFPM(&OurModuleProvider);
    -      
    -    // Set up the optimizer pipeline.  Start with registering info about how the
    -    // target lays out data structures.
    -    OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    -    // Do simple "peephole" optimizations and bit-twiddling optzns.
    -    OurFPM.add(createInstructionCombiningPass());
    -    // Reassociate expressions.
    -    OurFPM.add(createReassociatePass());
    -    // Eliminate Common SubExpressions.
    -    OurFPM.add(createGVNPass());
    -    // Simplify the control flow graph (deleting unreachable blocks, etc).
    -    OurFPM.add(createCFGSimplificationPass());
    -    // Set the global so the code gen can use this.
    -    TheFPM = &OurFPM;
    -
    -    // Run the main "interpreter loop" now.
    -    MainLoop();
    -    
    -    TheFPM = 0;
    -    
    -    // Print out all of the generated code.
    -    TheModule->dump();
    -  }  // Free module provider (and thus the module) and pass manager.
    -  
       return 0;
     }
     
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index f5606484eb9ff..ec07fa88d4b14 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -424,7 +424,8 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
                                           const std::string &VarName) {
   IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
                  TheFunction->getEntryBlock().begin());
-  return TmpB.CreateAlloca(Type::DoubleTy, 0, VarName.c_str());
+  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+                           VarName.c_str());
 }
@@ -923,7 +924,7 @@ that we replace in OldBindings.

     InitVal = Init->Codegen();
     if (InitVal == 0) return 0;
   } else { // If not specified, use 0.0.
-    InitVal = ConstantFP::get(APFloat(0.0));
+    InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
   }
 
   AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
@@ -1003,11 +1004,15 @@ variables and var/in support. To build this example, use:
     #include "llvm/DerivedTypes.h"
     #include "llvm/ExecutionEngine/ExecutionEngine.h"
    +#include "llvm/ExecutionEngine/Interpreter.h"
    +#include "llvm/ExecutionEngine/JIT.h"
    +#include "llvm/LLVMContext.h"
     #include "llvm/Module.h"
     #include "llvm/ModuleProvider.h"
     #include "llvm/PassManager.h"
     #include "llvm/Analysis/Verifier.h"
     #include "llvm/Target/TargetData.h"
    +#include "llvm/Target/TargetSelect.h"
     #include "llvm/Transforms/Scalar.h"
     #include "llvm/Support/IRBuilder.h"
     #include <cstdio>
    @@ -1192,7 +1197,8 @@ public:
     };
     
     /// PrototypeAST - This class represents the "prototype" for a function,
    -/// which captures its argument names as well as if it is an operator.
    +/// which captures its name, and its argument names (thus implicitly the number
    +/// of arguments the function takes), as well as if it is an operator.
     class PrototypeAST {
       std::string Name;
       std::vector<std::string> Args;
    @@ -1234,7 +1240,7 @@ public:
     //===----------------------------------------------------------------------===//
     
     /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
    -/// token the parser it looking at.  getNextToken reads another token from the
    +/// token the parser is looking at.  getNextToken reads another token from the
     /// lexer and updates CurTok with its results.
     static int CurTok;
     static int getNextToken() {
    @@ -1282,9 +1288,9 @@ static ExprAST *ParseIdentifierExpr() {
           ExprAST *Arg = ParseExpression();
           if (!Arg) return 0;
           Args.push_back(Arg);
    -      
    +
           if (CurTok == ')') break;
    -      
    +
           if (CurTok != ',')
             return Error("Expected ')' or ',' in argument list");
           getNextToken();
    @@ -1429,7 +1435,6 @@ static ExprAST *ParseVarExpr() {
       return new VarExprAST(VarNames, Body);
     }
     
    -
     /// primary
     ///   ::= identifierexpr
     ///   ::= numberexpr
    @@ -1515,7 +1520,7 @@ static ExprAST *ParseExpression() {
     static PrototypeAST *ParsePrototype() {
       std::string FnName;
       
    -  int Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
    +  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
       unsigned BinaryPrecedence = 30;
       
       switch (CurTok) {
    @@ -1605,7 +1610,7 @@ static PrototypeAST *ParseExtern() {
     //===----------------------------------------------------------------------===//
     
     static Module *TheModule;
    -static IRBuilder<> Builder;
    +static IRBuilder<> Builder(getGlobalContext());
     static std::map<std::string, AllocaInst*> NamedValues;
     static FunctionPassManager *TheFPM;
     
    @@ -1617,12 +1622,12 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
                                               const std::string &VarName) {
       IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
                      TheFunction->getEntryBlock().begin());
    -  return TmpB.CreateAlloca(Type::DoubleTy, 0, VarName.c_str());
    +  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
    +                           VarName.c_str());
     }
     
    -
     Value *NumberExprAST::Codegen() {
    -  return ConstantFP::get(APFloat(Val));
    +  return ConstantFP::get(getGlobalContext(), APFloat(Val));
     }
     
     Value *VariableExprAST::Codegen() {
    @@ -1645,7 +1650,6 @@ Value *UnaryExprAST::Codegen() {
       return Builder.CreateCall(F, OperandV, "unop");
     }
     
    -
     Value *BinaryExprAST::Codegen() {
       // Special case '=' because we don't want to emit the LHS as an expression.
       if (Op == '=') {
    @@ -1665,7 +1669,6 @@ Value *BinaryExprAST::Codegen() {
         return Val;
       }
       
    -  
       Value *L = LHS->Codegen();
       Value *R = RHS->Codegen();
       if (L == 0 || R == 0) return 0;
    @@ -1677,7 +1680,8 @@ Value *BinaryExprAST::Codegen() {
       case '<':
         L = Builder.CreateFCmpULT(L, R, "cmptmp");
         // Convert bool 0/1 to double 0.0 or 1.0
    -    return Builder.CreateUIToFP(L, Type::DoubleTy, "booltmp");
    +    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
    +                                "booltmp");
       default: break;
       }
       
    @@ -1715,16 +1719,16 @@ Value *IfExprAST::Codegen() {
       
       // Convert condition to a bool by comparing equal to 0.0.
       CondV = Builder.CreateFCmpONE(CondV, 
    -                                ConstantFP::get(APFloat(0.0)),
    +                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                     "ifcond");
       
       Function *TheFunction = Builder.GetInsertBlock()->getParent();
       
       // Create blocks for the then and else cases.  Insert the 'then' block at the
       // end of the function.
    -  BasicBlock *ThenBB = BasicBlock::Create("then", TheFunction);
    -  BasicBlock *ElseBB = BasicBlock::Create("else");
    -  BasicBlock *MergeBB = BasicBlock::Create("ifcont");
    +  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
    +  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
    +  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
       
       Builder.CreateCondBr(CondV, ThenBB, ElseBB);
       
    @@ -1752,7 +1756,8 @@ Value *IfExprAST::Codegen() {
       // Emit merge block.
       TheFunction->getBasicBlockList().push_back(MergeBB);
       Builder.SetInsertPoint(MergeBB);
    -  PHINode *PN = Builder.CreatePHI(Type::DoubleTy, "iftmp");
    +  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
    +                                  "iftmp");
       
       PN->addIncoming(ThenV, ThenBB);
       PN->addIncoming(ElseV, ElseBB);
    @@ -1794,8 +1799,7 @@ Value *ForExprAST::Codegen() {
       
       // Make the new basic block for the loop header, inserting after current
       // block.
    -  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
    -  BasicBlock *LoopBB = BasicBlock::Create("loop", TheFunction);
    +  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
       
       // Insert an explicit fall through from the current block to the LoopBB.
       Builder.CreateBr(LoopBB);
    @@ -1821,7 +1825,7 @@ Value *ForExprAST::Codegen() {
         if (StepVal == 0) return 0;
       } else {
         // If not specified, use 1.0.
    -    StepVal = ConstantFP::get(APFloat(1.0));
    +    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
       }
       
       // Compute the end condition.
    @@ -1836,12 +1840,11 @@ Value *ForExprAST::Codegen() {
       
       // Convert condition to a bool by comparing equal to 0.0.
       EndCond = Builder.CreateFCmpONE(EndCond, 
    -                                  ConstantFP::get(APFloat(0.0)),
    +                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                       "loopcond");
       
       // Create the "after loop" block and insert it.
    -  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
    -  BasicBlock *AfterBB = BasicBlock::Create("afterloop", TheFunction);
    +  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
       
       // Insert the conditional branch into the end of LoopEndBB.
       Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
    @@ -1857,7 +1860,7 @@ Value *ForExprAST::Codegen() {
     
       
       // for expr always returns 0.0.
    -  return Constant::getNullValue(Type::DoubleTy);
    +  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
     }
     
     Value *VarExprAST::Codegen() {
    @@ -1880,7 +1883,7 @@ Value *VarExprAST::Codegen() {
           InitVal = Init->Codegen();
           if (InitVal == 0) return 0;
         } else { // If not specified, use 0.0.
    -      InitVal = ConstantFP::get(APFloat(0.0));
    +      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
         }
         
         AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
    @@ -1906,11 +1909,12 @@ Value *VarExprAST::Codegen() {
       return BodyVal;
     }
     
    -
     Function *PrototypeAST::Codegen() {
       // Make the function type:  double(double,double) etc.
    -  std::vector<const Type*> Doubles(Args.size(), Type::DoubleTy);
    -  FunctionType *FT = FunctionType::get(Type::DoubleTy, Doubles, false);
    +  std::vector<const Type*> Doubles(Args.size(),
    +                                   Type::getDoubleTy(getGlobalContext()));
    +  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
    +                                       Doubles, false);
       
       Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
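The FunctionType::get call above reads: return type, parameter types, then an
is-vararg flag. Standalone sketch of the double(double,double) signature
(names illustrative):

  std::vector<const Type*> Doubles(2, Type::getDoubleTy(getGlobalContext()));
  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
                                       Doubles, false);  // false: not vararg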
       
    @@ -1959,7 +1963,6 @@ void PrototypeAST::CreateArgumentAllocas(Function *F) {
       }
     }
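For context, CreateArgumentAllocas leans on the tutorial's entry-block alloca
helper, which this patch leaves unchanged apart from the type lookup; a
sketch under the updated API:

  static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
                                            const std::string &VarName) {
    // Emit the alloca at the top of the entry block so mem2reg can promote it.
    IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
                     TheFunction->getEntryBlock().begin());
    return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
                             VarName.c_str());
  }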
     
    -
     Function *FunctionAST::Codegen() {
       NamedValues.clear();
       
    @@ -1972,12 +1975,12 @@ Function *FunctionAST::Codegen() {
         BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
       
       // Create a new basic block to start insertion into.
    -  BasicBlock *BB = BasicBlock::Create("entry", TheFunction);
    +  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);
       
       // Add all arguments to the symbol table and create their allocas.
       Proto->CreateArgumentAllocas(TheFunction);
    -  
    +
       if (Value *RetVal = Body->Codegen()) {
         // Finish off the function.
         Builder.CreateRet(RetVal);
    @@ -2030,7 +2033,7 @@ static void HandleExtern() {
     }
     
     static void HandleTopLevelExpression() {
    -  // Evaluate a top level expression into an anonymous function.
    +  // Evaluate a top-level expression into an anonymous function.
       if (FunctionAST *F = ParseTopLevelExpr()) {
         if (Function *LF = F->Codegen()) {
           // JIT the function, returning a function pointer.
    @@ -2038,7 +2041,7 @@ static void HandleTopLevelExpression() {
           
           // Cast it to the right type (takes no arguments, returns a double) so we
           // can call it as a native function.
    -      double (*FP)() = (double (*)())FPtr;
    +      double (*FP)() = (double (*)())(intptr_t)FPtr;
           fprintf(stderr, "Evaluated to %f\n", FP());
         }
       } else {
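The added (intptr_t) hop is deliberate: getPointerToFunction returns void*,
and ISO C++ does not allow converting an object pointer straight to a
function pointer, so the value is laundered through an integer type first.
Condensed sketch with this hunk's names:

  void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
  double (*FP)() = (double (*)())(intptr_t)FPtr;  // object ptr -> int -> fn ptr
  fprintf(stderr, "Evaluated to %f\n", FP());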
    @@ -2053,7 +2056,7 @@ static void MainLoop() {
         fprintf(stderr, "ready> ");
         switch (CurTok) {
         case tok_eof:    return;
    -    case ';':        getNextToken(); break;  // ignore top level semicolons.
    +    case ';':        getNextToken(); break;  // ignore top-level semicolons.
         case tok_def:    HandleDefinition(); break;
         case tok_extern: HandleExtern(); break;
         default:         HandleTopLevelExpression(); break;
    @@ -2061,8 +2064,6 @@ static void MainLoop() {
       }
     }
     
    -
    -
     //===----------------------------------------------------------------------===//
     // "Library" functions that can be "extern'd" from user code.
     //===----------------------------------------------------------------------===//
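Everything in this section follows one rule: it is declared extern "C" so the
JIT can resolve it by unmangled name. The tutorial's putchard, reproduced for
context, is typical:

  /// putchard - putchar that takes a double and returns 0.
  extern "C"
  double putchard(double X) {
    putchar((char)X);
    return 0;
  }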
    @@ -2086,6 +2087,9 @@ double printd(double X) {
     //===----------------------------------------------------------------------===//
     
     int main() {
    +  InitializeNativeTarget();
    +  LLVMContext &Context = getGlobalContext();
    +
       // Install standard binary operators.
       // 1 is lowest precedence.
       BinopPrecedence['='] = 2;
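The new InitializeNativeTarget() call is load-bearing: it registers the host
target so EngineBuilder::create() can produce a JIT; without it the builder
typically falls back to the interpreter or returns null. Minimal ordering
sketch (illustrative):

  #include "llvm/Target/TargetSelect.h"

  int main() {
    InitializeNativeTarget();  // must precede EngineBuilder::create()
    LLVMContext &Context = getGlobalContext();
    // ... build the module, create the engine, run ...
  }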
    @@ -2099,42 +2103,43 @@ int main() {
       getNextToken();
     
       // Make the module, which holds all the code.
    -  TheModule = new Module("my cool jit");
    -  
    -  // Create the JIT.
    -  TheExecutionEngine = ExecutionEngine::create(TheModule);
    +  TheModule = new Module("my cool jit", Context);
     
    -  {
    -    ExistingModuleProvider OurModuleProvider(TheModule);
    -    FunctionPassManager OurFPM(&OurModuleProvider);
    -      
    -    // Set up the optimizer pipeline.  Start with registering info about how the
    -    // target lays out data structures.
    -    OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    -    // Promote allocas to registers.
    -    OurFPM.add(createPromoteMemoryToRegisterPass());
    -    // Do simple "peephole" optimizations and bit-twiddling optzns.
    -    OurFPM.add(createInstructionCombiningPass());
    -    // Reassociate expressions.
    -    OurFPM.add(createReassociatePass());
    -    // Eliminate Common SubExpressions.
    -    OurFPM.add(createGVNPass());
    -    // Simplify the control flow graph (deleting unreachable blocks, etc).
    -    OurFPM.add(createCFGSimplificationPass());
    +  ExistingModuleProvider *OurModuleProvider =
    +      new ExistingModuleProvider(TheModule);
     
    -    // Set the global so the code gen can use this.
    -    TheFPM = &OurFPM;
    +  // Create the JIT.  This takes ownership of the module and module provider.
    +  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
    +
    +  FunctionPassManager OurFPM(OurModuleProvider);
    +
    +  // Set up the optimizer pipeline.  Start with registering info about how the
    +  // target lays out data structures.
    +  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
    +  // Promote allocas to registers.
    +  OurFPM.add(createPromoteMemoryToRegisterPass());
    +  // Do simple "peephole" optimizations and bit-twiddling optzns.
    +  OurFPM.add(createInstructionCombiningPass());
    +  // Reassociate expressions.
    +  OurFPM.add(createReassociatePass());
    +  // Eliminate Common SubExpressions.
    +  OurFPM.add(createGVNPass());
    +  // Simplify the control flow graph (deleting unreachable blocks, etc).
    +  OurFPM.add(createCFGSimplificationPass());
    +
    +  OurFPM.doInitialization();
    +
    +  // Set the global so the code gen can use this.
    +  TheFPM = &OurFPM;
    +
    +  // Run the main "interpreter loop" now.
    +  MainLoop();
    +
    +  TheFPM = 0;
    +
    +  // Print out all of the generated code.
    +  TheModule->dump();
     
    -    // Run the main "interpreter loop" now.
    -    MainLoop();
    -    
    -    TheFPM = 0;
    -    
    -    // Print out all of the generated code.
    -    TheModule->dump();
    -    
    -  }  // Free module provider (and thus the module) and pass manager.
    -  
       return 0;
     }
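Taken together, the rewritten main() wires up the JIT and the per-function
optimizer as follows; a consolidated sketch of the calls in the hunk above
(error handling omitted):

  Module *M = new Module("my cool jit", getGlobalContext());
  ExistingModuleProvider *MP = new ExistingModuleProvider(M);
  ExecutionEngine *EE = EngineBuilder(MP).create();  // EE now owns MP and M

  FunctionPassManager FPM(MP);
  FPM.add(new TargetData(*EE->getTargetData()));     // target layout info first
  FPM.add(createPromoteMemoryToRegisterPass());
  FPM.add(createInstructionCombiningPass());
  FPM.add(createReassociatePass());
  FPM.add(createGVNPass());
  FPM.add(createCFGSimplificationPass());
  FPM.doInitialization();
  // Codegen then calls FPM.run(F) on each function as it is emitted.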
     
diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
index 9caae435e29e1..a59887522ee50 100644
--- a/docs/tutorial/OCamlLangImpl3.html
+++ b/docs/tutorial/OCamlLangImpl3.html
@@ -95,8 +95,8 @@ an undeclared parameter):

     exception Error of string
     
    -let the_module = create_module "my cool jit"
    -let builder = builder ()
    +let the_module = create_module (global_context ()) "my cool jit"
    +let builder = builder (global_context ())
     let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
     
    @@ -159,7 +159,7 @@ uses "the foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".

 References to variables are also quite simple using LLVM.  In the simple
-version of Kaleidoscope, we assume that the variable has already been emited
+version of Kaleidoscope, we assume that the variable has already been emitted
 somewhere and its value is available.  In practice, the only values that can
 be in the Codegen.named_values map are function arguments.  This code simply
 checks to see that the specified name is in the map (if not, an unknown
@@ -323,7 +323,7 @@ code above.

 first, we want to allow 'extern'ing a function more than once, as long as the
 prototypes for the externs match (since all arguments have the same type, we
 just have to check that the number of arguments match).  Second, we want to
-allow 'extern'ing a function and then definining a body for it.  This is useful
+allow 'extern'ing a function and then defining a body for it.  This is useful
 when defining mutually recursive functions.

@@ -899,8 +899,9 @@ open Llvm
 
 exception Error of string
 
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
 let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
 
 let rec codegen_expr = function
diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
index ffa85d51dfb78..26f253249bb1c 100644
--- a/docs/tutorial/OCamlLangImpl4.html
+++ b/docs/tutorial/OCamlLangImpl4.html
@@ -206,6 +206,8 @@ add a set of optimizations to run.  The code looks like this:

   (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
   add_cfg_simplification the_fpm;
 
+  ignore (PassManager.initialize the_fpm);
+
   (* Run the main "interpreter loop" now. *)
   Toplevel.main_loop the_fpm the_execution_engine stream;
@@ -222,7 +224,7 @@ requires a pointer to the the_module (through the
 the_module_provider) to construct itself.  Once it is set up, we use a
 series of "add" calls to add a bunch of LLVM passes.  The first pass is
 basically boilerplate, it adds a pass so that later optimizations know how the
-data structures in the program are layed out.  The
+data structures in the program are laid out.  The
 "the_execution_engine" variable is related to the JIT, which we will
 get to in the next section.
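The added PassManager.initialize mirrors the C++ side of this patch, where a
FunctionPassManager must run doInitialization() before its first use. Hedged
C++ sketch of the correspondence:

  FunctionPassManager FPM(MP);             // MP: the module's provider
  FPM.add(createCFGSimplificationPass());
  FPM.doInitialization();                  // OCaml: PassManager.initialize
  // ... then FPM.run(F) per generated function ...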

@@ -795,8 +797,9 @@ open Llvm
 
 exception Error of string
 
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
 let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
 
 let rec codegen_expr = function
@@ -959,6 +962,8 @@ open Llvm_target
 open Llvm_scalar_opts
 
 let main () =
+  ignore (initialize_native_target ());
+
   (* Install standard binary operators.
    * 1 is the lowest precedence. *)
   Hashtbl.add Parser.binop_precedence '<' 10;
@@ -991,6 +996,8 @@ let main () =
   (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
   add_cfg_simplification the_fpm;
 
+  ignore (PassManager.initialize the_fpm);
+
   (* Run the main "interpreter loop" now. *)
   Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html
index 594a77d164803..f19e900c00bdd 100644
--- a/docs/tutorial/OCamlLangImpl5.html
+++ b/docs/tutorial/OCamlLangImpl5.html
@@ -271,8 +271,8 @@ into "t.ll" and run "llvm-as < t.ll | opt -analyze -view-cfg", a window
 will pop up and you'll see this graph:

[figure: "Example CFG" image; the -/+ pair here changed its HTML img markup, which was lost in extraction]

 Another way to get this is to call "Llvm_analysis.view_function_cfg f" or
 "Llvm_analysis.view_function_cfg_only f" (where f
@@ -1200,8 +1200,9 @@ open Llvm
 
 exception Error of string
 
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
 let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
 
 let rec codegen_expr = function
@@ -1486,6 +1487,8 @@ open Llvm_target
 open Llvm_scalar_opts
 
 let main () =
+  ignore (initialize_native_target ());
+
   (* Install standard binary operators.
    * 1 is the lowest precedence. *)
   Hashtbl.add Parser.binop_precedence '<' 10;
@@ -1518,6 +1521,8 @@ let main () =
   (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
   add_cfg_simplification the_fpm;
 
+  ignore (PassManager.initialize the_fpm);
+
   (* Run the main "interpreter loop" now. *)
   Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
index 780cab819142a..2edb22edf632a 100644
--- a/docs/tutorial/OCamlLangImpl6.html
+++ b/docs/tutorial/OCamlLangImpl6.html
@@ -1173,8 +1173,9 @@ open Llvm
 
 exception Error of string
 
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
 let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
 
 let rec codegen_expr = function
@@ -1485,6 +1486,8 @@ open Llvm_target
 open Llvm_scalar_opts
 
 let main () =
+  ignore (initialize_native_target ());
+
   (* Install standard binary operators.
    * 1 is the lowest precedence. *)
   Hashtbl.add Parser.binop_precedence '<' 10;
@@ -1517,6 +1520,8 @@ let main () =
   (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
   add_cfg_simplification the_fpm;
 
+  ignore (PassManager.initialize the_fpm);
+
   (* Run the main "interpreter loop" now. *)
   Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
index abda44011cab1..07768214b9a5a 100644
--- a/docs/tutorial/OCamlLangImpl7.html
+++ b/docs/tutorial/OCamlLangImpl7.html
@@ -1384,14 +1384,15 @@ open Llvm
 
 exception Error of string
 
-let the_module = create_module "my cool jit"
-let builder = builder ()
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
 let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
 
 (* Create an alloca instruction in the entry block of the function. This
  * is used for mutable variables etc. *)
 let create_entry_block_alloca the_function var_name =
-  let builder = builder_at (instr_begin (entry_block the_function)) in
+  let builder = builder_at context (instr_begin (entry_block the_function)) in
   build_alloca double_type var_name builder
 
 let rec codegen_expr = function
@@ -1815,6 +1816,8 @@ open Llvm_target
 open Llvm_scalar_opts
 
 let main () =
+  ignore (initialize_native_target ());
+
   (* Install standard binary operators.
    * 1 is the lowest precedence. *)
   Hashtbl.add Parser.binop_precedence '=' 2;
@@ -1851,6 +1854,8 @@ let main () =
   (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
   add_cfg_simplification the_fpm;
 
+  ignore (PassManager.initialize the_fpm);
+
   (* Run the main "interpreter loop" now.
    *)
   Toplevel.main_loop the_fpm the_execution_engine stream;
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index d3261d79965cc..5cf2b883bc480 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -43,7 +43,7 @@ Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf,
   comflag = cf;
 
   header(Context);
-  readloop(0, 0, 0);
+  readloop(0, 0, 0, Context);
   delete builder;
   return module;
 }
@@ -54,39 +54,39 @@ void BrainF::header(LLVMContext& C) {
   //Function prototypes
 
   //declare void @llvm.memset.i32(i8 *, i8, i32, i32)
-  const Type *Tys[] = { Type::Int32Ty };
+  const Type *Tys[] = { Type::getInt32Ty(C) };
   Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset,
                                                     Tys, 1);
 
   //declare i32 @getchar()
   getchar_func = cast<Function>(module->
-    getOrInsertFunction("getchar", IntegerType::Int32Ty, NULL));
+    getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL));
 
   //declare i32 @putchar(i32)
   putchar_func = cast<Function>(module->
-    getOrInsertFunction("putchar", IntegerType::Int32Ty,
-                        IntegerType::Int32Ty, NULL));
+    getOrInsertFunction("putchar", IntegerType::getInt32Ty(C),
+                        IntegerType::getInt32Ty(C), NULL));
 
   //Function header
 
   //define void @brainf()
   brainf_func = cast<Function>(module->
-    getOrInsertFunction("brainf", Type::VoidTy, NULL));
+    getOrInsertFunction("brainf", Type::getVoidTy(C), NULL));
 
-  builder = new IRBuilder<>(BasicBlock::Create(label, brainf_func));
+  builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func));
 
   //%arr = malloc i8, i32 %d
-  ConstantInt *val_mem = ConstantInt::get(APInt(32, memtotal));
-  ptr_arr = builder->CreateMalloc(IntegerType::Int8Ty, val_mem, "arr");
+  ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal));
+  ptr_arr = builder->CreateMalloc(IntegerType::getInt8Ty(C), val_mem, "arr");
 
   //call void @llvm.memset.i32(i8 *%arr, i8 0, i32 %d, i32 1)
   {
     Value *memset_params[] = {
       ptr_arr,
-      ConstantInt::get(APInt(8, 0)),
+      ConstantInt::get(C, APInt(8, 0)),
       val_mem,
-      ConstantInt::get(APInt(32, 1))
+      ConstantInt::get(C, APInt(32, 1))
     };
 
     CallInst *memset_call = builder->
@@ -97,12 +97,12 @@ void BrainF::header(LLVMContext& C) {
   //%arrmax = getelementptr i8 *%arr, i32 %d
   if (comflag & flag_arraybounds) {
     ptr_arrmax = builder->
-      CreateGEP(ptr_arr, ConstantInt::get(APInt(32, memtotal)), "arrmax");
+      CreateGEP(ptr_arr, ConstantInt::get(C, APInt(32, memtotal)), "arrmax");
   }
 
   //%head.%d = getelementptr i8 *%arr, i32 %d
   curhead = builder->CreateGEP(ptr_arr,
-                               ConstantInt::get(APInt(32, memtotal/2)),
+                               ConstantInt::get(C, APInt(32, memtotal/2)),
                                headreg);
 
 
@@ -110,13 +110,13 @@ void BrainF::header(LLVMContext& C) {
   //Function footer
 
   //brainf.end:
-  endbb = BasicBlock::Create(label, brainf_func);
+  endbb = BasicBlock::Create(C, label, brainf_func);
 
   //free i8 *%arr
   new FreeInst(ptr_arr, endbb);
 
   //ret void
-  ReturnInst::Create(endbb);
+  ReturnInst::Create(C, endbb);
 
 
@@ -124,28 +124,28 @@ void BrainF::header(LLVMContext& C) {
   if (comflag & flag_arraybounds) {
     //@aberrormsg = internal constant [%d x i8] c"\00"
-    Constant *msg_0 = ConstantArray::
-      get("Error: The head has left the tape.", true);
+    Constant *msg_0 =
+      ConstantArray::get(C, "Error: The head has left the tape.", true);
 
     GlobalVariable *aberrormsg = new GlobalVariable(
+      *module,
       msg_0->getType(),
       true,
       GlobalValue::InternalLinkage,
       msg_0,
-      "aberrormsg",
-      module);
+      "aberrormsg");
 
     //declare i32 @puts(i8 *)
     Function *puts_func = cast<Function>(module->
-      getOrInsertFunction("puts", IntegerType::Int32Ty,
-                          PointerType::getUnqual(IntegerType::Int8Ty), NULL));
+      getOrInsertFunction("puts", IntegerType::getInt32Ty(C),
+                          PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL));
 
     //brainf.aberror:
-    aberrorbb = BasicBlock::Create(label, brainf_func);
+    aberrorbb = BasicBlock::Create(C, label, brainf_func);
 
     //call i32 @puts(i8 *getelementptr([%d x i8] *@aberrormsg, i32 0, i32 0))
     {
-      Constant *zero_32 = Constant::getNullValue(IntegerType::Int32Ty);
+      Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C));
 
       Constant *gep_params[] = {
         zero_32,
@@ -172,7 +172,8 @@ void BrainF::header(LLVMContext& C) {
   }
 }
 
-void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
+void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb,
+                      LLVMContext &C) {
   Symbol cursym = SYM_NONE;
   int curvalue = 0;
   Symbol nextsym = SYM_NONE;
@@ -197,7 +198,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
 
         //%tape.%d = trunc i32 %tape.%d to i8
         Value *tape_1 = builder->
-          CreateTrunc(tape_0, IntegerType::Int8Ty, tapereg);
+          CreateTrunc(tape_0, IntegerType::getInt8Ty(C), tapereg);
 
         //store i8 %tape.%d, i8 *%head.%d
         builder->CreateStore(tape_1, curhead);
@@ -211,7 +212,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
 
         //%tape.%d = sext i8 %tape.%d to i32
         Value *tape_1 = builder->
-          CreateSExt(tape_0, IntegerType::Int32Ty, tapereg);
+          CreateSExt(tape_0, IntegerType::getInt32Ty(C), tapereg);
 
         //call i32 @putchar(i32 %tape.%d)
         Value *putchar_params[] = {
@@ -228,7 +229,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
       {
         //%head.%d = getelementptr i8 *%head.%d, i32 %d
         curhead = builder->
-          CreateGEP(curhead, ConstantInt::get(APInt(32, curvalue)),
+          CreateGEP(curhead, ConstantInt::get(C, APInt(32, curvalue)),
                     headreg);
 
         //Error block for array out of bounds
@@ -247,7 +248,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
             CreateOr(test_0, test_1, testreg);
 
           //br i1 %test.%d, label %main.%d, label %main.%d
-          BasicBlock *nextbb = BasicBlock::Create(label, brainf_func);
+          BasicBlock *nextbb = BasicBlock::Create(C, label, brainf_func);
           builder->CreateCondBr(test_2, aberrorbb, nextbb);
 
           //main.%d:
@@ -263,7 +264,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
 
         //%tape.%d = add i8 %tape.%d, %d
         Value *tape_1 = builder->
-          CreateAdd(tape_0, ConstantInt::get(APInt(8, curvalue)), tapereg);
+          CreateAdd(tape_0, ConstantInt::get(C, APInt(8, curvalue)), tapereg);
 
         //store i8 %tape.%d, i8 *%head.%d\n"
         builder->CreateStore(tape_1, curhead);
@@ -273,23 +274,23 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
     case SYM_LOOP:
       {
         //br label %main.%d
-        BasicBlock *testbb = BasicBlock::Create(label, brainf_func);
+        BasicBlock *testbb = BasicBlock::Create(C, label, brainf_func);
         builder->CreateBr(testbb);
 
         //main.%d:
         BasicBlock *bb_0 = builder->GetInsertBlock();
-        BasicBlock *bb_1 = BasicBlock::Create(label, brainf_func);
+        BasicBlock *bb_1 = BasicBlock::Create(C, label, brainf_func);
         builder->SetInsertPoint(bb_1);
 
         // Make part of PHI instruction now, wait until end of loop to finish
         PHINode *phi_0 =
-          PHINode::Create(PointerType::getUnqual(IntegerType::Int8Ty),
+          PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)),
                           headreg, testbb);
         phi_0->reserveOperandSpace(2);
         phi_0->addIncoming(curhead, bb_0);
         curhead = phi_0;
 
-        readloop(phi_0, bb_1, testbb);
+        readloop(phi_0, bb_1, testbb, C);
       }
       break;
@@ -427,12 +428,11 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
   LoadInst *tape_0 = new LoadInst(head_0, tapereg, testbb);
 
   //%test.%d = icmp eq i8 %tape.%d, 0
-  ICmpInst *test_0 = new ICmpInst(ICmpInst::ICMP_EQ, tape_0,
-                                  ConstantInt::get(APInt(8, 0)), testreg,
-                                  testbb);
+  ICmpInst *test_0 = new ICmpInst(*testbb, ICmpInst::ICMP_EQ, tape_0,
+                                  ConstantInt::get(C, APInt(8, 0)), testreg);
 
   //br i1 %test.%d, label %main.%d, label %main.%d
-  BasicBlock *bb_0 = BasicBlock::Create(label, brainf_func);
+  BasicBlock *bb_0 = BasicBlock::Create(C, label, brainf_func);
   BranchInst::Create(bb_0, oldbb, test_0, testbb);
 
   //main.%d:
@@ -440,7 +440,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb) {
 
   //%head.%d = phi i8 *[%head.%d, %main.%d]
   PHINode *phi_1 = builder->
-    CreatePHI(PointerType::getUnqual(IntegerType::Int8Ty), headreg);
+    CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), headreg);
   phi_1->reserveOperandSpace(1);
   phi_1->addIncoming(head_0, testbb);
   curhead = phi_1;
diff --git a/examples/BrainF/BrainF.h b/examples/BrainF/BrainF.h
index 053ddaa587554..add0687d54a63 100644
--- a/examples/BrainF/BrainF.h
+++ b/examples/BrainF/BrainF.h
@@ -70,7 +70,8 @@ class BrainF {
   /// The main loop for parsing.  It calls itself recursively
   /// to handle the depth of nesting of "[]".
-  void readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb);
+  void readloop(PHINode *phi, BasicBlock *oldbb,
+                BasicBlock *testbb, LLVMContext &Context);
 
   /// Constants during parsing
   int memtotal;
diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp
index 4eaa4940e70ab..6f4ba69927ce1 100644
--- a/examples/BrainF/BrainFDriver.cpp
+++ b/examples/BrainF/BrainFDriver.cpp
@@ -32,11 +32,12 @@
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/Target/TargetSelect.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/TargetSelect.h"
-#include <fstream>
+#include "llvm/Support/raw_ostream.h"
 #include <iostream>
+#include <fstream>
 using namespace llvm;
 
 //Command line options
@@ -58,9 +59,10 @@ JIT("jit", cl::desc("Run program Just-In-Time"));
 void addMainFunction(Module *mod) {
   //define i32 @main(i32 %argc, i8 **%argv)
   Function *main_func = cast<Function>(mod->
-    getOrInsertFunction("main", IntegerType::Int32Ty, IntegerType::Int32Ty,
+    getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()),
+                        IntegerType::getInt32Ty(mod->getContext()),
                         PointerType::getUnqual(PointerType::getUnqual(
-                          IntegerType::Int8Ty)), NULL));
+                          IntegerType::getInt8Ty(mod->getContext()))), NULL));
   {
     Function::arg_iterator args = main_func->arg_begin();
     Value *arg_0 = args++;
@@ -70,7 +72,7 @@ void addMainFunction(Module *mod) {
   }
 
   //main.0:
-  BasicBlock *bb = BasicBlock::Create("main.0", main_func);
+  BasicBlock *bb = BasicBlock::Create(mod->getContext(), "main.0", main_func);
 
   //call void @brainf()
   {
@@ -80,59 +82,58 @@ void addMainFunction(Module *mod) {
   }
 
   //ret i32 0
-  ReturnInst::Create(ConstantInt::get(APInt(32, 0)), bb);
+  ReturnInst::Create(mod->getContext(),
+                     ConstantInt::get(mod->getContext(), APInt(32, 0)), bb);
 }
 
 int main(int argc, char **argv) {
   cl::ParseCommandLineOptions(argc, argv, " BrainF compiler\n");
 
-  LLVMContext Context;
+  LLVMContext &Context = getGlobalContext();
 
   if (InputFilename == "") {
-    std::cerr<<"Error: You must specify the filename of the program to "
+    errs() << "Error: You must specify the filename of the program to "
               "be compiled.  Use --help to see the options.\n";
    abort();
  }
 
  //Get the output stream
-  std::ostream *out = &std::cout;
+  raw_ostream *out = &outs();
  if (!JIT) {
    if (OutputFilename == "") {
      std::string base = InputFilename;
-      if (InputFilename == "-") {base = "a";}
+      if (InputFilename == "-") { base = "a"; }
 
-      //Use default filename
-      const char *suffix = ".bc";
-      OutputFilename = base+suffix;
+      // Use default filename.
+      OutputFilename = base+".bc";
    }
    if (OutputFilename != "-") {
-      out = new std::
-        ofstream(OutputFilename.c_str(),
-                 std::ios::out | std::ios::trunc | std::ios::binary);
+      std::string ErrInfo;
+      out = new raw_fd_ostream(OutputFilename.c_str(), ErrInfo,
+                               raw_fd_ostream::F_Binary);
    }
  }
 
  //Get the input stream
  std::istream *in = &std::cin;
-  if (InputFilename != "-") {
+  if (InputFilename != "-")
    in = new std::ifstream(InputFilename.c_str());
-  }
 
  //Gather the compile flags
  BrainF::CompileFlags cf = BrainF::flag_off;
-  if (ArrayBoundsChecking) {
+  if (ArrayBoundsChecking)
    cf = BrainF::CompileFlags(cf | BrainF::flag_arraybounds);
-  }
 
  //Read the BrainF program
  BrainF bf;
  Module *mod = bf.parse(in, 65536, cf, Context); //64 KiB
-  if (in != &std::cin) {delete in;}
+  if (in != &std::cin)
+    delete in;
  addMainFunction(mod);
 
  //Verify generated code
  if (verifyModule(*mod)) {
-    std::cerr<<"Error: module failed verification.  This shouldn't happen.\n";
+    errs() << "Error: module failed verification.  This shouldn't happen.\n";
    abort();
  }
 
@@ -140,9 +141,8 @@ int main(int argc, char **argv) {
  if (JIT) {
    InitializeNativeTarget();
 
-    std::cout << "------- Running JIT -------\n";
-    ExistingModuleProvider *mp = new ExistingModuleProvider(mod);
-    ExecutionEngine *ee = ExecutionEngine::create(mp, false);
+    outs() << "------- Running JIT -------\n";
+    ExecutionEngine *ee = EngineBuilder(mod).create();
    std::vector<GenericValue> args;
    Function *brainf_func = mod->getFunction("brainf");
    GenericValue gv = ee->runFunction(brainf_func, args);
@@ -151,7 +151,8 @@ int main(int argc, char **argv) {
 
  //Clean up
-  if (out != &std::cout) {delete out;}
+  if (out != &outs())
+    delete out;
  delete mod;
  llvm_shutdown();
diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp
index c3431fc3527e0..b1a4691a9f6cc 100644
--- a/examples/Fibonacci/fibonacci.cpp
+++ b/examples/Fibonacci/fibonacci.cpp
@@ -36,35 +36,36 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-static Function *CreateFibFunction(Module *M) {
+static Function *CreateFibFunction(Module *M, LLVMContext &Context) {
   // Create the fib function and insert it into module M.  This function is said
   // to return an int and take an int parameter.
   Function *FibF =
-    cast<Function>(M->getOrInsertFunction("fib", Type::Int32Ty, Type::Int32Ty,
+    cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context),
+                                          Type::getInt32Ty(Context),
                                           (Type *)0));
 
   // Add a basic block to the function.
-  BasicBlock *BB = BasicBlock::Create("EntryBlock", FibF);
+  BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF);
 
   // Get pointers to the constants.
-  Value *One = ConstantInt::get(Type::Int32Ty, 1);
-  Value *Two = ConstantInt::get(Type::Int32Ty, 2);
+  Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1);
+  Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2);
 
   // Get pointer to the integer argument of the add1 function...
   Argument *ArgX = FibF->arg_begin();   // Get the arg.
   ArgX->setName("AnArg");            // Give it a nice symbolic name for fun.
 
   // Create the true_block.
-  BasicBlock *RetBB = BasicBlock::Create("return", FibF);
+  BasicBlock *RetBB = BasicBlock::Create(Context, "return", FibF);
 
   // Create an exit block.
-  BasicBlock* RecurseBB = BasicBlock::Create("recurse", FibF);
+  BasicBlock* RecurseBB = BasicBlock::Create(Context, "recurse", FibF);
 
   // Create the "if (arg <= 2) goto exitbb"
-  Value *CondInst = new ICmpInst(ICmpInst::ICMP_SLE, ArgX, Two, "cond", BB);
+  Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond");
   BranchInst::Create(RetBB, RecurseBB, CondInst, BB);
 
   // Create: ret int 1
-  ReturnInst::Create(One, RetBB);
+  ReturnInst::Create(Context, One, RetBB);
 
   // create fib(x-1)
   Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB);
@@ -82,7 +83,7 @@ static Function *CreateFibFunction(Module *M) {
                                      "addresult", RecurseBB);
 
   // Create the return instruction and add it to the basic block
-  ReturnInst::Create(Sum, RecurseBB);
+  ReturnInst::Create(Context, Sum, RecurseBB);
 
   return FibF;
 }
@@ -97,11 +98,10 @@ int main(int argc, char **argv) {
   Module *M = new Module("test", Context);
 
   // We are about to create the "fib" function:
-  Function *FibF = CreateFibFunction(M);
+  Function *FibF = CreateFibFunction(M, Context);
 
   // Now we going to create JIT
-  ExistingModuleProvider *MP = new ExistingModuleProvider(M);
-  ExecutionEngine *EE = ExecutionEngine::create(MP, false);
+  ExecutionEngine *EE = EngineBuilder(M).create();
 
   errs() << "verifying... ";
   if (verifyModule(*M)) {
diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp
index 6734547916245..ec9c2e68541f5 100644
--- a/examples/HowToUseJIT/HowToUseJIT.cpp
+++ b/examples/HowToUseJIT/HowToUseJIT.cpp
@@ -61,15 +61,16 @@ int main() {
   // function will have a return type of "int" and take an argument of "int".
   // The '0' terminates the list of argument types.
   Function *Add1F =
-    cast<Function>(M->getOrInsertFunction("add1", Type::Int32Ty, Type::Int32Ty,
+    cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context),
+                                          Type::getInt32Ty(Context),
                                           (Type *)0));
 
   // Add a basic block to the function.  As before, it automatically inserts
   // because of the last argument.
-  BasicBlock *BB = BasicBlock::Create("EntryBlock", Add1F);
+  BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", Add1F);
 
   // Get pointers to the constant `1'.
-  Value *One = ConstantInt::get(Type::Int32Ty, 1);
+  Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1);
 
   // Get pointers to the integer argument of the add1 function...
   assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg
@@ -80,7 +81,7 @@ int main() {
   Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB);
 
   // Create the return instruction and add it to the basic block
-  ReturnInst::Create(Add, BB);
+  ReturnInst::Create(Context, Add, BB);
 
   // Now, function add1 is ready.
 
@@ -88,24 +89,24 @@ int main() {
   // Now we going to create function `foo', which returns an int and takes no
   // arguments.
   Function *FooF =
-    cast<Function>(M->getOrInsertFunction("foo", Type::Int32Ty, (Type *)0));
+    cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context),
+                                          (Type *)0));
 
   // Add a basic block to the FooF function.
-  BB = BasicBlock::Create("EntryBlock", FooF);
+  BB = BasicBlock::Create(Context, "EntryBlock", FooF);
 
   // Get pointers to the constant `10'.
- Value *Ten = ConstantInt::get(Type::Int32Ty, 10); + Value *Ten = ConstantInt::get(Type::getInt32Ty(Context), 10); // Pass Ten to the call call: CallInst *Add1CallRes = CallInst::Create(Add1F, Ten, "add1", BB); Add1CallRes->setTailCall(true); // Create the return instruction and add it to the basic block. - ReturnInst::Create(Add1CallRes, BB); + ReturnInst::Create(Context, Add1CallRes, BB); // Now we create the JIT. - ExistingModuleProvider* MP = new ExistingModuleProvider(M); - ExecutionEngine* EE = ExecutionEngine::create(MP, false); + ExecutionEngine* EE = EngineBuilder(M).create(); outs() << "We just constructed this LLVM module:\n\n" << *M; outs() << "\n\nRunning foo: "; diff --git a/examples/Kaleidoscope/CMakeLists.txt b/examples/Kaleidoscope/CMakeLists.txt index 9a18aae30534f..8c87ac50b7a4c 100644 --- a/examples/Kaleidoscope/CMakeLists.txt +++ b/examples/Kaleidoscope/CMakeLists.txt @@ -1,5 +1,6 @@ -set(LLVM_LINK_COMPONENTS core jit native) - -add_llvm_example(Kaleidoscope - toy.cpp - ) +add_subdirectory(Chapter2) +add_subdirectory(Chapter3) +add_subdirectory(Chapter4) +add_subdirectory(Chapter5) +add_subdirectory(Chapter6) +add_subdirectory(Chapter7) diff --git a/examples/Kaleidoscope/Chapter2/CMakeLists.txt b/examples/Kaleidoscope/Chapter2/CMakeLists.txt new file mode 100644 index 0000000000000..79f2b172d0df3 --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_example(Kaleidoscope-Ch2 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter2/Makefile b/examples/Kaleidoscope/Chapter2/Makefile new file mode 100644 index 0000000000000..1a9b94ce541e6 --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/Makefile @@ -0,0 +1,13 @@ +##===- examples/Kaleidoscope/Chapter2/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +TOOLNAME = Kaleidoscope-Ch2 +EXAMPLE_TOOL = 1 + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp new file mode 100644 index 0000000000000..f4f09d0b351a4 --- /dev/null +++ b/examples/Kaleidoscope/Chapter2/toy.cpp @@ -0,0 +1,398 @@ +#include +#include +#include +#include +#include + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. 
+ while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; +public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector Args; +public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. 
+static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. 
+ getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (ParseDefinition()) { + fprintf(stderr, "Parsed a function definition.\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (ParseExtern()) { + fprintf(stderr, "Parsed an extern\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (ParseTopLevelExpr()) { + fprintf(stderr, "Parsed a top-level expr\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter3/CMakeLists.txt b/examples/Kaleidoscope/Chapter3/CMakeLists.txt new file mode 100644 index 0000000000000..1af8db00a1722 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core) + +add_llvm_example(Kaleidoscope-Ch3 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter3/Makefile b/examples/Kaleidoscope/Chapter3/Makefile new file mode 100644 index 0000000000000..4cc6948d80374 --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter3/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch3 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core + +include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp new file mode 100644 index 0000000000000..73520d8fa953f --- /dev/null +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -0,0 +1,563 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/IRBuilder.h" +#include +#include +#include +#include +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. 
+class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; +public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector Args; +public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). 
+ return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map NamedValues; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? 
V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateAdd(L, R, "addtmp"); + case '-': return Builder.CreateSub(L, R, "subtmp"); + case '*': return Builder.CreateMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. 
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read top-level expression:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt
new file mode 100644
index 0000000000000..0d1ac533f02d4
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch4
+  toy.cpp
+  )
diff --git a/examples/Kaleidoscope/Chapter4/Makefile b/examples/Kaleidoscope/Chapter4/Makefile
new file mode 100644
index 0000000000000..7bc742fb1e44a
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter4/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter4/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
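+# LEVEL points at the root of the LLVM source tree (three directories up from
+# this example) so that Makefile.common, included below, can be found.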
+TOOLNAME = Kaleidoscope-Ch4
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
new file mode 100644
index 0000000000000..d136635e81142
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -0,0 +1,610 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+
+  switch (Op) {
+  case '+': return Builder.CreateAdd(L, R, "addtmp");
+  case '-': return Builder.CreateSub(L, R, "subtmp");
+  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: return ErrorV("invalid binary operator");
+  }
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  ExistingModuleProvider *OurModuleProvider =
+    new ExistingModuleProvider(TheModule);
+
+  // Create the JIT.  This takes ownership of the module and module provider.
+  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+  FunctionPassManager OurFPM(OurModuleProvider);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
new file mode 100644
index 0000000000000..2d75ad35923fa
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch5
+  toy.cpp
+  )
diff --git a/examples/Kaleidoscope/Chapter5/Makefile b/examples/Kaleidoscope/Chapter5/Makefile
new file mode 100644
index 0000000000000..5a8355d3153d1
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter5/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter5/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch5
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
new file mode 100644
index 0000000000000..c2613e36029b9
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -0,0 +1,855 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+    : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+
+  if (CurTok != tok_else)
+    return Error("expected else");
+
+  getNextToken();
+
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+
+  return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+
+
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  case tok_for:        return ParseForExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+
+  switch (Op) {
+  case '+': return Builder.CreateAdd(L, R, "addtmp");
+  case '-': return Builder.CreateSub(L, R, "subtmp");
+  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: return ErrorV("invalid binary operator");
+  }
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder.SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->Codegen();
+  if (ElseV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder.GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
+
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+Value *ForExprAST::Codegen() {
+  // Output this as:
+  //   ...
+  //   start = startexpr
+  //   goto loop
+  // loop:
+  //   variable = phi [start, loopheader], [nextvariable, loopend]
+  //   ...
+  //   bodyexpr
+  //   ...
+  // loopend:
+  //   step = stepexpr
+  //   nextvariable = variable + step
+  //   endcond = endexpr
+  //   br endcond, loop, endloop
+  // outloop:
+
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
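+  // (The block we are currently emitting into is remembered below as the
+  // preheader, so the loop's PHI node can record where the start value
+  // comes from.)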
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder.CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder.SetInsertPoint(LoopBB);
+
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  Variable->addIncoming(StartVal, PreheaderBB);
+
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (Body->Codegen() == 0)
+    return 0;
+
+  // Emit the step value.
+  Value *StepVal;
+  if (Step) {
+    StepVal = Step->Codegen();
+    if (StepVal == 0) return 0;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+  }
+
+  Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  EndCond = Builder.CreateFCmpONE(EndCond,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                  "loopcond");
+
+  // Create the "after loop" block and insert it.
+  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder.SetInsertPoint(AfterBB);
+
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  ExistingModuleProvider *OurModuleProvider =
+    new ExistingModuleProvider(TheModule);
+
+  // Create the JIT.  This takes ownership of the module and module provider.
+  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+  FunctionPassManager OurFPM(OurModuleProvider);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
new file mode 100644
index 0000000000000..2e15a5f7dfc60
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch6
+  toy.cpp
+  )
diff --git a/examples/Kaleidoscope/Chapter6/Makefile b/examples/Kaleidoscope/Chapter6/Makefile
new file mode 100644
index 0000000000000..de2d758728eb2
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter6/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter6/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch6
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
new file mode 100644
index 0000000000000..638a340d51aea
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -0,0 +1,973 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10,
+
+  // operators
+  tok_binary = -11, tok_unary = -12
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  ExprAST *Operand;
+public:
+  UnaryExprAST(char opcode, ExprAST *operand)
+    : Opcode(opcode), Operand(operand) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+    : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as if it is an operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool isOperator;
+  unsigned Precedence;  // Precedence if a binary op.
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+               bool isoperator = false, unsigned prec = 0)
+    : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name[Name.size()-1];
+  }
+
+  unsigned getBinaryPrecedence() const { return Precedence; }
+
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+
+  if (CurTok != tok_else)
+    return Error("expected else");
+
+  getNextToken();
+
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+
+  return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+
+
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  case tok_for:        return ParseForExpr();
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+static ExprAST *ParseUnary() {
+  // If the current token is not an operator, it must be a primary expr.
+  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+    return ParsePrimary();
+
+  // If this is a unary operator, read it.
+  int Opc = CurTok;
+  getNextToken();
+  if (ExprAST *Operand = ParseUnary())
+    return new UnaryExprAST(Opc, Operand);
+  return 0;
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+
+    // Parse the unary expression after the binary operator.
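+    // (For example, given "a < b * c": '<' has precedence 10 and '*' has
+    // precedence 40, so after eating '<' we parse "b", notice that '*' binds
+    // more tightly, and fold "b * c" into the RHS before '<' is applied.)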
+    ExprAST *RHS = ParseUnary();
+    if (!RHS) return 0;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParseUnary();
+  if (!LHS) return 0;
+
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return ErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)
+    return ErrorP("Invalid number of operands for operator");
+
+  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *UnaryExprAST::Codegen() {
+  Value *OperandV = Operand->Codegen();
+  if (OperandV == 0) return 0;
+
+  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+  if (F == 0)
+    return ErrorV("Unknown unary operator");
+
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+
+  switch (Op) {
+  case '+': return Builder.CreateAdd(L, R, "addtmp");
+  case '-': return Builder.CreateSub(L, R, "subtmp");
+  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one.  Emit
+  // a call to it.
+  Function *F = TheModule->getFunction(std::string("binary")+Op);
+  assert(F && "binary operator not found!");
+
+  Value *Ops[] = { L, R };
+  return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder.SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->Codegen();
+  if (ElseV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder.GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
+
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+Value *ForExprAST::Codegen() {
+  // Output this as:
+  //   ...
+  //   start = startexpr
+  //   goto loop
+  // loop:
+  //   variable = phi [start, loopheader], [nextvariable, loopend]
+  //   ...
+  //   bodyexpr
+  //   ...
+  // loopend:
+  //   step = stepexpr
+  //   nextvariable = variable + step
+  //   endcond = endexpr
+  //   br endcond, loop, endloop
+  // outloop:
+
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder.CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder.SetInsertPoint(LoopBB);
+
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                        VarName.c_str());
+  Variable->addIncoming(StartVal, PreheaderBB);
+
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (Body->Codegen() == 0)
+    return 0;
+
+  // Emit the step value.
+  Value *StepVal;
+  if (Step) {
+    StepVal = Step->Codegen();
+    if (StepVal == 0) return 0;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+  }
+
+  Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  EndCond = Builder.CreateFCmpONE(EndCond,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                  "loopcond");
+
+  // Create the "after loop" block and insert it.
+  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder.SetInsertPoint(AfterBB);
+
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+
+    // If F took a different number of args, reject.
+ if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // If this is an operator, install it. + if (Proto->isBinaryOp()) + BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. 
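+/// Both helpers are declared extern "C" so their symbol names are not
+/// mangled; the JIT resolves calls from Kaleidoscope code by plain name.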
+extern "C" +double printd(double X) { + printf("%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + ExistingModuleProvider *OurModuleProvider = + new ExistingModuleProvider(TheModule); + + // Create the JIT. This takes ownership of the module and module provider. + TheExecutionEngine = EngineBuilder(OurModuleProvider).create(); + + FunctionPassManager OurFPM(OurModuleProvider); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt new file mode 100644 index 0000000000000..9b8227c693409 --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch7 + toy.cpp + ) diff --git a/examples/Kaleidoscope/Chapter7/Makefile b/examples/Kaleidoscope/Chapter7/Makefile new file mode 100644 index 0000000000000..9d2df6f02d074 --- /dev/null +++ b/examples/Kaleidoscope/Chapter7/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter7/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
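+# TOOLNAME names the binary to build; EXAMPLE_TOOL=1 tells the LLVM build
+# system to treat it as an example program rather than a regular tool.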
+TOOLNAME = Kaleidoscope-Ch7
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit interpreter native
+
+include $(LEVEL)/Makefile.common
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
new file mode 100644
index 0000000000000..8b0c321c06c5d
--- /dev/null
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -0,0 +1,1139 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10,
+
+  // operators
+  tok_binary = -11, tok_unary = -12,
+
+  // var definition
+  tok_var = -13
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    if (IdentifierStr == "var") return tok_var;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
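+/// Each node implements Codegen(), which emits IR for that node and returns
+/// the llvm::Value the expression evaluates to, or null after an error.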
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  const std::string &getName() const { return Name; }
+  virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  ExprAST *Operand;
+public:
+  UnaryExprAST(char opcode, ExprAST *operand)
+    : Opcode(opcode), Operand(operand) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+    : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// VarExprAST - Expression class for var/in.
+class VarExprAST : public ExprAST {
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+  ExprAST *Body;
+public:
+  VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+             ExprAST *body)
+    : VarNames(varnames), Body(body) {}
+
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its argument names as well as whether it is an operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool isOperator;
+  unsigned Precedence;  // Precedence if a binary op.
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+               bool isoperator = false, unsigned prec = 0)
+    : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name[Name.size()-1];
+  }
+
+  unsigned getBinaryPrecedence() const { return Precedence; }
+
+  Function *Codegen();
+
+  void CreateArgumentAllocas(Function *F);
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); return 0; }
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+
+  if (CurTok != tok_else)
+    return Error("expected else");
+
+  getNextToken();
+
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+
+  return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// varexpr ::= 'var' identifier ('=' expression)?
+///                   (',' identifier ('=' expression)?)* 'in' expression
+static ExprAST *ParseVarExpr() {
+  getNextToken();  // eat the var.
+
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+  // At least one variable name is required.
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after var");
+
+  while (1) {
+    std::string Name = IdentifierStr;
+    getNextToken();  // eat identifier.
+
+    // Read the optional initializer.
+    ExprAST *Init = 0;
+    if (CurTok == '=') {
+      getNextToken(); // eat the '='.
+
+      Init = ParseExpression();
+      if (Init == 0) return 0;
+    }
+
+    VarNames.push_back(std::make_pair(Name, Init));
+
+    // End of var list, exit loop.
+    if (CurTok != ',') break;
+    getNextToken(); // eat the ','.
+
+    if (CurTok != tok_identifier)
+      return Error("expected identifier list after var");
+  }
+
+  // At this point, we have to have 'in'.
+  if (CurTok != tok_in)
+    return Error("expected 'in' keyword after 'var'");
+  getNextToken();  // eat 'in'.
+
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new VarExprAST(VarNames, Body);
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  case tok_for:        return ParseForExpr();
+  case tok_var:        return ParseVarExpr();
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+static ExprAST *ParseUnary() {
+  // If the current token is not an operator, it must be a primary expr.
+  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+    return ParsePrimary();
+
+  // If this is a unary operator, read it.
+  int Opc = CurTok;
+  getNextToken();
+  if (ExprAST *Operand = ParseUnary())
+    return new UnaryExprAST(Opc, Operand);
+  return 0;
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+
+    // Parse the unary expression after the binary operator.
+    ExprAST *RHS = ParseUnary();
+    if (!RHS) return 0;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+
+    // Merge LHS/RHS.
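+    // The merged node then becomes the LHS for the next loop iteration, so
+    // operators of equal precedence associate to the left.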
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParseUnary();
+  if (!LHS) return 0;
+
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return ErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)
+    return ErrorP("Invalid number of operands for operator");
+
+  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, AllocaInst*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+/// the function.  This is used for mutable variables etc.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          const std::string &VarName) {
+  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+                   TheFunction->getEntryBlock().begin());
+  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+                           VarName.c_str());
+}
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
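+  // NamedValues now maps names to stack slots (allocas) instead of SSA
+  // values, so an explicit load is required; the mem2reg pass scheduled in
+  // main() later promotes these slots back into registers.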
+  Value *V = NamedValues[Name];
+  if (V == 0) return ErrorV("Unknown variable name");
+
+  // Load the value.
+  return Builder.CreateLoad(V, Name.c_str());
+}
+
+Value *UnaryExprAST::Codegen() {
+  Value *OperandV = Operand->Codegen();
+  if (OperandV == 0) return 0;
+
+  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+  if (F == 0)
+    return ErrorV("Unknown unary operator");
+
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+    if (!LHSE)
+      return ErrorV("destination of '=' must be a variable");
+    // Codegen the RHS.
+    Value *Val = RHS->Codegen();
+    if (Val == 0) return 0;
+
+    // Look up the name.
+    Value *Variable = NamedValues[LHSE->getName()];
+    if (Variable == 0) return ErrorV("Unknown variable name");
+
+    Builder.CreateStore(Val, Variable);
+    return Val;
+  }
+
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+
+  switch (Op) {
+  case '+': return Builder.CreateAdd(L, R, "addtmp");
+  case '-': return Builder.CreateSub(L, R, "subtmp");
+  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one.  Emit
+  // a call to it.
+  Function *F = TheModule->getFunction(std::string("binary")+Op);
+  assert(F && "binary operator not found!");
+
+  Value *Ops[] = { L, R };
+  return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+
+  // Emit else block.
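+  // ElseBB was created unattached above; it is added to the function only
+  // now, so any blocks emitted while generating 'Then' come before it.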
+ TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // var = alloca double + // ... + // start = startexpr + // store start -> var + // goto loop + // loop: + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // endcond = endexpr + // + // curvar = load var + // nextvar = curvar + step + // store nextvar -> var + // br endcond, loop, endloop + // outloop: + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. 
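+  // Every Kaleidoscope construct is an expression and must yield a value;
+  // a loop has no natural result, so it conventionally evaluates to 0.0.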
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Value *VarExprAST::Codegen() {
+  std::vector<AllocaInst *> OldBindings;
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+    const std::string &VarName = VarNames[i].first;
+    ExprAST *Init = VarNames[i].second;
+
+    // Emit the initializer before adding the variable to scope, this prevents
+    // the initializer from referencing the variable itself, and permits stuff
+    // like this:
+    //  var a = 1 in
+    //    var a = a in ...   # refers to outer 'a'.
+    Value *InitVal;
+    if (Init) {
+      InitVal = Init->Codegen();
+      if (InitVal == 0) return 0;
+    } else { // If not specified, use 0.0.
+      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+    }
+
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+    Builder.CreateStore(InitVal, Alloca);
+
+    // Remember the old variable binding so that we can restore the binding when
+    // we unrecurse.
+    OldBindings.push_back(NamedValues[VarName]);
+
+    // Remember this binding.
+    NamedValues[VarName] = Alloca;
+  }
+
+  // Codegen the body, now that all vars are in scope.
+  Value *BodyVal = Body->Codegen();
+  if (BodyVal == 0) return 0;
+
+  // Pop all our variables from scope.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+    NamedValues[VarNames[i].first] = OldBindings[i];
+
+  // Return the body computation.
+  return BodyVal;
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx)
+    AI->setName(Args[Idx]);
+
+  return F;
+}
+
+/// CreateArgumentAllocas - Create an alloca for each argument and register the
+/// argument in the symbol table so that references to it will succeed.
+void PrototypeAST::CreateArgumentAllocas(Function *F) {
+  Function::arg_iterator AI = F->arg_begin();
+  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+    // Store the initial value into the alloca.
+    Builder.CreateStore(AI, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = Alloca;
+  }
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+
+  // If this is an operator, install it.
+  if (Proto->isBinaryOp())
+    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+  // Create a new basic block to start insertion into.
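+  // Placement matters: CreateEntryBlockAlloca puts allocas into this first
+  // block, and mem2reg only promotes allocas that appear in the entry block.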
+ BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + // Add all arguments to the symbol table and create their allocas. + Proto->CreateArgumentAllocas(TheFunction); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + + if (Proto->isBinaryOp()) + BinopPrecedence.erase(Proto->getOperatorName()); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" +double printd(double X) { + printf("%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. 
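+  // Note: the EngineBuilder call below takes ownership of the module (via
+  // the ModuleProvider), so TheModule must not be deleted separately.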
+  TheModule = new Module("my cool jit", Context);
+
+  ExistingModuleProvider *OurModuleProvider =
+    new ExistingModuleProvider(TheModule);
+
+  // Create the JIT.  This takes ownership of the module and module provider.
+  TheExecutionEngine = EngineBuilder(OurModuleProvider).create();
+
+  FunctionPassManager OurFPM(OurModuleProvider);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Promote allocas to registers.
+  OurFPM.add(createPromoteMemoryToRegisterPass());
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
diff --git a/examples/Kaleidoscope/Makefile b/examples/Kaleidoscope/Makefile
index c1759a09fe32e..bd0c252c2c03e 100644
--- a/examples/Kaleidoscope/Makefile
+++ b/examples/Kaleidoscope/Makefile
@@ -6,10 +6,10 @@
 #  License. See LICENSE.TXT for details.
 #
 ##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = Kaleidoscope
-EXAMPLE_TOOL = 1
+LEVEL=../..
 
-LINK_COMPONENTS := core jit native
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS:= Chapter2 Chapter3 Chapter4 Chapter5 Chapter6 Chapter7
 
 include $(LEVEL)/Makefile.common
diff --git a/examples/ModuleMaker/ModuleMaker.cpp b/examples/ModuleMaker/ModuleMaker.cpp
index 59a86d031d2f3..6bc52c12a0344 100644
--- a/examples/ModuleMaker/ModuleMaker.cpp
+++ b/examples/ModuleMaker/ModuleMaker.cpp
@@ -19,7 +19,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Bitcode/ReaderWriter.h"
-#include <iostream>
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 int main() {
@@ -30,7 +30,8 @@ int main() {
   Module *M = new Module("test", Context);
 
   // Create the main function: first create the type 'int ()'
-  FunctionType *FT = FunctionType::get(Type::Int32Ty, /*not vararg*/false);
+  FunctionType *FT =
+    FunctionType::get(Type::getInt32Ty(Context), /*not vararg*/false);
 
   // By passing a module as the last parameter to the Function constructor,
   // it automatically gets appended to the Module.
@@ -38,11 +39,11 @@ int main() {
   // Add a basic block to the function... again, it automatically inserts
   // because of the last argument.
-  BasicBlock *BB = BasicBlock::Create("EntryBlock", F);
+  BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", F);
 
   // Get pointers to the constant integers...
-  Value *Two = ConstantInt::get(Type::Int32Ty, 2);
-  Value *Three = ConstantInt::get(Type::Int32Ty, 3);
+  Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2);
+  Value *Three = ConstantInt::get(Type::getInt32Ty(Context), 3);
 
   // Create the add instruction... does not insert...
   Instruction *Add = BinaryOperator::Create(Instruction::Add, Two, Three,
@@ -52,10 +53,10 @@
   BB->getInstList().push_back(Add);
 
   // Create the return instruction and add it to the basic block
-  BB->getInstList().push_back(ReturnInst::Create(Add));
+  BB->getInstList().push_back(ReturnInst::Create(Context, Add));
 
   // Output the bitcode file to stdout
-  WriteBitcodeToFile(M, std::cout);
+  WriteBitcodeToFile(M, outs());
 
   // Delete the module and all of its contents.
   delete M;
diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt
index d8dd7084c40df..fbdc6e5fc10b4 100644
--- a/examples/ParallelJIT/CMakeLists.txt
+++ b/examples/ParallelJIT/CMakeLists.txt
@@ -4,4 +4,6 @@ add_llvm_example(ParallelJIT
   ParallelJIT.cpp
   )
 
-target_link_libraries(ParallelJIT pthread)
+if(HAVE_LIBPTHREAD)
+  target_link_libraries(ParallelJIT pthread)
+endif(HAVE_LIBPTHREAD)
diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp
index eadd0f58e5d50..be40a282150d1 100644
--- a/examples/ParallelJIT/ParallelJIT.cpp
+++ b/examples/ParallelJIT/ParallelJIT.cpp
@@ -36,15 +36,17 @@ static Function* createAdd1(Module *M) {
   // function will have a return type of "int" and take an argument of "int".
   // The '0' terminates the list of argument types.
   Function *Add1F =
-    cast<Function>(M->getOrInsertFunction("add1", Type::Int32Ty, Type::Int32Ty,
+    cast<Function>(M->getOrInsertFunction("add1",
+                                          Type::getInt32Ty(M->getContext()),
+                                          Type::getInt32Ty(M->getContext()),
                                           (Type *)0));
 
   // Add a basic block to the function.  As before, it automatically inserts
   // because of the last argument.
-  BasicBlock *BB = BasicBlock::Create("EntryBlock", Add1F);
+  BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", Add1F);
 
   // Get pointers to the constant `1'.
-  Value *One = ConstantInt::get(Type::Int32Ty, 1);
+  Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
 
   // Get pointers to the integer argument of the add1 function...
   assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg
@@ -55,7 +57,7 @@ static Function* createAdd1(Module *M) {
   Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB);
 
   // Create the return instruction and add it to the basic block
-  ReturnInst::Create(Add, BB);
+  ReturnInst::Create(M->getContext(), Add, BB);
 
   // Now, function add1 is ready.
   return Add1F;
@@ -65,31 +67,33 @@ static Function *CreateFibFunction(Module *M) {
   // Create the fib function and insert it into module M.  This function is said
   // to return an int and take an int parameter.
   Function *FibF =
-    cast<Function>(M->getOrInsertFunction("fib", Type::Int32Ty, Type::Int32Ty,
+    cast<Function>(M->getOrInsertFunction("fib",
+                                          Type::getInt32Ty(M->getContext()),
+                                          Type::getInt32Ty(M->getContext()),
                                           (Type *)0));
 
   // Add a basic block to the function.
-  BasicBlock *BB = BasicBlock::Create("EntryBlock", FibF);
+  BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF);
 
   // Get pointers to the constants.
-  Value *One = ConstantInt::get(Type::Int32Ty, 1);
-  Value *Two = ConstantInt::get(Type::Int32Ty, 2);
+  Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
+  Value *Two = ConstantInt::get(Type::getInt32Ty(M->getContext()), 2);
 
   // Get pointer to the integer argument of the add1 function...
   Argument *ArgX = FibF->arg_begin();   // Get the arg.
   ArgX->setName("AnArg");            // Give it a nice symbolic name for fun.
 
   // Create the true_block.
- BasicBlock *RetBB = BasicBlock::Create("return", FibF); + BasicBlock *RetBB = BasicBlock::Create(M->getContext(), "return", FibF); // Create an exit block. - BasicBlock* RecurseBB = BasicBlock::Create("recurse", FibF); + BasicBlock* RecurseBB = BasicBlock::Create(M->getContext(), "recurse", FibF); // Create the "if (arg < 2) goto exitbb" - Value *CondInst = new ICmpInst(ICmpInst::ICMP_SLE, ArgX, Two, "cond", BB); + Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond"); BranchInst::Create(RetBB, RecurseBB, CondInst, BB); // Create: ret int 1 - ReturnInst::Create(One, RetBB); + ReturnInst::Create(M->getContext(), One, RetBB); // create fib(x-1) Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB); @@ -104,7 +108,7 @@ static Function *CreateFibFunction(Module *M) { BinaryOperator::CreateAdd(CallFibX1, CallFibX2, "addresult", RecurseBB); // Create the return instruction and add it to the basic block - ReturnInst::Create(Sum, RecurseBB); + ReturnInst::Create(M->getContext(), Sum, RecurseBB); return FibF; } @@ -242,8 +246,7 @@ int main() { Function* fibF = CreateFibFunction( M ); // Now we create the JIT. - ExistingModuleProvider* MP = new ExistingModuleProvider(M); - ExecutionEngine* EE = ExecutionEngine::create(MP, false); + ExecutionEngine* EE = EngineBuilder(M).create(); //~ std::cout << "We just constructed this LLVM module:\n\n" << *M; //~ std::cout << "\n\nRunning foo: " << std::flush; diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h index 5dce27eb23b86..a184f609d6baa 100644 --- a/include/llvm-c/BitReader.h +++ b/include/llvm-c/BitReader.h @@ -32,8 +32,8 @@ extern "C" { int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage); -int LLVMParseBitcodeInContext(LLVMMemoryBufferRef MemBuf, - LLVMContextRef ContextRef, +int LLVMParseBitcodeInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage); /* Reads a module from the specified path, returning via the OutMP parameter @@ -43,8 +43,8 @@ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage); -int LLVMGetBitcodeModuleProviderInContext(LLVMMemoryBufferRef MemBuf, - LLVMContextRef ContextRef, +int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage); diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 3538c0837187b..74c170928f096 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -33,6 +33,8 @@ #ifndef LLVM_C_CORE_H #define LLVM_C_CORE_H +#include "llvm/Support/DataTypes.h" + #ifdef __cplusplus /* Need these includes to support the LLVM 'cast' template for the C++ 'wrap' @@ -49,7 +51,7 @@ extern "C" { /** * The top-level container for all LLVM global data. See the LLVMContext class. */ -typedef struct LLVMCtxt *LLVMContextRef; +typedef struct LLVMOpaqueContext *LLVMContextRef; /** * The top-level container for all other LLVM Intermediate Representation (IR) @@ -87,6 +89,12 @@ typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef; /** See the llvm::PassManagerBase class. */ typedef struct LLVMOpaquePassManager *LLVMPassManagerRef; +/** + * Used to iterate through the uses of a Value, allowing access to all Values + * that use this Value. See the llvm::Use and llvm::value_use_iterator classes. 
+ */ +typedef struct LLVMOpaqueUseIterator *LLVMUseIteratorRef; + typedef enum { LLVMZExtAttribute = 1<<0, LLVMSExtAttribute = 1<<1, @@ -98,9 +106,75 @@ typedef enum { LLVMByValAttribute = 1<<7, LLVMNestAttribute = 1<<8, LLVMReadNoneAttribute = 1<<9, - LLVMReadOnlyAttribute = 1<<10 + LLVMReadOnlyAttribute = 1<<10, + LLVMNoInlineAttribute = 1<<11, + LLVMAlwaysInlineAttribute = 1<<12, + LLVMOptimizeForSizeAttribute = 1<<13, + LLVMStackProtectAttribute = 1<<14, + LLVMStackProtectReqAttribute = 1<<15, + LLVMNoCaptureAttribute = 1<<21, + LLVMNoRedZoneAttribute = 1<<22, + LLVMNoImplicitFloatAttribute = 1<<23, + LLVMNakedAttribute = 1<<24, + LLVMInlineHintAttribute = 1<<25 } LLVMAttribute; +typedef enum { + LLVMRet = 1, + LLVMBr = 2, + LLVMSwitch = 3, + LLVMInvoke = 4, + LLVMUnwind = 5, + LLVMUnreachable = 6, + LLVMAdd = 7, + LLVMFAdd = 8, + LLVMSub = 9, + LLVMFSub = 10, + LLVMMul = 11, + LLVMFMul = 12, + LLVMUDiv = 13, + LLVMSDiv = 14, + LLVMFDiv = 15, + LLVMURem = 16, + LLVMSRem = 17, + LLVMFRem = 18, + LLVMShl = 19, + LLVMLShr = 20, + LLVMAShr = 21, + LLVMAnd = 22, + LLVMOr = 23, + LLVMXor = 24, + LLVMMalloc = 25, + LLVMFree = 26, + LLVMAlloca = 27, + LLVMLoad = 28, + LLVMStore = 29, + LLVMGetElementPtr = 30, + LLVMTrunk = 31, + LLVMZExt = 32, + LLVMSExt = 33, + LLVMFPToUI = 34, + LLVMFPToSI = 35, + LLVMUIToFP = 36, + LLVMSIToFP = 37, + LLVMFPTrunc = 38, + LLVMFPExt = 39, + LLVMPtrToInt = 40, + LLVMIntToPtr = 41, + LLVMBitCast = 42, + LLVMICmp = 43, + LLVMFCmp = 44, + LLVMPHI = 45, + LLVMCall = 46, + LLVMSelect = 47, + LLVMVAArg = 50, + LLVMExtractElement = 51, + LLVMInsertElement = 52, + LLVMShuffleVector = 53, + LLVMExtractValue = 54, + LLVMInsertValue = 55 +} LLVMOpcode; + typedef enum { LLVMVoidTypeKind, /**< type with no size */ LLVMFloatTypeKind, /**< 32 bit floating point type */ @@ -115,7 +189,8 @@ typedef enum { LLVMArrayTypeKind, /**< Arrays */ LLVMPointerTypeKind, /**< Pointers */ LLVMOpaqueTypeKind, /**< Opaque: type with unknown structure */ - LLVMVectorTypeKind /**< SIMD 'packed' format, or other vector type */ + LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */ + LLVMMetadataTypeKind /**< Metadata */ } LLVMTypeKind; typedef enum { @@ -136,7 +211,8 @@ typedef enum { LLVMExternalWeakLinkage,/**< ExternalWeak linkage description */ LLVMGhostLinkage, /**< Stand-in functions for streaming fns from bitcode */ - LLVMCommonLinkage /**< Tentative definitions */ + LLVMCommonLinkage, /**< Tentative definitions */ + LLVMLinkerPrivateLinkage /**< Like Private, but linker removes. */ } LLVMLinkage; typedef enum { @@ -194,8 +270,8 @@ void LLVMDisposeMessage(char *Message); /*===-- Modules -----------------------------------------------------------===*/ /* Create and destroy contexts. */ -LLVMContextRef LLVMContextCreate(); -LLVMContextRef LLVMGetGlobalContext(); +LLVMContextRef LLVMContextCreate(void); +LLVMContextRef LLVMGetGlobalContext(void); void LLVMContextDispose(LLVMContextRef C); /* Create and destroy modules. */ @@ -218,6 +294,7 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple); /** See Module::addTypeName. */ int LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty); void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name); +LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name); /** See Module::dump. */ void LLVMDumpModule(LLVMModuleRef M); @@ -243,7 +320,17 @@ void LLVMDumpModule(LLVMModuleRef M); /** See llvm::LLVMTypeKind::getTypeID. */ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty); +/** See llvm::LLVMType::getContext. 
*/ +LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty); + /* Operations on integer types */ +LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits); + LLVMTypeRef LLVMInt1Type(void); LLVMTypeRef LLVMInt8Type(void); LLVMTypeRef LLVMInt16Type(void); @@ -253,6 +340,12 @@ LLVMTypeRef LLVMIntType(unsigned NumBits); unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy); /* Operations on real types */ +LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C); + LLVMTypeRef LLVMFloatType(void); LLVMTypeRef LLVMDoubleType(void); LLVMTypeRef LLVMX86FP80Type(void); @@ -269,6 +362,8 @@ unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy); void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest); /* Operations on struct types */ +LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, + unsigned ElementCount, int Packed); LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount, int Packed); unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy); @@ -286,6 +381,10 @@ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy); unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy); /* Operations on other types */ +LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C); + LLVMTypeRef LLVMVoidType(void); LLVMTypeRef LLVMLabelType(void); LLVMTypeRef LLVMOpaqueType(void); @@ -340,8 +439,6 @@ void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle); macro(CmpInst) \ macro(FCmpInst) \ macro(ICmpInst) \ - macro(VFCmpInst) \ - macro(VICmpInst) \ macro(ExtractElementInst) \ macro(GetElementPtrInst) \ macro(InsertElementInst) \ @@ -384,6 +481,7 @@ LLVMTypeRef LLVMTypeOf(LLVMValueRef Val); const char *LLVMGetValueName(LLVMValueRef Val); void LLVMSetValueName(LLVMValueRef Val, const char *Name); void LLVMDumpValue(LLVMValueRef Val); +void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal); /* Conversion functions. Return the input value if it is an instance of the specified class, otherwise NULL. See llvm::dyn_cast_or_null<>. 
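Every global type constructor now gains an ...InContext twin, so a client can keep all of its IR out of the global context and reclaim it in one shot. A sketch (the struct layout is illustrative):

    LLVMContextRef Ctx = LLVMContextCreate();
    LLVMTypeRef Fields[2] = { LLVMInt32TypeInContext(Ctx),
                              LLVMDoubleTypeInContext(Ctx) };
    LLVMTypeRef Pair = LLVMStructTypeInContext(Ctx, Fields, 2, /*Packed=*/0);
    /* ... create modules and values tied to Ctx ... */
    LLVMContextDispose(Ctx);   /* frees everything created in Ctx */

The legacy spellings (LLVMInt32Type and friends) remain and simply operate on the global context.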
*/ @@ -391,6 +489,15 @@ void LLVMDumpValue(LLVMValueRef Val); LLVMValueRef LLVMIsA##name(LLVMValueRef Val); LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST) +/* Operations on Uses */ +LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val); +LLVMUseIteratorRef LLVMGetNextUse(LLVMUseIteratorRef U); +LLVMValueRef LLVMGetUser(LLVMUseIteratorRef U); +LLVMValueRef LLVMGetUsedValue(LLVMUseIteratorRef U); + +/* Operations on Users */ +LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index); + /* Operations on constants of any type */ LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */ LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */ @@ -398,31 +505,55 @@ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty); int LLVMIsConstant(LLVMValueRef Val); int LLVMIsNull(LLVMValueRef Val); int LLVMIsUndef(LLVMValueRef Val); +LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty); /* Operations on scalar constants */ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, int SignExtend); +LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text, + uint8_t Radix); +LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text, + unsigned SLen, uint8_t Radix); LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N); LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text); +LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char *Text, + unsigned SLen); +unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal); +long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal); + /* Operations on composite constants */ +LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, + unsigned Length, int DontNullTerminate); +LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, + LLVMValueRef *ConstantVals, + unsigned Count, int Packed); + LLVMValueRef LLVMConstString(const char *Str, unsigned Length, int DontNullTerminate); LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, LLVMValueRef *ConstantVals, unsigned Length); LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, - int packed); + int Packed); LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size); /* Constant expressions */ +LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal); +LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty); LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty); LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal); +LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal); LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal); LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef 
LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); @@ -439,6 +570,9 @@ LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices); +LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, + LLVMValueRef *ConstantIndices, + unsigned NumIndices); LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); @@ -451,6 +585,17 @@ LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType, + unsigned isSigned); +LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition, LLVMValueRef ConstantIfTrue, LLVMValueRef ConstantIfFalse); @@ -517,6 +662,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC); const char *LLVMGetGC(LLVMValueRef Fn); void LLVMSetGC(LLVMValueRef Fn, const char *Name); void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); +LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn); void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); /* Operations on parameters */ @@ -530,6 +676,7 @@ LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg); LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg); void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA); void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA); +LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg); void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align); /* Operations on basic blocks */ @@ -544,6 +691,14 @@ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn); LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB); LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB); LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn); + +LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C, + LLVMValueRef Fn, + const char *Name); +LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C, + LLVMBasicBlockRef BB, + const char *Name); + LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name); LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB, const char *Name); @@ -582,6 +737,7 @@ LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index); * exclusive means of building instructions using the C interface. 
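The builder declarations that follow bring the C API closer to parity with IRBuilder: a context-aware constructor plus the NSW/floating-point arithmetic and cast helpers. A sketch of emitting "x + 1" with no-signed-wrap semantics, assuming a function Fn that lives in context Ctx:

    LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
    LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Fn, "entry");
    LLVMPositionBuilderAtEnd(B, Entry);
    LLVMValueRef X = LLVMGetParam(Fn, 0);
    LLVMValueRef One = LLVMConstInt(LLVMInt32TypeInContext(Ctx), 1, 0);
    LLVMValueRef Sum = LLVMBuildNSWAdd(B, X, One, "sum");
    LLVMBuildRet(B, Sum);
    LLVMDisposeBuilder(B);   /* the builder does not own the IR it built */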
*/ +LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C); LLVMBuilderRef LLVMCreateBuilder(void); void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block, LLVMValueRef Instr); @@ -590,11 +746,15 @@ void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block); LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder); void LLVMClearInsertionPosition(LLVMBuilderRef Builder); void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr); +void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr, + const char *Name); void LLVMDisposeBuilder(LLVMBuilderRef Builder); /* Terminators */ LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef); LLVMValueRef LLVMBuildRet(LLVMBuilderRef, LLVMValueRef V); +LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef, LLVMValueRef *RetVals, + unsigned N); LLVMValueRef LLVMBuildBr(LLVMBuilderRef, LLVMBasicBlockRef Dest); LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef, LLVMValueRef If, LLVMBasicBlockRef Then, LLVMBasicBlockRef Else); @@ -614,14 +774,24 @@ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal, /* Arithmetic */ LLVMValueRef LLVMBuildAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); +LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); LLVMValueRef LLVMBuildSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); +LLVMValueRef LLVMBuildFSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); LLVMValueRef LLVMBuildMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); +LLVMValueRef LLVMBuildFMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); +LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildURem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, @@ -643,6 +813,7 @@ LLVMValueRef LLVMBuildOr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, LLVMValueRef LLVMBuildXor(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name); +LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name); LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name); /* Memory */ @@ -659,6 +830,15 @@ LLVMValueRef LLVMBuildStore(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef Ptr); LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name); +LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, + LLVMValueRef *Indices, unsigned NumIndices, + const char *Name); +LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, + unsigned Idx, const char *Name); +LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str, + const char *Name); +LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, + const char *Name); /* Casts */ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, @@ -685,6 +865,18 @@ LLVMValueRef 
LLVMBuildIntToPtr(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); /* Comparisons */ LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op, @@ -718,6 +910,13 @@ LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef, LLVMValueRef AggVal, LLVMValueRef EltVal, unsigned Index, const char *Name); +LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef, LLVMValueRef Val, + const char *Name); +LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val, + const char *Name); +LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS, + LLVMValueRef RHS, const char *Name); + /*===-- Module providers --------------------------------------------------===*/ @@ -828,11 +1027,22 @@ namespace llvm { DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ModuleProvider, LLVMModuleProviderRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer, LLVMMemoryBufferRef ) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMContext, LLVMContextRef ) + DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use, LLVMUseIteratorRef ) DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassManagerBase, LLVMPassManagerRef ) #undef DEFINE_STDCXX_CONVERSION_FUNCTIONS #undef DEFINE_ISA_CONVERSION_FUNCTIONS #undef DEFINE_SIMPLE_CONVERSION_FUNCTIONS + + /* Specialized opaque context conversions. + */ + inline LLVMContext **unwrap(LLVMContextRef* Tys) { + return reinterpret_cast(Tys); + } + + inline LLVMContextRef *wrap(const LLVMContext **Tys) { + return reinterpret_cast(const_cast(Tys)); + } /* Specialized opaque type conversions. */ @@ -853,7 +1063,7 @@ namespace llvm { template inline T **unwrap(LLVMValueRef *Vals, unsigned Length) { #if DEBUG - for (LLVMValueRef *I = Vals, E = Vals + Length; I != E; ++I) + for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I) cast(*I); #endif return reinterpret_cast(Vals); diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index 9877b8d50953b..05f2a892e2230 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -104,7 +104,7 @@ void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global); } namespace llvm { - class GenericValue; + struct GenericValue; class ExecutionEngine; #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \ diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h index bb423bbaadfc3..43388512e8756 100644 --- a/include/llvm-c/Target.h +++ b/include/llvm-c/Target.h @@ -33,9 +33,20 @@ typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef; typedef struct LLVMStructLayout *LLVMStructLayoutRef; /* Declare all of the target-initialization functions that are available. 
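Splitting target registration into TargetInfo and Target halves means a client that only needs to enumerate targets can register the cheap TargetInfo side alone, while JIT users still initialize the native backend. A small, compilable sketch against the declarations added below:

    #include "llvm-c/Target.h"
    #include <stdio.h>

    int main(void) {
      LLVMInitializeAllTargetInfos();    /* register every configured TargetInfo */
      if (LLVMInitializeNativeTarget())  /* nonzero: no native target built in */
        fprintf(stderr, "no native target available\n");
      return 0;
    }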
*/ +#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo(); +#include "llvm/Config/Targets.def" + #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" +/** LLVMInitializeAllTargetInfos - The main program should call this function if + it wants access to all available targets that LLVM is configured to + support. */ +static inline void LLVMInitializeAllTargetInfos() { +#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo(); +#include "llvm/Config/Targets.def" +} + /** LLVMInitializeAllTargets - The main program should call this function if it wants to link in all available targets that LLVM is configured to support. */ @@ -50,7 +61,9 @@ static inline void LLVMInitializeAllTargets() { static inline int LLVMInitializeNativeTarget() { /* If we have a native target, initialize it to ensure it is linked in. */ #ifdef LLVM_NATIVE_ARCH -#define DoInit2(TARG) LLVMInitialize ## TARG () +#define DoInit2(TARG) \ + LLVMInitialize ## TARG ## Info (); \ + LLVMInitialize ## TARG () #define DoInit(T) DoInit2(T) DoInit(LLVM_NATIVE_ARCH); return 0; diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index 1ae8c670dd29b..7cafcb29f219c 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -16,7 +16,6 @@ #ifndef LTO_H #define LTO_H 1 -#include "llvm-c/Core.h" #include #include diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 928ecc0c3cf57..4d7e7ae11e8b2 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -109,6 +109,7 @@ namespace llvm { typedef signed short exponent_t; struct fltSemantics; + class StringRef; /* When bits of a floating point number are truncated, this enum is used to indicate what fraction of the LSB those bits represented. @@ -172,7 +173,8 @@ namespace llvm { }; // Constructors. 
- APFloat(const fltSemantics &, const char *); + APFloat(const fltSemantics &); // Default construct to 0.0 + APFloat(const fltSemantics &, const StringRef &); APFloat(const fltSemantics &, integerPart); APFloat(const fltSemantics &, fltCategory, bool negative, unsigned type=0); explicit APFloat(double d); @@ -234,7 +236,7 @@ namespace llvm { bool, roundingMode); opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int, bool, roundingMode); - opStatus convertFromString(const char *, roundingMode); + opStatus convertFromString(const StringRef&, roundingMode); APInt bitcastToAPInt() const; double convertToDouble() const; float convertToFloat() const; @@ -312,8 +314,8 @@ namespace llvm { roundingMode, bool *) const; opStatus convertFromUnsignedParts(const integerPart *, unsigned int, roundingMode); - opStatus convertFromHexadecimalString(const char *, roundingMode); - opStatus convertFromDecimalString (const char *, roundingMode); + opStatus convertFromHexadecimalString(const StringRef&, roundingMode); + opStatus convertFromDecimalString (const StringRef&, roundingMode); char *convertNormalToHexString(char *, unsigned int, bool, roundingMode) const; opStatus roundSignificandWithExponent(const integerPart *, unsigned int, @@ -321,11 +323,13 @@ namespace llvm { APInt convertFloatAPFloatToAPInt() const; APInt convertDoubleAPFloatToAPInt() const; + APInt convertQuadrupleAPFloatToAPInt() const; APInt convertF80LongDoubleAPFloatToAPInt() const; APInt convertPPCDoubleDoubleAPFloatToAPInt() const; void initFromAPInt(const APInt& api, bool isIEEE = false); void initFromFloatAPInt(const APInt& api); void initFromDoubleAPInt(const APInt& api); + void initFromQuadrupleAPInt(const APInt &api); void initFromF80LongDoubleAPInt(const APInt& api); void initFromPPCDoubleDoubleAPInt(const APInt& api); diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 56cd3ccf84e3e..88aa9956d9321 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -15,7 +15,6 @@ #ifndef LLVM_APINT_H #define LLVM_APINT_H -#include "llvm/Support/DataTypes.h" #include "llvm/Support/MathExtras.h" #include #include @@ -27,12 +26,13 @@ namespace llvm { class Deserializer; class FoldingSetNodeID; class raw_ostream; + class StringRef; template class SmallVectorImpl; - /* An unsigned host type used as a single part of a multi-part - bignum. */ + // An unsigned host type used as a single part of a multi-part + // bignum. typedef uint64_t integerPart; const unsigned int host_char_bit = 8; @@ -152,8 +152,7 @@ class APInt { /// This is used by the constructors that take string arguments. /// @brief Convert a char array into an APInt - void fromString(unsigned numBits, const char *strStart, unsigned slen, - uint8_t radix); + void fromString(unsigned numBits, const StringRef &str, uint8_t radix); /// This is used by the toString method to divide by the radix. It simply /// provides a more convenient form of divide for internal use since KnuthDiv @@ -229,17 +228,17 @@ public: /// @brief Construct an APInt of numBits width, initialized as bigVal[]. APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]); - /// This constructor interprets the slen characters starting at StrStart as - /// a string in the given radix. The interpretation stops when the first - /// character that is not suitable for the radix is encountered. Acceptable - /// radix values are 2, 8, 10 and 16. It is an error for the value implied by - /// the string to require more bits than numBits. 
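Both APFloat and APInt give up their (const char *, length) string interfaces in favor of StringRef, and APFloat picks up a default-to-zero constructor. The new forms look like this (values are illustrative):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    APFloat Zero(APFloat::IEEEdouble);           // default-constructs to 0.0
    APFloat Pi(APFloat::IEEEdouble, "3.14159");  // const char* converts to StringRef
    APInt Mask(32, "ff", 16);                    // previously APInt(32, "ff", 2, 16)
    APInt Big(128, StringRef("12345678901234567890"), 10);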
+ /// This constructor interprets the string \arg str in the given radix. The + /// interpretation stops when the first character that is not suitable for the + /// radix is encountered, or the end of the string. Acceptable radix values + /// are 2, 8, 10 and 16. It is an error for the value implied by the string to + /// require more bits than numBits. + /// /// @param numBits the bit width of the constructed APInt - /// @param strStart the start of the string to be interpreted - /// @param slen the maximum number of characters to interpret - /// @param radix the radix to use for the conversion + /// @param str the string to be interpreted + /// @param radix the radix to use for the conversion /// @brief Construct an APInt from a string representation. - APInt(unsigned numBits, const char strStart[], unsigned slen, uint8_t radix); + APInt(unsigned numBits, const StringRef &str, uint8_t radix); /// Simply makes *this a copy of that. /// @brief Copy Constructor. @@ -1063,9 +1062,9 @@ public: } /// This method determines how many bits are required to hold the APInt - /// equivalent of the string given by \p str of length \p slen. + /// equivalent of the string given by \arg str. /// @brief Get bits required for string value. - static unsigned getBitsNeeded(const char* str, unsigned slen, uint8_t radix); + static unsigned getBitsNeeded(const StringRef& str, uint8_t radix); /// countLeadingZeros - This function is an APInt version of the /// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the number @@ -1235,6 +1234,11 @@ public: return BitWidth - 1 - countLeadingZeros(); } + /// @returns the ceil log base 2 of this APInt. + unsigned ceilLogBase2() const { + return BitWidth - (*this - 1).countLeadingZeros(); + } + /// @returns the log base 2 of this APInt if its an exact power of two, -1 /// otherwise int32_t exactLogBase2() const { @@ -1426,8 +1430,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) { return OS; } -std::ostream &operator<<(std::ostream &o, const APInt &I); - namespace APIntOps { /// @brief Determine the smaller of two APInts considered to be signed. diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index e18be8963d48b..0ed2d5a25257c 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -16,109 +16,15 @@ #include "llvm/Support/PointerLikeTypeTraits.h" #include "llvm/Support/MathExtras.h" -#include -#include +#include "llvm/ADT/DenseMapInfo.h" +#include #include +#include +#include +#include namespace llvm { -template -struct DenseMapInfo { - //static inline T getEmptyKey(); - //static inline T getTombstoneKey(); - //static unsigned getHashValue(const T &Val); - //static bool isEqual(const T &LHS, const T &RHS); - //static bool isPod() -}; - -// Provide DenseMapInfo for all pointers. -template -struct DenseMapInfo { - static inline T* getEmptyKey() { - intptr_t Val = -1; - Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; - return reinterpret_cast(Val); - } - static inline T* getTombstoneKey() { - intptr_t Val = -2; - Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; - return reinterpret_cast(Val); - } - static unsigned getHashValue(const T *PtrVal) { - return (unsigned((uintptr_t)PtrVal) >> 4) ^ - (unsigned((uintptr_t)PtrVal) >> 9); - } - static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; } - static bool isPod() { return true; } -}; - -// Provide DenseMapInfo for chars. 
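The new ceilLogBase2 rounds up where logBase2 truncates, using the identity ceil(log2 x) = BitWidth - clz(x - 1). For example:

    APInt X(32, 10);
    unsigned Floor = X.logBase2();      // 3, since 2^3 <= 10 < 2^4
    unsigned Ceil  = X.ceilLogBase2();  // 4: 10 - 1 = 9 has 28 leading zeros,
                                        //    and 32 - 28 = 4
    // For an exact power of two such as 8, both calls return 3.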
-template<> struct DenseMapInfo { - static inline char getEmptyKey() { return ~0; } - static inline char getTombstoneKey() { return ~0 - 1; } - static unsigned getHashValue(const char& Val) { return Val * 37; } - static bool isPod() { return true; } - static bool isEqual(const char &LHS, const char &RHS) { - return LHS == RHS; - } -}; - -// Provide DenseMapInfo for unsigned ints. -template<> struct DenseMapInfo { - static inline unsigned getEmptyKey() { return ~0; } - static inline unsigned getTombstoneKey() { return ~0 - 1; } - static unsigned getHashValue(const unsigned& Val) { return Val * 37; } - static bool isPod() { return true; } - static bool isEqual(const unsigned& LHS, const unsigned& RHS) { - return LHS == RHS; - } -}; - -// Provide DenseMapInfo for unsigned longs. -template<> struct DenseMapInfo { - static inline unsigned long getEmptyKey() { return ~0L; } - static inline unsigned long getTombstoneKey() { return ~0L - 1L; } - static unsigned getHashValue(const unsigned long& Val) { - return (unsigned)(Val * 37L); - } - static bool isPod() { return true; } - static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) { - return LHS == RHS; - } -}; - -// Provide DenseMapInfo for all pairs whose members have info. -template -struct DenseMapInfo > { - typedef std::pair Pair; - typedef DenseMapInfo FirstInfo; - typedef DenseMapInfo SecondInfo; - - static inline Pair getEmptyKey() { - return std::make_pair(FirstInfo::getEmptyKey(), - SecondInfo::getEmptyKey()); - } - static inline Pair getTombstoneKey() { - return std::make_pair(FirstInfo::getTombstoneKey(), - SecondInfo::getEmptyKey()); - } - static unsigned getHashValue(const Pair& PairVal) { - uint64_t key = (uint64_t)FirstInfo::getHashValue(PairVal.first) << 32 - | (uint64_t)SecondInfo::getHashValue(PairVal.second); - key += ~(key << 32); - key ^= (key >> 22); - key += ~(key << 13); - key ^= (key >> 8); - key += (key << 3); - key ^= (key >> 15); - key += ~(key << 27); - key ^= (key >> 31); - return (unsigned)key; - } - static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; } - static bool isPod() { return FirstInfo::isPod() && SecondInfo::isPod(); } -}; - template, typename ValueInfoT = DenseMapInfo > @@ -160,6 +66,9 @@ public: P->second.~ValueT(); P->first.~KeyT(); } +#ifndef NDEBUG + memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets); +#endif operator delete(Buckets); } @@ -185,6 +94,8 @@ public: void resize(size_t Size) { grow(Size); } void clear() { + if (NumEntries == 0 && NumTombstones == 0) return; + // If the capacity of the array is huge, and the # elements used is small, // shrink the array. if (NumEntries * 4 < NumBuckets && NumBuckets > 64) { @@ -234,6 +145,9 @@ public: return ValueT(); } + // Inserts key,value pair into the map if the key isn't already in the map. + // If the key is already in the map, it returns false and doesn't update the + // value. std::pair insert(const std::pair &KV) { BucketT *TheBucket; if (LookupBucketFor(KV.first, TheBucket)) @@ -318,8 +232,12 @@ private: NumEntries = other.NumEntries; NumTombstones = other.NumTombstones; - if (NumBuckets) + if (NumBuckets) { +#ifndef NDEBUG + memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets); +#endif operator delete(Buckets); + } Buckets = static_cast(operator new(sizeof(BucketT) * other.NumBuckets)); @@ -465,6 +383,9 @@ private: B->first.~KeyT(); } +#ifndef NDEBUG + memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); +#endif // Free the old table. 
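Two behavioral notes fall out of these DenseMap hunks: insert() never overwrites an existing mapping (the bool half of the returned pair reports whether an insertion happened), and asserts-enabled builds now fill freed or rehashed bucket memory with 0x5a so stale pointers and iterators fail loudly instead of appearing to work. Insert semantics in practice:

    DenseMap<unsigned, const char *> M;
    M.insert(std::make_pair(1u, "one"));
    std::pair<DenseMap<unsigned, const char *>::iterator, bool> R =
        M.insert(std::make_pair(1u, "uno"));
    if (!R.second) {
      // Key 1 was already present; the mapped value is still "one", not "uno".
    }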
operator delete(OldBuckets); } @@ -495,6 +416,9 @@ private: B->first.~KeyT(); } +#ifndef NDEBUG + memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); +#endif // Free the old table. operator delete(OldBuckets); @@ -503,12 +427,14 @@ private: }; template -class DenseMapIterator { +class DenseMapIterator : + public std::iterator, + ptrdiff_t> { typedef std::pair BucketT; protected: const BucketT *Ptr, *End; public: - DenseMapIterator(void) : Ptr(0), End(0) {} + DenseMapIterator() : Ptr(0), End(0) {} DenseMapIterator(const BucketT *Pos, const BucketT *E) : Ptr(Pos), End(E) { AdvancePastEmptyBuckets(); @@ -552,7 +478,7 @@ private: template class DenseMapConstIterator : public DenseMapIterator { public: - DenseMapConstIterator(void) : DenseMapIterator() {} + DenseMapConstIterator() : DenseMapIterator() {} DenseMapConstIterator(const std::pair *Pos, const std::pair *E) : DenseMapIterator(Pos, E) { diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h new file mode 100644 index 0000000000000..632728bf0d179 --- /dev/null +++ b/include/llvm/ADT/DenseMapInfo.h @@ -0,0 +1,135 @@ +//===- llvm/ADT/DenseMapInfo.h - Type traits for DenseMap -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines DenseMapInfo traits for DenseMap. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_DENSEMAPINFO_H +#define LLVM_ADT_DENSEMAPINFO_H + +#include "llvm/Support/PointerLikeTypeTraits.h" +#include + +namespace llvm { + +template +struct DenseMapInfo { + //static inline T getEmptyKey(); + //static inline T getTombstoneKey(); + //static unsigned getHashValue(const T &Val); + //static bool isEqual(const T &LHS, const T &RHS); + //static bool isPod() +}; + +// Provide DenseMapInfo for all pointers. +template +struct DenseMapInfo { + static inline T* getEmptyKey() { + intptr_t Val = -1; + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static inline T* getTombstoneKey() { + intptr_t Val = -2; + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static unsigned getHashValue(const T *PtrVal) { + return (unsigned((uintptr_t)PtrVal) >> 4) ^ + (unsigned((uintptr_t)PtrVal) >> 9); + } + static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; } + static bool isPod() { return true; } +}; + +// Provide DenseMapInfo for chars. +template<> struct DenseMapInfo { + static inline char getEmptyKey() { return ~0; } + static inline char getTombstoneKey() { return ~0 - 1; } + static unsigned getHashValue(const char& Val) { return Val * 37; } + static bool isPod() { return true; } + static bool isEqual(const char &LHS, const char &RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for unsigned ints. +template<> struct DenseMapInfo { + static inline unsigned getEmptyKey() { return ~0; } + static inline unsigned getTombstoneKey() { return ~0U - 1; } + static unsigned getHashValue(const unsigned& Val) { return Val * 37; } + static bool isPod() { return true; } + static bool isEqual(const unsigned& LHS, const unsigned& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for unsigned longs. 
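With DenseMapInfo split into its own header, a key-trait specialization no longer has to drag in the whole map. A sketch for a hypothetical strong-typedef key (Id and its reserved sentinel values are illustrative):

    #include "llvm/ADT/DenseMapInfo.h"

    struct Id { unsigned V; };

    namespace llvm {
      template<> struct DenseMapInfo<Id> {
        static inline Id getEmptyKey()     { Id K = { ~0U };      return K; }
        static inline Id getTombstoneKey() { Id K = { ~0U - 1 };  return K; }
        static unsigned getHashValue(const Id &K) { return K.V * 37; }
        static bool isEqual(const Id &L, const Id &R) { return L.V == R.V; }
        static bool isPod() { return true; }
      };
    }

The two sentinel keys must never be stored in the map, mirroring the built-in unsigned specialization above.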
+template<> struct DenseMapInfo { + static inline unsigned long getEmptyKey() { return ~0UL; } + static inline unsigned long getTombstoneKey() { return ~0UL - 1L; } + static unsigned getHashValue(const unsigned long& Val) { + return Val * 37UL; + } + static bool isPod() { return true; } + static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for unsigned long longs. +template<> struct DenseMapInfo { + static inline unsigned long long getEmptyKey() { return ~0ULL; } + static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; } + static unsigned getHashValue(const unsigned long long& Val) { + return Val * 37ULL; + } + static bool isPod() { return true; } + static bool isEqual(const unsigned long long& LHS, + const unsigned long long& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for all pairs whose members have info. +template +struct DenseMapInfo > { + typedef std::pair Pair; + typedef DenseMapInfo FirstInfo; + typedef DenseMapInfo SecondInfo; + + static inline Pair getEmptyKey() { + return std::make_pair(FirstInfo::getEmptyKey(), + SecondInfo::getEmptyKey()); + } + static inline Pair getTombstoneKey() { + return std::make_pair(FirstInfo::getTombstoneKey(), + SecondInfo::getEmptyKey()); + } + static unsigned getHashValue(const Pair& PairVal) { + uint64_t key = (uint64_t)FirstInfo::getHashValue(PairVal.first) << 32 + | (uint64_t)SecondInfo::getHashValue(PairVal.second); + key += ~(key << 32); + key ^= (key >> 22); + key += ~(key << 13); + key ^= (key >> 8); + key += (key << 3); + key ^= (key >> 15); + key += ~(key << 27); + key ^= (key >> 31); + return (unsigned)key; + } + static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; } + static bool isPod() { return FirstInfo::isPod() && SecondInfo::isPod(); } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h index 517768f402df9..5f2df2a17e416 100644 --- a/include/llvm/ADT/DepthFirstIterator.h +++ b/include/llvm/ADT/DepthFirstIterator.h @@ -34,8 +34,8 @@ #define LLVM_ADT_DEPTHFIRSTITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/PointerIntPair.h" #include #include @@ -62,28 +62,35 @@ public: template::NodeType*, 8>, bool ExtStorage = false, class GT = GraphTraits > -class df_iterator : public forward_iterator, +class df_iterator : public std::iterator, public df_iterator_storage { - typedef forward_iterator super; + typedef std::iterator super; typedef typename GT::NodeType NodeType; typedef typename GT::ChildIteratorType ChildItTy; + typedef PointerIntPair PointerIntTy; // VisitStack - Used to maintain the ordering. 
Top = current block // First element is node pointer, second is the 'next child' to visit - std::vector > VisitStack; + // if the int in PointerIntTy is 0, the 'next child' to visit is invalid + std::vector > VisitStack; private: inline df_iterator(NodeType *Node) { this->Visited.insert(Node); - VisitStack.push_back(std::make_pair(Node, GT::child_begin(Node))); + VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), + GT::child_begin(Node))); + } + inline df_iterator() { + // End is when stack is empty } - inline df_iterator() { /* End is when stack is empty */ } - inline df_iterator(NodeType *Node, SetType &S) : df_iterator_storage(S) { if (!S.count(Node)) { + VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), + GT::child_begin(Node))); this->Visited.insert(Node); - VisitStack.push_back(std::make_pair(Node, GT::child_begin(Node))); } } inline df_iterator(SetType &S) @@ -91,6 +98,34 @@ private: // End is when stack is empty } + inline void toNext() { + do { + std::pair &Top = VisitStack.back(); + NodeType *Node = Top.first.getPointer(); + ChildItTy &It = Top.second; + if (!Top.first.getInt()) { + // now retrieve the real begin of the children before we dive in + It = GT::child_begin(Node); + Top.first.setInt(1); + } + + while (It != GT::child_end(Node)) { + NodeType *Next = *It++; + // Has our next sibling been visited? + if (Next && !this->Visited.count(Next)) { + // No, do it now. + this->Visited.insert(Next); + VisitStack.push_back(std::make_pair(PointerIntTy(Next, 0), + GT::child_begin(Next))); + return; + } + } + + // Oops, ran out of successors... go up a level on the stack. + VisitStack.pop_back(); + } while (!VisitStack.empty()); + } + public: typedef typename super::pointer pointer; typedef df_iterator _Self; @@ -114,7 +149,7 @@ public: inline bool operator!=(const _Self& x) const { return !operator==(x); } inline pointer operator*() const { - return VisitStack.back().first; + return VisitStack.back().first.getPointer(); } // This is a nonstandard operator-> that dereferences the pointer an extra @@ -124,24 +159,16 @@ public: inline NodeType *operator->() const { return operator*(); } inline _Self& operator++() { // Preincrement - do { - std::pair &Top = VisitStack.back(); - NodeType *Node = Top.first; - ChildItTy &It = Top.second; - - while (It != GT::child_end(Node)) { - NodeType *Next = *It++; - if (!this->Visited.count(Next)) { // Has our next sibling been visited? - // No, do it now. - this->Visited.insert(Next); - VisitStack.push_back(std::make_pair(Next, GT::child_begin(Next))); - return *this; - } - } + toNext(); + return *this; + } - // Oops, ran out of successors... go up a level on the stack. 
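Hoisting the old operator++ body into toNext() is what enables the skipChildren() helper added just below: it pops the current node and resumes the walk without ever queuing that node's successors. A pruned traversal then reads (visit and shouldDescend are placeholder callables; F is a Function*):

    for (df_iterator<Function*> I = df_begin(F), E = df_end(F); I != E; ) {
      visit(*I);                 // *I is a BasicBlock* for a Function graph
      if (shouldDescend(*I))
        ++I;                     // ordinary DFS step
      else
        I.skipChildren();        // drop this node's subtree and move on
    }

Note that skipChildren() already advances the iterator, so the loop must not increment as well.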
- VisitStack.pop_back(); - } while (!VisitStack.empty()); + // skips all children of the current node and traverses to next node + // + inline _Self& skipChildren() { + VisitStack.pop_back(); + if (!VisitStack.empty()) + toNext(); return *this; } diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h index 6e00a217bebfe..ac9dd4db1fb47 100644 --- a/include/llvm/ADT/EquivalenceClasses.h +++ b/include/llvm/ADT/EquivalenceClasses.h @@ -15,7 +15,6 @@ #ifndef LLVM_ADT_EQUIVALENCECLASSES_H #define LLVM_ADT_EQUIVALENCECLASSES_H -#include "llvm/ADT/iterator.h" #include "llvm/Support/DataTypes.h" #include @@ -234,8 +233,10 @@ public: return L1; } - class member_iterator : public forward_iterator { - typedef forward_iterator super; + class member_iterator : public std::iterator { + typedef std::iterator super; const ECValue *Node; friend class EquivalenceClasses; public: diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h index 1bcff3dc9eb3f..c62c47d273537 100644 --- a/include/llvm/ADT/FoldingSet.h +++ b/include/llvm/ADT/FoldingSet.h @@ -18,7 +18,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/ADT/SmallVector.h" -#include +#include "llvm/ADT/StringRef.h" #include namespace llvm { @@ -227,9 +227,7 @@ public: void AddInteger(long long I); void AddInteger(unsigned long long I); void AddBoolean(bool B) { AddInteger(B ? 1U : 0U); } - void AddString(const char* String, const char* End); - void AddString(const std::string &String); - void AddString(const char* String); + void AddString(StringRef String); template inline void Add(const T& x) { FoldingSetTrait::Profile(x, *this); } @@ -438,6 +436,20 @@ public: operator const T&() const { return data; } }; +//===----------------------------------------------------------------------===// +/// FastFoldingSetNode - This is a subclass of FoldingSetNode which stores +/// a FoldingSetNodeID value rather than requiring the node to recompute it +/// each time it is needed. This trades space for speed (which can be +/// significant if the ID is long), and it also permits nodes to drop +/// information that would otherwise only be required for recomputing an ID. +class FastFoldingSetNode : public FoldingSetNode { + FoldingSetNodeID FastID; +protected: + explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {} +public: + void Profile(FoldingSetNodeID& ID) { ID = FastID; } +}; + //===----------------------------------------------------------------------===// // Partial specializations of FoldingSetTrait. diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h index 52708bc8a1087..742e2321865db 100644 --- a/include/llvm/ADT/ImmutableMap.h +++ b/include/llvm/ADT/ImmutableMap.h @@ -80,22 +80,25 @@ public: class Factory { typename TreeTy::Factory F; + const bool Canonicalize; public: - Factory() {} - - Factory(BumpPtrAllocator& Alloc) - : F(Alloc) {} + Factory(bool canonicalize = true) + : Canonicalize(canonicalize) {} + + Factory(BumpPtrAllocator& Alloc, bool canonicalize = true) + : F(Alloc), Canonicalize(canonicalize) {} ImmutableMap GetEmptyMap() { return ImmutableMap(F.GetEmptyTree()); } ImmutableMap Add(ImmutableMap Old, key_type_ref K, data_type_ref D) { - return ImmutableMap(F.Add(Old.Root, - std::make_pair(K,D))); + TreeTy *T = F.Add(Old.Root, std::make_pair(K,D)); + return ImmutableMap(Canonicalize ? 
F.GetCanonicalTree(T): T); } ImmutableMap Remove(ImmutableMap Old, key_type_ref K) { - return ImmutableMap(F.Remove(Old.Root,K)); + TreeTy *T = F.Remove(Old.Root,K); + return ImmutableMap(Canonicalize ? F.GetCanonicalTree(T): T); } private: diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h index be274dbe6758c..14f4ac8123d6c 100644 --- a/include/llvm/ADT/ImmutableSet.h +++ b/include/llvm/ADT/ImmutableSet.h @@ -51,10 +51,8 @@ public: /// getLeft - Returns a pointer to the left subtree. This value /// is NULL if there is no left subtree. - ImutAVLTree* getLeft() const { - assert (!isMutable() && "Node is incorrectly marked mutable."); - - return reinterpret_cast(Left); + ImutAVLTree *getLeft() const { + return reinterpret_cast(Left & ~LeftFlags); } /// getRight - Returns a pointer to the right subtree. This value is @@ -168,7 +166,7 @@ public: /// contains - Returns true if this tree contains a subtree (node) that /// has an data element that matches the specified key. Complexity /// is logarithmic in the size of the tree. - bool contains(const key_type_ref K) { return (bool) find(K); } + bool contains(key_type_ref K) { return (bool) find(K); } /// foreach - A member template the accepts invokes operator() on a functor /// object (specifed by Callback) for every node/subtree in the tree. @@ -227,7 +225,7 @@ private: ImutAVLTree* Right; unsigned Height; value_type Value; - unsigned Digest; + uint32_t Digest; //===----------------------------------------------------===// // Internal methods (node manipulation; used by Factory). @@ -235,12 +233,12 @@ private: private: - enum { Mutable = 0x1 }; + enum { Mutable = 0x1, NoCachedDigest = 0x2, LeftFlags = 0x3 }; /// ImutAVLTree - Internal constructor that is only called by /// ImutAVLFactory. ImutAVLTree(ImutAVLTree* l, ImutAVLTree* r, value_type_ref v, unsigned height) - : Left(reinterpret_cast(l) | Mutable), + : Left(reinterpret_cast(l) | (Mutable | NoCachedDigest)), Right(r), Height(height), Value(v), Digest(0) {} @@ -251,13 +249,10 @@ private: /// method returns false for an instance of ImutAVLTree, all subtrees /// will also have this method return false. The converse is not true. bool isMutable() const { return Left & Mutable; } - - /// getSafeLeft - Returns the pointer to the left tree by always masking - /// out the mutable bit. This is used internally by ImutAVLFactory, - /// as no trees returned to the client should have the mutable flag set. - ImutAVLTree* getSafeLeft() const { - return reinterpret_cast(Left & ~Mutable); - } + + /// hasCachedDigest - Returns true if the digest for this tree is cached. + /// This can only be true if the tree is immutable. + bool hasCachedDigest() const { return !(Left & NoCachedDigest); } //===----------------------------------------------------===// // Mutating operations. A tree root can be manipulated as @@ -270,64 +265,73 @@ private: // immutable. //===----------------------------------------------------===// - /// MarkImmutable - Clears the mutable flag for a tree. After this happens, - /// it is an error to call setLeft(), setRight(), and setHeight(). It - /// is also then safe to call getLeft() instead of getSafeLeft(). + /// it is an error to call setLeft(), setRight(), and setHeight(). void MarkImmutable() { - assert (isMutable() && "Mutable flag already removed."); + assert(isMutable() && "Mutable flag already removed."); Left &= ~Mutable; } + + /// MarkedCachedDigest - Clears the NoCachedDigest flag for a tree. 
+ void MarkedCachedDigest() { + assert(!hasCachedDigest() && "NoCachedDigest flag already removed."); + Left &= ~NoCachedDigest; + } /// setLeft - Changes the reference of the left subtree. Used internally /// by ImutAVLFactory. void setLeft(ImutAVLTree* NewLeft) { - assert (isMutable() && - "Only a mutable tree can have its left subtree changed."); - - Left = reinterpret_cast(NewLeft) | Mutable; + assert(isMutable() && + "Only a mutable tree can have its left subtree changed."); + Left = reinterpret_cast(NewLeft) | LeftFlags; } /// setRight - Changes the reference of the right subtree. Used internally /// by ImutAVLFactory. void setRight(ImutAVLTree* NewRight) { - assert (isMutable() && - "Only a mutable tree can have its right subtree changed."); + assert(isMutable() && + "Only a mutable tree can have its right subtree changed."); Right = NewRight; + // Set the NoCachedDigest flag. + Left = Left | NoCachedDigest; + } /// setHeight - Changes the height of the tree. Used internally by /// ImutAVLFactory. void setHeight(unsigned h) { - assert (isMutable() && "Only a mutable tree can have its height changed."); + assert(isMutable() && "Only a mutable tree can have its height changed."); Height = h; } - static inline - unsigned ComputeDigest(ImutAVLTree* L, ImutAVLTree* R, value_type_ref V) { - unsigned digest = 0; + uint32_t ComputeDigest(ImutAVLTree* L, ImutAVLTree* R, value_type_ref V) { + uint32_t digest = 0; - if (L) digest += L->ComputeDigest(); + if (L) + digest += L->ComputeDigest(); - { // Compute digest of stored data. - FoldingSetNodeID ID; - ImutInfo::Profile(ID,V); - digest += ID.ComputeHash(); - } + // Compute digest of stored data. + FoldingSetNodeID ID; + ImutInfo::Profile(ID,V); + digest += ID.ComputeHash(); - if (R) digest += R->ComputeDigest(); + if (R) + digest += R->ComputeDigest(); return digest; } - inline unsigned ComputeDigest() { - if (Digest) return Digest; - - unsigned X = ComputeDigest(getSafeLeft(), getRight(), getValue()); - if (!isMutable()) Digest = X; + inline uint32_t ComputeDigest() { + // Check the lowest bit to determine if digest has actually been + // pre-computed. + if (hasCachedDigest()) + return Digest; + uint32_t X = ComputeDigest(getLeft(), getRight(), getValue()); + Digest = X; + MarkedCachedDigest(); return X; } }; @@ -394,7 +398,7 @@ private: bool isEmpty(TreeTy* T) const { return !T; } unsigned Height(TreeTy* T) const { return T ? T->getHeight() : 0; } - TreeTy* Left(TreeTy* T) const { return T->getSafeLeft(); } + TreeTy* Left(TreeTy* T) const { return T->getLeft(); } TreeTy* Right(TreeTy* T) const { return T->getRight(); } value_type_ref Value(TreeTy* T) const { return T->Value; } @@ -404,7 +408,6 @@ private: return ( hl > hr ? hl : hr ) + 1; } - static bool CompareTreeWithSection(TreeTy* T, typename TreeTy::iterator& TI, typename TreeTy::iterator& TE) { @@ -428,62 +431,10 @@ private: // returned to the caller. //===--------------------------------------------------===// - TreeTy* CreateNode(TreeTy* L, value_type_ref V, TreeTy* R) { - // Search the FoldingSet bucket for a Tree with the same digest. - FoldingSetNodeID ID; - unsigned digest = TreeTy::ComputeDigest(L, R, V); - ID.AddInteger(digest); - unsigned hash = ID.ComputeHash(); - - typename CacheTy::bucket_iterator I = Cache.bucket_begin(hash); - typename CacheTy::bucket_iterator E = Cache.bucket_end(hash); - - for (; I != E; ++I) { - TreeTy* T = &*I; - - if (T->ComputeDigest() != digest) - continue; - - // We found a collision. 
Perform a comparison of Contents('T') - // with Contents('L')+'V'+Contents('R'). - - typename TreeTy::iterator TI = T->begin(), TE = T->end(); - - // First compare Contents('L') with the (initial) contents of T. - if (!CompareTreeWithSection(L, TI, TE)) - continue; - - // Now compare the new data element. - if (TI == TE || !TI->ElementEqual(V)) - continue; - - ++TI; - - // Now compare the remainder of 'T' with 'R'. - if (!CompareTreeWithSection(R, TI, TE)) - continue; - - if (TI != TE) // Contents('R') did not match suffix of 'T'. - continue; - - // Trees did match! Return 'T'. - return T; - } - - // No tree with the contents: Contents('L')+'V'+Contents('R'). - // Create it. - - // Allocate the new tree node and insert it into the cache. + TreeTy* CreateNode(TreeTy* L, value_type_ref V, TreeTy* R) { BumpPtrAllocator& A = getAllocator(); TreeTy* T = (TreeTy*) A.Allocate(); new (T) TreeTy(L,R,V,IncrementHeight(L,R)); - - // We do not insert 'T' into the FoldingSet here. This is because - // this tree is still mutable and things may get rebalanced. - // Because our digest is associative and based on the contents of - // the set, this should hopefully not cause any strange bugs. - // 'T' is inserted by 'MarkImmutable'. - return T; } @@ -496,7 +447,8 @@ private: OldTree->setHeight(IncrementHeight(L,R)); return OldTree; } - else return CreateNode(L, Value(OldTree), R); + else + return CreateNode(L, Value(OldTree), R); } /// Balance - Used by Add_internal and Remove_internal to @@ -615,12 +567,56 @@ private: T->MarkImmutable(); MarkImmutable(Left(T)); MarkImmutable(Right(T)); + } + +public: + TreeTy *GetCanonicalTree(TreeTy *TNew) { + if (!TNew) + return NULL; + + // Search the FoldingSet bucket for a Tree with the same digest. + FoldingSetNodeID ID; + unsigned digest = TNew->ComputeDigest(); + ID.AddInteger(digest); + unsigned hash = ID.ComputeHash(); + + typename CacheTy::bucket_iterator I = Cache.bucket_begin(hash); + typename CacheTy::bucket_iterator E = Cache.bucket_end(hash); + + for (; I != E; ++I) { + TreeTy *T = &*I; + + if (T->ComputeDigest() != digest) + continue; + + // We found a collision. Perform a comparison of Contents('T') + // with Contents('L')+'V'+Contents('R'). + typename TreeTy::iterator TI = T->begin(), TE = T->end(); + + // First compare Contents('L') with the (initial) contents of T. + if (!CompareTreeWithSection(TNew->getLeft(), TI, TE)) + continue; + + // Now compare the new data element. + if (TI == TE || !TI->ElementEqual(TNew->getValue())) + continue; + + ++TI; + + // Now compare the remainder of 'T' with 'R'. + if (!CompareTreeWithSection(TNew->getRight(), TI, TE)) + continue; + + if (TI != TE) + continue; // Contents('R') did not match suffix of 'T'. + + // Trees did match! Return 'T'. + return T; + } - // Now that the node is immutable it can safely be inserted - // into the node cache. - llvm::FoldingSetNodeID ID; - ID.AddInteger(T->ComputeDigest()); - Cache.InsertNode(T, (void*) &*Cache.bucket_end(ID.ComputeHash())); + // 'TNew' is the only tree of its kind. Return it. + Cache.InsertNode(TNew, (void*) &*Cache.bucket_end(hash)); + return TNew; } }; @@ -701,7 +697,7 @@ public: switch (getVisitState()) { case VisitedNone: - if (TreeTy* L = Current->getSafeLeft()) + if (TreeTy* L = Current->getLeft()) stack.push_back(reinterpret_cast(L)); else stack.back() |= VisitedLeft; @@ -940,8 +936,8 @@ public: typedef ImutAVLTree TreeTy; private: - TreeTy* Root; - + TreeTy *Root; + public: /// Constructs a set from a pointer to a tree root. 
In general one /// should use a Factory object to create sets instead of directly @@ -951,15 +947,19 @@ public: class Factory { typename TreeTy::Factory F; + const bool Canonicalize; public: - Factory() {} + Factory(bool canonicalize = true) + : Canonicalize(canonicalize) {} - Factory(BumpPtrAllocator& Alloc) - : F(Alloc) {} + Factory(BumpPtrAllocator& Alloc, bool canonicalize = true) + : F(Alloc), Canonicalize(canonicalize) {} /// GetEmptySet - Returns an immutable set that contains no elements. - ImmutableSet GetEmptySet() { return ImmutableSet(F.GetEmptyTree()); } + ImmutableSet GetEmptySet() { + return ImmutableSet(F.GetEmptyTree()); + } /// Add - Creates a new immutable set that contains all of the values /// of the original set with the addition of the specified value. If @@ -969,7 +969,8 @@ public: /// The memory allocated to represent the set is released when the /// factory object that created the set is destroyed. ImmutableSet Add(ImmutableSet Old, value_type_ref V) { - return ImmutableSet(F.Add(Old.Root,V)); + TreeTy *NewT = F.Add(Old.Root, V); + return ImmutableSet(Canonicalize ? F.GetCanonicalTree(NewT) : NewT); } /// Remove - Creates a new immutable set that contains all of the values @@ -980,7 +981,8 @@ public: /// The memory allocated to represent the set is released when the /// factory object that created the set is destroyed. ImmutableSet Remove(ImmutableSet Old, value_type_ref V) { - return ImmutableSet(F.Remove(Old.Root,V)); + TreeTy *NewT = F.Remove(Old.Root, V); + return ImmutableSet(Canonicalize ? F.GetCanonicalTree(NewT) : NewT); } BumpPtrAllocator& getAllocator() { return F.getAllocator(); } @@ -993,7 +995,7 @@ public: friend class Factory; /// contains - Returns true if the set contains the specified value. - bool contains(const value_type_ref V) const { + bool contains(value_type_ref V) const { return Root ? Root->contains(V) : false; } @@ -1005,7 +1007,9 @@ public: return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root; } - TreeTy* getRoot() const { return Root; } + TreeTy *getRoot() { + return Root; + } /// isEmpty - Return true if the set contains no elements. 
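Canonicalization of the AVL trees is now deferred and optional: Add and Remove build a plain tree, and only GetCanonicalTree consults the FoldingSet cache. Passing canonicalize=false to the Factory skips uniquing entirely, which speeds up churn-heavy clients at the cost of losing pointer identity between structurally equal sets. A sketch:

    ImmutableSet<int>::Factory F;            // canonicalizing (the default)
    ImmutableSet<int> S = F.GetEmptySet();
    S = F.Add(S, 3);
    S = F.Add(S, 7);

    // Faster updates, but structurally equal sets may have distinct roots:
    ImmutableSet<int>::Factory FastF(/*canonicalize=*/false);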
bool isEmpty() const { return !Root; } @@ -1026,11 +1030,10 @@ public: class iterator { typename TreeTy::iterator itr; - - iterator() {} iterator(TreeTy* t) : itr(t) {} friend class ImmutableSet; public: + iterator() {} inline value_type_ref operator*() const { return itr->getValue(); } inline iterator& operator++() { ++itr; return *this; } inline iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; } diff --git a/include/llvm/ADT/IndexedMap.h b/include/llvm/ADT/IndexedMap.h index ff5d3a139c705..89f0dfa64e1ce 100644 --- a/include/llvm/ADT/IndexedMap.h +++ b/include/llvm/ADT/IndexedMap.h @@ -26,7 +26,7 @@ namespace llvm { - struct IdentityFunctor : std::unary_function { + struct IdentityFunctor : public std::unary_function { unsigned operator()(unsigned Index) const { return Index; } diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index 0aa478b1ff61c..73ba3c7293de0 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -65,7 +65,8 @@ public: } PointerTy getPointer() const { - return reinterpret_cast(Value & PointerBitMask); + return PtrTraits::getFromVoidPointer( + reinterpret_cast(Value & PointerBitMask)); } IntType getInt() const { @@ -73,7 +74,8 @@ public: } void setPointer(PointerTy Ptr) { - intptr_t PtrVal = reinterpret_cast(Ptr); + intptr_t PtrVal + = reinterpret_cast(PtrTraits::getAsVoidPointer(Ptr)); assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 && "Pointer is not sufficiently aligned"); // Preserve all low bits, just update the pointer. @@ -141,8 +143,7 @@ public: return PointerIntPair::getFromOpaqueValue(P); } enum { - NumLowBitsAvailable = - PointerLikeTypeTraits::NumLowBitsAvailable - IntBits + NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits }; }; diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index 1b36aeea79347..33f2fcb11395f 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -79,7 +79,7 @@ namespace llvm { Val.setInt(1); } - /// isNull - Return true if the pointer help in the union is null, + /// isNull - Return true if the pointer held in the union is null, /// regardless of which type it is. bool isNull() const { return Val.getPointer() == 0; } operator bool() const { return !isNull(); } @@ -176,7 +176,7 @@ namespace llvm { Val = V; } - /// isNull - Return true if the pointer help in the union is null, + /// isNull - Return true if the pointer held in the union is null, /// regardless of which type it is. bool isNull() const { return Val.isNull(); } operator bool() const { return !isNull(); } @@ -254,6 +254,115 @@ namespace llvm { ::NumLowBitsAvailable }; }; + + /// PointerUnion4 - This is a pointer union of four pointer types. See + /// documentation for PointerUnion for usage. + template + class PointerUnion4 { + public: + typedef PointerUnion InnerUnion1; + typedef PointerUnion InnerUnion2; + typedef PointerUnion ValTy; + private: + ValTy Val; + public: + PointerUnion4() {} + + PointerUnion4(PT1 V) { + Val = InnerUnion1(V); + } + PointerUnion4(PT2 V) { + Val = InnerUnion1(V); + } + PointerUnion4(PT3 V) { + Val = InnerUnion2(V); + } + PointerUnion4(PT4 V) { + Val = InnerUnion2(V); + } + + /// isNull - Return true if the pointer held in the union is null, + /// regardless of which type it is. + bool isNull() const { return Val.isNull(); } + operator bool() const { return !isNull(); } + + /// is() return true if the Union currently holds the type matching T. 
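Routing getPointer/setPointer through PointerLikeTypeTraits (rather than a raw reinterpret_cast) lets types with a custom void* form, including nested PointerIntPairs, round-trip correctly. A self-contained sketch of the underlying bit-packing trick, assuming the pointee's alignment frees the low bit (plain C++ for illustration, not the LLVM class itself):

    #include <cassert>
    #include <cstdint>

    // Pack a bool into the low alignment bit of an int*.
    struct PtrBoolPair {
      intptr_t Value;
      PtrBoolPair() : Value(0) {}
      void set(int *P, bool B) {
        intptr_t PtrVal = reinterpret_cast<intptr_t>(P);
        assert((PtrVal & 1) == 0 && "Pointer is not sufficiently aligned");
        Value = PtrVal | intptr_t(B);   // low bit holds the flag
      }
      int *getPointer() const {
        return reinterpret_cast<int *>(Value & ~intptr_t(1));
      }
      bool getInt() const { return Value & 1; }
    };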
+ template + int is() const { + // Is it PT1/PT2? + if (::llvm::getPointerUnionTypeNum((T*)0) != -1) + return Val.is() && Val.get().is(); + return Val.is() && Val.get().is(); + } + + /// get() - Return the value of the specified pointer type. If the + /// specified pointer type is incorrect, assert. + template + T get() const { + assert(is() && "Invalid accessor called"); + // Is it PT1/PT2? + if (::llvm::getPointerUnionTypeNum((T*)0) != -1) + return Val.get().get(); + + return Val.get().get(); + } + + /// dyn_cast() - If the current value is of the specified pointer type, + /// return it, otherwise return null. + template + T dyn_cast() const { + if (is()) return get(); + return T(); + } + + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. + const PointerUnion4 &operator=(const PT1 &RHS) { + Val = InnerUnion1(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT2 &RHS) { + Val = InnerUnion1(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT3 &RHS) { + Val = InnerUnion2(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT4 &RHS) { + Val = InnerUnion2(RHS); + return *this; + } + + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static PointerUnion4 getFromOpaqueValue(void *VP) { + PointerUnion4 V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } + }; + + // Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has + // # low bits available = min(PT1bits,PT2bits,PT2bits)-2. + template + class PointerLikeTypeTraits > { + public: + static inline void * + getAsVoidPointer(const PointerUnion4 &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion4 + getFromVoidPointer(void *P) { + return PointerUnion4::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = + PointerLikeTypeTraits::ValTy> + ::NumLowBitsAvailable + }; + }; } #endif diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index b477d0a8f0f52..8315bc9f9ed54 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -17,7 +17,6 @@ #define LLVM_ADT_POSTORDERITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/SmallPtrSet.h" #include #include @@ -43,9 +42,11 @@ template::NodeType*, 8>, bool ExtStorage = false, class GT = GraphTraits > -class po_iterator : public forward_iterator, +class po_iterator : public std::iterator, public po_iterator_storage { - typedef forward_iterator super; + typedef std::iterator super; typedef typename GT::NodeType NodeType; typedef typename GT::ChildIteratorType ChildItTy; @@ -71,7 +72,7 @@ class po_iterator : public forward_iterator, inline po_iterator() {} // End is when stack is empty. inline po_iterator(NodeType *BB, SetType &S) : - po_iterator_storage(&S) { + po_iterator_storage(S) { if(!S.count(BB)) { this->Visited.insert(BB); VisitStack.push(std::make_pair(BB, GT::child_begin(BB))); @@ -80,7 +81,7 @@ class po_iterator : public forward_iterator, } inline po_iterator(SetType &S) : - po_iterator_storage(&S) { + po_iterator_storage(S) { } // End is when stack is empty. 
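The PointerUnion4 added above gets four-way discrimination by nesting two PointerUnions inside a third, so two tag bits are spent in total and is<T>() must first decide which inner union to ask. A minimal usage sketch (A through D are hypothetical pointee types):

    #include "llvm/ADT/PointerUnion.h"
    #include <cassert>

    struct A {}; struct B {}; struct C {}; struct D {};
    typedef llvm::PointerUnion4<A *, B *, C *, D *> PU4;

    void demo(A *APtr) {
      PU4 P;                          // starts out null
      assert(P.isNull());
      P = APtr;                       // discriminator records the PT1 case
      if (P.is<A *>()) {
        A *Back = P.get<A *>();       // get() asserts on a type mismatch
        (void)Back;
      }
      B *MaybeB = P.dyn_cast<B *>();  // null here: the union holds an A*
      (void)MaybeB;
    }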
public: typedef typename super::pointer pointer; diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h index e28f4caa5d769..db985b5ae1d22 100644 --- a/include/llvm/ADT/SCCIterator.h +++ b/include/llvm/ADT/SCCIterator.h @@ -22,8 +22,7 @@ #define LLVM_ADT_SCCITERATOR_H #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" -#include +#include "llvm/ADT/DenseMap.h" #include namespace llvm { @@ -35,11 +34,13 @@ namespace llvm { /// template > class scc_iterator - : public forward_iterator, ptrdiff_t> { + : public std::iterator, ptrdiff_t> { typedef typename GT::NodeType NodeType; typedef typename GT::ChildIteratorType ChildItTy; typedef std::vector SccTy; - typedef forward_iterator super; + typedef std::iterator, ptrdiff_t> super; typedef typename super::reference reference; typedef typename super::pointer pointer; @@ -47,7 +48,7 @@ class scc_iterator // visitNum is the global counter. // nodeVisitNumbers are per-node visit numbers, also used as DFS flags. unsigned visitNum; - std::map nodeVisitNumbers; + DenseMap nodeVisitNumbers; // SCCNodeStack - Stack holding nodes of the SCC. std::vector SCCNodeStack; @@ -71,7 +72,7 @@ class scc_iterator SCCNodeStack.push_back(N); MinVisitNumStack.push_back(visitNum); VisitStack.push_back(std::make_pair(N, GT::child_begin(N))); - //DOUT << "TarjanSCC: Node " << N << + //errs() << "TarjanSCC: Node " << N << // " : visitNum = " << visitNum << "\n"; } @@ -106,7 +107,7 @@ class scc_iterator if (!MinVisitNumStack.empty() && MinVisitNumStack.back() > minVisitNum) MinVisitNumStack.back() = minVisitNum; - //DOUT << "TarjanSCC: Popped node " << visitingN << + //errs() << "TarjanSCC: Popped node " << visitingN << // " : minVisitNum = " << minVisitNum << "; Node visit num = " << // nodeVisitNumbers[visitingN] << "\n"; diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 964e7e07ef7d5..6f4769260aa90 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -19,8 +19,8 @@ #include // for std::size_t #include +#include #include // for std::pair -#include "llvm/ADT/iterator.h" namespace llvm { @@ -28,6 +28,13 @@ namespace llvm { // Extra additions to //===----------------------------------------------------------------------===// +template +struct less_ptr : public std::binary_function { + bool operator()(const Ty* left, const Ty* right) const { + return *left < *right; + } +}; + template struct greater_ptr : public std::binary_function { bool operator()(const Ty* left, const Ty* right) const { diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index a189de2c2279d..7d00e9a073e78 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -17,6 +17,7 @@ #include #include +#include #include "llvm/Support/DataTypes.h" #include "llvm/Support/PointerLikeTypeTraits.h" @@ -170,7 +171,14 @@ protected: template class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { typedef PointerLikeTypeTraits PtrTraits; + public: + typedef PtrTy value_type; + typedef PtrTy reference; + typedef PtrTy pointer; + typedef std::ptrdiff_t difference_type; + typedef std::forward_iterator_tag iterator_category; + explicit SmallPtrSetIterator(const void *const *BP) : SmallPtrSetIteratorImpl(BP) {} diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h index caaa96c045f73..d03f1bef15b11 100644 --- a/include/llvm/ADT/SmallSet.h +++ b/include/llvm/ADT/SmallSet.h @@ -30,7 +30,7 @@ namespace llvm { template class SmallSet { /// Use a SmallVector to hold 
the elements here (even though it will never - /// reach it's 'large' stage) to avoid calling the default ctors of elements + /// reach its 'large' stage) to avoid calling the default ctors of elements /// we will never use. SmallVector Vector; std::set Set; diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h index 687fa2d26e246..035462515a83e 100644 --- a/include/llvm/ADT/SmallString.h +++ b/include/llvm/ADT/SmallString.h @@ -15,8 +15,7 @@ #define LLVM_ADT_SMALLSTRING_H #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/DataTypes.h" -#include +#include "llvm/ADT/StringRef.h" namespace llvm { @@ -37,73 +36,30 @@ public: // Extra methods. - const char *c_str() const { - SmallString *This = const_cast(this); - // Ensure that there is a \0 at the end of the string. - This->reserve(this->size()+1); - This->End[0] = 0; - return this->begin(); - } + StringRef str() const { return StringRef(this->begin(), this->size()); } + const char *c_str() { + this->push_back(0); + this->pop_back(); + return this->data(); + } + // Extra operators. - const SmallString &operator=(const char *RHS) { + const SmallString &operator=(StringRef RHS) { this->clear(); return *this += RHS; } - SmallString &operator+=(const char *RHS) { - this->append(RHS, RHS+strlen(RHS)); + SmallString &operator+=(StringRef RHS) { + this->append(RHS.begin(), RHS.end()); return *this; } SmallString &operator+=(char C) { this->push_back(C); return *this; } - - SmallString &append_uint_32(uint32_t N) { - char Buffer[20]; - char *BufPtr = Buffer+20; - - if (N == 0) *--BufPtr = '0'; // Handle special case. - - while (N) { - *--BufPtr = '0' + char(N % 10); - N /= 10; - } - this->append(BufPtr, Buffer+20); - return *this; - } - - SmallString &append_uint(uint64_t N) { - if (N == uint32_t(N)) - return append_uint_32(uint32_t(N)); - - char Buffer[40]; - char *BufPtr = Buffer+40; - - if (N == 0) *--BufPtr = '0'; // Handle special case... - - while (N) { - *--BufPtr = '0' + char(N % 10); - N /= 10; - } - - this->append(BufPtr, Buffer+40); - return *this; - } - - SmallString &append_sint(int64_t N) { - // TODO, wrong for minint64. - if (N < 0) { - this->push_back('-'); - N = -N; - } - return append_uint(N); - } - }; - } #endif diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index f59a438d3eb4f..f3b4533b94290 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -14,7 +14,6 @@ #ifndef LLVM_ADT_SMALLVECTOR_H #define LLVM_ADT_SMALLVECTOR_H -#include "llvm/ADT/iterator.h" #include "llvm/Support/type_traits.h" #include #include @@ -122,11 +121,11 @@ public: reference operator[](unsigned idx) { - assert (Begin + idx < End); + assert(Begin + idx < End); return Begin[idx]; } const_reference operator[](unsigned idx) const { - assert (Begin + idx < End); + assert(Begin + idx < End); return Begin[idx]; } @@ -399,6 +398,24 @@ public: RHS.begin(), RHS.end()); } + /// capacity - Return the total number of elements in the currently allocated + /// buffer. + size_t capacity() const { return Capacity - Begin; } + + /// set_size - Set the array size to \arg N, which the current array must have + /// enough capacity for. + /// + /// This does not construct or destroy any elements in the vector. + /// + /// Clients can use this in conjunction with capacity() to write past the end + /// of the buffer when they know that more elements are available, and only + /// update the size later. This avoids the cost of value initializing elements + /// which will only be overwritten. 
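With the append_uint family gone (integer formatting is now the job of Twine and raw_ostream), SmallString's mutation API reduces to the StringRef-based operators above. A minimal usage sketch:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"

    void demo() {
      llvm::SmallString<64> Path;
      Path = "/tmp";                     // operator=(StringRef)
      Path += "/out";                    // operator+=(StringRef)
      Path += '.';                       // operator+=(char)
      Path += "ll";
      llvm::StringRef View = Path.str(); // non-owning view, no copy
      const char *CStr = Path.c_str();   // NUL-terminates the buffer in place
      (void)View; (void)CStr;
    }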
+ void set_size(unsigned N) { + assert(N <= capacity()); + End = Begin + N; + } + private: /// isSmall - Return true if this is a smallvector which has not had dynamic /// memory allocated for it. diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 6230135131a7c..b7a6873001e24 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -15,13 +15,14 @@ #ifndef LLVM_ADT_SPARSEBITVECTOR_H #define LLVM_ADT_SPARSEBITVECTOR_H +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include #include #include -#include "llvm/Support/DataTypes.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/ADT/ilist.h" namespace llvm { @@ -41,7 +42,7 @@ namespace llvm { template struct SparseBitVectorElement - : ilist_node > { + : public ilist_node > { public: typedef unsigned long BitWord; enum { @@ -887,7 +888,7 @@ operator-(const SparseBitVector &LHS, // Dump a SparseBitVector to a stream template -void dump(const SparseBitVector &LHS, llvm::OStream &out) { +void dump(const SparseBitVector &LHS, raw_ostream &out) { out << "[ "; typename SparseBitVector::iterator bi; diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index e40e409802d25..3d1993c6b263e 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -103,10 +103,6 @@ static inline std::string itostr(int64_t X) { return utostr(static_cast(X)); } -static inline std::string itohexstr(int64_t X) { - return utohexstr(static_cast(X)); -} - static inline std::string ftostr(double V) { char Buffer[200]; sprintf(Buffer, "%20.6e", V); diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index a15d24eeae251..73fd635ee2bca 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -14,6 +14,7 @@ #ifndef LLVM_ADT_STRINGMAP_H #define LLVM_ADT_STRINGMAP_H +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include #include @@ -95,12 +96,12 @@ protected: /// specified bucket will be non-null. Otherwise, it will be null. In either /// case, the FullHashValue field of the bucket will be set to the hash value /// of the string. - unsigned LookupBucketFor(const char *KeyStart, const char *KeyEnd); + unsigned LookupBucketFor(const StringRef &Key); /// FindKey - Look up the bucket that contains the specified key. If it exists /// in the map, return the bucket number of the key. Otherwise return -1. /// This does not modify the map. - int FindKey(const char *KeyStart, const char *KeyEnd) const; + int FindKey(const StringRef &Key) const; /// RemoveKey - Remove the specified StringMapEntry from the table, but do not /// delete it. This aborts if the value isn't in the table. @@ -108,7 +109,7 @@ protected: /// RemoveKey - Remove the StringMapEntry for the specified key from the /// table, returning it. If the key is not in the table, this returns null. 
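The capacity()/set_size() pair added to SmallVector above exists so a caller can let an external producer write straight into the vector's spare storage and only commit the element count afterwards, avoiding value-initialization of elements that are about to be overwritten. A sketch of the intended pattern, with a hypothetical readBytes(char*, size_t) standing in for a real producer:

    #include "llvm/ADT/SmallVector.h"
    #include <cstddef>

    size_t readBytes(char *Buf, size_t Max); // hypothetical producer

    void fill(llvm::SmallVector<char, 128> &V) {
      V.clear();
      V.reserve(4096);                               // make room up front
      size_t N = readBytes(V.begin(), V.capacity()); // write past the logical end
      V.set_size(unsigned(N));                       // commit; nothing is constructed
    }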
- StringMapEntryBase *RemoveKey(const char *KeyStart, const char *KeyEnd); + StringMapEntryBase *RemoveKey(const StringRef &Key); private: void init(unsigned Size); public: @@ -136,6 +137,10 @@ public: StringMapEntry(unsigned strLen, const ValueTy &V) : StringMapEntryBase(strLen), second(V) {} + StringRef getKey() const { + return StringRef(getKeyData(), getKeyLength()); + } + const ValueTy &getValue() const { return second; } ValueTy &getValue() { return second; } @@ -277,75 +282,40 @@ public: return const_iterator(TheTable+NumBuckets, true); } - iterator find(const char *KeyStart, const char *KeyEnd) { - int Bucket = FindKey(KeyStart, KeyEnd); + iterator find(const StringRef &Key) { + int Bucket = FindKey(Key); if (Bucket == -1) return end(); return iterator(TheTable+Bucket); } - iterator find(const char *Key) { - return find(Key, Key + strlen(Key)); - } - iterator find(const std::string &Key) { - return find(Key.data(), Key.data() + Key.size()); - } - const_iterator find(const char *KeyStart, const char *KeyEnd) const { - int Bucket = FindKey(KeyStart, KeyEnd); + const_iterator find(const StringRef &Key) const { + int Bucket = FindKey(Key); if (Bucket == -1) return end(); return const_iterator(TheTable+Bucket); } - const_iterator find(const char *Key) const { - return find(Key, Key + strlen(Key)); - } - const_iterator find(const std::string &Key) const { - return find(Key.data(), Key.data() + Key.size()); - } /// lookup - Return the entry for the specified key, or a default /// constructed value if no such entry exists. - ValueTy lookup(const char *KeyStart, const char *KeyEnd) const { - const_iterator it = find(KeyStart, KeyEnd); - if (it != end()) - return it->second; - return ValueTy(); - } - ValueTy lookup(const char *Key) const { - const_iterator it = find(Key); - if (it != end()) - return it->second; - return ValueTy(); - } - ValueTy lookup(const std::string &Key) const { + ValueTy lookup(const StringRef &Key) const { const_iterator it = find(Key); if (it != end()) return it->second; return ValueTy(); } - ValueTy& operator[](const char *Key) { - return GetOrCreateValue(Key, Key + strlen(Key)).getValue(); - } - ValueTy& operator[](const std::string &Key) { - return GetOrCreateValue(Key.data(), Key.data() + Key.size()).getValue(); + ValueTy& operator[](const StringRef &Key) { + return GetOrCreateValue(Key).getValue(); } - size_type count(const char *KeyStart, const char *KeyEnd) const { - return find(KeyStart, KeyEnd) == end() ? 0 : 1; - } - size_type count(const char *Key) const { - return count(Key, Key + strlen(Key)); - } - size_type count(const std::string &Key) const { - return count(Key.data(), Key.data() + Key.size()); + size_type count(const StringRef &Key) const { + return find(Key) == end() ? 0 : 1; } /// insert - Insert the specified key/value pair into the map. If the key /// already exists in the map, return false and ignore the request, otherwise /// insert it and return true. bool insert(MapEntryTy *KeyValue) { - unsigned BucketNo = - LookupBucketFor(KeyValue->getKeyData(), - KeyValue->getKeyData()+KeyValue->getKeyLength()); + unsigned BucketNo = LookupBucketFor(KeyValue->getKey()); ItemBucket &Bucket = TheTable[BucketNo]; if (Bucket.Item && Bucket.Item != getTombstoneVal()) return false; // Already exists in map. @@ -380,15 +350,15 @@ public: /// exists, return it. Otherwise, default construct a value, insert it, and /// return. 
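After this consolidation every StringMap lookup path funnels through one StringRef overload, and const char* or std::string keys simply convert at the call site. A minimal usage sketch:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"
    #include <string>

    void demo() {
      llvm::StringMap<int> Counts;
      Counts["foo"] = 1;              // const char* converts to StringRef
      std::string Key = "bar";
      Counts[Key] += 2;               // so does std::string
      if (Counts.count("foo")) {
        int N = Counts.lookup("foo"); // default-constructed value if absent
        (void)N;
      }
      Counts.erase("bar");
    }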
template - StringMapEntry &GetOrCreateValue(const char *KeyStart, - const char *KeyEnd, + StringMapEntry &GetOrCreateValue(const StringRef &Key, InitTy Val) { - unsigned BucketNo = LookupBucketFor(KeyStart, KeyEnd); + unsigned BucketNo = LookupBucketFor(Key); ItemBucket &Bucket = TheTable[BucketNo]; if (Bucket.Item && Bucket.Item != getTombstoneVal()) return *static_cast(Bucket.Item); - MapEntryTy *NewItem = MapEntryTy::Create(KeyStart, KeyEnd, Allocator, Val); + MapEntryTy *NewItem = + MapEntryTy::Create(Key.begin(), Key.end(), Allocator, Val); if (Bucket.Item == getTombstoneVal()) --NumTombstones; @@ -403,9 +373,20 @@ public: return *NewItem; } + StringMapEntry &GetOrCreateValue(const StringRef &Key) { + return GetOrCreateValue(Key, ValueTy()); + } + + template + StringMapEntry &GetOrCreateValue(const char *KeyStart, + const char *KeyEnd, + InitTy Val) { + return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart), Val); + } + StringMapEntry &GetOrCreateValue(const char *KeyStart, const char *KeyEnd) { - return GetOrCreateValue(KeyStart, KeyEnd, ValueTy()); + return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart)); } /// remove - Remove the specified key/value pair from the map, but do not @@ -420,14 +401,7 @@ public: V.Destroy(Allocator); } - bool erase(const char *Key) { - iterator I = find(Key); - if (I == end()) return false; - erase(I); - return true; - } - - bool erase(const std::string &Key) { + bool erase(const StringRef &Key) { iterator I = find(Key); if (I == end()) return false; erase(I); diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h new file mode 100644 index 0000000000000..aa7d577da75a2 --- /dev/null +++ b/include/llvm/ADT/StringRef.h @@ -0,0 +1,335 @@ +//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRINGREF_H +#define LLVM_ADT_STRINGREF_H + +#include +#include +#include +#include + +namespace llvm { + + /// StringRef - Represent a constant reference to a string, i.e. a character + /// array and a length, which need not be null terminated. + /// + /// This class does not own the string data, it is expected to be used in + /// situations where the character data resides in some other buffer, whose + /// lifetime extends past that of the StringRef. For this reason, it is not in + /// general safe to store a StringRef. + class StringRef { + public: + typedef const char *iterator; + static const size_t npos = ~size_t(0); + typedef size_t size_type; + + private: + /// The start of the string, in an external buffer. + const char *Data; + + /// The length of the string. + size_t Length; + + public: + /// @name Constructors + /// @{ + + /// Construct an empty string ref. + /*implicit*/ StringRef() : Data(0), Length(0) {} + + /// Construct a string ref from a cstring. + /*implicit*/ StringRef(const char *Str) + : Data(Str) { if (Str) Length = ::strlen(Str); else Length = 0; } + + /// Construct a string ref from a pointer and length. + /*implicit*/ StringRef(const char *data, unsigned length) + : Data(data), Length(length) {} + + /// Construct a string ref from an std::string. 
+ /*implicit*/ StringRef(const std::string &Str) + : Data(Str.c_str()), Length(Str.length()) {} + + /// @} + /// @name Iterators + /// @{ + + iterator begin() const { return Data; } + + iterator end() const { return Data + Length; } + + /// @} + /// @name String Operations + /// @{ + + /// data - Get a pointer to the start of the string (which may not be null + /// terminated). + const char *data() const { return Data; } + + /// empty - Check if the string is empty. + bool empty() const { return Length == 0; } + + /// size - Get the string size. + size_t size() const { return Length; } + + /// front - Get the first character in the string. + char front() const { + assert(!empty()); + return Data[0]; + } + + /// back - Get the last character in the string. + char back() const { + assert(!empty()); + return Data[Length-1]; + } + + /// equals - Check for string equality, this is more efficient than + /// compare() when the relative ordering of inequal strings isn't needed. + bool equals(const StringRef &RHS) const { + return (Length == RHS.Length && + memcmp(Data, RHS.Data, RHS.Length) == 0); + } + + /// compare - Compare two strings; the result is -1, 0, or 1 if this string + /// is lexicographically less than, equal to, or greater than the \arg RHS. + int compare(const StringRef &RHS) const { + // Check the prefix for a mismatch. + if (int Res = memcmp(Data, RHS.Data, std::min(Length, RHS.Length))) + return Res < 0 ? -1 : 1; + + // Otherwise the prefixes match, so we only need to check the lengths. + if (Length == RHS.Length) + return 0; + return Length < RHS.Length ? -1 : 1; + } + + /// str - Get the contents as an std::string. + std::string str() const { return std::string(Data, Length); } + + /// @} + /// @name Operator Overloads + /// @{ + + char operator[](size_t Index) const { + assert(Index < Length && "Invalid index!"); + return Data[Index]; + } + + /// @} + /// @name Type Conversions + /// @{ + + operator std::string() const { + return str(); + } + + /// @} + /// @name String Predicates + /// @{ + + /// startswith - Check if this string starts with the given \arg Prefix. + bool startswith(const StringRef &Prefix) const { + return substr(0, Prefix.Length).equals(Prefix); + } + + /// endswith - Check if this string ends with the given \arg Suffix. + bool endswith(const StringRef &Suffix) const { + return slice(size() - Suffix.Length, size()).equals(Suffix); + } + + /// @} + /// @name String Searching + /// @{ + + /// find - Search for the first character \arg C in the string. + /// + /// \return - The index of the first occurence of \arg C, or npos if not + /// found. + size_t find(char C) const { + for (size_t i = 0, e = Length; i != e; ++i) + if (Data[i] == C) + return i; + return npos; + } + + /// find - Search for the first string \arg Str in the string. + /// + /// \return - The index of the first occurence of \arg Str, or npos if not + /// found. + size_t find(const StringRef &Str) const; + + /// rfind - Search for the last character \arg C in the string. + /// + /// \return - The index of the last occurence of \arg C, or npos if not + /// found. + size_t rfind(char C, size_t From = npos) const { + From = std::min(From, Length); + size_t i = From; + while (i != 0) { + --i; + if (Data[i] == C) + return i; + } + return npos; + } + + /// rfind - Search for the last string \arg Str in the string. + /// + /// \return - The index of the last occurence of \arg Str, or npos if not + /// found. 
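All of these StringRef operations are views over the original buffer; nothing is copied or allocated. A short usage sketch of the predicate, search, and slicing API:

    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    void demo() {
      llvm::StringRef S("hello-world.txt");
      assert(S.startswith("hello") && S.endswith(".txt"));
      llvm::StringRef Name = S.substr(0, S.find('-'));        // "hello"
      llvm::StringRef Ext =
          S.slice(S.rfind('.') + 1, llvm::StringRef::npos);   // "txt"
      assert(Name == "hello" && Ext == "txt");
    }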
+ size_t rfind(const StringRef &Str) const; + + /// find_first_of - Find the first instance of the specified character or + /// return npos if not in string. Same as find. + size_type find_first_of(char C) const { return find(C); } + + /// find_first_of - Find the first character from the string 'Chars' in the + /// current string or return npos if not in string. + size_type find_first_of(StringRef Chars) const; + + /// find_first_not_of - Find the first character in the string that is not + /// in the string 'Chars' or return npos if all are in string. Same as find. + size_type find_first_not_of(StringRef Chars) const; + + /// @} + /// @name Helpful Algorithms + /// @{ + + /// count - Return the number of occurrences of \arg C in the string. + size_t count(char C) const { + size_t Count = 0; + for (size_t i = 0, e = Length; i != e; ++i) + if (Data[i] == C) + ++Count; + return Count; + } + + /// count - Return the number of non-overlapped occurrences of \arg Str in + /// the string. + size_t count(const StringRef &Str) const; + + /// getAsInteger - Parse the current string as an integer of the specified + /// radix. If Radix is specified as zero, this does radix autosensing using + /// extended C rules: 0 is octal, 0x is hex, 0b is binary. + /// + /// If the string is invalid or if only a subset of the string is valid, + /// this returns true to signify the error. The string is considered + /// erroneous if empty. + /// + bool getAsInteger(unsigned Radix, long long &Result) const; + bool getAsInteger(unsigned Radix, unsigned long long &Result) const; + bool getAsInteger(unsigned Radix, int &Result) const; + bool getAsInteger(unsigned Radix, unsigned &Result) const; + + // TODO: Provide overloads for int/unsigned that check for overflow. + + /// @} + /// @name Substring Operations + /// @{ + + /// substr - Return a reference to the substring from [Start, Start + N). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param N - The number of characters to included in the substring. If N + /// exceeds the number of characters remaining in the string, the string + /// suffix (starting with \arg Start) will be returned. + StringRef substr(size_t Start, size_t N = npos) const { + Start = std::min(Start, Length); + return StringRef(Data + Start, std::min(N, Length - Start)); + } + + /// slice - Return a reference to the substring from [Start, End). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param End - The index following the last character to include in the + /// substring. If this is npos, or less than \arg Start, or exceeds the + /// number of characters remaining in the string, the string suffix + /// (starting with \arg Start) will be returned. + StringRef slice(size_t Start, size_t End) const { + Start = std::min(Start, Length); + End = std::min(std::max(Start, End), Length); + return StringRef(Data + Start, End - Start); + } + + /// split - Split into two substrings around the first occurence of a + /// separator character. + /// + /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) + /// such that (*this == LHS + Separator + RHS) is true and RHS is + /// maximal. 
If \arg Separator is not in the string, then the result is a + /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). + /// + /// \param Separator - The character to split on. + /// \return - The split substrings. + std::pair split(char Separator) const { + size_t Idx = find(Separator); + if (Idx == npos) + return std::make_pair(*this, StringRef()); + return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); + } + + /// rsplit - Split into two substrings around the last occurence of a + /// separator character. + /// + /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) + /// such that (*this == LHS + Separator + RHS) is true and RHS is + /// minimal. If \arg Separator is not in the string, then the result is a + /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). + /// + /// \param Separator - The character to split on. + /// \return - The split substrings. + std::pair rsplit(char Separator) const { + size_t Idx = rfind(Separator); + if (Idx == npos) + return std::make_pair(*this, StringRef()); + return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); + } + + /// @} + }; + + /// @name StringRef Comparison Operators + /// @{ + + inline bool operator==(const StringRef &LHS, const StringRef &RHS) { + return LHS.equals(RHS); + } + + inline bool operator!=(const StringRef &LHS, const StringRef &RHS) { + return !(LHS == RHS); + } + + inline bool operator<(const StringRef &LHS, const StringRef &RHS) { + return LHS.compare(RHS) == -1; + } + + inline bool operator<=(const StringRef &LHS, const StringRef &RHS) { + return LHS.compare(RHS) != 1; + } + + inline bool operator>(const StringRef &LHS, const StringRef &RHS) { + return LHS.compare(RHS) == 1; + } + + inline bool operator>=(const StringRef &LHS, const StringRef &RHS) { + return LHS.compare(RHS) != -1; + } + + /// @} + +} + +#endif diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h index ed94f9de2d1c4..cf92862c20d99 100644 --- a/include/llvm/ADT/Trie.h +++ b/include/llvm/ADT/Trie.h @@ -118,12 +118,12 @@ public: #if 0 inline void dump() { - std::cerr << "Node: " << this << "\n" + llvm::cerr << "Node: " << this << "\n" << "Label: " << Label << "\n" << "Children:\n"; for (iterator I = Children.begin(), E = Children.end(); I != E; ++I) - std::cerr << (*I)->Label << "\n"; + llvm::cerr << (*I)->Label << "\n"; } #endif diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 96c035795849e..89736bcc45651 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -10,9 +10,17 @@ #ifndef LLVM_ADT_TRIPLE_H #define LLVM_ADT_TRIPLE_H +#include "llvm/ADT/StringRef.h" #include +// Some system headers or GCC predefined macros conflict with identifiers in +// this file. Undefine them here. +#undef mips +#undef sparc + namespace llvm { +class StringRef; +class Twine; /// Triple - Helper class for working with target triples. /// @@ -26,17 +34,44 @@ namespace llvm { /// behavior for particular targets. This class isolates the mapping /// from the components of the target triple to well known IDs. /// -/// See autoconf/config.guess for a glimpse into what they look like -/// in practice. +/// At its core the Triple class is designed to be a wrapper for a triple +/// string; it does not normally change or normalize the triple string, instead +/// it provides additional APIs to parse normalized parts out of the triple. +/// +/// One curiosity this implies is that for some odd triples the results of, +/// e.g., getOSName() can be very different from the result of getOS(). 
For +/// example, for 'i386-mingw32', getOS() will return MinGW32, but since +/// getOSName() is purely based on the string structure that will return the +/// empty string. +/// +/// Clients should generally avoid using getOSName() and related APIs unless +/// they are familiar with the triple format (this is particularly true when +/// rewriting a triple). +/// +/// See autoconf/config.guess for a glimpse into what they look like in +/// practice. class Triple { public: enum ArchType { UnknownArch, - x86, // i?86 - ppc, // powerpc - ppc64, // powerpc64 - x86_64, // amd64, x86_64 + alpha, // Alpha: alpha + arm, // ARM; arm, armv.*, xscale + bfin, // Blackfin: bfin + cellspu, // CellSPU: spu, cellspu + mips, // MIPS: mips, mipsallegrex + mipsel, // MIPSEL: mipsel, mipsallegrexel, psp + msp430, // MSP430: msp430 + pic16, // PIC16: pic16 + ppc, // PPC: powerpc + ppc64, // PPC64: powerpc64 + sparc, // Sparc: sparc + systemz, // SystemZ: s390x + tce, // TCE (http://tce.cs.tut.fi/): tce + thumb, // Thumb: thumb, thumbv.* + x86, // X86: i[3-9]86 + x86_64, // X86-64: amd64, x86_64 + xcore, // XCore: xcore InvalidArch }; @@ -50,11 +85,17 @@ public: UnknownOS, AuroraUX, + Cygwin, Darwin, DragonFly, FreeBSD, Linux, - OpenBSD + MinGW32, + MinGW64, + NetBSD, + OpenBSD, + Solaris, + Win32 }; private: @@ -76,9 +117,9 @@ public: /// @name Constructors /// @{ - Triple() : Data(""), Arch(InvalidArch) {} - explicit Triple(const char *Str) : Data(Str), Arch(InvalidArch) {} - explicit Triple(const char *ArchStr, const char *VendorStr, const char *OSStr) + Triple() : Data(), Arch(InvalidArch) {} + explicit Triple(StringRef Str) : Data(Str), Arch(InvalidArch) {} + explicit Triple(StringRef ArchStr, StringRef VendorStr, StringRef OSStr) : Data(ArchStr), Arch(InvalidArch) { Data += '-'; Data += VendorStr; @@ -120,29 +161,41 @@ public: const std::string &getTriple() const { return Data; } - // FIXME: Invent a lightweight string representation for these to - // use. - /// getArchName - Get the architecture (first) component of the /// triple. - std::string getArchName() const; + StringRef getArchName() const; /// getVendorName - Get the vendor (second) component of the triple. - std::string getVendorName() const; + StringRef getVendorName() const; /// getOSName - Get the operating system (third) component of the /// triple. - std::string getOSName() const; + StringRef getOSName() const; /// getEnvironmentName - Get the optional environment (fourth) /// component of the triple, or "" if empty. - std::string getEnvironmentName() const; + StringRef getEnvironmentName() const; /// getOSAndEnvironmentName - Get the operating system and optional /// environment components as a single string (separated by a '-' /// if the environment component is present). - std::string getOSAndEnvironmentName() const; + StringRef getOSAndEnvironmentName() const; + + /// getDarwinNumber - Parse the 'darwin number' out of the specific target + /// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is + /// not defined, return 0's. This requires that the triple have an OSType of + /// darwin before it is called. + void getDarwinNumber(unsigned &Maj, unsigned &Min, unsigned &Revision) const; + + /// getDarwinMajorNumber - Return just the major version number, this is + /// specialized because it is a common query. 
+ unsigned getDarwinMajorNumber() const { + unsigned Maj, Min, Rev; + getDarwinNumber(Maj, Min, Rev); + return Maj; + } + /// @} /// @name Mutators /// @{ @@ -160,27 +213,27 @@ public: void setOS(OSType Kind); /// setTriple - Set all components to the new triple \arg Str. - void setTriple(const std::string &Str); + void setTriple(const Twine &Str); /// setArchName - Set the architecture (first) component of the /// triple by name. - void setArchName(const std::string &Str); + void setArchName(const StringRef &Str); /// setVendorName - Set the vendor (second) component of the triple /// by name. - void setVendorName(const std::string &Str); + void setVendorName(const StringRef &Str); /// setOSName - Set the operating system (third) component of the /// triple by name. - void setOSName(const std::string &Str); + void setOSName(const StringRef &Str); /// setEnvironmentName - Set the optional environment (fourth) /// component of the triple by name. - void setEnvironmentName(const std::string &Str); + void setEnvironmentName(const StringRef &Str); /// setOSAndEnvironmentName - Set the operating system and optional /// environment components with a single string. - void setOSAndEnvironmentName(const std::string &Str); + void setOSAndEnvironmentName(const StringRef &Str); /// @} /// @name Static helpers for IDs. @@ -190,6 +243,14 @@ public: /// architecture. static const char *getArchTypeName(ArchType Kind); + /// getArchTypePrefix - Get the "prefix" canonical name for the \arg Kind + /// architecture. This is the prefix used by the architecture specific + /// builtins, and is suitable for passing to \see + /// Intrinsic::getIntrinsicForGCCBuiltin(). + /// + /// \return - The architecture prefix, or 0 if none is defined. + static const char *getArchTypePrefix(ArchType Kind); + /// getVendorTypeName - Get the canonical name for the \arg Kind /// vendor. static const char *getVendorTypeName(VendorType Kind); @@ -197,6 +258,19 @@ public: /// getOSTypeName - Get the canonical name for the \arg Kind vendor. static const char *getOSTypeName(OSType Kind); + /// @} + /// @name Static helpers for converting alternate architecture names. + /// @{ + + /// getArchTypeForLLVMName - The canonical type for the given LLVM + /// architecture name (e.g., "x86"). + static ArchType getArchTypeForLLVMName(const StringRef &Str); + + /// getArchTypeForDarwinArchName - Get the architecture type for a "Darwin" + /// architecture name, for example as accepted by "gcc -arch" (see also + /// arch(3)). + static ArchType getArchTypeForDarwinArchName(const StringRef &Str); + /// @} }; diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h new file mode 100644 index 0000000000000..88fde0a54ae6a --- /dev/null +++ b/include/llvm/ADT/Twine.h @@ -0,0 +1,422 @@ +//===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_TWINE_H +#define LLVM_ADT_TWINE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" +#include +#include + +namespace llvm { + template + class SmallVectorImpl; + class StringRef; + class raw_ostream; + + /// Twine - A lightweight data structure for efficiently representing the + /// concatenation of temporary values as strings. 
+ /// + /// A Twine is a kind of rope, it represents a concatenated string using a + /// binary-tree, where the string is the preorder of the nodes. Since the + /// Twine can be efficiently rendered into a buffer when its result is used, + /// it avoids the cost of generating temporary values for intermediate string + /// results -- particularly in cases when the Twine result is never + /// required. By explicitly tracking the type of leaf nodes, we can also avoid + /// the creation of temporary strings for conversions operations (such as + /// appending an integer to a string). + /// + /// A Twine is not intended for use directly and should not be stored, its + /// implementation relies on the ability to store pointers to temporary stack + /// objects which may be deallocated at the end of a statement. Twines should + /// only be used accepted as const references in arguments, when an API wishes + /// to accept possibly-concatenated strings. + /// + /// Twines support a special 'null' value, which always concatenates to form + /// itself, and renders as an empty string. This can be returned from APIs to + /// effectively nullify any concatenations performed on the result. + /// + /// \b Implementation \n + /// + /// Given the nature of a Twine, it is not possible for the Twine's + /// concatenation method to construct interior nodes; the result must be + /// represented inside the returned value. For this reason a Twine object + /// actually holds two values, the left- and right-hand sides of a + /// concatenation. We also have nullary Twine objects, which are effectively + /// sentinel values that represent empty strings. + /// + /// Thus, a Twine can effectively have zero, one, or two children. The \see + /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for + /// testing the number of children. + /// + /// We maintain a number of invariants on Twine objects (FIXME: Why): + /// - Nullary twines are always represented with their Kind on the left-hand + /// side, and the Empty kind on the right-hand side. + /// - Unary twines are always represented with the value on the left-hand + /// side, and the Empty kind on the right-hand side. + /// - If a Twine has another Twine as a child, that child should always be + /// binary (otherwise it could have been folded into the parent). + /// + /// These invariants are check by \see isValid(). + /// + /// \b Efficiency Considerations \n + /// + /// The Twine is designed to yield efficient and small code for common + /// situations. For this reason, the concat() method is inlined so that + /// concatenations of leaf nodes can be optimized into stores directly into a + /// single stack allocated object. + /// + /// In practice, not all compilers can be trusted to optimize concat() fully, + /// so we provide two additional methods (and accompanying operator+ + /// overloads) to guarantee that particularly important cases (cstring plus + /// StringRef) codegen as desired. + class Twine { + /// NodeKind - Represent the type of an argument. + enum NodeKind { + /// An empty string; the result of concatenating anything with it is also + /// empty. + NullKind, + + /// The empty string. + EmptyKind, + + /// A pointer to a Twine instance. + TwineKind, + + /// A pointer to a C string instance. + CStringKind, + + /// A pointer to an std::string instance. + StdStringKind, + + /// A pointer to a StringRef instance. + StringRefKind, + + /// A pointer to an unsigned int value, to render as an unsigned decimal + /// integer. 
+ DecUIKind, + + /// A pointer to an int value, to render as a signed decimal integer. + DecIKind, + + /// A pointer to an unsigned long value, to render as an unsigned decimal + /// integer. + DecULKind, + + /// A pointer to a long value, to render as a signed decimal integer. + DecLKind, + + /// A pointer to an unsigned long long value, to render as an unsigned + /// decimal integer. + DecULLKind, + + /// A pointer to a long long value, to render as a signed decimal integer. + DecLLKind, + + /// A pointer to a uint64_t value, to render as an unsigned hexadecimal + /// integer. + UHexKind + }; + + private: + /// LHS - The prefix in the concatenation, which may be uninitialized for + /// Null or Empty kinds. + const void *LHS; + /// RHS - The suffix in the concatenation, which may be uninitialized for + /// Null or Empty kinds. + const void *RHS; + /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). + NodeKind LHSKind : 8; + /// RHSKind - The NodeKind of the left hand side, \see getLHSKind(). + NodeKind RHSKind : 8; + + private: + /// Construct a nullary twine; the kind must be NullKind or EmptyKind. + explicit Twine(NodeKind Kind) + : LHSKind(Kind), RHSKind(EmptyKind) { + assert(isNullary() && "Invalid kind!"); + } + + /// Construct a binary twine. + explicit Twine(const Twine &_LHS, const Twine &_RHS) + : LHS(&_LHS), RHS(&_RHS), LHSKind(TwineKind), RHSKind(TwineKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct a twine from explicit values. + explicit Twine(const void *_LHS, NodeKind _LHSKind, + const void *_RHS, NodeKind _RHSKind) + : LHS(_LHS), RHS(_RHS), LHSKind(_LHSKind), RHSKind(_RHSKind) { + assert(isValid() && "Invalid twine!"); + } + + /// isNull - Check for the null twine. + bool isNull() const { + return getLHSKind() == NullKind; + } + + /// isEmpty - Check for the empty twine. + bool isEmpty() const { + return getLHSKind() == EmptyKind; + } + + /// isNullary - Check if this is a nullary twine (null or empty). + bool isNullary() const { + return isNull() || isEmpty(); + } + + /// isUnary - Check if this is a unary twine. + bool isUnary() const { + return getRHSKind() == EmptyKind && !isNullary(); + } + + /// isBinary - Check if this is a binary twine. + bool isBinary() const { + return getLHSKind() != NullKind && getRHSKind() != EmptyKind; + } + + /// isValid - Check if this is a valid twine (satisfying the invariants on + /// order and number of arguments). + bool isValid() const { + // Nullary twines always have Empty on the RHS. + if (isNullary() && getRHSKind() != EmptyKind) + return false; + + // Null should never appear on the RHS. + if (getRHSKind() == NullKind) + return false; + + // The RHS cannot be non-empty if the LHS is empty. + if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) + return false; + + // A twine child should always be binary. + if (getLHSKind() == TwineKind && + !static_cast(LHS)->isBinary()) + return false; + if (getRHSKind() == TwineKind && + !static_cast(RHS)->isBinary()) + return false; + + return true; + } + + /// getLHSKind - Get the NodeKind of the left-hand side. + NodeKind getLHSKind() const { return LHSKind; } + + /// getRHSKind - Get the NodeKind of the left-hand side. + NodeKind getRHSKind() const { return RHSKind; } + + /// printOneChild - Print one child from a twine. + void printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const; + + /// printOneChildRepr - Print the representation of one child from a twine. 
+ void printOneChildRepr(raw_ostream &OS, const void *Ptr, + NodeKind Kind) const; + + public: + /// @name Constructors + /// @{ + + /// Construct from an empty string. + /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct from a C string. + /// + /// We take care here to optimize "" into the empty twine -- this will be + /// optimized out for string constants. This allows Twine arguments have + /// default "" values, without introducing unnecessary string constants. + /*implicit*/ Twine(const char *Str) + : RHSKind(EmptyKind) { + if (Str[0] != '\0') { + LHS = Str; + LHSKind = CStringKind; + } else + LHSKind = EmptyKind; + + assert(isValid() && "Invalid twine!"); + } + + /// Construct from an std::string. + /*implicit*/ Twine(const std::string &Str) + : LHS(&Str), LHSKind(StdStringKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct from a StringRef. + /*implicit*/ Twine(const StringRef &Str) + : LHS(&Str), LHSKind(StringRefKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct a twine to print \arg Val as an unsigned decimal integer. + explicit Twine(const unsigned int &Val) + : LHS(&Val), LHSKind(DecUIKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as a signed decimal integer. + explicit Twine(const int &Val) + : LHS(&Val), LHSKind(DecIKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as an unsigned decimal integer. + explicit Twine(const unsigned long &Val) + : LHS(&Val), LHSKind(DecULKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as a signed decimal integer. + explicit Twine(const long &Val) + : LHS(&Val), LHSKind(DecLKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as an unsigned decimal integer. + explicit Twine(const unsigned long long &Val) + : LHS(&Val), LHSKind(DecULLKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as a signed decimal integer. + explicit Twine(const long long &Val) + : LHS(&Val), LHSKind(DecLLKind), RHSKind(EmptyKind) { + } + + // FIXME: Unfortunately, to make sure this is as efficient as possible we + // need extra binary constructors from particular types. We can't rely on + // the compiler to be smart enough to fold operator+()/concat() down to the + // right thing. Yet. + + /// Construct as the concatenation of a C string and a StringRef. + /*implicit*/ Twine(const char *_LHS, const StringRef &_RHS) + : LHS(_LHS), RHS(&_RHS), LHSKind(CStringKind), RHSKind(StringRefKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct as the concatenation of a StringRef and a C string. + /*implicit*/ Twine(const StringRef &_LHS, const char *_RHS) + : LHS(&_LHS), RHS(_RHS), LHSKind(StringRefKind), RHSKind(CStringKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Create a 'null' string, which is an empty string that always + /// concatenates to form another empty string. + static Twine createNull() { + return Twine(NullKind); + } + + /// @} + /// @name Numeric Conversions + /// @{ + + // Construct a twine to print \arg Val as an unsigned hexadecimal integer. + static Twine utohexstr(const uint64_t &Val) { + return Twine(&Val, UHexKind, 0, EmptyKind); + } + + /// @} + /// @name Predicate Operations + /// @{ + + /// isTriviallyEmpty - Check if this twine is trivially empty; a false + /// return value does not necessarily mean the twine is empty. 
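Because a Twine only stores pointers to its operands, it is cheap to build but must be consumed within the statement that creates it. A minimal usage sketch (createName is a hypothetical consumer; the concatenation uses the operator+ overloads defined further down):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/Twine.h"

    void createName(const llvm::Twine &Name) {  // hypothetical Twine-taking API
      llvm::SmallString<32> Buf;
      Name.toVector(Buf);                       // render once, into stack storage
    }

    void demo(llvm::StringRef Base, unsigned N) {
      // Every operand outlives the full expression; no temporary std::strings.
      createName(Base + "." + llvm::Twine(N));
    }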
+ bool isTriviallyEmpty() const { + return isNullary(); + } + + /// @} + /// @name String Operations + /// @{ + + Twine concat(const Twine &Suffix) const; + + /// @} + /// @name Output & Conversion. + /// @{ + + /// str - Return the twine contents as a std::string. + std::string str() const; + + /// toVector - Write the concatenated string into the given SmallString or + /// SmallVector. + void toVector(SmallVectorImpl &Out) const; + + /// print - Write the concatenated string represented by this twine to the + /// stream \arg OS. + void print(raw_ostream &OS) const; + + /// dump - Dump the concatenated string represented by this twine to stderr. + void dump() const; + + /// print - Write the representation of this twine to the stream \arg OS. + void printRepr(raw_ostream &OS) const; + + /// dumpRepr - Dump the representation of this twine to stderr. + void dumpRepr() const; + + /// @} + }; + + /// @name Twine Inline Implementations + /// @{ + + inline Twine Twine::concat(const Twine &Suffix) const { + // Concatenation with null is null. + if (isNull() || Suffix.isNull()) + return Twine(NullKind); + + // Concatenation with empty yields the other side. + if (isEmpty()) + return Suffix; + if (Suffix.isEmpty()) + return *this; + + // Otherwise we need to create a new node, taking care to fold in unary + // twines. + const void *NewLHS = this, *NewRHS = &Suffix; + NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind; + if (isUnary()) { + NewLHS = LHS; + NewLHSKind = getLHSKind(); + } + if (Suffix.isUnary()) { + NewRHS = Suffix.LHS; + NewRHSKind = Suffix.getLHSKind(); + } + + return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind); + } + + inline Twine operator+(const Twine &LHS, const Twine &RHS) { + return LHS.concat(RHS); + } + + /// Additional overload to guarantee simplified codegen; this is equivalent to + /// concat(). + + inline Twine operator+(const char *LHS, const StringRef &RHS) { + return Twine(LHS, RHS); + } + + /// Additional overload to guarantee simplified codegen; this is equivalent to + /// concat(). + + inline Twine operator+(const StringRef &LHS, const char *RHS) { + return Twine(LHS, RHS); + } + + inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) { + RHS.print(OS); + return OS; + } + + /// @} +} + +#endif diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h index b95e3e04e81f0..b3824a217c74b 100644 --- a/include/llvm/ADT/ilist.h +++ b/include/llvm/ADT/ilist.h @@ -38,8 +38,8 @@ #ifndef LLVM_ADT_ILIST_H #define LLVM_ADT_ILIST_H -#include "llvm/ADT/iterator.h" #include +#include namespace llvm { @@ -121,15 +121,15 @@ struct ilist_node_traits { /// for all common operations. /// template -struct ilist_default_traits : ilist_nextprev_traits, - ilist_sentinel_traits, - ilist_node_traits { +struct ilist_default_traits : public ilist_nextprev_traits, + public ilist_sentinel_traits, + public ilist_node_traits { }; // Template traits for intrusive list. By specializing this template class, you // can change what next/prev fields are used to store the links... template -struct ilist_traits : ilist_default_traits {}; +struct ilist_traits : public ilist_default_traits {}; // Const traits are the same as nonconst traits... 
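For context on the ilist changes that follow: a client type opts into iplist by deriving from ilist_node, which embeds the links in the object itself, so insertion allocates no separate list cell. A minimal sketch (Event is a hypothetical node type; the default constructor is what the lazily created sentinel needs under ilist_default_traits):

    #include "llvm/ADT/ilist.h"
    #include "llvm/ADT/ilist_node.h"

    struct Event : public llvm::ilist_node<Event> {
      int Id;
      Event() : Id(0) {}                // used for the list's sentinel
      explicit Event(int Id) : Id(Id) {}
    };

    void demo() {
      llvm::iplist<Event> L;            // owns its nodes
      L.push_back(new Event(1));        // links live inside Event itself
      L.push_back(new Event(2));
      for (llvm::iplist<Event>::iterator I = L.begin(), E = L.end(); I != E; ++I)
        ++I->Id;
    }                                   // ~iplist deletes the remaining nodes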
template @@ -140,11 +140,12 @@ struct ilist_traits : public ilist_traits {}; // template class ilist_iterator - : public bidirectional_iterator { + : public std::iterator { public: typedef ilist_traits Traits; - typedef bidirectional_iterator super; + typedef std::iterator super; typedef typename super::value_type value_type; typedef typename super::difference_type difference_type; @@ -189,12 +190,10 @@ public: // Accessors... operator pointer() const { - assert(Traits::getNext(NodePtr) != 0 && "Dereferencing end()!"); return NodePtr; } reference operator*() const { - assert(Traits::getNext(NodePtr) != 0 && "Dereferencing end()!"); return *NodePtr; } pointer operator->() const { return &operator*(); } @@ -215,7 +214,6 @@ public: } ilist_iterator &operator++() { // preincrement - Advance NodePtr = Traits::getNext(NodePtr); - assert(NodePtr && "++'d off the end of an ilist!"); return *this; } ilist_iterator operator--(int) { // postdecrement operators... @@ -323,13 +321,13 @@ class iplist : public Traits { /// CreateLazySentinel - This method verifies whether the sentinel for the /// list has been created and lazily makes it if not. void CreateLazySentinel() const { - this->Traits::ensureHead(Head); + this->ensureHead(Head); } static bool op_less(NodeTy &L, NodeTy &R) { return L < R; } static bool op_equal(NodeTy &L, NodeTy &R) { return L == R; } - // No fundamental reason why iplist can't by copyable, but the default + // No fundamental reason why iplist can't be copyable, but the default // copy/copy-assign won't do. iplist(const iplist &); // do not implement void operator=(const iplist &); // do not implement @@ -347,7 +345,7 @@ public: typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; - iplist() : Head(this->Traits::provideInitialHead()) {} + iplist() : Head(this->provideInitialHead()) {} ~iplist() { if (!Head) return; clear(); diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h index dae7475ffa012..da25f959e612a 100644 --- a/include/llvm/ADT/ilist_node.h +++ b/include/llvm/ADT/ilist_node.h @@ -18,28 +18,37 @@ namespace llvm { template -struct ilist_nextprev_traits; +struct ilist_traits; +/// ilist_half_node - Base class that provides prev services for sentinels. +/// template -struct ilist_traits; +class ilist_half_node { + friend struct ilist_traits; + NodeTy *Prev; +protected: + NodeTy *getPrev() { return Prev; } + const NodeTy *getPrev() const { return Prev; } + void setPrev(NodeTy *P) { Prev = P; } + ilist_half_node() : Prev(0) {} +}; + +template +struct ilist_nextprev_traits; /// ilist_node - Base class that provides next/prev services for nodes /// that use ilist_nextprev_traits or ilist_default_traits. 
/// template -class ilist_node { -private: +class ilist_node : private ilist_half_node { friend struct ilist_nextprev_traits; friend struct ilist_traits; - NodeTy *Prev, *Next; - NodeTy *getPrev() { return Prev; } + NodeTy *Next; NodeTy *getNext() { return Next; } - const NodeTy *getPrev() const { return Prev; } const NodeTy *getNext() const { return Next; } - void setPrev(NodeTy *N) { Prev = N; } void setNext(NodeTy *N) { Next = N; } protected: - ilist_node() : Prev(0), Next(0) {} + ilist_node() : Next(0) {} }; } // End llvm namespace diff --git a/include/llvm/AbstractTypeUser.h b/include/llvm/AbstractTypeUser.h index c1216baabf8fa..b6cceb4011ad3 100644 --- a/include/llvm/AbstractTypeUser.h +++ b/include/llvm/AbstractTypeUser.h @@ -31,6 +31,7 @@ namespace llvm { +class Value; class Type; class DerivedType; template struct simplify_type; @@ -55,6 +56,12 @@ template struct simplify_type; class AbstractTypeUser { protected: virtual ~AbstractTypeUser(); // Derive from me + + /// setType - It's normally not possible to change a Value's type in place, + /// but an AbstractTypeUser subclass that knows what it's doing can be + /// permitted to do so with care. + void setType(Value *V, const Type *NewTy); + public: /// refineAbstractType - The callback method invoked when an abstract type is @@ -65,7 +72,7 @@ public: const Type *NewTy) = 0; /// The other case which AbstractTypeUsers must be aware of is when a type - /// makes the transition from being abstract (where it has clients on it's + /// makes the transition from being abstract (where it has clients on its /// AbstractTypeUsers list) to concrete (where it does not). This method /// notifies ATU's when this occurs for a type. /// diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index ba040e1393bf3..be7d5ee37b80f 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -56,8 +56,7 @@ protected: void InitializeAliasAnalysis(Pass *P); /// getAnalysisUsage - All alias analysis implementations should invoke this - /// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that - /// TargetData is required by the pass. + /// directly (using AliasAnalysis::getAnalysisUsage(AU)). virtual void getAnalysisUsage(AnalysisUsage &AU) const; public: @@ -65,11 +64,15 @@ public: AliasAnalysis() : TD(0), AA(0) {} virtual ~AliasAnalysis(); // We want to be subclassed - /// getTargetData - Every alias analysis implementation depends on the size of - /// data items in the current Target. This provides a uniform way to handle - /// it. + /// getTargetData - Return a pointer to the current TargetData object, or + /// null if no TargetData object is available. /// - const TargetData &getTargetData() const { return *TD; } + const TargetData *getTargetData() const { return TD; } + + /// getTypeStoreSize - Return the TargetData store size for the given type, + /// if known, or a conservative value otherwise. + /// + unsigned getTypeStoreSize(const Type *Ty); //===--------------------------------------------------------------------===// /// Alias Queries... @@ -344,7 +347,7 @@ bool isNoAliasCall(const Value *V); /// isIdentifiedObject - Return true if this pointer refers to a distinct and /// identifiable object.
This returns true for: -/// Global Variables and Functions +/// Global Variables and Functions (but not Global Aliases) /// Allocas and Mallocs /// ByVal and NoAlias Arguments /// NoAlias returns diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 786c1d15ba1ab..239f30f9384e8 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -18,9 +18,8 @@ #define LLVM_ANALYSIS_ALIASSETTRACKER_H #include "llvm/Support/CallSite.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include @@ -155,12 +154,12 @@ public: iterator end() const { return iterator(); } bool empty() const { return PtrList == 0; } - void print(std::ostream &OS) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS) const; void dump() const; /// Define an iterator for alias sets... this is just a forward iterator. - class iterator : public forward_iterator { + class iterator : public std::iterator { PointerRec *CurNode; public: explicit iterator(PointerRec *CN = 0) : CurNode(CN) {} @@ -245,18 +244,38 @@ private: bool aliasesCallSite(CallSite CS, AliasAnalysis &AA) const; }; -inline std::ostream& operator<<(std::ostream &OS, const AliasSet &AS) { +inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) { AS.print(OS); return OS; } class AliasSetTracker { + /// CallbackVH - A CallbackVH to arrange for AliasSetTracker to be + /// notified whenever a Value is deleted. + class ASTCallbackVH : public CallbackVH { + AliasSetTracker *AST; + virtual void deleted(); + public: + ASTCallbackVH(Value *V, AliasSetTracker *AST = 0); + ASTCallbackVH &operator=(Value *V); + }; + /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that ASTCallbackVH + /// is not a POD (it needs its destructor called). + struct ASTCallbackVHDenseMapInfo : public DenseMapInfo { + static bool isPod() { return false; } + }; + AliasAnalysis &AA; ilist AliasSets; + typedef DenseMap + PointerMapType; + // Map from pointers to their node - DenseMap PointerMap; + PointerMapType PointerMap; + public: /// AliasSetTracker ctor - Create an empty collection of AliasSets, and use /// the specified alias analysis object to disambiguate load and store @@ -354,8 +373,7 @@ public: iterator begin() { return AliasSets.begin(); } iterator end() { return AliasSets.end(); } - void print(std::ostream &OS) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS) const; void dump() const; private: @@ -365,7 +383,7 @@ private: // getEntryFor - Just like operator[] on the map, except that it creates an // entry for the pointer if it doesn't already exist. 
AliasSet::PointerRec &getEntryFor(Value *V) { - AliasSet::PointerRec *&Entry = PointerMap[V]; + AliasSet::PointerRec *&Entry = PointerMap[ASTCallbackVH(V, this)]; if (Entry == 0) Entry = new AliasSet::PointerRec(V); return *Entry; @@ -383,7 +401,7 @@ private: AliasSet *findAliasSetForCallSite(CallSite CS); }; -inline std::ostream& operator<<(std::ostream &OS, const AliasSetTracker &AST) { +inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) { AST.print(OS); return OS; } diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index de839694dc8a3..bcb6dee033dcd 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -55,6 +55,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/System/IncludeFile.h" #include @@ -77,7 +78,7 @@ protected: public: static char ID; // Class identification, replacement for typeinfo //===--------------------------------------------------------------------- - // Accessors... + // Accessors. // typedef FunctionMapTy::iterator iterator; typedef FunctionMapTy::const_iterator const_iterator; @@ -107,6 +108,7 @@ public: /// Returns the CallGraphNode which is used to represent undetermined calls /// into the callgraph. Override this if you want behavioral inheritance. virtual CallGraphNode* getExternalCallingNode() const { return 0; } + virtual CallGraphNode* getCallsExternalNode() const { return 0; } /// Return the root/main method in the module, or some other root node, such /// as the externalcallingnode. Overload these if you behavioral @@ -130,19 +132,13 @@ public: return removeFunctionFromModule((*this)[F]); } - /// changeFunction - This method changes the function associated with this - /// CallGraphNode, for use by transformations that need to change the - /// prototype of a Function (thus they must create a new Function and move the - /// old code over). - void changeFunction(Function *OldF, Function *NewF); - /// getOrInsertFunction - This method is identical to calling operator[], but /// it will insert a new CallGraphNode for the specified function if one does /// not already exist. CallGraphNode *getOrInsertFunction(const Function *F); //===--------------------------------------------------------------------- - // Pass infrastructure interface glue code... + // Pass infrastructure interface glue code. // protected: CallGraph() {} @@ -155,35 +151,50 @@ public: /// void initialize(Module &M); - virtual void print(std::ostream &o, const Module *M) const; - void print(std::ostream *o, const Module *M) const { if (o) print(*o, M); } + void print(raw_ostream &o, Module *) const; void dump() const; - protected: // destroy - Release memory for the call graph virtual void destroy(); }; //===----------------------------------------------------------------------===// -// CallGraphNode class definition +// CallGraphNode class definition. // class CallGraphNode { - Function *F; - typedef std::pair CallRecord; + AssertingVH F; + + // CallRecord - This is a pair of the calling instruction (a call or invoke) + // and the callgraph node being called. +public: + typedef std::pair CallRecord; +private: std::vector CalledFunctions; - - CallGraphNode(const CallGraphNode &); // Do not implement + + /// NumReferences - This is the number of times that this CallGraphNode occurs + /// in the CalledFunctions array of this or other CallGraphNodes. 
+ unsigned NumReferences; + + CallGraphNode(const CallGraphNode &); // DO NOT IMPLEMENT + void operator=(const CallGraphNode &); // DO NOT IMPLEMENT + + void DropRef() { --NumReferences; } + void AddRef() { ++NumReferences; } public: typedef std::vector CalledFunctionsVector; + + // CallGraphNode ctor - Create a node for the specified function. + inline CallGraphNode(Function *f) : F(f), NumReferences(0) {} + //===--------------------------------------------------------------------- - // Accessor methods... + // Accessor methods. // typedef std::vector::iterator iterator; typedef std::vector::const_iterator const_iterator; - // getFunction - Return the function that this call graph node represents... + // getFunction - Return the function that this call graph node represents. Function *getFunction() const { return F; } inline iterator begin() { return CalledFunctions.begin(); } @@ -193,17 +204,21 @@ public: inline bool empty() const { return CalledFunctions.empty(); } inline unsigned size() const { return (unsigned)CalledFunctions.size(); } - // Subscripting operator - Return the i'th called function... + /// getNumReferences - Return the number of other CallGraphNodes in this + /// CallGraph that reference this node in their callee list. + unsigned getNumReferences() const { return NumReferences; } + + // Subscripting operator - Return the i'th called function. // CallGraphNode *operator[](unsigned i) const { + assert(i < CalledFunctions.size() && "Invalid index"); return CalledFunctions[i].second; } /// dump - Print out this call graph node. /// void dump() const; - void print(std::ostream &OS) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS) const; //===--------------------------------------------------------------------- // Methods to keep a call graph up to date with a function that has been @@ -213,15 +228,35 @@ public: /// removeAllCalledFunctions - As the name implies, this removes all edges /// from this CallGraphNode to any functions it calls. void removeAllCalledFunctions() { - CalledFunctions.clear(); + while (!CalledFunctions.empty()) { + CalledFunctions.back().second->DropRef(); + CalledFunctions.pop_back(); + } + } + + /// stealCalledFunctionsFrom - Move all the callee information from N to this + /// node. + void stealCalledFunctionsFrom(CallGraphNode *N) { + assert(CalledFunctions.empty() && + "Cannot steal callsite information if I already have some"); + std::swap(CalledFunctions, N->CalledFunctions); } + /// addCalledFunction - Add a function to the list of functions called by this /// one. void addCalledFunction(CallSite CS, CallGraphNode *M) { - CalledFunctions.push_back(std::make_pair(CS, M)); + CalledFunctions.push_back(std::make_pair(CS.getInstruction(), M)); + M->AddRef(); } + void removeCallEdge(iterator I) { + I->second->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); + } + + /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. Note that this method takes linear time, so it /// should be used sparingly. @@ -235,16 +270,12 @@ public: /// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite /// from this node to the specified callee function. void removeOneAbstractEdgeTo(CallGraphNode *Callee); - - /// replaceCallSite - Make the edge in the node for Old CallSite be for - /// New CallSite instead. Note that this method takes linear time, so it - /// should be used sparingly. 
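Because edges now keep reference counts on their callee nodes, passes should mutate the graph only through this interface rather than editing CalledFunctions directly. A sketch of typical client code under that assumption (the helper is hypothetical; removeCallEdgeFor is the linear-time removal documented below):

    #include "llvm/Analysis/CallGraph.h"
    using namespace llvm;

    // Point an existing call site at a new callee while keeping the
    // callee nodes' reference counts consistent.
    void redirectCall(CallGraph &CG, Function *Caller,
                      CallSite CS, Function *NewCallee) {
      CallGraphNode *CallerNode = CG[Caller];
      CallerNode->removeCallEdgeFor(CS);                // DropRef on old callee
      CallerNode->addCalledFunction(CS, CG[NewCallee]); // AddRef on new callee
    }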
- void replaceCallSite(CallSite Old, CallSite New); - - friend class CallGraph; - - // CallGraphNode ctor - Create a node for the specified function. - inline CallGraphNode(Function *f) : F(f) {} + + /// replaceCallEdge - This method replaces the edge in the node for the + /// specified call site with a new one. Note that this method takes linear + /// time, so it should be used sparingly. + void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode); + }; //===----------------------------------------------------------------------===// @@ -257,7 +288,7 @@ public: template <> struct GraphTraits { typedef CallGraphNode NodeType; - typedef std::pair CGNPairTy; + typedef CallGraphNode::CallRecord CGNPairTy; typedef std::pointer_to_unary_function CGNDerefFun; static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; } diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index 5fdf6d2c916cc..9805c6cf28d3d 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -1,4 +1,4 @@ -//===-- ConstantFolding.h - Analyze constant folding possibilities --------===// +//===-- ConstantFolding.h - Fold instructions into constants --------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This family of functions determines the possibility of performing constant -// folding. +// This file declares routines for folding instructions into constants. +// +// Also, to supplement the basic VMCore ConstantExpr simplifications, +// this file declares some additional folding routines that can make use of +// TargetData information. These functions cannot go in VMCore due to library +// dependency issues. // //===----------------------------------------------------------------------===// @@ -22,18 +26,20 @@ namespace llvm { class TargetData; class Function; class Type; + class LLVMContext; /// ConstantFoldInstruction - Attempt to constant fold the specified /// instruction. If successful, the constant result is returned, if not, null /// is returned. Note that this function can only fail when attempting to fold /// instructions like loads and stores, which have no constant expression form. /// -Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0); +Constant *ConstantFoldInstruction(Instruction *I, LLVMContext &Context, + const TargetData *TD = 0); /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified TargetData. If successful, the constant result /// is returned, if not, null is returned.
-Constant *ConstantFoldConstantExpression(ConstantExpr *CE, +Constant *ConstantFoldConstantExpression(ConstantExpr *CE, LLVMContext &Context, const TargetData *TD = 0); /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -44,6 +50,7 @@ Constant *ConstantFoldConstantExpression(ConstantExpr *CE, /// Constant *ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, Constant*const * Ops, unsigned NumOps, + LLVMContext &Context, const TargetData *TD = 0); /// ConstantFoldCompareInstOperands - Attempt to constant fold a compare @@ -52,6 +59,7 @@ Constant *ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, /// Constant *ConstantFoldCompareInstOperands(unsigned Predicate, Constant*const * Ops, unsigned NumOps, + LLVMContext &Context, const TargetData *TD = 0); diff --git a/include/llvm/Analysis/ConstantsScanner.h b/include/llvm/Analysis/ConstantsScanner.h index bac551f0492ad..cdaf68d75a639 100644 --- a/include/llvm/Analysis/ConstantsScanner.h +++ b/include/llvm/Analysis/ConstantsScanner.h @@ -17,13 +17,13 @@ #define LLVM_ANALYSIS_CONSTANTSSCANNER_H #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/iterator.h" namespace llvm { class Constant; -class constant_iterator : public forward_iterator { +class constant_iterator : public std::iterator { const_inst_iterator InstI; // Method instruction iterator unsigned OpIdx; // Operand index diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h index 06110d040d624..f76aa46b75d48 100644 --- a/include/llvm/Analysis/DebugInfo.h +++ b/include/llvm/Analysis/DebugInfo.h @@ -17,11 +17,16 @@ #ifndef LLVM_ANALYSIS_DEBUGINFO_H #define LLVM_ANALYSIS_DEBUGINFO_H +#include "llvm/Metadata.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ValueHandle.h" + +#define ATTACH_DEBUG_INFO_TO_AN_INSN 1 namespace llvm { class BasicBlock; @@ -37,19 +42,20 @@ namespace llvm { struct DbgRegionStartInst; struct DbgRegionEndInst; class DebugLoc; - class DebugLocTracker; + struct DebugLocTracker; class Instruction; + class LLVMContext; class DIDescriptor { - protected: - GlobalVariable *DbgGV; + protected: + TrackingVH DbgNode; - /// DIDescriptor constructor. If the specified GV is non-null, this checks + /// DIDescriptor constructor. If the specified node is non-null, check /// to make sure that the tag in the descriptor matches 'RequiredTag'. If /// not, the debug info is corrupt and we ignore it. 
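The folding entry points now thread the LLVMContext through explicitly. A minimal caller sketch (tryFold is hypothetical; I and TD are assumed to come from the surrounding pass):

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    // Attempt to fold I; on success, replace its uses and delete it.
    bool tryFold(Instruction *I, const TargetData *TD) {
      if (Constant *C = ConstantFoldInstruction(I, I->getContext(), TD)) {
        I->replaceAllUsesWith(C);
        I->eraseFromParent();
        return true;
      }
      return false;  // loads and stores, e.g., have no constant form
    }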
- DIDescriptor(GlobalVariable *GV, unsigned RequiredTag); + DIDescriptor(MDNode *N, unsigned RequiredTag); - const std::string &getStringField(unsigned Elt, std::string &Result) const; + const char *getStringField(unsigned Elt) const; unsigned getUnsignedField(unsigned Elt) const { return (unsigned)getUInt64Field(Elt); } @@ -58,18 +64,18 @@ namespace llvm { template DescTy getFieldAs(unsigned Elt) const { - return DescTy(getDescriptorField(Elt).getGV()); + return DescTy(getDescriptorField(Elt).getNode()); } GlobalVariable *getGlobalVariableField(unsigned Elt) const; public: - explicit DIDescriptor() : DbgGV(0) {} - explicit DIDescriptor(GlobalVariable *GV) : DbgGV(GV) {} + explicit DIDescriptor() : DbgNode(0) {} + explicit DIDescriptor(MDNode *N) : DbgNode(N) {} - bool isNull() const { return DbgGV == 0; } + bool isNull() const { return DbgNode == 0; } - GlobalVariable *getGV() const { return DbgGV; } + MDNode *getNode() const { return DbgNode; } unsigned getVersion() const { return getUnsignedField(0) & LLVMDebugVersionMask; @@ -79,18 +85,32 @@ namespace llvm { return getUnsignedField(0) & ~LLVMDebugVersionMask; } - /// ValidDebugInfo - Return true if V represents valid debug info value. - static bool ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel); + /// ValidDebugInfo - Return true if N represents valid debug info value. + static bool ValidDebugInfo(MDNode *N, CodeGenOpt::Level OptLevel); /// dump - print descriptor. void dump() const; + + bool isDerivedType() const; + bool isCompositeType() const; + bool isBasicType() const; + bool isVariable() const; + bool isSubprogram() const; + bool isGlobalVariable() const; + bool isScope() const; + bool isCompileUnit() const; + bool isLexicalBlock() const; + bool isSubrange() const; + bool isEnumerator() const; + bool isType() const; + bool isGlobal() const; }; /// DISubrange - This is used to represent ranges, for array bounds. class DISubrange : public DIDescriptor { public: - explicit DISubrange(GlobalVariable *GV = 0) - : DIDescriptor(GV, dwarf::DW_TAG_subrange_type) {} + explicit DISubrange(MDNode *N = 0) + : DIDescriptor(N, dwarf::DW_TAG_subrange_type) {} int64_t getLo() const { return (int64_t)getUInt64Field(1); } int64_t getHi() const { return (int64_t)getUInt64Field(2); } @@ -99,7 +119,8 @@ namespace llvm { /// DIArray - This descriptor holds an array of descriptors. class DIArray : public DIDescriptor { public: - explicit DIArray(GlobalVariable *GV = 0) : DIDescriptor(GV) {} + explicit DIArray(MDNode *N = 0) + : DIDescriptor(N) {} unsigned getNumElements() const; DIDescriptor getElement(unsigned Idx) const { @@ -107,37 +128,44 @@ namespace llvm { } }; + /// DIScope - A base class for various scopes. + class DIScope : public DIDescriptor { + public: + explicit DIScope(MDNode *N = 0) : DIDescriptor (N) { + if (DbgNode && !isScope()) + DbgNode = 0; + } + virtual ~DIScope() {} + + const char *getFilename() const; + const char *getDirectory() const; + }; + /// DICompileUnit - A wrapper for a compile unit. 
- class DICompileUnit : public DIDescriptor { + class DICompileUnit : public DIScope { public: - explicit DICompileUnit(GlobalVariable *GV = 0) - : DIDescriptor(GV, dwarf::DW_TAG_compile_unit) {} + explicit DICompileUnit(MDNode *N = 0) : DIScope(N) { + if (DbgNode && !isCompileUnit()) + DbgNode = 0; + } unsigned getLanguage() const { return getUnsignedField(2); } - const std::string &getFilename(std::string &F) const { - return getStringField(3, F); - } - const std::string &getDirectory(std::string &F) const { - return getStringField(4, F); - } - const std::string &getProducer(std::string &F) const { - return getStringField(5, F); - } - + const char *getFilename() const { return getStringField(3); } + const char *getDirectory() const { return getStringField(4); } + const char *getProducer() const { return getStringField(5); } + /// isMain - Each input file is encoded as a separate compile unit in LLVM /// debugging information output. However, many target specific tool chains - /// prefer to encode only one compile unit in an object file. In this + /// prefer to encode only one compile unit in an object file. In this /// situation, the LLVM code generator will include debugging information - /// entities in the compile unit that is marked as main compile unit. The + /// entities in the compile unit that is marked as main compile unit. The /// code generator accepts maximum one main compile unit per module. If a - /// module does not contain any main compile unit then the code generator + /// module does not contain any main compile unit then the code generator /// will emit multiple compile units in the output object file. bool isMain() const { return getUnsignedField(6); } bool isOptimized() const { return getUnsignedField(7); } - const std::string &getFlags(std::string &F) const { - return getStringField(8, F); - } + const char *getFlags() const { return getStringField(8); } unsigned getRunTimeVersion() const { return getUnsignedField(9); } /// Verify - Verify that a compile unit is well formed. @@ -152,13 +180,11 @@ namespace llvm { /// type/precision or a file/line pair for location info. class DIEnumerator : public DIDescriptor { public: - explicit DIEnumerator(GlobalVariable *GV = 0) - : DIDescriptor(GV, dwarf::DW_TAG_enumerator) {} + explicit DIEnumerator(MDNode *N = 0) + : DIDescriptor(N, dwarf::DW_TAG_enumerator) {} - const std::string &getName(std::string &F) const { - return getStringField(1, F); - } - uint64_t getEnumValue() const { return getUInt64Field(2); } + const char *getName() const { return getStringField(1); } + uint64_t getEnumValue() const { return getUInt64Field(2); } }; /// DIType - This is a wrapper for a type. @@ -167,43 +193,31 @@ namespace llvm { class DIType : public DIDescriptor { public: enum { - FlagPrivate = 1 << 0, - FlagProtected = 1 << 1, - FlagFwdDecl = 1 << 2 + FlagPrivate = 1 << 0, + FlagProtected = 1 << 1, + FlagFwdDecl = 1 << 2, + FlagAppleBlock = 1 << 3, + FlagBlockByrefStruct = 1 << 4 }; protected: - DIType(GlobalVariable *GV, unsigned Tag) : DIDescriptor(GV, Tag) {} + DIType(MDNode *N, unsigned Tag) + : DIDescriptor(N, Tag) {} // This ctor is used when the Tag has already been validated by a derived // ctor. - DIType(GlobalVariable *GV, bool, bool) : DIDescriptor(GV) {} + DIType(MDNode *N, bool, bool) : DIDescriptor(N) {} public: - /// isDerivedType - Return true if the specified tag is legal for - /// DIDerivedType. 
- static bool isDerivedType(unsigned TAG); - - /// isCompositeType - Return true if the specified tag is legal for - /// DICompositeType. - static bool isCompositeType(unsigned TAG); - - /// isBasicType - Return true if the specified tag is legal for - /// DIBasicType. - static bool isBasicType(unsigned TAG) { - return TAG == dwarf::DW_TAG_base_type; - } /// Verify - Verify that a type descriptor is well formed. bool Verify() const; public: - explicit DIType(GlobalVariable *GV); + explicit DIType(MDNode *N); explicit DIType() {} virtual ~DIType() {} DIDescriptor getContext() const { return getDescriptorField(1); } - const std::string &getName(std::string &F) const { - return getStringField(2, F); - } + const char *getName() const { return getStringField(2); } DICompileUnit getCompileUnit() const{ return getFieldAs(3); } unsigned getLineNumber() const { return getUnsignedField(4); } uint64_t getSizeInBits() const { return getUInt64Field(5); } @@ -212,9 +226,22 @@ namespace llvm { // carry this is just plain insane. uint64_t getOffsetInBits() const { return getUInt64Field(7); } unsigned getFlags() const { return getUnsignedField(8); } - bool isPrivate() const { return (getFlags() & FlagPrivate) != 0; } - bool isProtected() const { return (getFlags() & FlagProtected) != 0; } - bool isForwardDecl() const { return (getFlags() & FlagFwdDecl) != 0; } + bool isPrivate() const { + return (getFlags() & FlagPrivate) != 0; + } + bool isProtected() const { + return (getFlags() & FlagProtected) != 0; + } + bool isForwardDecl() const { + return (getFlags() & FlagFwdDecl) != 0; + } + // isAppleBlockExtension - Return true if this is the Apple Blocks extension. + bool isAppleBlockExtension() const { + return (getFlags() & FlagAppleBlock) != 0; + } + bool isBlockByrefStruct() const { + return (getFlags() & FlagBlockByrefStruct) != 0; + } /// dump - print type. void dump() const; @@ -223,8 +250,8 @@ namespace llvm { /// DIBasicType - A basic type, like 'int' or 'float'. class DIBasicType : public DIType { public: - explicit DIBasicType(GlobalVariable *GV) - : DIType(GV, dwarf::DW_TAG_base_type) {} + explicit DIBasicType(MDNode *N = 0) + : DIType(N, dwarf::DW_TAG_base_type) {} unsigned getEncoding() const { return getUnsignedField(9); } @@ -236,13 +263,13 @@ namespace llvm { /// a typedef, a pointer or reference, etc. class DIDerivedType : public DIType { protected: - explicit DIDerivedType(GlobalVariable *GV, bool, bool) - : DIType(GV, true, true) {} + explicit DIDerivedType(MDNode *N, bool, bool) + : DIType(N, true, true) {} public: - explicit DIDerivedType(GlobalVariable *GV) - : DIType(GV, true, true) { - if (GV && !isDerivedType(getTag())) - DbgGV = 0; + explicit DIDerivedType(MDNode *N = 0) + : DIType(N, true, true) { + if (DbgNode && !isDerivedType()) + DbgNode = 0; } DIType getTypeDerivedFrom() const { return getFieldAs(9); } @@ -252,6 +279,11 @@ namespace llvm { uint64_t getOriginalTypeSize() const; /// dump - print derived type. void dump() const; + + /// replaceAllUsesWith - Replace all uses of debug info referenced by + /// this descriptor. After this completes, the current debug info value + /// is erased. + void replaceAllUsesWith(DIDescriptor &D); }; /// DICompositeType - This descriptor holds a type that can refer to multiple /// FIXME: Why is this a DIDerivedType??
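With the tag checks folded into the constructors, the natural client idiom becomes construct-then-test: a wrapper silently nulls its node when handed the wrong kind of descriptor. A sketch under that assumption (typeName is hypothetical; N is assumed to be debug metadata):

    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    // Probe an MDNode by constructing wrappers; each stays null unless
    // the node's tag matches the wrapper's kind. Most specific first.
    const char *typeName(MDNode *N) {
      DICompositeType CT(N);
      if (!CT.isNull())
        return CT.getName();
      DIDerivedType DT(N);
      if (!DT.isNull())
        return DT.getName();
      DIBasicType BT(N);
      if (!BT.isNull())
        return BT.getName();
      return 0;                      // not a type descriptor
    }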
class DICompositeType : public DIDerivedType { public: - explicit DICompositeType(GlobalVariable *GV) - : DIDerivedType(GV, true, true) { - if (GV && !isCompositeType(getTag())) - DbgGV = 0; + explicit DICompositeType(MDNode *N = 0) + : DIDerivedType(N, true, true) { + if (N && !isCompositeType()) + DbgNode = 0; } DIArray getTypeArray() const { return getFieldAs(10); } @@ -278,34 +310,16 @@ namespace llvm { /// DIGlobal - This is a common class for global variables and subprograms. class DIGlobal : public DIDescriptor { protected: - explicit DIGlobal(GlobalVariable *GV, unsigned RequiredTag) - : DIDescriptor(GV, RequiredTag) {} - - /// isSubprogram - Return true if the specified tag is legal for - /// DISubprogram. - static bool isSubprogram(unsigned TAG) { - return TAG == dwarf::DW_TAG_subprogram; - } - - /// isGlobalVariable - Return true if the specified tag is legal for - /// DIGlobalVariable. - static bool isGlobalVariable(unsigned TAG) { - return TAG == dwarf::DW_TAG_variable; - } + explicit DIGlobal(MDNode *N, unsigned RequiredTag) + : DIDescriptor(N, RequiredTag) {} public: virtual ~DIGlobal() {} DIDescriptor getContext() const { return getDescriptorField(2); } - const std::string &getName(std::string &F) const { - return getStringField(3, F); - } - const std::string &getDisplayName(std::string &F) const { - return getStringField(4, F); - } - const std::string &getLinkageName(std::string &F) const { - return getStringField(5, F); - } + const char *getName() const { return getStringField(3); } + const char *getDisplayName() const { return getStringField(4); } + const char *getLinkageName() const { return getStringField(5); } DICompileUnit getCompileUnit() const{ return getFieldAs(6); } unsigned getLineNumber() const { return getUnsignedField(7); } DIType getType() const { return getFieldAs(8); } @@ -320,26 +334,41 @@ namespace llvm { }; /// DISubprogram - This is a wrapper for a subprogram (e.g. a function). - class DISubprogram : public DIGlobal { + class DISubprogram : public DIScope { public: - explicit DISubprogram(GlobalVariable *GV = 0) - : DIGlobal(GV, dwarf::DW_TAG_subprogram) {} + explicit DISubprogram(MDNode *N = 0) : DIScope(N) { + if (DbgNode && !isSubprogram()) + DbgNode = 0; + } + DIDescriptor getContext() const { return getDescriptorField(2); } + const char *getName() const { return getStringField(3); } + const char *getDisplayName() const { return getStringField(4); } + const char *getLinkageName() const { return getStringField(5); } + DICompileUnit getCompileUnit() const{ return getFieldAs(6); } + unsigned getLineNumber() const { return getUnsignedField(7); } DICompositeType getType() const { return getFieldAs(8); } /// getReturnTypeName - Subprogram return types are encoded either as /// DIType or as DICompositeType. - const std::string &getReturnTypeName(std::string &F) const { + const char *getReturnTypeName() const { DICompositeType DCT(getFieldAs(8)); if (!DCT.isNull()) { DIArray A = DCT.getTypeArray(); - DIType T(A.getElement(0).getGV()); - return T.getName(F); + DIType T(A.getElement(0).getNode()); + return T.getName(); } DIType T(getFieldAs(8)); - return T.getName(F); + return T.getName(); } + /// isLocalToUnit - Return true if this subprogram is local to the current + /// compile unit, like 'static' in C. 
+ unsigned isLocalToUnit() const { return getUnsignedField(9); } + unsigned isDefinition() const { return getUnsignedField(10); } + const char *getFilename() const { return getCompileUnit().getFilename();} + const char *getDirectory() const { return getCompileUnit().getDirectory();} + /// Verify - Verify that a subprogram descriptor is well formed. bool Verify() const; @@ -354,8 +383,8 @@ namespace llvm { /// DIGlobalVariable - This is a wrapper for a global variable. class DIGlobalVariable : public DIGlobal { public: - explicit DIGlobalVariable(GlobalVariable *GV = 0) - : DIGlobal(GV, dwarf::DW_TAG_variable) {} + explicit DIGlobalVariable(MDNode *N = 0) + : DIGlobal(N, dwarf::DW_TAG_variable) {} GlobalVariable *getGlobal() const { return getGlobalVariableField(11); } @@ -370,43 +399,75 @@ namespace llvm { /// global etc). class DIVariable : public DIDescriptor { public: - explicit DIVariable(GlobalVariable *GV = 0) - : DIDescriptor(GV) { - if (GV && !isVariable(getTag())) - DbgGV = 0; + explicit DIVariable(MDNode *N = 0) + : DIDescriptor(N) { + if (DbgNode && !isVariable()) + DbgNode = 0; } DIDescriptor getContext() const { return getDescriptorField(1); } - const std::string &getName(std::string &F) const { - return getStringField(2, F); - } + const char *getName() const { return getStringField(2); } DICompileUnit getCompileUnit() const{ return getFieldAs(3); } unsigned getLineNumber() const { return getUnsignedField(4); } DIType getType() const { return getFieldAs(5); } - /// isVariable - Return true if the specified tag is legal for DIVariable. - static bool isVariable(unsigned Tag); /// Verify - Verify that a variable descriptor is well formed. bool Verify() const; + /// hasComplexAddress - Return true if the variable has a complex address. + bool hasComplexAddress() const { + return getNumAddrElements() > 0; + } + + unsigned getNumAddrElements() const { return DbgNode->getNumElements()-6; } + + uint64_t getAddrElement(unsigned Idx) const { + return getUInt64Field(Idx+6); + } + + /// isBlockByrefVariable - Return true if the variable was declared as + /// a "__block" variable (Apple Blocks). + bool isBlockByrefVariable() const { + return getType().isBlockByrefStruct(); + } + /// dump - print variable. void dump() const; }; - /// DIBlock - This is a wrapper for a block (e.g. a function, scope, etc). - class DIBlock : public DIDescriptor { + /// DILexicalBlock - This is a wrapper for a lexical block. + class DILexicalBlock : public DIScope { public: - explicit DIBlock(GlobalVariable *GV = 0) - : DIDescriptor(GV, dwarf::DW_TAG_lexical_block) {} + explicit DILexicalBlock(MDNode *N = 0) : DIScope(N) { + if (DbgNode && !isLexicalBlock()) + DbgNode = 0; + } + DIScope getContext() const { return getFieldAs(1); } + const char *getDirectory() const { return getContext().getDirectory(); } + const char *getFilename() const { return getContext().getFilename(); } + }; - DIDescriptor getContext() const { return getDescriptorField(1); } + /// DILocation - This object holds location information. This object + /// is not associated with any DWARF tag.
+ class DILocation : public DIDescriptor { + public: + explicit DILocation(MDNode *N) : DIDescriptor(N) { ; } + + unsigned getLineNumber() const { return getUnsignedField(0); } + unsigned getColumnNumber() const { return getUnsignedField(1); } + DIScope getScope() const { return getFieldAs(2); } + DILocation getOrigLocation() const { return getFieldAs(3); } + const char *getFilename() const { return getScope().getFilename(); } + const char *getDirectory() const { return getScope().getDirectory(); } }; /// DIFactory - This object assists with the construction of the various /// descriptors. class DIFactory { Module &M; + LLVMContext& VMContext; + // Cached values for uniquing and faster lookups. const Type *EmptyStructPtr; // "{}*". Function *StopPointFn; // llvm.dbg.stoppoint @@ -420,9 +481,11 @@ namespace llvm { DIFactory(const DIFactory &); // DO NOT IMPLEMENT void operator=(const DIFactory&); // DO NOT IMPLEMENT public: + enum ComplexAddrKind { OpPlus=1, OpDeref }; + explicit DIFactory(Module &m); - /// GetOrCreateArray - Create an descriptor for an array of descriptors. + /// GetOrCreateArray - Create a descriptor for an array of descriptors. /// This implicitly uniques the arrays created. DIArray GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys); @@ -433,19 +496,19 @@ namespace llvm { /// CreateCompileUnit - Create a new descriptor for the specified compile /// unit. DICompileUnit CreateCompileUnit(unsigned LangID, - const std::string &Filename, - const std::string &Directory, - const std::string &Producer, + StringRef Filename, + StringRef Directory, + StringRef Producer, bool isMain = false, bool isOptimized = false, const char *Flags = "", unsigned RunTimeVer = 0); /// CreateEnumerator - Create a single enumerator value. - DIEnumerator CreateEnumerator(const std::string &Name, uint64_t Val); + DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val); /// CreateBasicType - Create a basic type like int, float, etc. - DIBasicType CreateBasicType(DIDescriptor Context, const std::string &Name, + DIBasicType CreateBasicType(DIDescriptor Context, StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, @@ -454,7 +517,7 @@ namespace llvm { /// CreateDerivedType - Create a derived type like const qualified type, /// pointer, typedef, etc. DIDerivedType CreateDerivedType(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, @@ -463,7 +526,7 @@ namespace llvm { /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType CreateCompositeType(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -475,31 +538,43 @@ namespace llvm { /// CreateSubprogram - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. - DISubprogram CreateSubprogram(DIDescriptor Context, const std::string &Name, - const std::string &DisplayName, - const std::string &LinkageName, + DISubprogram CreateSubprogram(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, bool isDefinition); /// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable - CreateGlobalVariable(DIDescriptor Context, const std::string &Name, - const std::string &DisplayName, - const std::string &LinkageName, + CreateGlobalVariable(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, bool isDefinition, llvm::GlobalVariable *GV); /// CreateVariable - Create a new descriptor for the specified variable. DIVariable CreateVariable(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNo, DIType Type); - /// CreateBlock - This creates a descriptor for a lexical block with the - /// specified parent context. - DIBlock CreateBlock(DIDescriptor Context); + /// CreateComplexVariable - Create a new descriptor for the specified + /// variable which has a complex address expression for its address. + DIVariable CreateComplexVariable(unsigned Tag, DIDescriptor Context, + const std::string &Name, + DICompileUnit CompileUnit, unsigned LineNo, + DIType Type, + SmallVector &addr); + + /// CreateLexicalBlock - This creates a descriptor for a lexical block + /// with the specified parent context. + DILexicalBlock CreateLexicalBlock(DIDescriptor Context); + + /// CreateLocation - Creates a debug info location. + DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo, + DIScope S, DILocation OrigLoc); /// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation, /// inserting it at the end of the specified basic block. @@ -519,21 +594,22 @@ namespace llvm { void InsertRegionEnd(DIDescriptor D, BasicBlock *BB); /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. - void InsertDeclare(llvm::Value *Storage, DIVariable D, BasicBlock *BB); + void InsertDeclare(llvm::Value *Storage, DIVariable D, + BasicBlock *InsertAtEnd); + + /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. + void InsertDeclare(llvm::Value *Storage, DIVariable D, + Instruction *InsertBefore); private: Constant *GetTagConstant(unsigned TAG); - Constant *GetStringConstant(const std::string &String); - - /// getCastToEmpty - Return the descriptor as a Constant* with type '{}*'. - Constant *getCastToEmpty(DIDescriptor D); }; /// Finds the stoppoint corresponding to this instruction, that is the - /// stoppoint that dominates this instruction + /// stoppoint that dominates this instruction const DbgStopPointInst *findStopPoint(const Instruction *Inst); - /// Finds the stoppoint corresponding to first real (non-debug intrinsic) + /// Finds the stoppoint corresponding to first real (non-debug intrinsic) /// instruction in this Basic Block, and returns the stoppoint for it. const DbgStopPointInst *findBBStopPoint(const BasicBlock *BB); @@ -544,46 +620,46 @@ namespace llvm { /// Find the debug info descriptor corresponding to this global variable. Value *findDbgGlobalDeclare(GlobalVariable *V); - bool getLocationInfo(const Value *V, std::string &DisplayName, std::string &Type, - unsigned &LineNo, std::string &File, std::string &Dir); - - /// CollectDebugInfoAnchors - Collect debugging information anchors.
- void CollectDebugInfoAnchors(Module &M, - SmallVector &CompileUnits, - SmallVector &GlobalVars, - SmallVector &Subprograms); +bool getLocationInfo(const Value *V, std::string &DisplayName, + std::string &Type, unsigned &LineNo, std::string &File, + std::string &Dir); - /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug /// info intrinsic. - bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI, + bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI, CodeGenOpt::Level OptLev); - /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgFuncStartInst &FSI, CodeGenOpt::Level OptLev); - /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgRegionStartInst &RSI, CodeGenOpt::Level OptLev); - /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgRegionEndInst &REI, CodeGenOpt::Level OptLev); - /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgDeclareInst &DI, CodeGenOpt::Level OptLev); - /// ExtractDebugLocation - Extract debug location information + /// ExtractDebugLocation - Extract debug location information /// from llvm.dbg.stoppoint intrinsic. DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI, DebugLocTracker &DebugLocInfo); - /// ExtractDebugLocation - Extract debug location information + /// ExtractDebugLocation - Extract debug location information + /// from DILocation. + DebugLoc ExtractDebugLocation(DILocation &Loc, + DebugLocTracker &DebugLocInfo); + + /// ExtractDebugLocation - Extract debug location information /// from llvm.dbg.func_start intrinsic. DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI, DebugLocTracker &DebugLocInfo); @@ -593,7 +669,74 @@ namespace llvm { /// isInlinedFnEnd - Return true if REI is ending an inlined function. bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn); + /// DebugInfoFinder - This object collects DebugInfo from a module. + class DebugInfoFinder { + public: + /// processModule - Process entire module and collect debug info + /// anchors. + void processModule(Module &M); + + private: + /// processType - Process DIType. + void processType(DIType DT); + + /// processLexicalBlock - Process DILexicalBlock. + void processLexicalBlock(DILexicalBlock LB); + + /// processSubprogram - Process DISubprogram. + void processSubprogram(DISubprogram SP); + + /// processStopPoint - Process DbgStopPointInst. + void processStopPoint(DbgStopPointInst *SPI); + + /// processFuncStart - Process DbgFuncStartInst. + void processFuncStart(DbgFuncStartInst *FSI); + + /// processRegionStart - Process DbgRegionStart. + void processRegionStart(DbgRegionStartInst *DRS); + + /// processRegionEnd - Process DbgRegionEnd. + void processRegionEnd(DbgRegionEndInst *DRE); + + /// processDeclare - Process DbgDeclareInst. + void processDeclare(DbgDeclareInst *DDI); + + /// addCompileUnit - Add compile unit into CUs. + bool addCompileUnit(DICompileUnit CU); + + /// addGlobalVariable - Add global variable into GVs. 
+ bool addGlobalVariable(DIGlobalVariable DIG); + + // addSubprogram - Add subprogram into SPs. + bool addSubprogram(DISubprogram SP); + + /// addType - Add type into Tys. + bool addType(DIType DT); + + public: + typedef SmallVector::iterator iterator; + iterator compile_unit_begin() { return CUs.begin(); } + iterator compile_unit_end() { return CUs.end(); } + iterator subprogram_begin() { return SPs.begin(); } + iterator subprogram_end() { return SPs.end(); } + iterator global_variable_begin() { return GVs.begin(); } + iterator global_variable_end() { return GVs.end(); } + iterator type_begin() { return TYs.begin(); } + iterator type_end() { return TYs.end(); } + + unsigned compile_unit_count() { return CUs.size(); } + unsigned global_variable_count() { return GVs.size(); } + unsigned subprogram_count() { return SPs.size(); } + unsigned type_count() { return TYs.size(); } + + private: + SmallVector CUs; // Compile Units + SmallVector SPs; // Subprograms + SmallVector GVs; // Global Variables + SmallVector TYs; // Types + SmallPtrSet NodesSeen; + }; } // end namespace llvm #endif diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h index 366d492b11456..f63e31c36bab3 100644 --- a/include/llvm/Analysis/Dominators.h +++ b/include/llvm/Analysis/Dominators.h @@ -22,7 +22,6 @@ #define LLVM_ANALYSIS_DOMINATORS_H #include "llvm/Pass.h" -#include "llvm/BasicBlock.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" @@ -32,6 +31,7 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -82,12 +82,12 @@ public: typedef typename std::vector *>::iterator iterator; typedef typename std::vector *>::const_iterator const_iterator; - + iterator begin() { return Children.begin(); } iterator end() { return Children.end(); } const_iterator begin() const { return Children.begin(); } const_iterator end() const { return Children.end(); } - + NodeT *getBlock() const { return TheBB; } DomTreeNodeBase *getIDom() const { return IDom; } const std::vector*> &getChildren() const { @@ -96,7 +96,7 @@ public: DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom) : TheBB(BB), IDom(iDom), DFSNumIn(-1), DFSNumOut(-1) { } - + DomTreeNodeBase *addChild(DomTreeNodeBase *C) { Children.push_back(C); return C; @@ -109,7 +109,7 @@ public: void clearAllChildren() { Children.clear(); } - + bool compare(DomTreeNodeBase *Other) { if (getNumChildren() != Other->getNumChildren()) return true; @@ -143,7 +143,7 @@ public: IDom->Children.push_back(this); } } - + /// getDFSNumIn/getDFSNumOut - These are an internal implementation detail, do /// not call them.
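The new DebugInfoFinder above replaces the removed CollectDebugInfoAnchors entry point with a reusable collector. A usage sketch (countDebugEntities is illustrative):

    #include "llvm/Analysis/DebugInfo.h"
    #include "llvm/Module.h"
    using namespace llvm;

    // One walk over the module's instructions and metadata gathers all
    // compile units, subprograms, global variables, and types.
    unsigned countDebugEntities(Module &M) {
      DebugInfoFinder Finder;
      Finder.processModule(M);
      return Finder.compile_unit_count() + Finder.subprogram_count() +
             Finder.global_variable_count() + Finder.type_count();
    }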
unsigned getDFSNumIn() const { return DFSNumIn; } @@ -161,22 +161,22 @@ EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase); EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase); template -static std::ostream &operator<<(std::ostream &o, - const DomTreeNodeBase *Node) { +static raw_ostream &operator<<(raw_ostream &o, + const DomTreeNodeBase *Node) { if (Node->getBlock()) WriteAsOperand(o, Node->getBlock(), false); else o << " <>"; - + o << " {" << Node->getDFSNumIn() << "," << Node->getDFSNumOut() << "}"; - + return o << "\n"; } template -static void PrintDomTree(const DomTreeNodeBase *N, std::ostream &o, +static void PrintDomTree(const DomTreeNodeBase *N, raw_ostream &o, unsigned Lev) { - o << std::string(2*Lev, ' ') << "[" << Lev << "] " << N; + o.indent(2*Lev) << "[" << Lev << "] " << N; for (typename DomTreeNodeBase::const_iterator I = N->begin(), E = N->end(); I != E; ++I) PrintDomTree(*I, o, Lev+1); @@ -233,7 +233,7 @@ protected: Vertex.clear(); RootNode = 0; } - + // NewBB is split and now it has one successor. Update dominator tree to // reflect this change. template @@ -320,7 +320,7 @@ public: DomTreeNodeBase* MyNd = I->second; DomTreeNodeBase* OtherNd = OI->second; - + if (MyNd->compare(OtherNd)) return true; } @@ -352,7 +352,7 @@ public: /// Note that this is not a constant time operation! /// bool properlyDominates(const DomTreeNodeBase *A, - DomTreeNodeBase *B) const { + const DomTreeNodeBase *B) const { if (A == 0 || B == 0) return false; return dominatedBySlowTreeWalk(A, B); } @@ -378,12 +378,12 @@ public: && "This is not implemented for post dominators"); return dominates(&A->getParent()->front(), A); } - + /// dominates - Returns true iff A dominates B. Note that this is not a /// constant time operation! /// inline bool dominates(const DomTreeNodeBase *A, - DomTreeNodeBase *B) { + const DomTreeNodeBase *B) { if (B == A) return true; // A node trivially dominates itself. @@ -404,13 +404,17 @@ public: return dominatedBySlowTreeWalk(A, B); } - inline bool dominates(NodeT *A, NodeT *B) { + inline bool dominates(const NodeT *A, const NodeT *B) { if (A == B) return true; - - return dominates(getNode(A), getNode(B)); + + // Cast away the const qualifiers here. This is ok since + // this function doesn't actually return the values returned + // from getNode. + return dominates(getNode(const_cast(A)), + getNode(const_cast(B))); } - + NodeT *getRoot() const { assert(this->Roots.size() == 1 && "Should always have entry node!"); return this->Roots[0]; @@ -522,7 +526,7 @@ public: assert(getNode(BB) && "Removing node that isn't in dominator tree."); DomTreeNodes.erase(BB); } - + /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. void splitBlock(NodeT* NewBB) { @@ -534,7 +538,7 @@ public: /// print - Convert to human readable form /// - virtual void print(std::ostream &o, const Module* ) const { + void print(raw_ostream &o) const { o << "=============================--------------------------------\n"; if (this->isPostDominator()) o << "Inorder PostDominator Tree: "; @@ -544,17 +548,11 @@ public: o << "DFSNumbers invalid: " << SlowQueries << " slow queries."; o << "\n"; - PrintDomTree(getRootNode(), o, 1); + // The postdom tree can have a null root if there are no returns. 
+ if (getRootNode()) + PrintDomTree(getRootNode(), o, 1); } - - void print(std::ostream *OS, const Module* M = 0) const { - if (OS) print(*OS, M); - } - - virtual void dump() { - print(llvm::cerr); - } - + protected: template friend void Compress(DominatorTreeBase& DT, @@ -569,16 +567,16 @@ protected: friend void Link(DominatorTreeBase& DT, unsigned DFSNumV, typename GraphT::NodeType* W, typename DominatorTreeBase::InfoRec &WInfo); - + template friend unsigned DFSPass(DominatorTreeBase& DT, typename GraphT::NodeType* V, unsigned N); - + template friend void Calculate(DominatorTreeBase::NodeType>& DT, FuncT& F); - + /// updateDFSNumbers - Assign In and Out numbers to the nodes while walking /// dominator tree in dfs order. void updateDFSNumbers() { @@ -606,17 +604,17 @@ protected: // Otherwise, recursively visit this child. DomTreeNodeBase *Child = *ChildIt; ++WorkStack.back().second; - + WorkStack.push_back(std::make_pair(Child, Child->begin())); Child->DFSNumIn = DFSNum++; } } } - + SlowQueries = 0; DFSInfoValid = true; } - + DomTreeNodeBase *getNodeForBlock(NodeT *BB) { typename DomTreeNodeMapType::iterator I = this->DomTreeNodes.find(BB); if (I != this->DomTreeNodes.end() && I->second) @@ -634,31 +632,31 @@ protected: DomTreeNodeBase *C = new DomTreeNodeBase(BB, IDomNode); return this->DomTreeNodes[BB] = IDomNode->addChild(C); } - + inline NodeT *getIDom(NodeT *BB) const { typename DenseMap::const_iterator I = IDoms.find(BB); return I != IDoms.end() ? I->second : 0; } - + inline void addRoot(NodeT* BB) { this->Roots.push_back(BB); } - + public: /// recalculate - compute a dominator tree for the given function template void recalculate(FT& F) { if (!this->IsPostDominators) { reset(); - + // Initialize roots this->Roots.push_back(&F.front()); this->IDoms[&F.front()] = 0; this->DomTreeNodes[&F.front()] = 0; this->Vertex.push_back(0); - + Calculate(*this, F); - + updateDFSNumbers(); } else { reset(); // Reset from the last time we were run... @@ -675,7 +673,7 @@ public: } this->Vertex.push_back(0); - + Calculate >(*this, F); } } @@ -691,18 +689,18 @@ class DominatorTree : public FunctionPass { public: static char ID; // Pass ID, replacement for typeid DominatorTreeBase* DT; - + DominatorTree() : FunctionPass(&ID) { DT = new DominatorTreeBase(false); } - + ~DominatorTree() { DT->releaseMemory(); delete DT; } - + DominatorTreeBase& getBase() { return *DT; } - + /// getRoots - Return the root blocks of the current CFG. This may include /// multiple blocks if we are computing post dominators. For forward /// dominators, this will always be a single block (the entry node). 
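The dominance queries above are now const-correct, and the Instruction overload has moved out of line where it handles the same-block and PHI-ordering cases. A trivial client sketch (the helper is illustrative; DT would come from getAnalysis<DominatorTree>() in a pass):

    #include "llvm/Analysis/Dominators.h"
    using namespace llvm;

    // Def and Use may now be passed as const pointers.
    bool defDominatesUse(DominatorTree &DT, const Instruction *Def,
                         const Instruction *Use) {
      return DT.dominates(Def, Use);
    }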
@@ -710,11 +708,11 @@ public: inline const std::vector &getRoots() const { return DT->getRoots(); } - + inline BasicBlock *getRoot() const { return DT->getRoot(); } - + inline DomTreeNode *getRootNode() const { return DT->getRootNode(); } @@ -724,10 +722,10 @@ public: inline bool compare(DominatorTree &Other) const { DomTreeNode *R = getRootNode(); DomTreeNode *OtherR = Other.getRootNode(); - + if (!R || !OtherR || R->getBlock() != OtherR->getBlock()) return true; - + if (DT->compare(Other.getBase())) return true; @@ -735,111 +733,91 @@ public: } virtual bool runOnFunction(Function &F); - + + virtual void verifyAnalysis() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } - + inline bool dominates(DomTreeNode* A, DomTreeNode* B) const { return DT->dominates(A, B); } - - inline bool dominates(BasicBlock* A, BasicBlock* B) const { + + inline bool dominates(const BasicBlock* A, const BasicBlock* B) const { return DT->dominates(A, B); } - + // dominates - Return true if A dominates B. This performs the // special checks necessary if A and B are in the same basic block. - bool dominates(Instruction *A, Instruction *B) const { - BasicBlock *BBA = A->getParent(), *BBB = B->getParent(); - if (BBA != BBB) return DT->dominates(BBA, BBB); - - // It is not possible to determine dominance between two PHI nodes - // based on their ordering. - if (isa(A) && isa(B)) - return false; - - // Loop through the basic block until we find A or B. - BasicBlock::iterator I = BBA->begin(); - for (; &*I != A && &*I != B; ++I) /*empty*/; + bool dominates(const Instruction *A, const Instruction *B) const; - //if(!DT.IsPostDominators) { - // A dominates B if it is found first in the basic block. - return &*I == A; - //} else { - // // A post-dominates B if B is found first in the basic block. - // return &*I == B; - //} - } - - inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const { + bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const { return DT->properlyDominates(A, B); } - - inline bool properlyDominates(BasicBlock* A, BasicBlock* B) const { + + bool properlyDominates(BasicBlock *A, BasicBlock *B) const { return DT->properlyDominates(A, B); } - + /// findNearestCommonDominator - Find nearest common dominator basic block /// for basic block A and B. If there is no such block then return NULL. inline BasicBlock *findNearestCommonDominator(BasicBlock *A, BasicBlock *B) { return DT->findNearestCommonDominator(A, B); } - + inline DomTreeNode *operator[](BasicBlock *BB) const { return DT->getNode(BB); } - + /// getNode - return the (Post)DominatorTree node for the specified basic /// block. This is the same as using operator[] on this class. /// inline DomTreeNode *getNode(BasicBlock *BB) const { return DT->getNode(BB); } - + /// addNewBlock - Add a new node to the dominator tree information. This /// creates a new node as a child of DomBB dominator node,linking it into /// the children list of the immediate dominator. inline DomTreeNode *addNewBlock(BasicBlock *BB, BasicBlock *DomBB) { return DT->addNewBlock(BB, DomBB); } - + /// changeImmediateDominator - This method is used to update the dominator /// tree information when a node's immediate dominator changes. 
/// inline void changeImmediateDominator(BasicBlock *N, BasicBlock* NewIDom) { DT->changeImmediateDominator(N, NewIDom); } - + inline void changeImmediateDominator(DomTreeNode *N, DomTreeNode* NewIDom) { DT->changeImmediateDominator(N, NewIDom); } - + /// eraseNode - Removes a node from the dominator tree. Block must not /// dominate any other blocks. Removes node from its immediate dominator's /// children list. Deletes dominator node associated with basic block BB. inline void eraseNode(BasicBlock *BB) { DT->eraseNode(BB); } - + /// splitBlock - BB is split and now it has one successor. Update dominator /// tree to reflect this change. inline void splitBlock(BasicBlock* NewBB) { DT->splitBlock(NewBB); } - + bool isReachableFromEntry(BasicBlock* A) { return DT->isReachableFromEntry(A); } - - + + virtual void releaseMemory() { DT->releaseMemory(); } - - virtual void print(std::ostream &OS, const Module* M= 0) const { - DT->print(OS, M); - } + + virtual void print(raw_ostream &OS, const Module* M= 0) const; }; //===------------------------------------- @@ -849,7 +827,7 @@ public: template <> struct GraphTraits { typedef DomTreeNode NodeType; typedef NodeType::iterator ChildIteratorType; - + static NodeType *getEntryNode(NodeType *N) { return N; } @@ -881,7 +859,7 @@ protected: DomSetMapType Frontiers; std::vector Roots; const bool IsPostDominators; - + public: DominanceFrontierBase(void *ID, bool isPostDom) : FunctionPass(ID), IsPostDominators(isPostDom) {} @@ -891,7 +869,7 @@ public: /// dominators, this will always be a single block (the entry node). /// inline const std::vector &getRoots() const { return Roots; } - + /// isPostDominator - Returns true if analysis based on postdoms /// bool isPostDominator() const { return IsPostDominators; } @@ -987,11 +965,7 @@ public: /// print - Convert to human readable form /// - virtual void print(std::ostream &OS, const Module* = 0) const; - void print(std::ostream *OS, const Module* M = 0) const { - if (OS) print(*OS, M); - } - virtual void dump(); + virtual void print(raw_ostream &OS, const Module* = 0) const; }; @@ -1019,6 +993,8 @@ public: return false; } + virtual void verifyAnalysis() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); diff --git a/include/llvm/Analysis/FindUsedTypes.h b/include/llvm/Analysis/FindUsedTypes.h index c897af3a58a63..1337385848ed3 100644 --- a/include/llvm/Analysis/FindUsedTypes.h +++ b/include/llvm/Analysis/FindUsedTypes.h @@ -37,8 +37,7 @@ public: /// passed in, then the types are printed symbolically if possible, using the /// symbol table from the module. /// - void print(std::ostream &o, const Module *M) const; - void print(std::ostream *o, const Module *M) const { if (o) print(*o, M); } + void print(raw_ostream &o, const Module *M) const; private: /// IncorporateType - Incorporate one type and all of its subtypes into the diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index 40396e2fcaf24..948c675924242 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -25,7 +25,7 @@ namespace llvm { class DominatorTree; class Instruction; class Value; -class IVUsersOfOneStride; +struct IVUsersOfOneStride; /// IVStrideUse - Keep track of one use of a strided induction variable, where /// the stride is stored externally.
@@ -34,7 +34,7 @@ class IVUsersOfOneStride;
 class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
 public:
   IVStrideUse(IVUsersOfOneStride *parent,
-              const SCEV* offset,
+              const SCEV *offset,
               Instruction* U, Value *O)
     : CallbackVH(U), Parent(parent), Offset(offset),
       OperandValToReplace(O),
@@ -58,10 +58,10 @@ public:
   /// getOffset - Return the offset to add to a theoretical induction
   /// variable that starts at zero and counts up by the stride to compute
   /// the value for the use. This always has the same type as the stride.
-  const SCEV* getOffset() const { return Offset; }
+  const SCEV *getOffset() const { return Offset; }
 
   /// setOffset - Assign a new offset to this use.
-  void setOffset(const SCEV* Val) {
+  void setOffset(const SCEV *Val) {
     Offset = Val;
   }
 
@@ -96,7 +96,7 @@ private:
   IVUsersOfOneStride *Parent;
 
   /// Offset - The offset to add to the base induction expression.
-  const SCEV* Offset;
+  const SCEV *Offset;
 
   /// OperandValToReplace - The Value of the operand in the user instruction
   /// that this IVStrideUse is representing.
@@ -158,7 +158,7 @@ public:
   /// initial value and the operand that uses the IV.
   ilist<IVStrideUse> Users;
 
-  void addUser(const SCEV* Offset, Instruction *User, Value *Operand) {
+  void addUser(const SCEV *Offset, Instruction *User, Value *Operand) {
     Users.push_back(new IVStrideUse(this, Offset, User, Operand));
   }
 };
 
@@ -178,12 +178,12 @@ public:
   /// IVUsesByStride - A mapping from the strides in StrideOrder to the
   /// uses in IVUses.
-  std::map<const SCEV*, IVUsersOfOneStride*> IVUsesByStride;
+  std::map<const SCEV *, IVUsersOfOneStride *> IVUsesByStride;
 
   /// StrideOrder - An ordering of the keys in IVUsesByStride that is stable:
   /// We use this to iterate over the IVUsesByStride collection without being
   /// dependent on random ordering of pointers in the process.
-  SmallVector<const SCEV*, 16> StrideOrder;
+  SmallVector<const SCEV *, 16> StrideOrder;
 
 private:
   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -203,13 +203,9 @@ public:
   /// getReplacementExpr - Return a SCEV expression which computes the
   /// value of the OperandValToReplace of the given IVStrideUse.
-  const SCEV* getReplacementExpr(const IVStrideUse &U) const;
+  const SCEV *getReplacementExpr(const IVStrideUse &U) const;
 
   void print(raw_ostream &OS, const Module* = 0) const;
-  virtual void print(std::ostream &OS, const Module* = 0) const;
-  void print(std::ostream *OS, const Module* M = 0) const {
-    if (OS) print(*OS, M);
-  }
 
   /// dump - This method is used for debugging.
   void dump() const;
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
new file mode 100644
index 0000000000000..7ce49d7de332f
--- /dev/null
+++ b/include/llvm/Analysis/InlineCost.h
@@ -0,0 +1,180 @@
+//===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements heuristics for inlining decisions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INLINECOST_H
+#define LLVM_ANALYSIS_INLINECOST_H
+
+#include <cassert>
+#include <climits>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+  class Value;
+  class Function;
+  class BasicBlock;
+  class CallSite;
+  template<class PtrType, unsigned SmallSize>
+  class SmallPtrSet;
+
+  // CodeMetrics - Calculate size and a few similar metrics for a set of
+  // basic blocks.
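Before the CodeMetrics body continues below, a note on the StrideOrder comment above: iterating a std::map keyed by pointers visits keys in pointer order, which can differ from run to run, so IVUsers keeps a side vector that records insertion order and iterates through that instead. A small self-contained illustration of the pattern (Stride is a stand-in type, not LLVM API):

#include <cstdio>
#include <map>
#include <vector>

struct Stride { int Val; };

int main() {
  // Keyed by pointer: the map's own iteration order follows the
  // pointers' numeric values, which vary with the allocator.
  std::map<const Stride*, int> UsesByStride;
  // Side vector remembering insertion order: deterministic iteration.
  std::vector<const Stride*> StrideOrder;

  Stride A{4}, B{8}, C{1};
  const Stride *Order[] = {&A, &B, &C};
  for (const Stride *S : Order) {
    UsesByStride[S] += 1;
    StrideOrder.push_back(S);
  }

  // Walk the map through the vector, never through the map itself, so
  // the output does not depend on where the heap placed A, B and C.
  for (const Stride *S : StrideOrder)
    std::printf("stride %d: %d use(s)\n", S->Val, UsesByStride[S]);
}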
+ struct CodeMetrics { + /// NeverInline - True if this callee should never be inlined into a + /// caller. + bool NeverInline; + + /// usesDynamicAlloca - True if this function calls alloca (in the C sense). + bool usesDynamicAlloca; + + /// NumInsts, NumBlocks - Keep track of how large each function is, which + /// is used to estimate the code size cost of inlining it. + unsigned NumInsts, NumBlocks; + + /// NumVectorInsts - Keep track of how many instructions produce vector + /// values. The inliner is being more aggressive with inlining vector + /// kernels. + unsigned NumVectorInsts; + + /// NumRets - Keep track of how many Ret instructions the block contains. + unsigned NumRets; + + CodeMetrics() : NeverInline(false), usesDynamicAlloca(false), NumInsts(0), + NumBlocks(0), NumVectorInsts(0), NumRets(0) {} + + /// analyzeBasicBlock - Add information about the specified basic block + /// to the current structure. + void analyzeBasicBlock(const BasicBlock *BB); + + /// analyzeFunction - Add information about the specified function + /// to the current structure. + void analyzeFunction(Function *F); + }; + + namespace InlineConstants { + // Various magic constants used to adjust heuristics. + const int CallPenalty = 5; + const int LastCallToStaticBonus = -15000; + const int ColdccPenalty = 2000; + const int NoreturnPenalty = 10000; + } + + /// InlineCost - Represent the cost of inlining a function. This + /// supports special values for functions which should "always" or + /// "never" be inlined. Otherwise, the cost represents a unitless + /// amount; smaller values increase the likelyhood of the function + /// being inlined. + class InlineCost { + enum Kind { + Value, + Always, + Never + }; + + // This is a do-it-yourself implementation of + // int Cost : 30; + // unsigned Type : 2; + // We used to use bitfields, but they were sometimes miscompiled (PR3822). + enum { TYPE_BITS = 2 }; + enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS }; + unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS; + + Kind getType() const { + return Kind(TypedCost >> COST_BITS); + } + + int getCost() const { + // Sign-extend the bottom COST_BITS bits. + return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS; + } + + InlineCost(int C, int T) { + TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS); + assert(getCost() == C && "Cost exceeds InlineCost precision"); + } + public: + static InlineCost get(int Cost) { return InlineCost(Cost, Value); } + static InlineCost getAlways() { return InlineCost(0, Always); } + static InlineCost getNever() { return InlineCost(0, Never); } + + bool isVariable() const { return getType() == Value; } + bool isAlways() const { return getType() == Always; } + bool isNever() const { return getType() == Never; } + + /// getValue() - Return a "variable" inline cost's amount. It is + /// an error to call this on an "always" or "never" InlineCost. + int getValue() const { + assert(getType() == Value && "Invalid access of InlineCost"); + return getCost(); + } + }; + + /// InlineCostAnalyzer - Cost analyzer used by inliner. 
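The TypedCost encoding above packs a 30-bit signed cost and a 2-bit kind into a single unsigned by hand because the equivalent bitfields were sometimes miscompiled (PR3822). A compilable sketch of the same pack/unpack trick; like the original, it leans on two's-complement representation and an arithmetic right shift of signed ints, and the helper names are mine, not LLVM's:

#include <cassert>
#include <climits>

// Hand-rolled "int Cost : 30; unsigned Type : 2" packing, shaped like
// the TypedCost member above.
enum { TYPE_BITS = 2 };
enum { COST_BITS = sizeof(unsigned) * CHAR_BIT - TYPE_BITS };

unsigned pack(int Cost, unsigned Type) {
  // Drop Cost's top TYPE_BITS bits, then park Type in them.
  return (unsigned(Cost << TYPE_BITS) >> TYPE_BITS) | (Type << COST_BITS);
}

unsigned typeOf(unsigned Packed) { return Packed >> COST_BITS; }

int costOf(unsigned Packed) {
  // Shift the type bits out the top, then arithmetic-shift back down
  // to sign-extend the remaining COST_BITS-bit cost.
  return int(Packed << TYPE_BITS) >> TYPE_BITS;
}

int main() {
  const int Costs[] = {0, 1, -1, 12345, -12345};
  for (int C : Costs)
    for (unsigned T = 0; T != 3; ++T) {
      unsigned P = pack(C, T);
      assert(costOf(P) == C && typeOf(P) == T);  // lossless round trip
    }
}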
+ class InlineCostAnalyzer { + struct ArgInfo { + public: + unsigned ConstantWeight; + unsigned AllocaWeight; + + ArgInfo(unsigned CWeight, unsigned AWeight) + : ConstantWeight(CWeight), AllocaWeight(AWeight) {} + }; + + struct FunctionInfo { + CodeMetrics Metrics; + + /// ArgumentWeights - Each formal argument of the function is inspected to + /// see if it is used in any contexts where making it a constant or alloca + /// would reduce the code size. If so, we add some value to the argument + /// entry here. + std::vector ArgumentWeights; + + /// CountCodeReductionForConstant - Figure out an approximation for how + /// many instructions will be constant folded if the specified value is + /// constant. + unsigned CountCodeReductionForConstant(Value *V); + + /// CountCodeReductionForAlloca - Figure out an approximation of how much + /// smaller the function will be if it is inlined into a context where an + /// argument becomes an alloca. + /// + unsigned CountCodeReductionForAlloca(Value *V); + + /// analyzeFunction - Add information about the specified function + /// to the current structure. + void analyzeFunction(Function *F); + }; + + std::map CachedFunctionInfo; + + public: + + /// getInlineCost - The heuristic used to determine if we should inline the + /// function call or not. + /// + InlineCost getInlineCost(CallSite CS, + SmallPtrSet &NeverInline); + + /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a + /// higher threshold to determine if the function call should be inlined. + float getInlineFudgeFactor(CallSite CS); + + /// resetCachedFunctionInfo - erase any cached cost info for this function. + void resetCachedCostInfo(Function* Caller) { + CachedFunctionInfo[Caller] = FunctionInfo(); + } + }; +} + +#endif diff --git a/include/llvm/Analysis/Interval.h b/include/llvm/Analysis/Interval.h index 1da2022f69617..ca8ad73131a9d 100644 --- a/include/llvm/Analysis/Interval.h +++ b/include/llvm/Analysis/Interval.h @@ -22,11 +22,11 @@ #include "llvm/ADT/GraphTraits.h" #include -#include namespace llvm { class BasicBlock; +class raw_ostream; //===----------------------------------------------------------------------===// // @@ -98,8 +98,7 @@ public: bool isLoop() const; /// print - Show contents in human readable format... - void print(std::ostream &O) const; - void print(std::ostream *O) const { if (O) print(*O); } + void print(raw_ostream &O) const; }; /// succ_begin/succ_end - define methods so that Intervals may be used diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h index 551bb72437984..d842840b66b5d 100644 --- a/include/llvm/Analysis/IntervalIterator.h +++ b/include/llvm/Analysis/IntervalIterator.h @@ -233,7 +233,8 @@ private: }; typedef IntervalIterator function_interval_iterator; -typedef IntervalIterator interval_part_interval_iterator; +typedef IntervalIterator + interval_part_interval_iterator; inline function_interval_iterator intervals_begin(Function *F, diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h index feae6d82f82f5..c1214e7427a4a 100644 --- a/include/llvm/Analysis/IntervalPartition.h +++ b/include/llvm/Analysis/IntervalPartition.h @@ -60,10 +60,7 @@ public: IntervalPartition(IntervalPartition &I, bool); // print - Show contents in human readable format... 
- virtual void print(std::ostream &O, const Module* = 0) const; - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } + virtual void print(raw_ostream &O, const Module* = 0) const; // getRootInterval() - Return the root interval that contains the starting // block of the function. diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h index ea17a237caaa8..7944af3b8a5a0 100644 --- a/include/llvm/Analysis/LibCallAliasAnalysis.h +++ b/include/llvm/Analysis/LibCallAliasAnalysis.h @@ -22,7 +22,7 @@ namespace llvm { struct LibCallFunctionInfo; /// LibCallAliasAnalysis - Alias analysis driven from LibCallInfo. - struct LibCallAliasAnalysis : public FunctionPass, AliasAnalysis { + struct LibCallAliasAnalysis : public FunctionPass, public AliasAnalysis { static char ID; // Class identification LibCallInfo *LCI; diff --git a/include/llvm/Analysis/LoopDependenceAnalysis.h b/include/llvm/Analysis/LoopDependenceAnalysis.h index 67da2e7fbc1b1..1d386ba88804f 100644 --- a/include/llvm/Analysis/LoopDependenceAnalysis.h +++ b/include/llvm/Analysis/LoopDependenceAnalysis.h @@ -20,43 +20,102 @@ #ifndef LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H #define LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Support/raw_ostream.h" -#include +#include "llvm/Support/Allocator.h" namespace llvm { - class AliasAnalysis; - class AnalysisUsage; - class ScalarEvolution; - class Value; +class AliasAnalysis; +class AnalysisUsage; +class ScalarEvolution; +class SCEV; +class Value; +class raw_ostream; - class LoopDependenceAnalysis : public LoopPass { - Loop *L; - AliasAnalysis *AA; - ScalarEvolution *SE; +class LoopDependenceAnalysis : public LoopPass { + AliasAnalysis *AA; + ScalarEvolution *SE; - public: - static char ID; // Class identification, replacement for typeinfo - LoopDependenceAnalysis() : LoopPass(&ID) {} + /// L - The loop we are currently analysing. + Loop *L; - /// TODO: docs - bool isDependencePair(const Value*, const Value*) const; - bool depends(Value*, Value*); + /// TODO: doc + enum DependenceResult { Independent = 0, Dependent = 1, Unknown = 2 }; - bool runOnLoop(Loop*, LPPassManager&); + /// TODO: doc + struct Subscript { + /// TODO: Add distance, direction, breaking conditions, ... + }; - virtual void getAnalysisUsage(AnalysisUsage&) const; + /// DependencePair - Represents a data dependence relation between to memory + /// reference instructions. + struct DependencePair : public FastFoldingSetNode { + Value *A; + Value *B; + DependenceResult Result; + SmallVector Subscripts; - void print(raw_ostream&, const Module* = 0) const; - virtual void print(std::ostream&, const Module* = 0) const; - }; // class LoopDependenceAnalysis + DependencePair(const FoldingSetNodeID &ID, Value *a, Value *b) : + FastFoldingSetNode(ID), A(a), B(b), Result(Unknown), Subscripts() {} + }; + /// findOrInsertDependencePair - Return true if a DependencePair for the + /// given Values already exists, false if a new DependencePair had to be + /// created. The third argument is set to the pair found or created. + bool findOrInsertDependencePair(Value*, Value*, DependencePair*&); - // createLoopDependenceAnalysisPass - This creates an instance of the - // LoopDependenceAnalysis pass. 
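analyseZIV above is only declared in this header; the classical ZIV ("zero index variable") test it names decides dependence for a pair of subscripts that are both invariant in the loop nest: the two references can only touch the same element if the subscripts can be equal. A toy version over constant-or-unknown subscripts; the Subscript struct below is illustrative and is not the one declared in the class above:

#include <cstdio>

enum DependenceResult { Independent, Dependent, Unknown };

// A loop-invariant subscript: either a compile-time constant or not.
struct Subscript {
  bool HasConst;
  long Const;
};

// ZIV test: both subscripts are invariant in the loop nest, so there
// is a dependence exactly when they can evaluate to the same value.
DependenceResult analyseZIV(Subscript A, Subscript B) {
  if (A.HasConst && B.HasConst)
    return A.Const == B.Const ? Dependent : Independent;
  return Unknown;   // can't decide without more information
}

int main() {
  std::printf("%d\n", analyseZIV(Subscript{true, 3}, Subscript{true, 3}));
  std::printf("%d\n", analyseZIV(Subscript{true, 3}, Subscript{true, 7}));
  std::printf("%d\n", analyseZIV(Subscript{false, 0}, Subscript{true, 7}));
}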
- // - LoopPass *createLoopDependenceAnalysisPass(); + /// getLoops - Collect all loops of the loop-nest L a given SCEV is variant + /// in. + void getLoops(const SCEV*, DenseSet*) const; + + /// isLoopInvariant - True if a given SCEV is invariant in all loops of the + /// loop-nest starting at the innermost loop L. + bool isLoopInvariant(const SCEV*) const; + + /// isAffine - An SCEV is affine with respect to the loop-nest starting at + /// the innermost loop L if it is of the form A+B*X where A, B are invariant + /// in the loop-nest and X is a induction variable in the loop-nest. + bool isAffine(const SCEV*) const; + + /// TODO: doc + bool isZIVPair(const SCEV*, const SCEV*) const; + bool isSIVPair(const SCEV*, const SCEV*) const; + DependenceResult analyseZIV(const SCEV*, const SCEV*, Subscript*) const; + DependenceResult analyseSIV(const SCEV*, const SCEV*, Subscript*) const; + DependenceResult analyseMIV(const SCEV*, const SCEV*, Subscript*) const; + DependenceResult analyseSubscript(const SCEV*, const SCEV*, Subscript*) const; + DependenceResult analysePair(DependencePair*) const; + +public: + static char ID; // Class identification, replacement for typeinfo + LoopDependenceAnalysis() : LoopPass(&ID) {} + + /// isDependencePair - Check wether two values can possibly give rise to a + /// data dependence: that is the case if both are instructions accessing + /// memory and at least one of those accesses is a write. + bool isDependencePair(const Value*, const Value*) const; + + /// depends - Return a boolean indicating if there is a data dependence + /// between two instructions. + bool depends(Value*, Value*); + + bool runOnLoop(Loop*, LPPassManager&); + virtual void releaseMemory(); + virtual void getAnalysisUsage(AnalysisUsage&) const; + void print(raw_ostream&, const Module* = 0) const; + +private: + FoldingSet Pairs; + BumpPtrAllocator PairAllocator; +}; // class LoopDependenceAnalysis + +// createLoopDependenceAnalysisPass - This creates an instance of the +// LoopDependenceAnalysis pass. +// +LoopPass *createLoopDependenceAnalysisPass(); } // namespace llvm diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 8b293cb7b9865..763111063d01b 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -8,7 +8,8 @@ //===----------------------------------------------------------------------===// // // This file defines the LoopInfo class that is used to identify natural loops -// and determine the loop depth of various nodes of the CFG. Note that natural +// and determine the loop depth of various nodes of the CFG. A natural loop +// has exactly one entry-point, which is called the header. Note that natural // loops may actually be several loops that share the same header node. // // This analysis calculates the nesting structure of loops in a function. 
For @@ -31,17 +32,13 @@ #define LLVM_ANALYSIS_LOOP_INFO_H #include "llvm/Pass.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include -#include namespace llvm { @@ -54,26 +51,27 @@ static void RemoveFromVector(std::vector &V, T *N) { class DominatorTree; class LoopInfo; -template class LoopInfoBase; -template class LoopBase; - -typedef LoopBase Loop; +class Loop; +template class LoopInfoBase; +template class LoopBase; //===----------------------------------------------------------------------===// /// LoopBase class - Instances of this class are used to represent loops that /// are detected in the flow graph /// -template +template class LoopBase { - LoopBase *ParentLoop; + LoopT *ParentLoop; // SubLoops - Loops contained entirely within this one. - std::vector*> SubLoops; + std::vector SubLoops; // Blocks - The list of blocks in this loop. First entry is the header node. std::vector Blocks; - LoopBase(const LoopBase &); // DO NOT IMPLEMENT - const LoopBase&operator=(const LoopBase &);// DO NOT IMPLEMENT + // DO NOT IMPLEMENT + LoopBase(const LoopBase &); + // DO NOT IMPLEMENT + const LoopBase&operator=(const LoopBase &); public: /// Loop ctor - This creates an empty loop. LoopBase() : ParentLoop(0) {} @@ -87,13 +85,13 @@ public: /// blocks, where depth 0 is used for blocks not inside any loops. unsigned getLoopDepth() const { unsigned D = 1; - for (const LoopBase *CurLoop = ParentLoop; CurLoop; + for (const LoopT *CurLoop = ParentLoop; CurLoop; CurLoop = CurLoop->ParentLoop) ++D; return D; } BlockT *getHeader() const { return Blocks.front(); } - LoopBase *getParentLoop() const { return ParentLoop; } + LoopT *getParentLoop() const { return ParentLoop; } /// contains - Return true if the specified basic block is in this loop /// @@ -103,8 +101,8 @@ public: /// iterator/begin/end - Return the loops contained entirely within this loop. /// - const std::vector*> &getSubLoops() const { return SubLoops; } - typedef typename std::vector*>::const_iterator iterator; + const std::vector &getSubLoops() const { return SubLoops; } + typedef typename std::vector::const_iterator iterator; iterator begin() const { return SubLoops.begin(); } iterator end() const { return SubLoops.end(); } bool empty() const { return SubLoops.empty(); } @@ -146,14 +144,6 @@ public: return NumBackEdges; } - /// isLoopInvariant - Return true if the specified value is loop invariant - /// - inline bool isLoopInvariant(Value *V) const { - if (Instruction *I = dyn_cast(V)) - return !contains(I->getParent()); - return true; // All non-instructions are loop invariant - } - //===--------------------------------------------------------------------===// // APIs for simple analysis of the loop. // @@ -223,72 +213,22 @@ public: return 0; } - /// getUniqueExitBlocks - Return all unique successor blocks of this loop. - /// These are the blocks _outside of the current loop_ which are branched to. - /// This assumes that loop is in canonical form. - /// - void getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const { + /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). 
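The new getExitEdges above sorts the loop's blocks once and then classifies every successor with std::binary_search, so each membership query is O(log n). The same shape on a toy CFG of integer block ids; exitEdges and the adjacency list are hypothetical names, not LLVM API:

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

typedef int Block;
typedef std::pair<Block, Block> Edge;

// Sort the loop's blocks once, then every successor that binary_search
// does not find inside the loop contributes an exit edge.
std::vector<Edge> exitEdges(const std::vector<Block> &LoopBlocks,
                            const std::vector<std::vector<Block> > &Succs) {
  std::vector<Block> Sorted(LoopBlocks);
  std::sort(Sorted.begin(), Sorted.end());

  std::vector<Edge> Exits;
  for (Block BB : LoopBlocks)
    for (Block S : Succs[BB])
      if (!std::binary_search(Sorted.begin(), Sorted.end(), S))
        Exits.push_back(Edge(BB, S));   // successor outside the loop
  return Exits;
}

int main() {
  // Blocks 1 and 2 form the loop; 1->2, 2->1 (backedge), 2->3 (exit).
  std::vector<std::vector<Block> > Succs = {{1}, {2}, {1, 3}, {}};
  std::vector<Block> Loop = {1, 2};
  for (const Edge &E : exitEdges(Loop, Succs))
    std::printf("exit edge %d -> %d\n", E.first, E.second); // 2 -> 3
}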
+ typedef std::pair Edge; + void getExitEdges(SmallVectorImpl &ExitEdges) const { // Sort the blocks vector so that we can use binary search to do quick // lookups. SmallVector LoopBBs(block_begin(), block_end()); std::sort(LoopBBs.begin(), LoopBBs.end()); - std::vector switchExitBlocks; - - for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { - - BlockT *current = *BI; - switchExitBlocks.clear(); - - typedef GraphTraits BlockTraits; - typedef GraphTraits > InvBlockTraits; + typedef GraphTraits BlockTraits; + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) for (typename BlockTraits::ChildIteratorType I = BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); - I != E; ++I) { - if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) - // If block is inside the loop then it is not a exit block. - continue; - - typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(*I); - BlockT *firstPred = *PI; - - // If current basic block is this exit block's first predecessor - // then only insert exit block in to the output ExitBlocks vector. - // This ensures that same exit block is not inserted twice into - // ExitBlocks vector. - if (current != firstPred) - continue; - - // If a terminator has more then two successors, for example SwitchInst, - // then it is possible that there are multiple edges from current block - // to one exit block. - if (std::distance(BlockTraits::child_begin(current), - BlockTraits::child_end(current)) <= 2) { - ExitBlocks.push_back(*I); - continue; - } - - // In case of multiple edges from current block to exit block, collect - // only one edge in ExitBlocks. Use switchExitBlocks to keep track of - // duplicate edges. - if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I) - == switchExitBlocks.end()) { - switchExitBlocks.push_back(*I); - ExitBlocks.push_back(*I); - } - } - } - } - - /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one - /// block, return that block. Otherwise return null. - BlockT *getUniqueExitBlock() const { - SmallVector UniqueExitBlocks; - getUniqueExitBlocks(UniqueExitBlocks); - if (UniqueExitBlocks.size() == 1) - return UniqueExitBlocks[0]; - return 0; + I != E; ++I) + if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) + // Not in current loop? It must be an exit block. + ExitEdges.push_back(std::make_pair(*BI, *I)); } /// getLoopPreheader - If there is a preheader for this loop, return it. A @@ -355,178 +295,6 @@ public: return Latch; } - - /// getCanonicalInductionVariable - Check to see if the loop has a canonical - /// induction variable: an integer recurrence that starts at 0 and increments - /// by one each time through the loop. If so, return the phi node that - /// corresponds to it. - /// - /// The IndVarSimplify pass transforms loops to have a canonical induction - /// variable. - /// - inline PHINode *getCanonicalInductionVariable() const { - BlockT *H = getHeader(); - - BlockT *Incoming = 0, *Backedge = 0; - typedef GraphTraits > InvBlockTraits; - typename InvBlockTraits::ChildIteratorType PI = - InvBlockTraits::child_begin(H); - assert(PI != InvBlockTraits::child_end(H) && - "Loop must have at least one backedge!"); - Backedge = *PI++; - if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop - Incoming = *PI++; - if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges? 
- - if (contains(Incoming)) { - if (contains(Backedge)) - return 0; - std::swap(Incoming, Backedge); - } else if (!contains(Backedge)) - return 0; - - // Loop over all of the PHI nodes, looking for a canonical indvar. - for (typename BlockT::iterator I = H->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - if (ConstantInt *CI = - dyn_cast(PN->getIncomingValueForBlock(Incoming))) - if (CI->isNullValue()) - if (Instruction *Inc = - dyn_cast(PN->getIncomingValueForBlock(Backedge))) - if (Inc->getOpcode() == Instruction::Add && - Inc->getOperand(0) == PN) - if (ConstantInt *CI = dyn_cast(Inc->getOperand(1))) - if (CI->equalsInt(1)) - return PN; - } - return 0; - } - - /// getCanonicalInductionVariableIncrement - Return the LLVM value that holds - /// the canonical induction variable value for the "next" iteration of the - /// loop. This always succeeds if getCanonicalInductionVariable succeeds. - /// - inline Instruction *getCanonicalInductionVariableIncrement() const { - if (PHINode *PN = getCanonicalInductionVariable()) { - bool P1InLoop = contains(PN->getIncomingBlock(1)); - return cast(PN->getIncomingValue(P1InLoop)); - } - return 0; - } - - /// getTripCount - Return a loop-invariant LLVM value indicating the number of - /// times the loop will be executed. Note that this means that the backedge - /// of the loop executes N-1 times. If the trip-count cannot be determined, - /// this returns null. - /// - /// The IndVarSimplify pass transforms loops to have a form that this - /// function easily understands. - /// - inline Value *getTripCount() const { - // Canonical loops will end with a 'cmp ne I, V', where I is the incremented - // canonical induction variable and V is the trip count of the loop. - Instruction *Inc = getCanonicalInductionVariableIncrement(); - if (Inc == 0) return 0; - PHINode *IV = cast(Inc->getOperand(0)); - - BlockT *BackedgeBlock = - IV->getIncomingBlock(contains(IV->getIncomingBlock(1))); - - if (BranchInst *BI = dyn_cast(BackedgeBlock->getTerminator())) - if (BI->isConditional()) { - if (ICmpInst *ICI = dyn_cast(BI->getCondition())) { - if (ICI->getOperand(0) == Inc) { - if (BI->getSuccessor(0) == getHeader()) { - if (ICI->getPredicate() == ICmpInst::ICMP_NE) - return ICI->getOperand(1); - } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { - return ICI->getOperand(1); - } - } - } - } - - return 0; - } - - /// getSmallConstantTripCount - Returns the trip count of this loop as a - /// normal unsigned value, if possible. Returns 0 if the trip count is unknown - /// of not constant. Will also return 0 if the trip count is very large - /// (>= 2^32) - inline unsigned getSmallConstantTripCount() const { - Value* TripCount = this->getTripCount(); - if (TripCount) { - if (ConstantInt *TripCountC = dyn_cast(TripCount)) { - // Guard against huge trip counts. - if (TripCountC->getValue().getActiveBits() <= 32) { - return (unsigned)TripCountC->getZExtValue(); - } - } - } - return 0; - } - - /// getSmallConstantTripMultiple - Returns the largest constant divisor of the - /// trip count of this loop as a normal unsigned value, if possible. This - /// means that the actual trip count is always a multiple of the returned - /// value (don't forget the trip count could very well be zero as well!). 
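The removed getSmallConstantTripCount body survives as an out-of-line member of the new Loop class declared further down; its one subtlety is the guard against trip counts that do not fit in 32 bits, done via APInt::getActiveBits() in the original. The same guard restated over a plain 64-bit integer (smallTripCount is an illustrative name):

#include <cassert>
#include <stdint.h>

// Only report a trip count that fits in 32 bits; otherwise answer
// "unknown" (0), exactly like the guarded code above.
unsigned smallTripCount(bool Known, uint64_t TripCount) {
  if (!Known)
    return 0;                 // not a compile-time constant
  if (TripCount >> 32)
    return 0;                 // >= 2^32: too large to report
  return unsigned(TripCount);
}

int main() {
  assert(smallTripCount(true, 100) == 100);
  assert(smallTripCount(true, uint64_t(1) << 32) == 0); // huge: unknown
  assert(smallTripCount(false, 0) == 0);                // unknown
}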
- /// - /// Returns 1 if the trip count is unknown or not guaranteed to be the - /// multiple of a constant (which is also the case if the trip count is simply - /// constant, use getSmallConstantTripCount for that case), Will also return 1 - /// if the trip count is very large (>= 2^32). - inline unsigned getSmallConstantTripMultiple() const { - Value* TripCount = this->getTripCount(); - // This will hold the ConstantInt result, if any - ConstantInt *Result = NULL; - if (TripCount) { - // See if the trip count is constant itself - Result = dyn_cast(TripCount); - // if not, see if it is a multiplication - if (!Result) - if (BinaryOperator *BO = dyn_cast(TripCount)) { - switch (BO->getOpcode()) { - case BinaryOperator::Mul: - Result = dyn_cast(BO->getOperand(1)); - break; - default: - break; - } - } - } - // Guard against huge trip counts. - if (Result && Result->getValue().getActiveBits() <= 32) { - return (unsigned)Result->getZExtValue(); - } else { - return 1; - } - } - - /// isLCSSAForm - Return true if the Loop is in LCSSA form - inline bool isLCSSAForm() const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallPtrSet LoopBBs(block_begin(), block_end()); - - for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { - BlockT *BB = *BI; - for (typename BlockT::iterator I = BB->begin(), E = BB->end(); I != E;++I) - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; - ++UI) { - BlockT *UserBB = cast(*UI)->getParent(); - if (PHINode *P = dyn_cast(*UI)) { - UserBB = P->getIncomingBlock(UI); - } - - // Check the current block, as a fast-path. Most values are used in - // the same block they are defined in. - if (UserBB != BB && !LoopBBs.count(UserBB)) - return false; - } - } - - return true; - } //===--------------------------------------------------------------------===// // APIs for updating loop information after changing the CFG @@ -538,39 +306,39 @@ public: /// to the specified LoopInfo object as being in the current basic block. It /// is not valid to replace the loop header with this method. /// - void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase &LI); + void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase &LI); /// replaceChildLoopWith - This is used when splitting loops up. It replaces /// the OldChild entry in our children list with NewChild, and updates the /// parent pointer of OldChild to be null and the NewChild to be this loop. /// This updates the loop depth of the new child. - void replaceChildLoopWith(LoopBase *OldChild, - LoopBase *NewChild) { + void replaceChildLoopWith(LoopT *OldChild, + LoopT *NewChild) { assert(OldChild->ParentLoop == this && "This loop is already broken!"); assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); - typename std::vector*>::iterator I = + typename std::vector::iterator I = std::find(SubLoops.begin(), SubLoops.end(), OldChild); assert(I != SubLoops.end() && "OldChild not in loop!"); *I = NewChild; OldChild->ParentLoop = 0; - NewChild->ParentLoop = this; + NewChild->ParentLoop = static_cast(this); } /// addChildLoop - Add the specified loop to be a child of this loop. This /// updates the loop depth of the new child. 
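The getSmallConstantTripMultiple contract above is easy to misread: it returns a value that divides every possible trip count, so 1 is always a safe answer. A toy restatement over a three-case expression; Const, MulByConst and Opaque are stand-ins for the ConstantInt and BinaryOperator::Mul matching in the removed code, not LLVM types:

#include <cassert>
#include <stdint.h>

enum Kind { Const, MulByConst, Opaque };
struct Expr { Kind K; uint64_t C; };   // C: the constant or multiplier

// Every possible trip count is a multiple of the returned value.
unsigned tripMultiple(Expr TripCount) {
  uint64_t M = 1;
  if (TripCount.K == Const)
    M = TripCount.C;          // constant trip count: exact "multiple"
  else if (TripCount.K == MulByConst)
    M = TripCount.C;          // n * C executes a multiple of C times
  if (M == 0 || (M >> 32))
    return 1;                 // unknown, zero, or huge: fall back to 1
  return unsigned(M);
}

int main() {
  assert(tripMultiple(Expr{Const, 12}) == 12);
  assert(tripMultiple(Expr{MulByConst, 4}) == 4);
  assert(tripMultiple(Expr{Opaque, 0}) == 1);
}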
/// - void addChildLoop(LoopBase *NewChild) { + void addChildLoop(LoopT *NewChild) { assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); - NewChild->ParentLoop = this; + NewChild->ParentLoop = static_cast(this); SubLoops.push_back(NewChild); } /// removeChildLoop - This removes the specified child from being a subloop of /// this loop. The loop is not deleted, as it will presumably be inserted /// into another loop. - LoopBase *removeChildLoop(iterator I) { + LoopT *removeChildLoop(iterator I) { assert(I != SubLoops.end() && "Cannot remove end iterator!"); - LoopBase *Child = *I; + LoopT *Child = *I; assert(Child->ParentLoop == this && "Child is not a child of this loop!"); SubLoops.erase(SubLoops.begin()+(I-begin())); Child->ParentLoop = 0; @@ -609,16 +377,86 @@ public: /// verifyLoop - Verify loop structure void verifyLoop() const { #ifndef NDEBUG - assert (getHeader() && "Loop header is missing"); - assert (getLoopPreheader() && "Loop preheader is missing"); - assert (getLoopLatch() && "Loop latch is missing"); - for (iterator I = SubLoops.begin(), E = SubLoops.end(); I != E; ++I) - (*I)->verifyLoop(); + assert(!Blocks.empty() && "Loop header is missing"); + + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallVector LoopBBs(block_begin(), block_end()); + std::sort(LoopBBs.begin(), LoopBBs.end()); + + // Check the individual blocks. + for (block_iterator I = block_begin(), E = block_end(); I != E; ++I) { + BlockT *BB = *I; + bool HasInsideLoopSuccs = false; + bool HasInsideLoopPreds = false; + SmallVector OutsideLoopPreds; + + typedef GraphTraits BlockTraits; + for (typename BlockTraits::ChildIteratorType SI = + BlockTraits::child_begin(BB), SE = BlockTraits::child_end(BB); + SI != SE; ++SI) + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *SI)) { + HasInsideLoopSuccs = true; + break; + } + typedef GraphTraits > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB); + PI != PE; ++PI) { + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *PI)) + HasInsideLoopPreds = true; + else + OutsideLoopPreds.push_back(*PI); + } + + if (BB == getHeader()) { + assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); + } else if (!OutsideLoopPreds.empty()) { + // A non-header loop shouldn't be reachable from outside the loop, + // though it is permitted if the predecessor is not itself actually + // reachable. + BlockT *EntryBB = BB->getParent()->begin(); + for (df_iterator NI = df_begin(EntryBB), + NE = df_end(EntryBB); NI != NE; ++NI) + for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) + assert(*NI != OutsideLoopPreds[i] && + "Loop has multiple entry points!"); + } + assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); + assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); + assert(BB != getHeader()->getParent()->begin() && + "Loop contains function entry block!"); + } + + // Check the subloops. + for (iterator I = begin(), E = end(); I != E; ++I) + // Each block in each subloop should be contained within this loop. + for (block_iterator BI = (*I)->block_begin(), BE = (*I)->block_end(); + BI != BE; ++BI) { + assert(std::binary_search(LoopBBs.begin(), LoopBBs.end(), *BI) && + "Loop does not contain all the blocks of a subloop!"); + } + + // Check the parent loop pointer. 
+ if (ParentLoop) { + assert(std::find(ParentLoop->begin(), ParentLoop->end(), this) != + ParentLoop->end() && + "Loop is not a subloop of its parent!"); + } #endif } - void print(std::ostream &OS, unsigned Depth = 0) const { - OS << std::string(Depth*2, ' ') << "Loop at depth " << getLoopDepth() + /// verifyLoop - Verify loop structure of this loop and all nested loops. + void verifyLoopNest() const { + // Verify this loop. + verifyLoop(); + // Verify the subloops. + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->verifyLoopNest(); + } + + void print(raw_ostream &OS, unsigned Depth = 0) const { + OS.indent(Depth*2) << "Loop at depth " << getLoopDepth() << " containing: "; for (unsigned i = 0; i < getBlocks().size(); ++i) { @@ -635,33 +473,131 @@ public: (*I)->print(OS, Depth+2); } - void print(std::ostream *O, unsigned Depth = 0) const { - if (O) print(*O, Depth); - } - void dump() const { - print(cerr); + print(errs()); } -private: - friend class LoopInfoBase; +protected: + friend class LoopInfoBase; explicit LoopBase(BlockT *BB) : ParentLoop(0) { Blocks.push_back(BB); } }; +class Loop : public LoopBase { +public: + Loop() {} + + /// isLoopInvariant - Return true if the specified value is loop invariant + /// + bool isLoopInvariant(Value *V) const; + + /// isLoopInvariant - Return true if the specified instruction is + /// loop-invariant. + /// + bool isLoopInvariant(Instruction *I) const; + + /// makeLoopInvariant - If the given value is an instruction inside of the + /// loop and it can be hoisted, do so to make it trivially loop-invariant. + /// Return true if the value after any hoisting is loop invariant. This + /// function can be used as a slightly more aggressive replacement for + /// isLoopInvariant. + /// + /// If InsertPt is specified, it is the point to hoist instructions to. + /// If null, the terminator of the loop preheader is used. + /// + bool makeLoopInvariant(Value *V, bool &Changed, + Instruction *InsertPt = 0) const; + + /// makeLoopInvariant - If the given instruction is inside of the + /// loop and it can be hoisted, do so to make it trivially loop-invariant. + /// Return true if the instruction after any hoisting is loop invariant. This + /// function can be used as a slightly more aggressive replacement for + /// isLoopInvariant. + /// + /// If InsertPt is specified, it is the point to hoist instructions to. + /// If null, the terminator of the loop preheader is used. + /// + bool makeLoopInvariant(Instruction *I, bool &Changed, + Instruction *InsertPt = 0) const; + + /// getCanonicalInductionVariable - Check to see if the loop has a canonical + /// induction variable: an integer recurrence that starts at 0 and increments + /// by one each time through the loop. If so, return the phi node that + /// corresponds to it. + /// + /// The IndVarSimplify pass transforms loops to have a canonical induction + /// variable. + /// + PHINode *getCanonicalInductionVariable() const; + + /// getCanonicalInductionVariableIncrement - Return the LLVM value that holds + /// the canonical induction variable value for the "next" iteration of the + /// loop. This always succeeds if getCanonicalInductionVariable succeeds. + /// + Instruction *getCanonicalInductionVariableIncrement() const; + + /// getTripCount - Return a loop-invariant LLVM value indicating the number of + /// times the loop will be executed. Note that this means that the backedge + /// of the loop executes N-1 times. If the trip-count cannot be determined, + /// this returns null. 
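verifyLoopNest and the new print above both walk the subloop tree recursively, print indenting via OS.indent(Depth*2). A simplified recursive printer over a toy loop tree; the names are hypothetical, and it steps the depth by one per level rather than reproducing the Depth+2 stepping above:

#include <cstdio>
#include <string>
#include <vector>

struct Loop {
  std::string Name;
  std::vector<Loop> SubLoops;
};

// Indent two spaces per nesting level, then recurse into the subloops,
// mirroring the shape of LoopBase::print.
void print(const Loop &L, unsigned Depth = 0) {
  std::printf("%*sLoop '%s' at depth %u\n", int(Depth * 2), "",
              L.Name.c_str(), Depth + 1);
  for (const Loop &Sub : L.SubLoops)
    print(Sub, Depth + 1);
}

int main() {
  Loop Outer{"outer", {Loop{"inner", {}}}};
  print(Outer);  // "Loop 'outer' at depth 1" / "  Loop 'inner' at depth 2"
}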
+ /// + /// The IndVarSimplify pass transforms loops to have a form that this + /// function easily understands. + /// + Value *getTripCount() const; + + /// getSmallConstantTripCount - Returns the trip count of this loop as a + /// normal unsigned value, if possible. Returns 0 if the trip count is unknown + /// of not constant. Will also return 0 if the trip count is very large + /// (>= 2^32) + unsigned getSmallConstantTripCount() const; + + /// getSmallConstantTripMultiple - Returns the largest constant divisor of the + /// trip count of this loop as a normal unsigned value, if possible. This + /// means that the actual trip count is always a multiple of the returned + /// value (don't forget the trip count could very well be zero as well!). + /// + /// Returns 1 if the trip count is unknown or not guaranteed to be the + /// multiple of a constant (which is also the case if the trip count is simply + /// constant, use getSmallConstantTripCount for that case), Will also return 1 + /// if the trip count is very large (>= 2^32). + unsigned getSmallConstantTripMultiple() const; + + /// isLCSSAForm - Return true if the Loop is in LCSSA form + bool isLCSSAForm() const; + + /// isLoopSimplifyForm - Return true if the Loop is in the form that + /// the LoopSimplify form transforms loops to, which is sometimes called + /// normal form. + bool isLoopSimplifyForm() const; + + /// getUniqueExitBlocks - Return all unique successor blocks of this loop. + /// These are the blocks _outside of the current loop_ which are branched to. + /// This assumes that loop is in canonical form. + /// + void getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const; + + /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one + /// block, return that block. Otherwise return null. + BasicBlock *getUniqueExitBlock() const; + +private: + friend class LoopInfoBase; + explicit Loop(BasicBlock *BB) : LoopBase(BB) {} +}; //===----------------------------------------------------------------------===// /// LoopInfo - This class builds and contains all of the top level loop /// structures in the specified function. /// -template +template class LoopInfoBase { // BBMap - Mapping of basic blocks to the inner most loop they occur in - std::map*> BBMap; - std::vector*> TopLevelLoops; - friend class LoopBase; + std::map BBMap; + std::vector TopLevelLoops; + friend class LoopBase; void operator=(const LoopInfoBase &); // do not implement LoopInfoBase(const LoopInfo &); // do not implement @@ -670,7 +606,7 @@ public: ~LoopInfoBase() { releaseMemory(); } void releaseMemory() { - for (typename std::vector* >::iterator I = + for (typename std::vector::iterator I = TopLevelLoops.begin(), E = TopLevelLoops.end(); I != E; ++I) delete *I; // Delete all of the loops... @@ -681,7 +617,7 @@ public: /// iterator/begin/end - The interface to the top-level loops in the current /// function. /// - typedef typename std::vector*>::const_iterator iterator; + typedef typename std::vector::const_iterator iterator; iterator begin() const { return TopLevelLoops.begin(); } iterator end() const { return TopLevelLoops.end(); } bool empty() const { return TopLevelLoops.empty(); } @@ -689,15 +625,15 @@ public: /// getLoopFor - Return the inner most loop that BB lives in. If a basic /// block is in no loop (for example the entry node), null is returned. 
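isLCSSAForm, declared above and now defined out of line, checks that no value defined in the loop is used outside it; for a PHI user the relevant block is the incoming block the value flows in from, as in the removed inline code earlier in this hunk. A toy version over integer block ids (Use and isLCSSA are illustrative names, not LLVM API):

#include <cassert>
#include <set>
#include <vector>

// One use of a value defined inside the loop. For a PHI user, the
// "using block" is the incoming block the value arrives from.
struct Use {
  int  UserBlock;
  bool UserIsPHI;
  int  PHIIncomingBlock;
};

// LCSSA sketch: every use must be inside the loop; values escape only
// through PHIs whose incoming block is in the loop.
bool isLCSSA(const std::set<int> &LoopBlocks, const std::vector<Use> &Uses) {
  for (const Use &U : Uses) {
    int UserBB = U.UserIsPHI ? U.PHIIncomingBlock : U.UserBlock;
    if (!LoopBlocks.count(UserBB))
      return false;            // a use escaped without an exit PHI
  }
  return true;
}

int main() {
  std::set<int> Loop{1, 2};
  std::vector<Use> Uses{{2, false, 0}};
  assert(isLCSSA(Loop, Uses));   // used inside the loop: fine
  Uses[0] = Use{5, false, 0};    // direct use outside the loop
  assert(!isLCSSA(Loop, Uses));
  Uses[0] = Use{5, true, 2};     // PHI in exit block 5, value from 2
  assert(isLCSSA(Loop, Uses));
}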
/// - LoopBase *getLoopFor(const BlockT *BB) const { - typename std::map*>::const_iterator I= + LoopT *getLoopFor(const BlockT *BB) const { + typename std::map::const_iterator I= BBMap.find(const_cast(BB)); return I != BBMap.end() ? I->second : 0; } /// operator[] - same as getLoopFor... /// - const LoopBase *operator[](const BlockT *BB) const { + const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); } @@ -705,22 +641,22 @@ public: /// depth of 0 means the block is not inside any loop. /// unsigned getLoopDepth(const BlockT *BB) const { - const LoopBase *L = getLoopFor(BB); + const LoopT *L = getLoopFor(BB); return L ? L->getLoopDepth() : 0; } // isLoopHeader - True if the block is a loop header node bool isLoopHeader(BlockT *BB) const { - const LoopBase *L = getLoopFor(BB); + const LoopT *L = getLoopFor(BB); return L && L->getHeader() == BB; } /// removeLoop - This removes the specified top-level loop from this loop info /// object. The loop is not deleted, as it will presumably be inserted into /// another loop. - LoopBase *removeLoop(iterator I) { + LoopT *removeLoop(iterator I) { assert(I != end() && "Cannot remove end iterator!"); - LoopBase *L = *I; + LoopT *L = *I; assert(L->getParentLoop() == 0 && "Not a top-level loop!"); TopLevelLoops.erase(TopLevelLoops.begin() + (I-begin())); return L; @@ -729,17 +665,17 @@ public: /// changeLoopFor - Change the top-level loop that contains BB to the /// specified loop. This should be used by transformations that restructure /// the loop hierarchy tree. - void changeLoopFor(BlockT *BB, LoopBase *L) { - LoopBase *&OldLoop = BBMap[BB]; + void changeLoopFor(BlockT *BB, LoopT *L) { + LoopT *&OldLoop = BBMap[BB]; assert(OldLoop && "Block not in a loop yet!"); OldLoop = L; } /// changeTopLevelLoop - Replace the specified loop in the top-level loops /// list with the indicated loop. - void changeTopLevelLoop(LoopBase *OldLoop, - LoopBase *NewLoop) { - typename std::vector*>::iterator I = + void changeTopLevelLoop(LoopT *OldLoop, + LoopT *NewLoop) { + typename std::vector::iterator I = std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop); assert(I != TopLevelLoops.end() && "Old loop not at top level!"); *I = NewLoop; @@ -749,7 +685,7 @@ public: /// addTopLevelLoop - This adds the specified loop to the collection of /// top-level loops. - void addTopLevelLoop(LoopBase *New) { + void addTopLevelLoop(LoopT *New) { assert(New->getParentLoop() == 0 && "Loop already in subloop!"); TopLevelLoops.push_back(New); } @@ -758,9 +694,9 @@ public: /// including all of the Loop objects it is nested in and our mapping from /// BasicBlocks to loops. 
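getLoopFor and getLoopDepth above show the basic BBMap pattern: map each block to its innermost loop and recover the depth by walking ParentLoop links, with 0 meaning "not in any loop". The same two queries over stand-in types (loopDepth is an illustrative name):

#include <cassert>
#include <map>

struct Loop { Loop *Parent; };

// BBMap-style query: depth 0 for blocks outside every loop, otherwise
// one step per enclosing loop on the parent chain.
unsigned loopDepth(const std::map<int, Loop*> &BBMap, int BB) {
  std::map<int, Loop*>::const_iterator It = BBMap.find(BB);
  if (It == BBMap.end())
    return 0;
  unsigned D = 0;
  for (Loop *L = It->second; L; L = L->Parent)
    ++D;
  return D;
}

int main() {
  Loop Outer{0}, Inner{&Outer};
  std::map<int, Loop*> BBMap;
  BBMap[1] = &Outer;
  BBMap[2] = &Inner;
  assert(loopDepth(BBMap, 0) == 0);  // entry block: in no loop
  assert(loopDepth(BBMap, 1) == 1);
  assert(loopDepth(BBMap, 2) == 2);
}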
void removeBlock(BlockT *BB) { - typename std::map*>::iterator I = BBMap.find(BB); + typename std::map::iterator I = BBMap.find(BB); if (I != BBMap.end()) { - for (LoopBase *L = I->second; L; L = L->getParentLoop()) + for (LoopT *L = I->second; L; L = L->getParentLoop()) L->removeBlockFromLoop(BB); BBMap.erase(I); @@ -769,8 +705,8 @@ public: // Internals - static bool isNotAlreadyContainedIn(const LoopBase *SubLoop, - const LoopBase *ParentLoop) { + static bool isNotAlreadyContainedIn(const LoopT *SubLoop, + const LoopT *ParentLoop) { if (SubLoop == 0) return true; if (SubLoop == ParentLoop) return false; return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop); @@ -781,11 +717,11 @@ public: for (df_iterator NI = df_begin(RootNode), NE = df_end(RootNode); NI != NE; ++NI) - if (LoopBase *L = ConsiderForLoop(*NI, DT)) + if (LoopT *L = ConsiderForLoop(*NI, DT)) TopLevelLoops.push_back(L); } - LoopBase *ConsiderForLoop(BlockT *BB, DominatorTreeBase &DT) { + LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase &DT) { if (BBMap.find(BB) != BBMap.end()) return 0;// Haven't processed this node? std::vector TodoStack; @@ -796,13 +732,13 @@ public: for (typename InvBlockTraits::ChildIteratorType I = InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB); I != E; ++I) - if (DT.dominates(BB, *I)) // If BB dominates it's predecessor... + if (DT.dominates(BB, *I)) // If BB dominates its predecessor... TodoStack.push_back(*I); if (TodoStack.empty()) return 0; // No backedges to this block... // Create a new loop to represent this basic block... - LoopBase *L = new LoopBase(BB); + LoopT *L = new LoopT(BB); BBMap[BB] = L; BlockT *EntryBlock = BB->getParent()->begin(); @@ -819,13 +755,13 @@ public: // occurs, this child loop gets added to a part of the current loop, // making it a sibling to the current loop. We have to reparent this // loop. - if (LoopBase *SubLoop = - const_cast*>(getLoopFor(X))) + if (LoopT *SubLoop = + const_cast(getLoopFor(X))) if (SubLoop->getHeader() == X && isNotAlreadyContainedIn(SubLoop, L)){ - // Remove the subloop from it's current parent... + // Remove the subloop from its current parent... assert(SubLoop->ParentLoop && SubLoop->ParentLoop != L); - LoopBase *SLP = SubLoop->ParentLoop; // SubLoopParent - typename std::vector*>::iterator I = + LoopT *SLP = SubLoop->ParentLoop; // SubLoopParent + typename std::vector::iterator I = std::find(SLP->SubLoops.begin(), SLP->SubLoops.end(), SubLoop); assert(I != SLP->SubLoops.end() &&"SubLoop not a child of parent?"); SLP->SubLoops.erase(I); // Remove from parent... @@ -849,7 +785,7 @@ public: // If there are any loops nested within this loop, create them now! for (typename std::vector::iterator I = L->Blocks.begin(), E = L->Blocks.end(); I != E; ++I) - if (LoopBase *NewLoop = ConsiderForLoop(*I, DT)) { + if (LoopT *NewLoop = ConsiderForLoop(*I, DT)) { L->SubLoops.push_back(NewLoop); NewLoop->ParentLoop = L; } @@ -858,25 +794,20 @@ public: // loop can be found for them. // for (typename std::vector::iterator I = L->Blocks.begin(), - E = L->Blocks.end(); I != E; ++I) { - typename std::map*>::iterator BBMI = - BBMap.find(*I); - if (BBMI == BBMap.end()) // Not in map yet... - BBMap.insert(BBMI, std::make_pair(*I, L)); // Must be at this level - } + E = L->Blocks.end(); I != E; ++I) + BBMap.insert(std::make_pair(*I, L)); // Now that we have a list of all of the child loops of this loop, check to // see if any of them should actually be nested inside of each other. 
We // can accidentally pull loops our of their parents, so we must make sure to // organize the loop nests correctly now. { - std::map*> ContainingLoops; + std::map ContainingLoops; for (unsigned i = 0; i != L->SubLoops.size(); ++i) { - LoopBase *Child = L->SubLoops[i]; + LoopT *Child = L->SubLoops[i]; assert(Child->getParentLoop() == L && "Not proper child loop?"); - if (LoopBase *ContainingLoop = - ContainingLoops[Child->getHeader()]) { + if (LoopT *ContainingLoop = ContainingLoops[Child->getHeader()]) { // If there is already a loop which contains this loop, move this loop // into the containing loop. MoveSiblingLoopInto(Child, ContainingLoop); @@ -886,11 +817,11 @@ public: // if any of the contained blocks are loop headers for subloops we // have already processed. for (unsigned b = 0, e = Child->Blocks.size(); b != e; ++b) { - LoopBase *&BlockLoop = ContainingLoops[Child->Blocks[b]]; + LoopT *&BlockLoop = ContainingLoops[Child->Blocks[b]]; if (BlockLoop == 0) { // Child block not processed yet... BlockLoop = Child; } else if (BlockLoop != Child) { - LoopBase *SubLoop = BlockLoop; + LoopT *SubLoop = BlockLoop; // Reparent all of the blocks which used to belong to BlockLoops for (unsigned j = 0, e = SubLoop->Blocks.size(); j != e; ++j) ContainingLoops[SubLoop->Blocks[j]] = Child; @@ -911,14 +842,14 @@ public: /// MoveSiblingLoopInto - This method moves the NewChild loop to live inside /// of the NewParent Loop, instead of being a sibling of it. - void MoveSiblingLoopInto(LoopBase *NewChild, - LoopBase *NewParent) { - LoopBase *OldParent = NewChild->getParentLoop(); + void MoveSiblingLoopInto(LoopT *NewChild, + LoopT *NewParent) { + LoopT *OldParent = NewChild->getParentLoop(); assert(OldParent && OldParent == NewParent->getParentLoop() && NewChild != NewParent && "Not sibling loops!"); // Remove NewChild from being a child of OldParent - typename std::vector*>::iterator I = + typename std::vector::iterator I = std::find(OldParent->SubLoops.begin(), OldParent->SubLoops.end(), NewChild); assert(I != OldParent->SubLoops.end() && "Parent fields incorrect??"); @@ -931,7 +862,7 @@ public: /// InsertLoopInto - This inserts loop L into the specified parent loop. If /// the parent loop contains a loop which should contain L, the loop gets /// inserted into L instead. - void InsertLoopInto(LoopBase *L, LoopBase *Parent) { + void InsertLoopInto(LoopT *L, LoopT *Parent) { BlockT *LHeader = L->getHeader(); assert(Parent->contains(LHeader) && "This loop should not be inserted here!"); @@ -951,11 +882,11 @@ public: // Debugging - void print(std::ostream &OS, const Module* ) const { + void print(raw_ostream &OS) const { for (unsigned i = 0; i < TopLevelLoops.size(); ++i) TopLevelLoops[i]->print(OS); #if 0 - for (std::map::const_iterator I = BBMap.begin(), + for (std::map::const_iterator I = BBMap.begin(), E = BBMap.end(); I != E; ++I) OS << "BB '" << I->first->getName() << "' level = " << I->second->getLoopDepth() << "\n"; @@ -964,8 +895,8 @@ public: }; class LoopInfo : public FunctionPass { - LoopInfoBase LI; - friend class LoopBase; + LoopInfoBase LI; + friend class LoopBase; void operator=(const LoopInfo &); // do not implement LoopInfo(const LoopInfo &); // do not implement @@ -974,12 +905,12 @@ public: LoopInfo() : FunctionPass(&ID) {} - LoopInfoBase& getBase() { return LI; } + LoopInfoBase& getBase() { return LI; } /// iterator/begin/end - The interface to the top-level loops in the current /// function. 
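ConsiderForLoop above seeds a loop from each predecessor that the candidate header dominates, i.e. from the backedges. The textbook natural-loop body computation underlying it: starting from the latches, walk predecessors until the header stops the walk. This is a sketch of the idea, not the exact DFS bookkeeping used above:

#include <cstdio>
#include <set>
#include <vector>

// A backedge is an edge P -> H whose target H dominates P; the natural
// loop of H is H plus everything that reaches a latch P without
// passing through H.
std::set<int> loopBody(int H, const std::vector<int> &Latches,
                       const std::vector<std::vector<int> > &Preds) {
  std::set<int> Body;
  Body.insert(H);                       // the header blocks the walk
  std::vector<int> Work(Latches);
  while (!Work.empty()) {
    int B = Work.back(); Work.pop_back();
    if (!Body.insert(B).second)
      continue;                         // already visited
    for (int P : Preds[B])
      Work.push_back(P);                // walk backwards from the latch
  }
  return Body;
}

int main() {
  // 0 -> 1 -> 2 -> 1 (backedge), 2 -> 3. Header 1, latch 2.
  std::vector<std::vector<int> > Preds = {{}, {0, 2}, {1}, {2}};
  for (int B : loopBody(1, std::vector<int>{2}, Preds))
    std::printf("block %d in loop\n", B);   // prints 1 and 2
}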
/// - typedef LoopInfoBase::iterator iterator; + typedef LoopInfoBase::iterator iterator; inline iterator begin() const { return LI.begin(); } inline iterator end() const { return LI.end(); } bool empty() const { return LI.empty(); } @@ -1013,12 +944,12 @@ public: /// virtual bool runOnFunction(Function &F); - virtual void releaseMemory() { LI.releaseMemory(); } + virtual void verifyAnalysis() const; - virtual void print(std::ostream &O, const Module* M = 0) const { - LI.print(O, M); - } + virtual void releaseMemory() { LI.releaseMemory(); } + virtual void print(raw_ostream &O, const Module* M = 0) const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; /// removeLoop - This removes the specified top-level loop from this loop info @@ -1051,6 +982,13 @@ public: void removeBlock(BasicBlock *BB) { LI.removeBlock(BB); } + + static bool isNotAlreadyContainedIn(const Loop *SubLoop, + const Loop *ParentLoop) { + return + LoopInfoBase::isNotAlreadyContainedIn(SubLoop, + ParentLoop); + } }; @@ -1081,19 +1019,21 @@ template <> struct GraphTraits { } }; -template -void LoopBase::addBasicBlockToLoop(BlockT *NewBB, - LoopInfoBase &LIB) { +template +void +LoopBase::addBasicBlockToLoop(BlockT *NewBB, + LoopInfoBase &LIB) { assert((Blocks.empty() || LIB[getHeader()] == this) && "Incorrect LI specified for this loop!"); assert(NewBB && "Cannot add a null basic block to the loop!"); assert(LIB[NewBB] == 0 && "BasicBlock already in the loop!"); + LoopT *L = static_cast(this); + // Add the loop mapping to the LoopInfo object... - LIB.BBMap[NewBB] = this; + LIB.BBMap[NewBB] = L; // Add the basic block to this loop and all parent loops... - LoopBase *L = this; while (L) { L->Blocks.push_back(NewBB); L = L->getParentLoop(); diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index 7659b5bf458de..2eb329f7f0e3f 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -111,9 +111,13 @@ public: // Delete loop from the loop queue and loop nest (LoopInfo). void deleteLoopFromQueue(Loop *L); - // Insert loop into the loop nest(LoopInfo) and loop queue(LQ). + // Insert loop into the loop queue and add it as a child of the + // given parent. void insertLoop(Loop *L, Loop *ParentLoop); + // Insert a loop into the loop queue. + void insertLoopIntoQueue(Loop *L); + // Reoptimize this loop. LPPassManager will re-insert this loop into the // queue. This allows LoopPass to change loop nest for the loop. This // utility may send LPPassManager into infinite loops so use caution. diff --git a/include/llvm/Analysis/MallocHelper.h b/include/llvm/Analysis/MallocHelper.h new file mode 100644 index 0000000000000..0588dff08a60c --- /dev/null +++ b/include/llvm/Analysis/MallocHelper.h @@ -0,0 +1,86 @@ +//===- llvm/Analysis/MallocHelper.h ---- Identify malloc calls --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions identifies calls to malloc, bitcasts of malloc +// calls, and the types and array sizes associated with them. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MALLOCHELPER_H
+#define LLVM_ANALYSIS_MALLOCHELPER_H
+
+namespace llvm {
+class CallInst;
+class LLVMContext;
+class PointerType;
+class TargetData;
+class Type;
+class Value;
+
+//===----------------------------------------------------------------------===//
+//  malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call
+bool isMalloc(const Value* I);
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call.  Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst* extractMallocCall(const Value* I);
+CallInst* extractMallocCall(Value* I);
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+const CallInst* extractMallocCallFromBitCast(const Value* I);
+CallInst* extractMallocCallFromBitCast(Value* I);
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// matches the malloc call IR generated by CallInst::CreateMalloc().  This
+/// means that it is a malloc call with one bitcast use AND the malloc call's
+/// size argument is:
+/// 1. a constant not equal to the malloc's allocated type
+/// or
+/// 2. the result of a multiplication by the malloc's allocated type
+/// Otherwise it returns NULL.
+/// The unique bitcast is needed to determine the type/size of the array
+/// allocation.
+CallInst* isArrayMalloc(Value* I, LLVMContext &Context, const TargetData* TD);
+const CallInst* isArrayMalloc(const Value* I, LLVMContext &Context,
+                              const TargetData* TD);
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
+const PointerType* getMallocType(const CallInst* CI);
+
+/// getMallocAllocatedType - Returns the Type allocated by the malloc call.
+/// This Type is the result type of the call's only bitcast use.  If there is
+/// no unique bitcast use, then return NULL.
+const Type* getMallocAllocatedType(const CallInst* CI);
+
+/// getMallocArraySize - Returns the array size of a malloc call.  The array
+/// size is computed in 1 of 3 ways:
+/// 1. If the element type is of size 1, then the array size is the argument
+///    to malloc.
+/// 2. Else if the malloc's argument is a constant, the array size is that
+///    argument divided by the element type's size.
+/// 3. Else the malloc argument must be a multiplication and the array size is
+///    the first operand of the multiplication.
+/// This function returns constant 1 if:
+/// 1. The malloc call's allocated type cannot be determined.
+/// 2. IR wasn't created by a call to CallInst::CreateMalloc() with a non-NULL
+///    ArraySize.
+Value* getMallocArraySize(CallInst* CI, LLVMContext &Context,
+                          const TargetData* TD);
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index d7d795e08a16e..205c34ab5c89e 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -95,7 +95,7 @@ namespace llvm {
     /// an instruction definition dependency.
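Returning to the getMallocArraySize contract above: its three cases reduce the malloc argument to an element count. A toy model of just that case analysis, with the argument abstracted to constant / multiplication / opaque; no LLVM types are involved and all names are illustrative:

#include <cassert>
#include <stdint.h>

// The malloc argument is either opaque, a constant, or "n * ElemSize".
enum Kind { Opaque, ConstArg, MulBySize };
struct Arg { Kind K; uint64_t C; };   // C: the constant or multiplier

// Returns the array element count, or 1 when nothing better is known,
// matching the conservative fallback described above.
uint64_t mallocArraySize(Arg A, uint64_t ElemSize) {
  if (ElemSize == 1)
    return A.K == ConstArg ? A.C : 1; // case 1: bytes == elements
  if (A.K == ConstArg)
    return A.C / ElemSize;            // case 2: constant / element size
  if (A.K == MulBySize)
    return A.C;                       // case 3: first mul operand
  return 1;                           // give up conservatively
}

int main() {
  assert(mallocArraySize(Arg{ConstArg, 40}, 4) == 10);  // malloc(40), i32
  assert(mallocArraySize(Arg{MulBySize, 10}, 4) == 10); // malloc(4 * n)
  assert(mallocArraySize(Arg{Opaque, 0}, 4) == 1);
}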
bool isDef() const { return Value.getInt() == Def; } - /// isNonLocal - Return true if this MemDepResult represents an query that + /// isNonLocal - Return true if this MemDepResult represents a query that /// is transparent to the start of the block, but where a non-local hasn't /// been done. bool isNonLocal() const { return Value.getInt() == NonLocal; } diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index 35bd821994889..66ab3ea5caf1f 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -21,6 +21,7 @@ namespace llvm { class LoopPass; class ModulePass; class Pass; + class PassInfo; class LibCallInfo; //===--------------------------------------------------------------------===// @@ -71,6 +72,13 @@ namespace llvm { /// FunctionPass *createLibCallAliasAnalysisPass(LibCallInfo *LCI); + //===--------------------------------------------------------------------===// + // + // createScalarEvolutionAliasAnalysisPass - This pass implements a simple + // alias analysis using ScalarEvolution queries. + // + FunctionPass *createScalarEvolutionAliasAnalysisPass(); + //===--------------------------------------------------------------------===// // // createAndersensPass - This pass implements Andersen's interprocedural alias @@ -91,6 +99,20 @@ namespace llvm { // ImmutablePass *createNoProfileInfoPass(); + //===--------------------------------------------------------------------===// + // + // createProfileEstimatorPass - This pass estimates profiling information + // instead of loading it from a previous run. + // + FunctionPass *createProfileEstimatorPass(); + extern const PassInfo *ProfileEstimatorPassID; + + //===--------------------------------------------------------------------===// + // + // createProfileVerifierPass - This pass verifies profiling information. + // + FunctionPass *createProfileVerifierPass(); + //===--------------------------------------------------------------------===// // // createDSAAPass - This pass implements simple context sensitive alias diff --git a/include/llvm/Analysis/PointerTracking.h b/include/llvm/Analysis/PointerTracking.h new file mode 100644 index 0000000000000..a14bbf0290ea5 --- /dev/null +++ b/include/llvm/Analysis/PointerTracking.h @@ -0,0 +1,131 @@ +//===- PointerTracking.h - Pointer Bounds Tracking --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tracking of pointer bounds. +// It knows that the libc functions "calloc" and "realloc" allocate memory, thus +// you should avoid using this pass if they mean something else for your +// language. +// +// All methods assume that the pointer is not NULL, if it is then the returned +// allocation size is wrong, and the result from checkLimits is wrong too. +// It also assumes that pointers are valid, and that it is not analyzing a +// use-after-free scenario. +// Due to these limitations the "size" returned by these methods should be +// considered as either 0 or the returned size. +// +// Another analysis pass should be used to find use-after-free/NULL dereference +// bugs. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_POINTERTRACKING_H
+#define LLVM_ANALYSIS_POINTERTRACKING_H
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/PredIteratorCache.h"
+
+namespace llvm {
+  class DominatorTree;
+  class ScalarEvolution;
+  class SCEV;
+  class Loop;
+  class LoopInfo;
+  class TargetData;
+
+  // Result from solver, assuming pointer is not NULL,
+  // and it is not a use-after-free situation.
+  enum SolverResult {
+    AlwaysFalse, // always false with above constraints
+    AlwaysTrue,  // always true with above constraints
+    Unknown // it can sometimes be true, sometimes false, or it is undecided
+  };
+
+  class PointerTracking : public FunctionPass {
+  public:
+    typedef ICmpInst::Predicate Predicate;
+    static char ID;
+    PointerTracking();
+
+    virtual bool doInitialization(Module &M);
+
+    // If this pointer directly points to an allocation, return
+    // the number of elements of type Ty allocated.
+    // Otherwise return CouldNotCompute.
+    // Since allocations can fail by returning NULL, the real element count
+    // for every allocation is either 0 or the value returned by this function.
+    const SCEV *getAllocationElementCount(Value *P) const;
+
+    // Same as getAllocationElementCount() but returns the size in bytes.
+    // We consider one byte as 8 bits.
+    const SCEV *getAllocationSizeInBytes(Value *V) const;
+
+    // Given a Pointer, determine a base pointer of known size, and an offset
+    // therefrom.
+    // When unable to determine, sets Base to NULL, and BaseSize/Offset to
+    // CouldNotCompute.
+    // BaseSize and Offset are in bytes: Pointer == Base + Offset
+    void getPointerOffset(Value *Pointer, Value *&Base, const SCEV *&BaseSize,
+                          const SCEV *&Offset) const;
+
+    // Compares the two scalar evolution expressions according to the
+    // predicate, and if it can prove that the result is always true or
+    // always false return AlwaysTrue/AlwaysFalse. Otherwise it returns
+    // Unknown.
+    enum SolverResult compareSCEV(const SCEV *A, Predicate Pred, const SCEV *B,
+                                  const Loop *L);
+
+    // Determines whether the condition LHS Pred1 RHS is sufficient
+    // for the condition A Pred2 B to hold.
+    // Currently only ULT/ULE is supported.
+    // This errs on the side of returning false.
+    bool conditionSufficient(const SCEV *LHS, Predicate Pred1, const SCEV *RHS,
+                             const SCEV *A, Predicate Pred2, const SCEV *B,
+                             const Loop *L);
+
+    // Determines whether Offset is known to be always in [0, Limit) bounds.
+    // This errs on the side of returning Unknown.
+    enum SolverResult checkLimits(const SCEV *Offset, const SCEV *Limit,
+                                  BasicBlock *BB);
+
+    virtual bool runOnFunction(Function &F);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    void print(raw_ostream &OS, const Module* = 0) const;
+  private:
+    Function *FF;
+    TargetData *TD;
+    ScalarEvolution *SE;
+    LoopInfo *LI;
+    DominatorTree *DT;
+
+    Function *callocFunc;
+    Function *reallocFunc;
+    PredIteratorCache predCache;
+
+    SmallPtrSet<const Value*, 4> analyzing;
+
+    enum SolverResult isLoopGuardedBy(const Loop *L, Predicate Pred,
+                                      const SCEV *A, const SCEV *B) const;
+    static bool isMonotonic(const SCEV *S);
+    bool scevPositive(const SCEV *A, const Loop *L, bool strict=true) const;
+    bool conditionSufficient(Value *Cond, bool negated,
+                             const SCEV *A, Predicate Pred, const SCEV *B);
+    Value *getConditionToReach(BasicBlock *A,
+                               DomTreeNodeBase<BasicBlock> *B,
+                               bool &negated);
+    Value *getConditionToReach(BasicBlock *A,
+                               BasicBlock *B,
+                               bool &negated);
+    const SCEV *computeAllocationCount(Value *P, const Type *&Ty) const;
+    const SCEV *computeAllocationCountForType(Value *P, const Type *Ty) const;
+  };
+}
+#endif
+
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index cd6af74024a54..171cfdb2eac38 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -49,6 +49,14 @@ struct PostDominatorTree : public FunctionPass {
    return DT->getNode(BB);
  }

+  inline bool dominates(DomTreeNode* A, DomTreeNode* B) const {
+    return DT->dominates(A, B);
+  }
+
+  inline bool dominates(const BasicBlock* A, const BasicBlock* B) const {
+    return DT->dominates(A, B);
+  }
+
  inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const {
    return DT->properlyDominates(A, B);
  }
@@ -57,9 +65,11 @@ struct PostDominatorTree : public FunctionPass {
    return DT->properlyDominates(A, B);
  }

-  virtual void print(std::ostream &OS, const Module* M= 0) const {
-    DT->print(OS, M);
+  virtual void releaseMemory() {
+    DT->releaseMemory();
  }
+
+  virtual void print(raw_ostream &OS, const Module*) const;
};

FunctionPass* createPostDomTree();
diff --git a/include/llvm/Analysis/ProfileInfo.h b/include/llvm/Analysis/ProfileInfo.h
index ff83f97ee042d..2a80f3d4c43a9 100644
--- a/include/llvm/Analysis/ProfileInfo.h
+++ b/include/llvm/Analysis/ProfileInfo.h
@@ -14,54 +14,123 @@
 //
 // Note that to be useful, all profile-based optimizations should preserve
 // ProfileInfo, which requires that they notify it when changes to the CFG are
-// made.
+// made. (This is not implemented yet.)
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_ANALYSIS_PROFILEINFO_H
 #define LLVM_ANALYSIS_PROFILEINFO_H

+#include "llvm/BasicBlock.h"
+#include <cassert>
 #include <string>
 #include <map>

 namespace llvm {
-  class BasicBlock;
+  class Function;
   class Pass;
+  class raw_ostream;

-  /// ProfileInfo Class - This class holds and maintains edge profiling
+  /// ProfileInfo Class - This class holds and maintains profiling
   /// information for some unit of code.
   class ProfileInfo {
+  public:
+    // Types for handling profiling information.
+    typedef std::pair<const BasicBlock*, const BasicBlock*> Edge;
+    typedef std::pair<Edge, double> EdgeWeight;
+    typedef std::map<Edge, double> EdgeWeights;
+    typedef std::map<const BasicBlock*, double> BlockCounts;
+
   protected:
-    // EdgeCounts - Count the number of times a transition between two blocks is
-    // executed. As a special case, we also hold an edge from the null
-    // BasicBlock to the entry block to indicate how many times the function was
-    // entered.
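A minimal sketch of driving the PointerTracking interface above from a client pass; P, BB, and the enclosing pass are hypothetical, and the pass is assumed to have declared AU.addRequired<PointerTracking>():

  PointerTracking &PT = getAnalysis<PointerTracking>();
  Value *Base;
  const SCEV *BaseSize, *Offset;
  PT.getPointerOffset(P, Base, BaseSize, Offset);
  // Base is NULL when no base pointer of known size could be determined.
  if (Base && PT.checkLimits(Offset, BaseSize, BB) == AlwaysTrue) {
    // Under this pass's assumptions, the access at P is provably within
    // [Base, Base + BaseSize).
  }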
-    std::map<std::pair<BasicBlock*, BasicBlock*>, unsigned> EdgeCounts;
+    // EdgeInformation - Count the number of times a transition between two
+    // blocks is executed. As a special case, we also hold an edge from the
+    // null BasicBlock to the entry block to indicate how many times the
+    // function was entered.
+    std::map<const Function*, EdgeWeights> EdgeInformation;
+
+    // BlockInformation - Count the number of times a block is executed.
+    std::map<const Function*, BlockCounts> BlockInformation;
+
+    // FunctionInformation - Count the number of times a function is executed.
+    std::map<const Function*, double> FunctionInformation;
   public:
     static char ID; // Class identification, replacement for typeinfo
     virtual ~ProfileInfo(); // We want to be subclassed

+    // MissingValue - The value that is returned for execution counts in case
+    // no value is available.
+    static const double MissingValue;
+
+    // getFunction() - Returns the Function for an Edge, checking for validity.
+    static const Function* getFunction(Edge e) {
+      if (e.first) {
+        return e.first->getParent();
+      } else if (e.second) {
+        return e.second->getParent();
+      }
+      assert(0 && "Invalid ProfileInfo::Edge");
+      return (const Function*)0;
+    }
+
+    // getEdge() - Creates an Edge from two BasicBlocks.
+    static Edge getEdge(const BasicBlock *Src, const BasicBlock *Dest) {
+      return std::make_pair(Src, Dest);
+    }
+
     //===------------------------------------------------------------------===//
     /// Profile Information Queries
     ///
-    unsigned getExecutionCount(BasicBlock *BB) const;
+    double getExecutionCount(const Function *F);
+
+    double getExecutionCount(const BasicBlock *BB);
+
+    double getEdgeWeight(Edge e) const {
+      std::map<const Function*, EdgeWeights>::const_iterator J =
+        EdgeInformation.find(getFunction(e));
+      if (J == EdgeInformation.end()) return MissingValue;

-    unsigned getEdgeWeight(BasicBlock *Src, BasicBlock *Dest) const {
-      std::map<std::pair<BasicBlock*, BasicBlock*>, unsigned>::const_iterator I=
-        EdgeCounts.find(std::make_pair(Src, Dest));
-      return I != EdgeCounts.end() ? I->second : 0;
+      EdgeWeights::const_iterator I = J->second.find(e);
+      if (I == J->second.end()) return MissingValue;
+
+      return I->second;
+    }
+
+    EdgeWeights &getEdgeWeights (const Function *F) {
+      return EdgeInformation[F];
    }

    //===------------------------------------------------------------------===//
    /// Analysis Update Methods
    ///
+    void removeBlock(const BasicBlock *BB) {
+      std::map<const Function*, BlockCounts>::iterator J =
+        BlockInformation.find(BB->getParent());
+      if (J == BlockInformation.end()) return;
+
+      J->second.erase(BB);
+    }
+
+    void removeEdge(Edge e) {
+      std::map<const Function*, EdgeWeights>::iterator J =
+        EdgeInformation.find(getFunction(e));
+      if (J == EdgeInformation.end()) return;
+      J->second.erase(e);
+    }
+
+    void splitEdge(const BasicBlock *FirstBB, const BasicBlock *SecondBB,
+                   const BasicBlock *NewBB, bool MergeIdenticalEdges = false);
+
+    void replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB);
  };

  /// createProfileLoaderPass - This function returns a Pass that loads the
  /// profiling information for the module from the specified filename, making
  /// it available to the optimizers.
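A minimal sketch of the reworked query API above; Src and Dst are assumed to be const BasicBlock* and PI to come from getAnalysis<ProfileInfo>():

  ProfileInfo::Edge E = ProfileInfo::getEdge(Src, Dst);
  double W = PI.getEdgeWeight(E);
  // Weights are doubles now; absence is signalled by MissingValue, not 0.
  if (W != ProfileInfo::MissingValue) {
    // W approximates how often control flowed from Src to Dst.
  }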
  Pass *createProfileLoaderPass(const std::string &Filename);
+
+  raw_ostream& operator<<(raw_ostream &O, ProfileInfo::Edge E);
+
} // End llvm namespace

#endif
diff --git a/include/llvm/Analysis/ProfileInfoLoader.h b/include/llvm/Analysis/ProfileInfoLoader.h
index 9076fbc4fb959..9e0c393c428fb 100644
--- a/include/llvm/Analysis/ProfileInfoLoader.h
+++ b/include/llvm/Analysis/ProfileInfoLoader.h
@@ -27,11 +27,13 @@ class Function;
 class BasicBlock;

 class ProfileInfoLoader {
+  const std::string &Filename;
   Module &M;
   std::vector<std::string> CommandLines;
   std::vector<unsigned> FunctionCounts;
   std::vector<unsigned> BlockCounts;
   std::vector<unsigned> EdgeCounts;
+  std::vector<unsigned> OptimalEdgeCounts;
   std::vector<BasicBlock*> BBTrace;
   bool Warned;
 public:
@@ -40,49 +42,41 @@ public:
   ProfileInfoLoader(const char *ToolName, const std::string &Filename,
                     Module &M);

+  static const unsigned Uncounted;
+
   unsigned getNumExecutions() const { return CommandLines.size(); }
   const std::string &getExecution(unsigned i) const { return CommandLines[i]; }

-  // getFunctionCounts - This method is used by consumers of function counting
-  // information. If we do not directly have function count information, we
-  // compute it from other, more refined, types of profile information.
-  //
-  void getFunctionCounts(std::vector<std::pair<Function*, unsigned> > &Counts);
+  const std::string &getFileName() const { return Filename; }

-  // hasAccurateBlockCounts - Return true if we can synthesize accurate block
-  // frequency information from whatever we have.
+  // getRawFunctionCounts - This method is used by consumers of function
+  // counting information.
   //
-  bool hasAccurateBlockCounts() const {
-    return !BlockCounts.empty() || !EdgeCounts.empty();
+  const std::vector<unsigned> &getRawFunctionCounts() const {
+    return FunctionCounts;
   }

-  // hasAccurateEdgeCounts - Return true if we can synthesize accurate edge
-  // frequency information from whatever we have.
+  // getRawBlockCounts - This method is used by consumers of block counting
+  // information.
   //
-  bool hasAccurateEdgeCounts() const {
-    return !EdgeCounts.empty();
+  const std::vector<unsigned> &getRawBlockCounts() const {
+    return BlockCounts;
   }

-  // getBlockCounts - This method is used by consumers of block counting
-  // information. If we do not directly have block count information, we
-  // compute it from other, more refined, types of profile information.
-  //
-  void getBlockCounts(std::vector<std::pair<BasicBlock*, unsigned> > &Counts);

   // getRawEdgeCounts - This method is used by consumers of edge counting
-  // information. If we do not directly have edge count information, we compute
-  // it from other, more refined, types of profile information.
+  // information.
   //
-  // Edges are represented as a pair, where the first element is the basic block
-  // and the second element is the successor number.
-  //
-  typedef std::pair<BasicBlock*, unsigned> Edge;
-  void getEdgeCounts(std::vector<std::pair<Edge, unsigned> > &Counts);
+  const std::vector<unsigned> &getRawEdgeCounts() const {
+    return EdgeCounts;
+  }

-  // getBBTrace - This method is used by consumers of basic-block trace
-  // information.
+  // getRawOptimalEdgeCounts - This method is used by consumers of optimal
+  // edge counting information.
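A minimal sketch of the raw-count interface above; the tool name and profile file name are placeholders, and M is an existing Module:

  ProfileInfoLoader PIL("example-tool", "llvmprof.out", M);
  // The loader no longer synthesizes missing kinds of counts; consumers
  // receive the raw vectors and interpret them themselves.
  const std::vector<unsigned> &FC = PIL.getRawFunctionCounts();
  for (unsigned i = 0, e = FC.size(); i != e; ++i)
    ; // FC[i] is the recorded execution count of the i'th function.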
  //
-  void getBBTrace(std::vector<BasicBlock*> &Trace);
+  const std::vector<unsigned> &getRawOptimalEdgeCounts() const {
+    return OptimalEdgeCounts;
+  }
+
};

} // End llvm namespace
diff --git a/include/llvm/Analysis/ProfileInfoTypes.h b/include/llvm/Analysis/ProfileInfoTypes.h
index f311f8cb90c5c..0d531d5c5f88d 100644
--- a/include/llvm/Analysis/ProfileInfoTypes.h
+++ b/include/llvm/Analysis/ProfileInfoTypes.h
@@ -22,7 +22,8 @@ enum ProfilingType {
  BlockInfo = 3, /* Block profiling information */
  EdgeInfo = 4, /* Edge profiling information */
  PathInfo = 5, /* Path profiling information */
-  BBTraceInfo = 6 /* Basic block trace information */
+  BBTraceInfo = 6, /* Basic block trace information */
+  OptEdgeInfo = 7 /* Edge profiling information, optimal version */
};

#endif /* LLVM_ANALYSIS_PROFILEINFOTYPES_H */
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 9da5c59a5e540..ed5d18eaf9817 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -22,37 +22,50 @@
 #define LLVM_ANALYSIS_SCALAREVOLUTION_H

 #include "llvm/Pass.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/ConstantRange.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/DenseMap.h"
-#include <iosfwd>
+#include <map>

 namespace llvm {
   class APInt;
+  class Constant;
   class ConstantInt;
+  class DominatorTree;
   class Type;
   class ScalarEvolution;
   class TargetData;
+  class LLVMContext;
+  class Loop;
+  class LoopInfo;
+  class Operator;

   /// SCEV - This class represents an analyzed expression in the program. These
   /// are opaque objects that the client is not allowed to do much with
   /// directly.
   ///
-  class SCEV : public FoldingSetNode {
-    const unsigned SCEVType;      // The SCEV baseclass this node corresponds to
+  class SCEV : public FastFoldingSetNode {
+    // The SCEV baseclass this node corresponds to
+    const unsigned short SCEVType;
+
+  protected:
+    /// SubclassData - This field is initialized to zero and may be used in
+    /// subclasses to store miscellaneous information.
+    unsigned short SubclassData;
+
+  private:
     SCEV(const SCEV &);            // DO NOT IMPLEMENT
     void operator=(const SCEV &);  // DO NOT IMPLEMENT
   protected:
     virtual ~SCEV();
   public:
-    explicit SCEV(unsigned SCEVTy) :
-      SCEVType(SCEVTy) {}
-
-    virtual void Profile(FoldingSetNodeID &ID) const = 0;
+    explicit SCEV(const FoldingSetNodeID &ID, unsigned SCEVTy) :
+      FastFoldingSetNode(ID), SCEVType(SCEVTy), SubclassData(0) {}

     unsigned getSCEVType() const { return SCEVType; }

@@ -83,26 +96,22 @@ namespace llvm {
     ///
     bool isAllOnesValue() const;

-    /// replaceSymbolicValuesWithConcrete - If this SCEV internally references
-    /// the symbolic value "Sym", construct and return a new SCEV that produces
-    /// the same value, but which uses the concrete value Conc instead of the
-    /// symbolic value. If this SCEV does not use the symbolic value, it
-    /// returns itself.
-    virtual const SCEV*
-    replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                      const SCEV* Conc,
-                                      ScalarEvolution &SE) const = 0;
+    /// hasOperand - Test whether this SCEV has Op as a direct or
+    /// indirect operand.
+    virtual bool hasOperand(const SCEV *Op) const = 0;

     /// dominates - Return true if the elements that make up this SCEV dominate
     /// the specified basic block.
    virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const = 0;

+    /// properlyDominates - Return true if the elements that make up this SCEV
+    /// properly dominate the specified basic block.
+    virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const = 0;
+
    /// print - Print out the internal representation of this scalar to the
    /// specified stream. This should really only be used for debugging
    /// purposes.
    virtual void print(raw_ostream &OS) const = 0;
-    void print(std::ostream &OS) const;
-    void print(std::ostream *OS) const { if (OS) print(*OS); }

    /// dump - This method is used for debugging.
    ///
@@ -114,11 +123,6 @@ namespace llvm {
    return OS;
  }

-  inline std::ostream &operator<<(std::ostream &OS, const SCEV &S) {
-    S.print(OS);
-    return OS;
-  }
-
  /// SCEVCouldNotCompute - An object of this class is returned by queries that
  /// could not be answered. For example, if you ask for the number of
  /// iterations of a linked-list traversal loop, you will get one of these.
@@ -128,20 +132,20 @@ namespace llvm {
    SCEVCouldNotCompute();

    // None of these methods are valid for this object.
-    virtual void Profile(FoldingSetNodeID &ID) const;
    virtual bool isLoopInvariant(const Loop *L) const;
    virtual const Type *getType() const;
    virtual bool hasComputableLoopEvolution(const Loop *L) const;
    virtual void print(raw_ostream &OS) const;
-    virtual const SCEV*
-    replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                      const SCEV* Conc,
-                                      ScalarEvolution &SE) const;
+    virtual bool hasOperand(const SCEV *Op) const;

    virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const {
      return true;
    }

+    virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
+      return true;
+    }
+
    /// Methods for support type inquiry through isa, cast, and dyn_cast:
    static inline bool classof(const SCEVCouldNotCompute *S) { return true; }
    static bool classof(const SCEV *S);
@@ -163,7 +167,7 @@ namespace llvm {
    };

    friend class SCEVCallbackVH;
-    friend class SCEVExpander;
+    friend struct SCEVExpander;

    /// F - The function we are analyzing.
    ///
@@ -183,7 +187,7 @@ namespace llvm {
    /// Scalars - This is a cache of the scalars we have analyzed so far.
    ///
-    std::map<SCEVCallbackVH, const SCEV*> Scalars;
+    std::map<SCEVCallbackVH, const SCEV *> Scalars;

    /// BackedgeTakenInfo - Information about the backedge-taken count
    /// of a loop. This currently includes an exact count and a maximum count.
    ///
    struct BackedgeTakenInfo {
      /// Exact - An expression indicating the exact backedge-taken count of
      /// the loop if it is known, or a SCEVCouldNotCompute otherwise.
-      const SCEV* Exact;
+      const SCEV *Exact;

-      /// Exact - An expression indicating the least maximum backedge-taken
+      /// Max - An expression indicating the least maximum backedge-taken
      /// count of the loop that is known, or a SCEVCouldNotCompute.
-      const SCEV* Max;
+      const SCEV *Max;

-      /*implicit*/ BackedgeTakenInfo(const SCEV* exact) :
+      /*implicit*/ BackedgeTakenInfo(const SCEV *exact) :
        Exact(exact), Max(exact) {}

-      BackedgeTakenInfo(const SCEV* exact, const SCEV* max) :
+      BackedgeTakenInfo(const SCEV *exact, const SCEV *max) :
        Exact(exact), Max(max) {}

      /// hasAnyInfo - Test whether this BackedgeTakenInfo contains any
@@ -223,37 +227,42 @@ namespace llvm {
    /// exit value.
    std::map<PHINode*, Constant*> ConstantEvolutionLoopExitValue;

-    /// ValuesAtScopes - This map contains entries for all the instructions
-    /// that we attempt to compute getSCEVAtScope information for without
-    /// using SCEV techniques, which can be expensive.
-    std::map<Value*, std::map<const Loop *, Constant *> > ValuesAtScopes;
+    /// ValuesAtScopes - This map contains entries for all the expressions
+    /// that we attempt to compute getSCEVAtScope information for, which can
+    /// be expensive in extreme cases.
+    std::map<const SCEV *, std::map<const Loop *, const SCEV *> > ValuesAtScopes;

    /// createSCEV - We know that there is no SCEV for the specified value.
    /// Analyze the expression.
-    const SCEV* createSCEV(Value *V);
+    const SCEV *createSCEV(Value *V);

    /// createNodeForPHI - Provide the special handling we need to analyze PHI
    /// SCEVs.
-    const SCEV* createNodeForPHI(PHINode *PN);
+    const SCEV *createNodeForPHI(PHINode *PN);

    /// createNodeForGEP - Provide the special handling we need to analyze GEP
    /// SCEVs.
-    const SCEV* createNodeForGEP(User *GEP);
+    const SCEV *createNodeForGEP(Operator *GEP);
+
+    /// computeSCEVAtScope - Implementation code for getSCEVAtScope; called
+    /// at most once for each SCEV+Loop pair.
+    ///
+    const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);

-    /// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value
-    /// for the specified instruction and replaces any references to the
-    /// symbolic value SymName with the specified value. This is used during
-    /// PHI resolution.
-    void ReplaceSymbolicValueWithConcrete(Instruction *I,
-                                          const SCEV* SymName,
-                                          const SCEV* NewVal);
+    /// ForgetSymbolicName - This looks up computed SCEV values for all
+    /// instructions that depend on the given instruction and removes them from
+    /// the Scalars map if they reference SymName. This is used during PHI
+    /// resolution.
+    void ForgetSymbolicName(Instruction *I, const SCEV *SymName);

    /// getBECount - Subtract the end and start values and divide by the step,
    /// rounding up, to get the number of times the backedge is executed. Return
    /// CouldNotCompute if an intermediate computation overflows.
-    const SCEV* getBECount(const SCEV* Start,
-                           const SCEV* End,
-                           const SCEV* Step);
+    const SCEV *getBECount(const SCEV *Start,
+                           const SCEV *End,
+                           const SCEV *Step,
+                           bool NoWrap);

    /// getBackedgeTakenInfo - Return the BackedgeTakenInfo for the given
    /// loop, lazily computing new values if the loop hasn't been analyzed
@@ -290,31 +299,32 @@ namespace llvm {
                                        BasicBlock *FBB);

    /// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition
-    /// of 'icmp op load X, cst', try to see if we can compute the trip count.
-    const SCEV*
+    /// of 'icmp op load X, cst', try to see if we can compute the
+    /// backedge-taken count.
+    const SCEV *
      ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI,
                                                   Constant *RHS,
                                                   const Loop *L,
                                                   ICmpInst::Predicate p);

-    /// ComputeBackedgeTakenCountExhaustively - If the trip is known to execute
+    /// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute
    /// a constant number of times (the condition evolves only from constants),
    /// try to evaluate a few iterations of the loop until the exit
    /// condition gets a value of ExitWhen (true or false). If we cannot
-    /// evaluate the trip count of the loop, return CouldNotCompute.
-    const SCEV* ComputeBackedgeTakenCountExhaustively(const Loop *L,
+    /// evaluate the backedge-taken count of the loop, return CouldNotCompute.
+    const SCEV *ComputeBackedgeTakenCountExhaustively(const Loop *L,
                                                      Value *Cond,
                                                      bool ExitWhen);

    /// HowFarToZero - Return the number of times a backedge comparing the
    /// specified value to zero will execute. If not computable, return
    /// CouldNotCompute.
-    const SCEV* HowFarToZero(const SCEV *V, const Loop *L);
+    const SCEV *HowFarToZero(const SCEV *V, const Loop *L);

    /// HowFarToNonZero - Return the number of times a backedge checking the
    /// specified value for nonzero will execute. If not computable, return
    /// CouldNotCompute.
-    const SCEV* HowFarToNonZero(const SCEV *V, const Loop *L);
+    const SCEV *HowFarToNonZero(const SCEV *V, const Loop *L);

    /// HowManyLessThans - Return the number of times a backedge containing the
    /// specified less-than comparison will execute. If not computable, return
@@ -332,11 +342,25 @@ namespace llvm {
    /// found.
    BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);

-    /// isNecessaryCond - Test whether the given Cond value is a condition
-    /// which is at least as strict as the one described by Pred, LHS, and RHS.
-    bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred,
-                         const SCEV *LHS, const SCEV *RHS,
-                         bool Inverse);
+    /// isImpliedCond - Test whether the condition described by Pred, LHS,
+    /// and RHS is true whenever the given Cond value evaluates to true.
+    bool isImpliedCond(Value *Cond, ICmpInst::Predicate Pred,
+                       const SCEV *LHS, const SCEV *RHS,
+                       bool Inverse);
+
+    /// isImpliedCondOperands - Test whether the condition described by Pred,
+    /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
+    /// and FoundRHS is true.
+    bool isImpliedCondOperands(ICmpInst::Predicate Pred,
+                               const SCEV *LHS, const SCEV *RHS,
+                               const SCEV *FoundLHS, const SCEV *FoundRHS);
+
+    /// isImpliedCondOperandsHelper - Test whether the condition described by
+    /// Pred, LHS, and RHS is true whenever the condition described by Pred,
+    /// FoundLHS, and FoundRHS is true.
+    bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+                                     const SCEV *LHS, const SCEV *RHS,
+                                     const SCEV *FoundLHS, const SCEV *FoundRHS);

    /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
    /// in the header of its containing loop, we know the loop executes a
@@ -345,15 +369,12 @@ namespace llvm {
    Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs,
                                                const Loop *L);

-    /// forgetLoopPHIs - Delete the memoized SCEVs associated with the
-    /// PHI nodes in the given loop. This is used when the trip count of
-    /// the loop may have changed.
-    void forgetLoopPHIs(const Loop *L);
-
  public:
    static char ID; // Pass identification, replacement for typeid
    ScalarEvolution();

+    LLVMContext &getContext() const { return F->getContext(); }
+
    /// isSCEVable - Test if values of the given type are analyzable within
    /// the SCEV framework. This primarily includes integer types, and it
    /// can optionally include pointer types if the ScalarEvolution class
@@ -370,127 +391,129 @@ namespace llvm {
    /// this is the pointer-sized integer type.
    const Type *getEffectiveSCEVType(const Type *Ty) const;

-    /// getSCEV - Return a SCEV expression handle for the full generality of the
+    /// getSCEV - Return a SCEV expression for the full generality of the
    /// specified expression.
- const SCEV* getSCEV(Value *V); - - const SCEV* getConstant(ConstantInt *V); - const SCEV* getConstant(const APInt& Val); - const SCEV* getConstant(const Type *Ty, uint64_t V, bool isSigned = false); - const SCEV* getTruncateExpr(const SCEV* Op, const Type *Ty); - const SCEV* getZeroExtendExpr(const SCEV* Op, const Type *Ty); - const SCEV* getSignExtendExpr(const SCEV* Op, const Type *Ty); - const SCEV* getAnyExtendExpr(const SCEV* Op, const Type *Ty); - const SCEV* getAddExpr(SmallVectorImpl &Ops); - const SCEV* getAddExpr(const SCEV* LHS, const SCEV* RHS) { - SmallVector Ops; + const SCEV *getSCEV(Value *V); + + const SCEV *getConstant(ConstantInt *V); + const SCEV *getConstant(const APInt& Val); + const SCEV *getConstant(const Type *Ty, uint64_t V, bool isSigned = false); + const SCEV *getTruncateExpr(const SCEV *Op, const Type *Ty); + const SCEV *getZeroExtendExpr(const SCEV *Op, const Type *Ty); + const SCEV *getSignExtendExpr(const SCEV *Op, const Type *Ty); + const SCEV *getAnyExtendExpr(const SCEV *Op, const Type *Ty); + const SCEV *getAddExpr(SmallVectorImpl &Ops, + bool HasNUW = false, bool HasNSW = false); + const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, + bool HasNUW = false, bool HasNSW = false) { + SmallVector Ops; Ops.push_back(LHS); Ops.push_back(RHS); - return getAddExpr(Ops); + return getAddExpr(Ops, HasNUW, HasNSW); } - const SCEV* getAddExpr(const SCEV* Op0, const SCEV* Op1, - const SCEV* Op2) { - SmallVector Ops; + const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, + const SCEV *Op2, + bool HasNUW = false, bool HasNSW = false) { + SmallVector Ops; Ops.push_back(Op0); Ops.push_back(Op1); Ops.push_back(Op2); - return getAddExpr(Ops); + return getAddExpr(Ops, HasNUW, HasNSW); } - const SCEV* getMulExpr(SmallVectorImpl &Ops); - const SCEV* getMulExpr(const SCEV* LHS, const SCEV* RHS) { - SmallVector Ops; + const SCEV *getMulExpr(SmallVectorImpl &Ops, + bool HasNUW = false, bool HasNSW = false); + const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, + bool HasNUW = false, bool HasNSW = false) { + SmallVector Ops; Ops.push_back(LHS); Ops.push_back(RHS); - return getMulExpr(Ops); + return getMulExpr(Ops, HasNUW, HasNSW); } - const SCEV* getUDivExpr(const SCEV* LHS, const SCEV* RHS); - const SCEV* getAddRecExpr(const SCEV* Start, const SCEV* Step, - const Loop *L); - const SCEV* getAddRecExpr(SmallVectorImpl &Operands, - const Loop *L); - const SCEV* getAddRecExpr(const SmallVectorImpl &Operands, - const Loop *L) { - SmallVector NewOp(Operands.begin(), Operands.end()); - return getAddRecExpr(NewOp, L); + const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step, + const Loop *L, + bool HasNUW = false, bool HasNSW = false); + const SCEV *getAddRecExpr(SmallVectorImpl &Operands, + const Loop *L, + bool HasNUW = false, bool HasNSW = false); + const SCEV *getAddRecExpr(const SmallVectorImpl &Operands, + const Loop *L, + bool HasNUW = false, bool HasNSW = false) { + SmallVector NewOp(Operands.begin(), Operands.end()); + return getAddRecExpr(NewOp, L, HasNUW, HasNSW); } - const SCEV* getSMaxExpr(const SCEV* LHS, const SCEV* RHS); - const SCEV* getSMaxExpr(SmallVectorImpl &Operands); - const SCEV* getUMaxExpr(const SCEV* LHS, const SCEV* RHS); - const SCEV* getUMaxExpr(SmallVectorImpl &Operands); - const SCEV* getSMinExpr(const SCEV* LHS, const SCEV* RHS); - const SCEV* getUMinExpr(const SCEV* LHS, const SCEV* RHS); - const SCEV* getUnknown(Value *V); - const SCEV* getCouldNotCompute(); + const SCEV 
*getSMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getSMaxExpr(SmallVectorImpl &Operands); + const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMaxExpr(SmallVectorImpl &Operands); + const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getFieldOffsetExpr(const StructType *STy, unsigned FieldNo); + const SCEV *getAllocSizeExpr(const Type *AllocTy); + const SCEV *getUnknown(Value *V); + const SCEV *getCouldNotCompute(); /// getNegativeSCEV - Return the SCEV object corresponding to -V. /// - const SCEV* getNegativeSCEV(const SCEV* V); + const SCEV *getNegativeSCEV(const SCEV *V); /// getNotSCEV - Return the SCEV object corresponding to ~V. /// - const SCEV* getNotSCEV(const SCEV* V); + const SCEV *getNotSCEV(const SCEV *V); /// getMinusSCEV - Return LHS-RHS. /// - const SCEV* getMinusSCEV(const SCEV* LHS, - const SCEV* RHS); + const SCEV *getMinusSCEV(const SCEV *LHS, + const SCEV *RHS); /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion /// of the input value to the specified type. If the type must be /// extended, it is zero extended. - const SCEV* getTruncateOrZeroExtend(const SCEV* V, const Type *Ty); + const SCEV *getTruncateOrZeroExtend(const SCEV *V, const Type *Ty); /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion /// of the input value to the specified type. If the type must be /// extended, it is sign extended. - const SCEV* getTruncateOrSignExtend(const SCEV* V, const Type *Ty); + const SCEV *getTruncateOrSignExtend(const SCEV *V, const Type *Ty); /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of /// the input value to the specified type. If the type must be extended, /// it is zero extended. The conversion must not be narrowing. - const SCEV* getNoopOrZeroExtend(const SCEV* V, const Type *Ty); + const SCEV *getNoopOrZeroExtend(const SCEV *V, const Type *Ty); /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of /// the input value to the specified type. If the type must be extended, /// it is sign extended. The conversion must not be narrowing. - const SCEV* getNoopOrSignExtend(const SCEV* V, const Type *Ty); + const SCEV *getNoopOrSignExtend(const SCEV *V, const Type *Ty); /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of /// the input value to the specified type. If the type must be extended, /// it is extended with unspecified bits. The conversion must not be /// narrowing. - const SCEV* getNoopOrAnyExtend(const SCEV* V, const Type *Ty); + const SCEV *getNoopOrAnyExtend(const SCEV *V, const Type *Ty); /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the /// input value to the specified type. The conversion must not be /// widening. - const SCEV* getTruncateOrNoop(const SCEV* V, const Type *Ty); + const SCEV *getTruncateOrNoop(const SCEV *V, const Type *Ty); /// getIntegerSCEV - Given a SCEVable type, create a constant for the /// specified signed integer value and return a SCEV for the constant. - const SCEV* getIntegerSCEV(int Val, const Type *Ty); + const SCEV *getIntegerSCEV(int Val, const Type *Ty); /// getUMaxFromMismatchedTypes - Promote the operands to the wider of /// the types using zero-extension, and then perform a umax operation /// with them. 
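A minimal sketch of the builder methods above, including the new no-wrap flags; SE, L, and IndVar are assumed to be a ScalarEvolution reference, a Loop*, and a Value* from the surrounding code:

  const SCEV *IV  = SE.getSCEV(IndVar);
  const SCEV *One = SE.getIntegerSCEV(1, IV->getType());
  // The HasNUW/HasNSW parameters let a caller record that the operation
  // cannot wrap; the folders may exploit this when simplifying.
  const SCEV *Next = SE.getAddExpr(IV, One, /*HasNUW=*/true, /*HasNSW=*/false);
  const SCEV *BTC  = SE.getBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(BTC)) {
    // The backedge-taken count is known; the trip count is BTC + 1.
  }
  (void)Next;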
-    const SCEV* getUMaxFromMismatchedTypes(const SCEV* LHS,
-                                           const SCEV* RHS);
+    const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS,
+                                           const SCEV *RHS);

    /// getUMinFromMismatchedTypes - Promote the operands to the wider of
    /// the types using zero-extension, and then perform a umin operation
    /// with them.
-    const SCEV* getUMinFromMismatchedTypes(const SCEV* LHS,
-                                           const SCEV* RHS);
-
-    /// hasSCEV - Return true if the SCEV for this value has already been
-    /// computed.
-    bool hasSCEV(Value *V) const;
+    const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS,
+                                           const SCEV *RHS);

-    /// setSCEV - Insert the specified SCEV into the map of current SCEVs for
-    /// the specified value.
-    void setSCEV(Value *V, const SCEV* H);
-
-    /// getSCEVAtScope - Return a SCEV expression handle for the specified value
+    /// getSCEVAtScope - Return a SCEV expression for the specified value
    /// at the specified scope in the program. The L value specifies a loop
    /// nest to evaluate the expression at, where null is the top-level or a
    /// specified loop is immediately inside of the loop.
@@ -500,18 +523,24 @@ namespace llvm {
    ///
    /// In the case that a relevant loop exit value cannot be computed, the
    /// original value V is returned.
-    const SCEV* getSCEVAtScope(const SCEV *S, const Loop *L);
+    const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L);

    /// getSCEVAtScope - This is a convenience function which does
    /// getSCEVAtScope(getSCEV(V), L).
-    const SCEV* getSCEVAtScope(Value *V, const Loop *L);
+    const SCEV *getSCEVAtScope(Value *V, const Loop *L);

    /// isLoopGuardedByCond - Test whether entry to the loop is protected by
    /// a conditional between LHS and RHS. This is used to help avoid max
-    /// expressions in loop trip counts.
+    /// expressions in loop trip counts, and to eliminate casts.
    bool isLoopGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
                             const SCEV *LHS, const SCEV *RHS);

+    /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+    /// protected by a conditional between LHS and RHS. This is used
+    /// to eliminate casts.
+    bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+                                     const SCEV *LHS, const SCEV *RHS);
+
    /// getBackedgeTakenCount - If the specified loop has a predictable
    /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
    /// object. The backedge-taken count is the number of times the loop header
@@ -523,12 +552,12 @@ namespace llvm {
    /// loop-invariant backedge-taken count (see
    /// hasLoopInvariantBackedgeTakenCount).
    ///
-    const SCEV* getBackedgeTakenCount(const Loop *L);
+    const SCEV *getBackedgeTakenCount(const Loop *L);

    /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
    /// return the least SCEV value that is known never to be less than the
    /// actual backedge taken count.
-    const SCEV* getMaxBackedgeTakenCount(const Loop *L);
+    const SCEV *getMaxBackedgeTakenCount(const Loop *L);

    /// hasLoopInvariantBackedgeTakenCount - Return true if the specified loop
    /// has an analyzable loop-invariant backedge-taken count.
@@ -545,24 +574,49 @@ namespace llvm {
    /// time, the minimum number of times S is divisible by 2. For example,
    /// given {4,+,8} it returns 2. If S is guaranteed to be 0, it returns the
    /// bitwidth of S.
-    uint32_t GetMinTrailingZeros(const SCEV* S);
+    uint32_t GetMinTrailingZeros(const SCEV *S);

-    /// GetMinLeadingZeros - Determine the minimum number of zero bits that S is
-    /// guaranteed to begin with (at every loop iteration).
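A minimal sketch of the range and predicate queries introduced in the hunk just below, which replace the old leading-zero and sign-bit helpers; A and B are assumed to be SCEVs obtained elsewhere via SE.getSCEV():

  ConstantRange UR = SE.getUnsignedRange(A);
  if (SE.isKnownNonNegative(A) &&
      SE.isKnownPredicate(ICmpInst::ICMP_ULT, A, B)) {
    // 0 <= A < B (unsigned) holds wherever the expressions are defined.
  }
  (void)UR;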
-    uint32_t GetMinLeadingZeros(const SCEV* S);
+    /// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+    ///
+    ConstantRange getUnsignedRange(const SCEV *S);

-    /// GetMinSignBits - Determine the minimum number of sign bits that S is
-    /// guaranteed to begin with.
-    uint32_t GetMinSignBits(const SCEV* S);
+    /// getSignedRange - Determine the signed range for a particular SCEV.
+    ///
+    ConstantRange getSignedRange(const SCEV *S);
+
+    /// isKnownNegative - Test if the given expression is known to be negative.
+    ///
+    bool isKnownNegative(const SCEV *S);
+
+    /// isKnownPositive - Test if the given expression is known to be positive.
+    ///
+    bool isKnownPositive(const SCEV *S);
+
+    /// isKnownNonNegative - Test if the given expression is known to be
+    /// non-negative.
+    ///
+    bool isKnownNonNegative(const SCEV *S);
+
+    /// isKnownNonPositive - Test if the given expression is known to be
+    /// non-positive.
+    ///
+    bool isKnownNonPositive(const SCEV *S);
+
+    /// isKnownNonZero - Test if the given expression is known to be
+    /// non-zero.
+    ///
+    bool isKnownNonZero(const SCEV *S);
+
+    /// isKnownPredicate - Test if the condition described by Pred, LHS, and
+    /// RHS is known to hold.
+    ///
+    bool isKnownPredicate(ICmpInst::Predicate Pred,
+                          const SCEV *LHS, const SCEV *RHS);

    virtual bool runOnFunction(Function &F);
    virtual void releaseMemory();
    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    void print(raw_ostream &OS, const Module* = 0) const;
-    virtual void print(std::ostream &OS, const Module* = 0) const;
-    void print(std::ostream *OS, const Module* M = 0) const {
-      if (OS) print(*OS, M);
-    }
+    virtual void print(raw_ostream &OS, const Module* = 0) const;

  private:
    FoldingSet<SCEV> UniqueSCEVs;
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 60a23c504310c..915227d77b511 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -17,13 +17,14 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/TargetFolder.h"
+#include <set>

 namespace llvm {
   /// SCEVExpander - This class uses information about analyzed scalars to
   /// rewrite expressions in canonical form.
   ///
   /// Clients should create an instance of this class when rewriting is needed,
-   /// and destroy it when finished to allow the release of the associated
+   /// and destroy it when finished to allow the release of the associated
   /// memory.
   struct SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
     ScalarEvolution &SE;
@@ -37,7 +38,8 @@ namespace llvm {
     friend struct SCEVVisitor<SCEVExpander, Value*>;
   public:
     explicit SCEVExpander(ScalarEvolution &se)
-      : SE(se), Builder(TargetFolder(se.TD)) {}
+      : SE(se), Builder(se.getContext(),
+                        TargetFolder(se.TD, se.getContext())) {}

     /// clear - Erase the contents of the InsertedExpressions map so that users
     /// trying to expand the same expression into multiple BasicBlocks or
@@ -53,12 +55,14 @@ namespace llvm {
     /// expandCodeFor - Insert code to directly compute the specified SCEV
     /// expression into the program. The inserted code is inserted into the
     /// specified block.
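A minimal sketch of using the expandCodeFor entry point described above; S, SE, and IP are assumed to come from the surrounding pass:

  SCEVExpander Expander(SE);
  // Materialize S as an instruction sequence before IP, converted to the
  // effective SCEV type if necessary.
  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
  Value *V = Expander.expandCodeFor(S, Ty, IP);
  (void)V;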
- Value *expandCodeFor(const SCEV* SH, const Type *Ty, Instruction *IP) { + Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *IP) { Builder.SetInsertPoint(IP->getParent(), IP); return expandCodeFor(SH, Ty); } private: + LLVMContext &getContext() const { return SE.getContext(); } + /// InsertBinop - Insert the specified binary operator, doing a small amount /// of work to avoid inserting an obviously redundant operation. Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS); @@ -70,8 +74,8 @@ namespace llvm { /// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP /// instead of using ptrtoint+arithmetic+inttoptr. - Value *expandAddToGEP(const SCEV* const *op_begin, - const SCEV* const *op_end, + Value *expandAddToGEP(const SCEV *const *op_begin, + const SCEV *const *op_end, const PointerType *PTy, const Type *Ty, Value *V); Value *expand(const SCEV *S); @@ -80,7 +84,7 @@ namespace llvm { /// expression into the program. The inserted code is inserted into the /// SCEVExpander's current insertion point. If a type is specified, the /// result will be expanded to have that type, with a cast if necessary. - Value *expandCodeFor(const SCEV* SH, const Type *Ty = 0); + Value *expandCodeFor(const SCEV *SH, const Type *Ty = 0); /// isInsertedInstruction - Return true if the specified instruction was /// inserted by the code rewriter. If so, the client should not modify the @@ -111,6 +115,10 @@ namespace llvm { Value *visitUMaxExpr(const SCEVUMaxExpr *S); + Value *visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S); + + Value *visitAllocSizeExpr(const SCEVAllocSizeExpr *S); + Value *visitUnknown(const SCEVUnknown *S) { return S->getValue(); } @@ -118,4 +126,3 @@ namespace llvm { } #endif - diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index c54c86556c36f..2c503506035e5 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { class ConstantInt; @@ -25,8 +26,8 @@ namespace llvm { // These should be ordered in terms of increasing complexity to make the // folders simpler. 
scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, - scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUnknown, - scCouldNotCompute + scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, + scFieldOffset, scAllocSize, scUnknown, scCouldNotCompute }; //===--------------------------------------------------------------------===// @@ -36,11 +37,9 @@ namespace llvm { friend class ScalarEvolution; ConstantInt *V; - explicit SCEVConstant(ConstantInt *v) : - SCEV(scConstant), V(v) {} + SCEVConstant(const FoldingSetNodeID &ID, ConstantInt *v) : + SCEV(ID, scConstant), V(v) {} public: - virtual void Profile(FoldingSetNodeID &ID) const; - ConstantInt *getValue() const { return V; } virtual bool isLoopInvariant(const Loop *L) const { @@ -53,16 +52,18 @@ namespace llvm { virtual const Type *getType() const; - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const { - return this; + virtual bool hasOperand(const SCEV *) const { + return false; } bool dominates(BasicBlock *BB, DominatorTree *DT) const { return true; } + bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + return true; + } + virtual void print(raw_ostream &OS) const; /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -77,15 +78,14 @@ namespace llvm { /// class SCEVCastExpr : public SCEV { protected: - const SCEV* Op; + const SCEV *Op; const Type *Ty; - SCEVCastExpr(unsigned SCEVTy, const SCEV* op, const Type *ty); + SCEVCastExpr(const FoldingSetNodeID &ID, + unsigned SCEVTy, const SCEV *op, const Type *ty); public: - virtual void Profile(FoldingSetNodeID &ID) const; - - const SCEV* getOperand() const { return Op; } + const SCEV *getOperand() const { return Op; } virtual const Type *getType() const { return Ty; } virtual bool isLoopInvariant(const Loop *L) const { @@ -96,8 +96,14 @@ namespace llvm { return Op->hasComputableLoopEvolution(L); } + virtual bool hasOperand(const SCEV *O) const { + return Op == O || Op->hasOperand(O); + } + virtual bool dominates(BasicBlock *BB, DominatorTree *DT) const; + virtual bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const; + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const SCEVCastExpr *S) { return true; } static inline bool classof(const SCEV *S) { @@ -114,18 +120,10 @@ namespace llvm { class SCEVTruncateExpr : public SCEVCastExpr { friend class ScalarEvolution; - SCEVTruncateExpr(const SCEV* op, const Type *ty); + SCEVTruncateExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty); public: - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const { - const SCEV* H = Op->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (H == Op) - return this; - return SE.getTruncateExpr(H, Ty); - } - virtual void print(raw_ostream &OS) const; /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -142,18 +140,10 @@ namespace llvm { class SCEVZeroExtendExpr : public SCEVCastExpr { friend class ScalarEvolution; - SCEVZeroExtendExpr(const SCEV* op, const Type *ty); + SCEVZeroExtendExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty); public: - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const { - const SCEV* H = Op->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (H == Op) - return this; - return SE.getZeroExtendExpr(H, Ty); - } - virtual void print(raw_ostream &OS) 
const; /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -170,18 +160,10 @@ namespace llvm { class SCEVSignExtendExpr : public SCEVCastExpr { friend class ScalarEvolution; - SCEVSignExtendExpr(const SCEV* op, const Type *ty); + SCEVSignExtendExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty); public: - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const { - const SCEV* H = Op->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (H == Op) - return this; - return SE.getSignExtendExpr(H, Ty); - } - virtual void print(raw_ostream &OS) const; /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -198,22 +180,23 @@ namespace llvm { /// class SCEVNAryExpr : public SCEV { protected: - SmallVector Operands; + SmallVector Operands; - SCEVNAryExpr(enum SCEVTypes T, const SmallVectorImpl &ops) - : SCEV(T), Operands(ops.begin(), ops.end()) {} + SCEVNAryExpr(const FoldingSetNodeID &ID, + enum SCEVTypes T, const SmallVectorImpl &ops) + : SCEV(ID, T), Operands(ops.begin(), ops.end()) {} public: - virtual void Profile(FoldingSetNodeID &ID) const; - unsigned getNumOperands() const { return (unsigned)Operands.size(); } - const SCEV* getOperand(unsigned i) const { + const SCEV *getOperand(unsigned i) const { assert(i < Operands.size() && "Operand index out of range!"); return Operands[i]; } - const SmallVectorImpl &getOperands() const { return Operands; } - typedef SmallVectorImpl::const_iterator op_iterator; + const SmallVectorImpl &getOperands() const { + return Operands; + } + typedef SmallVectorImpl::const_iterator op_iterator; op_iterator op_begin() const { return Operands.begin(); } op_iterator op_end() const { return Operands.end(); } @@ -238,10 +221,28 @@ namespace llvm { return HasVarying; } + virtual bool hasOperand(const SCEV *O) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (O == getOperand(i) || getOperand(i)->hasOperand(O)) + return true; + return false; + } + bool dominates(BasicBlock *BB, DominatorTree *DT) const; + bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const; + virtual const Type *getType() const { return getOperand(0)->getType(); } + bool hasNoUnsignedWrap() const { return SubclassData & (1 << 0); } + void setHasNoUnsignedWrap(bool B) { + SubclassData = (SubclassData & ~(1 << 0)) | (B << 0); + } + bool hasNoSignedWrap() const { return SubclassData & (1 << 1); } + void setHasNoSignedWrap(bool B) { + SubclassData = (SubclassData & ~(1 << 1)) | (B << 1); + } + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const SCEVNAryExpr *S) { return true; } static inline bool classof(const SCEV *S) { @@ -259,15 +260,12 @@ namespace llvm { /// class SCEVCommutativeExpr : public SCEVNAryExpr { protected: - SCEVCommutativeExpr(enum SCEVTypes T, - const SmallVectorImpl &ops) - : SCEVNAryExpr(T, ops) {} + SCEVCommutativeExpr(const FoldingSetNodeID &ID, + enum SCEVTypes T, + const SmallVectorImpl &ops) + : SCEVNAryExpr(ID, T, ops) {} public: - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const; - virtual const char *getOperationStr() const = 0; virtual void print(raw_ostream &OS) const; @@ -289,8 +287,9 @@ namespace llvm { class SCEVAddExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - explicit SCEVAddExpr(const SmallVectorImpl &ops) - : SCEVCommutativeExpr(scAddExpr, ops) { + SCEVAddExpr(const FoldingSetNodeID &ID, + const 
SmallVectorImpl &ops) + : SCEVCommutativeExpr(ID, scAddExpr, ops) { } public: @@ -309,8 +308,9 @@ namespace llvm { class SCEVMulExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - explicit SCEVMulExpr(const SmallVectorImpl &ops) - : SCEVCommutativeExpr(scMulExpr, ops) { + SCEVMulExpr(const FoldingSetNodeID &ID, + const SmallVectorImpl &ops) + : SCEVCommutativeExpr(ID, scMulExpr, ops) { } public: @@ -330,16 +330,14 @@ namespace llvm { class SCEVUDivExpr : public SCEV { friend class ScalarEvolution; - const SCEV* LHS; - const SCEV* RHS; - SCEVUDivExpr(const SCEV* lhs, const SCEV* rhs) - : SCEV(scUDivExpr), LHS(lhs), RHS(rhs) {} + const SCEV *LHS; + const SCEV *RHS; + SCEVUDivExpr(const FoldingSetNodeID &ID, const SCEV *lhs, const SCEV *rhs) + : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {} public: - virtual void Profile(FoldingSetNodeID &ID) const; - - const SCEV* getLHS() const { return LHS; } - const SCEV* getRHS() const { return RHS; } + const SCEV *getLHS() const { return LHS; } + const SCEV *getRHS() const { return RHS; } virtual bool isLoopInvariant(const Loop *L) const { return LHS->isLoopInvariant(L) && RHS->isLoopInvariant(L); @@ -350,19 +348,14 @@ namespace llvm { RHS->hasComputableLoopEvolution(L); } - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const { - const SCEV* L = LHS->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - const SCEV* R = RHS->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (L == LHS && R == RHS) - return this; - else - return SE.getUDivExpr(L, R); + virtual bool hasOperand(const SCEV *O) const { + return O == LHS || O == RHS || LHS->hasOperand(O) || RHS->hasOperand(O); } bool dominates(BasicBlock *BB, DominatorTree *DT) const; + bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const; + virtual const Type *getType() const; void print(raw_ostream &OS) const; @@ -389,25 +382,25 @@ namespace llvm { const Loop *L; - SCEVAddRecExpr(const SmallVectorImpl &ops, const Loop *l) - : SCEVNAryExpr(scAddRecExpr, ops), L(l) { + SCEVAddRecExpr(const FoldingSetNodeID &ID, + const SmallVectorImpl &ops, const Loop *l) + : SCEVNAryExpr(ID, scAddRecExpr, ops), L(l) { for (size_t i = 0, e = Operands.size(); i != e; ++i) assert(Operands[i]->isLoopInvariant(l) && "Operands of AddRec must be loop-invariant!"); } public: - virtual void Profile(FoldingSetNodeID &ID) const; - - const SCEV* getStart() const { return Operands[0]; } + const SCEV *getStart() const { return Operands[0]; } const Loop *getLoop() const { return L; } /// getStepRecurrence - This method constructs and returns the recurrence /// indicating how much this expression steps by. If this is a polynomial /// of degree N, it returns a chrec of degree N-1. - const SCEV* getStepRecurrence(ScalarEvolution &SE) const { + const SCEV *getStepRecurrence(ScalarEvolution &SE) const { if (isAffine()) return getOperand(1); - return SE.getAddRecExpr(SmallVector(op_begin()+1,op_end()), + return SE.getAddRecExpr(SmallVector(op_begin()+1, + op_end()), getLoop()); } @@ -435,7 +428,7 @@ namespace llvm { /// evaluateAtIteration - Return the value of this chain of recurrences at /// the specified iteration number. - const SCEV* evaluateAtIteration(const SCEV* It, ScalarEvolution &SE) const; + const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const; /// getNumIterationsInRange - Return the number of iterations of this loop /// that produce values in the specified constant range. 
Another way of @@ -443,12 +436,14 @@ namespace llvm { /// value is not in the condition, thus computing the exit count. If the /// iteration count can't be computed, an instance of SCEVCouldNotCompute is /// returned. - const SCEV* getNumIterationsInRange(ConstantRange Range, + const SCEV *getNumIterationsInRange(ConstantRange Range, ScalarEvolution &SE) const; - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const; + /// getPostIncExpr - Return an expression representing the value of + /// this expression one iteration of the loop ahead. + const SCEVAddRecExpr *getPostIncExpr(ScalarEvolution &SE) const { + return cast(SE.getAddExpr(this, getStepRecurrence(SE))); + } virtual void print(raw_ostream &OS) const; @@ -466,8 +461,12 @@ namespace llvm { class SCEVSMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - explicit SCEVSMaxExpr(const SmallVectorImpl &ops) - : SCEVCommutativeExpr(scSMaxExpr, ops) { + SCEVSMaxExpr(const FoldingSetNodeID &ID, + const SmallVectorImpl &ops) + : SCEVCommutativeExpr(ID, scSMaxExpr, ops) { + // Max never overflows. + setHasNoUnsignedWrap(true); + setHasNoSignedWrap(true); } public: @@ -487,8 +486,12 @@ namespace llvm { class SCEVUMaxExpr : public SCEVCommutativeExpr { friend class ScalarEvolution; - explicit SCEVUMaxExpr(const SmallVectorImpl &ops) - : SCEVCommutativeExpr(scUMaxExpr, ops) { + SCEVUMaxExpr(const FoldingSetNodeID &ID, + const SmallVectorImpl &ops) + : SCEVCommutativeExpr(ID, scUMaxExpr, ops) { + // Max never overflows. + setHasNoUnsignedWrap(true); + setHasNoSignedWrap(true); } public: @@ -501,22 +504,108 @@ namespace llvm { } }; + //===--------------------------------------------------------------------===// + /// SCEVTargetDataConstant - This node is the base class for representing + /// target-dependent values in a target-independent way. + /// + class SCEVTargetDataConstant : public SCEV { + protected: + const Type *Ty; + SCEVTargetDataConstant(const FoldingSetNodeID &ID, enum SCEVTypes T, + const Type *ty) : + SCEV(ID, T), Ty(ty) {} + + public: + virtual bool isLoopInvariant(const Loop *) const { return true; } + virtual bool hasComputableLoopEvolution(const Loop *) const { + return false; // not computable + } + + virtual bool hasOperand(const SCEV *) const { + return false; + } + + bool dominates(BasicBlock *, DominatorTree *) const { + return true; + } + + bool properlyDominates(BasicBlock *, DominatorTree *) const { + return true; + } + + virtual const Type *getType() const { return Ty; } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVTargetDataConstant *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scFieldOffset || + S->getSCEVType() == scAllocSize; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVFieldOffsetExpr - This node represents an offsetof expression. 
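A minimal sketch of consuming these node types through the usual isa/dyn_cast machinery; S and SE are assumed from the surrounding code:

  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Start = AR->getStart();
    const SCEV *Step  = AR->getStepRecurrence(SE);
    // getPostIncExpr steps the recurrence one iteration: {S,+,X} -> {S+X,+,X}.
    const SCEVAddRecExpr *Next = AR->getPostIncExpr(SE);
    (void)Start; (void)Step; (void)Next;
  }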
+ /// + class SCEVFieldOffsetExpr : public SCEVTargetDataConstant { + friend class ScalarEvolution; + + const StructType *STy; + unsigned FieldNo; + SCEVFieldOffsetExpr(const FoldingSetNodeID &ID, const Type *ty, + const StructType *sty, unsigned fieldno) : + SCEVTargetDataConstant(ID, scFieldOffset, ty), + STy(sty), FieldNo(fieldno) {} + + public: + const StructType *getStructType() const { return STy; } + unsigned getFieldNo() const { return FieldNo; } + + virtual void print(raw_ostream &OS) const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVFieldOffsetExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scFieldOffset; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVAllocSize - This node represents a sizeof expression. + /// + class SCEVAllocSizeExpr : public SCEVTargetDataConstant { + friend class ScalarEvolution; + + const Type *AllocTy; + SCEVAllocSizeExpr(const FoldingSetNodeID &ID, + const Type *ty, const Type *allocty) : + SCEVTargetDataConstant(ID, scAllocSize, ty), + AllocTy(allocty) {} + + public: + const Type *getAllocType() const { return AllocTy; } + + virtual void print(raw_ostream &OS) const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVAllocSizeExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scAllocSize; + } + }; //===--------------------------------------------------------------------===// /// SCEVUnknown - This means that we are dealing with an entirely unknown SCEV - /// value, and only represent it as it's LLVM Value. This is the "bottom" + /// value, and only represent it as its LLVM Value. This is the "bottom" /// value for the analysis. 
/// class SCEVUnknown : public SCEV { friend class ScalarEvolution; Value *V; - explicit SCEVUnknown(Value *v) : - SCEV(scUnknown), V(v) {} - - public: - virtual void Profile(FoldingSetNodeID &ID) const; + SCEVUnknown(const FoldingSetNodeID &ID, Value *v) : + SCEV(ID, scUnknown), V(v) {} + public: Value *getValue() const { return V; } virtual bool isLoopInvariant(const Loop *L) const; @@ -524,15 +613,14 @@ namespace llvm { return false; // not computable } - const SCEV* replaceSymbolicValuesWithConcrete(const SCEV* Sym, - const SCEV* Conc, - ScalarEvolution &SE) const { - if (&*Sym == this) return Conc; - return this; + virtual bool hasOperand(const SCEV *) const { + return false; } bool dominates(BasicBlock *BB, DominatorTree *DT) const; + bool properlyDominates(BasicBlock *BB, DominatorTree *DT) const; + virtual const Type *getType() const; virtual void print(raw_ostream &OS) const; @@ -570,19 +658,21 @@ namespace llvm { return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S); case scUMaxExpr: return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S); + case scFieldOffset: + return ((SC*)this)->visitFieldOffsetExpr((const SCEVFieldOffsetExpr*)S); + case scAllocSize: + return ((SC*)this)->visitAllocSizeExpr((const SCEVAllocSizeExpr*)S); case scUnknown: return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); case scCouldNotCompute: return ((SC*)this)->visitCouldNotCompute((const SCEVCouldNotCompute*)S); default: - assert(0 && "Unknown SCEV type!"); - abort(); + llvm_unreachable("Unknown SCEV type!"); } } RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) { - assert(0 && "Invalid use of SCEVCouldNotCompute!"); - abort(); + llvm_unreachable("Invalid use of SCEVCouldNotCompute!"); return RetVal(); } }; diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index c75531a7e6e0f..820e1bd1e436b 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -17,7 +17,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" -#include <iosfwd> #include <set> #include <vector> @@ -31,6 +30,8 @@ namespace llvm { class BasicBlock; class Function; class SparseSolver; + class LLVMContext; + class raw_ostream; template<typename T> class SmallVectorImpl; @@ -71,6 +72,12 @@ public: virtual LatticeVal ComputeConstant(Constant *C) { return getOverdefinedVal(); // always safe } + + /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is + /// one that we want to handle through ComputeInstructionState. + virtual bool IsSpecialCasedPHI(PHINode *PN) { + return false; + } /// GetConstant - If the specified lattice value is representable as an LLVM /// constant value, return it. Otherwise return null. The returned value @@ -99,7 +106,7 @@ public: } /// PrintValue - Render the specified lattice value to the specified stream. - virtual void PrintValue(LatticeVal V, std::ostream &OS); + virtual void PrintValue(LatticeVal V, raw_ostream &OS); }; @@ -113,6 +120,8 @@ class SparseSolver { /// compute transfer functions. AbstractLatticeFunction *LatticeFunc; + LLVMContext *Context; + DenseMap<Value *, LatticeVal> ValueState; // The state each value is in. SmallPtrSet<BasicBlock *, 16> BBExecutable; // The bbs that are executable.
@@ -128,8 +137,8 @@ class SparseSolver { SparseSolver(const SparseSolver&); // DO NOT IMPLEMENT void operator=(const SparseSolver&); // DO NOT IMPLEMENT public: - explicit SparseSolver(AbstractLatticeFunction *Lattice) - : LatticeFunc(Lattice) {} + explicit SparseSolver(AbstractLatticeFunction *Lattice, LLVMContext *C) + : LatticeFunc(Lattice), Context(C) {} ~SparseSolver() { delete LatticeFunc; } @@ -138,7 +147,7 @@ public: /// void Solve(Function &F); - void Print(Function &F, std::ostream &OS) const; + void Print(Function &F, raw_ostream &OS) const; /// getLatticeState - Return the LatticeVal object that corresponds to the /// value. If a value is not in the map, it is returned as untracked, diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h index fd615fcdae086..99651e192d3b5 100644 --- a/include/llvm/Analysis/Trace.h +++ b/include/llvm/Analysis/Trace.h @@ -18,7 +18,6 @@ #ifndef LLVM_ANALYSIS_TRACE_H #define LLVM_ANALYSIS_TRACE_H -#include "llvm/Support/Streams.h" #include <vector> #include <cassert> @@ -26,6 +25,7 @@ namespace llvm { class BasicBlock; class Function; class Module; + class raw_ostream; class Trace { typedef std::vector<BasicBlock *> BasicBlockListType; @@ -106,13 +106,12 @@ public: /// print - Write trace to output stream. /// - void print (std::ostream &O) const; - void print (std::ostream *O) const { if (O) print(*O); } + void print(raw_ostream &O) const; /// dump - Debugger convenience method; writes trace to standard error /// output stream. /// - void dump () const; + void dump() const; }; } // end namespace llvm diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 5f5f77a5c9fe7..212b5d1da5f5b 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -23,20 +23,33 @@ namespace llvm { class Instruction; class APInt; class TargetData; + class LLVMContext; /// ComputeMaskedBits - Determine which of the bits specified in Mask are /// known to be either zero or one and return them in the KnownZero/KnownOne /// bit sets. This code only analyzes bits in Mask, in order to short-circuit /// processing. + /// + /// This function is defined on values with integer type, values with pointer + /// type (but only if TD is non-null), and vectors of integers. In the case + /// where V is a vector, the mask, known zero, and known one values are the + /// same width as the vector element, and the bit is set only if it is true + /// for all of the elements in the vector. void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, - APInt &KnownOne, TargetData *TD = 0, + APInt &KnownOne, const TargetData *TD = 0, unsigned Depth = 0); /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be /// zero for bits that V cannot have. + /// + /// This function is defined on values with integer type, values with pointer + /// type (but only if TD is non-null), and vectors of integers. In the case + /// where V is a vector, the mask, known zero, and known one values are the + /// same width as the vector element, and the bit is set only if it is true + /// for all of the elements in the vector. bool MaskedValueIsZero(Value *V, const APInt &Mask, - TargetData *TD = 0, unsigned Depth = 0); + const TargetData *TD = 0, unsigned Depth = 0); /// ComputeNumSignBits - Return the number of times the sign bit of the @@ -47,7 +60,7 @@ namespace llvm { /// /// 'Op' must have a scalar integer type.
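// [Illustrative sketch, not from the patched sources] How the known-bits
// interface documented above is typically queried; V, X and TD below are
// hypothetical. For an i8 value V = (X | 1) with nothing known about X:
//
//   APInt Mask = APInt::getAllOnesValue(8);
//   APInt KnownZero(8, 0), KnownOne(8, 0);
//   ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
//   // KnownOne == 0x01 (bit 0 is forced by the 'or'), KnownZero == 0x00.
//   // MaskedValueIsZero(V, APInt(8, 1), TD) is therefore false.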
/// - unsigned ComputeNumSignBits(Value *Op, TargetData *TD = 0, + unsigned ComputeNumSignBits(Value *Op, const TargetData *TD = 0, unsigned Depth = 0); /// CannotBeNegativeZero - Return true if we can prove that the specified FP @@ -64,14 +77,16 @@ namespace llvm { Value *FindInsertedValue(Value *V, const unsigned *idx_begin, const unsigned *idx_end, + LLVMContext &Context, Instruction *InsertBefore = 0); /// This is a convenience wrapper for finding values indexed by a single index /// only. inline Value *FindInsertedValue(Value *V, const unsigned Idx, + LLVMContext &Context, Instruction *InsertBefore = 0) { const unsigned Idxs[1] = { Idx }; - return FindInsertedValue(V, &Idxs[0], &Idxs[1], InsertBefore); + return FindInsertedValue(V, &Idxs[0], &Idxs[1], Context, InsertBefore); } /// GetConstantStringInfo - This function computes the length of a diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h index 9c06367798498..3a846c28994dc 100644 --- a/include/llvm/Argument.h +++ b/include/llvm/Argument.h @@ -38,8 +38,7 @@ public: /// Argument ctor - If Function argument is specified, this argument is /// inserted at the end of the argument list for the function. /// - explicit Argument(const Type *Ty, const std::string &Name = "", - Function *F = 0); + explicit Argument(const Type *Ty, const Twine &Name = "", Function *F = 0); inline const Function *getParent() const { return Parent; } inline Function *getParent() { return Parent; } diff --git a/include/llvm/Assembly/Parser.h b/include/llvm/Assembly/Parser.h index 966abaaa2067a..82ec6d81367bf 100644 --- a/include/llvm/Assembly/Parser.h +++ b/include/llvm/Assembly/Parser.h @@ -19,6 +19,7 @@ namespace llvm { class Module; +class MemoryBuffer; class SMDiagnostic; class raw_ostream; class LLVMContext; @@ -48,6 +49,17 @@ Module *ParseAssemblyString( LLVMContext &Context ); +/// This function is the low-level interface to the LLVM Assembly Parser. +/// ParseAssemblyFile and ParseAssemblyString are wrappers around this function. +/// @brief Parse LLVM Assembly from a MemoryBuffer. This function *always* +/// takes ownership of the MemoryBuffer. +Module *ParseAssembly( + MemoryBuffer *F, ///< The MemoryBuffer containing assembly + Module *M, ///< A module to add the assembly to. + SMDiagnostic &Err, ///< Error result info. + LLVMContext &Context +); + } // End llvm namespace #endif diff --git a/include/llvm/Assembly/Writer.h b/include/llvm/Assembly/Writer.h index 5e5fe1560585d..c5b239079a0d0 100644 --- a/include/llvm/Assembly/Writer.h +++ b/include/llvm/Assembly/Writer.h @@ -17,7 +17,6 @@ #ifndef LLVM_ASSEMBLY_WRITER_H #define LLVM_ASSEMBLY_WRITER_H -#include #include namespace llvm { @@ -71,8 +70,6 @@ void WriteTypeSymbolic(raw_ostream &, const Type *, const Module *M); // then even constants get pretty-printed; for example, the type of a null // pointer is printed symbolically. // -void WriteAsOperand(std::ostream &, const Value *, bool PrintTy = true, - const Module *Context = 0); void WriteAsOperand(raw_ostream &, const Value *, bool PrintTy = true, const Module *Context = 0); diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h index 134e350202853..0bbdc349b1b43 100644 --- a/include/llvm/Attributes.h +++ b/include/llvm/Attributes.h @@ -57,6 +57,8 @@ const Attributes NoCapture = 1<<21; ///< Function creates no aliases of pointer const Attributes NoRedZone = 1<<22; /// disable redzone const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point /// instructions.
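// [Illustrative sketch, not from the patched sources] Attributes are plain
// bit flags, so they compose with the usual bitwise operators; Attrs below is
// hypothetical, the constants are the ones visible above:
//
//   Attributes Attrs = NoRedZone | NoImplicitFloat;
//   bool HasRedZone = (Attrs & NoRedZone) != 0;   // true
//
// The ParameterOnly/FunctionOnly masks are built the same way and are used to
// reject an attribute on a kind of entity it cannot apply to.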
+const Attributes Naked = 1<<24; ///< Naked function +const Attributes InlineHint = 1<<25; ///< source said inlining was desirable /// @brief Attributes that only apply to function parameters. const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture; @@ -65,7 +67,7 @@ const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture; /// be used on return values or function parameters. const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly | NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq | - NoRedZone | NoImplicitFloat; + NoRedZone | NoImplicitFloat | Naked | InlineHint; /// @brief Parameter attributes that do not apply to vararg call arguments. const Attributes VarArgsIncompatible = StructRet; diff --git a/include/llvm/AutoUpgrade.h b/include/llvm/AutoUpgrade.h index f61bd1a08d32b..0a81c807956f6 100644 --- a/include/llvm/AutoUpgrade.h +++ b/include/llvm/AutoUpgrade.h @@ -15,6 +15,7 @@ #define LLVM_AUTOUPGRADE_H namespace llvm { + class Module; class Function; class CallInst; @@ -34,6 +35,9 @@ namespace llvm { /// so that it can update all calls to the old function. void UpgradeCallsToIntrinsic(Function* F); + /// This function checks debug info intrinsics. If an intrinsic is invalid + /// then this function simply removes the intrinsic. + void CheckDebugInfoIntrinsics(Module *M); } // End llvm namespace #endif diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h index 072f6152ea7d8..b497827fd6950 100644 --- a/include/llvm/BasicBlock.h +++ b/include/llvm/BasicBlock.h @@ -22,6 +22,7 @@ namespace llvm { class TerminatorInst; +class LLVMContext; template<> struct ilist_traits : public SymbolTableListTraits { @@ -46,7 +47,7 @@ template<> struct ilist_traits Instruction *ensureHead(Instruction*) const { return createSentinel(); } static void noteHead(Instruction*, Instruction*) {} private: - mutable ilist_node Sentinel; + mutable ilist_half_node Sentinel; }; /// This represents a single basic block in LLVM. A basic block is simply a @@ -82,9 +83,12 @@ private: /// is automatically inserted at either the end of the function (if /// InsertBefore is null), or before the specified basic block. /// - explicit BasicBlock(const std::string &Name = "", Function *Parent = 0, - BasicBlock *InsertBefore = 0); + explicit BasicBlock(LLVMContext &C, const Twine &Name = "", + Function *Parent = 0, BasicBlock *InsertBefore = 0); public: + /// getContext - Get the context in which this basic block lives. + LLVMContext &getContext() const; + /// Instruction iterators... typedef InstListType::iterator iterator; typedef InstListType::const_iterator const_iterator; @@ -92,9 +96,9 @@ public: /// Create - Creates a new BasicBlock. If the Parent parameter is specified, /// the basic block is automatically inserted at either the end of the /// function (if InsertBefore is 0), or before the specified basic block. - static BasicBlock *Create(const std::string &Name = "", Function *Parent = 0, - BasicBlock *InsertBefore = 0) { - return new BasicBlock(Name, Parent, InsertBefore); + static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "", + Function *Parent = 0,BasicBlock *InsertBefore = 0) { + return new BasicBlock(Context, Name, Parent, InsertBefore); } ~BasicBlock(); @@ -227,7 +231,10 @@ public: /// cause a degenerate basic block to be formed, having a terminator inside of /// the basic block). /// - BasicBlock *splitBasicBlock(iterator I, const std::string &BBName = ""); + /// Also note that this doesn't preserve any passes. 
To split blocks while + /// keeping loop information consistent, use the SplitBlock utility function. + /// + BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = ""); }; } // End llvm namespace diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h index 13583c0d5ecd6..e19e4c09ce329 100644 --- a/include/llvm/Bitcode/Archive.h +++ b/include/llvm/Bitcode/Archive.h @@ -22,7 +22,6 @@ #include "llvm/System/Path.h" #include #include -#include namespace llvm { class MemoryBuffer; diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h index 28249eec0b029..779ef5fa2d839 100644 --- a/include/llvm/Bitcode/BitstreamReader.h +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -17,6 +17,7 @@ #include "llvm/Bitcode/BitCodes.h" #include +#include #include namespace llvm { @@ -260,6 +261,7 @@ public: uint32_t Read(unsigned NumBits) { + assert(NumBits <= 32 && "Cannot return more than 32 bits!"); // If the field is fully contained by CurWord, return it quickly. if (BitsInCurWord >= NumBits) { uint32_t R = CurWord & ((1U << NumBits)-1); @@ -322,17 +324,19 @@ public: } } + // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The + // chunk size of the VBR must still be <= 32 bits though. uint64_t ReadVBR64(unsigned NumBits) { - uint64_t Piece = Read(NumBits); - if ((Piece & (uint64_t(1) << (NumBits-1))) == 0) - return Piece; + uint32_t Piece = Read(NumBits); + if ((Piece & (1U << (NumBits-1))) == 0) + return uint64_t(Piece); uint64_t Result = 0; unsigned NextBit = 0; while (1) { - Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; + Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; - if ((Piece & (uint64_t(1) << (NumBits-1))) == 0) + if ((Piece & (1U << (NumBits-1))) == 0) return Result; NextBit += NumBits-1; diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 55dd4dd49b0f8..e48a190833655 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -15,6 +15,7 @@ #ifndef BITSTREAM_WRITER_H #define BITSTREAM_WRITER_H +#include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitCodes.h" #include @@ -293,7 +294,9 @@ private: /// known to exist at the end of the record. template<typename uintty> void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, - const char *BlobData, unsigned BlobLen) { + const StringRef &Blob) { + const char *BlobData = Blob.data(); + unsigned BlobLen = (unsigned) Blob.size(); unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; @@ -409,7 +412,7 @@ public: /// the first entry. template<typename uintty> void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, 0, 0); + EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); } /// EmitRecordWithBlob - Emit the specified record to the stream, using an @@ -418,17 +421,28 @@ public: /// EmitRecord, this routine expects that the first entry in Vals is the code /// of the record.
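// [Illustrative sketch, not from the patched sources] A worked example of the
// VBR scheme that ReadVBR64 above decodes. With NumBits = 4 each chunk
// carries three payload bits plus a high continuation bit, so the value 27
// (0b11011) is encoded as:
//
//   chunk 0: payload 011, continuation set   -> 0b1011
//   chunk 1: payload 011, continuation clear -> 0b0011
//
// ReadVBR64 reassembles 0b011 | (0b011 << 3) == 27; the new assert in Read()
// holds because each chunk is read at most 32 bits at a time.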
template<typename uintty> + void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, + const StringRef &Blob) { + EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); + } + template<typename uintty> void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, const char *BlobData, unsigned BlobLen) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, BlobData, BlobLen); + return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); } /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records /// that end with an array. template<typename uintty> + void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, + const StringRef &Array) { + EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); + } + template<typename uintty> void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, const char *ArrayData, unsigned ArrayLen) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, ArrayData, ArrayLen); + return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, + ArrayLen)); } //===--------------------------------------------------------------------===// diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 1ede69dee1610..dccd8e035d795 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -33,7 +33,9 @@ namespace bitc { CONSTANTS_BLOCK_ID, FUNCTION_BLOCK_ID, TYPE_SYMTAB_BLOCK_ID, - VALUE_SYMTAB_BLOCK_ID + VALUE_SYMTAB_BLOCK_ID, + METADATA_BLOCK_ID, + METADATA_ATTACHMENT_ID }; @@ -106,6 +108,14 @@ namespace bitc { VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N] }; + enum MetadataCodes { + METADATA_STRING = 1, // MDSTRING: [values] + METADATA_NODE = 2, // MDNODE: [n x (type num, value num)] + METADATA_NAME = 3, // STRING: [values] + METADATA_NAMED_NODE = 4, // NAMEDMDNODE: [n x mdnodes] + METADATA_KIND = 5, // [n x [id, name]] + METADATA_ATTACHMENT = 6 // [m x [value, [n x [id, mdnode]]] + }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each // constant and maintains an implicit current type value. enum ConstantsCodes { @@ -128,8 +138,7 @@ namespace bitc { CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr] CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] - CST_CODE_MDSTRING = 20, // MDSTRING: [values] - CST_CODE_MDNODE = 21 // MDNODE: [n x (type num, value num)] + CST_CODE_CE_INBOUNDS_GEP = 20 // INBOUNDS_GEP: [n x operands] }; /// CastOpcodes - These are values used in the bitcode files to encode which @@ -171,6 +180,18 @@ namespace bitc { BINOP_XOR = 12 }; + /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing + /// OverflowingBinaryOperator's SubclassOptionalData contents. + enum OverflowingBinaryOperatorOptionalFlags { + OBO_NO_UNSIGNED_WRAP = 0, + OBO_NO_SIGNED_WRAP = 1 + }; + + /// SDivOperatorOptionalFlags - Flags for serializing SDivOperator's + /// SubclassOptionalData contents. + enum SDivOperatorOptionalFlags { + SDIV_EXACT = 0 + }; // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It // can contain a constant block (CONSTANTS_BLOCK_ID). @@ -210,10 +231,12 @@ namespace bitc { FUNC_CODE_INST_GETRESULT = 25, // GETRESULT: [ty, opval, n] FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [n x operands] FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [n x operands] - // fcmp/icmp returning Int1Ty or vector of Int1Ty, NOT for vicmp/vfcmp + // fcmp/icmp returning Int1Ty or vector of Int1Ty. Same as CMP, exists to + // support legacy vicmp/vfcmp instructions.
FUNC_CODE_INST_CMP2 = 28, // CMP2: [opty, opval, opval, pred] // new select on i1 or [N x i1] - FUNC_CODE_INST_VSELECT = 29 // VSELECT: [ty,opval,opval,predty,pred] + FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred] + FUNC_CODE_INST_INBOUNDS_GEP = 30 // INBOUNDS_GEP: [n x operands] }; } // End bitc namespace } // End llvm namespace diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index 3d33d75a06a19..7b74bdf76ba7f 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -14,7 +14,6 @@ #ifndef LLVM_BITCODE_H #define LLVM_BITCODE_H -#include #include namespace llvm { @@ -41,10 +40,6 @@ namespace llvm { Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, std::string *ErrMsg = 0); - /// WriteBitcodeToFile - Write the specified module to the specified output - /// stream. - void WriteBitcodeToFile(const Module *M, std::ostream &Out); - /// WriteBitcodeToFile - Write the specified module to the specified /// raw output stream. void WriteBitcodeToFile(const Module *M, raw_ostream &Out); @@ -53,23 +48,48 @@ namespace llvm { /// raw output stream. void WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream); - /// CreateBitcodeWriterPass - Create and return a pass that writes the module - /// to the specified ostream. - ModulePass *CreateBitcodeWriterPass(std::ostream &Str); - /// createBitcodeWriterPass - Create and return a pass that writes the module /// to the specified ostream. ModulePass *createBitcodeWriterPass(raw_ostream &Str); - /// isBitcodeWrapper - Return true fi this is a wrapper for LLVM IR bitcode - /// files. - static bool inline isBitcodeWrapper(unsigned char *BufPtr, - unsigned char *BufEnd) { - return (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && - BufPtr[2] == 0x17 && BufPtr[3] == 0x0B); + /// isBitcodeWrapper - Return true if the given bytes are the magic bytes + /// for an LLVM IR bitcode wrapper. + /// + static inline bool isBitcodeWrapper(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + // See if you can find the hidden message in the magic bytes :-). + // (Hint: it's a little-endian encoding.) + return BufPtr != BufEnd && + BufPtr[0] == 0xDE && + BufPtr[1] == 0xC0 && + BufPtr[2] == 0x17 && + BufPtr[3] == 0x0B; } - + + /// isRawBitcode - Return true if the given bytes are the magic bytes for + /// raw LLVM IR bitcode (without a wrapper). + /// + static inline bool isRawBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + // These bytes sort of have a hidden message, but it's not in + // little-endian this time, and it's a little redundant. + return BufPtr != BufEnd && + BufPtr[0] == 'B' && + BufPtr[1] == 'C' && + BufPtr[2] == 0xc0 && + BufPtr[3] == 0xde; + } + + /// isBitcode - Return true if the given bytes are the magic bytes for + /// LLVM IR bitcode, either with or without a wrapper. + /// + static bool inline isBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + return isBitcodeWrapper(BufPtr, BufEnd) || + isRawBitcode(BufPtr, BufEnd); + } + /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special /// header for padding or other reasons. 
The format of this header is: /// diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/CallGraphSCCPass.h index d5ff17cf247c0..fc9feda5bd8c2 100644 --- a/include/llvm/CallGraphSCCPass.h +++ b/include/llvm/CallGraphSCCPass.h @@ -22,6 +22,7 @@ #define LLVM_CALL_GRAPH_SCC_PASS_H #include "llvm/Pass.h" +#include "llvm/Analysis/CallGraph.h" namespace llvm { @@ -45,7 +46,10 @@ struct CallGraphSCCPass : public Pass { /// non-recursive (or only self-recursive) functions will have an SCC size of /// 1, where recursive portions of the call graph will have SCC size > 1. /// - virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC) = 0; + /// SCC passes that add or delete functions to the SCC are required to update + /// the SCC list, otherwise stale pointers may be dereferenced. + /// + virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC) = 0; /// doFinalization - This method is called after the SCCs of the program have /// been processed, allowing the pass to do final cleanup as necessary. diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index ef609e4efefe1..62d0679fb7387 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -16,30 +16,43 @@ #ifndef LLVM_CODEGEN_ASMPRINTER_H #define LLVM_CODEGEN_ASMPRINTER_H -#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/Support/DebugLoc.h" #include "llvm/Target/TargetMachine.h" -#include +#include "llvm/ADT/DenseMap.h" namespace llvm { class GCStrategy; class Constant; class ConstantArray; + class ConstantFP; class ConstantInt; class ConstantStruct; class ConstantVector; class GCMetadataPrinter; + class GlobalValue; class GlobalVariable; + class MachineBasicBlock; + class MachineFunction; + class MachineInstr; + class MachineLoopInfo; + class MachineLoop; + class MachineConstantPool; class MachineConstantPoolEntry; class MachineConstantPoolValue; + class MachineJumpTableInfo; class MachineModuleInfo; + class MCInst; + class MCContext; + class MCSection; + class MCStreamer; + class MCSymbol; class DwarfWriter; class Mangler; - class Section; - class TargetAsmInfo; + class MCAsmInfo; + class TargetLoweringObjectFile; class Type; - class raw_ostream; + class formatted_raw_ostream; /// AsmPrinter - This class is intended to be used as a driving class for all /// asm writers. @@ -57,31 +70,51 @@ namespace llvm { typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type; typedef gcp_map_type::iterator gcp_iterator; gcp_map_type GCMetadataPrinters; - - protected: + + /// If VerboseAsm is set, a pointer to the loop info for this + /// function. + /// + MachineLoopInfo *LI; + + public: /// MMI - If available, this is a pointer to the current MachineModuleInfo. MachineModuleInfo *MMI; + protected: /// DW - If available, this is a pointer to the current dwarf writer. DwarfWriter *DW; - + public: /// Output stream on which we're printing assembly code. /// - raw_ostream &O; + formatted_raw_ostream &O; /// Target machine description. /// TargetMachine &TM; + /// getObjFileLowering - Return information about object file lowering. + TargetLoweringObjectFile &getObjFileLowering() const; + /// Target Asm Printer information. /// - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// Target Register Information. /// const TargetRegisterInfo *TRI; + /// OutContext - This is the context for the output file that we are + /// streaming. This owns all of the global MC-related objects for the + /// generated translation unit.
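// [Illustrative sketch, not from the patched sources] What the predicates
// added to Bitcode/ReaderWriter.h above accept. Raw bitcode starts with
// 'B','C',0xC0,0xDE, while wrapped bitcode starts with the little-endian
// word 0x0B17C0DE:
//
//   static const unsigned char Raw[]     = { 'B', 'C', 0xc0, 0xde };
//   static const unsigned char Wrapped[] = { 0xDE, 0xC0, 0x17, 0x0B };
//   // isRawBitcode(Raw, Raw + 4)             == true
//   // isBitcodeWrapper(Wrapped, Wrapped + 4) == true
//   // isBitcode() accepts either form.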
+ MCContext &OutContext; + + /// OutStreamer - This is the MCStreamer object for the file we are + /// generating. This contains the transient state for the current + /// translation unit that we are generating (such as the current section + /// etc). + MCStreamer &OutStreamer; + /// The current machine function. const MachineFunction *MF; @@ -94,14 +127,9 @@ namespace llvm { /// std::string CurrentFnName; - /// CurrentSection - The current section we are emitting to. This is - /// controlled and used by the SwitchSection method. - std::string CurrentSection; - const Section* CurrentSection_; - - /// IsInTextSection - True if the current section we are emitting to is a - /// text section. - bool IsInTextSection; + /// getCurrentSection() - Return the current section we are emitting to. + const MCSection *getCurrentSection() const; + /// VerboseAsm - Emit comments in assembly output if this is true. /// @@ -113,12 +141,12 @@ namespace llvm { mutable const Function *LastFn; mutable unsigned Counter; - // Private state for processDebugLock() + // Private state for processDebugLoc() mutable DebugLocTuple PrevDLT; protected: - explicit AsmPrinter(raw_ostream &o, TargetMachine &TM, - const TargetAsmInfo *T, bool V); + explicit AsmPrinter(formatted_raw_ostream &o, TargetMachine &TM, + const MCAsmInfo *T, bool V); public: virtual ~AsmPrinter(); @@ -127,54 +155,10 @@ namespace llvm { /// bool isVerbose() const { return VerboseAsm; } - /// SwitchToTextSection - Switch to the specified section of the executable - /// if we are not already in it! If GV is non-null and if the global has an - /// explicitly requested section, we switch to the section indicated for the - /// global instead of NewSection. - /// - /// If the new section is an empty string, this method forgets what the - /// current section is, but does not emit a .section directive. - /// - /// This method is used when about to emit executable code. - /// - void SwitchToTextSection(const char *NewSection, - const GlobalValue *GV = NULL); - - /// SwitchToDataSection - Switch to the specified section of the executable - /// if we are not already in it! If GV is non-null and if the global has an - /// explicitly requested section, we switch to the section indicated for the - /// global instead of NewSection. - /// - /// If the new section is an empty string, this method forgets what the - /// current section is, but does not emit a .section directive. - /// - /// This method is used when about to emit data. For most assemblers, this - /// is the same as the SwitchToTextSection method, but not all assemblers - /// are the same. + /// getFunctionNumber - Return a unique ID for the current function. /// - void SwitchToDataSection(const char *NewSection, - const GlobalValue *GV = NULL); - - /// SwitchToSection - Switch to the specified section of the executable if - /// we are not already in it! - void SwitchToSection(const Section* NS); - - /// getGlobalLinkName - Returns the asm/link name of of the specified - /// global variable. Should be overridden by each target asm printer to - /// generate the appropriate value. - virtual const std::string &getGlobalLinkName(const GlobalVariable *GV, - std::string &LinkName) const; - - /// EmitExternalGlobal - Emit the external reference to a global variable. - /// Should be overridden if an indirect reference should be used. - virtual void EmitExternalGlobal(const GlobalVariable *GV); - - /// getCurrentFunctionEHName - Called to return (and cache) the - /// CurrentFnEHName. 
- /// - const std::string &getCurrentFunctionEHName(const MachineFunction *MF, - std::string &FuncEHName) const; - + unsigned getFunctionNumber() const { return FunctionNumber; } + protected: /// getAnalysisUsage - Record analysis usage. /// @@ -185,6 +169,14 @@ namespace llvm { /// call this implementation. bool doInitialization(Module &M); + /// EmitStartOfAsmFile - This virtual method can be overridden by targets + /// that want to emit something at the start of their file. + virtual void EmitStartOfAsmFile(Module &M) {} + + /// EmitEndOfAsmFile - This virtual method can be overridden by targets that + /// want to emit something at the end of their file. + virtual void EmitEndOfAsmFile(Module &M) {} + /// doFinalization - Shut down the asmprinter. If you override this in your /// pass, you must make sure to call it explicitly. bool doFinalization(Module &M); @@ -212,14 +204,14 @@ namespace llvm { unsigned AsmVariant, const char *ExtraCode); + /// PrintGlobalVariable - Emit the specified global variable and its + /// initializer to the output stream. + virtual void PrintGlobalVariable(const GlobalVariable *GV) = 0; + /// SetupMachineFunction - This should be called when a new MachineFunction /// is being processed from runOnMachineFunction. void SetupMachineFunction(MachineFunction &MF); - /// getFunctionNumber - Return a unique ID for the current function. - /// - unsigned getFunctionNumber() const { return FunctionNumber; } - /// IncrementFunctionNumber - Increase Function Number. AsmPrinters should /// not normally call this, as the counter is automatically bumped by /// SetupMachineFunction. @@ -241,7 +233,7 @@ namespace llvm { /// special global used by LLVM. If so, emit it and return true, otherwise /// do nothing and return false. bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); - + public: //===------------------------------------------------------------------===// /// LEB 128 number encoding. @@ -267,7 +259,8 @@ namespace llvm { void EOL() const; void EOL(const std::string &Comment) const; void EOL(const char* Comment) const; - + void EOL(const char *Comment, unsigned Encoding) const; + /// EmitULEB128Bytes - Emit an assembler byte data directive to compose an /// unsigned leb128 value. void EmitULEB128Bytes(unsigned Value) const; @@ -332,6 +325,19 @@ namespace llvm { /// debug tables. void printDeclare(const MachineInstr *MI) const; + /// EmitComments - Pretty-print comments for instructions + void EmitComments(const MachineInstr &MI) const; + /// EmitComments - Pretty-print comments for basic blocks + void EmitComments(const MachineBasicBlock &MBB) const; + + /// GetMBBSymbol - Return the MCSymbol corresponding to the specified basic + /// block label. + MCSymbol *GetMBBSymbol(unsigned MBBID) const; + + /// EmitBasicBlockStart - This method prints the label for the specified + /// MachineBasicBlock, an alignment (if present) and a comment describing + /// it if appropriate. + void EmitBasicBlockStart(const MachineBasicBlock *MBB) const; protected: /// EmitZeros - Emit a block of zeros. /// @@ -351,8 +357,8 @@ namespace llvm { virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); /// processDebugLoc - Processes the debug information of each machine - /// instruction's DebugLoc. - void processDebugLoc(DebugLoc DL); + /// instruction's DebugLoc. + void processDebugLoc(const MachineInstr *MI, bool BeforePrintingInsn); /// printInlineAsm - This method formats and prints the specified machine /// instruction that is an inline asm. 
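// [Illustrative sketch, not from the patched sources] How a target would use
// the new EmitStartOfAsmFile/EmitEndOfAsmFile hooks above in place of the
// removed SwitchTo*Section interface; MyTargetAsmPrinter is hypothetical:
//
//   class MyTargetAsmPrinter : public AsmPrinter {
//     virtual void EmitStartOfAsmFile(Module &M) {
//       O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n";
//     }
//     virtual void EmitEndOfAsmFile(Module &M) {
//       // e.g. directives that must follow all functions and globals
//     }
//   };
//
// Section switching itself now goes through the MCStreamer and
// TargetLoweringObjectFile rather than an AsmPrinter-managed section string.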
@@ -362,13 +368,7 @@ namespace llvm { /// that is an implicit def. virtual void printImplicitDef(const MachineInstr *MI) const; - /// printBasicBlockLabel - This method prints the label for the specified - /// MachineBasicBlock - virtual void printBasicBlockLabel(const MachineBasicBlock *MBB, - bool printAlign = false, - bool printColon = false, - bool printComment = true) const; - + /// printPICJumpTableSetLabel - This method prints a set label for the /// specified MachineBasicBlock for a jumptable entry. virtual void printPICJumpTableSetLabel(unsigned uid, @@ -383,22 +383,14 @@ namespace llvm { /// specified type. void printDataDirective(const Type *type, unsigned AddrSpace = 0); - /// printSuffixedName - This prints a name with preceding - /// getPrivateGlobalPrefix and the specified suffix, handling quoted names - /// correctly. - void printSuffixedName(const char *Name, const char *Suffix, - const char *Prefix = 0); - void printSuffixedName(const std::string &Name, const char* Suffix); - /// printVisibility - This prints visibility information about symbol, if /// this is supported by the target. void printVisibility(const std::string& Name, unsigned Visibility) const; /// printOffset - This is just a convenient handler for printing offsets. void printOffset(int64_t Offset) const; - + private: - const GlobalValue *findGlobalValue(const Constant* CV); void EmitLLVMUsedList(Constant *List); void EmitXXStructorList(Constant *List); void EmitGlobalConstantStruct(const ConstantStruct* CVS, diff --git a/include/llvm/CodeGen/BinaryObject.h b/include/llvm/CodeGen/BinaryObject.h index ce0c07af9515f..2d4bd73a82eb9 100644 --- a/include/llvm/CodeGen/BinaryObject.h +++ b/include/llvm/CodeGen/BinaryObject.h @@ -68,6 +68,13 @@ public: return !Relocations.empty(); } + /// emitZeros - This callback is invoked to emit an arbitrary number + /// of zero bytes to the data stream. + inline void emitZeros(unsigned Size) { + for (unsigned i=0; i < Size; ++i) + emitByte(0); + } + /// emitByte - This callback is invoked when a byte needs to be /// written to the data stream. inline void emitByte(uint8_t B) { @@ -86,15 +93,15 @@ public: /// emitWord16LE - This callback is invoked when a 16-bit word needs to be /// written to the data stream in correct endian format and correct size. inline void emitWord16LE(uint16_t W) { - Data.push_back((W >> 0) & 255); - Data.push_back((W >> 8) & 255); + Data.push_back((uint8_t)(W >> 0)); + Data.push_back((uint8_t)(W >> 8)); } /// emitWord16BE - This callback is invoked when a 16-bit word needs to be /// written to the data stream in correct endian format and correct size. inline void emitWord16BE(uint16_t W) { - Data.push_back((W >> 8) & 255); - Data.push_back((W >> 0) & 255); + Data.push_back((uint8_t)(W >> 8)); + Data.push_back((uint8_t)(W >> 0)); } /// emitWord - This callback is invoked when a word needs to be @@ -124,49 +131,62 @@ public: emitDWordBE(W); } + /// emitWordFP80 - This callback is invoked when an x86_fp80 needs to be + /// written to the data stream in correct endian format. + inline void emitWordFP80(const uint64_t *W, unsigned PadSize) { + if (IsLittleEndian) { + emitWord64(W[0]); + emitWord16(W[1]); + } else { + emitWord16(W[1]); + emitWord64(W[0]); + } + emitZeros(PadSize); + } + /// emitWordLE - This callback is invoked when a 32-bit word needs to be /// written to the data stream in little-endian format.
inline void emitWordLE(uint32_t W) { - Data.push_back((W >> 0) & 255); - Data.push_back((W >> 8) & 255); - Data.push_back((W >> 16) & 255); - Data.push_back((W >> 24) & 255); + Data.push_back((uint8_t)(W >> 0)); + Data.push_back((uint8_t)(W >> 8)); + Data.push_back((uint8_t)(W >> 16)); + Data.push_back((uint8_t)(W >> 24)); } /// emitWordBE - This callback is invoked when a 32-bit word needs to be /// written to the data stream in big-endian format. /// inline void emitWordBE(uint32_t W) { - Data.push_back((W >> 24) & 255); - Data.push_back((W >> 16) & 255); - Data.push_back((W >> 8) & 255); - Data.push_back((W >> 0) & 255); + Data.push_back((uint8_t)(W >> 24)); + Data.push_back((uint8_t)(W >> 16)); + Data.push_back((uint8_t)(W >> 8)); + Data.push_back((uint8_t)(W >> 0)); } /// emitDWordLE - This callback is invoked when a 64-bit word needs to be /// written to the data stream in little-endian format. inline void emitDWordLE(uint64_t W) { - Data.push_back(unsigned(W >> 0) & 255); - Data.push_back(unsigned(W >> 8) & 255); - Data.push_back(unsigned(W >> 16) & 255); - Data.push_back(unsigned(W >> 24) & 255); - Data.push_back(unsigned(W >> 32) & 255); - Data.push_back(unsigned(W >> 40) & 255); - Data.push_back(unsigned(W >> 48) & 255); - Data.push_back(unsigned(W >> 56) & 255); + Data.push_back((uint8_t)(W >> 0)); + Data.push_back((uint8_t)(W >> 8)); + Data.push_back((uint8_t)(W >> 16)); + Data.push_back((uint8_t)(W >> 24)); + Data.push_back((uint8_t)(W >> 32)); + Data.push_back((uint8_t)(W >> 40)); + Data.push_back((uint8_t)(W >> 48)); + Data.push_back((uint8_t)(W >> 56)); } /// emitDWordBE - This callback is invoked when a 64-bit word needs to be /// written to the data stream in big-endian format. inline void emitDWordBE(uint64_t W) { - Data.push_back(unsigned(W >> 56) & 255); - Data.push_back(unsigned(W >> 48) & 255); - Data.push_back(unsigned(W >> 40) & 255); - Data.push_back(unsigned(W >> 32) & 255); - Data.push_back(unsigned(W >> 24) & 255); - Data.push_back(unsigned(W >> 16) & 255); - Data.push_back(unsigned(W >> 8) & 255); - Data.push_back(unsigned(W >> 0) & 255); + Data.push_back((uint8_t)(W >> 56)); + Data.push_back((uint8_t)(W >> 48)); + Data.push_back((uint8_t)(W >> 40)); + Data.push_back((uint8_t)(W >> 32)); + Data.push_back((uint8_t)(W >> 24)); + Data.push_back((uint8_t)(W >> 16)); + Data.push_back((uint8_t)(W >> 8)); + Data.push_back((uint8_t)(W >> 0)); } /// fixByte - This callback is invoked when a byte needs to be @@ -187,15 +207,15 @@ public: /// emitWord16LE - This callback is invoked when a 16-bit word needs to /// fixup the data stream in little endian format. inline void fixWord16LE(uint16_t W, uint32_t offset) { - Data[offset++] = W & 255; - Data[offset] = (W >> 8) & 255; + Data[offset] = (uint8_t)(W >> 0); + Data[++offset] = (uint8_t)(W >> 8); } /// fixWord16BE - This callback is invoked when a 16-bit word needs to /// fixup data stream in big endian format. inline void fixWord16BE(uint16_t W, uint32_t offset) { - Data[offset++] = (W >> 8) & 255; - Data[offset] = W & 255; + Data[offset] = (uint8_t)(W >> 8); + Data[++offset] = (uint8_t)(W >> 0); } /// emitWord - This callback is invoked when a word needs to @@ -219,19 +239,19 @@ public: /// fixWord32LE - This callback is invoked when a 32-bit word needs to /// fixup the data in little endian format. 
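// [Illustrative sketch, not from the patched sources] A quick check of the
// byte orders above for W = 0x11223344: emitWordLE appends 0x44, 0x33, 0x22,
// 0x11 (least significant byte first), while emitWordBE appends 0x11, 0x22,
// 0x33, 0x44. The (uint8_t) casts are equivalent to the old "& 255" masking,
// since narrowing to uint8_t already keeps only the low eight bits.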
inline void fixWord32LE(uint32_t W, uint32_t offset) { - Data[offset++] = W & 255; - Data[offset++] = (W >> 8) & 255; - Data[offset++] = (W >> 16) & 255; - Data[offset] = (W >> 24) & 255; + Data[offset] = (uint8_t)(W >> 0); + Data[++offset] = (uint8_t)(W >> 8); + Data[++offset] = (uint8_t)(W >> 16); + Data[++offset] = (uint8_t)(W >> 24); } /// fixWord32BE - This callback is invoked when a 32-bit word needs to /// fixup the data in big endian format. inline void fixWord32BE(uint32_t W, uint32_t offset) { - Data[offset++] = (W >> 24) & 255; - Data[offset++] = (W >> 16) & 255; - Data[offset++] = (W >> 8) & 255; - Data[offset] = W & 255; + Data[offset] = (uint8_t)(W >> 24); + Data[++offset] = (uint8_t)(W >> 16); + Data[++offset] = (uint8_t)(W >> 8); + Data[++offset] = (uint8_t)(W >> 0); } /// fixWord64 - This callback is invoked when a 64-bit word needs to @@ -246,42 +266,42 @@ public: /// fixWord64LE - This callback is invoked when a 64-bit word needs to /// fixup the data in little endian format. inline void fixWord64LE(uint64_t W, uint32_t offset) { - Data[offset++] = W & 255; - Data[offset++] = (W >> 8) & 255; - Data[offset++] = (W >> 16) & 255; - Data[offset++] = (W >> 24) & 255; - Data[offset++] = (W >> 32) & 255; - Data[offset++] = (W >> 40) & 255; - Data[offset++] = (W >> 48) & 255; - Data[offset] = (W >> 56) & 255; + Data[offset] = (uint8_t)(W >> 0); + Data[++offset] = (uint8_t)(W >> 8); + Data[++offset] = (uint8_t)(W >> 16); + Data[++offset] = (uint8_t)(W >> 24); + Data[++offset] = (uint8_t)(W >> 32); + Data[++offset] = (uint8_t)(W >> 40); + Data[++offset] = (uint8_t)(W >> 48); + Data[++offset] = (uint8_t)(W >> 56); } /// fixWord64BE - This callback is invoked when a 64-bit word needs to /// fixup the data in big endian format. inline void fixWord64BE(uint64_t W, uint32_t offset) { - Data[offset++] = (W >> 56) & 255; - Data[offset++] = (W >> 48) & 255; - Data[offset++] = (W >> 40) & 255; - Data[offset++] = (W >> 32) & 255; - Data[offset++] = (W >> 24) & 255; - Data[offset++] = (W >> 16) & 255; - Data[offset++] = (W >> 8) & 255; - Data[offset] = W & 255; + Data[offset] = (uint8_t)(W >> 56); + Data[++offset] = (uint8_t)(W >> 48); + Data[++offset] = (uint8_t)(W >> 40); + Data[++offset] = (uint8_t)(W >> 32); + Data[++offset] = (uint8_t)(W >> 24); + Data[++offset] = (uint8_t)(W >> 16); + Data[++offset] = (uint8_t)(W >> 8); + Data[++offset] = (uint8_t)(W >> 0); } /// emitAlignment - Pad the data to the specified alignment. - void emitAlignment(unsigned Alignment) { + void emitAlignment(unsigned Alignment, uint8_t fill = 0) { if (Alignment <= 1) return; unsigned PadSize = -Data.size() & (Alignment-1); for (unsigned i = 0; i < PadSize; ++i) - Data.push_back(0); + Data.push_back(fill); } /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be /// written to the output stream. void emitULEB128Bytes(uint64_t Value) { do { - unsigned char Byte = Value & 0x7f; + uint8_t Byte = (uint8_t)(Value & 0x7f); Value >>= 7; if (Value) Byte |= 0x80; emitByte(Byte); @@ -295,7 +315,7 @@ public: bool IsMore; do { - unsigned char Byte = Value & 0x7f; + uint8_t Byte = (uint8_t)(Value & 0x7f); Value >>= 7; IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; if (IsMore) Byte |= 0x80; diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 7c83e24728c40..5e730fc12cc18 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CallingConv.h" namespace llvm { class TargetRegisterInfo; @@ -33,34 +34,35 @@ public: SExt, // The value is sign extended in the location. ZExt, // The value is zero extended in the location.
AExt, // The value is extended with undefined upper bits. - BCvt // The value is bit-converted in the location. + BCvt, // The value is bit-converted in the location. + Indirect // The location contains pointer to the value. // TODO: a subset of the value is in the location. }; private: /// ValNo - This is the value number begin assigned (e.g. an argument number). unsigned ValNo; - + /// Loc is either a stack offset or a register number. unsigned Loc; - + /// isMem - True if this is a memory loc, false if it is a register loc. bool isMem : 1; - + /// isCustom - True if this arg/retval requires special handling. bool isCustom : 1; /// Information about how the value is assigned. LocInfo HTP : 6; - + /// ValVT - The type of the value being assigned. - MVT ValVT; + EVT ValVT; /// LocVT - The type of the location being assigned to. - MVT LocVT; + EVT LocVT; public: - - static CCValAssign getReg(unsigned ValNo, MVT ValVT, - unsigned RegNo, MVT LocVT, + + static CCValAssign getReg(unsigned ValNo, EVT ValVT, + unsigned RegNo, EVT LocVT, LocInfo HTP) { CCValAssign Ret; Ret.ValNo = ValNo; @@ -73,8 +75,8 @@ public: return Ret; } - static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, - unsigned RegNo, MVT LocVT, + static CCValAssign getCustomReg(unsigned ValNo, EVT ValVT, + unsigned RegNo, EVT LocVT, LocInfo HTP) { CCValAssign Ret; Ret = getReg(ValNo, ValVT, RegNo, LocVT, HTP); @@ -82,8 +84,8 @@ public: return Ret; } - static CCValAssign getMem(unsigned ValNo, MVT ValVT, - unsigned Offset, MVT LocVT, + static CCValAssign getMem(unsigned ValNo, EVT ValVT, + unsigned Offset, EVT LocVT, LocInfo HTP) { CCValAssign Ret; Ret.ValNo = ValNo; @@ -95,9 +97,9 @@ public: Ret.LocVT = LocVT; return Ret; } - - static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, - unsigned Offset, MVT LocVT, + + static CCValAssign getCustomMem(unsigned ValNo, EVT ValVT, + unsigned Offset, EVT LocVT, LocInfo HTP) { CCValAssign Ret; Ret = getMem(ValNo, ValVT, Offset, LocVT, HTP); @@ -106,57 +108,63 @@ public: } unsigned getValNo() const { return ValNo; } - MVT getValVT() const { return ValVT; } + EVT getValVT() const { return ValVT; } bool isRegLoc() const { return !isMem; } bool isMemLoc() const { return isMem; } - + bool needsCustom() const { return isCustom; } unsigned getLocReg() const { assert(isRegLoc()); return Loc; } unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; } - MVT getLocVT() const { return LocVT; } - + EVT getLocVT() const { return LocVT; } + LocInfo getLocInfo() const { return HTP; } + bool isExtInLoc() const { + return (HTP == AExt || HTP == SExt || HTP == ZExt); + } + }; /// CCAssignFn - This function assigns a location for Val, updating State to /// reflect the change. -typedef bool CCAssignFn(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, +typedef bool CCAssignFn(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); /// CCCustomFn - This function assigns a location for Val, possibly updating /// all args to reflect changes and indicates if it handled it. It must set /// isCustom if it handles the arg and returns true. -typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, +typedef bool CCCustomFn(unsigned &ValNo, EVT &ValVT, + EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); /// CCState - This class holds information needed while lowering arguments and /// return values. 
It captures which registers are already assigned and which /// stack slots are used. It provides accessors to allocate these values. class CCState { - unsigned CallingConv; + CallingConv::ID CallingConv; bool IsVarArg; const TargetMachine &TM; const TargetRegisterInfo &TRI; SmallVector<CCValAssign, 16> &Locs; - + LLVMContext &Context; + unsigned StackOffset; SmallVector<uint32_t, 16> UsedRegs; public: - CCState(unsigned CC, bool isVarArg, const TargetMachine &TM, - SmallVector<CCValAssign, 16> &locs); - + CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, + SmallVector<CCValAssign, 16> &locs, LLVMContext &C); + void addLoc(const CCValAssign &V) { Locs.push_back(V); } - + + LLVMContext &getContext() const { return Context; } const TargetMachine &getTarget() const { return TM; } - unsigned getCallingConv() const { return CallingConv; } + CallingConv::ID getCallingConv() const { return CallingConv; } bool isVarArg() const { return IsVarArg; } - + unsigned getNextStackOffset() const { return StackOffset; } /// isAllocated - Return true if the specified register (or an alias) is @@ -164,32 +172,36 @@ public: bool isAllocated(unsigned Reg) const { return UsedRegs[Reg/32] & (1 << (Reg&31)); } - - /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, + + /// AnalyzeFormalArguments - Analyze an array of argument values, /// incorporating info about the formals into this state. - void AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn); - - /// AnalyzeReturn - Analyze the returned values of an ISD::RET node, + void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn); + + /// AnalyzeReturn - Analyze the returned values of a return, /// incorporating info about the result values into this state. - void AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn); - - /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info - /// about the passed values into this state. - void AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn); + void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn); + + /// AnalyzeCallOperands - Analyze the outgoing arguments to a call, + /// incorporating info about the passed values into this state. + void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn); /// AnalyzeCallOperands - Same as above except it takes vectors of types /// and argument flags. - void AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, + void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &Flags, CCAssignFn Fn); - /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, + /// AnalyzeCallResult - Analyze the return values of a call, /// incorporating info about the passed values into this state. - void AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn); - + void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn); + /// AnalyzeCallResult - Same as above except it's specialized for calls which /// produce a single value. - void AnalyzeCallResult(MVT VT, CCAssignFn Fn); + void AnalyzeCallResult(EVT VT, CCAssignFn Fn); /// getFirstUnallocated - Return the first unallocated register in the set, or /// NumRegs if they are all allocated. @@ -199,7 +211,7 @@ public: return i; return NumRegs; } - + /// AllocateReg - Attempt to allocate one register. If it is not available, /// return zero. Otherwise, return the register, marking it and any aliases /// as allocated. @@ -258,8 +270,8 @@ public: // HandleByVal - Allocate a stack slot large enough to pass an argument by // value. The size and alignment information of the argument is encoded in its // parameter attribute.
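// [Illustrative sketch, not from the patched sources] The reworked interface
// as target lowering code would use it: callers now pass ISD::InputArg or
// ISD::OutputArg lists instead of an SDNode, and the calling convention is a
// CallingConv::ID. CC_MyTarget, TM, DAG and Ins below are hypothetical:
//
//   SmallVector<CCValAssign, 16> ArgLocs;
//   CCState CCInfo(CallingConv::C, /*isVarArg=*/false, TM, ArgLocs,
//                  *DAG.getContext());
//   CCInfo.AnalyzeFormalArguments(Ins, CC_MyTarget);  // CC_MyTarget: CCAssignFn
//   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
//     CCValAssign &VA = ArgLocs[i];
//     if (VA.isRegLoc()) { /* copy the value from VA.getLocReg() */ }
//     else               { /* load it from VA.getLocMemOffset() */ }
//   }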
- void HandleByVal(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, + void HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags); private: diff --git a/include/llvm/CodeGen/DwarfWriter.h b/include/llvm/CodeGen/DwarfWriter.h index facd5f6e6a5ff..e7a2f664eb4c5 100644 --- a/include/llvm/CodeGen/DwarfWriter.h +++ b/include/llvm/CodeGen/DwarfWriter.h @@ -33,8 +33,8 @@ class MachineFunction; class MachineInstr; class Value; class Module; -class GlobalVariable; -class TargetAsmInfo; +class MDNode; +class MCAsmInfo; class raw_ostream; class Instruction; class DICompileUnit; @@ -68,7 +68,7 @@ public: /// BeginModule - Emit all Dwarf sections that should come prior to the /// content. void BeginModule(Module *M, MachineModuleInfo *MMI, raw_ostream &OS, - AsmPrinter *A, const TargetAsmInfo *T); + AsmPrinter *A, const MCAsmInfo *T); /// EndModule - Emit all Dwarf sections that should come after the content. /// @@ -85,21 +85,20 @@ public: /// RecordSourceLine - Register a source line with debug info. Returns a /// unique label ID used to generate a label and provide correspondence to /// the source line list. - unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU); + unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); /// RecordRegionStart - Indicate the start of a region. - unsigned RecordRegionStart(GlobalVariable *V); + unsigned RecordRegionStart(MDNode *N); /// RecordRegionEnd - Indicate the end of a region. - unsigned RecordRegionEnd(GlobalVariable *V); + unsigned RecordRegionEnd(MDNode *N); /// getRecordSourceLineCount - Count source lines. unsigned getRecordSourceLineCount(); /// RecordVariable - Indicate the declaration of a local variable. /// - void RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI); + void RecordVariable(MDNode *N, unsigned FrameIndex); /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should /// be emitted. @@ -111,13 +110,10 @@ public: /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned RecordInlinedFnEnd(DISubprogram SP); - - /// RecordVariableScope - Record scope for the variable declared by - /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. - void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI); + void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L); + void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L); }; - } // end llvm namespace #endif diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index c7b1a42d06b62..1efd1e08acfab 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -91,7 +91,7 @@ public: /// bool SelectInstruction(Instruction *I); - /// SelectInstruction - Do "fast" instruction selection for the given + /// SelectOperator - Do "fast" instruction selection for the given /// LLVM IR operator (Instruction or ConstantExpr), and append /// generated machine instructions to the current block. Return true /// if selection was successful. @@ -137,24 +137,24 @@ protected: /// FastEmit_r - This method is called by target-independent code /// to request that an instruction with the given type and opcode /// be emitted. 
- virtual unsigned FastEmit_(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_(MVT VT, + MVT RetVT, ISD::NodeType Opcode); /// FastEmit_r - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and /// register operand be emitted. /// - virtual unsigned FastEmit_r(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_r(MVT VT, + MVT RetVT, ISD::NodeType Opcode, unsigned Op0); /// FastEmit_rr - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and /// register operands be emitted. /// - virtual unsigned FastEmit_rr(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_rr(MVT VT, + MVT RetVT, ISD::NodeType Opcode, unsigned Op0, unsigned Op1); @@ -162,8 +162,8 @@ protected: /// to request that an instruction with the given type, opcode, and /// register and immediate operands be emitted. /// - virtual unsigned FastEmit_ri(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_ri(MVT VT, + MVT RetVT, ISD::NodeType Opcode, unsigned Op0, uint64_t Imm); @@ -171,8 +171,8 @@ protected: /// to request that an instruction with the given type, opcode, and /// register and floating-point immediate operands be emitted. /// - virtual unsigned FastEmit_rf(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_rf(MVT VT, + MVT RetVT, ISD::NodeType Opcode, unsigned Op0, ConstantFP *FPImm); @@ -180,8 +180,8 @@ protected: /// to request that an instruction with the given type, opcode, and /// register and immediate operands be emitted. /// - virtual unsigned FastEmit_rri(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_rri(MVT VT, + MVT RetVT, ISD::NodeType Opcode, unsigned Op0, unsigned Op1, uint64_t Imm); @@ -189,33 +189,33 @@ protected: /// to emit an instruction with an immediate operand using FastEmit_ri. /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. - unsigned FastEmit_ri_(MVT::SimpleValueType VT, + unsigned FastEmit_ri_(MVT VT, ISD::NodeType Opcode, unsigned Op0, uint64_t Imm, - MVT::SimpleValueType ImmType); + MVT ImmType); /// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries /// to emit an instruction with an immediate operand using FastEmit_rf. /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. - unsigned FastEmit_rf_(MVT::SimpleValueType VT, + unsigned FastEmit_rf_(MVT VT, ISD::NodeType Opcode, unsigned Op0, ConstantFP *FPImm, - MVT::SimpleValueType ImmType); + MVT ImmType); /// FastEmit_i - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and /// immediate operand be emitted. - virtual unsigned FastEmit_i(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_i(MVT VT, + MVT RetVT, ISD::NodeType Opcode, uint64_t Imm); /// FastEmit_f - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and /// floating-point immediate operand be emitted. 
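// [Illustrative sketch, not from the patched sources] The fallback strategy
// that the FastEmit_ri_ documentation above describes, in rough pseudocode;
// the opcode and operand registers are hypothetical:
//
//   unsigned ResultReg = FastEmit_ri(VT, VT, ISD::ADD, Op0, Imm);
//   if (ResultReg == 0) {
//     // No reg+imm form: materialize the immediate, then use reg+reg.
//     unsigned ImmReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
//     if (ImmReg != 0)
//       ResultReg = FastEmit_rr(VT, VT, ISD::ADD, Op0, ImmReg);
//   }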
- virtual unsigned FastEmit_f(MVT::SimpleValueType VT, - MVT::SimpleValueType RetVT, + virtual unsigned FastEmit_f(MVT VT, + MVT RetVT, ISD::NodeType Opcode, ConstantFP *FPImm); @@ -268,12 +268,12 @@ protected: /// FastEmitInst_extractsubreg - Emit a MachineInstr for an extract_subreg /// from a specified index of a superregister to a specified type. - unsigned FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, + unsigned FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, uint32_t Idx); /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. - unsigned FastEmitZExtFromI1(MVT::SimpleValueType VT, + unsigned FastEmitZExtFromI1(MVT VT, unsigned Op); /// FastEmitBranch - Emit an unconditional branch to the given block, @@ -300,6 +300,8 @@ protected: private: bool SelectBinaryOp(User *I, ISD::NodeType ISDOpcode); + bool SelectFNeg(User *I); + bool SelectGetElementPtr(User *I); bool SelectCall(User *I); diff --git a/include/llvm/CodeGen/FileWriters.h b/include/llvm/CodeGen/FileWriters.h index b3781e0c60123..a913d21eb2061 100644 --- a/include/llvm/CodeGen/FileWriters.h +++ b/include/llvm/CodeGen/FileWriters.h @@ -17,14 +17,14 @@ namespace llvm { class PassManagerBase; - class MachineCodeEmitter; + class ObjectCodeEmitter; class TargetMachine; class raw_ostream; - MachineCodeEmitter *AddELFWriter(PassManagerBase &FPM, raw_ostream &O, - TargetMachine &TM); - MachineCodeEmitter *AddMachOWriter(PassManagerBase &FPM, raw_ostream &O, - TargetMachine &TM); + ObjectCodeEmitter *AddELFWriter(PassManagerBase &FPM, raw_ostream &O, + TargetMachine &TM); + ObjectCodeEmitter *AddMachOWriter(PassManagerBase &FPM, raw_ostream &O, + TargetMachine &TM); } // end llvm namespace diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index e94aba388a4c2..04fd8bed97242 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -42,7 +42,7 @@ namespace llvm { class AsmPrinter; class GCStrategy; class Constant; - class TargetAsmInfo; + class MCAsmInfo; namespace GC { diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h index b693b1bbfc7c4..ff1a205adbfd5 100644 --- a/include/llvm/CodeGen/GCMetadataPrinter.h +++ b/include/llvm/CodeGen/GCMetadataPrinter.h @@ -63,10 +63,10 @@ namespace llvm { /// beginAssembly/finishAssembly - Emit module metadata as assembly code. virtual void beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); virtual void finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); virtual ~GCMetadataPrinter(); }; diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h index 73197af1afdc5..180783a4d624a 100644 --- a/include/llvm/CodeGen/JITCodeEmitter.h +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -19,7 +19,7 @@ #include #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/MathExtras.h" #include "llvm/CodeGen/MachineCodeEmitter.h" using namespace std; @@ -162,17 +162,26 @@ public: /// alignment (saturated to BufferEnd of course). void emitAlignment(unsigned Alignment) { if (Alignment == 0) Alignment = 1; + uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr, + Alignment); + CurBufferPtr = std::min(NewPtr, BufferEnd); + } - if(Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) { - // Move the current buffer ptr up to the specified alignment. 
- CurBufferPtr = - (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) & - ~(uintptr_t)(Alignment-1)); - } else { + /// emitAlignmentWithFill - Similar to emitAlignment, except that the + /// extra bytes are filled with the provided byte. + void emitAlignmentWithFill(unsigned Alignment, uint8_t Fill) { + if (Alignment == 0) Alignment = 1; + uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr, + Alignment); + // Fail if we don't have room. + if (NewPtr > BufferEnd) { CurBufferPtr = BufferEnd; + return; + } + while (CurBufferPtr < NewPtr) { + *CurBufferPtr++ = Fill; } } - /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be /// written to the output stream. @@ -267,6 +276,11 @@ public: return Result; } + /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, + /// this method does not allocate memory in the current output buffer, + /// because a global may live longer than the current function. + virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0; + /// StartMachineBasicBlock - This should be called by the target when a new /// basic block is about to be emitted. This way the MCE knows where the /// start of the block is, and can implement getMachineBasicBlockAddress. @@ -285,6 +299,13 @@ public: return CurBufferPtr-BufferBegin; } + /// earlyResolveAddresses - True if the code emitter can use symbol addresses + /// during code emission time. The JIT is capable of doing this because it + /// creates jump tables or constant pools in memory on the fly while the + /// object code emitters rely on a linker to have real addresses and should + /// use relocations instead. + bool earlyResolveAddresses() const { return true; } + /// addRelocation - Whenever a relocatable address is needed, it should be /// noted with this interface. virtual void addRelocation(const MachineRelocation &MR) = 0; diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h index a231f49d81ec5..4d2d0eec52dbc 100644 --- a/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -32,9 +32,7 @@ namespace { (void) llvm::createDeadMachineInstructionElimPass(); - (void) llvm::createSimpleRegisterAllocator(); (void) llvm::createLocalRegisterAllocator(); - (void) llvm::createBigBlockRegisterAllocator(); (void) llvm::createLinearScanRegisterAllocator(); (void) llvm::createPBQPRegisterAllocator(); diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 0cb7e90043873..05bd173dd2ea2 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -21,9 +21,10 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_H #define LLVM_CODEGEN_LIVEINTERVAL_H +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" -#include +#include "llvm/Support/AlignOf.h" #include #include @@ -31,7 +32,210 @@ namespace llvm { class MachineInstr; class MachineRegisterInfo; class TargetRegisterInfo; - struct LiveInterval; + class raw_ostream; + + /// LiveIndex - An opaque wrapper around machine indexes. + class LiveIndex { + friend class VNInfo; + friend class LiveInterval; + friend class LiveIntervals; + friend struct DenseMapInfo; + + public: + + enum Slot { LOAD, USE, DEF, STORE, NUM }; + + private: + + unsigned index; + + static const unsigned PHI_BIT = 1 << 31; + + public: + + /// Construct a default LiveIndex pointing to a reserved index. 
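The emitAlignment/emitAlignmentWithFill hunk above replaces hand-rolled mask arithmetic with RoundUpToAlignment and saturates CurBufferPtr at BufferEnd when the buffer is too small. A standalone sketch of the same round-up-and-fill logic, with illustrative names rather than the LLVM API; Align is assumed to be a power of two:

    #include <stdint.h>

    static uintptr_t roundUp(uintptr_t V, uintptr_t Align) {
      return (V + Align - 1) & ~(Align - 1);   // power-of-two alignment only
    }

    // Advance Cur to the next Align boundary, writing Fill into the gap.
    // On overflow, saturate to End so the caller can detect out-of-space.
    static uint8_t *alignWithFill(uint8_t *Cur, uint8_t *End,
                                  unsigned Align, uint8_t Fill) {
      if (Align == 0) Align = 1;
      uint8_t *New = (uint8_t*)roundUp((uintptr_t)Cur, Align);
      if (New > End) return End;
      while (Cur < New) *Cur++ = Fill;
      return Cur;
    }
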
+ LiveIndex() : index(0) {} + + /// Construct an index from the given index, pointing to the given slot. + LiveIndex(LiveIndex m, Slot s) + : index((m.index / NUM) * NUM + s) {} + + /// Print this index to the given raw_ostream. + void print(raw_ostream &os) const; + + /// Compare two LiveIndex objects for equality. + bool operator==(LiveIndex other) const { + return ((index & ~PHI_BIT) == (other.index & ~PHI_BIT)); + } + /// Compare two LiveIndex objects for inequality. + bool operator!=(LiveIndex other) const { + return ((index & ~PHI_BIT) != (other.index & ~PHI_BIT)); + } + + /// Compare two LiveIndex objects. Return true if the first index + /// is strictly lower than the second. + bool operator<(LiveIndex other) const { + return ((index & ~PHI_BIT) < (other.index & ~PHI_BIT)); + } + /// Compare two LiveIndex objects. Return true if the first index + /// is lower than, or equal to, the second. + bool operator<=(LiveIndex other) const { + return ((index & ~PHI_BIT) <= (other.index & ~PHI_BIT)); + } + + /// Compare two LiveIndex objects. Return true if the first index + /// is greater than the second. + bool operator>(LiveIndex other) const { + return ((index & ~PHI_BIT) > (other.index & ~PHI_BIT)); + } + + /// Compare two LiveIndex objects. Return true if the first index + /// is greater than, or equal to, the second. + bool operator>=(LiveIndex other) const { + return ((index & ~PHI_BIT) >= (other.index & ~PHI_BIT)); + } + + /// Returns true if this index represents a load. + bool isLoad() const { + return ((index % NUM) == LOAD); + } + + /// Returns true if this index represents a use. + bool isUse() const { + return ((index % NUM) == USE); + } + + /// Returns true if this index represents a def. + bool isDef() const { + return ((index % NUM) == DEF); + } + + /// Returns true if this index represents a store. + bool isStore() const { + return ((index % NUM) == STORE); + } + + /// Returns the slot for this LiveIndex. + Slot getSlot() const { + return static_cast(index % NUM); + } + + /// Returns true if this index represents a non-PHI use/def. + bool isNonPHIIndex() const { + return ((index & PHI_BIT) == 0); + } + + /// Returns true if this index represents a PHI use/def. + bool isPHIIndex() const { + return ((index & PHI_BIT) == PHI_BIT); + } + + private: + + /// Construct an index from the given index, with its PHI kill marker set. 
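To make the LiveIndex arithmetic above concrete: each instruction owns NUM consecutive slots (LOAD, USE, DEF, STORE), so an index decomposes into an instruction number and a slot, and the top bit is reserved to mark PHI indexes, which is why every comparison masks PHI_BIT off. A small self-contained sketch of the encoding:

    #include <cassert>

    enum Slot { LOAD, USE, DEF, STORE, NUM };
    static const unsigned PHI_BIT = 1u << 31;

    unsigned makeIndex(unsigned InstrNo, Slot S) { return InstrNo * NUM + S; }
    Slot     slotOf(unsigned Idx)  { return Slot((Idx & ~PHI_BIT) % NUM); }
    unsigned instrOf(unsigned Idx) { return (Idx & ~PHI_BIT) / NUM; }

    int main() {
      unsigned I = makeIndex(7, DEF);          // DEF slot of instruction #7
      assert(instrOf(I) == 7 && slotOf(I) == DEF);
      assert(slotOf(I | PHI_BIT) == DEF);      // the PHI bit is ignored
      return 0;
    }
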
+ LiveIndex(bool phi, LiveIndex o) : index(o.index) { + if (phi) + index |= PHI_BIT; + else + index &= ~PHI_BIT; + } + + explicit LiveIndex(unsigned idx) + : index(idx & ~PHI_BIT) {} + + LiveIndex(bool phi, unsigned idx) + : index(idx & ~PHI_BIT) { + if (phi) + index |= PHI_BIT; + } + + LiveIndex(bool phi, unsigned idx, Slot slot) + : index(((idx / NUM) * NUM + slot) & ~PHI_BIT) { + if (phi) + index |= PHI_BIT; + } + + LiveIndex nextSlot_() const { + assert((index & PHI_BIT) == ((index + 1) & PHI_BIT) && + "Index out of bounds."); + return LiveIndex(index + 1); + } + + LiveIndex nextIndex_() const { + assert((index & PHI_BIT) == ((index + NUM) & PHI_BIT) && + "Index out of bounds."); + return LiveIndex(index + NUM); + } + + LiveIndex prevSlot_() const { + assert((index & PHI_BIT) == ((index - 1) & PHI_BIT) && + "Index out of bounds."); + return LiveIndex(index - 1); + } + + LiveIndex prevIndex_() const { + assert((index & PHI_BIT) == ((index - NUM) & PHI_BIT) && + "Index out of bounds."); + return LiveIndex(index - NUM); + } + + int distance(LiveIndex other) const { + return (other.index & ~PHI_BIT) - (index & ~PHI_BIT); + } + + /// Returns an unsigned number suitable as an index into a + /// vector over all instructions. + unsigned getVecIndex() const { + return (index & ~PHI_BIT) / NUM; + } + + /// Scale this index by the given factor. + LiveIndex scale(unsigned factor) const { + unsigned i = (index & ~PHI_BIT) / NUM, + o = (index % ~PHI_BIT) % NUM; + assert(index <= (~0U & ~PHI_BIT) / (factor * NUM) && + "Rescaled interval would overflow"); + return LiveIndex(i * NUM * factor, o); + } + + static LiveIndex emptyKey() { + return LiveIndex(true, 0x7fffffff); + } + + static LiveIndex tombstoneKey() { + return LiveIndex(true, 0x7ffffffe); + } + + static unsigned getHashValue(const LiveIndex &v) { + return v.index * 37; + } + + }; + + inline raw_ostream& operator<<(raw_ostream &os, LiveIndex mi) { + mi.print(os); + return os; + } + + /// Densemap specialization for LiveIndex. + template <> + struct DenseMapInfo { + static inline LiveIndex getEmptyKey() { + return LiveIndex::emptyKey(); + } + static inline LiveIndex getTombstoneKey() { + return LiveIndex::tombstoneKey(); + } + static inline unsigned getHashValue(const LiveIndex &v) { + return LiveIndex::getHashValue(v); + } + static inline bool isEqual(const LiveIndex &LHS, + const LiveIndex &RHS) { + return (LHS == RHS); + } + static inline bool isPod() { return true; } + }; + /// VNInfo - Value Number Information. /// This class holds information about a machine level values, including @@ -48,7 +252,6 @@ namespace llvm { /// index of the MBB in which the PHI originally existed. This can be used /// to insert code (spills or copies) which deals with the value, which will /// be live in to the block. - class VNInfo { private: enum { @@ -60,36 +263,70 @@ namespace llvm { }; unsigned char flags; + union { + MachineInstr *copy; + unsigned reg; + } cr; public: + + typedef SmallVector KillSet; + /// The ID number of this value. unsigned id; /// The index of the defining instruction (if isDefAccurate() returns true). - unsigned def; - MachineInstr *copy; - SmallVector kills; + LiveIndex def; + + KillSet kills; VNInfo() - : flags(IS_UNUSED), id(~1U), def(0), copy(0) {} + : flags(IS_UNUSED), id(~1U) { cr.copy = 0; } /// VNInfo constructor. /// d is presumed to point to the actual defining instr. If it doesn't /// setIsDefAccurate(false) should be called after construction. 
-    VNInfo(unsigned i, unsigned d, MachineInstr *c)
-      : flags(IS_DEF_ACCURATE), id(i), def(d), copy(c) {}
+    VNInfo(unsigned i, LiveIndex d, MachineInstr *c)
+      : flags(IS_DEF_ACCURATE), id(i), def(d) { cr.copy = c; }
 
     /// VNInfo constructor, copies values from orig, except for the value number.
     VNInfo(unsigned i, const VNInfo &orig)
-      : flags(orig.flags), id(i), def(orig.def), copy(orig.copy),
-        kills(orig.kills) {}
+      : flags(orig.flags), cr(orig.cr), id(i), def(orig.def), kills(orig.kills)
+    { }
+
+    /// Copy from the parameter into this VNInfo.
+    void copyFrom(VNInfo &src) {
+      flags = src.flags;
+      cr = src.cr;
+      def = src.def;
+      kills = src.kills;
+    }
 
     /// Used for copying value number info.
     unsigned getFlags() const { return flags; }
     void setFlags(unsigned flags) { this->flags = flags; }
 
+    /// For a register interval, if this VN was defined by a copy instr
+    /// getCopy() returns a pointer to it, otherwise returns 0.
+    /// For a stack interval the behavior of this method is undefined.
+    MachineInstr* getCopy() const { return cr.copy; }
+    /// For a register interval, set the copy member.
+    /// This method should not be called on stack intervals as it may lead to
+    /// undefined behavior.
+    void setCopy(MachineInstr *c) { cr.copy = c; }
+
+    /// For a stack interval, returns the reg which this stack interval was
+    /// defined from.
+    /// For a register interval the behavior of this method is undefined.
+    unsigned getReg() const { return cr.reg; }
+    /// For a stack interval, set the defining register.
+    /// This method should not be called on register intervals as it may lead
+    /// to undefined behavior.
+    void setReg(unsigned reg) { cr.reg = reg; }
+
     /// Returns true if one or more kills are PHI nodes.
     bool hasPHIKill() const { return flags & HAS_PHI_KILL; }
+    /// Set the PHI kill flag on this value.
     void setHasPHIKill(bool hasKill) {
       if (hasKill)
         flags |= HAS_PHI_KILL;
@@ -100,16 +337,18 @@ namespace llvm {
     /// Returns true if this value is re-defined by an early clobber somewhere
     /// during the live range.
     bool hasRedefByEC() const { return flags & REDEF_BY_EC; }
+    /// Set the "redef by early clobber" flag on this value.
     void setHasRedefByEC(bool hasRedef) {
       if (hasRedef)
         flags |= REDEF_BY_EC;
       else
         flags &= ~REDEF_BY_EC;
     }
-
+
     /// Returns true if this value is defined by a PHI instruction (or was,
     /// PHI instructions may have been eliminated).
     bool isPHIDef() const { return flags & IS_PHI_DEF; }
+    /// Set the "phi def" flag on this value.
     void setIsPHIDef(bool phiDef) {
       if (phiDef)
         flags |= IS_PHI_DEF;
@@ -119,6 +358,7 @@ namespace llvm {
     /// Returns true if this value is unused.
     bool isUnused() const { return flags & IS_UNUSED; }
+    /// Set the "is unused" flag on this value.
     void setIsUnused(bool unused) {
       if (unused)
         flags |= IS_UNUSED;
@@ -128,6 +368,7 @@ namespace llvm {
     /// Returns true if the def is accurate.
     bool isDefAccurate() const { return flags & IS_DEF_ACCURATE; }
+    /// Set the "is def accurate" flag on this value.
     void setIsDefAccurate(bool defAccurate) {
       if (defAccurate)
         flags |= IS_DEF_ACCURATE;
@@ -135,26 +376,74 @@ namespace llvm {
       flags &= ~IS_DEF_ACCURATE;
     }
 
+    /// Returns true if the given index is a kill of this value.
+    bool isKill(LiveIndex k) const {
+      KillSet::const_iterator
+        i = std::lower_bound(kills.begin(), kills.end(), k);
+      return (i != kills.end() && *i == k);
+    }
+
+    /// addKill - Add a kill instruction index to the specified value
+    /// number.
+ void addKill(LiveIndex k) { + if (kills.empty()) { + kills.push_back(k); + } else { + KillSet::iterator + i = std::lower_bound(kills.begin(), kills.end(), k); + kills.insert(i, k); + } + } + + /// Remove the specified kill index from this value's kills list. + /// Returns true if the value was present, otherwise returns false. + bool removeKill(LiveIndex k) { + KillSet::iterator i = std::lower_bound(kills.begin(), kills.end(), k); + if (i != kills.end() && *i == k) { + kills.erase(i); + return true; + } + return false; + } + + /// Remove all kills in the range [s, e). + void removeKills(LiveIndex s, LiveIndex e) { + KillSet::iterator + si = std::lower_bound(kills.begin(), kills.end(), s), + se = std::upper_bound(kills.begin(), kills.end(), e); + + kills.erase(si, se); + } + }; /// LiveRange structure - This represents a simple register range in the /// program, with an inclusive start point and an exclusive end point. /// These ranges are rendered as [start,end). struct LiveRange { - unsigned start; // Start point of the interval (inclusive) - unsigned end; // End point of the interval (exclusive) + LiveIndex start; // Start point of the interval (inclusive) + LiveIndex end; // End point of the interval (exclusive) VNInfo *valno; // identifier for the value contained in this interval. - LiveRange(unsigned S, unsigned E, VNInfo *V) : start(S), end(E), valno(V) { + LiveRange(LiveIndex S, LiveIndex E, VNInfo *V) + : start(S), end(E), valno(V) { + assert(S < E && "Cannot create empty or backwards range"); } /// contains - Return true if the index is covered by this range. /// - bool contains(unsigned I) const { + bool contains(LiveIndex I) const { return start <= I && I < end; } + /// containsRange - Return true if the given range, [S, E), is covered by + /// this range. + bool containsRange(LiveIndex S, LiveIndex E) const { + assert((S < E) && "Backwards interval?"); + return (start <= S && S < end) && (start < E && E <= end); + } + bool operator<(const LiveRange &LR) const { return start < LR.start || (start == LR.start && end < LR.end); } @@ -163,28 +452,29 @@ namespace llvm { } void dump() const; - void print(std::ostream &os) const; - void print(std::ostream *os) const { if (os) print(*os); } + void print(raw_ostream &os) const; private: LiveRange(); // DO NOT IMPLEMENT }; - std::ostream& operator<<(std::ostream& os, const LiveRange &LR); + raw_ostream& operator<<(raw_ostream& os, const LiveRange &LR); - inline bool operator<(unsigned V, const LiveRange &LR) { + inline bool operator<(LiveIndex V, const LiveRange &LR) { return V < LR.start; } - inline bool operator<(const LiveRange &LR, unsigned V) { + inline bool operator<(const LiveRange &LR, LiveIndex V) { return LR.start < V; } /// LiveInterval - This class represents some number of live ranges for a /// register or value. This class also contains a bit of register allocator /// state. 
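The kill-set helpers earlier in this chunk (isKill, addKill, removeKill, removeKills) all rely on one invariant: the kills vector stays sorted, so every query is a binary search via std::lower_bound/std::upper_bound. The same idiom over a plain std::vector, as a minimal sketch:

    #include <algorithm>
    #include <vector>

    typedef std::vector<unsigned> KillSet;

    void addSorted(KillSet &Kills, unsigned K) {
      // Insert before the first element not less than K; preserves order.
      Kills.insert(std::lower_bound(Kills.begin(), Kills.end(), K), K);
    }

    bool isKill(const KillSet &Kills, unsigned K) {
      KillSet::const_iterator I =
          std::lower_bound(Kills.begin(), Kills.end(), K);
      return I != Kills.end() && *I == K;
    }

    bool removeKill(KillSet &Kills, unsigned K) {
      KillSet::iterator I = std::lower_bound(Kills.begin(), Kills.end(), K);
      if (I == Kills.end() || *I != K) return false;
      Kills.erase(I);   // only erase an exact match
      return true;
    }
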
-  struct LiveInterval {
+  class LiveInterval {
+  public:
+
     typedef SmallVector<LiveRange,4> Ranges;
     typedef SmallVector<VNInfo*,4> VNInfoList;
 
@@ -193,8 +483,6 @@ namespace llvm {
     float weight;        // weight of this interval
     Ranges ranges;       // the ranges in which this register is live
     VNInfoList valnos;   // value#'s
-
-  public:
 
     struct InstrSlots {
       enum {
@@ -205,14 +493,6 @@ namespace llvm {
         NUM   = 4
       };
-      static unsigned scale(unsigned slot, unsigned factor) {
-        unsigned index = slot / NUM,
-                 offset = slot % NUM;
-        assert(index <= ~0U / (factor * NUM) &&
-               "Rescaled interval would overflow");
-        return index * NUM * factor + offset;
-      }
-
     };
 
     LiveInterval(unsigned Reg, float Weight, bool IsSS = false)
@@ -242,8 +522,8 @@ namespace llvm {
     /// end of the interval. If no LiveRange contains this position, but the
     /// position is in a hole, this method returns an iterator pointing to the
     /// LiveRange immediately after the hole.
-    iterator advanceTo(iterator I, unsigned Pos) {
-      if (Pos >= endNumber())
+    iterator advanceTo(iterator I, LiveIndex Pos) {
+      if (Pos >= endIndex())
         return end();
       while (I->end <= Pos) ++I;
       return I;
     }
@@ -286,33 +566,15 @@ namespace llvm {
     inline const VNInfo *getValNumInfo(unsigned ValNo) const {
       return valnos[ValNo];
     }
-
-    /// copyValNumInfo - Copy the value number info for one value number to
-    /// another.
-    void copyValNumInfo(VNInfo *DstValNo, const VNInfo *SrcValNo) {
-      DstValNo->def = SrcValNo->def;
-      DstValNo->copy = SrcValNo->copy;
-      DstValNo->setFlags(SrcValNo->getFlags());
-      DstValNo->kills = SrcValNo->kills;
-    }
 
     /// getNextValue - Create a new value number and return it. MIIdx specifies
     /// the instruction that defines the value number.
-    VNInfo *getNextValue(unsigned MIIdx, MachineInstr *CopyMI,
-                         bool isDefAccurate, BumpPtrAllocator &VNInfoAllocator) {
-
-      assert(MIIdx != ~0u && MIIdx != ~1u &&
-             "PHI def / unused flags should now be passed explicitly.");
-#ifdef __GNUC__
-      unsigned Alignment = (unsigned)__alignof__(VNInfo);
-#else
-      // FIXME: ugly.
-      unsigned Alignment = 8;
-#endif
+    VNInfo *getNextValue(LiveIndex def, MachineInstr *CopyMI,
+                         bool isDefAccurate, BumpPtrAllocator &VNInfoAllocator){
      VNInfo *VNI =
        static_cast<VNInfo*>(VNInfoAllocator.Allocate((unsigned)sizeof(VNInfo),
-                                                      Alignment));
-      new (VNI) VNInfo((unsigned)valnos.size(), MIIdx, CopyMI);
+                                                      alignof<VNInfo>()));
+      new (VNI) VNInfo((unsigned)valnos.size(), def, CopyMI);
       VNI->setIsDefAccurate(isDefAccurate);
       valnos.push_back(VNI);
       return VNI;
@@ -320,86 +582,31 @@ namespace llvm {
 
     /// Create a copy of the given value. The new value will be identical except
     /// for the Value number.
-    VNInfo *createValueCopy(const VNInfo *orig, BumpPtrAllocator &VNInfoAllocator) {
-
-#ifdef __GNUC__
-      unsigned Alignment = (unsigned)__alignof__(VNInfo);
-#else
-      // FIXME: ugly.
-      unsigned Alignment = 8;
-#endif
+    VNInfo *createValueCopy(const VNInfo *orig,
+                            BumpPtrAllocator &VNInfoAllocator) {
      VNInfo *VNI =
        static_cast<VNInfo*>(VNInfoAllocator.Allocate((unsigned)sizeof(VNInfo),
-                                                      Alignment));
+                                                      alignof<VNInfo>()));
       new (VNI) VNInfo((unsigned)valnos.size(), *orig);
       valnos.push_back(VNI);
       return VNI;
     }
 
-    /// addKill - Add a kill instruction index to the specified value
-    /// number.
-    static void addKill(VNInfo *VNI, unsigned KillIdx) {
-      SmallVector<unsigned, 4> &kills = VNI->kills;
-      if (kills.empty()) {
-        kills.push_back(KillIdx);
-      } else {
-        SmallVector<unsigned, 4>::iterator
-          I = std::lower_bound(kills.begin(), kills.end(), KillIdx);
-        kills.insert(I, KillIdx);
-      }
-    }
-
     /// addKills - Add a number of kills into the VNInfo kill vector. If this
    /// interval is live at a kill point, then the kill is not added.
-    void addKills(VNInfo *VNI, const SmallVector<unsigned, 4> &kills) {
+    void addKills(VNInfo *VNI, const VNInfo::KillSet &kills) {
       for (unsigned i = 0, e = static_cast<unsigned>(kills.size());
            i != e; ++i) {
-        unsigned KillIdx = kills[i];
-        if (!liveBeforeAndAt(KillIdx)) {
-          SmallVector<unsigned, 4>::iterator
-            I = std::lower_bound(VNI->kills.begin(), VNI->kills.end(), KillIdx);
-          VNI->kills.insert(I, KillIdx);
+        if (!liveBeforeAndAt(kills[i])) {
+          VNI->addKill(kills[i]);
         }
       }
     }
 
-    /// removeKill - Remove the specified kill from the list of kills of
-    /// the specified val#.
-    static bool removeKill(VNInfo *VNI, unsigned KillIdx) {
-      SmallVector<unsigned, 4> &kills = VNI->kills;
-      SmallVector<unsigned, 4>::iterator
-        I = std::lower_bound(kills.begin(), kills.end(), KillIdx);
-      if (I != kills.end() && *I == KillIdx) {
-        kills.erase(I);
-        return true;
-      }
-      return false;
-    }
-
-    /// removeKills - Remove all the kills in specified range
-    /// [Start, End] of the specified val#.
-    static void removeKills(VNInfo *VNI, unsigned Start, unsigned End) {
-      SmallVector<unsigned, 4> &kills = VNI->kills;
-      SmallVector<unsigned, 4>::iterator
-        I = std::lower_bound(kills.begin(), kills.end(), Start);
-      SmallVector<unsigned, 4>::iterator
-        E = std::upper_bound(kills.begin(), kills.end(), End);
-      kills.erase(I, E);
-    }
-
-    /// isKill - Return true if the specified index is a kill of the
-    /// specified val#.
-    static bool isKill(const VNInfo *VNI, unsigned KillIdx) {
-      const SmallVector<unsigned, 4> &kills = VNI->kills;
-      SmallVector<unsigned, 4>::const_iterator
-        I = std::lower_bound(kills.begin(), kills.end(), KillIdx);
-      return I != kills.end() && *I == KillIdx;
-    }
-
     /// isOnlyLROfValNo - Return true if the specified live range is the only
     /// one defined by its val#.
-    bool isOnlyLROfValNo( const LiveRange *LR) {
+    bool isOnlyLROfValNo(const LiveRange *LR) {
       for (const_iterator I = begin(), E = end(); I != E; ++I) {
         const LiveRange *Tmp = I;
         if (Tmp != LR && Tmp->valno == LR->valno)
@@ -423,7 +630,8 @@ namespace llvm {
 
     /// MergeInClobberRange - Same as MergeInClobberRanges except it merges in
     /// a single LiveRange only.
-    void MergeInClobberRange(unsigned Start, unsigned End,
+    void MergeInClobberRange(LiveIndex Start,
+                             LiveIndex End,
                              BumpPtrAllocator &VNInfoAllocator);
 
     /// MergeValueInAsValue - Merge all of the live ranges of a specific val#
@@ -448,51 +656,62 @@ namespace llvm {
 
     bool empty() const { return ranges.empty(); }
 
-    /// beginNumber - Return the lowest numbered slot covered by interval.
-    unsigned beginNumber() const {
+    /// beginIndex - Return the lowest numbered slot covered by interval.
+    LiveIndex beginIndex() const {
       if (empty())
-        return 0;
+        return LiveIndex();
       return ranges.front().start;
     }
 
     /// endNumber - return the maximum point of the interval of the whole,
     /// exclusive.
-    unsigned endNumber() const {
+    LiveIndex endIndex() const {
       if (empty())
-        return 0;
+        return LiveIndex();
       return ranges.back().end;
     }
 
-    bool expiredAt(unsigned index) const {
-      return index >= endNumber();
+    bool expiredAt(LiveIndex index) const {
+      return index >= endIndex();
     }
 
-    bool liveAt(unsigned index) const;
+    bool liveAt(LiveIndex index) const;
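The liveness queries above are easiest to reason about by remembering that every LiveRange is half-open, [start, end): the start slot is covered, the end slot is not, which is why liveBeforeAndAt below has to look at index-1 explicitly. A tiny sketch of the containment rules:

    #include <cassert>

    struct Range { unsigned start, end; };    // represents [start, end)

    bool contains(const Range &R, unsigned I) {
      return R.start <= I && I < R.end;       // end is exclusive
    }

    bool containsRange(const Range &R, unsigned S, unsigned E) {
      assert(S < E && "backwards or empty query range");
      return R.start <= S && E <= R.end;      // S < R.end follows from S < E
    }

    int main() {
      Range R = { 4, 8 };
      assert(contains(R, 4) && !contains(R, 8));
      assert(containsRange(R, 5, 8));
      return 0;
    }
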
    // liveBeforeAndAt - Check if the interval is live at the index and the
    // index just before it. If index is liveAt, check if it starts a new live
    // range. If it does, then check if the previous live range ends at index-1.
-    bool liveBeforeAndAt(unsigned index) const;
+    bool liveBeforeAndAt(LiveIndex index) const;
 
     /// getLiveRangeContaining - Return the live range that contains the
     /// specified index, or null if there is none.
-    const LiveRange *getLiveRangeContaining(unsigned Idx) const {
+    const LiveRange *getLiveRangeContaining(LiveIndex Idx) const {
       const_iterator I = FindLiveRangeContaining(Idx);
       return I == end() ? 0 : &*I;
     }
 
+    /// getLiveRangeContaining - Return the live range that contains the
+    /// specified index, or null if there is none.
+    LiveRange *getLiveRangeContaining(LiveIndex Idx) {
+      iterator I = FindLiveRangeContaining(Idx);
+      return I == end() ? 0 : &*I;
+    }
+
     /// FindLiveRangeContaining - Return an iterator to the live range that
     /// contains the specified index, or end() if there is none.
-    const_iterator FindLiveRangeContaining(unsigned Idx) const;
+    const_iterator FindLiveRangeContaining(LiveIndex Idx) const;
 
     /// FindLiveRangeContaining - Return an iterator to the live range that
     /// contains the specified index, or end() if there is none.
-    iterator FindLiveRangeContaining(unsigned Idx);
+    iterator FindLiveRangeContaining(LiveIndex Idx);
+
+    /// findDefinedVNInfoForRegInt - Find the VNInfo defined by the specified
+    /// index (register interval).
+    VNInfo *findDefinedVNInfoForRegInt(LiveIndex Idx) const;
+
+    /// findDefinedVNInfoForStackInt - Find the VNInfo defined by the specified
+    /// register (stack interval only).
+    VNInfo *findDefinedVNInfoForStackInt(unsigned Reg) const;
 
-    /// findDefinedVNInfo - Find the VNInfo that's defined at the specified
-    /// index (register interval) or defined by the specified register (stack
-    /// inteval).
-    VNInfo *findDefinedVNInfo(unsigned DefIdxOrReg) const;
 
     /// overlaps - Return true if the intersection of the two live intervals is
     /// not empty.
@@ -502,7 +721,7 @@ namespace llvm {
 
     /// overlaps - Return true if the live interval overlaps a range specified
     /// by [Start, End).
-    bool overlaps(unsigned Start, unsigned End) const;
+    bool overlaps(LiveIndex Start, LiveIndex End) const;
 
     /// overlapsFrom - Return true if the intersection of the two live intervals
     /// is not empty. The specified iterator is a hint that we can begin
@@ -526,11 +745,12 @@ namespace llvm {
 
     /// isInOneLiveRange - Return true if the range specified is entirely in
     /// a single LiveRange of the live interval.
-    bool isInOneLiveRange(unsigned Start, unsigned End);
+    bool isInOneLiveRange(LiveIndex Start, LiveIndex End);
 
     /// removeRange - Remove the specified range from this interval. Note that
     /// the range must be a single LiveRange in its entirety.
-    void removeRange(unsigned Start, unsigned End, bool RemoveDeadValNo = false);
+    void removeRange(LiveIndex Start, LiveIndex End,
+                     bool RemoveDeadValNo = false);
 
     void removeRange(LiveRange LR, bool RemoveDeadValNo = false) {
       removeRange(LR.start, LR.end, RemoveDeadValNo);
@@ -548,24 +768,30 @@ namespace llvm {
     ///
     unsigned getSize() const;
 
+    /// ComputeJoinedWeight - Set the weight of a live interval after
+    /// Other has been merged into it.
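FindLiveRangeContaining, declared above, is conceptually a binary search over the sorted, non-overlapping range vector: find the first range that starts after Idx, step back one, and test coverage. A sketch of that lookup, not the LLVM implementation:

    #include <algorithm>
    #include <vector>

    struct Range { unsigned start, end; };    // sorted by start, disjoint

    static bool startsAfter(unsigned V, const Range &R) { return V < R.start; }

    const Range *findContaining(const std::vector<Range> &Rs, unsigned Idx) {
      std::vector<Range>::const_iterator I =
          std::upper_bound(Rs.begin(), Rs.end(), Idx, startsAfter);
      if (I == Rs.begin())
        return 0;                             // Idx precedes every range
      --I;                                    // last range with start <= Idx
      return Idx < I->end ? &*I : 0;          // covered only if inside [start,end)
    }
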
+ void ComputeJoinedWeight(const LiveInterval &Other); + bool operator<(const LiveInterval& other) const { - return beginNumber() < other.beginNumber(); + const LiveIndex &thisIndex = beginIndex(); + const LiveIndex &otherIndex = other.beginIndex(); + return (thisIndex < otherIndex || + (thisIndex == otherIndex && reg < other.reg)); } - void print(std::ostream &OS, const TargetRegisterInfo *TRI = 0) const; - void print(std::ostream *OS, const TargetRegisterInfo *TRI = 0) const { - if (OS) print(*OS, TRI); - } + void print(raw_ostream &OS, const TargetRegisterInfo *TRI = 0) const; void dump() const; private: + Ranges::iterator addRangeFrom(LiveRange LR, Ranges::iterator From); - void extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd); - Ranges::iterator extendIntervalStartTo(Ranges::iterator I, unsigned NewStr); + void extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd); + Ranges::iterator extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStr); LiveInterval& operator=(const LiveInterval& rhs); // DO NOT IMPLEMENT + }; - inline std::ostream &operator<<(std::ostream &OS, const LiveInterval &LI) { + inline raw_ostream &operator<<(raw_ostream &OS, const LiveInterval &LI) { LI.print(OS); return OS; } diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 7ae98bb093d6a..511db6db10c9a 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -20,6 +20,7 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H #define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/ADT/BitVector.h" @@ -39,13 +40,13 @@ namespace llvm { class TargetInstrInfo; class TargetRegisterClass; class VirtRegMap; - typedef std::pair IdxMBBPair; + typedef std::pair IdxMBBPair; - inline bool operator<(unsigned V, const IdxMBBPair &IM) { + inline bool operator<(LiveIndex V, const IdxMBBPair &IM) { return V < IM.first; } - inline bool operator<(const IdxMBBPair &IM, unsigned V) { + inline bool operator<(const IdxMBBPair &IM, LiveIndex V) { return IM.first < V; } @@ -70,7 +71,7 @@ namespace llvm { /// MBB2IdxMap - The indexes of the first and last instructions in the /// specified basic block. - std::vector > MBB2IdxMap; + std::vector > MBB2IdxMap; /// Idx2MBBMap - Sorted list of pairs of index of first instruction /// and MBB id. @@ -79,7 +80,7 @@ namespace llvm { /// FunctionSize - The number of instructions present in the function uint64_t FunctionSize; - typedef DenseMap Mi2IndexMap; + typedef DenseMap Mi2IndexMap; Mi2IndexMap mi2iMap_; typedef std::vector Index2MiMap; @@ -88,9 +89,16 @@ namespace llvm { typedef DenseMap Reg2IntervalMap; Reg2IntervalMap r2iMap_; + DenseMap terminatorGaps; + + /// phiJoinCopies - Copy instructions which are PHI joins. + SmallVector phiJoinCopies; + + /// allocatableRegs_ - A bit vector of allocatable registers. BitVector allocatableRegs_; - std::vector ClonedMIs; + /// CloneMIs - A list of clones as result of re-materialization. 
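The new LiveInterval::operator< above compares start indexes and then breaks ties on the register number, so two intervals with identical begin points still sort in one deterministic total order, which matters for reproducible allocation. The generic idiom, sketched:

    struct Interval { unsigned begin, reg; };

    bool operator<(const Interval &A, const Interval &B) {
      if (A.begin != B.begin)
        return A.begin < B.begin;   // primary key: start index
      return A.reg < B.reg;         // tie-break: register number
    }
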
+ std::vector CloneMIs; typedef LiveInterval::InstrSlots InstrSlots; @@ -98,23 +106,40 @@ namespace llvm { static char ID; // Pass identification, replacement for typeid LiveIntervals() : MachineFunctionPass(&ID) {} - static unsigned getBaseIndex(unsigned index) { - return index - (index % InstrSlots::NUM); + LiveIndex getBaseIndex(LiveIndex index) { + return LiveIndex(index, LiveIndex::LOAD); + } + LiveIndex getBoundaryIndex(LiveIndex index) { + return LiveIndex(index, + (LiveIndex::Slot)(LiveIndex::NUM - 1)); + } + LiveIndex getLoadIndex(LiveIndex index) { + return LiveIndex(index, LiveIndex::LOAD); } - static unsigned getBoundaryIndex(unsigned index) { - return getBaseIndex(index + InstrSlots::NUM - 1); + LiveIndex getUseIndex(LiveIndex index) { + return LiveIndex(index, LiveIndex::USE); } - static unsigned getLoadIndex(unsigned index) { - return getBaseIndex(index) + InstrSlots::LOAD; + LiveIndex getDefIndex(LiveIndex index) { + return LiveIndex(index, LiveIndex::DEF); } - static unsigned getUseIndex(unsigned index) { - return getBaseIndex(index) + InstrSlots::USE; + LiveIndex getStoreIndex(LiveIndex index) { + return LiveIndex(index, LiveIndex::STORE); + } + + LiveIndex getNextSlot(LiveIndex m) const { + return m.nextSlot_(); + } + + LiveIndex getNextIndex(LiveIndex m) const { + return m.nextIndex_(); } - static unsigned getDefIndex(unsigned index) { - return getBaseIndex(index) + InstrSlots::DEF; + + LiveIndex getPrevSlot(LiveIndex m) const { + return m.prevSlot_(); } - static unsigned getStoreIndex(unsigned index) { - return getBaseIndex(index) + InstrSlots::STORE; + + LiveIndex getPrevIndex(LiveIndex m) const { + return m.prevIndex_(); } static float getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { @@ -147,20 +172,20 @@ namespace llvm { /// getMBBStartIdx - Return the base index of the first instruction in the /// specified MachineBasicBlock. - unsigned getMBBStartIdx(MachineBasicBlock *MBB) const { + LiveIndex getMBBStartIdx(MachineBasicBlock *MBB) const { return getMBBStartIdx(MBB->getNumber()); } - unsigned getMBBStartIdx(unsigned MBBNo) const { + LiveIndex getMBBStartIdx(unsigned MBBNo) const { assert(MBBNo < MBB2IdxMap.size() && "Invalid MBB number!"); return MBB2IdxMap[MBBNo].first; } /// getMBBEndIdx - Return the store index of the last instruction in the /// specified MachineBasicBlock. - unsigned getMBBEndIdx(MachineBasicBlock *MBB) const { + LiveIndex getMBBEndIdx(MachineBasicBlock *MBB) const { return getMBBEndIdx(MBB->getNumber()); } - unsigned getMBBEndIdx(unsigned MBBNo) const { + LiveIndex getMBBEndIdx(unsigned MBBNo) const { assert(MBBNo < MBB2IdxMap.size() && "Invalid MBB number!"); return MBB2IdxMap[MBBNo].second; } @@ -181,7 +206,7 @@ namespace llvm { /// getMBBFromIndex - given an index in any instruction of an /// MBB return a pointer the MBB - MachineBasicBlock* getMBBFromIndex(unsigned index) const { + MachineBasicBlock* getMBBFromIndex(LiveIndex index) const { std::vector::const_iterator I = std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), index); // Take the pair containing the index @@ -189,14 +214,14 @@ namespace llvm { ((I != Idx2MBBMap.end() && I->first > index) || (I == Idx2MBBMap.end() && Idx2MBBMap.size()>0)) ? 
(I-1): I; - assert(J != Idx2MBBMap.end() && J->first < index+1 && + assert(J != Idx2MBBMap.end() && J->first <= index && index <= getMBBEndIdx(J->second) && "index does not correspond to an MBB"); return J->second; } /// getInstructionIndex - returns the base index of instr - unsigned getInstructionIndex(MachineInstr* instr) const { + LiveIndex getInstructionIndex(const MachineInstr* instr) const { Mi2IndexMap::const_iterator it = mi2iMap_.find(instr); assert(it != mi2iMap_.end() && "Invalid instruction!"); return it->second; @@ -204,48 +229,49 @@ namespace llvm { /// getInstructionFromIndex - given an index in any slot of an /// instruction return a pointer the instruction - MachineInstr* getInstructionFromIndex(unsigned index) const { - index /= InstrSlots::NUM; // convert index to vector index - assert(index < i2miMap_.size() && + MachineInstr* getInstructionFromIndex(LiveIndex index) const { + // convert index to vector index + unsigned i = index.getVecIndex(); + assert(i < i2miMap_.size() && "index does not correspond to an instruction"); - return i2miMap_[index]; + return i2miMap_[i]; } /// hasGapBeforeInstr - Return true if the previous instruction slot, /// i.e. Index - InstrSlots::NUM, is not occupied. - bool hasGapBeforeInstr(unsigned Index) { - Index = getBaseIndex(Index - InstrSlots::NUM); + bool hasGapBeforeInstr(LiveIndex Index) { + Index = getBaseIndex(getPrevIndex(Index)); return getInstructionFromIndex(Index) == 0; } /// hasGapAfterInstr - Return true if the successive instruction slot, /// i.e. Index + InstrSlots::Num, is not occupied. - bool hasGapAfterInstr(unsigned Index) { - Index = getBaseIndex(Index + InstrSlots::NUM); + bool hasGapAfterInstr(LiveIndex Index) { + Index = getBaseIndex(getNextIndex(Index)); return getInstructionFromIndex(Index) == 0; } /// findGapBeforeInstr - Find an empty instruction slot before the /// specified index. If "Furthest" is true, find one that's furthest /// away from the index (but before any index that's occupied). - unsigned findGapBeforeInstr(unsigned Index, bool Furthest = false) { - Index = getBaseIndex(Index - InstrSlots::NUM); + LiveIndex findGapBeforeInstr(LiveIndex Index, bool Furthest = false) { + Index = getBaseIndex(getPrevIndex(Index)); if (getInstructionFromIndex(Index)) - return 0; // No gap! + return LiveIndex(); // No gap! if (!Furthest) return Index; - unsigned PrevIndex = getBaseIndex(Index - InstrSlots::NUM); + LiveIndex PrevIndex = getBaseIndex(getPrevIndex(Index)); while (getInstructionFromIndex(Index)) { Index = PrevIndex; - PrevIndex = getBaseIndex(Index - InstrSlots::NUM); + PrevIndex = getBaseIndex(getPrevIndex(Index)); } return Index; } /// InsertMachineInstrInMaps - Insert the specified machine instruction /// into the instruction index map at the given index. - void InsertMachineInstrInMaps(MachineInstr *MI, unsigned Index) { - i2miMap_[Index / InstrSlots::NUM] = MI; + void InsertMachineInstrInMaps(MachineInstr *MI, LiveIndex Index) { + i2miMap_[Index.getVecIndex()] = MI; Mi2IndexMap::iterator it = mi2iMap_.find(MI); assert(it == mi2iMap_.end() && "Already in map!"); mi2iMap_[MI] = Index; @@ -265,12 +291,12 @@ namespace llvm { /// findLiveInMBBs - Given a live range, if the value of the range /// is live in any MBB returns true as well as the list of basic blocks /// in which the value is live. 
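getMBBFromIndex above finds the enclosing block by binary-searching Idx2MBBMap, which is sorted by each block's first index, and stepping back one entry when the search lands past the query. The same lookup over plain pairs, as a sketch:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, int> IdxBlockPair;   // (first index, block id)

    static bool firstLess(const IdxBlockPair &P, unsigned V) {
      return P.first < V;
    }

    int blockOf(const std::vector<IdxBlockPair> &Map, unsigned Idx) {
      std::vector<IdxBlockPair>::const_iterator I =
          std::lower_bound(Map.begin(), Map.end(), Idx, firstLess);
      if (I == Map.end() || I->first > Idx) {
        assert(I != Map.begin() && "index precedes every block");
        --I;                        // Idx lies inside the previous block
      }
      return I->second;
    }
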
- bool findLiveInMBBs(unsigned Start, unsigned End, + bool findLiveInMBBs(LiveIndex Start, LiveIndex End, SmallVectorImpl &MBBs) const; /// findReachableMBBs - Return a list MBB that can be reached via any /// branch or fallthroughs. Return true if the list is not empty. - bool findReachableMBBs(unsigned Start, unsigned End, + bool findReachableMBBs(LiveIndex Start, LiveIndex End, SmallVectorImpl &MBBs) const; // Interval creation @@ -289,7 +315,7 @@ namespace llvm { /// addLiveRangeToEndOfBlock - Given a register and an instruction, /// adds a live range from that instruction to the end of its MBB. LiveRange addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst); + MachineInstr* startInst); // Interval removal @@ -312,7 +338,7 @@ namespace llvm { // MachineInstr -> index mappings Mi2IndexMap::iterator mi2i = mi2iMap_.find(MI); if (mi2i != mi2iMap_.end()) { - i2miMap_[mi2i->second/InstrSlots::NUM] = 0; + i2miMap_[mi2i->second.index/InstrSlots::NUM] = 0; mi2iMap_.erase(mi2i); } } @@ -323,10 +349,10 @@ namespace llvm { Mi2IndexMap::iterator mi2i = mi2iMap_.find(MI); if (mi2i == mi2iMap_.end()) return; - i2miMap_[mi2i->second/InstrSlots::NUM] = NewMI; + i2miMap_[mi2i->second.index/InstrSlots::NUM] = NewMI; Mi2IndexMap::iterator it = mi2iMap_.find(MI); assert(it != mi2iMap_.end() && "Invalid instruction!"); - unsigned Index = it->second; + LiveIndex Index = it->second; mi2iMap_.erase(it); mi2iMap_[NewMI] = Index; } @@ -344,10 +370,7 @@ namespace llvm { virtual bool runOnMachineFunction(MachineFunction&); /// print - Implement the dump method. - virtual void print(std::ostream &O, const Module* = 0) const; - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } + virtual void print(raw_ostream &O, const Module* = 0) const; /// addIntervalsForSpills - Create new intervals for spilled defs / uses of /// the given interval. FIXME: It also returns the weight of the spill slot @@ -408,32 +431,40 @@ namespace llvm { private: /// computeIntervals - Compute live intervals. void computeIntervals(); - + + bool isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt, + SmallVector &IdentCopies, + SmallVector &OtherCopies); + + void performEarlyCoalescing(); + /// handleRegisterDef - update intervals for a register def /// (calls handlePhysicalRegisterDef and /// handleVirtualRegisterDef) void handleRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MI, unsigned MIIdx, + MachineBasicBlock::iterator MI, + LiveIndex MIIdx, MachineOperand& MO, unsigned MOIdx); /// handleVirtualRegisterDef - update intervals for a virtual /// register def void handleVirtualRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, - unsigned MIIdx, MachineOperand& MO, - unsigned MOIdx, LiveInterval& interval); + LiveIndex MIIdx, MachineOperand& MO, + unsigned MOIdx, + LiveInterval& interval); /// handlePhysicalRegisterDef - update intervals for a physical register /// def. void handlePhysicalRegisterDef(MachineBasicBlock* mbb, MachineBasicBlock::iterator mi, - unsigned MIIdx, MachineOperand& MO, + LiveIndex MIIdx, MachineOperand& MO, LiveInterval &interval, MachineInstr *CopyMI); /// handleLiveInRegister - Create interval for a livein register. 
void handleLiveInRegister(MachineBasicBlock* mbb, - unsigned MIIdx, + LiveIndex MIIdx, LiveInterval &interval, bool isAlias = false); /// getReMatImplicitUse - If the remat definition MI has one (for now, we @@ -446,7 +477,7 @@ namespace llvm { /// which reaches the given instruction also reaches the specified use /// index. bool isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, - unsigned UseIdx) const; + LiveIndex UseIdx) const; /// isReMaterializable - Returns true if the definition MI of the specified /// val# of the specified interval is re-materializable. Also returns true @@ -461,9 +492,9 @@ namespace llvm { /// MI. If it is successul, MI is updated with the newly created MI and /// returns true. bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm, - MachineInstr *DefMI, unsigned InstrIdx, + MachineInstr *DefMI, LiveIndex InstrIdx, SmallVector &Ops, - bool isSS, int Slot, unsigned Reg); + bool isSS, int FrameIndex, unsigned Reg); /// canFoldMemoryOperand - Return true if the specified load / store /// folding is possible. @@ -474,7 +505,8 @@ namespace llvm { /// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified /// VNInfo that's after the specified index but is within the basic block. bool anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI, - MachineBasicBlock *MBB, unsigned Idx) const; + MachineBasicBlock *MBB, + LiveIndex Idx) const; /// hasAllocatableSuperReg - Return true if the specified physical register /// has any super register that's allocatable. @@ -482,16 +514,17 @@ namespace llvm { /// SRInfo - Spill / restore info. struct SRInfo { - int index; + LiveIndex index; unsigned vreg; bool canFold; - SRInfo(int i, unsigned vr, bool f) : index(i), vreg(vr), canFold(f) {}; + SRInfo(LiveIndex i, unsigned vr, bool f) + : index(i), vreg(vr), canFold(f) {} }; - bool alsoFoldARestore(int Id, int index, unsigned vr, + bool alsoFoldARestore(int Id, LiveIndex index, unsigned vr, BitVector &RestoreMBBs, DenseMap >&RestoreIdxes); - void eraseRestoreInfo(int Id, int index, unsigned vr, + void eraseRestoreInfo(int Id, LiveIndex index, unsigned vr, BitVector &RestoreMBBs, DenseMap >&RestoreIdxes); @@ -510,8 +543,9 @@ namespace llvm { /// functions for addIntervalsForSpills to rewrite uses / defs for the given /// live range. bool rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, unsigned index, unsigned end, MachineInstr *MI, - MachineInstr *OrigDefMI, MachineInstr *DefMI, unsigned Slot, int LdSlot, + bool TrySplit, LiveIndex index, LiveIndex end, + MachineInstr *MI, MachineInstr *OrigDefMI, MachineInstr *DefMI, + unsigned Slot, int LdSlot, bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, VirtRegMap &vrm, const TargetRegisterClass* rc, SmallVector &ReMatIds, const MachineLoopInfo *loopInfo, @@ -533,9 +567,9 @@ namespace llvm { static LiveInterval* createInterval(unsigned Reg); - void printRegName(unsigned reg) const; + void printInstrs(raw_ostream &O) const; + void dumpInstrs() const; }; - } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h index 27ae1be7f9133..d63a222475c67 100644 --- a/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/include/llvm/CodeGen/LiveStackAnalysis.h @@ -102,10 +102,7 @@ namespace llvm { virtual bool runOnMachineFunction(MachineFunction&); /// print - Implement the dump method. 
- virtual void print(std::ostream &O, const Module* = 0) const; - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } + virtual void print(raw_ostream &O, const Module* = 0) const; }; } diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h index 26c036269d68c..172fb750944d8 100644 --- a/include/llvm/CodeGen/LiveVariables.h +++ b/include/llvm/CodeGen/LiveVariables.h @@ -29,9 +29,12 @@ #ifndef LLVM_CODEGEN_LIVEVARIABLES_H #define LLVM_CODEGEN_LIVEVARIABLES_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" @@ -146,16 +149,14 @@ private: // Intermediate data structures bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI); void HandlePhysRegUse(unsigned Reg, MachineInstr *MI); - void HandlePhysRegDef(unsigned Reg, MachineInstr *MI); + void HandlePhysRegDef(unsigned Reg, MachineInstr *MI, + SmallVector &Defs); + void UpdatePhysRegDefs(MachineInstr *MI, SmallVector &Defs); /// FindLastPartialDef - Return the last partial def of the specified register. - /// Also returns the sub-register that's defined. - MachineInstr *FindLastPartialDef(unsigned Reg, unsigned &PartDefReg); - - /// hasRegisterUseBelow - Return true if the specified register is used after - /// the current instruction and before it's next definition. - bool hasRegisterUseBelow(unsigned Reg, MachineBasicBlock::iterator I, - MachineBasicBlock *MBB); + /// Also returns the sub-registers that're defined by the instruction. + MachineInstr *FindLastPartialDef(unsigned Reg, + SmallSet &PartDefRegs); /// analyzePHINodes - Gather information about the PHI nodes in here. In /// particular, we want to map the variable information of a virtual diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 134d22663b01d..2a9e86a04c09f 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -16,17 +16,17 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/Support/Streams.h" namespace llvm { class BasicBlock; class MachineFunction; +class raw_ostream; template <> struct ilist_traits : public ilist_default_traits { private: - mutable ilist_node Sentinel; + mutable ilist_half_node Sentinel; // this is only set by the MachineBasicBlock owning the LiveList friend class MachineBasicBlock; @@ -310,8 +310,7 @@ public: // Debugging methods. void dump() const; - void print(std::ostream &OS) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS) const; /// getNumber - MachineBasicBlocks are uniquely numbered at the function /// level, unless they're not in a MachineFunction yet, in which case this @@ -339,7 +338,7 @@ private: // Methods used to maintain doubly linked list of blocks... 
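A recurring change throughout this import, visible again in the hunks above: printing migrates from std::ostream to llvm::raw_ostream, and the old null-checking print(std::ostream*) overloads disappear. The pattern a class ends up with, sketched with a hypothetical type:

    #include "llvm/Support/raw_ostream.h"

    class Widget {
    public:
      void print(llvm::raw_ostream &OS) const { OS << "widget"; }
      void dump() const { print(llvm::errs()); llvm::errs() << '\n'; }
    };

    inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
                                         const Widget &W) {
      W.print(OS);   // the stream operator simply forwards to print()
      return OS;
    }
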
void removePredecessor(MachineBasicBlock *pred); }; -std::ostream& operator<<(std::ostream &OS, const MachineBasicBlock &MBB); +raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB); //===--------------------------------------------------------------------===// // GraphTraits specializations for machine basic block graphs (machine-CFGs) diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h index eb1ea2dc56b1d..abb6dd9cd087f 100644 --- a/include/llvm/CodeGen/MachineCodeEmitter.h +++ b/include/llvm/CodeGen/MachineCodeEmitter.h @@ -18,6 +18,7 @@ #define LLVM_CODEGEN_MACHINECODEEMITTER_H #include "llvm/Support/DataTypes.h" +#include "llvm/Support/DebugLoc.h" namespace llvm { @@ -74,24 +75,6 @@ public: /// false. /// virtual bool finishFunction(MachineFunction &F) = 0; - - /// startGVStub - This callback is invoked when the JIT needs the - /// address of a GV (e.g. function) that has not been code generated yet. - /// The StubSize specifies the total size required by the stub. - /// - virtual void startGVStub(const GlobalValue* GV, unsigned StubSize, - unsigned Alignment = 1) = 0; - - /// startGVStub - This callback is invoked when the JIT needs the address of a - /// GV (e.g. function) that has not been code generated yet. Buffer points to - /// memory already allocated for this stub. - /// - virtual void startGVStub(const GlobalValue* GV, void *Buffer, - unsigned StubSize) = 0; - - /// finishGVStub - This callback is invoked to terminate a GV stub. - /// - virtual void *finishGVStub(const GlobalValue* F) = 0; /// emitByte - This callback is invoked when a byte needs to be written to the /// output stream. @@ -250,7 +233,12 @@ public: (*(uint64_t*)Addr) = (uint64_t)Value; } - + /// processDebugLoc - Records debug location information about a + /// MachineInstruction. This is called before emitting any bytes associated + /// with the instruction. Even if successive instructions have the same debug + /// location, this method will be called for each one. + virtual void processDebugLoc(DebugLoc DL, bool BeforePrintintInsn) {} + /// emitLabel - Emits a label virtual void emitLabel(uint64_t LabelID) = 0; @@ -288,14 +276,20 @@ public: /// getCurrentPCOffset - Return the offset from the start of the emitted /// buffer that we are currently writing to. - uintptr_t getCurrentPCOffset() const { + virtual uintptr_t getCurrentPCOffset() const { return CurBufferPtr-BufferBegin; } + /// earlyResolveAddresses - True if the code emitter can use symbol addresses + /// during code emission time. The JIT is capable of doing this because it + /// creates jump tables or constant pools in memory on the fly while the + /// object code emitters rely on a linker to have real addresses and should + /// use relocations instead. + virtual bool earlyResolveAddresses() const = 0; + /// addRelocation - Whenever a relocatable address is needed, it should be /// noted with this interface. virtual void addRelocation(const MachineRelocation &MR) = 0; - /// FIXME: These should all be handled with relocations! diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index 99996cf28fb1e..8d6c1d1e4ca2c 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -41,8 +41,15 @@ public: /// getType - get type of this MachineConstantPoolValue. 
/// - inline const Type *getType() const { return Ty; } + const Type *getType() const { return Ty; } + + /// getRelocationInfo - This method classifies the entry according to + /// whether or not it may generate a relocation entry. This must be + /// conservative, so if it might codegen to a relocatable entry, it should say + /// so. The return values are the same as Constant::getRelocationInfo(). + virtual unsigned getRelocationInfo() const = 0; + virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) = 0; @@ -82,7 +89,7 @@ public: MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A) : Alignment(A) { Val.MachineCPVal = V; - Alignment |= 1 << (sizeof(unsigned)*CHAR_BIT-1); + Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1); } bool isMachineConstantPoolEntry() const { @@ -94,6 +101,19 @@ public: } const Type *getType() const; + + /// getRelocationInfo - This method classifies the entry according to + /// whether or not it may generate a relocation entry. This must be + /// conservative, so if it might codegen to a relocatable entry, it should say + /// so. The return values are: + /// + /// 0: This constant pool entry is guaranteed to never have a relocation + /// applied to it (because it holds a simple constant like '4'). + /// 1: This entry has relocations, but the entries are guaranteed to be + /// resolvable by the static linker, so the dynamic linker will never see + /// them. + /// 2: This entry may have arbitrary relocations. + unsigned getRelocationInfo() const; }; /// The MachineConstantPool class keeps track of constants referenced by a diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 5981e5a3a589f..e56776b1440c8 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -15,13 +15,15 @@ #ifndef LLVM_CODEGEN_MACHINEDOMINATORS_H #define LLVM_CODEGEN_MACHINEDOMINATORS_H +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/DominatorInternals.h" namespace llvm { -inline void WriteAsOperand(std::ostream &, const MachineBasicBlock*, bool t) { } +inline void WriteAsOperand(raw_ostream &, const MachineBasicBlock*, bool t) { } template<> inline void DominatorTreeBase::addRoot(MachineBasicBlock* MBB) { @@ -160,9 +162,7 @@ public: virtual void releaseMemory(); - virtual void print(std::ostream &OS, const Module* M= 0) const { - DT->print(OS, M); - } + virtual void print(raw_ostream &OS, const Module*) const; }; //===------------------------------------- diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 4c981f7caf027..b5479ba09f3cb 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -14,17 +14,20 @@ #ifndef LLVM_CODEGEN_MACHINEFRAMEINFO_H #define LLVM_CODEGEN_MACHINEFRAMEINFO_H +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/DataTypes.h" #include -#include #include namespace llvm { +class raw_ostream; class TargetData; class TargetRegisterClass; class Type; class MachineModuleInfo; class MachineFunction; +class MachineBasicBlock; class TargetFrameInfo; /// The CalleeSavedInfo class tracks the information need to locate where a @@ -130,11 +133,14 @@ class MachineFrameInfo { uint64_t StackSize; /// OffsetAdjustment - The amount that a frame offset needs to be adjusted to - /// 
have the actual offset from the stack/frame pointer. The calculation is - /// MFI->getObjectOffset(Index) + StackSize - TFI.getOffsetOfLocalArea() + - /// OffsetAdjustment. If OffsetAdjustment is zero (default) then offsets are - /// away from TOS. If OffsetAdjustment == StackSize then offsets are toward - /// TOS. + /// have the actual offset from the stack/frame pointer. The exact usage of + /// this is target-dependent, but it is typically used to adjust between + /// SP-relative and FP-relative offsets. E.G., if objects are accessed via + /// SP then OffsetAdjustment is zero; if FP is used, OffsetAdjustment is set + /// to the distance between the initial SP and the value in FP. For many + /// targets, this value is only used when generating debug info (via + /// TargetRegisterInfo::getFrameIndexOffset); when generating code, the + /// corresponding adjustments are performed directly. int OffsetAdjustment; /// MaxAlignment - The prolog/epilog code inserter may process objects @@ -166,7 +172,10 @@ class MachineFrameInfo { /// epilog code inserter, this data used for debug info and exception /// handling. std::vector CSInfo; - + + /// CSIValid - Has CSInfo been set yet? + bool CSIValid; + /// MMI - This field is set (via setMachineModuleInfo) by a module info /// consumer (ex. DwarfWriter) to indicate that frame layout information /// should be acquired. Typically, it's the responsibility of the target's @@ -185,6 +194,7 @@ public: HasCalls = false; StackProtectorIdx = -1; MaxCallFrameSize = 0; + CSIValid = false; MMI = 0; } @@ -389,6 +399,22 @@ public: CSInfo = CSI; } + /// isCalleeSavedInfoValid - Has the callee saved info been calculated yet? + bool isCalleeSavedInfoValid() const { return CSIValid; } + + void setCalleeSavedInfoValid(bool v) { CSIValid = v; } + + /// getPristineRegs - Return a set of physical registers that are pristine on + /// entry to the MBB. + /// + /// Pristine registers hold a value that is useless to the current function, + /// but that must be preserved - they are callee saved registers that have not + /// been saved yet. + /// + /// Before the PrologueEpilogueInserter has placed the CSR spill code, this + /// method always returns an empty set. + BitVector getPristineRegs(const MachineBasicBlock *MBB) const; + /// getMachineModuleInfo - Used by a prologue/epilogue /// emitter (TargetRegisterInfo) to provide frame layout information. MachineModuleInfo *getMachineModuleInfo() const { return MMI; } @@ -400,9 +426,9 @@ public: /// print - Used by the MachineFunction printer to print information about /// stack objects. Implemented in MachineFunction.cpp /// - void print(const MachineFunction &MF, std::ostream &OS) const; + void print(const MachineFunction &MF, raw_ostream &OS) const; - /// dump - Call print(MF, std::cerr) to be called from the debugger. + /// dump - Print the function to stderr. 
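getPristineRegs above is, conceptually, a set difference: start from the callee-saved registers the function will eventually preserve and drop those whose spill code has already executed by the given block. A hedged sketch of that computation with BitVector; this is a hypothetical helper, not the real implementation:

    #include "llvm/ADT/BitVector.h"

    llvm::BitVector pristineRegs(const llvm::BitVector &CalleeSaved,
                                 const llvm::BitVector &SavedSoFar) {
      llvm::BitVector Pristine = CalleeSaved;        // candidates: all CSRs
      for (int i = SavedSoFar.find_first(); i != -1;
           i = SavedSoFar.find_next(i))
        Pristine.reset(i);                           // already spilled: not pristine
      return Pristine;
    }
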
void dump(const MachineFunction &MF) const; }; diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index ea6a384d22872..ba831cab17253 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -18,15 +18,16 @@ #ifndef LLVM_CODEGEN_MACHINEFUNCTION_H #define LLVM_CODEGEN_MACHINEFUNCTION_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/ilist.h" #include "llvm/Support/DebugLoc.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/Support/Annotation.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Recycler.h" +#include namespace llvm { +class Value; class Function; class MachineRegisterInfo; class MachineFrameInfo; @@ -38,7 +39,7 @@ class TargetRegisterClass; template <> struct ilist_traits : public ilist_default_traits { - mutable ilist_node Sentinel; + mutable ilist_half_node Sentinel; public: MachineBasicBlock *createSentinel() const { return static_cast(&Sentinel); @@ -63,11 +64,11 @@ private: /// of type are accessed/created with MF::getInfo and destroyed when the /// MachineFunction is destroyed. struct MachineFunctionInfo { - virtual ~MachineFunctionInfo() {} + virtual ~MachineFunctionInfo(); }; -class MachineFunction : private Annotation { - const Function *Fn; +class MachineFunction { + Function *Fn; const TargetMachine &Target; // RegInfo - Information about each register in use in the function. @@ -115,12 +116,12 @@ class MachineFunction : private Annotation { unsigned Alignment; public: - MachineFunction(const Function *Fn, const TargetMachine &TM); + MachineFunction(Function *Fn, const TargetMachine &TM); ~MachineFunction(); /// getFunction - Return the LLVM function that this machine code represents /// - const Function *getFunction() const { return Fn; } + Function *getFunction() const { return Fn; } /// getTarget - Return the target machine this machine code is compiled with /// @@ -159,8 +160,8 @@ public: /// void setAlignment(unsigned A) { Alignment = A; } - /// MachineFunctionInfo - Keep track of various per-function pieces of - /// information for backends that would like to do so. + /// getInfo - Keep track of various per-function pieces of information for + /// backends that would like to do so. /// template Ty *getInfo() { @@ -207,8 +208,7 @@ public: /// print - Print out the MachineFunction in a format suitable for debugging /// to the specified stream. /// - void print(std::ostream &OS) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS) const; /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the @@ -229,21 +229,6 @@ public: /// void dump() const; - /// construct - Allocate and initialize a MachineFunction for a given Function - /// and Target - /// - static MachineFunction& construct(const Function *F, const TargetMachine &TM); - - /// destruct - Destroy the MachineFunction corresponding to a given Function - /// - static void destruct(const Function *F); - - /// get - Return a handle to a MachineFunction corresponding to the given - /// Function. This should not be called before "construct()" for a given - /// Function. - /// - static MachineFunction& get(const Function *F); - // Provide accessors for the MachineBasicBlock list... 
typedef BasicBlockListType::iterator iterator; typedef BasicBlockListType::const_iterator const_iterator; @@ -336,16 +321,42 @@ public: /// void DeleteMachineBasicBlock(MachineBasicBlock *MBB); + /// getMachineMemOperand - Allocate a new MachineMemOperand. + /// MachineMemOperands are owned by the MachineFunction and need not be + /// explicitly deallocated. + MachineMemOperand *getMachineMemOperand(const Value *v, unsigned f, + int64_t o, uint64_t s, + unsigned base_alignment); + + /// getMachineMemOperand - Allocate a new MachineMemOperand by copying + /// an existing one, adjusting by an offset and using the given size. + /// MachineMemOperands are owned by the MachineFunction and need not be + /// explicitly deallocated. + MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, + int64_t Offset, uint64_t Size); + + /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand + /// pointers. This array is owned by the MachineFunction. + MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num); + + /// extractLoadMemRefs - Allocate an array and populate it with just the + /// load information from the given MachineMemOperand sequence. + std::pair + extractLoadMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End); + + /// extractStoreMemRefs - Allocate an array and populate it with just the + /// store information from the given MachineMemOperand sequence. + std::pair + extractStoreMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End); + //===--------------------------------------------------------------------===// // Debug location. // - /// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given - /// source file, line, and column. If none currently exists, create a new - /// DebugLocTuple, and insert it into the DebugIdMap. - unsigned getOrCreateDebugLocID(GlobalVariable *CompileUnit, - unsigned Line, unsigned Col); - /// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object. DebugLocTuple getDebugLocTuple(DebugLoc DL) const; diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h new file mode 100644 index 0000000000000..d020a7b4c73b1 --- /dev/null +++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h @@ -0,0 +1,49 @@ +//===-- MachineFunctionAnalysis.h - Owner of MachineFunctions ----*-C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MachineFunctionAnalysis class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H +#define LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H + +#include "llvm/Pass.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class MachineFunction; + +/// MachineFunctionAnalysis - This class is a Pass that manages a +/// MachineFunction object. 
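Since MachineMemOperands are now allocated by and owned by the MachineFunction, attaching memory information to an instruction becomes a two-step call. The fragment below is a sketch against the declarations above; MF, MI, and the frame index FI are assumed to already exist, and PseudoSourceValue::getFixedStack and the MOLoad flag are declared elsewhere in this patch.

// Describe a 4-byte, 4-byte-aligned load from spill slot FI. The
// MachineFunction owns the result; it is never explicitly freed.
MachineMemOperand *MMO =
  MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
                          MachineMemOperand::MOLoad,
                          /*Offset=*/0, /*Size=*/4, /*BaseAlignment=*/4);
MI->addMemOperand(MF, MMO);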
+struct MachineFunctionAnalysis : public FunctionPass { +private: + const TargetMachine &TM; + CodeGenOpt::Level OptLevel; + MachineFunction *MF; + +public: + static char ID; + explicit MachineFunctionAnalysis(TargetMachine &tm, + CodeGenOpt::Level OL = CodeGenOpt::Default); + ~MachineFunctionAnalysis(); + + MachineFunction &getMF() const { return *MF; } + CodeGenOpt::Level getOptLevel() const { return OptLevel; } + +private: + virtual bool runOnFunction(Function &F); + virtual void releaseMemory(); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h index 6b5e64abc46c9..bac110316d4f3 100644 --- a/include/llvm/CodeGen/MachineFunctionPass.h +++ b/include/llvm/CodeGen/MachineFunctionPass.h @@ -20,23 +20,34 @@ #define LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H #include "llvm/Pass.h" -#include "llvm/CodeGen/MachineFunction.h" namespace llvm { - // FIXME: This pass should declare that the pass does not invalidate any LLVM - // passes. -struct MachineFunctionPass : public FunctionPass { +class MachineFunction; + +/// MachineFunctionPass - This class adapts the FunctionPass interface to +/// allow convenient creation of passes that operate on the MachineFunction +/// representation. Instead of overriding runOnFunction, subclasses +/// override runOnMachineFunction. +class MachineFunctionPass : public FunctionPass { +protected: explicit MachineFunctionPass(intptr_t ID) : FunctionPass(ID) {} explicit MachineFunctionPass(void *ID) : FunctionPass(ID) {} -protected: /// runOnMachineFunction - This method must be overloaded to perform the /// desired machine code transformation or analysis. /// virtual bool runOnMachineFunction(MachineFunction &MF) = 0; -public: + /// getAnalysisUsage - Subclasses that override getAnalysisUsage + /// must call this. + /// + /// For MachineFunctionPasses, calling AU.preservesCFG() indicates that + /// the pass does not modify the MachineBasicBlock CFG. + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + +private: bool runOnFunction(Function &F); }; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 2b2f24a88371e..de22710fe6e0f 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -20,29 +20,34 @@ #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Support/DebugLoc.h" -#include #include namespace llvm { +class AliasAnalysis; class TargetInstrDesc; class TargetInstrInfo; class TargetRegisterInfo; class MachineFunction; +class MachineMemOperand; //===----------------------------------------------------------------------===// /// MachineInstr - Representation of each machine instruction. /// class MachineInstr : public ilist_node { +public: + typedef MachineMemOperand **mmo_iterator; + +private: const TargetInstrDesc *TID; // Instruction descriptor. unsigned short NumImplicitOps; // Number of implicit operands (which // are determined at construction time). std::vector Operands; // the operands - std::list MemOperands; // information on memory references + mmo_iterator MemRefs; // information on memory references + mmo_iterator MemRefsEnd; MachineBasicBlock *Parent; // Pointer to the owning basic block. DebugLoc debugLoc; // Source line information. 
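The reshaped MachineFunctionPass is subclassed as before, but getAnalysisUsage overrides must now chain to the base class. A minimal hypothetical pass, registration boilerplate omitted:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
  // Counts basic blocks; changes nothing.
  struct BlockCounter : public MachineFunctionPass {
    static char ID;
    BlockCounter() : MachineFunctionPass(&ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      unsigned N = 0;
      for (MachineFunction::iterator I = MF.begin(), E = MF.end();
           I != E; ++I)
        ++N;
      (void)N;
      return false; // nothing modified
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesAll();
      MachineFunctionPass::getAnalysisUsage(AU); // mandatory chaining
    }
  };
  char BlockCounter::ID = 0;
}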
@@ -132,21 +137,14 @@ public: unsigned getNumExplicitOperands() const; /// Access to memory operands of the instruction - std::list<MachineMemOperand>::iterator memoperands_begin() - { return MemOperands.begin(); } - std::list<MachineMemOperand>::iterator memoperands_end() - { return MemOperands.end(); } - std::list<MachineMemOperand>::const_iterator memoperands_begin() const - { return MemOperands.begin(); } - std::list<MachineMemOperand>::const_iterator memoperands_end() const - { return MemOperands.end(); } - bool memoperands_empty() const { return MemOperands.empty(); } + mmo_iterator memoperands_begin() const { return MemRefs; } + mmo_iterator memoperands_end() const { return MemRefsEnd; } + bool memoperands_empty() const { return MemRefsEnd == MemRefs; } /// hasOneMemOperand - Return true if this instruction has exactly one /// MachineMemOperand. bool hasOneMemOperand() const { - return !memoperands_empty() && - next(memoperands_begin()) == memoperands_end(); + return MemRefsEnd - MemRefs == 1; } /// isIdenticalTo - Return true if this instruction is identical to (same @@ -208,7 +206,7 @@ public: } /// findRegisterUseOperandIdx() - Returns the operand index that is a use of - /// the specific register or -1 if it is not found. It further tightening + /// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false, const TargetRegisterInfo *TRI = NULL) const; @@ -277,11 +275,13 @@ public: /// isSafeToMove - Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. - bool isSafeToMove(const TargetInstrInfo *TII, bool &SawStore) const; + bool isSafeToMove(const TargetInstrInfo *TII, bool &SawStore, + AliasAnalysis *AA) const; /// isSafeToReMat - Return true if it's safe to rematerialize the specified /// instruction which defined the specified register instead of copying it. - bool isSafeToReMat(const TargetInstrInfo *TII, unsigned DstReg) const; + bool isSafeToReMat(const TargetInstrInfo *TII, unsigned DstReg, + AliasAnalysis *AA) const; /// hasVolatileMemoryRef - Return true if this instruction may have a /// volatile memory reference, or if the information describing the @@ -289,19 +289,17 @@ public: /// have no volatile memory references. bool hasVolatileMemoryRef() const; + /// isInvariantLoad - Return true if this instruction is loading from a + /// location whose value is invariant across the function. For example, + /// loading a value from the constant pool or from the argument area of + /// a function if it does not change. This should only return true if *all* + /// loads the instruction does are invariant (if it does multiple loads).
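With mmo_iterator now a plain MachineMemOperand**, scanning an instruction's memory references is ordinary pointer iteration. The helper name below is invented; the loop is essentially what hasVolatileMemoryRef can do internally.

// Return true if any attached memory operand is volatile.
static bool anyVolatileRef(const llvm::MachineInstr &MI) {
  for (llvm::MachineInstr::mmo_iterator I = MI.memoperands_begin(),
       E = MI.memoperands_end(); I != E; ++I)
    if ((*I)->isVolatile())
      return true;
  return false;
}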
+ bool isInvariantLoad(AliasAnalysis *AA) const; + // // Debugging support // - void print(std::ostream *OS, const TargetMachine *TM) const { - if (OS) print(*OS, TM); - } - void print(std::ostream &OS, const TargetMachine *TM = 0) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } - void print(raw_ostream *OS, const TargetMachine *TM) const { - if (OS) print(*OS, TM); - } void print(raw_ostream &OS, const TargetMachine *TM = 0) const; - void print(raw_ostream *OS) const { if (OS) print(*OS); } void dump() const; //===--------------------------------------------------------------------===// @@ -328,13 +326,17 @@ public: /// void RemoveOperand(unsigned i); - /// addMemOperand - Add a MachineMemOperand to the machine instruction, - /// referencing arbitrary storage. - void addMemOperand(MachineFunction &MF, - const MachineMemOperand &MO); + /// addMemOperand - Add a MachineMemOperand to the machine instruction. + /// This function should be used only occasionally. The setMemRefs function + /// is the primary method for setting up a MachineInstr's MemRefs list. + void addMemOperand(MachineFunction &MF, MachineMemOperand *MO); - /// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands. - void clearMemOperands(MachineFunction &MF); + /// setMemRefs - Assign this MachineInstr's memory reference descriptor + /// list. This does not transfer ownership. + void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) { + MemRefs = NewMemRefs; + MemRefsEnd = NewMemRefsEnd; + } private: /// getRegInfo - If this instruction is embedded into a MachineFunction, @@ -360,11 +362,6 @@ private: //===----------------------------------------------------------------------===// // Debugging Support -inline std::ostream& operator<<(std::ostream &OS, const MachineInstr &MI) { - MI.print(OS); - return OS; -} - inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) { MI.print(OS); return OS; diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index c6a6679c1b69b..7f681d7cea8c6 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -39,6 +39,7 @@ namespace RegState { class MachineInstrBuilder { MachineInstr *MI; public: + MachineInstrBuilder() : MI(0) {} explicit MachineInstrBuilder(MachineInstr *mi) : MI(mi) {} /// Allow automatic conversion to the machine instruction we are working on. @@ -108,20 +109,19 @@ public: } const MachineInstrBuilder &addMetadata(MDNode *N, - int64_t Offset = 0, - unsigned char TargetFlags = 0) const { + int64_t Offset = 0, + unsigned char TargetFlags = 0) const { MI->addOperand(MachineOperand::CreateMDNode(N, Offset, TargetFlags)); return *this; } const MachineInstrBuilder &addExternalSymbol(const char *FnName, - int64_t Offset = 0, unsigned char TargetFlags = 0) const { - MI->addOperand(MachineOperand::CreateES(FnName, Offset, TargetFlags)); + MI->addOperand(MachineOperand::CreateES(FnName, TargetFlags)); return *this; } - const MachineInstrBuilder &addMemOperand(const MachineMemOperand &MMO) const { + const MachineInstrBuilder &addMemOperand(MachineMemOperand *MMO) const { MI->addMemOperand(*MI->getParent()->getParent(), MMO); return *this; } @@ -191,7 +191,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, /// BuildMI - This version of the builder inserts the newly-built /// instruction at the end of the given MachineBasicBlock, and sets up the first -/// operand as a destination virtual register. 
+/// operand as a destination virtual register. /// inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, DebugLoc DL, diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h index 56e2e5499a7ff..3ff2f2e8c7a12 100644 --- a/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -21,13 +21,13 @@ #define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H #include <vector> -#include <iosfwd> #include <cassert> namespace llvm { class MachineBasicBlock; class TargetData; +class raw_ostream; /// MachineJumpTableEntry - One jump table in the jump table info. /// @@ -79,10 +79,9 @@ public: /// print - Used by the MachineFunction printer to print information about /// jump tables. Implemented in MachineFunction.cpp /// - void print(std::ostream &OS) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS) const; - /// dump - Call print(std::cerr) to be called from the debugger. + /// dump - Print to stderr. /// void dump() const; }; diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 2d19d7a2f8034..65ad4e4841482 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -35,48 +35,23 @@ namespace llvm { -// Provide overrides for Loop methods that don't make sense for machine loops. -template<> inline -PHINode *LoopBase<MachineBasicBlock>::getCanonicalInductionVariable() const { - assert(0 && "getCanonicalInductionVariable not supported for machine loops!"); - return 0; -} -template<> inline Instruction* -LoopBase<MachineBasicBlock>::getCanonicalInductionVariableIncrement() const { - assert(0 && - "getCanonicalInductionVariableIncrement not supported for machine loops!"); - return 0; -} - -template<> -inline bool LoopBase<MachineBasicBlock>::isLoopInvariant(Value *V) const { - assert(0 && "isLoopInvariant not supported for machine loops!"); - return false; -} - -template<> -inline Value *LoopBase<MachineBasicBlock>::getTripCount() const { - assert(0 && "getTripCount not supported for machine loops!"); - return 0; -} - -template<> -inline bool LoopBase<MachineBasicBlock>::isLCSSAForm() const { - assert(0 && "isLCSSAForm not supported for machine loops"); - return false; -} - -typedef LoopBase<MachineBasicBlock> MachineLoop; +class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> { +public: + MachineLoop(); +private: + friend class LoopInfoBase<MachineBasicBlock, MachineLoop>; + explicit MachineLoop(MachineBasicBlock *MBB) + : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {} +}; class MachineLoopInfo : public MachineFunctionPass { - LoopInfoBase<MachineBasicBlock> LI; - friend class LoopBase<MachineBasicBlock>; + LoopInfoBase<MachineBasicBlock, MachineLoop> LI; + friend class LoopBase<MachineBasicBlock, MachineLoop>; void operator=(const MachineLoopInfo &); // do not implement MachineLoopInfo(const MachineLoopInfo &); // do not implement - LoopInfoBase<MachineBasicBlock>& getBase() { return LI; } + LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; } public: static char ID; // Pass identification, replacement for typeid @@ -86,7 +61,7 @@ public: /// iterator/begin/end - The interface to the top-level loops in the current /// function.
/// - typedef LoopInfoBase::iterator iterator; + typedef LoopInfoBase::iterator iterator; inline iterator begin() const { return LI.begin(); } inline iterator end() const { return LI.end(); } bool empty() const { return LI.empty(); } diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index 4388c0aab2243..b7e267dd13337 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -20,6 +20,7 @@ namespace llvm { class Value; class FoldingSetNodeID; +class raw_ostream; //===----------------------------------------------------------------------===// /// MachineMemOperand - A description of a memory reference used in the backend. @@ -47,14 +48,17 @@ public: }; /// MachineMemOperand - Construct an MachineMemOperand object with the - /// specified address Value, flags, offset, size, and alignment. + /// specified address Value, flags, offset, size, and base alignment. MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, - unsigned int a); + unsigned int base_alignment); - /// getValue - Return the base address of the memory access. - /// Special values are PseudoSourceValue::FPRel, PseudoSourceValue::SPRel, - /// and the other PseudoSourceValue members which indicate references to - /// frame/stack pointer relative references and other special references. + /// getValue - Return the base address of the memory access. This may either + /// be a normal LLVM IR Value, or one of the special values used in CodeGen. + /// Special values are those obtained via + /// PseudoSourceValue::getFixedStack(int), PseudoSourceValue::getStack, and + /// other PseudoSourceValue member functions which return objects which stand + /// for frame/stack pointer relative references and other special references + /// which are not representable in the high-level IR. const Value *getValue() const { return V; } /// getFlags - Return the raw flags of the source value, \see MemOperandFlags. @@ -69,18 +73,34 @@ public: uint64_t getSize() const { return Size; } /// getAlignment - Return the minimum known alignment in bytes of the - /// memory reference. - unsigned int getAlignment() const { return (1u << (Flags >> 3)) >> 1; } + /// actual memory reference. + uint64_t getAlignment() const; + + /// getBaseAlignment - Return the minimum known alignment in bytes of the + /// base address, without the offset. + uint64_t getBaseAlignment() const { return (1u << (Flags >> 3)) >> 1; } bool isLoad() const { return Flags & MOLoad; } bool isStore() const { return Flags & MOStore; } bool isVolatile() const { return Flags & MOVolatile; } + /// refineAlignment - Update this MachineMemOperand to reflect the alignment + /// of MMO, if it has a greater alignment. This must only be used when the + /// new alignment applies to all users of this MachineMemOperand. + void refineAlignment(const MachineMemOperand *MMO); + + /// setValue - Change the SourceValue for this MachineMemOperand. This + /// should only be used when an object is being relocated and all references + /// to it are being updated. + void setValue(const Value *NewSV) { V = NewSV; } + /// Profile - Gather unique data for the object. 
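A note on the getAlignment/getBaseAlignment split above: the base alignment is the value encoded in the flags, while getAlignment (now out of line) must also account for the offset. The patch does not show the implementation; the sketch below is the usual MinAlign-style computation and is an assumption, not a quote of the .cpp body.

#include <stdint.h>

// Alignment of (base + offset): the lowest set bit of the offset caps
// what can be guaranteed for the actual address.
static uint64_t effectiveAlign(uint64_t BaseAlign, int64_t Offset) {
  if (Offset == 0)
    return BaseAlign;
  uint64_t OffsetAlign = (uint64_t)(Offset & -Offset);
  return BaseAlign < OffsetAlign ? BaseAlign : OffsetAlign;
}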
/// void Profile(FoldingSetNodeID &ID) const; }; +raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 1872bd26d8aac..5878d67b939d6 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -42,18 +42,34 @@ #include "llvm/CodeGen/MachineLocation.h" #include "llvm/GlobalValue.h" #include "llvm/Pass.h" +#include "llvm/Metadata.h" + +#define ATTACH_DEBUG_INFO_TO_AN_INSN 1 namespace llvm { //===----------------------------------------------------------------------===// // Forward declarations. class Constant; +class MDNode; class GlobalVariable; class MachineBasicBlock; class MachineFunction; class Module; class PointerType; class StructType; + + +/// MachineModuleInfoImpl - This class can be derived from and used by targets +/// to hold private target-specific information for each Module. Objects of +/// type are accessed/created with MMI::getInfo and destroyed when the +/// MachineModuleInfo is destroyed. +class MachineModuleInfoImpl { +public: + virtual ~MachineModuleInfoImpl(); +}; + + //===----------------------------------------------------------------------===// /// LandingPadInfo - This structure is used to retain landing pad info for @@ -80,7 +96,11 @@ struct LandingPadInfo { /// schemes and reformated for specific use. /// class MachineModuleInfo : public ImmutablePass { -private: + /// ObjFileMMI - This is the object-file-format-specific implementation of + /// MachineModuleInfoImpl, which lets targets accumulate whatever info they + /// want. + MachineModuleInfoImpl *ObjFileMMI; + // LabelIDList - One entry per assigned label. Normally the entry is equal to // the list index(+1). If the entry is zero then the label has been deleted. // Any other value indicates the label has been deleted by is mapped to @@ -112,8 +132,9 @@ private: // common EH frames. std::vector Personalities; - // UsedFunctions - the functions in the llvm.used list in a more easily - // searchable format. + /// UsedFunctions - The functions in the @llvm.used list in a more easily + /// searchable format. This does not include the functions in + /// llvm.compiler.used. SmallPtrSet UsedFunctions; /// UsedDbgLabels - labels are used by debug info entries. @@ -125,28 +146,45 @@ private: /// DbgInfoAvailable - True if debugging information is available /// in this module. bool DbgInfoAvailable; + public: static char ID; // Pass identification, replacement for typeid + typedef SmallVector< std::pair< WeakMetadataVH, unsigned>, 4 > VariableDbgInfoMapTy; + VariableDbgInfoMapTy VariableDbgInfo; + MachineModuleInfo(); ~MachineModuleInfo(); - /// doInitialization - Initialize the state for a new module. - /// bool doInitialization(); - - /// doFinalization - Tear down the state after completion of a module. - /// bool doFinalization(); - + /// BeginFunction - Begin gathering function meta information. /// - void BeginFunction(MachineFunction *MF); + void BeginFunction(MachineFunction *) {} /// EndFunction - Discard function meta information. /// void EndFunction(); + /// getInfo - Keep track of various per-function pieces of information for + /// backends that would like to do so. 
+ /// + template<typename Ty> + Ty &getObjFileInfo() { + if (ObjFileMMI == 0) + ObjFileMMI = new Ty(*this); + + assert((void*)dynamic_cast<Ty*>(ObjFileMMI) == (void*)ObjFileMMI && + "Invalid concrete type or multiple inheritance for getInfo"); + return *static_cast<Ty*>(ObjFileMMI); + } + + template<typename Ty> + const Ty &getObjFileInfo() const { + return const_cast<MachineModuleInfo*>(this)->getObjFileInfo<Ty>(); + } + /// AnalyzeModule - Scan the module for global debug information. /// void AnalyzeModule(Module &M); @@ -240,9 +278,11 @@ public: return Personalities; } - // UsedFunctions - Return set of the functions in the llvm.used list. - const SmallPtrSet<const Function *, 32>& getUsedFunctions() const { - return UsedFunctions; + /// isUsedFunction - Return true if the given function is in the llvm.used + /// list. This does not return true for things in llvm.compiler.used unless + /// they are also in llvm.used. + bool isUsedFunction(const Function *F) { + return UsedFunctions.count(F); } /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. @@ -293,6 +333,14 @@ public: /// of one is required to emit exception handling info. Function *getPersonality() const; + /// setVariableDbgInfo - Collect information used to emit debugging information + /// of a variable. + void setVariableDbgInfo(MDNode *N, unsigned S) { + VariableDbgInfo.push_back(std::make_pair(N, S)); + } + + VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfo; } + }; // End class MachineModuleInfo } // End llvm namespace diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h new file mode 100644 index 0000000000000..44813cbdcd959 --- /dev/null +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -0,0 +1,79 @@ +//===-- llvm/CodeGen/MachineModuleInfoImpls.h -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines object-file format specific implementations of +// MachineModuleInfoImpl. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H +#define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H + +#include "llvm/CodeGen/MachineModuleInfo.h" + +namespace llvm { + class MCSymbol; + + /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation + /// for MachO targets. + class MachineModuleInfoMachO : public MachineModuleInfoImpl { + /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub", + /// the value is something like "_foo". + DenseMap<const MCSymbol*, const MCSymbol*> FnStubs; + + /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like + /// "Lfoo$non_lazy_ptr", the value is something like "_foo". + DenseMap<const MCSymbol*, const MCSymbol*> GVStubs; + + /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like + /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs + /// these are for things with hidden visibility. + DenseMap<const MCSymbol*, const MCSymbol*> HiddenGVStubs; + + virtual void Anchor(); // Out of line virtual method.
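getObjFileInfo follows the same lazily-constructed pattern as MachineFunction::getInfo, and the stub tables above (with the accessors that follow) are its first client. A sketch from an asm printer's point of view; MMI is assumed to be a MachineModuleInfo reference, and Sym and Target MCSymbol pointers the printer already created.

// The first request constructs the MachO-flavored impl; later requests
// return the same object.
MachineModuleInfoMachO &MMIMacho =
  MMI.getObjFileInfo<MachineModuleInfoMachO>();

// Record a $non_lazy_ptr stub; the map slot is null until initialized.
const MCSymbol *&StubSym = MMIMacho.getGVStubEntry(Sym);
if (StubSym == 0)
  StubSym = Target;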
+ public: + MachineModuleInfoMachO(const MachineModuleInfo &) {} + + const MCSymbol *&getFnStubEntry(const MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return FnStubs[Sym]; + } + + const MCSymbol *&getGVStubEntry(const MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return GVStubs[Sym]; + } + + const MCSymbol *&getHiddenGVStubEntry(const MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return HiddenGVStubs[Sym]; + } + + /// Accessor methods to return the set of stubs in sorted order. + typedef std::vector > + SymbolListTy; + + SymbolListTy GetFnStubList() const { + return GetSortedStubs(FnStubs); + } + SymbolListTy GetGVStubList() const { + return GetSortedStubs(GVStubs); + } + SymbolListTy GetHiddenGVStubList() const { + return GetSortedStubs(HiddenGVStubs); + } + + private: + static SymbolListTy + GetSortedStubs(const DenseMap &Map); + }; + +} // end namespace llvm + +#endif diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 26ec239f4c8b5..f715c445f8abb 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -16,7 +16,6 @@ #include "llvm/Support/DataTypes.h" #include -#include namespace llvm { @@ -111,7 +110,7 @@ private: GlobalValue *GV; // For MO_GlobalAddress. MDNode *Node; // For MO_Metadata. } Val; - int64_t Offset; // An offset from the object. + int64_t Offset; // An offset from the object. } OffsetedInfo; } Contents; @@ -119,12 +118,6 @@ private: TargetFlags = 0; } public: - MachineOperand(const MachineOperand &M) { - *this = M; - } - - ~MachineOperand() {} - /// getType - Returns the MachineOperandType for this operand. /// MachineOperandType getType() const { return (MachineOperandType)OpKind; } @@ -139,7 +132,6 @@ public: MachineInstr *getParent() { return ParentMI; } const MachineInstr *getParent() const { return ParentMI; } - void print(std::ostream &os, const TargetMachine *TM = 0) const; void print(raw_ostream &os, const TargetMachine *TM = 0) const; //===--------------------------------------------------------------------===// @@ -164,6 +156,8 @@ public: bool isGlobal() const { return OpKind == MO_GlobalAddress; } /// isSymbol - Tests if this is a MO_ExternalSymbol operand. bool isSymbol() const { return OpKind == MO_ExternalSymbol; } + /// isMetadata - Tests if this is a MO_Metadata operand. + bool isMetadata() const { return OpKind == MO_Metadata; } //===--------------------------------------------------------------------===// // Accessors for Register Operands @@ -304,6 +298,8 @@ public: return Contents.OffsetedInfo.Val.Node; } + /// getOffset - Return the offset from the symbol in this operand. This always + /// returns 0 for ExternalSymbol operands. int64_t getOffset() const { assert((isGlobal() || isSymbol() || isCPI()) && "Wrong MachineOperand accessor"); @@ -325,7 +321,7 @@ public: } void setOffset(int64_t Offset) { - assert((isGlobal() || isSymbol() || isCPI()) && + assert((isGlobal() || isSymbol() || isCPI() || isMetadata()) && "Wrong MachineOperand accessor"); Contents.OffsetedInfo.Offset = Offset; } @@ -438,28 +434,14 @@ public: Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateES(const char *SymName, int64_t Offset = 0, + static MachineOperand CreateES(const char *SymName, unsigned char TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_ExternalSymbol); Op.Contents.OffsetedInfo.Val.SymbolName = SymName; - Op.setOffset(Offset); + Op.setOffset(0); // Offset is always 0. 
Op.setTargetFlags(TargetFlags); return Op; } - const MachineOperand &operator=(const MachineOperand &MO) { - OpKind = MO.OpKind; - IsDef = MO.IsDef; - IsImp = MO.IsImp; - IsKill = MO.IsKill; - IsDead = MO.IsDead; - IsUndef = MO.IsUndef; - IsEarlyClobber = MO.IsEarlyClobber; - SubReg = MO.SubReg; - ParentMI = MO.ParentMI; - Contents = MO.Contents; - TargetFlags = MO.TargetFlags; - return *this; - } friend class MachineInstr; friend class MachineRegisterInfo; @@ -486,11 +468,6 @@ private: void RemoveRegOperandFromRegInfo(); }; -inline std::ostream &operator<<(std::ostream &OS, const MachineOperand &MO) { - MO.print(OS, 0); - return OS; -} - inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) { MO.print(OS, 0); return OS; diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 80c37b39ca0ca..18e60200b0991 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -16,7 +16,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/iterator.h" #include namespace llvm { @@ -256,7 +255,7 @@ public: /// returns end(). template class defusechain_iterator - : public forward_iterator { + : public std::iterator { MachineOperand *Op; explicit defusechain_iterator(MachineOperand *op) : Op(op) { // If the first node isn't one we're interested in, advance to one that @@ -269,8 +268,10 @@ public: } friend class MachineRegisterInfo; public: - typedef forward_iterator::reference reference; - typedef forward_iterator::pointer pointer; + typedef std::iterator::reference reference; + typedef std::iterator::pointer pointer; defusechain_iterator(const defusechain_iterator &I) : Op(I.Op) {} defusechain_iterator() : Op(0) {} diff --git a/include/llvm/CodeGen/ObjectCodeEmitter.h b/include/llvm/CodeGen/ObjectCodeEmitter.h new file mode 100644 index 0000000000000..8252e07d84b16 --- /dev/null +++ b/include/llvm/CodeGen/ObjectCodeEmitter.h @@ -0,0 +1,178 @@ +//===-- llvm/CodeGen/ObjectCodeEmitter.h - Object Code Emitter -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generalized Object Code Emitter, works with ObjectModule and BinaryObject. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_OBJECTCODEEMITTER_H +#define LLVM_CODEGEN_OBJECTCODEEMITTER_H + +#include "llvm/CodeGen/MachineCodeEmitter.h" + +namespace llvm { + +class BinaryObject; +class MachineBasicBlock; +class MachineCodeEmitter; +class MachineFunction; +class MachineConstantPool; +class MachineJumpTableInfo; +class MachineModuleInfo; + +class ObjectCodeEmitter : public MachineCodeEmitter { +protected: + + /// Binary Object (Section or Segment) we are emitting to. + BinaryObject *BO; + + /// MBBLocations - This vector is a mapping from MBB ID's to their address. + /// It is filled in by the StartMachineBasicBlock callback and queried by + /// the getMachineBasicBlockAddress callback. + std::vector MBBLocations; + + /// LabelLocations - This vector is a mapping from Label ID's to their + /// address. + std::vector LabelLocations; + + /// CPLocations - This is a map of constant pool indices to offsets from the + /// start of the section for that constant pool index. 
+ std::vector<uintptr_t> CPLocations; + + /// CPSections - This is a map of constant pool indices to the Section + /// containing the constant pool entry for that index. + std::vector<uintptr_t> CPSections; + + /// JTLocations - This is a map of jump table indices to offsets from the + /// start of the section for that jump table index. + std::vector<uintptr_t> JTLocations; + +public: + ObjectCodeEmitter(); + ObjectCodeEmitter(BinaryObject *bo); + virtual ~ObjectCodeEmitter(); + + /// setBinaryObject - set the BinaryObject we are writing to + void setBinaryObject(BinaryObject *bo); + + /// emitByte - This callback is invoked when a byte needs to be + /// written to the data stream, without buffer overflow testing. + void emitByte(uint8_t B); + + /// emitWordLE - This callback is invoked when a 32-bit word needs to be + /// written to the data stream in little-endian format. + void emitWordLE(uint32_t W); + + /// emitWordBE - This callback is invoked when a 32-bit word needs to be + /// written to the data stream in big-endian format. + void emitWordBE(uint32_t W); + + /// emitDWordLE - This callback is invoked when a 64-bit word needs to be + /// written to the data stream in little-endian format. + void emitDWordLE(uint64_t W); + + /// emitDWordBE - This callback is invoked when a 64-bit word needs to be + /// written to the data stream in big-endian format. + void emitDWordBE(uint64_t W); + + /// emitAlignment - Move the CurBufferPtr pointer up to the specified + /// alignment (saturated to BufferEnd of course). + void emitAlignment(unsigned Alignment = 0, uint8_t fill = 0); + + /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be + /// written to the data stream. + void emitULEB128Bytes(uint64_t Value); + + /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be + /// written to the data stream. + void emitSLEB128Bytes(uint64_t Value); + + /// emitString - This callback is invoked when a String needs to be + /// written to the data stream. + void emitString(const std::string &String); + + /// getCurrentPCValue - This returns the address that the next emitted byte + /// will be output to. + uintptr_t getCurrentPCValue() const; + + /// getCurrentPCOffset - Return the offset from the start of the emitted + /// buffer that we are currently writing to. + uintptr_t getCurrentPCOffset() const; + + /// addRelocation - Whenever a relocatable address is needed, it should be + /// noted with this interface. + void addRelocation(const MachineRelocation& relocation); + + /// earlyResolveAddresses - True if the code emitter can use symbol addresses + /// during code emission time. The JIT is capable of doing this because it + /// creates jump tables or constant pools in memory on the fly while the + /// object code emitters rely on a linker to have real addresses and should + /// use relocations instead. + bool earlyResolveAddresses() const { return false; } + + /// startFunction - This callback is invoked when the specified function is + /// about to be code generated. This initializes the BufferBegin/End/Ptr + /// fields. + virtual void startFunction(MachineFunction &F) = 0; + + /// finishFunction - This callback is invoked when the specified function has + /// finished code generation. If a buffer overflow has occurred, this method + /// returns true (the callee is required to try again), otherwise it returns + /// false.
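For reference, emitULEB128Bytes writes standard LEB128: seven data bits per byte, least-significant group first, with the high bit set on every byte but the last. A self-contained sketch of the encoding (not the emitter's actual code, which writes through the BinaryObject):

#include <stdint.h>
#include <vector>

static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // low seven bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

For example, 624485 encodes as 0xE5 0x8E 0x26.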
+ virtual bool finishFunction(MachineFunction &F) = 0; + + /// StartMachineBasicBlock - This should be called by the target when a new + /// basic block is about to be emitted. This way the MCE knows where the + /// start of the block is, and can implement getMachineBasicBlockAddress. + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB); + + /// getMachineBasicBlockAddress - Return the address of the specified + /// MachineBasicBlock, only usable after the label for the MBB has been + /// emitted. + virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const; + + /// emitLabel - Emits a label + virtual void emitLabel(uint64_t LabelID) = 0; + + /// getLabelAddress - Return the address of the specified LabelID, only usable + /// after the LabelID has been emitted. + virtual uintptr_t getLabelAddress(uint64_t LabelID) const = 0; + + /// emitJumpTables - Emit all the jump tables for a given jump table info + /// record to the appropriate section. + virtual void emitJumpTables(MachineJumpTableInfo *MJTI) = 0; + + /// getJumpTableEntryAddress - Return the address of the jump table with index + /// 'Index' in the function that last called initJumpTableInfo. + virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const; + + /// emitConstantPool - For each constant pool entry, figure out which section + /// the constant should live in, allocate space for it, and emit it to the + /// Section data buffer. + virtual void emitConstantPool(MachineConstantPool *MCP) = 0; + + /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in + /// the constant pool that was last emitted with the emitConstantPool method. + virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const; + + /// getConstantPoolEntrySection - Return the section of the 'Index' entry in + /// the constant pool that was last emitted with the emitConstantPool method. + virtual uintptr_t getConstantPoolEntrySection(unsigned Index) const; + + /// Specifies the MachineModuleInfo object. This is used for exception handling + /// purposes. + virtual void setModuleInfo(MachineModuleInfo* Info) = 0; + // to be implemented or deprecated with MachineModuleInfo + +}; // end class ObjectCodeEmitter + +} // end namespace llvm + +#endif + diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 7f1c16ff80647..1e7115e090bd8 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_PASSES_H #define LLVM_CODEGEN_PASSES_H -#include <iosfwd> #include <string> namespace llvm { @@ -25,6 +24,7 @@ namespace llvm { class TargetMachine; class TargetLowering; class RegisterCoalescer; + class raw_ostream; /// createUnreachableBlockEliminationPass - The LLVM code generator does not /// work well with unreachable basic blocks (what live ranges make sense for a @@ -35,8 +35,8 @@ namespace llvm { FunctionPass *createUnreachableBlockEliminationPass(); /// MachineFunctionPrinter pass - This pass prints out the machine function to - /// standard error, as a debugging tool. - FunctionPass *createMachineFunctionPrinterPass(std::ostream *OS, + /// the given stream, as a debugging tool. + FunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, const std::string &Banner =""); /// MachineLoopInfo pass - This pass is a loop analysis pass.
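With the printer pass now taking a raw_ostream, wiring a machine-code dump into a codegen pipeline looks like the fragment below. PM is assumed to be an existing PassManagerBase the target is populating; the banner string is arbitrary.

// Dump each MachineFunction to stderr at this point in the pipeline.
PM.add(createMachineFunctionPrinterPass(errs(),
                                        "# After instruction selection:"));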
@@ -87,27 +87,12 @@ namespace llvm { /// FunctionPass *createRegisterAllocator(); - /// SimpleRegisterAllocation Pass - This pass converts the input machine code - /// from SSA form to use explicit registers by spilling every register. Wow, - /// great policy huh? - /// - FunctionPass *createSimpleRegisterAllocator(); - /// LocalRegisterAllocation Pass - This pass register allocates the input code /// a basic block at a time, yielding code better than the simple register /// allocator, but not as good as a global allocator. /// FunctionPass *createLocalRegisterAllocator(); - /// BigBlockRegisterAllocation Pass - The BigBlock register allocator - /// munches single basic blocks at a time, like the local register - /// allocator. While the BigBlock allocator is a little slower, and uses - /// somewhat more memory than the local register allocator, it tends to - /// yield the best allocations (of any of the allocators) for blocks that - /// have hundreds or thousands of instructions in sequence. - /// - FunctionPass *createBigBlockRegisterAllocator(); - /// LinearScanRegisterAllocation Pass - This pass implements the linear scan /// register allocation algorithm, a global register allocator. /// @@ -155,11 +140,6 @@ namespace llvm { /// by seeing if the labels map to the same reduced label. FunctionPass *createDebugLabelFoldingPass(); - /// MachineCodeDeletion Pass - This pass deletes all of the machine code for - /// the current function, which should happen after the function has been - /// emitted to a .s file or to memory. - FunctionPass *createMachineCodeDeleter(); - /// getRegisterAllocator - This creates an instance of the register allocator /// for the Sparc. FunctionPass *getRegisterAllocator(TargetMachine &T); @@ -180,7 +160,7 @@ namespace llvm { /// Creates a pass to print GC metadata. /// - FunctionPass *createGCInfoPrinter(std::ostream &OS); + FunctionPass *createGCInfoPrinter(raw_ostream &OS); /// createMachineLICMPass - This pass performs LICM on machine instructions. /// @@ -207,6 +187,10 @@ namespace llvm { /// adapted to code generation. Required if using dwarf exception handling. FunctionPass *createDwarfEHPass(const TargetLowering *tli, bool fast); + /// createSjLjEHPass - This pass adapts exception handling code to use + /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow. + FunctionPass *createSjLjEHPass(const TargetLowering *tli); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h index 3ad2502fe08a1..c6be645040a86 100644 --- a/include/llvm/CodeGen/PseudoSourceValue.h +++ b/include/llvm/CodeGen/PseudoSourceValue.h @@ -25,18 +25,17 @@ namespace llvm { /// stack frame (e.g., a spill slot), below the stack frame (e.g., argument /// space), or constant pool. class PseudoSourceValue : public Value { + private: + /// printCustom - Implement printing for PseudoSourceValue. This is called + /// from Value::print or Value's operator<<. + /// + virtual void printCustom(raw_ostream &O) const; + public: PseudoSourceValue(); - /// dump - Support for debugging, callable in GDB: V->dump() - // - virtual void dump() const; - - /// print - Implement operator<< on PseudoSourceValue. - /// - virtual void print(raw_ostream &OS) const; - - /// isConstant - Test whether this PseudoSourceValue has a constant value. + /// isConstant - Test whether the memory pointed to by this + /// PseudoSourceValue has a constant value. 
/// virtual bool isConstant(const MachineFrameInfo *) const; @@ -52,18 +51,21 @@ namespace llvm { /// e.g., a spill slot. static const PseudoSourceValue *getFixedStack(int FI); - /// A source value referencing the area below the stack frame of a function, - /// e.g., the argument space. + /// A pseudo source value referencing the area below the stack frame of + /// a function, e.g., the argument space. static const PseudoSourceValue *getStack(); - /// A source value referencing the global offset table (or something the - /// like). + /// A pseudo source value referencing the global offset table + /// (or something the like). static const PseudoSourceValue *getGOT(); - /// A SV referencing the constant pool + /// A pseudo source value referencing the constant pool. Since constant + /// pools are constant, this doesn't need to identify a specific constant + /// pool entry. static const PseudoSourceValue *getConstantPool(); - /// A SV referencing the jump table + /// A pseudo source value referencing a jump table. Since jump tables are + /// constant, this doesn't need to identify a specific jump table. static const PseudoSourceValue *getJumpTable(); }; } // End llvm namespace diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h index a08e42a5d34bb..100e357654fbc 100644 --- a/include/llvm/CodeGen/RegAllocRegistry.h +++ b/include/llvm/CodeGen/RegAllocRegistry.h @@ -34,7 +34,9 @@ public: RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C) : MachinePassRegistryNode(N, D, (MachinePassCtor)C) - { Registry.Add(this); } + { + Registry.Add(this); + } ~RegisterRegAlloc() { Registry.Remove(this); } diff --git a/include/llvm/CodeGen/RegisterCoalescer.h b/include/llvm/CodeGen/RegisterCoalescer.h index 79dd9db0f11a3..1490aa0172fb0 100644 --- a/include/llvm/CodeGen/RegisterCoalescer.h +++ b/include/llvm/CodeGen/RegisterCoalescer.h @@ -42,7 +42,7 @@ namespace llvm { /// Reset state. Can be used to allow a coalescer run by /// PassManager to be run again by the register allocator. - virtual void reset(MachineFunction &mf) {}; + virtual void reset(MachineFunction &mf) {} /// Register allocators must call this from their own /// getAnalysisUsage to cover the case where the coalescer is not @@ -51,7 +51,7 @@ namespace llvm { /// which to invalidate when running the register allocator or any /// pass that might call coalescing. The long-term solution is to /// allow hierarchies of PassManagers. - virtual void getAnalysisUsage(AnalysisUsage &AU) const {}; + virtual void getAnalysisUsage(AnalysisUsage &AU) const {} }; /// An abstract interface for register allocators to interact with @@ -68,7 +68,7 @@ namespace llvm { /// /// public: /// LinearScanRegallocQuery(LiveIntervals &intervals) - /// : li(intervals) {}; + /// : li(intervals) {} /// /// /// This is pretty slow and conservative, but since linear scan /// /// allocation doesn't pre-compute interference information it's @@ -85,14 +85,14 @@ namespace llvm { /// interferences.insert(&iv->second); /// } /// } - /// }; + /// } /// /// /// This is *really* slow and stupid. See above. /// int getNumberOfInterferences(const LiveInterval &a) const { /// IntervalSet intervals; /// getInterferences(intervals, a); /// return intervals.size(); - /// }; + /// } /// }; /// /// In the allocator: @@ -108,14 +108,14 @@ namespace llvm { public: typedef SmallPtrSet IntervalSet; - virtual ~RegallocQuery() {}; + virtual ~RegallocQuery() {} /// Return whether two live ranges interfere. 
virtual bool interfere(const LiveInterval &a, const LiveInterval &b) const { // A naive test return a.overlaps(b); - }; + } /// Return the set of intervals that interfere with this one. virtual void getInterferences(IntervalSet &interferences, @@ -129,7 +129,7 @@ namespace llvm { /// coalescing or other modifications. virtual void updateDataForMerge(const LiveInterval &a, const LiveInterval &b, - const MachineInstr ©) {}; + const MachineInstr ©) {} /// Allow the register allocator to communicate when it doesn't /// want a copy coalesced. This may be due to assumptions made by diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 458c2e4487f9c..84b726d73fb3b 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" namespace llvm { @@ -69,14 +68,6 @@ class RegScavenger { /// available, unset means the register is currently being used. BitVector RegsAvailable; - /// CurrDist - Distance from MBB entry to the current instruction MBBI. - /// - unsigned CurrDist; - - /// DistanceMap - Keep track the distance of a MI from the start of the - /// current basic block. - DenseMap DistanceMap; - public: RegScavenger() : MBB(NULL), NumPhysRegs(0), Tracking(false), @@ -86,56 +77,30 @@ public: /// basic block. void enterBasicBlock(MachineBasicBlock *mbb); - /// forward / backward - Move the internal MBB iterator and update register - /// states. + /// initRegState - allow resetting register state info for multiple + /// passes over/within the same function. + void initRegState(); + + /// forward - Move the internal MBB iterator and update register states. void forward(); - void backward(); - /// forward / backward - Move the internal MBB iterator and update register - /// states until it has processed the specific iterator. + /// forward - Move the internal MBB iterator and update register states until + /// it has processed the specific iterator. void forward(MachineBasicBlock::iterator I) { if (!Tracking && MBB->begin() != I) forward(); while (MBBI != I) forward(); } - void backward(MachineBasicBlock::iterator I) { - while (MBBI != I) backward(); - } /// skipTo - Move the internal MBB iterator but do not update register states. /// void skipTo(MachineBasicBlock::iterator I) { MBBI = I; } - /// isReserved - Returns true if a register is reserved. It is never "unused". - bool isReserved(unsigned Reg) const { return ReservedRegs[Reg]; } - - /// isUsed / isUsed - Test if a register is currently being used. - /// - bool isUsed(unsigned Reg) const { return !RegsAvailable[Reg]; } - bool isUnused(unsigned Reg) const { return RegsAvailable[Reg]; } - /// getRegsUsed - return all registers currently in use in used. void getRegsUsed(BitVector &used, bool includeReserved); - /// setUsed / setUnused - Mark the state of one or a number of registers. - /// - void setUsed(unsigned Reg); - void setUsed(BitVector &Regs) { - RegsAvailable &= ~Regs; - } - void setUnused(unsigned Reg, const MachineInstr *MI); - void setUnused(BitVector &Regs) { - RegsAvailable |= Regs; - } - - /// FindUnusedReg - Find a unused register of the specified register class - /// from the specified set of registers. It return 0 is none is found. - unsigned FindUnusedReg(const TargetRegisterClass *RegClass, - const BitVector &Candidates) const; - /// FindUnusedReg - Find a unused register of the specified register class. 
- /// Exclude callee saved registers if directed. It return 0 is none is found. - unsigned FindUnusedReg(const TargetRegisterClass *RegClass, - bool ExCalleeSaved = false) const; + /// Return 0 if none is found. + unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const; /// setScavengingFrameIndex / getScavengingFrameIndex - accessor and setter of /// ScavengingFrameIndex. @@ -152,16 +117,43 @@ public: return scavengeRegister(RegClass, MBBI, SPAdj); } + /// setUsed - Tell the scavenger a register is used. + /// + void setUsed(unsigned Reg); private: - /// restoreScavengedReg - Restore scavenged by loading it back from the - /// emergency spill slot. Mark it used. - void restoreScavengedReg(); + /// isReserved - Returns true if a register is reserved. It is never "unused". + bool isReserved(unsigned Reg) const { return ReservedRegs.test(Reg); } + + /// isUsed / isUnused - Test if a register is currently being used. + /// + bool isUsed(unsigned Reg) const { return !RegsAvailable.test(Reg); } + bool isUnused(unsigned Reg) const { return RegsAvailable.test(Reg); } + + /// isAliasUsed - Is Reg or an alias currently in use? + bool isAliasUsed(unsigned Reg) const; + + /// setUsed / setUnused - Mark the state of one or a number of registers. + /// + void setUsed(BitVector &Regs) { + RegsAvailable &= ~Regs; + } + void setUnused(BitVector &Regs) { + RegsAvailable |= Regs; + } + + /// Add Reg and all its sub-registers to BV. + void addRegWithSubRegs(BitVector &BV, unsigned Reg); + + /// Add Reg and its aliases to BV. + void addRegWithAliases(BitVector &BV, unsigned Reg); + + unsigned findSurvivorReg(MachineBasicBlock::iterator MI, + BitVector &Candidates, + unsigned InstrLimit, + MachineBasicBlock::iterator &UseMI); - MachineInstr *findFirstUse(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, unsigned Reg, - unsigned &Dist); }; - + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 7f2c8bc368402..7a40f0233d578 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -224,6 +224,11 @@ namespace RTLIB { O_F32, O_F64, + // MEMORY + MEMCPY, + MEMSET, + MEMMOVE, + // EXCEPTION HANDLING UNWIND_RESUME, @@ -232,27 +237,27 @@ namespace RTLIB { /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. - Libcall getFPEXT(MVT OpVT, MVT RetVT); + Libcall getFPEXT(EVT OpVT, EVT RetVT); /// getFPROUND - Return the FPROUND_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. - Libcall getFPROUND(MVT OpVT, MVT RetVT); + Libcall getFPROUND(EVT OpVT, EVT RetVT); /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. - Libcall getFPTOSINT(MVT OpVT, MVT RetVT); + Libcall getFPTOSINT(EVT OpVT, EVT RetVT); /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. - Libcall getFPTOUINT(MVT OpVT, MVT RetVT); + Libcall getFPTOUINT(EVT OpVT, EVT RetVT); /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. - Libcall getSINTTOFP(MVT OpVT, MVT RetVT); + Libcall getSINTTOFP(EVT OpVT, EVT RetVT); /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. 
- Libcall getUINTTOFP(MVT OpVT, MVT RetVT); + Libcall getUINTTOFP(EVT OpVT, EVT RetVT); } } diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 237d491e82622..39563f733068c 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -23,6 +23,7 @@ #include "llvm/ADT/PointerIntPair.h" namespace llvm { + class AliasAnalysis; class SUnit; class MachineConstantPool; class MachineFunction; @@ -145,6 +146,11 @@ namespace llvm { return Latency; } + /// setLatency - Set the latency for this edge. + void setLatency(unsigned Lat) { + Latency = Lat; + } + //// getSUnit - Return the SUnit to which this edge points. SUnit *getSUnit() const { return Dep.getPointer(); @@ -238,10 +244,10 @@ namespace llvm { unsigned NodeNum; // Entry # of node in the node vector. unsigned NodeQueueId; // Queue id of node. unsigned short Latency; // Node latency. - short NumPreds; // # of SDep::Data preds. - short NumSuccs; // # of SDep::Data sucss. - short NumPredsLeft; // # of preds not scheduled. - short NumSuccsLeft; // # of succs not scheduled. + unsigned NumPreds; // # of SDep::Data preds. + unsigned NumSuccs; // # of SDep::Data succs. + unsigned NumPredsLeft; // # of preds not scheduled. + unsigned NumSuccsLeft; // # of succs not scheduled. bool isTwoAddress : 1; // Is a two-address instruction. bool isCommutable : 1; // Is a commutable instruction. bool hasPhysRegDefs : 1; // Has physreg defs that are being used. @@ -429,8 +435,8 @@ namespace llvm { class ScheduleDAG { public: - MachineBasicBlock *BB; // The block in which to insert instructions. - MachineBasicBlock::iterator InsertPos;// The position to insert instructions. + MachineBasicBlock *BB; // The block in which to insert instructions + MachineBasicBlock::iterator InsertPos;// The position to insert instructions const TargetMachine &TM; // Target processor const TargetInstrInfo *TII; // Target instruction information const TargetRegisterInfo *TRI; // Target processor register info @@ -456,7 +462,8 @@ namespace llvm { /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock /// according to the order specified in Sequence. /// - virtual MachineBasicBlock *EmitSchedule() = 0; + virtual MachineBasicBlock* + EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) = 0; void dumpSchedule() const; @@ -484,19 +491,25 @@ namespace llvm { /// BuildSchedGraph - Build SUnits and set up their Preds and Succs /// to form the scheduling dependency graph. /// - virtual void BuildSchedGraph() = 0; + virtual void BuildSchedGraph(AliasAnalysis *AA) = 0; /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU) = 0; + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information. + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { } + /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. /// virtual void Schedule() = 0; - /// ForceUnitLatencies - Return true if all scheduling edges should be given a - /// latency value of one. The default is to return false; schedulers may + /// ForceUnitLatencies - Return true if all scheduling edges should be given + /// a latency value of one. The default is to return false; schedulers may /// override this as needed.
virtual bool ForceUnitLatencies() const { return false; } @@ -504,27 +517,11 @@ namespace llvm { /// void EmitNoop(); - void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO); - void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap); - - private: - /// EmitLiveInCopy - Emit a copy for a live in physical register. If the - /// physical register has only a single copy use, then coalesced the copy - /// if possible. - void EmitLiveInCopy(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &InsertPos, - unsigned VirtReg, unsigned PhysReg, - const TargetRegisterClass *RC, - DenseMap<MachineInstr*, unsigned> &CopyRegMap); - - /// EmitLiveInCopies - If this is the first basic block in the function, - /// and if it has live ins that need to be copied into vregs, emit the - /// copies into the top of the block. - void EmitLiveInCopies(MachineBasicBlock *MBB); }; - class SUnitIterator : public forward_iterator<SUnit, ptrdiff_t> { + class SUnitIterator : public std::iterator<std::forward_iterator_tag, SUnit, ptrdiff_t> { SUnit *Node; unsigned Operand; @@ -536,7 +533,7 @@ namespace llvm { bool operator!=(const SUnitIterator& x) const { return !operator==(x); } const SUnitIterator &operator=(const SUnitIterator &I) { - assert(I.Node == Node && "Cannot assign iterators to two different nodes!"); + assert(I.Node==Node && "Cannot assign iterators to two different nodes!"); Operand = I.Operand; return *this; } diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h index 369882d258e3b..09e3e88613164 100644 --- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h +++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h @@ -43,6 +43,11 @@ public: return NoHazard; } + /// Reset - This callback is invoked when a new block of + /// instructions is about to be scheduled. The hazard state should be + /// set to an initialized state. + virtual void Reset() {} + /// EmitInstruction - This callback is invoked when an instruction is /// emitted, to advance the hazard state. virtual void EmitInstruction(SUnit *) {} diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 8abd78dd2abb2..e0198ef2e3f45 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/RecyclingAllocator.h" #include "llvm/Target/TargetMachine.h" #include <cassert> #include <vector> @@ -37,7 +38,7 @@ class FunctionLoweringInfo; template<> struct ilist_traits<SDNode> : public ilist_default_traits<SDNode> { private: - mutable ilist_node<SDNode> Sentinel; + mutable ilist_half_node<SDNode> Sentinel; public: SDNode *createSentinel() const { return static_cast<SDNode*>(&Sentinel); @@ -78,6 +79,7 @@ class SelectionDAG { FunctionLoweringInfo &FLI; MachineModuleInfo *MMI; DwarfWriter *DW; + LLVMContext* Context; /// EntryNode - The starting token. SDNode EntryNode; @@ -98,7 +100,7 @@ class SelectionDAG { NodeAllocatorType NodeAllocator; /// CSEMap - This structure is used to memoize nodes, automatically performing - /// CSE with existing nodes with a duplicate is requested. + /// CSE with existing nodes when a duplicate is requested. FoldingSet<SDNode> CSEMap; /// OperandAllocator - Pool allocation for machine-opcode SDNode operands.
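(Illustrative sketch, not part of this change: a toy client of the new Reset() hook in the ScheduleHazardRecognizer.h hunk above. The class name and the single-slot "divider" hazard model are invented for illustration.)

    // Toy hazard recognizer built on the interface above.
    #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
    using namespace llvm;

    class ToyHazardRecognizer : public ScheduleHazardRecognizer {
      unsigned DivBusy; // cycles until the hypothetical divide unit is free
    public:
      ToyHazardRecognizer() : DivBusy(0) {}
      // New in this patch: reinitialize all state before each block.
      virtual void Reset() { DivBusy = 0; }
      virtual HazardType getHazardType(SUnit *) {
        // A real target would inspect the SUnit's instruction here.
        return DivBusy ? Hazard : NoHazard;
      }
      virtual void EmitInstruction(SUnit *) { DivBusy = 2; }
      virtual void AdvanceCycle() { if (DivBusy) --DivBusy; }
    };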
@@ -138,6 +140,7 @@ public: FunctionLoweringInfo &getFunctionLoweringInfo() const { return FLI; } MachineModuleInfo *getMachineModuleInfo() const { return MMI; } DwarfWriter *getDwarfWriter() const { return DW; } + LLVMContext *getContext() const {return Context; } /// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'. /// @@ -242,70 +245,70 @@ public: /// getVTList - Return an SDVTList that represents the list of values /// specified. - SDVTList getVTList(MVT VT); - SDVTList getVTList(MVT VT1, MVT VT2); - SDVTList getVTList(MVT VT1, MVT VT2, MVT VT3); - SDVTList getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4); - SDVTList getVTList(const MVT *VTs, unsigned NumVTs); + SDVTList getVTList(EVT VT); + SDVTList getVTList(EVT VT1, EVT VT2); + SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3); + SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4); + SDVTList getVTList(const EVT *VTs, unsigned NumVTs); //===--------------------------------------------------------------------===// // Node creation methods. // - SDValue getConstant(uint64_t Val, MVT VT, bool isTarget = false); - SDValue getConstant(const APInt &Val, MVT VT, bool isTarget = false); - SDValue getConstant(const ConstantInt &Val, MVT VT, bool isTarget = false); + SDValue getConstant(uint64_t Val, EVT VT, bool isTarget = false); + SDValue getConstant(const APInt &Val, EVT VT, bool isTarget = false); + SDValue getConstant(const ConstantInt &Val, EVT VT, bool isTarget = false); SDValue getIntPtrConstant(uint64_t Val, bool isTarget = false); - SDValue getTargetConstant(uint64_t Val, MVT VT) { + SDValue getTargetConstant(uint64_t Val, EVT VT) { return getConstant(Val, VT, true); } - SDValue getTargetConstant(const APInt &Val, MVT VT) { + SDValue getTargetConstant(const APInt &Val, EVT VT) { return getConstant(Val, VT, true); } - SDValue getTargetConstant(const ConstantInt &Val, MVT VT) { + SDValue getTargetConstant(const ConstantInt &Val, EVT VT) { return getConstant(Val, VT, true); } - SDValue getConstantFP(double Val, MVT VT, bool isTarget = false); - SDValue getConstantFP(const APFloat& Val, MVT VT, bool isTarget = false); - SDValue getConstantFP(const ConstantFP &CF, MVT VT, bool isTarget = false); - SDValue getTargetConstantFP(double Val, MVT VT) { + SDValue getConstantFP(double Val, EVT VT, bool isTarget = false); + SDValue getConstantFP(const APFloat& Val, EVT VT, bool isTarget = false); + SDValue getConstantFP(const ConstantFP &CF, EVT VT, bool isTarget = false); + SDValue getTargetConstantFP(double Val, EVT VT) { return getConstantFP(Val, VT, true); } - SDValue getTargetConstantFP(const APFloat& Val, MVT VT) { + SDValue getTargetConstantFP(const APFloat& Val, EVT VT) { return getConstantFP(Val, VT, true); } - SDValue getTargetConstantFP(const ConstantFP &Val, MVT VT) { + SDValue getTargetConstantFP(const ConstantFP &Val, EVT VT) { return getConstantFP(Val, VT, true); } - SDValue getGlobalAddress(const GlobalValue *GV, MVT VT, + SDValue getGlobalAddress(const GlobalValue *GV, EVT VT, int64_t offset = 0, bool isTargetGA = false, unsigned char TargetFlags = 0); - SDValue getTargetGlobalAddress(const GlobalValue *GV, MVT VT, + SDValue getTargetGlobalAddress(const GlobalValue *GV, EVT VT, int64_t offset = 0, unsigned char TargetFlags = 0) { return getGlobalAddress(GV, VT, offset, true, TargetFlags); } - SDValue getFrameIndex(int FI, MVT VT, bool isTarget = false); - SDValue getTargetFrameIndex(int FI, MVT VT) { + SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false); + SDValue getTargetFrameIndex(int FI, EVT 
VT) { return getFrameIndex(FI, VT, true); } - SDValue getJumpTable(int JTI, MVT VT, bool isTarget = false, + SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false, unsigned char TargetFlags = 0); - SDValue getTargetJumpTable(int JTI, MVT VT, unsigned char TargetFlags = 0) { + SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) { return getJumpTable(JTI, VT, true, TargetFlags); } - SDValue getConstantPool(Constant *C, MVT VT, + SDValue getConstantPool(Constant *C, EVT VT, unsigned Align = 0, int Offs = 0, bool isT=false, unsigned char TargetFlags = 0); - SDValue getTargetConstantPool(Constant *C, MVT VT, + SDValue getTargetConstantPool(Constant *C, EVT VT, unsigned Align = 0, int Offset = 0, unsigned char TargetFlags = 0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } - SDValue getConstantPool(MachineConstantPoolValue *C, MVT VT, + SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Align = 0, int Offs = 0, bool isT=false, unsigned char TargetFlags = 0); SDValue getTargetConstantPool(MachineConstantPoolValue *C, - MVT VT, unsigned Align = 0, + EVT VT, unsigned Align = 0, int Offset = 0, unsigned char TargetFlags=0) { return getConstantPool(C, VT, Align, Offset, true, TargetFlags); } @@ -313,15 +316,14 @@ public: // to provide debug info for the BB at that time, so keep this one around. SDValue getBasicBlock(MachineBasicBlock *MBB); SDValue getBasicBlock(MachineBasicBlock *MBB, DebugLoc dl); - SDValue getExternalSymbol(const char *Sym, MVT VT); - SDValue getExternalSymbol(const char *Sym, DebugLoc dl, MVT VT); - SDValue getTargetExternalSymbol(const char *Sym, MVT VT, + SDValue getExternalSymbol(const char *Sym, EVT VT); + SDValue getExternalSymbol(const char *Sym, DebugLoc dl, EVT VT); + SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags = 0); - SDValue getArgFlags(ISD::ArgFlagsTy Flags); - SDValue getValueType(MVT); - SDValue getRegister(unsigned Reg, MVT VT); + SDValue getValueType(EVT); + SDValue getRegister(unsigned Reg, EVT VT); SDValue getDbgStopPoint(DebugLoc DL, SDValue Root, - unsigned Line, unsigned Col, Value *CU); + unsigned Line, unsigned Col, MDNode *CU); SDValue getLabel(unsigned Opcode, DebugLoc dl, SDValue Root, unsigned LabelID); @@ -348,7 +350,7 @@ public: return getNode(ISD::CopyToReg, dl, VTs, Ops, Flag.getNode() ? 4 : 3); } - SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, MVT VT) { + SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT) { SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = { Chain, getRegister(Reg, VT) }; return getNode(ISD::CopyFromReg, dl, VTs, Ops, 2); @@ -357,7 +359,7 @@ public: // This version of the getCopyFromReg method takes an extra operand, which // indicates that there is potentially an incoming flag value (if Flag is not // null) and that there should be a flag result. - SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, MVT VT, + SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT, SDValue Flag) { SDVTList VTs = getVTList(VT, MVT::Other, MVT::Flag); SDValue Ops[] = { Chain, getRegister(Reg, VT), Flag }; @@ -368,7 +370,7 @@ public: /// Returns the ConvertRndSat Note: Avoid using this node because it may /// disappear in the future and most targets don't support it. 
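(Illustrative sketch, not part of this change: typical use of the EVT-based getCopyToReg/getCopyFromReg pair above. The helper name, the choice of Reg, and the i32 type are assumptions for the example.)

    // Copy V into physical register Reg, then read it back out.
    // getCopyFromReg's result 0 is the value; the chain is threaded through.
    SDValue roundTripThroughReg(SelectionDAG &DAG, DebugLoc dl,
                                SDValue Chain, unsigned Reg, SDValue V) {
      Chain = DAG.getCopyToReg(Chain, dl, Reg, V);
      return DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
    }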
- SDValue getConvertRndSat(MVT VT, DebugLoc dl, SDValue Val, SDValue DTy, + SDValue getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code); @@ -376,15 +378,23 @@ public: /// elements in VT, which must be a vector type, must match the number of /// mask elements NumElts. A integer mask element equal to -1 is treated as /// undefined. - SDValue getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, + SDValue getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, const int *MaskElts); + /// getSExtOrTrunc - Convert Op, which must be of integer type, to the + /// integer type VT, by either sign-extending or truncating it. + SDValue getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT); + + /// getZExtOrTrunc - Convert Op, which must be of integer type, to the + /// integer type VT, by either zero-extending or truncating it. + SDValue getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT); + /// getZeroExtendInReg - Return the expression required to zero extend the Op /// value assuming it was the smaller SrcTy value. - SDValue getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT SrcTy); + SDValue getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT SrcTy); /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). - SDValue getNOT(DebugLoc DL, SDValue Val, MVT VT); + SDValue getNOT(DebugLoc DL, SDValue Val, EVT VT); /// getCALLSEQ_START - Return a new CALLSEQ_START node, which always must have /// a flag result (to ensure it's not CSE'd). CALLSEQ_START does not have a @@ -413,36 +423,36 @@ public: } /// getUNDEF - Return an UNDEF node. UNDEF does not have a useful DebugLoc. - SDValue getUNDEF(MVT VT) { + SDValue getUNDEF(EVT VT) { return getNode(ISD::UNDEF, DebugLoc::getUnknownLoc(), VT); } /// getGLOBAL_OFFSET_TABLE - Return a GLOBAL_OFFSET_TABLE node. This does /// not have a useful DebugLoc. - SDValue getGLOBAL_OFFSET_TABLE(MVT VT) { + SDValue getGLOBAL_OFFSET_TABLE(EVT VT) { return getNode(ISD::GLOBAL_OFFSET_TABLE, DebugLoc::getUnknownLoc(), VT); } /// getNode - Gets or creates the specified node. 
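(Illustrative sketch, not part of this change: the new getSExtOrTrunc/getZExtOrTrunc helpers above fold the usual three-way choice of extend, truncate, or no-op into one call. The i32 destination type is chosen only for the example.)

    // Coerce Op to i32: zero-extends a narrower value, truncates a wider
    // one, and returns Op unchanged when the widths already match.
    SDValue coerceToI32(SelectionDAG &DAG, DebugLoc dl, SDValue Op) {
      return DAG.getZExtOrTrunc(Op, dl, MVT::i32);
    }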
/// - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT); - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, SDValue N); - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, SDValue N1, SDValue N2); - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT); + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N); + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2); + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3); - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4); - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDUse *Ops, unsigned NumOps); - SDValue getNode(unsigned Opcode, DebugLoc DL, MVT VT, + SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps); SDValue getNode(unsigned Opcode, DebugLoc DL, - const std::vector &ResultTys, + const std::vector &ResultTys, const SDValue *Ops, unsigned NumOps); - SDValue getNode(unsigned Opcode, DebugLoc DL, const MVT *VTs, unsigned NumVTs, + SDValue getNode(unsigned Opcode, DebugLoc DL, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps); SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, const SDValue *Ops, unsigned NumOps); @@ -458,6 +468,12 @@ public: SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); + /// getStackArgumentTokenFactor - Compute a TokenFactor to force all + /// the incoming stack arguments to be loaded from the stack. This is + /// used in tail call lowering to protect stack arguments from being + /// clobbered. + SDValue getStackArgumentTokenFactor(SDValue Chain); + SDValue getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, @@ -475,7 +491,7 @@ public: /// getSetCC - Helper function to make it easier to build SetCC's if you just /// have an ISD::CondCode instead of an SDValue. /// - SDValue getSetCC(DebugLoc DL, MVT VT, SDValue LHS, SDValue RHS, + SDValue getSetCC(DebugLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond) { return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond)); } @@ -483,7 +499,7 @@ public: /// getVSetCC - Helper function to make it easier to build VSetCC's nodes /// if you just have an ISD::CondCode instead of an SDValue. /// - SDValue getVSetCC(DebugLoc DL, MVT VT, SDValue LHS, SDValue RHS, + SDValue getVSetCC(DebugLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond) { return getNode(ISD::VSETCC, DL, VT, LHS, RHS, getCondCode(Cond)); } @@ -499,82 +515,89 @@ public: /// getVAArg - VAArg produces a result and token chain, and takes a pointer /// and a source value as input. 
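(Illustrative sketch, not part of this change: all the getNode overloads above funnel into the same CSE'd node construction, so typical client code is short. Names and the i32 type are assumptions.)

    // Build (A + B) * A with the two-operand overload. If an identical
    // node already exists in the DAG, getNode returns it instead of
    // creating a duplicate.
    SDValue buildMulAdd(SelectionDAG &DAG, DebugLoc dl, SDValue A, SDValue B) {
      SDValue Sum = DAG.getNode(ISD::ADD, dl, MVT::i32, A, B);
      return DAG.getNode(ISD::MUL, dl, MVT::i32, Sum, A);
    }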
- SDValue getVAArg(MVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, + SDValue getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue SV); /// getAtomic - Gets a node for an atomic op, produces result and chain and /// takes 3 operands - SDValue getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Chain, + SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, const Value* PtrVal, unsigned Alignment=0); + SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, + MachineMemOperand *MMO); /// getAtomic - Gets a node for an atomic op, produces result and chain and /// takes 2 operands. - SDValue getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Chain, + SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, unsigned Alignment = 0); + SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO); /// getMemIntrinsicNode - Creates a MemIntrinsicNode that may produce a - /// result and takes a list of operands. + /// result and takes a list of operands. Opcode may be INTRINSIC_VOID, + /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not + /// less than FIRST_TARGET_MEMORY_OPCODE. SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, - const MVT *VTs, unsigned NumVTs, + const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align = 0, bool Vol = false, bool ReadMem = true, bool WriteMem = true); SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align = 0, bool Vol = false, bool ReadMem = true, bool WriteMem = true); + SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachineMemOperand *MMO); + /// getMergeValues - Create a MERGE_VALUES node from the given operands. SDValue getMergeValues(const SDValue *Ops, unsigned NumOps, DebugLoc dl); - /// getCall - Create a CALL node from the given information. - /// - SDValue getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, - bool IsTailCall, bool isInreg, SDVTList VTs, - const SDValue *Operands, unsigned NumOperands, - unsigned NumFixedArgs); - /// getLoad - Loads are not normal binary operators: their result type is not /// determined by their operands, and they produce a value AND a token chain. 
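(Illustrative sketch, not part of this change: the MachineMemOperand-taking getAtomic overloads above let a caller attach explicit memory-reference information. This assumes a MachineFunction-level MMO allocator of this era; PtrVal, Ptr, Val and the 32-bit width are made up for the example.)

    // Build a 32-bit atomic swap carrying an explicit MMO; the MMO is
    // flagged both load and store, matching the AtomicSDNode assertions.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrVal,
                              MachineMemOperand::MOLoad |
                              MachineMemOperand::MOStore,
                              /*Offset=*/0, /*Size=*/4, /*Alignment=*/4);
    SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, MVT::i32,
                                 Chain, Ptr, Val, MMO);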
/// - SDValue getLoad(MVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, + SDValue getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile=false, unsigned Alignment=0); - SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT, + SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, - int SVOffset, MVT EVT, bool isVolatile=false, + int SVOffset, EVT MemVT, bool isVolatile=false, unsigned Alignment=0); SDValue getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); SDValue getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, - MVT VT, SDValue Chain, - SDValue Ptr, SDValue Offset, - const Value *SV, int SVOffset, MVT EVT, - bool isVolatile=false, unsigned Alignment=0); + EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, + const Value *SV, int SVOffset, EVT MemVT, + bool isVolatile=false, unsigned Alignment=0); + SDValue getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, + EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, + EVT MemVT, MachineMemOperand *MMO); /// getStore - Helper function to build ISD::STORE nodes. /// SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile=false, unsigned Alignment=0); + SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, + MachineMemOperand *MMO); SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, - const Value *SV, int SVOffset, MVT TVT, + const Value *SV, int SVOffset, EVT TVT, bool isVolatile=false, unsigned Alignment=0); + SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, + EVT TVT, MachineMemOperand *MMO); SDValue getIndexedStore(SDValue OrigStoe, DebugLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); /// getSrcValue - Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); - /// getMemOperand - Construct a node to track a memory reference - /// through the backend. - SDValue getMemOperand(const MachineMemOperand &MO); - /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue getShiftAmountOperand(SDValue Op); @@ -600,91 +623,104 @@ public: /// specified node to have the specified return type, Target opcode, and /// operands. Note that target opcodes are stored as /// ~TargetOpcode in the node opcode field. The resultant node is returned. 
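(Illustrative sketch, not part of this change: the SrcValue-based getStore/getLoad overloads above pair naturally for spill-style traffic. The helper name and the i32 slot type are assumptions.)

    // Spill Val to a stack slot and reload it. SV describes the slot to
    // alias analysis; offset 0 and default (natural) alignment are used.
    SDValue spillAndReload(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
                           SDValue Val, SDValue Slot, const Value *SV) {
      SDValue Store = DAG.getStore(Chain, dl, Val, Slot, SV, 0);
      return DAG.getLoad(MVT::i32, dl, Store, Slot, SV, 0);
    }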
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT, SDValue Op1); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, SDValue Op1); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, SDValue Op1, SDValue Op2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT, + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, const SDValue *Ops, unsigned NumOps); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, MVT VT2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, - MVT VT2, const SDValue *Ops, unsigned NumOps); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, - MVT VT2, MVT VT3, const SDValue *Ops, unsigned NumOps); - SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, MVT VT1, - MVT VT2, MVT VT3, MVT VT4, const SDValue *Ops, + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, EVT VT2); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, const SDValue *Ops, unsigned NumOps); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, - MVT VT2, SDValue Op1); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, - MVT VT2, SDValue Op1, SDValue Op2); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, - MVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, MVT VT1, - MVT VT2, MVT VT3, SDValue Op1, SDValue Op2, SDValue Op3); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, SDValue Op1); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3); SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, SDVTList VTs, const SDValue *Ops, unsigned NumOps); /// MorphNodeTo - These *mutate* the specified node to have the specified /// return type, opcode, and operands. 
- SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT, SDValue Op1); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT, + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT, SDValue Op1); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT, SDValue Op1, SDValue Op2); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT, + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT, + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT, const SDValue *Ops, unsigned NumOps); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1, MVT VT2); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1, - MVT VT2, const SDValue *Ops, unsigned NumOps); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1, - MVT VT2, MVT VT3, const SDValue *Ops, unsigned NumOps); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1, - MVT VT2, SDValue Op1); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1, - MVT VT2, SDValue Op1, SDValue Op2); - SDNode *MorphNodeTo(SDNode *N, unsigned Opc, MVT VT1, - MVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1, EVT VT2); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1, + EVT VT2, const SDValue *Ops, unsigned NumOps); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1, + EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1, + EVT VT2, SDValue Op1); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2); + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, const SDValue *Ops, unsigned NumOps); - /// getTargetNode - These are used for target selectors to create a new node - /// with specified return type(s), target opcode, and operands. + /// getMachineNode - These are used for target selectors to create a new node + /// with specified return type(s), MachineInstr opcode, and operands. /// - /// Note that getTargetNode returns the resultant node. If there is already a - /// node of the specified opcode and operands, it returns that node instead of - /// the current one. 
- SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, SDValue Op1); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, SDValue Op1, - SDValue Op2); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - const SDValue *Ops, unsigned NumOps); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, - SDValue Op1); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, SDValue Op2); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, - const SDValue *Ops, unsigned NumOps); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2, SDValue Op3); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3, - const SDValue *Ops, unsigned NumOps); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, MVT VT2, MVT VT3, - MVT VT4, const SDValue *Ops, unsigned NumOps); - SDNode *getTargetNode(unsigned Opcode, DebugLoc dl, - const std::vector &ResultTys, const SDValue *Ops, - unsigned NumOps); + /// Note that getMachineNode returns the resultant node. If there is already + /// a node of the specified opcode and operands, it returns that node instead + /// of the current one. + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + const SDValue *Ops, unsigned NumOps); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, + SDValue Op1); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, + const SDValue *Ops, unsigned NumOps); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, + EVT VT3, SDValue Op1, SDValue Op2); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, + EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, + EVT VT3, const SDValue *Ops, unsigned NumOps); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2, + EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, + const std::vector &ResultTys, const SDValue *Ops, + unsigned NumOps); + MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, SDVTList VTs, + const SDValue *Ops, unsigned NumOps); + + /// getTargetExtractSubreg - A convenience function for 
creating + /// TargetInstrInfo::EXTRACT_SUBREG nodes. + SDValue getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand); + + /// getTargetInsertSubreg - A convenience function for creating + /// TargetInstrInfo::INSERT_SUBREG nodes. + SDValue getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg); /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. @@ -792,20 +828,20 @@ public: /// CreateStackTemporary - Create a stack temporary, suitable for holding the /// specified value type. If minAlign is specified, the slot size will have /// at least that alignment. - SDValue CreateStackTemporary(MVT VT, unsigned minAlign = 1); + SDValue CreateStackTemporary(EVT VT, unsigned minAlign = 1); /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. - SDValue CreateStackTemporary(MVT VT1, MVT VT2); + SDValue CreateStackTemporary(EVT VT1, EVT VT2); /// FoldConstantArithmetic - SDValue FoldConstantArithmetic(unsigned Opcode, - MVT VT, + EVT VT, ConstantSDNode *Cst1, ConstantSDNode *Cst2); /// FoldSetCC - Constant fold a setcc to true or false. - SDValue FoldSetCC(MVT VT, SDValue N1, + SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, DebugLoc dl); /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We @@ -835,6 +871,9 @@ public: /// class to allow target nodes to be understood. unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const; + /// isKnownNeverNaN - Test whether the given SDValue is known to never be NaN. + bool isKnownNeverNaN(SDValue Op) const; + /// isVerifiedDebugInfoDesc - Returns true if the specified SDValue has /// been verified as a debug information descriptor. bool isVerifiedDebugInfoDesc(SDValue Op) const; @@ -855,7 +894,7 @@ private: void DeleteNodeNotInCSEMaps(SDNode *N); void DeallocateNode(SDNode *N); - unsigned getMVTAlignment(MVT MemoryVT) const; + unsigned getEVTAlignment(EVT MemoryVT) const; void allnodes_clear(); @@ -866,7 +905,7 @@ private: std::vector<CondCodeSDNode*> CondCodeNodes; std::vector<SDNode*> ValueTypeNodes; - std::map<MVT, SDNode*, MVT::compareRawBits> ExtendedValueTypeNodes; + std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes; StringMap<SDNode*> ExternalSymbols; std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols; diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index d2c0dc420f8a2..2b713f10df565 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -19,6 +19,7 @@ #include "llvm/Pass.h" #include "llvm/Constant.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { class FastISel; @@ -39,7 +40,7 @@ namespace llvm { /// SelectionDAGISel - This is the common base class used for SelectionDAG-based /// pattern-matching instruction selectors.
-class SelectionDAGISel : public FunctionPass { +class SelectionDAGISel : public MachineFunctionPass { public: const TargetMachine &TM; TargetLowering &TLI; @@ -62,9 +63,9 @@ public: virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnFunction(Function &Fn); + virtual bool runOnMachineFunction(MachineFunction &MF); - unsigned MakeReg(MVT VT); + unsigned MakeReg(EVT VT); virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {} virtual void InstructionSelect() = 0; diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 975253751c8d0..d7c8f1ca0096b 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -22,18 +22,15 @@ #include "llvm/Constants.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/DebugLoc.h" #include -#include namespace llvm { @@ -52,7 +49,7 @@ template struct ilist_traits; /// SelectionDAG::getVTList(...). /// struct SDVTList { - const MVT *VTs; + const EVT *VTs; unsigned int NumVTs; }; @@ -97,7 +94,7 @@ namespace ISD { AssertSext, AssertZext, // Various leaf nodes. - BasicBlock, VALUETYPE, ARG_FLAGS, CONDCODE, Register, + BasicBlock, VALUETYPE, CONDCODE, Register, Constant, ConstantFP, GlobalAddress, GlobalTLSAddress, FrameIndex, JumpTable, ConstantPool, ExternalSymbol, @@ -121,6 +118,10 @@ namespace ISD { // address of the exception block on entry to an landing pad block. EXCEPTIONADDR, + // RESULT, OUTCHAIN = LSDAADDR(INCHAIN) - This node represents the + // address of the Language Specific Data Area for the enclosing function. + LSDAADDR, + // RESULT, OUTCHAIN = EHSELECTION(INCHAIN, EXCEPTION) - This node represents // the selection index of the exception thrown. EHSELECTION, @@ -180,38 +181,6 @@ namespace ISD { // UNDEF - An undefined node UNDEF, - /// FORMAL_ARGUMENTS(CHAIN, CC#, ISVARARG, FLAG0, ..., FLAGn) - This node - /// represents the formal arguments for a function. CC# is a Constant value - /// indicating the calling convention of the function, and ISVARARG is a - /// flag that indicates whether the function is varargs or not. This node - /// has one result value for each incoming argument, plus one for the output - /// chain. It must be custom legalized. See description of CALL node for - /// FLAG argument contents explanation. - /// - FORMAL_ARGUMENTS, - - /// RV1, RV2...RVn, CHAIN = CALL(CHAIN, CALLEE, - /// ARG0, FLAG0, ARG1, FLAG1, ... ARGn, FLAGn) - /// This node represents a fully general function call, before the legalizer - /// runs. This has one result value for each argument / flag pair, plus - /// a chain result. It must be custom legalized. Flag argument indicates - /// misc. argument attributes. Currently: - /// Bit 0 - signness - /// Bit 1 - 'inreg' attribute - /// Bit 2 - 'sret' attribute - /// Bit 4 - 'byval' attribute - /// Bit 5 - 'nest' attribute - /// Bit 6-9 - alignment of byval structures - /// Bit 10-26 - size of byval structures - /// Bits 31:27 - argument ABI alignment in the first argument piece and - /// alignment '1' in other argument pieces. 
- /// - /// CALL nodes use the CallSDNode subclass of SDNode, which - /// additionally carries information about the calling convention, - /// whether the call is varargs, and if it's marked as a tail call. - /// - CALL, - // EXTRACT_ELEMENT - This is used to get the lower or upper (determined by // a Constant, which is required to be operand #1) half of the integer or // float value specified as operand #0. This is only for use before @@ -225,9 +194,9 @@ namespace ISD { // MERGE_VALUES - This node takes multiple discrete operands and returns // them all as its individual results. This nodes has exactly the same - // number of inputs and outputs, and is only valid before legalization. - // This node is useful for some pieces of the code generator that want to - // think about a single node with multiple results, not multiple nodes. + // number of inputs and outputs. This node is useful for some pieces of the + // code generator that want to think about a single node with multiple + // results, not multiple nodes. MERGE_VALUES, // Simple integer binary arithmetic operators. @@ -303,7 +272,9 @@ namespace ISD { INSERT_VECTOR_ELT, /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR - /// identified by the (potentially variable) element number IDX. + /// identified by the (potentially variable) element number IDX. If the + /// return type is an integer type larger than the element type of the + /// vector, the result is extended to the width of the return type. EXTRACT_VECTOR_ELT, /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of @@ -318,7 +289,7 @@ namespace ISD { EXTRACT_SUBVECTOR, /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as - /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int + /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int /// values that indicate which value (or undef) each result element will /// get. These constant ints are accessible through the /// ShuffleVectorSDNode class. This is quite similar to the Altivec @@ -363,12 +334,11 @@ namespace ISD { // them with (op #2) as a CondCodeSDNode. SETCC, - // Vector SetCC operator - This evaluates to a vector of integer elements - // with the high bit in each element set to true if the comparison is true - // and false if the comparison is false. All other bits in each element - // are undefined. The operands to this are the left and right operands - // to compare (ops #0, and #1) and the condition code to compare them with - // (op #2) as a CondCodeSDNode. + // RESULT = VSETCC(LHS, RHS, COND) operator - This evaluates to a vector of + // integer elements with all bits of the result elements set to true if the + // comparison is true or all cleared if the comparison is false. The + // operands to this are the left and right operands to compare (LHS/RHS) and + // the condition code to compare them with (COND) as a CondCodeSDNode. VSETCC, // SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded @@ -514,12 +484,6 @@ namespace ISD { // chain, cc, lhs, rhs, block to branch to if condition is true. BR_CC, - // RET - Return from function. The first operand is the chain, - // and any subsequent operands are pairs of return value and return value - // attributes (see CALL for description of attributes) for the function. - // This operation can have variable number of operands. - RET, - // INLINEASM - Represents an inline asm block. This node always has two // return values: a chain and a flag result. 
The inputs are as follows: // Operand #0 : Input chain. @@ -535,12 +499,6 @@ namespace ISD { DBG_LABEL, EH_LABEL, - // DECLARE - Represents a llvm.dbg.declare intrinsic. It's used to track - // local variable declarations for debugging information. First operand is - // a chain, while the next two operands are first two arguments (address - // and variable) of a llvm.dbg.declare instruction. - DECLARE, - // STACKSAVE - STACKSAVE has one operand, an input chain. It produces a // value, the same type as the pointer type for the system, and an output // chain. @@ -575,11 +533,6 @@ namespace ISD { // make reference to a value in the LLVM IR. SRCVALUE, - // MEMOPERAND - This is a node that contains a MachineMemOperand which - // records information about a memory reference. This is used to make - // AliasAnalysis queries from the backend. - MEMOPERAND, - // PCMARKER - This corresponds to the pcmarker intrinsic. PCMARKER, @@ -656,10 +609,17 @@ namespace ISD { ATOMIC_LOAD_UMIN, ATOMIC_LOAD_UMAX, - // BUILTIN_OP_END - This must be the last enum value in this list. + /// BUILTIN_OP_END - This must be the last enum value in this list. + /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END }; + /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations + /// which do not reference a specific memory location should be less than + /// this value. Those that do must not be less than this value, and can + /// be used with SelectionDAG::getMemIntrinsicNode. + static const int FIRST_TARGET_MEMORY_OPCODE = 1 << 14; + /// Node predicates /// isBuildVectorAllOnes - Return true if the specified node is a @@ -893,7 +853,7 @@ public: /// getValueType - Return the ValueType of the referenced return value. /// - inline MVT getValueType() const; + inline EVT getValueType() const; /// getValueSizeInBits - Returns the size of the value in bits. /// @@ -906,6 +866,7 @@ public: inline unsigned getNumOperands() const; inline const SDValue &getOperand(unsigned i) const; inline uint64_t getConstantOperandVal(unsigned i) const; + inline bool isTargetMemoryOpcode() const; inline bool isTargetOpcode() const; inline bool isMachineOpcode() const; inline unsigned getMachineOpcode() const; @@ -1002,7 +963,7 @@ public: /// getResNo - Convenience function for get().getResNo(). unsigned getResNo() const { return Val.getResNo(); } /// getValueType - Convenience function for get().getValueType(). - MVT getValueType() const { return Val.getValueType(); } + EVT getValueType() const { return Val.getValueType(); } /// operator== - Convenience function for get().operator== bool operator==(const SDValue &V) const { @@ -1070,17 +1031,17 @@ class SDNode : public FoldingSetNode, public ilist_node { private: /// NodeType - The operation that this node performs. /// - short NodeType; + int16_t NodeType; /// OperandsNeedDelete - This is true if OperandList was new[]'d. If true, /// then they will be delete[]'d when the node is destroyed. - unsigned short OperandsNeedDelete : 1; + uint16_t OperandsNeedDelete : 1; protected: /// SubclassData - This member is defined by this class, but is not used for /// anything. Subclasses can use it to hold whatever state they find useful. /// This field is initialized to zero by the ctor. - unsigned short SubclassData : 15; + uint16_t SubclassData : 15; private: /// NodeId - Unique id per SDNode in the DAG. @@ -1092,7 +1053,7 @@ private: /// ValueList - The types of the values this node defines. SDNode's may /// define multiple values simultaneously. 
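(Illustrative sketch, not part of this change: the two boundaries above, BUILTIN_OP_END and FIRST_TARGET_MEMORY_OPCODE, partition a backend's pre-isel opcode space. All names below are invented.)

    namespace MYTGTISD {
      enum NodeType {
        // Ordinary target nodes are numbered from BUILTIN_OP_END up.
        FIRST_NUMBER = ISD::BUILTIN_OP_END,
        CALL_FLAG,            // no memory reference
        // Memory-referencing target nodes must sit at or above
        // FIRST_TARGET_MEMORY_OPCODE so that isTargetMemoryOpcode()
        // and MemSDNode::classof() recognize them.
        LOAD_PAIR = ISD::FIRST_TARGET_MEMORY_OPCODE,
        STORE_PAIR
      };
    }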
- const MVT *ValueList; + const EVT *ValueList; /// UseList - List of uses for this SDNode. SDUse *UseList; @@ -1104,7 +1065,7 @@ private: DebugLoc debugLoc; /// getValueTypeList - Return a pointer to the specified value type. - static const MVT *getValueTypeList(MVT VT); + static const EVT *getValueTypeList(EVT VT); friend class SelectionDAG; friend struct ilist_traits<SDNode>; @@ -1124,6 +1085,13 @@ public: /// \<target\>ISD namespace). bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; } + /// isTargetMemoryOpcode - Test if this node has a target-specific + /// memory-referencing opcode (in the \<target\>ISD namespace and + /// greater than FIRST_TARGET_MEMORY_OPCODE). + bool isTargetMemoryOpcode() const { + return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE; + } + /// isMachineOpcode - Test if this node has a post-isel opcode, directly /// corresponding to a MachineInstr opcode. bool isMachineOpcode() const { return NodeType < 0; } @@ -1168,14 +1136,16 @@ public: /// use_iterator - This class provides iterator support for SDUse /// operands that use a specific SDNode. class use_iterator - : public forward_iterator<SDUse, ptrdiff_t> { + : public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> { SDUse *Op; explicit use_iterator(SDUse *op) : Op(op) { } friend class SDNode; public: - typedef forward_iterator<SDUse, ptrdiff_t>::reference reference; - typedef forward_iterator<SDUse, ptrdiff_t>::pointer pointer; + typedef std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t>::reference reference; + typedef std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t>::pointer pointer; use_iterator(const use_iterator &I) : Op(I.Op) {} use_iterator() : Op(0) {} @@ -1278,7 +1248,7 @@ public: /// to which the flag operand points. Otherwise return NULL. SDNode *getFlaggedNode() const { if (getNumOperands() != 0 && - getOperand(getNumOperands()-1).getValueType() == MVT::Flag) + getOperand(getNumOperands()-1).getValueType().getSimpleVT() == MVT::Flag) return getOperand(getNumOperands()-1).getNode(); return 0; } @@ -1306,7 +1276,7 @@ public: /// getValueType - Return the type of a specified result. /// - MVT getValueType(unsigned ResNo) const { + EVT getValueType(unsigned ResNo) const { assert(ResNo < NumValues && "Illegal result number!"); return ValueList[ResNo]; } @@ -1317,7 +1287,7 @@ public: return getValueType(ResNo).getSizeInBits(); } - typedef const MVT* value_iterator; + typedef const EVT* value_iterator; value_iterator value_begin() const { return ValueList; } value_iterator value_end() const { return ValueList+NumValues; } @@ -1332,6 +1302,7 @@ public: void dump() const; void dumpr() const; void dump(const SelectionDAG *G) const; + void dumpr(const SelectionDAG *G) const; static bool classof(const SDNode *) { return true; } @@ -1344,7 +1315,7 @@ public: void addUse(SDUse &U) { U.addToList(&UseList); } protected: - static SDVTList getSDVTList(MVT VT) { + static SDVTList getSDVTList(EVT VT) { SDVTList Ret = { getValueTypeList(VT), 1 }; return Ret; } @@ -1438,7 +1409,7 @@ protected: inline unsigned SDValue::getOpcode() const { return Node->getOpcode(); } -inline MVT SDValue::getValueType() const { +inline EVT SDValue::getValueType() const { return Node->getValueType(ResNo); } inline unsigned SDValue::getNumOperands() const { @@ -1453,6 +1424,9 @@ inline uint64_t SDValue::getConstantOperandVal(unsigned i) const { inline bool SDValue::isTargetOpcode() const { return Node->isTargetOpcode(); } +inline bool SDValue::isTargetMemoryOpcode() const { + return Node->isTargetMemoryOpcode(); +} inline bool SDValue::isMachineOpcode() const { return Node->isMachineOpcode(); } @@ -1549,45 +1523,57 @@ public: class MemSDNode : public SDNode { private: // MemoryVT - VT of in-memory value.
- MVT MemoryVT; - - //! SrcValue - Memory location for alias analysis. - const Value *SrcValue; + EVT MemoryVT; - //! SVOffset - Memory location offset. Note that base is defined in MemSDNode - int SVOffset; +protected: + /// MMO - Memory reference information. + MachineMemOperand *MMO; public: - MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT MemoryVT, - const Value *srcValue, int SVOff, - unsigned alignment, bool isvolatile); + MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT MemoryVT, + MachineMemOperand *MMO); MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, const SDValue *Ops, - unsigned NumOps, MVT MemoryVT, const Value *srcValue, int SVOff, - unsigned alignment, bool isvolatile); + unsigned NumOps, EVT MemoryVT, MachineMemOperand *MMO); + + bool readMem() const { return MMO->isLoad(); } + bool writeMem() const { return MMO->isStore(); } /// Returns alignment and volatility of the memory access - unsigned getAlignment() const { return (1u << (SubclassData >> 6)) >> 1; } - bool isVolatile() const { return (SubclassData >> 5) & 1; } + unsigned getOriginalAlignment() const { + return MMO->getBaseAlignment(); + } + unsigned getAlignment() const { + return MMO->getAlignment(); + } /// getRawSubclassData - Return the SubclassData value, which contains an - /// encoding of the alignment and volatile information, as well as bits - /// used by subclasses. This function should only be used to compute a - /// FoldingSetNodeID value. + /// encoding of the volatile flag, as well as bits used by subclasses. This + /// function should only be used to compute a FoldingSetNodeID value. unsigned getRawSubclassData() const { return SubclassData; } + bool isVolatile() const { return (SubclassData >> 5) & 1; } + /// Returns the SrcValue and offset that describes the location of the access - const Value *getSrcValue() const { return SrcValue; } - int getSrcValueOffset() const { return SVOffset; } + const Value *getSrcValue() const { return MMO->getValue(); } + int64_t getSrcValueOffset() const { return MMO->getOffset(); } /// getMemoryVT - Return the type of the in-memory value. - MVT getMemoryVT() const { return MemoryVT; } + EVT getMemoryVT() const { return MemoryVT; } /// getMemOperand - Return a MachineMemOperand object describing the memory /// reference performed by operation. - MachineMemOperand getMemOperand() const; + MachineMemOperand *getMemOperand() const { return MMO; } + + /// refineAlignment - Update this MemSDNode's MachineMemOperand information + /// to reflect the alignment of NewMMO, if it has a greater alignment. + /// This must only be used when the new alignment applies to all users of + /// this MachineMemOperand. 
+ void refineAlignment(const MachineMemOperand *NewMMO) { + MMO->refineAlignment(NewMMO); + } const SDValue &getChain() const { return getOperand(0); } const SDValue &getBasePtr() const { @@ -1613,9 +1599,7 @@ public: N->getOpcode() == ISD::ATOMIC_LOAD_MAX || N->getOpcode() == ISD::ATOMIC_LOAD_UMIN || N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || - N->getOpcode() == ISD::INTRINSIC_W_CHAIN || - N->getOpcode() == ISD::INTRINSIC_VOID || - N->isTargetOpcode(); + N->isTargetMemoryOpcode(); } }; @@ -1633,19 +1617,20 @@ public: // Swp: swap value // SrcVal: address to update as a Value (used for MemOperand) // Align: alignment of memory - AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, MVT MemVT, + AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, EVT MemVT, SDValue Chain, SDValue Ptr, - SDValue Cmp, SDValue Swp, const Value* SrcVal, - unsigned Align=0) - : MemSDNode(Opc, dl, VTL, MemVT, SrcVal, /*SVOffset=*/0, - Align, /*isVolatile=*/true) { + SDValue Cmp, SDValue Swp, MachineMemOperand *MMO) + : MemSDNode(Opc, dl, VTL, MemVT, MMO) { + assert(readMem() && "Atomic MachineMemOperand is not a load!"); + assert(writeMem() && "Atomic MachineMemOperand is not a store!"); InitOperands(Ops, Chain, Ptr, Cmp, Swp); } - AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, MVT MemVT, + AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, EVT MemVT, SDValue Chain, SDValue Ptr, - SDValue Val, const Value* SrcVal, unsigned Align=0) - : MemSDNode(Opc, dl, VTL, MemVT, SrcVal, /*SVOffset=*/0, - Align, /*isVolatile=*/true) { + SDValue Val, MachineMemOperand *MMO) + : MemSDNode(Opc, dl, VTL, MemVT, MMO) { + assert(readMem() && "Atomic MachineMemOperand is not a load!"); + assert(writeMem() && "Atomic MachineMemOperand is not a store!"); InitOperands(Ops, Chain, Ptr, Val); } @@ -1675,24 +1660,18 @@ public: } }; -/// MemIntrinsicSDNode - This SDNode is used for target intrinsic that touches -/// memory and need an associated memory operand. -/// +/// MemIntrinsicSDNode - This SDNode is used for target intrinsics that touch +/// memory and need an associated MachineMemOperand. Its opcode may be +/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, or a target-specific opcode with a +/// value not less than FIRST_TARGET_MEMORY_OPCODE. 
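(Illustrative sketch, not part of this change: with MemSDNode now backed by a MachineMemOperand, the accessors above simply forward to the MMO. The function name below is invented.)

    // Report the memory-reference properties of a node, if it has any.
    static void describeMemNode(const SDNode *N) {
      if (const MemSDNode *M = dyn_cast<MemSDNode>(N)) {
        bool Reads     = M->readMem();            // MMO->isLoad()
        bool Writes    = M->writeMem();           // MMO->isStore()
        unsigned Align = M->getAlignment();       // MMO->getAlignment()
        int64_t Offset = M->getSrcValueOffset();  // MMO->getOffset()
        (void)Reads; (void)Writes; (void)Align; (void)Offset;
      }
    }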
class MemIntrinsicSDNode : public MemSDNode { - bool ReadMem; // Intrinsic reads memory - bool WriteMem; // Intrinsic writes memory public: MemIntrinsicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, const SDValue *Ops, unsigned NumOps, - MVT MemoryVT, const Value *srcValue, int SVO, - unsigned Align, bool Vol, bool ReadMem, bool WriteMem) - : MemSDNode(Opc, dl, VTs, Ops, NumOps, MemoryVT, srcValue, SVO, Align, Vol), - ReadMem(ReadMem), WriteMem(WriteMem) { + EVT MemoryVT, MachineMemOperand *MMO) + : MemSDNode(Opc, dl, VTs, Ops, NumOps, MemoryVT, MMO) { } - bool readMem() const { return ReadMem; } - bool writeMem() const { return WriteMem; } - // Methods to support isa and dyn_cast static bool classof(const MemIntrinsicSDNode *) { return true; } static bool classof(const SDNode *N) { @@ -1700,7 +1679,7 @@ public: // early a node with a target opcode can be of this class return N->getOpcode() == ISD::INTRINSIC_W_CHAIN || N->getOpcode() == ISD::INTRINSIC_VOID || - N->isTargetOpcode(); + N->isTargetMemoryOpcode(); } }; @@ -1720,7 +1699,7 @@ class ShuffleVectorSDNode : public SDNode { const int *Mask; protected: friend class SelectionDAG; - ShuffleVectorSDNode(MVT VT, DebugLoc dl, SDValue N1, SDValue N2, + ShuffleVectorSDNode(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, const int *M) : SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) { InitOperands(Ops, N1, N2); @@ -1728,7 +1707,7 @@ protected: public: void getMask(SmallVectorImpl &M) const { - MVT VT = getValueType(0); + EVT VT = getValueType(0); M.clear(); for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) M.push_back(Mask[i]); @@ -1743,7 +1722,7 @@ public: assert(isSplat() && "Cannot get splat index for non-splat!"); return Mask[0]; } - static bool isSplatMask(const int *Mask, MVT VT); + static bool isSplatMask(const int *Mask, EVT VT); static bool classof(const ShuffleVectorSDNode *) { return true; } static bool classof(const SDNode *N) { @@ -1754,7 +1733,7 @@ public: class ConstantSDNode : public SDNode { const ConstantInt *Value; friend class SelectionDAG; - ConstantSDNode(bool isTarget, const ConstantInt *val, MVT VT) + ConstantSDNode(bool isTarget, const ConstantInt *val, EVT VT) : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Value(val) { } @@ -1778,7 +1757,7 @@ public: class ConstantFPSDNode : public SDNode { const ConstantFP *Value; friend class SelectionDAG; - ConstantFPSDNode(bool isTarget, const ConstantFP *val, MVT VT) + ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Value(val) { } @@ -1807,7 +1786,7 @@ public: } bool isExactlyValue(const APFloat& V) const; - bool isValueValidForType(MVT VT, const APFloat& Val); + bool isValueValidForType(EVT VT, const APFloat& Val); static bool classof(const ConstantFPSDNode *) { return true; } static bool classof(const SDNode *N) { @@ -1821,7 +1800,7 @@ class GlobalAddressSDNode : public SDNode { int64_t Offset; unsigned char TargetFlags; friend class SelectionDAG; - GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, MVT VT, + GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TargetFlags); public: @@ -1843,7 +1822,7 @@ public: class FrameIndexSDNode : public SDNode { int FI; friend class SelectionDAG; - FrameIndexSDNode(int fi, MVT VT, bool isTarg) + FrameIndexSDNode(int fi, EVT VT, bool isTarg) : SDNode(isTarg ? 
ISD::TargetFrameIndex : ISD::FrameIndex, DebugLoc::getUnknownLoc(), getSDVTList(VT)), FI(fi) { } @@ -1862,7 +1841,7 @@ class JumpTableSDNode : public SDNode { int JTI; unsigned char TargetFlags; friend class SelectionDAG; - JumpTableSDNode(int jti, MVT VT, bool isTarg, unsigned char TF) + JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF) : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, DebugLoc::getUnknownLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { } @@ -1887,7 +1866,7 @@ class ConstantPoolSDNode : public SDNode { unsigned Alignment; // Minimum alignment requirement of CP (not log2 value). unsigned char TargetFlags; friend class SelectionDAG; - ConstantPoolSDNode(bool isTarget, Constant *c, MVT VT, int o, unsigned Align, + ConstantPoolSDNode(bool isTarget, Constant *c, EVT VT, int o, unsigned Align, unsigned char TF) : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, DebugLoc::getUnknownLoc(), @@ -1896,7 +1875,7 @@ class ConstantPoolSDNode : public SDNode { Val.ConstVal = c; } ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, - MVT VT, int o, unsigned Align, unsigned char TF) + EVT VT, int o, unsigned Align, unsigned char TF) : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { @@ -1988,10 +1967,6 @@ public: /// used when the SelectionDAG needs to make a simple reference to something /// in the LLVM IR representation. /// -/// Note that this is not used for carrying alias information; that is done -/// with MemOperandSDNode, which includes a Value which is required to be a -/// pointer, and several other fields specific to memory references. -/// class SrcValueSDNode : public SDNode { const Value *V; friend class SelectionDAG; @@ -2011,32 +1986,10 @@ public: }; -/// MemOperandSDNode - An SDNode that holds a MachineMemOperand. This is -/// used to represent a reference to memory after ISD::LOAD -/// and ISD::STORE have been lowered. -/// -class MemOperandSDNode : public SDNode { - friend class SelectionDAG; - /// Create a MachineMemOperand node - explicit MemOperandSDNode(const MachineMemOperand &mo) - : SDNode(ISD::MEMOPERAND, DebugLoc::getUnknownLoc(), - getSDVTList(MVT::Other)), MO(mo) {} - -public: - /// MO - The contained MachineMemOperand. 
- const MachineMemOperand MO; - - static bool classof(const MemOperandSDNode *) { return true; } - static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::MEMOPERAND; - } -}; - - class RegisterSDNode : public SDNode { unsigned Reg; friend class SelectionDAG; - RegisterSDNode(unsigned reg, MVT VT) + RegisterSDNode(unsigned reg, EVT VT) : SDNode(ISD::Register, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Reg(reg) { } @@ -2054,10 +2007,10 @@ class DbgStopPointSDNode : public SDNode { SDUse Chain; unsigned Line; unsigned Column; - Value *CU; + MDNode *CU; friend class SelectionDAG; DbgStopPointSDNode(SDValue ch, unsigned l, unsigned c, - Value *cu) + MDNode *cu) : SDNode(ISD::DBG_STOPPOINT, DebugLoc::getUnknownLoc(), getSDVTList(MVT::Other)), Line(l), Column(c), CU(cu) { InitOperands(&Chain, ch); @@ -2065,7 +2018,7 @@ class DbgStopPointSDNode : public SDNode { public: unsigned getLine() const { return Line; } unsigned getColumn() const { return Column; } - Value *getCompileUnit() const { return CU; } + MDNode *getCompileUnit() const { return CU; } static bool classof(const DbgStopPointSDNode *) { return true; } static bool classof(const SDNode *N) { @@ -2096,7 +2049,7 @@ class ExternalSymbolSDNode : public SDNode { unsigned char TargetFlags; friend class SelectionDAG; - ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, MVT VT) + ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT) : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) { @@ -2135,7 +2088,7 @@ public: class CvtRndSatSDNode : public SDNode { ISD::CvtCode CvtCode; friend class SelectionDAG; - explicit CvtRndSatSDNode(MVT VT, DebugLoc dl, const SDValue *Ops, + explicit CvtRndSatSDNode(EVT VT, DebugLoc dl, const SDValue *Ops, unsigned NumOps, ISD::CvtCode Code) : SDNode(ISD::CONVERT_RNDSAT, dl, getSDVTList(VT), Ops, NumOps), CvtCode(Code) { @@ -2233,93 +2186,54 @@ namespace ISD { /// getRawBits - Represent the flags as a bunch of bits. uint64_t getRawBits() const { return Flags; } }; -} - -/// ARG_FLAGSSDNode - Leaf node holding parameter flags. -class ARG_FLAGSSDNode : public SDNode { - ISD::ArgFlagsTy TheFlags; - friend class SelectionDAG; - explicit ARG_FLAGSSDNode(ISD::ArgFlagsTy Flags) - : SDNode(ISD::ARG_FLAGS, DebugLoc::getUnknownLoc(), - getSDVTList(MVT::Other)), TheFlags(Flags) { - } -public: - ISD::ArgFlagsTy getArgFlags() const { return TheFlags; } - - static bool classof(const ARG_FLAGSSDNode *) { return true; } - static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::ARG_FLAGS; - } -}; - -/// CallSDNode - Node for calls -- ISD::CALL. -class CallSDNode : public SDNode { - unsigned CallingConv; - bool IsVarArg; - bool IsTailCall; - unsigned NumFixedArgs; - // We might eventually want a full-blown Attributes for the result; that - // will expand the size of the representation. At the moment we only - // need Inreg. 
- bool Inreg; - friend class SelectionDAG; - CallSDNode(unsigned cc, DebugLoc dl, bool isvararg, bool istailcall, - bool isinreg, SDVTList VTs, const SDValue *Operands, - unsigned numOperands, unsigned numFixedArgs) - : SDNode(ISD::CALL, dl, VTs, Operands, numOperands), - CallingConv(cc), IsVarArg(isvararg), IsTailCall(istailcall), - NumFixedArgs(numFixedArgs), Inreg(isinreg) {} -public: - unsigned getCallingConv() const { return CallingConv; } - unsigned isVarArg() const { return IsVarArg; } - unsigned isTailCall() const { return IsTailCall; } - unsigned isInreg() const { return Inreg; } - - /// Set this call to not be marked as a tail call. Normally setter - /// methods in SDNodes are unsafe because it breaks the CSE map, - /// but we don't include the tail call flag for calls so it's ok - /// in this case. - void setNotTailCall() { IsTailCall = false; } - - SDValue getChain() const { return getOperand(0); } - SDValue getCallee() const { return getOperand(1); } - unsigned getNumArgs() const { return (getNumOperands() - 2) / 2; } - unsigned getNumFixedArgs() const { - if (isVarArg()) - return NumFixedArgs; - else - return getNumArgs(); - } - SDValue getArg(unsigned i) const { return getOperand(2+2*i); } - SDValue getArgFlagsVal(unsigned i) const { - return getOperand(3+2*i); - } - ISD::ArgFlagsTy getArgFlags(unsigned i) const { - return cast(getArgFlagsVal(i).getNode())->getArgFlags(); - } - - unsigned getNumRetVals() const { return getNumValues() - 1; } - MVT getRetValType(unsigned i) const { return getValueType(i); } + /// InputArg - This struct carries flags and type information about a + /// single incoming (formal) argument or incoming (from the perspective + /// of the caller) return value virtual register. + /// + struct InputArg { + ArgFlagsTy Flags; + EVT VT; + bool Used; + + InputArg() : VT(MVT::Other), Used(false) {} + InputArg(ISD::ArgFlagsTy flags, EVT vt, bool used) + : Flags(flags), VT(vt), Used(used) { + assert(VT.isSimple() && + "InputArg value type must be Simple!"); + } + }; - static bool classof(const CallSDNode *) { return true; } - static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::CALL; - } -}; + /// OutputArg - This struct carries flags and a value for a + /// single outgoing (actual) argument or outgoing (from the perspective + /// of the caller) return value virtual register. + /// + struct OutputArg { + ArgFlagsTy Flags; + SDValue Val; + bool IsFixed; + + OutputArg() : IsFixed(false) {} + OutputArg(ISD::ArgFlagsTy flags, SDValue val, bool isfixed) + : Flags(flags), Val(val), IsFixed(isfixed) { + assert(Val.getValueType().isSimple() && + "OutputArg value type must be Simple!"); + } + }; +} -/// VTSDNode - This class is used to represent MVT's, which are used +/// VTSDNode - This class is used to represent EVT's, which are used /// to parameterize some operations. 
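The ISD::InputArg and ISD::OutputArg structs above carry the per-argument type and flag data that targets previously dug out of a CallSDNode through getArg() and getArgFlags(). A sketch of how a calling-convention hook of this era might walk the outgoing list; the helper is our illustration, while isByVal() is one of the standard ArgFlagsTy queries:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // Hypothetical helper for a LowerCall-style hook: count byval
    // arguments so stack space can be reserved for their copies.
    static unsigned countByValArgs(const SmallVectorImpl<ISD::OutputArg> &Outs) {
      unsigned Count = 0;
      for (unsigned i = 0, e = Outs.size(); i != e; ++i)
        if (Outs[i].Flags.isByVal()) // flags travel with each SDValue
          ++Count;
      return Count;
    }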
class VTSDNode : public SDNode { - MVT ValueType; + EVT ValueType; friend class SelectionDAG; - explicit VTSDNode(MVT VT) + explicit VTSDNode(EVT VT) : SDNode(ISD::VALUETYPE, DebugLoc::getUnknownLoc(), getSDVTList(MVT::Other)), ValueType(VT) { } public: - MVT getVT() const { return ValueType; } + EVT getVT() const { return ValueType; } static bool classof(const VTSDNode *) { return true; } static bool classof(const SDNode *N) { @@ -2340,9 +2254,8 @@ class LSBaseSDNode : public MemSDNode { public: LSBaseSDNode(ISD::NodeType NodeTy, DebugLoc dl, SDValue *Operands, unsigned numOperands, SDVTList VTs, ISD::MemIndexedMode AM, - MVT VT, const Value *SV, int SVO, unsigned Align, bool Vol) - : MemSDNode(NodeTy, dl, VTs, VT, SV, SVO, Align, Vol) { - assert(Align != 0 && "Loads and stores should have non-zero aligment"); + EVT MemVT, MachineMemOperand *MMO) + : MemSDNode(NodeTy, dl, VTs, MemVT, MMO) { SubclassData |= AM << 2; assert(getAddressingMode() == AM && "MemIndexedMode encoding error!"); InitOperands(Ops, Operands, numOperands); @@ -2378,12 +2291,14 @@ public: class LoadSDNode : public LSBaseSDNode { friend class SelectionDAG; LoadSDNode(SDValue *ChainPtrOff, DebugLoc dl, SDVTList VTs, - ISD::MemIndexedMode AM, ISD::LoadExtType ETy, MVT LVT, - const Value *SV, int O=0, unsigned Align=0, bool Vol=false) + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT, + MachineMemOperand *MMO) : LSBaseSDNode(ISD::LOAD, dl, ChainPtrOff, 3, - VTs, AM, LVT, SV, O, Align, Vol) { + VTs, AM, MemVT, MMO) { SubclassData |= (unsigned short)ETy; assert(getExtensionType() == ETy && "LoadExtType encoding error!"); + assert(readMem() && "Load MachineMemOperand is not a load!"); + assert(!writeMem() && "Load MachineMemOperand is a store!"); } public: @@ -2407,12 +2322,14 @@ public: class StoreSDNode : public LSBaseSDNode { friend class SelectionDAG; StoreSDNode(SDValue *ChainValuePtrOff, DebugLoc dl, SDVTList VTs, - ISD::MemIndexedMode AM, bool isTrunc, MVT SVT, - const Value *SV, int O=0, unsigned Align=0, bool Vol=false) + ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT, + MachineMemOperand *MMO) : LSBaseSDNode(ISD::STORE, dl, ChainValuePtrOff, 4, - VTs, AM, SVT, SV, O, Align, Vol) { + VTs, AM, MemVT, MMO) { SubclassData |= (unsigned short)isTrunc; assert(isTruncatingStore() == isTrunc && "isTrunc encoding error!"); + assert(!readMem() && "Store MachineMemOperand is a load!"); + assert(writeMem() && "Store MachineMemOperand is not a store!"); } public: @@ -2431,8 +2348,47 @@ public: } }; +/// MachineSDNode - An SDNode that represents everything that will be needed +/// to construct a MachineInstr. These nodes are created during the +/// instruction selection proper phase. +/// +class MachineSDNode : public SDNode { +public: + typedef MachineMemOperand **mmo_iterator; + +private: + friend class SelectionDAG; + MachineSDNode(unsigned Opc, const DebugLoc DL, SDVTList VTs) + : SDNode(Opc, DL, VTs), MemRefs(0), MemRefsEnd(0) {} + + /// LocalOperands - Operands for this instruction, if they fit here. If + /// they don't, this field is unused. + SDUse LocalOperands[4]; + + /// MemRefs - Memory reference descriptions for this instruction. + mmo_iterator MemRefs; + mmo_iterator MemRefsEnd; + +public: + mmo_iterator memoperands_begin() const { return MemRefs; } + mmo_iterator memoperands_end() const { return MemRefsEnd; } + bool memoperands_empty() const { return MemRefsEnd == MemRefs; } + + /// setMemRefs - Assign this MachineSDNode's memory reference descriptor + /// list. This does not transfer ownership.
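Because the LoadSDNode and StoreSDNode constructors above now assert that their MachineMemOperand carries matching MOLoad/MOStore flags, the MMO has to be built correctly before the node is created (or attached later through setMemRefs, whose definition continues below). A sketch under the r84119-era interface; the getMachineMemOperand signature and flag names are our reading of that API:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineMemOperand.h"
    using namespace llvm;

    // Build an MMO describing a plain 4-byte, 4-aligned load from Ptr.
    // readMem()/writeMem() on the resulting node derive from these flags,
    // which is exactly what the asserts above check.
    static MachineMemOperand *makeLoadMMO(MachineFunction &MF,
                                          const Value *Ptr) {
      return MF.getMachineMemOperand(Ptr, MachineMemOperand::MOLoad,
                                     /*Offset=*/0, /*Size=*/4,
                                     /*Alignment=*/4);
    }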
+ void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) { + MemRefs = NewMemRefs; + MemRefsEnd = NewMemRefsEnd; + } + + static bool classof(const MachineSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->isMachineOpcode(); + } +}; -class SDNodeIterator : public forward_iterator { +class SDNodeIterator : public std::iterator { SDNode *Node; unsigned Operand; @@ -2490,7 +2446,7 @@ typedef LoadSDNode LargestSDNode; /// MostAlignedSDNode - The SDNode class with the greatest alignment /// requirement. /// -typedef ARG_FLAGSSDNode MostAlignedSDNode; +typedef GlobalAddressSDNode MostAlignedSDNode; namespace ISD { /// isNormalLoad - Returns true if the specified node is a non-extending diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index e661c58940e1d..1f0dd21088173 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -23,8 +23,10 @@ namespace llvm { class Type; + class LLVMContext; + struct EVT; - struct MVT { // MVT = Machine Value Type + class MVT { // MVT = Machine Value Type public: enum SimpleValueType { // If you change this numbering, you must change the values in @@ -59,184 +61,368 @@ namespace llvm { v8i16 = 21, // 8 x i16 v16i16 = 22, // 16 x i16 v2i32 = 23, // 2 x i32 - v3i32 = 24, // 3 x i32 - v4i32 = 25, // 4 x i32 - v8i32 = 26, // 8 x i32 - v1i64 = 27, // 1 x i64 - v2i64 = 28, // 2 x i64 - v4i64 = 29, // 4 x i64 - - v2f32 = 30, // 2 x f32 - v3f32 = 31, // 3 x f32 - v4f32 = 32, // 4 x f32 - v8f32 = 33, // 8 x f32 - v2f64 = 34, // 2 x f64 - v4f64 = 35, // 4 x f64 - + v4i32 = 24, // 4 x i32 + v8i32 = 25, // 8 x i32 + v1i64 = 26, // 1 x i64 + v2i64 = 27, // 2 x i64 + v4i64 = 28, // 4 x i64 + + v2f32 = 29, // 2 x f32 + v4f32 = 30, // 4 x f32 + v8f32 = 31, // 8 x f32 + v2f64 = 32, // 2 x f64 + v4f64 = 33, // 4 x f64 + FIRST_VECTOR_VALUETYPE = v2i8, LAST_VECTOR_VALUETYPE = v4f64, - LAST_VALUETYPE = 36, // This always remains at the end of the list. + LAST_VALUETYPE = 34, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. - // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors + // EVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors // This value must be a multiple of 32. MAX_ALLOWED_VALUETYPE = 64, + // Metadata - This is MDNode or MDString. + Metadata = 250, + // iPTRAny - An int value the size of the pointer of the current // target to any address space. This must only be used internal to // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. - iPTRAny = 252, + iPTRAny = 251, + + // vAny - A vector with any length and element size. This is used + // for intrinsics that have overloadings based on vector types. + // This is only for tblgen's consumption! + vAny = 252, // fAny - Any floating-point or vector floating-point value. This is used // for intrinsics that have overloadings based on floating-point types. // This is only for tblgen's consumption! - fAny = 253, + fAny = 253, // iAny - An integer or vector integer value of any bit width. This is // used for intrinsics that have overloadings based on integer bit widths. // This is only for tblgen's consumption! - iAny = 254, + iAny = 254, // iPTR - An int value the size of the pointer of the current // target. This should only be used internal to tblgen! - iPTR = 255, + iPTR = 255, // LastSimpleValueType - The greatest valid SimpleValueType value. 
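The MachineSDNode introduced above keeps its memory references as a caller-owned [begin, end) pointer range, mirroring MachineInstr, which is why setMemRefs does not transfer ownership. A short sketch of walking that range; scanning for volatility is our example, not code from the patch:

    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/CodeGen/SelectionDAGNodes.h"
    using namespace llvm;

    // True if any memory reference attached to this machine node is
    // volatile, using the memoperands_begin/end range from above.
    static bool hasVolatileMemRef(const MachineSDNode *MN) {
      for (MachineSDNode::mmo_iterator I = MN->memoperands_begin(),
           E = MN->memoperands_end(); I != E; ++I)
        if ((*I)->isVolatile())
          return true;
      return false;
    }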
- LastSimpleValueType = 255 - }; + LastSimpleValueType = 255, - private: - /// This union holds low-level value types. Valid values include any of - /// the values in the SimpleValueType enum, or any value returned from one - /// of the MVT methods. Any value type equal to one of the SimpleValueType - /// enum values is a "simple" value type. All others are "extended". - /// - /// Note that simple doesn't necessary mean legal for the target machine. - /// All legal value types must be simple, but often there are some simple - /// value types that are not legal. - /// - union { - uintptr_t V; - const Type *LLVMTy; + // INVALID_SIMPLE_VALUE_TYPE - Simple value types greater than or equal + // to this are considered extended value types. + INVALID_SIMPLE_VALUE_TYPE = LastSimpleValueType + 1 }; - public: - MVT() {} - MVT(SimpleValueType S) : V(S) {} + SimpleValueType SimpleTy; + + MVT() : SimpleTy((SimpleValueType)(INVALID_SIMPLE_VALUE_TYPE)) {} + MVT(SimpleValueType SVT) : SimpleTy(SVT) { } + + bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; } + bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; } + bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; } + bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; } + bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; } + + /// isFloatingPoint - Return true if this is a FP, or a vector FP type. + bool isFloatingPoint() const { + return ((SimpleTy >= MVT::f32 && SimpleTy <= MVT::ppcf128) || + (SimpleTy >= MVT::v2f32 && SimpleTy <= MVT::v4f64)); + } + + /// isInteger - Return true if this is an integer, or a vector integer type. + bool isInteger() const { + return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || + (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v4i64)); + } - bool operator==(const MVT VT) const { - return getRawBits() == VT.getRawBits(); + /// isVector - Return true if this is a vector value type. + bool isVector() const { + return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); } - bool operator!=(const MVT VT) const { - return getRawBits() != VT.getRawBits(); + + /// isPow2VectorType - Returns true if the given vector is a power of 2. + bool isPow2VectorType() const { + unsigned NElts = getVectorNumElements(); + return !(NElts & (NElts - 1)); } - /// getFloatingPointVT - Returns the MVT that represents a floating point - /// type with the given number of bits. There are two floating point types - /// with 128 bits - this returns f128 rather than ppcf128. + /// getPow2VectorType - Widens the length of the given vector MVT up to + /// the nearest power of 2 and returns that type.
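With SimpleTy a plain enum field, the comparison operators and predicates above reduce to integer range checks. A few spot checks as illustrative asserts (note that operator< orders by enum value, not by bit width):

    #include <cassert>
    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    static void mvtPredicateSpotChecks() {
      assert(MVT(MVT::v4f32).isVector());        // inside the vector range
      assert(MVT(MVT::v4f32).isFloatingPoint()); // FP vectors count as FP
      assert(!MVT(MVT::i32).isVector());         // scalar integer
      assert(MVT(MVT::i32) < MVT(MVT::i64));     // enum order happens to agree
    }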
+ MVT getPow2VectorType() const { + if (!isPow2VectorType()) { + unsigned NElts = getVectorNumElements(); + unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); + return MVT::getVectorVT(getVectorElementType(), Pow2NElts); + } + else { + return *this; + } + } + + MVT getVectorElementType() const { + switch (SimpleTy) { + default: + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + case v2i8 : + case v4i8 : + case v8i8 : + case v16i8: + case v32i8: return i8; + case v2i16: + case v4i16: + case v8i16: + case v16i16: return i16; + case v2i32: + case v4i32: + case v8i32: return i32; + case v1i64: + case v2i64: + case v4i64: return i64; + case v2f32: + case v4f32: + case v8f32: return f32; + case v2f64: + case v4f64: return f64; + } + } + + unsigned getVectorNumElements() const { + switch (SimpleTy) { + default: + return ~0U; + case v32i8: return 32; + case v16i8: + case v16i16: return 16; + case v8i8 : + case v8i16: + case v8i32: + case v8f32: return 8; + case v4i8: + case v4i16: + case v4i32: + case v4i64: + case v4f32: + case v4f64: return 4; + case v2i8: + case v2i16: + case v2i32: + case v2i64: + case v2f32: + case v2f64: return 2; + case v1i64: return 1; + } + } + + unsigned getSizeInBits() const { + switch (SimpleTy) { + case iPTR: + assert(0 && "Value type size is target-dependent. Ask TLI."); + case iPTRAny: + case iAny: + case fAny: + assert(0 && "Value type is overloaded."); + default: + assert(0 && "getSizeInBits called on extended MVT."); + case i1 : return 1; + case i8 : return 8; + case i16 : + case v2i8: return 16; + case f32 : + case i32 : + case v4i8: + case v2i16: return 32; + case f64 : + case i64 : + case v8i8: + case v4i16: + case v2i32: + case v1i64: + case v2f32: return 64; + case f80 : return 80; + case f128: + case ppcf128: + case i128: + case v16i8: + case v8i16: + case v4i32: + case v2i64: + case v4f32: + case v2f64: return 128; + case v32i8: + case v16i16: + case v8i32: + case v4i64: + case v8f32: + case v4f64: return 256; + } + } + static MVT getFloatingPointVT(unsigned BitWidth) { switch (BitWidth) { default: assert(false && "Bad bit width!"); case 32: - return f32; + return MVT::f32; case 64: - return f64; + return MVT::f64; case 80: - return f80; + return MVT::f80; case 128: - return f128; + return MVT::f128; } } - - /// getIntegerVT - Returns the MVT that represents an integer with the given - /// number of bits. + static MVT getIntegerVT(unsigned BitWidth) { switch (BitWidth) { default: - break; + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); case 1: - return i1; + return MVT::i1; case 8: - return i8; + return MVT::i8; case 16: - return i16; + return MVT::i16; case 32: - return i32; + return MVT::i32; case 64: - return i64; + return MVT::i64; case 128: - return i128; + return MVT::i128; } - return getExtendedIntegerVT(BitWidth); } - - /// getVectorVT - Returns the MVT that represents a vector NumElements in - /// length, where each element is of type VT. 
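The switch tables above hard-code the geometry of every simple vector type, and they stay mutually consistent: element width times element count equals the overall size. Illustrative asserts, not patch code:

    #include <cassert>
    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    static void mvtGeometrySpotChecks() {
      MVT VT = MVT::v8i16;
      assert(VT.getVectorElementType() == MVT(MVT::i16));
      assert(VT.getVectorNumElements() == 8);
      assert(VT.getSizeInBits() == 128); // 8 elements x 16 bits
    }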
+ static MVT getVectorVT(MVT VT, unsigned NumElements) { - switch (VT.V) { + switch (VT.SimpleTy) { default: break; - case i8: - if (NumElements == 2) return v2i8; - if (NumElements == 4) return v4i8; - if (NumElements == 8) return v8i8; - if (NumElements == 16) return v16i8; - if (NumElements == 32) return v32i8; + case MVT::i8: + if (NumElements == 2) return MVT::v2i8; + if (NumElements == 4) return MVT::v4i8; + if (NumElements == 8) return MVT::v8i8; + if (NumElements == 16) return MVT::v16i8; + if (NumElements == 32) return MVT::v32i8; break; - case i16: - if (NumElements == 2) return v2i16; - if (NumElements == 4) return v4i16; - if (NumElements == 8) return v8i16; - if (NumElements == 16) return v16i16; + case MVT::i16: + if (NumElements == 2) return MVT::v2i16; + if (NumElements == 4) return MVT::v4i16; + if (NumElements == 8) return MVT::v8i16; + if (NumElements == 16) return MVT::v16i16; break; - case i32: - if (NumElements == 2) return v2i32; - if (NumElements == 3) return v3i32; - if (NumElements == 4) return v4i32; - if (NumElements == 8) return v8i32; + case MVT::i32: + if (NumElements == 2) return MVT::v2i32; + if (NumElements == 4) return MVT::v4i32; + if (NumElements == 8) return MVT::v8i32; break; - case i64: - if (NumElements == 1) return v1i64; - if (NumElements == 2) return v2i64; - if (NumElements == 4) return v4i64; + case MVT::i64: + if (NumElements == 1) return MVT::v1i64; + if (NumElements == 2) return MVT::v2i64; + if (NumElements == 4) return MVT::v4i64; break; - case f32: - if (NumElements == 2) return v2f32; - if (NumElements == 3) return v3f32; - if (NumElements == 4) return v4f32; - if (NumElements == 8) return v8f32; + case MVT::f32: + if (NumElements == 2) return MVT::v2f32; + if (NumElements == 4) return MVT::v4f32; + if (NumElements == 8) return MVT::v8f32; break; - case f64: - if (NumElements == 2) return v2f64; - if (NumElements == 4) return v4f64; + case MVT::f64: + if (NumElements == 2) return MVT::v2f64; + if (NumElements == 4) return MVT::v4f64; break; } - return getExtendedVectorVT(VT, NumElements); + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); } - - /// getIntVectorWithNumElements - Return any integer vector type that has - /// the specified number of elements. + static MVT getIntVectorWithNumElements(unsigned NumElts) { switch (NumElts) { - default: return getVectorVT(i8, NumElts); - case 1: return v1i64; - case 2: return v2i32; - case 3: return v3i32; - case 4: return v4i16; - case 8: return v8i8; - case 16: return v16i8; + default: return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + case 1: return MVT::v1i64; + case 2: return MVT::v2i32; + case 4: return MVT::v4i16; + case 8: return MVT::v8i8; + case 16: return MVT::v16i8; + } + } + }; + + struct EVT { // EVT = Extended Value Type + private: + MVT V; + const Type *LLVMTy; + + public: + EVT() : V((MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE)), + LLVMTy(0) {} + EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(0) { } + EVT(MVT S) : V(S), LLVMTy(0) {} + + bool operator==(const EVT VT) const { + if (V.SimpleTy == VT.V.SimpleTy) { + if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return LLVMTy == VT.LLVMTy; + return true; + } + return false; + } + bool operator!=(const EVT VT) const { + if (V.SimpleTy == VT.V.SimpleTy) { + if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return LLVMTy != VT.LLVMTy; + return false; } + return true; + } + + /// getFloatingPointVT - Returns the EVT that represents a floating point + /// type with the given number of bits. 
There are two floating point types + /// with 128 bits - this returns f128 rather than ppcf128. + static EVT getFloatingPointVT(unsigned BitWidth) { + return MVT::getFloatingPointVT(BitWidth); + } + + /// getIntegerVT - Returns the EVT that represents an integer with the given + /// number of bits. + static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) { + MVT M = MVT::getIntegerVT(BitWidth); + if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return getExtendedIntegerVT(Context, BitWidth); + else + return M; + } + + /// getVectorVT - Returns the EVT that represents a vector NumElements in + /// length, where each element is of type VT. + static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) { + MVT M = MVT::getVectorVT(VT.V, NumElements); + if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return getExtendedVectorVT(Context, VT, NumElements); + else + return M; + } + + /// getIntVectorWithNumElements - Return any integer vector type that has + /// the specified number of elements. + static EVT getIntVectorWithNumElements(LLVMContext &C, unsigned NumElts) { + MVT M = MVT::getIntVectorWithNumElements(NumElts); + if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return getVectorVT(C, MVT::i8, NumElts); + else + return M; } - /// isSimple - Test if the given MVT is simple (as opposed to being + /// isSimple - Test if the given EVT is simple (as opposed to being /// extended). bool isSimple() const { - return V <= LastSimpleValueType; + return V.SimpleTy <= MVT::LastSimpleValueType; } - /// isExtended - Test if the given MVT is extended (as opposed to + /// isExtended - Test if the given EVT is extended (as opposed to /// being simple). bool isExtended() const { return !isSimple(); @@ -245,44 +431,53 @@ namespace llvm { /// isFloatingPoint - Return true if this is a FP, or a vector FP type. bool isFloatingPoint() const { return isSimple() ? - ((V >= f32 && V <= ppcf128) || - (V >= v2f32 && V <= v4f64)) : isExtendedFloatingPoint(); + ((V >= MVT::f32 && V <= MVT::ppcf128) || + (V >= MVT::v2f32 && V <= MVT::v4f64)) : isExtendedFloatingPoint(); } /// isInteger - Return true if this is an integer, or a vector integer type. bool isInteger() const { return isSimple() ? - ((V >= FIRST_INTEGER_VALUETYPE && V <= LAST_INTEGER_VALUETYPE) || - (V >= v2i8 && V <= v4i64)) : isExtendedInteger(); + ((V >= MVT::FIRST_INTEGER_VALUETYPE && + V <= MVT::LAST_INTEGER_VALUETYPE) || + (V >= MVT::v2i8 && V <= MVT::v4i64)) : isExtendedInteger(); } /// isVector - Return true if this is a vector value type. bool isVector() const { return isSimple() ? - (V >= FIRST_VECTOR_VALUETYPE && V <= LAST_VECTOR_VALUETYPE) : + (V >= MVT::FIRST_VECTOR_VALUETYPE && V <= + MVT::LAST_VECTOR_VALUETYPE) : isExtendedVector(); } /// is64BitVector - Return true if this is a 64-bit vector type. bool is64BitVector() const { return isSimple() ? - (V==v8i8 || V==v4i16 || V==v2i32 || V==v1i64 || V==v2f32) : + (V==MVT::v8i8 || V==MVT::v4i16 || V==MVT::v2i32 || + V==MVT::v1i64 || V==MVT::v2f32) : isExtended64BitVector(); } /// is128BitVector - Return true if this is a 128-bit vector type. bool is128BitVector() const { return isSimple() ? - (V==v16i8 || V==v8i16 || V==v4i32 || - V==v2i64 || V==v4f32 || V==v2f64) : + (V==MVT::v16i8 || V==MVT::v8i16 || V==MVT::v4i32 || + V==MVT::v2i64 || V==MVT::v4f32 || V==MVT::v2f64) : isExtended128BitVector(); } /// is256BitVector - Return true if this is a 256-bit vector type. inline bool is256BitVector() const { - return isSimple() ? 
- (V==v8f32 || V==v4f64 || V==v32i8 || V==v16i16 || V==v8i32 || - V==v4i64) : isExtended256BitVector(); + return isSimple() ? + (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 || + V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) : + isExtended256BitVector(); + } + + /// isOverloaded - Return true if this is an overloaded type for TableGen. + bool isOverloaded() const { + return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny); } /// isByteSized - Return true if the bit size is a multiple of 8. @@ -297,165 +492,88 @@ namespace llvm { } /// bitsEq - Return true if this has the same number of bits as VT. - bool bitsEq(MVT VT) const { + bool bitsEq(EVT VT) const { return getSizeInBits() == VT.getSizeInBits(); } /// bitsGT - Return true if this has more bits than VT. - bool bitsGT(MVT VT) const { + bool bitsGT(EVT VT) const { return getSizeInBits() > VT.getSizeInBits(); } /// bitsGE - Return true if this has no less bits than VT. - bool bitsGE(MVT VT) const { + bool bitsGE(EVT VT) const { return getSizeInBits() >= VT.getSizeInBits(); } /// bitsLT - Return true if this has less bits than VT. - bool bitsLT(MVT VT) const { + bool bitsLT(EVT VT) const { return getSizeInBits() < VT.getSizeInBits(); } /// bitsLE - Return true if this has no more bits than VT. - bool bitsLE(MVT VT) const { + bool bitsLE(EVT VT) const { return getSizeInBits() <= VT.getSizeInBits(); } /// getSimpleVT - Return the SimpleValueType held in the specified - /// simple MVT. - SimpleValueType getSimpleVT() const { + /// simple EVT. + MVT getSimpleVT() const { assert(isSimple() && "Expected a SimpleValueType!"); - return SimpleValueType(V); + return V; } /// getVectorElementType - Given a vector type, return the type of /// each element. - MVT getVectorElementType() const { + EVT getVectorElementType() const { assert(isVector() && "Invalid vector type!"); - switch (V) { - default: + if (isSimple()) + return V.getVectorElementType(); + else return getExtendedVectorElementType(); - case v2i8 : - case v4i8 : - case v8i8 : - case v16i8: - case v32i8: return i8; - case v2i16: - case v4i16: - case v8i16: - case v16i16: return i16; - case v2i32: - case v3i32: - case v4i32: - case v8i32: return i32; - case v1i64: - case v2i64: - case v4i64: return i64; - case v2f32: - case v3f32: - case v4f32: - case v8f32: return f32; - case v2f64: - case v4f64: return f64; - } } /// getVectorNumElements - Given a vector type, return the number of /// elements it contains. unsigned getVectorNumElements() const { assert(isVector() && "Invalid vector type!"); - switch (V) { - default: + if (isSimple()) + return V.getVectorNumElements(); + else return getExtendedVectorNumElements(); - case v32i8: return 32; - case v16i8: - case v16i16: return 16; - case v8i8 : - case v8i16: - case v8i32: - case v8f32: return 8; - case v4i8: - case v4i16: - case v4i32: - case v4i64: - case v4f32: - case v4f64: return 4; - case v3i32: - case v3f32: return 3; - case v2i8: - case v2i16: - case v2i32: - case v2i64: - case v2f32: - case v2f64: return 2; - case v1i64: return 1; - } } /// getSizeInBits - Return the size of the specified value type in bits. unsigned getSizeInBits() const { - switch (V) { - case iPTR: - assert(0 && "Value type size is target-dependent. 
Ask TLI."); - case iPTRAny: - case iAny: - case fAny: - assert(0 && "Value type is overloaded."); - default: + if (isSimple()) + return V.getSizeInBits(); + else return getExtendedSizeInBits(); - case i1 : return 1; - case i8 : return 8; - case i16 : - case v2i8: return 16; - case f32 : - case i32 : - case v4i8: - case v2i16: return 32; - case f64 : - case i64 : - case v8i8: - case v4i16: - case v2i32: - case v1i64: - case v2f32: return 64; - case f80 : return 80; - case v3i32: - case v3f32: return 96; - case f128: - case ppcf128: - case i128: - case v16i8: - case v8i16: - case v4i32: - case v2i64: - case v4f32: - case v2f64: return 128; - case v32i8: - case v16i16: - case v8i32: - case v4i64: - case v8f32: - case v4f64: return 256; - } + } + + /// getStoreSize - Return the number of bytes overwritten by a store + /// of the specified value type. + unsigned getStoreSize() const { + return (getSizeInBits() + 7) / 8; } /// getStoreSizeInBits - Return the number of bits overwritten by a store /// of the specified value type. unsigned getStoreSizeInBits() const { - return (getSizeInBits() + 7)/8*8; + return getStoreSize() * 8; } - /// getRoundIntegerType - Rounds the bit-width of the given integer MVT up + /// getRoundIntegerType - Rounds the bit-width of the given integer EVT up /// to the nearest power of two (and at least to eight), and returns the - /// integer MVT with that number of bits. - MVT getRoundIntegerType() const { + /// integer EVT with that number of bits. + EVT getRoundIntegerType(LLVMContext &Context) const { assert(isInteger() && !isVector() && "Invalid integer type!"); unsigned BitWidth = getSizeInBits(); if (BitWidth <= 8) - return i8; + return EVT(MVT::i8); else - return getIntegerVT(1 << Log2_32_Ceil(BitWidth)); + return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth)); } /// isPow2VectorType - Retuns true if the given vector is a power of 2. @@ -464,41 +582,48 @@ namespace llvm { return !(NElts & (NElts - 1)); } - /// getPow2VectorType - Widens the length of the given vector MVT up to + /// getPow2VectorType - Widens the length of the given vector EVT up to /// the nearest power of 2 and returns that type. - MVT getPow2VectorType() const { + EVT getPow2VectorType(LLVMContext &Context) const { if (!isPow2VectorType()) { unsigned NElts = getVectorNumElements(); unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); - return MVT::getVectorVT(getVectorElementType(), Pow2NElts); + return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts); } else { return *this; } } - /// getMVTString - This function returns value type as a string, + /// getEVTString - This function returns value type as a string, /// e.g. "i32". - std::string getMVTString() const; + std::string getEVTString() const; - /// getTypeForMVT - This method returns an LLVM type corresponding to the - /// specified MVT. For integer types, this returns an unsigned type. Note + /// getTypeForEVT - This method returns an LLVM type corresponding to the + /// specified EVT. For integer types, this returns an unsigned type. Note /// that this will abort for types that cannot be represented. - const Type *getTypeForMVT() const; + const Type *getTypeForEVT(LLVMContext &Context) const; - /// getMVT - Return the value type corresponding to the specified type. + /// getEVT - Return the value type corresponding to the specified type. /// This returns all pointers as iPTR. If HandleUnknown is true, unknown /// types are returned as Other, otherwise they are invalid. 
- static MVT getMVT(const Type *Ty, bool HandleUnknown = false); + static EVT getEVT(const Type *Ty, bool HandleUnknown = false); - /// getRawBits - Represent the type as a bunch of bits. - uintptr_t getRawBits() const { return V; } + intptr_t getRawBits() { + if (V.SimpleTy <= MVT::LastSimpleValueType) + return V.SimpleTy; + else + return (intptr_t)(LLVMTy); + } /// compareRawBits - A meaningless but well-behaved order, useful for /// constructing containers. struct compareRawBits { - bool operator()(MVT L, MVT R) const { - return L.getRawBits() < R.getRawBits(); + bool operator()(EVT L, EVT R) const { + if (L.V.SimpleTy == R.V.SimpleTy) + return L.LLVMTy < R.LLVMTy; + else + return L.V.SimpleTy < R.V.SimpleTy; } }; @@ -506,15 +631,16 @@ namespace llvm { // Methods for handling the Extended-type case in functions above. // These are all out-of-line to prevent users of this header file // from having a dependency on Type.h. - static MVT getExtendedIntegerVT(unsigned BitWidth); - static MVT getExtendedVectorVT(MVT VT, unsigned NumElements); + static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth); + static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, + unsigned NumElements); bool isExtendedFloatingPoint() const; bool isExtendedInteger() const; bool isExtendedVector() const; bool isExtended64BitVector() const; bool isExtended128BitVector() const; bool isExtended256BitVector() const; - MVT getExtendedVectorElementType() const; + EVT getExtendedVectorElementType() const; unsigned getExtendedVectorNumElements() const; unsigned getExtendedSizeInBits() const; }; diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index 7f6728bb678e2..986555b976e9f 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -44,23 +44,26 @@ def v4i16 : ValueType<64 , 20>; // 4 x i16 vector value def v8i16 : ValueType<128, 21>; // 8 x i16 vector value def v16i16 : ValueType<256, 22>; // 16 x i16 vector value def v2i32 : ValueType<64 , 23>; // 2 x i32 vector value -def v3i32 : ValueType<96 , 24>; // 3 x i32 vector value -def v4i32 : ValueType<128, 25>; // 4 x i32 vector value -def v8i32 : ValueType<256, 26>; // 8 x f32 vector value -def v1i64 : ValueType<64 , 27>; // 1 x i64 vector value -def v2i64 : ValueType<128, 28>; // 2 x i64 vector value -def v4i64 : ValueType<256, 29>; // 4 x f64 vector value +def v4i32 : ValueType<128, 24>; // 4 x i32 vector value +def v8i32 : ValueType<256, 25>; // 8 x i32 vector value +def v1i64 : ValueType<64 , 26>; // 1 x i64 vector value +def v2i64 : ValueType<128, 27>; // 2 x i64 vector value +def v4i64 : ValueType<256, 28>; // 4 x i64 vector value + +def v2f32 : ValueType<64, 29>; // 2 x f32 vector value +def v4f32 : ValueType<128, 30>; // 4 x f32 vector value +def v8f32 : ValueType<256, 31>; // 8 x f32 vector value +def v2f64 : ValueType<128, 32>; // 2 x f64 vector value +def v4f64 : ValueType<256, 33>; // 4 x f64 vector value + +def MetadataVT: ValueType<0, 250>; // Metadata -def v2f32 : ValueType<64, 30>; // 2 x f32 vector value -def v3f32 : ValueType<96 , 31>; // 3 x f32 vector value -def v4f32 : ValueType<128, 32>; // 4 x f32 vector value -def v8f32 : ValueType<256, 33>; // 8 x f32 vector value -def v2f64 : ValueType<128, 34>; // 2 x f64 vector value -def v4f64 : ValueType<256, 35>; // 4 x f64 vector value - // Pseudo valuetype mapped to the current pointer size to any address space. // Should only be used in TableGen.
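compareRawBits, shown above, gives EVT a strict weak order (simple types by enum value, extended types by Type pointer), which is all a tree container needs. Illustrative usage, not from the patch:

    #include <map>
    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    // A map keyed by EVT, e.g. caching some per-type lowering decision.
    // The ordering is meaningless but stable, as the comment above says.
    typedef std::map<EVT, unsigned, EVT::compareRawBits> TypeCostMap;

    static unsigned lookupCost(const TypeCostMap &M, EVT VT) {
      TypeCostMap::const_iterator I = M.find(VT);
      return I == M.end() ? 0 : I->second;
    }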
-def iPTRAny : ValueType<0, 252>; +def iPTRAny : ValueType<0, 251>; + +// Pseudo valuetype to represent "vector of any size" +def vAny : ValueType<0 , 252>; // Pseudo valuetype to represent "float of any format" def fAny : ValueType<0 , 253>; diff --git a/include/llvm/CompilerDriver/BuiltinOptions.h b/include/llvm/CompilerDriver/BuiltinOptions.h index 492dffd30725f..fe44c30a7ad58 100644 --- a/include/llvm/CompilerDriver/BuiltinOptions.h +++ b/include/llvm/CompilerDriver/BuiltinOptions.h @@ -22,6 +22,7 @@ namespace SaveTempsEnum { enum Values { Cwd, Obj, Unset }; } extern llvm::cl::list InputFilenames; extern llvm::cl::opt OutputFilename; +extern llvm::cl::opt TempDirname; extern llvm::cl::list Languages; extern llvm::cl::opt DryRun; extern llvm::cl::opt VerboseMode; diff --git a/include/llvm/CompilerDriver/Common.td b/include/llvm/CompilerDriver/Common.td index 1f6bacc787bcd..5b7c543f1c923 100644 --- a/include/llvm/CompilerDriver/Common.td +++ b/include/llvm/CompilerDriver/Common.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains common definitions used in llvmc2 tool description files. +// This file contains common definitions used in llvmc tool description files. // //===----------------------------------------------------------------------===// @@ -39,29 +39,35 @@ def prefix_list_option; def extern; def help; def hidden; +def init; def multi_val; def one_or_more; def really_hidden; def required; def zero_or_one; -// Empty DAG marker. -def empty; - // The 'case' construct. def case; +// Boolean constants. +def true; +def false; + // Boolean operators. def and; def or; +def not; // Primitive tests. def switch_on; def parameter_equals; def element_in_list; def input_languages_contain; +def empty; def not_empty; def default; +def single_input_file; +def multiple_input_files; // Possible actions. @@ -76,6 +82,9 @@ def error; def inc_weight; def dec_weight; +// Empty DAG marker. +def empty_dag_marker; + // Used to specify plugin priority. class PluginPriority { int priority = p; @@ -105,10 +114,10 @@ class EdgeBase { dag weight = d; } -class Edge : EdgeBase; +class Edge : EdgeBase; // Edge and SimpleEdge are synonyms. -class SimpleEdge : EdgeBase; +class SimpleEdge : EdgeBase; // Optionally enabled edge. class OptionalEdge : EdgeBase; diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h index 825d4c40f8aaa..3daafd58a7c25 100644 --- a/include/llvm/CompilerDriver/CompilationGraph.h +++ b/include/llvm/CompilerDriver/CompilationGraph.h @@ -18,7 +18,6 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" @@ -242,7 +241,8 @@ namespace llvmc { /// NodeChildIterator - Another auxiliary class needed by GraphTraits. 
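Both the SDNodeIterator change earlier and the NodeChildIterator change below replace the adapters from the deleted llvm/ADT/iterator.h with std::iterator. The template arguments were lost from this copy of the patch; the usual shape of the migration, with the parameters as our assumption, is:

    #include <iterator>

    class Node; // stand-in for the graph node type

    // Before: public bidirectional_iterator<Node, ptrdiff_t> (ADT adapter).
    // After: spell out the category and value type for std::iterator.
    class NodeChildIteratorSketch
        : public std::iterator<std::bidirectional_iterator_tag, Node> {
      // ... iterator state and operators as in CompilationGraph.h ...
    };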
- class NodeChildIterator : public bidirectional_iterator { + class NodeChildIterator : public + std::iterator { typedef NodeChildIterator ThisType; typedef Node::container_type::iterator iterator; diff --git a/include/llvm/CompilerDriver/ForceLinkage.h b/include/llvm/CompilerDriver/ForceLinkage.h index 58ea16710e49d..830c04e2d3078 100644 --- a/include/llvm/CompilerDriver/ForceLinkage.h +++ b/include/llvm/CompilerDriver/ForceLinkage.h @@ -41,6 +41,26 @@ namespace llvmc { LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_5); #endif +#ifdef LLVMC_BUILTIN_PLUGIN_6 + LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_6); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_7 + LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_7); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_8 + LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_8); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_9 + LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_9); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_10 + LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_10); +#endif + namespace force_linkage { struct LinkageForcer { @@ -68,6 +88,26 @@ namespace force_linkage { LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_5); #endif +#ifdef LLVMC_BUILTIN_PLUGIN_6 + LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_6); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_7 + LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_7); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_8 + LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_8); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_9 + LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_9); +#endif + +#ifdef LLVMC_BUILTIN_PLUGIN_10 + LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_10); +#endif + } }; } // End namespace force_linkage. diff --git a/include/llvm/Config/AsmParsers.def.in b/include/llvm/Config/AsmParsers.def.in new file mode 100644 index 0000000000000..041af837541cf --- /dev/null +++ b/include/llvm/Config/AsmParsers.def.in @@ -0,0 +1,29 @@ +//===- llvm/Config/AsmParsers.def - LLVM Assembly Parsers -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file enumerates all of the assembly-language parsers +// supported by this build of LLVM. Clients of this file should define +// the LLVM_ASM_PARSER macro to be a function-like macro with a +// single parameter (the name of the target whose assembly can be +// generated); including this file will then enumerate all of the +// targets with assembly parsers. +// +// The set of targets supported by LLVM is generated at configuration +// time, at which point this header is generated. Do not modify this +// header directly. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ASM_PARSER +# error Please define the macro LLVM_ASM_PARSER(TargetName) +#endif + +@LLVM_ENUM_ASM_PARSERS@ + +#undef LLVM_ASM_PARSER diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index 180e8c523859e..fa5d316ef076d 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -6,14 +6,6 @@ /* Define if dlopen(0) will open the symbols of the program */ #undef CAN_DLOPEN_SELF -/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP - systems. This function is required for `alloca.c' support on those systems. - */ -#undef CRAY_STACKSEG_END - -/* Define to 1 if using `alloca.c'. 
*/ -#undef C_ALLOCA - /* Define if CBE is enabled for printf %a output */ #undef ENABLE_CBE_PRINTF_A @@ -23,13 +15,6 @@ /* Define if threads enabled */ #cmakedefine ENABLE_THREADS ${ENABLE_THREADS} -/* Define to 1 if you have `alloca', as a function or macro. */ -#cmakedefine HAVE_ALLOCA ${HAVE_ALLOCA} - -/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). - */ -#cmakedefine HAVE_ALLOCA_H ${HAVE_ALLOCA_H} - /* Define to 1 if you have the `argz_append' function. */ #undef HAVE_ARGZ_APPEND @@ -113,7 +98,7 @@ #cmakedefine HAVE_FCNTL_H ${HAVE_FCNTL_H} /* Set to 1 if the finite function is found in <ieeefp.h> */ -#undef HAVE_FINITE_IN_IEEEFP_H +#cmakedefine HAVE_FINITE_IN_IEEEFP_H ${HAVE_FINITE_IN_IEEEFP_H} /* Define to 1 if you have the `floorf' function. */ #cmakedefine HAVE_FLOORF ${HAVE_FLOORF} @@ -181,9 +166,6 @@ /* Define if you have the libdl library or equivalent. */ #undef HAVE_LIBDL -/* Define to 1 if you have the `elf' library (-lelf). */ -#undef HAVE_LIBELF - /* Define to 1 if you have the `imagehlp' library (-limagehlp). */ #cmakedefine HAVE_LIBIMAGEHLP ${HAVE_LIBIMAGEHLP} @@ -240,13 +222,13 @@ #cmakedefine HAVE_MEMORY_H ${HAVE_MEMORY_H} /* Define to 1 if you have the `mkdtemp' function. */ -#undef HAVE_MKDTEMP +#cmakedefine HAVE_MKDTEMP ${HAVE_MKDTEMP} /* Define to 1 if you have the `mkstemp' function. */ -#undef HAVE_MKSTEMP +#cmakedefine HAVE_MKSTEMP ${HAVE_MKSTEMP} /* Define to 1 if you have the `mktemp' function. */ -#undef HAVE_MKTEMP +#cmakedefine HAVE_MKTEMP ${HAVE_MKTEMP} /* Define to 1 if you have a working `mmap' system call. */ #undef HAVE_MMAP @@ -307,7 +289,10 @@ #undef HAVE_ROUNDF /* Define to 1 if you have the `sbrk' function. */ -#undef HAVE_SBRK +#cmakedefine HAVE_SBRK ${HAVE_SBRK} + +/* Define to 1 if you have the `setenv' function. */ +#cmakedefine HAVE_SETENV ${HAVE_SETENV} /* Define to 1 if you have the `setjmp' function. */ #undef HAVE_SETJMP @@ -364,13 +349,13 @@ #undef HAVE_STRDUP /* Define to 1 if you have the `strerror' function. */ -#cmakedefine HAVE_STRERROR +#cmakedefine HAVE_STRERROR ${HAVE_STRERROR} /* Define to 1 if you have the `strerror_r' function. */ -#cmakedefine HAVE_STRERROR_R +#cmakedefine HAVE_STRERROR_R ${HAVE_STRERROR_R} /* Define to 1 if you have the `strerror_s' function. */ -#cmakedefine HAVE_STRERROR_S +#cmakedefine HAVE_STRERROR_S ${HAVE_STRERROR_S} /* Define to 1 if you have the <strings.h> header file. */ #undef HAVE_STRINGS_H @@ -470,6 +455,9 @@ /* Installation directory for man pages */ #undef LLVM_MANDIR +/* Build multithreading support into LLVM */ +#cmakedefine LLVM_MULTITHREADED ${LLVM_MULTITHREADED} + /* Define if this is Unixish platform */ #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX} diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index 4dd1345d0e609..5257df97b2b35 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -60,6 +60,9 @@ /* Define to 1 if you have the `ceilf' function. */ #undef HAVE_CEILF +/* Define if the circo program is available */ +#undef HAVE_CIRCO + /* Define to 1 if you have the `closedir' function. */ #undef HAVE_CLOSEDIR @@ -109,6 +112,9 @@ /* Define to 1 if you have the <fcntl.h> header file. */ #undef HAVE_FCNTL_H +/* Define if the fdp program is available */ +#undef HAVE_FDP + /* Define if libffi is available on this platform. */ #undef HAVE_FFI_CALL @@ -178,9 +184,6 @@ /* Define if you have the libdl library or equivalent. */ #undef HAVE_LIBDL -/* Define to 1 if you have the `elf' library (-lelf).
*/ -#undef HAVE_LIBELF - /* Define to 1 if you have the `imagehlp' library (-limagehlp). */ #undef HAVE_LIBIMAGEHLP @@ -267,6 +270,9 @@ /* Define to 1 if you have the `nearbyintf' function. */ #undef HAVE_NEARBYINTF +/* Define if the neato program is available */ +#undef HAVE_NEATO + /* Define to 1 if you have the `opendir' function. */ #undef HAVE_OPENDIR @@ -315,6 +321,9 @@ /* Define to 1 if you have the `sbrk' function. */ #undef HAVE_SBRK +/* Define to 1 if you have the `setenv' function. */ +#undef HAVE_SETENV + /* Define to 1 if you have the `setjmp' function. */ #undef HAVE_SETJMP @@ -431,6 +440,9 @@ /* Define to 1 if you have the <termios.h> header file. */ #undef HAVE_TERMIOS_H +/* Define if the twopi program is available */ +#undef HAVE_TWOPI + /* Define to 1 if the system has the type `uint64_t'. */ #undef HAVE_UINT64_T @@ -491,18 +503,30 @@ /* Define if this is Win32ish platform */ #undef LLVM_ON_WIN32 +/* Define to path to circo program if found or 'echo circo' otherwise */ +#undef LLVM_PATH_CIRCO + /* Define to path to dot program if found or 'echo dot' otherwise */ #undef LLVM_PATH_DOT /* Define to path to dotty program if found or 'echo dotty' otherwise */ #undef LLVM_PATH_DOTTY +/* Define to path to fdp program if found or 'echo fdp' otherwise */ +#undef LLVM_PATH_FDP + /* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */ #undef LLVM_PATH_GRAPHVIZ /* Define to path to gv program if found or 'echo gv' otherwise */ #undef LLVM_PATH_GV +/* Define to path to neato program if found or 'echo neato' otherwise */ +#undef LLVM_PATH_NEATO + +/* Define to path to twopi program if found or 'echo twopi' otherwise */ +#undef LLVM_PATH_TWOPI + /* Installation prefix directory */ #undef LLVM_PREFIX @@ -568,6 +592,9 @@ /* Define to 1 if your <sys/time.h> declares `struct tm'. */ #undef TM_IN_SYS_TIME +/* Define if we have the oprofile JIT-support library */ +#undef USE_OPROFILE + /* Define if use udis86 library */ #undef USE_UDIS86 diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h index d4949d1a0ffd1..a42c7d4371711 100644 --- a/include/llvm/Constant.h +++ b/include/llvm/Constant.h @@ -17,21 +17,10 @@ #include "llvm/User.h" namespace llvm { - template<typename T> class SmallVectorImpl; + class APInt; - /// If object contains references to other objects, then relocations are - /// usually required for emission of such object (especially in PIC mode). One - /// usually distinguishes local and global relocations. Local relocations are - /// made wrt objects in the same module and these objects have local (internal - /// or private) linkage. Global relocations are made wrt externally visible - /// objects. In most cases local relocations can be resolved via so-called - /// 'pre-link' technique. - namespace Reloc { - const unsigned None = 0; - const unsigned Local = 1 << 0; ///< Local relocations are required - const unsigned Global = 1 << 1; ///< Global relocations are required - const unsigned LocalOrGlobal = Local | Global; - } + template<typename T> class SmallVectorImpl; + class LLVMContext; /// This is an important base class in LLVM. It provides the common facilities /// of all constant values in an LLVM program.
A constant is a value that is @@ -53,35 +42,47 @@ namespace llvm { class Constant : public User { void operator=(const Constant &); // Do not implement Constant(const Constant &); // Do not implement + protected: Constant(const Type *ty, ValueTy vty, Use *Ops, unsigned NumOps) : User(ty, vty, Ops, NumOps) {} void destroyConstantImpl(); public: - /// Static constructor to get a '0' constant of arbitrary type... - /// - static Constant *getNullValue(const Type *Ty); - - /// Static constructor to get a '-1' constant. This supports integers and - /// vectors. - /// - static Constant *getAllOnesValue(const Type *Ty); - /// isNullValue - Return true if this is the value that would be returned by /// getNullValue. virtual bool isNullValue() const = 0; + /// isNegativeZeroValue - Return true if the value is what would be returned + /// by getZeroValueForNegation. + virtual bool isNegativeZeroValue() const { return isNullValue(); } + /// canTrap - Return true if evaluation of this constant could trap. This is /// true for things like constant expressions that could divide by zero. bool canTrap() const; - /// ContainsRelocations - Return true if the constant value contains - /// relocations which cannot be resolved at compile time. Note that answer is - /// not exclusive: there can be possibility that relocations of other kind are - /// required as well. - bool ContainsRelocations(unsigned Kind = Reloc::LocalOrGlobal) const; - + enum PossibleRelocationsTy { + NoRelocation = 0, + LocalRelocation = 1, + GlobalRelocations = 2 + }; + + /// getRelocationInfo - This method classifies the entry according to + /// whether or not it may generate a relocation entry. This must be + /// conservative, so if it might codegen to a relocatable entry, it should say + /// so. The return values are: + /// + /// NoRelocation: This constant pool entry is guaranteed to never have a + /// relocation applied to it (because it holds a simple constant like + /// '4'). + /// LocalRelocation: This entry has relocations, but the entries are + /// guaranteed to be resolvable by the static linker, so the dynamic + /// linker will never see them. + /// GlobalRelocations: This entry may have arbitrary relocations. + /// + /// FIXME: This really should not be in VMCore. + PossibleRelocationsTy getRelocationInfo() const; + // Specialize get/setOperand for Constants as their operands are always // constants as well. Constant *getOperand(unsigned i) { @@ -98,7 +99,8 @@ public: /// type, returns the elements of the vector in the specified smallvector. /// This handles breaking down a vector undef into undef elements, etc. For /// constant exprs and other cases we can't handle, we return an empty vector. - void getVectorElements(SmallVectorImpl<Constant*> &Elts) const; + void getVectorElements(LLVMContext &Context, + SmallVectorImpl<Constant*> &Elts) const; /// destroyConstant - Called if some element of this constant is no longer /// valid. At this point only other constants may be on the use_list for this @@ -135,6 +137,17 @@ public: "implemented for all constants that have operands!"); assert(0 && "Constants that do not have operands cannot be using 'From'!"); } + + static Constant* getNullValue(const Type* Ty); + + /// @returns the value for an integer constant of the given type that has all + /// its bits set to true. + /// @brief Get the all ones value + static Constant* getAllOnesValue(const Type* Ty); + + /// getIntegerValue - Return the value for an integer or pointer constant, + /// or a vector thereof, with the given scalar value.
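getRelocationInfo collapses the old Reloc::Local/Global flag query into one conservative classification, which codegen typically uses to place a constant into an appropriate section. A sketch of a consumer; the section names are purely illustrative:

    #include "llvm/Constant.h"
    using namespace llvm;

    static const char *pickSectionFor(const Constant *C) {
      switch (C->getRelocationInfo()) {
      case Constant::NoRelocation:      return ".rodata.cst";        // plain bits
      case Constant::LocalRelocation:   return ".data.rel.ro.local";
      case Constant::GlobalRelocations: return ".data.rel.ro";
      }
      return ".data.rel.ro"; // not reached; silences warnings
    }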
+ static Constant* getIntegerValue(const Type* Ty, const APInt &V); }; } // End llvm namespace diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h index da69d25cf621f..7715286bbe5c5 100644 --- a/include/llvm/Constants.h +++ b/include/llvm/Constants.h @@ -45,7 +45,6 @@ struct ConvertConstantType; /// represents both boolean and integral constants. /// @brief Class for constant integers. class ConstantInt : public Constant { - static ConstantInt *TheTrueVal, *TheFalseVal; void *operator new(size_t, unsigned); // DO NOT IMPLEMENT ConstantInt(const ConstantInt &); // DO NOT IMPLEMENT ConstantInt(const IntegerType *Ty, const APInt& V); @@ -56,10 +55,47 @@ protected: return User::operator new(s, 0); } public: + static ConstantInt *getTrue(LLVMContext &Context); + static ConstantInt *getFalse(LLVMContext &Context); + + /// If Ty is a vector type, return a Constant with a splat of the given + /// value. Otherwise return a ConstantInt for the given value. + static Constant *get(const Type *Ty, uint64_t V, bool isSigned = false); + + /// Return a ConstantInt with the specified integer value for the specified + /// type. If the type is wider than 64 bits, the value will be zero-extended + /// to fit the type, unless isSigned is true, in which case the value will + /// be interpreted as a 64-bit signed integer and sign-extended to fit + /// the type. + /// @brief Get a ConstantInt for a specific value. + static ConstantInt *get(const IntegerType *Ty, uint64_t V, + bool isSigned = false); + + /// Return a ConstantInt with the specified value for the specified type. The + /// value V will be canonicalized to an unsigned APInt. Accessing it with + /// either getSExtValue() or getZExtValue() will yield a correctly sized and + /// signed value for the type Ty. + /// @brief Get a ConstantInt for a specific signed value. + static ConstantInt *getSigned(const IntegerType *Ty, int64_t V); + static Constant *getSigned(const Type *Ty, int64_t V); + + /// Return a ConstantInt with the specified value and an implied Type. The + /// type is the integer type that corresponds to the bit width of the value. + static ConstantInt *get(LLVMContext &Context, const APInt &V); + + /// Return a ConstantInt constructed from the string Str with the given + /// radix. + static ConstantInt *get(const IntegerType *Ty, const StringRef &Str, + uint8_t radix); + + /// If Ty is a vector type, return a Constant with a splat of the given + /// value. Otherwise return a ConstantInt for the given value. + static Constant *get(const Type* Ty, const APInt& V); + /// Return the constant as an APInt value reference. This allows clients to /// obtain a copy of the value, with all its precision intact. /// @brief Return the constant's value. - inline const APInt& getValue() const { + inline const APInt &getValue() const { return Val; } @@ -92,49 +128,6 @@ public: return Val == V; } - /// getTrue/getFalse - Return the singleton true/false values. - static inline ConstantInt *getTrue() { - if (TheTrueVal) return TheTrueVal; - return CreateTrueFalseVals(true); - } - static inline ConstantInt *getFalse() { - if (TheFalseVal) return TheFalseVal; - return CreateTrueFalseVals(false); - } - - /// Return a ConstantInt with the specified integer value for the specified - /// type. If the type is wider than 64 bits, the value will be zero-extended - /// to fit the type, unless isSigned is true, in which case the value will - /// be interpreted as a 64-bit signed integer and sign-extended to fit - /// the type.
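Every ConstantInt now comes from a factory that can reach an LLVMContext, either directly or through the type, instead of the removed TheTrueVal/TheFalseVal globals. A sketch of the common forms, assuming the era's Type::getInt32Ty(Context) helper:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    static void makeSomeInts(LLVMContext &Ctx) {
      // The singleton i1 values now live in the context.
      ConstantInt *True = ConstantInt::getTrue(Ctx);
      // 64-bit input, sign-extended into the requested width.
      Constant *MinusOne = ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1);
      // Width implied by the APInt: this one is an i16.
      ConstantInt *K = ConstantInt::get(Ctx, APInt(16, 42));
      (void)True; (void)MinusOne; (void)K;
    }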
- /// @brief Get a ConstantInt for a specific value. - static ConstantInt *get(const IntegerType *Ty, - uint64_t V, bool isSigned = false); - - /// If Ty is a vector type, return a Constant with a splat of the given - /// value. Otherwise return a ConstantInt for the given value. - static Constant *get(const Type *Ty, uint64_t V, bool isSigned = false); - - /// Return a ConstantInt with the specified value for the specified type. The - /// value V will be canonicalized to a an unsigned APInt. Accessing it with - /// either getSExtValue() or getZExtValue() will yield a correctly sized and - /// signed value for the type Ty. - /// @brief Get a ConstantInt for a specific signed value. - static ConstantInt *getSigned(const IntegerType *Ty, int64_t V) { - return get(Ty, V, true); - } - static Constant *getSigned(const Type *Ty, int64_t V) { - return get(Ty, V, true); - } - - /// Return a ConstantInt with the specified value and an implied Type. The - /// type is the integer type that corresponds to the bit width of the value. - static ConstantInt *get(const APInt &V); - - /// If Ty is a vector type, return a Constant with a splat of the given - /// value. Otherwise return a ConstantInt for the given value. - static Constant *get(const Type *Ty, const APInt &V); - /// getType - Specialize the getType() method to always return an IntegerType, /// which reduces the amount of casting needed in parts of the compiler. /// @@ -227,19 +220,11 @@ public: return Val.getLimitedValue(Limit); } - /// @returns the value for an integer constant of the given type that has all - /// its bits set to true. - /// @brief Get the all ones value - static ConstantInt *getAllOnesValue(const Type *Ty); - /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const ConstantInt *) { return true; } static bool classof(const Value *V) { return V->getValueID() == ConstantIntVal; } - static void ResetTrueFalse() { TheTrueVal = TheFalseVal = 0; } -private: - static ConstantInt *CreateTrueFalseVals(bool WhichOne); }; @@ -250,6 +235,7 @@ class ConstantFP : public Constant { APFloat Val; void *operator new(size_t, unsigned);// DO NOT IMPLEMENT ConstantFP(const ConstantFP &); // DO NOT IMPLEMENT + friend class LLVMContextImpl; protected: ConstantFP(const Type *Ty, const APFloat& V); protected: @@ -258,26 +244,35 @@ protected: return User::operator new(s, 0); } public: - /// get() - Static factory methods - Return objects of the specified value - static ConstantFP *get(const APFloat &V); - + /// Floating point negation must be implemented with f(x) = -0.0 - x. This + /// method returns the negative zero constant for floating point or vector + /// floating point types; for all other types, it returns the null value. + static Constant *getZeroValueForNegation(const Type *Ty); + /// get() - This returns a ConstantFP, or a vector containing a splat of a /// ConstantFP, for the specified value in the specified type. This should /// only be used for simple constant values like 2.0/1.0 etc, that are /// known-valid both as host double and as the target format. - static Constant *get(const Type *Ty, double V); - + static Constant *get(const Type* Ty, double V); + static Constant *get(const Type* Ty, const StringRef &Str); + static ConstantFP *get(LLVMContext &Context, const APFloat &V); + static ConstantFP *getNegativeZero(const Type* Ty); + static ConstantFP *getInfinity(const Type *Ty, bool Negative = false); + /// isValueValidForType - return true if Ty is big enough to represent V. 
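getZeroValueForNegation exists because IR-level floating-point negation is spelled as a subtraction from negative zero, and only FP types have a meaningful -0.0 (other types get their null value). A sketch of the canonical use; building the fsub by hand here mirrors, to our understanding, what the era's BinaryOperator::CreateFNeg helper does internally:

    #include "llvm/Constants.h"
    #include "llvm/InstrTypes.h"
    using namespace llvm;

    // Build -X as (-0.0) - X, without inserting it anywhere yet.
    static Value *emitFNeg(Value *X) {
      Constant *NegZero = ConstantFP::getZeroValueForNegation(X->getType());
      return BinaryOperator::CreateFSub(NegZero, X, "neg");
    }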
+  /// isValueValidForType - return true if Ty is big enough to represent V.
-  static bool isValueValidForType(const Type *Ty, const APFloat& V);
+  static bool isValueValidForType(const Type *Ty, const APFloat &V);
   inline const APFloat& getValueAPF() const { return Val; }
 
   /// isNullValue - Return true if this is the value that would be returned by
   /// getNullValue.  Don't depend on == for doubles to tell us it's zero, it
   /// considers -0.0 to be null as well as 0.0.  :(
   virtual bool isNullValue() const;
-
-  // Get a negative zero.
-  static ConstantFP *getNegativeZero(const Type* Ty);
+
+  /// isNegativeZeroValue - Return true if the value is what would be returned
+  /// by getZeroValueForNegation.
+  virtual bool isNegativeZeroValue() const {
+    return Val.isZero() && Val.isNegative();
+  }
 
   /// isExactlyValue - We don't rely on operator== working on double values, as
   /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
@@ -285,7 +280,7 @@ public:
   /// two floating point values.  The version with a double operand is retained
   /// because it's so convenient to write isExactlyValue(2.0), but please use
   /// it only for simple constants.
-  bool isExactlyValue(const APFloat& V) const;
+  bool isExactlyValue(const APFloat &V) const;
 
   bool isExactlyValue(double V) const {
     bool ignored;
@@ -319,10 +314,8 @@ protected:
     return User::operator new(s, 0);
   }
 public:
-  /// get() - static factory method for creating a null aggregate.  It is
-  /// illegal to call this method with a non-aggregate type.
-  static ConstantAggregateZero *get(const Type *Ty);
-
+  static ConstantAggregateZero* get(const Type *Ty);
+
   /// isNullValue - Return true if this is the value that would be returned by
   /// getNullValue.
   virtual bool isNullValue() const { return true; }
@@ -348,22 +341,20 @@ class ConstantArray : public Constant {
 protected:
   ConstantArray(const ArrayType *T, const std::vector<Constant*> &Val);
 public:
-  /// get() - Static factory methods - Return objects of the specified value
-  static Constant *get(const ArrayType *T, const std::vector<Constant*> &);
-  static Constant *get(const ArrayType *T,
-                       Constant*const*Vals, unsigned NumVals) {
-    // FIXME: make this the primary ctor method.
-    return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
-  }
-
+  // ConstantArray accessors
+  static Constant *get(const ArrayType *T, const std::vector<Constant*> &V);
+  static Constant *get(const ArrayType *T, Constant *const *Vals,
+                       unsigned NumVals);
+
   /// This method constructs a ConstantArray and initializes it with a text
   /// string. The default behavior (AddNull==true) causes a null terminator to
   /// be placed at the end of the array. This effectively increases the length
   /// of the array by one (you've been warned). However, in some situations
   /// this is not desired so if AddNull==false then the string is copied without
-  /// null termination.
-  static Constant *get(const std::string &Initializer, bool AddNull = true);
-
+  /// null termination.
+  static Constant *get(LLVMContext &Context, const StringRef &Initializer,
+                       bool AddNull = true);
+
   /// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -406,7 +397,7 @@ public: }; template <> -struct OperandTraits : VariadicOperandTraits<> { +struct OperandTraits : public VariadicOperandTraits<> { }; DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantArray, Constant) @@ -421,16 +412,13 @@ class ConstantStruct : public Constant { protected: ConstantStruct(const StructType *T, const std::vector &Val); public: - /// get() - Static factory methods - Return objects of the specified value - /// + // ConstantStruct accessors static Constant *get(const StructType *T, const std::vector &V); - static Constant *get(const std::vector &V, bool Packed = false); - static Constant *get(Constant*const* Vals, unsigned NumVals, - bool Packed = false) { - // FIXME: make this the primary ctor method. - return get(std::vector(Vals, Vals+NumVals), Packed); - } - + static Constant *get(LLVMContext &Context, + const std::vector &V, bool Packed); + static Constant *get(LLVMContext &Context, + Constant *const *Vals, unsigned NumVals, bool Packed); + /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -458,7 +446,7 @@ public: }; template <> -struct OperandTraits : VariadicOperandTraits<> { +struct OperandTraits : public VariadicOperandTraits<> { }; DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantStruct, Constant) @@ -473,13 +461,10 @@ class ConstantVector : public Constant { protected: ConstantVector(const VectorType *T, const std::vector &Val); public: - /// get() - Static factory methods - Return objects of the specified value - static Constant *get(const VectorType *T, const std::vector &); + // ConstantVector accessors + static Constant *get(const VectorType *T, const std::vector &V); static Constant *get(const std::vector &V); - static Constant *get(Constant*const* Vals, unsigned NumVals) { - // FIXME: make this the primary ctor method. - return get(std::vector(Vals, Vals+NumVals)); - } + static Constant *get(Constant *const *Vals, unsigned NumVals); /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -490,11 +475,6 @@ public: inline const VectorType *getType() const { return reinterpret_cast(Value::getType()); } - - /// @returns the value for a vector integer constant of the given type that - /// has all its bits set to true. - /// @brief Get the all ones value - static ConstantVector *getAllOnesValue(const VectorType *Ty); /// isNullValue - Return true if this is the value that would be returned by /// getNullValue. This always returns false because zero vectors are always @@ -522,7 +502,7 @@ public: }; template <> -struct OperandTraits : VariadicOperandTraits<> { +struct OperandTraits : public VariadicOperandTraits<> { }; DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantVector, Constant) @@ -590,13 +570,17 @@ protected: // These private methods are used by the type resolution code to create // ConstantExprs in intermediate forms. 
   static Constant *getTy(const Type *Ty, unsigned Opcode,
-                         Constant *C1, Constant *C2);
+                         Constant *C1, Constant *C2,
+                         unsigned Flags = 0);
   static Constant *getCompareTy(unsigned short pred, Constant *C1,
                                 Constant *C2);
   static Constant *getSelectTy(const Type *Ty,
                                Constant *C1, Constant *C2, Constant *C3);
   static Constant *getGetElementPtrTy(const Type *Ty, Constant *C,
                                       Value* const *Idxs, unsigned NumIdxs);
+  static Constant *getInBoundsGetElementPtrTy(const Type *Ty, Constant *C,
+                                              Value* const *Idxs,
+                                              unsigned NumIdxs);
   static Constant *getExtractElementTy(const Type *Ty, Constant *Val,
                                        Constant *Idx);
   static Constant *getInsertElementTy(const Type *Ty, Constant *Val,
@@ -617,6 +601,43 @@ public:
   /// Cast constant expr
   ///
+
+  /// getAlignOf constant expr - computes the alignment of a type in a target
+  /// independent way (Note: the return type is an i32; Note: assumes that i8
+  /// is byte aligned).
+  static Constant *getAlignOf(const Type* Ty);
+
+  /// getSizeOf constant expr - computes the size of a type in a target
+  /// independent way (Note: the return type is an i64).
+  ///
+  static Constant *getSizeOf(const Type* Ty);
+
+  /// getOffsetOf constant expr - computes the offset of a field in a target
+  /// independent way (Note: the return type is an i64).
+  ///
+  static Constant *getOffsetOf(const StructType* Ty, unsigned FieldNo);
+
+  static Constant *getNeg(Constant *C);
+  static Constant *getFNeg(Constant *C);
+  static Constant *getNot(Constant *C);
+  static Constant *getAdd(Constant *C1, Constant *C2);
+  static Constant *getFAdd(Constant *C1, Constant *C2);
+  static Constant *getSub(Constant *C1, Constant *C2);
+  static Constant *getFSub(Constant *C1, Constant *C2);
+  static Constant *getMul(Constant *C1, Constant *C2);
+  static Constant *getFMul(Constant *C1, Constant *C2);
+  static Constant *getUDiv(Constant *C1, Constant *C2);
+  static Constant *getSDiv(Constant *C1, Constant *C2);
+  static Constant *getFDiv(Constant *C1, Constant *C2);
+  static Constant *getURem(Constant *C1, Constant *C2);
+  static Constant *getSRem(Constant *C1, Constant *C2);
+  static Constant *getFRem(Constant *C1, Constant *C2);
+  static Constant *getAnd(Constant *C1, Constant *C2);
+  static Constant *getOr(Constant *C1, Constant *C2);
+  static Constant *getXor(Constant *C1, Constant *C2);
+  static Constant *getShl(Constant *C1, Constant *C2);
+  static Constant *getLShr(Constant *C1, Constant *C2);
+  static Constant *getAShr(Constant *C1, Constant *C2);
   static Constant *getTrunc   (Constant *C, const Type *Ty);
   static Constant *getSExt    (Constant *C, const Type *Ty);
   static Constant *getZExt    (Constant *C, const Type *Ty);
@@ -630,6 +651,10 @@ public:
   static Constant *getIntToPtr(Constant *C, const Type *Ty);
   static Constant *getBitCast (Constant *C, const Type *Ty);
 
+  static Constant *getNSWAdd(Constant *C1, Constant *C2);
+  static Constant *getNSWSub(Constant *C1, Constant *C2);
+  static Constant *getExactSDiv(Constant *C1, Constant *C2);
+
   /// Transparently provide more efficient getOperand methods.
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
 
@@ -688,69 +713,51 @@ public:
   /// and the getIndices() method may be used.
   bool hasIndices() const;
 
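
Since ConstantExpr::get* calls fold eagerly when the operands are literal constants, the new flag-carrying getters compose like the plain ones. A small sketch (names illustrative):

  #include "llvm/LLVMContext.h"
  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  using namespace llvm;

  void foldExamples(LLVMContext &Ctx) {
    const IntegerType *I32 = IntegerType::get(Ctx, 32);
    Constant *A = ConstantInt::get(I32, 6);
    Constant *B = ConstantInt::get(I32, 7);
    Constant *Prod = ConstantExpr::getMul(A, B);     // folds to i32 42
    Constant *Sum  = ConstantExpr::getNSWAdd(A, B);  // add carrying the nsw flag
    Constant *Size = ConstantExpr::getSizeOf(I32);   // target-independent sizeof, i64
    (void)Prod; (void)Sum; (void)Size;
  }
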
+  /// @brief Return true if this is a getelementptr expression and all
+  /// the index operands are compile-time known integers within the
+  /// corresponding notional static array extents. Note that this is
+  /// not equivalent to, a subset of, or a superset of the "inbounds"
+  /// property.
+  bool isGEPWithNoNotionalOverIndexing() const;
+
   /// Select constant expr
   ///
   static Constant *getSelect(Constant *C, Constant *V1, Constant *V2) {
     return getSelectTy(V1->getType(), C, V1, V2);
   }
 
-  /// getAlignOf constant expr - computes the alignment of a type in a target
-  /// independent way (Note: the return type is an i32; Note: assumes that i8
-  /// is byte aligned).
-  ///
-  static Constant *getAlignOf(const Type *Ty);
-
-  /// getSizeOf constant expr - computes the size of a type in a target
-  /// independent way (Note: the return type is an i64).
-  ///
-  static Constant *getSizeOf(const Type *Ty);
-
-  /// ConstantExpr::get - Return a binary or shift operator constant expression,
+  /// get - Return a binary or shift operator constant expression,
   /// folding if possible.
   ///
-  static Constant *get(unsigned Opcode, Constant *C1, Constant *C2);
+  static Constant *get(unsigned Opcode, Constant *C1, Constant *C2,
+                       unsigned Flags = 0);
 
-  /// @brief Return an ICmp, FCmp, VICmp, or VFCmp comparison operator constant
-  /// expression.
+  /// @brief Return an ICmp or FCmp comparison operator constant expression.
   static Constant *getCompare(unsigned short pred, Constant *C1, Constant *C2);
 
-  /// ConstantExpr::get* - Return some common constants without having to
+  /// get* - Return some common constants without having to
   /// specify the full Instruction::OPCODE identifier.
   ///
-  static Constant *getNeg(Constant *C);
-  static Constant *getFNeg(Constant *C);
-  static Constant *getNot(Constant *C);
-  static Constant *getAdd(Constant *C1, Constant *C2);
-  static Constant *getFAdd(Constant *C1, Constant *C2);
-  static Constant *getSub(Constant *C1, Constant *C2);
-  static Constant *getFSub(Constant *C1, Constant *C2);
-  static Constant *getMul(Constant *C1, Constant *C2);
-  static Constant *getFMul(Constant *C1, Constant *C2);
-  static Constant *getUDiv(Constant *C1, Constant *C2);
-  static Constant *getSDiv(Constant *C1, Constant *C2);
-  static Constant *getFDiv(Constant *C1, Constant *C2);
-  static Constant *getURem(Constant *C1, Constant *C2); // unsigned rem
-  static Constant *getSRem(Constant *C1, Constant *C2); // signed rem
-  static Constant *getFRem(Constant *C1, Constant *C2);
-  static Constant *getAnd(Constant *C1, Constant *C2);
-  static Constant *getOr(Constant *C1, Constant *C2);
-  static Constant *getXor(Constant *C1, Constant *C2);
   static Constant *getICmp(unsigned short pred, Constant *LHS, Constant *RHS);
   static Constant *getFCmp(unsigned short pred, Constant *LHS, Constant *RHS);
-  static Constant *getVICmp(unsigned short pred, Constant *LHS, Constant *RHS);
-  static Constant *getVFCmp(unsigned short pred, Constant *LHS, Constant *RHS);
-  static Constant *getShl(Constant *C1, Constant *C2);
-  static Constant *getLShr(Constant *C1, Constant *C2);
-  static Constant *getAShr(Constant *C1, Constant *C2);
 
   /// Getelementptr form.  std::vector<Value*> is only accepted for convenience:
   /// all elements must be Constant's.
   ///
   static Constant *getGetElementPtr(Constant *C,
-                                    Constant* const *IdxList, unsigned NumIdx);
+                                    Constant *const *IdxList, unsigned NumIdx);
   static Constant *getGetElementPtr(Constant *C,
                                     Value* const *IdxList, unsigned NumIdx);
-
+
+  /// Create an "inbounds" getelementptr. See the documentation for the
+  /// "inbounds" flag in LangRef.html for details.
+ static Constant *getInBoundsGetElementPtr(Constant *C, + Constant *const *IdxList, + unsigned NumIdx); + static Constant *getInBoundsGetElementPtr(Constant *C, + Value* const *IdxList, + unsigned NumIdx); + static Constant *getExtractElement(Constant *Vec, Constant *Idx); static Constant *getInsertElement(Constant *Vec, Constant *Elt,Constant *Idx); static Constant *getShuffleVector(Constant *V1, Constant *V2, Constant *Mask); @@ -759,11 +766,6 @@ public: static Constant *getInsertValue(Constant *Agg, Constant *Val, const unsigned *IdxList, unsigned NumIdx); - /// Floating point negation must be implemented with f(x) = -0.0 - x. This - /// method returns the negative zero constant for floating point or vector - /// floating point types; for all other types, it returns the null value. - static Constant *getZeroValueForNegationExpr(const Type *Ty); - /// isNullValue - Return true if this is the value that would be returned by /// getNullValue. virtual bool isNullValue() const { return false; } @@ -792,7 +794,7 @@ public: Constant *getWithOperands(const std::vector &Ops) const { return getWithOperands(&Ops[0], (unsigned)Ops.size()); } - Constant *getWithOperands(Constant* const *Ops, unsigned NumOps) const; + Constant *getWithOperands(Constant *const *Ops, unsigned NumOps) const; virtual void destroyConstant(); virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); @@ -805,7 +807,7 @@ public: }; template <> -struct OperandTraits : VariadicOperandTraits<1> { +struct OperandTraits : public VariadicOperandTraits<1> { }; DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantExpr, Constant) @@ -845,62 +847,6 @@ public: return V->getValueID() == UndefValueVal; } }; - -//===----------------------------------------------------------------------===// -/// MDString - a single uniqued string. -/// These are used to efficiently contain a byte sequence for metadata. -/// -class MDString : public Constant { - MDString(const MDString &); // DO NOT IMPLEMENT - void *operator new(size_t, unsigned); // DO NOT IMPLEMENT - MDString(const char *begin, const char *end); - - const char *StrBegin, *StrEnd; -protected: - // allocate space for exactly zero operands - void *operator new(size_t s) { - return User::operator new(s, 0); - } -public: - /// get() - Static factory methods - Return objects of the specified value. - /// - static MDString *get(const char *StrBegin, const char *StrEnd); - static MDString *get(const std::string &Str); - - /// size() - The length of this string. - /// - intptr_t size() const { return StrEnd - StrBegin; } - - /// begin() - Pointer to the first byte of the string. - /// - const char *begin() const { return StrBegin; } - - /// end() - Pointer to one byte past the end of the string. - /// - const char *end() const { return StrEnd; } - - /// getType() specialization - Type is always MetadataTy. - /// - inline const Type *getType() const { - return Type::MetadataTy; - } - - /// isNullValue - Return true if this is the value that would be returned by - /// getNullValue. This always returns false because getNullValue will never - /// produce metadata. 
-  virtual bool isNullValue() const {
-    return false;
-  }
-
-  virtual void destroyConstant();
-
-  /// Methods for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const MDString *) { return true; }
-  static bool classof(const Value *V) {
-    return V->getValueID() == MDStringVal;
-  }
-};
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 053091b86fe16..fb51430b481e1 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -31,12 +31,13 @@ class PointerValType;
 class VectorValType;
 class IntegerValType;
 class APInt;
+class LLVMContext;
 
 class DerivedType : public Type {
   friend class Type;
 
 protected:
-  explicit DerivedType(TypeID id) : Type(id) {}
+  explicit DerivedType(LLVMContext &C, TypeID id) : Type(C, id) {}
 
   /// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type
   /// that the current type has transitioned from being abstract to being
@@ -82,8 +83,11 @@ public:
 /// Int64Ty.
 /// @brief Integer representation type
 class IntegerType : public DerivedType {
+  friend class LLVMContextImpl;
+
 protected:
-  explicit IntegerType(unsigned NumBits) : DerivedType(IntegerTyID) {
+  explicit IntegerType(LLVMContext &C, unsigned NumBits) :
+    DerivedType(C, IntegerTyID) {
     setSubclassData(NumBits);
   }
   friend class TypeMap<IntegerValType, IntegerType>;
@@ -101,7 +105,7 @@ public:
   /// that instance will be returned. Otherwise a new one will be created. Only
   /// one instance with a given NumBits value is ever created.
   /// @brief Get or create an IntegerType instance.
-  static const IntegerType* get(unsigned NumBits);
+  static const IntegerType* get(LLVMContext &C, unsigned NumBits);
 
   /// @brief Get the number of bits in this IntegerType
   unsigned getBitWidth() const { return getSubclassData(); }
@@ -207,7 +211,8 @@ public:
 /// and VectorType
 class CompositeType : public DerivedType {
 protected:
-  inline explicit CompositeType(TypeID id) : DerivedType(id) { }
+  inline explicit CompositeType(LLVMContext &C, TypeID id) :
+    DerivedType(C, id) { }
 public:
 
   /// getTypeAtIndex - Given an index value into the type, return the type of
@@ -235,25 +240,28 @@ class StructType : public CompositeType {
   friend class TypeMap<StructValType, StructType>;
   StructType(const StructType &);                   // Do not implement
   const StructType &operator=(const StructType &);  // Do not implement
-  StructType(const std::vector<const Type*> &Types, bool isPacked);
+  StructType(LLVMContext &C,
+             const std::vector<const Type*> &Types, bool isPacked);
 public:
   /// StructType::get - This static method is the primary way to create a
   /// StructType.
   ///
-  static StructType *get(const std::vector<const Type*> &Params,
+  static StructType *get(LLVMContext &Context,
+                         const std::vector<const Type*> &Params,
                          bool isPacked=false);
 
   /// StructType::get - Create an empty structure type.
   ///
-  static StructType *get(bool isPacked=false) {
-    return get(std::vector<const Type*>(), isPacked);
+  static StructType *get(LLVMContext &Context, bool isPacked=false) {
+    return get(Context, std::vector<const Type*>(), isPacked);
   }
 
   /// StructType::get - This static method is a convenience method for
   /// creating structure types by specifying the elements as arguments.
   /// Note that this method always returns a non-packed struct.  To get
   /// an empty struct, pass NULL, NULL.
-  static StructType *get(const Type *type, ...) END_WITH_NULL;
+  static StructType *get(LLVMContext &Context,
+                         const Type *type, ...) END_WITH_NULL;
 
   /// isValidElementType - Return true if the specified type is valid as an
   /// element type.
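
A sketch of the context-threaded type factories introduced in this hunk (names illustrative):

  #include "llvm/LLVMContext.h"
  #include "llvm/DerivedTypes.h"
  #include <vector>
  using namespace llvm;

  StructType *makePair(LLVMContext &Ctx) {
    const IntegerType *I8  = IntegerType::get(Ctx, 8);   // was IntegerType::get(8)
    const IntegerType *I64 = IntegerType::get(Ctx, 64);
    std::vector<const Type*> Fields;
    Fields.push_back(I8);
    Fields.push_back(I64);
    return StructType::get(Ctx, Fields);                 // non-packed {i8, i64}
  }
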
@@ -310,7 +318,7 @@ class SequentialType : public CompositeType {
   SequentialType* this_() { return this; }
 protected:
   SequentialType(TypeID TID, const Type *ElType)
-    : CompositeType(TID), ContainedType(ElType, this_()) {
+    : CompositeType(ElType->getContext(), TID), ContainedType(ElType, this_()) {
     ContainedTys = &ContainedType;
     NumContainedTys = 1;
   }
@@ -396,7 +404,7 @@ public:
   ///
   static VectorType *getInteger(const VectorType *VTy) {
     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
-    const Type *EltTy = IntegerType::get(EltBits);
+    const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
     return VectorType::get(EltTy, VTy->getNumElements());
   }
 
@@ -406,7 +414,7 @@ public:
   ///
   static VectorType *getExtendedElementVectorType(const VectorType *VTy) {
     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
-    const Type *EltTy = IntegerType::get(EltBits * 2);
+    const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
     return VectorType::get(EltTy, VTy->getNumElements());
   }
 
@@ -418,7 +426,7 @@ public:
     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
     assert((EltBits & 1) == 0 &&
            "Cannot truncate vector element with odd bit-width");
-    const Type *EltTy = IntegerType::get(EltBits / 2);
+    const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
     return VectorType::get(EltTy, VTy->getNumElements());
   }
 
@@ -431,7 +439,7 @@ public:
 
   /// @brief Return the number of bits in the Vector type.
   inline unsigned getBitWidth() const {
-    return NumElements *getElementType()->getPrimitiveSizeInBits();
+    return NumElements * getElementType()->getPrimitiveSizeInBits();
   }
 
   // Implement the AbstractTypeUser interface.
@@ -490,12 +498,12 @@ public:
 class OpaqueType : public DerivedType {
   OpaqueType(const OpaqueType &);                   // DO NOT IMPLEMENT
   const OpaqueType &operator=(const OpaqueType &);  // DO NOT IMPLEMENT
-  OpaqueType();
+  OpaqueType(LLVMContext &C);
 public:
   /// OpaqueType::get - Static factory method for the OpaqueType class...
   ///
-  static OpaqueType *get() {
-    return new OpaqueType();           // All opaque types are distinct
+  static OpaqueType *get(LLVMContext &C) {
+    return new OpaqueType(C);          // All opaque types are distinct
   }
 
   // Implement support for type inquiry through isa, cast, and dyn_cast:
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 613adb574e3ca..b9da0fcfce193 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -19,6 +19,7 @@
 #include <map>
 #include <string>
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/System/Mutex.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -26,6 +27,7 @@ namespace llvm {
 
 struct GenericValue;
 class Constant;
+class ExecutionEngine;
 class Function;
 class GlobalVariable;
 class GlobalValue;
@@ -39,38 +41,66 @@ class TargetData;
 class Type;
 
 class ExecutionEngineState {
+public:
+  class MapUpdatingCVH : public CallbackVH {
+    ExecutionEngineState &EES;
+
+  public:
+    MapUpdatingCVH(ExecutionEngineState &EES, const GlobalValue *GV);
+
+    operator const GlobalValue*() const {
+      return cast<GlobalValue>(getValPtr());
+    }
+
+    virtual void deleted();
+    virtual void allUsesReplacedWith(Value *new_value);
+  };
+
+private:
+  ExecutionEngine &EE;
+
   /// GlobalAddressMap - A mapping between LLVM global values and their
   /// actualized version...
-  std::map<const GlobalValue*, void *> GlobalAddressMap;
+  std::map<MapUpdatingCVH, void *> GlobalAddressMap;
 
   /// GlobalAddressReverseMap - This is the reverse mapping of GlobalAddressMap,
   /// used to convert raw addresses into the LLVM global value that is emitted
   /// at the address.  This map is not computed unless getGlobalValueAtAddress
   /// is called at some point.
-  std::map<void *, const GlobalValue*> GlobalAddressReverseMap;
+  std::map<void *, AssertingVH<const GlobalValue> > GlobalAddressReverseMap;
 
 public:
-  std::map<const GlobalValue*, void *> &
+  ExecutionEngineState(ExecutionEngine &EE) : EE(EE) {}
+
+  MapUpdatingCVH getVH(const GlobalValue *GV) {
+    return MapUpdatingCVH(*this, GV);
+  }
+
+  std::map<MapUpdatingCVH, void *> &
   getGlobalAddressMap(const MutexGuard &) {
     return GlobalAddressMap;
   }
 
-  std::map<void*, const GlobalValue*> &
+  std::map<void*, AssertingVH<const GlobalValue> > &
   getGlobalAddressReverseMap(const MutexGuard &) {
     return GlobalAddressReverseMap;
  }
+
+  // Returns the address ToUnmap was mapped to.
+  void *RemoveMapping(const MutexGuard &, const GlobalValue *ToUnmap);
 };
 
 
 class ExecutionEngine {
   const TargetData *TD;
-  ExecutionEngineState state;
+  ExecutionEngineState EEState;
   bool LazyCompilationDisabled;
   bool GVCompilationDisabled;
   bool SymbolSearchingDisabled;
   bool DlsymStubsEnabled;
 
+  friend class EngineBuilder;  // To allow access to JITCtor and InterpCtor.
+
 protected:
   /// Modules - This is a list of ModuleProvider's that we are JIT'ing from.  We
   /// use a smallvector to optimize for the case where there is only one module.
@@ -86,9 +116,13 @@ protected:
   // To avoid having libexecutionengine depend on the JIT and interpreter
   // libraries, the JIT and Interpreter set these functions to ctor pointers
   // at startup time if they are linked in.
-  typedef ExecutionEngine *(*EECtorFn)(ModuleProvider*, std::string*,
-                                       CodeGenOpt::Level OptLevel);
-  static EECtorFn JITCtor, InterpCtor;
+  static ExecutionEngine *(*JITCtor)(ModuleProvider *MP,
+                                     std::string *ErrorStr,
+                                     JITMemoryManager *JMM,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool GVsWithCode);
+  static ExecutionEngine *(*InterpCtor)(ModuleProvider *MP,
+                                        std::string *ErrorStr);
 
   /// LazyFunctionCreator - If an unknown function is needed, this function
   /// pointer is invoked to create it. If this returns null, the JIT will abort.
@@ -118,8 +152,18 @@ public:
                                  bool ForceInterpreter = false,
                                  std::string *ErrorStr = 0,
                                  CodeGenOpt::Level OptLevel =
-                                   CodeGenOpt::Default);
-
+                                   CodeGenOpt::Default,
+                                 // Allocating globals with code breaks
+                                 // freeMachineCodeForFunction and is probably
+                                 // unsafe and bad for performance.  However,
+                                 // we have clients who depend on this
+                                 // behavior, so we must support it.
+                                 // Eventually, when we're willing to break
+                                 // some backwards compatibility, this flag
+                                 // should be flipped to false, so that by
+                                 // default freeMachineCodeForFunction works.
+                                 bool GVsWithCode = true);
+
   /// create - This is the factory method for creating an execution engine which
   /// is appropriate for the current machine.  This takes ownership of the
   /// module.
@@ -128,11 +172,15 @@ public:
   /// createJIT - This is the factory method for creating a JIT for the current
   /// machine, it does not fall back to the interpreter.  This takes ownership
   /// of the ModuleProvider and JITMemoryManager if successful.
+  ///
+  /// Clients should make sure to initialize targets prior to calling this
+  /// function.
   static ExecutionEngine *createJIT(ModuleProvider *MP,
                                     std::string *ErrorStr = 0,
                                     JITMemoryManager *JMM = 0,
                                     CodeGenOpt::Level OptLevel =
-                                      CodeGenOpt::Default);
+                                      CodeGenOpt::Default,
+                                    bool GVsWithCode = true);
 
   /// addModuleProvider - Add a ModuleProvider to the list of modules that we
   /// can JIT from.
Note that this takes ownership of the ModuleProvider: when @@ -189,8 +237,8 @@ public: /// at the specified location. This is used internally as functions are JIT'd /// and as global variables are laid out in memory. It can and should also be /// used by clients of the EE that want to have an LLVM global overlay - /// existing data in memory. After adding a mapping for GV, you must not - /// destroy it until you've removed the mapping. + /// existing data in memory. Mappings are automatically removed when their + /// GlobalValue is destroyed. void addGlobalMapping(const GlobalValue *GV, void *Addr); /// clearAllGlobalMappings - Clear all global mappings and start over again @@ -214,29 +262,23 @@ public: void *getPointerToGlobalIfAvailable(const GlobalValue *GV); /// getPointerToGlobal - This returns the address of the specified global - /// value. This may involve code generation if it's a function. After - /// getting a pointer to GV, it and all globals it transitively refers to have - /// been passed to addGlobalMapping. You must clear the mapping for each - /// referred-to global before destroying it. If a referred-to global RTG is a - /// function and this ExecutionEngine is a JIT compiler, calling - /// updateGlobalMapping(RTG, 0) will leak the function's machine code, so you - /// should call freeMachineCodeForFunction(RTG) instead. Note that - /// optimizations can move and delete non-external GlobalValues without - /// notifying the ExecutionEngine. + /// value. This may involve code generation if it's a function. /// void *getPointerToGlobal(const GlobalValue *GV); /// getPointerToFunction - The different EE's represent function bodies in /// different ways. They should each implement this to say what a function - /// pointer should look like. See getPointerToGlobal for the requirements on - /// destroying F and any GlobalValues it refers to. + /// pointer should look like. When F is destroyed, the ExecutionEngine will + /// remove its global mapping but will not yet free its machine code. Call + /// freeMachineCodeForFunction(F) explicitly to do that. Note that global + /// optimizations can destroy Functions without notifying the ExecutionEngine. /// virtual void *getPointerToFunction(Function *F) = 0; /// getPointerToFunctionOrStub - If the specified function has been /// code-gen'd, return a pointer to the function. If not, compile it, or use - /// a stub to implement lazy compilation if available. See getPointerToGlobal - /// for the requirements on destroying F and any GlobalValues it refers to. + /// a stub to implement lazy compilation if available. See + /// getPointerToFunction for the requirements on destroying F. /// virtual void *getPointerToFunctionOrStub(Function *F) { // Default implementation, just codegen the function. @@ -272,8 +314,7 @@ public: /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the - /// Emitter. See getPointerToGlobal for the requirements on destroying GV and - /// any GlobalValues it refers to. + /// Emitter. virtual void *getOrEmitGlobalVariable(const GlobalVariable *GV) { return getPointerToGlobal((GlobalValue*)GV); } @@ -282,8 +323,8 @@ public: /// the JIT. See JITEventListener.h for more details. Does not /// take ownership of the argument. The argument may be NULL, in /// which case these functions do nothing. 
-  virtual void RegisterJITEventListener(JITEventListener *L) {}
-  virtual void UnregisterJITEventListener(JITEventListener *L) {}
+  virtual void RegisterJITEventListener(JITEventListener *) {}
+  virtual void UnregisterJITEventListener(JITEventListener *) {}
 
   /// DisableLazyCompilation - If called, the JIT will abort if lazy compilation
   /// is ever attempted.
@@ -357,6 +398,102 @@ protected:
                                     const Type *Ty);
 };
 
+namespace EngineKind {
+  // These are actually bitmasks that get or-ed together.
+  enum Kind {
+    JIT         = 0x1,
+    Interpreter = 0x2
+  };
+  const static Kind Either = (Kind)(JIT | Interpreter);
+}
+
+/// EngineBuilder - Builder class for ExecutionEngines.  Use this by
+/// stack-allocating a builder, chaining the various set* methods, and
+/// terminating it with a .create() call.
+class EngineBuilder {
+
+ private:
+  ModuleProvider *MP;
+  EngineKind::Kind WhichEngine;
+  std::string *ErrorStr;
+  CodeGenOpt::Level OptLevel;
+  JITMemoryManager *JMM;
+  bool AllocateGVsWithCode;
+
+  /// InitEngine - Does the common initialization of default options.
+  ///
+  void InitEngine() {
+    WhichEngine = EngineKind::Either;
+    ErrorStr = NULL;
+    OptLevel = CodeGenOpt::Default;
+    JMM = NULL;
+    AllocateGVsWithCode = false;
+  }
+
+ public:
+  /// EngineBuilder - Constructor for EngineBuilder.  If create() is called and
+  /// is successful, the created engine takes ownership of the module
+  /// provider.
+  EngineBuilder(ModuleProvider *mp) : MP(mp) {
+    InitEngine();
+  }
+
+  /// EngineBuilder - Overloaded constructor that automatically creates an
+  /// ExistingModuleProvider for an existing module.
+  EngineBuilder(Module *m);
+
+  /// setEngineKind - Controls whether the user wants the interpreter, the JIT,
+  /// or whichever engine works.  This option defaults to EngineKind::Either.
+  EngineBuilder &setEngineKind(EngineKind::Kind w) {
+    WhichEngine = w;
+    return *this;
+  }
+
+  /// setJITMemoryManager - Sets the memory manager to use.  This allows
+  /// clients to customize their memory allocation policies.  If create() is
+  /// called and is successful, the created engine takes ownership of the
+  /// memory manager.  This option defaults to NULL.
+  EngineBuilder &setJITMemoryManager(JITMemoryManager *jmm) {
+    JMM = jmm;
+    return *this;
+  }
+
+  /// setErrorStr - Set the error string to write to on error.  This option
+  /// defaults to NULL.
+  EngineBuilder &setErrorStr(std::string *e) {
+    ErrorStr = e;
+    return *this;
+  }
+
+  /// setOptLevel - Set the optimization level for the JIT.  This option
+  /// defaults to CodeGenOpt::Default.
+  EngineBuilder &setOptLevel(CodeGenOpt::Level l) {
+    OptLevel = l;
+    return *this;
+  }
+
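
The class is meant to be used exactly as its comment says: stack-allocate, chain setters, call create(). A minimal sketch (the helper name is illustrative; per the createJIT note earlier in this header, targets should be initialized before calling create()):

  #include "llvm/ExecutionEngine/ExecutionEngine.h"
  #include "llvm/ModuleProvider.h"
  #include <string>
  using namespace llvm;

  ExecutionEngine *buildJIT(ModuleProvider *MP, std::string &Err) {
    return EngineBuilder(MP)
        .setEngineKind(EngineKind::JIT)   // don't fall back to the interpreter
        .setErrorStr(&Err)
        .setOptLevel(CodeGenOpt::Default)
        .create();                        // returns NULL on failure, Err says why
  }
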
+  /// setAllocateGVsWithCode - Sets whether global values should be allocated
+  /// into the same buffer as code.  For most applications this should be set
+  /// to false.  Allocating globals with code breaks freeMachineCodeForFunction
+  /// and is probably unsafe and bad for performance.  However, we have clients
+  /// who depend on this behavior, so we must support it.  This option defaults
+  /// to false so that users of the new API can safely use the new memory
+  /// manager and free machine code.
+  EngineBuilder &setAllocateGVsWithCode(bool a) {
+    AllocateGVsWithCode = a;
+    return *this;
+  }
+
+  ExecutionEngine *create();
+
+};
+
+inline bool operator<(const ExecutionEngineState::MapUpdatingCVH& lhs,
+                      const ExecutionEngineState::MapUpdatingCVH& rhs) {
+  return static_cast<const GlobalValue*>(lhs) <
+         static_cast<const GlobalValue*>(rhs);
+}
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index dd76f26c87769..8d3a1d77f04ef 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -16,13 +16,28 @@
 #define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
 
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+#include <vector>
 
 namespace llvm {
 
 class Function;
+class MachineFunction;
 
 /// Empty for now, but this object will contain all details about the
 /// generated machine code that a Listener might care about.
 struct JITEvent_EmittedFunctionDetails {
+  const MachineFunction *MF;
+
+  struct LineStart {
+    // The address at which the current line changes.
+    uintptr_t Address;
+    // The new location information.  These can be translated to
+    // DebugLocTuples using MF->getDebugLocTuple().
+    DebugLoc Loc;
+  };
+  // This holds line boundary information sorted by address.
+  std::vector<LineStart> LineStarts;
 };
 
 /// JITEventListener - This interface is used by the JIT to notify clients about
@@ -52,7 +67,9 @@ public:
   virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr) {}
 };
 
+// These return NULL if support isn't available.
 JITEventListener *createMacOSJITEventListener();
+JITEventListener *createOProfileJITEventListener();
 
 } // end namespace llvm.
 
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 688a1626d2c3f..21dee553474c9 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -15,9 +15,12 @@
 #define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
 
 #include "llvm/Support/DataTypes.h"
+#include <string>
 
 namespace llvm {
 
+  class Function;
+  class GlobalValue;
 
 /// JITMemoryManager - This interface is used by the JIT to allocate and manage
 /// memory for the code generated by the JIT.  This can be reimplemented by
 ///
@@ -28,6 +31,7 @@
 protected:
   bool HasGOT;
   bool SizeRequired;
 public:
+  JITMemoryManager() : HasGOT(false), SizeRequired(false) {}
 
   virtual ~JITMemoryManager();
 
@@ -37,11 +41,16 @@ public:
   /// setMemoryWritable - When code generation is in progress,
   /// the code pages may need permissions changed.
-  virtual void setMemoryWritable(void) = 0;
+  virtual void setMemoryWritable() = 0;
 
   /// setMemoryExecutable - When code generation is done and we're ready to
   /// start execution, the code pages may need permissions changed.
-  virtual void setMemoryExecutable(void) = 0;
+  virtual void setMemoryExecutable() = 0;
+
+  /// setPoisonMemory - Setting this flag to true makes the memory manager
+  /// write garbage values over freed memory.  This is useful for testing and
+  /// debugging, and is turned on by default in debug mode.
+  virtual void setPoisonMemory(bool poison) = 0;
 
   //===--------------------------------------------------------------------===//
   // Global Offset Table Management
   //===--------------------------------------------------------------------===//
@@ -82,16 +91,19 @@ public:
   //===--------------------------------------------------------------------===//
   // Main Allocation Functions
   //===--------------------------------------------------------------------===//
-
-  /// startFunctionBody - When we start JITing a function, the JIT calls this
+
+  /// startFunctionBody - When we start JITing a function, the JIT calls this
   /// method to allocate a block of free RWX memory, which returns a pointer to
-  /// it.  The JIT doesn't know ahead of time how much space it will need to
-  /// emit the function, so it doesn't pass in the size.  Instead, this method
-  /// is required to pass back a "valid size".  The JIT will be careful to not
-  /// write more than the returned ActualSize bytes of memory.
-  virtual uint8_t *startFunctionBody(const Function *F,
+  /// it.  If the JIT wants to request a block of memory of at least a certain
+  /// size, it passes that value as ActualSize, and this method returns a block
+  /// with at least that much space.  If the JIT doesn't know ahead of time how
+  /// much space it will need to emit the function, it passes 0 for the
+  /// ActualSize.  In either case, this method is required to pass back the size
+  /// of the allocated block through ActualSize.  The JIT will be careful to
+  /// not write more than the returned ActualSize bytes of memory.
+  virtual uint8_t *startFunctionBody(const Function *F,
                                      uintptr_t &ActualSize) = 0;
-
+
   /// allocateStub - This method is called by the JIT to allocate space for a
   /// function stub (used to handle limited branch displacements) while it is
   /// JIT compiling a function.  For example, if foo calls bar, and if bar
   ///
@@ -112,9 +124,14 @@ public:
   virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
                                uint8_t *FunctionEnd) = 0;
 
-  /// allocateSpace - Allocate a memory block of the given size.
+  /// allocateSpace - Allocate a memory block of the given size.  This method
+  /// cannot be called between calls to startFunctionBody and endFunctionBody.
   virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
-
+
+  /// allocateGlobal - Allocate memory for a global.
+  ///
+  virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0;
+
   /// deallocateMemForFunction - Free JIT memory for the specified function.
   /// This is never called when the JIT is currently emitting a function.
   virtual void deallocateMemForFunction(const Function *F) = 0;
@@ -128,6 +145,49 @@ public:
   /// the exception table.
   virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
                                  uint8_t *TableEnd, uint8_t* FrameRegister) = 0;
+
+  /// CheckInvariants - For testing only.  Return true if all internal
+  /// invariants are preserved, or return false and set ErrorStr to a helpful
+  /// error message.
+  virtual bool CheckInvariants(std::string &ErrorStr) {
+    return true;
+  }
+
+  /// GetDefaultCodeSlabSize - For testing only.  Returns DefaultCodeSlabSize
+  /// from DefaultJITMemoryManager.
+  virtual size_t GetDefaultCodeSlabSize() {
+    return 0;
+  }
+
+  /// GetDefaultDataSlabSize - For testing only.  Returns DefaultDataSlabSize
+  /// from DefaultJITMemoryManager.
+  virtual size_t GetDefaultDataSlabSize() {
+    return 0;
+  }
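
To make the revised startFunctionBody contract concrete, here is a hypothetical helper showing the size protocol from the caller's side; it is a sketch against the interface above, not code from this import:

  #include "llvm/ExecutionEngine/JITMemoryManager.h"
  using namespace llvm;

  uint8_t *beginEmission(JITMemoryManager &JMM, const Function *F) {
    uintptr_t ActualSize = 4096;  // ask for at least 4 KiB; pass 0 if unknown
    uint8_t *Begin = JMM.startFunctionBody(F, ActualSize);
    // On return, ActualSize holds the real size of the allocated block; the
    // emitter must not write more than ActualSize bytes starting at Begin.
    return Begin;
  }
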
+  /// GetDefaultStubSlabSize - For testing only.  Returns DefaultStubSlabSize
+  /// from DefaultJITMemoryManager.
+  virtual size_t GetDefaultStubSlabSize() {
+    return 0;
+  }
+
+  /// GetNumCodeSlabs - For testing only.  Returns the number of MemoryBlocks
+  /// allocated for code.
+  virtual unsigned GetNumCodeSlabs() {
+    return 0;
+  }
+
+  /// GetNumDataSlabs - For testing only.  Returns the number of MemoryBlocks
+  /// allocated for data.
+  virtual unsigned GetNumDataSlabs() {
+    return 0;
+  }
+
+  /// GetNumStubSlabs - For testing only.  Returns the number of MemoryBlocks
+  /// allocated for function stubs.
+  virtual unsigned GetNumStubSlabs() {
+    return 0;
+  }
 };
 
 } // end namespace llvm.
 
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index 34ced973dd558..088c99952e9fb 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -19,9 +19,9 @@
 #define LLVM_FUNCTION_H
 
 #include "llvm/GlobalValue.h"
+#include "llvm/CallingConv.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Argument.h"
-#include "llvm/Support/Annotation.h"
 #include "llvm/Attributes.h"
 
 namespace llvm {
@@ -46,7 +46,7 @@ template<> struct ilist_traits<BasicBlock>
   static ValueSymbolTable *getSymTab(Function *ItemParent);
 private:
-  mutable ilist_node<BasicBlock> Sentinel;
+  mutable ilist_half_node<BasicBlock> Sentinel;
 };
 
 template<> struct ilist_traits<Argument>
@@ -63,10 +63,10 @@ template<> struct ilist_traits<Argument>
   static ValueSymbolTable *getSymTab(Function *ItemParent);
 private:
-  mutable ilist_node<Argument> Sentinel;
+  mutable ilist_half_node<Argument> Sentinel;
 };
 
-class Function : public GlobalValue, public Annotable,
+class Function : public GlobalValue,
                  public ilist_node<Function> {
 public:
   typedef iplist<Argument> ArgumentListType;
@@ -87,7 +87,7 @@ private:
   AttrListPtr AttributeList;              ///< Parameter attributes
 
   // The Calling Convention is stored in Value::SubclassData.
-  /*unsigned CallingConvention;*/
+  /*CallingConv::ID CallingConvention;*/
 
   friend class SymbolTableListTraits<Function, Module>;
 
@@ -114,11 +114,11 @@ private:
   /// the module.
   ///
   Function(const FunctionType *Ty, LinkageTypes Linkage,
-           const std::string &N = "", Module *M = 0);
+           const Twine &N = "", Module *M = 0);
 
 public:
   static Function *Create(const FunctionType *Ty, LinkageTypes Linkage,
-                          const std::string &N = "", Module *M = 0) {
+                          const Twine &N = "", Module *M = 0) {
     return new(0) Function(Ty, Linkage, N, M);
   }
 
@@ -129,7 +129,7 @@ public:
   /// getContext - Return a pointer to the LLVMContext associated with this
   /// function, or NULL if this function is not bound to a context yet.
-  LLVMContext* getContext();
+  LLVMContext &getContext() const;
 
   /// isVarArg - Return true if this function takes a variable number of
   /// arguments.
@@ -151,12 +151,14 @@ public:
   unsigned getIntrinsicID() const;
   bool isIntrinsic() const { return getIntrinsicID() != 0; }
 
-  /// getCallingConv()/setCallingConv(uint) - These method get and set the
+  /// getCallingConv()/setCallingConv(CC) - These methods get and set the
   /// calling convention of this function.  The enum values for the known
   /// calling conventions are defined in CallingConv.h.
-  unsigned getCallingConv() const { return SubclassData >> 1; }
-  void setCallingConv(unsigned CC) {
-    SubclassData = (SubclassData & 1) | (CC << 1);
+  CallingConv::ID getCallingConv() const {
+    return static_cast<CallingConv::ID>(SubclassData >> 1);
+  }
+  void setCallingConv(CallingConv::ID CC) {
+    SubclassData = (SubclassData & 1) | (static_cast<unsigned>(CC) << 1);
   }
 
   /// getAttributes - Return the attribute list for this Function.
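
A short sketch of the strongly typed calling-convention accessors (names illustrative):

  #include "llvm/Function.h"
  using namespace llvm;

  void markFastcc(Function *F) {
    F->setCallingConv(CallingConv::Fast);  // previously took a raw unsigned
    if (F->getCallingConv() == CallingConv::Fast) {
      // callers must be emitted with the matching convention
    }
  }
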
diff --git a/include/llvm/GlobalAlias.h b/include/llvm/GlobalAlias.h index b106116f37506..9b3f4505697d4 100644 --- a/include/llvm/GlobalAlias.h +++ b/include/llvm/GlobalAlias.h @@ -40,7 +40,7 @@ public: } /// GlobalAlias ctor - If a parent module is specified, the alias is /// automatically inserted into the end of the specified module's alias list. - GlobalAlias(const Type *Ty, LinkageTypes Linkage, const std::string &Name = "", + GlobalAlias(const Type *Ty, LinkageTypes Linkage, const Twine &Name = "", Constant* Aliasee = 0, Module *Parent = 0); /// Provide fast operand accessors @@ -88,7 +88,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<1> { +struct OperandTraits : public FixedNumOperandTraits<1> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Value) diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h index 3b7f67d5d0366..7b0de34d9cb6f 100644 --- a/include/llvm/GlobalValue.h +++ b/include/llvm/GlobalValue.h @@ -37,13 +37,14 @@ public: WeakAnyLinkage, ///< Keep one copy of named function when linking (weak) WeakODRLinkage, ///< Same, but only replaced by something equivalent. AppendingLinkage, ///< Special purpose, only applies to global arrays - InternalLinkage, ///< Rename collisions when linking (static functions) - PrivateLinkage, ///< Like Internal, but omit from symbol table + InternalLinkage, ///< Rename collisions when linking (static functions). + PrivateLinkage, ///< Like Internal, but omit from symbol table. + LinkerPrivateLinkage, ///< Like Private, but linker removes. DLLImportLinkage, ///< Function to be imported from DLL - DLLExportLinkage, ///< Function to be accessible from DLL - ExternalWeakLinkage,///< ExternalWeak linkage description - GhostLinkage, ///< Stand-in functions for streaming fns from BC files - CommonLinkage ///< Tentative definitions + DLLExportLinkage, ///< Function to be accessible from DLL. + ExternalWeakLinkage,///< ExternalWeak linkage description. + GhostLinkage, ///< Stand-in functions for streaming fns from BC files. + CommonLinkage ///< Tentative definitions. }; /// @brief An enumeration for the kinds of visibility of global values. 
@@ -55,10 +56,10 @@ public:
 
 protected:
   GlobalValue(const Type *ty, ValueTy vty, Use *Ops, unsigned NumOps,
-              LinkageTypes linkage, const std::string &name = "")
+              LinkageTypes linkage, const Twine &Name = "")
     : Constant(ty, vty, Ops, NumOps), Parent(0),
       Linkage(linkage), Visibility(DefaultVisibility), Alignment(0) {
-    if (!name.empty()) setName(name);
+    setName(Name);
   }
 
   Module *Parent;
@@ -80,6 +81,7 @@ public:
   }
 
   VisibilityTypes getVisibility() const { return VisibilityTypes(Visibility); }
+  bool hasDefaultVisibility() const { return Visibility == DefaultVisibility; }
   bool hasHiddenVisibility() const { return Visibility == HiddenVisibility; }
   bool hasProtectedVisibility() const {
     return Visibility == ProtectedVisibility;
@@ -88,7 +90,7 @@ public:
 
   bool hasSection() const { return !Section.empty(); }
   const std::string &getSection() const { return Section; }
-  void setSection(const std::string &S) { Section = S; }
+  void setSection(const StringRef &S) { Section = S; }
 
   /// If the usage is empty (except transitively dead constants), then this
   /// global value can be safely deleted since the destructor will
@@ -122,8 +124,10 @@ public:
   bool hasAppendingLinkage() const { return Linkage == AppendingLinkage; }
   bool hasInternalLinkage() const { return Linkage == InternalLinkage; }
   bool hasPrivateLinkage() const { return Linkage == PrivateLinkage; }
+  bool hasLinkerPrivateLinkage() const { return Linkage==LinkerPrivateLinkage; }
   bool hasLocalLinkage() const {
-    return Linkage == InternalLinkage || Linkage == PrivateLinkage;
+    return hasInternalLinkage() || hasPrivateLinkage() ||
+           hasLinkerPrivateLinkage();
   }
   bool hasDLLImportLinkage() const { return Linkage == DLLImportLinkage; }
   bool hasDLLExportLinkage() const { return Linkage == DLLExportLinkage; }
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index ae64ccf614be3..56b2b9d3ebac9 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -28,6 +28,7 @@
 namespace llvm {
 
 class Module;
 class Constant;
+class LLVMContext;
 template<typename ValueSubClass, typename ItemParentClass>
   class SymbolTableListTraits;
 
@@ -49,15 +50,16 @@ public:
   }
   /// GlobalVariable ctor - If a parent module is specified, the global is
   /// automatically inserted into the end of the specified module's global list.
-  GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes Linkage,
-                 Constant *Initializer = 0, const std::string &Name = "",
-                 Module *Parent = 0, bool ThreadLocal = false,
-                 unsigned AddressSpace = 0);
+  GlobalVariable(LLVMContext &Context, const Type *Ty, bool isConstant,
+                 LinkageTypes Linkage,
+                 Constant *Initializer = 0, const Twine &Name = "",
+                 bool ThreadLocal = false, unsigned AddressSpace = 0);
 
   /// GlobalVariable ctor - This creates a global and inserts it before the
   /// specified other global.
-  GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes Linkage,
-                 Constant *Initializer, const std::string &Name,
-                 GlobalVariable *InsertBefore, bool ThreadLocal = false,
+  GlobalVariable(Module &M, const Type *Ty, bool isConstant,
+                 LinkageTypes Linkage, Constant *Initializer,
+                 const Twine &Name,
+                 GlobalVariable *InsertBefore = 0, bool ThreadLocal = false,
                  unsigned AddressSpace = 0);
 
   ~GlobalVariable() {
@@ -149,7 +151,7 @@ public:
 };
 
 template <>
-struct OperandTraits<GlobalVariable> : OptionalOperandTraits<> {
+struct OperandTraits<GlobalVariable> : public OptionalOperandTraits<> {
 };
 
 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
 
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index 84292cf19e3dd..bc55031b0d42e 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -31,18 +31,22 @@ class InlineAsm : public Value {
   std::string AsmString, Constraints;
   bool HasSideEffects;
+  bool IsMsAsm;
 
-  InlineAsm(const FunctionType *Ty, const std::string &AsmString,
-            const std::string &Constraints, bool hasSideEffects);
+  InlineAsm(const FunctionType *Ty, const StringRef &AsmString,
+            const StringRef &Constraints, bool hasSideEffects,
+            bool isMsAsm = false);
   virtual ~InlineAsm();
 public:
 
   /// InlineAsm::get - Return the specified uniqued inline asm string.
   ///
-  static InlineAsm *get(const FunctionType *Ty, const std::string &AsmString,
-                        const std::string &Constraints, bool hasSideEffects);
+  static InlineAsm *get(const FunctionType *Ty, const StringRef &AsmString,
+                        const StringRef &Constraints, bool hasSideEffects,
+                        bool isMsAsm = false);
 
   bool hasSideEffects() const { return HasSideEffects; }
+  bool isMsAsm() const { return IsMsAsm; }
 
   /// getType - InlineAsm's are always pointers.
   ///
@@ -61,7 +65,7 @@ public:
   /// the specified constraint string is legal for the type.  This returns true
   /// if legal, false if not.
   ///
-  static bool Verify(const FunctionType *Ty, const std::string &Constraints);
+  static bool Verify(const FunctionType *Ty, const StringRef &Constraints);
 
   // Constraint String Parsing
   enum ConstraintPrefix {
@@ -106,7 +110,7 @@ public:
     /// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the
     /// fields in this structure.  If the constraint string is not understood,
     /// return true, otherwise return false.
-    bool Parse(const std::string &Str,
+    bool Parse(const StringRef &Str,
               std::vector<ConstraintInfo> &ConstraintsSoFar);
   };
 
@@ -114,7 +118,7 @@ public:
   /// constraints and their prefixes.  If this returns an empty vector, and if
   /// the constraint string itself isn't empty, there was an error parsing.
   static std::vector<ConstraintInfo>
-  ParseConstraints(const std::string &ConstraintString);
+  ParseConstraints(const StringRef &ConstraintString);
 
   /// ParseConstraints - Parse the constraints of this inlineasm object,
   /// returning them the same way that ParseConstraints(str) does.
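
A sketch of the StringRef-based InlineAsm::get, assuming an x86 target for the "=A" constraint and assuming Type::getInt64Ty and this FunctionType::get overload exist in this revision (neither is shown in the hunks here):

  #include "llvm/InlineAsm.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include <vector>
  using namespace llvm;

  InlineAsm *makeRdtsc(LLVMContext &Ctx) {
    // i64 (): rdtsc returns the timestamp counter in EDX:EAX via "=A".
    const FunctionType *FTy =
        FunctionType::get(Type::getInt64Ty(Ctx), std::vector<const Type*>(), false);
    return InlineAsm::get(FTy, "rdtsc", "=A", /*hasSideEffects=*/true);
  }
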
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h index 1eab983da68ad..cc923dec2987e 100644 --- a/include/llvm/InstrTypes.h +++ b/include/llvm/InstrTypes.h @@ -18,10 +18,13 @@ #include "llvm/Instruction.h" #include "llvm/OperandTraits.h" +#include "llvm/Operator.h" #include "llvm/DerivedTypes.h" namespace llvm { +class LLVMContext; + //===----------------------------------------------------------------------===// // TerminatorInst Class //===----------------------------------------------------------------------===// @@ -50,7 +53,7 @@ protected: virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0; public: - virtual Instruction *clone() const = 0; + virtual TerminatorInst *clone() const = 0; /// getNumSuccessors - Return the number of successors that this terminator /// has. @@ -87,7 +90,6 @@ public: class UnaryInstruction : public Instruction { void *operator new(size_t, unsigned); // Do not implement - UnaryInstruction(const UnaryInstruction&); // Do not implement protected: UnaryInstruction(const Type *Ty, unsigned iType, Value *V, @@ -128,7 +130,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<1> { +struct OperandTraits : public FixedNumOperandTraits<1> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value) @@ -142,9 +144,9 @@ class BinaryOperator : public Instruction { protected: void init(BinaryOps iType); BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, - const std::string &Name, Instruction *InsertBefore); + const Twine &Name, Instruction *InsertBefore); BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, - const std::string &Name, BasicBlock *InsertAtEnd); + const Twine &Name, BasicBlock *InsertAtEnd); public: // allocate space for exactly two operands void *operator new(size_t s) { @@ -160,7 +162,7 @@ public: /// Instruction is allowed to be a dereferenced end iterator. /// static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2, - const std::string &Name = "", + const Twine &Name = "", Instruction *InsertBefore = 0); /// Create() - Construct a binary instruction, given the opcode and the two @@ -168,49 +170,111 @@ public: /// BasicBlock specified. /// static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2, - const std::string &Name, - BasicBlock *InsertAtEnd); + const Twine &Name, BasicBlock *InsertAtEnd); /// Create* - These methods just forward to Create, and are useful when you /// statically know what type of instruction you're going to create. These /// helpers just save some typing. #define HANDLE_BINARY_INST(N, OPC, CLASS) \ static BinaryOperator *Create##OPC(Value *V1, Value *V2, \ - const std::string &Name = "") {\ + const Twine &Name = "") {\ return Create(Instruction::OPC, V1, V2, Name);\ } #include "llvm/Instruction.def" #define HANDLE_BINARY_INST(N, OPC, CLASS) \ static BinaryOperator *Create##OPC(Value *V1, Value *V2, \ - const std::string &Name, BasicBlock *BB) {\ + const Twine &Name, BasicBlock *BB) {\ return Create(Instruction::OPC, V1, V2, Name, BB);\ } #include "llvm/Instruction.def" #define HANDLE_BINARY_INST(N, OPC, CLASS) \ static BinaryOperator *Create##OPC(Value *V1, Value *V2, \ - const std::string &Name, Instruction *I) {\ + const Twine &Name, Instruction *I) {\ return Create(Instruction::OPC, V1, V2, Name, I);\ } #include "llvm/Instruction.def" + /// CreateNSWAdd - Create an Add operator with the NSW flag set. 
+  ///
+  static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
+                                      const Twine &Name = "") {
+    BinaryOperator *BO = CreateAdd(V1, V2, Name);
+    BO->setHasNoSignedWrap(true);
+    return BO;
+  }
+  static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
+                                      const Twine &Name, BasicBlock *BB) {
+    BinaryOperator *BO = CreateAdd(V1, V2, Name, BB);
+    BO->setHasNoSignedWrap(true);
+    return BO;
+  }
+  static BinaryOperator *CreateNSWAdd(Value *V1, Value *V2,
+                                      const Twine &Name, Instruction *I) {
+    BinaryOperator *BO = CreateAdd(V1, V2, Name, I);
+    BO->setHasNoSignedWrap(true);
+    return BO;
+  }
+
+  /// CreateNSWSub - Create a Sub operator with the NSW flag set.
+  ///
+  static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
+                                      const Twine &Name = "") {
+    BinaryOperator *BO = CreateSub(V1, V2, Name);
+    BO->setHasNoSignedWrap(true);
+    return BO;
+  }
+  static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
+                                      const Twine &Name, BasicBlock *BB) {
+    BinaryOperator *BO = CreateSub(V1, V2, Name, BB);
+    BO->setHasNoSignedWrap(true);
+    return BO;
+  }
+  static BinaryOperator *CreateNSWSub(Value *V1, Value *V2,
+                                      const Twine &Name, Instruction *I) {
+    BinaryOperator *BO = CreateSub(V1, V2, Name, I);
+    BO->setHasNoSignedWrap(true);
+    return BO;
+  }
+
+  /// CreateExactSDiv - Create an SDiv operator with the exact flag set.
+  ///
+  static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
+                                         const Twine &Name = "") {
+    BinaryOperator *BO = CreateSDiv(V1, V2, Name);
+    BO->setIsExact(true);
+    return BO;
+  }
+  static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
+                                         const Twine &Name, BasicBlock *BB) {
+    BinaryOperator *BO = CreateSDiv(V1, V2, Name, BB);
+    BO->setIsExact(true);
+    return BO;
+  }
+  static BinaryOperator *CreateExactSDiv(Value *V1, Value *V2,
+                                         const Twine &Name, Instruction *I) {
+    BinaryOperator *BO = CreateSDiv(V1, V2, Name, I);
+    BO->setIsExact(true);
+    return BO;
+  }
+
   /// Helper functions to construct and inspect unary operations (NEG and NOT)
   /// via binary operators SUB and XOR:
   ///
   /// CreateNeg, CreateNot - Create the NEG and NOT
   /// instructions out of SUB and XOR instructions.
   ///
-  static BinaryOperator *CreateNeg(Value *Op, const std::string &Name = "",
+  static BinaryOperator *CreateNeg(Value *Op, const Twine &Name = "",
                                    Instruction *InsertBefore = 0);
-  static BinaryOperator *CreateNeg(Value *Op, const std::string &Name,
+  static BinaryOperator *CreateNeg(Value *Op, const Twine &Name,
                                    BasicBlock *InsertAtEnd);
-  static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name = "",
+  static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name = "",
                                     Instruction *InsertBefore = 0);
-  static BinaryOperator *CreateFNeg(Value *Op, const std::string &Name,
+  static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name,
                                     BasicBlock *InsertAtEnd);
-  static BinaryOperator *CreateNot(Value *Op, const std::string &Name = "",
+  static BinaryOperator *CreateNot(Value *Op, const Twine &Name = "",
                                    Instruction *InsertBefore = 0);
-  static BinaryOperator *CreateNot(Value *Op, const std::string &Name,
+  static BinaryOperator *CreateNot(Value *Op, const Twine &Name,
                                    BasicBlock *InsertAtEnd);
 
   /// isNeg, isFNeg, isNot - Check if the given Value is a
@@ -244,6 +308,30 @@ public:
   ///
   bool swapOperands();
 
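
The Create* helpers above are convenience wrappers; the sketch below shows the one-call form next to its two-step equivalent (names illustrative):

  #include "llvm/InstrTypes.h"
  using namespace llvm;

  BinaryOperator *addNSW(Value *L, Value *R, Instruction *InsertBefore) {
    // Same effect as CreateAdd(L, R, "sum", InsertBefore) followed by
    // setHasNoSignedWrap(true): the optimizer may assume the signed
    // addition does not wrap.
    return BinaryOperator::CreateNSWAdd(L, R, "sum", InsertBefore);
  }
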
+ void setHasNoUnsignedWrap(bool b = true); + + /// setHasNoSignedWrap - Set or clear the nsw flag on this instruction, + /// which must be an operator which supports this flag. See LangRef.html + /// for the meaning of this flag. + void setHasNoSignedWrap(bool b = true); + + /// setIsExact - Set or clear the exact flag on this instruction, + /// which must be an operator which supports this flag. See LangRef.html + /// for the meaning of this flag. + void setIsExact(bool b = true); + + /// hasNoUnsignedWrap - Determine whether the no unsigned wrap flag is set. + bool hasNoUnsignedWrap() const; + + /// hasNoSignedWrap - Determine whether the no signed wrap flag is set. + bool hasNoSignedWrap() const; + + /// isExact - Determine whether the exact flag is set. + bool isExact() const; + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const BinaryOperator *) { return true; } static inline bool classof(const Instruction *I) { @@ -255,7 +343,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<2> { +struct OperandTraits : public FixedNumOperandTraits<2> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value) @@ -271,22 +359,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value) /// if (isa(Instr)) { ... } /// @brief Base class of casting instructions. class CastInst : public UnaryInstruction { - /// @brief Copy constructor - CastInst(const CastInst &CI) - : UnaryInstruction(CI.getType(), CI.getOpcode(), CI.getOperand(0)) { - } - /// @brief Do not allow default construction - CastInst(); protected: /// @brief Constructor with insert-before-instruction semantics for subclasses CastInst(const Type *Ty, unsigned iType, Value *S, - const std::string &NameStr = "", Instruction *InsertBefore = 0) + const Twine &NameStr = "", Instruction *InsertBefore = 0) : UnaryInstruction(Ty, iType, S, InsertBefore) { setName(NameStr); } /// @brief Constructor with insert-at-end-of-block semantics for subclasses CastInst(const Type *Ty, unsigned iType, Value *S, - const std::string &NameStr, BasicBlock *InsertAtEnd) + const Twine &NameStr, BasicBlock *InsertAtEnd) : UnaryInstruction(Ty, iType, S, InsertAtEnd) { setName(NameStr); } @@ -301,7 +383,7 @@ public: Instruction::CastOps, ///< The opcode of the cast instruction Value *S, ///< The value to be casted (operand 0) const Type *Ty, ///< The type to which cast should be made - const std::string &Name = "", ///< Name for the instruction + const Twine &Name = "", ///< Name for the instruction Instruction *InsertBefore = 0 ///< Place to insert the instruction ); /// Provides a way to construct any of the CastInst subclasses using an @@ -314,7 +396,7 @@ public: Instruction::CastOps, ///< The opcode for the cast instruction Value *S, ///< The value to be casted (operand 0) const Type *Ty, ///< The type to which operand is casted - const std::string &Name, ///< The name for the instruction + const Twine &Name, ///< The name for the instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); @@ -322,7 +404,7 @@ public: static CastInst *CreateZExtOrBitCast( Value *S, ///< The value to be casted (operand 0) const Type *Ty, ///< The type to which cast should be made - const std::string &Name = "", ///< Name for the instruction + const Twine &Name = "", ///< Name for the instruction Instruction *InsertBefore = 0 ///< Place to insert the instruction ); @@ -330,7 +412,7 @@ public: static CastInst *CreateZExtOrBitCast( Value *S, ///< The value to be casted (operand 0) 
const Type *Ty, ///< The type to which operand is casted - const std::string &Name, ///< The name for the instruction + const Twine &Name, ///< The name for the instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); @@ -338,7 +420,7 @@ public: static CastInst *CreateSExtOrBitCast( Value *S, ///< The value to be casted (operand 0) const Type *Ty, ///< The type to which cast should be made - const std::string &Name = "", ///< Name for the instruction + const Twine &Name = "", ///< Name for the instruction Instruction *InsertBefore = 0 ///< Place to insert the instruction ); @@ -346,7 +428,7 @@ public: static CastInst *CreateSExtOrBitCast( Value *S, ///< The value to be casted (operand 0) const Type *Ty, ///< The type to which operand is casted - const std::string &Name, ///< The name for the instruction + const Twine &Name, ///< The name for the instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); @@ -354,7 +436,7 @@ public: static CastInst *CreatePointerCast( Value *S, ///< The pointer value to be casted (operand 0) const Type *Ty, ///< The type to which operand is casted - const std::string &Name, ///< The name for the instruction + const Twine &Name, ///< The name for the instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); @@ -362,7 +444,7 @@ public: static CastInst *CreatePointerCast( Value *S, ///< The pointer value to be casted (operand 0) const Type *Ty, ///< The type to which cast should be made - const std::string &Name = "", ///< Name for the instruction + const Twine &Name = "", ///< Name for the instruction Instruction *InsertBefore = 0 ///< Place to insert the instruction ); @@ -371,7 +453,7 @@ public: Value *S, ///< The pointer value to be casted (operand 0) const Type *Ty, ///< The type to which cast should be made bool isSigned, ///< Whether to regard S as signed or not - const std::string &Name = "", ///< Name for the instruction + const Twine &Name = "", ///< Name for the instruction Instruction *InsertBefore = 0 ///< Place to insert the instruction ); @@ -380,7 +462,7 @@ public: Value *S, ///< The integer value to be casted (operand 0) const Type *Ty, ///< The integer type to which operand is casted bool isSigned, ///< Whether to regard S as signed or not - const std::string &Name, ///< The name for the instruction + const Twine &Name, ///< The name for the instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); @@ -388,7 +470,7 @@ public: static CastInst *CreateFPCast( Value *S, ///< The floating point value to be casted const Type *Ty, ///< The floating point type to cast to - const std::string &Name = "", ///< Name for the instruction + const Twine &Name = "", ///< Name for the instruction Instruction *InsertBefore = 0 ///< Place to insert the instruction ); @@ -396,7 +478,7 @@ public: static CastInst *CreateFPCast( Value *S, ///< The floating point value to be casted const Type *Ty, ///< The floating point type to cast to - const std::string &Name, ///< The name for the instruction + const Twine &Name, ///< The name for the instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); @@ -404,7 +486,7 @@ public: static CastInst *CreateTruncOrBitCast( Value *S, ///< The value to be casted (operand 0) const Type *Ty, ///< The type to which cast should be made - const std::string &Name = "", ///< Name for the instruction + const Twine &Name = "", ///< Name for the instruction Instruction *InsertBefore = 0 ///< Place to insert 
the instruction ); @@ -412,7 +494,7 @@ public: static CastInst *CreateTruncOrBitCast( Value *S, ///< The value to be casted (operand 0) const Type *Ty, ///< The type to which operand is casted - const std::string &Name, ///< The name for the instruction + const Twine &Name, ///< The name for the instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); @@ -471,7 +553,7 @@ public: const Type *SrcTy, ///< SrcTy of 1st cast const Type *MidTy, ///< DstTy of 1st cast & SrcTy of 2nd cast const Type *DstTy, ///< DstTy of 2nd cast - const Type *IntPtrTy ///< Integer type corresponding to Ptr types + const Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null ); /// @brief Return the opcode of this CastInst @@ -512,11 +594,11 @@ class CmpInst: public Instruction { CmpInst(); // do not implement protected: CmpInst(const Type *ty, Instruction::OtherOps op, unsigned short pred, - Value *LHS, Value *RHS, const std::string &Name = "", + Value *LHS, Value *RHS, const Twine &Name = "", Instruction *InsertBefore = 0); CmpInst(const Type *ty, Instruction::OtherOps op, unsigned short pred, - Value *LHS, Value *RHS, const std::string &Name, + Value *LHS, Value *RHS, const Twine &Name, BasicBlock *InsertAtEnd); public: @@ -569,8 +651,9 @@ public: /// instruction into a BasicBlock right before the specified instruction. /// The specified Instruction is allowed to be a dereferenced end iterator. /// @brief Create a CmpInst - static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1, - Value *S2, const std::string &Name = "", + static CmpInst *Create(OtherOps Op, + unsigned short predicate, Value *S1, + Value *S2, const Twine &Name = "", Instruction *InsertBefore = 0); /// Construct a compare instruction, given the opcode, the predicate and the @@ -578,8 +661,7 @@ public: /// the BasicBlock specified. 
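All the cast factories in this block now take a Twine, which concatenates name fragments lazily instead of materializing a std::string per call. A sketch assuming the post-patch per-context type accessors (widenToI64 is illustrative):

#include "llvm/InstrTypes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;

Value *widenToI64(Value *V, bool IsSigned, Instruction *InsertPt) {
  const Type *I64 = Type::getInt64Ty(V->getType()->getContext());
  // CreateIntegerCast picks trunc/zext/sext/bitcast as needed; the Twine
  // lets the name be derived from V without building a temporary string.
  return CastInst::CreateIntegerCast(V, I64, IsSigned,
                                     V->getName() + ".i64", InsertPt);
}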
/// @brief Create a CmpInst static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1, - Value *S2, const std::string &Name, - BasicBlock *InsertAtEnd); + Value *S2, const Twine &Name, BasicBlock *InsertAtEnd); /// @brief Get the opcode casted to the right type OtherOps getOpcode() const { @@ -655,26 +737,26 @@ public: static inline bool classof(const CmpInst *) { return true; } static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ICmp || - I->getOpcode() == Instruction::FCmp || - I->getOpcode() == Instruction::VICmp || - I->getOpcode() == Instruction::VFCmp; + I->getOpcode() == Instruction::FCmp; } static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - /// @brief Create a result type for fcmp/icmp (but not vicmp/vfcmp) + + /// @brief Create a result type for fcmp/icmp static const Type* makeCmpResultType(const Type* opnd_type) { if (const VectorType* vt = dyn_cast(opnd_type)) { - return VectorType::get(Type::Int1Ty, vt->getNumElements()); + return VectorType::get(Type::getInt1Ty(opnd_type->getContext()), + vt->getNumElements()); } - return Type::Int1Ty; + return Type::getInt1Ty(opnd_type->getContext()); } }; // FIXME: these are redundant if CmpInst < BinaryOperator template <> -struct OperandTraits : FixedNumOperandTraits<2> { +struct OperandTraits : public FixedNumOperandTraits<2> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value) diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def index 98fda77705489..e603c1257ef13 100644 --- a/include/llvm/Instruction.def +++ b/include/llvm/Instruction.def @@ -169,10 +169,8 @@ HANDLE_OTHER_INST(52, InsertElement, InsertElementInst) // insert into vector HANDLE_OTHER_INST(53, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. HANDLE_OTHER_INST(54, ExtractValue, ExtractValueInst)// extract from aggregate HANDLE_OTHER_INST(55, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(56, VICmp , VICmpInst ) // Vec Int comparison instruction. -HANDLE_OTHER_INST(57, VFCmp , VFCmpInst ) // Vec FP point comparison instr. - LAST_OTHER_INST(57) + LAST_OTHER_INST(55) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h index 7d946e85a6d00..fdae3d7d74426 100644 --- a/include/llvm/Instruction.h +++ b/include/llvm/Instruction.h @@ -20,6 +20,8 @@ namespace llvm { +class LLVMContext; + template class SymbolTableListTraits; @@ -52,6 +54,11 @@ public: /// extra information (e.g. load is volatile) agree. bool isIdenticalTo(const Instruction *I) const; + /// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it + /// ignores the SubclassOptionalData flags, which specify conditions + /// under which the instruction's result is undefined. + bool isIdenticalToWhenDefined(const Instruction *I) const; + /// This function determines if the specified instruction executes the same /// operation as the current one. This means that the opcodes, type, operand /// types and any other factors affecting the operation must be the same. This @@ -166,13 +173,6 @@ public: bool isCommutative() const { return isCommutative(getOpcode()); } static bool isCommutative(unsigned op); - /// isTrapping - Return true if the instruction may trap. - /// - bool isTrapping() const { - return isTrapping(getOpcode()); - } - static bool isTrapping(unsigned op); - /// mayWriteToMemory - Return true if this instruction may modify memory. 
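The makeCmpResultType rewrite above is part of the broader LLVMContext migration visible throughout this patch: global type singletons give way to per-context lookups, and the vector case now yields a vector of i1 since icmp/fcmp subsume the deleted VICmp/VFCmp opcodes. A minimal sketch of the new spelling:

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

const Type *boolTypeIn(LLVMContext &C) {
  // Pre-patch: Type::Int1Ty, a process-wide global.
  // Post-patch: each LLVMContext owns its own type table.
  return Type::getInt1Ty(C);
}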
/// bool mayWriteToMemory() const; @@ -187,10 +187,34 @@ public: /// mayHaveSideEffects - Return true if the instruction may have side effects. /// + /// Note that this does not consider malloc and alloca to have side + /// effects because the newly allocated memory is completely invisible to + /// instructions which don't use the returned value. For cases where this + /// matters, isSafeToSpeculativelyExecute may be more appropriate. bool mayHaveSideEffects() const { return mayWriteToMemory() || mayThrow(); } + /// isSafeToSpeculativelyExecute - Return true if the instruction does not + /// have any effects besides calculating the result and does not have + /// undefined behavior. + /// + /// This method never returns true for an instruction that returns true for + /// mayHaveSideEffects; however, this method also does some other checks in + /// addition. It checks for undefined behavior, like dividing by zero or + /// loading from an invalid pointer (but not for undefined results, like a + /// shift with a shift amount larger than the width of the result). It checks + /// for malloc and alloca because speculatively executing them might cause a + /// memory leak. It also returns false for instructions related to control + /// flow, specifically terminators and PHI nodes. + /// + /// This method only looks at the instruction itself and its operands, so if + /// this method returns true, it is safe to move the instruction as long as + /// the correct dominance relationships for the operands and users hold. + /// However, this method can return true for instructions that read memory; + /// for such instructions, moving them may change the resulting value. + bool isSafeToSpeculativelyExecute() const; + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *) { return true; } static inline bool classof(const Value *V) { diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index 59ae6100d2cdc..b28fcbb9516fe 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -20,6 +20,8 @@ #include "llvm/DerivedTypes.h" #include "llvm/Attributes.h" #include "llvm/BasicBlock.h" +#include "llvm/CallingConv.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallVector.h" #include <iterator> @@ -28,6 +30,8 @@ namespace llvm { class ConstantInt; class ConstantRange; class APInt; +class LLVMContext; +class DominatorTree; //===----------------------------------------------------------------------===// // AllocationInst Class //===----------------------------------------------------------------------===// @@ -38,10 +42,12 @@ class APInt; /// class AllocationInst : public UnaryInstruction { protected: - AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy, unsigned Align, - const std::string &Name = "", Instruction *InsertBefore = 0); - AllocationInst(const Type *Ty, Value *ArraySize, unsigned iTy, unsigned Align, - const std::string &Name, BasicBlock *InsertAtEnd); + AllocationInst(const Type *Ty, Value *ArraySize, + unsigned iTy, unsigned Align, const Twine &Name = "", + Instruction *InsertBefore = 0); + AllocationInst(const Type *Ty, Value *ArraySize, + unsigned iTy, unsigned Align, const Twine &Name, + BasicBlock *InsertAtEnd); public: // Out of line virtual method, so the vtable, etc. has a home. virtual ~AllocationInst(); @@ -51,7 +57,7 @@ public: /// bool isArrayAllocation() const; - /// getArraySize - Get the number of element allocated, for a simple + /// getArraySize - Get the number of elements allocated.
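The isSafeToSpeculativelyExecute contract spelled out above is what hoisting transforms key on. A hedged sketch of the intended use, assuming InsertPt's block dominates BB (hoistSpeculatable is illustrative, not part of the patch):

#include "llvm/Instruction.h"
#include "llvm/BasicBlock.h"
using namespace llvm;

void hoistSpeculatable(BasicBlock *BB, Instruction *InsertPt) {
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    Instruction *Cur = I++; // advance first; Cur may be moved away
    // Per the contract: no side effects, no trapping, not a terminator
    // or PHI. A real pass must also verify that Cur's operands dominate
    // InsertPt; this sketch assumes BB has no internal def-use chains.
    if (Cur->isSafeToSpeculativelyExecute())
      Cur->moveBefore(InsertPt);
  }
}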
For a simple /// allocation of a single element, this will return a constant 1 value. /// const Value *getArraySize() const { return getOperand(0); } @@ -74,7 +80,7 @@ public: unsigned getAlignment() const { return (1u << SubclassData) >> 1; } void setAlignment(unsigned Align); - virtual Instruction *clone() const = 0; + virtual AllocationInst *clone() const = 0; // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const AllocationInst *) { return true; } @@ -95,30 +101,33 @@ public: /// MallocInst - an instruction to allocated memory on the heap /// class MallocInst : public AllocationInst { - MallocInst(const MallocInst &MI); public: explicit MallocInst(const Type *Ty, Value *ArraySize = 0, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) - : AllocationInst(Ty, ArraySize, Malloc, 0, NameStr, InsertBefore) {} - MallocInst(const Type *Ty, Value *ArraySize, const std::string &NameStr, - BasicBlock *InsertAtEnd) + : AllocationInst(Ty, ArraySize, Malloc, + 0, NameStr, InsertBefore) {} + MallocInst(const Type *Ty, Value *ArraySize, + const Twine &NameStr, BasicBlock *InsertAtEnd) : AllocationInst(Ty, ArraySize, Malloc, 0, NameStr, InsertAtEnd) {} - MallocInst(const Type *Ty, const std::string &NameStr, + MallocInst(const Type *Ty, const Twine &NameStr, Instruction *InsertBefore = 0) : AllocationInst(Ty, 0, Malloc, 0, NameStr, InsertBefore) {} - MallocInst(const Type *Ty, const std::string &NameStr, + MallocInst(const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd) : AllocationInst(Ty, 0, Malloc, 0, NameStr, InsertAtEnd) {} - MallocInst(const Type *Ty, Value *ArraySize, unsigned Align, - const std::string &NameStr, BasicBlock *InsertAtEnd) - : AllocationInst(Ty, ArraySize, Malloc, Align, NameStr, InsertAtEnd) {} - MallocInst(const Type *Ty, Value *ArraySize, unsigned Align, - const std::string &NameStr = "", - Instruction *InsertBefore = 0) - : AllocationInst(Ty, ArraySize, Malloc, Align, NameStr, InsertBefore) {} + MallocInst(const Type *Ty, Value *ArraySize, + unsigned Align, const Twine &NameStr, + BasicBlock *InsertAtEnd) + : AllocationInst(Ty, ArraySize, Malloc, + Align, NameStr, InsertAtEnd) {} + MallocInst(const Type *Ty, Value *ArraySize, + unsigned Align, const Twine &NameStr = "", + Instruction *InsertBefore = 0) + : AllocationInst(Ty, ArraySize, + Malloc, Align, NameStr, InsertBefore) {} virtual MallocInst *clone() const; @@ -140,29 +149,35 @@ public: /// AllocaInst - an instruction to allocate memory on the stack /// class AllocaInst : public AllocationInst { - AllocaInst(const AllocaInst &); public: - explicit AllocaInst(const Type *Ty, Value *ArraySize = 0, - const std::string &NameStr = "", + explicit AllocaInst(const Type *Ty, + Value *ArraySize = 0, + const Twine &NameStr = "", Instruction *InsertBefore = 0) - : AllocationInst(Ty, ArraySize, Alloca, 0, NameStr, InsertBefore) {} - AllocaInst(const Type *Ty, Value *ArraySize, const std::string &NameStr, + : AllocationInst(Ty, ArraySize, Alloca, + 0, NameStr, InsertBefore) {} + AllocaInst(const Type *Ty, + Value *ArraySize, const Twine &NameStr, BasicBlock *InsertAtEnd) : AllocationInst(Ty, ArraySize, Alloca, 0, NameStr, InsertAtEnd) {} - AllocaInst(const Type *Ty, const std::string &NameStr, + AllocaInst(const Type *Ty, const Twine &NameStr, Instruction *InsertBefore = 0) : AllocationInst(Ty, 0, Alloca, 0, NameStr, InsertBefore) {} - AllocaInst(const Type *Ty, const std::string &NameStr, + AllocaInst(const Type *Ty, const Twine 
&NameStr, BasicBlock *InsertAtEnd) : AllocationInst(Ty, 0, Alloca, 0, NameStr, InsertAtEnd) {} - AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, - const std::string &NameStr = "", Instruction *InsertBefore = 0) - : AllocationInst(Ty, ArraySize, Alloca, Align, NameStr, InsertBefore) {} - AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, - const std::string &NameStr, BasicBlock *InsertAtEnd) - : AllocationInst(Ty, ArraySize, Alloca, Align, NameStr, InsertAtEnd) {} + AllocaInst(const Type *Ty, Value *ArraySize, + unsigned Align, const Twine &NameStr = "", + Instruction *InsertBefore = 0) + : AllocationInst(Ty, ArraySize, Alloca, + Align, NameStr, InsertBefore) {} + AllocaInst(const Type *Ty, Value *ArraySize, + unsigned Align, const Twine &NameStr, + BasicBlock *InsertAtEnd) + : AllocationInst(Ty, ArraySize, Alloca, + Align, NameStr, InsertAtEnd) {} virtual AllocaInst *clone() const; @@ -219,27 +234,17 @@ public: /// SubclassData field in Value to store whether or not the load is volatile. /// class LoadInst : public UnaryInstruction { - - LoadInst(const LoadInst &LI) - : UnaryInstruction(LI.getType(), Load, LI.getOperand(0)) { - setVolatile(LI.isVolatile()); - setAlignment(LI.getAlignment()); - -#ifndef NDEBUG - AssertOK(); -#endif - } void AssertOK(); public: - LoadInst(Value *Ptr, const std::string &NameStr, Instruction *InsertBefore); - LoadInst(Value *Ptr, const std::string &NameStr, BasicBlock *InsertAtEnd); - LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile = false, + LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore); + LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd); + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile = false, Instruction *InsertBefore = 0); - LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, Instruction *InsertBefore = 0); - LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, BasicBlock *InsertAtEnd); - LoadInst(Value *Ptr, const std::string &NameStr, bool isVolatile, + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, BasicBlock *InsertAtEnd); LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore); @@ -274,6 +279,11 @@ public: const Value *getPointerOperand() const { return getOperand(0); } static unsigned getPointerOperandIndex() { return 0U; } + unsigned getPointerAddressSpace() const { + return cast(getPointerOperand()->getType())->getAddressSpace(); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const LoadInst *) { return true; } static inline bool classof(const Instruction *I) { @@ -293,18 +303,6 @@ public: /// class StoreInst : public Instruction { void *operator new(size_t, unsigned); // DO NOT IMPLEMENT - - StoreInst(const StoreInst &SI) : Instruction(SI.getType(), Store, - &Op<0>(), 2) { - Op<0>() = SI.Op<0>(); - Op<1>() = SI.Op<1>(); - setVolatile(SI.isVolatile()); - setAlignment(SI.getAlignment()); - -#ifndef NDEBUG - AssertOK(); -#endif - } void AssertOK(); public: // allocate space for exactly two operands @@ -350,6 +348,10 @@ public: const Value *getPointerOperand() const { return getOperand(1); } static unsigned getPointerOperandIndex() { return 1U; } + unsigned getPointerAddressSpace() const { + return cast(getPointerOperand()->getType())->getAddressSpace(); + } + // Methods for support type inquiry 
through isa, cast, and dyn_cast: static inline bool classof(const StoreInst *) { return true; } static inline bool classof(const Instruction *I) { @@ -361,7 +363,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<2> { +struct OperandTraits : public FixedNumOperandTraits<2> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) @@ -384,12 +386,12 @@ static inline const Type *checkType(const Type *Ty) { class GetElementPtrInst : public Instruction { GetElementPtrInst(const GetElementPtrInst &GEPI); void init(Value *Ptr, Value* const *Idx, unsigned NumIdx, - const std::string &NameStr); - void init(Value *Ptr, Value *Idx, const std::string &NameStr); + const Twine &NameStr); + void init(Value *Ptr, Value *Idx, const Twine &NameStr); template void init(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, // This argument ensures that we have an iterator we can // do arithmetic on in constant time std::random_access_iterator_tag) { @@ -436,25 +438,25 @@ class GetElementPtrInst : public Instruction { inline GetElementPtrInst(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, unsigned Values, - const std::string &NameStr, + const Twine &NameStr, Instruction *InsertBefore); template inline GetElementPtrInst(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, unsigned Values, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); /// Constructors - These two constructors are convenience methods because one /// and two index getelementptr instructions are so common. - GetElementPtrInst(Value *Ptr, Value *Idx, const std::string &NameStr = "", + GetElementPtrInst(Value *Ptr, Value *Idx, const Twine &NameStr = "", Instruction *InsertBefore = 0); GetElementPtrInst(Value *Ptr, Value *Idx, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); public: template static GetElementPtrInst *Create(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { typename std::iterator_traits::difference_type Values = 1 + std::distance(IdxBegin, IdxEnd); @@ -464,7 +466,7 @@ public: template static GetElementPtrInst *Create(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { typename std::iterator_traits::difference_type Values = 1 + std::distance(IdxBegin, IdxEnd); @@ -475,16 +477,54 @@ public: /// Constructors - These two creators are convenience methods because one /// index getelementptr instructions are so common. static GetElementPtrInst *Create(Value *Ptr, Value *Idx, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new(2) GetElementPtrInst(Ptr, Idx, NameStr, InsertBefore); } static GetElementPtrInst *Create(Value *Ptr, Value *Idx, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { return new(2) GetElementPtrInst(Ptr, Idx, NameStr, InsertAtEnd); } + /// Create an "inbounds" getelementptr. See the documentation for the + /// "inbounds" flag in LangRef.html for details. 
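A usage sketch of the CreateInBounds factories declared just below, assuming P points into a real object (elementAddr is illustrative):

#include "llvm/Instructions.h"
using namespace llvm;

Value *elementAddr(Value *P, Value *I, Instruction *InsertPt) {
  // Equivalent to Create(P, I, ...) followed by setIsInBounds(true);
  // inbounds makes out-of-object address arithmetic undefined, which
  // licenses more aggressive alias analysis.
  return GetElementPtrInst::CreateInBounds(P, I, "elt.addr", InsertPt);
}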
+ template + static GetElementPtrInst *CreateInBounds(Value *Ptr, InputIterator IdxBegin, + InputIterator IdxEnd, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd, + NameStr, InsertBefore); + GEP->setIsInBounds(true); + return GEP; + } + template + static GetElementPtrInst *CreateInBounds(Value *Ptr, + InputIterator IdxBegin, + InputIterator IdxEnd, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd, + NameStr, InsertAtEnd); + GEP->setIsInBounds(true); + return GEP; + } + static GetElementPtrInst *CreateInBounds(Value *Ptr, Value *Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + GetElementPtrInst *GEP = Create(Ptr, Idx, NameStr, InsertBefore); + GEP->setIsInBounds(true); + return GEP; + } + static GetElementPtrInst *CreateInBounds(Value *Ptr, Value *Idx, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + GetElementPtrInst *GEP = Create(Ptr, Idx, NameStr, InsertAtEnd); + GEP->setIsInBounds(true); + return GEP; + } + virtual GetElementPtrInst *clone() const; /// Transparently provide more efficient getOperand methods. @@ -532,6 +572,10 @@ public: static unsigned getPointerOperandIndex() { return 0U; // get index for modifying correct operand } + + unsigned getPointerAddressSpace() const { + return cast(getType())->getAddressSpace(); + } /// getPointerOperandType - Method to return the pointer operand as a /// PointerType. @@ -558,6 +602,12 @@ public: /// a constant offset between them. bool hasAllConstantIndices() const; + /// setIsInBounds - Set or clear the inbounds flag on this GEP instruction. + /// See LangRef.html for the meaning of inbounds on a getelementptr. + void setIsInBounds(bool b = true); + + /// isInBounds - Determine whether the GEP has the inbounds flag. + bool isInBounds() const; // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const GetElementPtrInst *) { return true; } @@ -570,7 +620,7 @@ public: }; template <> -struct OperandTraits : VariadicOperandTraits<1> { +struct OperandTraits : public VariadicOperandTraits<1> { }; template @@ -578,7 +628,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, unsigned Values, - const std::string &NameStr, + const Twine &NameStr, Instruction *InsertBefore) : Instruction(PointerType::get(checkType( getIndexedType(Ptr->getType(), @@ -596,7 +646,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, unsigned Values, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) : Instruction(PointerType::get(checkType( getIndexedType(Ptr->getType(), @@ -626,11 +676,11 @@ class ICmpInst: public CmpInst { public: /// @brief Constructor with insert-before-instruction semantics. ICmpInst( + Instruction *InsertBefore, ///< Where to insert Predicate pred, ///< The predicate to use for the comparison Value *LHS, ///< The left-hand-side of the expression Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr = "", ///< Name of the instruction - Instruction *InsertBefore = 0 ///< Where to insert + const Twine &NameStr = "" ///< Name of the instruction ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::ICmp, pred, LHS, RHS, NameStr, InsertBefore) { @@ -645,16 +695,35 @@ public: "Invalid operand types for ICmp instruction"); } - /// @brief Constructor with insert-at-block-end semantics. 
+ /// @brief Constructor with insert-at-end semantics. + ICmpInst( + BasicBlock &InsertAtEnd, ///< Block to insert into. + Predicate pred, ///< The predicate to use for the comparison + Value *LHS, ///< The left-hand-side of the expression + Value *RHS, ///< The right-hand-side of the expression + const Twine &NameStr = "" ///< Name of the instruction + ) : CmpInst(makeCmpResultType(LHS->getType()), + Instruction::ICmp, pred, LHS, RHS, NameStr, + &InsertAtEnd) { + assert(pred >= CmpInst::FIRST_ICMP_PREDICATE && + pred <= CmpInst::LAST_ICMP_PREDICATE && + "Invalid ICmp predicate value"); + assert(getOperand(0)->getType() == getOperand(1)->getType() && + "Both operands to ICmp instruction are not of the same type!"); + // Check that the operands are the right type + assert((getOperand(0)->getType()->isIntOrIntVector() || + isa(getOperand(0)->getType())) && + "Invalid operand types for ICmp instruction"); + } + + /// @brief Constructor with no-insertion semantics ICmpInst( Predicate pred, ///< The predicate to use for the comparison Value *LHS, ///< The left-hand-side of the expression Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr, ///< Name of the instruction - BasicBlock *InsertAtEnd ///< Block to insert into. + const Twine &NameStr = "" ///< Name of the instruction ) : CmpInst(makeCmpResultType(LHS->getType()), - Instruction::ICmp, pred, LHS, RHS, NameStr, - InsertAtEnd) { + Instruction::ICmp, pred, LHS, RHS, NameStr) { assert(pred >= CmpInst::FIRST_ICMP_PREDICATE && pred <= CmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp predicate value"); @@ -781,11 +850,11 @@ class FCmpInst: public CmpInst { public: /// @brief Constructor with insert-before-instruction semantics. FCmpInst( + Instruction *InsertBefore, ///< Where to insert Predicate pred, ///< The predicate to use for the comparison Value *LHS, ///< The left-hand-side of the expression Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr = "", ///< Name of the instruction - Instruction *InsertBefore = 0 ///< Where to insert + const Twine &NameStr = "" ///< Name of the instruction ) : CmpInst(makeCmpResultType(LHS->getType()), Instruction::FCmp, pred, LHS, RHS, NameStr, InsertBefore) { @@ -797,17 +866,34 @@ public: assert(getOperand(0)->getType()->isFPOrFPVector() && "Invalid operand types for FCmp instruction"); } + + /// @brief Constructor with insert-at-end semantics. + FCmpInst( + BasicBlock &InsertAtEnd, ///< Block to insert into. + Predicate pred, ///< The predicate to use for the comparison + Value *LHS, ///< The left-hand-side of the expression + Value *RHS, ///< The right-hand-side of the expression + const Twine &NameStr = "" ///< Name of the instruction + ) : CmpInst(makeCmpResultType(LHS->getType()), + Instruction::FCmp, pred, LHS, RHS, NameStr, + &InsertAtEnd) { + assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && + "Invalid FCmp predicate value"); + assert(getOperand(0)->getType() == getOperand(1)->getType() && + "Both operands to FCmp instruction are not of the same type!"); + // Check that the operands are the right type + assert(getOperand(0)->getType()->isFPOrFPVector() && + "Invalid operand types for FCmp instruction"); + } - /// @brief Constructor with insert-at-block-end semantics. 
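Note the argument reshuffle in these constructors: the insertion point now comes first and the name last, so the predicate/operand triple stays contiguous. A before/after sketch under that assumption (isPositive is illustrative):

#include "llvm/Instructions.h"
using namespace llvm;

Value *isPositive(Value *X, Value *Zero, Instruction *InsertPt) {
  // Pre-patch:  new ICmpInst(ICmpInst::ICMP_SGT, X, Zero, "pos", InsertPt);
  // Post-patch: the insertion point leads and the name trails.
  return new ICmpInst(InsertPt, ICmpInst::ICMP_SGT, X, Zero, "pos");
}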
+ /// @brief Constructor with no-insertion semantics FCmpInst( Predicate pred, ///< The predicate to use for the comparison Value *LHS, ///< The left-hand-side of the expression Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr, ///< Name of the instruction - BasicBlock *InsertAtEnd ///< Block to insert into. + const Twine &NameStr = "" ///< Name of the instruction ) : CmpInst(makeCmpResultType(LHS->getType()), - Instruction::FCmp, pred, LHS, RHS, NameStr, - InsertAtEnd) { + Instruction::FCmp, pred, LHS, RHS, NameStr) { assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp predicate value"); assert(getOperand(0)->getType() == getOperand(1)->getType() && @@ -858,119 +944,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - -}; - -//===----------------------------------------------------------------------===// -// VICmpInst Class -//===----------------------------------------------------------------------===// - -/// This instruction compares its operands according to the predicate given -/// to the constructor. It only operates on vectors of integers. -/// The operands must be identical types. -/// @brief Represents a vector integer comparison operator. -class VICmpInst: public CmpInst { -public: - /// @brief Constructor with insert-before-instruction semantics. - VICmpInst( - Predicate pred, ///< The predicate to use for the comparison - Value *LHS, ///< The left-hand-side of the expression - Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr = "", ///< Name of the instruction - Instruction *InsertBefore = 0 ///< Where to insert - ) : CmpInst(LHS->getType(), Instruction::VICmp, pred, LHS, RHS, NameStr, - InsertBefore) { - assert(pred >= CmpInst::FIRST_ICMP_PREDICATE && - pred <= CmpInst::LAST_ICMP_PREDICATE && - "Invalid VICmp predicate value"); - assert(getOperand(0)->getType() == getOperand(1)->getType() && - "Both operands to VICmp instruction are not of the same type!"); - } - - /// @brief Constructor with insert-at-block-end semantics. - VICmpInst( - Predicate pred, ///< The predicate to use for the comparison - Value *LHS, ///< The left-hand-side of the expression - Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr, ///< Name of the instruction - BasicBlock *InsertAtEnd ///< Block to insert into. - ) : CmpInst(LHS->getType(), Instruction::VICmp, pred, LHS, RHS, NameStr, - InsertAtEnd) { - assert(pred >= CmpInst::FIRST_ICMP_PREDICATE && - pred <= CmpInst::LAST_ICMP_PREDICATE && - "Invalid VICmp predicate value"); - assert(getOperand(0)->getType() == getOperand(1)->getType() && - "Both operands to VICmp instruction are not of the same type!"); - } - - /// @brief Return the predicate for this instruction. - Predicate getPredicate() const { return Predicate(SubclassData); } - - virtual VICmpInst *clone() const; - - // Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const VICmpInst *) { return true; } - static inline bool classof(const Instruction *I) { - return I->getOpcode() == Instruction::VICmp; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } -}; - -//===----------------------------------------------------------------------===// -// VFCmpInst Class -//===----------------------------------------------------------------------===// - -/// This instruction compares its operands according to the predicate given -/// to the constructor. 
It only operates on vectors of floating point values. -/// The operands must be identical types. -/// @brief Represents a vector floating point comparison operator. -class VFCmpInst: public CmpInst { -public: - /// @brief Constructor with insert-before-instruction semantics. - VFCmpInst( - Predicate pred, ///< The predicate to use for the comparison - Value *LHS, ///< The left-hand-side of the expression - Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr = "", ///< Name of the instruction - Instruction *InsertBefore = 0 ///< Where to insert - ) : CmpInst(VectorType::getInteger(cast(LHS->getType())), - Instruction::VFCmp, pred, LHS, RHS, NameStr, InsertBefore) { - assert(pred <= CmpInst::LAST_FCMP_PREDICATE && - "Invalid VFCmp predicate value"); - assert(getOperand(0)->getType() == getOperand(1)->getType() && - "Both operands to VFCmp instruction are not of the same type!"); - } - - /// @brief Constructor with insert-at-block-end semantics. - VFCmpInst( - Predicate pred, ///< The predicate to use for the comparison - Value *LHS, ///< The left-hand-side of the expression - Value *RHS, ///< The right-hand-side of the expression - const std::string &NameStr, ///< Name of the instruction - BasicBlock *InsertAtEnd ///< Block to insert into. - ) : CmpInst(VectorType::getInteger(cast(LHS->getType())), - Instruction::VFCmp, pred, LHS, RHS, NameStr, InsertAtEnd) { - assert(pred <= CmpInst::LAST_FCMP_PREDICATE && - "Invalid VFCmp predicate value"); - assert(getOperand(0)->getType() == getOperand(1)->getType() && - "Both operands to VFCmp instruction are not of the same type!"); - } - - /// @brief Return the predicate for this instruction. - Predicate getPredicate() const { return Predicate(SubclassData); } - - virtual VFCmpInst *clone() const; - - /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const VFCmpInst *) { return true; } - static inline bool classof(const Instruction *I) { - return I->getOpcode() == Instruction::VFCmp; - } - static inline bool classof(const Value *V) { - return isa(V) && classof(cast(V)); - } }; //===----------------------------------------------------------------------===// @@ -992,7 +965,7 @@ class CallInst : public Instruction { template void init(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, + const Twine &NameStr, // This argument ensures that we have an iterator we can // do arithmetic on in constant time std::random_access_iterator_tag) { @@ -1011,7 +984,7 @@ class CallInst : public Instruction { /// @brief Construct a CallInst from a range of arguments template CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, Instruction *InsertBefore); + const Twine &NameStr, Instruction *InsertBefore); /// Construct a CallInst given a range of arguments. 
InputIterator /// must be a random-access iterator pointing to contiguous storage @@ -1021,20 +994,20 @@ class CallInst : public Instruction { /// @brief Construct a CallInst from a range of arguments template inline CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); - CallInst(Value *F, Value *Actual, const std::string& NameStr, + CallInst(Value *F, Value *Actual, const Twine &NameStr, Instruction *InsertBefore); - CallInst(Value *F, Value *Actual, const std::string& NameStr, + CallInst(Value *F, Value *Actual, const Twine &NameStr, BasicBlock *InsertAtEnd); - explicit CallInst(Value *F, const std::string &NameStr, + explicit CallInst(Value *F, const Twine &NameStr, Instruction *InsertBefore); - CallInst(Value *F, const std::string &NameStr, BasicBlock *InsertAtEnd); + CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd); public: template static CallInst *Create(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new((unsigned)(ArgEnd - ArgBegin + 1)) CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertBefore); @@ -1042,27 +1015,39 @@ public: template static CallInst *Create(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, BasicBlock *InsertAtEnd) { + const Twine &NameStr, BasicBlock *InsertAtEnd) { return new((unsigned)(ArgEnd - ArgBegin + 1)) CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertAtEnd); } static CallInst *Create(Value *F, Value *Actual, - const std::string& NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new(2) CallInst(F, Actual, NameStr, InsertBefore); } - static CallInst *Create(Value *F, Value *Actual, const std::string& NameStr, + static CallInst *Create(Value *F, Value *Actual, const Twine &NameStr, BasicBlock *InsertAtEnd) { return new(2) CallInst(F, Actual, NameStr, InsertAtEnd); } - static CallInst *Create(Value *F, const std::string &NameStr = "", + static CallInst *Create(Value *F, const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new(1) CallInst(F, NameStr, InsertBefore); } - static CallInst *Create(Value *F, const std::string &NameStr, + static CallInst *Create(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd) { return new(1) CallInst(F, NameStr, InsertAtEnd); } + /// CreateMalloc - Generate the IR for a call to malloc: + /// 1. Compute the malloc call's argument as the specified type's size, + /// possibly multiplied by the array size if the array size is not + /// constant 1. + /// 2. Call malloc with that argument. + /// 3. Bitcast the result of the malloc call to the specified type. + static Value *CreateMalloc(Instruction *InsertBefore, const Type *IntPtrTy, + const Type *AllocTy, Value *ArraySize = 0, + const Twine &Name = ""); + static Value *CreateMalloc(BasicBlock *InsertAtEnd, const Type *IntPtrTy, + const Type *AllocTy, Value *ArraySize = 0, + const Twine &Name = ""); ~CallInst(); @@ -1078,9 +1063,11 @@ public: /// getCallingConv/setCallingConv - Get or set the calling convention of this /// function call. 
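CreateMalloc, declared above, replaces the lowering that MallocInst used to get for free: it sizes the allocation, emits the libc call, and bitcasts the result to a pointer to AllocTy. A hedged sketch (IntPtrTy must match the target's pointer-sized integer; i64 is assumed here purely for illustration):

#include "llvm/Instructions.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// Emit the moral equivalent of "new double[N]" before InsertPt.
Value *mallocDoubles(Value *N, Instruction *InsertPt) {
  LLVMContext &C = InsertPt->getContext();
  return CallInst::CreateMalloc(InsertPt,
                                Type::getInt64Ty(C),  // assumed intptr type
                                Type::getDoubleTy(C), // element type
                                N, "arr");
}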
- unsigned getCallingConv() const { return SubclassData >> 1; } - void setCallingConv(unsigned CC) { - SubclassData = (SubclassData & 1) | (CC << 1); + CallingConv::ID getCallingConv() const { + return static_cast(SubclassData >> 1); + } + void setCallingConv(CallingConv::ID CC) { + SubclassData = (SubclassData & 1) | (static_cast(CC) << 1); } /// getAttributes - Return the parameter attributes for this call. @@ -1176,12 +1163,12 @@ public: }; template <> -struct OperandTraits : VariadicOperandTraits<1> { +struct OperandTraits : public VariadicOperandTraits<1> { }; template CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, BasicBlock *InsertAtEnd) + const Twine &NameStr, BasicBlock *InsertAtEnd) : Instruction(cast(cast(Func->getType()) ->getElementType())->getReturnType(), Instruction::Call, @@ -1193,7 +1180,7 @@ CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, template CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, Instruction *InsertBefore) + const Twine &NameStr, Instruction *InsertBefore) : Instruction(cast(cast(Func->getType()) ->getElementType())->getReturnType(), Instruction::Call, @@ -1219,18 +1206,14 @@ class SelectInst : public Instruction { Op<2>() = S2; } - SelectInst(const SelectInst &SI) - : Instruction(SI.getType(), SI.getOpcode(), &Op<0>(), 3) { - init(SI.Op<0>(), SI.Op<1>(), SI.Op<2>()); - } - SelectInst(Value *C, Value *S1, Value *S2, const std::string &NameStr, + SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr, Instruction *InsertBefore) : Instruction(S1->getType(), Instruction::Select, &Op<0>(), 3, InsertBefore) { init(C, S1, S2); setName(NameStr); } - SelectInst(Value *C, Value *S1, Value *S2, const std::string &NameStr, + SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock *InsertAtEnd) : Instruction(S1->getType(), Instruction::Select, &Op<0>(), 3, InsertAtEnd) { @@ -1239,20 +1222,23 @@ class SelectInst : public Instruction { } public: static SelectInst *Create(Value *C, Value *S1, Value *S2, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new(3) SelectInst(C, S1, S2, NameStr, InsertBefore); } static SelectInst *Create(Value *C, Value *S1, Value *S2, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { return new(3) SelectInst(C, S1, S2, NameStr, InsertAtEnd); } - Value *getCondition() const { return Op<0>(); } - Value *getTrueValue() const { return Op<1>(); } - Value *getFalseValue() const { return Op<2>(); } - + const Value *getCondition() const { return Op<0>(); } + const Value *getTrueValue() const { return Op<1>(); } + const Value *getFalseValue() const { return Op<2>(); } + Value *getCondition() { return Op<0>(); } + Value *getTrueValue() { return Op<1>(); } + Value *getFalseValue() { return Op<2>(); } + /// areInvalidOperands - Return a string if the specified operands are invalid /// for a select operation, otherwise return null. 
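getCallingConv now round-trips through the CallingConv::ID enum rather than a raw unsigned, so a mismatched convention shows up in the type system. A small sketch assuming a call site in hand (makeFastCC is illustrative):

#include "llvm/Instructions.h"
#include "llvm/CallingConv.h"
using namespace llvm;

void makeFastCC(CallInst *CI) {
  // Post-patch this is an enum, not an unsigned, so assigning an
  // arbitrary integer no longer compiles without an explicit cast.
  if (CI->getCallingConv() == CallingConv::C)
    CI->setCallingConv(CallingConv::Fast);
}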
static const char *areInvalidOperands(Value *Cond, Value *True, Value *False); @@ -1277,7 +1263,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<3> { +struct OperandTraits : public FixedNumOperandTraits<3> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value) @@ -1290,15 +1276,13 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value) /// an argument of the specified type given a va_list and increments that list /// class VAArgInst : public UnaryInstruction { - VAArgInst(const VAArgInst &VAA) - : UnaryInstruction(VAA.getType(), VAArg, VAA.getOperand(0)) {} public: - VAArgInst(Value *List, const Type *Ty, const std::string &NameStr = "", + VAArgInst(Value *List, const Type *Ty, const Twine &NameStr = "", Instruction *InsertBefore = 0) : UnaryInstruction(Ty, VAArg, List, InsertBefore) { setName(NameStr); } - VAArgInst(Value *List, const Type *Ty, const std::string &NameStr, + VAArgInst(Value *List, const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd) : UnaryInstruction(Ty, VAArg, List, InsertAtEnd) { setName(NameStr); @@ -1324,25 +1308,21 @@ public: /// element from a VectorType value /// class ExtractElementInst : public Instruction { - ExtractElementInst(const ExtractElementInst &EE) : - Instruction(EE.getType(), ExtractElement, &Op<0>(), 2) { - Op<0>() = EE.Op<0>(); - Op<1>() = EE.Op<1>(); - } - -public: - // allocate space for exactly two operands - void *operator new(size_t s) { - return User::operator new(s, 2); // FIXME: "unsigned Idx" forms of ctor? - } - ExtractElementInst(Value *Vec, Value *Idx, const std::string &NameStr = "", + ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr = "", Instruction *InsertBefore = 0); - ExtractElementInst(Value *Vec, unsigned Idx, const std::string &NameStr = "", - Instruction *InsertBefore = 0); - ExtractElementInst(Value *Vec, Value *Idx, const std::string &NameStr, - BasicBlock *InsertAtEnd); - ExtractElementInst(Value *Vec, unsigned Idx, const std::string &NameStr, + ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr, BasicBlock *InsertAtEnd); +public: + static ExtractElementInst *Create(Value *Vec, Value *Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore); + } + static ExtractElementInst *Create(Value *Vec, Value *Idx, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertAtEnd); + } /// isValidOperands - Return true if an extractelement instruction can be /// formed with the specified operands. @@ -1350,6 +1330,16 @@ public: virtual ExtractElementInst *clone() const; + Value *getVectorOperand() { return Op<0>(); } + Value *getIndexOperand() { return Op<1>(); } + const Value *getVectorOperand() const { return Op<0>(); } + const Value *getIndexOperand() const { return Op<1>(); } + + const VectorType *getVectorOperandType() const { + return reinterpret_cast(getVectorOperand()->getType()); + } + + /// Transparently provide more efficient getOperand methods. 
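ExtractElementInst construction moves behind Create factories below (the constructors become private), and named accessors replace raw operand indices. A sketch under those assumptions (extractLane is illustrative):

#include "llvm/Instructions.h"
#include <cassert>
using namespace llvm;

Value *extractLane(Value *Vec, Value *Idx, Instruction *InsertPt) {
  ExtractElementInst *EE =
    ExtractElementInst::Create(Vec, Idx, "lane", InsertPt);
  // getVectorOperand()/getIndexOperand() are the self-describing
  // equivalents of getOperand(0)/getOperand(1).
  assert(EE->getVectorOperand() == Vec && EE->getIndexOperand() == Idx);
  return EE;
}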
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -1364,7 +1354,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<2> { +struct OperandTraits : public FixedNumOperandTraits<2> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value) @@ -1377,38 +1367,19 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value) /// element into a VectorType value /// class InsertElementInst : public Instruction { - InsertElementInst(const InsertElementInst &IE); InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, - const std::string &NameStr = "", - Instruction *InsertBefore = 0); - InsertElementInst(Value *Vec, Value *NewElt, unsigned Idx, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0); InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, - const std::string &NameStr, BasicBlock *InsertAtEnd); - InsertElementInst(Value *Vec, Value *NewElt, unsigned Idx, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); public: - static InsertElementInst *Create(const InsertElementInst &IE) { - return new(IE.getNumOperands()) InsertElementInst(IE); - } static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx, - const std::string &NameStr = "", - Instruction *InsertBefore = 0) { - return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore); - } - static InsertElementInst *Create(Value *Vec, Value *NewElt, unsigned Idx, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore); } static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx, - const std::string &NameStr, - BasicBlock *InsertAtEnd) { - return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd); - } - static InsertElementInst *Create(Value *Vec, Value *NewElt, unsigned Idx, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd); } @@ -1440,7 +1411,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<3> { +struct OperandTraits : public FixedNumOperandTraits<3> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value) @@ -1453,17 +1424,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value) /// input vectors. /// class ShuffleVectorInst : public Instruction { - ShuffleVectorInst(const ShuffleVectorInst &IE); public: // allocate space for exactly three operands void *operator new(size_t s) { return User::operator new(s, 3); } ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefor = 0); ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); /// isValidOperands - Return true if a shufflevector instruction can be /// formed with the specified operands. 
@@ -1497,7 +1467,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<3> { +struct OperandTraits : public FixedNumOperandTraits<3> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value) @@ -1514,12 +1484,12 @@ class ExtractValueInst : public UnaryInstruction { ExtractValueInst(const ExtractValueInst &EVI); void init(const unsigned *Idx, unsigned NumIdx, - const std::string &NameStr); - void init(unsigned Idx, const std::string &NameStr); + const Twine &NameStr); + void init(unsigned Idx, const Twine &NameStr); template void init(InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, // This argument ensures that we have an iterator we can // do arithmetic on in constant time std::random_access_iterator_tag) { @@ -1569,12 +1539,12 @@ class ExtractValueInst : public UnaryInstruction { template inline ExtractValueInst(Value *Agg, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, Instruction *InsertBefore); template inline ExtractValueInst(Value *Agg, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); // allocate space for exactly one operand void *operator new(size_t s) { @@ -1585,7 +1555,7 @@ public: template static ExtractValueInst *Create(Value *Agg, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertBefore); @@ -1593,7 +1563,7 @@ public: template static ExtractValueInst *Create(Value *Agg, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { return new ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertAtEnd); } @@ -1602,13 +1572,13 @@ public: /// index extractvalue instructions are much more common than those with /// more than one. 
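ExtractValueInst keeps both spellings after the Twine switch: an iterator-range Create for nested aggregates and a single-index convenience. A sketch pulling a nested field out of a two-level struct value (secondField and the index path are illustrative):

#include "llvm/Instructions.h"
using namespace llvm;

Value *secondField(Value *Agg, Instruction *InsertPt) {
  // For an aggregate like {i32, {i32, i32}}, the inner struct's first
  // member is reached with the index path {1, 0}.
  unsigned Idxs[] = { 1, 0 };
  return ExtractValueInst::Create(Agg, Idxs, Idxs + 2, "fld", InsertPt);
}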
static ExtractValueInst *Create(Value *Agg, unsigned Idx, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { unsigned Idxs[1] = { Idx }; return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertBefore); } static ExtractValueInst *Create(Value *Agg, unsigned Idx, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { unsigned Idxs[1] = { Idx }; return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertAtEnd); @@ -1668,7 +1638,7 @@ template ExtractValueInst::ExtractValueInst(Value *Agg, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, Instruction *InsertBefore) : UnaryInstruction(checkType(getIndexedType(Agg->getType(), IdxBegin, IdxEnd)), @@ -1680,7 +1650,7 @@ template ExtractValueInst::ExtractValueInst(Value *Agg, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) : UnaryInstruction(checkType(getIndexedType(Agg->getType(), IdxBegin, IdxEnd)), @@ -1703,13 +1673,13 @@ class InsertValueInst : public Instruction { void *operator new(size_t, unsigned); // Do not implement InsertValueInst(const InsertValueInst &IVI); void init(Value *Agg, Value *Val, const unsigned *Idx, unsigned NumIdx, - const std::string &NameStr); - void init(Value *Agg, Value *Val, unsigned Idx, const std::string &NameStr); + const Twine &NameStr); + void init(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr); template void init(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, // This argument ensures that we have an iterator we can // do arithmetic on in constant time std::random_access_iterator_tag) { @@ -1733,20 +1703,20 @@ class InsertValueInst : public Instruction { template inline InsertValueInst(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, Instruction *InsertBefore); template inline InsertValueInst(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); /// Constructors - These two constructors are convenience methods because one /// and two index insertvalue instructions are so common. InsertValueInst(Value *Agg, Value *Val, - unsigned Idx, const std::string &NameStr = "", + unsigned Idx, const Twine &NameStr = "", Instruction *InsertBefore = 0); InsertValueInst(Value *Agg, Value *Val, unsigned Idx, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); public: // allocate space for exactly two operands void *operator new(size_t s) { @@ -1756,7 +1726,7 @@ public: template static InsertValueInst *Create(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd, NameStr, InsertBefore); @@ -1764,7 +1734,7 @@ public: template static InsertValueInst *Create(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd, NameStr, InsertAtEnd); @@ -1774,12 +1744,12 @@ public: /// index insertvalue instructions are much more common than those with /// more than one. 
static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new InsertValueInst(Agg, Val, Idx, NameStr, InsertBefore); } static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { return new InsertValueInst(Agg, Val, Idx, NameStr, InsertAtEnd); } @@ -1832,7 +1802,7 @@ public: }; template <> -struct OperandTraits : FixedNumOperandTraits<2> { +struct OperandTraits : public FixedNumOperandTraits<2> { }; template @@ -1840,7 +1810,7 @@ InsertValueInst::InsertValueInst(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, Instruction *InsertBefore) : Instruction(Agg->getType(), InsertValue, OperandTraits::op_begin(this), @@ -1853,7 +1823,7 @@ InsertValueInst::InsertValueInst(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) : Instruction(Agg->getType(), InsertValue, OperandTraits::op_begin(this), @@ -1882,24 +1852,24 @@ class PHINode : public Instruction { void *operator new(size_t s) { return User::operator new(s, 0); } - explicit PHINode(const Type *Ty, const std::string &NameStr = "", + explicit PHINode(const Type *Ty, const Twine &NameStr = "", Instruction *InsertBefore = 0) : Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore), ReservedSpace(0) { setName(NameStr); } - PHINode(const Type *Ty, const std::string &NameStr, BasicBlock *InsertAtEnd) + PHINode(const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd) : Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd), ReservedSpace(0) { setName(NameStr); } public: - static PHINode *Create(const Type *Ty, const std::string &NameStr = "", + static PHINode *Create(const Type *Ty, const Twine &NameStr = "", Instruction *InsertBefore = 0) { return new PHINode(Ty, NameStr, InsertBefore); } - static PHINode *Create(const Type *Ty, const std::string &NameStr, + static PHINode *Create(const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd) { return new PHINode(Ty, NameStr, InsertAtEnd); } @@ -1940,19 +1910,29 @@ public: return i/2; } + /// getIncomingBlock - Return incoming basic block #i. + /// + BasicBlock *getIncomingBlock(unsigned i) const { + return cast(getOperand(i*2+1)); + } + /// getIncomingBlock - Return incoming basic block corresponding - /// to value use iterator + /// to an operand of the PHI. /// - template - BasicBlock *getIncomingBlock(value_use_iterator I) const { - assert(this == *I && "Iterator doesn't point to PHI's Uses?"); - return static_cast((&I.getUse() + 1)->get()); + BasicBlock *getIncomingBlock(const Use &U) const { + assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?"); + return cast((&U + 1)->get()); } - /// getIncomingBlock - Return incoming basic block number x + + /// getIncomingBlock - Return incoming basic block corresponding + /// to value use iterator. /// - BasicBlock *getIncomingBlock(unsigned i) const { - return static_cast(getOperand(i*2+1)); + template + BasicBlock *getIncomingBlock(value_use_iterator I) const { + return getIncomingBlock(I.getUse()); } + + void setIncomingBlock(unsigned i, BasicBlock *BB) { setOperand(i*2+1, BB); } @@ -2013,7 +1993,12 @@ public: /// hasConstantValue - If the specified PHI node always merges together the /// same value, return the value, otherwise return null. 
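[editor's note: the reworked getIncomingBlock overloads make each lookup explicit — by operand index, by Use, or by value-use iterator (now delegating to the Use form) — and switch from static_cast to cast<>, so a malformed PHI asserts instead of silently mis-casting. For instance:

#include "llvm/Instructions.h"
using namespace llvm;

// Walk a PHI's (value, predecessor) pairs with the index-based accessor.
static bool HasIncomingFrom(const PHINode *PN, const BasicBlock *Pred) {
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
    if (PN->getIncomingBlock(i) == Pred)
      return true;
  return false;
}
]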
/// - Value *hasConstantValue(bool AllowNonDominatingInstruction = false) const; + /// If the PHI has undef operands, but all the rest of the operands are + /// some unique value, return that value if it can be proved that the + /// value dominates the PHI. If DT is null, use a conservative check, + /// otherwise use DT to test for dominance. + /// + Value *hasConstantValue(DominatorTree *DT = 0) const; /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const PHINode *) { return true; } @@ -2028,7 +2013,7 @@ public: }; template <> -struct OperandTraits : HungoffOperandTraits<2> { +struct OperandTraits : public HungoffOperandTraits<2> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value) @@ -2057,18 +2042,21 @@ private: // // NOTE: If the Value* passed is of type void then the constructor behaves as // if it was passed NULL. - explicit ReturnInst(Value *retVal = 0, Instruction *InsertBefore = 0); - ReturnInst(Value *retVal, BasicBlock *InsertAtEnd); - explicit ReturnInst(BasicBlock *InsertAtEnd); + explicit ReturnInst(LLVMContext &C, Value *retVal = 0, + Instruction *InsertBefore = 0); + ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd); + explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd); public: - static ReturnInst* Create(Value *retVal = 0, Instruction *InsertBefore = 0) { - return new(!!retVal) ReturnInst(retVal, InsertBefore); + static ReturnInst* Create(LLVMContext &C, Value *retVal = 0, + Instruction *InsertBefore = 0) { + return new(!!retVal) ReturnInst(C, retVal, InsertBefore); } - static ReturnInst* Create(Value *retVal, BasicBlock *InsertAtEnd) { - return new(!!retVal) ReturnInst(retVal, InsertAtEnd); + static ReturnInst* Create(LLVMContext &C, Value *retVal, + BasicBlock *InsertAtEnd) { + return new(!!retVal) ReturnInst(C, retVal, InsertAtEnd); } - static ReturnInst* Create(BasicBlock *InsertAtEnd) { - return new(0) ReturnInst(InsertAtEnd); + static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) { + return new(0) ReturnInst(C, InsertAtEnd); } virtual ~ReturnInst(); @@ -2101,7 +2089,7 @@ public: }; template <> -struct OperandTraits : OptionalOperandTraits<> { +struct OperandTraits : public OptionalOperandTraits<> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value) @@ -2209,7 +2197,7 @@ private: }; template <> -struct OperandTraits : VariadicOperandTraits<1> {}; +struct OperandTraits : public VariadicOperandTraits<1> {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value) @@ -2358,7 +2346,7 @@ private: }; template <> -struct OperandTraits : HungoffOperandTraits<2> { +struct OperandTraits : public HungoffOperandTraits<2> { }; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value) @@ -2380,7 +2368,7 @@ class InvokeInst : public TerminatorInst { template void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, + const Twine &NameStr, // This argument ensures that we have an iterator we can // do arithmetic on in constant time std::random_access_iterator_tag) { @@ -2402,7 +2390,7 @@ class InvokeInst : public TerminatorInst { inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, InputIterator ArgBegin, InputIterator ArgEnd, unsigned Values, - const std::string &NameStr, Instruction *InsertBefore); + const Twine &NameStr, Instruction *InsertBefore); /// Construct an InvokeInst given a range of arguments. 
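[editor's note: ReturnInst creation now threads an LLVMContext explicitly — a "ret void" carries no operand from which to recover the context, so the caller must supply it. A sketch, assuming Value::getContext() (present at this revision):

#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// Terminate BB with "ret void", pulling the context off the block itself.
static ReturnInst *EmitRetVoid(BasicBlock *BB) {
  return ReturnInst::Create(BB->getContext(), BB);
}
]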
/// InputIterator must be a random-access iterator pointing to @@ -2415,13 +2403,13 @@ class InvokeInst : public TerminatorInst { inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, InputIterator ArgBegin, InputIterator ArgEnd, unsigned Values, - const std::string &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr, BasicBlock *InsertAtEnd); public: template static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr = "", + const Twine &NameStr = "", Instruction *InsertBefore = 0) { unsigned Values(ArgEnd - ArgBegin + 3); return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd, @@ -2431,7 +2419,7 @@ public: static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, InputIterator ArgBegin, InputIterator ArgEnd, - const std::string &NameStr, + const Twine &NameStr, BasicBlock *InsertAtEnd) { unsigned Values(ArgEnd - ArgBegin + 3); return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd, @@ -2445,9 +2433,11 @@ public: /// getCallingConv/setCallingConv - Get or set the calling convention of this /// function call. - unsigned getCallingConv() const { return SubclassData; } - void setCallingConv(unsigned CC) { - SubclassData = CC; + CallingConv::ID getCallingConv() const { + return static_cast(SubclassData); + } + void setCallingConv(CallingConv::ID CC) { + SubclassData = static_cast(CC); } /// getAttributes - Return the parameter attributes for this invoke. @@ -2474,7 +2464,7 @@ public: /// @brief Determine if the call does not access memory. bool doesNotAccessMemory() const { - return paramHasAttr(0, Attribute::ReadNone); + return paramHasAttr(~0, Attribute::ReadNone); } void setDoesNotAccessMemory(bool NotAccessMemory = true) { if (NotAccessMemory) addAttribute(~0, Attribute::ReadNone); @@ -2574,7 +2564,7 @@ private: }; template <> -struct OperandTraits : VariadicOperandTraits<3> { +struct OperandTraits : public VariadicOperandTraits<3> { }; template @@ -2582,7 +2572,7 @@ InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, InputIterator ArgBegin, InputIterator ArgEnd, unsigned Values, - const std::string &NameStr, Instruction *InsertBefore) + const Twine &NameStr, Instruction *InsertBefore) : TerminatorInst(cast(cast(Func->getType()) ->getElementType())->getReturnType(), Instruction::Invoke, @@ -2596,7 +2586,7 @@ InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, InputIterator ArgBegin, InputIterator ArgEnd, unsigned Values, - const std::string &NameStr, BasicBlock *InsertAtEnd) + const Twine &NameStr, BasicBlock *InsertAtEnd) : TerminatorInst(cast(cast(Func->getType()) ->getElementType())->getReturnType(), Instruction::Invoke, @@ -2623,8 +2613,8 @@ public: void *operator new(size_t s) { return User::operator new(s, 0); } - explicit UnwindInst(Instruction *InsertBefore = 0); - explicit UnwindInst(BasicBlock *InsertAtEnd); + explicit UnwindInst(LLVMContext &C, Instruction *InsertBefore = 0); + explicit UnwindInst(LLVMContext &C, BasicBlock *InsertAtEnd); virtual UnwindInst *clone() const; @@ -2660,8 +2650,8 @@ public: void *operator new(size_t s) { return User::operator new(s, 0); } - explicit UnreachableInst(Instruction *InsertBefore = 0); - explicit UnreachableInst(BasicBlock *InsertAtEnd); + explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = 0); + explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd); 
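[editor's note: getCallingConv/setCallingConv now traffic in the CallingConv::ID enumeration rather than a raw unsigned, with SubclassData still the backing store. E.g.:

#include "llvm/CallingConv.h"
#include "llvm/Instructions.h"
using namespace llvm;

// Mark an invoke as using the fast calling convention; passing a bare
// integer no longer compiles without an explicit cast to the enum.
static void MakeFastInvoke(InvokeInst *II) {
  II->setCallingConv(CallingConv::Fast);
}
]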
virtual UnreachableInst *clone() const; @@ -2687,16 +2677,12 @@ private: /// @brief This class represents a truncation of integer types. class TruncInst : public CastInst { - /// Private copy constructor - TruncInst(const TruncInst &CI) - : CastInst(CI.getType(), Trunc, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics TruncInst( Value *S, ///< The value to be truncated const Type *Ty, ///< The (smaller) type to truncate to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2704,12 +2690,12 @@ public: TruncInst( Value *S, ///< The value to be truncated const Type *Ty, ///< The (smaller) type to truncate to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical TruncInst - virtual CastInst *clone() const; + virtual TruncInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const TruncInst *) { return true; } @@ -2727,16 +2713,12 @@ public: /// @brief This class represents zero extension of integer types. class ZExtInst : public CastInst { - /// @brief Private copy constructor - ZExtInst(const ZExtInst &CI) - : CastInst(CI.getType(), ZExt, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics ZExtInst( Value *S, ///< The value to be zero extended const Type *Ty, ///< The type to zero extend to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2744,12 +2726,12 @@ public: ZExtInst( Value *S, ///< The value to be zero extended const Type *Ty, ///< The type to zero extend to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical ZExtInst - virtual CastInst *clone() const; + virtual ZExtInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const ZExtInst *) { return true; } @@ -2767,16 +2749,12 @@ public: /// @brief This class represents a sign extension of integer types. 
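[editor's note: from here on, each cast instruction's clone() tightens its return type from CastInst* to the concrete class via C++ covariant return types (the private copy constructors are removed in the same hunks). A stripped-down, self-contained illustration of the language feature being relied on:

struct Base {
  virtual ~Base() {}
  virtual Base *clone() const { return new Base(*this); }
};

struct Derived : Base {
  // Covariant override: returning Derived* is legal because Derived*
  // converts to Base*, and callers keep the precise static type.
  virtual Derived *clone() const { return new Derived(*this); }
};

int main() {
  Derived D;
  Derived *Copy = D.clone(); // no cast needed at the call site
  delete Copy;
  return 0;
}
]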
class SExtInst : public CastInst { - /// @brief Private copy constructor - SExtInst(const SExtInst &CI) - : CastInst(CI.getType(), SExt, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics SExtInst( Value *S, ///< The value to be sign extended const Type *Ty, ///< The type to sign extend to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2784,12 +2762,12 @@ public: SExtInst( Value *S, ///< The value to be sign extended const Type *Ty, ///< The type to sign extend to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical SExtInst - virtual CastInst *clone() const; + virtual SExtInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const SExtInst *) { return true; } @@ -2807,15 +2785,12 @@ public: /// @brief This class represents a truncation of floating point types. class FPTruncInst : public CastInst { - FPTruncInst(const FPTruncInst &CI) - : CastInst(CI.getType(), FPTrunc, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics FPTruncInst( Value *S, ///< The value to be truncated const Type *Ty, ///< The type to truncate to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2823,12 +2798,12 @@ public: FPTruncInst( Value *S, ///< The value to be truncated const Type *Ty, ///< The type to truncate to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical FPTruncInst - virtual CastInst *clone() const; + virtual FPTruncInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const FPTruncInst *) { return true; } @@ -2846,15 +2821,12 @@ public: /// @brief This class represents an extension of floating point types. 
class FPExtInst : public CastInst { - FPExtInst(const FPExtInst &CI) - : CastInst(CI.getType(), FPExt, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics FPExtInst( Value *S, ///< The value to be extended const Type *Ty, ///< The type to extend to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2862,12 +2834,12 @@ public: FPExtInst( Value *S, ///< The value to be extended const Type *Ty, ///< The type to extend to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical FPExtInst - virtual CastInst *clone() const; + virtual FPExtInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const FPExtInst *) { return true; } @@ -2885,15 +2857,12 @@ public: /// @brief This class represents a cast unsigned integer to floating point. class UIToFPInst : public CastInst { - UIToFPInst(const UIToFPInst &CI) - : CastInst(CI.getType(), UIToFP, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics UIToFPInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2901,12 +2870,12 @@ public: UIToFPInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical UIToFPInst - virtual CastInst *clone() const; + virtual UIToFPInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const UIToFPInst *) { return true; } @@ -2924,15 +2893,12 @@ public: /// @brief This class represents a cast from signed integer to floating point. 
class SIToFPInst : public CastInst { - SIToFPInst(const SIToFPInst &CI) - : CastInst(CI.getType(), SIToFP, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics SIToFPInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2940,12 +2906,12 @@ public: SIToFPInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical SIToFPInst - virtual CastInst *clone() const; + virtual SIToFPInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const SIToFPInst *) { return true; } @@ -2963,15 +2929,12 @@ public: /// @brief This class represents a cast from floating point to unsigned integer class FPToUIInst : public CastInst { - FPToUIInst(const FPToUIInst &CI) - : CastInst(CI.getType(), FPToUI, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics FPToUIInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -2979,12 +2942,12 @@ public: FPToUIInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< Where to insert the new instruction ); /// @brief Clone an identical FPToUIInst - virtual CastInst *clone() const; + virtual FPToUIInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const FPToUIInst *) { return true; } @@ -3002,15 +2965,12 @@ public: /// @brief This class represents a cast from floating point to signed integer. 
class FPToSIInst : public CastInst { - FPToSIInst(const FPToSIInst &CI) - : CastInst(CI.getType(), FPToSI, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics FPToSIInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -3018,12 +2978,12 @@ public: FPToSIInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical FPToSIInst - virtual CastInst *clone() const; + virtual FPToSIInst *clone() const; /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const FPToSIInst *) { return true; } @@ -3041,15 +3001,12 @@ public: /// @brief This class represents a cast from an integer to a pointer. class IntToPtrInst : public CastInst { - IntToPtrInst(const IntToPtrInst &CI) - : CastInst(CI.getType(), IntToPtr, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics IntToPtrInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -3057,12 +3014,12 @@ public: IntToPtrInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical IntToPtrInst - virtual CastInst *clone() const; + virtual IntToPtrInst *clone() const; // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntToPtrInst *) { return true; } @@ -3080,15 +3037,12 @@ public: /// @brief This class represents a cast from a pointer to an integer class PtrToIntInst : public CastInst { - PtrToIntInst(const PtrToIntInst &CI) - : CastInst(CI.getType(), PtrToInt, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics PtrToIntInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -3096,12 +3050,12 @@ public: PtrToIntInst( Value *S, ///< The value to be converted const Type *Ty, ///< The type to convert to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical PtrToIntInst - virtual CastInst *clone() const; + virtual PtrToIntInst *clone() const; // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const PtrToIntInst *) { return true; } @@ -3119,15 +3073,12 @@ public: /// @brief This class 
represents a no-op cast from one type to another. class BitCastInst : public CastInst { - BitCastInst(const BitCastInst &CI) - : CastInst(CI.getType(), BitCast, CI.getOperand(0)) { - } public: /// @brief Constructor with insert-before-instruction semantics BitCastInst( Value *S, ///< The value to be casted const Type *Ty, ///< The type to casted to - const std::string &NameStr = "", ///< A name for the new instruction + const Twine &NameStr = "", ///< A name for the new instruction Instruction *InsertBefore = 0 ///< Where to insert the new instruction ); @@ -3135,12 +3086,12 @@ public: BitCastInst( Value *S, ///< The value to be casted const Type *Ty, ///< The type to casted to - const std::string &NameStr, ///< A name for the new instruction + const Twine &NameStr, ///< A name for the new instruction BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); /// @brief Clone an identical BitCastInst - virtual CastInst *clone() const; + virtual BitCastInst *clone() const; // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const BitCastInst *) { return true; } diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h index 8f5e05f70cd79..6a8f376392613 100644 --- a/include/llvm/IntrinsicInst.h +++ b/include/llvm/IntrinsicInst.h @@ -25,6 +25,7 @@ #define LLVM_INTRINSICINST_H #include "llvm/Constants.h" +#include "llvm/Metadata.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" @@ -85,8 +86,8 @@ namespace llvm { struct DbgStopPointInst : public DbgInfoIntrinsic { Value *getLineValue() const { return const_cast(getOperand(1)); } Value *getColumnValue() const { return const_cast(getOperand(2)); } - Value *getContext() const { - return StripCast(getOperand(3)); + MDNode *getContext() const { + return cast(getOperand(3)); } unsigned getLine() const { @@ -112,7 +113,7 @@ namespace llvm { /// DbgFuncStartInst - This represents the llvm.dbg.func.start instruction. /// struct DbgFuncStartInst : public DbgInfoIntrinsic { - Value *getSubprogram() const { return StripCast(getOperand(1)); } + MDNode *getSubprogram() const { return cast(getOperand(1)); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgFuncStartInst *) { return true; } @@ -127,7 +128,7 @@ namespace llvm { /// DbgRegionStartInst - This represents the llvm.dbg.region.start /// instruction. struct DbgRegionStartInst : public DbgInfoIntrinsic { - Value *getContext() const { return StripCast(getOperand(1)); } + MDNode *getContext() const { return cast(getOperand(1)); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgRegionStartInst *) { return true; } @@ -142,7 +143,7 @@ namespace llvm { /// DbgRegionEndInst - This represents the llvm.dbg.region.end instruction. 
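[editor's note: with debug info descriptors now first-class metadata, the Dbg*Inst accessors return MDNode* via cast<> instead of stripping bitcasts off a plain Value*. A caller-side sketch:

#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
using namespace llvm;

// Fetch the variable descriptor of an llvm.dbg.declare; the operand is
// now required to be an MDNode, so a bad operand asserts under cast<>.
static MDNode *DeclaredVariable(const DbgDeclareInst *DDI) {
  return DDI->getVariable();
}
]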
/// struct DbgRegionEndInst : public DbgInfoIntrinsic { - Value *getContext() const { return StripCast(getOperand(1)); } + MDNode *getContext() const { return cast(getOperand(1)); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgRegionEndInst *) { return true; } @@ -158,7 +159,7 @@ namespace llvm { /// struct DbgDeclareInst : public DbgInfoIntrinsic { Value *getAddress() const { return getOperand(1); } - Value *getVariable() const { return StripCast(getOperand(2)); } + MDNode *getVariable() const { return cast(getOperand(2)); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgDeclareInst *) { return true; } @@ -202,9 +203,13 @@ namespace llvm { "setLength called with value of wrong type!"); setOperand(3, L); } - void setAlignment(unsigned A) { - const Type *Int32Ty = getOperand(4)->getType(); - setOperand(4, ConstantInt::get(Int32Ty, A)); + + void setAlignment(Constant* A) { + setOperand(4, A); + } + + const Type *getAlignmentType() const { + return getOperand(4)->getType(); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -308,8 +313,7 @@ namespace llvm { // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const EHSelectorInst *) { return true; } static inline bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::eh_selector_i32 || - I->getIntrinsicID() == Intrinsic::eh_selector_i64; + return I->getIntrinsicID() == Intrinsic::eh_selector; } static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); diff --git a/include/llvm/Intrinsics.h b/include/llvm/Intrinsics.h index 227eb5a5b70f4..8f1b1aee1f5a9 100644 --- a/include/llvm/Intrinsics.h +++ b/include/llvm/Intrinsics.h @@ -23,6 +23,7 @@ namespace llvm { class Type; class FunctionType; class Function; +class LLVMContext; class Module; class AttrListPtr; @@ -47,7 +48,8 @@ namespace Intrinsic { /// Intrinsic::getType(ID) - Return the function type for an intrinsic. /// - const FunctionType *getType(ID id, const Type **Tys = 0, unsigned numTys = 0); + const FunctionType *getType(LLVMContext &Context, ID id, + const Type **Tys = 0, unsigned numTys = 0); /// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be /// overloaded. @@ -61,7 +63,7 @@ namespace Intrinsic { /// declaration for an intrinsic, and return it. /// /// The Tys and numTys parameters are for intrinsics with overloaded types - /// (i.e., those using iAny or fAny). For a declaration for an overloaded + /// (e.g., those using iAny or fAny). For a declaration for an overloaded /// intrinsic, Tys should point to an array of numTys pointers to Type, /// and must provide exactly one type for each overloaded type in the /// intrinsic. diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index c036151329c62..38ac4c2927c0b 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -1,10 +1,10 @@ //===- Intrinsics.td - Defines all LLVM intrinsics ---------*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines properties of all LLVM intrinsics. @@ -21,7 +21,7 @@ class IntrinsicProperty; // Intr*Mem - Memory properties. 
An intrinsic is allowed to have exactly one of // these properties set. They are listed from the most aggressive (best to use -// if correct) to the least aggressive. If no property is set, the worst case +// if correct) to the least aggressive. If no property is set, the worst case // is assumed (IntrWriteMem). // IntrNoMem - The intrinsic does not access memory or have any other side @@ -42,7 +42,7 @@ def IntrReadMem : IntrinsicProperty; // and writes may be volatile, but except for this it has no other side effects. def IntrWriteArgMem : IntrinsicProperty; -// IntrWriteMem - This intrinsic may read or modify unspecified memory or has +// IntrWriteMem - This intrinsic may read or modify unspecified memory or has // other side effects. It cannot be modified by the optimizer. This is the // default if the intrinsic has no other Intr*Mem property. def IntrWriteMem : IntrinsicProperty; @@ -66,12 +66,12 @@ class LLVMType { class LLVMPointerType : LLVMType{ LLVMType ElTy = elty; -} +} class LLVMAnyPointerType : LLVMType{ LLVMType ElTy = elty; -} +} // Match the type of another intrinsic parameter. Number is an index into the // list of overloaded types for the intrinsic, excluding all the fixed types. @@ -84,7 +84,7 @@ class LLVMMatchType int Number = num; } -// Match the type of another intrinsic parameter that is expected to be +// Match the type of another intrinsic parameter that is expected to be // an integral vector type, but change the element size to be twice as wide // or half as wide as the other type. This is only useful when the intrinsic // is overloaded, so the matched type should be declared as iAny. @@ -94,6 +94,7 @@ class LLVMTruncatedElementVectorType : LLVMMatchType; def llvm_void_ty : LLVMType; def llvm_anyint_ty : LLVMType; def llvm_anyfloat_ty : LLVMType; +def llvm_anyvector_ty : LLVMType; def llvm_i1_ty : LLVMType; def llvm_i8_ty : LLVMType; def llvm_i16_ty : LLVMType; @@ -109,6 +110,7 @@ def llvm_ptrptr_ty : LLVMPointerType; // i8** def llvm_anyptr_ty : LLVMAnyPointerType; // (space)i8* def llvm_empty_ty : LLVMType; // { } def llvm_descriptor_ty : LLVMPointerType; // { }* +def llvm_metadata_ty : LLVMType; // !{...} def llvm_v2i8_ty : LLVMType; // 2 x i8 def llvm_v4i8_ty : LLVMType; // 4 x i8 @@ -127,7 +129,6 @@ def llvm_v2i64_ty : LLVMType; // 2 x i64 def llvm_v4i64_ty : LLVMType; // 4 x i64 def llvm_v2f32_ty : LLVMType; // 2 x float -def llvm_v3f32_ty : LLVMType; // 3 x float def llvm_v4f32_ty : LLVMType; // 4 x float def llvm_v8f32_ty : LLVMType; // 8 x float def llvm_v2f64_ty : LLVMType; // 2 x double @@ -173,7 +174,7 @@ class GCCBuiltin { //===--------------- Variable Argument Handling Intrinsics ----------------===// -// +// def int_vastart : Intrinsic<[llvm_void_ty], [llvm_ptr_ty], [], "llvm.va_start">; def int_vacopy : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_ptr_ty], [], @@ -181,7 +182,7 @@ def int_vacopy : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_ptr_ty], [], def int_vaend : Intrinsic<[llvm_void_ty], [llvm_ptr_ty], [], "llvm.va_end">; //===------------------- Garbage Collection Intrinsics --------------------===// -// +// def int_gcroot : Intrinsic<[llvm_void_ty], [llvm_ptrptr_ty, llvm_ptr_ty]>; def int_gcread : Intrinsic<[llvm_ptr_ty], @@ -192,7 +193,7 @@ def int_gcwrite : Intrinsic<[llvm_void_ty], [IntrWriteArgMem, NoCapture<1>, NoCapture<2>]>; //===--------------------- Code Generator Intrinsics ----------------------===// -// +// def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; def int_frameaddress : 
Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>; @@ -242,7 +243,7 @@ let Properties = [IntrReadMem] in { def int_sqrt : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>; def int_sin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_pow : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; def int_log : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; @@ -267,11 +268,6 @@ let Properties = [IntrNoMem] in { def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; - def int_part_select : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty]>; - def int_part_set : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_anyint_ty, - llvm_i32_ty, llvm_i32_ty]>; } //===------------------------ Debugger Intrinsics -------------------------===// @@ -282,25 +278,22 @@ let Properties = [IntrNoMem] in { // places. let Properties = [IntrNoMem] in { def int_dbg_stoppoint : Intrinsic<[llvm_void_ty], - [llvm_i32_ty, llvm_i32_ty, - llvm_descriptor_ty]>; - def int_dbg_region_start : Intrinsic<[llvm_void_ty], [llvm_descriptor_ty]>; - def int_dbg_region_end : Intrinsic<[llvm_void_ty], [llvm_descriptor_ty]>; - def int_dbg_func_start : Intrinsic<[llvm_void_ty], [llvm_descriptor_ty]>; + [llvm_i32_ty, llvm_i32_ty, + llvm_metadata_ty]>; + def int_dbg_region_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>; + def int_dbg_region_end : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>; + def int_dbg_func_start : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>; def int_dbg_declare : Intrinsic<[llvm_void_ty], - [llvm_descriptor_ty, llvm_descriptor_ty]>; + [llvm_descriptor_ty, llvm_metadata_ty]>; } //===------------------ Exception Handling Intrinsics----------------------===// // -def int_eh_exception : Intrinsic<[llvm_ptr_ty]>; -def int_eh_selector_i32 : Intrinsic<[llvm_i32_ty], - [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>; -def int_eh_selector_i64 : Intrinsic<[llvm_i64_ty], - [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>; +def int_eh_exception : Intrinsic<[llvm_ptr_ty], [], [IntrReadMem]>; +def int_eh_selector : Intrinsic<[llvm_i32_ty], + [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>; -def int_eh_typeid_for_i32 : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; -def int_eh_typeid_for_i64 : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty]>; +def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; def int_eh_return_i32 : Intrinsic<[llvm_void_ty], [llvm_i32_ty, llvm_ptr_ty]>; def int_eh_return_i64 : Intrinsic<[llvm_void_ty], [llvm_i64_ty, llvm_ptr_ty]>; @@ -311,19 +304,20 @@ def int_eh_unwind_init: Intrinsic<[llvm_void_ty]>, def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>; let Properties = [IntrNoMem] in { -def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; -def int_eh_sjlj_longjmp : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_i32_ty]>; + def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; + def int_eh_sjlj_longjmp : Intrinsic<[llvm_void_ty], [llvm_ptr_ty]>; + def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>; } //===---------------- Generic Variable Attribute Intrinsics----------------===// // def int_var_annotation : Intrinsic<[llvm_void_ty], [llvm_ptr_ty, llvm_ptr_ty, - llvm_ptr_ty, 
llvm_i32_ty], + llvm_ptr_ty, llvm_i32_ty], [], "llvm.var.annotation">; def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType], [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty, - llvm_i32_ty], + llvm_i32_ty], [], "llvm.ptr.annotation">; def int_annotation : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_ptr_ty, @@ -423,7 +417,23 @@ def int_atomic_load_umax : Intrinsic<[llvm_anyint_ty], LLVMMatchType<0>], [IntrWriteArgMem, NoCapture<0>]>, GCCBuiltin<"__sync_fetch_and_umax">; - + +//===------------------------- Memory Use Markers -------------------------===// +// +def int_lifetime_start : Intrinsic<[llvm_void_ty], + [llvm_i64_ty, llvm_ptr_ty], + [IntrWriteArgMem, NoCapture<1>]>; +def int_lifetime_end : Intrinsic<[llvm_void_ty], + [llvm_i64_ty, llvm_ptr_ty], + [IntrWriteArgMem, NoCapture<1>]>; +def int_invariant_start : Intrinsic<[llvm_descriptor_ty], + [llvm_i64_ty, llvm_ptr_ty], + [IntrReadArgMem, NoCapture<1>]>; +def int_invariant_end : Intrinsic<[llvm_void_ty], + [llvm_descriptor_ty, llvm_i64_ty, + llvm_ptr_ty], + [IntrWriteArgMem, NoCapture<2>]>; + //===-------------------------- Other Intrinsics --------------------------===// // def int_flt_rounds : Intrinsic<[llvm_i32_ty]>, @@ -464,3 +474,4 @@ include "llvm/IntrinsicsARM.td" include "llvm/IntrinsicsCellSPU.td" include "llvm/IntrinsicsAlpha.td" include "llvm/IntrinsicsXCore.td" +include "llvm/IntrinsicsBlackfin.td" diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td index 4723ffb530d28..c408a2f374ec4 100644 --- a/include/llvm/IntrinsicsARM.td +++ b/include/llvm/IntrinsicsARM.td @@ -27,41 +27,36 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". // The following classes do not correspond directly to GCC builtins. class Neon_1Arg_Intrinsic - : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; - class Neon_1Arg_Float_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; class Neon_1Arg_Narrow_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedElementVectorType<0>], [IntrNoMem]>; class Neon_1Arg_Long_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedElementVectorType<0>], [IntrNoMem]>; class Neon_2Arg_Intrinsic - : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem]>; - class Neon_2Arg_Float_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class Neon_2Arg_Narrow_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : Intrinsic<[llvm_anyvector_ty], [LLVMExtendedElementVectorType<0>, LLVMExtendedElementVectorType<0>], [IntrNoMem]>; class Neon_2Arg_Long_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedElementVectorType<0>, LLVMTruncatedElementVectorType<0>], [IntrNoMem]>; class Neon_2Arg_Wide_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMTruncatedElementVectorType<0>], [IntrNoMem]>; class Neon_3Arg_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class Neon_3Arg_Long_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMTruncatedElementVectorType<0>, LLVMTruncatedElementVectorType<0>], @@ -70,6 +65,28 @@ let TargetPrefix = "arm" in { // All 
intrinsics start with "llvm.arm.". : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; class Neon_CvtFPToFx_Intrinsic : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; + + // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors. + // Besides the table, VTBL has one other v8i8 argument and VTBX has two. + // Overall, the classes range from 2 to 6 v8i8 arguments. + class Neon_Tbl2Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; + class Neon_Tbl3Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; + class Neon_Tbl4Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], + [IntrNoMem]>; + class Neon_Tbl5Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, + llvm_v8i8_ty], [IntrNoMem]>; + class Neon_Tbl6Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, + llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; } // Arithmetic ops @@ -110,18 +127,16 @@ let Properties = [IntrNoMem, Commutative] in { // Vector Maximum. def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic; def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic; - def int_arm_neon_vmaxf : Neon_2Arg_Float_Intrinsic; // Vector Minimum. def int_arm_neon_vmins : Neon_2Arg_Intrinsic; def int_arm_neon_vminu : Neon_2Arg_Intrinsic; - def int_arm_neon_vminf : Neon_2Arg_Float_Intrinsic; // Vector Reciprocal Step. - def int_arm_neon_vrecps : Neon_2Arg_Float_Intrinsic; + def int_arm_neon_vrecps : Neon_2Arg_Intrinsic; // Vector Reciprocal Square Root Step. - def int_arm_neon_vrsqrts : Neon_2Arg_Float_Intrinsic; + def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic; } // Vector Subtract. @@ -155,7 +170,6 @@ let TargetPrefix = "arm" in { // Vector Absolute Differences. def int_arm_neon_vabds : Neon_2Arg_Intrinsic; def int_arm_neon_vabdu : Neon_2Arg_Intrinsic; -def int_arm_neon_vabdf : Neon_2Arg_Float_Intrinsic; def int_arm_neon_vabdls : Neon_2Arg_Long_Intrinsic; def int_arm_neon_vabdlu : Neon_2Arg_Long_Intrinsic; @@ -166,17 +180,16 @@ def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic; def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic; // Vector Pairwise Add. -def int_arm_neon_vpaddi : Neon_2Arg_Intrinsic; -def int_arm_neon_vpaddf : Neon_2Arg_Float_Intrinsic; +def int_arm_neon_vpadd : Neon_2Arg_Intrinsic; // Vector Pairwise Add Long. // Note: This is different than the other "long" NEON intrinsics because // the result vector has half as many elements as the source vector. // The source and destination vector types must be specified separately. let TargetPrefix = "arm" in { - def int_arm_neon_vpaddls : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], + def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; - def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], + def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; } @@ -184,21 +197,19 @@ let TargetPrefix = "arm" in { // Note: This is similar to vpaddl but the destination vector also appears // as the first argument. 
let TargetPrefix = "arm" in { - def int_arm_neon_vpadals : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_anyint_ty], + def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>; - def int_arm_neon_vpadalu : Intrinsic<[llvm_anyint_ty], - [LLVMMatchType<0>, llvm_anyint_ty], + def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty], [IntrNoMem]>; } // Vector Pairwise Maximum and Minimum. def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic; def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic; -def int_arm_neon_vpmaxf : Neon_2Arg_Float_Intrinsic; def int_arm_neon_vpmins : Neon_2Arg_Intrinsic; def int_arm_neon_vpminu : Neon_2Arg_Intrinsic; -def int_arm_neon_vpminf : Neon_2Arg_Float_Intrinsic; // Vector Shifts: // @@ -253,7 +264,6 @@ def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic; // Vector Absolute Value and Saturating Absolute Value. def int_arm_neon_vabs : Neon_1Arg_Intrinsic; -def int_arm_neon_vabsf : Neon_1Arg_Float_Intrinsic; def int_arm_neon_vqabs : Neon_1Arg_Intrinsic; // Vector Saturating Negate. @@ -268,11 +278,9 @@ def int_arm_neon_vcnt : Neon_1Arg_Intrinsic; // Vector Reciprocal Estimate. def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic; -def int_arm_neon_vrecpef : Neon_1Arg_Float_Intrinsic; // Vector Reciprocal Square Root Estimate. def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic; -def int_arm_neon_vrsqrtef : Neon_1Arg_Float_Intrinsic; // Vector Conversions Between Floating-point and Fixed-point. def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic; @@ -288,38 +296,81 @@ def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic; def int_arm_neon_vmovls : Neon_1Arg_Long_Intrinsic; def int_arm_neon_vmovlu : Neon_1Arg_Long_Intrinsic; +// Vector Table Lookup. +// The first 1-4 arguments are the table. +def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic; +def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic; +def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic; +def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic; + +// Vector Table Extension. +// Some elements of the destination vector may not be updated, so the original +// value of that vector is passed as the first argument. The next 1-4 +// arguments after that are the table. +def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic; +def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic; +def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic; +def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic; + let TargetPrefix = "arm" in { // De-interleaving vector loads from N-element structures. - def int_arm_neon_vld3i : Intrinsic<[llvm_anyint_ty], - [llvm_ptr_ty], [IntrReadArgMem]>; - def int_arm_neon_vld3f : Intrinsic<[llvm_anyfloat_ty], - [llvm_ptr_ty], [IntrReadArgMem]>; - def int_arm_neon_vld4i : Intrinsic<[llvm_anyint_ty], - [llvm_ptr_ty], [IntrReadArgMem]>; - def int_arm_neon_vld4f : Intrinsic<[llvm_anyfloat_ty], - [llvm_ptr_ty], [IntrReadArgMem]>; + def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty], + [llvm_ptr_ty], [IntrReadArgMem]>; + def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_ptr_ty], [IntrReadArgMem]>; + def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>], + [llvm_ptr_ty], [IntrReadArgMem]>; + def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_ptr_ty], [IntrReadArgMem]>; + + // Vector load N-element structure to one lane. 
+ def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_ptr_ty, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_i32_ty], + [IntrReadArgMem]>; + def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>], + [llvm_ptr_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty], [IntrReadArgMem]>; + def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_ptr_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_i32_ty], + [IntrReadArgMem]>; // Interleaving vector stores from N-element structures. - def int_arm_neon_vst3i : Intrinsic<[llvm_void_ty], - [llvm_ptr_ty, llvm_anyint_ty], + def int_arm_neon_vst1 : Intrinsic<[llvm_void_ty], + [llvm_ptr_ty, llvm_anyvector_ty], + [IntrWriteArgMem]>; + def int_arm_neon_vst2 : Intrinsic<[llvm_void_ty], + [llvm_ptr_ty, llvm_anyvector_ty, + LLVMMatchType<0>], [IntrWriteArgMem]>; + def int_arm_neon_vst3 : Intrinsic<[llvm_void_ty], + [llvm_ptr_ty, llvm_anyvector_ty, + LLVMMatchType<0>, LLVMMatchType<0>], [IntrWriteArgMem]>; - def int_arm_neon_vst3f : Intrinsic<[llvm_void_ty], - [llvm_ptr_ty, llvm_anyfloat_ty], - [IntrWriteArgMem]>; - def int_arm_neon_vst4i : Intrinsic<[llvm_void_ty], - [llvm_ptr_ty, llvm_anyint_ty], - [IntrWriteArgMem]>; - def int_arm_neon_vst4f : Intrinsic<[llvm_void_ty], - [llvm_ptr_ty, llvm_anyfloat_ty], - [IntrWriteArgMem]>; - - // Vector Table Lookup - def int_arm_neon_vtbl : Intrinsic<[llvm_v8i8_ty], - [llvm_anyint_ty, llvm_v8i8_ty], - [IntrNoMem]>; - // Vector Table Extension - def int_arm_neon_vtbx : Intrinsic<[llvm_v8i8_ty], - [llvm_v8i8_ty, llvm_anyint_ty, - llvm_v8i8_ty], [IntrNoMem]>; + def int_arm_neon_vst4 : Intrinsic<[llvm_void_ty], + [llvm_ptr_ty, llvm_anyvector_ty, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>], [IntrWriteArgMem]>; + + // Vector store N-element structure from one lane. + def int_arm_neon_vst2lane : Intrinsic<[llvm_void_ty], + [llvm_ptr_ty, llvm_anyvector_ty, + LLVMMatchType<0>, llvm_i32_ty], + [IntrWriteArgMem]>; + def int_arm_neon_vst3lane : Intrinsic<[llvm_void_ty], + [llvm_ptr_ty, llvm_anyvector_ty, + LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty], [IntrWriteArgMem]>; + def int_arm_neon_vst4lane : Intrinsic<[llvm_void_ty], + [llvm_ptr_ty, llvm_anyvector_ty, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_i32_ty], + [IntrWriteArgMem]>; } diff --git a/include/llvm/IntrinsicsBlackfin.td b/include/llvm/IntrinsicsBlackfin.td new file mode 100644 index 0000000000000..188e18cc91f65 --- /dev/null +++ b/include/llvm/IntrinsicsBlackfin.td @@ -0,0 +1,34 @@ +//===- IntrinsicsBlackfin.td - Defines Blackfin intrinsics -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the blackfin-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Core synchronisation etc. +// +// These intrinsics have sideeffects. Each represent a single instruction, but +// workarounds are sometimes required depending on the cpu. 
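[editor's note: these Blackfin core-synchronization intrinsics are plain void instructions; from C++ one would materialize a declaration and call it. A hedged sketch — the Intrinsic::bfin_csync enumerator is assumed here from the usual "llvm.bfin.csync" name mapping:

#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Module.h"
using namespace llvm;

// Emit a call to llvm.bfin.csync before InsertPt. The intrinsic takes no
// arguments and returns void, so no operand setup is needed.
static CallInst *EmitCSync(Module *M, Instruction *InsertPt) {
  Function *CSync = Intrinsic::getDeclaration(M, Intrinsic::bfin_csync);
  return CallInst::Create(CSync, "", InsertPt);
}
]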
+ +let TargetPrefix = "bfin" in { + + // Execute csync instruction with workarounds + def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">, + Intrinsic<[llvm_void_ty]>; + + // Execute ssync instruction with workarounds + def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">, + Intrinsic<[llvm_void_ty]>; + + // Execute idle instruction with workarounds + def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">, + Intrinsic<[llvm_void_ty]>; + +} diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 37ba59c92186d..5be032bb82046 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1,10 +1,10 @@ //===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines all of the X86-specific intrinsics. @@ -129,7 +129,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">, Intrinsic<[llvm_v2i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v2i32_ty], [IntrNoMem]>; } @@ -814,9 +814,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector insert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse41_pinsrb : GCCBuiltin<"__builtin_ia32_vec_set_v16qi">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem]>; def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem]>; @@ -867,6 +864,105 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>; } +// Test instruction with bitwise comparison. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// SSE4.2 + +// Miscellaneous +// CRC Instruction +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse42_crc32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_crc32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_sse42_crc32_32 : GCCBuiltin<"__builtin_ia32_crc32si">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse42_crc32_64 : GCCBuiltin<"__builtin_ia32_crc32di">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +} + +// String/text processing ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; +} //===----------------------------------------------------------------------===// // MMX diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h index efe12cccb6d15..a135f671b7714 100644 --- a/include/llvm/LLVMContext.h +++ b/include/llvm/LLVMContext.h @@ -15,35 +15,10 @@ #ifndef LLVM_LLVMCONTEXT_H #define LLVM_LLVMCONTEXT_H -#include "llvm/Support/DataTypes.h" -#include -#include - namespace llvm { class LLVMContextImpl; -class Constant; -class ConstantInt; -class ConstantPointerNull; -class ConstantStruct; -class ConstantAggregateZero; -class ConstantArray; -class ConstantFP; -class ConstantVector; -class UndefValue; -class MDNode; -class MDString; -class IntegerType; -class PointerType; -class StructType; -class ArrayType; -class VectorType; -class OpaqueType; -class FunctionType; -class Type; -class APInt; -class APFloat; -class Value; +class MetadataContext; /// This is an important class for using LLVM in a threaded context. 
It /// (opaquely) owns and manages the core "global" data of LLVM's core @@ -51,170 +26,16 @@ class Value; /// LLVMContext itself provides no locking guarantees, so you should be careful /// to have one context per thread. class LLVMContext { - LLVMContextImpl* pImpl; + // DO NOT IMPLEMENT + LLVMContext(LLVMContext&); + void operator=(LLVMContext&); + public: + LLVMContextImpl* const pImpl; + MetadataContext &getMetadata(); + bool RemoveDeadMetadata(); LLVMContext(); ~LLVMContext(); - - // Constant accessors - Constant* getNullValue(const Type* Ty); - Constant* getAllOnesValue(const Type* Ty); - - // UndefValue accessors - UndefValue* getUndef(const Type* Ty); - - // ConstantInt accessors - ConstantInt* getConstantIntTrue(); - ConstantInt* getConstantIntFalse(); - Constant* getConstantInt(const Type* Ty, uint64_t V, - bool isSigned = false); - ConstantInt* getConstantInt(const IntegerType* Ty, uint64_t V, - bool isSigned = false); - ConstantInt* getConstantIntSigned(const IntegerType* Ty, int64_t V); - ConstantInt* getConstantInt(const APInt& V); - Constant* getConstantInt(const Type* Ty, const APInt& V); - ConstantInt* getConstantIntAllOnesValue(const Type* Ty); - - // ConstantPointerNull accessors - ConstantPointerNull* getConstantPointerNull(const PointerType* T); - - // ConstantStruct accessors - Constant* getConstantStruct(const StructType* T, - const std::vector& V); - Constant* getConstantStruct(const std::vector& V, - bool Packed = false); - Constant* getConstantStruct(Constant* const *Vals, unsigned NumVals, - bool Packed = false); - - // ConstantAggregateZero accessors - ConstantAggregateZero* getConstantAggregateZero(const Type* Ty); - - // ConstantArray accessors - Constant* getConstantArray(const ArrayType* T, - const std::vector& V); - Constant* getConstantArray(const ArrayType* T, Constant* const* Vals, - unsigned NumVals); - Constant* getConstantArray(const std::string& Initializer, - bool AddNull = false); - - // ConstantExpr accessors - Constant* getConstantExpr(unsigned Opcode, Constant* C1, Constant* C2); - Constant* getConstantExprTrunc(Constant* C, const Type* Ty); - Constant* getConstantExprSExt(Constant* C, const Type* Ty); - Constant* getConstantExprZExt(Constant* C, const Type* Ty); - Constant* getConstantExprFPTrunc(Constant* C, const Type* Ty); - Constant* getConstantExprFPExtend(Constant* C, const Type* Ty); - Constant* getConstantExprUIToFP(Constant* C, const Type* Ty); - Constant* getConstantExprSIToFP(Constant* C, const Type* Ty); - Constant* getConstantExprFPToUI(Constant* C, const Type* Ty); - Constant* getConstantExprFPToSI(Constant* C, const Type* Ty); - Constant* getConstantExprPtrToInt(Constant* C, const Type* Ty); - Constant* getConstantExprIntToPtr(Constant* C, const Type* Ty); - Constant* getConstantExprBitCast(Constant* C, const Type* Ty); - Constant* getConstantExprCast(unsigned ops, Constant* C, const Type* Ty); - Constant* getConstantExprZExtOrBitCast(Constant* C, const Type* Ty); - Constant* getConstantExprSExtOrBitCast(Constant* C, const Type* Ty); - Constant* getConstantExprTruncOrBitCast(Constant* C, const Type* Ty); - Constant* getConstantExprPointerCast(Constant* C, const Type* Ty); - Constant* getConstantExprIntegerCast(Constant* C, const Type* Ty, - bool isSigned); - Constant* getConstantExprFPCast(Constant* C, const Type* Ty); - Constant* getConstantExprSelect(Constant* C, Constant* V1, Constant* V2); - Constant* getConstantExprAlignOf(const Type* Ty); - Constant* getConstantExprCompare(unsigned short pred, - Constant* C1, Constant* 
C2); - Constant* getConstantExprNeg(Constant* C); - Constant* getConstantExprFNeg(Constant* C); - Constant* getConstantExprNot(Constant* C); - Constant* getConstantExprAdd(Constant* C1, Constant* C2); - Constant* getConstantExprFAdd(Constant* C1, Constant* C2); - Constant* getConstantExprSub(Constant* C1, Constant* C2); - Constant* getConstantExprFSub(Constant* C1, Constant* C2); - Constant* getConstantExprMul(Constant* C1, Constant* C2); - Constant* getConstantExprFMul(Constant* C1, Constant* C2); - Constant* getConstantExprUDiv(Constant* C1, Constant* C2); - Constant* getConstantExprSDiv(Constant* C1, Constant* C2); - Constant* getConstantExprFDiv(Constant* C1, Constant* C2); - Constant* getConstantExprURem(Constant* C1, Constant* C2); - Constant* getConstantExprSRem(Constant* C1, Constant* C2); - Constant* getConstantExprFRem(Constant* C1, Constant* C2); - Constant* getConstantExprAnd(Constant* C1, Constant* C2); - Constant* getConstantExprOr(Constant* C1, Constant* C2); - Constant* getConstantExprXor(Constant* C1, Constant* C2); - Constant* getConstantExprICmp(unsigned short pred, Constant* LHS, - Constant* RHS); - Constant* getConstantExprFCmp(unsigned short pred, Constant* LHS, - Constant* RHS); - Constant* getConstantExprVICmp(unsigned short pred, Constant* LHS, - Constant* RHS); - Constant* getConstantExprVFCmp(unsigned short pred, Constant* LHS, - Constant* RHS); - Constant* getConstantExprShl(Constant* C1, Constant* C2); - Constant* getConstantExprLShr(Constant* C1, Constant* C2); - Constant* getConstantExprAShr(Constant* C1, Constant* C2); - Constant* getConstantExprGetElementPtr(Constant* C, Constant* const* IdxList, - unsigned NumIdx); - Constant* getConstantExprGetElementPtr(Constant* C, Value* const* IdxList, - unsigned NumIdx); - Constant* getConstantExprExtractElement(Constant* Vec, Constant* Idx); - Constant* getConstantExprInsertElement(Constant* Vec, Constant* Elt, - Constant* Idx); - Constant* getConstantExprShuffleVector(Constant* V1, Constant* V2, - Constant* Mask); - Constant* getConstantExprExtractValue(Constant* Agg, const unsigned* IdxList, - unsigned NumIdx); - Constant* getConstantExprInsertValue(Constant* Agg, Constant* Val, - const unsigned* IdxList, - unsigned NumIdx); - Constant* getZeroValueForNegation(const Type* Ty); - - // ConstantFP accessors - ConstantFP* getConstantFP(const APFloat& V); - Constant* getConstantFP(const Type* Ty, double V); - ConstantFP* getConstantFPNegativeZero(const Type* Ty); - - // ConstantVector accessors - Constant* getConstantVector(const VectorType* T, - const std::vector& V); - Constant* getConstantVector(const std::vector& V); - Constant* getConstantVector(Constant* const* Vals, unsigned NumVals); - ConstantVector* getConstantVectorAllOnesValue(const VectorType* Ty); - - // MDNode accessors - MDNode* getMDNode(Value* const* Vals, unsigned NumVals); - - // MDString accessors - MDString* getMDString(const char *StrBegin, const char *StrEnd); - MDString* getMDString(const std::string &Str); - - // FunctionType accessors - FunctionType* getFunctionType(const Type* Result, - const std::vector& Params, - bool isVarArg); - - // IntegerType accessors - const IntegerType* getIntegerType(unsigned NumBits); - - // OpaqueType accessors - OpaqueType* getOpaqueType(); - - // StructType accessors - StructType* getStructType(bool isPacked=false); - StructType* getStructType(const std::vector& Params, - bool isPacked = false); - - // ArrayType accessors - ArrayType* getArrayType(const Type* ElementType, uint64_t NumElements); - - // 
PointerType accessors - PointerType* getPointerType(const Type* ElementType, unsigned AddressSpace); - PointerType* getPointerTypeUnqual(const Type* ElementType); - - // VectorType accessors - VectorType* getVectorType(const Type* ElementType, unsigned NumElements); - VectorType* getVectorTypeInteger(const VectorType* VTy); - VectorType* getVectorTypeExtendedElement(const VectorType* VTy); - VectorType* getVectorTypeTruncatedElement(const VectorType* VTy); }; /// FOR BACKWARDS COMPATIBILITY - Returns a global context. diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index e199758f01572..e9a0542bf10b4 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -18,8 +18,8 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/FindUsedTypes.h" #include "llvm/Analysis/IntervalPartition.h" -#include "llvm/Analysis/LoopVR.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PointerTracking.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Assembly/PrintModulePass.h" @@ -50,6 +50,7 @@ namespace { (void) llvm::createStructRetPromotionPass(); (void) llvm::createBasicAliasAnalysisPass(); (void) llvm::createLibCallAliasAnalysisPass(0); + (void) llvm::createScalarEvolutionAliasAnalysisPass(); (void) llvm::createBlockPlacementPass(); (void) llvm::createBlockProfilerPass(); (void) llvm::createBreakCriticalEdgesPass(); @@ -62,13 +63,13 @@ namespace { (void) llvm::createDeadStoreEliminationPass(); (void) llvm::createDeadTypeEliminationPass(); (void) llvm::createEdgeProfilerPass(); + (void) llvm::createOptimalEdgeProfilerPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createFunctionProfilerPass(); (void) llvm::createGlobalDCEPass(); (void) llvm::createGlobalOptimizerPass(); (void) llvm::createGlobalsModRefPass(); - (void) llvm::createGVNPREPass(); (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); (void) llvm::createIndVarSimplifyPass(); @@ -91,6 +92,8 @@ namespace { (void) llvm::createLowerSwitchPass(); (void) llvm::createNoAAPass(); (void) llvm::createNoProfileInfoPass(); + (void) llvm::createProfileEstimatorPass(); + (void) llvm::createProfileVerifierPass(); (void) llvm::createProfileLoaderPass(); (void) llvm::createPromoteMemoryToRegisterPass(); (void) llvm::createDemoteRegisterToMemoryPass(); @@ -114,7 +117,7 @@ namespace { (void) llvm::createRSProfilingPass(); (void) llvm::createIndMemRemPass(); (void) llvm::createInstCountPass(); - (void) llvm::createPredicateSimplifierPass(); + (void) llvm::createCodeGenLICMPass(); (void) llvm::createCodeGenPreparePass(); (void) llvm::createGVNPass(); (void) llvm::createMemCpyOptPass(); @@ -130,11 +133,12 @@ namespace { (void) llvm::createDbgInfoPrinterPass(); (void) llvm::createPartialInliningPass(); (void) llvm::createSSIPass(); + (void) llvm::createSSIEverythingPass(); (void)new llvm::IntervalPartition(); (void)new llvm::FindUsedTypes(); (void)new llvm::ScalarEvolution(); - (void)new llvm::LoopVR(); + (void)new llvm::PointerTracking(); ((llvm::Function*)0)->viewCFGOnly(); llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0); X.add((llvm::Value*)0, 0); // for -print-alias-sets diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllVMCore.h index e5a51971f164b..0ee18d57a04fa 100644 --- a/include/llvm/LinkAllVMCore.h +++ b/include/llvm/LinkAllVMCore.h @@ -46,7 +46,7 @@ namespace { if (std::getenv("bar") != (char*) -1) return; llvm::Module* M = new 
llvm::Module("", llvm::getGlobalContext()); - (void)new llvm::UnreachableInst(); + (void)new llvm::UnreachableInst(llvm::getGlobalContext()); (void) llvm::createVerifierPass(); (void) new llvm::Mangler(*M,""); } diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h index 2d0c2cd96b764..1e1da867113b9 100644 --- a/include/llvm/Linker.h +++ b/include/llvm/Linker.h @@ -14,11 +14,12 @@ #ifndef LLVM_LINKER_H #define LLVM_LINKER_H -#include "llvm/System/Path.h" #include #include +#include "llvm/ADT/StringRef.h" namespace llvm { + namespace sys { class Path; } class Module; class LLVMContext; @@ -32,7 +33,7 @@ class LLVMContext; /// The Linker can link Modules from memory, bitcode files, or bitcode /// archives. It retains a set of search paths in which to find any libraries /// presented to it. By default, the linker will generate error and warning -/// messages to std::cerr but this capability can be turned off with the +/// messages to stderr but this capability can be turned off with the /// QuietWarnings and QuietErrors flags. It can also be instructed to verbosely /// print out the linking actions it is taking with the Verbose flag. /// @brief The LLVM Linker. @@ -52,9 +53,9 @@ class Linker { /// This enumeration is used to control various optional features of the /// linker. enum ControlFlags { - Verbose = 1, ///< Print to std::cerr what steps the linker is taking - QuietWarnings = 2, ///< Don't print warnings to std::cerr. - QuietErrors = 4 ///< Don't print errors to std::cerr. + Verbose = 1, ///< Print to stderr what steps the linker is taking + QuietWarnings = 2, ///< Don't print warnings to stderr. + QuietErrors = 4 ///< Don't print errors to stderr. }; /// @} @@ -64,17 +65,16 @@ class Linker { /// Construct the Linker with an empty module which will be given the /// name \p progname. \p progname will also be used for error messages. /// @brief Construct with empty module - Linker( - const std::string& progname, ///< name of tool running linker - const std::string& modulename, ///< name of linker's end-result module - LLVMContext& C, ///< Context for global info - unsigned Flags = 0 ///< ControlFlags (one or more |'d together) + Linker(const StringRef &progname, ///< name of tool running linker + const StringRef &modulename, ///< name of linker's end-result module + LLVMContext &C, ///< Context for global info + unsigned Flags = 0 ///< ControlFlags (one or more |'d together) ); /// Construct the Linker with a previously defined module, \p aModule. Use /// \p progname for the name of the program in error messages. /// @brief Construct with existing module - Linker(const std::string& progname, Module* aModule, unsigned Flags = 0); + Linker(const StringRef& progname, Module* aModule, unsigned Flags = 0); /// Destruct the Linker. /// @brief Destructor @@ -114,9 +114,9 @@ class Linker { /// true, indicating an error occurred. At most one error is retained so /// this function always returns the last error that occurred. Note that if /// the Quiet control flag is not set, the error string will have already - /// been printed to std::cerr. + /// been printed to stderr. /// @brief Get the text of the last error that occurred. 
-    const std::string& getLastError() const { return Error; }
+    const std::string &getLastError() const { return Error; }

   /// @}
   /// @name Mutators
   /// @{
@@ -214,7 +214,7 @@ class Linker {
     /// @returns true if an error occurs, false otherwise
     /// @brief Link one library into the module
     bool LinkInLibrary (
-      const std::string& Library, ///< The library to link in
+      const StringRef &Library, ///< The library to link in
       bool& is_native           ///< Indicates whether the library is native
     );
@@ -267,7 +267,7 @@ class Linker {
     /// will be empty (i.e. sys::Path::isEmpty() will return true).
     /// @returns A sys::Path to the found library
     /// @brief Find a library from its short name.
-    sys::Path FindLib(const std::string &Filename);
+    sys::Path FindLib(const StringRef &Filename);

   /// @}
   /// @name Implementation
   /// @{
     /// Module it contains (wrapped in an auto_ptr), or 0 if an error occurs.
     std::auto_ptr<Module> LoadObject(const sys::Path& FN);

-    bool warning(const std::string& message);
-    bool error(const std::string& message);
-    void verbose(const std::string& message);
+    bool warning(const StringRef &message);
+    bool error(const StringRef &message);
+    void verbose(const StringRef &message);

   /// @}
   /// @name Data
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
new file mode 100644
index 0000000000000..fb69630ff52ee
--- /dev/null
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -0,0 +1,472 @@
+//===-- llvm/MC/MCAsmInfo.h - Asm info --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a class to be used as the basis for target-specific
+// asm writers. This class primarily takes care of global printing constants,
+// which are used in very similar ways across all targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_ASM_INFO_H
+#define LLVM_TARGET_ASM_INFO_H
+
+#include <cassert>
+
+namespace llvm {
+  /// MCAsmInfo - This class is intended to be used as a base class for asm
+  /// properties and features specific to the target.
+  namespace ExceptionHandling { enum ExceptionsType { None, Dwarf, SjLj }; }
+
+  class MCAsmInfo {
+  protected:
+    //===------------------------------------------------------------------===//
+    // Properties to be set by the target writer, used to configure asm printer.
+    //
+
+    /// ZeroFillDirective - Directive for emitting a global to the ZeroFill
+    /// section on this target. Null if this target doesn't support zerofill.
+    const char *ZeroFillDirective;           // Default is null.
+
+    /// NonexecutableStackDirective - Directive for declaring to the
+    /// linker and beyond that the emitted code does not require stack
+    /// memory to be executable.
+    const char *NonexecutableStackDirective; // Default is null.
+
+    /// NeedsSet - True if target asm treats expressions in data directives
+    /// as linktime-relocatable. For assembly-time computation, we need to
+    /// use a .set. Thus:
+    ///   .set w, x-y
+    ///   .long w
+    /// is computed at assembly time, while
+    ///   .long x-y
+    /// is relocated if the relative locations of x and y change at linktime.
+    /// We want both these things in different places.
+    bool NeedsSet;                           // Defaults to false.
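    // Editor's sketch (assumption, not part of this patch): since these
    // members are protected, a target configures them in its MCAsmInfo
    // subclass constructor. A hypothetical target might write:
    //   struct FooMCAsmInfo : public MCAsmInfo {
    //     FooMCAsmInfo() {
    //       CommentString = ";";  // Foo's assembler comments start with ';'
    //       NeedsSet = true;      // data-directive exprs relocate at link time
    //     }
    //   };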
+ + /// MaxInstLength - This is the maximum possible length of an instruction, + /// which is needed to compute the size of an inline asm. + unsigned MaxInstLength; // Defaults to 4. + + /// PCSymbol - The symbol used to represent the current PC. Used in PC + /// relative expressions. + const char *PCSymbol; // Defaults to "$". + + /// SeparatorChar - This character, if specified, is used to separate + /// instructions from each other when on the same line. This is used to + /// measure inline asm instructions. + char SeparatorChar; // Defaults to ';' + + /// CommentColumn - This indicates the comment num (zero-based) at + /// which asm comments should be printed. + unsigned CommentColumn; // Defaults to 60 + + /// CommentString - This indicates the comment character used by the + /// assembler. + const char *CommentString; // Defaults to "#" + + /// GlobalPrefix - If this is set to a non-empty string, it is prepended + /// onto all global symbols. This is often used for "_" or ".". + const char *GlobalPrefix; // Defaults to "" + + /// PrivateGlobalPrefix - This prefix is used for globals like constant + /// pool entries that are completely private to the .s file and should not + /// have names in the .o file. This is often "." or "L". + const char *PrivateGlobalPrefix; // Defaults to "." + + /// LinkerPrivateGlobalPrefix - This prefix is used for symbols that should + /// be passed through the assembler but be removed by the linker. This + /// is "l" on Darwin, currently used for some ObjC metadata. + const char *LinkerPrivateGlobalPrefix; // Defaults to "" + + /// InlineAsmStart/End - If these are nonempty, they contain a directive to + /// emit before and after an inline assembly statement. + const char *InlineAsmStart; // Defaults to "#APP\n" + const char *InlineAsmEnd; // Defaults to "#NO_APP\n" + + /// AssemblerDialect - Which dialect of an assembler variant to use. + unsigned AssemblerDialect; // Defaults to 0 + + /// AllowQuotesInName - This is true if the assembler allows for complex + /// symbol names to be surrounded in quotes. This defaults to false. + bool AllowQuotesInName; + + /// AllowNameToStartWithDigit - This is true if the assembler allows symbol + /// names to start with a digit (e.g., "0x0021"). This defaults to false. + bool AllowNameToStartWithDigit; + + //===--- Data Emission Directives -------------------------------------===// + + /// ZeroDirective - this should be set to the directive used to get some + /// number of zero bytes emitted to the current section. Common cases are + /// "\t.zero\t" and "\t.space\t". If this is set to null, the + /// Data*bitsDirective's will be used to emit zero bytes. + const char *ZeroDirective; // Defaults to "\t.zero\t" + const char *ZeroDirectiveSuffix; // Defaults to "" + + /// AsciiDirective - This directive allows emission of an ascii string with + /// the standard C escape characters embedded into it. + const char *AsciiDirective; // Defaults to "\t.ascii\t" + + /// AscizDirective - If not null, this allows for special handling of + /// zero terminated strings on this target. This is commonly supported as + /// ".asciz". If a target doesn't support this, it can be set to null. + const char *AscizDirective; // Defaults to "\t.asciz\t" + + /// DataDirectives - These directives are used to output some unit of + /// integer data to the current section. If a data directive is set to + /// null, smaller data directives will be used to emit the large sizes. 
+    const char *Data8bitsDirective;          // Defaults to "\t.byte\t"
+    const char *Data16bitsDirective;         // Defaults to "\t.short\t"
+    const char *Data32bitsDirective;         // Defaults to "\t.long\t"
+    const char *Data64bitsDirective;         // Defaults to "\t.quad\t"
+
+    /// getDataASDirective - Return the directive that should be used to emit
+    /// data of the specified size to the specified numeric address space.
+    virtual const char *getDataASDirective(unsigned Size, unsigned AS) const {
+      assert(AS != 0 && "Don't know the directives for default addr space");
+      return 0;
+    }
+
+    /// SunStyleELFSectionSwitchSyntax - This is true if this target uses "Sun
+    /// Style" syntax for section switching ("#alloc,#write" etc) instead of the
+    /// normal ELF syntax (,"a,w") in .section directives.
+    bool SunStyleELFSectionSwitchSyntax;     // Defaults to false.
+
+    /// UsesELFSectionDirectiveForBSS - This is true if this target uses the ELF
+    /// '.section' directive before the '.bss' one. It's used for PPC/Linux,
+    /// which doesn't support a bare '.bss' directive.
+    bool UsesELFSectionDirectiveForBSS;      // Defaults to false.
+
+    //===--- Alignment Information ----------------------------------------===//
+
+    /// AlignDirective - The directive used to round up to an alignment
+    /// boundary.
+    ///
+    const char *AlignDirective;              // Defaults to "\t.align\t"
+
+    /// AlignmentIsInBytes - If this is true (the default) then the asmprinter
+    /// emits ".align N" directives, where N is the number of bytes to align to.
+    /// Otherwise, it emits ".align log2(N)", e.g. 3 to align to an 8 byte
+    /// boundary.
+    bool AlignmentIsInBytes;                 // Defaults to true
+
+    /// TextAlignFillValue - If non-zero, this is used to fill the executable
+    /// space created as the result of an alignment directive.
+    unsigned TextAlignFillValue;             // Defaults to 0
+
+    //===--- Section Switching Directives ---------------------------------===//
+
+    /// JumpTableDirective - If non-null, the directive to emit before jump
+    /// table entries. FIXME: REMOVE THIS.
+    const char *JumpTableDirective;          // Defaults to NULL.
+    const char *PICJumpTableDirective;       // Defaults to NULL.
+
+
+    //===--- Global Variable Emission Directives --------------------------===//
+
+    /// GlobalDirective - This is the directive used to declare a global entity.
+    ///
+    const char *GlobalDirective;             // Defaults to NULL.
+
+    /// ExternDirective - This is the directive used to declare external
+    /// globals.
+    ///
+    const char *ExternDirective;             // Defaults to NULL.
+
+    /// SetDirective - This is the name of a directive that can be used to tell
+    /// the assembler to set the value of a variable to some expression.
+    const char *SetDirective;                // Defaults to null.
+
+    /// LCOMMDirective - This is the name of a directive (if supported) that can
+    /// be used to efficiently declare a local (internal) block of zero
+    /// initialized data in the .bss/.data section. The syntax expected is:
+    /// @verbatim <LCOMMDirective> SYMBOLNAME LENGTHINBYTES, ALIGNMENT
+    /// @endverbatim
+    const char *LCOMMDirective;              // Defaults to null.
+
+    const char *COMMDirective;               // Defaults to "\t.comm\t".
+
+    /// COMMDirectiveTakesAlignment - True if COMMDirective takes a third
+    /// argument that specifies the alignment of the declaration.
+    bool COMMDirectiveTakesAlignment;        // Defaults to true.
+
+    /// HasDotTypeDotSizeDirective - True if the target has .type and .size
+    /// directives; this is true for most ELF targets.
+    bool HasDotTypeDotSizeDirective;         // Defaults to true.
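    // Editor's worked example (not part of this patch): with AlignmentIsInBytes
    // true, an 8-byte alignment request is printed as "\t.align\t8"; with it
    // false, the same request prints "\t.align\t3", i.e. log2(8). Similarly,
    // getData32bitsDirective(0) (see the accessors further below) returns
    // Data32bitsDirective, while a nonzero address space defers to the
    // target's getDataASDirective(32, AS) override.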
+
+    /// HasSingleParameterDotFile - True if the target has a single-parameter
+    /// .file directive; this is true for ELF targets.
+    bool HasSingleParameterDotFile;          // Defaults to true.
+
+    /// UsedDirective - This directive, if non-null, is used to declare a global
+    /// as being used somehow that the assembler can't see. This prevents dead
+    /// code elimination on some targets.
+    const char *UsedDirective;               // Defaults to NULL.
+
+    /// WeakRefDirective - This directive, if non-null, is used to declare a
+    /// global as being a weak undefined symbol.
+    const char *WeakRefDirective;            // Defaults to NULL.
+
+    /// WeakDefDirective - This directive, if non-null, is used to declare a
+    /// global as being a weak defined symbol.
+    const char *WeakDefDirective;            // Defaults to NULL.
+
+    /// HiddenDirective - This directive, if non-null, is used to declare a
+    /// global or function as having hidden visibility.
+    const char *HiddenDirective;             // Defaults to "\t.hidden\t".
+
+    /// ProtectedDirective - This directive, if non-null, is used to declare a
+    /// global or function as having protected visibility.
+    const char *ProtectedDirective;          // Defaults to "\t.protected\t".
+
+    //===--- Dwarf Emission Directives -----------------------------------===//
+
+    /// AbsoluteDebugSectionOffsets - True if we should emit absolute section
+    /// offsets for debug information.
+    bool AbsoluteDebugSectionOffsets;        // Defaults to false.
+
+    /// AbsoluteEHSectionOffsets - True if we should emit absolute section
+    /// offsets for EH information. Defaults to false.
+    bool AbsoluteEHSectionOffsets;
+
+    /// HasLEB128 - True if target asm supports leb128 directives.
+    bool HasLEB128;                          // Defaults to false.
+
+    /// HasDotLocAndDotFile - True if target asm supports .loc and .file
+    /// directives for emitting debugging information.
+    bool HasDotLocAndDotFile;                // Defaults to false.
+
+    /// SupportsDebugInformation - True if target supports emission of debugging
+    /// information.
+    bool SupportsDebugInformation;           // Defaults to false.
+
+    /// ExceptionsType - The kind of exception handling the target supports,
+    /// if any.
+    ExceptionHandling::ExceptionsType ExceptionsType; // Defaults to None
+
+    /// DwarfRequiresFrameSection - True if the Dwarf2 output needs a frame
+    /// section.
+    bool DwarfRequiresFrameSection;          // Defaults to true.
+
+    /// DwarfUsesInlineInfoSection - True if DwarfDebugInlineSection is used to
+    /// encode inline subroutine information.
+    bool DwarfUsesInlineInfoSection;         // Defaults to false.
+
+    /// Is_EHSymbolPrivate - If set, the "_foo.eh" symbol is made private so
+    /// that it doesn't show up in the symbol table of the object file.
+    bool Is_EHSymbolPrivate;                 // Defaults to true.
+
+    /// GlobalEHDirective - This is the directive used to make exception frame
+    /// tables globally visible.
+    const char *GlobalEHDirective;           // Defaults to NULL.
+
+    /// SupportsWeakOmittedEHFrame - True if target assembler and linker will
+    /// handle a weak_definition of constant 0 for an omitted EH frame.
+    bool SupportsWeakOmittedEHFrame;         // Defaults to true.
+
+    /// DwarfSectionOffsetDirective - Special section offset directive.
+    const char* DwarfSectionOffsetDirective; // Defaults to NULL
+
+    //===--- CBE Asm Translation Table -----------------------------------===//
+
+    const char *const *AsmTransCBE;          // Defaults to empty
+
+  public:
+    explicit MCAsmInfo();
+    virtual ~MCAsmInfo();
+
+    /// getSLEB128Size - Compute the number of bytes required for a signed
+    /// leb128 value.
+ static unsigned getSLEB128Size(int Value); + + /// getULEB128Size - Compute the number of bytes required for an unsigned + /// leb128 value. + static unsigned getULEB128Size(unsigned Value); + + // Data directive accessors. + // + const char *getData8bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data8bitsDirective : getDataASDirective(8, AS); + } + const char *getData16bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data16bitsDirective : getDataASDirective(16, AS); + } + const char *getData32bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data32bitsDirective : getDataASDirective(32, AS); + } + const char *getData64bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data64bitsDirective : getDataASDirective(64, AS); + } + + + bool usesSunStyleELFSectionSwitchSyntax() const { + return SunStyleELFSectionSwitchSyntax; + } + + bool usesELFSectionDirectiveForBSS() const { + return UsesELFSectionDirectiveForBSS; + } + + // Accessors. + // + const char *getZeroFillDirective() const { + return ZeroFillDirective; + } + const char *getNonexecutableStackDirective() const { + return NonexecutableStackDirective; + } + bool needsSet() const { + return NeedsSet; + } + unsigned getMaxInstLength() const { + return MaxInstLength; + } + const char *getPCSymbol() const { + return PCSymbol; + } + char getSeparatorChar() const { + return SeparatorChar; + } + unsigned getCommentColumn() const { + return CommentColumn; + } + const char *getCommentString() const { + return CommentString; + } + const char *getGlobalPrefix() const { + return GlobalPrefix; + } + const char *getPrivateGlobalPrefix() const { + return PrivateGlobalPrefix; + } + const char *getLinkerPrivateGlobalPrefix() const { + return LinkerPrivateGlobalPrefix; + } + const char *getInlineAsmStart() const { + return InlineAsmStart; + } + const char *getInlineAsmEnd() const { + return InlineAsmEnd; + } + unsigned getAssemblerDialect() const { + return AssemblerDialect; + } + bool doesAllowQuotesInName() const { + return AllowQuotesInName; + } + bool doesAllowNameToStartWithDigit() const { + return AllowNameToStartWithDigit; + } + const char *getZeroDirective() const { + return ZeroDirective; + } + const char *getZeroDirectiveSuffix() const { + return ZeroDirectiveSuffix; + } + const char *getAsciiDirective() const { + return AsciiDirective; + } + const char *getAscizDirective() const { + return AscizDirective; + } + const char *getJumpTableDirective(bool isPIC) const { + return isPIC ? 
PICJumpTableDirective : JumpTableDirective; + } + const char *getAlignDirective() const { + return AlignDirective; + } + bool getAlignmentIsInBytes() const { + return AlignmentIsInBytes; + } + unsigned getTextAlignFillValue() const { + return TextAlignFillValue; + } + const char *getGlobalDirective() const { + return GlobalDirective; + } + const char *getExternDirective() const { + return ExternDirective; + } + const char *getSetDirective() const { + return SetDirective; + } + const char *getLCOMMDirective() const { + return LCOMMDirective; + } + const char *getCOMMDirective() const { + return COMMDirective; + } + bool getCOMMDirectiveTakesAlignment() const { + return COMMDirectiveTakesAlignment; + } + bool hasDotTypeDotSizeDirective() const { + return HasDotTypeDotSizeDirective; + } + bool hasSingleParameterDotFile() const { + return HasSingleParameterDotFile; + } + const char *getUsedDirective() const { + return UsedDirective; + } + const char *getWeakRefDirective() const { + return WeakRefDirective; + } + const char *getWeakDefDirective() const { + return WeakDefDirective; + } + const char *getHiddenDirective() const { + return HiddenDirective; + } + const char *getProtectedDirective() const { + return ProtectedDirective; + } + bool isAbsoluteDebugSectionOffsets() const { + return AbsoluteDebugSectionOffsets; + } + bool isAbsoluteEHSectionOffsets() const { + return AbsoluteEHSectionOffsets; + } + bool hasLEB128() const { + return HasLEB128; + } + bool hasDotLocAndDotFile() const { + return HasDotLocAndDotFile; + } + bool doesSupportDebugInformation() const { + return SupportsDebugInformation; + } + bool doesSupportExceptionHandling() const { + return ExceptionsType != ExceptionHandling::None; + } + ExceptionHandling::ExceptionsType getExceptionHandlingType() const { + return ExceptionsType; + } + bool doesDwarfRequireFrameSection() const { + return DwarfRequiresFrameSection; + } + bool doesDwarfUsesInlineInfoSection() const { + return DwarfUsesInlineInfoSection; + } + bool is_EHSymbolPrivate() const { + return Is_EHSymbolPrivate; + } + const char *getGlobalEHDirective() const { + return GlobalEHDirective; + } + bool getSupportsWeakOmittedEHFrame() const { + return SupportsWeakOmittedEHFrame; + } + const char *getDwarfSectionOffsetDirective() const { + return DwarfSectionOffsetDirective; + } + const char *const *getAsmCBE() const { + return AsmTransCBE; + } + }; +} + +#endif diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h new file mode 100644 index 0000000000000..a3ee1593c3ac4 --- /dev/null +++ b/include/llvm/MC/MCAsmInfoCOFF.h @@ -0,0 +1,24 @@ +//===-- MCAsmInfoCOFF.h - COFF asm properties -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_COFF_TARGET_ASM_INFO_H
+#define LLVM_COFF_TARGET_ASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class MCAsmInfoCOFF : public MCAsmInfo {
+  protected:
+    explicit MCAsmInfoCOFF();
+
+  };
+}
+
+
+#endif // LLVM_COFF_TARGET_ASM_INFO_H
diff --git a/include/llvm/MC/MCAsmInfoDarwin.h b/include/llvm/MC/MCAsmInfoDarwin.h
new file mode 100644
index 0000000000000..c85aa3da9572e
--- /dev/null
+++ b/include/llvm/MC/MCAsmInfoDarwin.h
@@ -0,0 +1,32 @@
+//===---- MCAsmInfoDarwin.h - Darwin asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DARWIN_TARGET_ASM_INFO_H
+#define LLVM_DARWIN_TARGET_ASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class GlobalValue;
+  class GlobalVariable;
+  class Type;
+  class Mangler;
+
+  struct MCAsmInfoDarwin : public MCAsmInfo {
+    explicit MCAsmInfoDarwin();
+  };
+}
+
+
+#endif // LLVM_DARWIN_TARGET_ASM_INFO_H
diff --git a/include/llvm/MC/MCAsmLexer.h b/include/llvm/MC/MCAsmLexer.h
new file mode 100644
index 0000000000000..e66425abef6a2
--- /dev/null
+++ b/include/llvm/MC/MCAsmLexer.h
@@ -0,0 +1,141 @@
+//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMLEXER_H
+#define LLVM_MC_MCASMLEXER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmLexer;
+class MCInst;
+class SMLoc;
+class Target;
+
+/// AsmToken - Target-independent representation for an assembler token.
+struct AsmToken {
+  enum TokenKind {
+    // Markers
+    Eof, Error,
+
+    // String values.
+    Identifier,
+    String,
+
+    // Integer values.
+    Integer,
+
+    // No-value.
+    EndOfStatement,
+    Colon,
+    Plus, Minus, Tilde,
+    Slash,    // '/'
+    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
+    Star, Comma, Dollar, Equal, EqualEqual,
+
+    Pipe, PipePipe, Caret,
+    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
+    Less, LessEqual, LessLess, LessGreater,
+    Greater, GreaterEqual, GreaterGreater
+  };
+
+  TokenKind Kind;
+
+  /// A reference to the entire token contents; this is always a pointer into
+  /// a memory buffer owned by the source manager.
+  StringRef Str;
+
+  int64_t IntVal;
+
+public:
+  AsmToken() {}
+  AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0)
+    : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
+
+  TokenKind getKind() const { return Kind; }
+  bool is(TokenKind K) const { return Kind == K; }
+  bool isNot(TokenKind K) const { return Kind != K; }
+
+  SMLoc getLoc() const;
+
+  /// getStringContents - Get the contents of a string token (without quotes).
+  StringRef getStringContents() const {
+    assert(Kind == String && "This token isn't a string!");
+    return Str.slice(1, Str.size() - 1);
+  }
+
+  /// getIdentifier - Get the identifier string for the current token, which
+  /// should be an identifier or a string. This gets the portion of the string
+  /// which should be used as the identifier, e.g., it does not include the
+  /// quotes on strings.
+  StringRef getIdentifier() const {
+    if (Kind == Identifier)
+      return getString();
+    return getStringContents();
+  }
+
+  /// getString - Get the string for the current token; this includes all
+  /// characters (for example, the quotes on strings) in the token.
+  ///
+  /// The returned StringRef points into the source manager's memory buffer,
+  /// and is safe to store across calls to Lex().
+  StringRef getString() const { return Str; }
+
+  // FIXME: Don't compute this in advance, it makes every token larger, and is
+  // also not generally what we want (it is nicer for recovery etc. to lex 123br
+  // as a single token, then diagnose as an invalid number).
+  int64_t getIntVal() const {
+    assert(Kind == Integer && "This token isn't an integer!");
+    return IntVal;
+  }
+};
+
+/// MCAsmLexer - Generic assembler lexer interface, for use by target-specific
+/// assembly lexers.
+class MCAsmLexer {
+  /// The current token, stored in the base class for faster access.
+  AsmToken CurTok;
+
+  MCAsmLexer(const MCAsmLexer &);      // DO NOT IMPLEMENT
+  void operator=(const MCAsmLexer &);  // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+  MCAsmLexer();
+
+  virtual AsmToken LexToken() = 0;
+
+public:
+  virtual ~MCAsmLexer();
+
+  /// Lex - Consume the next token from the input stream and return it.
+  ///
+  /// The lexer will continuously return the end-of-file token once the end of
+  /// the main input file has been reached.
+  const AsmToken &Lex() {
+    return CurTok = LexToken();
+  }
+
+  /// getTok - Get the current (last) lexed token.
+  const AsmToken &getTok() {
+    return CurTok;
+  }
+
+  /// getKind - Get the kind of the current token.
+  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+
+  /// is - Check if the current token has kind \arg K.
+  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+
+  /// isNot - Check if the current token does not have kind \arg K.
+  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/MC/MCAsmParser.h b/include/llvm/MC/MCAsmParser.h
new file mode 100644
index 0000000000000..c1b5d133cdea9
--- /dev/null
+++ b/include/llvm/MC/MCAsmParser.h
@@ -0,0 +1,79 @@
+//===-- llvm/MC/MCAsmParser.h - Abstract Asm Parser Interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMPARSER_H
+#define LLVM_MC_MCASMPARSER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmLexer;
+class MCContext;
+class MCExpr;
+class MCStreamer;
+class MCValue;
+class SMLoc;
+class Twine;
+
+/// MCAsmParser - Generic assembler parser interface, for use by
+/// target-specific assembly parsers.
+class MCAsmParser {
+  MCAsmParser(const MCAsmParser &);    // DO NOT IMPLEMENT
+  void operator=(const MCAsmParser &); // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
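  // Editor's sketch (assumption, not part of this patch): a client drives the
  // MCAsmLexer defined above by calling Lex() until end of file, e.g.:
  //   MCAsmLexer &L = Parser.getLexer();
  //   while (L.Lex().isNot(AsmToken::Eof))
  //     if (L.getTok().is(AsmToken::Identifier))
  //       consumeIdentifier(L.getTok().getIdentifier()); // hypothetical helper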
+  MCAsmParser();
+
+public:
+  virtual ~MCAsmParser();
+
+  virtual MCAsmLexer &getLexer() = 0;
+
+  virtual MCContext &getContext() = 0;
+
+  /// getStreamer - Return the output streamer for the assembler.
+  virtual MCStreamer &getStreamer() = 0;
+
+  /// Warning - Emit a warning at the location \arg L, with the message \arg
+  /// Msg.
+  virtual void Warning(SMLoc L, const Twine &Msg) = 0;
+
+  /// Error - Emit an error at the location \arg L, with the message \arg
+  /// Msg.
+  ///
+  /// \return The return value is always true, as an idiomatic convenience to
+  /// clients.
+  virtual bool Error(SMLoc L, const Twine &Msg) = 0;
+
+  /// ParseExpression - Parse an arbitrary expression.
+  ///
+  /// @param Res - The value of the expression. The result is undefined
+  /// on error.
+  /// @result - False on success.
+  virtual bool ParseExpression(const MCExpr *&Res) = 0;
+
+  /// ParseParenExpression - Parse an arbitrary expression, assuming that an
+  /// initial '(' has already been consumed.
+  ///
+  /// @param Res - The value of the expression. The result is undefined
+  /// on error.
+  /// @result - False on success.
+  virtual bool ParseParenExpression(const MCExpr *&Res) = 0;
+
+  /// ParseAbsoluteExpression - Parse an expression which must evaluate to an
+  /// absolute value.
+  ///
+  /// @param Res - The value of the absolute expression. The result is
+  /// undefined on error.
+  /// @result - False on success.
+  virtual bool ParseAbsoluteExpression(int64_t &Res) = 0;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
new file mode 100644
index 0000000000000..892f54839db38
--- /dev/null
+++ b/include/llvm/MC/MCAssembler.h
@@ -0,0 +1,661 @@
+//===- MCAssembler.h - Object File Generation -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASSEMBLER_H
+#define LLVM_MC_MCASSEMBLER_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataTypes.h"
+#include <vector> // FIXME: Shouldn't be needed.
+
+namespace llvm {
+class raw_ostream;
+class MCAssembler;
+class MCContext;
+class MCSection;
+class MCSectionData;
+
+class MCFragment : public ilist_node<MCFragment> {
+  MCFragment(const MCFragment&);     // DO NOT IMPLEMENT
+  void operator=(const MCFragment&); // DO NOT IMPLEMENT
+
+public:
+  enum FragmentType {
+    FT_Data,
+    FT_Align,
+    FT_Fill,
+    FT_Org,
+    FT_ZeroFill
+  };
+
+private:
+  FragmentType Kind;
+
+  /// Parent - The data for the section this fragment is in.
+  MCSectionData *Parent;
+
+  /// @name Assembler Backend Data
+  /// @{
+  //
+  // FIXME: This could all be kept private to the assembler implementation.
+
+  /// Offset - The offset of this fragment in its section. This is ~0 until
+  /// initialized.
+  uint64_t Offset;
+
+  /// FileSize - The file size of this fragment. This is ~0 until initialized.
+  uint64_t FileSize;
+
+  /// @}
+
+protected:
+  MCFragment(FragmentType _Kind, MCSectionData *_Parent = 0);
+
+public:
+  // Only for sentinel.
+ MCFragment(); + virtual ~MCFragment(); + + FragmentType getKind() const { return Kind; } + + MCSectionData *getParent() const { return Parent; } + void setParent(MCSectionData *Value) { Parent = Value; } + + // FIXME: This should be abstract, fix sentinel. + virtual uint64_t getMaxFileSize() const { + assert(0 && "Invalid getMaxFileSize call!"); + return 0; + }; + + /// @name Assembler Backend Support + /// @{ + // + // FIXME: This could all be kept private to the assembler implementation. + + uint64_t getAddress() const; + + uint64_t getFileSize() const { + assert(FileSize != ~UINT64_C(0) && "File size not set!"); + return FileSize; + } + void setFileSize(uint64_t Value) { + assert(Value <= getMaxFileSize() && "Invalid file size!"); + FileSize = Value; + } + + uint64_t getOffset() const { + assert(Offset != ~UINT64_C(0) && "File offset not set!"); + return Offset; + } + void setOffset(uint64_t Value) { Offset = Value; } + + /// @} + + static bool classof(const MCFragment *O) { return true; } +}; + +class MCDataFragment : public MCFragment { + SmallString<32> Contents; + +public: + MCDataFragment(MCSectionData *SD = 0) : MCFragment(FT_Data, SD) {} + + /// @name Accessors + /// @{ + + uint64_t getMaxFileSize() const { + return Contents.size(); + } + + SmallString<32> &getContents() { return Contents; } + const SmallString<32> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Data; + } + static bool classof(const MCDataFragment *) { return true; } +}; + +class MCAlignFragment : public MCFragment { + /// Alignment - The alignment to ensure, in bytes. + unsigned Alignment; + + /// Value - Value to use for filling padding bytes. + int64_t Value; + + /// ValueSize - The size of the integer (in bytes) of \arg Value. + unsigned ValueSize; + + /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment + /// cannot be satisfied in this width then this fragment is ignored. + unsigned MaxBytesToEmit; + +public: + MCAlignFragment(unsigned _Alignment, int64_t _Value, unsigned _ValueSize, + unsigned _MaxBytesToEmit, MCSectionData *SD = 0) + : MCFragment(FT_Align, SD), Alignment(_Alignment), + Value(_Value),ValueSize(_ValueSize), + MaxBytesToEmit(_MaxBytesToEmit) {} + + /// @name Accessors + /// @{ + + uint64_t getMaxFileSize() const { + return std::max(Alignment - 1, MaxBytesToEmit); + } + + unsigned getAlignment() const { return Alignment; } + + int64_t getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Align; + } + static bool classof(const MCAlignFragment *) { return true; } +}; + +class MCFillFragment : public MCFragment { + /// Value - Value to use for filling bytes. + MCValue Value; + + /// ValueSize - The size (in bytes) of \arg Value to use when filling. + unsigned ValueSize; + + /// Count - The number of copies of \arg Value to insert. 
+ uint64_t Count; + +public: + MCFillFragment(MCValue _Value, unsigned _ValueSize, uint64_t _Count, + MCSectionData *SD = 0) + : MCFragment(FT_Fill, SD), + Value(_Value), ValueSize(_ValueSize), Count(_Count) {} + + /// @name Accessors + /// @{ + + uint64_t getMaxFileSize() const { + return ValueSize * Count; + } + + MCValue getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + uint64_t getCount() const { return Count; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Fill; + } + static bool classof(const MCFillFragment *) { return true; } +}; + +class MCOrgFragment : public MCFragment { + /// Offset - The offset this fragment should start at. + MCValue Offset; + + /// Value - Value to use for filling bytes. + int8_t Value; + +public: + MCOrgFragment(MCValue _Offset, int8_t _Value, MCSectionData *SD = 0) + : MCFragment(FT_Org, SD), + Offset(_Offset), Value(_Value) {} + + /// @name Accessors + /// @{ + + uint64_t getMaxFileSize() const { + // FIXME: This doesn't make much sense. + return ~UINT64_C(0); + } + + MCValue getOffset() const { return Offset; } + + uint8_t getValue() const { return Value; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Org; + } + static bool classof(const MCOrgFragment *) { return true; } +}; + +/// MCZeroFillFragment - Represent data which has a fixed size and alignment, +/// but requires no physical space in the object file. +class MCZeroFillFragment : public MCFragment { + /// Size - The size of this fragment. + uint64_t Size; + + /// Alignment - The alignment for this fragment. + unsigned Alignment; + +public: + MCZeroFillFragment(uint64_t _Size, unsigned _Alignment, MCSectionData *SD = 0) + : MCFragment(FT_ZeroFill, SD), + Size(_Size), Alignment(_Alignment) {} + + /// @name Accessors + /// @{ + + uint64_t getMaxFileSize() const { + // FIXME: This also doesn't make much sense, this method is misnamed. + return ~UINT64_C(0); + } + + uint64_t getSize() const { return Size; } + + unsigned getAlignment() const { return Alignment; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_ZeroFill; + } + static bool classof(const MCZeroFillFragment *) { return true; } +}; + +// FIXME: Should this be a separate class, or just merged into MCSection? Since +// we anticipate the fast path being through an MCAssembler, the only reason to +// keep it out is for API abstraction. +class MCSectionData : public ilist_node { + MCSectionData(const MCSectionData&); // DO NOT IMPLEMENT + void operator=(const MCSectionData&); // DO NOT IMPLEMENT + +public: + /// Fixup - Represent a fixed size region of bytes inside some fragment which + /// needs to be rewritten. This region will either be rewritten by the + /// assembler or cause a relocation entry to be generated. + struct Fixup { + /// Fragment - The fragment containing the fixup. + MCFragment *Fragment; + + /// Offset - The offset inside the fragment which needs to be rewritten. + uint64_t Offset; + + /// Value - The expression to eventually write into the fragment. + // + // FIXME: We could probably get away with requiring the client to pass in an + // owned reference whose lifetime extends past that of the fixup. + MCValue Value; + + /// Size - The fixup size. + unsigned Size; + + /// FixedValue - The value to replace the fix up by. + // + // FIXME: This should not be here. 
+ uint64_t FixedValue; + + public: + Fixup(MCFragment &_Fragment, uint64_t _Offset, const MCValue &_Value, + unsigned _Size) + : Fragment(&_Fragment), Offset(_Offset), Value(_Value), Size(_Size), + FixedValue(0) {} + }; + + typedef iplist FragmentListType; + + typedef FragmentListType::const_iterator const_iterator; + typedef FragmentListType::iterator iterator; + + typedef std::vector::const_iterator const_fixup_iterator; + typedef std::vector::iterator fixup_iterator; + +private: + iplist Fragments; + const MCSection *Section; + + /// Alignment - The maximum alignment seen in this section. + unsigned Alignment; + + /// @name Assembler Backend Data + /// @{ + // + // FIXME: This could all be kept private to the assembler implementation. + + /// Address - The computed address of this section. This is ~0 until + /// initialized. + uint64_t Address; + + /// Size - The content size of this section. This is ~0 until initialized. + uint64_t Size; + + /// FileSize - The size of this section in the object file. This is ~0 until + /// initialized. + uint64_t FileSize; + + /// LastFixupLookup - Cache for the last looked up fixup. + mutable unsigned LastFixupLookup; + + /// Fixups - The list of fixups in this section. + std::vector Fixups; + + /// @} + +public: + // Only for use as sentinel. + MCSectionData(); + MCSectionData(const MCSection &Section, MCAssembler *A = 0); + + const MCSection &getSection() const { return *Section; } + + unsigned getAlignment() const { return Alignment; } + void setAlignment(unsigned Value) { Alignment = Value; } + + /// @name Fragment Access + /// @{ + + const FragmentListType &getFragmentList() const { return Fragments; } + FragmentListType &getFragmentList() { return Fragments; } + + iterator begin() { return Fragments.begin(); } + const_iterator begin() const { return Fragments.begin(); } + + iterator end() { return Fragments.end(); } + const_iterator end() const { return Fragments.end(); } + + size_t size() const { return Fragments.size(); } + + bool empty() const { return Fragments.empty(); } + + /// @} + /// @name Fixup Access + /// @{ + + std::vector &getFixups() { + return Fixups; + } + + fixup_iterator fixup_begin() { + return Fixups.begin(); + } + + fixup_iterator fixup_end() { + return Fixups.end(); + } + + size_t fixup_size() const { return Fixups.size(); } + + /// @} + /// @name Assembler Backend Support + /// @{ + // + // FIXME: This could all be kept private to the assembler implementation. + + /// LookupFixup - Look up the fixup for the given \arg Fragment and \arg + /// Offset. + /// + /// If multiple fixups exist for the same fragment and offset it is undefined + /// which one is returned. + // + // FIXME: This isn't horribly slow in practice, but there are much nicer + // solutions to applying the fixups. + const Fixup *LookupFixup(const MCFragment *Fragment, uint64_t Offset) const; + + uint64_t getAddress() const { + assert(Address != ~UINT64_C(0) && "Address not set!"); + return Address; + } + void setAddress(uint64_t Value) { Address = Value; } + + uint64_t getSize() const { + assert(Size != ~UINT64_C(0) && "File size not set!"); + return Size; + } + void setSize(uint64_t Value) { Size = Value; } + + uint64_t getFileSize() const { + assert(FileSize != ~UINT64_C(0) && "File size not set!"); + return FileSize; + } + void setFileSize(uint64_t Value) { FileSize = Value; } + + /// @} +}; + +// FIXME: Same concerns as with SectionData. 
+class MCSymbolData : public ilist_node { +public: + const MCSymbol *Symbol; + + /// Fragment - The fragment this symbol's value is relative to, if any. + MCFragment *Fragment; + + /// Offset - The offset to apply to the fragment address to form this symbol's + /// value. + uint64_t Offset; + + /// IsExternal - True if this symbol is visible outside this translation + /// unit. + unsigned IsExternal : 1; + + /// IsPrivateExtern - True if this symbol is private extern. + unsigned IsPrivateExtern : 1; + + /// CommonSize - The size of the symbol, if it is 'common', or 0. + // + // FIXME: Pack this in with other fields? We could put it in offset, since a + // common symbol can never get a definition. + uint64_t CommonSize; + + /// CommonAlign - The alignment of the symbol, if it is 'common'. + // + // FIXME: Pack this in with other fields? + unsigned CommonAlign; + + /// Flags - The Flags field is used by object file implementations to store + /// additional per symbol information which is not easily classified. + uint32_t Flags; + + /// Index - Index field, for use by the object file implementation. + uint64_t Index; + +public: + // Only for use as sentinel. + MCSymbolData(); + MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, uint64_t _Offset, + MCAssembler *A = 0); + + /// @name Accessors + /// @{ + + const MCSymbol &getSymbol() const { return *Symbol; } + + MCFragment *getFragment() const { return Fragment; } + void setFragment(MCFragment *Value) { Fragment = Value; } + + uint64_t getOffset() const { return Offset; } + void setOffset(uint64_t Value) { Offset = Value; } + + /// @} + /// @name Symbol Attributes + /// @{ + + bool isExternal() const { return IsExternal; } + void setExternal(bool Value) { IsExternal = Value; } + + bool isPrivateExtern() const { return IsPrivateExtern; } + void setPrivateExtern(bool Value) { IsPrivateExtern = Value; } + + /// isCommon - Is this a 'common' symbol. + bool isCommon() const { return CommonSize != 0; } + + /// setCommon - Mark this symbol as being 'common'. + /// + /// \param Size - The size of the symbol. + /// \param Align - The alignment of the symbol. + void setCommon(uint64_t Size, unsigned Align) { + CommonSize = Size; + CommonAlign = Align; + } + + /// getCommonSize - Return the size of a 'common' symbol. + uint64_t getCommonSize() const { + assert(isCommon() && "Not a 'common' symbol!"); + return CommonSize; + } + + /// getCommonAlignment - Return the alignment of a 'common' symbol. + unsigned getCommonAlignment() const { + assert(isCommon() && "Not a 'common' symbol!"); + return CommonAlign; + } + + /// getFlags - Get the (implementation defined) symbol flags. + uint32_t getFlags() const { return Flags; } + + /// setFlags - Set the (implementation defined) symbol flags. + void setFlags(uint32_t Value) { Flags = Value; } + + /// getIndex - Get the (implementation defined) index. + uint64_t getIndex() const { return Index; } + + /// setIndex - Set the (implementation defined) index. + void setIndex(uint64_t Value) { Index = Value; } + + /// @} +}; + +// FIXME: This really doesn't belong here. See comments below. 
+struct IndirectSymbolData {
+  MCSymbol *Symbol;
+  MCSectionData *SectionData;
+};
+
+class MCAssembler {
+public:
+  typedef iplist<MCSectionData> SectionDataListType;
+  typedef iplist<MCSymbolData> SymbolDataListType;
+
+  typedef SectionDataListType::const_iterator const_iterator;
+  typedef SectionDataListType::iterator iterator;
+
+  typedef SymbolDataListType::const_iterator const_symbol_iterator;
+  typedef SymbolDataListType::iterator symbol_iterator;
+
+  typedef std::vector<IndirectSymbolData>::iterator indirect_symbol_iterator;
+
+private:
+  MCAssembler(const MCAssembler&);    // DO NOT IMPLEMENT
+  void operator=(const MCAssembler&); // DO NOT IMPLEMENT
+
+  MCContext &Context;
+
+  raw_ostream &OS;
+
+  iplist<MCSectionData> Sections;
+
+  iplist<MCSymbolData> Symbols;
+
+  std::vector<IndirectSymbolData> IndirectSymbols;
+
+  unsigned SubsectionsViaSymbols : 1;
+
+private:
+  /// LayoutSection - Assign offsets and sizes to the fragments in the section
+  /// \arg SD, and update the section size. The section file offset should
+  /// already have been computed.
+  void LayoutSection(MCSectionData &SD);
+
+public:
+  /// Construct a new assembler instance.
+  ///
+  /// \arg OS - The stream to output to.
+  //
+  // FIXME: How are we going to parameterize this? Two obvious options are stay
+  // concrete and require clients to pass in a target-like object. The other
+  // option is to make this abstract, and have targets provide concrete
+  // implementations as we do with AsmParser.
+  MCAssembler(MCContext &_Context, raw_ostream &OS);
+  ~MCAssembler();
+
+  MCContext &getContext() const { return Context; }
+
+  /// Finish - Do final processing and write the object to the output stream.
+  void Finish();
+
+  // FIXME: This does not belong here.
+  bool getSubsectionsViaSymbols() const {
+    return SubsectionsViaSymbols;
+  }
+  void setSubsectionsViaSymbols(bool Value) {
+    SubsectionsViaSymbols = Value;
+  }
+
+  /// @name Section List Access
+  /// @{
+
+  const SectionDataListType &getSectionList() const { return Sections; }
+  SectionDataListType &getSectionList() { return Sections; }
+
+  iterator begin() { return Sections.begin(); }
+  const_iterator begin() const { return Sections.begin(); }
+
+  iterator end() { return Sections.end(); }
+  const_iterator end() const { return Sections.end(); }
+
+  size_t size() const { return Sections.size(); }
+
+  /// @}
+  /// @name Symbol List Access
+  /// @{
+
+  const SymbolDataListType &getSymbolList() const { return Symbols; }
+  SymbolDataListType &getSymbolList() { return Symbols; }
+
+  symbol_iterator symbol_begin() { return Symbols.begin(); }
+  const_symbol_iterator symbol_begin() const { return Symbols.begin(); }
+
+  symbol_iterator symbol_end() { return Symbols.end(); }
+  const_symbol_iterator symbol_end() const { return Symbols.end(); }
+
+  size_t symbol_size() const { return Symbols.size(); }
+
+  /// @}
+  /// @name Indirect Symbol List Access
+  /// @{
+
+  // FIXME: This is a total hack, this should not be here. Once things are
+  // factored so that the streamer has direct access to the .o writer, it can
+  // disappear.
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
new file mode 100644
index 0000000000000..ad42dc2e5b46e
--- /dev/null
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -0,0 +1,34 @@
+//===-- llvm/MC/MCCodeEmitter.h - Instruction Encoding ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCCODEEMITTER_H
+#define LLVM_MC_MCCODEEMITTER_H
+
+namespace llvm {
+class MCInst;
+class raw_ostream;
+
+/// MCCodeEmitter - Generic instruction encoding interface.
+class MCCodeEmitter {
+  MCCodeEmitter(const MCCodeEmitter &);   // DO NOT IMPLEMENT
+  void operator=(const MCCodeEmitter &);  // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+  MCCodeEmitter();
+
+public:
+  virtual ~MCCodeEmitter();
+
+  /// EncodeInstruction - Encode the given \arg Inst to bytes on the output
+  /// stream \arg OS.
+  virtual void EncodeInstruction(const MCInst &Inst, raw_ostream &OS) const = 0;
+};
+
+} // End llvm namespace
+
+#endif
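MCCodeEmitter is a pure interface: targets subclass it and write raw encoded bytes onto the stream. A minimal sketch of a conforming subclass; the one-byte "encoding" is a placeholder, not any real target's scheme:

#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Toy emitter: writes the low byte of the opcode. A real target would
// consult its encoding tables and emit operand bits as well.
class ToyCodeEmitter : public MCCodeEmitter {
public:
  virtual void EncodeInstruction(const MCInst &Inst, raw_ostream &OS) const {
    OS << char(Inst.getOpcode() & 0xFF);
  }
};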
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 846e195139de1..955aa8b083888 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -18,8 +18,11 @@ namespace llvm {
   class MCValue;
   class MCSection;
   class MCSymbol;
+  class StringRef;
 
-  /// MCContext - Context object for machine code objects.
+  /// MCContext - Context object for machine code objects. This class owns all
+  /// of the sections that it creates.
+  ///
   class MCContext {
     MCContext(const MCContext&); // DO NOT IMPLEMENT
     MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT
@@ -33,32 +36,33 @@ namespace llvm {
     /// SymbolValues - Bindings of symbols to values.
     //
     // FIXME: Is there a good reason to not just put this in the MCSymbol?
-    DenseMap<MCSymbol*, MCValue> SymbolValues;
+    DenseMap<const MCSymbol*, MCValue> SymbolValues;
 
     /// Allocator - Allocator object used for creating machine code objects.
     ///
     /// We use a bump pointer allocator to avoid the need to track all allocated
     /// objects.
     BumpPtrAllocator Allocator;
-
   public:
     MCContext();
     ~MCContext();
 
-    /// GetSection - Get or create a new section with the given @param Name.
-    MCSection *GetSection(const char *Name);
-
+    /// @name Symbol Management
+    /// @{
+
     /// CreateSymbol - Create a new symbol with the specified @param Name.
     ///
     /// @param Name - The symbol name, which must be unique across all symbols.
-    MCSymbol *CreateSymbol(const char *Name);
+    MCSymbol *CreateSymbol(const StringRef &Name);
 
     /// GetOrCreateSymbol - Lookup the symbol inside with the specified
     /// @param Name. If it exists, return it. If not, create a forward
     /// reference and return it.
     ///
     /// @param Name - The symbol name, which must be unique across all symbols.
-    MCSymbol *GetOrCreateSymbol(const char *Name);
+    /// @param IsTemporary - Whether this symbol is an assembler temporary,
+    /// which should not survive into the symbol table for the translation unit.
+    MCSymbol *GetOrCreateSymbol(const StringRef &Name);
 
     /// CreateTemporarySymbol - Create a new temporary symbol with the specified
     /// @param Name.
@@ -66,22 +70,26 @@ namespace llvm {
     /// @param Name - The symbol name, for debugging purposes only, temporary
     /// symbols do not survive assembly. If non-empty the name must be unique
     /// across all symbols.
-    MCSymbol *CreateTemporarySymbol(const char *Name = "");
+    MCSymbol *CreateTemporarySymbol(const StringRef &Name = "");
 
     /// LookupSymbol - Get the symbol for @param Name, or null.
-    MCSymbol *LookupSymbol(const char *Name) const;
+    MCSymbol *LookupSymbol(const StringRef &Name) const;
 
-    /// ClearSymbolValue - Erase a value binding for @param Symbol, if one
-    /// exists.
-    void ClearSymbolValue(MCSymbol *Symbol);
+    /// @}
+    /// @name Symbol Value Table
+    /// @{
 
-    /// SetSymbolValue - Set the value binding for @param Symbol to @param
-    /// Value.
-    void SetSymbolValue(MCSymbol *Symbol, const MCValue &Value);
+    /// ClearSymbolValue - Erase a value binding for @arg Symbol, if one exists.
+    void ClearSymbolValue(const MCSymbol *Symbol);
 
-    /// GetSymbolValue - Return the current value for @param Symbol, or null if
+    /// SetSymbolValue - Set the value binding for @arg Symbol to @arg Value.
+    void SetSymbolValue(const MCSymbol *Symbol, const MCValue &Value);
+
+    /// GetSymbolValue - Return the current value for @arg Symbol, or null if
     /// none exists.
-    const MCValue *GetSymbolValue(MCSymbol *Symbol) const;
+    const MCValue *GetSymbolValue(const MCSymbol *Symbol) const;
+
+    /// @}
 
     void *Allocate(unsigned Size, unsigned Align = 8) {
       return Allocator.Allocate(Size, Align);
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
new file mode 100644
index 0000000000000..ef10b80359361
--- /dev/null
+++ b/include/llvm/MC/MCDisassembler.h
@@ -0,0 +1,50 @@
+//===-- llvm/MC/MCDisassembler.h - Disassembler interface -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MCDISASSEMBLER_H
+#define MCDISASSEMBLER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+/// MCDisassembler - Superclass for all disassemblers. Consumes a memory region
+/// and provides an array of assembly instructions.
+class MCDisassembler {
+public:
+  /// Constructor - Performs initial setup for the disassembler.
+  MCDisassembler() {}
+
+  virtual ~MCDisassembler();
+
+  /// getInstruction - Returns the disassembly of a single instruction.
+  ///
+  /// @param instr - An MCInst to populate with the contents of the
+  ///                instruction.
+  /// @param size - A value to populate with the size of the instruction, or
+  ///               the number of bytes consumed while attempting to decode
+  ///               an invalid instruction.
+  /// @param region - The memory object to use as a source for machine code.
+  /// @param address - The address, in the memory space of region, of the first
+  ///                  byte of the instruction.
+  /// @param vStream - The stream to print warnings and diagnostic messages on.
+  /// @return - True if the instruction is valid; false otherwise.
+ virtual bool getInstruction(MCInst& instr, + uint64_t& size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream) const = 0; +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h new file mode 100644 index 0000000000000..19a32e7addedf --- /dev/null +++ b/include/llvm/MC/MCExpr.h @@ -0,0 +1,328 @@ +//===- MCExpr.h - Assembly Level Expressions --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCEXPR_H +#define LLVM_MC_MCEXPR_H + +#include "llvm/Support/Casting.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCAsmInfo; +class MCContext; +class MCSymbol; +class MCValue; +class raw_ostream; +class StringRef; + +/// MCExpr - Base class for the full range of assembler expressions which are +/// needed for parsing. +class MCExpr { +public: + enum ExprKind { + Binary, ///< Binary expressions. + Constant, ///< Constant expressions. + SymbolRef, ///< References to labels and assigned expressions. + Unary ///< Unary expressions. + }; + +private: + ExprKind Kind; + + MCExpr(const MCExpr&); // DO NOT IMPLEMENT + void operator=(const MCExpr&); // DO NOT IMPLEMENT + +protected: + MCExpr(ExprKind _Kind) : Kind(_Kind) {} + +public: + /// @name Accessors + /// @{ + + ExprKind getKind() const { return Kind; } + + /// @} + /// @name Utility Methods + /// @{ + + void print(raw_ostream &OS, const MCAsmInfo *MAI) const; + void dump() const; + + /// @} + /// @name Expression Evaluation + /// @{ + + /// EvaluateAsAbsolute - Try to evaluate the expression to an absolute value. + /// + /// @param Res - The absolute value, if evaluation succeeds. + /// @result - True on success. + bool EvaluateAsAbsolute(MCContext &Ctx, int64_t &Res) const; + + /// EvaluateAsRelocatable - Try to evaluate the expression to a relocatable + /// value, i.e. an expression of the fixed form (a - b + constant). + /// + /// @param Res - The relocatable value, if evaluation succeeds. + /// @result - True on success. + bool EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const; + + /// @} + + static bool classof(const MCExpr *) { return true; } +}; + +//// MCConstantExpr - Represent a constant integer expression. +class MCConstantExpr : public MCExpr { + int64_t Value; + + MCConstantExpr(int64_t _Value) + : MCExpr(MCExpr::Constant), Value(_Value) {} + +public: + /// @name Construction + /// @{ + + static const MCConstantExpr *Create(int64_t Value, MCContext &Ctx); + + /// @} + /// @name Accessors + /// @{ + + int64_t getValue() const { return Value; } + + /// @} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Constant; + } + static bool classof(const MCConstantExpr *) { return true; } +}; + +/// MCSymbolRefExpr - Represent a reference to a symbol from inside an +/// expression. +/// +/// A symbol reference in an expression may be a use of a label, a use of an +/// assembler variable (defined constant), or constitute an implicit definition +/// of the symbol as external. 
+class MCSymbolRefExpr : public MCExpr { + const MCSymbol *Symbol; + + MCSymbolRefExpr(const MCSymbol *_Symbol) + : MCExpr(MCExpr::SymbolRef), Symbol(_Symbol) {} + +public: + /// @name Construction + /// @{ + + static const MCSymbolRefExpr *Create(const MCSymbol *Symbol, MCContext &Ctx); + static const MCSymbolRefExpr *Create(const StringRef &Name, MCContext &Ctx); + + + + /// @} + /// @name Accessors + /// @{ + + const MCSymbol &getSymbol() const { return *Symbol; } + + /// @} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::SymbolRef; + } + static bool classof(const MCSymbolRefExpr *) { return true; } +}; + +/// MCUnaryExpr - Unary assembler expressions. +class MCUnaryExpr : public MCExpr { +public: + enum Opcode { + LNot, ///< Logical negation. + Minus, ///< Unary minus. + Not, ///< Bitwise negation. + Plus ///< Unary plus. + }; + +private: + Opcode Op; + const MCExpr *Expr; + + MCUnaryExpr(Opcode _Op, const MCExpr *_Expr) + : MCExpr(MCExpr::Unary), Op(_Op), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const MCUnaryExpr *Create(Opcode Op, const MCExpr *Expr, + MCContext &Ctx); + static const MCUnaryExpr *CreateLNot(const MCExpr *Expr, MCContext &Ctx) { + return Create(LNot, Expr, Ctx); + } + static const MCUnaryExpr *CreateMinus(const MCExpr *Expr, MCContext &Ctx) { + return Create(Minus, Expr, Ctx); + } + static const MCUnaryExpr *CreateNot(const MCExpr *Expr, MCContext &Ctx) { + return Create(Not, Expr, Ctx); + } + static const MCUnaryExpr *CreatePlus(const MCExpr *Expr, MCContext &Ctx) { + return Create(Plus, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this unary expression. + Opcode getOpcode() const { return Op; } + + /// getSubExpr - Get the child of this unary expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Unary; + } + static bool classof(const MCUnaryExpr *) { return true; } +}; + +/// MCBinaryExpr - Binary assembler expressions. +class MCBinaryExpr : public MCExpr { +public: + enum Opcode { + Add, ///< Addition. + And, ///< Bitwise and. + Div, ///< Division. + EQ, ///< Equality comparison. + GT, ///< Greater than comparison. + GTE, ///< Greater than or equal comparison. + LAnd, ///< Logical and. + LOr, ///< Logical or. + LT, ///< Less than comparison. + LTE, ///< Less than or equal comparison. + Mod, ///< Modulus. + Mul, ///< Multiplication. + NE, ///< Inequality comparison. + Or, ///< Bitwise or. + Shl, ///< Bitwise shift left. + Shr, ///< Bitwise shift right. + Sub, ///< Subtraction. + Xor ///< Bitwise exclusive or. 
+ }; + +private: + Opcode Op; + const MCExpr *LHS, *RHS; + + MCBinaryExpr(Opcode _Op, const MCExpr *_LHS, const MCExpr *_RHS) + : MCExpr(MCExpr::Binary), Op(_Op), LHS(_LHS), RHS(_RHS) {} + +public: + /// @name Construction + /// @{ + + static const MCBinaryExpr *Create(Opcode Op, const MCExpr *LHS, + const MCExpr *RHS, MCContext &Ctx); + static const MCBinaryExpr *CreateAdd(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Add, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateAnd(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(And, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateDiv(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Div, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateEQ(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(EQ, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateGT(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(GT, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateGTE(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(GTE, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLAnd(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LAnd, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLOr(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LOr, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLT(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LT, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLTE(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LTE, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateMod(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Mod, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateMul(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Mul, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateNE(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(NE, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateOr(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Or, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateShl(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Shl, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateShr(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Shr, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateSub(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Sub, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateXor(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Xor, LHS, RHS, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this binary expression. + Opcode getOpcode() const { return Op; } + + /// getLHS - Get the left-hand side expression of the binary operator. + const MCExpr *getLHS() const { return LHS; } + + /// getRHS - Get the right-hand side expression of the binary operator. 
+  const MCExpr *getRHS() const { return RHS; }
+
+  /// @}
+
+  static bool classof(const MCExpr *E) {
+    return E->getKind() == MCExpr::Binary;
+  }
+  static bool classof(const MCBinaryExpr *) { return true; }
+};
+
+} // end namespace llvm
+
+#endif
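The Create* factories above are the only way to build expression nodes; they allocate out of the MCContext. A sketch of building the relocatable form (a - b) + 4 and folding it when possible, assuming two symbols already exist in the context (the function name is hypothetical):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
using namespace llvm;

// Build (A - B) + 4. Folding to a constant only succeeds when both
// symbols have absolute values bound in the context.
const MCExpr *buildDiffPlus4(MCContext &Ctx, MCSymbol *A, MCSymbol *B) {
  const MCExpr *LHS = MCSymbolRefExpr::Create(A, Ctx);
  const MCExpr *RHS = MCSymbolRefExpr::Create(B, Ctx);
  const MCExpr *Diff = MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
  const MCExpr *Res =
      MCBinaryExpr::CreateAdd(Diff, MCConstantExpr::Create(4, Ctx), Ctx);

  int64_t Value;
  if (Res->EvaluateAsAbsolute(Ctx, Value))
    return MCConstantExpr::Create(Value, Ctx); // folded to a constant
  return Res;
}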
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index 8b638d4c743ee..0fc4d186b975c 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -16,12 +16,13 @@
 #ifndef LLVM_MC_MCINST_H
 #define LLVM_MC_MCINST_H
 
-#include "llvm/MC/MCValue.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
+class raw_ostream;
+class MCAsmInfo;
+class MCExpr;
 
 /// MCOperand - Instances of this class represent operands of the MCInst class.
 /// This is a simple discriminated union.
@@ -30,19 +31,14 @@ class MCOperand {
     kInvalid,   ///< Uninitialized.
     kRegister,  ///< Register operand.
     kImmediate, ///< Immediate operand.
-    kMBBLabel,  ///< Basic block label.
-    kMCValue    ///< Relocatable immediate operand.
+    kExpr       ///< Relocatable immediate operand.
   };
   unsigned char Kind;
 
   union {
     unsigned RegVal;
     int64_t ImmVal;
-    MCValue MCValueVal;
-    struct {
-      unsigned FunctionNo;
-      unsigned BlockNo;
-    } MBBLabel;
+    const MCExpr *ExprVal;
   };
 public:
@@ -52,8 +48,7 @@ public:
   bool isValid() const { return Kind != kInvalid; }
   bool isReg() const { return Kind == kRegister; }
   bool isImm() const { return Kind == kImmediate; }
-  bool isMBBLabel() const { return Kind == kMBBLabel; }
-  bool isMCValue() const { return Kind == kMCValue; }
+  bool isExpr() const { return Kind == kExpr; }
 
   /// getReg - Returns the register number.
   unsigned getReg() const {
@@ -76,41 +71,36 @@ public:
     ImmVal = Val;
   }
 
-  unsigned getMBBLabelFunction() const {
-    assert(isMBBLabel() && "Wrong accessor");
-    return MBBLabel.FunctionNo;
+  const MCExpr *getExpr() const {
+    assert(isExpr() && "This is not an expression");
+    return ExprVal;
   }
-  unsigned getMBBLabelBlock() const {
-    assert(isMBBLabel() && "Wrong accessor");
-    return MBBLabel.BlockNo;
-  }
-
-  const MCValue &getMCValue() const {
-    assert(isMCValue() && "This is not an MCValue");
-    return MCValueVal;
-  }
-  void setMCValue(const MCValue &Val) {
-    assert(isMCValue() && "This is not an MCValue");
-    MCValueVal = Val;
+  void setExpr(const MCExpr *Val) {
+    assert(isExpr() && "This is not an expression");
+    ExprVal = Val;
   }
 
-  void MakeReg(unsigned Reg) {
-    Kind = kRegister;
-    RegVal = Reg;
-  }
-  void MakeImm(int64_t Val) {
-    Kind = kImmediate;
-    ImmVal = Val;
+  static MCOperand CreateReg(unsigned Reg) {
+    MCOperand Op;
+    Op.Kind = kRegister;
+    Op.RegVal = Reg;
+    return Op;
   }
-  void MakeMBBLabel(unsigned Fn, unsigned MBB) {
-    Kind = kMBBLabel;
-    MBBLabel.FunctionNo = Fn;
-    MBBLabel.BlockNo = MBB;
+  static MCOperand CreateImm(int64_t Val) {
+    MCOperand Op;
+    Op.Kind = kImmediate;
+    Op.ImmVal = Val;
+    return Op;
   }
-  void MakeMCValue(const MCValue &Val) {
-    Kind = kMCValue;
-    MCValueVal = Val;
+  static MCOperand CreateExpr(const MCExpr *Val) {
+    MCOperand Op;
+    Op.Kind = kExpr;
+    Op.ExprVal = Val;
+    return Op;
   }
+
+  void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+  void dump() const;
 };
 
 
@@ -120,13 +110,12 @@ class MCInst {
   unsigned Opcode;
   SmallVector<MCOperand, 8> Operands;
 public:
-  MCInst() : Opcode(~0U) {}
+  MCInst() : Opcode(0) {}
 
   void setOpcode(unsigned Op) { Opcode = Op; }
   unsigned getOpcode() const { return Opcode; }
-  DebugLoc getDebugLoc() const { return DebugLoc(); }
-
+
   const MCOperand &getOperand(unsigned i) const { return Operands[i]; }
   MCOperand &getOperand(unsigned i) { return Operands[i]; }
   unsigned getNumOperands() const { return Operands.size(); }
@@ -134,6 +123,9 @@ public:
   void addOperand(const MCOperand &Op) {
     Operands.push_back(Op);
   }
+
+  void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+  void dump() const;
 };
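MCOperand moves from the mutating Make* methods to value-returning Create* factories. A sketch of assembling a three-operand instruction under the new API; the opcode and register numbers below are placeholders, not a real target's values:

#include "llvm/MC/MCInst.h"
using namespace llvm;

// Build something like "add r1, r2, #42" as an MCInst.
MCInst buildToyAdd() {
  MCInst Inst;
  Inst.setOpcode(1);                          // hypothetical ADD opcode
  Inst.addOperand(MCOperand::CreateReg(1));   // destination register
  Inst.addOperand(MCOperand::CreateReg(2));   // source register
  Inst.addOperand(MCOperand::CreateImm(42));  // immediate
  return Inst;
}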
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
new file mode 100644
index 0000000000000..d62a9dae7c540
--- /dev/null
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -0,0 +1,37 @@
+//===-- MCInstPrinter.h - Convert an MCInst to target assembly syntax ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCINSTPRINTER_H
+#define LLVM_MC_MCINSTPRINTER_H
+
+namespace llvm {
+class MCInst;
+class raw_ostream;
+class MCAsmInfo;
+
+/// MCInstPrinter - This is an instance of a target assembly language printer
+/// that converts an MCInst to valid target assembly syntax.
+class MCInstPrinter {
+protected:
+  raw_ostream &O;
+  const MCAsmInfo &MAI;
+public:
+  MCInstPrinter(raw_ostream &o, const MCAsmInfo &mai) : O(o), MAI(mai) {}
+
+  virtual ~MCInstPrinter();
+
+  /// printInst - Print the specified MCInst to the current raw_ostream.
+  ///
+  virtual void printInst(const MCInst *MI) = 0;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 1b127b52e1c9e..9e071864e65f0 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -15,25 +15,57 @@
 #define LLVM_MC_MCSECTION_H
 
 #include <string>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SectionKind.h"
 
 namespace llvm {
-
+  class MCContext;
+  class MCAsmInfo;
+  class raw_ostream;
+
   /// MCSection - Instances of this class represent a uniqued identifier for a
   /// section in the current translation unit. The MCContext class uniques and
   /// creates these.
   class MCSection {
-    std::string Name;
-  private:
-    friend class MCContext;
-    MCSection(const char *_Name) : Name(_Name) {}
-
     MCSection(const MCSection&);      // DO NOT IMPLEMENT
     void operator=(const MCSection&); // DO NOT IMPLEMENT
+  protected:
+    MCSection(SectionKind K) : Kind(K) {}
+    SectionKind Kind;
   public:
+    virtual ~MCSection();
 
-    const std::string &getName() const { return Name; }
+    SectionKind getKind() const { return Kind; }
+
+    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                      raw_ostream &OS) const = 0;
   };
 
+  class MCSectionCOFF : public MCSection {
+    std::string Name;
+
+    /// IsDirective - This is true if the section name is a directive, not
+    /// something that should be printed with ".section".
+    ///
+    /// FIXME: This is a hack. Switch to a semantic view of the section instead
+    /// of a syntactic one.
+    bool IsDirective;
+
+    MCSectionCOFF(const StringRef &name, bool isDirective, SectionKind K)
+      : MCSection(K), Name(name), IsDirective(isDirective) {
+    }
+  public:
+
+    static MCSectionCOFF *Create(const StringRef &Name, bool IsDirective,
+                                 SectionKind K, MCContext &Ctx);
+
+    const std::string &getName() const { return Name; }
+    bool isDirective() const { return IsDirective; }
+
+    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                      raw_ostream &OS) const;
+  };
+
 } // end namespace llvm
 
 #endif
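Both interfaces above follow the same pattern: an abstract base with a single virtual hook that subclasses fill in. A sketch of a trivial MCInstPrinter subclass; the output syntax is a placeholder (a real printer uses per-opcode routines generated by TableGen):

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Prints instructions as "inst <opcode> (<N> operands)".
class ToyInstPrinter : public MCInstPrinter {
public:
  ToyInstPrinter(raw_ostream &o, const MCAsmInfo &mai)
    : MCInstPrinter(o, mai) {}
  virtual void printInst(const MCInst *MI) {
    O << "inst " << MI->getOpcode()
      << " (" << MI->getNumOperands() << " operands)\n";
  }
};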
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
new file mode 100644
index 0000000000000..57fa903f717a3
--- /dev/null
+++ b/include/llvm/MC/MCSectionELF.h
@@ -0,0 +1,191 @@
+//===- MCSectionELF.h - ELF Machine Code Sections ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionELF class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONELF_H
+#define LLVM_MC_MCSECTIONELF_H
+
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+/// MCSectionELF - This represents a section on linux, lots of unix variants
+/// and some bare metal systems.
+class MCSectionELF : public MCSection {
+  std::string SectionName;
+
+  /// Type - This is the sh_type field of a section, drawn from the enums
+  /// below.
+  unsigned Type;
+
+  /// Flags - This is the sh_flags field of a section, drawn from the enums
+  /// below.
+  unsigned Flags;
+
+  /// IsExplicit - Indicates that this section comes from globals with an
+  /// explicit section specified.
+  bool IsExplicit;
+
+protected:
+  MCSectionELF(const StringRef &Section, unsigned type, unsigned flags,
+               SectionKind K, bool isExplicit)
+    : MCSection(K), SectionName(Section.str()), Type(type), Flags(flags),
+      IsExplicit(isExplicit) {}
+public:
+
+  static MCSectionELF *Create(const StringRef &Section, unsigned Type,
+                              unsigned Flags, SectionKind K, bool isExplicit,
+                              MCContext &Ctx);
+
+  /// ShouldOmitSectionDirective - Decides whether a '.section' directive
+  /// should be printed before the section name.
+  bool ShouldOmitSectionDirective(const char *Name,
+                                  const MCAsmInfo &MAI) const;
+
+  /// ShouldPrintSectionType - Only prints the section type if supported.
+  bool ShouldPrintSectionType(unsigned Ty) const;
+
+  /// HasCommonSymbols - True if this section holds common symbols, this is
+  /// indicated on the ELF object file by a symbol with SHN_COMMON section
+  /// header index.
+  bool HasCommonSymbols() const;
+
+  /// These are the section type and flags fields. An ELF section can have
+  /// only one Type, but can have more than one of the flags specified.
+  ///
+  /// Valid section types.
+  enum {
+    // This value marks the section header as inactive.
+    SHT_NULL = 0x00U,
+
+    // Holds information defined by the program, with custom format and meaning.
+    SHT_PROGBITS = 0x01U,
+
+    // This section holds a symbol table.
+    SHT_SYMTAB = 0x02U,
+
+    // The section holds a string table.
+    SHT_STRTAB = 0x03U,
+
+    // The section holds relocation entries with explicit addends.
+    SHT_RELA = 0x04U,
+
+    // The section holds a symbol hash table.
+    SHT_HASH = 0x05U,
+
+    // Information for dynamic linking.
+    SHT_DYNAMIC = 0x06U,
+
+    // The section holds information that marks the file in some way.
+    SHT_NOTE = 0x07U,
+
+    // A section of this type occupies no space in the file.
+    SHT_NOBITS = 0x08U,
+
+    // The section holds relocation entries without explicit addends.
+    SHT_REL = 0x09U,
+
+    // This section type is reserved but has unspecified semantics.
+    SHT_SHLIB = 0x0AU,
+
+    // This section holds a symbol table.
+    SHT_DYNSYM = 0x0BU,
+
+    // This section contains an array of pointers to initialization functions.
+    SHT_INIT_ARRAY = 0x0EU,
+
+    // This section contains an array of pointers to termination functions.
+    SHT_FINI_ARRAY = 0x0FU,
+
+    // This section contains an array of pointers to functions that are invoked
+    // before all other initialization functions.
+    SHT_PREINIT_ARRAY = 0x10U,
+
+    // A section group is a set of sections that are related and that must be
+    // treated specially by the linker.
+    SHT_GROUP = 0x11U,
+
+    // This section is associated with a section of type SHT_SYMTAB, when the
+    // referenced symbol table contains the escape value SHN_XINDEX.
+    SHT_SYMTAB_SHNDX = 0x12U,
+
+    LAST_KNOWN_SECTION_TYPE = SHT_SYMTAB_SHNDX
+  };
+
+  /// Valid section flags.
+  enum {
+    // The section contains data that should be writable.
+    SHF_WRITE = 0x1U,
+
+    // The section occupies memory during execution.
+    SHF_ALLOC = 0x2U,
+
+    // The section contains executable machine instructions.
+    SHF_EXECINSTR = 0x4U,
+
+    // The data in the section may be merged to eliminate duplication.
+    SHF_MERGE = 0x10U,
+
+    // Elements in the section consist of null-terminated character strings.
+    SHF_STRINGS = 0x20U,
+
+    // A field in this section holds a section header table index.
+    SHF_INFO_LINK = 0x40U,
+
+    // Adds special ordering requirements for link editors.
+    SHF_LINK_ORDER = 0x80U,
+
+    // This section requires special OS-specific processing to avoid incorrect
+    // behavior.
+    SHF_OS_NONCONFORMING = 0x100U,
+
+    // This section is a member of a section group.
+    SHF_GROUP = 0x200U,
+
+    // This section holds Thread-Local Storage.
+    SHF_TLS = 0x400U,
+
+    /// FIRST_TARGET_DEP_FLAG - This is the first flag that subclasses are
+    /// allowed to specify.
+    FIRST_TARGET_DEP_FLAG = 0x800U,
+
+    /// TARGET_INDEP_SHF - This is the bitmask for all the target independent
+    /// section flags. Targets can define their own target flags above these.
+    /// If they do that, they should implement their own MCSectionELF subclasses
+    /// and implement the virtual method hooks below to handle printing needs.
+    TARGET_INDEP_SHF = FIRST_TARGET_DEP_FLAG-1U
+  };
+
+  StringRef getSectionName() const {
+    return StringRef(SectionName);
+  }
+
+  unsigned getType() const { return Type; }
+  unsigned getFlags() const { return Flags; }
+
+  virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                    raw_ostream &OS) const;
+
+  /// PrintTargetSpecificSectionFlags - Targets that define their own
+  /// MCSectionELF subclasses with target specific section flags should
+  /// implement this method if they end up adding letters to the attributes
+  /// list.
+  virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI,
+                                               raw_ostream &OS) const {
+  }
+};
+
+} // end namespace llvm
+
+#endif
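Type and Flags mirror the ELF sh_type and sh_flags words, so a section is described by picking one type and OR-ing flag bits together. A sketch of creating a classic .text section with the factory above, assuming an MCContext is available:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
using namespace llvm;

// ".text" is SHT_PROGBITS and is both allocated and executable, so its
// sh_flags are SHF_ALLOC | SHF_EXECINSTR.
MCSectionELF *createTextSection(MCContext &Ctx) {
  return MCSectionELF::Create(".text", MCSectionELF::SHT_PROGBITS,
                              MCSectionELF::SHF_ALLOC |
                              MCSectionELF::SHF_EXECINSTR,
                              SectionKind::getText(),
                              /*isExplicit=*/false, Ctx);
}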
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
new file mode 100644
index 0000000000000..251c88fa9481d
--- /dev/null
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -0,0 +1,175 @@
+//===- MCSectionMachO.h - MachO Machine Code Sections -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionMachO class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONMACHO_H
+#define LLVM_MC_MCSECTIONMACHO_H
+
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+/// MCSectionMachO - This represents a section on a Mach-O system (used by
+/// Mac OS X). On a Mac system, these are also described in
+/// /usr/include/mach-o/loader.h.
+class MCSectionMachO : public MCSection {
+  char SegmentName[16];  // Not necessarily null terminated!
+  char SectionName[16];  // Not necessarily null terminated!
+
+  /// TypeAndAttributes - This is the SECTION_TYPE and SECTION_ATTRIBUTES
+  /// field of a section, drawn from the enums below.
+  unsigned TypeAndAttributes;
+
+  /// Reserved2 - The 'reserved2' field of a section, used to represent the
+  /// size of stubs, for example.
+  unsigned Reserved2;
+
+  MCSectionMachO(const StringRef &Segment, const StringRef &Section,
+                 unsigned TAA, unsigned reserved2, SectionKind K)
+    : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+    assert(Segment.size() <= 16 && Section.size() <= 16 &&
+           "Segment or section string too long");
+    for (unsigned i = 0; i != 16; ++i) {
+      if (i < Segment.size())
+        SegmentName[i] = Segment[i];
+      else
+        SegmentName[i] = 0;
+
+      if (i < Section.size())
+        SectionName[i] = Section[i];
+      else
+        SectionName[i] = 0;
+    }
+  }
+public:
+
+  static MCSectionMachO *Create(const StringRef &Segment,
+                                const StringRef &Section,
+                                unsigned TypeAndAttributes,
+                                unsigned Reserved2,
+                                SectionKind K, MCContext &Ctx);
+
+  /// These are the section type and attributes fields. A MachO section can
+  /// have only one Type, but can have any of the attributes specified.
+  enum {
+    // TypeAndAttributes bitmasks.
+    SECTION_TYPE       = 0x000000FFU,
+    SECTION_ATTRIBUTES = 0xFFFFFF00U,
+
+    // Valid section types.
+
+    /// S_REGULAR - Regular section.
+    S_REGULAR = 0x00U,
+    /// S_ZEROFILL - Zero fill on demand section.
+    S_ZEROFILL = 0x01U,
+    /// S_CSTRING_LITERALS - Section with literal C strings.
+    S_CSTRING_LITERALS = 0x02U,
+    /// S_4BYTE_LITERALS - Section with 4 byte literals.
+    S_4BYTE_LITERALS = 0x03U,
+    /// S_8BYTE_LITERALS - Section with 8 byte literals.
+    S_8BYTE_LITERALS = 0x04U,
+    /// S_LITERAL_POINTERS - Section with pointers to literals.
+    S_LITERAL_POINTERS = 0x05U,
+    /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers.
+    S_NON_LAZY_SYMBOL_POINTERS = 0x06U,
+    /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers.
+    S_LAZY_SYMBOL_POINTERS = 0x07U,
+    /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in
+    /// the Reserved2 field.
+    S_SYMBOL_STUBS = 0x08U,
+    /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for
+    /// initialization.
+    S_MOD_INIT_FUNC_POINTERS = 0x09U,
+    /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for
+    /// termination.
+    S_MOD_TERM_FUNC_POINTERS = 0x0AU,
+    /// S_COALESCED - Section contains symbols that are to be coalesced.
+    S_COALESCED = 0x0BU,
+    /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4
+    /// gigabytes).
+    S_GB_ZEROFILL = 0x0CU,
+    /// S_INTERPOSING - Section with only pairs of function pointers for
+    /// interposing.
+    S_INTERPOSING = 0x0DU,
+    /// S_16BYTE_LITERALS - Section with only 16 byte literals.
+    S_16BYTE_LITERALS = 0x0EU,
+    /// S_DTRACE_DOF - Section contains DTrace Object Format.
+    S_DTRACE_DOF = 0x0FU,
+    /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to
+    /// lazy loaded dylibs.
+    S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10U,
+
+    LAST_KNOWN_SECTION_TYPE = S_LAZY_DYLIB_SYMBOL_POINTERS,
+
+    // Valid section attributes.
+
+    /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine
+    /// instructions.
+    S_ATTR_PURE_INSTRUCTIONS = 1U << 31,
+    /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be
+    /// in a ranlib table of contents.
+    S_ATTR_NO_TOC = 1U << 30,
+    /// S_ATTR_STRIP_STATIC_SYMS - Ok to strip static symbols in this section
+    /// in files with the MH_DYLDLINK flag.
+    S_ATTR_STRIP_STATIC_SYMS = 1U << 29,
+    /// S_ATTR_NO_DEAD_STRIP - No dead stripping.
+    S_ATTR_NO_DEAD_STRIP = 1U << 28,
+    /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks.
+    S_ATTR_LIVE_SUPPORT = 1U << 27,
+    /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by
+    /// dyld.
+    S_ATTR_SELF_MODIFYING_CODE = 1U << 26,
+    /// S_ATTR_DEBUG - A debug section.
+    S_ATTR_DEBUG = 1U << 25,
+    /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions.
+    S_ATTR_SOME_INSTRUCTIONS = 1U << 10,
+    /// S_ATTR_EXT_RELOC - Section has external relocation entries.
+    S_ATTR_EXT_RELOC = 1U << 9,
+    /// S_ATTR_LOC_RELOC - Section has local relocation entries.
+    S_ATTR_LOC_RELOC = 1U << 8
+  };
+
+  StringRef getSegmentName() const {
+    // SegmentName is not necessarily null terminated!
+    if (SegmentName[15])
+      return StringRef(SegmentName, 16);
+    return StringRef(SegmentName);
+  }
+  StringRef getSectionName() const {
+    // SectionName is not necessarily null terminated!
+    if (SectionName[15])
+      return StringRef(SectionName, 16);
+    return StringRef(SectionName);
+  }
+
+  unsigned getTypeAndAttributes() const { return TypeAndAttributes; }
+  unsigned getStubSize() const { return Reserved2; }
+
+  /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+  /// This is a string that can appear after a .section directive in a mach-o
+  /// flavored .s file. If successful, this fills in the specified Out
+  /// parameters and returns an empty string. When an invalid section
+  /// specifier is present, this returns a string indicating the problem.
+  static std::string ParseSectionSpecifier(StringRef Spec,       // In.
+                                           StringRef &Segment,   // Out.
+                                           StringRef &Section,   // Out.
+                                           unsigned &TAA,        // Out.
+                                           unsigned &StubSize);  // Out.
+
+  virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                    raw_ostream &OS) const;
+};
+
+} // end namespace llvm
+
+#endif
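ParseSectionSpecifier is a static helper that splits a Mach-O .section specifier into its parts, returning an empty string on success. A sketch of the calling convention; the wrapper function is hypothetical:

#include <string>
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSectionMachO.h"
using namespace llvm;

// Returns true if Spec names a valid Mach-O section, e.g.
// "__TEXT,__text,regular,pure_instructions".
bool isValidMachOSpec(StringRef Spec) {
  StringRef Segment, Section;
  unsigned TAA = 0, StubSize = 0;
  std::string Err = MCSectionMachO::ParseSectionSpecifier(
      Spec, Segment, Section, TAA, StubSize);
  return Err.empty();  // a non-empty string describes the problem
}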
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 54de8a31076ff..248e6b0a4481b 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -17,17 +17,21 @@
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
+  class MCAsmInfo;
+  class MCCodeEmitter;
   class MCContext;
-  class MCValue;
+  class MCExpr;
   class MCInst;
+  class MCInstPrinter;
   class MCSection;
   class MCSymbol;
+  class StringRef;
   class raw_ostream;
 
   /// MCStreamer - Streaming machine code generation interface. This interface
   /// is intended to provide a programmatic interface that is very similar to the
   /// level that an assembler .s file provides. It has callbacks to emit bytes,
-  /// "emit directives", etc. The implementation of this interface retains
+  /// handle directives, etc. The implementation of this interface retains
   /// state to know what the current section is etc.
   ///
   /// There are multiple implementations of this interface: one for writing out
@@ -53,6 +57,10 @@ namespace llvm {
       SymbolAttrLast = WeakReference
     };
 
+    enum AssemblerFlag {
+      SubsectionsViaSymbols   /// .subsections_via_symbols (Apple)
+    };
+
   private:
     MCContext &Context;
 
@@ -62,6 +70,10 @@ namespace llvm {
   protected:
     MCStreamer(MCContext &Ctx);
 
+    /// CurSection - This is the current section code is being emitted to; it
+    /// is kept up to date by SwitchSection.
+    const MCSection *CurSection;
+
   public:
     virtual ~MCStreamer();
 
@@ -69,13 +81,17 @@ namespace llvm {
     /// @name Symbol & Section Management
     /// @{
+
+    /// getCurrentSection - Return the current section that the streamer is
+    /// emitting code to.
+    const MCSection *getCurrentSection() const { return CurSection; }
 
     /// SwitchSection - Set the current section where code is being emitted to
-    /// @param Section.
+    /// @param Section. This is required to update CurSection.
     ///
     /// This corresponds to assembler directives like .section, .text, etc.
-    virtual void SwitchSection(MCSection *Section) = 0;
-
+    virtual void SwitchSection(const MCSection *Section) = 0;
+
     /// EmitLabel - Emit a label for @param Symbol into the current section.
     ///
     /// This corresponds to an assembler statement such as:
@@ -84,11 +100,11 @@ namespace llvm {
     /// @param Symbol - The symbol to emit. A given symbol should only be
     /// emitted as a label once, and symbols emitted as a label should never be
    /// used in an assignment.
-    //
-    // FIXME: What to do about the current section? Should we get rid of the
-    // symbol section in the constructor and initialize it here?
     virtual void EmitLabel(MCSymbol *Symbol) = 0;
 
+    /// EmitAssemblerFlag - Note in the output the specified @param Flag.
+    virtual void EmitAssemblerFlag(AssemblerFlag Flag) = 0;
+
     /// EmitAssignment - Emit an assignment of @param Value to @param Symbol.
     ///
     /// This corresponds to an assembler statement such as:
@@ -100,31 +116,46 @@ namespace llvm {
     ///
     /// @param Symbol - The symbol being assigned to.
     /// @param Value - The value for the symbol.
-    /// @param MakeAbsolute - If true, then the symbol should be given the
-    /// absolute value of @param Value, even if @param Value would be
-    /// relocatable expression. This corresponds to the ".set" directive.
-    virtual void EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
-                                bool MakeAbsolute = false) = 0;
+    virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) = 0;
 
     /// EmitSymbolAttribute - Add the given @param Attribute to @param Symbol.
-    //
-    // FIXME: This doesn't make much sense, could we just have attributes be on
-    // the symbol and make the printer smart enough to add the right symbols?
-    // This should work as long as the order of attributes in the file doesn't
-    // matter.
     virtual void EmitSymbolAttribute(MCSymbol *Symbol,
                                      SymbolAttr Attribute) = 0;
 
+    /// EmitSymbolDesc - Set the @param DescValue for the @param Symbol.
+    ///
+    /// @param Symbol - The symbol to have its n_desc field set.
+    /// @param DescValue - The value to set into the n_desc field.
+    virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) = 0;
+
+    /// EmitCommonSymbol - Emit a common or local common symbol.
+    ///
+    /// @param Symbol - The common symbol to emit.
+    /// @param Size - The size of the common symbol.
+    /// @param ByteAlignment - The alignment of the symbol if
+    /// non-zero. This must be a power of 2 on some targets.
+    virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+                                  unsigned ByteAlignment) = 0;
+
+    /// EmitZerofill - Emit the zerofill section and an optional symbol.
+    ///
+    /// @param Section - The zerofill section to create and/or to put the
+    /// symbol in.
+    /// @param Symbol - The zerofill symbol to emit, if non-NULL.
+    /// @param Size - The size of the zerofill symbol.
+    /// @param ByteAlignment - The alignment of the zerofill symbol if
+    /// non-zero. This must be a power of 2 on some targets.
+    virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                              unsigned Size = 0, unsigned ByteAlignment = 0) = 0;
+
     /// @}
     /// @name Generating Data
     /// @{
 
-    /// EmitBytes - Emit @param Length bytes starting at @param Data into the
-    /// output.
+    /// EmitBytes - Emit the bytes in \arg Data into the output.
     ///
     /// This is used to implement assembler directives such as .byte, .ascii,
     /// etc.
-    virtual void EmitBytes(const char *Data, unsigned Length) = 0;
+    virtual void EmitBytes(const StringRef &Data) = 0;
 
     /// EmitValue - Emit the expression @param Value into the output as a native
     /// integer of the given @param Size bytes.
@@ -135,7 +166,7 @@ namespace llvm {
     /// @param Value - The value to emit.
     /// @param Size - The size of the integer (in bytes) to emit. This must
     /// match a native machine width.
-    virtual void EmitValue(const MCValue &Value, unsigned Size) = 0;
+    virtual void EmitValue(const MCExpr *Value, unsigned Size) = 0;
 
     /// EmitValueToAlignment - Emit some number of copies of @param Value until
     /// the byte alignment @param ByteAlignment is reached.
@@ -163,12 +194,10 @@ namespace llvm {
     ///
     /// This is used to implement assembler directives such as .org.
     ///
-    /// @param Offset - The offset to reach.This may be an expression, but the
+    /// @param Offset - The offset to reach. This may be an expression, but the
    /// expression must be associated with the current section.
     /// @param Value - The value to use when filling bytes.
-    //
-    // FIXME: How are we going to signal failures out of this?
-    virtual void EmitValueToOffset(const MCValue &Offset,
+    virtual void EmitValueToOffset(const MCExpr *Offset,
                                    unsigned char Value = 0) = 0;
 
     /// @}
@@ -181,10 +210,17 @@ namespace llvm {
     virtual void Finish() = 0;
   };
 
+  /// createNullStreamer - Create a dummy machine code streamer, which does
+  /// nothing. This is useful for timing the assembler front end.
+  MCStreamer *createNullStreamer(MCContext &Ctx);
+
   /// createAsmStreamer - Create a machine code streamer which will print out
   /// assembly for the native target, suitable for compiling with a native
   /// assembler.
-  MCStreamer *createAsmStreamer(MCContext &Ctx, raw_ostream &OS);
+  MCStreamer *createAsmStreamer(MCContext &Ctx, raw_ostream &OS,
+                                const MCAsmInfo &MAI,
+                                MCInstPrinter *InstPrint = 0,
+                                MCCodeEmitter *CE = 0);
 
   // FIXME: These two may end up getting rolled into a single
   // createObjectStreamer interface, which implements the assembler backend, and
   // is parameterized on an output format.
 
   /// createMachOStreamer - Create a machine code streamer which will generate
   /// Mach-O format object files.
-  MCStreamer *createMachOStreamer(MCContext &Ctx, raw_ostream &OS);
+  MCStreamer *createMachOStreamer(MCContext &Ctx, raw_ostream &OS,
+                                  MCCodeEmitter *CE = 0);
 
   /// createELFStreamer - Create a machine code streamer which will generate
   /// ELF format object files.
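Putting the pieces together: a streamer is created over a context and an output stream, then driven with section switches, labels, and raw data. A sketch using the asm streamer factory above; the section, symbol, and byte values are illustrative, and error handling is omitted:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Emit the equivalent of:
//     .text
// _main:
//     .byte 0xc3
void emitTinyFunction(MCContext &Ctx, raw_ostream &OS, const MCAsmInfo &MAI) {
  MCStreamer *S = createAsmStreamer(Ctx, OS, MAI);
  const MCSection *Text = MCSectionMachO::Create(
      "__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
      /*Reserved2=*/0, SectionKind::getText(), Ctx);
  S->SwitchSection(Text);
  S->EmitLabel(Ctx.GetOrCreateSymbol("_main"));
  S->EmitBytes("\xc3");              // x86 'ret'
  S->Finish();
  delete S;
}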
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index 235e6614f973f..5dd7d68585c78 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -15,10 +15,14 @@
 #define LLVM_MC_MCSYMBOL_H
 
 #include <string>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
+  class MCAsmInfo;
   class MCSection;
   class MCContext;
+  class raw_ostream;
 
   /// MCSymbol - Instances of this class represent a symbol name in the MC file,
   /// and MCSymbols are created and unique'd by the MCContext class.
@@ -28,38 +32,85 @@ namespace llvm {
   /// it is a reference to an external entity, it has a null section.
   ///
   class MCSymbol {
+    // Special sentinel value for the absolute pseudo section.
+    //
+    // FIXME: Use a PointerInt wrapper for this?
+    static const MCSection *AbsolutePseudoSection;
+
     /// Name - The name of the symbol.
     std::string Name;
-    /// Section - The section the symbol is defined in, or null if the symbol
-    /// has not been defined in the associated translation unit.
-    MCSection *Section;
-
+
+    /// Section - The section the symbol is defined in. This is null for
+    /// undefined symbols, and the special AbsolutePseudoSection value for
+    /// absolute symbols.
+    const MCSection *Section;
+
     /// IsTemporary - True if this is an assembler temporary label, which
     /// typically does not survive in the .o file's symbol table. Usually
     /// "Lfoo" or ".foo".
     unsigned IsTemporary : 1;
-
-    /// IsExternal - True if this symbol has been implicitly defined as an
-    /// external, for example by using it in an expression without ever emitting
-    /// it as a label. The @var Section for an external symbol is always null.
-    unsigned IsExternal : 1;
 
   private:  // MCContext creates and uniques these.
     friend class MCContext;
-    MCSymbol(const char *_Name, bool _IsTemporary)
-      : Name(_Name), Section(0), IsTemporary(_IsTemporary), IsExternal(false) {}
+    MCSymbol(const StringRef &_Name, bool _IsTemporary)
+      : Name(_Name), Section(0), IsTemporary(_IsTemporary) {}
 
     MCSymbol(const MCSymbol&);       // DO NOT IMPLEMENT
     void operator=(const MCSymbol&); // DO NOT IMPLEMENT
   public:
-
-    MCSection *getSection() const { return Section; }
-    void setSection(MCSection *Value) { Section = Value; }
+    /// getName - Get the symbol name.
+    const std::string &getName() const { return Name; }
 
-    bool isExternal() const { return IsExternal; }
-    void setExternal(bool Value) { IsExternal = Value; }
+    /// @name Symbol Type
+    /// @{
 
-    const std::string &getName() const { return Name; }
+    /// isTemporary - Check if this is an assembler temporary symbol.
+    bool isTemporary() const {
+      return IsTemporary;
+    }
+
+    /// isDefined - Check if this symbol is defined (i.e., it has an address).
+    ///
+    /// Defined symbols are either absolute or in some section.
+    bool isDefined() const {
+      return Section != 0;
+    }
+
+    /// isUndefined - Check if this symbol is undefined (i.e., implicitly
+    /// defined).
+    bool isUndefined() const {
+      return !isDefined();
+    }
+
+    /// isAbsolute - Check if this is an absolute symbol.
+    bool isAbsolute() const {
+      return Section == AbsolutePseudoSection;
+    }
+
+    /// getSection - Get the section associated with a defined, non-absolute
+    /// symbol.
+    const MCSection &getSection() const {
+      assert(!isUndefined() && !isAbsolute() && "Invalid accessor!");
+      return *Section;
+    }
+
+    /// setSection - Mark the symbol as defined in the section \arg S.
+    void setSection(const MCSection &S) { Section = &S; }
+
+    /// setUndefined - Mark the symbol as undefined.
+    void setUndefined() {
+      Section = 0;
+    }
+
+    /// setAbsolute - Mark the symbol as absolute.
+    void setAbsolute() { Section = AbsolutePseudoSection; }
+
+    /// @}
+
+    /// print - Print the value to the stream \arg OS.
+    void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+
+    /// dump - Print the value to stderr.
+    void dump() const;
   };
 
 } // end namespace llvm
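A symbol's Section pointer now encodes three states: null (undefined), the AbsolutePseudoSection sentinel (absolute), or a real section (defined). A sketch of classifying a symbol obtained from an MCContext; the function name is hypothetical:

#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Report which of the three states a symbol is in. Note getSection()
// asserts unless the symbol is defined and non-absolute.
void classify(const MCSymbol &Sym) {
  if (Sym.isUndefined())
    errs() << Sym.getName() << ": undefined\n";
  else if (Sym.isAbsolute())
    errs() << Sym.getName() << ": absolute\n";
  else
    errs() << Sym.getName() << ": defined in some section\n";
}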
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index d032f170c3f6f..62aca6e3a6f6c 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -20,6 +20,7 @@
 
 namespace llvm {
 class MCSymbol;
+class raw_ostream;
 
 /// MCValue - This represents an "assembler immediate". In its most general
 /// form, this can hold "SymbolA - SymbolB + imm64". Not all targets support
@@ -32,13 +33,13 @@ class MCSymbol;
 /// Note that this class must remain a simple POD value class, because we need
 /// it to live in unions etc.
 class MCValue {
-  MCSymbol *SymA, *SymB;
+  const MCSymbol *SymA, *SymB;
   int64_t Cst;
 public:
 
   int64_t getConstant() const { return Cst; }
-  MCSymbol *getSymA() const { return SymA; }
-  MCSymbol *getSymB() const { return SymB; }
+  const MCSymbol *getSymA() const { return SymA; }
+  const MCSymbol *getSymB() const { return SymB; }
 
   /// isAbsolute - Is this an absolute (as opposed to relocatable) value.
   bool isAbsolute() const { return !SymA && !SymB; }
@@ -48,11 +49,19 @@ public:
   ///
   /// @result - The value's associated section, or null for external or constant
   /// values.
-  MCSection *getAssociatedSection() const {
-    return SymA ? SymA->getSection() : 0;
-  }
+  //
+  // FIXME: Switch to a tagged section, so this can return the tagged section
+  // value.
+  const MCSection *getAssociatedSection() const;
+
+  /// print - Print the value to the stream \arg OS.
+  void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+
+  /// dump - Print the value to stderr.
+  void dump() const;
 
-  static MCValue get(MCSymbol *SymA, MCSymbol *SymB = 0, int64_t Val = 0) {
+  static MCValue get(const MCSymbol *SymA, const MCSymbol *SymB = 0,
+                     int64_t Val = 0) {
     MCValue R;
     assert((!SymB || SymA) && "Invalid relocatable MCValue!");
     R.Cst = Val;
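MCValue is the fully-evaluated form "SymbolA - SymbolB + constant"; the factory asserts that a SymB without a SymA is rejected. A small sketch of both shapes, assuming symbols obtained elsewhere:

#include "llvm/MC/MCValue.h"
using namespace llvm;

// A purely constant value: no symbols, so isAbsolute() is true.
MCValue makeConst() { return MCValue::get(0, 0, 42); }

// The relocatable difference (A - B) + 8; isAbsolute() is false.
MCValue makeDiff(const MCSymbol *A, const MCSymbol *B) {
  return MCValue::get(A, B, 8);
}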
diff --git a/include/llvm/MC/SectionKind.h b/include/llvm/MC/SectionKind.h
new file mode 100644
index 0000000000000..945cff790a487
--- /dev/null
+++ b/include/llvm/MC/SectionKind.h
@@ -0,0 +1,221 @@
+//===-- llvm/MC/SectionKind.h - Classification of sections ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SectionKind class, a simple POD value used to
+// classify the contents of a section.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_SECTIONKIND_H
+#define LLVM_MC_SECTIONKIND_H
+
+namespace llvm {
+
+/// SectionKind - This is a simple POD value that classifies the properties of
+/// a section. A section is classified into the deepest possible
+/// classification, and then the target maps them onto their sections based on
+/// what capabilities they have.
+///
+/// The comments below describe these as if they were an inheritance hierarchy
+/// in order to explain the predicates below.
+///
+class SectionKind {
+  enum Kind {
+    /// Metadata - Debug info sections or other metadata.
+    Metadata,
+
+    /// Text - Text section, used for functions and other executable code.
+    Text,
+
+    /// ReadOnly - Data that is never written to at program runtime by the
+    /// program or the dynamic linker. Things in the top-level readonly
+    /// SectionKind are not mergeable.
+    ReadOnly,
+
+    /// MergeableCString - Any null-terminated string which allows merging.
+    /// These values are known to end in a nul value of the specified size,
+    /// not otherwise contain a nul value, and be mergeable. This allows the
+    /// linker to unique the strings if it so desires.
+
+    /// Mergeable1ByteCString - 1 byte mergeable, null terminated, string.
+    Mergeable1ByteCString,
+
+    /// Mergeable2ByteCString - 2 byte mergeable, null terminated, string.
+    Mergeable2ByteCString,
+
+    /// Mergeable4ByteCString - 4 byte mergeable, null terminated, string.
+    Mergeable4ByteCString,
+
+    /// MergeableConst - These are sections for merging fixed-length
+    /// constants together. For example, this can be used to unique
+    /// constant pool entries etc.
+    MergeableConst,
+
+    /// MergeableConst4 - This is a section used by 4-byte constants,
+    /// for example, floats.
+    MergeableConst4,
+
+    /// MergeableConst8 - This is a section used by 8-byte constants,
+    /// for example, doubles.
+    MergeableConst8,
+
+    /// MergeableConst16 - This is a section used by 16-byte constants,
+    /// for example, vectors.
+    MergeableConst16,
+
+    /// Writeable - This is the base of all segments that need to be written
+    /// to during program runtime.
+
+    /// ThreadLocal - This is the base of all TLS segments. All TLS
+    /// objects must be writeable, otherwise there is no reason for them to
+    /// be thread local!
+
+    /// ThreadBSS - Zero-initialized TLS data objects.
+    ThreadBSS,
+
+    /// ThreadData - Initialized TLS data objects.
+    ThreadData,
+
+    /// GlobalWriteableData - Writeable data that is global (not thread
+    /// local).
+
+    /// BSS - Zero initialized writeable data.
+    BSS,
+
+    /// DataRel - This is the most general form of data that is written
+    /// to by the program, it can have random relocations to arbitrary
+    /// globals.
+    DataRel,
+
+    /// DataRelLocal - This is writeable data that has a non-zero
+    /// initializer and has relocations in it, but all of the
+    /// relocations are known to be within the final linked image
+    /// the global is linked into.
+    DataRelLocal,
+
+    /// DataNoRel - This is writeable data that has a non-zero
+    /// initializer, but whose initializer is known to have no
+    /// relocations.
+    DataNoRel,
+
+    /// ReadOnlyWithRel - These are global variables that are never
+    /// written to by the program, but that have relocations, so they
+    /// must be stuck in a writeable section so that the dynamic linker
+    /// can write to them. If it chooses to, the dynamic linker can
+    /// mark the pages these globals end up on as read-only after it is
+    /// done with its relocation phase.
+    ReadOnlyWithRel,
+
+    /// ReadOnlyWithRelLocal - This is data that is readonly by the
+    /// program, but must be writeable so that the dynamic linker
+    /// can perform relocations in it. This is used when we know
+    /// that all the relocations are to globals in this final
+    /// linked image.
+    ReadOnlyWithRelLocal
+
+  } K : 8;
+public:
+
+  bool isMetadata() const { return K == Metadata; }
+  bool isText() const { return K == Text; }
+
+  bool isReadOnly() const {
+    return K == ReadOnly || isMergeableCString() ||
+           isMergeableConst();
+  }
+
+  bool isMergeableCString() const {
+    return K == Mergeable1ByteCString || K == Mergeable2ByteCString ||
+           K == Mergeable4ByteCString;
+  }
+  bool isMergeable1ByteCString() const { return K == Mergeable1ByteCString; }
+  bool isMergeable2ByteCString() const { return K == Mergeable2ByteCString; }
+  bool isMergeable4ByteCString() const { return K == Mergeable4ByteCString; }
+
+  bool isMergeableConst() const {
+    return K == MergeableConst || K == MergeableConst4 ||
+           K == MergeableConst8 || K == MergeableConst16;
+  }
+  bool isMergeableConst4() const { return K == MergeableConst4; }
+  bool isMergeableConst8() const { return K == MergeableConst8; }
+  bool isMergeableConst16() const { return K == MergeableConst16; }
+
+  bool isWriteable() const {
+    return isThreadLocal() || isGlobalWriteableData();
+  }
+
+  bool isThreadLocal() const {
+    return K == ThreadData || K == ThreadBSS;
+  }
+
+  bool isThreadBSS() const { return K == ThreadBSS; }
+  bool isThreadData() const { return K == ThreadData; }
+
+  bool isGlobalWriteableData() const {
+    return isBSS() || isDataRel() || isReadOnlyWithRel();
+  }
+
+  bool isBSS() const { return K == BSS; }
+
+  bool isDataRel() const {
+    return K == DataRel || K == DataRelLocal || K == DataNoRel;
+  }
+
+  bool isDataRelLocal() const {
+    return K == DataRelLocal || K == DataNoRel;
+  }
+
+  bool isDataNoRel() const { return K == DataNoRel; }
+
+  bool isReadOnlyWithRel() const {
+    return K == ReadOnlyWithRel || K == ReadOnlyWithRelLocal;
+  }
+
+  bool isReadOnlyWithRelLocal() const {
+    return K == ReadOnlyWithRelLocal;
+  }
+private:
+  static SectionKind get(Kind K) {
+    SectionKind Res;
+    Res.K = K;
+    return Res;
+  }
+public:
+
+  static SectionKind getMetadata() { return get(Metadata); }
+  static SectionKind getText() { return get(Text); }
+  static SectionKind getReadOnly() { return get(ReadOnly); }
+  static SectionKind getMergeable1ByteCString() {
+    return get(Mergeable1ByteCString);
+  }
+  static SectionKind getMergeable2ByteCString() {
+    return get(Mergeable2ByteCString);
+  }
+  static SectionKind getMergeable4ByteCString() {
+    return get(Mergeable4ByteCString);
+  }
+  static SectionKind getMergeableConst() { return get(MergeableConst); }
+  static SectionKind getMergeableConst4() { return get(MergeableConst4); }
+  static SectionKind getMergeableConst8() { return get(MergeableConst8); }
+  static SectionKind getMergeableConst16() { return get(MergeableConst16); }
+  static SectionKind getThreadBSS() { return get(ThreadBSS); }
+  static SectionKind getThreadData() { return get(ThreadData); }
+  static SectionKind getBSS() { return get(BSS); }
+  static SectionKind getDataRel() { return get(DataRel); }
+  static SectionKind getDataRelLocal() { return get(DataRelLocal); }
+  static SectionKind getDataNoRel() { return get(DataNoRel); }
+  static SectionKind getReadOnlyWithRel() { return get(ReadOnlyWithRel); }
+  static SectionKind getReadOnlyWithRelLocal() {
+    return get(ReadOnlyWithRelLocal);
+  }
+};
+
+} // end namespace llvm
+
+#endif
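The predicates mirror the conceptual hierarchy sketched in the comments: a specific kind implies all of the more general ones. A small sketch exercising a few of them:

#include <cassert>
#include "llvm/MC/SectionKind.h"
using namespace llvm;

// A 2-byte mergeable C string is a mergeable C string and read-only,
// but it is not writeable data.
void checkKinds() {
  SectionKind K = SectionKind::getMergeable2ByteCString();
  assert(K.isMergeable2ByteCString());
  assert(K.isMergeableCString());
  assert(K.isReadOnly());
  assert(!K.isWriteable());

  // BSS is global writeable data and therefore writeable.
  assert(SectionKind::getBSS().isWriteable());
}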
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
new file mode 100644
index 0000000000000..63c2da2e7dfd2
--- /dev/null
+++ b/include/llvm/Metadata.h
@@ -0,0 +1,377 @@
+//===-- llvm/Metadata.h - Metadata definitions ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations for metadata subclasses.
+/// They represent the different flavors of metadata that live in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MDNODE_H
+#define LLVM_MDNODE_H
+
+#include "llvm/User.h"
+#include "llvm/Type.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+class Constant;
+class Instruction;
+class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+// MetadataBase - A base class for MDNode, MDString and NamedMDNode.
+class MetadataBase : public User {
+private:
+  /// ReservedSpace - The number of operands actually allocated. NumOperands is
+  /// the number actually in use.
+  unsigned ReservedSpace;
+
+protected:
+  MetadataBase(const Type *Ty, unsigned scid)
+    : User(Ty, scid, NULL, 0), ReservedSpace(0) {}
+
+  void resizeOperands(unsigned NumOps);
+public:
+  /// isNullValue - Return true if this is the value that would be returned by
+  /// getNullValue. This always returns false because getNullValue will never
+  /// produce metadata.
+  virtual bool isNullValue() const {
+    return false;
+  }
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const MetadataBase *) { return true; }
+  static bool classof(const Value *V) {
+    return V->getValueID() == MDStringVal || V->getValueID() == MDNodeVal
+      || V->getValueID() == NamedMDNodeVal;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// MDString - a single uniqued string.
+/// These are used to efficiently contain a byte sequence for metadata.
+/// MDString is always unnamed.
+class MDString : public MetadataBase {
+  MDString(const MDString &);            // DO NOT IMPLEMENT
+  void *operator new(size_t, unsigned);  // DO NOT IMPLEMENT
+  unsigned getNumOperands();             // DO NOT IMPLEMENT
+
+  StringRef Str;
+protected:
+  explicit MDString(LLVMContext &C, const char *begin, unsigned l)
+    : MetadataBase(Type::getMetadataTy(C), Value::MDStringVal),
+      Str(begin, l) {}
+
+public:
+  // Do not allocate any space for operands.
+  void *operator new(size_t s) {
+    return User::operator new(s, 0);
+  }
+  static MDString *get(LLVMContext &Context, const StringRef &Str);
+
+  StringRef getString() const { return Str; }
+
+  unsigned length() const { return Str.size(); }
+
+  /// begin() - Pointer to the first byte of the string.
+  ///
+  const char *begin() const { return Str.begin(); }
+
+  /// end() - Pointer to one byte past the end of the string.
+  ///
+  const char *end() const { return Str.end(); }
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const MDString *) { return true; }
+  static bool classof(const Value *V) {
+    return V->getValueID() == MDStringVal;
+  }
+};
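MDString is uniqued through its get method, so two requests for the same bytes are expected to hand back the same object. A sketch under that assumption, given an LLVMContext:

#include <cassert>
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
using namespace llvm;

// Uniquing implies pointer equality for equal strings.
void mdStringDemo(LLVMContext &Ctx) {
  MDString *A = MDString::get(Ctx, "source_filename");
  MDString *B = MDString::get(Ctx, "source_filename");
  assert(A == B);                              // same uniqued node
  assert(A->getString() == "source_filename");
  assert(A->length() == 15);
}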
+ +//===----------------------------------------------------------------------===// +/// MDNode - a tuple of other values. +/// These contain a list of the values that represent the metadata. +/// MDNode is always unnamed. +class MDNode : public MetadataBase, public FoldingSetNode { + MDNode(const MDNode &); // DO NOT IMPLEMENT + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + // getNumOperands - Make this only available for private uses. + unsigned getNumOperands() { return User::getNumOperands(); } + + friend class ElementVH; + // Use CallbackVH to hold MDNode elements. + struct ElementVH : public CallbackVH { + MDNode *Parent; + ElementVH(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {} + ~ElementVH() {} + + virtual void deleted() { + Parent->replaceElement(this->operator Value*(), 0); + } + + virtual void allUsesReplacedWith(Value *NV) { + Parent->replaceElement(this->operator Value*(), NV); + } + }; + // Replace each instance of F from the element list of this node with T. + void replaceElement(Value *F, Value *T); + + SmallVector Node; + +protected: + explicit MDNode(LLVMContext &C, Value*const* Vals, unsigned NumVals); +public: + // Do not allocate any space for operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } + // Constructors and destructors. + static MDNode *get(LLVMContext &Context, + Value* const* Vals, unsigned NumVals); + + /// dropAllReferences - Remove all uses and clear node vector. + void dropAllReferences(); + + /// ~MDNode - Destroy MDNode. + ~MDNode(); + + /// getElement - Return specified element. + Value *getElement(unsigned i) const { + assert (getNumElements() > i && "Invalid element number!"); + return Node[i]; + } + + /// getNumElements - Return number of MDNode elements. + unsigned getNumElements() const { + return Node.size(); + } + + // Element access + typedef SmallVectorImpl::const_iterator const_elem_iterator; + typedef SmallVectorImpl::iterator elem_iterator; + /// elem_empty - Return true if MDNode is empty. + bool elem_empty() const { return Node.empty(); } + const_elem_iterator elem_begin() const { return Node.begin(); } + const_elem_iterator elem_end() const { return Node.end(); } + elem_iterator elem_begin() { return Node.begin(); } + elem_iterator elem_end() { return Node.end(); } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. This always returns false because getNullValue will never + /// produce metadata. + virtual bool isNullValue() const { + return false; + } + + /// Profile - calculate a unique identifier for this MDNode to collapse + /// duplicates. + void Profile(FoldingSetNodeID &ID) const; + + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { + llvm_unreachable("This should never be called because MDNodes have no ops"); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const MDNode *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == MDNodeVal; + } +}; + +//===----------------------------------------------------------------------===// +/// WeakMetadataVH - a weak value handle for metadata. +class WeakMetadataVH : public WeakVH { +public: + WeakMetadataVH() : WeakVH() {} + WeakMetadataVH(MetadataBase *M) : WeakVH(M) {} + WeakMetadataVH(const WeakMetadataVH &RHS) : WeakVH(RHS) {} + + operator Value*() const { + llvm_unreachable("WeakMetadataVH only handles Metadata"); + } + + operator MetadataBase*() const { + return dyn_cast_or_null(getValPtr()); + } +};
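The elements of an MDNode are held through the ElementVH callback handles above, so deleted values are nulled out and RAUW is tracked automatically. A usage sketch, with constants standing in for arbitrary values (the element contents are made up):

#include "llvm/Constants.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
using namespace llvm;

MDNode *MakeNode(LLVMContext &Context) {
  Value *Elts[] = {
    MDString::get(Context, "answer"),
    ConstantInt::get(Type::getInt32Ty(Context), 42)
  };
  // MDNode::get uniques the tuple via the FoldingSet profile above.
  MDNode *N = MDNode::get(Context, Elts, 2);
  for (unsigned i = 0, e = N->getNumElements(); i != e; ++i)
    (void)N->getElement(i); // an element may be null if its value died
  return N;
}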
+ +//===----------------------------------------------------------------------===// +/// NamedMDNode - a tuple of other metadata. +/// NamedMDNode is always named. All NamedMDNode elements have metadata type. +template + class SymbolTableListTraits; + +class NamedMDNode : public MetadataBase, public ilist_node { + friend class SymbolTableListTraits; + friend class LLVMContextImpl; + + NamedMDNode(const NamedMDNode &); // DO NOT IMPLEMENT + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + // getNumOperands - Make this only available for private uses. + unsigned getNumOperands() { return User::getNumOperands(); } + + Module *Parent; + SmallVector Node; + typedef SmallVectorImpl::iterator elem_iterator; + +protected: + explicit NamedMDNode(LLVMContext &C, const Twine &N, MetadataBase*const* Vals, + unsigned NumVals, Module *M = 0); +public: + // Do not allocate any space for operands. + void *operator new(size_t s) { + return User::operator new(s, 0); + } + static NamedMDNode *Create(LLVMContext &C, const Twine &N, + MetadataBase*const*MDs, + unsigned NumMDs, Module *M = 0) { + return new NamedMDNode(C, N, MDs, NumMDs, M); + } + + static NamedMDNode *Create(const NamedMDNode *NMD, Module *M = 0); + + /// eraseFromParent - Drop all references and remove the node from parent + /// module. + void eraseFromParent(); + + /// dropAllReferences - Remove all uses and clear node vector. + void dropAllReferences(); + + /// ~NamedMDNode - Destroy NamedMDNode. + ~NamedMDNode(); + + /// getParent - Get the module that holds this named metadata collection. + inline Module *getParent() { return Parent; } + inline const Module *getParent() const { return Parent; } + void setParent(Module *M) { Parent = M; } + + /// getElement - Return specified element. + MetadataBase *getElement(unsigned i) const { + assert (getNumElements() > i && "Invalid element number!"); + return Node[i]; + } + + /// getNumElements - Return number of NamedMDNode elements. + unsigned getNumElements() const { + return Node.size(); + } + + /// addElement - Add metadata element. + void addElement(MetadataBase *M) { + resizeOperands(0); + OperandList[NumOperands++] = M; + Node.push_back(WeakMetadataVH(M)); + } + + typedef SmallVectorImpl::const_iterator const_elem_iterator; + bool elem_empty() const { return Node.empty(); } + const_elem_iterator elem_begin() const { return Node.begin(); } + const_elem_iterator elem_end() const { return Node.end(); } + elem_iterator elem_begin() { return Node.begin(); } + elem_iterator elem_end() { return Node.end(); } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. This always returns false because getNullValue will never + /// produce metadata. + virtual bool isNullValue() const { + return false; + } + + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { + llvm_unreachable( + "This should never be called because NamedMDNodes have no ops"); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const NamedMDNode *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == NamedMDNodeVal; + } +};
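A NamedMDNode is created directly into a module; a sketch against the Create signature above (the "my.annotations" name is hypothetical):

#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
using namespace llvm;

NamedMDNode *AttachAnnotations(Module &M, MDNode *Info) {
  MetadataBase *Elts[] = { Info };
  // Creates the named node and, because &M is passed, links it into the
  // module's named-metadata list.
  NamedMDNode *NMD =
      NamedMDNode::Create(M.getContext(), "my.annotations", Elts, 1, &M);
  NMD->addElement(Info); // elements can also be appended after creation
  return NMD;
}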
+ +//===----------------------------------------------------------------------===// +/// MetadataContext - +/// MetadataContext handles uniquing and assignment of IDs for custom metadata +/// types. Custom metadata handler names must not contain spaces and must +/// start with a letter; the regular expression used to validate a name is +/// [a-zA-Z$._][a-zA-Z$._0-9]*. +class MetadataContext { +public: + typedef std::pair MDPairTy; + typedef SmallVector MDMapTy; + typedef DenseMap MDStoreTy; + friend class BitcodeReader; +private: + + /// MetadataStore - Collection of metadata used in this context. + MDStoreTy MetadataStore; + + /// MDHandlerNames - Map to hold metadata handler names. + StringMap MDHandlerNames; + +public: + /// RegisterMDKind - Register a new metadata kind and return its ID. + /// A metadata kind can be registered only once. + unsigned RegisterMDKind(const char *Name); + + /// getMDKind - Return the ID of the given metadata kind. If the requested + /// metadata kind is not registered then return 0. + unsigned getMDKind(const char *Name); + + /// validName - Return true if Name is a valid custom metadata handler name. + bool validName(const char *Name); + + /// getMD - Get the metadata of the given kind attached to an Instruction. + /// If the metadata is not found then return 0. + MDNode *getMD(unsigned Kind, const Instruction *Inst); + + /// getMDs - Get the metadata attached to an Instruction. + const MDMapTy *getMDs(const Instruction *Inst); + + /// addMD - Attach the metadata of the given kind to an Instruction. + void addMD(unsigned Kind, MDNode *Node, Instruction *Inst); + + /// removeMD - Remove metadata of the given kind attached to an instruction. + void removeMD(unsigned Kind, Instruction *Inst); + + /// removeMDs - Remove all metadata attached to an instruction. + void removeMDs(const Instruction *Inst); + + /// copyMD - If metadata is attached to Instruction In1, then attach + /// the same metadata to In2. + void copyMD(Instruction *In1, Instruction *In2); + + /// getHandlerNames - Get handler names. This is used by bitcode + /// writer. + const StringMap *getHandlerNames(); + + /// ValueIsDeleted - This handler is used to update metadata store + /// when a value is deleted. + void ValueIsDeleted(const Value *V) {} + void ValueIsDeleted(const Instruction *Inst) { + removeMDs(Inst); + } + void ValueIsRAUWd(Value *V1, Value *V2); + + /// ValueIsCloned - This handler is used to update metadata store + /// when In1 is cloned to create In2. + void ValueIsCloned(const Instruction *In1, Instruction *In2); +}; + +} // end llvm namespace + +#endif diff --git a/include/llvm/Module.h b/include/llvm/Module.h index 7a139cc1e0a92..501625df7a3dc 100644 --- a/include/llvm/Module.h +++ b/include/llvm/Module.h @@ -18,6 +18,7 @@ #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/GlobalAlias.h" +#include "llvm/Metadata.h" #include "llvm/Support/DataTypes.h" #include @@ -56,6 +57,21 @@ template<> struct ilist_traits static GlobalAlias *createSentinel(); static void destroySentinel(GlobalAlias *GA) { delete GA; } }; +template<> struct ilist_traits + : public SymbolTableListTraits { + // createSentinel is used to get hold of a node that marks the end of + // the list... + NamedMDNode *createSentinel() const { + return static_cast(&Sentinel); + } + static void destroySentinel(NamedMDNode*) {} + + NamedMDNode *provideInitialHead() const { return createSentinel(); } + NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); } + static void noteHead(NamedMDNode*, NamedMDNode*) {} +private: + mutable ilist_node Sentinel; +};
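Stepping back to the MetadataContext interface above: a client registers a kind once, then attaches and queries per-instruction metadata by its ID. A sketch, assuming the context exposes its MetadataContext via LLVMContext::getMetadata() as elsewhere in this revision (the "myopt" kind name is made up):

#include "llvm/Instruction.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
using namespace llvm;

void TagInstruction(Instruction *I, MDNode *Info) {
  MetadataContext &TheMetadata = I->getContext().getMetadata();
  unsigned Kind = TheMetadata.getMDKind("myopt");
  if (!Kind)                       // 0 means the kind is not registered yet
    Kind = TheMetadata.RegisterMDKind("myopt");
  TheMetadata.addMD(Kind, Info, I);
  MDNode *Back = TheMetadata.getMD(Kind, I); // retrieves Info again
  (void)Back;
}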
/// A Module instance is used to store all the information related to an /// LLVM module. Modules are the top level container of all other LLVM @@ -78,25 +94,31 @@ public: typedef iplist FunctionListType; /// The type for the list of aliases. typedef iplist AliasListType; + /// The type for the list of named metadata. + typedef iplist NamedMDListType; /// The type for the list of dependent libraries. typedef std::vector LibraryListType; /// The Global Variable iterator. - typedef GlobalListType::iterator global_iterator; + typedef GlobalListType::iterator global_iterator; /// The Global Variable constant iterator. - typedef GlobalListType::const_iterator const_global_iterator; + typedef GlobalListType::const_iterator const_global_iterator; /// The Function iterators. - typedef FunctionListType::iterator iterator; + typedef FunctionListType::iterator iterator; /// The Function constant iterator - typedef FunctionListType::const_iterator const_iterator; + typedef FunctionListType::const_iterator const_iterator; /// The Global Alias iterators. - typedef AliasListType::iterator alias_iterator; + typedef AliasListType::iterator alias_iterator; /// The Global Alias constant iterator - typedef AliasListType::const_iterator const_alias_iterator; + typedef AliasListType::const_iterator const_alias_iterator; + /// The named metadata iterators. + typedef NamedMDListType::iterator named_metadata_iterator; + /// The named metadata constant iterators. + typedef NamedMDListType::const_iterator const_named_metadata_iterator; /// The Library list iterator. typedef LibraryListType::const_iterator lib_iterator; @@ -110,12 +132,13 @@ public: /// @name Member Variables /// @{ private: - LLVMContext& Context; ///< The LLVMContext from which types and + LLVMContext& Context; ///< The LLVMContext from which types and ///< constants are allocated. GlobalListType GlobalList; ///< The Global Variables in the module FunctionListType FunctionList; ///< The Functions in the module AliasListType AliasList; ///< The Aliases in the module LibraryListType LibraryList; ///< The Libraries needed by the module + NamedMDListType NamedMDList; ///< The named metadata in the module std::string GlobalScopeAsm; ///< Inline Asm at global scope. ValueSymbolTable *ValSymTab; ///< Symbol table for values TypeSymbolTable *TypeSymTab; ///< Symbol table for types @@ -131,7 +154,7 @@ private: public: /// The Module constructor. Note that there is no default constructor. You /// must provide a name for the module upon construction. - explicit Module(const std::string &ModuleID, LLVMContext& C); + explicit Module(const StringRef &ModuleID, LLVMContext& C); /// The module destructor. This will dropAllReferences. ~Module(); @@ -146,7 +169,7 @@ public: /// Get the data layout string for the module's target platform. This encodes /// the type sizes and alignments expected by this module. /// @returns the data layout as a string - const std::string& getDataLayout() const { return DataLayout; } + const std::string &getDataLayout() const { return DataLayout; } /// Get the target triple which is a string describing the target host. /// @returns a string containing the target triple. @@ -173,20 +196,20 @@ public: public: /// Set the module identifier. - void setModuleIdentifier(const std::string &ID) { ModuleID = ID; } + void setModuleIdentifier(const StringRef &ID) { ModuleID = ID; } /// Set the data layout - void setDataLayout(const std::string& DL) { DataLayout = DL; } + void setDataLayout(const StringRef &DL) { DataLayout = DL; } /// Set the target triple.
- void setTargetTriple(const std::string &T) { TargetTriple = T; } + void setTargetTriple(const StringRef &T) { TargetTriple = T; } /// Set the module-scope inline assembly blocks. - void setModuleInlineAsm(const std::string &Asm) { GlobalScopeAsm = Asm; } + void setModuleInlineAsm(const StringRef &Asm) { GlobalScopeAsm = Asm; } /// Append to the module-scope inline assembly blocks, automatically /// appending a newline to the end. - void appendModuleInlineAsm(const std::string &Asm) { + void appendModuleInlineAsm(const StringRef &Asm) { GlobalScopeAsm += Asm; GlobalScopeAsm += '\n'; } @@ -198,8 +221,7 @@ public: /// getNamedValue - Return the first global value in the module with /// the specified name, of arbitrary type. This method returns null /// if a global with the specified name is not found. - GlobalValue *getNamedValue(const std::string &Name) const; - GlobalValue *getNamedValue(const char *Name) const; + GlobalValue *getNamedValue(const StringRef &Name) const; /// @} /// @name Function Accessors @@ -214,10 +236,10 @@ public: /// the existing function. /// 4. Finally, the function exists but has the wrong prototype: return the /// function with a constantexpr cast to the right prototype. - Constant *getOrInsertFunction(const std::string &Name, const FunctionType *T, + Constant *getOrInsertFunction(const StringRef &Name, const FunctionType *T, AttrListPtr AttributeList); - Constant *getOrInsertFunction(const std::string &Name, const FunctionType *T); + Constant *getOrInsertFunction(const StringRef &Name, const FunctionType *T); /// getOrInsertFunction - Look up the specified function in the module symbol /// table. If it does not exist, add a prototype for the function and return @@ -226,21 +248,20 @@ public: /// named function has a different type. This version of the method takes a /// null terminated list of function arguments, which makes it easier for /// clients to use. - Constant *getOrInsertFunction(const std::string &Name, + Constant *getOrInsertFunction(const StringRef &Name, AttrListPtr AttributeList, const Type *RetTy, ...) END_WITH_NULL; - Constant *getOrInsertFunction(const std::string &Name, const Type *RetTy, ...) + Constant *getOrInsertFunction(const StringRef &Name, const Type *RetTy, ...) END_WITH_NULL; - Constant *getOrInsertTargetIntrinsic(const std::string &Name, + Constant *getOrInsertTargetIntrinsic(const StringRef &Name, const FunctionType *Ty, AttrListPtr AttributeList); /// getFunction - Look up the specified function in the module symbol table. /// If it does not exist, return null. - Function *getFunction(const std::string &Name) const; - Function *getFunction(const char *Name) const; + Function *getFunction(const StringRef &Name) const; /// @} /// @name Global Variable Accessors @@ -250,13 +271,13 @@ public: /// symbol table. If it does not exist, return null. If AllowInternal is set /// to true, this function will return types that have InternalLinkage. By /// default, these types are not returned. - GlobalVariable *getGlobalVariable(const std::string &Name, + GlobalVariable *getGlobalVariable(const StringRef &Name, bool AllowInternal = false) const; /// getNamedGlobal - Return the first global variable in the module with the /// specified name, of arbitrary type. This method returns null if a global /// with the specified name is not found. 
- GlobalVariable *getNamedGlobal(const std::string &Name) const { + GlobalVariable *getNamedGlobal(const StringRef &Name) const { return getGlobalVariable(Name, true); } @@ -267,7 +288,7 @@ public: /// with a constantexpr cast to the right type. /// 3. Finally, if the existing global is the correct declaration, return /// the existing global. - Constant *getOrInsertGlobal(const std::string &Name, const Type *Ty); + Constant *getOrInsertGlobal(const StringRef &Name, const Type *Ty); /// @} /// @name Global Alias Accessors @@ -276,7 +297,21 @@ public: /// getNamedAlias - Return the first global alias in the module with the /// specified name, of arbitrary type. This method returns null if a global /// with the specified name is not found. - GlobalAlias *getNamedAlias(const std::string &Name) const; + GlobalAlias *getNamedAlias(const StringRef &Name) const; + +/// @} +/// @name Named Metadata Accessors +/// @{ +public: + /// getNamedMetadata - Return the first NamedMDNode in the module with the + /// specified name. This method returns null if a NamedMDNode with the + /// specified name is not found. + NamedMDNode *getNamedMetadata(const StringRef &Name) const; + + /// getOrInsertNamedMetadata - Return the first named MDNode in the module + /// with the specified name. This method creates and returns a new + /// NamedMDNode if one with the specified name is not found. + NamedMDNode *getOrInsertNamedMetadata(const StringRef &Name); /// @} /// @name Type Accessors @@ -285,7 +320,7 @@ public: /// addTypeName - Insert an entry in the symbol table mapping Str to Type. If /// there is already an entry for this name, true is returned and the symbol /// table is not modified. - bool addTypeName(const std::string &Name, const Type *Ty); + bool addTypeName(const StringRef &Name, const Type *Ty); /// getTypeName - If there is at least one entry in the symbol table for the /// specified type, return it. @@ -293,7 +328,7 @@ public: /// getTypeByName - Return the type with the specified name in this module, or /// null if there is none by that name. - const Type *getTypeByName(const std::string &Name) const; + const Type *getTypeByName(const StringRef &Name) const; /// @} /// @name Direct access to the globals list, functions list, and symbol table @@ -320,6 +355,13 @@ public: static iplist Module::*getSublistAccess(GlobalAlias*) { return &Module::AliasList; } + /// Get the Module's list of named metadata (constant). + const NamedMDListType &getNamedMDList() const { return NamedMDList; } + /// Get the Module's list of named metadata. + NamedMDListType &getNamedMDList() { return NamedMDList; } + static iplist Module::*getSublistAccess(NamedMDNode *) { + return &Module::NamedMDList; + } /// Get the symbol table of global variable and function identifiers const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; } /// Get the Module's symbol table of global variable and function identifiers.
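The named-metadata accessors above are typically used together, and because the signatures now take StringRef, both string literals and std::strings convert implicitly at the call site. A sketch (the "my.annotations" list name is hypothetical):

#include "llvm/Metadata.h"
#include "llvm/Module.h"
using namespace llvm;

void CollectAnnotations(Module &M, MDNode *Extra) {
  // getOrInsertNamedMetadata creates the list on first use.
  NamedMDNode *NMD = M.getOrInsertNamedMetadata("my.annotations");
  NMD->addElement(Extra);
  // A later pass can find it again by name, or walk all named lists.
  if (NamedMDNode *Found = M.getNamedMetadata("my.annotations"))
    (void)Found->getNumElements();
}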
@@ -372,9 +414,9 @@ public: /// @brief Returns the number of items in the list of libraries. inline size_t lib_size() const { return LibraryList.size(); } /// @brief Add a library to the list of dependent libraries - void addLibrary(const std::string& Lib); + void addLibrary(const StringRef &Lib); /// @brief Remove a library from the list of dependent libraries - void removeLibrary(const std::string& Lib); + void removeLibrary(const StringRef &Lib); /// @brief Get all the libraries inline const LibraryListType& getLibraries() const { return LibraryList; } @@ -390,18 +432,42 @@ public: alias_iterator alias_end () { return AliasList.end(); } /// Get a constant iterator to the last alias. const_alias_iterator alias_end () const { return AliasList.end(); } - /// Determine how many functions are in the Module's list of aliases. + /// Determine how many aliases are in the Module's list of aliases. size_t alias_size () const { return AliasList.size(); } /// Determine if the list of aliases is empty. bool alias_empty() const { return AliasList.empty(); } + +/// @} +/// @name Named Metadata Iteration +/// @{ +public: + /// Get an iterator to the first named metadata. + named_metadata_iterator named_metadata_begin() + { return NamedMDList.begin(); } + /// Get a constant iterator to the first named metadata. + const_named_metadata_iterator named_metadata_begin() const + { return NamedMDList.begin(); } + /// Get an iterator to the last named metadata. + named_metadata_iterator named_metadata_end () + { return NamedMDList.end(); } + /// Get a constant iterator to the last named metadata. + const_named_metadata_iterator named_metadata_end () const + { return NamedMDList.end(); } + /// Determine how many NamedMDNodes are in the Module's list of named metadata. + size_t named_metadata_size () const + { return NamedMDList.size(); } + /// Determine if the list of named metadata is empty. + bool named_metadata_empty() const + { return NamedMDList.empty(); } + + /// @} /// @name Utility functions for printing and dumping Module objects /// @{ public: /// Print the module to an output stream with AssemblyAnnotationWriter. void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const; - void print(std::ostream &OS, AssemblyAnnotationWriter *AAW) const; /// Dump the module to stderr (for debugging). void dump() const; @@ -415,11 +481,7 @@ public: /// @} }; -/// An iostream inserter for modules. -inline std::ostream &operator<<(std::ostream &O, const Module &M) { - M.print(O, 0); - return O; -} +/// A raw_ostream inserter for modules.
inline raw_ostream &operator<<(raw_ostream &O, const Module &M) { M.print(O, 0); return O; diff --git a/include/llvm/OperandTraits.h b/include/llvm/OperandTraits.h index 83c1025c07b7a..7c879c88f13ba 100644 --- a/include/llvm/OperandTraits.h +++ b/include/llvm/OperandTraits.h @@ -44,11 +44,10 @@ struct FixedNumOperandTraits { }; template struct Layout { - struct overlay : prefix, U { + struct overlay : public prefix, public U { overlay(); // DO NOT IMPLEMENT }; }; - static inline void *allocate(unsigned); // FIXME }; //===----------------------------------------------------------------------===// @@ -56,7 +55,7 @@ struct FixedNumOperandTraits { //===----------------------------------------------------------------------===// template -struct OptionalOperandTraits : FixedNumOperandTraits { +struct OptionalOperandTraits : public FixedNumOperandTraits { static unsigned operands(const User *U) { return U->getNumOperands(); } @@ -81,7 +80,6 @@ struct VariadicOperandTraits { static unsigned operands(const User *U) { return U->getNumOperands(); } - static inline void *allocate(unsigned); // FIXME }; //===----------------------------------------------------------------------===// @@ -109,7 +107,6 @@ struct HungoffOperandTraits { static unsigned operands(const User *U) { return U->getNumOperands(); } - static inline void *allocate(unsigned); // FIXME }; /// Macro for generating in-class operand accessor declarations. diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h new file mode 100644 index 0000000000000..2b5cc57e75ddc --- /dev/null +++ b/include/llvm/Operator.h @@ -0,0 +1,306 @@ +//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines various classes for working with Instructions and +// ConstantExprs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPERATOR_H +#define LLVM_OPERATOR_H + +#include "llvm/Instruction.h" +#include "llvm/Constants.h" + +namespace llvm { + +class GetElementPtrInst; +class BinaryOperator; +class ConstantExpr; + +/// Operator - This is a utility class that provides an abstraction for the +/// common functionality between Instructions and ConstantExprs. +/// +class Operator : public User { +private: + // Do not implement any of these. The Operator class is intended to be used + // as a utility, and is never itself instantiated. + void *operator new(size_t, unsigned); + void *operator new(size_t s); + Operator(); + ~Operator(); + +public: + /// getOpcode - Return the opcode for this Instruction or ConstantExpr. + /// + unsigned getOpcode() const { + if (const Instruction *I = dyn_cast(this)) + return I->getOpcode(); + return cast(this)->getOpcode(); + } + + /// getOpcode - If V is an Instruction or ConstantExpr, return its + /// opcode. Otherwise return UserOp1. 
+ /// + static unsigned getOpcode(const Value *V) { + if (const Instruction *I = dyn_cast(V)) + return I->getOpcode(); + if (const ConstantExpr *CE = dyn_cast(V)) + return CE->getOpcode(); + return Instruction::UserOp1; + } + + static inline bool classof(const Operator *) { return true; } + static inline bool classof(const Instruction *I) { return true; } + static inline bool classof(const ConstantExpr *I) { return true; } + static inline bool classof(const Value *V) { + return isa(V) || isa(V); + } +}; + +/// OverflowingBinaryOperator - Utility class for integer arithmetic operators +/// which may exhibit overflow - Add, Sub, and Mul. It does not include SDiv, +/// despite that operator having the potential for overflow. +/// +class OverflowingBinaryOperator : public Operator { +public: + enum { + NoUnsignedWrap = (1 << 0), + NoSignedWrap = (1 << 1) + }; + +private: + ~OverflowingBinaryOperator(); // do not implement + + friend class BinaryOperator; + friend class ConstantExpr; + void setHasNoUnsignedWrap(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap); + } + void setHasNoSignedWrap(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap); + } + +public: + /// hasNoUnsignedWrap - Test whether this operation is known to never + /// undergo unsigned overflow, aka the nuw property. + bool hasNoUnsignedWrap() const { + return SubclassOptionalData & NoUnsignedWrap; + } + + /// hasNoSignedWrap - Test whether this operation is known to never + /// undergo signed overflow, aka the nsw property. + bool hasNoSignedWrap() const { + return SubclassOptionalData & NoSignedWrap; + } + + static inline bool classof(const OverflowingBinaryOperator *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Add || + I->getOpcode() == Instruction::Sub || + I->getOpcode() == Instruction::Mul; + } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::Add || + CE->getOpcode() == Instruction::Sub || + CE->getOpcode() == Instruction::Mul; + } + static inline bool classof(const Value *V) { + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); + } +}; + +/// AddOperator - Utility class for integer addition operators. +/// +class AddOperator : public OverflowingBinaryOperator { + ~AddOperator(); // do not implement +public: + static inline bool classof(const AddOperator *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Add; + } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::Add; + } + static inline bool classof(const Value *V) { + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); + } +}; + +/// SubOperator - Utility class for integer subtraction operators. +/// +class SubOperator : public OverflowingBinaryOperator { + ~SubOperator(); // do not implement +public: + static inline bool classof(const SubOperator *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Sub; + } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::Sub; + } + static inline bool classof(const Value *V) { + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); + } +}; + +/// MulOperator - Utility class for integer multiplication operators. 
+/// +class MulOperator : public OverflowingBinaryOperator { + ~MulOperator(); // do not implement +public: + static inline bool classof(const MulOperator *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Mul; + } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::Mul; + } + static inline bool classof(const Value *V) { + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); + } +}; + +/// SDivOperator - An Operator with opcode Instruction::SDiv. +/// +class SDivOperator : public Operator { +public: + enum { + IsExact = (1 << 0) + }; + +private: + ~SDivOperator(); // do not implement + + friend class BinaryOperator; + friend class ConstantExpr; + void setIsExact(bool B) { + SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact); + } + +public: + /// isExact - Test whether this division is known to be exact, with + /// zero remainder. + bool isExact() const { + return SubclassOptionalData & IsExact; + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SDivOperator *) { return true; } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::SDiv; + } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::SDiv; + } + static inline bool classof(const Value *V) { + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); + } +}; + +class GEPOperator : public Operator { + enum { + IsInBounds = (1 << 0) + }; + + ~GEPOperator(); // do not implement + + friend class GetElementPtrInst; + friend class ConstantExpr; + void setIsInBounds(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds); + } + +public: + /// isInBounds - Test whether this is an inbounds GEP, as defined + /// by LangRef.html. + bool isInBounds() const { + return SubclassOptionalData & IsInBounds; + } + + inline op_iterator idx_begin() { return op_begin()+1; } + inline const_op_iterator idx_begin() const { return op_begin()+1; } + inline op_iterator idx_end() { return op_end(); } + inline const_op_iterator idx_end() const { return op_end(); } + + Value *getPointerOperand() { + return getOperand(0); + } + const Value *getPointerOperand() const { + return getOperand(0); + } + static unsigned getPointerOperandIndex() { + return 0U; // get index for modifying correct operand + } + + /// getPointerOperandType - Method to return the pointer operand as a + /// PointerType. + const PointerType *getPointerOperandType() const { + return reinterpret_cast(getPointerOperand()->getType()); + } + + unsigned getNumIndices() const { // Note: always non-negative + return getNumOperands() - 1; + } + + bool hasIndices() const { + return getNumOperands() > 1; + } + + /// hasAllZeroIndices - Return true if all of the indices of this GEP are + /// zeros. If so, the result pointer and the first operand have the same + /// value, just potentially different types. + bool hasAllZeroIndices() const { + for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) { + if (Constant *C = dyn_cast(I)) + if (C->isNullValue()) + continue; + return false; + } + return true; + } + + /// hasAllConstantIndices - Return true if all of the indices of this GEP are + /// constant integers. If so, the result pointer and the first operand have + /// a constant offset between them. 
+ bool hasAllConstantIndices() const { + for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) { + if (!isa(I)) + return false; + } + return true; + } + + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const GEPOperator *) { return true; } + static inline bool classof(const GetElementPtrInst *) { return true; } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::GetElementPtr; + } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::GetElementPtr; + } + static inline bool classof(const Value *V) { + return (isa(V) && classof(cast(V))) || + (isa(V) && classof(cast(V))); + } +}; + +} // End llvm namespace + +#endif
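Before the Pass.h changes, one note on the Operator hierarchy just defined: its value is that a single query works whether the underlying value is an Instruction or a ConstantExpr. A sketch (function names are invented):

#include "llvm/Operator.h"
using namespace llvm;

// True if V is an add known not to wrap in the signed sense, whether it is
// an instruction or a constant expression.
bool AddCannotWrapSigned(const Value *V) {
  if (const OverflowingBinaryOperator *OBO =
          dyn_cast<OverflowingBinaryOperator>(V))
    return OBO->getOpcode() == Instruction::Add && OBO->hasNoSignedWrap();
  return false;
}

// True if V is a GEP whose indices are all zero; such a GEP returns its
// pointer operand with a possibly different type.
bool IsAddressPreservingGEP(const Value *V) {
  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    return GEP->hasAllZeroIndices();
  return false;
}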
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index eea99e028dd8d..eb4c92281c9b3 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -29,11 +29,8 @@ #ifndef LLVM_PASS_H #define LLVM_PASS_H -#include "llvm/Module.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Streams.h" #include -#include #include #include @@ -48,7 +45,8 @@ class ImmutablePass; class PMStack; class AnalysisResolver; class PMDataManager; -class LLVMContext; +class raw_ostream; +class StringRef; // AnalysisID - Use the PassInfo to identify a pass... typedef const PassInfo* AnalysisID; @@ -78,9 +76,6 @@ class Pass { void operator=(const Pass&); // DO NOT IMPLEMENT Pass(const Pass &); // DO NOT IMPLEMENT -protected: - LLVMContext* Context; - public: explicit Pass(intptr_t pid) : Resolver(0), PassID(pid) { assert(pid && "pid cannot be 0"); @@ -108,9 +103,8 @@ public: /// provide the Module* in case the analysis doesn't need it, it can just be /// ignored. /// - virtual void print(std::ostream &O, const Module *M) const; - void print(std::ostream *O, const Module *M) const { if (O) print(*O, M); } - void dump() const; // dump - call print(std::cerr, 0); + virtual void print(raw_ostream &O, const Module *M) const; + void dump() const; // dump - Print to stderr. /// Each pass is responsible for assigning a pass manager to itself. /// PMS is the stack of available pass manager. @@ -171,6 +165,10 @@ public: // or null if it is not known. static const PassInfo *lookupPassInfo(intptr_t TI); + // lookupPassInfo - Return the pass info object for the pass with the given + // argument string, or null if it is not known. + static const PassInfo *lookupPassInfo(const StringRef &Arg); + /// getAnalysisIfAvailable() - Subclasses use this function to /// get analysis information that might be around, for example to update it. /// This is different than getAnalysis in that it can fail (if the analysis @@ -198,7 +196,7 @@ public: AnalysisType &getAnalysis() const; // Defined in PassAnalysisSupport.h template - AnalysisType &getAnalysis(Function &F); // Defined in PassanalysisSupport.h + AnalysisType &getAnalysis(Function &F); // Defined in PassAnalysisSupport.h template AnalysisType &getAnalysisID(const PassInfo *PI) const; @@ -207,9 +205,6 @@ public: AnalysisType &getAnalysisID(const PassInfo *PI, Function &F); }; -inline std::ostream &operator<<(std::ostream &OS, const Pass &P) { - P.print(OS, 0); return OS; -} //===----------------------------------------------------------------------===// /// ModulePass class - This class is used to implement unstructured @@ -281,11 +276,8 @@ public: /// doInitialization - Virtual method overridden by subclasses to do /// any necessary per-module initialization. /// - virtual bool doInitialization(Module &M) { - Context = &M.getContext(); - return false; - } - + virtual bool doInitialization(Module &M) { return false; } + /// runOnFunction - Virtual method overridden by subclasses to do the /// per-function processing of the pass. /// @@ -336,10 +328,7 @@ public: /// doInitialization - Virtual method overridden by subclasses to do /// any necessary per-module initialization. /// - virtual bool doInitialization(Module &M) { - Context = &M.getContext(); - return false; - } + virtual bool doInitialization(Module &M) { return false; } /// doInitialization - Virtual method overridden by BasicBlockPass subclasses /// to do any necessary per-function initialization. diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h index b09ba45e346d3..f339481c1edea 100644 --- a/include/llvm/PassAnalysisSupport.h +++ b/include/llvm/PassAnalysisSupport.h @@ -24,6 +24,8 @@ namespace llvm { +class StringRef; + // No need to include Pass.h, we are being included by it! //===----------------------------------------------------------------------===// @@ -79,6 +81,9 @@ public: return *this; } + // addPreserved - Add the specified Pass class to the set of analyses + // preserved by this pass. + // template AnalysisUsage &addPreserved() { assert(Pass::getClassPassInfo() && "Pass class not registered!"); @@ -86,6 +91,18 @@ public: return *this; } + // addPreserved - Add the Pass with the specified argument string to the set + // of analyses preserved by this pass. If no such Pass exists, do nothing. + // This can be useful when a pass is trivially preserved, but may not be + // linked in. Be careful about spelling! + // + AnalysisUsage &addPreserved(const StringRef &Arg) { + const PassInfo *PI = Pass::lookupPassInfo(Arg); + // If the pass exists, preserve it. Otherwise silently do nothing. + if (PI) Preserved.push_back(PI); + return *this; + } + // setPreservesAll - Set by analyses that do not transform their input at all void setPreservesAll() { PreservesAll = true; } bool getPreservesAll() const { return PreservesAll; } diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h index 1aa0d3a6fa427..5a8f55570a822 100644 --- a/include/llvm/PassManagers.h +++ b/include/llvm/PassManagers.h @@ -91,9 +91,11 @@ #include "llvm/Support/PrettyStackTrace.h" namespace llvm { + class Module; class Pass; + class StringRef; class Value; - class Module; + class Timer; /// FunctionPassManager and PassManager, two top level managers, serve /// as the public interface of pass manager infrastructure. @@ -121,7 +123,7 @@ class PassManagerPrettyStackEntry : public PrettyStackTraceEntry { Value *V; Module *M; public: - PassManagerPrettyStackEntry(Pass *p) + explicit PassManagerPrettyStackEntry(Pass *p) : P(p), V(0), M(0) {} // When P is releaseMemory'd. PassManagerPrettyStackEntry(Pass *p, Value &v) : P(p), V(&v), M(0) {} // When P is run on V @@ -278,14 +280,16 @@ public: /// verifyPreservedAnalysis -- Verify analysis preserved by pass P. void verifyPreservedAnalysis(Pass *P); - /// verifyDomInfo -- Verify dominator information if it is available. - void verifyDomInfo(Pass &P, Function &F); - /// Remove Analysis that is not preserved by the pass void removeNotPreservedAnalysis(Pass *P); - /// Remove dead passes - void removeDeadPasses(Pass *P, const char *Msg, enum PassDebuggingString); + /// Remove dead passes used by P. + void removeDeadPasses(Pass *P, const StringRef &Msg, + enum PassDebuggingString); + + /// Remove P.
+ void freePass(Pass *P, const StringRef &Msg, + enum PassDebuggingString); /// Add pass P into the PassVector. Update /// AvailableAnalysis appropriately if ProcessAnalysis is true. @@ -340,7 +344,7 @@ public: void dumpLastUses(Pass *P, unsigned Offset) const; void dumpPassArguments() const; void dumpPassInfo(Pass *P, enum PassDebuggingString S1, - enum PassDebuggingString S2, const char *Msg); + enum PassDebuggingString S2, const StringRef &Msg); void dumpRequiredSet(const Pass *P) const; void dumpPreservedSet(const Pass *P) const; @@ -378,8 +382,13 @@ protected: // then PMT_Last active pass managers. std::map *InheritedAnalysis[PMT_Last]; + + /// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions + /// or higher is specified. + bool isPassDebuggingExecutionsOrMore() const; + private: - void dumpAnalysisUsage(const char *Msg, const Pass *P, + void dumpAnalysisUsage(const StringRef &Msg, const Pass *P, const AnalysisUsage::VectorType &Set) const; // Set of available Analysis. This information is used while scheduling @@ -449,9 +458,9 @@ public: } }; -} +extern Timer *StartPassTimer(Pass *); +extern void StopPassTimer(Pass *, Timer *); -extern void StartPassTimer(llvm::Pass *); -extern void StopPassTimer(llvm::Pass *); +} #endif diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h index fe3ca520659cb..b5e581a6f4e71 100644 --- a/include/llvm/PassSupport.h +++ b/include/llvm/PassSupport.h @@ -190,14 +190,11 @@ struct RegisterPass : public PassInfo { /// a nice name with the interface. /// class RegisterAGBase : public PassInfo { - PassInfo *InterfaceInfo; - const PassInfo *ImplementationInfo; - bool isDefaultImplementation; protected: - explicit RegisterAGBase(const char *Name, - intptr_t InterfaceID, - intptr_t PassID = 0, - bool isDefault = false); + RegisterAGBase(const char *Name, + intptr_t InterfaceID, + intptr_t PassID = 0, + bool isDefault = false); }; template diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index c0414f970a295..4c848788c73dd 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -15,6 +15,8 @@ #define LLVM_SUPPORT_ALLOCATOR_H #include "llvm/Support/AlignOf.h" +#include "llvm/Support/DataTypes.h" +#include #include namespace llvm { @@ -41,21 +43,104 @@ public: void PrintStats() const {} }; -/// BumpPtrAllocator - This allocator is useful for containers that need very -/// simple memory allocation strategies. In particular, this just keeps +/// MemSlab - This structure lives at the beginning of every slab allocated by +/// the bump allocator. +class MemSlab { +public: + size_t Size; + MemSlab *NextPtr; +}; + +/// SlabAllocator - This class can be used to parameterize the underlying +/// allocation strategy for the bump allocator. In particular, this is used +/// by the JIT to allocate contiguous swathes of executable memory. The +/// interface uses MemSlab's instead of void *'s so that the allocator +/// doesn't have to remember the size of the pointer it allocated. +class SlabAllocator { +public: + virtual ~SlabAllocator(); + virtual MemSlab *Allocate(size_t Size) = 0; + virtual void Deallocate(MemSlab *Slab) = 0; +}; + +/// MallocSlabAllocator - The default slab allocator for the bump allocator +/// is an adapter class for MallocAllocator that just forwards the method +/// calls and translates the arguments. +class MallocSlabAllocator : public SlabAllocator { + /// Allocator - The underlying allocator that we forward to.
+ /// + MallocAllocator Allocator; + +public: + MallocSlabAllocator() : Allocator() { } + virtual ~MallocSlabAllocator(); + virtual MemSlab *Allocate(size_t Size); + virtual void Deallocate(MemSlab *Slab); +}; + +/// BumpPtrAllocator - This allocator is useful for containers that need +/// very simple memory allocation strategies. In particular, this just keeps /// allocating memory, and never deletes it until the entire block is dead. This /// makes allocation speedy, but must only be used when the trade-off is ok. class BumpPtrAllocator { BumpPtrAllocator(const BumpPtrAllocator &); // do not implement void operator=(const BumpPtrAllocator &); // do not implement - void *TheMemory; + /// SlabSize - Allocate data into slabs of this size unless we get an + /// allocation above SizeThreshold. + size_t SlabSize; + + /// SizeThreshold - For any allocation larger than this threshold, we should + /// allocate a separate slab. + size_t SizeThreshold; + + /// Allocator - The underlying allocator we use to get slabs of memory. This + /// defaults to MallocSlabAllocator, which wraps malloc, but it could be + /// changed to use a custom allocator. + SlabAllocator &Allocator; + + /// CurSlab - The slab that we are currently allocating into. + /// + MemSlab *CurSlab; + + /// CurPtr - The current pointer into the current slab. This points to the + /// next free byte in the slab. + char *CurPtr; + + /// End - The end of the current slab. + /// + char *End; + + /// BytesAllocated - This field tracks how many bytes we've allocated, so + /// that we can compute how much space was wasted. + size_t BytesAllocated; + + /// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should + /// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and + /// AlignPtr(8, 4) == 8. + static char *AlignPtr(char *Ptr, size_t Alignment); + + /// StartNewSlab - Allocate a new slab and move the bump pointers over into + /// the new slab. Modifies CurPtr and End. + void StartNewSlab(); + + /// DeallocateSlabs - Deallocate all memory slabs after and including this + /// one. + void DeallocateSlabs(MemSlab *Slab); + + static MallocSlabAllocator DefaultSlabAllocator; + public: - BumpPtrAllocator(); + BumpPtrAllocator(size_t size = 4096, size_t threshold = 4096, + SlabAllocator &allocator = DefaultSlabAllocator); ~BumpPtrAllocator(); + /// Reset - Deallocate all but the current slab and reset the current pointer + /// to the beginning of it, freeing all memory allocated so far. void Reset(); + /// Allocate - Allocate space at the specified alignment. + /// void *Allocate(size_t Size, size_t Alignment); /// Allocate space, but do not construct, one object. 
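A usage sketch of the reworked bump allocator above (default 4096-byte slabs; both knobs and the slab allocator itself can be overridden through the new constructor):

#include "llvm/Support/Allocator.h"
#include <cstring>
using namespace llvm;

void AllocatorExample() {
  BumpPtrAllocator Alloc;              // DefaultSlabAllocator wraps malloc
  void *P = Alloc.Allocate(128, 16);   // 128 bytes at 16-byte alignment
  std::memset(P, 0, 128);
  Alloc.Reset();                       // keep only the current slab
}                                      // destructor frees the rest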
@@ -83,9 +168,11 @@ public: void Deallocate(const void * /*Ptr*/) {} + unsigned GetNumSlabs() const; + void PrintStats() const; }; } // end namespace llvm -#endif +#endif // LLVM_SUPPORT_ALLOCATOR_H diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h index b0b857bf02800..3a20696f05aa3 100644 --- a/include/llvm/Support/CFG.h +++ b/include/llvm/Support/CFG.h @@ -18,17 +18,17 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/Function.h" #include "llvm/InstrTypes.h" -#include "llvm/ADT/iterator.h" namespace llvm { -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // BasicBlock pred_iterator definition -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// template // Predecessor Iterator -class PredIterator : public forward_iterator<_Ptr, ptrdiff_t> { - typedef forward_iterator<_Ptr, ptrdiff_t> super; +class PredIterator : public std::iterator { + typedef std::iterator super; _USE_iterator It; public: typedef PredIterator<_Ptr,_USE_iterator> _Self; @@ -80,15 +80,16 @@ inline pred_const_iterator pred_end(const BasicBlock *BB) { -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // BasicBlock succ_iterator definition -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// template // Successor Iterator -class SuccIterator : public bidirectional_iterator { +class SuccIterator : public std::iterator { const Term_ Term; unsigned idx; - typedef bidirectional_iterator super; + typedef std::iterator super; public: typedef SuccIterator _Self; typedef typename super::pointer pointer; diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h index dc41590fb8a56..285b558afccba 100644 --- a/include/llvm/Support/CallSite.h +++ b/include/llvm/Support/CallSite.h @@ -26,6 +26,7 @@ #include "llvm/Attributes.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/BasicBlock.h" +#include "llvm/CallingConv.h" #include "llvm/Instruction.h" namespace llvm { @@ -40,8 +41,6 @@ public: CallSite(CallInst *CI) : I(reinterpret_cast(CI), true) {} CallSite(InvokeInst *II) : I(reinterpret_cast(II), false) {} CallSite(Instruction *C); - CallSite(const CallSite &CS) : I(CS.I) {} - CallSite &operator=(const CallSite &CS) { I = CS.I; return *this; } bool operator==(const CallSite &CS) const { return I == CS.I; } bool operator!=(const CallSite &CS) const { return I != CS.I; } @@ -63,8 +62,8 @@ public: /// getCallingConv/setCallingConv - get or set the calling convention of the /// call. - unsigned getCallingConv() const; - void setCallingConv(unsigned CC); + CallingConv::ID getCallingConv() const; + void setCallingConv(CallingConv::ID CC); /// getAttributes/setAttributes - get or set the parameter attributes of /// the call. 
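The CallSite accessors above now use the CallingConv::ID typedef rather than a raw unsigned; a small sketch:

#include "llvm/Instructions.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;

void MakeFastCall(CallInst *CI) {
  CallSite CS(CI); // works the same way for InvokeInst
  if (CS.getCallingConv() == CallingConv::C)
    CS.setCallingConv(CallingConv::Fast);
}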
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h index 48988f8a6bb83..35fb29ec6cb3b 100644 --- a/include/llvm/Support/Casting.h +++ b/include/llvm/Support/Casting.h @@ -235,7 +235,7 @@ inline typename cast_retty::ret_type dyn_cast_or_null(const Y &Val) { #ifdef DEBUG_CAST_OPERATORS -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" struct bar { bar() {} @@ -251,7 +251,7 @@ struct foo { }; template <> inline bool isa_impl(const bar &Val) { - cerr << "Classof: " << &Val << "\n"; + errs() << "Classof: " << &Val << "\n"; return true; } diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 3ae50136e4a9d..dc73979bb09ba 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -21,18 +21,17 @@ #define LLVM_SUPPORT_COMMANDLINE_H #include "llvm/Support/type_traits.h" -#include "llvm/Support/DataTypes.h" #include "llvm/Support/Compiler.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" #include #include #include -#include #include #include namespace llvm { - + /// cl Namespace - This namespace contains all of the command line option /// processing machinery. It is intentionally a short name to make qualified /// usage concise. @@ -68,7 +67,7 @@ void MarkOptionsChanged(); // Flags permitted to be passed to command line arguments // -enum NumOccurrences { // Flags for the number of occurrences allowed +enum NumOccurrencesFlag { // Flags for the number of occurrences allowed Optional = 0x01, // Zero or One occurrence ZeroOrMore = 0x02, // Zero or more occurrences allowed Required = 0x03, // One occurrence required @@ -143,8 +142,8 @@ class Option { // an argument. Should return true if there was an error processing the // argument and the program should exit. // - virtual bool handleOccurrence(unsigned pos, const char *ArgName, - const std::string &Arg) = 0; + virtual bool handleOccurrence(unsigned pos, StringRef ArgName, + StringRef Arg) = 0; virtual enum ValueExpected getValueExpectedFlagDefault() const { return ValueOptional; @@ -163,8 +162,8 @@ public: const char *HelpStr; // The descriptive text message for --help const char *ValueStr; // String describing what the value of this option is - inline enum NumOccurrences getNumOccurrencesFlag() const { - return static_cast(Flags & OccurrencesMask); + inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { + return static_cast(Flags & OccurrencesMask); } inline enum ValueExpected getValueExpectedFlag() const { int VE = Flags & ValueMask; @@ -198,7 +197,7 @@ public: Flags |= Flag; } - void setNumOccurrencesFlag(enum NumOccurrences Val) { + void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) { setFlag(Val, OccurrencesMask); } void setValueExpectedFlag(enum ValueExpected Val) { setFlag(Val, ValueMask); } @@ -215,8 +214,7 @@ protected: getOptionHiddenFlag() != 0 && "Not all default flags specified!"); } - inline void setNumAdditionalVals(unsigned n) - { AdditionalVals = n; } + inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; } public: // addArgument - Register this argument with the commandline system. // @@ -232,15 +230,15 @@ public: // virtual void printOptionInfo(size_t GlobalWidth) const = 0; - virtual void getExtraOptionNames(std::vector &) {} + virtual void getExtraOptionNames(SmallVectorImpl &) {} - // addOccurrence - Wrapper around handleOccurrence that enforces Flags + // addOccurrence - Wrapper around handleOccurrence that enforces Flags. 
// - bool addOccurrence(unsigned pos, const char *ArgName, - const std::string &Value, bool MultiArg = false); + bool addOccurrence(unsigned pos, StringRef ArgName, + StringRef Value, bool MultiArg = false); // Prints option name followed by message. Always returns true. - bool error(std::string Message, const char *ArgName = 0); + bool error(const Twine &Message, StringRef ArgName = StringRef()); public: inline int getNumOccurrences() const { return NumOccurrences; } @@ -399,7 +397,7 @@ struct generic_parser_base { hasArgStr = O.hasArgStr(); } - void getExtraOptionNames(std::vector &OptionNames) { + void getExtraOptionNames(SmallVectorImpl &OptionNames) { // If there has been no argstr specified, that means that we need to add an // argument for every possible option. This ensures that our options are // vectored to us. @@ -458,9 +456,8 @@ public: } // parse - Return true on error. - bool parse(Option &O, const char *ArgName, const std::string &Arg, - DataType &V) { - std::string ArgVal; + bool parse(Option &O, StringRef ArgName, StringRef Arg, DataType &V) { + StringRef ArgVal; if (hasArgStr) ArgVal = Arg; else @@ -468,12 +465,12 @@ public: for (unsigned i = 0, e = static_cast(Values.size()); i != e; ++i) - if (ArgVal == Values[i].first) { + if (Values[i].first == ArgVal) { V = Values[i].second.first; return false; } - return O.error(": Cannot find option named '" + ArgVal + "'!"); + return O.error("Cannot find option named '" + ArgVal + "'!"); } /// addLiteralOption - Add an entry to the mapping table. @@ -505,7 +502,7 @@ struct basic_parser_impl { // non-template implementation of basic_parser return ValueRequired; } - void getExtraOptionNames(std::vector &) {} + void getExtraOptionNames(SmallVectorImpl &) {} void initialize(Option &) {} @@ -541,7 +538,7 @@ class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &O, const char *ArgName, const std::string &Arg, bool &Val); + bool parse(Option &O, StringRef ArgName, StringRef Arg, bool &Val); template void initialize(Opt &O) { @@ -568,8 +565,7 @@ template<> class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &O, const char *ArgName, const std::string &Arg, - boolOrDefault &Val); + bool parse(Option &O, StringRef ArgName, StringRef Arg, boolOrDefault &Val); enum ValueExpected getValueExpectedFlagDefault() const { return ValueOptional; @@ -591,7 +587,7 @@ template<> class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &O, const char *ArgName, const std::string &Arg, int &Val); + bool parse(Option &O, StringRef ArgName, StringRef Arg, int &Val); // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "int"; } @@ -610,7 +606,7 @@ template<> class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &O, const char *AN, const std::string &Arg, unsigned &Val); + bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned &Val); // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "uint"; } @@ -628,7 +624,7 @@ template<> class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &O, const char *AN, const std::string &Arg, double &Val); + bool parse(Option &O, StringRef ArgName, StringRef Arg, double &Val); // getValueName - Overload in subclass to provide a better default value. 
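These parse() overloads show the std::string to StringRef migration running through this header. A custom parser written against the new signatures might look like this sketch (the parser and option names are invented, and it assumes StringRef::getAsInteger as used by the in-tree numeric parsers):

#include "llvm/Support/CommandLine.h"
using namespace llvm;

struct PercentParser : public cl::basic_parser<unsigned> {
  // ArgName and Arg now arrive as StringRefs.
  bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Val) {
    if (Arg.getAsInteger(10, Val) || Val > 100)
      return O.error("'" + Arg + "' is not a percentage");
    return false; // false means success
  }
};

static cl::opt<unsigned, false, PercentParser>
Coverage("coverage", cl::desc("Required coverage (0-100)"), cl::init(80));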
virtual const char *getValueName() const { return "number"; } @@ -646,7 +642,7 @@ template<> class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &O, const char *AN, const std::string &Arg, float &Val); + bool parse(Option &O, StringRef ArgName, StringRef Arg, float &Val); // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "number"; } @@ -664,9 +660,8 @@ template<> class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &, const char *, const std::string &Arg, - std::string &Value) { - Value = Arg; + bool parse(Option &, StringRef ArgName, StringRef Arg, std::string &Value) { + Value = Arg.str(); return false; } @@ -686,8 +681,7 @@ template<> class parser : public basic_parser { public: // parse - Return true on error. - bool parse(Option &, const char *, const std::string &Arg, - char &Value) { + bool parse(Option &, StringRef ArgName, StringRef Arg, char &Value) { Value = Arg[0]; return false; } @@ -726,8 +720,10 @@ template<> struct applicator { static void opt(const char *Str, Opt &O) { O.setArgStr(Str); } }; -template<> struct applicator { - static void opt(NumOccurrences NO, Option &O) { O.setNumOccurrencesFlag(NO); } +template<> struct applicator { + static void opt(NumOccurrencesFlag NO, Option &O) { + O.setNumOccurrencesFlag(NO); + } }; template<> struct applicator { static void opt(ValueExpected VE, Option &O) { O.setValueExpectedFlag(VE); } @@ -770,7 +766,7 @@ public: bool setLocation(Option &O, DataType &L) { if (Location) - return O.error(": cl::location(x) specified more than once!"); + return O.error("cl::location(x) specified more than once!"); Location = &L; return false; } @@ -833,8 +829,8 @@ class opt : public Option, is_class::value> { ParserClass Parser; - virtual bool handleOccurrence(unsigned pos, const char *ArgName, - const std::string &Arg) { + virtual bool handleOccurrence(unsigned pos, StringRef ArgName, + StringRef Arg) { typename ParserClass::parser_data_type Val = typename ParserClass::parser_data_type(); if (Parser.parse(*this, ArgName, Arg, Val)) @@ -847,7 +843,7 @@ class opt : public Option, virtual enum ValueExpected getValueExpectedFlagDefault() const { return Parser.getValueExpectedFlagDefault(); } - virtual void getExtraOptionNames(std::vector &OptionNames) { + virtual void getExtraOptionNames(SmallVectorImpl &OptionNames) { return Parser.getExtraOptionNames(OptionNames); } @@ -964,7 +960,7 @@ public: bool setLocation(Option &O, StorageClass &L) { if (Location) - return O.error(": cl::location(x) specified more than once!"); + return O.error("cl::location(x) specified more than once!"); Location = &L; return false; } @@ -1002,12 +998,11 @@ class list : public Option, public list_storage { virtual enum ValueExpected getValueExpectedFlagDefault() const { return Parser.getValueExpectedFlagDefault(); } - virtual void getExtraOptionNames(std::vector &OptionNames) { + virtual void getExtraOptionNames(SmallVectorImpl &OptionNames) { return Parser.getExtraOptionNames(OptionNames); } - virtual bool handleOccurrence(unsigned pos, const char *ArgName, - const std::string &Arg) { + virtual bool handleOccurrence(unsigned pos, StringRef ArgName, StringRef Arg){ typename ParserClass::parser_data_type Val = typename ParserClass::parser_data_type(); if (Parser.parse(*this, ArgName, Arg, Val)) @@ -1139,7 +1134,7 @@ public: bool setLocation(Option &O, unsigned &L) { if (Location) - return O.error(": 
cl::location(x) specified more than once!"); + return O.error("cl::location(x) specified more than once!"); Location = &L; return false; } @@ -1202,12 +1197,11 @@ class bits : public Option, public bits_storage { virtual enum ValueExpected getValueExpectedFlagDefault() const { return Parser.getValueExpectedFlagDefault(); } - virtual void getExtraOptionNames(std::vector &OptionNames) { + virtual void getExtraOptionNames(SmallVectorImpl &OptionNames) { return Parser.getExtraOptionNames(OptionNames); } - virtual bool handleOccurrence(unsigned pos, const char *ArgName, - const std::string &Arg) { + virtual bool handleOccurrence(unsigned pos, StringRef ArgName, StringRef Arg){ typename ParserClass::parser_data_type Val = typename ParserClass::parser_data_type(); if (Parser.parse(*this, ArgName, Arg, Val)) @@ -1307,8 +1301,8 @@ public: class alias : public Option { Option *AliasFor; - virtual bool handleOccurrence(unsigned pos, const char * /*ArgName*/, - const std::string &Arg) { + virtual bool handleOccurrence(unsigned pos, StringRef /*ArgName*/, + StringRef Arg) { return AliasFor->handleOccurrence(pos, AliasFor->ArgStr, Arg); } // Handle printing stuff... @@ -1317,15 +1311,15 @@ class alias : public Option { void done() { if (!hasArgStr()) - error(": cl::alias must have argument name specified!"); + error("cl::alias must have argument name specified!"); if (AliasFor == 0) - error(": cl::alias must have an cl::aliasopt(option) specified!"); + error("cl::alias must have an cl::aliasopt(option) specified!"); addArgument(); } public: void setAliasFor(Option &O) { if (AliasFor) - error(": cl::alias must only have one cl::aliasopt(...) specified!"); + error("cl::alias must only have one cl::aliasopt(...) specified!"); AliasFor = &O; } @@ -1366,7 +1360,7 @@ struct aliasopt { // extrahelp - provide additional help at the end of the normal help // output. All occurrences of cl::extrahelp will be accumulated and -// printed to std::cerr at the end of the regular help, just before +// printed to stderr at the end of the regular help, just before // exit is called. struct extrahelp { const char * morehelp; diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 90292df38196c..342a97d761ee6 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -23,7 +23,7 @@ #define VISIBILITY_HIDDEN #endif -#if (__GNUC__ >= 4) +#if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) #define ATTRIBUTE_USED __attribute__((__used__)) #else #define ATTRIBUTE_USED @@ -56,4 +56,10 @@ #define DISABLE_INLINE #endif +#ifdef __GNUC__ +#define NORETURN __attribute__((noreturn)) +#else +#define NORETURN +#endif + #endif diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h index 35065a060866c..99cb92078f367 100644 --- a/include/llvm/Support/ConstantFolder.h +++ b/include/llvm/Support/ConstantFolder.h @@ -21,9 +21,12 @@ namespace llvm { +class LLVMContext; + /// ConstantFolder - Create constants with minimum, target independent, folding. 
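A hedged sketch of how this folder is typically driven (not from the patch; it anticipates the Create* methods the class below defines, and assumes the usual ConstantInt/Type helpers): IRBuilder instantiates a folder and routes Create* calls to it whenever both operands are Constants, so no instruction is ever created for constant operands.

#include "llvm/Support/ConstantFolder.h"
#include "llvm/LLVMContext.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
using namespace llvm;

Constant *foldExample(LLVMContext &Ctx) {
  ConstantFolder Folder(Ctx);   // the context argument is accepted, unused
  Constant *Two   = ConstantInt::get(Type::getInt32Ty(Ctx), 2);
  Constant *Three = ConstantInt::get(Type::getInt32Ty(Ctx), 3);
  // Folds to the constant 5 via ConstantExpr; no IR instruction is emitted.
  return Folder.CreateAdd(Two, Three);
}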
class ConstantFolder { public: + explicit ConstantFolder(LLVMContext &) {} //===--------------------------------------------------------------------===// // Binary Operators @@ -32,12 +35,18 @@ public: Constant *CreateAdd(Constant *LHS, Constant *RHS) const { return ConstantExpr::getAdd(LHS, RHS); } + Constant *CreateNSWAdd(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getNSWAdd(LHS, RHS); + } Constant *CreateFAdd(Constant *LHS, Constant *RHS) const { return ConstantExpr::getFAdd(LHS, RHS); } Constant *CreateSub(Constant *LHS, Constant *RHS) const { return ConstantExpr::getSub(LHS, RHS); } + Constant *CreateNSWSub(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getNSWSub(LHS, RHS); + } Constant *CreateFSub(Constant *LHS, Constant *RHS) const { return ConstantExpr::getFSub(LHS, RHS); } @@ -53,6 +62,9 @@ public: Constant *CreateSDiv(Constant *LHS, Constant *RHS) const { return ConstantExpr::getSDiv(LHS, RHS); } + Constant *CreateExactSDiv(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getExactSDiv(LHS, RHS); + } Constant *CreateFDiv(Constant *LHS, Constant *RHS) const { return ConstantExpr::getFDiv(LHS, RHS); } @@ -116,6 +128,15 @@ public: return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx); } + Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx); + } + Constant *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx); + } + //===--------------------------------------------------------------------===// // Cast/Conversion Operators //===--------------------------------------------------------------------===// @@ -124,10 +145,16 @@ public: const Type *DestTy) const { return ConstantExpr::getCast(Op, C, DestTy); } + Constant *CreatePointerCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getPointerCast(C, DestTy); + } Constant *CreateIntCast(Constant *C, const Type *DestTy, bool isSigned) const { return ConstantExpr::getIntegerCast(C, DestTy, isSigned); } + Constant *CreateFPCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getFPCast(C, DestTy); + } Constant *CreateBitCast(Constant *C, const Type *DestTy) const { return CreateCast(Instruction::BitCast, C, DestTy); @@ -138,6 +165,13 @@ public: Constant *CreatePtrToInt(Constant *C, const Type *DestTy) const { return CreateCast(Instruction::PtrToInt, C, DestTy); } + Constant *CreateZExtOrBitCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getZExtOrBitCast(C, DestTy); + } + Constant *CreateSExtOrBitCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getSExtOrBitCast(C, DestTy); + } + Constant *CreateTruncOrBitCast(Constant *C, const Type *DestTy) const { return ConstantExpr::getTruncOrBitCast(C, DestTy); } @@ -154,14 +188,6 @@ public: Constant *RHS) const { return ConstantExpr::getCompare(P, LHS, RHS); } - Constant *CreateVICmp(CmpInst::Predicate P, Constant *LHS, - Constant *RHS) const { - return ConstantExpr::getCompare(P, LHS, RHS); - } - Constant *CreateVFCmp(CmpInst::Predicate P, Constant *LHS, - Constant *RHS) const { - return ConstantExpr::getCompare(P, LHS, RHS); - } //===--------------------------------------------------------------------===// // Other Instructions diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h index 098fab5f98dd0..e9c8c7cb2e14e 100644 --- 
a/include/llvm/Support/ConstantRange.h +++ b/include/llvm/Support/ConstantRange.h @@ -24,7 +24,9 @@ // [0, 0) = {} = Empty set // [255, 255) = {0..255} = Full Set // -// Note that ConstantRange always keeps unsigned values. +// Note that ConstantRange can be used to represent either signed or +// unsigned ranges. +// //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_CONSTANT_RANGE_H @@ -35,11 +37,14 @@ namespace llvm { +/// ConstantRange - This class represents an range of values. +/// class ConstantRange { APInt Lower, Upper; static ConstantRange intersect1Wrapped(const ConstantRange &LHS, const ConstantRange &RHS); - public: + +public: /// Initialize a full (the default) or empty set for the specified bit width. /// explicit ConstantRange(uint32_t BitWidth, bool isFullSet = true); @@ -53,6 +58,16 @@ class ConstantRange { /// assert out if the two APInt's are not the same bit width. ConstantRange(const APInt& Lower, const APInt& Upper); + /// makeICmpRegion - Produce the smallest range that contains all values that + /// might satisfy the comparison specified by Pred when compared to any value + /// contained within Other. + /// + /// Solves for range X in 'for all x in X, there exists a y in Y such that + /// icmp op x, y is true'. Every value that might make the comparison true + /// is included in the resulting range. + static ConstantRange makeICmpRegion(unsigned Pred, + const ConstantRange &Other); + /// getLower - Return the lower value for this range... /// const APInt &getLower() const { return Lower; } @@ -83,6 +98,10 @@ class ConstantRange { /// bool contains(const APInt &Val) const; + /// contains - Return true if the other range is a subset of this one. + /// + bool contains(const ConstantRange &CR) const; + /// getSingleElement - If this set contains a single element, return it, /// otherwise return null. /// @@ -134,21 +153,13 @@ class ConstantRange { ConstantRange subtract(const APInt &CI) const; /// intersectWith - Return the range that results from the intersection of - /// this range with another range. The resultant range is pruned as much as - /// possible, but there may be cases where elements are included that are in - /// one of the sets but not the other. For example: [100, 8) intersect [3, - /// 120) yields [3, 120) - /// - ConstantRange intersectWith(const ConstantRange &CR) const; - - /// maximalIntersectWith - Return the range that results from the intersection - /// of this range with another range. The resultant range is guaranteed to + /// this range with another range. The resultant range is guaranteed to /// include all elements contained in both input ranges, and to have the /// smallest possible set size that does so. Because there may be two - /// intersections with the same set size, A.maximalIntersectWith(B) might not - /// be equal to B.maximalIntersectWith(A). + /// intersections with the same set size, A.intersectWith(B) might not + /// be equal to B.intersectWith(A). /// - ConstantRange maximalIntersectWith(const ConstantRange &CR) const; + ConstantRange intersectWith(const ConstantRange &CR) const; /// unionWith - Return the range that results from the union of this range /// with another range. The resultant range is guaranteed to include the @@ -176,6 +187,28 @@ class ConstantRange { /// truncated to the specified type. 
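Before the remaining arithmetic helpers, a hedged sketch of the set operations introduced above (illustrative only; it assumes the APInt constructor taking a bit width and value, and the ICmpInst predicate enum):

#include "llvm/Support/ConstantRange.h"
#include "llvm/Instructions.h"   // for the ICmpInst predicate values
using namespace llvm;

void rangeExample() {
  ConstantRange Small(APInt(8, 10), APInt(8, 20));  // [10, 20) over i8
  ConstantRange Big(APInt(8, 0), APInt(8, 128));    // [0, 128) over i8

  bool IsSubset = Big.contains(Small);              // subset query: true
  ConstantRange Meet = Small.intersectWith(Big);    // smallest sound meet

  // All i8 values x for which 'icmp ult x, y' can hold for some y in Big:
  ConstantRange X =
      ConstantRange::makeICmpRegion(ICmpInst::ICMP_ULT, Big);
  (void)IsSubset; (void)Meet; (void)X;
}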
ConstantRange truncate(uint32_t BitWidth) const; + /// add - Return a new range representing the possible values resulting + /// from an addition of a value in this range and a value in Other. + ConstantRange add(const ConstantRange &Other) const; + + /// multiply - Return a new range representing the possible values resulting + /// from a multiplication of a value in this range and a value in Other. + /// TODO: This isn't fully implemented yet. + ConstantRange multiply(const ConstantRange &Other) const; + + /// smax - Return a new range representing the possible values resulting + /// from a signed maximum of a value in this range and a value in Other. + ConstantRange smax(const ConstantRange &Other) const; + + /// umax - Return a new range representing the possible values resulting + /// from an unsigned maximum of a value in this range and a value in Other. + ConstantRange umax(const ConstantRange &Other) const; + + /// udiv - Return a new range representing the possible values resulting + /// from an unsigned division of a value in this range and a value in Other. + /// TODO: This isn't fully implemented yet. + ConstantRange udiv(const ConstantRange &Other) const; + /// print - Print out the bounds to a stream... /// void print(raw_ostream &OS) const; diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake index 4d6fcc8e09740..ad210ed3b54d3 100644 --- a/include/llvm/Support/DataTypes.h.cmake +++ b/include/llvm/Support/DataTypes.h.cmake @@ -1,22 +1,25 @@ -//===-- include/Support/DataTypes.h - Define fixed size types ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by the LLVM research group and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains definitions to figure out the size of _HOST_ data types. -// This file is important because different host OS's define different macros, -// which makes portability tough. This file exports the following definitions: -// -// [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types -// [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. -// -// No library is required when using these functinons. -// -//===----------------------------------------------------------------------===// +/*===-- include/Support/DataTypes.h - Define fixed size types -----*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file contains definitions to figure out the size of _HOST_ data types.*| +|* This file is important because different host OS's define different macros,*| +|* which makes portability tough. This file exports the following *| +|* definitions: *| +|* *| +|* [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*| +|* [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. *| +|* *| +|* No library is required when using these functinons. *| +|* *| +|*===----------------------------------------------------------------------===*/ + +/* Please leave this file C-compatible. 
*/ #ifndef SUPPORT_DATATYPES_H #define SUPPORT_DATATYPES_H @@ -24,18 +27,21 @@ #cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H} #cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H} #cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H} -#undef HAVE_UINT64_T -#undef HAVE_U_INT64_T +#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T} +#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T} -// FIXME: UGLY HACK (Added by Kevin) -#define HAVE_UINT64_T 1 +#ifdef __cplusplus +#include +#else +#include +#endif #ifndef _MSC_VER -// Note that this header's correct operation depends on __STDC_LIMIT_MACROS -// being defined. We would define it here, but in order to prevent Bad Things -// happening when system headers or C++ STL headers include stdint.h before -// we define it here, we define it on the g++ command line (in Makefile.rules). +/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS + being defined. We would define it here, but in order to prevent Bad Things + happening when system headers or C++ STL headers include stdint.h before we + define it here, we define it on the g++ command line (in Makefile.rules). */ #if !defined(__STDC_LIMIT_MACROS) # error "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h" #endif @@ -45,7 +51,7 @@ "#including Support/DataTypes.h" #endif -// Note that includes , if this is a C99 system. +/* Note that includes , if this is a C99 system. */ #ifdef HAVE_SYS_TYPES_H #include #endif @@ -58,17 +64,11 @@ #include #endif -#ifdef __cplusplus -#include -#else -#include -#endif - #ifdef _AIX #include "llvm/Support/AIXDataTypesFix.h" #endif -// Handle incorrect definition of uint64_t as u_int64_t +/* Handle incorrect definition of uint64_t as u_int64_t */ #ifndef HAVE_UINT64_T #ifdef HAVE_U_INT64_T typedef u_int64_t uint64_t; @@ -90,11 +90,16 @@ typedef u_int64_t uint64_t; #endif #else /* _MSC_VER */ -// Visual C++ doesn't provide standard integer headers, but it does provide -// built-in data types. +/* Visual C++ doesn't provide standard integer headers, but it does provide + built-in data types. */ #include #include #include +#ifdef __cplusplus +#include +#else +#include +#endif typedef __int64 int64_t; typedef unsigned __int64 uint64_t; typedef signed int int32_t; diff --git a/include/llvm/Support/DataTypes.h.in b/include/llvm/Support/DataTypes.h.in index 72063f7d2add3..405f4764c8f57 100644 --- a/include/llvm/Support/DataTypes.h.in +++ b/include/llvm/Support/DataTypes.h.in @@ -1,22 +1,25 @@ -//===-- include/Support/DataTypes.h - Define fixed size types ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains definitions to figure out the size of _HOST_ data types. -// This file is important because different host OS's define different macros, -// which makes portability tough. This file exports the following definitions: -// -// [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types -// [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. -// -// No library is required when using these functinons. 
-// -//===----------------------------------------------------------------------===// +/*===-- include/Support/DataTypes.h - Define fixed size types -----*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file contains definitions to figure out the size of _HOST_ data types.*| +|* This file is important because different host OS's define different macros,*| +|* which makes portability tough. This file exports the following *| +|* definitions: *| +|* *| +|* [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*| +|* [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values. *| +|* *| +|* No library is required when using these functinons. *| +|* *| +|*===----------------------------------------------------------------------===*/ + +/* Please leave this file C-compatible. */ #ifndef SUPPORT_DATATYPES_H #define SUPPORT_DATATYPES_H @@ -27,12 +30,18 @@ #undef HAVE_UINT64_T #undef HAVE_U_INT64_T +#ifdef __cplusplus +#include +#else +#include +#endif + #ifndef _MSC_VER -// Note that this header's correct operation depends on __STDC_LIMIT_MACROS -// being defined. We would define it here, but in order to prevent Bad Things -// happening when system headers or C++ STL headers include stdint.h before -// we define it here, we define it on the g++ command line (in Makefile.rules). +/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS + being defined. We would define it here, but in order to prevent Bad Things + happening when system headers or C++ STL headers include stdint.h before we + define it here, we define it on the g++ command line (in Makefile.rules). */ #if !defined(__STDC_LIMIT_MACROS) # error "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h" #endif @@ -42,7 +51,7 @@ "#including Support/DataTypes.h" #endif -// Note that includes , if this is a C99 system. +/* Note that includes , if this is a C99 system. */ #ifdef HAVE_SYS_TYPES_H #include #endif @@ -55,17 +64,11 @@ #include #endif -#ifdef __cplusplus -#include -#else -#include -#endif - #ifdef _AIX #include "llvm/Support/AIXDataTypesFix.h" #endif -// Handle incorrect definition of uint64_t as u_int64_t +/* Handle incorrect definition of uint64_t as u_int64_t */ #ifndef HAVE_UINT64_T #ifdef HAVE_U_INT64_T typedef u_int64_t uint64_t; @@ -87,8 +90,8 @@ typedef u_int64_t uint64_t; #endif #else /* _MSC_VER */ -// Visual C++ doesn't provide standard integer headers, but it does provide -// built-in data types. +/* Visual C++ doesn't provide standard integer headers, but it does provide + built-in data types. */ #include #include #include diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h index 52d0d3fb40556..6f82ea716f240 100644 --- a/include/llvm/Support/Debug.h +++ b/include/llvm/Support/Debug.h @@ -18,61 +18,65 @@ // can specify '-debug-only=foo' to enable JUST the debug information for the // foo class. // -// When compiling in release mode, the -debug-* options and all code in DEBUG() -// statements disappears, so it does not effect the runtime of the code. +// When compiling without assertions, the -debug-* options and all code in +// DEBUG() statements disappears, so it does not effect the runtime of the code. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_DEBUG_H #define LLVM_SUPPORT_DEBUG_H -#include "llvm/Support/Streams.h" - namespace llvm { // DebugFlag - This boolean is set to true if the '-debug' command line option // is specified. This should probably not be referenced directly, instead, use // the DEBUG macro below. // +#ifndef NDEBUG extern bool DebugFlag; +#endif // isCurrentDebugType - Return true if the specified string is the debug type // specified on the command line, or if none was specified on the command line // with the -debug-only=X option. // +#ifndef NDEBUG bool isCurrentDebugType(const char *Type); +#else +#define isCurrentDebugType(X) (false) +#endif + +// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug +// information. If the '-debug' option is specified on the commandline, and if +// this is a debug build, then the code specified as the option to the macro +// will be executed. Otherwise it will not be. Example: +// +// DEBUG_WITH_TYPE("bitset", errs() << "Bitset contains: " << Bitset << "\n"); +// +// This will emit the debug information if -debug is present, and -debug-only is +// not specified, or is specified as "bitset". + +#ifdef NDEBUG +#define DEBUG_WITH_TYPE(TYPE, X) do { } while (0) +#else +#define DEBUG_WITH_TYPE(TYPE, X) \ + do { if (DebugFlag && isCurrentDebugType(TYPE)) { X; } } while (0) +#endif // DEBUG macro - This macro should be used by passes to emit debug information. // In the '-debug' option is specified on the commandline, and if this is a // debug build, then the code specified as the option to the macro will be // executed. Otherwise it will not be. Example: // -// DEBUG(cerr << "Bitset contains: " << Bitset << "\n"); +// DEBUG(errs() << "Bitset contains: " << Bitset << "\n"); // #ifndef DEBUG_TYPE #define DEBUG_TYPE "" #endif -#ifdef NDEBUG -#define DEBUG(X) -#else -#define DEBUG(X) \ - do { if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { X; } } while (0) -#endif - -/// getErrorOutputStream - Returns the error output stream (std::cerr). This -/// places the std::c* I/O streams into one .cpp file and relieves the whole -/// program from having to have hundreds of static c'tor/d'tors for them. -/// -OStream &getErrorOutputStream(const char *DebugType); - -#ifdef NDEBUG -#define DOUT llvm::OStream(0) -#else -#define DOUT llvm::getErrorOutputStream(DEBUG_TYPE) -#endif - +#define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X) + } // End llvm namespace #endif diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h index 5c089efc98ce9..55c3c4ffbd76f 100644 --- a/include/llvm/Support/DebugLoc.h +++ b/include/llvm/Support/DebugLoc.h @@ -19,20 +19,25 @@ #include namespace llvm { - class GlobalVariable; + class MDNode; /// DebugLocTuple - Debug location tuple of filename id, line and column.
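A hedged sketch of the reworked debug macros above (illustrative, not part of the patch): both macros compile away entirely under NDEBUG, and at runtime they fire only when -debug is given and -debug-only is absent or matches the type.

#define DEBUG_TYPE "sketch"          // must precede the Debug.h include
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void debugExample(int BitsetSize) {
  // Uses the translation unit's DEBUG_TYPE ("sketch").
  DEBUG(errs() << "Bitset contains " << BitsetSize << " bits\n");

  // Same mechanism, but with an explicit type overriding DEBUG_TYPE:
  DEBUG_WITH_TYPE("bitset", errs() << "bitset-specific message\n");
}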
/// struct DebugLocTuple { - GlobalVariable *CompileUnit; + MDNode *Scope; + MDNode *InlinedAtLoc; unsigned Line, Col; - DebugLocTuple(GlobalVariable *v, unsigned l, unsigned c) - : CompileUnit(v), Line(l), Col(c) {}; + DebugLocTuple() + : Scope(0), InlinedAtLoc(0), Line(~0U), Col(~0U) {}; + + DebugLocTuple(MDNode *n, MDNode *i, unsigned l, unsigned c) + : Scope(n), InlinedAtLoc(i), Line(l), Col(c) {}; bool operator==(const DebugLocTuple &DLT) const { - return CompileUnit == DLT.CompileUnit && - Line == DLT.Line && Col == DLT.Col; + return Scope == DLT.Scope && + InlinedAtLoc == DLT.InlinedAtLoc && + Line == DLT.Line && Col == DLT.Col; } bool operator!=(const DebugLocTuple &DLT) const { return !(*this == DLT); @@ -60,23 +65,25 @@ namespace llvm { bool operator!=(const DebugLoc &DL) const { return !(*this == DL); } }; - // Partially specialize DenseMapInfo for DebugLocTyple. + // Specialize DenseMapInfo for DebugLocTuple. template<> struct DenseMapInfo { static inline DebugLocTuple getEmptyKey() { - return DebugLocTuple(0, ~0U, ~0U); + return DebugLocTuple(0, 0, ~0U, ~0U); } static inline DebugLocTuple getTombstoneKey() { - return DebugLocTuple((GlobalVariable*)~1U, ~1U, ~1U); + return DebugLocTuple((MDNode*)~1U, (MDNode*)~1U, ~1U, ~1U); } static unsigned getHashValue(const DebugLocTuple &Val) { - return DenseMapInfo::getHashValue(Val.CompileUnit) ^ + return DenseMapInfo::getHashValue(Val.Scope) ^ + DenseMapInfo::getHashValue(Val.InlinedAtLoc) ^ DenseMapInfo::getHashValue(Val.Line) ^ DenseMapInfo::getHashValue(Val.Col); } static bool isEqual(const DebugLocTuple &LHS, const DebugLocTuple &RHS) { - return LHS.CompileUnit == RHS.CompileUnit && - LHS.Line == RHS.Line && - LHS.Col == RHS.Col; + return LHS.Scope == RHS.Scope && + LHS.InlinedAtLoc == RHS.InlinedAtLoc && + LHS.Line == RHS.Line && + LHS.Col == RHS.Col; } static bool isPod() { return true; } diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index 55838b8144bcf..bfccc522b9796 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -449,6 +449,7 @@ enum dwarf_constants { // Call frame instruction encodings DW_CFA_extended = 0x00, + DW_CFA_nop = 0x00, DW_CFA_advance_loc = 0x40, DW_CFA_offset = 0x80, DW_CFA_restore = 0xc0, diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h new file mode 100644 index 0000000000000..67bccf09269e6 --- /dev/null +++ b/include/llvm/Support/ErrorHandling.h @@ -0,0 +1,87 @@ +//===- llvm/Support/ErrorHandling.h - Callbacks for errors ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an API used to indicate error conditions. +// Callbacks can be registered for these errors through this API. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_ERRORHANDLING_H +#define LLVM_SUPPORT_ERRORHANDLING_H + +#include "llvm/Support/Compiler.h" +#include + +namespace llvm { + class Twine; + + /// An error handler callback. + typedef void (*llvm_error_handler_t)(void *user_data, + const std::string& reason); + + /// llvm_instal_error_handler - Installs a new error handler to be used + /// whenever a serious (non-recoverable) error is encountered by LLVM. 
+ /// + /// If you are using llvm_start_multithreaded, you should register the handler + /// before doing that. + /// + /// If no error handler is installed the default is to print the error message + /// to stderr, and call exit(1). If an error handler is installed then it is + /// the handler's responsibility to log the message, it will no longer be + /// printed to stderr. If the error handler returns, then exit(1) will be + /// called. + /// + /// It is dangerous to naively use an error handler which throws an exception. + /// Even though some applications desire to gracefully recover from arbitrary + /// faults, blindly throwing exceptions through unfamiliar code isn't a way to + /// achieve this. + /// + /// \param user_data - An argument which will be passed to the install error + /// handler. + void llvm_install_error_handler(llvm_error_handler_t handler, + void *user_data = 0); + + /// Restores default error handling behaviour. + /// This must not be called between llvm_start_multithreaded() and + /// llvm_stop_multithreaded(). + void llvm_remove_error_handler(); + + /// Reports a serious error, calling any installed error handler. These + /// functions are intended to be used for error conditions which are outside + /// the control of the compiler (I/O errors, invalid user input, etc.) + /// + /// If no error handler is installed the default is to print the message to + /// standard error, followed by a newline. + /// After the error handler is called this function will call exit(1), it + /// does not return. + void llvm_report_error(const char *reason) NORETURN; + void llvm_report_error(const std::string &reason) NORETURN; + void llvm_report_error(const Twine &reason) NORETURN; + + /// This function calls abort(), and prints the optional message to stderr. + /// Use the llvm_unreachable macro (that adds location info), instead of + /// calling this function directly. + void llvm_unreachable_internal(const char *msg=0, const char *file=0, + unsigned line=0) NORETURN; +} + +/// Prints the message and location info to stderr in !NDEBUG builds. +/// This is intended to be used for "impossible" situations that imply +/// a bug in the compiler. +/// +/// In NDEBUG mode it only prints "UNREACHABLE executed". +/// Use this instead of assert(0), so that the compiler knows this path +/// is not reachable even for NDEBUG builds. +#ifndef NDEBUG +#define llvm_unreachable(msg) llvm_unreachable_internal(msg, __FILE__, __LINE__) +#else +#define llvm_unreachable(msg) llvm_unreachable_internal() +#endif + +#endif diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h index 2ab097faf56d9..df03f66ddc7d1 100644 --- a/include/llvm/Support/Format.h +++ b/include/llvm/Support/Format.h @@ -36,6 +36,10 @@ class format_object_base { protected: const char *Fmt; virtual void home(); // Out of line virtual method. + + /// snprint - Call snprintf() for this object, on the given buffer and size. + virtual int snprint(char *Buffer, unsigned BufferSize) const = 0; + public: format_object_base(const char *fmt) : Fmt(fmt) {} virtual ~format_object_base() {} @@ -43,7 +47,23 @@ public: /// print - Format the object into the specified buffer. On success, this /// returns the length of the formatted string. If the buffer is too small, /// this returns a length to retry with, which will be larger than BufferSize. 
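Returning briefly to the ErrorHandling.h entry points introduced above, a hedged usage sketch (illustrative only; the "mytool" tag is a made-up user_data value): the handler receives the pointer given at installation time plus the failure reason, and if it returns, LLVM still calls exit(1).

#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
#include <string>

// Matches llvm_error_handler_t: (void *user_data, const std::string &reason).
static void myHandler(void *UserData, const std::string &Reason) {
  std::fprintf(stderr, "[%s] fatal: %s\n",
               static_cast<const char *>(UserData), Reason.c_str());
}

void errorExample(bool Broken) {
  llvm::llvm_install_error_handler(myHandler, (void *)"mytool");
  if (Broken)
    llvm::llvm_report_error("invalid input file");   // does not return
  llvm::llvm_remove_error_handler();
}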
- virtual unsigned print(char *Buffer, unsigned BufferSize) const = 0; + unsigned print(char *Buffer, unsigned BufferSize) const { + assert(BufferSize && "Invalid buffer size!"); + + // Print the string, leaving room for the terminating null. + int N = snprint(Buffer, BufferSize); + + // VC++ and old GlibC return negative on overflow, just double the size. + if (N < 0) + return BufferSize*2; + + // Other impls yield number of bytes needed, not including the final '\0'. + if (unsigned(N) >= BufferSize) + return N+1; + + // Otherwise N is the length of output (not including the final '\0'). + return N; + } }; /// format_object1 - This is a templated helper class used by the format @@ -58,17 +78,8 @@ public: : format_object_base(fmt), Val(val) { } - /// print - Format the object into the specified buffer. On success, this - /// returns the length of the formatted string. If the buffer is too small, - /// this returns a length to retry with, which will be larger than BufferSize. - virtual unsigned print(char *Buffer, unsigned BufferSize) const { - int N = snprintf(Buffer, BufferSize-1, Fmt, Val); - if (N < 0) // VC++ and old GlibC return negative on overflow. - return BufferSize*2; - if (unsigned(N) >= BufferSize-1)// Other impls yield number of bytes needed. - return N+1; - // If N is positive and <= BufferSize-1, then the string fit, yay. - return N; + virtual int snprint(char *Buffer, unsigned BufferSize) const { + return snprintf(Buffer, BufferSize, Fmt, Val); } }; @@ -85,17 +96,8 @@ public: : format_object_base(fmt), Val1(val1), Val2(val2) { } - /// print - Format the object into the specified buffer. On success, this - /// returns the length of the formatted string. If the buffer is too small, - /// this returns a length to retry with, which will be larger than BufferSize. - virtual unsigned print(char *Buffer, unsigned BufferSize) const { - int N = snprintf(Buffer, BufferSize-1, Fmt, Val1, Val2); - if (N < 0) // VC++ and old GlibC return negative on overflow. - return BufferSize*2; - if (unsigned(N) >= BufferSize-1)// Other impls yield number of bytes needed. - return N+1; - // If N is positive and <= BufferSize-1, then the string fit, yay. - return N; + virtual int snprint(char *Buffer, unsigned BufferSize) const { + return snprintf(Buffer, BufferSize, Fmt, Val1, Val2); } }; @@ -113,17 +115,8 @@ public: : format_object_base(fmt), Val1(val1), Val2(val2), Val3(val3) { } - /// print - Format the object into the specified buffer. On success, this - /// returns the length of the formatted string. If the buffer is too small, - /// this returns a length to retry with, which will be larger than BufferSize. - virtual unsigned print(char *Buffer, unsigned BufferSize) const { - int N = snprintf(Buffer, BufferSize-1, Fmt, Val1, Val2, Val3); - if (N < 0) // VC++ and old GlibC return negative on overflow. - return BufferSize*2; - if (unsigned(N) >= BufferSize-1)// Other impls yield number of bytes needed. - return N+1; - // If N is positive and <= BufferSize-1, then the string fit, yay. 
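The consolidated print()/snprint() retry protocol above is what raw_ostream drives when a format object is streamed; a hedged sketch of the client side (illustrative only):

#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void formatExample(double Pi, unsigned Iters) {
  // format() builds a format_object; operator<< calls print(), and grows
  // its buffer whenever print() returns a larger size to retry with.
  outs() << format("pi ~= %.4f after %u iterations\n", Pi, Iters);
}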
- return N; + virtual int snprint(char *Buffer, unsigned BufferSize) const { + return snprintf(Buffer, BufferSize, Fmt, Val1, Val2, Val3); } }; diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h new file mode 100644 index 0000000000000..24a3546200ac7 --- /dev/null +++ b/include/llvm/Support/FormattedStream.h @@ -0,0 +1,150 @@ +//===-- llvm/CodeGen/FormattedStream.h - Formatted streams ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains raw_ostream implementations for streams to do +// things like pretty-print comments. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_FORMATTEDSTREAM_H +#define LLVM_SUPPORT_FORMATTEDSTREAM_H + +#include "llvm/Support/raw_ostream.h" + +namespace llvm +{ + /// formatted_raw_ostream - Formatted raw_fd_ostream to handle + /// asm-specific constructs. + /// + class formatted_raw_ostream : public raw_ostream { + public: + /// DELETE_STREAM - Tell the destructor to delete the held stream. + /// + static const bool DELETE_STREAM = true; + + /// PRESERVE_STREAM - Tell the destructor to not delete the held + /// stream. + /// + static const bool PRESERVE_STREAM = false; + + private: + /// TheStream - The real stream we output to. We set it to be + /// unbuffered, since we're already doing our own buffering. + /// + raw_ostream *TheStream; + + /// DeleteStream - Do we need to delete TheStream in the + /// destructor? + /// + bool DeleteStream; + + /// ColumnScanned - The current output column of the data that's + /// been flushed and the portion of the buffer that's been + /// scanned. The column scheme is zero-based. + /// + unsigned ColumnScanned; + + /// Scanned - This points to one past the last character in the + /// buffer we've scanned. + /// + const char *Scanned; + + virtual void write_impl(const char *Ptr, size_t Size); + + /// current_pos - Return the current position within the stream, + /// not counting the bytes currently in the buffer. + virtual uint64_t current_pos() { + // This has the same effect as calling TheStream.current_pos(), + // but that interface is private. + return TheStream->tell() - TheStream->GetNumBytesInBuffer(); + } + + /// ComputeColumn - Examine the given output buffer and figure out which + /// column we end up in after output. + /// + void ComputeColumn(const char *Ptr, size_t size); + + public: + /// formatted_raw_ostream - Open the specified file for + /// writing. If an error occurs, information about the error is + /// put into ErrorInfo, and the stream should be immediately + /// destroyed; the string will be empty if no error occurred. + /// + /// As a side effect, the given Stream is set to be Unbuffered. + /// This is because formatted_raw_ostream does its own buffering, + /// so it doesn't want another layer of buffering to be happening + /// underneath it. 
+ /// + formatted_raw_ostream(raw_ostream &Stream, bool Delete = false) + : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { + setStream(Stream, Delete); + } + explicit formatted_raw_ostream() + : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { + Scanned = 0; + } + + ~formatted_raw_ostream() { + flush(); + releaseStream(); + } + + void setStream(raw_ostream &Stream, bool Delete = false) { + releaseStream(); + + TheStream = &Stream; + DeleteStream = Delete; + + // This formatted_raw_ostream inherits from raw_ostream, so it'll do its + // own buffering, and it doesn't need or want TheStream to do another + // layer of buffering underneath. Resize the buffer to what TheStream + // had been using, and tell TheStream not to do its own buffering. + if (size_t BufferSize = TheStream->GetBufferSize()) + SetBufferSize(BufferSize); + else + SetUnbuffered(); + TheStream->SetUnbuffered(); + + Scanned = 0; + } + + /// PadToColumn - Align the output to some column number. If the current + /// column is already equal to or more than NewCol, PadToColumn inserts one + /// space. + /// + /// \param NewCol - The column to move to. + void PadToColumn(unsigned NewCol); + + private: + void releaseStream() { + // Delete the stream if needed. Otherwise, transfer the buffer + // settings from this raw_ostream back to the underlying stream. + if (!TheStream) + return; + if (DeleteStream) + delete TheStream; + else if (size_t BufferSize = GetBufferSize()) + TheStream->SetBufferSize(BufferSize); + else + TheStream->SetUnbuffered(); + } + }; + +/// fouts() - This returns a reference to a formatted_raw_ostream for +/// standard output. Use it like: fouts() << "foo" << "bar"; +formatted_raw_ostream &fouts(); + +/// ferrs() - This returns a reference to a formatted_raw_ostream for +/// standard error. Use it like: ferrs() << "foo" << "bar"; +formatted_raw_ostream &ferrs(); + +} // end llvm namespace + + +#endif diff --git a/include/llvm/Support/GetElementPtrTypeIterator.h b/include/llvm/Support/GetElementPtrTypeIterator.h index e1cda75c5f6a1..f5915c992cdbb 100644 --- a/include/llvm/Support/GetElementPtrTypeIterator.h +++ b/include/llvm/Support/GetElementPtrTypeIterator.h @@ -21,8 +21,9 @@ namespace llvm { template class generic_gep_type_iterator - : public forward_iterator { - typedef forward_iterator super; + : public std::iterator { + typedef std::iterator super; ItTy OpIt; const Type *CurTy; diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h index 01b44d0b8e2f2..bd3fcea11025a 100644 --- a/include/llvm/Support/GraphWriter.h +++ b/include/llvm/Support/GraphWriter.h @@ -24,53 +24,33 @@ #define LLVM_SUPPORT_GRAPHWRITER_H #include "llvm/Support/DOTGraphTraits.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/System/Path.h" -#include #include +#include namespace llvm { namespace DOT { // Private functions... - inline std::string EscapeString(const std::string &Label) { - std::string Str(Label); - for (unsigned i = 0; i != Str.length(); ++i) - switch (Str[i]) { - case '\n': - Str.insert(Str.begin()+i, '\\'); // Escape character... 
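A hedged usage sketch of formatted_raw_ostream (illustrative, not part of the patch): wrapping an existing stream with PRESERVE_STREAM leaves ownership with the caller, and PadToColumn gives the asm-style column alignment the class exists for.

#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void columnExample() {
  // Wrap errs(); PRESERVE_STREAM (false) means we keep ownership.
  formatted_raw_ostream FOS(errs(), formatted_raw_ostream::PRESERVE_STREAM);
  FOS << "movl %eax, %ebx";
  FOS.PadToColumn(40);          // at least one space, then align to column 40
  FOS << "# asm-style comment\n";
  FOS.flush();
}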
- ++i; - Str[i] = 'n'; - break; - case '\t': - Str.insert(Str.begin()+i, ' '); // Convert to two spaces - ++i; - Str[i] = ' '; - break; - case '\\': - if (i+1 != Str.length()) - switch (Str[i+1]) { - case 'l': continue; // don't disturb \l - case '|': case '{': case '}': - Str.erase(Str.begin()+i); continue; - default: break; - } - case '{': case '}': - case '<': case '>': - case '|': case '"': - Str.insert(Str.begin()+i, '\\'); // Escape character... - ++i; // don't infinite loop - break; - } - return Str; - } + std::string EscapeString(const std::string &Label); +} + +namespace GraphProgram { + enum Name { + DOT, + FDP, + NEATO, + TWOPI, + CIRCO + }; } -void DisplayGraph(const sys::Path& Filename); +void DisplayGraph(const sys::Path& Filename, bool wait=true, GraphProgram::Name program = GraphProgram::DOT); template class GraphWriter { - std::ostream &O; + raw_ostream &O; const GraphType &G; bool ShortNames; @@ -80,7 +60,7 @@ class GraphWriter { typedef typename GTraits::nodes_iterator node_iterator; typedef typename GTraits::ChildIteratorType child_iterator; public: - GraphWriter(std::ostream &o, const GraphType &g, bool SN) : + GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g), ShortNames(SN) {} void writeHeader(const std::string &Name) { @@ -222,7 +202,7 @@ public: for (unsigned i = 0; i != NumEdgeSources; ++i) { if (i) O << "|"; - O << ""; + O << ""; if (EdgeSourceLabels) O << (*EdgeSourceLabels)[i]; } O << "}}"; @@ -241,8 +221,12 @@ public: if (SrcNodePort >= 0) O << ":s" << SrcNodePort; O << " -> Node" << DestNodeID; - if (DestNodePort >= 0) - O << ":d" << DestNodePort; + if (DestNodePort >= 0) { + if (DOTTraits::hasEdgeDestLabels()) + O << ":d" << DestNodePort; + else + O << ":s" << DestNodePort; + } if (!Attrs.empty()) O << "[" << Attrs << "]"; @@ -251,10 +235,10 @@ public: }; template -std::ostream &WriteGraph(std::ostream &O, const GraphType &G, - bool ShortNames = false, - const std::string &Name = "", - const std::string &Title = "") { +raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G, + bool ShortNames = false, + const std::string &Name = "", + const std::string &Title = "") { // Start the graph emission process... GraphWriter W(O, G, ShortNames); @@ -273,33 +257,30 @@ std::ostream &WriteGraph(std::ostream &O, const GraphType &G, } template -sys::Path WriteGraph(const GraphType &G, - const std::string& Name, - bool ShortNames = false, - const std::string& Title = "") { +sys::Path WriteGraph(const GraphType &G, const std::string &Name, + bool ShortNames = false, const std::string &Title = "") { std::string ErrMsg; sys::Path Filename = sys::Path::GetTemporaryDirectory(&ErrMsg); if (Filename.isEmpty()) { - cerr << "Error: " << ErrMsg << "\n"; + errs() << "Error: " << ErrMsg << "\n"; return Filename; } Filename.appendComponent(Name + ".dot"); if (Filename.makeUnique(true,&ErrMsg)) { - cerr << "Error: " << ErrMsg << "\n"; + errs() << "Error: " << ErrMsg << "\n"; return sys::Path(); } - cerr << "Writing '" << Filename << "'... "; + errs() << "Writing '" << Filename.str() << "'... "; - std::ofstream O(Filename.c_str()); + std::string ErrorInfo; + raw_fd_ostream O(Filename.c_str(), ErrorInfo); - if (O.good()) { + if (ErrorInfo.empty()) { WriteGraph(O, G, ShortNames, Name, Title); - cerr << " done. \n"; - - O.close(); + errs() << " done. 
\n"; } else { - cerr << "error opening file for writing!\n"; + errs() << "error opening file '" << Filename.str() << "' for writing!\n"; Filename.clear(); } @@ -310,17 +291,15 @@ sys::Path WriteGraph(const GraphType &G, /// then cleanup. For use from the debugger. /// template -void ViewGraph(const GraphType& G, - const std::string& Name, - bool ShortNames = false, - const std::string& Title = "") { - sys::Path Filename = WriteGraph(G, Name, ShortNames, Title); +void ViewGraph(const GraphType &G, const std::string &Name, + bool ShortNames = false, const std::string &Title = "", + GraphProgram::Name Program = GraphProgram::DOT) { + sys::Path Filename = WriteGraph(G, Name, ShortNames, Title); - if (Filename.isEmpty()) { + if (Filename.isEmpty()) return; - } - DisplayGraph(Filename); + DisplayGraph(Filename, true, Program); } } // End llvm namespace diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h index ed6a3f19ef7ab..1f659787eb795 100644 --- a/include/llvm/Support/IRBuilder.h +++ b/include/llvm/Support/IRBuilder.h @@ -20,37 +20,86 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/Metadata.h" +#include "llvm/LLVMContext.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/ConstantFolder.h" namespace llvm { +/// IRBuilderDefaultInserter - This provides the default implementation of the +/// IRBuilder 'InsertHelper' method that is called whenever an instruction is +/// created by IRBuilder and needs to be inserted. By default, this inserts the +/// instruction at the insertion point. +template +class IRBuilderDefaultInserter { +protected: + void InsertHelper(Instruction *I, const Twine &Name, + BasicBlock *BB, BasicBlock::iterator InsertPt) const { + if (BB) BB->getInstList().insert(InsertPt, I); + if (preserveNames) + I->setName(Name); + } +}; + + /// IRBuilder - This provides a uniform API for creating instructions and /// inserting them into a basic block: either at the end of a BasicBlock, or /// at a specific iterator location in a block. /// /// Note that the builder does not expose the full generality of LLVM -/// instructions. For example, it cannot be used to create instructions with -/// arbitrary names (specifically, names with nul characters in them) - It only -/// supports nul-terminated C strings. For fully generic names, use -/// I->setName(). For access to extra instruction properties, use the mutators +/// instructions. For access to extra instruction properties, use the mutators /// (e.g. setVolatile) on the instructions after they have been created. /// The first template argument handles whether or not to preserve names in the /// final instruction output. This defaults to on. The second template argument /// specifies a class to use for creating constants. This defaults to creating -/// minimally folded constants. -template class IRBuilder{ +/// minimally folded constants. The fourth template argument allows clients to +/// specify custom insertion hooks that are called on every newly created +/// insertion. 
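A hedged sketch of a custom insertion hook (illustrative only; the stripped template parameter lists are assumed to read IRBuilderDefaultInserter<bool preserveNames> and IRBuilder<bool, Folder, Inserter>, as in mainline LLVM of this era, and CountingInserter is a made-up name):

#include "llvm/Support/IRBuilder.h"
using namespace llvm;

// Counts instructions as they are created, then delegates to the default
// insertion behaviour. IRBuilder calls InsertHelper from a const method,
// so the hook is const and the counter is mutable.
class CountingInserter : public IRBuilderDefaultInserter<true> {
public:
  mutable unsigned Count;
  CountingInserter() : Count(0) {}
protected:
  void InsertHelper(Instruction *I, const Twine &Name,
                    BasicBlock *BB, BasicBlock::iterator InsertPt) const {
    ++Count;  // custom hook runs on every newly created instruction
    IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
  }
};

// Instantiated as: IRBuilder<true, ConstantFolder, CountingInserter> B(BB);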
+template > +class IRBuilder : public Inserter { BasicBlock *BB; BasicBlock::iterator InsertPt; + unsigned MDKind; + MDNode *CurDbgLocation; + LLVMContext &Context; T Folder; public: - IRBuilder(const T& F = T()) : Folder(F) { ClearInsertionPoint(); } - explicit IRBuilder(BasicBlock *TheBB, const T& F = T()) - : Folder(F) { SetInsertPoint(TheBB); } - IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F = T()) - : Folder(F) { SetInsertPoint(TheBB, IP); } + IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter()) + : Inserter(I), MDKind(0), CurDbgLocation(0), Context(C), Folder(F) { + ClearInsertionPoint(); + } + + explicit IRBuilder(LLVMContext &C) + : MDKind(0), CurDbgLocation(0), Context(C), Folder(C) { + ClearInsertionPoint(); + } + + explicit IRBuilder(BasicBlock *TheBB, const T &F) + : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()), Folder(F) { + SetInsertPoint(TheBB); + } + + explicit IRBuilder(BasicBlock *TheBB) + : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()), + Folder(Context) { + SetInsertPoint(TheBB); + } + + IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F) + : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()), Folder(F) { + SetInsertPoint(TheBB, IP); + } + + IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP) + : MDKind(0), CurDbgLocation(0), Context(TheBB->getContext()), + Folder(Context) { + SetInsertPoint(TheBB, IP); + } /// getFolder - Get the constant folder being used. - const T& getFolder() { return Folder; } + const T &getFolder() { return Folder; } /// isNamePreserving - Return true if this builder is configured to actually /// add the requested names to IR created through it. @@ -84,20 +133,75 @@ public: InsertPt = IP; } + /// SetCurrentDebugLocation - Set location information used by debugging + /// information. + void SetCurrentDebugLocation(MDNode *L) { + if (MDKind == 0) + MDKind = Context.getMetadata().getMDKind("dbg"); + if (MDKind == 0) + MDKind = Context.getMetadata().RegisterMDKind("dbg"); + CurDbgLocation = L; + } + + MDNode *getCurrentDebugLocation() const { return CurDbgLocation; } + + /// SetDebugLocation - Set location information for the given instruction. + void SetDebugLocation(Instruction *I) { + if (CurDbgLocation) + Context.getMetadata().addMD(MDKind, CurDbgLocation, I); + } + /// Insert - Insert and return the specified instruction. template - InstTy *Insert(InstTy *I, const char *Name = "") const { - InsertHelper(I, Name); + InstTy *Insert(InstTy *I, const Twine &Name = "") const { + this->InsertHelper(I, Name, BB, InsertPt); + if (CurDbgLocation) + Context.getMetadata().addMD(MDKind, CurDbgLocation, I); return I; } - /// InsertHelper - Insert the specified instruction at the specified insertion - /// point. This is split out of Insert so that it isn't duplicated for every - /// template instantiation. - void InsertHelper(Instruction *I, const char *Name) const { - if (BB) BB->getInstList().insert(InsertPt, I); - if (preserveNames && Name[0]) - I->setName(Name); + //===--------------------------------------------------------------------===// + // Type creation methods + //===--------------------------------------------------------------------===// + + /// getInt1Ty - Fetch the type representing a single bit + const Type *getInt1Ty() { + return Type::getInt1Ty(Context); + } + + /// getInt8Ty - Fetch the type representing an 8-bit integer. + const Type *getInt8Ty() { + return Type::getInt8Ty(Context); + } + + /// getInt16Ty - Fetch the type representing a 16-bit integer. 
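A hedged sketch pulling the pieces above together (illustrative only; it assumes the default template arguments so that IRBuilder<> is usable, and uses Create* methods defined in the remainder of this class):

#include "llvm/Support/IRBuilder.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
using namespace llvm;

Value *emitSum(BasicBlock *BB, Value *A, Value *B, MDNode *Loc) {
  IRBuilder<> Builder(BB);               // context comes from the block
  Builder.SetCurrentDebugLocation(Loc);  // attached to each new instruction
  Value *Sum = Builder.CreateNSWAdd(A, B, "sum");   // Twine-based names
  // The type getters replace the old global Type::Int32Ty-style constants.
  return Builder.CreateMul(Sum, ConstantInt::get(Builder.getInt32Ty(), 2));
}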
+ const Type *getInt16Ty() { + return Type::getInt16Ty(Context); + } + + /// getInt32Ty - Fetch the type resepresenting a 32-bit integer. + const Type *getInt32Ty() { + return Type::getInt32Ty(Context); + } + + /// getInt64Ty - Fetch the type representing a 64-bit integer. + const Type *getInt64Ty() { + return Type::getInt64Ty(Context); + } + + /// getFloatTy - Fetch the type representing a 32-bit floating point value. + const Type *getFloatTy() { + return Type::getFloatTy(Context); + } + + /// getDoubleTy - Fetch the type representing a 64-bit floating point value. + const Type *getDoubleTy() { + return Type::getDoubleTy(Context); + } + + /// getVoidTy - Fetch the type representing void. + const Type *getVoidTy() { + return Type::getVoidTy(Context); } //===--------------------------------------------------------------------===// @@ -106,14 +210,14 @@ public: /// CreateRetVoid - Create a 'ret void' instruction. ReturnInst *CreateRetVoid() { - return Insert(ReturnInst::Create()); + return Insert(ReturnInst::Create(Context)); } /// @verbatim /// CreateRet - Create a 'ret ' instruction. /// @endverbatim ReturnInst *CreateRet(Value *V) { - return Insert(ReturnInst::Create(V)); + return Insert(ReturnInst::Create(Context, V)); } /// CreateAggregateRet - Create a sequence of N insertvalue instructions, @@ -128,7 +232,7 @@ public: Value *V = UndefValue::get(RetType); for (unsigned i = 0; i != N; ++i) V = CreateInsertValue(V, retVals[i], i, "mrv"); - return Insert(ReturnInst::Create(V)); + return Insert(ReturnInst::Create(Context, V)); } /// CreateBr - Create an unconditional 'br label X' instruction. @@ -153,126 +257,144 @@ public: template InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest, BasicBlock *UnwindDest, InputIterator ArgBegin, - InputIterator ArgEnd, const char *Name = "") { + InputIterator ArgEnd, const Twine &Name = "") { return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, ArgBegin, ArgEnd), Name); } UnwindInst *CreateUnwind() { - return Insert(new UnwindInst()); + return Insert(new UnwindInst(Context)); } UnreachableInst *CreateUnreachable() { - return Insert(new UnreachableInst()); + return Insert(new UnreachableInst(Context)); } //===--------------------------------------------------------------------===// // Instruction creation methods: Binary Operators //===--------------------------------------------------------------------===// - Value *CreateAdd(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateAdd(LC, RC); return Insert(BinaryOperator::CreateAdd(LHS, RHS), Name); } - Value *CreateFAdd(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") { + if (Constant *LC = dyn_cast(LHS)) + if (Constant *RC = dyn_cast(RHS)) + return Folder.CreateNSWAdd(LC, RC); + return Insert(BinaryOperator::CreateNSWAdd(LHS, RHS), Name); + } + Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateFAdd(LC, RC); return Insert(BinaryOperator::CreateFAdd(LHS, RHS), Name); } - Value *CreateSub(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateSub(LC, RC); return Insert(BinaryOperator::CreateSub(LHS, RHS), Name); 
} - Value *CreateFSub(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") { + if (Constant *LC = dyn_cast(LHS)) + if (Constant *RC = dyn_cast(RHS)) + return Folder.CreateNSWSub(LC, RC); + return Insert(BinaryOperator::CreateNSWSub(LHS, RHS), Name); + } + Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateFSub(LC, RC); return Insert(BinaryOperator::CreateFSub(LHS, RHS), Name); } - Value *CreateMul(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateMul(LC, RC); return Insert(BinaryOperator::CreateMul(LHS, RHS), Name); } - Value *CreateFMul(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateFMul(LC, RC); return Insert(BinaryOperator::CreateFMul(LHS, RHS), Name); } - Value *CreateUDiv(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateUDiv(LC, RC); return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name); } - Value *CreateSDiv(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateSDiv(LC, RC); return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name); } - Value *CreateFDiv(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") { + if (Constant *LC = dyn_cast(LHS)) + if (Constant *RC = dyn_cast(RHS)) + return Folder.CreateExactSDiv(LC, RC); + return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name); + } + Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateFDiv(LC, RC); return Insert(BinaryOperator::CreateFDiv(LHS, RHS), Name); } - Value *CreateURem(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateURem(LC, RC); return Insert(BinaryOperator::CreateURem(LHS, RHS), Name); } - Value *CreateSRem(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateSRem(LC, RC); return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name); } - Value *CreateFRem(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateFRem(LC, RC); return Insert(BinaryOperator::CreateFRem(LHS, RHS), Name); } - Value *CreateShl(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateShl(LC, RC); return Insert(BinaryOperator::CreateShl(LHS, RHS), Name); } - Value *CreateLShr(Value *LHS, 
Value *RHS, const char *Name = "") { + Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateLShr(LC, RC); return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name); } - Value *CreateAShr(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateAShr(LC, RC); return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name); } - Value *CreateAnd(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateAnd(LC, RC); return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name); } - Value *CreateOr(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateOr(LC, RC); return Insert(BinaryOperator::CreateOr(LHS, RHS), Name); } - Value *CreateXor(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateXor(LC, RC); @@ -280,24 +402,24 @@ public: } Value *CreateBinOp(Instruction::BinaryOps Opc, - Value *LHS, Value *RHS, const char *Name = "") { + Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateBinOp(Opc, LC, RC); return Insert(BinaryOperator::Create(Opc, LHS, RHS), Name); } - Value *CreateNeg(Value *V, const char *Name = "") { + Value *CreateNeg(Value *V, const Twine &Name = "") { if (Constant *VC = dyn_cast(V)) return Folder.CreateNeg(VC); return Insert(BinaryOperator::CreateNeg(V), Name); } - Value *CreateFNeg(Value *V, const char *Name = "") { + Value *CreateFNeg(Value *V, const Twine &Name = "") { if (Constant *VC = dyn_cast(V)) return Folder.CreateFNeg(VC); return Insert(BinaryOperator::CreateFNeg(V), Name); } - Value *CreateNot(Value *V, const char *Name = "") { + Value *CreateNot(Value *V, const Twine &Name = "") { if (Constant *VC = dyn_cast(V)) return Folder.CreateNot(VC); return Insert(BinaryOperator::CreateNot(V), Name); @@ -308,20 +430,25 @@ public: //===--------------------------------------------------------------------===// MallocInst *CreateMalloc(const Type *Ty, Value *ArraySize = 0, - const char *Name = "") { + const Twine &Name = "") { return Insert(new MallocInst(Ty, ArraySize), Name); } AllocaInst *CreateAlloca(const Type *Ty, Value *ArraySize = 0, - const char *Name = "") { + const Twine &Name = "") { return Insert(new AllocaInst(Ty, ArraySize), Name); } FreeInst *CreateFree(Value *Ptr) { return Insert(new FreeInst(Ptr)); } - LoadInst *CreateLoad(Value *Ptr, const char *Name = "") { + // Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of + // converting the string to 'bool' for the isVolatile parameter. 
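A hedged sketch of the memory-op helpers in this region (illustrative only), including the overload-resolution point the comment above makes about the const char* form of CreateLoad defined just below:

#include "llvm/Support/IRBuilder.h"
using namespace llvm;

Value *spillAndReload(IRBuilder<> &Builder, Value *V) {
  // Stack slot of V's type; the name argument is now a Twine.
  AllocaInst *Slot = Builder.CreateAlloca(V->getType(), 0, "spill");
  Builder.CreateStore(V, Slot);
  // A string literal resolves to the const char* overload, not to the
  // (Ptr, bool isVolatile) form.
  return Builder.CreateLoad(Slot, "reload");
}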
+ LoadInst *CreateLoad(Value *Ptr, const char *Name) { + return Insert(new LoadInst(Ptr), Name); + } + LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") { return Insert(new LoadInst(Ptr), Name); } - LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const char *Name = "") { + LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") { return Insert(new LoadInst(Ptr, 0, isVolatile), Name); } StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) { @@ -329,38 +456,69 @@ public: } template Value *CreateGEP(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, - const char *Name = "") { + const Twine &Name = "") { if (Constant *PC = dyn_cast(Ptr)) { // Every index must be constant. InputIterator i; - for (i = IdxBegin; i < IdxEnd; ++i) { - if (!dyn_cast(*i)) + for (i = IdxBegin; i < IdxEnd; ++i) + if (!isa(*i)) break; - } if (i == IdxEnd) return Folder.CreateGetElementPtr(PC, &IdxBegin[0], IdxEnd - IdxBegin); } return Insert(GetElementPtrInst::Create(Ptr, IdxBegin, IdxEnd), Name); } - Value *CreateGEP(Value *Ptr, Value *Idx, const char *Name = "") { + template + Value *CreateInBoundsGEP(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, + const Twine &Name = "") { + if (Constant *PC = dyn_cast(Ptr)) { + // Every index must be constant. + InputIterator i; + for (i = IdxBegin; i < IdxEnd; ++i) + if (!isa(*i)) + break; + if (i == IdxEnd) + return Folder.CreateInBoundsGetElementPtr(PC, + &IdxBegin[0], + IdxEnd - IdxBegin); + } + return Insert(GetElementPtrInst::CreateInBounds(Ptr, IdxBegin, IdxEnd), + Name); + } + Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") { if (Constant *PC = dyn_cast(Ptr)) if (Constant *IC = dyn_cast(Idx)) return Folder.CreateGetElementPtr(PC, &IC, 1); return Insert(GetElementPtrInst::Create(Ptr, Idx), Name); } - Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const char *Name = "") { - Value *Idx = ConstantInt::get(Type::Int32Ty, Idx0); + Value *CreateInBoundsGEP(Value *Ptr, Value *Idx, const Twine &Name = "") { + if (Constant *PC = dyn_cast(Ptr)) + if (Constant *IC = dyn_cast(Idx)) + return Folder.CreateInBoundsGetElementPtr(PC, &IC, 1); + return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name); + } + Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") { + Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0); if (Constant *PC = dyn_cast(Ptr)) return Folder.CreateGetElementPtr(PC, &Idx, 1); return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name); } + Value *CreateConstInBoundsGEP1_32(Value *Ptr, unsigned Idx0, + const Twine &Name = "") { + Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0); + + if (Constant *PC = dyn_cast(Ptr)) + return Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1); + + return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name); + } Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1, - const char *Name = "") { + const Twine &Name = "") { Value *Idxs[] = { - ConstantInt::get(Type::Int32Ty, Idx0), - ConstantInt::get(Type::Int32Ty, Idx1) + ConstantInt::get(Type::getInt32Ty(Context), Idx0), + ConstantInt::get(Type::getInt32Ty(Context), Idx1) }; if (Constant *PC = dyn_cast(Ptr)) @@ -368,19 +526,40 @@ public: return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name); } - Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const char *Name = "") { - Value *Idx = ConstantInt::get(Type::Int64Ty, Idx0); + Value *CreateConstInBoundsGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1, + const 
Twine &Name = "") { + Value *Idxs[] = { + ConstantInt::get(Type::getInt32Ty(Context), Idx0), + ConstantInt::get(Type::getInt32Ty(Context), Idx1) + }; + + if (Constant *PC = dyn_cast(Ptr)) + return Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2); + + return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name); + } + Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") { + Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0); if (Constant *PC = dyn_cast(Ptr)) return Folder.CreateGetElementPtr(PC, &Idx, 1); return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name); } - Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1, - const char *Name = "") { + Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0, + const Twine &Name = "") { + Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0); + + if (Constant *PC = dyn_cast(Ptr)) + return Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1); + + return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name); + } + Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1, + const Twine &Name = "") { Value *Idxs[] = { - ConstantInt::get(Type::Int64Ty, Idx0), - ConstantInt::get(Type::Int64Ty, Idx1) + ConstantInt::get(Type::getInt64Ty(Context), Idx0), + ConstantInt::get(Type::getInt64Ty(Context), Idx1) }; if (Constant *PC = dyn_cast(Ptr)) @@ -388,235 +567,272 @@ public: return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name); } - Value *CreateStructGEP(Value *Ptr, unsigned Idx, const char *Name = "") { - return CreateConstGEP2_32(Ptr, 0, Idx, Name); + Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1, + const Twine &Name = "") { + Value *Idxs[] = { + ConstantInt::get(Type::getInt64Ty(Context), Idx0), + ConstantInt::get(Type::getInt64Ty(Context), Idx1) + }; + + if (Constant *PC = dyn_cast(Ptr)) + return Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2); + + return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name); + } + Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") { + return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name); } - Value *CreateGlobalString(const char *Str = "", const char *Name = "") { - Constant *StrConstant = ConstantArray::get(Str, true); - GlobalVariable *gv = new GlobalVariable(StrConstant->getType(), + Value *CreateGlobalString(const char *Str = "", const Twine &Name = "") { + Constant *StrConstant = ConstantArray::get(Context, Str, true); + Module &M = *BB->getParent()->getParent(); + GlobalVariable *gv = new GlobalVariable(M, + StrConstant->getType(), true, GlobalValue::InternalLinkage, StrConstant, "", - BB->getParent()->getParent(), + 0, false); gv->setName(Name); return gv; } - Value *CreateGlobalStringPtr(const char *Str = "", const char *Name = "") { + Value *CreateGlobalStringPtr(const char *Str = "", const Twine &Name = "") { Value *gv = CreateGlobalString(Str, Name); - Value *zero = ConstantInt::get(Type::Int32Ty, 0); + Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0); Value *Args[] = { zero, zero }; - return CreateGEP(gv, Args, Args+2, Name); + return CreateInBoundsGEP(gv, Args, Args+2, Name); } //===--------------------------------------------------------------------===// // Instruction creation methods: Cast/Conversion Operators //===--------------------------------------------------------------------===// - Value *CreateTrunc(Value *V, const Type *DestTy, const char *Name = "") { + Value *CreateTrunc(Value *V, const Type *DestTy, 
const Twine &Name = "") { return CreateCast(Instruction::Trunc, V, DestTy, Name); } - Value *CreateZExt(Value *V, const Type *DestTy, const char *Name = "") { + Value *CreateZExt(Value *V, const Type *DestTy, const Twine &Name = "") { return CreateCast(Instruction::ZExt, V, DestTy, Name); } - Value *CreateSExt(Value *V, const Type *DestTy, const char *Name = "") { + Value *CreateSExt(Value *V, const Type *DestTy, const Twine &Name = "") { return CreateCast(Instruction::SExt, V, DestTy, Name); } - Value *CreateFPToUI(Value *V, const Type *DestTy, const char *Name = ""){ + Value *CreateFPToUI(Value *V, const Type *DestTy, const Twine &Name = ""){ return CreateCast(Instruction::FPToUI, V, DestTy, Name); } - Value *CreateFPToSI(Value *V, const Type *DestTy, const char *Name = ""){ + Value *CreateFPToSI(Value *V, const Type *DestTy, const Twine &Name = ""){ return CreateCast(Instruction::FPToSI, V, DestTy, Name); } - Value *CreateUIToFP(Value *V, const Type *DestTy, const char *Name = ""){ + Value *CreateUIToFP(Value *V, const Type *DestTy, const Twine &Name = ""){ return CreateCast(Instruction::UIToFP, V, DestTy, Name); } - Value *CreateSIToFP(Value *V, const Type *DestTy, const char *Name = ""){ + Value *CreateSIToFP(Value *V, const Type *DestTy, const Twine &Name = ""){ return CreateCast(Instruction::SIToFP, V, DestTy, Name); } Value *CreateFPTrunc(Value *V, const Type *DestTy, - const char *Name = "") { + const Twine &Name = "") { return CreateCast(Instruction::FPTrunc, V, DestTy, Name); } - Value *CreateFPExt(Value *V, const Type *DestTy, const char *Name = "") { + Value *CreateFPExt(Value *V, const Type *DestTy, const Twine &Name = "") { return CreateCast(Instruction::FPExt, V, DestTy, Name); } Value *CreatePtrToInt(Value *V, const Type *DestTy, - const char *Name = "") { + const Twine &Name = "") { return CreateCast(Instruction::PtrToInt, V, DestTy, Name); } Value *CreateIntToPtr(Value *V, const Type *DestTy, - const char *Name = "") { + const Twine &Name = "") { return CreateCast(Instruction::IntToPtr, V, DestTy, Name); } Value *CreateBitCast(Value *V, const Type *DestTy, - const char *Name = "") { + const Twine &Name = "") { return CreateCast(Instruction::BitCast, V, DestTy, Name); } - + Value *CreateZExtOrBitCast(Value *V, const Type *DestTy, + const Twine &Name = "") { + if (V->getType() == DestTy) + return V; + if (Constant *VC = dyn_cast(V)) + return Folder.CreateZExtOrBitCast(VC, DestTy); + return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name); + } + Value *CreateSExtOrBitCast(Value *V, const Type *DestTy, + const Twine &Name = "") { + if (V->getType() == DestTy) + return V; + if (Constant *VC = dyn_cast(V)) + return Folder.CreateSExtOrBitCast(VC, DestTy); + return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name); + } + Value *CreateTruncOrBitCast(Value *V, const Type *DestTy, + const Twine &Name = "") { + if (V->getType() == DestTy) + return V; + if (Constant *VC = dyn_cast(V)) + return Folder.CreateTruncOrBitCast(VC, DestTy); + return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name); + } Value *CreateCast(Instruction::CastOps Op, Value *V, const Type *DestTy, - const char *Name = "") { + const Twine &Name = "") { if (V->getType() == DestTy) return V; if (Constant *VC = dyn_cast(V)) return Folder.CreateCast(Op, VC, DestTy); return Insert(CastInst::Create(Op, V, DestTy), Name); } + Value *CreatePointerCast(Value *V, const Type *DestTy, + const Twine &Name = "") { + if (V->getType() == DestTy) + return V; + if (Constant *VC = dyn_cast(V)) + return 
Folder.CreatePointerCast(VC, DestTy); + return Insert(CastInst::CreatePointerCast(V, DestTy), Name); + } Value *CreateIntCast(Value *V, const Type *DestTy, bool isSigned, - const char *Name = "") { + const Twine &Name = "") { if (V->getType() == DestTy) return V; if (Constant *VC = dyn_cast(V)) return Folder.CreateIntCast(VC, DestTy, isSigned); return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name); } + Value *CreateFPCast(Value *V, const Type *DestTy, const Twine &Name = "") { + if (V->getType() == DestTy) + return V; + if (Constant *VC = dyn_cast(V)) + return Folder.CreateFPCast(VC, DestTy); + return Insert(CastInst::CreateFPCast(V, DestTy), Name); + } //===--------------------------------------------------------------------===// // Instruction creation methods: Compare Instructions //===--------------------------------------------------------------------===// - Value *CreateICmpEQ(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_EQ, LHS, RHS, Name); } - Value *CreateICmpNE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_NE, LHS, RHS, Name); } - Value *CreateICmpUGT(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_UGT, LHS, RHS, Name); } - Value *CreateICmpUGE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_UGE, LHS, RHS, Name); } - Value *CreateICmpULT(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_ULT, LHS, RHS, Name); } - Value *CreateICmpULE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_ULE, LHS, RHS, Name); } - Value *CreateICmpSGT(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_SGT, LHS, RHS, Name); } - Value *CreateICmpSGE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_SGE, LHS, RHS, Name); } - Value *CreateICmpSLT(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_SLT, LHS, RHS, Name); } - Value *CreateICmpSLE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateICmp(ICmpInst::ICMP_SLE, LHS, RHS, Name); } - Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name); } - Value *CreateFCmpOGT(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name); } - Value *CreateFCmpOGE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name); } - Value *CreateFCmpOLT(Value *LHS, Value 
*RHS, const char *Name = "") { + Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name); } - Value *CreateFCmpOLE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name); } - Value *CreateFCmpONE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name); } - Value *CreateFCmpORD(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name); } - Value *CreateFCmpUNO(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name); } - Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name); } - Value *CreateFCmpUGT(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name); } - Value *CreateFCmpUGE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name); } - Value *CreateFCmpULT(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name); } - Value *CreateFCmpULE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name); } - Value *CreateFCmpUNE(Value *LHS, Value *RHS, const char *Name = "") { + Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "") { return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name); } Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, - const char *Name = "") { + const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateICmp(P, LC, RC); return Insert(new ICmpInst(P, LHS, RHS), Name); } Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, - const char *Name = "") { + const Twine &Name = "") { if (Constant *LC = dyn_cast(LHS)) if (Constant *RC = dyn_cast(RHS)) return Folder.CreateFCmp(P, LC, RC); return Insert(new FCmpInst(P, LHS, RHS), Name); } - Value *CreateVICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, - const char *Name = "") { - if (Constant *LC = dyn_cast(LHS)) - if (Constant *RC = dyn_cast(RHS)) - return Folder.CreateVICmp(P, LC, RC); - return Insert(new VICmpInst(P, LHS, RHS), Name); - } - Value *CreateVFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, - const char *Name = "") { - if (Constant *LC = dyn_cast(LHS)) - if (Constant *RC = dyn_cast(RHS)) - return Folder.CreateVFCmp(P, LC, RC); - return Insert(new VFCmpInst(P, LHS, RHS), Name); - } - //===--------------------------------------------------------------------===// // Instruction creation methods: Other Instructions //===--------------------------------------------------------------------===// - PHINode *CreatePHI(const Type *Ty, const char *Name = "") { + PHINode *CreatePHI(const 
Type *Ty, const Twine &Name = "") { return Insert(PHINode::Create(Ty), Name); } - CallInst *CreateCall(Value *Callee, const char *Name = "") { + CallInst *CreateCall(Value *Callee, const Twine &Name = "") { return Insert(CallInst::Create(Callee), Name); } - CallInst *CreateCall(Value *Callee, Value *Arg, const char *Name = "") { + CallInst *CreateCall(Value *Callee, Value *Arg, const Twine &Name = "") { return Insert(CallInst::Create(Callee, Arg), Name); } CallInst *CreateCall2(Value *Callee, Value *Arg1, Value *Arg2, - const char *Name = "") { + const Twine &Name = "") { Value *Args[] = { Arg1, Arg2 }; return Insert(CallInst::Create(Callee, Args, Args+2), Name); } CallInst *CreateCall3(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3, - const char *Name = "") { + const Twine &Name = "") { Value *Args[] = { Arg1, Arg2, Arg3 }; return Insert(CallInst::Create(Callee, Args, Args+3), Name); } CallInst *CreateCall4(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3, - Value *Arg4, const char *Name = "") { + Value *Arg4, const Twine &Name = "") { Value *Args[] = { Arg1, Arg2, Arg3, Arg4 }; return Insert(CallInst::Create(Callee, Args, Args+4), Name); } template CallInst *CreateCall(Value *Callee, InputIterator ArgBegin, - InputIterator ArgEnd, const char *Name = "") { + InputIterator ArgEnd, const Twine &Name = "") { return Insert(CallInst::Create(Callee, ArgBegin, ArgEnd), Name); } Value *CreateSelect(Value *C, Value *True, Value *False, - const char *Name = "") { + const Twine &Name = "") { if (Constant *CC = dyn_cast(C)) if (Constant *TC = dyn_cast(True)) if (Constant *FC = dyn_cast(False)) @@ -624,20 +840,20 @@ public: return Insert(SelectInst::Create(C, True, False), Name); } - VAArgInst *CreateVAArg(Value *List, const Type *Ty, const char *Name = "") { + VAArgInst *CreateVAArg(Value *List, const Type *Ty, const Twine &Name = "") { return Insert(new VAArgInst(List, Ty), Name); } Value *CreateExtractElement(Value *Vec, Value *Idx, - const char *Name = "") { + const Twine &Name = "") { if (Constant *VC = dyn_cast(Vec)) if (Constant *IC = dyn_cast(Idx)) return Folder.CreateExtractElement(VC, IC); - return Insert(new ExtractElementInst(Vec, Idx), Name); + return Insert(ExtractElementInst::Create(Vec, Idx), Name); } Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, - const char *Name = "") { + const Twine &Name = "") { if (Constant *VC = dyn_cast(Vec)) if (Constant *NC = dyn_cast(NewElt)) if (Constant *IC = dyn_cast(Idx)) @@ -646,7 +862,7 @@ public: } Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask, - const char *Name = "") { + const Twine &Name = "") { if (Constant *V1C = dyn_cast(V1)) if (Constant *V2C = dyn_cast(V2)) if (Constant *MC = dyn_cast(Mask)) @@ -655,7 +871,7 @@ public: } Value *CreateExtractValue(Value *Agg, unsigned Idx, - const char *Name = "") { + const Twine &Name = "") { if (Constant *AggC = dyn_cast(Agg)) return Folder.CreateExtractValue(AggC, &Idx, 1); return Insert(ExtractValueInst::Create(Agg, Idx), Name); @@ -665,14 +881,14 @@ public: Value *CreateExtractValue(Value *Agg, InputIterator IdxBegin, InputIterator IdxEnd, - const char *Name = "") { + const Twine &Name = "") { if (Constant *AggC = dyn_cast(Agg)) return Folder.CreateExtractValue(AggC, IdxBegin, IdxEnd - IdxBegin); return Insert(ExtractValueInst::Create(Agg, IdxBegin, IdxEnd), Name); } Value *CreateInsertValue(Value *Agg, Value *Val, unsigned Idx, - const char *Name = "") { + const Twine &Name = "") { if (Constant *AggC = dyn_cast(Agg)) if (Constant *ValC = dyn_cast(Val)) return 
Folder.CreateInsertValue(AggC, ValC, &Idx, 1); @@ -683,11 +899,10 @@ public: Value *CreateInsertValue(Value *Agg, Value *Val, InputIterator IdxBegin, InputIterator IdxEnd, - const char *Name = "") { + const Twine &Name = "") { if (Constant *AggC = dyn_cast(Agg)) if (Constant *ValC = dyn_cast(Val)) - return Folder.CreateInsertValue(AggC, ValC, - IdxBegin, IdxEnd - IdxBegin); + return Folder.CreateInsertValue(AggC, ValC, IdxBegin, IdxEnd-IdxBegin); return Insert(InsertValueInst::Create(Agg, Val, IdxBegin, IdxEnd), Name); } @@ -696,30 +911,32 @@ public: //===--------------------------------------------------------------------===// /// CreateIsNull - Return an i1 value testing if \arg Arg is null. - Value *CreateIsNull(Value *Arg, const char *Name = "") { + Value *CreateIsNull(Value *Arg, const Twine &Name = "") { return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()), Name); } /// CreateIsNotNull - Return an i1 value testing if \arg Arg is not null. - Value *CreateIsNotNull(Value *Arg, const char *Name = "") { + Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") { return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()), Name); } /// CreatePtrDiff - Return the i64 difference between two pointer values, /// dividing out the size of the pointed-to objects. This is intended to - /// implement C-style pointer subtraction. - Value *CreatePtrDiff(Value *LHS, Value *RHS, const char *Name = "") { + /// implement C-style pointer subtraction. As such, the pointers must be + /// appropriately aligned for their element types and pointing into the + /// same object. + Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") { assert(LHS->getType() == RHS->getType() && "Pointer subtraction operand types must match!"); const PointerType *ArgType = cast(LHS->getType()); - Value *LHS_int = CreatePtrToInt(LHS, Type::Int64Ty); - Value *RHS_int = CreatePtrToInt(RHS, Type::Int64Ty); + Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context)); + Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context)); Value *Difference = CreateSub(LHS_int, RHS_int); - return CreateSDiv(Difference, - ConstantExpr::getSizeOf(ArgType->getElementType()), - Name); + return CreateExactSDiv(Difference, + ConstantExpr::getSizeOf(ArgType->getElementType()), + Name); } }; diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h new file mode 100644 index 0000000000000..e7780b05d534f --- /dev/null +++ b/include/llvm/Support/IRReader.h @@ -0,0 +1,115 @@ +//===---- llvm/Support/IRReader.h - Reader for LLVM IR files ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions for reading LLVM IR. They support both +// Bitcode and Assembly, automatically detecting the input format. +// +// These functions must be defined in a header file in order to avoid +// library dependencies, since they reference both Bitcode and Assembly +// functions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_IRREADER_H +#define LLVM_SUPPORT_IRREADER_H + +#include "llvm/Assembly/Parser.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/ModuleProvider.h" + +namespace llvm { + + /// If the given MemoryBuffer holds a bitcode image, return a ModuleProvider + /// for it which does lazy deserialization of function bodies. Otherwise, + /// attempt to parse it as LLVM Assembly and return a fully populated + /// ModuleProvider. This function *always* takes ownership of the given + /// MemoryBuffer. + inline ModuleProvider *getIRModuleProvider(MemoryBuffer *Buffer, + SMDiagnostic &Err, + LLVMContext &Context) { + if (isBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + ModuleProvider *MP = getBitcodeModuleProvider(Buffer, Context, &ErrMsg); + if (MP == 0) { + Err = SMDiagnostic(Buffer->getBufferIdentifier(), -1, -1, ErrMsg, ""); + // ParseBitcodeFile does not take ownership of the Buffer in the + // case of an error. + delete Buffer; + } + return MP; + } + + Module *M = ParseAssembly(Buffer, 0, Err, Context); + if (M == 0) + return 0; + return new ExistingModuleProvider(M); + } + + /// If the given file holds a bitcode image, return a ModuleProvider + /// for it which does lazy deserialization of function bodies. Otherwise, + /// attempt to parse it as LLVM Assembly and return a fully populated + /// ModuleProvider. + inline ModuleProvider *getIRFileModuleProvider(const std::string &Filename, + SMDiagnostic &Err, + LLVMContext &Context) { + std::string ErrMsg; + MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); + if (F == 0) { + Err = SMDiagnostic(Filename, -1, -1, + "Could not open input file '" + Filename + "'", ""); + return 0; + } + + return getIRModuleProvider(F, Err, Context); + } + + /// If the given MemoryBuffer holds a bitcode image, return a Module + /// for it. Otherwise, attempt to parse it as LLVM Assembly and return + /// a Module for it. This function *always* takes ownership of the given + /// MemoryBuffer. + inline Module *ParseIR(MemoryBuffer *Buffer, + SMDiagnostic &Err, + LLVMContext &Context) { + if (isBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg); + // ParseBitcodeFile does not take ownership of the Buffer. + delete Buffer; + if (M == 0) + Err = SMDiagnostic(Buffer->getBufferIdentifier(), -1, -1, ErrMsg, ""); + return M; + } + + return ParseAssembly(Buffer, 0, Err, Context); + } + + /// If the given file holds a bitcode image, return a Module for it. + /// Otherwise, attempt to parse it as LLVM Assembly and return a Module + /// for it. 
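  // Typical driver usage of these helpers (a sketch; the tool and file names
  // are hypothetical):
  //   LLVMContext Context;
  //   SMDiagnostic Err;
  //   Module *M = ParseIRFile("input.ll", Err, Context);
  //   if (M == 0) {
  //     Err.Print("mytool", errs());
  //     return 1;
  //   }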
+ inline Module *ParseIRFile(const std::string &Filename, + SMDiagnostic &Err, + LLVMContext &Context) { + std::string ErrMsg; + MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); + if (F == 0) { + Err = SMDiagnostic(Filename, -1, -1, + "Could not open input file '" + Filename + "'", ""); + return 0; + } + + return ParseIR(F, Err, Context); + } + +} + +#endif diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h index 597cc9d905421..5d7c2f72ba7b1 100644 --- a/include/llvm/Support/InstVisitor.h +++ b/include/llvm/Support/InstVisitor.h @@ -14,6 +14,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Module.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { @@ -30,13 +31,13 @@ namespace llvm { /// @brief Base class for instruction visitors /// -/// Instruction visitors are used when you want to perform different action for -/// different kinds of instruction without without having to use lots of casts -/// and a big switch statement (in your code that is). +/// Instruction visitors are used when you want to perform different actions +/// for different kinds of instructions without having to use lots of casts +/// and a big switch statement (in your code, that is). /// /// To define your own visitor, inherit from this class, specifying your /// new type for the 'SubClass' template parameter, and "override" visitXXX -/// functions in your class. I say "overriding" because this class is defined +/// functions in your class. I say "override" because this class is defined /// in terms of statically resolved overloading, not virtual functions. /// /// For example, here is a visitor that counts the number of malloc @@ -58,12 +59,12 @@ namespace llvm { /// NumMallocs = CMV.Count; /// /// The defined has 'visit' methods for Instruction, and also for BasicBlock, -/// Function, and Module, which recursively process all conained instructions. +/// Function, and Module, which recursively process all contained instructions. /// /// Note that if you don't implement visitXXX for some instruction type, /// the visitXXX method for instruction superclass will be invoked. So /// if instructions are added in the future, they will be automatically -/// supported, if you handle on of their superclasses. +/// supported, if you handle one of their superclasses. /// /// The optional second template argument specifies the type that instruction /// visitation functions should return. If you specify this, you *MUST* provide @@ -113,8 +114,7 @@ public: // RetTy visit(Instruction &I) { switch (I.getOpcode()) { - default: assert(0 && "Unknown instruction type encountered!"); - abort(); + default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file... 
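// Schematically, each HANDLE_INST(NUM, OPCODE, CLASS) entry pasted in here
// becomes one case of the form (a sketch, not the verbatim macro body):
//   case Instruction::OPCODE:
//     return static_cast<SubClass*>(this)->visitOPCODE(static_cast<CLASS&>(I));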
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: return \
@@ -165,8 +165,6 @@ public:
   RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
   RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
   RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
-  RetTy visitVICmpInst(VICmpInst &I) { DELEGATE(CmpInst);}
-  RetTy visitVFCmpInst(VFCmpInst &I) { DELEGATE(CmpInst);}
   RetTy visitMallocInst(MallocInst &I) { DELEGATE(AllocationInst);}
   RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(AllocationInst);}
   RetTy visitFreeInst(FreeInst &I) { DELEGATE(Instruction); }
@@ -195,7 +193,7 @@ public:
   RetTy visitExtractValueInst(ExtractValueInst &I) { DELEGATE(Instruction);}
   RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); }

-  // Next level propagators... if the user does not overload a specific
+  // Next level propagators: If the user does not overload a specific
   // instruction type, they can overload one of these to get the whole class
   // of instructions...
   //
@@ -206,7 +204,7 @@ public:
   RetTy visitCastInst(CastInst &I) { DELEGATE(Instruction); }

   // If the user wants a 'default' case, they can choose to override this
-  // function. If this function is not overloaded in the users subclass, then
+  // function. If this function is not overloaded in the user's subclass, then
   // this instruction just gets ignored.
   //
   // Note that you MUST override this function if your return type is not void.
diff --git a/include/llvm/Support/LeakDetector.h b/include/llvm/Support/LeakDetector.h
index 8d74ac663b11b..7dbfdbf3d52b2 100644
--- a/include/llvm/Support/LeakDetector.h
+++ b/include/llvm/Support/LeakDetector.h
@@ -56,9 +56,9 @@ struct LeakDetector {
   /// The specified message will be printed indicating when the check was
   /// performed.
   ///
-  static void checkForGarbage(const std::string &Message) {
+  static void checkForGarbage(LLVMContext &C, const std::string &Message) {
 #ifndef NDEBUG
-    checkForGarbageImpl(Message);
+    checkForGarbageImpl(C, Message);
 #endif
   }

@@ -83,7 +83,7 @@ private:
   static void removeGarbageObjectImpl(const Value *Object);
   static void addGarbageObjectImpl(void *Object);
   static void removeGarbageObjectImpl(void *Object);
-  static void checkForGarbageImpl(const std::string &Message);
+  static void checkForGarbageImpl(LLVMContext &C, const std::string &Message);
 };

 } // End llvm namespace
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index 4fc648319ad47..b8e223587fbd3 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -27,10 +27,12 @@ void* object_creator() {

 /// object_deleter - Helper method for ManagedStatic.
 ///
-template<class C>
-void object_deleter(void *Ptr) {
-  delete (C*)Ptr;
-}
+template<typename T> struct object_deleter {
+  static void call(void * Ptr) { delete (T*)Ptr; }
+};
+template<typename T, size_t N> struct object_deleter<T[N]> {
+  static void call(void * Ptr) { delete[] (T*)Ptr; }
+};

 /// ManagedStaticBase - Common base class for ManagedStatic instances.
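// A note on the object_deleter change above: function templates cannot be
// partially specialized, but class templates can, so the array case is now
// expressible and reaches operator delete[]. A sketch (both uses hypothetical):
//   static ManagedStatic<sys::Mutex> GlobalLock; // destroyed with delete
//   static ManagedStatic<char[256]> Scratch;     // destroyed with delete[]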
class ManagedStaticBase {
@@ -62,28 +64,28 @@ public:
   C &operator*() {
     void* tmp = Ptr;
     if (llvm_is_multithreaded()) sys::MemoryFence();
-    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
     return *static_cast<C*>(Ptr);
   }

   C *operator->() {
     void* tmp = Ptr;
     if (llvm_is_multithreaded()) sys::MemoryFence();
-    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
     return static_cast<C*>(Ptr);
   }

   const C &operator*() const {
     void* tmp = Ptr;
     if (llvm_is_multithreaded()) sys::MemoryFence();
-    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
     return *static_cast<C*>(Ptr);
   }

   const C *operator->() const {
     void* tmp = Ptr;
     if (llvm_is_multithreaded()) sys::MemoryFence();
-    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>);
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
     return static_cast<C*>(Ptr);
   }
diff --git a/include/llvm/Support/Mangler.h b/include/llvm/Support/Mangler.h
index 8f672bdd6f658..03c564897bb81 100644
--- a/include/llvm/Support/Mangler.h
+++ b/include/llvm/Support/Mangler.h
@@ -23,8 +23,17 @@ class Type;
 class Module;
 class Value;
 class GlobalValue;
+template <typename T> class SmallVectorImpl;

 class Mangler {
+public:
+  enum ManglerPrefixTy {
+    Default,        ///< Emit default string before each symbol.
+    Private,        ///< Emit "private" prefix before each symbol.
+    LinkerPrivate   ///< Emit "linker private" prefix before each symbol.
+  };
+
+private:
   /// Prefix - This string is added to each symbol that is emitted, unless the
   /// symbol is marked as not needing this prefix.
   const char *Prefix;
@@ -33,48 +42,50 @@ class Mangler {
   /// linkage.
   const char *PrivatePrefix;

+  /// LinkerPrivatePrefix - This string is emitted before each symbol with
+  /// "linker_private" linkage.
+  const char *LinkerPrivatePrefix;
+
   /// UseQuotes - If this is set, the target accepts global names in quotes,
   /// e.g. "foo bar" is a legal name. This syntax is used instead of escaping
   /// the space character. By default, this is false.
   bool UseQuotes;

-  /// PreserveAsmNames - If this is set, the asm escape character is not removed
-  /// from names with 'asm' specifiers.
-  bool PreserveAsmNames;
+  /// SymbolsCanStartWithDigit - If this is set, the target allows symbols to
+  /// start with digits (e.g., "0x0021"). By default, this is false.
+  bool SymbolsCanStartWithDigit;

-  /// Memo - This is used to remember the name that we assign a value.
+  /// AnonGlobalIDs - We need to give global values the same name every time
+  /// they are mangled. This keeps track of the number we give to anonymous
+  /// ones.
   ///
-  DenseMap<const Value*, std::string> Memo;
+  DenseMap<const GlobalValue*, unsigned> AnonGlobalIDs;

-  /// Count - This simple counter is used to unique value names.
+  /// NextAnonGlobalID - This simple counter is used to unique value names.
   ///
-  unsigned Count;
-
-  /// TypeMap - If the client wants us to unique types, this keeps track of the
-  /// current assignments and TypeCounter keeps track of the next id to assign.
-  DenseMap<const Type*, unsigned> TypeMap;
-  unsigned TypeCounter;
+  unsigned NextAnonGlobalID;

   /// AcceptableChars - This bitfield contains a one for each character that is
   /// allowed to be part of an unmangled name.
-  unsigned AcceptableChars[256/32];
-public:
+  unsigned AcceptableChars[256 / 32];
+public:
   // Mangler ctor - if a prefix is specified, it will be prepended onto all
   // symbols.
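  // For instance (a sketch; the actual strings are per-target choices, shown
  // here with Darwin-style prefixes):
  //   Mangler Mang(M, "_", "L", "l");
  // yields "_foo" for an ordinary global, an "L..." label for 'private'
  // linkage, and an "l..." label for the new 'linker_private' linkage.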
- Mangler(Module &M, const char *Prefix = "", const char *privatePrefix = ""); + Mangler(Module &M, const char *Prefix = "", const char *privatePrefix = "", + const char *linkerPrivatePrefix = ""); /// setUseQuotes - If UseQuotes is set to true, this target accepts quoted /// strings for assembler labels. void setUseQuotes(bool Val) { UseQuotes = Val; } - /// setPreserveAsmNames - If the mangler should not strip off the asm name - /// @verbatim identifier (\001), this should be set. @endverbatim - void setPreserveAsmNames(bool Val) { PreserveAsmNames = Val; } + /// setSymbolsCanStartWithDigit - If SymbolsCanStartWithDigit is set to true, + /// this target allows symbols to start with digits. + void setSymbolsCanStartWithDigit(bool Val) { SymbolsCanStartWithDigit = Val; } /// Acceptable Characters - This allows the target to specify which characters /// are acceptable to the assembler without being mangled. By default we - /// allow letters, numbers, '_', '$', and '.', which is what GAS accepts. + /// allow letters, numbers, '_', '$', '.', which is what GAS accepts, and '@'. void markCharAcceptable(unsigned char X) { AcceptableChars[X/32] |= 1 << (X&31); } @@ -85,11 +96,13 @@ public: return (AcceptableChars[X/32] & (1 << (X&31))) != 0; } - /// getValueName - Returns the mangled name of V, an LLVM Value, - /// in the current module. + /// getMangledName - Returns the mangled name of V, an LLVM Value, + /// in the current module. If 'Suffix' is specified, the name ends with the + /// specified suffix. If 'ForcePrivate' is specified, the label is specified + /// to have a private label prefix. /// - std::string getValueName(const GlobalValue *V, const char *Suffix = ""); - std::string getValueName(const Value *V); + std::string getMangledName(const GlobalValue *V, const char *Suffix = "", + bool ForcePrivate = false); /// makeNameProper - We don't want identifier names with ., space, or /// - in them, so we mangle these characters into the strings "d_", @@ -98,13 +111,14 @@ public: /// does this for you, so there's no point calling it on the result /// from getValueName. /// - std::string makeNameProper(const std::string &x, const char *Prefix = 0, - const char *PrivatePrefix = 0); - -private: - /// getTypeID - Return a unique ID for the specified LLVM type. - /// - unsigned getTypeID(const Type *Ty); + std::string makeNameProper(const std::string &x, + ManglerPrefixTy PrefixTy = Mangler::Default); + + /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix + /// and the specified global variable's name. If the global variable doesn't + /// have a name, this fills in a unique name for the global. 
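  // A sketch of the intended call pattern (the buffer type is an assumption):
  //   SmallString<128> NameBuf;
  //   Mang.getNameWithPrefix(NameBuf, GV, /*isImplicitlyPrivate=*/false);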
+  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+                         bool isImplicitlyPrivate);
 };

 } // End llvm namespace
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 85e19acd9ea77..6fa618eb1af85 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -52,6 +52,16 @@ inline bool isUInt32(int64_t Value) {
   return static_cast<uint32_t>(Value) == Value;
 }

+template <unsigned N>
+inline bool isInt(int64_t x) {
+  return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
+}
+
+template <unsigned N>
+inline bool isUint(uint64_t x) {
+  return N >= 64 || x < (UINT64_C(1)<<N);
+}
+
@@ -114,7 +124,7 @@
 /// ByteSwap_32 - This function returns a byte-swapped representation of the
 /// 32-bit argument, Value.
 inline uint32_t ByteSwap_32(uint32_t Value) {
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
   return __builtin_bswap32(Value);
 #elif defined(_MSC_VER) && !defined(_DEBUG)
   return _byteswap_ulong(Value);
@@ -124,7 +134,7 @@ inline uint32_t ByteSwap_32(uint32_t Value) {
 /// ByteSwap_64 - This function returns a byte-swapped representation of the
 /// 64-bit argument, Value.
 inline uint64_t ByteSwap_64(uint64_t Value) {
-#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
   return __builtin_bswap64(Value);
 #elif defined(_MSC_VER) && !defined(_DEBUG)
   return _byteswap_uint64(Value);
@@ -425,6 +435,13 @@ inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
   return ((Value + Align - 1) / Align) * Align;
 }

+/// OffsetToAlignment - Return the offset to the next integer (mod 2**64) that
+/// is greater than or equal to \arg Value and is a multiple of \arg
+/// Align. Align must be non-zero.
+inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
+  return RoundUpToAlignment(Value, Align) - Value;
+}
+
 /// abs64 - absolute value of a 64-bit int. Not all environments support
 /// "abs" on whatever their name for the 64-bit int type is. The absolute
 /// value of the largest negative number is undefined, as with "abs".
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 58a217f6c79ec..eb4784cbf5808 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_SUPPORT_MEMORYBUFFER_H
 #define LLVM_SUPPORT_MEMORYBUFFER_H

+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
 #include
@@ -42,6 +43,10 @@ public:
   const char *getBufferEnd() const { return BufferEnd; }
   size_t getBufferSize() const { return BufferEnd-BufferStart; }

+  StringRef getBuffer() const {
+    return StringRef(BufferStart, getBufferSize());
+  }
+
   /// getBufferIdentifier - Return an identifier for this buffer, typically the
   /// filename it was read from.
   virtual const char *getBufferIdentifier() const {
diff --git a/include/llvm/Support/MemoryObject.h b/include/llvm/Support/MemoryObject.h
new file mode 100644
index 0000000000000..dec0f134b306c
--- /dev/null
+++ b/include/llvm/Support/MemoryObject.h
@@ -0,0 +1,70 @@
+//===- MemoryObject.h - Abstract memory interface ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMORYOBJECT_H
+#define MEMORYOBJECT_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+/// MemoryObject - Abstract base class for contiguous addressable memory.
+/// Necessary for cases in which the memory is in another process, in a +/// file, or on a remote machine. +/// All size and offset parameters are uint64_ts, to allow 32-bit processes +/// access to 64-bit address spaces. +class MemoryObject { +public: + /// Destructor - Override as necessary. + virtual ~MemoryObject(); + + /// getBase - Returns the lowest valid address in the region. + /// + /// @result - The lowest valid address. + virtual uint64_t getBase() const = 0; + + /// getExtent - Returns the size of the region in bytes. (The region is + /// contiguous, so the highest valid address of the region + /// is getBase() + getExtent() - 1). + /// + /// @result - The size of the region. + virtual uint64_t getExtent() const = 0; + + /// readByte - Tries to read a single byte from the region. + /// + /// @param address - The address of the byte, in the same space as getBase(). + /// @param ptr - A pointer to a byte to be filled in. Must be non-NULL. + /// @result - 0 if successful; -1 if not. Failure may be due to a + /// bounds violation or an implementation-specific error. + virtual int readByte(uint64_t address, uint8_t* ptr) const = 0; + + /// readBytes - Tries to read a contiguous range of bytes from the + /// region, up to the end of the region. + /// You should override this function if there is a quicker + /// way than going back and forth with individual bytes. + /// + /// @param address - The address of the first byte, in the same space as + /// getBase(). + /// @param size - The maximum number of bytes to copy. + /// @param buf - A pointer to a buffer to be filled in. Must be non-NULL + /// and large enough to hold size bytes. + /// @param copied - A pointer to a nunber that is filled in with the number + /// of bytes actually read. May be NULL. + /// @result - 0 if successful; -1 if not. Failure may be due to a + /// bounds violation or an implementation-specific error. + virtual int readBytes(uint64_t address, + uint64_t size, + uint8_t* buf, + uint64_t* copied) const; +}; + +} + +#endif + diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h index a49cf8424067d..1f671c19250d2 100644 --- a/include/llvm/Support/NoFolder.h +++ b/include/llvm/Support/NoFolder.h @@ -28,9 +28,12 @@ namespace llvm { +class LLVMContext; + /// NoFolder - Create "constants" (actually, values) with no folding. 
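/// A sketch of opting out of folding with this class (the IRBuilder template
/// parameters here are an assumption about this revision):
///   IRBuilder<true, NoFolder> B(Context);
///   Value *S = B.CreateAdd(L, R); // emits a real 'add' even for constant
///                                 // operands, where ConstantFolder would
///                                 // have produced a ConstantExpr instead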
class NoFolder { public: + explicit NoFolder(LLVMContext &) {} //===--------------------------------------------------------------------===// // Binary Operators @@ -39,12 +42,18 @@ public: Value *CreateAdd(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateAdd(LHS, RHS); } + Value *CreateNSWAdd(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNSWAdd(LHS, RHS); + } Value *CreateFAdd(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFAdd(LHS, RHS); } Value *CreateSub(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateSub(LHS, RHS); } + Value *CreateNSWSub(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNSWSub(LHS, RHS); + } Value *CreateFSub(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFSub(LHS, RHS); } @@ -60,6 +69,9 @@ public: Value *CreateSDiv(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateSDiv(LHS, RHS); } + Value *CreateExactSDiv(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateExactSDiv(LHS, RHS); + } Value *CreateFDiv(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFDiv(LHS, RHS); } @@ -120,6 +132,15 @@ public: return GetElementPtrInst::Create(C, IdxList, IdxList+NumIdx); } + Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx); + } + Value *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList, + unsigned NumIdx) const { + return GetElementPtrInst::CreateInBounds(C, IdxList, IdxList+NumIdx); + } + //===--------------------------------------------------------------------===// // Cast/Conversion Operators //===--------------------------------------------------------------------===// @@ -143,12 +164,6 @@ public: Value *CreateFCmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const { return new FCmpInst(P, LHS, RHS); } - Value *CreateVICmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const { - return new VICmpInst(P, LHS, RHS); - } - Value *CreateVFCmp(CmpInst::Predicate P, Constant *LHS, Constant *RHS) const { - return new VFCmpInst(P, LHS, RHS); - } //===--------------------------------------------------------------------===// // Other Instructions diff --git a/include/llvm/Support/PassNameParser.h b/include/llvm/Support/PassNameParser.h index e489e0a6f0b56..66ce3f2e2085c 100644 --- a/include/llvm/Support/PassNameParser.h +++ b/include/llvm/Support/PassNameParser.h @@ -24,6 +24,7 @@ #define LLVM_SUPPORT_PASS_NAME_PARSER_H #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Pass.h" #include #include @@ -65,9 +66,9 @@ public: virtual void passRegistered(const PassInfo *P) { if (ignorablePass(P) || !Opt) return; if (findOption(P->getPassArgument()) != getNumOptions()) { - cerr << "Two passes with the same argument (-" + errs() << "Two passes with the same argument (-" << P->getPassArgument() << ") attempted to be registered!\n"; - abort(); + llvm_unreachable(0); } addLiteralOption(P->getPassArgument(), P, P->getPassName()); } diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h index fda925f5a9a8c..c0b6a6b98c09d 100644 --- a/include/llvm/Support/PatternMatch.h +++ b/include/llvm/Support/PatternMatch.h @@ -58,7 +58,7 @@ struct constantint_ty { if (const ConstantInt *CI = dyn_cast(V)) { const APInt &CIV = CI->getValue(); if (Val >= 0) - return CIV == Val; + return CIV == static_cast(Val); // If Val is 
negative, and CI is shorter than it, truncate to the right // number of bits. If it is larger, then we have to sign extend. Just // compare their negated values. @@ -87,6 +87,18 @@ struct zero_ty { /// m_Zero() - Match an arbitrary zero/null constant. inline zero_ty m_Zero() { return zero_ty(); } +struct one_ty { + template + bool match(ITy *V) { + if (const ConstantInt *C = dyn_cast(V)) + return C->isOne(); + return false; + } +}; + +/// m_One() - Match a an integer 1. +inline one_ty m_One() { return one_ty(); } + template struct bind_ty { @@ -311,7 +323,8 @@ struct BinaryOpClass_match { template bool match(OpTy *V) { if (Class *I = dyn_cast(V)) - if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { + if (L.match(I->getOperand(0)) && + R.match(I->getOperand(1))) { if (Opcode) *Opcode = I->getOpcode(); return true; @@ -356,7 +369,8 @@ struct CmpClass_match { template bool match(OpTy *V) { if (Class *I = dyn_cast(V)) - if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { + if (L.match(I->getOperand(0)) && + R.match(I->getOperand(1))) { Predicate = I->getPredicate(); return true; } @@ -403,7 +417,7 @@ struct SelectClass_match { }; template -inline SelectClass_match +inline SelectClass_match m_Select(const Cond &C, const LHS &L, const RHS &R) { return SelectClass_match(C, L, R); } @@ -503,7 +517,7 @@ struct neg_match { } private: bool matchIfNeg(Value *LHS, Value *RHS) { - return LHS == ConstantExpr::getZeroValueForNegationExpr(LHS->getType()) && + return LHS == ConstantFP::getZeroValueForNegation(LHS->getType()) && L.match(RHS); } }; @@ -532,7 +546,7 @@ struct fneg_match { } private: bool matchIfFNeg(Value *LHS, Value *RHS) { - return LHS == ConstantExpr::getZeroValueForNegationExpr(LHS->getType()) && + return LHS == ConstantFP::getZeroValueForNegation(LHS->getType()) && L.match(RHS); } }; diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h index b0edd3bd09f99..d64993f54d1df 100644 --- a/include/llvm/Support/PointerLikeTypeTraits.h +++ b/include/llvm/Support/PointerLikeTypeTraits.h @@ -50,12 +50,16 @@ public: // Provide PointerLikeTypeTraits for const pointers. template class PointerLikeTypeTraits { + typedef PointerLikeTypeTraits NonConst; + public: - static inline const void *getAsVoidPointer(const T* P) { return P; } + static inline const void *getAsVoidPointer(const T* P) { + return NonConst::getAsVoidPointer(const_cast(P)); + } static inline const T *getFromVoidPointer(const void *P) { - return static_cast(P); + return NonConst::getFromVoidPointer(const_cast(P)); } - enum { NumLowBitsAvailable = 2 }; + enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable }; }; // Provide PointerLikeTypeTraits for uintptr_t. diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h index 909d286f28b63..0db84e1a14c6b 100644 --- a/include/llvm/Support/PrettyStackTrace.h +++ b/include/llvm/Support/PrettyStackTrace.h @@ -18,6 +18,12 @@ namespace llvm { class raw_ostream; + + /// DisablePrettyStackTrace - Set this to true to disable this module. This + /// might be neccessary if the host application installs its own signal + /// handlers which conflict with the ones installed by this module. + /// Defaults to false. + extern bool DisablePrettyStackTrace; /// PrettyStackTraceEntry - This class is used to represent a frame of the /// "pretty" stack trace that is dumped when a program crashes. 
You can define
diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h
index 2fa0365d5f503..d8f8c78941429 100644
--- a/include/llvm/Support/Recycler.h
+++ b/include/llvm/Support/Recycler.h
@@ -34,7 +34,8 @@ struct RecyclerStruct {
 };

 template<>
-struct ilist_traits<RecyclerStruct> : ilist_default_traits<RecyclerStruct> {
+struct ilist_traits<RecyclerStruct> :
+    public ilist_default_traits<RecyclerStruct> {
   static RecyclerStruct *getPrev(const RecyclerStruct *t) { return t->Prev; }
   static RecyclerStruct *getNext(const RecyclerStruct *t) { return t->Next; }
   static void setPrev(RecyclerStruct *t, RecyclerStruct *p) { t->Prev = p; }
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
new file mode 100644
index 0000000000000..c954c0d31abe2
--- /dev/null
+++ b/include/llvm/Support/Regex.h
@@ -0,0 +1,63 @@
+//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include
+
+struct llvm_regex;
+
+namespace llvm {
+  class StringRef;
+  template<typename T> class SmallVectorImpl;
+
+  class Regex {
+  public:
+    enum {
+      NoFlags=0,
+      /// Compile for matching that ignores upper/lower case distinctions.
+      IgnoreCase=1,
+      /// Compile for newline-sensitive matching. With this flag '[^' bracket
+      /// expressions and '.' never match newline. A ^ anchor matches the
+      /// null string after any newline in the string in addition to its normal
+      /// function, and the $ anchor matches the null string before any
+      /// newline in the string in addition to its normal function.
+      Newline=2
+    };
+
+    /// Compiles the given POSIX Extended Regular Expression \arg Regex.
+    /// This implementation supports regexes and matching strings with embedded
+    /// NUL characters.
+    Regex(const StringRef &Regex, unsigned Flags = NoFlags);
+    ~Regex();
+
+    /// isValid - returns the error encountered during regex compilation, or
+    /// matching, if any.
+    bool isValid(std::string &Error);
+
+    /// getNumMatches - In a valid regex, return the number of parenthesized
+    /// matches it contains. The number filled in by match will include this
+    /// many entries plus one for the whole regex (as element 0).
+    unsigned getNumMatches() const;
+
+    /// matches - Match the regex against a given \arg String.
+    ///
+    /// \param Matches - If given, on a successful match this will be filled in
+    /// with references to the matched group expressions (inside \arg String),
+    /// the first group is always the entire pattern.
+    ///
+    /// This returns true on a successful match.
+    bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
+  private:
+    struct llvm_regex *preg;
+    int error;
+  };
+}
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h
index 454679bda8349..4db88825afb55 100644
--- a/include/llvm/Support/Registry.h
+++ b/include/llvm/Support/Registry.h
@@ -77,9 +77,6 @@ namespace llvm {
     static listener *ListenerHead, *ListenerTail;

   public:
-    class iterator;
-
-
     /// Node in linked list of entries.
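Returning to the Regex class introduced above, a typical use looks like this
(a sketch; the pattern and variable names are hypothetical):

  Regex R("^([a-z]+)=([0-9]+)$", Regex::Newline);
  SmallVector<StringRef, 3> Groups;
  if (R.match(Line, &Groups)) {
    StringRef Key = Groups[1]; // Groups[0] is the entire match
    StringRef Val = Groups[2];
  }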
/// class node { diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h index 7c8a139091bfb..5b6f56b436285 100644 --- a/include/llvm/Support/SourceMgr.h +++ b/include/llvm/Support/SourceMgr.h @@ -65,10 +65,14 @@ class SourceMgr { // include files in. std::vector IncludeDirectories; + /// LineNoCache - This is a cache for line number queries, its implementation + /// is really private to SourceMgr.cpp. + mutable void *LineNoCache; + SourceMgr(const SourceMgr&); // DO NOT IMPLEMENT void operator=(const SourceMgr&); // DO NOT IMPLEMENT public: - SourceMgr() {} + SourceMgr() : LineNoCache(0) {} ~SourceMgr(); void setIncludeDirs(const std::vector &Dirs) { @@ -145,17 +149,6 @@ public: const std::string &Msg, const std::string &LineStr) : Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg), LineContents(LineStr) {} - SMDiagnostic(const SMDiagnostic &RHS) { - operator=(RHS); - } - - void operator=(const SMDiagnostic &E) { - Filename = E.Filename; - LineNo = E.LineNo; - ColumnNo = E.ColumnNo; - Message = E.Message; - LineContents = E.LineContents; - } void Print(const char *ProgName, raw_ostream &S); }; diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h index 5c63034a863c9..c71e6b94fa279 100644 --- a/include/llvm/Support/StandardPasses.h +++ b/include/llvm/Support/StandardPasses.h @@ -80,6 +80,8 @@ namespace llvm { } } + /// createStandardModulePasses - Add the standard module passes. This is + /// expected to be run after the standard function passes. static inline void createStandardModulePasses(PassManager *PM, unsigned OptimizationLevel, bool OptimizeSize, @@ -91,71 +93,69 @@ namespace llvm { if (OptimizationLevel == 0) { if (InliningPass) PM->add(InliningPass); - } else { - if (UnitAtATime) - PM->add(createRaiseAllocationsPass()); // call %malloc -> malloc inst - PM->add(createCFGSimplificationPass()); // Clean up disgusting code - // Kill useless allocas - PM->add(createPromoteMemoryToRegisterPass()); - if (UnitAtATime) { - PM->add(createGlobalOptimizerPass()); // Optimize out global vars - PM->add(createGlobalDCEPass()); // Remove unused fns and globs - // IP Constant Propagation - PM->add(createIPConstantPropagationPass()); - PM->add(createDeadArgEliminationPass()); // Dead argument elimination - } - PM->add(createInstructionCombiningPass()); // Clean up after IPCP & DAE - PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - if (UnitAtATime) { - if (HaveExceptions) - PM->add(createPruneEHPass()); // Remove dead EH info - PM->add(createFunctionAttrsPass()); // Set readonly/readnone attrs - } - if (InliningPass) - PM->add(InliningPass); - if (OptimizationLevel > 2) - PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args - if (SimplifyLibCalls) - PM->add(createSimplifyLibCallsPass()); // Library Call Optimizations - PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl. - PM->add(createJumpThreadingPass()); // Thread jumps. 
- PM->add(createCFGSimplificationPass()); // Merge & remove BBs - PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas - PM->add(createInstructionCombiningPass()); // Combine silly seq's - PM->add(createCondPropagationPass()); // Propagate conditionals - PM->add(createTailCallEliminationPass()); // Eliminate tail calls - PM->add(createCFGSimplificationPass()); // Merge & remove BBs - PM->add(createReassociatePass()); // Reassociate expressions - PM->add(createLoopRotatePass()); // Rotate Loop - PM->add(createLICMPass()); // Hoist loop invariants - PM->add(createLoopUnswitchPass(OptimizeSize)); - PM->add(createLoopIndexSplitPass()); // Split loop index - PM->add(createInstructionCombiningPass()); - PM->add(createIndVarSimplifyPass()); // Canonicalize indvars - PM->add(createLoopDeletionPass()); // Delete dead loops - if (UnrollLoops) - PM->add(createLoopUnrollPass()); // Unroll small loops - PM->add(createInstructionCombiningPass()); // Clean up after the unroller - PM->add(createGVNPass()); // Remove redundancies - PM->add(createMemCpyOptPass()); // Remove memcpy / form memset - PM->add(createSCCPPass()); // Constant prop with SCCP + return; + } - // Run instcombine after redundancy elimination to exploit opportunities - // opened up by them. - PM->add(createInstructionCombiningPass()); - PM->add(createCondPropagationPass()); // Propagate conditionals - PM->add(createDeadStoreEliminationPass()); // Delete dead stores - PM->add(createAggressiveDCEPass()); // Delete dead instructions - PM->add(createCFGSimplificationPass()); // Merge & remove BBs - - if (UnitAtATime) { - PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes - PM->add(createDeadTypeEliminationPass()); // Eliminate dead types - } - - if (OptimizationLevel > 1 && UnitAtATime) - PM->add(createConstantMergePass()); // Merge dup global constants + if (UnitAtATime) + PM->add(createRaiseAllocationsPass()); // call %malloc -> malloc inst + PM->add(createCFGSimplificationPass()); // Clean up disgusting code + if (UnitAtATime) { + PM->add(createGlobalOptimizerPass()); // Optimize out global vars + PM->add(createGlobalDCEPass()); // Remove unused fns and globs + // IP Constant Propagation + PM->add(createIPConstantPropagationPass()); + PM->add(createDeadArgEliminationPass()); // Dead argument elimination } + PM->add(createInstructionCombiningPass()); // Clean up after IPCP & DAE + PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + if (UnitAtATime) { + if (HaveExceptions) + PM->add(createPruneEHPass()); // Remove dead EH info + PM->add(createFunctionAttrsPass()); // Set readonly/readnone attrs + } + if (InliningPass) + PM->add(InliningPass); + if (OptimizationLevel > 2) + PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args + if (SimplifyLibCalls) + PM->add(createSimplifyLibCallsPass()); // Library Call Optimizations + PM->add(createInstructionCombiningPass()); // Cleanup for scalarrepl. + PM->add(createJumpThreadingPass()); // Thread jumps. 
+ PM->add(createCFGSimplificationPass()); // Merge & remove BBs + PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas + PM->add(createInstructionCombiningPass()); // Combine silly seq's + PM->add(createCondPropagationPass()); // Propagate conditionals + PM->add(createTailCallEliminationPass()); // Eliminate tail calls + PM->add(createCFGSimplificationPass()); // Merge & remove BBs + PM->add(createReassociatePass()); // Reassociate expressions + PM->add(createLoopRotatePass()); // Rotate Loop + PM->add(createLICMPass()); // Hoist loop invariants + PM->add(createLoopUnswitchPass(OptimizeSize)); + PM->add(createInstructionCombiningPass()); + PM->add(createIndVarSimplifyPass()); // Canonicalize indvars + PM->add(createLoopDeletionPass()); // Delete dead loops + if (UnrollLoops) + PM->add(createLoopUnrollPass()); // Unroll small loops + PM->add(createInstructionCombiningPass()); // Clean up after the unroller + PM->add(createGVNPass()); // Remove redundancies + PM->add(createMemCpyOptPass()); // Remove memcpy / form memset + PM->add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + PM->add(createInstructionCombiningPass()); + PM->add(createCondPropagationPass()); // Propagate conditionals + PM->add(createDeadStoreEliminationPass()); // Delete dead stores + PM->add(createAggressiveDCEPass()); // Delete dead instructions + PM->add(createCFGSimplificationPass()); // Merge & remove BBs + + if (UnitAtATime) { + PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + PM->add(createDeadTypeEliminationPass()); // Eliminate dead types + } + + if (OptimizationLevel > 1 && UnitAtATime) + PM->add(createConstantMergePass()); // Merge dup global constants } static inline void addOnePass(PassManager *PM, Pass *P, bool AndVerify) { @@ -230,10 +230,8 @@ namespace llvm { addOnePass(PM, createInstructionCombiningPass(), VerifyEach); addOnePass(PM, createJumpThreadingPass(), VerifyEach); - // Cleanup jump threading. - addOnePass(PM, createPromoteMemoryToRegisterPass(), VerifyEach); - // Delete basic blocks, which optimization passes may have killed... + // Delete basic blocks, which optimization passes may have killed. addOnePass(PM, createCFGSimplificationPass(), VerifyEach); // Now that we have optimized the program, discard unreachable functions. diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h index 98db8e2bf37c9..82e46d42c69dc 100644 --- a/include/llvm/Support/StringPool.h +++ b/include/llvm/Support/StringPool.h @@ -1,4 +1,4 @@ -//===-- StringPool.h - Interned string pool -------------------------------===// +//===-- StringPool.h - Interned string pool ---------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -64,12 +64,7 @@ namespace llvm { /// intern - Adds a string to the pool and returns a reference-counted /// pointer to it. No additional memory is allocated if the string already /// exists in the pool. - PooledStringPtr intern(const char *Begin, const char *End); - - /// intern - Adds a null-terminated string to the pool and returns a - /// reference-counted pointer to it. No additional memory is allocated if - /// the string already exists in the pool. - inline PooledStringPtr intern(const char *Str); + PooledStringPtr intern(const StringRef &Str); /// empty - Checks whether the pool is empty. Returns true if so. 
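Stepping back to the StandardPasses hunk above: the restructured
createStandardModulePasses reads more naturally with an early return for -O0.
A minimal caller might look like the sketch below; the argument order follows
the declaration earlier in this header and should be treated as an assumption
to verify, with createFunctionInliningPass standing in for any inliner:

  #include "llvm/PassManager.h"
  #include "llvm/Support/StandardPasses.h"
  #include "llvm/Transforms/IPO.h"
  using namespace llvm;

  void populateO2(PassManager &PM) {
    // -O2-style module pipeline; run the standard function passes first.
    createStandardModulePasses(&PM, /*OptimizationLevel=*/2,
                               /*OptimizeSize=*/false,
                               /*UnitAtATime=*/true,
                               /*UnrollLoops=*/true,
                               /*SimplifyLibCalls=*/true,
                               /*HaveExceptions=*/true,
                               createFunctionInliningPass());
  }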
/// @@ -139,10 +134,6 @@ namespace llvm { inline bool operator!=(const PooledStringPtr &That) { return S != That.S; } }; - PooledStringPtr StringPool::intern(const char *Str) { - return intern(Str, Str + strlen(Str)); - } - } // End llvm namespace #endif diff --git a/include/llvm/Support/SystemUtils.h b/include/llvm/Support/SystemUtils.h index 9a33fa31f2269..b3d83fc243458 100644 --- a/include/llvm/Support/SystemUtils.h +++ b/include/llvm/Support/SystemUtils.h @@ -15,27 +15,29 @@ #ifndef LLVM_SUPPORT_SYSTEMUTILS_H #define LLVM_SUPPORT_SYSTEMUTILS_H -#include "llvm/System/Program.h" +#include namespace llvm { + class raw_ostream; + namespace sys { class Path; } -/// Determine if the ostream provided is connected to the std::cout and +/// Determine if the raw_ostream provided is connected to the outs() and /// displayed or not (to a console window). If so, generate a warning message /// advising against display of bitcode and return true. Otherwise just return /// false /// @brief Check for output written to a console bool CheckBitcodeOutputToConsole( - std::ostream* stream_to_check, ///< The stream to be checked - bool print_warning = true ///< Control whether warnings are printed + raw_ostream &stream_to_check, ///< The stream to be checked + bool print_warning = true ///< Control whether warnings are printed ); /// FindExecutable - Find a named executable, giving the argv[0] of program -/// being executed. This allows us to find another LLVM tool if it is built into -/// the same directory, but that directory is neither the current directory, nor -/// in the PATH. If the executable cannot be found, return an empty string. +/// being executed. This allows us to find another LLVM tool if it is built in +/// the same directory. If the executable cannot be found, return an +/// empty string. /// @brief Find a named executable. sys::Path FindExecutable(const std::string &ExeName, - const std::string &ProgramPath); + const char *Argv0, void *MainAddr); } // End llvm namespace diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h index b0700c1dadd84..8e28632b7eb82 100644 --- a/include/llvm/Support/TargetFolder.h +++ b/include/llvm/Support/TargetFolder.h @@ -25,21 +25,24 @@ namespace llvm { class TargetData; +class LLVMContext; /// TargetFolder - Create constants with target dependent folding. class TargetFolder { const TargetData *TD; + LLVMContext &Context; /// Fold - Fold the constant using target specific information. 
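The StringRef-based intern() above subsumes both removed overloads, since a C
string and an explicit (pointer, length) range both convert implicitly. A
small sketch, with illustrative names:

  #include <cassert>
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/StringPool.h"
  using namespace llvm;

  void internBoth(StringPool &Pool) {
    // Both forms now go through the single StringRef entry point.
    PooledStringPtr A = Pool.intern("example");
    PooledStringPtr B = Pool.intern(StringRef("example", 7));
    assert(A == B && "identical text shares one pool entry");
  }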
Constant *Fold(Constant *C) const { if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *CF = ConstantFoldConstantExpression(CE, TD)) + if (Constant *CF = ConstantFoldConstantExpression(CE, Context, TD)) return CF; return C; } public: - explicit TargetFolder(const TargetData *TheTD) : TD(TheTD) {} + explicit TargetFolder(const TargetData *TheTD, LLVMContext &C) : + TD(TheTD), Context(C) {} //===--------------------------------------------------------------------===// // Binary Operators @@ -48,12 +51,18 @@ public: Constant *CreateAdd(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getAdd(LHS, RHS)); } + Constant *CreateNSWAdd(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getNSWAdd(LHS, RHS)); + } Constant *CreateFAdd(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getFAdd(LHS, RHS)); } Constant *CreateSub(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getSub(LHS, RHS)); } + Constant *CreateNSWSub(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getNSWSub(LHS, RHS)); + } Constant *CreateFSub(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getFSub(LHS, RHS)); } @@ -69,6 +78,9 @@ public: Constant *CreateSDiv(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getSDiv(LHS, RHS)); } + Constant *CreateExactSDiv(Constant *LHS, Constant *RHS) const { + return Fold(ConstantExpr::getExactSDiv(LHS, RHS)); + } Constant *CreateFDiv(Constant *LHS, Constant *RHS) const { return Fold(ConstantExpr::getFDiv(LHS, RHS)); } @@ -132,6 +144,15 @@ public: return Fold(ConstantExpr::getGetElementPtr(C, IdxList, NumIdx)); } + Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList, + unsigned NumIdx) const { + return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx)); + } + Constant *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList, + unsigned NumIdx) const { + return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx)); + } + //===--------------------------------------------------------------------===// // Cast/Conversion Operators //===--------------------------------------------------------------------===// @@ -176,14 +197,6 @@ public: Constant *RHS) const { return Fold(ConstantExpr::getCompare(P, LHS, RHS)); } - Constant *CreateVICmp(CmpInst::Predicate P, Constant *LHS, - Constant *RHS) const { - return Fold(ConstantExpr::getCompare(P, LHS, RHS)); - } - Constant *CreateVFCmp(CmpInst::Predicate P, Constant *LHS, - Constant *RHS) const { - return Fold(ConstantExpr::getCompare(P, LHS, RHS)); - } //===--------------------------------------------------------------------===// // Other Instructions diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h index 71b7ee58fd3ca..54f1da96cad67 100644 --- a/include/llvm/Support/Timer.h +++ b/include/llvm/Support/Timer.h @@ -19,15 +19,15 @@ #include "llvm/System/Mutex.h" #include #include -#include #include namespace llvm { class TimerGroup; +class raw_ostream; /// Timer - This class is used to track the amount of time spent between -/// invocations of it's startTimer()/stopTimer() methods. Given appropriate OS +/// invocations of its startTimer()/stopTimer() methods. Given appropriate OS /// support it can also keep track of the RSS of the program at various points. 
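Returning to the TargetFolder hunk: the constructor now threads an LLVMContext
through to constant folding. A sketch of the updated construction (the
function name is illustrative):

  #include "llvm/Constants.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/Support/TargetFolder.h"
  #include "llvm/Target/TargetData.h"
  using namespace llvm;

  Constant *foldAdd(Constant *L, Constant *R,
                    const TargetData *TD, LLVMContext &Ctx) {
    TargetFolder Folder(TD, Ctx);  // context is now a required argument
    return Folder.CreateAdd(L, R);
  }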
/// By default, the Timer will print the amount of time it has captured to /// standard error when the laster timer is destroyed, otherwise it is printed @@ -112,7 +112,7 @@ public: /// print - Print the current timer to standard error, and reset the "Started" /// flag. - void print(const Timer &Total, std::ostream &OS); + void print(const Timer &Total, raw_ostream &OS); private: friend class TimerGroup; diff --git a/include/llvm/Support/TypeBuilder.h b/include/llvm/Support/TypeBuilder.h index b0ae516b815c9..fb22e3f5241d5 100644 --- a/include/llvm/Support/TypeBuilder.h +++ b/include/llvm/Support/TypeBuilder.h @@ -16,6 +16,7 @@ #define LLVM_SUPPORT_TYPEBUILDER_H #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include namespace llvm { @@ -49,15 +50,14 @@ namespace llvm { /// namespace llvm { /// template class TypeBuilder { /// public: -/// static const StructType *get() { -/// // Using the static result variable ensures that the type is -/// // only looked up once. -/// static const StructType *const result = StructType::get( -/// TypeBuilder, xcompile>::get(), -/// TypeBuilder*, xcompile>::get(), -/// TypeBuilder*[], xcompile>::get(), +/// static const StructType *get(LLVMContext &Context) { +/// // If you cache this result, be sure to cache it separately +/// // for each LLVMContext. +/// return StructType::get( +/// TypeBuilder, xcompile>::get(Context), +/// TypeBuilder*, xcompile>::get(Context), +/// TypeBuilder*[], xcompile>::get(Context), /// NULL); -/// return result; /// } /// /// // You may find this a convenient place to put some constants @@ -71,9 +71,6 @@ namespace llvm { /// } /// } // namespace llvm /// -/// Using the static result variable ensures that the type is only looked up -/// once. -/// /// TypeBuilder cannot handle recursive types or types you only know at runtime. /// If you try to give it a recursive type, it will deadlock, infinitely /// recurse, or throw a recursive_init exception. @@ -104,10 +101,8 @@ template class TypeBuilder // Pointers template class TypeBuilder { public: - static const PointerType *get() { - static const PointerType *const result = - PointerType::getUnqual(TypeBuilder::get()); - return result; + static const PointerType *get(LLVMContext &Context) { + return PointerType::getUnqual(TypeBuilder::get(Context)); } }; @@ -117,19 +112,15 @@ template class TypeBuilder {}; // Arrays template class TypeBuilder { public: - static const ArrayType *get() { - static const ArrayType *const result = - ArrayType::get(TypeBuilder::get(), N); - return result; + static const ArrayType *get(LLVMContext &Context) { + return ArrayType::get(TypeBuilder::get(Context), N); } }; /// LLVM uses an array of length 0 to represent an unknown-length array. 
template class TypeBuilder { public: - static const ArrayType *get() { - static const ArrayType *const result = - ArrayType::get(TypeBuilder::get(), 0); - return result; + static const ArrayType *get(LLVMContext &Context) { + return ArrayType::get(TypeBuilder::get(Context), 0); } }; @@ -158,10 +149,8 @@ public: #define DEFINE_INTEGRAL_TYPEBUILDER(T) \ template<> class TypeBuilder { \ public: \ - static const IntegerType *get() { \ - static const IntegerType *const result = \ - IntegerType::get(sizeof(T) * CHAR_BIT); \ - return result; \ + static const IntegerType *get(LLVMContext &Context) { \ + return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \ } \ }; \ template<> class TypeBuilder { \ @@ -189,53 +178,52 @@ DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long); template class TypeBuilder, cross> { public: - static const IntegerType *get() { - static const IntegerType *const result = IntegerType::get(num_bits); - return result; + static const IntegerType *get(LLVMContext &C) { + return IntegerType::get(C, num_bits); } }; template<> class TypeBuilder { public: - static const Type *get() { - return Type::FloatTy; + static const Type *get(LLVMContext& C) { + return Type::getFloatTy(C); } }; template<> class TypeBuilder {}; template<> class TypeBuilder { public: - static const Type *get() { - return Type::DoubleTy; + static const Type *get(LLVMContext& C) { + return Type::getDoubleTy(C); } }; template<> class TypeBuilder {}; template class TypeBuilder { public: - static const Type *get() { return Type::FloatTy; } + static const Type *get(LLVMContext& C) { return Type::getFloatTy(C); } }; template class TypeBuilder { public: - static const Type *get() { return Type::DoubleTy; } + static const Type *get(LLVMContext& C) { return Type::getDoubleTy(C); } }; template class TypeBuilder { public: - static const Type *get() { return Type::X86_FP80Ty; } + static const Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); } }; template class TypeBuilder { public: - static const Type *get() { return Type::FP128Ty; } + static const Type *get(LLVMContext& C) { return Type::getFP128Ty(C); } }; template class TypeBuilder { public: - static const Type *get() { return Type::PPC_FP128Ty; } + static const Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); } }; template class TypeBuilder { public: - static const Type *get() { - return Type::VoidTy; + static const Type *get(LLVMContext &C) { + return Type::getVoidTy(C); } }; @@ -246,64 +234,43 @@ template<> class TypeBuilder template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { - return FunctionType::get(TypeBuilder::get(), false); + static const FunctionType *get(LLVMContext &Context) { + return FunctionType::get(TypeBuilder::get(Context), false); } }; template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(1); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, false); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); } }; template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return 
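The pattern throughout these TypeBuilder hunks is mechanical: drop the
function-local static cache and accept an LLVMContext instead. In client code
that looks like the sketch below, assuming the types::i<N> helpers defined
elsewhere in this header:

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include "llvm/Support/TypeBuilder.h"
  using namespace llvm;

  const FunctionType *getCallbackTy(LLVMContext &Context) {
    // i32 (i8*), built portably; get() now takes the context instead of
    // caching its result in a static, which was unsafe across contexts.
    return TypeBuilder<types::i<32>(types::i<8>*), true>::get(Context);
  }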
result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(2); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, false); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); } }; template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(3); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, false); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); } }; @@ -311,20 +278,15 @@ template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(4); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, false); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); } }; @@ -332,85 +294,58 @@ template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(5); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, false); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); } }; template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { - return FunctionType::get(TypeBuilder::get(), true); + static const FunctionType *get(LLVMContext &Context) { + return FunctionType::get(TypeBuilder::get(Context), true); } }; template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(1); - 
params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, true); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), params, true); } }; template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(2); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, true); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); } }; template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(3); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, true); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); } }; @@ -418,20 +353,15 @@ template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(4); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, true); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); } }; @@ -439,21 +369,16 @@ template class TypeBuilder { public: - static const FunctionType *get() { - static const FunctionType *const result = create(); - return result; - } - -private: - static const FunctionType *create() { + static const FunctionType *get(LLVMContext &Context) { std::vector params; params.reserve(5); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - params.push_back(TypeBuilder::get()); - return FunctionType::get(TypeBuilder::get(), params, true); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); } }; diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h index a97a5e88142d7..e6363ffea9809 100644 --- a/include/llvm/Support/ValueHandle.h +++ b/include/llvm/Support/ValueHandle.h @@ -14,6 +14,7 @@ #ifndef LLVM_SUPPORT_VALUEHANDLE_H #define LLVM_SUPPORT_VALUEHANDLE_H +#include 
"llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Value.h" @@ -44,73 +45,87 @@ protected: /// fully general Callback version does have a vtable. enum HandleBaseKind { Assert, - Weak, - Callback + Callback, + Tracking, + Weak }; private: - + PointerIntPair PrevPair; ValueHandleBase *Next; Value *VP; + + explicit ValueHandleBase(const ValueHandleBase&); // DO NOT IMPLEMENT. public: explicit ValueHandleBase(HandleBaseKind Kind) : PrevPair(0, Kind), Next(0), VP(0) {} ValueHandleBase(HandleBaseKind Kind, Value *V) : PrevPair(0, Kind), Next(0), VP(V) { - if (V) + if (isValid(VP)) AddToUseList(); } ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) : PrevPair(0, Kind), Next(0), VP(RHS.VP) { - if (VP) + if (isValid(VP)) AddToExistingUseList(RHS.getPrevPtr()); } ~ValueHandleBase() { - if (VP) - RemoveFromUseList(); + if (isValid(VP)) + RemoveFromUseList(); } - + Value *operator=(Value *RHS) { if (VP == RHS) return RHS; - if (VP) RemoveFromUseList(); + if (isValid(VP)) RemoveFromUseList(); VP = RHS; - if (VP) AddToUseList(); + if (isValid(VP)) AddToUseList(); return RHS; } Value *operator=(const ValueHandleBase &RHS) { if (VP == RHS.VP) return RHS.VP; - if (VP) RemoveFromUseList(); + if (isValid(VP)) RemoveFromUseList(); VP = RHS.VP; - if (VP) AddToExistingUseList(RHS.getPrevPtr()); + if (isValid(VP)) AddToExistingUseList(RHS.getPrevPtr()); return VP; } - + Value *operator->() const { return getValPtr(); } Value &operator*() const { return *getValPtr(); } protected: Value *getValPtr() const { return VP; } + static bool isValid(Value *V) { + return V && + V != DenseMapInfo::getEmptyKey() && + V != DenseMapInfo::getTombstoneKey(); + } + private: // Callbacks made from Value. static void ValueIsDeleted(Value *V); static void ValueIsRAUWd(Value *Old, Value *New); - + // Internal implementation details. ValueHandleBase **getPrevPtr() const { return PrevPair.getPointer(); } HandleBaseKind getKind() const { return PrevPair.getInt(); } void setPrevPtr(ValueHandleBase **Ptr) { PrevPair.setPointer(Ptr); } - - /// AddToUseList - Add this ValueHandle to the use list for VP, where List is - /// known to point into the existing use list. + + /// AddToExistingUseList - Add this ValueHandle to the use list for VP, where + /// List is the address of either the head of the list or a Next node within + /// the existing use list. void AddToExistingUseList(ValueHandleBase **List); - + + /// AddToExistingUseListAfter - Add this ValueHandle to the use list after + /// Node. + void AddToExistingUseListAfter(ValueHandleBase *Node); + /// AddToUseList - Add this ValueHandle to the use list for VP. void AddToUseList(); /// RemoveFromUseList - Remove this ValueHandle from its current use list. void RemoveFromUseList(); }; - + /// WeakVH - This is a value handle that tries hard to point to a Value, even /// across RAUW operations, but will null itself out if the value is destroyed. /// this is useful for advisory sorts of information, but should not be used as @@ -123,6 +138,13 @@ public: WeakVH(const WeakVH &RHS) : ValueHandleBase(Weak, RHS) {} + Value *operator=(Value *RHS) { + return ValueHandleBase::operator=(RHS); + } + Value *operator=(const ValueHandleBase &RHS) { + return ValueHandleBase::operator=(RHS); + } + operator Value*() const { return getValPtr(); } @@ -153,7 +175,7 @@ template<> struct simplify_type : public simplify_type {}; /// AssertingVH's as it moves. This is required because in non-assert mode this /// class turns into a trivial wrapper around a pointer. 
template -class AssertingVH +class AssertingVH #ifndef NDEBUG : public ValueHandleBase #endif @@ -164,7 +186,7 @@ class AssertingVH return static_cast(ValueHandleBase::getValPtr()); } void setValPtr(ValueTy *P) { - ValueHandleBase::operator=(P); + ValueHandleBase::operator=(GetAsValue(P)); } #else ValueTy *ThePtr; @@ -172,10 +194,15 @@ class AssertingVH void setValPtr(ValueTy *P) { ThePtr = P; } #endif + // Convert a ValueTy*, which may be const, to the type the base + // class expects. + static Value *GetAsValue(Value *V) { return V; } + static Value *GetAsValue(const Value *V) { return const_cast(V); } + public: #ifndef NDEBUG AssertingVH() : ValueHandleBase(Assert) {} - AssertingVH(ValueTy *P) : ValueHandleBase(Assert, P) {} + AssertingVH(ValueTy *P) : ValueHandleBase(Assert, GetAsValue(P)) {} AssertingVH(const AssertingVH &RHS) : ValueHandleBase(Assert, RHS) {} #else AssertingVH() : ThePtr(0) {} @@ -190,7 +217,7 @@ public: setValPtr(RHS); return getValPtr(); } - ValueTy *operator=(AssertingVH &RHS) { + ValueTy *operator=(const AssertingVH &RHS) { setValPtr(RHS.getValPtr()); return getValPtr(); } @@ -211,6 +238,88 @@ template<> struct simplify_type > { template<> struct simplify_type > : public simplify_type > {}; +/// TrackingVH - This is a value handle that tracks a Value (or Value subclass), +/// even across RAUW operations. +/// +/// TrackingVH is designed for situations where a client needs to hold a handle +/// to a Value (or subclass) across some operations which may move that value, +/// but should never destroy it or replace it with some unacceptable type. +/// +/// It is an error to do anything with a TrackingVH whose value has been +/// destroyed, except to destruct it. +/// +/// It is an error to attempt to replace a value with one of a type which is +/// incompatible with any of its outstanding TrackingVHs. +template +class TrackingVH : public ValueHandleBase { + void CheckValidity() const { + Value *VP = ValueHandleBase::getValPtr(); + + // Null is always ok. + if (!VP) + return; + + // Check that this value is valid (i.e., it hasn't been deleted). We + // explicitly delay this check until access to avoid requiring clients to be + // unnecessarily careful w.r.t. destruction. + assert(ValueHandleBase::isValid(VP) && "Tracked Value was deleted!"); + + // Check that the value is a member of the correct subclass. We would like + // to check this property on assignment for better debugging, but we don't + // want to require a virtual interface on this VH. Instead we allow RAUW to + // replace this value with a value of an invalid type, and check it here. + assert(isa(VP) && + "Tracked Value was replaced by one with an invalid type!"); + } + + ValueTy *getValPtr() const { + CheckValidity(); + return static_cast(ValueHandleBase::getValPtr()); + } + void setValPtr(ValueTy *P) { + CheckValidity(); + ValueHandleBase::operator=(GetAsValue(P)); + } + + // Convert a ValueTy*, which may be const, to the type the base + // class expects. 
+ static Value *GetAsValue(Value *V) { return V; } + static Value *GetAsValue(const Value *V) { return const_cast(V); } + +public: + TrackingVH() : ValueHandleBase(Tracking) {} + TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, P) {} + TrackingVH(const TrackingVH &RHS) : ValueHandleBase(Tracking, RHS) {} + + operator ValueTy*() const { + return getValPtr(); + } + + ValueTy *operator=(ValueTy *RHS) { + setValPtr(RHS); + return getValPtr(); + } + ValueTy *operator=(const TrackingVH &RHS) { + setValPtr(RHS.getValPtr()); + return getValPtr(); + } + + ValueTy *operator->() const { return getValPtr(); } + ValueTy &operator*() const { return *getValPtr(); } +}; + +// Specialize simplify_type to allow TrackingVH to participate in +// dyn_cast, isa, etc. +template struct simplify_type; +template<> struct simplify_type > { + typedef Value* SimpleType; + static SimpleType getSimplifiedValue(const TrackingVH &AVH) { + return static_cast(AVH); + } +}; +template<> struct simplify_type > + : public simplify_type > {}; + /// CallbackVH - This is a value handle that allows subclasses to define /// callbacks that run when the underlying Value has RAUW called on it or is /// destroyed. This class can be used as the key of a map, as long as the user diff --git a/include/llvm/Support/raw_os_ostream.h b/include/llvm/Support/raw_os_ostream.h new file mode 100644 index 0000000000000..e0978b238e31c --- /dev/null +++ b/include/llvm/Support/raw_os_ostream.h @@ -0,0 +1,42 @@ +//===- raw_os_ostream.h - std::ostream adaptor for raw_ostream --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the raw_os_ostream class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_RAW_OS_OSTREAM_H +#define LLVM_SUPPORT_RAW_OS_OSTREAM_H + +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { + +/// raw_os_ostream - A raw_ostream that writes to an std::ostream. This is a +/// simple adaptor class. It does not check for output errors; clients should +/// use the underlying stream to detect errors. +class raw_os_ostream : public raw_ostream { + std::ostream &OS; + + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t Size); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos(); + +public: + raw_os_ostream(std::ostream &O) : OS(O) {} + ~raw_os_ostream(); +}; + +} // end llvm namespace + +#endif diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 8242f04e23ce4..7827dd83804b5 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -14,11 +14,8 @@ #ifndef LLVM_SUPPORT_RAW_OSTREAM_H #define LLVM_SUPPORT_RAW_OSTREAM_H -#include "llvm/ADT/StringExtras.h" -#include -#include -#include -#include +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" namespace llvm { class format_object_base; @@ -31,18 +28,39 @@ namespace llvm { /// a chunk at a time. class raw_ostream { private: + // Do not implement. raw_ostream is noncopyable. 
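TrackingVH composes the RAUW-following behavior with a type check on access; a
sketch, with Instruction as just one example subclass:

  #include "llvm/Instruction.h"
  #include "llvm/Support/ValueHandle.h"
  using namespace llvm;

  void hold(Instruction *I) {
    TrackingVH<Instruction> H(I);
    // If a RAUW replaces I with a non-Instruction, the next access
    // asserts rather than silently mis-casting.
    H->getParent();
  }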
+ void operator=(const raw_ostream &); + raw_ostream(const raw_ostream &); + /// The buffer is handled in such a way that the buffer is /// uninitialized, unbuffered, or out of space when OutBufCur >= /// OutBufEnd. Thus a single comparison suffices to determine if we /// need to take the slow path to write a single character. /// /// The buffer is in one of three states: - /// 1. Unbuffered (Unbuffered == true) - /// 1. Uninitialized (Unbuffered == false && OutBufStart == 0). - /// 2. Buffered (Unbuffered == false && OutBufStart != 0 && - /// OutBufEnd - OutBufStart >= 64). + /// 1. Unbuffered (BufferMode == Unbuffered) + /// 1. Uninitialized (BufferMode != Unbuffered && OutBufStart == 0). + /// 2. Buffered (BufferMode != Unbuffered && OutBufStart != 0 && + /// OutBufEnd - OutBufStart >= 1). + /// + /// If buffered, then the raw_ostream owns the buffer if (BufferMode == + /// InternalBuffer); otherwise the buffer has been set via SetBuffer and is + /// managed by the subclass. + /// + /// If a subclass installs an external buffer using SetBuffer then it can wait + /// for a \see write_impl() call to handle the data which has been put into + /// this buffer. char *OutBufStart, *OutBufEnd, *OutBufCur; - bool Unbuffered; + + enum BufferKind { + Unbuffered = 0, + InternalBuffer, + ExternalBuffer + } BufferMode; + + /// Error This flag is true if an error of any kind has been detected. + /// + bool Error; public: // color order matches ANSI escape sequence, don't change @@ -58,49 +76,66 @@ public: SAVEDCOLOR }; - explicit raw_ostream(bool unbuffered=false) : Unbuffered(unbuffered) { + explicit raw_ostream(bool unbuffered=false) + : BufferMode(unbuffered ? Unbuffered : InternalBuffer), Error(false) { // Start out ready to flush. OutBufStart = OutBufEnd = OutBufCur = 0; } - virtual ~raw_ostream() { - delete [] OutBufStart; - } + virtual ~raw_ostream(); /// tell - Return the current offset with the file. uint64_t tell() { return current_pos() + GetNumBytesInBuffer(); } + /// has_error - Return the value of the flag in this raw_ostream indicating + /// whether an output error has been encountered. + bool has_error() const { + return Error; + } + + /// clear_error - Set the flag read by has_error() to false. If the error + /// flag is set at the time when this raw_ostream's destructor is called, + /// llvm_report_error is called to report the error. Use clear_error() + /// after handling the error to avoid this behavior. + void clear_error() { + Error = false; + } + //===--------------------------------------------------------------------===// // Configuration Interface //===--------------------------------------------------------------------===// - /// SetBufferSize - Set the internal buffer size to the specified amount - /// instead of the default. - void SetBufferSize(unsigned Size=4096) { - assert(Size >= 64 && - "Buffer size must be somewhat large for invariants to hold"); + /// SetBuffered - Set the stream to be buffered, with an automatically + /// determined buffer size. + void SetBuffered(); + + /// SetBufferSize - Set the stream to be buffered, using the + /// specified buffer size. 
+ void SetBufferSize(size_t Size) { flush(); + SetBufferAndMode(new char[Size], Size, InternalBuffer); + } - delete [] OutBufStart; - OutBufStart = new char[Size]; - OutBufEnd = OutBufStart+Size; - OutBufCur = OutBufStart; - Unbuffered = false; + size_t GetBufferSize() { + // If we're supposed to be buffered but haven't actually gotten around + // to allocating the buffer yet, return the value that would be used. + if (BufferMode != Unbuffered && OutBufStart == 0) + return preferred_buffer_size(); + + // Otherwise just return the size of the allocated buffer. + return OutBufEnd - OutBufStart; } - /// SetUnbuffered - Set the streams buffering status. When - /// unbuffered the stream will flush after every write. This routine + /// SetUnbuffered - Set the stream to be unbuffered. When + /// unbuffered, the stream will flush after every write. This routine /// will also flush the buffer immediately when the stream is being /// set to unbuffered. void SetUnbuffered() { flush(); - - delete [] OutBufStart; - OutBufStart = OutBufEnd = OutBufCur = 0; - Unbuffered = true; + SetBufferAndMode(0, 0, Unbuffered); } - unsigned GetNumBytesInBuffer() const { + size_t GetNumBytesInBuffer() const { return OutBufCur - OutBufStart; } @@ -134,22 +169,29 @@ public: return *this; } - raw_ostream &operator<<(const char *Str) { - // Inline fast path, particulary for constant strings where a - // sufficiently smart compiler will simplify strlen. - - unsigned Size = strlen(Str); + raw_ostream &operator<<(const StringRef &Str) { + // Inline fast path, particularly for strings with a known length. + size_t Size = Str.size(); // Make sure we can use the fast path. if (OutBufCur+Size > OutBufEnd) - return write(Str, Size); + return write(Str.data(), Size); - memcpy(OutBufCur, Str, Size); + memcpy(OutBufCur, Str.data(), Size); OutBufCur += Size; return *this; } - raw_ostream &operator<<(const std::string& Str) { + raw_ostream &operator<<(const char *Str) { + // Inline fast path, particulary for constant strings where a sufficiently + // smart compiler will simplify strlen. + + this->operator<<(StringRef(Str)); + return *this; + } + + raw_ostream &operator<<(const std::string &Str) { + // Avoid the fast path, it would only increase code size for a marginal win. write(Str.data(), Str.length()); return *this; } @@ -169,17 +211,21 @@ public: return *this; } - raw_ostream &operator<<(double N) { - this->operator<<(ftostr(N)); - return *this; - } + raw_ostream &operator<<(double N); + + /// write_hex - Output \arg N in hexadecimal, without any prefix or padding. + raw_ostream &write_hex(unsigned long long N); raw_ostream &write(unsigned char C); - raw_ostream &write(const char *Ptr, unsigned Size); + raw_ostream &write(const char *Ptr, size_t Size); // Formatted output, see the format() function in Support/Format.h. raw_ostream &operator<<(const format_object_base &Fmt); + /// indent - Insert 'NumSpaces' spaces. + raw_ostream &indent(unsigned NumSpaces); + + /// Changes the foreground color of text that will be output from this point /// forward. /// @param colors ANSI color to use, the special SAVEDCOLOR can be used to @@ -194,6 +240,11 @@ public: /// outputting colored text, or before program exit. virtual raw_ostream &resetColor() { return *this; } + /// This function determines if this stream is connected to a "tty" or + /// "console" window. That is, the output would be displayed to the user + /// rather than being put on a pipe or stored in a file. 
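The buffering interface now distinguishes internal from external buffers and
carries an explicit error flag; typical client-side use looks like this sketch
(the buffer size and output are illustrative):

  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void emit(raw_ostream &OS) {
    OS.SetBufferSize(4096);  // or SetBuffered() for an automatic size
    OS << "answer: " << 42 << '\n';
    if (OS.is_displayed())   // writing to a terminal?
      OS.changeColor(raw_ostream::GREEN);
    OS.flush();
  }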
+ virtual bool is_displayed() const { return false; } + //===--------------------------------------------------------------------===// // Subclass Interface //===--------------------------------------------------------------------===// @@ -203,8 +254,15 @@ private: /// by subclasses. This writes the \args Size bytes starting at /// \arg Ptr to the underlying stream. /// + /// This function is guaranteed to only be called at a point at which it is + /// safe for the subclass to install a new buffer via SetBuffer. + /// + /// \arg Ptr - The start of the data to be written. For buffered streams this + /// is guaranteed to be the start of the buffer. + /// \arg Size - The number of bytes to be written. + /// /// \invariant { Size > 0 } - virtual void write_impl(const char *Ptr, unsigned Size) = 0; + virtual void write_impl(const char *Ptr, size_t Size) = 0; // An out of line virtual method to provide a home for the class vtable. virtual void handle(); @@ -213,14 +271,42 @@ private: /// counting the bytes currently in the buffer. virtual uint64_t current_pos() = 0; +protected: + /// SetBuffer - Use the provided buffer as the raw_ostream buffer. This is + /// intended for use only by subclasses which can arrange for the output to go + /// directly into the desired output buffer, instead of being copied on each + /// flush. + void SetBuffer(char *BufferStart, size_t Size) { + SetBufferAndMode(BufferStart, Size, ExternalBuffer); + } + + /// preferred_buffer_size - Return an efficient buffer size for the + /// underlying output mechanism. + virtual size_t preferred_buffer_size(); + + /// error_detected - Set the flag indicating that an output error has + /// been encountered. + void error_detected() { Error = true; } + + /// getBufferStart - Return the beginning of the current stream buffer, or 0 + /// if the stream is unbuffered. + const char *getBufferStart() const { return OutBufStart; } + //===--------------------------------------------------------------------===// // Private Interface //===--------------------------------------------------------------------===// private: + /// SetBufferAndMode - Install the given buffer and mode. + void SetBufferAndMode(char *BufferStart, size_t Size, BufferKind Mode); + /// flush_nonempty - Flush the current buffer, which is known to be /// non-empty. This outputs the currently buffered data and resets /// the buffer to empty. void flush_nonempty(); + + /// copy_to_buffer - Copy data into the buffer. Size must not be + /// greater than the number of unused bytes in the buffer. + void copy_to_buffer(const char *Ptr, size_t Size); }; //===----------------------------------------------------------------------===// @@ -235,23 +321,41 @@ class raw_fd_ostream : public raw_ostream { uint64_t pos; /// write_impl - See raw_ostream::write_impl. - virtual void write_impl(const char *Ptr, unsigned Size); + virtual void write_impl(const char *Ptr, size_t Size); /// current_pos - Return the current position within the stream, not /// counting the bytes currently in the buffer. virtual uint64_t current_pos() { return pos; } + /// preferred_buffer_size - Determine an efficient buffer size. + virtual size_t preferred_buffer_size(); + public: - /// raw_fd_ostream - Open the specified file for writing. If an - /// error occurs, information about the error is put into ErrorInfo, - /// and the stream should be immediately destroyed; the string will - /// be empty if no error occurred. 
+ + enum { + /// F_Excl - When opening a file, this flag makes raw_fd_ostream + /// report an error if the file already exists. + F_Excl = 1, + + /// F_Append - When opening a file, if it already exists append to the + /// existing file instead of returning an error. This may not be specified + /// with F_Excl. + F_Append = 2, + + /// F_Binary - The file should be opened in binary mode on platforms that + /// make this distinction. + F_Binary = 4 + }; + + /// raw_fd_ostream - Open the specified file for writing. If an error occurs, + /// information about the error is put into ErrorInfo, and the stream should + /// be immediately destroyed; the string will be empty if no error occurred. + /// This allows optional flags to control how the file will be opened. /// /// \param Filename - The file to open. If this is "-" then the /// stream will use stdout instead. - /// \param Binary - The file should be opened in binary mode on - /// platforms that support this distinction. - raw_fd_ostream(const char *Filename, bool Binary, std::string &ErrorInfo); + raw_fd_ostream(const char *Filename, std::string &ErrorInfo, + unsigned Flags = 0); /// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If /// ShouldClose is true, this closes the file when the stream is destroyed. @@ -264,9 +368,6 @@ public: /// close - Manually flush the stream and close the file. void close(); - /// tell - Return the current offset with the file. - uint64_t tell() { return pos + GetNumBytesInBuffer(); } - /// seek - Flushes the stream and repositions the underlying file descriptor /// positition to the offset specified from the beginning of the file. uint64_t seek(uint64_t off); @@ -274,6 +375,8 @@ public: virtual raw_ostream &changeColor(enum Colors colors, bool bold=false, bool bg=false); virtual raw_ostream &resetColor(); + + virtual bool is_displayed() const; }; /// raw_stdout_ostream - This is a stream that always prints to stdout. @@ -302,49 +405,29 @@ raw_ostream &outs(); /// Use it like: errs() << "foo" << "bar"; raw_ostream &errs(); +/// nulls() - This returns a reference to a raw_ostream which simply discards +/// output. +raw_ostream &nulls(); //===----------------------------------------------------------------------===// // Output Stream Adaptors //===----------------------------------------------------------------------===// -/// raw_os_ostream - A raw_ostream that writes to an std::ostream. This is a -/// simple adaptor class. -class raw_os_ostream : public raw_ostream { - std::ostream &OS; - - /// write_impl - See raw_ostream::write_impl. - virtual void write_impl(const char *Ptr, unsigned Size); - - /// current_pos - Return the current position within the stream, not - /// counting the bytes currently in the buffer. - virtual uint64_t current_pos(); - -public: - raw_os_ostream(std::ostream &O) : OS(O) {} - ~raw_os_ostream(); - - /// tell - Return the current offset with the stream. - uint64_t tell(); -}; - /// raw_string_ostream - A raw_ostream that writes to an std::string. This is a -/// simple adaptor class. +/// simple adaptor class. This class does not encounter output errors. class raw_string_ostream : public raw_ostream { std::string &OS; /// write_impl - See raw_ostream::write_impl. - virtual void write_impl(const char *Ptr, unsigned Size); + virtual void write_impl(const char *Ptr, size_t Size); /// current_pos - Return the current position within the stream, not /// counting the bytes currently in the buffer. 
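Putting the new raw_fd_ostream open flags and the error flag together (the
path handling and function name are illustrative):

  #include <string>
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  bool appendLine(const char *Path) {
    std::string Err;
    raw_fd_ostream OS(Path, Err, raw_fd_ostream::F_Append);
    if (!Err.empty())
      return false;          // open failed; Err describes why
    OS << "hello\n";
    OS.close();
    if (OS.has_error()) {    // clear it, or the destructor reports it
      OS.clear_error();
      return false;
    }
    return true;
  }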
virtual uint64_t current_pos() { return OS.size(); } public: - raw_string_ostream(std::string &O) : OS(O) {} + explicit raw_string_ostream(std::string &O) : OS(O) {} ~raw_string_ostream(); - /// tell - Return the current offset with the stream. - uint64_t tell() { return OS.size() + GetNumBytesInBuffer(); } - /// str - Flushes the stream contents to the target string and returns /// the string's reference. std::string& str() { @@ -354,22 +437,42 @@ public: }; /// raw_svector_ostream - A raw_ostream that writes to an SmallVector or -/// SmallString. This is a simple adaptor class. +/// SmallString. This is a simple adaptor class. This class does not +/// encounter output errors. class raw_svector_ostream : public raw_ostream { SmallVectorImpl &OS; /// write_impl - See raw_ostream::write_impl. - virtual void write_impl(const char *Ptr, unsigned Size); + virtual void write_impl(const char *Ptr, size_t Size); /// current_pos - Return the current position within the stream, not /// counting the bytes currently in the buffer. virtual uint64_t current_pos(); public: - raw_svector_ostream(SmallVectorImpl &O) : OS(O) {} + /// Construct a new raw_svector_ostream. + /// + /// \arg O - The vector to write to; this should generally have at least 128 + /// bytes free to avoid any extraneous memory overhead. + explicit raw_svector_ostream(SmallVectorImpl &O); ~raw_svector_ostream(); - /// tell - Return the current offset with the stream. - uint64_t tell(); + /// str - Flushes the stream contents to the target vector and return a + /// StringRef for the vector contents. + StringRef str(); +}; + +/// raw_null_ostream - A raw_ostream that discards all output. +class raw_null_ostream : public raw_ostream { + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t size); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos(); + +public: + explicit raw_null_ostream() {} + ~raw_null_ostream(); }; } // end llvm namespace diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index 5000a8b859b8c..5f799b850de1c 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -35,7 +35,7 @@ namespace dont_use // important to make the is_class::value idiom zero cost. it // evaluates to a constant 1 or 0 depending on whether the // parameter T is a class or not (respectively). - template char is_class_helper(void(T::*)(void)); + template char is_class_helper(void(T::*)()); template double is_class_helper(...); } @@ -49,6 +49,44 @@ struct is_class enum { value = sizeof(char) == sizeof(dont_use::is_class_helper(0)) }; }; +/// \brief Metafunction that determines whether the two given types are +/// equivalent. +template +struct is_same { + static const bool value = false; +}; + +template +struct is_same { + static const bool value = true; +}; + +// enable_if_c - Enable/disable a template based on a metafunction +template +struct enable_if_c { + typedef T type; +}; + +template struct enable_if_c { }; + +// enable_if - Enable/disable a template based on a metafunction +template +struct enable_if : public enable_if_c { }; + +namespace dont_use { + template char base_of_helper(const volatile Base*); + template double base_of_helper(...); +} + +/// is_base_of - Metafunction to determine whether one type is a base class of +/// (or identical to) another type. 
+template +struct is_base_of { + static const bool value + = is_class::value && is_class::value && + sizeof(char) == sizeof(dont_use::base_of_helper((Derived*)0)); +}; + } #endif diff --git a/include/llvm/SymbolTableListTraits.h b/include/llvm/SymbolTableListTraits.h index 337b76f76680d..39953e1a58099 100644 --- a/include/llvm/SymbolTableListTraits.h +++ b/include/llvm/SymbolTableListTraits.h @@ -28,7 +28,8 @@ #include "llvm/ADT/ilist.h" namespace llvm { - +class ValueSymbolTable; + template class ilist_iterator; template class iplist; template struct ilist_traits; diff --git a/include/llvm/System/Alarm.h b/include/llvm/System/Alarm.h index 9535d23f812ca..7c284167c2ce2 100644 --- a/include/llvm/System/Alarm.h +++ b/include/llvm/System/Alarm.h @@ -39,7 +39,8 @@ namespace sys { /// @returns -1=cancelled, 0=untriggered, 1=triggered int AlarmStatus(); - /// Sleep for n seconds. + /// Sleep for n seconds. Warning: mixing calls to Sleep() and other *Alarm + /// calls may be a bad idea on some platforms (source: Linux man page). /// @returns nothing. void Sleep(unsigned n); diff --git a/include/llvm/System/Disassembler.h b/include/llvm/System/Disassembler.h index d1d8a81007e32..6d1cc0fdcb50c 100644 --- a/include/llvm/System/Disassembler.h +++ b/include/llvm/System/Disassembler.h @@ -23,7 +23,7 @@ namespace sys { /// This function returns true, if there is possible to use some external /// disassembler library. False otherwise. -bool hasDisassembler(void); +bool hasDisassembler(); /// This function provides some "glue" code to call external disassembler /// libraries. diff --git a/include/llvm/System/DynamicLibrary.h b/include/llvm/System/DynamicLibrary.h index 409a9d279c2da..ac58407a194d6 100644 --- a/include/llvm/System/DynamicLibrary.h +++ b/include/llvm/System/DynamicLibrary.h @@ -14,7 +14,6 @@ #ifndef LLVM_SYSTEM_DYNAMIC_LIBRARY_H #define LLVM_SYSTEM_DYNAMIC_LIBRARY_H -#include "llvm/System/Path.h" #include namespace llvm { @@ -30,66 +29,55 @@ namespace sys { /// but rather the main program itself, useful on Windows where the main /// executable cannot be searched. class DynamicLibrary { - /// @name Constructors - /// @{ - public: - /// Construct a DynamicLibrary that represents the currently executing - /// program. The program must have been linked with -export-dynamic or - /// -dlopen self for this to work. - /// @throws std::string indicating why the program couldn't be opened. - /// @brief Open program as dynamic library. - DynamicLibrary(); + DynamicLibrary(); // DO NOT IMPLEMENT + public: + /// This function allows a library to be loaded without instantiating a + /// DynamicLibrary object. Consequently, it is marked as being permanent + /// and will only be unloaded when the program terminates. This returns + /// false on success or returns true and fills in *ErrMsg on failure. + /// @brief Open a dynamic library permanently. + /// + /// NOTE: This function is not thread safe. + /// + static bool LoadLibraryPermanently(const char *filename, + std::string *ErrMsg = 0); - /// After destruction, the symbols of the library will no longer be - /// available to the program. - /// @brief Closes the DynamicLibrary - ~DynamicLibrary(); + /// This function will search through all previously loaded dynamic + /// libraries for the symbol \p symbolName. If it is found, the addressof + /// that symbol is returned. If not, null is returned. Note that this will + /// search permanently loaded libraries (LoadLibraryPermanently) as well + /// as ephemerally loaded libraries (constructors). 
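Backing up to the new type_traits helpers: is_same, enable_if_c/enable_if, and
is_base_of enable simple compile-time dispatch. A toy sketch (all names here
are illustrative, and the array trick predates static_assert):

  #include "llvm/Support/type_traits.h"
  using namespace llvm;

  // Participates in overload resolution only when T is exactly int.
  template <typename T>
  typename enable_if_c<is_same<T, int>::value, T>::type
  identityInt(T V) { return V; }

  // Compile-time check via the negative-array-size trick.
  struct Base {};
  struct Derived : Base {};
  typedef char AssertDerived[is_base_of<Base, Derived>::value ? 1 : -1];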
+ /// @throws std::string on error. + /// @brief Search through libraries for address of a symbol + /// + /// NOTE: This function is not thread safe. + /// + static void *SearchForAddressOfSymbol(const char *symbolName); - /// @} - /// @name Functions - /// @{ - public: - /// This function allows a library to be loaded without instantiating a - /// DynamicLibrary object. Consequently, it is marked as being permanent - /// and will only be unloaded when the program terminates. This returns - /// false on success or returns true and fills in *ErrMsg on failure. - /// @brief Open a dynamic library permanently. - static bool LoadLibraryPermanently(const char* filename, - std::string *ErrMsg = 0); + /// @brief Convenience function for C++ophiles. + /// + /// NOTE: This function is not thread safe. + /// + static void *SearchForAddressOfSymbol(const std::string &symbolName) { + return SearchForAddressOfSymbol(symbolName.c_str()); + } - /// This function will search through all previously loaded dynamic - /// libraries for the symbol \p symbolName. If it is found, the addressof - /// that symbol is returned. If not, null is returned. Note that this will - /// search permanently loaded libraries (LoadLibraryPermanently) as well - /// as ephemerally loaded libraries (constructors). - /// @throws std::string on error. - /// @brief Search through libraries for address of a symbol - static void* SearchForAddressOfSymbol(const char* symbolName); + /// This functions permanently adds the symbol \p symbolName with the + /// value \p symbolValue. These symbols are searched before any + /// libraries. + /// @brief Add searchable symbol/value pair. + /// + /// NOTE: This function is not thread safe. + /// + static void AddSymbol(const char *symbolName, void *symbolValue); - /// @brief Convenience function for C++ophiles. - static void* SearchForAddressOfSymbol(const std::string& symbolName) { - return SearchForAddressOfSymbol(symbolName.c_str()); - } - - /// This functions permanently adds the symbol \p symbolName with the - /// value \p symbolValue. These symbols are searched before any - /// libraries. - /// @brief Add searchable symbol/value pair. - static void AddSymbol(const char* symbolName, void *symbolValue); - - /// @brief Convenience function for C++ophiles. - static void AddSymbol(const std::string& symbolName, void *symbolValue) { - AddSymbol(symbolName.c_str(), symbolValue); - } - - /// @} - /// @name Implementation - /// @{ - protected: - void* handle; // Opaque handle for information about the library - DynamicLibrary(const DynamicLibrary&); ///< Do not implement - DynamicLibrary& operator=(const DynamicLibrary&); ///< Do not implement - /// @} + /// @brief Convenience function for C++ophiles. + /// + /// NOTE: This function is not thread safe. + /// + static void AddSymbol(const std::string &symbolName, void *symbolValue) { + AddSymbol(symbolName.c_str(), symbolValue); + } }; } // End sys namespace diff --git a/include/llvm/System/Memory.h b/include/llvm/System/Memory.h index 136dc8a32895a..d6300db5a9e2c 100644 --- a/include/llvm/System/Memory.h +++ b/include/llvm/System/Memory.h @@ -14,6 +14,7 @@ #ifndef LLVM_SYSTEM_MEMORY_H #define LLVM_SYSTEM_MEMORY_H +#include "llvm/Support/DataTypes.h" #include namespace llvm { @@ -26,11 +27,13 @@ namespace sys { /// @brief Memory block abstraction. 
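With the instance interface gone, DynamicLibrary is used purely through its
statics; a sketch (function name illustrative):

  #include <string>
  #include "llvm/System/DynamicLibrary.h"
  using namespace llvm;

  void *loadAndFind(const char *LibPath, const char *Sym) {
    std::string Err;
    // Note the convention: true means failure, with Err filled in.
    if (sys::DynamicLibrary::LoadLibraryPermanently(LibPath, &Err))
      return 0;
    return sys::DynamicLibrary::SearchForAddressOfSymbol(Sym);
  }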
class MemoryBlock { public: + MemoryBlock() { } + MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { } void *base() const { return Address; } - unsigned size() const { return Size; } + size_t size() const { return Size; } private: void *Address; ///< Address of first byte of memory area - unsigned Size; ///< Size, in bytes of the memory area + size_t Size; ///< Size, in bytes of the memory area friend class Memory; }; @@ -50,7 +53,7 @@ namespace sys { /// a null memory block and fills in *ErrMsg. /// /// @brief Allocate Read/Write/Execute memory. - static MemoryBlock AllocateRWX(unsigned NumBytes, + static MemoryBlock AllocateRWX(size_t NumBytes, const MemoryBlock *NearBlock, std::string *ErrMsg = 0); diff --git a/include/llvm/System/Mutex.h b/include/llvm/System/Mutex.h index d2c457dbc91c6..71d10067c3034 100644 --- a/include/llvm/System/Mutex.h +++ b/include/llvm/System/Mutex.h @@ -93,32 +93,36 @@ namespace llvm MutexImpl(rec), acquired(0), recursive(rec) { } bool acquire() { - if (!mt_only || llvm_is_multithreaded()) + if (!mt_only || llvm_is_multithreaded()) { return MutexImpl::acquire(); - - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. - assert((recursive || acquired == 0) && "Lock already acquired!!"); - ++acquired; - return true; + } else { + // Single-threaded debugging code. This would be racy in + // multithreaded mode, but provides no sanity checks in single + // threaded mode. + assert((recursive || acquired == 0) && "Lock already acquired!!"); + ++acquired; + return true; + } } bool release() { - if (!mt_only || llvm_is_multithreaded()) + if (!mt_only || llvm_is_multithreaded()) { return MutexImpl::release(); - - // Single-threaded debugging code. This would be racy in multithreaded - // mode, but provides not sanity checks in single threaded mode. - assert(((recursive && acquired) || (acquired == 1)) && - "Lock not acquired before release!"); - --acquired; - return true; + } else { + // Single-threaded debugging code. This would be racy in + // multithreaded mode, but provides no sanity checks in single + // threaded mode. + assert(((recursive && acquired) || (acquired == 1)) && + "Lock not acquired before release!"); + --acquired; + return true; + } } bool tryacquire() { if (!mt_only || llvm_is_multithreaded()) return MutexImpl::tryacquire(); - return true; + else return true; } private: @@ -131,15 +135,15 @@ namespace llvm template<bool mt_only> class SmartScopedLock { - SmartMutex<mt_only>* mtx; + SmartMutex<mt_only>& mtx; public: - SmartScopedLock(SmartMutex<mt_only>* m) : mtx(m) { - mtx->acquire(); + SmartScopedLock(SmartMutex<mt_only>& m) : mtx(m) { + mtx.acquire(); } ~SmartScopedLock() { - mtx->release(); + mtx.release(); } }; diff --git a/include/llvm/System/Path.h b/include/llvm/System/Path.h index 05be2212758b4..3b73a128fbd1c 100644 --- a/include/llvm/System/Path.h +++ b/include/llvm/System/Path.h @@ -18,7 +18,6 @@ #include #include #include -#include namespace llvm { namespace sys { @@ -216,7 +215,7 @@ namespace sys { /// Compares \p this Path with \p that Path for inequality. /// @returns true if \p this and \p that refer to different things. /// @brief Inequality Operator - bool operator!=(const Path &that) const; + bool operator!=(const Path &that) const { return !(*this == that); } /// Determines if \p this Path is less than \p that Path. This is required /// so that Path objects can be placed into ordered collections (e.g.
@@ -248,13 +247,7 @@ namespace sys { /// @brief Determines if the path name is empty (invalid). bool isEmpty() const { return path.empty(); } - /// This function returns the current contents of the path as a - /// std::string. This allows the underlying path string to be manipulated. - /// @returns std::string containing the path name. - /// @brief Returns the path as a std::string. - const std::string &toString() const { return path; } - - /// This function returns the last component of the path name. The last + /// This function returns the last component of the path name. The last /// component is the file or directory name occurring after the last /// directory separator. If no directory separator is present, the entire /// path name is returned (i.e. same as str()). @@ -285,6 +278,8 @@ namespace sys { /// @returns a 'C' string containing the path name. /// @brief Returns the path as a C string. const char *c_str() const { return path.c_str(); } + const std::string &str() const { return path; } + /// size - Return the length in bytes of this path name. size_t size() const { return path.size(); } @@ -586,6 +581,7 @@ namespace sys { /// @name Data /// @{ protected: + // Our win32 implementation relies on this string being mutable. mutable std::string path; ///< Storage for the path name. @@ -714,13 +710,6 @@ namespace sys { extern const char PathSeparator; } -std::ostream& operator<<(std::ostream& strm, const sys::Path& aPath); -inline std::ostream& operator<<(std::ostream& strm, - const sys::PathWithStatus& aPath) { - strm << static_cast<const sys::Path&>(aPath); - return strm; -} - } #endif diff --git a/include/llvm/System/Process.h b/include/llvm/System/Process.h index 11dbf759a6c4f..010499acd4bfe 100644 --- a/include/llvm/System/Process.h +++ b/include/llvm/System/Process.h @@ -94,6 +94,11 @@ namespace sys { /// the user rather than being put on a pipe or stored in a file. static bool StandardErrIsDisplayed(); + /// This function determines if the given file descriptor is connected to + /// a "tty" or "console" window. That is, the output would be displayed to + /// the user rather than being put on a pipe or stored in a file. + static bool FileDescriptorIsDisplayed(int fd); + /// This function determines the number of columns in the window /// if standard output is connected to a "tty" or "console" /// window. If standard output is not connected to a tty or diff --git a/include/llvm/System/Program.h b/include/llvm/System/Program.h index 37f55466a50d7..6799562726096 100644 --- a/include/llvm/System/Program.h +++ b/include/llvm/System/Program.h @@ -19,6 +19,9 @@ namespace llvm { namespace sys { + // TODO: Add operations to communicate with the process, redirect its I/O, + // etc. + /// This class provides an abstraction for programs that are executable by the /// operating system. It provides a platform generic way to find executable /// programs from the path and to execute them in various ways. The sys::Path /// @since 1.4 /// @brief An abstraction for finding and executing programs. class Program { + /// Opaque handle for target specific data. + void *Data_; + + // Noncopyable. + Program(const Program& other); + Program& operator=(const Program& other); + /// @name Methods /// @{ - public: - /// This static constructor (factory) will attempt to locate a program in - /// the operating system's file system using some pre-determined set of - /// locations to search (e.g. the PATH on Unix).
- /// @returns A Path object initialized to the path of the program or a - /// Path object that is empty (invalid) if the program could not be found. - /// @throws nothing - /// @brief Construct a Program by finding it by name. - static Path FindProgramByName(const std::string& name); - - /// This function executes the program using the \p arguments provided and - /// waits for the program to exit. This function will block the current - /// program until the invoked program exits. The invoked program will - /// inherit the stdin, stdout, and stderr file descriptors, the - /// environment and other configuration settings of the invoking program. - /// If Path::executable() does not return true when this function is - /// called then a std::string is thrown. - /// @returns an integer result code indicating the status of the program. - /// A zero or positive value indicates the result code of the program. A - /// negative value is the signal number on which it terminated. - /// @see FindProgrambyName - /// @brief Executes the program with the given set of \p args. - static int ExecuteAndWait( - const Path& path, ///< sys::Path object providing the path of the - ///< program to be executed. It is presumed this is the result of - ///< the FindProgramByName method. - const char** args, ///< A vector of strings that are passed to the - ///< program. The first element should be the name of the program. - ///< The list *must* be terminated by a null char* entry. - const char ** env = 0, ///< An optional vector of strings to use for - ///< the program's environment. If not provided, the current program's - ///< environment will be used. - const sys::Path** redirects = 0, ///< An optional array of pointers to - ///< Paths. If the array is null, no redirection is done. The array - ///< should have a size of at least three. If the pointer in the array - ///< are not null, then the inferior process's stdin(0), stdout(1), - ///< and stderr(2) will be redirected to the corresponding Paths. - ///< When an empty Path is passed in, the corresponding file - ///< descriptor will be disconnected (ie, /dev/null'd) in a portable - ///< way. - unsigned secondsToWait = 0, ///< If non-zero, this specifies the amount - ///< of time to wait for the child process to exit. If the time - ///< expires, the child is killed and this call returns. If zero, - ///< this function will wait until the child finishes or forever if - ///< it doesn't. - unsigned memoryLimit = 0, ///< If non-zero, this specifies max. amount - ///< of memory can be allocated by process. If memory usage will be - ///< higher limit, the child is killed and this call returns. If zero - ///< - no memory limit. - std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string - ///< instance in which error messages will be returned. If the string - ///< is non-empty upon return an error occurred while invoking the - ///< program. + public: + + Program(); + ~Program(); + + /// Return process ID of this program. + unsigned GetPid() const; + + /// This function executes the program using the \p arguments provided. The + /// invoked program will inherit the stdin, stdout, and stderr file + /// descriptors, the environment and other configuration settings of the + /// invoking program. If Path::executable() does not return true when this + /// function is called then a std::string is thrown. + /// @returns false in case of error, true otherwise. + /// @see FindProgramByName + /// @brief Executes the program with the given set of \p args. 
+ bool Execute + ( const Path& path, ///< sys::Path object providing the path of the + ///< program to be executed. It is presumed this is the result of + ///< the FindProgramByName method. + const char** args, ///< A vector of strings that are passed to the + ///< program. The first element should be the name of the program. + ///< The list *must* be terminated by a null char* entry. + const char ** env = 0, ///< An optional vector of strings to use for + ///< the program's environment. If not provided, the current program's + ///< environment will be used. + const sys::Path** redirects = 0, ///< An optional array of pointers to + ///< Paths. If the array is null, no redirection is done. The array + ///< should have a size of at least three. If the pointers in the array + ///< are not null, then the inferior process's stdin(0), stdout(1), + ///< and stderr(2) will be redirected to the corresponding Paths. + ///< When an empty Path is passed in, the corresponding file + ///< descriptor will be disconnected (ie, /dev/null'd) in a portable + ///< way. + unsigned memoryLimit = 0, ///< If non-zero, this specifies the maximum + ///< amount of memory that can be allocated by the process. If memory + ///< usage exceeds this limit, the child is killed and this call + ///< returns. If zero - no memory limit. + std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string + ///< instance in which error messages will be returned. If the string + ///< is non-empty upon return an error occurred while invoking the + ///< program. + ); + + /// This function waits for the program to exit. This function will block + /// the current program until the invoked program exits. + /// @returns an integer result code indicating the status of the program. + /// A zero or positive value indicates the result code of the program. A + /// negative value is the signal number on which it terminated. + /// @see Execute + /// @brief Waits for the program to exit. + int Wait + ( unsigned secondsToWait = 0, ///< If non-zero, this specifies the amount + ///< of time to wait for the child process to exit. If the time + ///< expires, the child is killed and this call returns. If zero, + ///< this function will wait until the child finishes or forever if + ///< it doesn't. + std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string + ///< instance in which error messages will be returned. If the string + ///< is non-empty upon return an error occurred while waiting. ); - // These methods change the specified standard stream (stdin or stdout) to - // binary mode. They return true if an error occurred - static bool ChangeStdinToBinary(); - static bool ChangeStdoutToBinary(); + + /// This function terminates the program. + /// @returns true if an error occurred. + /// @see Execute + /// @brief Terminates the program. + bool Kill + ( std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string + ///< instance in which error messages will be returned. If the string + ///< is non-empty upon return an error occurred while killing the + ///< program. + ); + + /// This static constructor (factory) will attempt to locate a program in + /// the operating system's file system using some pre-determined set of + /// locations to search (e.g. the PATH on Unix). + /// @returns A Path object initialized to the path of the program or a + /// Path object that is empty (invalid) if the program could not be found. + /// @throws nothing + /// @brief Construct a Program by finding it by name.
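To make the new split-phase API concrete, here is a minimal sketch of Execute followed by Wait (tool name and timeout are hypothetical; error handling abbreviated):

    #include "llvm/System/Program.h"
    #include "llvm/System/Path.h"
    #include <string>

    int runWithTimeout() {
      llvm::sys::Path Tool = llvm::sys::Program::FindProgramByName("ls");
      const char *Args[] = { "ls", "-l", 0 };  // argv, null-terminated
      std::string Err;
      llvm::sys::Program P;
      if (!P.Execute(Tool, Args, 0, 0, /*memoryLimit=*/0, &Err))
        return -1;                 // Execute returns false in case of error
      // Wait at most 10 seconds; the child is killed if it runs longer.
      return P.Wait(/*secondsToWait=*/10, &Err);
    }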
+ static Path FindProgramByName(const std::string& name); + + // These methods change the specified standard stream (stdin or stdout) to + // binary mode. They return true if an error occurred + static bool ChangeStdinToBinary(); + static bool ChangeStdoutToBinary(); + + /// A convenience function equivalent to Program prg; prg.Execute(..); + /// prg.Wait(..); + /// @throws nothing + /// @see Execute, Wait + static int ExecuteAndWait(const Path& path, + const char** args, + const char ** env = 0, + const sys::Path** redirects = 0, + unsigned secondsToWait = 0, + unsigned memoryLimit = 0, + std::string* ErrMsg = 0); + + /// A convenience function equivalent to Program prg; prg.Execute(..); + /// @throws nothing + /// @see Execute + static void ExecuteNoWait(const Path& path, + const char** args, + const char ** env = 0, + const sys::Path** redirects = 0, + unsigned memoryLimit = 0, + std::string* ErrMsg = 0); + /// @} + }; } } diff --git a/include/llvm/System/RWMutex.h b/include/llvm/System/RWMutex.h index e577d457afb51..3a288180bf075 100644 --- a/include/llvm/System/RWMutex.h +++ b/include/llvm/System/RWMutex.h @@ -141,15 +141,14 @@ namespace llvm /// ScopedReader - RAII acquisition of a reader lock template<bool mt_only> struct SmartScopedReader { - SmartRWMutex<mt_only>* mutex; + SmartRWMutex<mt_only>& mutex; - explicit SmartScopedReader(SmartRWMutex<mt_only>* m) { - mutex = m; - mutex->reader_acquire(); + explicit SmartScopedReader(SmartRWMutex<mt_only>& m) : mutex(m) { + mutex.reader_acquire(); } ~SmartScopedReader() { - mutex->reader_release(); + mutex.reader_release(); } }; typedef SmartScopedReader<false> ScopedReader; @@ -157,15 +156,14 @@ namespace llvm /// ScopedWriter - RAII acquisition of a writer lock template<bool mt_only> struct SmartScopedWriter { - SmartRWMutex<mt_only>* mutex; + SmartRWMutex<mt_only>& mutex; - explicit SmartScopedWriter(SmartRWMutex<mt_only>* m) { - mutex = m; - mutex->writer_acquire(); + explicit SmartScopedWriter(SmartRWMutex<mt_only>& m) : mutex(m) { + mutex.writer_acquire(); } ~SmartScopedWriter() { - mutex->writer_release(); + mutex.writer_release(); } }; typedef SmartScopedWriter<false> ScopedWriter; diff --git a/include/llvm/System/TimeValue.h b/include/llvm/System/TimeValue.h index b9ada00712326..109973042f832 100644 --- a/include/llvm/System/TimeValue.h +++ b/include/llvm/System/TimeValue.h @@ -251,7 +251,7 @@ namespace sys { return seconds_ - PosixZeroTime.seconds_; } - /// Converts the TiemValue into the correspodning number of "ticks" for + /// Converts the TimeValue into the corresponding number of "ticks" for /// Win32 platforms, correcting for the difference in Win32 zero time. /// @brief Convert to windows time (seconds since 12:00:00a Jan 1, 1601) uint64_t toWin32Time() const { @@ -271,7 +271,7 @@ namespace sys { /// Provides conversion of the TimeValue into a readable time & date. /// @returns std::string containing the readable time value /// @brief Convert time to a string.
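Since the scoped guards above now hold a reference rather than a pointer, callers pass the mutex directly; a minimal sketch of the new usage:

    #include "llvm/System/RWMutex.h"

    static llvm::sys::SmartRWMutex<true> Lock;

    void readShared() {
      // Was SmartScopedReader<true> R(&Lock) with the pointer interface.
      llvm::sys::SmartScopedReader<true> R(Lock);
      // ... read shared state; the reader lock is released at scope exit.
    }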
- std::string toString() const; + std::string str() const; /// @} /// @name Mutators diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h index 5cfdc023d4399..58333e2b424f6 100644 --- a/include/llvm/Target/SubtargetFeature.h +++ b/include/llvm/Target/SubtargetFeature.h @@ -20,12 +20,12 @@ #include #include -#include #include #include "llvm/Support/DataTypes.h" namespace llvm { - + class raw_ostream; + //===----------------------------------------------------------------------===// /// /// SubtargetFeatureKV - Used to provide key value pairs for feature and @@ -102,8 +102,7 @@ public: void *getInfo(const SubtargetInfoKV *Table, size_t TableSize); /// Print feature string. - void print(std::ostream &OS) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS) const; // Dump feature info. void dump() const; diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index ebd826a6f4a16..4d65b19e2e71f 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -203,6 +203,8 @@ class Instruction { bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains? bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction? bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction. + bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement? + bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement? // Side effect flags - When set, the flags have these meanings: // @@ -221,6 +223,11 @@ class Instruction { bit mayHaveSideEffects = 0; bit neverHasSideEffects = 0; + // Is this instruction a "real" instruction (with a distinct machine + // encoding), or is it a pseudo instruction used for codegen modeling + // purposes. + bit isCodeGenOnly = 0; + InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling. string Constraints = ""; // OperandConstraint, e.g. $src = $dst. @@ -258,16 +265,63 @@ def ins; /// of operands. def variable_ops; + +/// PointerLikeRegClass - Values that are designed to have pointer width are +/// derived from this. TableGen treats the register class as having a symbolic +/// type that it doesn't know, and resolves the actual regclass to use by using +/// the TargetRegisterInfo::getPointerRegClass() hook at codegen time. +class PointerLikeRegClass<int Kind> { + int RegClassKind = Kind; +} + + /// ptr_rc definition - Mark this operand as being a pointer value whose /// register class is resolved dynamically via a callback to TargetInstrInfo. /// FIXME: We should probably change this to a class which contains a list of /// flags. But currently we have but one flag. -def ptr_rc; +def ptr_rc : PointerLikeRegClass<0>; /// unknown definition - Mark this operand as being of unknown type, causing /// it to be resolved by inference in the context it is used. def unknown; +/// AsmOperandClass - Representation for the kinds of operands which the target +/// specific parser can create and the assembly matcher may need to distinguish. +/// +/// Operand classes are used to define the order in which instructions are +/// matched, to ensure that the instruction which gets matched for any +/// particular list of operands is deterministic. +/// +/// The target specific parser must be able to classify a parsed operand into a +/// unique class which does not partially overlap with any other classes. It can +/// match a subset of some other class, in which case the super class field +/// should be defined.
+class AsmOperandClass { + /// The name to use for this class, which should be usable as an enum value. + string Name = ?; + + /// The super class of this operand. + AsmOperandClass SuperClass = ?; + + /// The name of the method on the target specific operand to call to test + /// whether the operand is an instance of this class. If not set, this will + /// default to "isFoo", where Foo is the AsmOperandClass name. The method + /// signature should be: + /// bool isFoo() const; + string PredicateMethod = ?; + + /// The name of the method on the target specific operand to call to add the + /// target specific operand to an MCInst. If not set, this will default to + /// "addFooOperands", where Foo is the AsmOperandClass name. The method + /// signature should be: + /// void addFooOperands(MCInst &Inst, unsigned N) const; + string RenderMethod = ?; +} + +def ImmAsmOperand : AsmOperandClass { + let Name = "Imm"; +} + /// Operand Types - These provide the built-in operand types that may be used /// by a target. Targets can optionally provide their own operand types as /// needed, though this should not be needed for RISC targets. @@ -276,6 +330,16 @@ class Operand<ValueType ty> { string PrintMethod = "printOperand"; string AsmOperandLowerMethod = ?; dag MIOperandInfo = (ops); + + // ParserMatchClass - The "match class" that operands of this type fit + // in. Match classes are used to define the order in which instructions are + // matched, to ensure that which instruction gets matched is deterministic. + // + // The target specific parser must be able to classify a parsed operand + // into a unique class, which does not partially overlap with any other + // classes. It can match a subset of some other class, in which case + // ParserMatchSuperClass should be set to the name of that class. + AsmOperandClass ParserMatchClass = ImmAsmOperand; } def i1imm : Operand<i1>; @@ -302,8 +366,8 @@ class PredicateOperand } /// OptionalDefOperand - This is used to define an optional definition operand -/// for an instruction. DefaultOps is the register the operand represents if none -/// is supplied, e.g. zero_reg. +/// for an instruction. DefaultOps is the register the operand represents if +/// none is supplied, e.g. zero_reg. class OptionalDefOperand<ValueType ty, dag OpTypes, dag defaultops> : Operand<ty> { let MIOperandInfo = OpTypes; @@ -329,7 +393,8 @@ class InstrInfo { bit isLittleEndianEncoding = 0; } -// Standard Instructions. +// Standard Pseudo Instructions. +let isCodeGenOnly = 1 in { def PHI : Instruction { let OutOperandList = (ops); let InOperandList = (ops variable_ops); @@ -363,12 +428,12 @@ def GC_LABEL : Instruction { let Namespace = "TargetInstrInfo"; let hasCtrlDep = 1; } -def DECLARE : Instruction { +def KILL : Instruction { let OutOperandList = (ops); let InOperandList = (ops variable_ops); let AsmString = ""; let Namespace = "TargetInstrInfo"; - let hasCtrlDep = 1; + let neverHasSideEffects = 1; } def EXTRACT_SUBREG : Instruction { let OutOperandList = (ops unknown:$dst); @@ -409,6 +474,39 @@ def COPY_TO_REGCLASS : Instruction { let neverHasSideEffects = 1; let isAsCheapAsAMove = 1; } +} + +//===----------------------------------------------------------------------===// // AsmParser - This class can be implemented by targets that wish to implement +// .s file parsing. +// +// Subtargets can have multiple different assembly parsers (e.g. AT&T vs Intel +// syntax on X86 for example). +// +class AsmParser { + // AsmParserClassName - This specifies the suffix to use for the asmparser + // class.
Generated AsmParser classes are always prefixed with the target + // name. + string AsmParserClassName = "AsmParser"; + + // Variant - AsmParsers can be of multiple different variants. Variants are + // used to support targets that need to parse multiple formats for the + // assembly language. + int Variant = 0; + + // CommentDelimiter - If given, the delimiter string used to recognize + // comments which are hard coded in the .td assembler strings for individual + // instructions. + string CommentDelimiter = ""; + + // RegisterPrefix - If given, the token prefix which indicates a register + // token. This is used by the matcher to automatically recognize hard coded + // register tokens as constrained registers, instead of tokens, for the + // purposes of matching. + string RegisterPrefix = ""; +} +def DefaultAsmParser : AsmParser; + //===----------------------------------------------------------------------===// // AsmWriter - This class can be implemented by targets that need to customize @@ -434,6 +532,17 @@ class AsmWriter { // will specify which alternative to use. For example "{x|y|z}" with Variant // == 1, will expand to "y". int Variant = 0; + + + // FirstOperandColumn/OperandSpacing - If the assembler syntax uses a columnar + // layout, the asmwriter can actually generate output in these columns (in + // verbose-asm mode). These two values indicate the width of the first column + // (the "opcode" area) and the width to reserve for subsequent operands. When + // verbose asm mode is enabled, operands will be indented to respect this. + int FirstOperandColumn = -1; + + // OperandSpacing - Space between operand columns. + int OperandSpacing = -1; } def DefaultAsmWriter : AsmWriter; @@ -445,6 +554,9 @@ class Target { // InstructionSet - Instruction set description for this target. InstrInfo InstructionSet; + // AssemblyParsers - The AsmParser instances available for this target. + list<AsmParser> AssemblyParsers = [DefaultAsmParser]; + // AssemblyWriters - The AsmWriter instances available for this target. list<AsmWriter> AssemblyWriters = [DefaultAsmWriter]; } diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h new file mode 100644 index 0000000000000..ef1fc49cefee5 --- /dev/null +++ b/include/llvm/Target/TargetAsmParser.h @@ -0,0 +1,65 @@ +//===-- llvm/Target/TargetAsmParser.h - Target Assembly Parser --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETPARSER_H +#define LLVM_TARGET_TARGETPARSER_H + +#include "llvm/MC/MCAsmLexer.h" + +namespace llvm { +class MCAsmParser; +class MCInst; +class StringRef; +class Target; + +/// TargetAsmParser - Generic interface to target specific assembly parsers. +class TargetAsmParser { + TargetAsmParser(const TargetAsmParser &); // DO NOT IMPLEMENT + void operator=(const TargetAsmParser &); // DO NOT IMPLEMENT +protected: // Can only create subclasses. + TargetAsmParser(const Target &); + + /// TheTarget - The Target that this machine was created for. + const Target &TheTarget; + +public: + virtual ~TargetAsmParser(); + + const Target &getTarget() const { return TheTarget; } + + /// ParseInstruction - Parse one assembly instruction. + /// + /// The parser is positioned following the instruction name.
The target + /// specific instruction parser should parse the entire instruction and + /// construct the appropriate MCInst, or emit an error. On success, the entire + /// line should be parsed up to and including the end-of-statement token. On + /// failure, the parser is not required to read to the end of the line. + // + /// \param AP - The current parser object. + /// \param Name - The instruction name. + /// \param Inst [out] - On success, the parsed instruction. + /// \return True on failure. + virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst) = 0; + + /// ParseDirective - Parse a target specific assembler directive + /// + /// The parser is positioned following the directive name. The target + /// specific directive parser should parse the entire directive doing or + /// recording any target specific work, or return true and do nothing if the + /// directive is not target specific. If the directive is specific for + /// the target, the entire line is parsed up to and including the + /// end-of-statement token and false is returned. + /// + /// \param ID - the identifier token of the directive. + virtual bool ParseDirective(AsmToken DirectiveID) = 0; +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td index da3cbd2088678..ceaeb0b5038b5 100644 --- a/include/llvm/Target/TargetCallingConv.td +++ b/include/llvm/Target/TargetCallingConv.td @@ -58,6 +58,10 @@ class CCIfNest : CCIf<"ArgFlags.isNest()", A> {} /// the specified action. class CCIfSplit : CCIf<"ArgFlags.isSplit()", A> {} +/// CCIfSRet - If this argument is marked with the 'sret' attribute, apply +/// the specified action. +class CCIfSRet : CCIf<"ArgFlags.isSRet()", A> {} + /// CCIfNotVarArg - If the current function is not vararg - apply the action class CCIfNotVarArg : CCIf<"!State.isVarArg()", A> {} @@ -105,6 +109,12 @@ class CCBitConvertToType : CCAction { ValueType DestTy = destTy; } +/// CCPassIndirect - If applied, this stores the value to stack and passes the pointer +/// as normal argument. +class CCPassIndirect : CCAction { + ValueType DestTy = destTy; +} + /// CCDelegateTo - This action invokes the specified sub-calling-convention. It /// is successful if the specified CC matches. class CCDelegateTo : CCAction { diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h index 82abfc72864fd..f8ea64b4ea66b 100644 --- a/include/llvm/Target/TargetData.h +++ b/include/llvm/Target/TargetData.h @@ -22,6 +22,7 @@ #include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/SmallVector.h" #include @@ -33,6 +34,7 @@ class IntegerType; class StructType; class StructLayout; class GlobalVariable; +class LLVMContext; /// Enum used to categorize the alignment types stored by TargetAlignElem enum AlignTypeEnum { @@ -89,6 +91,9 @@ private: */ static const TargetAlignElem InvalidAlignmentElem; + // Opaque pointer for the StructType -> StructLayout map. + mutable void* LayoutMap; + //! Set/initialize target alignments void setAlignment(AlignTypeEnum align_type, unsigned char abi_align, unsigned char pref_align, uint32_t bit_width); @@ -111,9 +116,8 @@ public: /// @note This has to exist, because this is a pass, but it should never be /// used. TargetData() : ImmutablePass(&ID) { - assert(0 && "ERROR: Bad TargetData ctor used. " - "Tool did not specify a TargetData to use?"); - abort(); + llvm_report_error("Bad TargetData ctor used. 
" + "Tool did not specify a TargetData to use?"); } /// Constructs a TargetData from a specification string. See init(). @@ -131,7 +135,8 @@ public: PointerMemSize(TD.PointerMemSize), PointerABIAlign(TD.PointerABIAlign), PointerPrefAlign(TD.PointerPrefAlign), - Alignments(TD.Alignments) + Alignments(TD.Alignments), + LayoutMap(0) { } ~TargetData(); // Not virtual, do not subclass this class @@ -229,7 +234,7 @@ public: /// getIntPtrType - Return an unsigned integer type that is the same size or /// greater to the host pointer size. /// - const IntegerType *getIntPtrType() const; + const IntegerType *getIntPtrType(LLVMContext &C) const; /// getIndexedOffset - return the offset from the beginning of the type for /// the specified indices. This is used to implement getelementptr. diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h index a5b30c4f6e3d6..7cb693155c293 100644 --- a/include/llvm/Target/TargetELFWriterInfo.h +++ b/include/llvm/Target/TargetELFWriterInfo.h @@ -97,9 +97,26 @@ namespace llvm { /// ELF relocation entry. virtual bool hasRelocationAddend() const = 0; - /// getAddendForRelTy - Gets the addend value for an ELF relocation entry - /// based on the target relocation type. If addend is not used returns 0. - virtual long int getAddendForRelTy(unsigned RelTy) const = 0; + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const = 0; + + /// getRelTySize - Returns the size of relocatable field in bits + virtual unsigned getRelocationTySize(unsigned RelTy) const = 0; + + /// isPCRelativeRel - True if the relocation type is pc relative + virtual bool isPCRelativeRel(unsigned RelTy) const = 0; + + /// getJumpTableRelocationTy - Returns the machine relocation type used + /// to reference a jumptable. + virtual unsigned getAbsoluteLabelMachineRelTy() const = 0; + + /// computeRelocation - Some relocatable fields could be relocated + /// directly, avoiding the relocation symbol emission, compute the + /// final relocation value for this symbol. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const = 0; }; } // end llvm namespace diff --git a/include/llvm/Target/TargetFrameInfo.h b/include/llvm/Target/TargetFrameInfo.h index 3e26b9dd01bea..975d15659c15f 100644 --- a/include/llvm/Target/TargetFrameInfo.h +++ b/include/llvm/Target/TargetFrameInfo.h @@ -31,13 +31,22 @@ public: StackGrowsUp, // Adding to the stack increases the stack address StackGrowsDown // Adding to the stack decreases the stack address }; + + // Maps a callee saved register to a stack slot with a fixed offset. + struct SpillSlot { + unsigned Reg; + int Offset; // Offset relative to stack pointer on function entry. 
+ }; private: StackDirection StackDir; unsigned StackAlignment; + unsigned TransientStackAlignment; int LocalAreaOffset; public: - TargetFrameInfo(StackDirection D, unsigned StackAl, int LAO) - : StackDir(D), StackAlignment(StackAl), LocalAreaOffset(LAO) {} + TargetFrameInfo(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1) + : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), + LocalAreaOffset(LAO) {} virtual ~TargetFrameInfo(); @@ -48,12 +57,20 @@ public: /// StackDirection getStackGrowthDirection() const { return StackDir; } - /// getStackAlignment - This method returns the number of bytes that the stack - /// pointer must be aligned to. Typically, this is the largest alignment for - /// any data object in the target. + /// getStackAlignment - This method returns the number of bytes to which the + /// stack pointer must be aligned on entry to a function. Typically, this + /// is the largest alignment for any data object in the target. /// unsigned getStackAlignment() const { return StackAlignment; } + /// getTransientStackAlignment - This method returns the number of bytes to + /// which the stack pointer must be aligned at all times, even between + /// calls. + /// + unsigned getTransientStackAlignment() const { + return TransientStackAlignment; + } + /// getOffsetOfLocalArea - This method returns the offset of the local area /// from the stack pointer on entrance to a function. /// @@ -65,10 +82,10 @@ public: /// /// Each entry in this array contains a pair, indicating the /// fixed offset from the incoming stack pointer that each register should be - /// spilled at. If a register is not listed here, the code generator is + /// spilled at. If a register is not listed here, the code generator is /// allowed to spill it anywhere it chooses. /// - virtual const std::pair * + virtual const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const { NumEntries = 0; return 0; diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h index 622a216c33c63..d828a236cd88e 100644 --- a/include/llvm/Target/TargetInstrDesc.h +++ b/include/llvm/Target/TargetInstrDesc.h @@ -18,7 +18,8 @@ namespace llvm { class TargetRegisterClass; - +class TargetRegisterInfo; + //===----------------------------------------------------------------------===// // Machine Operand Flags and Description //===----------------------------------------------------------------------===// @@ -45,14 +46,28 @@ namespace TOI { class TargetOperandInfo { public: /// RegClass - This specifies the register class enumeration of the operand - /// if the operand is a register. If not, this contains 0. + /// if the operand is a register. If isLookupPtrRegClass is set, then this is + /// an index that is passed to TargetRegisterInfo::getPointerRegClass(x) to + /// get a dynamic register class. + /// + /// NOTE: This member should be considered to be private, all access should go + /// through "getRegClass(TRI)" below. unsigned short RegClass; + + /// Flags - These are flags from the TOI::OperandFlags enum. unsigned short Flags; + /// Lower 16 bits are used to specify which constraints are set. The higher 16 /// bits are used to specify the value of constraints (4 bits each). - unsigned int Constraints; + unsigned Constraints; /// Currently no other information. + /// getRegClass - Get the register class for the operand, handling resolution + /// of "symbolic" pointer register classes etc. If this is not a register + /// operand, this returns null. 
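Roughly, clients now resolve operand register classes through the accessor rather than reading RegClass directly; a sketch (assuming a MachineInstr *MI and a TargetRegisterInfo *TRI are in scope):

    // Resolve each operand's register class via the new hook; this also
    // covers isLookupPtrRegClass() operands, for which getRegClass defers
    // to TRI->getPointerRegClass() with the stored kind.
    const TargetInstrDesc &TID = MI->getDesc();
    for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
      if (const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI)) {
        (void)RC; // e.g. constrain a virtual register to RC
      }
    }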
+ const TargetRegisterClass *getRegClass(const TargetRegisterInfo *TRI) const; + + /// isLookupPtrRegClass - Set if this operand is a pointer value and it /// requires a callback to look up its register class. bool isLookupPtrRegClass() const { return Flags&(1 <getDesc().isRematerializable() && - isReallyTriviallyReMaterializable(MI); + bool isTriviallyReMaterializable(const MachineInstr *MI, + AliasAnalysis *AA = 0) const { + return MI->getOpcode() == IMPLICIT_DEF || + (MI->getDesc().isRematerializable() && + (isReallyTriviallyReMaterializable(MI, AA) || + isReallyTriviallyReMaterializableGeneric(MI, AA))); } protected: /// isReallyTriviallyReMaterializable - For instructions with opcodes for - /// which the M_REMATERIALIZABLE flag is set, this function tests whether the - /// instruction itself is actually trivially rematerializable, considering - /// its operands. This is used for targets that have instructions that are - /// only trivially rematerializable for specific uses. This predicate must - /// return false if the instruction has any side effects other than - /// producing a value, or if it requres any address registers that are not - /// always available. - virtual bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const { - return true; + /// which the M_REMATERIALIZABLE flag is set, this hook lets the target + /// specify whether the instruction is actually trivially rematerializable, + /// taking into consideration its operands. This predicate must return false + /// if the instruction has any side effects other than producing a value, or + /// if it requres any address registers that are not always available. + virtual bool isReallyTriviallyReMaterializable(const MachineInstr *MI, + AliasAnalysis *AA) const { + return false; } +private: + /// isReallyTriviallyReMaterializableGeneric - For instructions with opcodes + /// for which the M_REMATERIALIZABLE flag is set and the target hook + /// isReallyTriviallyReMaterializable returns false, this function does + /// target-independent tests to determine if the instruction is really + /// trivially rematerializable. + bool isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const; + public: /// Return true if the instruction is a register to register move and return /// the source and dest operands and their sub-register indices by reference. @@ -150,19 +165,9 @@ public: /// specific location targeting a new destination register. virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, + unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig) const = 0; - /// isInvariantLoad - Return true if the specified instruction (which is - /// marked mayLoad) is loading from a location whose value is invariant across - /// the function. For example, loading a value from the constant pool or from - /// from the argument area of a function if it does not change. This should - /// only return true of *all* loads the instruction does are invariant (if it - /// does multiple loads). - virtual bool isInvariantLoad(const MachineInstr *MI) const { - return false; - } - /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. 
When this flag is set, the target /// may be able to convert a two-address instruction into one or more true @@ -194,13 +199,11 @@ public: virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI = false) const = 0; - /// CommuteChangesDestination - Return true if commuting the specified - /// instruction will also changes the destination operand. Also return the - /// current operand index of the would be new destination register by - /// reference. This can happen when the commutable instruction is also a - /// two-address instruction. - virtual bool CommuteChangesDestination(MachineInstr *MI, - unsigned &OpIdx) const = 0; + /// findCommutedOpIndices - If specified MI is commutable, return the two + /// operand indices that would swap value. Return true if the instruction + /// is not in a form which this routine understands. + virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const = 0; /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't @@ -212,15 +215,15 @@ public: /// 2. If this block ends with only an unconditional branch, it sets TBB to be /// the destination block. /// 3. If this block ends with an conditional branch and it falls through to - /// an successor block, it sets TBB to be the branch destination block and + /// a successor block, it sets TBB to be the branch destination block and /// a list of operands that evaluate the condition. These /// operands can be passed to other TargetInstrInfo methods to create new /// branches. - /// 4. If this block ends with an conditional branch and an unconditional - /// block, it returns the 'true' destination in TBB, the 'false' - /// destination in FBB, and a list of operands that evaluate the condition. - /// These operands can be passed to other TargetInstrInfo methods to create - /// new branches. + /// 4. If this block ends with a conditional branch followed by an + /// unconditional branch, it returns the 'true' destination in TBB, the + /// 'false' destination in FBB, and a list of operands that evaluate the + /// condition. These operands can be passed to other TargetInstrInfo + /// methods to create new branches. /// /// Note that RemoveBranch and InsertBranch must be implemented to support /// cases where this method returns success. @@ -234,7 +237,7 @@ public: bool AllowModify = false) const { return true; } - + /// RemoveBranch - Remove the branching code at the end of the specific MBB. /// This is only invoked in cases where AnalyzeBranch returns success. It /// returns the number of instructions that were removed. @@ -242,13 +245,12 @@ public: assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!"); return 0; } - - /// InsertBranch - Insert a branch into the end of the specified - /// MachineBasicBlock. This operands to this method are the same as those - /// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch - /// returns success and when an unconditional branch (TBB is non-null, FBB is - /// null, Cond is empty) needs to be inserted. It returns the number of - /// instructions inserted. + + /// InsertBranch - Insert branch code into the end of the specified + /// MachineBasicBlock. The operands to this method are the same as those + /// returned by AnalyzeBranch. This is only invoked in cases where + /// AnalyzeBranch returns success. It returns the number of instructions + /// inserted. 
/// /// It is also invoked by tail merging to add unconditional branches in /// cases where AnalyzeBranch doesn't apply because there was no original @@ -285,18 +287,6 @@ public: assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!"); } - /// storeRegToAddr - Store the specified register of the given register class - /// to the specified address. The store instruction is to be added to the - /// given machine basic block before the specified machine instruction. If - /// isKill is true, the register operand is the last use and must be marked - /// kill. - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - assert(0 && "Target didn't implement TargetInstrInfo::storeRegToAddr!"); - } - /// loadRegFromStackSlot - Load the specified register of the given register /// class from the specified stack frame index. The load instruction is to be /// added to the given machine basic block before the specified machine @@ -307,16 +297,6 @@ public: const TargetRegisterClass *RC) const { assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!"); } - - /// loadRegFromAddr - Load the specified register of the given register class - /// class from the specified address. The load instruction is to be added to - /// the given machine basic block before the specified machine instruction. - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromAddr!"); - } /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee /// saved registers and returns true if it isn't possible / profitable to do @@ -429,11 +409,8 @@ public: /// insertNoop - Insert a noop into the instruction stream at the specified /// point. virtual void insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { - assert(0 && "Target didn't implement insertNoop!"); - abort(); - } - + MachineBasicBlock::iterator MI) const; + /// isPredicated - Returns true if the instruction is already predicated. /// virtual bool isPredicated(const MachineInstr *MI) const { @@ -479,9 +456,15 @@ public: return 0; } - /// GetFunctionSizeInBytes - Returns the size of the specified MachineFunction. + /// GetFunctionSizeInBytes - Returns the size of the specified + /// MachineFunction. /// virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const = 0; + + /// Measure the specified inline asm to determine an approximation of its + /// length. 
+ virtual unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const; }; /// TargetInstrInfoImpl - This is the default implementation of @@ -495,23 +478,17 @@ protected: public: virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI = false) const; - virtual bool CommuteChangesDestination(MachineInstr *MI, - unsigned &OpIdx) const; + virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const; virtual bool PredicateInstruction(MachineInstr *MI, const SmallVectorImpl<MachineOperand> &Pred) const; virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, + unsigned DestReg, unsigned SubReg, const MachineInstr *Orig) const; virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const; }; -/// getInstrOperandRegClass - Return register class of the operand of an -/// instruction of the specified TargetInstrDesc. -const TargetRegisterClass* -getInstrOperandRegClass(const TargetRegisterInfo *TRI, - const TargetInstrDesc &II, unsigned Op); - } // End llvm namespace #endif diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h index 18931ea7fb486..420fa94ce76bc 100644 --- a/include/llvm/Target/TargetInstrItineraries.h +++ b/include/llvm/Target/TargetInstrItineraries.h @@ -7,90 +7,160 @@ // //===----------------------------------------------------------------------===// // -// This file describes the structures used for instruction itineraries and -// states. This is used by schedulers to determine instruction states and -// latencies. +// This file describes the structures used for instruction +// itineraries, stages, and operand reads/writes. This is used by +// schedulers to determine instruction stages and latencies. // //===----------------------------------------------------------------------===// #ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H #define LLVM_TARGET_TARGETINSTRITINERARIES_H +#include <algorithm> + namespace llvm { //===----------------------------------------------------------------------===// -/// Instruction stage - These values represent a step in the execution of an -/// instruction. The latency represents the number of discrete time slots used -/// need to complete the stage. Units represent the choice of functional units -/// that can be used to complete the stage. Eg. IntUnit1, IntUnit2. +/// Instruction stage - These values represent a non-pipelined step in +/// the execution of an instruction. Cycles represents the number of +/// discrete time slots needed to complete the stage. Units represent +/// the choice of functional units that can be used to complete the +/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many +/// cycles should elapse from the start of this stage to the start of +/// the next stage in the itinerary. A value of -1 indicates that the +/// next stage should start immediately after the current one. +/// For example: +/// +/// { 1, x, -1 } +/// indicates that the stage occupies FU x for 1 cycle and that +/// the next stage starts immediately after this one. +/// +/// { 2, x|y, 1 } +/// indicates that the stage occupies either FU x or FU y for 2 +/// consecutive cycles and that the next stage starts one cycle +/// after this stage starts. That is, the stage requirements +/// overlap in time. +/// +/// { 1, x, 0 } +/// indicates that the stage occupies FU x for 1 cycle and that +/// the next stage starts in this same cycle.
This can be used to +/// indicate that the instruction requires multiple stages at the +/// same time. /// struct InstrStage { - unsigned Cycles; ///< Length of stage in machine cycles - unsigned Units; ///< Choice of functional units + unsigned Cycles_; ///< Length of stage in machine cycles + unsigned Units_; ///< Choice of functional units + int NextCycles_; ///< Number of machine cycles to next stage + + /// getCycles - returns the number of cycles the stage is occupied + unsigned getCycles() const { + return Cycles_; + } + + /// getUnits - returns the choice of FUs + unsigned getUnits() const { + return Units_; + } + + /// getNextCycles - returns the number of cycles from the start of + /// this stage to the start of the next stage in the itinerary + unsigned getNextCycles() const { + return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_; + } }; //===----------------------------------------------------------------------===// -/// Instruction itinerary - An itinerary represents a sequential series of steps -/// required to complete an instruction. Itineraries are represented as -/// sequences of instruction stages. +/// Instruction itinerary - An itinerary represents the scheduling +/// information for an instruction. This includes a set of stages +/// occupied by the instruction, and the pipeline cycle in which +/// operands are read and written. /// struct InstrItinerary { - unsigned First; ///< Index of first stage in itinerary - unsigned Last; ///< Index of last + 1 stage in itinerary + unsigned FirstStage; ///< Index of first stage in itinerary + unsigned LastStage; ///< Index of last + 1 stage in itinerary + unsigned FirstOperandCycle; ///< Index of first operand rd/wr + unsigned LastOperandCycle; ///< Index of last + 1 operand rd/wr }; - //===----------------------------------------------------------------------===// /// Instruction itinerary Data - Itinerary data supplied by a subtarget to be /// used by a target. /// struct InstrItineraryData { const InstrStage *Stages; ///< Array of stages selected + const unsigned *OperandCycles; ///< Array of operand cycles selected const InstrItinerary *Itineratries; ///< Array of itineraries selected /// Ctors. /// - InstrItineraryData() : Stages(0), Itineratries(0) {} - InstrItineraryData(const InstrStage *S, const InstrItinerary *I) - : Stages(S), Itineratries(I) {} + InstrItineraryData() : Stages(0), OperandCycles(0), Itineratries(0) {} + InstrItineraryData(const InstrStage *S, const unsigned *OS, + const InstrItinerary *I) - : Stages(S), Itineratries(I) {} + : Stages(S), OperandCycles(OS), Itineratries(I) {} /// isEmpty - Returns true if there are no itineraries. /// bool isEmpty() const { return Itineratries == 0; } - - /// begin - Return the first stage of the itinerary. + + /// isEndMarker - Returns true if the index is for the end marker + /// itinerary. + /// + bool isEndMarker(unsigned ItinClassIndx) const { + return ((Itineratries[ItinClassIndx].FirstStage == ~0U) && + (Itineratries[ItinClassIndx].LastStage == ~0U)); + } + + /// beginStage - Return the first stage of the itinerary. /// - const InstrStage *begin(unsigned ItinClassIndx) const { - unsigned StageIdx = Itineratries[ItinClassIndx].First; + const InstrStage *beginStage(unsigned ItinClassIndx) const { + unsigned StageIdx = Itineratries[ItinClassIndx].FirstStage; return Stages + StageIdx; } - /// end - Return the last+1 stage of the itinerary. + /// endStage - Return the last+1 stage of the itinerary.
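A worked example of the overlap semantics (functional-unit masks hypothetical): given the two stages below, stage 1 starts one cycle after stage 0, so the getStageLatency computation shown just below yields max(0 + 2, 1 + 3) = 4 machine cycles.

    // Hypothetical itinerary: FU masks are illustrative only.
    static const llvm::InstrStage Stages[] = {
      { 2, 1 << 0, 1 },  // occupy FU_X for 2 cycles; next stage starts at +1
      { 3, 1 << 1, -1 }, // occupy FU_Y for 3 cycles; next follows immediately
    };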
/// - const InstrStage *end(unsigned ItinClassIndx) const { - unsigned StageIdx = Itineratries[ItinClassIndx].Last; + const InstrStage *endStage(unsigned ItinClassIndx) const { + unsigned StageIdx = Itineratries[ItinClassIndx].LastStage; return Stages + StageIdx; } - /// getLatency - Return the scheduling latency of the given class. A - /// simple latency value for an instruction is an over-simplification - /// for some architectures, but it's a reasonable first approximation. + /// getStageLatency - Return the total stage latency of the given + /// class. The latency is the maximum completion time for any stage + /// in the itinerary. /// - unsigned getLatency(unsigned ItinClassIndx) const { - // If the target doesn't provide latency information, use a simple - // non-zero default value for all instructions. + unsigned getStageLatency(unsigned ItinClassIndx) const { + // If the target doesn't provide itinerary information, use a + // simple non-zero default value for all instructions. if (isEmpty()) return 1; - // Just sum the cycle count for each stage. - unsigned Latency = 0; - for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx); - IS != E; ++IS) - Latency += IS->Cycles; + // Calculate the maximum completion time for any stage. + unsigned Latency = 0, StartCycle = 0; + for (const InstrStage *IS = beginStage(ItinClassIndx), + *E = endStage(ItinClassIndx); IS != E; ++IS) { + Latency = std::max(Latency, StartCycle + IS->getCycles()); + StartCycle += IS->getNextCycles(); + } + return Latency; } + + /// getOperandCycle - Return the cycle for the given class and + /// operand. Return -1 if no cycle is specified for the operand. + /// + int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const { + if (isEmpty()) + return -1; + + unsigned FirstIdx = Itineratries[ItinClassIndx].FirstOperandCycle; + unsigned LastIdx = Itineratries[ItinClassIndx].LastOperandCycle; + if ((FirstIdx + OperandIdx) >= LastIdx) + return -1; + + return (int)OperandCycles[FirstIdx + OperandIdx]; + } }; diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index d24ca679ab76b..4f567b0b203a6 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -22,6 +22,7 @@ #ifndef LLVM_TARGET_TARGETLOWERING_H #define LLVM_TARGET_TARGETLOWERING_H +#include "llvm/CallingConv.h" #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" @@ -54,6 +55,7 @@ namespace llvm { class TargetMachine; class TargetRegisterClass; class TargetSubtarget; + class TargetLoweringObjectFile; class Value; // FIXME: should this be here? @@ -77,6 +79,8 @@ namespace llvm { /// target-specific constructs to SelectionDAG operators. /// class TargetLowering { + TargetLowering(const TargetLowering&); // DO NOT IMPLEMENT + void operator=(const TargetLowering&); // DO NOT IMPLEMENT public: /// LegalizeAction - This enum indicates whether operations are valid for a /// target, and if not, what action should be used to make them valid. @@ -87,12 +91,6 @@ public: Custom // Use the LowerOperation hook to implement custom lowering. }; - enum OutOfRangeShiftAmount { - Undefined, // Oversized shift amounts are undefined (default). - Mask, // Shift amounts are auto masked (anded) to value size. - Extend // Oversized shift pulls in zeros or sign bits. - }; - enum BooleanContent { // How the target represents true/false values. UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. 
ZeroOrOneBooleanContent, // All bits zero except for bit 0. @@ -104,17 +102,18 @@ public: SchedulingForRegPressure // Scheduling for lowest register pressure. }; - explicit TargetLowering(TargetMachine &TM); + /// NOTE: The constructor takes ownership of TLOF. + explicit TargetLowering(TargetMachine &TM, TargetLoweringObjectFile *TLOF); virtual ~TargetLowering(); TargetMachine &getTargetMachine() const { return TM; } const TargetData *getTargetData() const { return TD; } + TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; } bool isBigEndian() const { return !IsLittleEndian; } bool isLittleEndian() const { return IsLittleEndian; } MVT getPointerTy() const { return PointerTy; } MVT getShiftAmountTy() const { return ShiftAmountTy; } - OutOfRangeShiftAmount getShiftAmountFlavor() const {return ShiftAmtHandling; } /// usesGlobalOffsetTable - Return true if this target uses a GOT for PIC /// codegen. @@ -137,7 +136,8 @@ public: /// the condition operand of SELECT and BRCOND nodes. In the case of /// BRCOND the argument passed is MVT::Other since there are no other /// operands to get a type hint from. - virtual MVT getSetCCResultType(MVT VT) const; + virtual + MVT::SimpleValueType getSetCCResultType(EVT VT) const; /// getBooleanContents - For targets without i1 registers, this gives the /// nature of the high-bits of boolean values held in types wider than i1. @@ -153,9 +153,9 @@ public: /// getRegClassFor - Return the register class that should be used for the /// specified value type. This may only be called on legal types. - TargetRegisterClass *getRegClassFor(MVT VT) const { - assert((unsigned)VT.getSimpleVT() < array_lengthof(RegClassForVT)); - TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT()]; + TargetRegisterClass *getRegClassFor(EVT VT) const { + assert(VT.isSimple() && "getRegClassFor called on illegal type!"); + TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; assert(RC && "This value type is not natively supported!"); return RC; } @@ -163,10 +163,10 @@ public: /// isTypeLegal - Return true if the target has native support for the /// specified value type. This means that it has a register that directly /// holds it without promotions or expansions. - bool isTypeLegal(MVT VT) const { + bool isTypeLegal(EVT VT) const { assert(!VT.isSimple() || - (unsigned)VT.getSimpleVT() < array_lengthof(RegClassForVT)); - return VT.isSimple() && RegClassForVT[VT.getSimpleVT()] != 0; + (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); + return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != 0; } class ValueTypeActionImpl { @@ -187,23 +187,23 @@ public: ValueTypeActions[3] = RHS.ValueTypeActions[3]; } - LegalizeAction getTypeAction(MVT VT) const { + LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const { if (VT.isExtended()) { if (VT.isVector()) { return VT.isPow2VectorType() ? Expand : Promote; } if (VT.isInteger()) // First promote to a power-of-two size, then expand if necessary. - return VT == VT.getRoundIntegerType() ? Expand : Promote; + return VT == VT.getRoundIntegerType(Context) ? 
Expand : Promote; assert(0 && "Unsupported extended type!"); return Legal; } - unsigned I = VT.getSimpleVT(); + unsigned I = VT.getSimpleVT().SimpleTy; assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0])); return (LegalizeAction)((ValueTypeActions[I>>4] >> ((2*I) & 31)) & 3); } - void setTypeAction(MVT VT, LegalizeAction Action) { - unsigned I = VT.getSimpleVT(); + void setTypeAction(EVT VT, LegalizeAction Action) { + unsigned I = VT.getSimpleVT().SimpleTy; assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0])); ValueTypeActions[I>>4] |= Action << ((I*2) & 31); } @@ -217,8 +217,8 @@ public: /// it is already legal (return 'Legal') or we need to promote it to a larger /// type (return 'Promote'), or we need to expand it into multiple registers /// of smaller integer type (return 'Expand'). 'Custom' is not an option. - LegalizeAction getTypeAction(MVT VT) const { - return ValueTypeActions.getTypeAction(VT); + LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const { + return ValueTypeActions.getTypeAction(Context, VT); } /// getTypeToTransformTo - For types supported by the target, this is an @@ -227,33 +227,37 @@ public: /// than the largest integer register, this contains one step in the expansion /// to get to the smaller register. For illegal floating point types, this /// returns the integer type to transform to. - MVT getTypeToTransformTo(MVT VT) const { + EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { if (VT.isSimple()) { - assert((unsigned)VT.getSimpleVT() < array_lengthof(TransformToType)); - MVT NVT = TransformToType[VT.getSimpleVT()]; - assert(getTypeAction(NVT) != Promote && + assert((unsigned)VT.getSimpleVT().SimpleTy < + array_lengthof(TransformToType)); + EVT NVT = TransformToType[VT.getSimpleVT().SimpleTy]; + assert(getTypeAction(Context, NVT) != Promote && "Promote may not follow Expand or Promote"); return NVT; } if (VT.isVector()) { - MVT NVT = VT.getPow2VectorType(); + EVT NVT = VT.getPow2VectorType(Context); if (NVT == VT) { // Vector length is a power of 2 - split to half the size. unsigned NumElts = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType(); - return (NumElts == 1) ? EltVT : MVT::getVectorVT(EltVT, NumElts / 2); + EVT EltVT = VT.getVectorElementType(); + return (NumElts == 1) ? + EltVT : EVT::getVectorVT(Context, EltVT, NumElts / 2); } // Promote to a power of two size, avoiding multi-step promotion. - return getTypeAction(NVT) == Promote ? getTypeToTransformTo(NVT) : NVT; + return getTypeAction(Context, NVT) == Promote ? + getTypeToTransformTo(Context, NVT) : NVT; } else if (VT.isInteger()) { - MVT NVT = VT.getRoundIntegerType(); + EVT NVT = VT.getRoundIntegerType(Context); if (NVT == VT) // Size is a power of two - expand to half the size. - return MVT::getIntegerVT(VT.getSizeInBits() / 2); + return EVT::getIntegerVT(Context, VT.getSizeInBits() / 2); else // Promote to a power of two size, avoiding multi-step promotion. - return getTypeAction(NVT) == Promote ? getTypeToTransformTo(NVT) : NVT; + return getTypeAction(Context, NVT) == Promote ? + getTypeToTransformTo(Context, NVT) : NVT; } assert(0 && "Unsupported extended type!"); return MVT(MVT::Other); // Not reached @@ -263,14 +267,14 @@ public: /// identity function. For types that must be expanded (i.e. integer types /// that are larger than the largest integer register or illegal floating /// point types), this returns the largest legal type it will be expanded to. 
- MVT getTypeToExpandTo(MVT VT) const { + EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { assert(!VT.isVector()); while (true) { - switch (getTypeAction(VT)) { + switch (getTypeAction(Context, VT)) { case Legal: return VT; case Expand: - VT = getTypeToTransformTo(VT); + VT = getTypeToTransformTo(Context, VT); break; default: assert(false && "Type is not legal nor is it to be expanded!"); @@ -281,18 +285,18 @@ } /// getVectorTypeBreakdown - Vector types are broken down into some number of - /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 - /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. - /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. + /// legal first class types. For example, EVT::v8f32 maps to 2 EVT::v4f32 + /// with Altivec or SSE1, or 8 promoted EVT::f64 values with the X86 FP stack. + /// Similarly, EVT::v2i64 turns into 4 EVT::i32 values with both PPC and X86. /// /// This method returns the number of registers needed, and the VT for each /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. /// - unsigned getVectorTypeBreakdown(MVT VT, - MVT &IntermediateVT, + unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, unsigned &NumIntermediates, - MVT &RegisterVT) const; + EVT &RegisterVT) const; /// getTgtMemIntrinsic: Given an intrinsic, checks if on the target the /// intrinsic will need to map to a MemIntrinsicNode (touches memory). If @@ -300,7 +304,7 @@ /// information into the IntrinsicInfo that was passed to the function. typedef struct IntrinsicInfo { unsigned opc; // target opcode - MVT memVT; // memory VT + EVT memVT; // memory VT const Value* ptrVal; // value representing memory location int offset; // offset off of ptrVal unsigned align; // alignment @@ -319,7 +323,7 @@ /// If there is no vector type that we want to widen to, returns MVT::Other. /// When and where to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. - virtual MVT getWidenVectorType(MVT VT) const; + virtual EVT getWidenVectorType(EVT VT) const; typedef std::vector<APFloat>::const_iterator legal_fpimm_iterator; legal_fpimm_iterator legal_fpimm_begin() const { @@ -334,7 +338,7 @@ /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, - MVT VT) const { + EVT VT) const { return true; } @@ -343,7 +347,7 @@ /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, - MVT VT) const { + EVT VT) const { return false; } @@ -351,12 +355,12 @@ /// it is legal, needs to be promoted to a larger size, needs to be /// expanded to some other code sequence, or the target has a custom expander /// for it.
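// [Editor's sketch, not part of the patch] For integers, getTypeToTransformTo
// above reduces to: round a non-power-of-two width up to the next power of
// two (Promote), otherwise halve it (Expand); getTypeToExpandTo then iterates
// until the result is legal. A standalone trace, assuming a hypothetical
// target whose widest legal integer type is i32:
#include <iostream>

static unsigned nextStep(unsigned Bits) {
  unsigned Pow2 = 1;
  while (Pow2 < Bits) Pow2 <<= 1;
  return Pow2 == Bits ? Bits / 2  // power of two: Expand to half the size
                      : Pow2;     // otherwise: Promote to a power of two
}

int main() {
  unsigned Bits = 140;
  std::cout << "i" << Bits;      // prints: i140 -> i256 -> i128 -> i64 -> i32
  while (Bits > 32) {
    Bits = nextStep(Bits);
    std::cout << " -> i" << Bits;
  }
  std::cout << "\n";
  return 0;
}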
- LegalizeAction getOperationAction(unsigned Op, MVT VT) const { + LegalizeAction getOperationAction(unsigned Op, EVT VT) const { if (VT.isExtended()) return Expand; assert(Op < array_lengthof(OpActions[0]) && - (unsigned)VT.getSimpleVT() < sizeof(OpActions[0][0])*8 && + (unsigned)VT.getSimpleVT().SimpleTy < sizeof(OpActions[0][0])*8 && "Table isn't big enough!"); - unsigned I = (unsigned) VT.getSimpleVT(); + unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; unsigned J = I & 31; I = I >> 5; return (LegalizeAction)((OpActions[I][Op] >> (J*2) ) & 3); @@ -365,7 +369,7 @@ public: /// isOperationLegalOrCustom - Return true if the specified operation is /// legal on this target or can be made legal with custom lowering. This /// is used to help guide high-level lowering decisions. - bool isOperationLegalOrCustom(unsigned Op, MVT VT) const { + bool isOperationLegalOrCustom(unsigned Op, EVT VT) const { return (VT == MVT::Other || isTypeLegal(VT)) && (getOperationAction(Op, VT) == Legal || getOperationAction(Op, VT) == Custom); @@ -373,7 +377,7 @@ public: /// isOperationLegal - Return true if the specified operation is legal on this /// target. - bool isOperationLegal(unsigned Op, MVT VT) const { + bool isOperationLegal(unsigned Op, EVT VT) const { return (VT == MVT::Other || isTypeLegal(VT)) && getOperationAction(Op, VT) == Legal; } @@ -382,16 +386,17 @@ public: /// either it is legal, needs to be promoted to a larger size, needs to be /// expanded to some other code sequence, or the target has a custom expander /// for it. - LegalizeAction getLoadExtAction(unsigned LType, MVT VT) const { + LegalizeAction getLoadExtAction(unsigned LType, EVT VT) const { assert(LType < array_lengthof(LoadExtActions) && - (unsigned)VT.getSimpleVT() < sizeof(LoadExtActions[0])*4 && + (unsigned)VT.getSimpleVT().SimpleTy < sizeof(LoadExtActions[0])*4 && "Table isn't big enough!"); - return (LegalizeAction)((LoadExtActions[LType] >> (2*VT.getSimpleVT())) & 3); + return (LegalizeAction)((LoadExtActions[LType] >> + (2*VT.getSimpleVT().SimpleTy)) & 3); } /// isLoadExtLegal - Return true if the specified load with extension is legal /// on this target. - bool isLoadExtLegal(unsigned LType, MVT VT) const { + bool isLoadExtLegal(unsigned LType, EVT VT) const { return VT.isSimple() && (getLoadExtAction(LType, VT) == Legal || getLoadExtAction(LType, VT) == Custom); @@ -401,18 +406,20 @@ public: /// treated: either it is legal, needs to be promoted to a larger size, needs /// to be expanded to some other code sequence, or the target has a custom /// expander for it. - LegalizeAction getTruncStoreAction(MVT ValVT, - MVT MemVT) const { - assert((unsigned)ValVT.getSimpleVT() < array_lengthof(TruncStoreActions) && - (unsigned)MemVT.getSimpleVT() < sizeof(TruncStoreActions[0])*4 && + LegalizeAction getTruncStoreAction(EVT ValVT, + EVT MemVT) const { + assert((unsigned)ValVT.getSimpleVT().SimpleTy < + array_lengthof(TruncStoreActions) && + (unsigned)MemVT.getSimpleVT().SimpleTy < + sizeof(TruncStoreActions[0])*4 && "Table isn't big enough!"); - return (LegalizeAction)((TruncStoreActions[ValVT.getSimpleVT()] >> - (2*MemVT.getSimpleVT())) & 3); + return (LegalizeAction)((TruncStoreActions[ValVT.getSimpleVT().SimpleTy] >> + (2*MemVT.getSimpleVT().SimpleTy)) & 3); } /// isTruncStoreLegal - Return true if the specified store with truncation is /// legal on this target. 
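// [Editor's sketch, not part of the patch] getOperationAction above unpacks a
// 2-bit LegalizeAction stored 32 entries per uint64_t: I picks the word
// (type index / 32), J the lane (type index % 32), and the action lives at
// bit offset J*2. A standalone round-trip of the same packing, reduced to a
// single operation:
#include <cassert>
#include <cstdint>

enum Action { Legal = 0, Promote = 1, Expand = 2, Custom = 3 };

static uint64_t Words[4];  // room for 128 hypothetical value types

static void setAction(unsigned VT, Action A) {
  unsigned I = VT >> 5, J = (VT & 31) * 2;
  Words[I] = (Words[I] & ~(uint64_t(3) << J)) | (uint64_t(A) << J);
}

static Action getAction(unsigned VT) {
  unsigned I = VT >> 5, J = (VT & 31) * 2;
  return Action((Words[I] >> J) & 3);
}

int main() {
  setAction(7, Custom);
  setAction(40, Expand);          // index 40 lands in the second word
  assert(getAction(7) == Custom && getAction(40) == Expand);
  assert(getAction(8) == Legal);  // untouched entries default to Legal (0)
  return 0;
}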
- bool isTruncStoreLegal(MVT ValVT, MVT MemVT) const { + bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { return isTypeLegal(ValVT) && MemVT.isSimple() && (getTruncStoreAction(ValVT, MemVT) == Legal || getTruncStoreAction(ValVT, MemVT) == Custom); @@ -423,16 +430,17 @@ /// expanded to some other code sequence, or the target has a custom expander /// for it. LegalizeAction - getIndexedLoadAction(unsigned IdxMode, MVT VT) const { + getIndexedLoadAction(unsigned IdxMode, EVT VT) const { assert( IdxMode < array_lengthof(IndexedModeActions[0][0]) && - ((unsigned)VT.getSimpleVT()) < MVT::LAST_VALUETYPE && + ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode])); + return (LegalizeAction)((IndexedModeActions[ + (unsigned)VT.getSimpleVT().SimpleTy][0][IdxMode])); } /// isIndexedLoadLegal - Return true if the specified indexed load is legal /// on this target. - bool isIndexedLoadLegal(unsigned IdxMode, MVT VT) const { + bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { return VT.isSimple() && (getIndexedLoadAction(IdxMode, VT) == Legal || getIndexedLoadAction(IdxMode, VT) == Custom); @@ -443,16 +451,17 @@ /// expanded to some other code sequence, or the target has a custom expander /// for it. LegalizeAction - getIndexedStoreAction(unsigned IdxMode, MVT VT) const { + getIndexedStoreAction(unsigned IdxMode, EVT VT) const { assert(IdxMode < array_lengthof(IndexedModeActions[0][1]) && - (unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE && + (unsigned)VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)((IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode])); + return (LegalizeAction)((IndexedModeActions[ + (unsigned)VT.getSimpleVT().SimpleTy][1][IdxMode])); } /// isIndexedStoreLegal - Return true if the specified indexed store is legal /// on this target. - bool isIndexedStoreLegal(unsigned IdxMode, MVT VT) const { + bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { return VT.isSimple() && (getIndexedStoreAction(IdxMode, VT) == Legal || getIndexedStoreAction(IdxMode, VT) == Custom); @@ -463,17 +472,19 @@ /// expanded to some other code sequence, or the target has a custom expander /// for it. LegalizeAction - getConvertAction(MVT FromVT, MVT ToVT) const { - assert((unsigned)FromVT.getSimpleVT() < array_lengthof(ConvertActions) && - (unsigned)ToVT.getSimpleVT() < sizeof(ConvertActions[0])*4 && + getConvertAction(EVT FromVT, EVT ToVT) const { + assert((unsigned)FromVT.getSimpleVT().SimpleTy < + array_lengthof(ConvertActions) && + (unsigned)ToVT.getSimpleVT().SimpleTy < + sizeof(ConvertActions[0])*4 && "Table isn't big enough!"); - return (LegalizeAction)((ConvertActions[FromVT.getSimpleVT()] >> - (2*ToVT.getSimpleVT())) & 3); + return (LegalizeAction)((ConvertActions[FromVT.getSimpleVT().SimpleTy] >> + (2*ToVT.getSimpleVT().SimpleTy)) & 3); } /// isConvertLegal - Return true if the specified conversion is legal /// on this target. - bool isConvertLegal(MVT FromVT, MVT ToVT) const { + bool isConvertLegal(EVT FromVT, EVT ToVT) const { return isTypeLegal(FromVT) && isTypeLegal(ToVT) && (getConvertAction(FromVT, ToVT) == Legal || getConvertAction(FromVT, ToVT) == Custom); @@ -483,19 +494,19 @@ /// either it is legal, needs to be expanded to some other code sequence, /// or the target has a custom expander for it.
LegalizeAction - getCondCodeAction(ISD::CondCode CC, MVT VT) const { + getCondCodeAction(ISD::CondCode CC, EVT VT) const { assert((unsigned)CC < array_lengthof(CondCodeActions) && - (unsigned)VT.getSimpleVT() < sizeof(CondCodeActions[0])*4 && + (unsigned)VT.getSimpleVT().SimpleTy < sizeof(CondCodeActions[0])*4 && "Table isn't big enough!"); LegalizeAction Action = (LegalizeAction) - ((CondCodeActions[CC] >> (2*VT.getSimpleVT())) & 3); + ((CondCodeActions[CC] >> (2*VT.getSimpleVT().SimpleTy)) & 3); assert(Action != Promote && "Can't promote condition code!"); return Action; } /// isCondCodeLegal - Return true if the specified condition code is legal /// on this target. - bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { + bool isCondCodeLegal(ISD::CondCode CC, EVT VT) const { return getCondCodeAction(CC, VT) == Legal || getCondCodeAction(CC, VT) == Custom; } @@ -503,22 +514,22 @@ /// getTypeToPromoteTo - If the action for this operation is to promote, this /// method returns the ValueType to promote to. - MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { + EVT getTypeToPromoteTo(unsigned Op, EVT VT) const { assert(getOperationAction(Op, VT) == Promote && "This operation isn't promoted!"); // See if this has an explicit type specified. std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>::const_iterator PTTI = - PromoteToType.find(std::make_pair(Op, VT.getSimpleVT())); + PromoteToType.find(std::make_pair(Op, VT.getSimpleVT().SimpleTy)); if (PTTI != PromoteToType.end()) return PTTI->second; assert((VT.isInteger() || VT.isFloatingPoint()) && "Cannot autopromote this type, add it with AddPromotedToType."); - MVT NVT = VT; + EVT NVT = VT; do { - NVT = (MVT::SimpleValueType)(NVT.getSimpleVT()+1); + NVT = (MVT::SimpleValueType)(NVT.getSimpleVT().SimpleTy+1); assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && "Didn't find type to promote to!"); } while (!isTypeLegal(NVT) || @@ -526,13 +537,13 @@ return NVT; } - /// getValueType - Return the MVT corresponding to this LLVM type. + /// getValueType - Return the EVT corresponding to this LLVM type. /// This is fixed by the LLVM operations except for the pointer size. If - /// AllowUnknown is true, this will return MVT::Other for types with no MVT + /// AllowUnknown is true, this will return MVT::Other for types with no EVT /// counterpart (e.g. structs), otherwise it will assert. - MVT getValueType(const Type *Ty, bool AllowUnknown = false) const { - MVT VT = MVT::getMVT(Ty, AllowUnknown); - return VT == MVT::iPTR ? PointerTy : VT; + EVT getValueType(const Type *Ty, bool AllowUnknown = false) const { + EVT VT = EVT::getEVT(Ty, AllowUnknown); + return VT == MVT::iPTR ? PointerTy : VT; } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate @@ -542,22 +553,31 @@ /// getRegisterType - Return the type of registers that this ValueType will /// eventually require. - MVT getRegisterType(MVT VT) const { + EVT getRegisterType(MVT VT) const { + assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); + return RegisterTypeForVT[VT.SimpleTy]; + } + + /// getRegisterType - Return the type of registers that this ValueType will + /// eventually require.
+ EVT getRegisterType(LLVMContext &Context, EVT VT) const { if (VT.isSimple()) { - assert((unsigned)VT.getSimpleVT() < array_lengthof(RegisterTypeForVT)); - return RegisterTypeForVT[VT.getSimpleVT()]; + assert((unsigned)VT.getSimpleVT().SimpleTy < + array_lengthof(RegisterTypeForVT)); + return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; } if (VT.isVector()) { - MVT VT1, RegisterVT; + EVT VT1, RegisterVT; unsigned NumIntermediates; - (void)getVectorTypeBreakdown(VT, VT1, NumIntermediates, RegisterVT); + (void)getVectorTypeBreakdown(Context, VT, VT1, + NumIntermediates, RegisterVT); return RegisterVT; } if (VT.isInteger()) { - return getRegisterType(getTypeToTransformTo(VT)); + return getRegisterType(Context, getTypeToTransformTo(Context, VT)); } assert(0 && "Unsupported extended type!"); - return MVT(MVT::Other); // Not reached + return EVT(MVT::Other); // Not reached } /// getNumRegisters - Return the number of registers that this ValueType will @@ -566,19 +586,20 @@ /// into pieces. For types like i140, which are first promoted then expanded, /// it is the number of registers needed to hold all the bits of the original /// type. For an i140 on a 32 bit machine this means 5 registers. - unsigned getNumRegisters(MVT VT) const { + unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { if (VT.isSimple()) { - assert((unsigned)VT.getSimpleVT() < array_lengthof(NumRegistersForVT)); - return NumRegistersForVT[VT.getSimpleVT()]; + assert((unsigned)VT.getSimpleVT().SimpleTy < + array_lengthof(NumRegistersForVT)); + return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; } if (VT.isVector()) { - MVT VT1, VT2; + EVT VT1, VT2; unsigned NumIntermediates; - return getVectorTypeBreakdown(VT, VT1, NumIntermediates, VT2); + return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); } if (VT.isInteger()) { unsigned BitWidth = VT.getSizeInBits(); - unsigned RegWidth = getRegisterType(VT).getSizeInBits(); + unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); return (BitWidth + RegWidth - 1) / RegWidth; } assert(0 && "Unsupported extended type!"); @@ -588,7 +609,7 @@ /// ShouldShrinkFPConstant - If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type /// in order to save space and / or reduce runtime. - virtual bool ShouldShrinkFPConstant(MVT VT) const { return true; } + virtual bool ShouldShrinkFPConstant(EVT VT) const { return true; } /// hasTargetDAGCombine - If true, the target has custom DAG combine /// transformations that it can perform for the specified node. @@ -616,13 +637,13 @@ unsigned getMaxStoresPerMemmove() const { return maxStoresPerMemmove; } /// This function returns true if the target allows unaligned memory accesses. - /// This is used, for example, in situations where an array copy/move/set is - /// converted to a sequence of store operations. It's use helps to ensure that - /// such replacements don't generate code that causes an alignment error - /// (trap) on the target machine. + /// of the specified type. This is used, for example, in situations where an + /// array copy/move/set is converted to a sequence of store operations. Its + /// use helps to ensure that such replacements don't generate code that causes + /// an alignment error (trap) on the target machine. /// @brief Determine if the target supports unaligned memory accesses.
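// [Editor's note, not part of the patch] The i140 example in the
// getNumRegisters comment is worth checking by hand: the count is just the
// bit width divided by the register width, rounded up.
#include <cassert>

static unsigned numRegisters(unsigned BitWidth, unsigned RegWidth) {
  return (BitWidth + RegWidth - 1) / RegWidth;  // same rounding as above
}

int main() {
  assert(numRegisters(140, 32) == 5);  // five 32-bit registers hold an i140
  assert(numRegisters(64, 32) == 2);
  return 0;
}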
- bool allowsUnalignedMemoryAccesses() const { - return allowUnalignedMemoryAccesses; + virtual bool allowsUnalignedMemoryAccesses(EVT VT) const { + return false; } /// This function returns true if the target would benefit from code placement /// optimization. @@ -634,9 +655,9 @@ /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove lowering. - /// It returns MVT::iAny if SelectionDAG should be responsible for + /// It returns EVT::iAny if SelectionDAG should be responsible for /// determining it. - virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align, + virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr, SelectionDAG &DAG) const { return MVT::iAny; @@ -804,14 +825,17 @@ struct DAGCombinerInfo { void *DC; // The DAG Combiner object. bool BeforeLegalize; + bool BeforeLegalizeOps; bool CalledByLegalizer; public: SelectionDAG &DAG; - DAGCombinerInfo(SelectionDAG &dag, bool bl, bool cl, void *dc) - : DC(dc), BeforeLegalize(bl), CalledByLegalizer(cl), DAG(dag) {} + DAGCombinerInfo(SelectionDAG &dag, bool bl, bool blo, bool cl, void *dc) - : DC(dc), BeforeLegalize(bl), BeforeLegalizeOps(blo), + : DC(dc), BeforeLegalize(bl), BeforeLegalizeOps(blo), + CalledByLegalizer(cl), DAG(dag) {} bool isBeforeLegalize() const { return BeforeLegalize; } + bool isBeforeLegalizeOps() const { return BeforeLegalizeOps; } bool isCalledByLegalizer() const { return CalledByLegalizer; } void AddToWorklist(SDNode *N); @@ -825,7 +849,7 @@ /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. - SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const; @@ -878,12 +902,6 @@ SchedPreferenceInfo = Pref; } - /// setShiftAmountFlavor - Describe how the target handles out of range shift - /// amounts. - void setShiftAmountFlavor(OutOfRangeShiftAmount OORSA) { - ShiftAmtHandling = OORSA; - } - /// setUseUnderscoreSetJmp - Indicate whether this target prefers to /// use _setjmp to implement llvm.setjmp or the non _ version. /// Defaults to false. @@ -936,10 +954,10 @@ /// addRegisterClass - Add the specified register class as an available /// regclass for the specified value type. This indicates the selector can /// handle values of that class natively. - void addRegisterClass(MVT VT, TargetRegisterClass *RC) { - assert((unsigned)VT.getSimpleVT() < array_lengthof(RegClassForVT)); + void addRegisterClass(EVT VT, TargetRegisterClass *RC) { + assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)); AvailableRegClasses.push_back(std::make_pair(VT, RC)); - RegClassForVT[VT.getSimpleVT()] = RC; + RegClassForVT[VT.getSimpleVT().SimpleTy] = RC; } /// computeRegisterProperties - Once all of the register classes are added, @@ -950,9 +968,7 @@ /// with the specified type and indicate what to do about it. void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { - assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0][0])*8 && - Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); - unsigned I = (unsigned) VT.getSimpleVT(); + unsigned I = (unsigned)VT.SimpleTy; unsigned J = I & 31; I = I >> 5; OpActions[I][Op] &= ~(uint64_t(3UL) << (J*2)); @@ -963,24 +979,22 @@ /// not work with the specified type and indicate what to do about it.
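// [Editor's sketch, not part of the patch] allowsUnalignedMemoryAccesses is
// now a per-type virtual hook rather than the old allowUnalignedMemoryAccesses
// flag (whose removal appears further down). A hypothetical target that
// tolerates misaligned scalar integers but not vectors or floating point
// might override it as follows; "MyTargetLowering" is invented for
// illustration:
bool MyTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
  if (!VT.isSimple())
    return false;      // be conservative for extended types
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return true;       // the hardware fixes up misaligned scalar accesses
  default:
    return false;      // keep vector and FP accesses aligned
  }
}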
void setLoadExtAction(unsigned ExtType, MVT VT, LegalizeAction Action) { - assert((unsigned)VT.getSimpleVT() < sizeof(LoadExtActions[0])*4 && + assert((unsigned)VT.SimpleTy < sizeof(LoadExtActions[0])*4 && ExtType < array_lengthof(LoadExtActions) && "Table isn't big enough!"); - LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2); - LoadExtActions[ExtType] |= (uint64_t)Action << VT.getSimpleVT()*2; + LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.SimpleTy*2); + LoadExtActions[ExtType] |= (uint64_t)Action << VT.SimpleTy*2; } /// setTruncStoreAction - Indicate that the specified truncating store does /// not work with the specified type and indicate what to do about it. void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { - assert((unsigned)ValVT.getSimpleVT() < array_lengthof(TruncStoreActions) && - (unsigned)MemVT.getSimpleVT() < sizeof(TruncStoreActions[0])*4 && + assert((unsigned)ValVT.SimpleTy < array_lengthof(TruncStoreActions) && + (unsigned)MemVT.SimpleTy < sizeof(TruncStoreActions[0])*4 && "Table isn't big enough!"); - TruncStoreActions[ValVT.getSimpleVT()] &= ~(uint64_t(3UL) << - MemVT.getSimpleVT()*2); - TruncStoreActions[ValVT.getSimpleVT()] |= (uint64_t)Action << - MemVT.getSimpleVT()*2; + TruncStoreActions[ValVT.SimpleTy] &= ~(uint64_t(3UL) << MemVT.SimpleTy*2); + TruncStoreActions[ValVT.SimpleTy] |= (uint64_t)Action << MemVT.SimpleTy*2; } /// setIndexedLoadAction - Indicate that the specified indexed load does or @@ -989,10 +1003,10 @@ /// TargetLowering.cpp void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { - assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE && + assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE && IdxMode < array_lengthof(IndexedModeActions[0][0]) && "Table isn't big enough!"); - IndexedModeActions[(unsigned)VT.getSimpleVT()][0][IdxMode] = (uint8_t)Action; + IndexedModeActions[(unsigned)VT.SimpleTy][0][IdxMode] = (uint8_t)Action; } /// setIndexedStoreAction - Indicate that the specified indexed store does or @@ -1001,33 +1015,32 @@ /// TargetLowering.cpp void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { - assert((unsigned)VT.getSimpleVT() < MVT::LAST_VALUETYPE && + assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE && IdxMode < array_lengthof(IndexedModeActions[0][1] ) && "Table isn't big enough!"); - IndexedModeActions[(unsigned)VT.getSimpleVT()][1][IdxMode] = (uint8_t)Action; + IndexedModeActions[(unsigned)VT.SimpleTy][1][IdxMode] = (uint8_t)Action; } /// setConvertAction - Indicate that the specified conversion does or does /// not work with the specified type and indicate what to do about it. void setConvertAction(MVT FromVT, MVT ToVT, LegalizeAction Action) { - assert((unsigned)FromVT.getSimpleVT() < array_lengthof(ConvertActions) && - (unsigned)ToVT.getSimpleVT() < sizeof(ConvertActions[0])*4 && + assert((unsigned)FromVT.SimpleTy < array_lengthof(ConvertActions) && + (unsigned)ToVT.SimpleTy < sizeof(ConvertActions[0])*4 && "Table isn't big enough!"); - ConvertActions[FromVT.getSimpleVT()] &= ~(uint64_t(3UL) << - ToVT.getSimpleVT()*2); - ConvertActions[FromVT.getSimpleVT()] |= (uint64_t)Action << - ToVT.getSimpleVT()*2; + ConvertActions[FromVT.SimpleTy] &= ~(uint64_t(3UL) << ToVT.SimpleTy*2); + ConvertActions[FromVT.SimpleTy] |= (uint64_t)Action << ToVT.SimpleTy*2; } /// setCondCodeAction - Indicate that the specified condition code is or isn't /// supported on the target and indicate what to do about it.
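// [Editor's sketch, not part of the patch] The protected setters above are
// meant to be called from a target's TargetLowering constructor. All "My*"
// names are hypothetical; note how the constructor now also hands a
// TargetLoweringObjectFile to the base class, which takes ownership of it:
MyTargetLowering::MyTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  addRegisterClass(MVT::i32, MyTarget::GPRRegisterClass); // hypothetical class
  setOperationAction(ISD::SDIV,   MVT::i32, Expand);  // no divider: expand
  setOperationAction(ISD::BR_CC,  MVT::i32, Custom);  // lowered by hand
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1,  Promote); // no i1 extending load
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);    // no truncating f64 store
  computeRegisterProperties();
}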
- void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action) { - assert((unsigned)VT.getSimpleVT() < sizeof(CondCodeActions[0])*4 && + void setCondCodeAction(ISD::CondCode CC, MVT VT, + LegalizeAction Action) { + assert((unsigned)VT.SimpleTy < sizeof(CondCodeActions[0])*4 && (unsigned)CC < array_lengthof(CondCodeActions) && "Table isn't big enough!"); - CondCodeActions[(unsigned)CC] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2); - CondCodeActions[(unsigned)CC] |= (uint64_t)Action << VT.getSimpleVT()*2; + CondCodeActions[(unsigned)CC] &= ~(uint64_t(3UL) << VT.SimpleTy*2); + CondCodeActions[(unsigned)CC] |= (uint64_t)Action << VT.SimpleTy*2; } /// AddPromotedToType - If Opc/OrigVT is specified as being promoted, the @@ -1035,8 +1048,7 @@ /// one that works. If that default is insufficient, this method can be used /// by the target to override the default. void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { - PromoteToType[std::make_pair(Opc, OrigVT.getSimpleVT())] = - DestVT.getSimpleVT(); + PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; } /// addLegalFPImmediate - Indicate that this target can instruction select @@ -1090,21 +1102,33 @@ assert(0 && "Not Implemented"); return NULL; // this is here to silence compiler errors } + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGLowering code knows how to lower these. // - /// LowerArguments - This hook must be implemented to indicate how we should - /// lower the arguments for the specified function, into the specified DAG. - virtual void - LowerArguments(Function &F, SelectionDAG &DAG, - SmallVectorImpl<SDValue>& ArgValues, DebugLoc dl); + /// LowerFormalArguments - This hook must be implemented to lower the + /// incoming (formal) arguments, described by the Ins array, into the + /// specified DAG. The implementation should fill in the InVals array + /// with legal-type argument values, and return the resulting token + /// chain value. + /// + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + assert(0 && "Not Implemented"); + return SDValue(); // this is here to silence compiler errors + } - /// LowerCallTo - This hook lowers an abstract call to a function into an + /// LowerCallTo - This function lowers an abstract call to a function into an /// actual call. This returns a pair of operands. The first element is the /// return value for the function (if RetTy is not VoidTy). The second - /// element is the outgoing token chain. + /// element is the outgoing token chain. It calls LowerCall to do the actual + /// lowering. struct ArgListEntry { SDValue Node; const Type* Ty; @@ -1120,11 +1144,48 @@ isSRet(false), isNest(false), isByVal(false), Alignment(0) { } }; typedef std::vector<ArgListEntry> ArgListTy; - virtual std::pair<SDValue, SDValue> + std::pair<SDValue, SDValue> LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, - unsigned CallingConv, bool isTailCall, SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl); + CallingConv::ID CallConv, bool isTailCall, + bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, + SelectionDAG &DAG, DebugLoc dl); + + /// LowerCall - This hook must be implemented to lower calls into + /// the specified DAG.
The outgoing arguments to the call are described + /// by the Outs array, and the values to be returned by the call are + /// described by the Ins array. The implementation should fill in the + /// InVals array with legal-type return values from the call, and return + /// the resulting token chain value. + /// + /// The isTailCall flag here is normative. If it is true, the + /// implementation must emit a tail call. The + /// IsEligibleForTailCallOptimization hook should be used to catch + /// cases that cannot be handled. + /// + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + assert(0 && "Not Implemented"); + return SDValue(); // this is here to silence compiler errors + } + + /// LowerReturn - This hook must be implemented to lower outgoing + /// return values, described by the Outs array, into the specified + /// DAG. The implementation should return the resulting token chain + /// value. + /// + virtual SDValue + LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG) { + assert(0 && "Not Implemented"); + return SDValue(); // this is here to silence compiler errors + } /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a /// memcpy. This can be used by targets to provide code sequences for cases @@ -1220,19 +1281,17 @@ /// IsEligibleForTailCallOptimization - Check whether the call is eligible for /// tail call optimization. Targets which want to do tail call optimization - /// should override this function. - virtual bool IsEligibleForTailCallOptimization(CallSDNode *Call, - SDValue Ret, - SelectionDAG &DAG) const { + /// should override this function. + virtual bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + // Conservative default: no calls are eligible. return false; } - /// CheckTailCallReturnConstraints - Check whether CALL node immediatly - /// preceeds the RET node and whether the return uses the result of the node - /// or is a void return. This function can be used by the target to determine - /// eligiblity of tail call optimization. - static bool CheckTailCallReturnConstraints(CallSDNode *TheCall, SDValue Ret); - /// GetPossiblePreceedingTailCall - Get preceeding TailCallNodeOpCode node if /// it exists. Skip a possible ISD::TokenFactor. static SDValue GetPossiblePreceedingTailCall(SDValue Chain, @@ -1270,6 +1329,14 @@ // Inline Asm Support hooks // + /// ExpandInlineAsm - This hook allows the target to expand an inline asm + /// call to be explicit llvm code if it wants to. This is useful for + /// turning simple inline asms into LLVM intrinsics, which gives the + /// compiler more information about the behavior of the code. + virtual bool ExpandInlineAsm(CallInst *CI) const { + return false; + } + enum ConstraintType { C_Register, // Constraint represents specific register(s). C_RegisterClass, // Constraint represents any of register(s) in class. @@ -1296,7 +1363,7 @@ Value *CallOperandVal; /// ConstraintVT - The ValueType for the operand value. - MVT ConstraintVT; + EVT ConstraintVT; /// isMatchingInputConstraint - Return true if this is an input operand that /// is a matching constraint like "4".
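// [Editor's sketch, not part of the patch] A minimal LowerReturn in the new
// style, modeled on the in-tree targets around this revision. RetCC_My (a
// TableGen-generated calling-convention function) and MyISD::RET_FLAG are
// hypothetical; the CCState/CCValAssign plumbing is the era's real API.
SDValue
MyTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool isVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              DebugLoc dl, SelectionDAG &DAG) {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_My);

  SDValue Flag;
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    // Copy each return value into the register chosen by RetCC_My, gluing
    // the copies together so nothing can be scheduled between them.
    Chain = DAG.getCopyToReg(Chain, dl, RVLocs[i].getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }
  if (Flag.getNode())
    return DAG.getNode(MyISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  return DAG.getNode(MyISD::RET_FLAG, dl, MVT::Other, Chain);
}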
@@ -1333,7 +1400,7 @@ public: /// This should only be used for C_RegisterClass constraints. virtual std::vector<unsigned> getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; /// getRegForInlineAsmConstraint - Given a physical register constraint (e.g. /// {edx}), return the register number and the register class for the @@ -1347,13 +1414,13 @@ /// this returns a register number of 0 and a null register class pointer. virtual std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand. This returns null if there is no replacement to /// make. - virtual const char *LowerXConstraint(MVT ConstraintVT) const; + virtual const char *LowerXConstraint(EVT ConstraintVT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is true @@ -1373,8 +1440,12 @@ // instructions are special in various ways, which require special support to // insert. The specified MachineInstr is created but not inserted into any // basic blocks, and the scheduler passes ownership of it to this method. + // When new basic blocks are inserted and the edges from MBB to its successors + // are modified, the method should insert pairs of <Old, New> into the + // DenseMap. virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; //===--------------------------------------------------------------------===// // Addressing mode description hooks (used by LSR etc). @@ -1410,7 +1481,7 @@ return false; } - virtual bool isTruncateFree(MVT VT1, MVT VT2) const { + virtual bool isTruncateFree(EVT VT1, EVT VT2) const { return false; } @@ -1426,14 +1497,14 @@ return false; } - virtual bool isZExtFree(MVT VT1, MVT VT2) const { + virtual bool isZExtFree(EVT VT1, EVT VT2) const { return false; } /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. - virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const { + virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const { return false; } @@ -1474,9 +1545,22 @@ return CmpLibcallCCs[Call]; } + /// setLibcallCallingConv - Set the CallingConv that should be used for the + /// specified libcall. + void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { + LibcallCallingConvs[Call] = CC; + } + + /// getLibcallCallingConv - Get the CallingConv that should be used for the + /// specified libcall. + CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { + return LibcallCallingConvs[Call]; + } + private: TargetMachine &TM; const TargetData *TD; + TargetLoweringObjectFile &TLOF; /// PointerTy - The type to use for pointers, usually i32 or i64. /// @@ -1517,8 +1601,6 @@ /// PointerTy is. MVT ShiftAmountTy; - OutOfRangeShiftAmount ShiftAmtHandling; - /// BooleanContents - Information about the contents of the high-bits in /// boolean values held in a type wider than i1. See getBooleanContents. BooleanContent BooleanContents; @@ -1565,14 +1647,14 @@ /// each ValueType the target supports natively.
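// [Editor's note, not part of the patch] The new libcall calling-convention
// hooks exist for targets whose runtime helpers use a different convention
// than ordinary calls; for example, an ARM-style target could pin its
// soft-float helpers to AAPCS from the TargetLowering constructor:
setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);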
TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; - MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; + EVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; /// TransformToType - For any value types we are promoting or expanding, this /// contains the value type that we are changing to. For Expanded types, this /// contains one step of the expand (e.g. i64 -> i32), even if there are /// multiple steps required (e.g. i64 -> i16). For types natively supported /// by the system, this holds the same type (e.g. i32 -> i32). - MVT TransformToType[MVT::LAST_VALUETYPE]; + EVT TransformToType[MVT::LAST_VALUETYPE]; /// OpActions - For each operation and each value type, keep a LegalizeAction /// that indicates how instruction selection should deal with the operation. @@ -1616,7 +1698,7 @@ std::vector<APFloat> LegalFPImmediates; - std::vector<std::pair<MVT, TargetRegisterClass*> > AvailableRegClasses; + std::vector<std::pair<EVT, TargetRegisterClass*> > AvailableRegClasses; /// TargetDAGCombineArray - Targets can specify ISD nodes that they would /// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(), @@ -1641,6 +1723,10 @@ /// of each of the comparison libcall against zero. ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; + /// LibcallCallingConvs - Stores the CallingConv that should be used for each + /// libcall. + CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; + protected: /// When lowering \@llvm.memset this field specifies the maximum number of /// store operations that may be substituted for the call to memset. Targets @@ -1676,12 +1762,6 @@ /// @brief Specify maximum bytes of store instructions per memmove call. unsigned maxStoresPerMemmove; - /// This field specifies whether the target machine permits unaligned memory - /// accesses. This is used, for example, to determine the size of store - /// operations when copying small arrays and other similar tasks. - /// @brief Indicate whether the target permits unaligned memory accesses. - bool allowUnalignedMemoryAccesses; - /// This field specifies whether the target can benefit from code placement /// optimization. bool benefitFromCodePlacementOpt; diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h new file mode 100644 index 0000000000000..821e53783c617 --- /dev/null +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -0,0 +1,361 @@ +//===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H +#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H + +#include "llvm/MC/SectionKind.h" + +namespace llvm { + class MachineModuleInfo; + class Mangler; + class MCAsmInfo; + class MCExpr; + class MCSection; + class MCSectionMachO; + class MCContext; + class GlobalValue; + class StringRef; + class TargetMachine; + +class TargetLoweringObjectFile { + MCContext *Ctx; + + TargetLoweringObjectFile(const TargetLoweringObjectFile&); // DO NOT IMPLEMENT + void operator=(const TargetLoweringObjectFile&); // DO NOT IMPLEMENT +protected: + + TargetLoweringObjectFile(); + + /// TextSection - Section directive for standard text. + /// + const MCSection *TextSection; + + /// DataSection - Section directive for standard data. + /// + const MCSection *DataSection; + + /// BSSSection - Section that is default initialized to zero. + const MCSection *BSSSection; + + /// ReadOnlySection - Section that is readonly and can contain arbitrary + /// initialized data. Targets are not required to have a readonly section. + /// If they don't, various bits of code will fall back to using the data + /// section for constants. + const MCSection *ReadOnlySection; + + /// StaticCtorSection - This section contains the static constructor pointer + /// list. + const MCSection *StaticCtorSection; + + /// StaticDtorSection - This section contains the static destructor pointer + /// list. + const MCSection *StaticDtorSection; + + /// LSDASection - If exception handling is supported by the target, this is + /// the section the Language Specific Data Area information is emitted to. + const MCSection *LSDASection; + + /// EHFrameSection - If exception handling is supported by the target, this is + /// the section the EH Frame is emitted to. + const MCSection *EHFrameSection; + + // Dwarf sections for debug info. If a target supports debug info, these must + // be set. + const MCSection *DwarfAbbrevSection; + const MCSection *DwarfInfoSection; + const MCSection *DwarfLineSection; + const MCSection *DwarfFrameSection; + const MCSection *DwarfPubNamesSection; + const MCSection *DwarfPubTypesSection; + const MCSection *DwarfDebugInlineSection; + const MCSection *DwarfStrSection; + const MCSection *DwarfLocSection; + const MCSection *DwarfARangesSection; + const MCSection *DwarfRangesSection; + const MCSection *DwarfMacroInfoSection; + +public: + + MCContext &getContext() const { return *Ctx; } + + + virtual ~TargetLoweringObjectFile(); + + /// Initialize - this method must be called before any actual lowering is + /// done. This specifies the current context for codegen, and gives the + /// lowering implementations a chance to set up their default sections. 
+ virtual void Initialize(MCContext &ctx, const TargetMachine &TM) { + Ctx = &ctx; + } + + + const MCSection *getTextSection() const { return TextSection; } + const MCSection *getDataSection() const { return DataSection; } + const MCSection *getBSSSection() const { return BSSSection; } + const MCSection *getStaticCtorSection() const { return StaticCtorSection; } + const MCSection *getStaticDtorSection() const { return StaticDtorSection; } + const MCSection *getLSDASection() const { return LSDASection; } + const MCSection *getEHFrameSection() const { return EHFrameSection; } + const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } + const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; } + const MCSection *getDwarfLineSection() const { return DwarfLineSection; } + const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; } + const MCSection *getDwarfPubNamesSection() const{return DwarfPubNamesSection;} + const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;} + const MCSection *getDwarfDebugInlineSection() const { + return DwarfDebugInlineSection; + } + const MCSection *getDwarfStrSection() const { return DwarfStrSection; } + const MCSection *getDwarfLocSection() const { return DwarfLocSection; } + const MCSection *getDwarfARangesSection() const { return DwarfARangesSection;} + const MCSection *getDwarfRangesSection() const { return DwarfRangesSection; } + const MCSection *getDwarfMacroInfoSection() const { + return DwarfMacroInfoSection; + } + + /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively + /// decide not to emit the UsedDirective for some symbols in llvm.used. + /// FIXME: REMOVE this (rdar://7071300) + virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV, + Mangler *) const { + return GV != 0; + } + + /// getSectionForConstant - Given a constant with the SectionKind, return a + /// section that it should be placed in. + virtual const MCSection *getSectionForConstant(SectionKind Kind) const; + + /// getKindForGlobal - Classify the specified global variable into a set of + /// target independent categories embodied in SectionKind. + static SectionKind getKindForGlobal(const GlobalValue *GV, + const TargetMachine &TM); + + /// SectionForGlobal - This method computes the appropriate section to emit + /// the specified global variable or function definition. This should not + /// be passed external (or available externally) globals. + const MCSection *SectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler *Mang, + const TargetMachine &TM) const; + + /// SectionForGlobal - This method computes the appropriate section to emit + /// the specified global variable or function definition. This should not + /// be passed external (or available externally) globals. + const MCSection *SectionForGlobal(const GlobalValue *GV, + Mangler *Mang, + const TargetMachine &TM) const { + return SectionForGlobal(GV, getKindForGlobal(GV, TM), Mang, TM); + } + + + + /// getExplicitSectionGlobal - Targets should implement this method to assign + /// a section to globals with an explicit section specified. The + /// implementation of this method can assume that GV->hasSection() is true. + virtual const MCSection * + getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const = 0; + + /// getSpecialCasedSectionGlobals - Allow the target to completely override + /// section assignment of a global.
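// [Editor's sketch, not part of the patch] How a client such as the
// AsmPrinter is expected to drive these hooks: classify the global once with
// getKindForGlobal, then let SectionForGlobal dispatch (an explicit section
// attribute routes through getExplicitSectionGlobal, everything else through
// the virtual SelectSectionForGlobal). GV, Mang and TM are assumed to come
// from the surrounding code.
const TargetLoweringObjectFile &TLOF =
    TM.getTargetLowering()->getObjFileLowering();
SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
const MCSection *Sec = TLOF.SectionForGlobal(GV, Kind, Mang, TM);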
+ virtual const MCSection * + getSpecialCasedSectionGlobals(const GlobalValue *GV, Mangler *Mang, + SectionKind Kind) const { + return 0; + } + + /// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a + /// pc-relative reference to the specified global variable from exception + /// handling information. In addition to the symbol, this returns + /// by-reference: + /// + /// IsIndirect - True if the returned symbol is actually a stub that contains + /// the address of the symbol, false if the symbol is the global itself. + /// + /// IsPCRel - True if the symbol reference is already pc-relative, false if + /// the caller needs to subtract off the address of the reference from the + /// symbol. + /// + virtual const MCExpr * + getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const; + +protected: + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; +}; + + + + +class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { + mutable void *UniquingMap; +protected: + /// TLSDataSection - Section directive for Thread Local data. + /// + const MCSection *TLSDataSection; // Defaults to ".tdata". + + /// TLSBSSSection - Section directive for Thread Local uninitialized data. + /// Null if this target doesn't support a BSS section. + /// + const MCSection *TLSBSSSection; // Defaults to ".tbss". + + const MCSection *DataRelSection; + const MCSection *DataRelLocalSection; + const MCSection *DataRelROSection; + const MCSection *DataRelROLocalSection; + + const MCSection *MergeableConst4Section; + const MCSection *MergeableConst8Section; + const MCSection *MergeableConst16Section; + +protected: + const MCSection *getELFSection(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind, + bool IsExplicit = false) const; +public: + TargetLoweringObjectFileELF() : UniquingMap(0) {} + ~TargetLoweringObjectFileELF(); + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + /// getSectionForConstant - Given a constant with the SectionKind, return a + /// section that it should be placed in. 
+ virtual const MCSection *getSectionForConstant(SectionKind Kind) const; + + + virtual const MCSection * + getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; +}; + + + +class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile { + mutable void *UniquingMap; + + const MCSection *CStringSection; + const MCSection *UStringSection; + const MCSection *TextCoalSection; + const MCSection *ConstTextCoalSection; + const MCSection *ConstDataCoalSection; + const MCSection *ConstDataSection; + const MCSection *DataCoalSection; + const MCSection *FourByteConstantSection; + const MCSection *EightByteConstantSection; + const MCSection *SixteenByteConstantSection; + + const MCSection *LazySymbolPointerSection; + const MCSection *NonLazySymbolPointerSection; +public: + TargetLoweringObjectFileMachO() : UniquingMap(0) {} + ~TargetLoweringObjectFileMachO(); + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + virtual const MCSection * + getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + virtual const MCSection *getSectionForConstant(SectionKind Kind) const; + + /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively + /// decide not to emit the UsedDirective for some symbols in llvm.used. + /// FIXME: REMOVE this (rdar://7071300) + virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV, + Mangler *) const; + + /// getMachOSection - Return the MCSection for the specified mach-o section. + /// This requires the operands to be valid. + const MCSectionMachO *getMachOSection(const StringRef &Segment, + const StringRef &Section, + unsigned TypeAndAttributes, + SectionKind K) const { + return getMachOSection(Segment, Section, TypeAndAttributes, 0, K); + } + const MCSectionMachO *getMachOSection(const StringRef &Segment, + const StringRef &Section, + unsigned TypeAndAttributes, + unsigned Reserved2, + SectionKind K) const; + + /// getTextCoalSection - Return the "__TEXT,__textcoal_nt" section we put weak + /// text symbols into. + const MCSection *getTextCoalSection() const { + return TextCoalSection; + } + + /// getConstTextCoalSection - Return the "__TEXT,__const_coal" section + /// we put weak read-only symbols into. + const MCSection *getConstTextCoalSection() const { + return ConstTextCoalSection; + } + + /// getLazySymbolPointerSection - Return the section corresponding to + /// the .lazy_symbol_pointer directive. + const MCSection *getLazySymbolPointerSection() const { + return LazySymbolPointerSection; + } + + /// getNonLazySymbolPointerSection - Return the section corresponding to + /// the .non_lazy_symbol_pointer directive. + const MCSection *getNonLazySymbolPointerSection() const { + return NonLazySymbolPointerSection; + } + + /// getSymbolForDwarfGlobalReference - The mach-o version of this method + /// defaults to returning a stub reference. 
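// [Editor's sketch, not part of the patch] Initialize() in this Mach-O
// subclass builds its sections with getMachOSection; quoting the cstring
// setup from memory of the in-tree code around this revision (treat the
// exact enum and kind names as assumptions):
CStringSection = getMachOSection("__TEXT", "__cstring",
                                 MCSectionMachO::S_CSTRING_LITERALS,
                                 SectionKind::getMergeable1ByteCString());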
+ virtual const MCExpr * + getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const; +}; + + + +class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile { + mutable void *UniquingMap; +public: + TargetLoweringObjectFileCOFF() : UniquingMap(0) {} + ~TargetLoweringObjectFileCOFF(); + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + virtual const MCSection * + getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + /// getCOFFSection - Return the MCSection for the specified COFF section. + /// FIXME: Switch this to a semantic view eventually. + const MCSection *getCOFFSection(const char *Name, bool isDirective, + SectionKind K) const; +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 33fc45161a6ea..92b648cbb0a94 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -16,10 +16,12 @@ #include "llvm/Target/TargetInstrItineraries.h" #include <cassert> +#include <string> namespace llvm { -class TargetAsmInfo; +class Target; +class MCAsmInfo; class TargetData; class TargetSubtarget; class TargetInstrInfo; @@ -29,14 +31,14 @@ class TargetLowering; class TargetFrameInfo; class MachineCodeEmitter; class JITCodeEmitter; +class ObjectCodeEmitter; class TargetRegisterInfo; -class Module; class PassManagerBase; class PassManager; class Pass; class TargetMachOWriterInfo; class TargetELFWriterInfo; -class raw_ostream; +class formatted_raw_ostream; // Relocation model types. namespace Reloc { @@ -79,15 +81,6 @@ } -// Possible float ABI settings. Used with FloatABIType in TargetOptions.h. -namespace FloatABI { - enum ABIType { - Default, // Target-specific (either soft of hard depending on triple, etc). - Soft, // Soft float. - Hard // Hard float. - }; -} - //===----------------------------------------------------------------------===// /// /// TargetMachine - Primary interface to the complete machine description for @@ -98,35 +91,23 @@ class TargetMachine { TargetMachine(const TargetMachine &); // DO NOT IMPLEMENT void operator=(const TargetMachine &); // DO NOT IMPLEMENT protected: // Can only create subclasses. - TargetMachine(); + TargetMachine(const Target &); /// getSubtargetImpl - virtual method implemented by subclasses that returns /// a reference to that target's TargetSubtarget-derived member variable. virtual const TargetSubtarget *getSubtargetImpl() const { return 0; } + + /// TheTarget - The Target that this machine was created for. + const Target &TheTarget; /// AsmInfo - Contains target specific asm information. /// - mutable const TargetAsmInfo *AsmInfo; + const MCAsmInfo *AsmInfo; - /// createTargetAsmInfo - Create a new instance of target specific asm - /// information. - virtual const TargetAsmInfo *createTargetAsmInfo() const { return 0; } - public: virtual ~TargetMachine(); - /// getModuleMatchQuality - This static method should be implemented by - /// targets to indicate how closely they match the specified module. This is - /// used by the LLC tool to determine which target to use when an explicit - /// -march option is not specified. If a target returns zero, it will never - /// be chosen without an explicit -march option.
- static unsigned getModuleMatchQuality(const Module &) { return 0; } - - /// getJITMatchQuality - This static method should be implemented by targets - /// that provide JIT capabilities to indicate how suitable they are for - /// execution on the current host. If a value of 0 is returned, the target - /// will not be used unless an explicit -march option is used. - static unsigned getJITMatchQuality() { return 0; } + const Target &getTarget() const { return TheTarget; } // Interfaces to the major aspects of target machine information: // -- Instruction opcode and operand information @@ -139,12 +120,9 @@ public: virtual TargetLowering *getTargetLowering() const { return 0; } virtual const TargetData *getTargetData() const { return 0; } - /// getTargetAsmInfo - Return target specific asm information. + /// getMCAsmInfo - Return target specific asm information. /// - const TargetAsmInfo *getTargetAsmInfo() const { - if (!AsmInfo) AsmInfo = createTargetAsmInfo(); - return AsmInfo; - } + const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; } /// getSubtarget - This method returns a pointer to the specified type of /// TargetSubtarget. In debug builds, it verifies that the object being @@ -225,13 +203,12 @@ public: /// addPassesToEmitFile - Add passes to the specified pass manager to get the /// specified file emitted. Typically this will involve several steps of code - /// generation. If Fast is set to true, the code generator should emit code - /// as fast as possible, though the generated code may be less efficient. + /// generation. /// This method should return FileModel::Error if emission of this file type /// is not supported. /// virtual FileModel::Model addPassesToEmitFile(PassManagerBase &, - raw_ostream &, + formatted_raw_ostream &, CodeGenFileType, CodeGenOpt::Level) { return FileModel::None; @@ -257,6 +234,16 @@ public: return true; } + /// addPassesToEmitFileFinish - If the passes to emit the specified file had + /// to be split up (e.g., to add an object writer pass), this method can be + /// used to finish up adding passes to emit the file, if necessary. + /// + virtual bool addPassesToEmitFileFinish(PassManagerBase &, + ObjectCodeEmitter *, + CodeGenOpt::Level) { + return true; + } + /// addPassesToEmitMachineCode - Add passes to the specified pass manager to /// get machine code emitted. This uses a MachineCodeEmitter object to handle /// actually outputting the machine code and resolving things like the address @@ -285,7 +272,7 @@ public: /// require having the entire module at once. This is not recommended, do not /// use this. virtual bool WantsWholeFile() const { return false; } - virtual bool addPassesToEmitWholeFile(PassManager &, raw_ostream &, + virtual bool addPassesToEmitWholeFile(PassManager &, formatted_raw_ostream &, CodeGenFileType, CodeGenOpt::Level) { return true; @@ -297,8 +284,8 @@ public: /// class LLVMTargetMachine : public TargetMachine { protected: // Can only create subclasses. - LLVMTargetMachine() { } - + LLVMTargetMachine(const Target &T, const std::string &TargetTriple); + /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for /// both emitting to assembly files or machine code output. /// @@ -318,7 +305,7 @@ public: /// target-specific passes in standard locations. /// virtual FileModel::Model addPassesToEmitFile(PassManagerBase &PM, - raw_ostream &Out, + formatted_raw_ostream &Out, CodeGenFileType FileType, CodeGenOpt::Level); @@ -335,7 +322,15 @@ public: /// used to finish up adding passes to emit the file, if necessary. 
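For orientation, here is a hedged sketch of how a driver might use the split emission interface above. The enumerator spellings (TargetMachine::ObjectFile, CodeGenOpt::Default) and header paths are assumptions from context, not taken from this patch:

    #include "llvm/PassManager.h"
    #include "llvm/Support/FormattedStream.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Returns true on failure, mirroring the conventions above.
    bool emitObjectFile(TargetMachine &TM, PassManagerBase &PM,
                        formatted_raw_ostream &Out, ObjectCodeEmitter *OCE) {
      FileModel::Model M =
          TM.addPassesToEmitFile(PM, Out, TargetMachine::ObjectFile,
                                 CodeGenOpt::Default);
      if (M == FileModel::Error || M == FileModel::None)
        return true;  // this target cannot emit the requested file type
      // Finish the pipeline with the matching emitter kind.
      return TM.addPassesToEmitFileFinish(PM, OCE, CodeGenOpt::Default);
    }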
/// virtual bool addPassesToEmitFileFinish(PassManagerBase &PM, - JITCodeEmitter *MCE, + JITCodeEmitter *JCE, + CodeGenOpt::Level); + + /// addPassesToEmitFileFinish - If the passes to emit the specified file had + /// to be split up (e.g., to add an object writer pass), this method can be + /// used to finish up adding passes to emit the file, if necessary. + /// + virtual bool addPassesToEmitFileFinish(PassManagerBase &PM, + ObjectCodeEmitter *OCE, CodeGenOpt::Level); /// addPassesToEmitMachineCode - Add passes to the specified pass manager to @@ -367,20 +362,28 @@ public: return true; } - /// addPreRegAllocPasses - This method may be implemented by targets that want - /// to run passes immediately before register allocation. This should return + /// addPreRegAlloc - This method may be implemented by targets that want to + /// run passes immediately before register allocation. This should return /// true if -print-machineinstrs should print after these passes. virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level) { return false; } - /// addPostRegAllocPasses - This method may be implemented by targets that - /// want to run passes after register allocation but before prolog-epilog + /// addPostRegAlloc - This method may be implemented by targets that want + /// to run passes after register allocation but before prolog-epilog /// insertion. This should return true if -print-machineinstrs should print /// after these passes. virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level) { return false; } + + /// addPreSched2 - This method may be implemented by targets that want to + /// run passes after prolog-epilog insertion and before the second instruction + /// scheduling pass. This should return true if -print-machineinstrs should + /// print after these passes. + virtual bool addPreSched2(PassManagerBase &, CodeGenOpt::Level) { + return false; + } /// addPreEmitPass - This pass may be implemented by targets that want to run /// passes immediately before machine code is emitted. This should return @@ -390,51 +393,57 @@ public: } - /// addAssemblyEmitter - This pass should be overridden by the target to add - /// the asmprinter, if asm emission is supported. If this is not supported, - /// 'true' should be returned. - virtual bool addAssemblyEmitter(PassManagerBase &, CodeGenOpt::Level, - bool /* VerboseAsmDefault */, raw_ostream &) { - return true; - } - /// addCodeEmitter - This pass should be overridden by the target to add a /// code emitter, if supported. If this is not supported, 'true' should be - /// returned. If DumpAsm is true, the generated assembly is printed to cerr. + /// returned. virtual bool addCodeEmitter(PassManagerBase &, CodeGenOpt::Level, - bool /*DumpAsm*/, MachineCodeEmitter &) { + MachineCodeEmitter &) { return true; } /// addCodeEmitter - This pass should be overridden by the target to add a /// code emitter, if supported. If this is not supported, 'true' should be - /// returned. If DumpAsm is true, the generated assembly is printed to cerr. + /// returned. virtual bool addCodeEmitter(PassManagerBase &, CodeGenOpt::Level, - bool /*DumpAsm*/, JITCodeEmitter &) { + JITCodeEmitter &) { return true; } /// addSimpleCodeEmitter - This pass should be overridden by the target to add /// a code emitter (without setting flags), if supported. If this is not - /// supported, 'true' should be returned. If DumpAsm is true, the generated - /// assembly is printed to cerr. + /// supported, 'true' should be returned. 
virtual bool addSimpleCodeEmitter(PassManagerBase &, CodeGenOpt::Level, - bool /*DumpAsm*/, MachineCodeEmitter &) { + MachineCodeEmitter &) { return true; } /// addSimpleCodeEmitter - This pass should be overridden by the target to add /// a code emitter (without setting flags), if supported. If this is not - /// supported, 'true' should be returned. If DumpAsm is true, the generated - /// assembly is printed to cerr. + /// supported, 'true' should be returned. virtual bool addSimpleCodeEmitter(PassManagerBase &, CodeGenOpt::Level, - bool /*DumpAsm*/, JITCodeEmitter &) { + JITCodeEmitter &) { + return true; + } + + /// addSimpleCodeEmitter - This pass should be overridden by the target to add + /// a code emitter (without setting flags), if supported. If this is not + /// supported, 'true' should be returned. + virtual bool addSimpleCodeEmitter(PassManagerBase &, CodeGenOpt::Level, + ObjectCodeEmitter &) { return true; } /// getEnableTailMergeDefault - the default setting for -enable-tail-merge /// on this target. User flag overrides. virtual bool getEnableTailMergeDefault() const { return true; } + + /// addAssemblyEmitter - Helper function which creates a target specific + /// assembly printer, if available. + /// + /// \return Returns 'false' on success. + bool addAssemblyEmitter(PassManagerBase &, CodeGenOpt::Level, + bool /* VerboseAsmDefault */, + formatted_raw_ostream &); }; } // End llvm namespace diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 377e03f95c472..8d52dadc285f2 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -16,6 +16,15 @@ #define LLVM_TARGET_TARGETOPTIONS_H namespace llvm { + // Possible float ABI settings. Used with FloatABIType in TargetOptions.h. + namespace FloatABI { + enum ABIType { + Default, // Target-specific (either soft or hard depending on triple, etc). + Soft, // Soft float. + Hard // Hard float. + }; + } + /// PrintMachineCode - This flag is enabled when the -print-machineinstrs /// option is specified on the command line, and should enable debugging /// output from the code generator. @@ -85,10 +94,23 @@ namespace llvm { /// .bss section. This flag disables such behaviour (necessary, e.g. for /// crt*.o compiling). extern bool NoZerosInBSS; - - /// ExceptionHandling - This flag indicates that exception information should - /// be emitted. - extern bool ExceptionHandling; + + /// DwarfExceptionHandling - This flag indicates that Dwarf exception + /// information should be emitted. + extern bool DwarfExceptionHandling; + + /// SjLjExceptionHandling - This flag indicates that SJLJ exception + /// information should be emitted. + extern bool SjLjExceptionHandling; + + /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit + /// debug information and notify a debugger about it. + extern bool JITEmitDebugInfo; + + /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write + /// the object files generated by the JITEmitDebugInfo flag to disk. This + /// flag is hidden and is only for debugging the debug info. + extern bool JITEmitDebugInfoToDisk; /// UnwindTablesMandatory - This flag indicates that unwind tables should /// be emitted for all functions. diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 91e8f80fd108d..e90fc6cccc3d4 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -41,7 +41,6 @@ class RegScavenger; /// of AX.
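As a usage note, a minimal sketch of a host tool toggling the new codegen globals; it assumes FloatABIType is the pre-existing FloatABI::ABIType global that the enum's own comment refers to:

    #include "llvm/Target/TargetOptions.h"

    void configureSoftFloatWithDwarfEH() {
      llvm::FloatABIType = llvm::FloatABI::Soft;  // force the soft-float ABI
      llvm::DwarfExceptionHandling = true;        // emit DWARF EH information
      llvm::SjLjExceptionHandling = false;        // not using setjmp/longjmp EH
    }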
/// struct TargetRegisterDesc { - const char *AsmName; // Assembly language name for the register const char *Name; // Printable name for the reg (for debugging) const unsigned *AliasSet; // Register Alias Set, described above const unsigned *SubRegs; // Sub-register set, described above @@ -53,7 +52,7 @@ public: typedef const unsigned* iterator; typedef const unsigned* const_iterator; - typedef const MVT* vt_iterator; + typedef const EVT* vt_iterator; typedef const TargetRegisterClass* const * sc_iterator; private: unsigned ID; @@ -70,7 +69,7 @@ private: public: TargetRegisterClass(unsigned id, const char *name, - const MVT *vts, + const EVT *vts, const TargetRegisterClass * const *subcs, const TargetRegisterClass * const *supcs, const TargetRegisterClass * const *subregcs, @@ -84,7 +83,7 @@ public: RegSet.insert(*I); } virtual ~TargetRegisterClass() {} // Allow subclasses - + /// getID() - Return the register class ID number. /// unsigned getID() const { return ID; } @@ -117,13 +116,13 @@ public: /// hasType - return true if this TargetRegisterClass has the ValueType vt. /// - bool hasType(MVT vt) const { - for(int i = 0; VTs[i] != MVT::Other; ++i) + bool hasType(EVT vt) const { + for(int i = 0; VTs[i].getSimpleVT().SimpleTy != MVT::Other; ++i) if (VTs[i] == vt) return true; return false; } - + /// vt_begin / vt_end - Loop over all of the value types that can be /// represented by values in this register class. vt_iterator vt_begin() const { @@ -132,7 +131,7 @@ public: vt_iterator vt_end() const { vt_iterator I = VTs; - while (*I != MVT::Other) ++I; + while (I->getSimpleVT().SimpleTy != MVT::Other) ++I; return I; } @@ -173,7 +172,7 @@ public: /// hasSubClass - return true if the specified TargetRegisterClass /// is a proper subset of this TargetRegisterClass. bool hasSubClass(const TargetRegisterClass *cs) const { - for (int i = 0; SubClasses[i] != NULL; ++i) + for (int i = 0; SubClasses[i] != NULL; ++i) if (SubClasses[i] == cs) return true; return false; @@ -184,17 +183,17 @@ public: sc_iterator subclasses_begin() const { return SubClasses; } - + sc_iterator subclasses_end() const { sc_iterator I = SubClasses; while (*I != NULL) ++I; return I; } - + /// hasSuperClass - return true if the specified TargetRegisterClass is a /// proper superset of this TargetRegisterClass. bool hasSuperClass(const TargetRegisterClass *cs) const { - for (int i = 0; SuperClasses[i] != NULL; ++i) + for (int i = 0; SuperClasses[i] != NULL; ++i) if (SuperClasses[i] == cs) return true; return false; @@ -205,7 +204,7 @@ public: sc_iterator superclasses_begin() const { return SuperClasses; } - + sc_iterator superclasses_end() const { sc_iterator I = SuperClasses; while (*I != NULL) ++I; @@ -217,7 +216,7 @@ public: bool isASubClass() const { return SuperClasses[0] != 0; } - + /// allocation_order_begin/end - These methods define a range of registers /// which specify the registers in this class that are valid to register /// allocate, and the preferred order to allocate them in. For example, @@ -318,15 +317,15 @@ } /// getPhysicalRegisterRegClass - Returns the Register Class of a physical - /// register of the given type. If type is MVT::Other, then just return any + /// register of the given type. If type is EVT::Other, then just return any /// register class the register belongs to.
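A minimal sketch of the EVT-based iteration this hunk introduces (the helper name is illustrative, not part of the patch):

    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Equivalent to RC.hasType(MVT::f64), spelled out with the iterators.
    bool supportsF64(const TargetRegisterClass &RC) {
      for (TargetRegisterClass::vt_iterator I = RC.vt_begin(), E = RC.vt_end();
           I != E; ++I)
        if (I->getSimpleVT().SimpleTy == MVT::f64)
          return true;
      return false;
    }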
virtual const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; + getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; /// getAllocatableSet - Returns a bitset indexed by register number /// indicating if a register is allocatable or not. If a register class is /// specified, returns the subset for the class. - BitVector getAllocatableSet(MachineFunction &MF, + BitVector getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC = NULL) const; const TargetRegisterDesc &operator[](unsigned RegNo) const { @@ -368,12 +367,6 @@ public: return get(RegNo).SuperRegs; } - /// getAsmName - Return the symbolic target-specific name for the - /// specified physical register. - const char *getAsmName(unsigned RegNo) const { - return get(RegNo).AsmName; - } - /// getName - Return the human-readable symbolic target-specific name for the /// specified physical register. const char *getName(unsigned RegNo) const { @@ -386,9 +379,16 @@ public: return NumRegs; } - /// areAliases - Returns true if the two registers alias each other, false - /// otherwise - bool areAliases(unsigned regA, unsigned regB) const { + /// regsOverlap - Returns true if the two registers are equal or alias each + /// other. The registers may be virtual register. + bool regsOverlap(unsigned regA, unsigned regB) const { + if (regA == regB) + return true; + + if (isVirtualRegister(regA) || isVirtualRegister(regB)) + return false; + + // regA and regB are distinct physical registers. Do they alias? size_t index = (regA + regB * 37) & (AliasesHashSize-1); unsigned ProbeAmt = 0; while (AliasesHash[index*2] != 0 && @@ -403,17 +403,6 @@ public: return false; } - /// regsOverlap - Returns true if the two registers are equal or alias each - /// other. The registers may be virtual register. - bool regsOverlap(unsigned regA, unsigned regB) const { - if (regA == regB) - return true; - - if (isVirtualRegister(regA) || isVirtualRegister(regB)) - return false; - return areAliases(regA, regB); - } - /// isSubRegister - Returns true if regB is a sub-register of regA. /// bool isSubRegister(unsigned regA, unsigned regB) const { @@ -424,11 +413,11 @@ public: SubregHash[index*2+1] != 0) { if (SubregHash[index*2] == regA && SubregHash[index*2+1] == regB) return true; - + index = (index + ProbeAmt) & (SubregHashSize-1); ProbeAmt += 2; } - + return false; } @@ -442,11 +431,11 @@ public: SuperregHash[index*2+1] != 0) { if (SuperregHash[index*2] == regA && SuperregHash[index*2+1] == regB) return true; - + index = (index + ProbeAmt) & (SuperregHashSize-1); ProbeAmt += 2; } - + return false; } @@ -476,7 +465,7 @@ public: /// getMatchingSuperReg - Return a super-register of the specified register /// Reg so its sub-register of index SubIdx is Reg. - unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, const TargetRegisterClass *RC) const { for (const unsigned *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs) if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR)) @@ -484,6 +473,15 @@ public: return 0; } + /// getMatchingSuperRegClass - Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. 
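Since areAliases is folded into regsOverlap here, client checks reduce to one query; a small sketch (the helper name is illustrative):

    #include "llvm/Target/TargetRegisterInfo.h"

    // True if a def of DefReg can clobber a use of UseReg: identical
    // registers, or distinct physical registers that alias. Distinct
    // virtual registers are safely rejected.
    bool clobbers(const llvm::TargetRegisterInfo &TRI,
                  unsigned DefReg, unsigned UseReg) {
      return TRI.regsOverlap(DefReg, UseReg);
    }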
+ virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const { + return 0; + } + //===--------------------------------------------------------------------===// // Register Class Information // @@ -496,7 +494,7 @@ public: unsigned getNumRegClasses() const { return (unsigned)(regclass_end()-regclass_begin()); } - + /// getRegClass - Returns the register class associated with the enumeration /// value. See class TargetOperandInfo. const TargetRegisterClass *getRegClass(unsigned i) const { @@ -505,8 +503,9 @@ public: } /// getPointerRegClass - Returns a TargetRegisterClass used for pointer - /// values. - virtual const TargetRegisterClass *getPointerRegClass() const { + /// values. If a target supports multiple different pointer register classes, + /// kind specifies which one is indicated. + virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const { assert(0 && "Target didn't implement getPointerRegClass!"); return 0; // Must return a value in order to compile with VS 2005 } @@ -561,24 +560,41 @@ public: virtual bool requiresRegisterScavenging(const MachineFunction &MF) const { return false; } - + + /// requiresFrameIndexScavenging - returns true if the target requires post + /// PEI scavenging of registers for materializing frame index constants. + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return false; + } + /// hasFP - Return true if the specified function should have a dedicated /// frame pointer register. For most targets this is true only if the function /// has variable sized allocas or if frame pointer elimination is disabled. virtual bool hasFP(const MachineFunction &MF) const = 0; - // hasReservedCallFrame - Under normal circumstances, when a frame pointer is - // not required, we reserve argument space for call sites in the function - // immediately on entry to the current function. This eliminates the need for - // add/sub sp brackets around call sites. Returns true if the call frame is - // included as part of the stack frame. + /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is + /// not required, we reserve argument space for call sites in the function + /// immediately on entry to the current function. This eliminates the need for + /// add/sub sp brackets around call sites. Returns true if the call frame is + /// included as part of the stack frame. virtual bool hasReservedCallFrame(MachineFunction &MF) const { return !hasFP(MF); } - // needsStackRealignment - true if storage within the function requires the - // stack pointer to be aligned more than the normal calling convention calls - // for. + /// hasReservedSpillSlot - Return true if target has reserved a spill slot in + /// the stack frame of the given function for the specified register. e.g. On + /// x86, if the frame register is required, the first fixed stack object is + /// reserved as its spill slot. This tells PEI not to create a new stack frame + /// object for the given register. It should be called only after + /// processFunctionBeforeCalleeSavedScan(). + virtual bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, + int &FrameIdx) const { + return false; + } + + /// needsStackRealignment - true if storage within the function requires the + /// stack pointer to be aligned more than the normal calling convention calls + /// for. 
virtual bool needsStackRealignment(const MachineFunction &MF) const { return false; } @@ -625,6 +641,24 @@ virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const { } + /// saveScavengerRegister - Save the register so it can be used by the + /// register scavenger. Return true if the register was saved, false + /// otherwise. If this function does not save the register, the scavenger + /// will instead spill it to the emergency spill slot. + /// + virtual bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const {return false;} + + /// restoreScavengerRegister - Restore a register saved by + /// saveScavengerRegister(). + /// + virtual void restoreScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const {} + /// eliminateFrameIndex - This method must be overridden to eliminate abstract /// frame indices from instructions which may use them. The instruction /// referenced by the iterator contains an MO_FrameIndex operand which must be @@ -632,18 +666,23 @@ /// specified instruction, as long as it keeps the iterator pointing to the /// finished product. SPAdj is the SP adjustment due to call frame setup /// instruction. - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS=NULL) const = 0; + /// + /// When -enable-frame-index-scavenging is enabled, the virtual register + /// allocated for this frame index is returned and its value is stored in + /// *Value. + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, int *Value = NULL, + RegScavenger *RS=NULL) const = 0; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. virtual void emitPrologue(MachineFunction &MF) const = 0; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const = 0; - + //===--------------------------------------------------------------------===// /// Debug information queries. - + /// getDwarfRegNum - Map a target register to an equivalent dwarf register /// number. Returns -1 if there is no equivalent value. The second /// parameter allows targets to use different numberings for EH info and /// debugging info. virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const = 0; /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. virtual int getFrameIndexOffset(MachineFunction &MF, int FI) const; - + /// getRARegister - This method should return the register where the return /// address can be found. virtual unsigned getRARegister() const = 0; - + /// getInitialFrameState - Returns a list of machine moves that are assumed /// on entry to all functions. Note that LabelID is ignored (assumed to be /// the beginning of the function.)
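A hedged sketch of consuming the new eliminateFrameIndex contract from a prolog/epilog-style caller; the wrapper and its parameter names are placeholders, not code from the patch:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    void rewriteFrameIndex(const TargetRegisterInfo &TRI,
                           MachineBasicBlock::iterator MI, int SPAdj,
                           RegScavenger *RS) {
      int Value = 0;
      unsigned VReg = TRI.eliminateFrameIndex(MI, SPAdj, &Value, RS);
      if (VReg != 0) {
        // Under -enable-frame-index-scavenging the target returned the
        // virtual register it materialized for the index, with its
        // constant stored through *Value for later scavenging.
      }
    }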
@@ -670,7 +709,7 @@ public: // This is useful when building IndexedMaps keyed on virtual registers -struct VirtReg2IndexFunctor : std::unary_function<unsigned, unsigned> { +struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> { unsigned operator()(unsigned Reg) const { return Reg - TargetRegisterInfo::FirstVirtualRegister; } diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h new file mode 100644 index 0000000000000..8042d2363677b --- /dev/null +++ b/include/llvm/Target/TargetRegistry.h @@ -0,0 +1,560 @@ +//===-- Target/TargetRegistry.h - Target Registration -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes the TargetRegistry interface, which tools can use to access +// the appropriate target specific classes (TargetMachine, AsmPrinter, etc.) +// which have been registered. +// +// Target specific class implementations should register themselves using the +// appropriate TargetRegistry interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETREGISTRY_H +#define LLVM_TARGET_TARGETREGISTRY_H + +#include "llvm/ADT/Triple.h" +#include <string> +#include <cassert> + +namespace llvm { + class AsmPrinter; + class MCAsmParser; + class MCCodeEmitter; + class Module; + class MCAsmInfo; + class MCDisassembler; + class MCInstPrinter; + class TargetAsmParser; + class TargetMachine; + class formatted_raw_ostream; + class raw_ostream; + + /// Target - Wrapper for Target specific information. + /// + /// For registration purposes, this is a POD type so that targets can be + /// registered without the use of static constructors. + /// + /// Targets should implement a single global instance of this class (which + /// will be zero initialized), and pass that instance to the TargetRegistry as + /// part of their initialization. + class Target { + public: + friend struct TargetRegistry; + + typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT); + + typedef const MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T, + const StringRef &TT); + typedef TargetMachine *(*TargetMachineCtorTy)(const Target &T, + const std::string &TT, + const std::string &Features); + typedef AsmPrinter *(*AsmPrinterCtorTy)(formatted_raw_ostream &OS, + TargetMachine &TM, + const MCAsmInfo *MAI, + bool VerboseAsm); + typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T, + MCAsmParser &P); + typedef const MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T); + typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + raw_ostream &O); + typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const Target &T, + TargetMachine &TM); + + private: + /// Next - The next registered target in the linked list, maintained by the + /// TargetRegistry. + Target *Next; + + /// TripleMatchQualityFn - The target function for rating the match quality + /// of a triple. + TripleMatchQualityFnTy TripleMatchQualityFn; + + /// Name - The target name. + const char *Name; + + /// ShortDesc - A short description of the target. + const char *ShortDesc; + + /// HasJIT - Whether this target supports the JIT. + bool HasJIT; + + AsmInfoCtorFnTy AsmInfoCtorFn; + + /// TargetMachineCtorFn - Construction function for this target's + /// TargetMachine, if registered.
+ TargetMachineCtorTy TargetMachineCtorFn; + + /// AsmPrinterCtorFn - Construction function for this target's AsmPrinter, + /// if registered. + AsmPrinterCtorTy AsmPrinterCtorFn; + + /// AsmParserCtorFn - Construction function for this target's AsmParser, + /// if registered. + AsmParserCtorTy AsmParserCtorFn; + + /// MCDisassemblerCtorFn - Construction function for this target's + /// MCDisassembler, if registered. + MCDisassemblerCtorTy MCDisassemblerCtorFn; + + + /// MCInstPrinterCtorFn - Construction function for this target's + /// MCInstPrinter, if registered. + MCInstPrinterCtorTy MCInstPrinterCtorFn; + + /// CodeEmitterCtorFn - Construction function for this target's CodeEmitter, + /// if registered. + CodeEmitterCtorTy CodeEmitterCtorFn; + + public: + /// @name Target Information + /// @{ + + // getNext - Return the next registered target. + const Target *getNext() const { return Next; } + + /// getName - Get the target name. + const char *getName() const { return Name; } + + /// getShortDescription - Get a short description of the target. + const char *getShortDescription() const { return ShortDesc; } + + /// @} + /// @name Feature Predicates + /// @{ + + /// hasJIT - Check if this target supports just-in-time compilation. + bool hasJIT() const { return HasJIT; } + + /// hasTargetMachine - Check if this target supports code generation. + bool hasTargetMachine() const { return TargetMachineCtorFn != 0; } + + /// hasAsmPrinter - Check if this target supports .s printing. + bool hasAsmPrinter() const { return AsmPrinterCtorFn != 0; } + + /// hasAsmParser - Check if this target supports .s parsing. + bool hasAsmParser() const { return AsmParserCtorFn != 0; } + + /// hasMCDisassembler - Check if this target has a disassembler. + bool hasMCDisassembler() const { return MCDisassemblerCtorFn != 0; } + + /// hasMCInstPrinter - Check if this target has an instruction printer. + bool hasMCInstPrinter() const { return MCInstPrinterCtorFn != 0; } + + /// hasCodeEmitter - Check if this target supports instruction encoding. + bool hasCodeEmitter() const { return CodeEmitterCtorFn != 0; } + + /// @} + /// @name Feature Constructors + /// @{ + + /// createAsmInfo - Create a MCAsmInfo implementation for the specified + /// target triple. + /// + /// \arg Triple - This argument is used to determine the target machine + /// feature set; it should always be provided. Generally this should be + /// either the target triple from the module, or the target triple of the + /// host if that does not exist. + const MCAsmInfo *createAsmInfo(const StringRef &Triple) const { + if (!AsmInfoCtorFn) + return 0; + return AsmInfoCtorFn(*this, Triple); + } + + /// createTargetMachine - Create a target specific machine implementation + /// for the specified \arg Triple. + /// + /// \arg Triple - This argument is used to determine the target machine + /// feature set; it should always be provided. Generally this should be + /// either the target triple from the module, or the target triple of the + /// host if that does not exist. + TargetMachine *createTargetMachine(const std::string &Triple, + const std::string &Features) const { + if (!TargetMachineCtorFn) + return 0; + return TargetMachineCtorFn(*this, Triple, Features); + } + + /// createAsmPrinter - Create a target specific assembly printer pass.
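Taken together with TargetRegistry::lookupTarget declared just below, client code reduces to a few lines; a hedged sketch (the helper name and empty feature string are placeholders):

    #include "llvm/Target/TargetRegistry.h"
    #include <string>

    llvm::TargetMachine *createTM(const std::string &TripleStr,
                                  std::string &Error) {
      const llvm::Target *T =
          llvm::TargetRegistry::lookupTarget(TripleStr, Error);
      if (!T || !T->hasTargetMachine())
        return 0;  // unknown triple, or no code generator registered
      return T->createTargetMachine(TripleStr, /*Features=*/"");
    }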
+ AsmPrinter *createAsmPrinter(formatted_raw_ostream &OS, TargetMachine &TM, + const MCAsmInfo *MAI, bool Verbose) const { + if (!AsmPrinterCtorFn) + return 0; + return AsmPrinterCtorFn(OS, TM, MAI, Verbose); + } + + /// createAsmParser - Create a target specific assembly parser. + /// + /// \arg Parser - The target independent parser implementation to use for + /// parsing and lexing. + TargetAsmParser *createAsmParser(MCAsmParser &Parser) const { + if (!AsmParserCtorFn) + return 0; + return AsmParserCtorFn(*this, Parser); + } + + const MCDisassembler *createMCDisassembler() const { + if (!MCDisassemblerCtorFn) + return 0; + return MCDisassemblerCtorFn(*this); + } + + MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant, + const MCAsmInfo &MAI, + raw_ostream &O) const { + if (!MCInstPrinterCtorFn) + return 0; + return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI, O); + } + + + /// createCodeEmitter - Create a target specific code emitter. + MCCodeEmitter *createCodeEmitter(TargetMachine &TM) const { + if (!CodeEmitterCtorFn) + return 0; + return CodeEmitterCtorFn(*this, TM); + } + + /// @} + }; + + /// TargetRegistry - Generic interface to target specific features. + struct TargetRegistry { + class iterator { + const Target *Current; + explicit iterator(Target *T) : Current(T) {} + friend struct TargetRegistry; + public: + iterator(const iterator &I) : Current(I.Current) {} + iterator() : Current(0) {} + + bool operator==(const iterator &x) const { + return Current == x.Current; + } + bool operator!=(const iterator &x) const { + return !operator==(x); + } + + // Iterator traversal: forward iteration only + iterator &operator++() { // Preincrement + assert(Current && "Cannot increment end iterator!"); + Current = Current->getNext(); + return *this; + } + iterator operator++(int) { // Postincrement + iterator tmp = *this; + ++*this; + return tmp; + } + + const Target &operator*() const { + assert(Current && "Cannot dereference end iterator!"); + return *Current; + } + + const Target *operator->() const { + return &operator*(); + } + }; + + /// @name Registry Access + /// @{ + + static iterator begin(); + + static iterator end() { return iterator(); } + + /// lookupTarget - Lookup a target based on a target triple. + /// + /// \param Triple - The triple to use for finding a target. + /// \param Error - On failure, an error string describing why no target was + /// found. + static const Target *lookupTarget(const std::string &Triple, + std::string &Error); + + /// getClosestTargetForJIT - Pick the best target that is compatible with + /// the current host. If no close target can be found, this returns null + /// and sets the Error string to a reason. + /// + /// Maintained for compatibility through 2.6. + static const Target *getClosestTargetForJIT(std::string &Error); + + /// @} + /// @name Target Registration + /// @{ + + /// RegisterTarget - Register the given target. Attempts to register a + /// target which has already been registered will be ignored. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Name - The target name. This should be a static string. + /// @param ShortDesc - A short target description. This should be a static + /// string. + /// @param TQualityFn - The triple match quality computation function for + /// this target. 
+ /// @param HasJIT - Whether the target supports JIT code + /// generation. + static void RegisterTarget(Target &T, + const char *Name, + const char *ShortDesc, + Target::TripleMatchQualityFnTy TQualityFn, + bool HasJIT = false); + + /// RegisterAsmInfo - Register a MCAsmInfo implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct a MCAsmInfo for the target. + static void RegisterAsmInfo(Target &T, Target::AsmInfoCtorFnTy Fn) { + // Ignore duplicate registration. + if (!T.AsmInfoCtorFn) + T.AsmInfoCtorFn = Fn; + } + + /// RegisterTargetMachine - Register a TargetMachine implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct a TargetMachine for the target. + static void RegisterTargetMachine(Target &T, + Target::TargetMachineCtorTy Fn) { + // Ignore duplicate registration. + if (!T.TargetMachineCtorFn) + T.TargetMachineCtorFn = Fn; + } + + /// RegisterAsmPrinter - Register an AsmPrinter implementation for the given + /// target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an AsmPrinter for the target. + static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn) { + // Ignore duplicate registration. + if (!T.AsmPrinterCtorFn) + T.AsmPrinterCtorFn = Fn; + } + + /// RegisterAsmParser - Register a TargetAsmParser implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an AsmPrinter for the target. + static void RegisterAsmParser(Target &T, Target::AsmParserCtorTy Fn) { + if (!T.AsmParserCtorFn) + T.AsmParserCtorFn = Fn; + } + + /// RegisterMCDisassembler - Register a MCDisassembler implementation for + /// the given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an MCDisassembler for the target. + static void RegisterMCDisassembler(Target &T, + Target::MCDisassemblerCtorTy Fn) { + if (!T.MCDisassemblerCtorFn) + T.MCDisassemblerCtorFn = Fn; + } + + static void RegisterMCInstPrinter(Target &T, + Target::MCInstPrinterCtorTy Fn) { + if (!T.MCInstPrinterCtorFn) + T.MCInstPrinterCtorFn = Fn; + } + + /// RegisterCodeEmitter - Register a MCCodeEmitter implementation for the + /// given target. 
+ /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an MCCodeEmitter for the target. + static void RegisterCodeEmitter(Target &T, Target::CodeEmitterCtorTy Fn) { + if (!T.CodeEmitterCtorFn) + T.CodeEmitterCtorFn = Fn; + } + + /// @} + }; + + + //===--------------------------------------------------------------------===// + + /// RegisterTarget - Helper template for registering a target, for use in the + /// target's initialization function. Usage: + /// + /// + /// Target TheFooTarget; // The global target instance. + /// + /// extern "C" void LLVMInitializeFooTargetInfo() { + /// RegisterTarget<Triple::foo> X(TheFooTarget, "foo", "Foo description"); + /// } + template<Triple::ArchType TargetArchType, bool HasJIT = false> + struct RegisterTarget { + RegisterTarget(Target &T, const char *Name, const char *Desc) { + TargetRegistry::RegisterTarget(T, Name, Desc, + &getTripleMatchQuality, + HasJIT); + } + + static unsigned getTripleMatchQuality(const std::string &TT) { + if (Triple(TT).getArch() == TargetArchType) + return 20; + return 0; + } + }; + + /// RegisterAsmInfo - Helper template for registering a target assembly info + /// implementation. This invokes the static "Create" method on the class to + /// actually do the construction. Usage: + /// + /// extern "C" void LLVMInitializeFooTarget() { + /// extern Target TheFooTarget; + /// RegisterAsmInfo<FooMCAsmInfo> X(TheFooTarget); + /// } + template<class MCAsmInfoImpl> + struct RegisterAsmInfo { + RegisterAsmInfo(Target &T) { + TargetRegistry::RegisterAsmInfo(T, &Allocator); + } + private: + static const MCAsmInfo *Allocator(const Target &T, const StringRef &TT) { + return new MCAsmInfoImpl(T, TT); + } + + }; + + /// RegisterAsmInfoFn - Helper template for registering a target assembly info + /// implementation. This invokes the specified function to do the + /// construction. Usage: + /// + /// extern "C" void LLVMInitializeFooTarget() { + /// extern Target TheFooTarget; + /// RegisterAsmInfoFn X(TheFooTarget, TheFunction); + /// } + struct RegisterAsmInfoFn { + RegisterAsmInfoFn(Target &T, Target::AsmInfoCtorFnTy Fn) { + TargetRegistry::RegisterAsmInfo(T, Fn); + } + }; + + + /// RegisterTargetMachine - Helper template for registering a target machine + /// implementation, for use in the target machine initialization + /// function. Usage: + /// + /// extern "C" void LLVMInitializeFooTarget() { + /// extern Target TheFooTarget; + /// RegisterTargetMachine<FooTargetMachine> X(TheFooTarget); + /// } + template<class TargetMachineImpl> + struct RegisterTargetMachine { + RegisterTargetMachine(Target &T) { + TargetRegistry::RegisterTargetMachine(T, &Allocator); + } + + private: + static TargetMachine *Allocator(const Target &T, const std::string &TT, + const std::string &FS) { + return new TargetMachineImpl(T, TT, FS); + } + }; + + /// RegisterAsmPrinter - Helper template for registering a target specific + /// assembly printer, for use in the target machine initialization + /// function.
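Putting the helpers together, a hypothetical backend's two initialization entry points might look like the sketch below. FooTargetMachine and FooMCAsmInfo are assumed subclasses that do not exist in this patch, and Triple::UnknownArch merely stands in for the backend's real architecture:

    #include "llvm/Target/TargetRegistry.h"

    extern llvm::Target TheFooTarget;  // zero-initialized global instance

    extern "C" void LLVMInitializeFooTargetInfo() {
      llvm::RegisterTarget<llvm::Triple::UnknownArch>
          X(TheFooTarget, "foo", "Foo description");
    }

    extern "C" void LLVMInitializeFooTarget() {
      llvm::RegisterTargetMachine<FooTargetMachine> Y(TheFooTarget);
      llvm::RegisterAsmInfo<FooMCAsmInfo> Z(TheFooTarget);
    }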
Usage: + /// + /// extern "C" void LLVMInitializeFooAsmPrinter() { + /// extern Target TheFooTarget; + /// RegisterAsmPrinter<FooAsmPrinter> X(TheFooTarget); + /// } + template<class AsmPrinterImpl> + struct RegisterAsmPrinter { + RegisterAsmPrinter(Target &T) { + TargetRegistry::RegisterAsmPrinter(T, &Allocator); + } + + private: + static AsmPrinter *Allocator(formatted_raw_ostream &OS, TargetMachine &TM, + const MCAsmInfo *MAI, bool Verbose) { + return new AsmPrinterImpl(OS, TM, MAI, Verbose); + } + }; + + /// RegisterAsmParser - Helper template for registering a target specific + /// assembly parser, for use in the target machine initialization + /// function. Usage: + /// + /// extern "C" void LLVMInitializeFooAsmParser() { + /// extern Target TheFooTarget; + /// RegisterAsmParser<FooAsmParser> X(TheFooTarget); + /// } + template<class AsmParserImpl> + struct RegisterAsmParser { + RegisterAsmParser(Target &T) { + TargetRegistry::RegisterAsmParser(T, &Allocator); + } + + private: + static TargetAsmParser *Allocator(const Target &T, MCAsmParser &P) { + return new AsmParserImpl(T, P); + } + }; + + /// RegisterCodeEmitter - Helper template for registering a target specific + /// machine code emitter, for use in the target initialization + /// function. Usage: + /// + /// extern "C" void LLVMInitializeFooCodeEmitter() { + /// extern Target TheFooTarget; + /// RegisterCodeEmitter<FooCodeEmitter> X(TheFooTarget); + /// } + template<class CodeEmitterImpl> + struct RegisterCodeEmitter { + RegisterCodeEmitter(Target &T) { + TargetRegistry::RegisterCodeEmitter(T, &Allocator); + } + + private: + static MCCodeEmitter *Allocator(const Target &T, TargetMachine &TM) { + return new CodeEmitterImpl(T, TM); + } + }; + +} + +#endif diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 38461c5a380ed..dcc09921d9943 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -23,14 +23,23 @@ class FuncUnit; //===----------------------------------------------------------------------===// -// Instruction stage - These values represent a step in the execution of an -// instruction. The latency represents the number of discrete time slots used -// need to complete the stage. Units represent the choice of functional units -// that can be used to complete the stage. Eg. IntUnit1, IntUnit2. +// Instruction stage - These values represent a non-pipelined step in +// the execution of an instruction. Cycles represents the number of +// discrete time slots needed to complete the stage. Units represent +// the choice of functional units that can be used to complete the +// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many +// cycles should elapse from the start of this stage to the start of +// the next stage in the itinerary. For example: // -class InstrStage<int cycles, list<FuncUnit> units> { +// A stage is specified in one of two ways: +// +// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles +// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit +// +class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> { int Cycles = cycles; // length of stage in machine cycles list<FuncUnit> Units = units; // choice of functional units + int TimeInc = timeinc; // cycles till start of next stage } //===----------------------------------------------------------------------===// @@ -51,11 +60,13 @@ def NoItinerary : InstrItinClass; //===----------------------------------------------------------------------===// // Instruction itinerary data - These values provide a runtime map of an -// instruction itinerary class (name) to it's itinerary data.
+// instruction itinerary class (name) to its itinerary data. // -class InstrItinData<InstrItinClass Class, list<InstrStage> stages> { +class InstrItinData<InstrItinClass Class, list<InstrStage> stages, + list<int> operandcycles = []> { InstrItinClass TheClass = Class; list<InstrStage> Stages = stages; + list<int> OperandCycles = operandcycles; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h index 002d5fc70e5ea..e79f651e5da5f 100644 --- a/include/llvm/Target/TargetSelect.h +++ b/include/llvm/Target/TargetSelect.h @@ -1,4 +1,4 @@ -//===- TargetSelect.h - Target Selection & Registration -------------------===// +//===- TargetSelect.h - Target Selection & Registration ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,37 +20,76 @@ extern "C" { // Declare all of the target-initialization functions that are available. +#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo(); +#include "llvm/Config/Targets.def" + #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" - // Declare all of the available asm-printer initialization functions. + // Declare all of the available assembly printer initialization functions. #define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter(); #include "llvm/Config/AsmPrinters.def" + + // Declare all of the available assembly parser initialization functions. +#define LLVM_ASM_PARSER(TargetName) void LLVMInitialize##TargetName##AsmParser(); +#include "llvm/Config/AsmParsers.def" } namespace llvm { + /// InitializeAllTargetInfos - The main program should call this function if + /// it wants access to all available targets that LLVM is configured to + /// support, to make them available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. + inline void InitializeAllTargetInfos() { +#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo(); +#include "llvm/Config/Targets.def" + } + /// InitializeAllTargets - The main program should call this function if it - /// wants to link in all available targets that LLVM is configured to support. + /// wants access to all available target machines that LLVM is configured to + /// support, to make them available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. inline void InitializeAllTargets() { + // FIXME: Remove this, clients should do it. + InitializeAllTargetInfos(); + #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" } /// InitializeAllAsmPrinters - The main program should call this function if - /// it wants all asm printers that LLVM is configured to support. This will - /// cause them to be linked into its executable. + /// it wants all asm printers that LLVM is configured to support, to make them + /// available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. inline void InitializeAllAsmPrinters() { #define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter(); #include "llvm/Config/AsmPrinters.def" } + /// InitializeAllAsmParsers - The main program should call this function if it + /// wants all asm parsers that LLVM is configured to support, to make them + /// available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function.
+ inline void InitializeAllAsmParsers() { +#define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser(); +#include "llvm/Config/AsmParsers.def" + } + /// InitializeNativeTarget - The main program should call this function to /// initialize the native target corresponding to the host. This is useful /// for JIT applications to ensure that the target gets linked in correctly. + /// + /// It is legal for a client to make multiple calls to this function. inline bool InitializeNativeTarget() { // If we have a native target, initialize it to ensure it is linked in. #ifdef LLVM_NATIVE_ARCH -#define DoInit2(TARG) LLVMInitialize ## TARG () +#define DoInit2(TARG) \ + LLVMInitialize ## TARG ## Info (); \ + LLVMInitialize ## TARG () #define DoInit(T) DoInit2(T) DoInit(LLVM_NATIVE_ARCH); return false; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 364d4d0d3cc0f..700c64c8ca533 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -30,12 +30,15 @@ class SDTCisVT<int OpNum, ValueType vt> : SDTypeConstraint<OpNum> { class SDTCisPtrTy<int OpNum> : SDTypeConstraint<OpNum>; -// SDTCisInt - The specified operand has integer type. +// SDTCisInt - The specified operand has integer type. class SDTCisInt<int OpNum> : SDTypeConstraint<OpNum>; -// SDTCisFP - The specified operand has floating point type. +// SDTCisFP - The specified operand has floating-point type. class SDTCisFP<int OpNum> : SDTypeConstraint<OpNum>; +// SDTCisVec - The specified operand has a vector type. +class SDTCisVec<int OpNum> : SDTypeConstraint<OpNum>; + // SDTCisSameAs - The two specified operands have identical types. class SDTCisSameAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> { int OtherOperandNum = OtherOp; } @@ -345,7 +348,6 @@ def vsetcc : SDNode<"ISD::VSETCC" , SDTSetCC>; def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>; def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>; def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; -def ret : SDNode<"ISD::RET" , SDTNone, [SDNPHasChain]>; def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtarget.h index eca45eb0d7459..ac094f6644195 100644 --- a/include/llvm/Target/TargetSubtarget.h +++ b/include/llvm/Target/TargetSubtarget.h @@ -16,6 +16,9 @@ namespace llvm { +class SDep; +class SUnit; + //===----------------------------------------------------------------------===// /// /// TargetSubtarget - Generic base class for all target subtargets. All @@ -35,6 +38,15 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. virtual unsigned getSpecialAddressLatency() const { return 0; } + + // enablePostRAScheduler - Return true to enable + // post-register-allocation scheduling. + virtual bool enablePostRAScheduler() const { return false; } + + // adjustSchedDependency - Perform target specific adjustments to + // the latency of a schedule dependency. + virtual void adjustSchedDependency(SUnit *def, SUnit *use, + SDep& dep) const { } }; } // End llvm namespace diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 750969b36ebdb..d66ed896d80cf 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -19,7 +19,6 @@ namespace llvm { -class FunctionPass; class ModulePass; class Pass; class Function; @@ -174,12 +173,12 @@ ModulePass *createIPSCCPPass(); /// createLoopExtractorPass - This pass extracts all natural loops from the /// program into a function if it can.
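The TargetSelect.h initializers above are typically invoked once at program startup; a minimal sketch:

    #include "llvm/Target/TargetSelect.h"

    int main() {
      // JIT clients: register only the host target.
      llvm::InitializeNativeTarget();
      // A static compiler would instead expose every configured backend:
      //   llvm::InitializeAllTargetInfos();
      //   llvm::InitializeAllTargets();
      //   llvm::InitializeAllAsmPrinters();
      return 0;
    }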
/// -FunctionPass *createLoopExtractorPass(); +Pass *createLoopExtractorPass(); /// createSingleLoopExtractorPass - This pass extracts one natural loop from the /// program into a function if it can. This is used by bugpoint. /// -FunctionPass *createSingleLoopExtractorPass(); +Pass *createSingleLoopExtractorPass(); /// createBlockExtractorPass - This pass extracts all blocks (except those /// specified in the argument list) from the functions in the module. diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h index b370e964aa596..5d00f4215a832 100644 --- a/include/llvm/Transforms/IPO/InlinerPass.h +++ b/include/llvm/Transforms/IPO/InlinerPass.h @@ -14,16 +14,17 @@ // //===----------------------------------------------------------------------===// -#ifndef INLINER_H -#define INLINER_H +#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H +#define LLVM_TRANSFORMS_IPO_INLINERPASS_H #include "llvm/CallGraphSCCPass.h" -#include "llvm/Transforms/Utils/InlineCost.h" -#include "llvm/Target/TargetData.h" - namespace llvm { class CallSite; + class TargetData; + class InlineCost; + template + class SmallPtrSet; /// Inliner - This class contains all of the helper code which is used to /// perform the inlining operations that do not depend on the policy. @@ -39,17 +40,12 @@ struct Inliner : public CallGraphSCCPass { // Main run interface method, this implements the interface required by the // Pass class. - virtual bool runOnSCC(const std::vector &SCC); + virtual bool runOnSCC(std::vector &SCC); // doFinalization - Remove now-dead linkonce functions at the end of // processing to avoid breaking the SCC traversal. virtual bool doFinalization(CallGraph &CG); - // InlineCallIfPossible - bool InlineCallIfPossible(CallSite CS, CallGraph &CG, - const SmallPtrSet &SCCFunctions, - const TargetData &TD); - /// This method returns the value specified by the -inline-threshold value, /// specified on the command line. This is typically not directly needed. /// diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 698e248e7e64a..9794ffd429982 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -28,6 +28,9 @@ ModulePass *createBlockProfilerPass(); // Insert edge profiling instrumentation ModulePass *createEdgeProfilerPass(); +// Insert optimal edge profiling instrumentation +ModulePass *createOptimalEdgeProfilerPass(); + // Random Sampling Profiling Framework ModulePass* createNullProfilerRSPass(); FunctionPass* createRSProfilingPass(); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 29cd3e3dc6c66..2483768ead57c 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -220,12 +220,12 @@ extern const PassInfo *const BreakCriticalEdgesID; // // AU.addRequiredID(LoopSimplifyID); // -FunctionPass *createLoopSimplifyPass(); +Pass *createLoopSimplifyPass(); extern const PassInfo *const LoopSimplifyID; //===----------------------------------------------------------------------===// // -// LowerAllocations - Turn malloc and free instructions into %malloc and %free +// LowerAllocations - Turn malloc and free instructions into @malloc and @free // calls. 
// // AU.addRequiredID(LowerAllocationsID); @@ -276,20 +276,6 @@ FunctionPass *createBlockPlacementPass(); Pass *createLCSSAPass(); extern const PassInfo *const LCSSAID; -//===----------------------------------------------------------------------===// -// -// PredicateSimplifier - This pass collapses duplicate variables into one -// canonical form, and tries to simplify expressions along the way. -// -FunctionPass *createPredicateSimplifierPass(); - -//===----------------------------------------------------------------------===// -// -// GVN-PRE - This pass performs global value numbering and partial redundancy -// elimination. -// -FunctionPass *createGVNPREPass(); - //===----------------------------------------------------------------------===// // // GVN - This pass performs global value numbering and redundant load @@ -329,6 +315,11 @@ FunctionPass *createSimplifyHalfPowrLibCallsPass(); // FunctionPass *createCodeGenPreparePass(const TargetLowering *TLI = 0); +//===----------------------------------------------------------------------===// +// +// CodeGenLICM - This pass performs late LICM; hoisting constants out of loops. +// +Pass *createCodeGenLICMPass(); //===----------------------------------------------------------------------===// // @@ -339,10 +330,18 @@ extern const PassInfo *const InstructionNamerID; //===----------------------------------------------------------------------===// // -// SSI - This pass converts to Static Single Information form. +// SSI - This pass converts instructions to Static Single Information form +// on demand. // FunctionPass *createSSIPass(); +//===----------------------------------------------------------------------===// +// +// SSI - This pass converts every non-void instruction to Static Single +// Information form. +// +FunctionPass *createSSIEverythingPass(); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/AddrModeMatcher.h b/include/llvm/Transforms/Utils/AddrModeMatcher.h index 913a541f8b5d8..be601e257b8ca 100644 --- a/include/llvm/Transforms/Utils/AddrModeMatcher.h +++ b/include/llvm/Transforms/Utils/AddrModeMatcher.h @@ -20,7 +20,6 @@ #define LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Streams.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -30,18 +29,19 @@ class Instruction; class Value; class Type; class User; - +class raw_ostream; + /// ExtAddrMode - This is an extended version of TargetLowering::AddrMode /// which holds actual Value*'s for register values. struct ExtAddrMode : public TargetLowering::AddrMode { Value *BaseReg; Value *ScaledReg; ExtAddrMode() : BaseReg(0), ScaledReg(0) {} - void print(OStream &OS) const; + void print(raw_ostream &OS) const; void dump() const; }; -static inline OStream &operator<<(OStream &OS, const ExtAddrMode &AM) { +static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { AM.print(OS); return OS; } diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 95ffa46069601..e766d729e1b0e 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -126,10 +126,10 @@ bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, /// dest go to one block instead of each going to a different block, but isn't /// the standard definition of a "critical edge".
/// -bool SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P = 0, - bool MergeIdenticalEdges = false); +BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, + Pass *P = 0, bool MergeIdenticalEdges = false); -inline bool SplitCriticalEdge(BasicBlock *BB, succ_iterator SI, Pass *P = 0) { +inline BasicBlock *SplitCriticalEdge(BasicBlock *BB, succ_iterator SI, Pass *P = 0) { return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(), P); } @@ -143,7 +143,7 @@ inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI, Pass *P = 0) { TerminatorInst *TI = (*PI)->getTerminator(); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) if (TI->getSuccessor(i) == Succ) - MadeChange |= SplitCriticalEdge(TI, i, P); + MadeChange |= !!SplitCriticalEdge(TI, i, P); return MadeChange; } @@ -151,8 +151,9 @@ inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI, Pass *P = 0) { /// and return true, otherwise return false. This method requires that there be /// an edge between the two blocks. If P is specified, it updates the analyses /// described above. -inline bool SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst, Pass *P = 0, - bool MergeIdenticalEdges = false) { +inline BasicBlock *SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst, + Pass *P = 0, + bool MergeIdenticalEdges = false) { TerminatorInst *TI = Src->getTerminator(); unsigned i = 0; while (1) { @@ -180,8 +181,12 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P); /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. This function returns the new block. /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and -/// DominanceFrontier, but no other analyses. +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses. +/// In particular, it does not preserve LoopSimplify (because it's +/// complicated to handle the case where one of the edges being split +/// is an exit of a loop with other exits). +/// BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P = 0); diff --git a/include/llvm/Transforms/Utils/BasicInliner.h b/include/llvm/Transforms/Utils/BasicInliner.h index 6a570552d6405..4bca6b8c4417b 100644 --- a/include/llvm/Transforms/Utils/BasicInliner.h +++ b/include/llvm/Transforms/Utils/BasicInliner.h @@ -15,7 +15,7 @@ #ifndef BASICINLINER_H #define BASICINLINER_H -#include "llvm/Transforms/Utils/InlineCost.h" +#include "llvm/Analysis/InlineCost.h" namespace llvm { diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 840d9708cbaf5..5b15b5b871999 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -18,7 +18,6 @@ #ifndef LLVM_TRANSFORMS_UTILS_CLONING_H #define LLVM_TRANSFORMS_UTILS_CLONING_H -#include #include "llvm/ADT/DenseMap.h" namespace llvm { @@ -36,10 +35,11 @@ class CallSite; class Trace; class CallGraph; class TargetData; +class Loop; class LoopInfo; class LLVMContext; -template class LoopBase; -typedef LoopBase Loop; +class AllocaInst; +template class SmallVectorImpl; /// CloneModule - Return an exact copy of the specified module /// @@ -105,9 +105,9 @@ BasicBlock *CloneBasicBlock(const BasicBlock *BB, ClonedCodeInfo *CodeInfo = 0); -/// CloneLoop - Clone Loop. Clone dominator info for loop insiders.
Populate ValueMap -/// using old blocks to new blocks mapping. -Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI, +/// CloneLoop - Clone Loop. Clone dominator info for loop insiders. Populate +/// ValueMap using old blocks to new blocks mapping. +Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI, DenseMap &ValueMap, Pass *P); /// CloneFunction - Return a copy of the specified function, but without @@ -138,7 +138,7 @@ inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){ /// void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap &ValueMap, - std::vector &Returns, + SmallVectorImpl &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = 0); @@ -151,25 +151,11 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, /// used for things like CloneFunction or CloneModule. void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap &ValueMap, - std::vector &Returns, + SmallVectorImpl &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = 0, const TargetData *TD = 0); - -/// CloneTraceInto - Clone T into NewFunc. Original<->clone mapping is -/// saved in ValueMap. -/// -void CloneTraceInto(Function *NewFunc, Trace &T, - DenseMap &ValueMap, - const char *NameSuffix); - -/// CloneTrace - Returns a copy of the specified trace. -/// It takes a vector of basic blocks clones the basic blocks, removes internal -/// phi nodes, adds it to the same function as the original (although there is -/// no jump to it) and returns the new vector of basic blocks. -std::vector CloneTrace(const std::vector &origTrace); - /// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs @@ -183,9 +169,15 @@ std::vector CloneTrace(const std::vector &origTrace); /// If a non-null callgraph pointer is provided, these functions update the /// CallGraph to represent the program after inlining. /// -bool InlineFunction(CallInst *C, CallGraph *CG = 0, const TargetData *TD = 0); -bool InlineFunction(InvokeInst *II, CallGraph *CG = 0, const TargetData *TD =0); -bool InlineFunction(CallSite CS, CallGraph *CG = 0, const TargetData *TD = 0); +/// If StaticAllocas is non-null, InlineFunction populates it with all of the +/// static allocas that it inlines into the caller. 
+/// +bool InlineFunction(CallInst *C, CallGraph *CG = 0, const TargetData *TD = 0, + SmallVectorImpl *StaticAllocas = 0); +bool InlineFunction(InvokeInst *II, CallGraph *CG = 0, const TargetData *TD = 0, + SmallVectorImpl *StaticAllocas = 0); +bool InlineFunction(CallSite CS, CallGraph *CG = 0, const TargetData *TD = 0, + SmallVectorImpl *StaticAllocas = 0); } // End llvm namespace diff --git a/include/llvm/Transforms/Utils/FunctionUtils.h b/include/llvm/Transforms/Utils/FunctionUtils.h index dc7ef238652c5..785b08f82917d 100644 --- a/include/llvm/Transforms/Utils/FunctionUtils.h +++ b/include/llvm/Transforms/Utils/FunctionUtils.h @@ -14,13 +14,13 @@ #ifndef LLVM_TRANSFORMS_UTILS_FUNCTION_H #define LLVM_TRANSFORMS_UTILS_FUNCTION_H -#include "llvm/Analysis/LoopInfo.h" #include namespace llvm { class BasicBlock; class DominatorTree; class Function; + class Loop; /// ExtractCodeRegion - rip out a sequence of basic blocks into a new function /// diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index dd423fa3b1734..419029f10ee17 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -27,6 +27,7 @@ class PHINode; class AllocaInst; class ConstantExpr; class TargetData; +class LLVMContext; struct DbgInfoIntrinsic; template class SmallVectorImpl; @@ -82,7 +83,7 @@ void RecursivelyDeleteDeadPHINode(PHINode *PN); /// between them, moving the instructions in the predecessor into BB. This /// deletes the predecessor block. /// -void MergeBasicBlockIntoOnlyPred(BasicBlock *BB); +void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, Pass *P = 0); /// SimplifyCFG - This function is used to do simplification of a CFG. For @@ -107,7 +108,8 @@ bool FoldBranchToCommonDest(BranchInst *BI); /// invalidating the SSA information for the value. It returns the pointer to /// the alloca inserted to create a stack slot for X. /// -AllocaInst *DemoteRegToStack(Instruction &X, bool VolatileLoads = false, +AllocaInst *DemoteRegToStack(Instruction &X, + bool VolatileLoads = false, Instruction *AllocaPoint = 0); /// DemotePHIToStack - This function takes a virtual register computed by a phi diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h index 35cfaddb7379a..71a077e8625ee 100644 --- a/include/llvm/Transforms/Utils/PromoteMemToReg.h +++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h @@ -23,6 +23,7 @@ class AllocaInst; class DominatorTree; class DominanceFrontier; class AliasSetTracker; +class LLVMContext; /// isAllocaPromotable - Return true if this alloca is legal for promotion. /// This is true if there are only loads and stores to the alloca... @@ -39,6 +40,7 @@ bool isAllocaPromotable(const AllocaInst *AI); /// void PromoteMemToReg(const std::vector &Allocas, DominatorTree &DT, DominanceFrontier &DF, + LLVMContext &Context, AliasSetTracker *AST = 0); } // End llvm namespace diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h new file mode 100644 index 0000000000000..11b90d426778b --- /dev/null +++ b/include/llvm/Transforms/Utils/SSAUpdater.h @@ -0,0 +1,108 @@ +//===-- SSAUpdater.h - Unstructured SSA Update Tool -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file declares the SSAUpdater class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H +#define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H + +namespace llvm { + class Value; + class BasicBlock; + class Use; + class PHINode; + template + class SmallVectorImpl; + +/// SSAUpdater - This class updates SSA form for a set of values defined in +/// multiple blocks. This is used when code duplication or another unstructured +/// transformation wants to rewrite a set of uses of one value with uses of a +/// set of values. +class SSAUpdater { + /// AvailableVals - This keeps track of which value to use on a per-block + /// basis. When we insert PHI nodes, we keep track of them here. We use + /// WeakVH's for the value of the map because we RAUW PHI nodes when we + /// eliminate them, and want the WeakVH to track this. + //typedef DenseMap > AvailableValsTy; + void *AV; + + /// PrototypeValue is an arbitrary representative value, from which we derive names + /// and a type for PHI nodes. + Value *PrototypeValue; + + /// IncomingPredInfo - We use this as scratch space when doing our recursive + /// walk. This should only be used in GetValueInBlockInternal, normally it + /// should be empty. + //std::vector > > IncomingPredInfo; + void *IPI; + + /// InsertedPHIs - If this is non-null, the SSAUpdater adds all PHI nodes that + /// it creates to the vector. + SmallVectorImpl *InsertedPHIs; +public: + /// SSAUpdater constructor. If InsertedPHIs is specified, it will be filled + /// in with all PHI Nodes created by rewriting. + SSAUpdater(SmallVectorImpl *InsertedPHIs = 0); + ~SSAUpdater(); + + /// Initialize - Reset this object to get ready for a new set of SSA + /// updates. ProtoValue is the value used to name PHI nodes. + void Initialize(Value *ProtoValue); + + /// AddAvailableValue - Indicate that a rewritten value is available at the + /// end of the specified block with the specified value. + void AddAvailableValue(BasicBlock *BB, Value *V); + + /// HasValueForBlock - Return true if the SSAUpdater already has a value for + /// the specified block. + bool HasValueForBlock(BasicBlock *BB) const; + + /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is + /// live at the end of the specified block. + Value *GetValueAtEndOfBlock(BasicBlock *BB); + + /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that + /// is live in the middle of the specified block. + /// + /// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one + /// important case: if there is a definition of the rewritten value after the + /// 'use' in BB. Consider code like this: + /// + /// X1 = ... + /// SomeBB: + /// use(X) + /// X2 = ... + /// br Cond, SomeBB, OutBB + /// + /// In this case, there are two values (X1 and X2) added to the AvailableVals + /// set by the client of the rewriter, and those values are both live out of + /// their respective blocks. However, the use of X happens in the *middle* of + /// a block. Because of this, we need to insert a new PHI node in SomeBB to + /// merge the appropriate values, and this value isn't live out of the block. + /// + Value *GetValueInMiddleOfBlock(BasicBlock *BB); + + /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, + /// which use their value in the corresponding predecessor.
Note that this + /// will not work if the use is supposed to be rewritten to a value defined in + /// the same block as the use, but above it. Any 'AddAvailableValue's added + /// for the use's block will be considered to be below it. + void RewriteUse(Use &U); + +private: + Value *GetValueAtEndOfBlockInternal(BasicBlock *BB); + void operator=(const SSAUpdater&); // DO NOT IMPLEMENT + SSAUpdater(const SSAUpdater&); // DO NOT IMPLEMENT +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Transforms/Utils/SSI.h b/include/llvm/Transforms/Utils/SSI.h index 59dd6d026ce57..ff5bb7b8614d7 100644 --- a/include/llvm/Transforms/Utils/SSI.h +++ b/include/llvm/Transforms/Utils/SSI.h @@ -23,7 +23,6 @@ #define LLVM_TRANSFORMS_UTILS_SSI_H #include "llvm/Pass.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -44,7 +43,6 @@ namespace llvm { void getAnalysisUsage(AnalysisUsage &AU) const; - /// runOnMachineFunction - pass entry point bool runOnFunction(Function&); void createSSI(SmallVectorImpl &value); @@ -56,44 +54,36 @@ namespace llvm { // Stores variables created by SSI SmallPtrSet created; - // These variables are only live for each creation - unsigned num_values; - - // Has a bit for each variable, true if it needs to be created - // and false otherwise - BitVector needConstruction; - // Phis created by SSI - DenseMap phis; + DenseMap phis; // Sigmas created by SSI - DenseMap sigmas; + DenseMap sigmas; // Phi nodes that have a phi as operand and have to be fixed SmallPtrSet phisToFix; // List of definition points for every variable - SmallVector, 0> defsites; + DenseMap > defsites; // Basic Block of the original definition of each variable - SmallVector value_original; + DenseMap value_original; // Stack of last seen definition of a variable - SmallVector, 0> value_stack; + DenseMap > value_stack; - void insertSigmaFunctions(SmallVectorImpl &value); - void insertPhiFunctions(SmallVectorImpl &value); - void renameInit(SmallVectorImpl &value); + void insertSigmaFunctions(SmallPtrSet &value); + void insertSigma(TerminatorInst *TI, Instruction *I); + void insertPhiFunctions(SmallPtrSet &value); + void renameInit(SmallPtrSet &value); void rename(BasicBlock *BB); void substituteUse(Instruction *I); bool dominateAny(BasicBlock *BB, Instruction *value); void fixPhis(); - unsigned getPositionPhi(PHINode *PN); - unsigned getPositionSigma(PHINode *PN); - - unsigned isUsedInTerminator(CmpInst *CI); + Instruction* getPositionPhi(PHINode *PN); + Instruction* getPositionSigma(PHINode *PN); void init(SmallVectorImpl &value); void clean(); diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index a9c0bf6968a7a..3d5ee1a62b8a6 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -16,10 +16,10 @@ #ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H #define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H -#include "llvm/Analysis/LoopInfo.h" - namespace llvm { +class Loop; +class LoopInfo; class LPPassManager; bool UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM); diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index ed33413641810..d31edab5b551e 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -20,9 +20,10 @@ namespace llvm { class Value; class Instruction; + class LLVMContext; typedef DenseMap ValueMapTy; -
Value *MapValue(const Value *V, ValueMapTy &VM); + Value *MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context); void RemapInstruction(Instruction *I, ValueMapTy &VM); } // End llvm namespace diff --git a/include/llvm/Type.h b/include/llvm/Type.h index d439233d8c05d..4a470af9ca5b0 100644 --- a/include/llvm/Type.h +++ b/include/llvm/Type.h @@ -12,11 +12,11 @@ #define LLVM_TYPE_H #include "llvm/AbstractTypeUser.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DataTypes.h" #include "llvm/System/Atomic.h" #include "llvm/ADT/GraphTraits.h" -#include "llvm/ADT/iterator.h" #include #include @@ -66,6 +66,7 @@ public: /// value, you can cast to a "DerivedType" subclass (see DerivedTypes.h) /// Note: If you add an element to this, you need to add an element to the /// Type::getPrimitiveType function, or else things will break! + /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding. /// enum TypeID { // PrimitiveTypes .. make sure LastPrimitiveTyID stays up to date @@ -105,6 +106,10 @@ private: /// mutable sys::cas_flag RefCount; + /// Context - This refers to the LLVMContext in which this type was uniqued. + LLVMContext &Context; + friend class LLVMContextImpl; + const Type *getForwardedTypeInternal() const; // Some Type instances are allocated as arrays, some aren't. So we provide @@ -112,8 +117,10 @@ private: void destroy() const; // const is a lie, this does "delete this"! protected: - explicit Type(TypeID id) : ID(id), Abstract(false), SubclassData(0), - RefCount(0), ForwardType(0), NumContainedTys(0), + explicit Type(LLVMContext &C, TypeID id) : + ID(id), Abstract(false), SubclassData(0), + RefCount(0), Context(C), + ForwardType(0), NumContainedTys(0), ContainedTys(0) {} virtual ~Type() { assert(AbstractTypeUsers.empty() && "Abstract types remain"); @@ -160,7 +167,6 @@ protected: public: void print(raw_ostream &O) const; - void print(std::ostream &O) const; /// @brief Debugging support: print to stderr void dump() const; @@ -169,6 +175,9 @@ public: /// module). void dump(const Module *Context) const; + /// getContext - Fetch the LLVMContext in which this type was uniqued. + LLVMContext &getContext() const { return Context; } + //===--------------------------------------------------------------------===// // Property accessors for dealing with types... Some of these virtual methods // are defined in private classes defined in Type.cpp for primitive types. @@ -179,6 +188,30 @@ public: /// inline TypeID getTypeID() const { return ID; } + /// isVoidTy - Return true if this is 'void'. + bool isVoidTy() const { return ID == VoidTyID; } + + /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type. + bool isFloatTy() const { return ID == FloatTyID; } + + /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type. + bool isDoubleTy() const { return ID == DoubleTyID; } + + /// isX86_FP80Ty - Return true if this is x86 long double. + bool isX86_FP80Ty() const { return ID == X86_FP80TyID; } + + /// isFP128Ty - Return true if this is 'fp128'. + bool isFP128Ty() const { return ID == FP128TyID; } + + /// isPPC_FP128Ty - Return true if this is powerpc long double. + bool isPPC_FP128Ty() const { return ID == PPC_FP128TyID; } + + /// isLabelTy - Return true if this is 'label'. + bool isLabelTy() const { return ID == LabelTyID; } + + /// isMetadataTy - Return true if this is 'metadata'. + bool isMetadataTy() const { return ID == MetadataTyID; } + /// getDescription - Return the string representation of the type. 
std::string getDescription() const; @@ -191,7 +224,7 @@ public: /// bool isIntOrIntVector() const; - /// isFloatingPoint - Return true if this is one of the two floating point + /// isFloatingPoint - Return true if this is one of the five floating point /// types bool isFloatingPoint() const { return ID == FloatTyID || ID == DoubleTyID || ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; } @@ -268,6 +301,11 @@ public: /// This will return zero if the type does not have a size or is not a /// primitive type. /// + /// Note that this may not reflect the size of memory allocated for an + /// instance of the type or the number of bytes that are written when an + /// instance of the type is stored to memory. The TargetData class provides + /// additional query functions to provide this information. + /// unsigned getPrimitiveSizeInBits() const; /// getScalarSizeInBits - If this is a vector type, return the @@ -292,7 +330,7 @@ public: /// getVAArgsPromotedType - Return the type an argument of this type /// will be promoted to if passed through a variable argument /// function. - const Type *getVAArgsPromotedType() const; + const Type *getVAArgsPromotedType(LLVMContext &C) const; /// getScalarType - If this is a vector type, return the element type, /// otherwise return this. @@ -324,14 +362,39 @@ public: // /// getPrimitiveType - Return a type based on an identifier. - static const Type *getPrimitiveType(TypeID IDNumber); + static const Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber); //===--------------------------------------------------------------------===// // These are the builtin types that are always available... // - static const Type *VoidTy, *LabelTy, *FloatTy, *DoubleTy, *MetadataTy; - static const Type *X86_FP80Ty, *FP128Ty, *PPC_FP128Ty; - static const IntegerType *Int1Ty, *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty; + static const Type *getVoidTy(LLVMContext &C); + static const Type *getLabelTy(LLVMContext &C); + static const Type *getFloatTy(LLVMContext &C); + static const Type *getDoubleTy(LLVMContext &C); + static const Type *getMetadataTy(LLVMContext &C); + static const Type *getX86_FP80Ty(LLVMContext &C); + static const Type *getFP128Ty(LLVMContext &C); + static const Type *getPPC_FP128Ty(LLVMContext &C); + static const IntegerType *getInt1Ty(LLVMContext &C); + static const IntegerType *getInt8Ty(LLVMContext &C); + static const IntegerType *getInt16Ty(LLVMContext &C); + static const IntegerType *getInt32Ty(LLVMContext &C); + static const IntegerType *getInt64Ty(LLVMContext &C); + + //===--------------------------------------------------------------------===// + // Convenience methods for getting pointer types with one of the above builtin + // types as pointee. 
+ // + static const PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0); + static const PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0); /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Type *) { return true; } @@ -367,7 +430,7 @@ public: /// getPointerTo - Return a pointer to the current type. This is equivalent /// to PointerType::get(Foo, AddrSpace). - PointerType *getPointerTo(unsigned AddrSpace = 0) const; + const PointerType *getPointerTo(unsigned AddrSpace = 0) const; private: /// isSizedDerivedType - Derived types like structures and arrays are sized @@ -459,7 +522,6 @@ template <> inline bool isa_impl(const Type &Ty) { return Ty.getTypeID() == Type::PointerTyID; } -std::ostream &operator<<(std::ostream &OS, const Type &T); raw_ostream &operator<<(raw_ostream &OS, const Type &T); } // End llvm namespace diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h index e1459b0579417..4dd3a4af2a487 100644 --- a/include/llvm/TypeSymbolTable.h +++ b/include/llvm/TypeSymbolTable.h @@ -19,6 +19,8 @@ namespace llvm { +class StringRef; + /// This class provides a symbol table of name/type pairs with operations to /// support constructing, searching and iterating over the symbol table. The /// class derives from AbstractTypeUser so that the contents of the symbol @@ -55,14 +57,24 @@ public: /// incrementing an integer and appending it to the name, if necessary /// @returns the unique name /// @brief Get a unique name for a type - std::string getUniqueName(const std::string &BaseName) const; + std::string getUniqueName(const StringRef &BaseName) const; /// This method finds the type with the given \p name in the type map /// and returns it. /// @returns null if the name is not found, otherwise the Type /// associated with the \p name. /// @brief Lookup a type by name. - Type* lookup(const std::string& name) const; + Type *lookup(const StringRef &name) const; + + /// Lookup the type associated with name. + /// @returns end() if the name is not found, or an iterator at the entry for + /// Type. + iterator find(const StringRef &name); + + /// Lookup the type associated with name. + /// @returns end() if the name is not found, or an iterator at the entry for + /// Type. + const_iterator find(const StringRef &name) const; /// @returns true iff the symbol table is empty. /// @brief Determine if the symbol table is empty @@ -102,7 +114,7 @@ public: /// a many-to-one mapping between names and types. This method allows a type /// with an existing entry in the symbol table to get a new name. /// @brief Insert a type under a new name. - void insert(const std::string &Name, const Type *Typ); + void insert(const StringRef &Name, const Type *Typ); /// Remove a type at the specified position in the symbol table. /// @returns the removed Type. 
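
[A quick aside for readers following the Type API change in the hunks above: the static Type::*Ty globals are gone, and types are now fetched from a specific LLVMContext through the new accessors. The sketch below is not part of the patch; the helper name makeUnaryIntFnTy is hypothetical and only illustrates the calling convention the new declarations imply.]

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  #include <vector>

  // Build the type 'i32 (i32)' in a given context. Before this change the
  // code would have read Type::Int32Ty; now each type is uniqued in, and
  // fetched from, a particular LLVMContext.
  static const llvm::FunctionType *makeUnaryIntFnTy(llvm::LLVMContext &Ctx) {
    using namespace llvm;
    const Type *I32 = Type::getInt32Ty(Ctx);
    std::vector<const Type*> Params(1, I32);
    return FunctionType::get(I32, Params, /*isVarArg=*/false);
  }

[Since each Type now records the LLVMContext it was uniqued in (the new Context member and getContext() accessor above), separate contexts no longer share one global type table.]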
diff --git a/include/llvm/Use.h b/include/llvm/Use.h index 489dbc50a0412..970f69b9da883 100644 --- a/include/llvm/Use.h +++ b/include/llvm/Use.h @@ -26,8 +26,8 @@ #define LLVM_USE_H #include "llvm/Support/Casting.h" -#include "llvm/ADT/iterator.h" #include "llvm/ADT/PointerIntPair.h" +#include namespace llvm { @@ -158,8 +158,9 @@ template<> struct simplify_type { template // UserTy == 'User' or 'const User' -class value_use_iterator : public forward_iterator { - typedef forward_iterator super; +class value_use_iterator : public std::iterator { + typedef std::iterator super; typedef value_use_iterator _Self; Use *U; diff --git a/include/llvm/User.h b/include/llvm/User.h index 8a244068b24a0..f8277952ee4ba 100644 --- a/include/llvm/User.h +++ b/include/llvm/User.h @@ -41,7 +41,6 @@ struct OperandTraits { struct Layout { typedef U overlay; }; - static inline void *allocate(unsigned); }; class User : public Value { diff --git a/include/llvm/Value.h b/include/llvm/Value.h index a38d8cb8d9846..6b393f603a61b 100644 --- a/include/llvm/Value.h +++ b/include/llvm/Value.h @@ -16,8 +16,9 @@ #include "llvm/AbstractTypeUser.h" #include "llvm/Use.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" -#include #include namespace llvm { @@ -40,6 +41,8 @@ typedef StringMapEntry ValueName; class raw_ostream; class AssemblyAnnotationWriter; class ValueHandleBase; +class LLVMContext; +class MetadataContext; //===----------------------------------------------------------------------===// // Value Class //===----------------------------------------------------------------------===// @@ -61,7 +64,14 @@ class ValueHandleBase; class Value { const unsigned char SubclassID; // Subclass identifier (for isa/dyn_cast) unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this? + unsigned char HasMetadata : 1; // Has metadata attached to this? protected: + /// SubclassOptionalData - This member is similar to SubclassData; however, it + /// is for holding information which may be used to aid optimization, but + /// which may be cleared to zero without affecting conservative + /// interpretation. + unsigned char SubclassOptionalData : 7; + /// SubclassData - This member is defined by this class, but is not used for /// anything. Subclasses can use it to hold whatever state they find useful. /// This field is initialized to zero by the ctor. @@ -73,57 +83,62 @@ private: friend class ValueSymbolTable; // Allow ValueSymbolTable to directly mod Name. friend class SymbolTable; // Allow SymbolTable to directly poke Name. friend class ValueHandleBase; + friend class MetadataContext; + friend class AbstractTypeUser; ValueName *Name; void operator=(const Value &); // Do not implement Value(const Value &); // Do not implement +protected: + /// printCustom - Value subclasses can override this to implement custom + /// printing behavior. + virtual void printCustom(raw_ostream &O) const; + public: Value(const Type *Ty, unsigned scid); virtual ~Value(); /// dump - Support for debugging, callable in GDB: V->dump() // - virtual void dump() const; + void dump() const; /// print - Implement operator<< on Value. /// - void print(std::ostream &O, AssemblyAnnotationWriter *AAW = 0) const; void print(raw_ostream &O, AssemblyAnnotationWriter *AAW = 0) const; /// All values are typed, get the type of this value. /// inline const Type *getType() const { return VTy; } + /// All values hold a context through their type. + LLVMContext &getContext() const; + // All values can potentially be named...
inline bool hasName() const { return Name != 0; } ValueName *getValueName() const { return Name; } - - /// getNameStart - Return a pointer to a null terminated string for this name. - /// Note that names can have null characters within the string as well as at - /// their end. This always returns a non-null pointer. - const char *getNameStart() const; - /// getNameEnd - Return a pointer to the end of the name. - const char *getNameEnd() const { return getNameStart() + getNameLen(); } - - /// isName - Return true if this value has the name specified by the provided - /// nul terminated string. - bool isName(const char *N) const; - /// getNameLen - Return the length of the string, correctly handling nul - /// characters embedded into them. - unsigned getNameLen() const; - - /// getName()/getNameStr() - Return the name of the specified value, - /// *constructing a string* to hold it. Because these are guaranteed to - /// construct a string, they are very expensive and should be avoided. - std::string getName() const { return getNameStr(); } + /// getName() - Return a constant reference to the value's name. This is cheap + /// and guaranteed to return the same reference as long as the value is not + /// modified. + /// + /// This is currently guaranteed to return a StringRef for which data() points + /// to a valid null terminated string. The use of StringRef.data() is + /// deprecated here, however, and clients should not rely on it. If such + /// behavior is needed, clients should use expensive getNameStr(), or switch + /// to an interface that does not depend on null termination. + StringRef getName() const; + + /// getNameStr() - Return the name of the specified value, *constructing a + /// string* to hold it. This is guaranteed to construct a string and is very + /// expensive, clients should use getName() unless necessary. std::string getNameStr() const; - - void setName(const std::string &name); - void setName(const char *Name, unsigned NameLen); - void setName(const char *Name); // Takes a null-terminated string. + /// setName() - Change the name of the value, choosing a new unique name if + /// the provided name is taken. + /// + /// \arg Name - The new name; or "" if the value's name should be removed. + void setName(const Twine &Name); /// takeName - transfer the name from V to this value, setting V's name to @@ -203,15 +218,16 @@ public: ConstantStructVal, // This is an instance of ConstantStruct ConstantVectorVal, // This is an instance of ConstantVector ConstantPointerNullVal, // This is an instance of ConstantPointerNull - MDStringVal, // This is an instance of MDString MDNodeVal, // This is an instance of MDNode + MDStringVal, // This is an instance of MDString + NamedMDNodeVal, // This is an instance of NamedMDNode InlineAsmVal, // This is an instance of InlineAsm PseudoSourceValueVal, // This is an instance of PseudoSourceValue InstructionVal, // This is an instance of Instruction // Markers: ConstantFirstVal = FunctionVal, - ConstantLastVal = MDNodeVal + ConstantLastVal = ConstantPointerNullVal }; /// getValueID - Return an ID for the concrete type of this object. This is @@ -227,6 +243,25 @@ public: return SubclassID; } + /// getRawSubclassOptionalData - Return the raw optional flags value + /// contained in this value. This should only be used when testing two + /// Values for equivalence. 
+ unsigned getRawSubclassOptionalData() const { + return SubclassOptionalData; + } + + /// hasSameSubclassOptionalData - Test whether the optional flags contained + /// in this value are equal to the optional flags in the given value. + bool hasSameSubclassOptionalData(const Value *V) const { + return SubclassOptionalData == V->SubclassOptionalData; + } + + /// intersectOptionalDataWith - Clear any optional flags in this value + /// that are not also set in the given value. + void intersectOptionalDataWith(const Value *V) { + SubclassOptionalData &= V->SubclassOptionalData; + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Value *) { return true; // Values are always values. @@ -263,12 +298,11 @@ public: const BasicBlock *PredBB) const{ return const_cast(this)->DoPHITranslation(CurBB, PredBB); } + + /// hasMetadata - Return true if metadata is attached with this value. + bool hasMetadata() const { return HasMetadata; } }; -inline std::ostream &operator<<(std::ostream &OS, const Value &V) { - V.print(OS); - return OS; -} inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) { V.print(OS); return OS; diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h index 752dd2f24fecb..4f8ebe800172a 100644 --- a/include/llvm/ValueSymbolTable.h +++ b/include/llvm/ValueSymbolTable.h @@ -23,7 +23,9 @@ namespace llvm { class SymbolTableListTraits; class BasicBlock; class Function; + class NamedMDNode; class Module; + class StringRef; /// This class provides a symbol table of name/value pairs. It is essentially /// a std::map but has a controlled interface provided by @@ -37,6 +39,7 @@ class ValueSymbolTable { friend class SymbolTableListTraits; friend class SymbolTableListTraits; friend class SymbolTableListTraits; + friend class SymbolTableListTraits; /// @name Types /// @{ public: @@ -62,12 +65,11 @@ public: /// @{ public: - /// This method finds the value with the given \p name in the + /// This method finds the value with the given \p Name in the /// the symbol table. - /// @returns the value associated with the \p name + /// @returns the value associated with the \p Name /// @brief Lookup a named Value. - Value *lookup(const std::string &name) const; - Value *lookup(const char *NameBegin, const char *NameEnd) const; + Value *lookup(const StringRef &Name) const { return vmap.lookup(Name); } /// @returns true iff the symbol table is empty /// @brief Determine if the symbol table is empty @@ -110,7 +112,7 @@ private: /// createValueName - This method attempts to create a value name and insert /// it into the symbol table with the specified name. If it conflicts, it /// auto-renames the name and returns that instead. - ValueName *createValueName(const char *NameStart, unsigned NameLen, Value *V); + ValueName *createValueName(const StringRef &Name, Value *V); /// This method removes a value from the symbol table. It leaves the /// ValueName attached to the value, but it is no longer inserted in the diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index c5523ec4634d8..c456990d8ae2c 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -88,7 +88,7 @@ AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) { AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) { - return alias(L->getOperand(0), TD->getTypeStoreSize(L->getType()), + return alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size) ? 
Ref : NoModRef; } @@ -97,7 +97,7 @@ AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) { // If the stored address cannot alias the pointer in question, then the // pointer cannot be modified by the store. if (!alias(S->getOperand(1), - TD->getTypeStoreSize(S->getOperand(0)->getType()), P, Size)) + getTypeStoreSize(S->getOperand(0)->getType()), P, Size)) return NoModRef; // If the pointer is a pointer to constant memory, then it could not have been @@ -177,18 +177,23 @@ AliasAnalysis::~AliasAnalysis() {} /// AliasAnalysis interface before any other methods are called. /// void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { - TD = &P->getAnalysis(); + TD = P->getAnalysisIfAvailable(); AA = &P->getAnalysis(); } // getAnalysisUsage - All alias analysis implementations should invoke this -// directly (using AliasAnalysis::getAnalysisUsage(AU)) to make sure that -// TargetData is required by the pass. +// directly (using AliasAnalysis::getAnalysisUsage(AU)). void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); // All AA's need TargetData. AU.addRequired(); // All AA's chain } +/// getTypeStoreSize - Return the TargetData store size for the given type, +/// if known, or a conservative value otherwise. +/// +unsigned AliasAnalysis::getTypeStoreSize(const Type *Ty) { + return TD ? TD->getTypeStoreSize(Ty) : ~0u; +} + /// canBasicBlockModify - Return true if it is possible for execution of the /// specified basic block to modify the value pointed to by Ptr. /// @@ -228,13 +233,15 @@ bool llvm::isNoAliasCall(const Value *V) { /// isIdentifiedObject - Return true if this pointer refers to a distinct and /// identifiable object. This returns true for: -/// Global Variables and Functions +/// Global Variables and Functions (but not Global Aliases) /// Allocas and Mallocs /// ByVal and NoAlias Arguments /// NoAlias returns /// bool llvm::isIdentifiedObject(const Value *V) { - if (isa(V) || isa(V) || isNoAliasCall(V)) + if (isa(V) || isNoAliasCall(V)) + return true; + if (isa(V) && !isa(V)) return true; if (const Argument *A = dyn_cast(V)) return A->hasNoAliasAttr() || A->hasByValAttr(); diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 4362d7d301a80..272c871ce239a 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -18,11 +18,12 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt -PrintAll("count-aa-print-all-queries", cl::ReallyHidden); +PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); static cl::opt PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); @@ -41,33 +42,33 @@ namespace { } void printLine(const char *Desc, unsigned Val, unsigned Sum) { - cerr << " " << Val << " " << Desc << " responses (" - << Val*100/Sum << "%)\n"; + errs() << " " << Val << " " << Desc << " responses (" + << Val*100/Sum << "%)\n"; } ~AliasAnalysisCounter() { unsigned AASum = No+May+Must; unsigned MRSum = NoMR+JustRef+JustMod+MR; if (AASum + MRSum) { // Print a report if any counted queries occurred... 
- cerr << "\n===== Alias Analysis Counter Report =====\n" - << " Analysis counted: " << Name << "\n" - << " " << AASum << " Total Alias Queries Performed\n"; + errs() << "\n===== Alias Analysis Counter Report =====\n" + << " Analysis counted: " << Name << "\n" + << " " << AASum << " Total Alias Queries Performed\n"; if (AASum) { printLine("no alias", No, AASum); printLine("may alias", May, AASum); printLine("must alias", Must, AASum); - cerr << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" - << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n"; + errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" + << May*100/AASum << "%/" << Must*100/AASum<<"%\n\n"; } - cerr << " " << MRSum << " Total Mod/Ref Queries Performed\n"; + errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; if (MRSum) { printLine("no mod/ref", NoMR, MRSum); printLine("ref", JustRef, MRSum); printLine("mod", JustMod, MRSum); printLine("mod/ref", MR, MRSum); - cerr << " Mod/Ref Analysis Counter Summary: " <().pointsToConstantMemory(P); } - bool doesNotAccessMemory(CallSite CS) { - return getAnalysis().doesNotAccessMemory(CS); - } - bool doesNotAccessMemory(Function *F) { - return getAnalysis().doesNotAccessMemory(F); - } - bool onlyReadsMemory(CallSite CS) { - return getAnalysis().onlyReadsMemory(CS); - } - bool onlyReadsMemory(Function *F) { - return getAnalysis().onlyReadsMemory(F); - } - // Forwarding functions: just delegate to a real AA implementation, counting // the number of responses... @@ -131,20 +119,20 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size, const char *AliasString; switch (R) { - default: assert(0 && "Unknown alias type!"); + default: llvm_unreachable("Unknown alias type!"); case NoAlias: No++; AliasString = "No alias"; break; case MayAlias: May++; AliasString = "May alias"; break; case MustAlias: Must++; AliasString = "Must alias"; break; } if (PrintAll || (PrintAllFailures && R == MayAlias)) { - cerr << AliasString << ":\t"; - cerr << "[" << V1Size << "B] "; - WriteAsOperand(*cerr.stream(), V1, true, M); - cerr << ", "; - cerr << "[" << V2Size << "B] "; - WriteAsOperand(*cerr.stream(), V2, true, M); - cerr << "\n"; + errs() << AliasString << ":\t"; + errs() << "[" << V1Size << "B] "; + WriteAsOperand(errs(), V1, true, M); + errs() << ", "; + errs() << "[" << V2Size << "B] "; + WriteAsOperand(errs(), V2, true, M); + errs() << "\n"; } return R; @@ -156,7 +144,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) { const char *MRString; switch (R) { - default: assert(0 && "Unknown mod/ref type!"); + default: llvm_unreachable("Unknown mod/ref type!"); case NoModRef: NoMR++; MRString = "NoModRef"; break; case Ref: JustRef++; MRString = "JustRef"; break; case Mod: JustMod++; MRString = "JustMod"; break; @@ -164,10 +152,10 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) { } if (PrintAll || (PrintAllFailures && R == ModRef)) { - cerr << MRString << ": Ptr: "; - cerr << "[" << Size << "B] "; - WriteAsOperand(*cerr.stream(), P, true, M); - cerr << "\t<->" << *CS.getInstruction(); + errs() << MRString << ": Ptr: "; + errs() << "[" << Size << "B] "; + WriteAsOperand(errs(), P, true, M); + errs() << "\t<->" << *CS.getInstruction(); } return R; } diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index 07820e350681c..bb95c01e2ea9e 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -29,9 +29,8 @@ #include 
"llvm/Support/InstIterator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" -#include -#include +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" using namespace llvm; static cl::opt PrintAll("print-all-alias-modref-info", cl::ReallyHidden); @@ -81,18 +80,21 @@ X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true); FunctionPass *llvm::createAAEvalPass() { return new AAEval(); } -static void PrintResults(const char *Msg, bool P, const Value *V1, const Value *V2, - const Module *M) { +static void PrintResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { if (P) { - std::stringstream s1, s2; - WriteAsOperand(s1, V1, true, M); - WriteAsOperand(s2, V2, true, M); - std::string o1(s1.str()), o2(s2.str()); + std::string o1, o2; + { + raw_string_ostream os1(o1), os2(o2); + WriteAsOperand(os1, V1, true, M); + WriteAsOperand(os2, V2, true, M); + } + if (o2 < o1) - std::swap(o1, o2); - cerr << " " << Msg << ":\t" - << o1 << ", " - << o2 << "\n"; + std::swap(o1, o2); + errs() << " " << Msg << ":\t" + << o1 << ", " + << o2 << "\n"; } } @@ -100,19 +102,17 @@ static inline void PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr, Module *M) { if (P) { - cerr << " " << Msg << ": Ptr: "; - WriteAsOperand(*cerr.stream(), Ptr, true, M); - cerr << "\t<->" << *I; + errs() << " " << Msg << ": Ptr: "; + WriteAsOperand(errs(), Ptr, true, M); + errs() << "\t<->" << *I << '\n'; } } bool AAEval::runOnFunction(Function &F) { AliasAnalysis &AA = getAnalysis(); - const TargetData &TD = AA.getTargetData(); - - std::set Pointers; - std::set CallSites; + SetVector Pointers; + SetVector CallSites; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) if (isa(I->getType())) // Add all pointer arguments @@ -136,20 +136,20 @@ bool AAEval::runOnFunction(Function &F) { if (PrintNoAlias || PrintMayAlias || PrintMustAlias || PrintNoModRef || PrintMod || PrintRef || PrintModRef) - cerr << "Function: " << F.getName() << ": " << Pointers.size() - << " pointers, " << CallSites.size() << " call sites\n"; + errs() << "Function: " << F.getName() << ": " << Pointers.size() + << " pointers, " << CallSites.size() << " call sites\n"; // iterate over the worklist, and run the full (n^2)/2 disambiguations - for (std::set::iterator I1 = Pointers.begin(), E = Pointers.end(); + for (SetVector::iterator I1 = Pointers.begin(), E = Pointers.end(); I1 != E; ++I1) { - unsigned I1Size = 0; + unsigned I1Size = ~0u; const Type *I1ElTy = cast((*I1)->getType())->getElementType(); - if (I1ElTy->isSized()) I1Size = TD.getTypeStoreSize(I1ElTy); + if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); - for (std::set::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { - unsigned I2Size = 0; + for (SetVector::iterator I2 = Pointers.begin(); I2 != I1; ++I2) { + unsigned I2Size = ~0u; const Type *I2ElTy =cast((*I2)->getType())->getElementType(); - if (I2ElTy->isSized()) I2Size = TD.getTypeStoreSize(I2ElTy); + if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); switch (AA.alias(*I1, I1Size, *I2, I2Size)) { case AliasAnalysis::NoAlias: @@ -162,21 +162,21 @@ bool AAEval::runOnFunction(Function &F) { PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent()); ++MustAlias; break; default: - cerr << "Unknown alias query result!\n"; + errs() << "Unknown alias query result!\n"; } } } // Mod/ref alias analysis: compare all pairs of calls and values - for 
(std::set::iterator C = CallSites.begin(), + for (SetVector::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { Instruction *I = C->getInstruction(); - for (std::set::iterator V = Pointers.begin(), Ve = Pointers.end(); + for (SetVector::iterator V = Pointers.begin(), Ve = Pointers.end(); V != Ve; ++V) { - unsigned Size = 0; + unsigned Size = ~0u; const Type *ElTy = cast((*V)->getType())->getElementType(); - if (ElTy->isSized()) Size = TD.getTypeStoreSize(ElTy); + if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); switch (AA.getModRefInfo(*C, *V, Size)) { case AliasAnalysis::NoModRef: @@ -192,7 +192,7 @@ bool AAEval::runOnFunction(Function &F) { PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent()); ++ModRef; break; default: - cerr << "Unknown alias query result!\n"; + errs() << "Unknown alias query result!\n"; } } } @@ -201,45 +201,45 @@ bool AAEval::runOnFunction(Function &F) { } static void PrintPercent(unsigned Num, unsigned Sum) { - cerr << "(" << Num*100ULL/Sum << "." - << ((Num*1000ULL/Sum) % 10) << "%)\n"; + errs() << "(" << Num*100ULL/Sum << "." + << ((Num*1000ULL/Sum) % 10) << "%)\n"; } bool AAEval::doFinalization(Module &M) { unsigned AliasSum = NoAlias + MayAlias + MustAlias; - cerr << "===== Alias Analysis Evaluator Report =====\n"; + errs() << "===== Alias Analysis Evaluator Report =====\n"; if (AliasSum == 0) { - cerr << " Alias Analysis Evaluator Summary: No pointers!\n"; + errs() << " Alias Analysis Evaluator Summary: No pointers!\n"; } else { - cerr << " " << AliasSum << " Total Alias Queries Performed\n"; - cerr << " " << NoAlias << " no alias responses "; + errs() << " " << AliasSum << " Total Alias Queries Performed\n"; + errs() << " " << NoAlias << " no alias responses "; PrintPercent(NoAlias, AliasSum); - cerr << " " << MayAlias << " may alias responses "; + errs() << " " << MayAlias << " may alias responses "; PrintPercent(MayAlias, AliasSum); - cerr << " " << MustAlias << " must alias responses "; + errs() << " " << MustAlias << " must alias responses "; PrintPercent(MustAlias, AliasSum); - cerr << " Alias Analysis Evaluator Pointer Alias Summary: " - << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" - << MustAlias*100/AliasSum << "%\n"; + errs() << " Alias Analysis Evaluator Pointer Alias Summary: " + << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/" + << MustAlias*100/AliasSum << "%\n"; } // Display the summary for mod/ref analysis unsigned ModRefSum = NoModRef + Mod + Ref + ModRef; if (ModRefSum == 0) { - cerr << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; + errs() << " Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n"; } else { - cerr << " " << ModRefSum << " Total ModRef Queries Performed\n"; - cerr << " " << NoModRef << " no mod/ref responses "; + errs() << " " << ModRefSum << " Total ModRef Queries Performed\n"; + errs() << " " << NoModRef << " no mod/ref responses "; PrintPercent(NoModRef, ModRefSum); - cerr << " " << Mod << " mod responses "; + errs() << " " << Mod << " mod responses "; PrintPercent(Mod, ModRefSum); - cerr << " " << Ref << " ref responses "; + errs() << " " << Ref << " ref responses "; PrintPercent(Ref, ModRefSum); - cerr << " " << ModRef << " mod & ref responses "; + errs() << " " << ModRef << " mod & ref responses "; PrintPercent(ModRef, ModRefSum); - cerr << " Alias Analysis Evaluator Mod/Ref Summary: " - << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" - << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; + errs() << " Alias 
Analysis Evaluator Mod/Ref Summary: " + << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/" + << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n"; } return false; diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 18c2b66505f63..b056d0091a099 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -20,8 +20,10 @@ #include "llvm/Target/TargetData.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// mergeSetIn - Merge the specified alias set into this alias set. @@ -186,8 +188,8 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { void AliasSetTracker::clear() { // Delete all the PointerRec entries. - for (DenseMap::iterator I = PointerMap.begin(), - E = PointerMap.end(); I != E; ++I) + for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); + I != E; ++I) I->second->eraseFromList(); PointerMap.clear(); @@ -279,7 +281,7 @@ bool AliasSetTracker::add(Value *Ptr, unsigned Size) { bool AliasSetTracker::add(LoadInst *LI) { bool NewPtr; AliasSet &AS = addPointer(LI->getOperand(0), - AA.getTargetData().getTypeStoreSize(LI->getType()), + AA.getTypeStoreSize(LI->getType()), AliasSet::Refs, NewPtr); if (LI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -289,7 +291,7 @@ bool AliasSetTracker::add(StoreInst *SI) { bool NewPtr; Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), - AA.getTargetData().getTypeStoreSize(Val->getType()), + AA.getTypeStoreSize(Val->getType()), AliasSet::Mods, NewPtr); if (SI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -411,7 +413,7 @@ bool AliasSetTracker::remove(Value *Ptr, unsigned Size) { } bool AliasSetTracker::remove(LoadInst *LI) { - unsigned Size = AA.getTargetData().getTypeStoreSize(LI->getType()); + unsigned Size = AA.getTypeStoreSize(LI->getType()); AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size); if (!AS) return false; remove(*AS); @@ -419,8 +421,7 @@ bool AliasSetTracker::remove(LoadInst *LI) { } bool AliasSetTracker::remove(StoreInst *SI) { - unsigned Size = - AA.getTargetData().getTypeStoreSize(SI->getOperand(0)->getType()); + unsigned Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size); if (!AS) return false; remove(*AS); @@ -485,7 +486,7 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { AS->removeCallSite(CS); // First, look up the PointerRec for this pointer. - DenseMap::iterator I = PointerMap.find(PtrVal); + PointerMapType::iterator I = PointerMap.find(PtrVal); if (I == PointerMap.end()) return; // Noop // If we found one, remove the pointer from the alias set it is in. @@ -511,7 +512,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { AA.copyValue(From, To); // First, look up the PointerRec for this pointer. 
- DenseMap::iterator I = PointerMap.find(From); + PointerMapType::iterator I = PointerMap.find(From); if (I == PointerMap.end()) return; // Noop assert(I->second->hasAliasSet() && "Dead entry?"); @@ -531,15 +532,15 @@ void AliasSetTracker::copyValue(Value *From, Value *To) { // AliasSet/AliasSetTracker Printing Support //===----------------------------------------------------------------------===// -void AliasSet::print(std::ostream &OS) const { - OS << " AliasSet[" << (void*)this << "," << RefCount << "] "; +void AliasSet::print(raw_ostream &OS) const { + OS << " AliasSet[" << format("0x%p", (void*)this) << "," << RefCount << "] "; OS << (AliasTy == MustAlias ? "must" : "may") << " alias, "; switch (AccessTy) { case NoModRef: OS << "No access "; break; case Refs : OS << "Ref "; break; case Mods : OS << "Mod "; break; case ModRef : OS << "Mod/Ref "; break; - default: assert(0 && "Bad value for AccessTy!"); + default: llvm_unreachable("Bad value for AccessTy!"); } if (isVolatile()) OS << "[volatile] "; if (Forward) @@ -564,7 +565,7 @@ void AliasSet::print(std::ostream &OS) const { OS << "\n"; } -void AliasSetTracker::print(std::ostream &OS) const { +void AliasSetTracker::print(raw_ostream &OS) const { OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " << PointerMap.size() << " pointer values.\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) @@ -572,8 +573,26 @@ void AliasSetTracker::print(std::ostream &OS) const { OS << "\n"; } -void AliasSet::dump() const { print (cerr); } -void AliasSetTracker::dump() const { print(cerr); } +void AliasSet::dump() const { print(errs()); } +void AliasSetTracker::dump() const { print(errs()); } + +//===----------------------------------------------------------------------===// +// ASTCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void AliasSetTracker::ASTCallbackVH::deleted() { + assert(AST && "ASTCallbackVH called with a null AliasSetTracker!"); + AST->deleteValue(getValPtr()); + // this now dangles! 
+} + +AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) + : CallbackVH(V), AST(ast) {} + +AliasSetTracker::ASTCallbackVH & +AliasSetTracker::ASTCallbackVH::operator=(Value *V) { + return *this = ASTCallbackVH(V, AST); +} //===----------------------------------------------------------------------===// // AliasSetPrinter Pass @@ -596,7 +615,7 @@ namespace { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) Tracker->add(&*I); - Tracker->print(cerr); + Tracker->print(errs()); delete Tracker; return false; } diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 493c6e88b3f8e..f8cb32321b00e 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -9,7 +9,6 @@ #include "llvm-c/Analysis.h" #include "llvm/Analysis/Verifier.h" -#include #include using namespace llvm; diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index f689dcac305a3..2c4efc4985b31 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/Passes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -22,11 +23,15 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include using namespace llvm; @@ -35,12 +40,8 @@ using namespace llvm; // Useful predicates //===----------------------------------------------------------------------===// -static const User *isGEP(const Value *V) { - if (isa(V) || - (isa(V) && - cast(V)->getOpcode() == Instruction::GetElementPtr)) - return cast(V); - return 0; +static const GEPOperator *isGEP(const Value *V) { + return dyn_cast(V); } static const Value *GetGEPOperands(const Value *V, @@ -103,7 +104,7 @@ static bool isNonEscapingLocalObject(const Value *V) { /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. static bool isObjectSmallerThan(const Value *V, unsigned Size, - const TargetData &TD) { + LLVMContext &Context, const TargetData &TD) { const Type *AccessTy; if (const GlobalVariable *GV = dyn_cast(V)) { AccessTy = GV->getType()->getElementType(); @@ -112,6 +113,12 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size, AccessTy = AI->getType()->getElementType(); else return false; + } else if (const CallInst* CI = extractMallocCall(V)) { + if (!isArrayMalloc(V, Context, &TD)) + // The size is the argument to the malloc call. 
+    if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
+      return (C->getZExtValue() < Size);
+    return false;
   } else if (const Argument *A = dyn_cast<Argument>(V)) {
     if (A->hasByValAttr())
       AccessTy = cast<PointerType>(A->getType())->getElementType();
@@ -142,11 +149,10 @@ namespace {
     explicit NoAA(void *PID) : ImmutablePass(PID) { }

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
     }

     virtual void initializePass() {
-      TD = &getAnalysis<TargetData>();
+      TD = getAnalysisIfAvailable<TargetData>();
     }

     virtual AliasResult alias(const Value *V1, unsigned V1Size,
@@ -156,7 +162,7 @@ namespace {
     virtual void getArgumentAccesses(Function *F, CallSite CS,
                                      std::vector<PointerAccessInfo> &Info) {
-      assert(0 && "This method may not be called on this function!");
+      llvm_unreachable("This method may not be called on this function!");
     }

     virtual void getMustAliases(Value *P, std::vector<Value*> &RetVals) { }
@@ -196,7 +202,12 @@ namespace {
     static char ID; // Class identification, replacement for typeinfo
     BasicAliasAnalysis() : NoAA(&ID) {}
     AliasResult alias(const Value *V1, unsigned V1Size,
-                      const Value *V2, unsigned V2Size);
+                      const Value *V2, unsigned V2Size) {
+      assert(VisitedPHIs.empty() && "VisitedPHIs must be cleared after use!");
+      AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
+      VisitedPHIs.clear();
+      return Alias;
+    }

     ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
     ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
@@ -210,6 +221,22 @@ namespace {
     bool pointsToConstantMemory(const Value *P);

   private:
+    // VisitedPHIs - Track PHI nodes visited by an aliasCheck() call.
+    SmallSet<const Value*, 16> VisitedPHIs;
+
+    // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+    // against another.
+    AliasResult aliasGEP(const Value *V1, unsigned V1Size,
+                         const Value *V2, unsigned V2Size);
+
+    // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
+    // against another.
+    AliasResult aliasPHI(const PHINode *PN, unsigned PNSize,
+                         const Value *V2, unsigned V2Size);
+
+    AliasResult aliasCheck(const Value *V1, unsigned V1Size,
+                           const Value *V2, unsigned V2Size);
+
     // CheckGEPInstructions - Check two GEP instructions with known
     // must-aliasing base pointers.  This checks to see if the index expressions
     // preclude the pointers from aliasing...
@@ -279,6 +306,27 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
       if (!passedAsArg)
         return NoModRef;
     }
+
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::atomic_cmp_swap:
+      case Intrinsic::atomic_swap:
+      case Intrinsic::atomic_load_add:
+      case Intrinsic::atomic_load_sub:
+      case Intrinsic::atomic_load_and:
+      case Intrinsic::atomic_load_nand:
+      case Intrinsic::atomic_load_or:
+      case Intrinsic::atomic_load_xor:
+      case Intrinsic::atomic_load_max:
+      case Intrinsic::atomic_load_min:
+      case Intrinsic::atomic_load_umax:
+      case Intrinsic::atomic_load_umin:
+        if (alias(II->getOperand(1), Size, P, Size) == NoAlias)
+          return NoModRef;
+        break;
+      }
+    }
   }

   // The AliasAnalysis base class has some smarts, let's use them.
@@ -303,71 +351,12 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
   return NoAA::getModRefInfo(CS1, CS2);
 }
-
-// alias - Provide a bunch of ad-hoc rules to disambiguate in common cases, such
-// as array references.
+// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
+// against another.
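The new alias() body above is now just an entry point: it asserts the VisitedPHIs scratch set is empty, delegates to the recursive aliasCheck(), and clears the set so state never leaks between queries. A self-contained sketch of that recursion-guard shape (generic names, no LLVM types; not the patch's code):

    #include <set>

    // Result mirrors the three-way answer of an alias query.
    enum Result { No, May, Must };

    struct Query {
      std::set<const void*> Visited; // scratch state, empty between queries

      Result check(const void *A, const void *B) {
        // A cyclic structure (e.g. a PHI feeding itself) would recurse
        // forever; refusing to revisit a node bails out conservatively.
        if (!Visited.insert(A).second)
          return May;
        // ... recursive disambiguation would go here ...
        return May;
      }

      // Public entry point: the scratch set must not leak across queries.
      Result alias(const void *A, const void *B) {
        Result R = check(A, B);
        Visited.clear();
        return R;
      }
    };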
// AliasAnalysis::AliasResult -BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { - // Strip off any constant expression casts if they exist - if (const ConstantExpr *CE = dyn_cast(V1)) - if (CE->isCast() && isa(CE->getOperand(0)->getType())) - V1 = CE->getOperand(0); - if (const ConstantExpr *CE = dyn_cast(V2)) - if (CE->isCast() && isa(CE->getOperand(0)->getType())) - V2 = CE->getOperand(0); - - // Are we checking for alias of the same value? - if (V1 == V2) return MustAlias; - - if (!isa(V1->getType()) || !isa(V2->getType())) - return NoAlias; // Scalars cannot alias each other - - // Strip off cast instructions. Since V1 and V2 are pointers, they must be - // pointer<->pointer bitcasts. - if (const BitCastInst *I = dyn_cast(V1)) - return alias(I->getOperand(0), V1Size, V2, V2Size); - if (const BitCastInst *I = dyn_cast(V2)) - return alias(V1, V1Size, I->getOperand(0), V2Size); - - // Figure out what objects these things are pointing to if we can. - const Value *O1 = V1->getUnderlyingObject(); - const Value *O2 = V2->getUnderlyingObject(); - - if (O1 != O2) { - // If V1/V2 point to two different objects we know that we have no alias. - if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) - return NoAlias; - - // Arguments can't alias with local allocations or noalias calls. - if ((isa(O1) && (isa(O2) || isNoAliasCall(O2))) || - (isa(O2) && (isa(O1) || isNoAliasCall(O1)))) - return NoAlias; - - // Most objects can't alias null. - if ((isa(V2) && isKnownNonNull(O1)) || - (isa(V1) && isKnownNonNull(O2))) - return NoAlias; - } - - // If the size of one access is larger than the entire object on the other - // side, then we know such behavior is undefined and can assume no alias. - const TargetData &TD = getTargetData(); - if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, TD)) || - (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, TD))) - return NoAlias; - - // If one pointer is the result of a call/invoke and the other is a - // non-escaping local object, then we know the object couldn't escape to a - // point where the call could return it. - if ((isa(O1) || isa(O1)) && - isNonEscapingLocalObject(O2) && O1 != O2) - return NoAlias; - if ((isa(O2) || isa(O2)) && - isNonEscapingLocalObject(O1) && O1 != O2) - return NoAlias; - +BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { // If we have two gep instructions with must-alias'ing base pointers, figure // out if the indexes to the GEP tell us anything about the derived pointer. // Note that we also handle chains of getelementptr instructions as well as @@ -387,8 +376,8 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() && // All operands are the same, ignoring the base. std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1)) - return alias(GEP1->getOperand(0), V1Size, GEP2->getOperand(0), V2Size); - + return aliasCheck(GEP1->getOperand(0), V1Size, + GEP2->getOperand(0), V2Size); // Drill down into the first non-gep value, to test for must-aliasing of // the base pointers. @@ -405,7 +394,7 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, const Value *BasePtr2 = GEP2->getOperand(0); // Do the base pointers alias? 
- AliasResult BaseAlias = alias(BasePtr1, ~0U, BasePtr2, ~0U); + AliasResult BaseAlias = aliasCheck(BasePtr1, ~0U, BasePtr2, ~0U); if (BaseAlias == NoAlias) return NoAlias; if (BaseAlias == MustAlias) { // If the base pointers alias each other exactly, check to see if we can @@ -435,79 +424,190 @@ BasicAliasAnalysis::alias(const Value *V1, unsigned V1Size, // instruction. If one pointer is a GEP with a non-zero index of the other // pointer, we know they cannot alias. // - if (isGEP(V2)) { - std::swap(V1, V2); - std::swap(V1Size, V2Size); - } + if (V1Size == ~0U || V2Size == ~0U) + return MayAlias; - if (V1Size != ~0U && V2Size != ~0U) - if (isGEP(V1)) { - SmallVector GEPOperands; - const Value *BasePtr = GetGEPOperands(V1, GEPOperands); - - AliasResult R = alias(BasePtr, V1Size, V2, V2Size); - if (R == MustAlias) { - // If there is at least one non-zero constant index, we know they cannot - // alias. - bool ConstantFound = false; - bool AllZerosFound = true; - for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i) - if (const Constant *C = dyn_cast(GEPOperands[i])) { - if (!C->isNullValue()) { - ConstantFound = true; - AllZerosFound = false; - break; - } - } else { - AllZerosFound = false; - } + SmallVector GEPOperands; + const Value *BasePtr = GetGEPOperands(V1, GEPOperands); + + AliasResult R = aliasCheck(BasePtr, ~0U, V2, V2Size); + if (R != MustAlias) + // If V2 may alias GEP base pointer, conservatively returns MayAlias. + // If V2 is known not to alias GEP base pointer, then the two values + // cannot alias per GEP semantics: "A pointer value formed from a + // getelementptr instruction is associated with the addresses associated + // with the first operand of the getelementptr". + return R; + + // If there is at least one non-zero constant index, we know they cannot + // alias. + bool ConstantFound = false; + bool AllZerosFound = true; + for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i) + if (const Constant *C = dyn_cast(GEPOperands[i])) { + if (!C->isNullValue()) { + ConstantFound = true; + AllZerosFound = false; + break; + } + } else { + AllZerosFound = false; + } - // If we have getelementptr , 0, 0, 0, 0, ... and V2 must aliases - // the ptr, the end result is a must alias also. - if (AllZerosFound) - return MustAlias; + // If we have getelementptr , 0, 0, 0, 0, ... and V2 must aliases + // the ptr, the end result is a must alias also. + if (AllZerosFound) + return MustAlias; - if (ConstantFound) { - if (V2Size <= 1 && V1Size <= 1) // Just pointer check? - return NoAlias; + if (ConstantFound) { + if (V2Size <= 1 && V1Size <= 1) // Just pointer check? + return NoAlias; - // Otherwise we have to check to see that the distance is more than - // the size of the argument... build an index vector that is equal to - // the arguments provided, except substitute 0's for any variable - // indexes we find... - if (cast( - BasePtr->getType())->getElementType()->isSized()) { - for (unsigned i = 0; i != GEPOperands.size(); ++i) - if (!isa(GEPOperands[i])) - GEPOperands[i] = - Constant::getNullValue(GEPOperands[i]->getType()); - int64_t Offset = - getTargetData().getIndexedOffset(BasePtr->getType(), - &GEPOperands[0], - GEPOperands.size()); - - if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size) - return NoAlias; - } - } - } + // Otherwise we have to check to see that the distance is more than + // the size of the argument... build an index vector that is equal to + // the arguments provided, except substitute 0's for any variable + // indexes we find... 
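Both the removed and the re-indented versions of this constant-index logic end in the same interval test: with variable indices zeroed, the two accesses sit Offset bytes apart, and they provably cannot overlap when Offset >= V2Size or Offset <= -V1Size. A standalone check of that disjointness predicate (hypothetical helper, not from the patch):

    #include <cassert>
    #include <cstdint>

    // Two accesses: one of V1Size bytes at byte distance Offset from
    // another of V2Size bytes.  They are disjoint exactly when one ends
    // before the other begins -- the test this code applies.
    static bool disjoint(int64_t Offset, int64_t V1Size, int64_t V2Size) {
      return Offset >= V2Size || Offset <= -V1Size;
    }

    int main() {
      assert(disjoint(8, 4, 8));   // [8,12) vs [0,8): no overlap
      assert(!disjoint(4, 4, 8));  // [4,8)  vs [0,8): overlaps
      assert(disjoint(-4, 4, 8));  // [-4,0) vs [0,8): no overlap
      return 0;
    }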
+ if (TD && + cast(BasePtr->getType())->getElementType()->isSized()) { + for (unsigned i = 0; i != GEPOperands.size(); ++i) + if (!isa(GEPOperands[i])) + GEPOperands[i] = Constant::getNullValue(GEPOperands[i]->getType()); + int64_t Offset = TD->getIndexedOffset(BasePtr->getType(), + &GEPOperands[0], + GEPOperands.size()); + + if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size) + return NoAlias; } + } + + return MayAlias; +} + +// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction +// against another. +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, + const Value *V2, unsigned V2Size) { + // The PHI node has already been visited, avoid recursion any further. + if (!VisitedPHIs.insert(PN)) + return MayAlias; + + SmallSet UniqueSrc; + SmallVector V1Srcs; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PV1 = PN->getIncomingValue(i); + if (isa(PV1)) + // If any of the source itself is a PHI, return MayAlias conservatively + // to avoid compile time explosion. The worst possible case is if both + // sides are PHI nodes. In which case, this is O(m x n) time where 'm' + // and 'n' are the number of PHI sources. + return MayAlias; + if (UniqueSrc.insert(PV1)) + V1Srcs.push_back(PV1); + } + + AliasResult Alias = aliasCheck(V1Srcs[0], PNSize, V2, V2Size); + // Early exit if the check of the first PHI source against V2 is MayAlias. + // Other results are not possible. + if (Alias == MayAlias) + return MayAlias; + + // If all sources of the PHI node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { + Value *V = V1Srcs[i]; + AliasResult ThisAlias = aliasCheck(V, PNSize, V2, V2Size); + if (ThisAlias != Alias || ThisAlias == MayAlias) + return MayAlias; + } + + return Alias; +} + +// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, +// such as array references. +// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + // Strip off any casts if they exist. + V1 = V1->stripPointerCasts(); + V2 = V2->stripPointerCasts(); + + // Are we checking for alias of the same value? + if (V1 == V2) return MustAlias; + + if (!isa(V1->getType()) || !isa(V2->getType())) + return NoAlias; // Scalars cannot alias each other + + // Figure out what objects these things are pointing to if we can. + const Value *O1 = V1->getUnderlyingObject(); + const Value *O2 = V2->getUnderlyingObject(); + + if (O1 != O2) { + // If V1/V2 point to two different objects we know that we have no alias. + if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) + return NoAlias; + + // Arguments can't alias with local allocations or noalias calls. + if ((isa(O1) && (isa(O2) || isNoAliasCall(O2))) || + (isa(O2) && (isa(O1) || isNoAliasCall(O1)))) + return NoAlias; + + // Most objects can't alias null. + if ((isa(V2) && isKnownNonNull(O1)) || + (isa(V1) && isKnownNonNull(O2))) + return NoAlias; + } + + // If the size of one access is larger than the entire object on the other + // side, then we know such behavior is undefined and can assume no alias. 
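aliasPHI, added in the hunk above, merges the answers for every PHI source: the first source's result survives only if all remaining sources agree, and any disagreement or MayAlias collapses the answer to MayAlias. The same fold in a few lines (plain enum, hypothetical meet(); a sketch, not the patch's code):

    #include <vector>

    enum Result { No, May, Must };

    // Merge per-source alias answers the way aliasPHI does: all sources
    // must agree, and a single MayAlias weakens the whole PHI to MayAlias.
    static Result meet(const std::vector<Result> &PerSource) {
      if (PerSource.empty())
        return May;
      Result R = PerSource[0];
      if (R == May)
        return May;        // nothing stronger is recoverable
      for (size_t i = 1, e = PerSource.size(); i != e; ++i)
        if (PerSource[i] != R)
          return May;      // disagreement: only the weak answer is sound
      return R;            // unanimous NoAlias or MustAlias
    }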
+ LLVMContext &Context = V1->getContext(); + if (TD) + if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, Context, *TD)) || + (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, Context, *TD))) + return NoAlias; + + // If one pointer is the result of a call/invoke and the other is a + // non-escaping local object, then we know the object couldn't escape to a + // point where the call could return it. + if ((isa(O1) || isa(O1)) && + isNonEscapingLocalObject(O2) && O1 != O2) + return NoAlias; + if ((isa(O2) || isa(O2)) && + isNonEscapingLocalObject(O1) && O1 != O2) + return NoAlias; + + if (!isGEP(V1) && isGEP(V2)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + } + if (isGEP(V1)) + return aliasGEP(V1, V1Size, V2, V2Size); + + if (isa(V2) && !isa(V1)) { + std::swap(V1, V2); + std::swap(V1Size, V2Size); + } + if (const PHINode *PN = dyn_cast(V1)) + return aliasPHI(PN, V1Size, V2, V2Size); return MayAlias; } // This function is used to determine if the indices of two GEP instructions are // equal. V1 and V2 are the indices. -static bool IndexOperandsEqual(Value *V1, Value *V2) { +static bool IndexOperandsEqual(Value *V1, Value *V2, LLVMContext &Context) { if (V1->getType() == V2->getType()) return V1 == V2; if (Constant *C1 = dyn_cast(V1)) if (Constant *C2 = dyn_cast(V2)) { // Sign extend the constants to long types, if necessary - if (C1->getType() != Type::Int64Ty) - C1 = ConstantExpr::getSExt(C1, Type::Int64Ty); - if (C2->getType() != Type::Int64Ty) - C2 = ConstantExpr::getSExt(C2, Type::Int64Ty); + if (C1->getType() != Type::getInt64Ty(Context)) + C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context)); + if (C2->getType() != Type::getInt64Ty(Context)) + C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context)); return C1 == C2; } return false; @@ -528,6 +628,8 @@ BasicAliasAnalysis::CheckGEPInstructions( const PointerType *GEPPointerTy = cast(BasePtr1Ty); + LLVMContext &Context = GEPPointerTy->getContext(); + // Find the (possibly empty) initial sequence of equal values... which are not // necessarily constants. unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops; @@ -535,7 +637,8 @@ BasicAliasAnalysis::CheckGEPInstructions( unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands); unsigned UnequalOper = 0; while (UnequalOper != MinOperands && - IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) { + IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper], + Context)) { // Advance through the type as we go... ++UnequalOper; if (const CompositeType *CT = dyn_cast(BasePtr1Ty)) @@ -599,10 +702,10 @@ BasicAliasAnalysis::CheckGEPInstructions( if (Constant *G2OC = dyn_cast(const_cast(G2Oper))){ if (G1OC->getType() != G2OC->getType()) { // Sign extend both operands to long. - if (G1OC->getType() != Type::Int64Ty) - G1OC = ConstantExpr::getSExt(G1OC, Type::Int64Ty); - if (G2OC->getType() != Type::Int64Ty) - G2OC = ConstantExpr::getSExt(G2OC, Type::Int64Ty); + if (G1OC->getType() != Type::getInt64Ty(Context)) + G1OC = ConstantExpr::getSExt(G1OC, Type::getInt64Ty(Context)); + if (G2OC->getType() != Type::getInt64Ty(Context)) + G2OC = ConstantExpr::getSExt(G2OC, Type::getInt64Ty(Context)); GEP1Ops[FirstConstantOper] = G1OC; GEP2Ops[FirstConstantOper] = G2OC; } @@ -673,6 +776,10 @@ BasicAliasAnalysis::CheckGEPInstructions( // However, one GEP may have more operands than the other. If this is the // case, there may still be hope. Check this now. if (FirstConstantOper == MinOperands) { + // Without TargetData, we won't know what the offsets are. 
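IndexOperandsEqual, updated above to fetch Int64Ty from the LLVMContext, compares constant GEP indices of different widths by sign-extending both to 64 bits first. A tiny demonstration of why sign extension (rather than zero extension) is the right widening (hypothetical helper, not from the patch):

    #include <cassert>
    #include <cstdint>

    static bool indexEqual(int16_t A, int32_t B) {
      // Widen both operands to 64 bits, mirroring the getSExt(..., Int64Ty)
      // calls above; -1 compares equal regardless of source width.
      return static_cast<int64_t>(A) == static_cast<int64_t>(B);
    }

    int main() {
      assert(indexEqual(-1, -1));
      // Zero-extending the i16 -1 would give 65535 and falsely match:
      assert(!indexEqual(-1, 0xFFFF));
      return 0;
    }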
+ if (!TD) + return MayAlias; + // Make GEP1Ops be the longer one if there is a longer one. if (NumGEP1Ops < NumGEP2Ops) { std::swap(GEP1Ops, GEP2Ops); @@ -692,13 +799,12 @@ BasicAliasAnalysis::CheckGEPInstructions( GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType()); // Okay, now get the offset. This is the relative offset for the full // instruction. - const TargetData &TD = getTargetData(); - int64_t Offset1 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops, - NumGEP1Ops); + int64_t Offset1 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops, + NumGEP1Ops); // Now check without any constants at the end. - int64_t Offset2 = TD.getIndexedOffset(GEPPointerTy, GEP1Ops, - MinOperands); + int64_t Offset2 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops, + MinOperands); // Make sure we compare the absolute difference. if (Offset1 > Offset2) @@ -734,7 +840,8 @@ BasicAliasAnalysis::CheckGEPInstructions( const Type *ZeroIdxTy = GEPPointerTy; for (unsigned i = 0; i != FirstConstantOper; ++i) { if (!isa(ZeroIdxTy)) - GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Type::Int32Ty); + GEP1Ops[i] = GEP2Ops[i] = + Constant::getNullValue(Type::getInt32Ty(Context)); if (const CompositeType *CT = dyn_cast(ZeroIdxTy)) ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]); @@ -775,9 +882,13 @@ BasicAliasAnalysis::CheckGEPInstructions( // value possible. // if (const ArrayType *AT = dyn_cast(BasePtr1Ty)) - GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,AT->getNumElements()-1); + GEP1Ops[i] = + ConstantInt::get(Type::getInt64Ty(Context), + AT->getNumElements()-1); else if (const VectorType *VT = dyn_cast(BasePtr1Ty)) - GEP1Ops[i] = ConstantInt::get(Type::Int64Ty,VT->getNumElements()-1); + GEP1Ops[i] = + ConstantInt::get(Type::getInt64Ty(Context), + VT->getNumElements()-1); } } @@ -812,11 +923,11 @@ BasicAliasAnalysis::CheckGEPInstructions( } } - if (GEPPointerTy->getElementType()->isSized()) { + if (TD && GEPPointerTy->getElementType()->isSized()) { int64_t Offset1 = - getTargetData().getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops); + TD->getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops); int64_t Offset2 = - getTargetData().getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops); + TD->getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops); assert(Offset1 != Offset2 && "There is at least one different constant here!"); diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 8ada5a3f74cdd..6fed4005d1935 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -25,38 +25,36 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/GraphWriter.h" -#include "llvm/Config/config.h" -#include -#include -#include using namespace llvm; namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { static std::string getGraphName(const Function *F) { - return "CFG for '" + F->getName() + "' function"; + return "CFG for '" + F->getNameStr() + "' function"; } static std::string getNodeLabel(const BasicBlock *Node, const Function *Graph, bool ShortNames) { if (ShortNames && !Node->getName().empty()) - return Node->getName() + ":"; + return Node->getNameStr() + ":"; + + std::string Str; + raw_string_ostream OS(Str); - std::ostringstream Out; if (ShortNames) { - WriteAsOperand(Out, Node, false); - return Out.str(); + WriteAsOperand(OS, Node, false); + return OS.str(); } if (Node->getName().empty()) { - WriteAsOperand(Out, Node, false); - Out << ":"; + WriteAsOperand(OS, Node, false); + OS << ":"; } - - Out << *Node; - std::string OutStr = Out.str(); + + 
OS << *Node;
+  std::string OutStr = OS.str();
   if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());

   // Process string output to make it nicer...
@@ -94,7 +92,7 @@ namespace {
       return false;
     }

-    void print(std::ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const {}

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -112,11 +110,11 @@ namespace {
     CFGOnlyViewer() : FunctionPass(&ID) {}

     virtual bool runOnFunction(Function &F) {
-      F.viewCFG();
+      F.viewCFGOnly();
       return false;
     }

-    void print(std::ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const {}

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -136,19 +134,21 @@ namespace {
     explicit CFGPrinter(void *pid) : FunctionPass(pid) {}

     virtual bool runOnFunction(Function &F) {
-      std::string Filename = "cfg." + F.getName() + ".dot";
-      cerr << "Writing '" << Filename << "'...";
-      std::ofstream File(Filename.c_str());
+      std::string Filename = "cfg." + F.getNameStr() + ".dot";
+      errs() << "Writing '" << Filename << "'...";
+
+      std::string ErrorInfo;
+      raw_fd_ostream File(Filename.c_str(), ErrorInfo);

-      if (File.good())
+      if (ErrorInfo.empty())
         WriteGraph(File, (const Function*)&F);
       else
-        cerr << " error opening file for writing!";
-      cerr << "\n";
+        errs() << " error opening file for writing!";
+      errs() << "\n";
       return false;
     }

-    void print(std::ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const {}

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -166,18 +166,20 @@ namespace {
     CFGOnlyPrinter() : FunctionPass(&ID) {}
     explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
     virtual bool runOnFunction(Function &F) {
-      std::string Filename = "cfg." + F.getName() + ".dot";
-      cerr << "Writing '" << Filename << "'...";
-      std::ofstream File(Filename.c_str());
+      std::string Filename = "cfg." + F.getNameStr() + ".dot";
+      errs() << "Writing '" << Filename << "'...";

-      if (File.good())
+      std::string ErrorInfo;
+      raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+
+      if (ErrorInfo.empty())
         WriteGraph(File, (const Function*)&F, true);
       else
-        cerr << " error opening file for writing!";
-      cerr << "\n";
+        errs() << " error opening file for writing!";
+      errs() << "\n";
       return false;
     }

-    void print(std::ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const {}

     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
@@ -196,7 +198,7 @@ P2("dot-cfg-only",
 /// being a 'dot' and 'gv' program in your path.
 ///
 void Function::viewCFG() const {
-  ViewGraph(this, "cfg" + getName());
+  ViewGraph(this, "cfg" + getNameStr());
 }

 /// viewCFGOnly - This function is meant for use from the debugger.  It works
@@ -205,7 +207,7 @@ void Function::viewCFG() const {
 /// this can make the graph smaller.
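The printer hunks above move from std::ofstream to raw_fd_ostream; in this revision the constructor reports failure through an out-parameter string rather than a stream state flag. A sketch of that open-check-write shape, assuming only the two-argument raw_fd_ostream constructor the hunks themselves use:

    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    // Open Filename for writing and emit Text, reporting errors the way
    // the CFG printers above do.  Returns false if the open failed.
    static bool writeTextFile(const std::string &Filename, const char *Text) {
      std::string ErrorInfo;
      raw_fd_ostream File(Filename.c_str(), ErrorInfo);
      if (!ErrorInfo.empty()) {
        errs() << "error opening '" << Filename << "': " << ErrorInfo << "\n";
        return false;
      }
      File << Text;
      return true; // the stream flushes and closes on destruction
    }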
/// void Function::viewCFGOnly() const { - ViewGraph(this, "cfg" + getName(), true); + ViewGraph(this, "cfg" + getNameStr(), true); } FunctionPass *llvm::createCFGPrinterPass () { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 6f2a06c7ac8fe..1d2f118bb446b 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -6,28 +6,33 @@ add_llvm_library(LLVMAnalysis AliasSetTracker.cpp Analysis.cpp BasicAliasAnalysis.cpp - CaptureTracking.cpp CFGPrinter.cpp + CaptureTracking.cpp ConstantFolding.cpp DbgInfoPrinter.cpp DebugInfo.cpp + IVUsers.cpp + InlineCost.cpp InstCount.cpp Interval.cpp IntervalPartition.cpp - IVUsers.cpp LibCallAliasAnalysis.cpp LibCallSemantics.cpp LiveValues.cpp LoopDependenceAnalysis.cpp LoopInfo.cpp LoopPass.cpp - LoopVR.cpp + MallocHelper.cpp MemoryDependenceAnalysis.cpp + PointerTracking.cpp PostDominators.cpp + ProfileEstimatorPass.cpp ProfileInfo.cpp ProfileInfoLoader.cpp ProfileInfoLoaderPass.cpp + ProfileVerifierPass.cpp ScalarEvolution.cpp + ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionExpander.cpp SparsePropagation.cpp Trace.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index a19b8e4f94dbd..b30ac719ae0e5 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -54,7 +54,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) { // its return value and doesn't unwind (a readonly function can leak bits // by throwing an exception or not depending on the input value). if (CS.onlyReadsMemory() && CS.doesNotThrow() && - I->getType() == Type::VoidTy) + I->getType() == Type::getVoidTy(V->getContext())) break; // Not captured if only passed via 'nocapture' arguments. Note that diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 5aa4d56c4e674..0ce1c24bed67b 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1,4 +1,4 @@ -//===-- ConstantFolding.cpp - Analyze constant folding possibilities ------===// +//===-- ConstantFolding.cpp - Fold instructions into constants ------------===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This family of functions determines the possibility of performing constant -// folding. +// This file defines routines for folding instructions into constants. +// +// Also, to supplement the basic VMCore ConstantExpr simplifications, +// this file defines some additional folding routines that can make use of +// TargetData information. These functions cannot go in VMCore due to library +// dependency issues. // //===----------------------------------------------------------------------===// @@ -19,9 +23,11 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include @@ -92,7 +98,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, /// these together. If target data info is available, it is provided as TD, /// otherwise TD is null. 
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const TargetData *TD){ + Constant *Op1, const TargetData *TD, + LLVMContext &Context){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -121,40 +128,103 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps, const Type *ResultTy, + LLVMContext &Context, const TargetData *TD) { Constant *Ptr = Ops[0]; if (!TD || !cast(Ptr->getType())->getElementType()->isSized()) return 0; - - uint64_t BasePtr = 0; + + unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Context)); + APInt BasePtr(BitWidth, 0); + bool BaseIsInt = true; if (!Ptr->isNullValue()) { // If this is a inttoptr from a constant int, we can fold this as the base, // otherwise we can't. if (ConstantExpr *CE = dyn_cast(Ptr)) if (CE->getOpcode() == Instruction::IntToPtr) - if (ConstantInt *Base = dyn_cast(CE->getOperand(0))) - BasePtr = Base->getZExtValue(); + if (ConstantInt *Base = dyn_cast(CE->getOperand(0))) { + BasePtr = Base->getValue(); + BasePtr.zextOrTrunc(BitWidth); + } if (BasePtr == 0) - return 0; + BaseIsInt = false; } // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' for (unsigned i = 1; i != NumOps; ++i) if (!isa(Ops[i])) - return false; + return 0; - uint64_t Offset = TD->getIndexedOffset(Ptr->getType(), - (Value**)Ops+1, NumOps-1); - Constant *C = ConstantInt::get(TD->getIntPtrType(), Offset+BasePtr); - return ConstantExpr::getIntToPtr(C, ResultTy); + APInt Offset = APInt(BitWidth, + TD->getIndexedOffset(Ptr->getType(), + (Value**)Ops+1, NumOps-1)); + // If the base value for this address is a literal integer value, fold the + // getelementptr to the resulting integer value casted to the pointer type. + if (BaseIsInt) { + Constant *C = ConstantInt::get(Context, Offset+BasePtr); + return ConstantExpr::getIntToPtr(C, ResultTy); + } + + // Otherwise form a regular getelementptr. Recompute the indices so that + // we eliminate over-indexing of the notional static type array bounds. + // This makes it easy to determine if the getelementptr is "inbounds". + // Also, this helps GlobalOpt do SROA on GlobalVariables. + const Type *Ty = Ptr->getType(); + SmallVector NewIdxs; + do { + if (const SequentialType *ATy = dyn_cast(Ty)) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (isa(ATy) && !NewIdxs.empty()) + break; + // Determine which element of the array the offset points into. + APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + if (ElemSize == 0) + return 0; + APInt NewIdx = Offset.udiv(ElemSize); + Offset -= NewIdx * ElemSize; + NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Context), NewIdx)); + Ty = ATy->getElementType(); + } else if (const StructType *STy = dyn_cast(Ty)) { + // Determine which field of the struct the offset points into. The + // getZExtValue is at least as safe as the StructLayout API because we + // know the offset is within the struct at this point. + const StructLayout &SL = *TD->getStructLayout(STy); + unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); + NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Context), ElIdx)); + Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); + Ty = STy->getTypeAtIndex(ElIdx); + } else { + // We've reached some non-indexable type. 
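The rewritten SymbolicallyEvaluateGEP above re-derives in-bounds indices from a flat byte offset: array levels peel off Offset udiv ElemSize, struct levels consult the field layout, and any remainder left at the end means the offset lands inside an indivisible member, so the fold is abandoned. A standalone sketch of the array step under a hardcoded 8-byte element size (hypothetical helper, no TargetData):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Peel one array level off a byte offset: pick the element the offset
    // falls into and keep only the remainder, as the do/while loop above
    // does with Offset.udiv(ElemSize).
    static uint64_t peelArrayLevel(uint64_t &Offset, uint64_t ElemSize) {
      uint64_t Idx = Offset / ElemSize;
      Offset -= Idx * ElemSize; // remainder continues the walk
      return Idx;
    }

    int main() {
      uint64_t Offset = 20; // into [N x {i32, i32}], 8 bytes per element
      std::vector<uint64_t> Idxs;
      Idxs.push_back(peelArrayLevel(Offset, 8)); // element 2, remainder 4
      assert(Idxs[0] == 2 && Offset == 4);
      // A struct level would now map the remaining 4 bytes to field 1; a
      // nonzero final remainder would abort the fold, returning null.
      return 0;
    }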
+ break; + } + } while (Ty != cast(ResultTy)->getElementType()); + + // If we haven't used up the entire offset by descending the static + // type, then the offset is pointing into the middle of an indivisible + // member, so we can't simplify it. + if (Offset != 0) + return 0; + + // Create a GEP. + Constant *C = + ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size()); + assert(cast(C->getType())->getElementType() == Ty && + "Computed GetElementPtr has unexpected type!"); + + // If we ended up indexing a member with a type that doesn't match + // the type of what the original indices indexed, add a cast. + if (Ty != cast(ResultTy)->getElementType()) + C = ConstantExpr::getBitCast(C, ResultTy); + + return C; } /// FoldBitCast - Constant fold bitcast, symbolically evaluating it with /// targetdata. Return 0 if unfoldable. static Constant *FoldBitCast(Constant *C, const Type *DestTy, - const TargetData &TD) { + const TargetData &TD, LLVMContext &Context) { // If this is a bitcast from constant vector -> vector, fold it. if (ConstantVector *CV = dyn_cast(C)) { if (const VectorType *DestVTy = dyn_cast(DestTy)) { @@ -180,10 +250,10 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, if (DstEltTy->isFloatingPoint()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); - const Type *DestIVTy = VectorType::get(IntegerType::get(FPWidth), - NumDstElt); + const Type *DestIVTy = VectorType::get( + IntegerType::get(Context, FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. - C = FoldBitCast(C, DestIVTy, TD); + C = FoldBitCast(C, DestIVTy, TD, Context); if (!C) return 0; // Finally, VMCore can handle this now that #elts line up. @@ -194,8 +264,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // it to integer first. if (SrcEltTy->isFloatingPoint()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); - const Type *SrcIVTy = VectorType::get(IntegerType::get(FPWidth), - NumSrcElt); + const Type *SrcIVTy = VectorType::get( + IntegerType::get(Context, FPWidth), NumSrcElt); // Ask VMCore to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); CV = dyn_cast(C); @@ -228,7 +298,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // Shift it to the right place, depending on endianness. Src = ConstantExpr::getShl(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); + ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; // Mix it in. @@ -251,7 +321,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // Shift the piece of the value into the right place, depending on // endianness. Constant *Elt = ConstantExpr::getLShr(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); + ConstantInt::get(Src->getType(), ShiftAmt)); ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; // Truncate and remember this piece. @@ -278,7 +348,8 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, /// is returned. Note that this function can only fail when attempting to fold /// instructions like loads and stores, which have no constant expression form. 
/// -Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { +Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context, + const TargetData *TD) { if (PHINode *PN = dyn_cast(I)) { if (PN->getNumIncomingValues() == 0) return UndefValue::get(PN->getType()); @@ -306,16 +377,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { if (const CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), - Ops.data(), Ops.size(), TD); - else - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Ops.data(), Ops.size(), TD); + Ops.data(), Ops.size(), + Context, TD); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + Ops.data(), Ops.size(), Context, TD); } /// ConstantFoldConstantExpression - Attempt to fold the constant expression /// using the specified TargetData. If successful, the constant result is /// result is returned, if not, null is returned. Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, + LLVMContext &Context, const TargetData *TD) { SmallVector Ops; for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) @@ -323,10 +396,10 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, if (CE->isCompare()) return ConstantFoldCompareInstOperands(CE->getPredicate(), - Ops.data(), Ops.size(), TD); - else - return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), - Ops.data(), Ops.size(), TD); + Ops.data(), Ops.size(), + Context, TD); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), + Ops.data(), Ops.size(), Context, TD); } /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the @@ -337,11 +410,13 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, /// Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, Constant* const* Ops, unsigned NumOps, + LLVMContext &Context, const TargetData *TD) { // Handle easy binops first. if (Instruction::isBinaryOp(Opcode)) { if (isa(Ops[0]) || isa(Ops[1])) - if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) + if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD, + Context)) return C; return ConstantExpr::get(Opcode, Ops[0], Ops[1]); @@ -356,9 +431,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, return 0; case Instruction::ICmp: case Instruction::FCmp: - case Instruction::VICmp: - case Instruction::VFCmp: - assert(0 &&"This function is invalid for compares: no predicate specified"); + llvm_unreachable("This function is invalid for compares: no predicate specified"); case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. 
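The ptrtoint case continued in the hunk below folds inttoptr-then-ptrtoint pairs, but when the integer is wider than a pointer the extra high bits must be masked off first, since a round trip through a pointer would have truncated them. A small demonstration of that masking, assuming only APInt::getLowBitsSet from llvm/ADT/APInt.h:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      unsigned InWidth = 64, PtrBits = 32;
      // Keep only the bits a 32-bit pointer would have preserved.
      APInt Mask = APInt::getLowBitsSet(InWidth, PtrBits);
      APInt Input(InWidth, 0x123456789ULL); // wider than a pointer
      APInt Truncated = Input & Mask;
      assert(Truncated == APInt(InWidth, 0x23456789ULL));
      return 0;
    }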
@@ -368,7 +441,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, unsigned InWidth = Input->getType()->getScalarSizeInBits(); if (TD->getPointerSizeInBits() < InWidth) { Constant *Mask = - ConstantInt::get(APInt::getLowBitsSet(InWidth, + ConstantInt::get(Context, APInt::getLowBitsSet(InWidth, TD->getPointerSizeInBits())); Input = ConstantExpr::getAnd(Input, Mask); } @@ -387,7 +460,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, CE->getType()->getScalarSizeInBits()) { if (CE->getOpcode() == Instruction::PtrToInt) { Constant *Input = CE->getOperand(0); - Constant *C = FoldBitCast(Input, DestTy, *TD); + Constant *C = FoldBitCast(Input, DestTy, *TD, Context); return C ? C : ConstantExpr::getBitCast(Input, DestTy); } // If there's a constant offset added to the integer value before @@ -412,9 +485,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, AT->getNumElements()))) { Constant *Index[] = { Constant::getNullValue(CE->getType()), - ConstantInt::get(ElemIdx) + ConstantInt::get(Context, ElemIdx) }; - return ConstantExpr::getGetElementPtr(GV, &Index[0], 2); + return + ConstantExpr::getGetElementPtr(GV, &Index[0], 2); } } } @@ -434,7 +508,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::BitCast: if (TD) - if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD)) + if (Constant *C = FoldBitCast(Ops[0], DestTy, *TD, Context)) return C; return ConstantExpr::getBitCast(Ops[0], DestTy); case Instruction::Select: @@ -446,7 +520,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: - if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) + if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, Context, TD)) return C; return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); @@ -460,6 +534,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant*const * Ops, unsigned NumOps, + LLVMContext &Context, const TargetData *TD) { // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp (ptrtoint x), 0 -> icmp x, null @@ -470,14 +545,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // around to know if bit truncation is happening. if (ConstantExpr *CE0 = dyn_cast(Ops[0])) { if (TD && Ops[1]->isNullValue()) { - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = TD->getIntPtrType(Context); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), IntPtrTy, false); Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } // Only do this transformation if the int is intptrty in size, otherwise @@ -487,13 +563,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *C = CE0->getOperand(0); Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) }; // FIXME! 
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } } if (ConstantExpr *CE1 = dyn_cast(Ops[1])) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = TD->getIntPtrType(Context); if (CE0->getOpcode() == Instruction::IntToPtr) { // Convert the integer value to the right size to ensure we get the @@ -503,7 +580,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), IntPtrTy, false); Constant *NewOps[] = { C0, C1 }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } // Only do this transformation if the int is intptrty in size, otherwise @@ -514,7 +592,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *NewOps[] = { CE0->getOperand(0), CE1->getOperand(0) }; - return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, TD); + return ConstantFoldCompareInstOperands(Predicate, NewOps, 2, + Context, TD); } } } @@ -597,74 +676,47 @@ llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: return true; - default: break; + default: + return false; + case 0: break; } if (!F->hasName()) return false; - const char *Str = F->getNameStart(); - unsigned Len = F->getNameLen(); + StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
- switch (Str[0]) { + switch (Name[0]) { default: return false; case 'a': - if (Len == 4) - return !strcmp(Str, "acos") || !strcmp(Str, "asin") || - !strcmp(Str, "atan"); - else if (Len == 5) - return !strcmp(Str, "atan2"); - return false; + return Name == "acos" || Name == "asin" || + Name == "atan" || Name == "atan2"; case 'c': - if (Len == 3) - return !strcmp(Str, "cos"); - else if (Len == 4) - return !strcmp(Str, "ceil") || !strcmp(Str, "cosf") || - !strcmp(Str, "cosh"); - return false; + return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': - if (Len == 3) - return !strcmp(Str, "exp"); - return false; + return Name == "exp"; case 'f': - if (Len == 4) - return !strcmp(Str, "fabs") || !strcmp(Str, "fmod"); - else if (Len == 5) - return !strcmp(Str, "floor"); - return false; - break; + return Name == "fabs" || Name == "fmod" || Name == "floor"; case 'l': - if (Len == 3 && !strcmp(Str, "log")) - return true; - if (Len == 5 && !strcmp(Str, "log10")) - return true; - return false; + return Name == "log" || Name == "log10"; case 'p': - if (Len == 3 && !strcmp(Str, "pow")) - return true; - return false; + return Name == "pow"; case 's': - if (Len == 3) - return !strcmp(Str, "sin"); - if (Len == 4) - return !strcmp(Str, "sinh") || !strcmp(Str, "sqrt") || - !strcmp(Str, "sinf"); - if (Len == 5) - return !strcmp(Str, "sqrtf"); - return false; + return Name == "sin" || Name == "sinh" || Name == "sqrt" || + Name == "sinf" || Name == "sqrtf"; case 't': - if (Len == 3 && !strcmp(Str, "tan")) - return true; - else if (Len == 4 && !strcmp(Str, "tanh")) - return true; - return false; + return Name == "tan" || Name == "tanh"; } } static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, - const Type *Ty) { + const Type *Ty, LLVMContext &Context) { errno = 0; V = NativeFP(V); if (errno != 0) { @@ -672,17 +724,18 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, return 0; } - if (Ty == Type::FloatTy) - return ConstantFP::get(APFloat((float)V)); - if (Ty == Type::DoubleTy) - return ConstantFP::get(APFloat(V)); - assert(0 && "Can only constant fold float/double"); + if (Ty->isFloatTy()) + return ConstantFP::get(Context, APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Context, APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V, double W, - const Type *Ty) { + const Type *Ty, + LLVMContext &Context) { errno = 0; V = NativeFP(V, W); if (errno != 0) { @@ -690,137 +743,195 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; } - if (Ty == Type::FloatTy) - return ConstantFP::get(APFloat((float)V)); - if (Ty == Type::DoubleTy) - return ConstantFP::get(APFloat(V)); - assert(0 && "Can only constant fold float/double"); + if (Ty->isFloatTy()) + return ConstantFP::get(Context, APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Context, APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); return 0; // dummy return to suppress warning } /// ConstantFoldCall - Attempt to constant fold a call to the specified function /// with the specified arguments, returning null if unsuccessful. 
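ConstantFoldFP and ConstantFoldBinaryFP above guard every host libm call with errno: the fold is abandoned whenever the native function signals a domain or range error, because the result would not be a faithful constant. The same guard in a self-contained helper (hypothetical foldUnary, not the patch's API; errno reporting assumes a typical host libm):

    #include <cerrno>
    #include <cmath>

    // Call a native double function and report success only if errno
    // stayed clear -- the pattern ConstantFoldFP uses before building a
    // ConstantFP.
    static bool foldUnary(double (*NativeFP)(double), double V, double &Out) {
      errno = 0;
      Out = NativeFP(V);
      return errno == 0; // EDOM/ERANGE mean the fold must be abandoned
    }

    int main() {
      double (*NativeLog)(double) = std::log; // pick the double overload
      double R;
      bool OK  = foldUnary(NativeLog, 8.0, R);  // well-defined, folds
      bool Bad = foldUnary(NativeLog, -1.0, R); // domain error on most hosts
      return (OK && !Bad) ? 0 : 1;
    }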
- Constant * llvm::ConstantFoldCall(Function *F, - Constant* const* Operands, unsigned NumOperands) { + Constant *const *Operands, unsigned NumOperands) { if (!F->hasName()) return 0; - const char *Str = F->getNameStart(); - unsigned Len = F->getNameLen(); - + LLVMContext &Context = F->getContext(); + StringRef Name = F->getName(); + const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast(Operands[0])) { - if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy) + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty==Type::FloatTy ? (double)Op->getValueAPF().convertToFloat(): + double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() : Op->getValueAPF().convertToDouble(); - switch (Str[0]) { + switch (Name[0]) { case 'a': - if (Len == 4 && !strcmp(Str, "acos")) - return ConstantFoldFP(acos, V, Ty); - else if (Len == 4 && !strcmp(Str, "asin")) - return ConstantFoldFP(asin, V, Ty); - else if (Len == 4 && !strcmp(Str, "atan")) - return ConstantFoldFP(atan, V, Ty); + if (Name == "acos") + return ConstantFoldFP(acos, V, Ty, Context); + else if (Name == "asin") + return ConstantFoldFP(asin, V, Ty, Context); + else if (Name == "atan") + return ConstantFoldFP(atan, V, Ty, Context); break; case 'c': - if (Len == 4 && !strcmp(Str, "ceil")) - return ConstantFoldFP(ceil, V, Ty); - else if (Len == 3 && !strcmp(Str, "cos")) - return ConstantFoldFP(cos, V, Ty); - else if (Len == 4 && !strcmp(Str, "cosh")) - return ConstantFoldFP(cosh, V, Ty); - else if (Len == 4 && !strcmp(Str, "cosf")) - return ConstantFoldFP(cos, V, Ty); + if (Name == "ceil") + return ConstantFoldFP(ceil, V, Ty, Context); + else if (Name == "cos") + return ConstantFoldFP(cos, V, Ty, Context); + else if (Name == "cosh") + return ConstantFoldFP(cosh, V, Ty, Context); + else if (Name == "cosf") + return ConstantFoldFP(cos, V, Ty, Context); break; case 'e': - if (Len == 3 && !strcmp(Str, "exp")) - return ConstantFoldFP(exp, V, Ty); + if (Name == "exp") + return ConstantFoldFP(exp, V, Ty, Context); break; case 'f': - if (Len == 4 && !strcmp(Str, "fabs")) - return ConstantFoldFP(fabs, V, Ty); - else if (Len == 5 && !strcmp(Str, "floor")) - return ConstantFoldFP(floor, V, Ty); + if (Name == "fabs") + return ConstantFoldFP(fabs, V, Ty, Context); + else if (Name == "floor") + return ConstantFoldFP(floor, V, Ty, Context); break; case 'l': - if (Len == 3 && !strcmp(Str, "log") && V > 0) - return ConstantFoldFP(log, V, Ty); - else if (Len == 5 && !strcmp(Str, "log10") && V > 0) - return ConstantFoldFP(log10, V, Ty); - else if (!strcmp(Str, "llvm.sqrt.f32") || - !strcmp(Str, "llvm.sqrt.f64")) { + if (Name == "log" && V > 0) + return ConstantFoldFP(log, V, Ty, Context); + else if (Name == "log10" && V > 0) + return ConstantFoldFP(log10, V, Ty, Context); + else if (Name == "llvm.sqrt.f32" || + Name == "llvm.sqrt.f64") { if (V >= -0.0) - return ConstantFoldFP(sqrt, V, Ty); + return ConstantFoldFP(sqrt, V, Ty, Context); else // Undefined return Constant::getNullValue(Ty); } break; case 's': - if (Len == 3 && !strcmp(Str, "sin")) - return ConstantFoldFP(sin, V, Ty); - else if (Len == 4 && !strcmp(Str, "sinh")) - return ConstantFoldFP(sinh, V, Ty); - else if (Len == 4 && !strcmp(Str, "sqrt") && V >= 0) - return ConstantFoldFP(sqrt, V, Ty); - else if 
(Len == 5 && !strcmp(Str, "sqrtf") && V >= 0) - return ConstantFoldFP(sqrt, V, Ty); - else if (Len == 4 && !strcmp(Str, "sinf")) - return ConstantFoldFP(sin, V, Ty); + if (Name == "sin") + return ConstantFoldFP(sin, V, Ty, Context); + else if (Name == "sinh") + return ConstantFoldFP(sinh, V, Ty, Context); + else if (Name == "sqrt" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty, Context); + else if (Name == "sqrtf" && V >= 0) + return ConstantFoldFP(sqrt, V, Ty, Context); + else if (Name == "sinf") + return ConstantFoldFP(sin, V, Ty, Context); break; case 't': - if (Len == 3 && !strcmp(Str, "tan")) - return ConstantFoldFP(tan, V, Ty); - else if (Len == 4 && !strcmp(Str, "tanh")) - return ConstantFoldFP(tanh, V, Ty); + if (Name == "tan") + return ConstantFoldFP(tan, V, Ty, Context); + else if (Name == "tanh") + return ConstantFoldFP(tanh, V, Ty, Context); break; default: break; } - } else if (ConstantInt *Op = dyn_cast(Operands[0])) { - if (Len > 11 && !memcmp(Str, "llvm.bswap", 10)) - return ConstantInt::get(Op->getValue().byteSwap()); - else if (Len > 11 && !memcmp(Str, "llvm.ctpop", 10)) + return 0; + } + + + if (ConstantInt *Op = dyn_cast(Operands[0])) { + if (Name.startswith("llvm.bswap")) + return ConstantInt::get(Context, Op->getValue().byteSwap()); + else if (Name.startswith("llvm.ctpop")) return ConstantInt::get(Ty, Op->getValue().countPopulation()); - else if (Len > 10 && !memcmp(Str, "llvm.cttz", 9)) + else if (Name.startswith("llvm.cttz")) return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); - else if (Len > 10 && !memcmp(Str, "llvm.ctlz", 9)) + else if (Name.startswith("llvm.ctlz")) return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); + return 0; } - } else if (NumOperands == 2) { + + return 0; + } + + if (NumOperands == 2) { if (ConstantFP *Op1 = dyn_cast(Operands[0])) { - if (Ty!=Type::FloatTy && Ty!=Type::DoubleTy) + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty==Type::FloatTy ? - (double)Op1->getValueAPF().convertToFloat(): + double Op1V = Ty->isFloatTy() ? + (double)Op1->getValueAPF().convertToFloat() : Op1->getValueAPF().convertToDouble(); if (ConstantFP *Op2 = dyn_cast(Operands[1])) { - double Op2V = Ty==Type::FloatTy ? + if (Op2->getType() != Op1->getType()) + return 0; + + double Op2V = Ty->isFloatTy() ? 
(double)Op2->getValueAPF().convertToFloat(): Op2->getValueAPF().convertToDouble(); - if (Len == 3 && !strcmp(Str, "pow")) { - return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - } else if (Len == 4 && !strcmp(Str, "fmod")) { - return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - } else if (Len == 5 && !strcmp(Str, "atan2")) { - return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); - } + if (Name == "pow") + return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context); + if (Name == "fmod") + return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context); + if (Name == "atan2") + return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context); } else if (ConstantInt *Op2C = dyn_cast(Operands[1])) { - if (!strcmp(Str, "llvm.powi.f32")) { - return ConstantFP::get(APFloat((float)std::pow((float)Op1V, + if (Name == "llvm.powi.f32") + return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V, (int)Op2C->getZExtValue()))); - } else if (!strcmp(Str, "llvm.powi.f64")) { - return ConstantFP::get(APFloat((double)std::pow((double)Op1V, + if (Name == "llvm.powi.f64") + return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); + } + return 0; + } + + + if (ConstantInt *Op1 = dyn_cast(Operands[0])) { + if (ConstantInt *Op2 = dyn_cast(Operands[1])) { + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_ULT, Res, Op1) // overflow. + }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::usub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Ops[] = { + Res, ConstantExpr::getICmp(CmpInst::ICMP_UGT, Res, Op1) // overflow. + }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::sadd_with_overflow: { + Constant *Res = ConstantExpr::getAdd(Op1, Op2); // result. + Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op1->getType(), 0), Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op2)); // overflow. + + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } + case Intrinsic::ssub_with_overflow: { + Constant *Res = ConstantExpr::getSub(Op1, Op2); // result. + Constant *Overflow = ConstantExpr::getSelect( + ConstantExpr::getICmp(CmpInst::ICMP_SGT, + ConstantInt::get(Op2->getType(), 0), Op2), + ConstantExpr::getICmp(CmpInst::ICMP_SLT, Res, Op1), + ConstantExpr::getICmp(CmpInst::ICMP_SGT, Res, Op1)); // overflow. 
+ + Constant *Ops[] = { Res, Overflow }; + return ConstantStruct::get(F->getContext(), Ops, 2, false); + } } } + + return 0; } + return 0; } return 0; } diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp index 6c549e6345e64..2bbe2e0ecb4f7 100644 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ b/lib/Analysis/DbgInfoPrinter.cpp @@ -90,10 +90,9 @@ void PrintDbgInfo::printStopPoint(const DbgStopPointInst *DSI) { } void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) { - DISubprogram Subprogram(cast(FS->getSubprogram())); - std::string Res1, Res2; - Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1) - << " return type: " << Subprogram.getReturnTypeName(Res2) + DISubprogram Subprogram(FS->getSubprogram()); + Out << "; fully qualified function name: " << Subprogram.getDisplayName() + << " return type: " << Subprogram.getReturnTypeName() << " at line " << Subprogram.getLineNumber() << "\n\n"; } @@ -152,7 +151,7 @@ bool PrintDbgInfo::runOnFunction(Function &F) { Printed = true; } - Out << *i; + Out << *i << '\n'; printVariableDeclaration(i); if (const User *U = dyn_cast(i)) { diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 9eecc339b4835..7bb7e9b4af2d5 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -18,12 +18,13 @@ #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/DebugLoc.h" -#include "llvm/Support/Streams.h" - +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::dwarf; @@ -32,18 +33,12 @@ using namespace llvm::dwarf; //===----------------------------------------------------------------------===// /// ValidDebugInfo - Return true if V represents valid debug info value. -bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) { - if (!V) - return false; - - GlobalVariable *GV = dyn_cast(V->stripPointerCasts()); - if (!GV) - return false; - - if (!GV->hasInternalLinkage () && !GV->hasLinkOnceLinkage()) +/// FIXME : Add DIDescriptor.isValid() +bool DIDescriptor::ValidDebugInfo(MDNode *N, CodeGenOpt::Level OptLevel) { + if (!N) return false; - DIDescriptor DI(GV); + DIDescriptor DI(N); // Check current version. Allow Version6 for now. unsigned Version = DI.getVersion(); @@ -53,13 +48,13 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) { unsigned Tag = DI.getTag(); switch (Tag) { case DW_TAG_variable: - assert(DIVariable(GV).Verify() && "Invalid DebugInfo value"); + assert(DIVariable(N).Verify() && "Invalid DebugInfo value"); break; case DW_TAG_compile_unit: - assert(DICompileUnit(GV).Verify() && "Invalid DebugInfo value"); + assert(DICompileUnit(N).Verify() && "Invalid DebugInfo value"); break; case DW_TAG_subprogram: - assert(DISubprogram(GV).Verify() && "Invalid DebugInfo value"); + assert(DISubprogram(N).Verify() && "Invalid DebugInfo value"); break; case DW_TAG_lexical_block: // FIXME: This interfers with the quality of generated code during @@ -74,84 +69,75 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) { return true; } -DIDescriptor::DIDescriptor(GlobalVariable *GV, unsigned RequiredTag) { - DbgGV = GV; - - // If this is non-null, check to see if the Tag matches. If not, set to null. 
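The sadd/ssub_with_overflow folds above detect wraparound purely by comparing the truncated result against an operand: for a + b, if a is negative the sum must come out below b, so Res > Op2 betrays overflow, and symmetrically Res < Op2 when a is non-negative. A runnable check of that predicate on int32_t (the wrapping add goes through uint32_t to stay well defined; result conversion assumes a two's-complement host):

    #include <cassert>
    #include <cstdint>

    // Wrapping add plus the comparison-based overflow test the fold encodes.
    static bool saddOverflows(int32_t A, int32_t B) {
      int32_t Res = (int32_t)((uint32_t)A + (uint32_t)B); // wraps mod 2^32
      return (A < 0) ? (Res > B) : (Res < B);
    }

    int main() {
      assert(saddOverflows(2147483647, 1));       // INT32_MAX + 1 wraps
      assert(!saddOverflows(100, 23));            // ordinary sum
      assert(saddOverflows(-2147483647 - 1, -1)); // INT32_MIN - 1 wraps
      return 0;
    }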
- if (GV && getTag() != RequiredTag) - DbgGV = 0; -} +DIDescriptor::DIDescriptor(MDNode *N, unsigned RequiredTag) { + DbgNode = N; -const std::string & -DIDescriptor::getStringField(unsigned Elt, std::string &Result) const { - if (DbgGV == 0) { - Result.clear(); - return Result; + // If this is non-null, check to see if the Tag matches. If not, set to null. + if (N && getTag() != RequiredTag) { + DbgNode = 0; } +} - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) { - Result.clear(); - return Result; - } +const char * +DIDescriptor::getStringField(unsigned Elt) const { + if (DbgNode == 0) + return NULL; - // Fills in the string if it succeeds - if (!GetConstantStringInfo(C->getOperand(Elt), Result)) - Result.clear(); + if (Elt < DbgNode->getNumElements()) + if (MDString *MDS = dyn_cast_or_null(DbgNode->getElement(Elt))) + return MDS->getString().data(); - return Result; + return NULL; } uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { - if (DbgGV == 0) return 0; - - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) + if (DbgNode == 0) return 0; - if (ConstantInt *CI = dyn_cast(C->getOperand(Elt))) - return CI->getZExtValue(); + if (Elt < DbgNode->getNumElements()) + if (ConstantInt *CI = dyn_cast(DbgNode->getElement(Elt))) + return CI->getZExtValue(); + return 0; } DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { - if (DbgGV == 0) return DIDescriptor(); - - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) + if (DbgNode == 0) return DIDescriptor(); - C = C->getOperand(Elt); - return DIDescriptor(dyn_cast(C->stripPointerCasts())); + if (Elt < DbgNode->getNumElements() && DbgNode->getElement(Elt)) + return DIDescriptor(dyn_cast(DbgNode->getElement(Elt))); + + return DIDescriptor(); } GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { - if (DbgGV == 0) return 0; - - Constant *C = DbgGV->getInitializer(); - if (C == 0 || Elt >= C->getNumOperands()) + if (DbgNode == 0) return 0; - C = C->getOperand(Elt); - return dyn_cast(C->stripPointerCasts()); + if (Elt < DbgNode->getNumElements()) + return dyn_cast_or_null(DbgNode->getElement(Elt)); + return 0; } //===----------------------------------------------------------------------===// -// Simple Descriptor Constructors and other Methods +// Predicates //===----------------------------------------------------------------------===// -// Needed by DIVariable::getType(). -DIType::DIType(GlobalVariable *GV) : DIDescriptor(GV) { - if (!GV) return; - unsigned tag = getTag(); - if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) && - !DICompositeType::isCompositeType(tag)) - DbgGV = 0; +/// isBasicType - Return true if the specified tag is legal for +/// DIBasicType. +bool DIDescriptor::isBasicType() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_base_type; } -/// isDerivedType - Return true if the specified tag is legal for -/// DIDerivedType. -bool DIType::isDerivedType(unsigned Tag) { +/// isDerivedType - Return true if the specified tag is legal for DIDerivedType. 
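The rewritten accessors above pull descriptor fields out of an MDNode's element list instead of decoding a GlobalVariable initializer, and they degrade gracefully: a null node, an out-of-range index, or a field of the wrong kind yields NULL or 0 rather than an assertion. Here is a toy model of that accessor shape, with std::variant standing in for the heterogeneous metadata operands; all names are hypothetical.

#include <cstdint>
#include <iostream>
#include <string>
#include <variant>
#include <vector>

// Toy stand-in for an MDNode: a tuple of heterogeneous operands.
using Field = std::variant<std::monostate, uint64_t, std::string>;

struct Node {
  std::vector<Field> Elements;
};

// Mirrors the new accessor shape: tolerate a null node, an out-of-range
// index, or a field of the wrong kind, and return a "not present" value
// instead of asserting.
const char *getStringField(const Node *N, unsigned Elt) {
  if (!N) return nullptr;
  if (Elt < N->Elements.size())
    if (const std::string *S = std::get_if<std::string>(&N->Elements[Elt]))
      return S->c_str();
  return nullptr;
}

uint64_t getUInt64Field(const Node *N, unsigned Elt) {
  if (!N) return 0;
  if (Elt < N->Elements.size())
    if (const uint64_t *V = std::get_if<uint64_t>(&N->Elements[Elt]))
      return *V;
  return 0;
}

int main() {
  Node N{{Field{uint64_t{17}}, Field{std::string("foo.c")}}};
  std::cout << getUInt64Field(&N, 0) << ' ' << getStringField(&N, 1) << '\n';
  std::cout << (getStringField(&N, 5) == nullptr) << '\n';  // out of range
}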
+bool DIDescriptor::isDerivedType() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + switch (Tag) { case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_pointer_type: @@ -163,16 +149,18 @@ bool DIType::isDerivedType(unsigned Tag) { case dwarf::DW_TAG_inheritance: return true; default: - // FIXME: Even though it doesn't make sense, CompositeTypes are current - // modelled as DerivedTypes, this should return true for them as well. - return false; + // CompositeTypes are currently modelled as DerivedTypes. + return isCompositeType(); } } /// isCompositeType - Return true if the specified tag is legal for /// DICompositeType. -bool DIType::isCompositeType(unsigned TAG) { - switch (TAG) { +bool DIDescriptor::isCompositeType() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + switch (Tag) { case dwarf::DW_TAG_array_type: case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: @@ -187,7 +175,10 @@ bool DIType::isCompositeType(unsigned TAG) { } /// isVariable - Return true if the specified tag is legal for DIVariable. -bool DIVariable::isVariable(unsigned Tag) { +bool DIDescriptor::isVariable() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + switch (Tag) { case dwarf::DW_TAG_auto_variable: case dwarf::DW_TAG_arg_variable: @@ -198,19 +189,126 @@ bool DIVariable::isVariable(unsigned Tag) { } } +/// isType - Return true if the specified tag is legal for DIType. +bool DIDescriptor::isType() const { + return isBasicType() || isCompositeType() || isDerivedType(); +} + +/// isSubprogram - Return true if the specified tag is legal for +/// DISubprogram. +bool DIDescriptor::isSubprogram() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_subprogram; +} + +/// isGlobalVariable - Return true if the specified tag is legal for +/// DIGlobalVariable. +bool DIDescriptor::isGlobalVariable() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_variable; +} + +/// isGlobal - Return true if the specified tag is legal for DIGlobal. +bool DIDescriptor::isGlobal() const { + return isGlobalVariable(); +} + +/// isScope - Return true if the specified tag is one of the scope +/// related tag. +bool DIDescriptor::isScope() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + switch (Tag) { + case dwarf::DW_TAG_compile_unit: + case dwarf::DW_TAG_lexical_block: + case dwarf::DW_TAG_subprogram: + return true; + default: + break; + } + return false; +} + +/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit. +bool DIDescriptor::isCompileUnit() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_compile_unit; +} + +/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. +bool DIDescriptor::isLexicalBlock() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_lexical_block; +} + +/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type. +bool DIDescriptor::isSubrange() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_subrange_type; +} + +/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator. 
+bool DIDescriptor::isEnumerator() const { + assert (!isNull() && "Invalid descriptor!"); + unsigned Tag = getTag(); + + return Tag == dwarf::DW_TAG_enumerator; +} + +//===----------------------------------------------------------------------===// +// Simple Descriptor Constructors and other Methods +//===----------------------------------------------------------------------===// + +DIType::DIType(MDNode *N) : DIDescriptor(N) { + if (!N) return; + if (!isBasicType() && !isDerivedType() && !isCompositeType()) { + DbgNode = 0; + } +} + unsigned DIArray::getNumElements() const { - assert (DbgGV && "Invalid DIArray"); - Constant *C = DbgGV->getInitializer(); - assert (C && "Invalid DIArray initializer"); - return C->getNumOperands(); + assert (DbgNode && "Invalid DIArray"); + return DbgNode->getNumElements(); +} + +/// replaceAllUsesWith - Replace all uses of debug info referenced by +/// this descriptor. After this completes, the current debug info value +/// is erased. +void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) { + if (isNull()) + return; + + assert (!D.isNull() && "Can not replace with null"); + + // Since we use a TrackingVH for the node, its easy for clients to manufacture + // legitimate situations where they want to replaceAllUsesWith() on something + // which, due to uniquing, has merged with the source. We shield clients from + // this detail by allowing a value to be replaced with replaceAllUsesWith() + // itself. + if (getNode() != D.getNode()) { + MDNode *Node = DbgNode; + Node->replaceAllUsesWith(D.getNode()); + delete Node; + } } /// Verify - Verify that a compile unit is well formed. bool DICompileUnit::Verify() const { - if (isNull()) + if (isNull()) return false; - std::string Res; - if (getFilename(Res).empty()) + const char *N = getFilename(); + if (!N) return false; // It is possible that directory and produce string is empty. return true; @@ -218,26 +316,26 @@ bool DICompileUnit::Verify() const { /// Verify - Verify that a type descriptor is well formed. bool DIType::Verify() const { - if (isNull()) + if (isNull()) return false; - if (getContext().isNull()) + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.isNull() && !CU.Verify()) + if (!CU.isNull() && !CU.Verify()) return false; return true; } /// Verify - Verify that a composite type descriptor is well formed. 
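The replaceAllUsesWith added below has to tolerate being asked to replace a node with itself, because MDNode uniquing can merge the replacement with the original before the call happens. A tiny hand-rolled model of that guard follows; the use-list plumbing is mine and none of this is the LLVM API.

#include <cassert>
#include <vector>

struct Node {
  std::vector<Node **> Uses;  // back-pointers to every reference to this node
};

// Toy RAUW with the same guard the patch adds: replacing a node with
// itself must be a no-op rather than a use-after-free.
void replaceAllUsesWith(Node *From, Node *To) {
  assert(To && "Can not replace with null");
  if (From == To)
    return;                       // self-replacement: nothing to do
  for (Node **Use : From->Uses)
    *Use = To;
  To->Uses.insert(To->Uses.end(), From->Uses.begin(), From->Uses.end());
  delete From;                    // the old node is erased, as in the patch
}

int main() {
  Node *A = new Node, *B = new Node;
  Node *Ref = A;
  A->Uses.push_back(&Ref);
  replaceAllUsesWith(A, A);        // safe no-op
  replaceAllUsesWith(Ref, B);      // Ref now points at B; A is deleted
  assert(Ref == B);
  delete B;
}

Without the From == To early-out, the final delete would free a node that is still live.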
bool DICompositeType::Verify() const { - if (isNull()) + if (isNull()) return false; - if (getContext().isNull()) + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.isNull() && !CU.Verify()) + if (!CU.isNull() && !CU.Verify()) return false; return true; } @@ -246,12 +344,12 @@ bool DICompositeType::Verify() const { bool DISubprogram::Verify() const { if (isNull()) return false; - + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.Verify()) + if (!CU.Verify()) return false; DICompositeType Ty = getType(); @@ -264,12 +362,12 @@ bool DISubprogram::Verify() const { bool DIGlobalVariable::Verify() const { if (isNull()) return false; - + if (getContext().isNull()) return false; DICompileUnit CU = getCompileUnit(); - if (!CU.isNull() && !CU.Verify()) + if (!CU.isNull() && !CU.Verify()) return false; DIType Ty = getType(); @@ -286,7 +384,7 @@ bool DIGlobalVariable::Verify() const { bool DIVariable::Verify() const { if (isNull()) return false; - + if (getContext().isNull()) return false; @@ -312,15 +410,38 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { /// information for the function F. bool DISubprogram::describes(const Function *F) { assert (F && "Invalid function"); - std::string Name; - getLinkageName(Name); - if (Name.empty()) - getName(Name); - if (!Name.empty() && (strcmp(Name.c_str(), F->getNameStart()) == false)) + const char *Name = getLinkageName(); + if (!Name) + Name = getName(); + if (strcmp(F->getName().data(), Name) == 0) return true; return false; } +const char *DIScope::getFilename() const { + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getFilename(); + else if (isSubprogram()) + return DISubprogram(DbgNode).getFilename(); + else if (isCompileUnit()) + return DICompileUnit(DbgNode).getFilename(); + else + assert (0 && "Invalid DIScope!"); + return NULL; +} + +const char *DIScope::getDirectory() const { + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getDirectory(); + else if (isSubprogram()) + return DISubprogram(DbgNode).getDirectory(); + else if (isCompileUnit()) + return DICompileUnit(DbgNode).getDirectory(); + else + assert (0 && "Invalid DIScope!"); + return NULL; +} + //===----------------------------------------------------------------------===// // DIDescriptor: dump routines for all descriptors. //===----------------------------------------------------------------------===// @@ -328,69 +449,67 @@ bool DISubprogram::describes(const Function *F) { /// dump - Print descriptor. void DIDescriptor::dump() const { - cerr << "[" << dwarf::TagString(getTag()) << "] "; - cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec; + errs() << "[" << dwarf::TagString(getTag()) << "] "; + errs().write_hex((intptr_t) &*DbgNode) << ']'; } /// dump - Print compile unit. void DICompileUnit::dump() const { if (getLanguage()) - cerr << " [" << dwarf::LanguageString(getLanguage()) << "] "; + errs() << " [" << dwarf::LanguageString(getLanguage()) << "] "; - std::string Res1, Res2; - cerr << " [" << getDirectory(Res1) << "/" << getFilename(Res2) << " ]"; + errs() << " [" << getDirectory() << "/" << getFilename() << " ]"; } /// dump - Print type. 
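DIScope::getFilename above dispatches on the concrete scope kind and delegates to the matching wrapper. The sketch below shows the same dispatch in simplified form: here a lexical block simply borrows its parent's file, which is a simplification of the real layout, and every name is illustrative.

#include <cassert>
#include <string>

enum Tag { CompileUnit, Subprogram, LexicalBlock };

struct Scope {
  Tag T;
  std::string File;
  const Scope *Parent = nullptr;  // enclosing scope, for lexical blocks
};

// Same shape as the new DIScope::getFilename(): dispatch on the concrete
// kind and delegate, asserting on anything that is not a scope.
const char *getFilename(const Scope &S) {
  switch (S.T) {
  case CompileUnit:
  case Subprogram:
    return S.File.c_str();
  case LexicalBlock:
    assert(S.Parent && "block without parent scope");
    return getFilename(*S.Parent);
  }
  assert(0 && "Invalid scope!");
  return nullptr;
}

int main() {
  Scope CU{CompileUnit, "a.c"};
  Scope Blk{LexicalBlock, "", &CU};
  assert(std::string(getFilename(Blk)) == "a.c");
}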
void DIType::dump() const { if (isNull()) return; - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + if (const char *Res = getName()) + errs() << " [" << Res << "] "; unsigned Tag = getTag(); - cerr << " [" << dwarf::TagString(Tag) << "] "; + errs() << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context getCompileUnit().dump(); - cerr << " [" - << getLineNumber() << ", " - << getSizeInBits() << ", " - << getAlignInBits() << ", " - << getOffsetInBits() - << "] "; - - if (isPrivate()) - cerr << " [private] "; + errs() << " [" + << getLineNumber() << ", " + << getSizeInBits() << ", " + << getAlignInBits() << ", " + << getOffsetInBits() + << "] "; + + if (isPrivate()) + errs() << " [private] "; else if (isProtected()) - cerr << " [protected] "; + errs() << " [protected] "; if (isForwardDecl()) - cerr << " [fwd] "; - - if (isBasicType(Tag)) - DIBasicType(DbgGV).dump(); - else if (isDerivedType(Tag)) - DIDerivedType(DbgGV).dump(); - else if (isCompositeType(Tag)) - DICompositeType(DbgGV).dump(); + errs() << " [fwd] "; + + if (isBasicType()) + DIBasicType(DbgNode).dump(); + else if (isDerivedType()) + DIDerivedType(DbgNode).dump(); + else if (isCompositeType()) + DICompositeType(DbgNode).dump(); else { - cerr << "Invalid DIType\n"; + errs() << "Invalid DIType\n"; return; } - cerr << "\n"; + errs() << "\n"; } /// dump - Print basic type. void DIBasicType::dump() const { - cerr << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; + errs() << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; } /// dump - Print derived type. void DIDerivedType::dump() const { - cerr << "\n\t Derived From: "; getTypeDerivedFrom().dump(); + errs() << "\n\t Derived From: "; getTypeDerivedFrom().dump(); } /// dump - Print composite type. @@ -398,54 +517,72 @@ void DICompositeType::dump() const { DIArray A = getTypeArray(); if (A.isNull()) return; - cerr << " [" << A.getNumElements() << " elements]"; + errs() << " [" << A.getNumElements() << " elements]"; } /// dump - Print global. void DIGlobal::dump() const { - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + if (const char *Res = getName()) + errs() << " [" << Res << "] "; unsigned Tag = getTag(); - cerr << " [" << dwarf::TagString(Tag) << "] "; + errs() << " [" << dwarf::TagString(Tag) << "] "; // TODO : Print context getCompileUnit().dump(); - cerr << " [" << getLineNumber() << "] "; + errs() << " [" << getLineNumber() << "] "; if (isLocalToUnit()) - cerr << " [local] "; + errs() << " [local] "; if (isDefinition()) - cerr << " [def] "; + errs() << " [def] "; - if (isGlobalVariable(Tag)) - DIGlobalVariable(DbgGV).dump(); + if (isGlobalVariable()) + DIGlobalVariable(DbgNode).dump(); - cerr << "\n"; + errs() << "\n"; } /// dump - Print subprogram. void DISubprogram::dump() const { - DIGlobal::dump(); + if (const char *Res = getName()) + errs() << " [" << Res << "] "; + + unsigned Tag = getTag(); + errs() << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().dump(); + errs() << " [" << getLineNumber() << "] "; + + if (isLocalToUnit()) + errs() << " [local] "; + + if (isDefinition()) + errs() << " [def] "; + + errs() << "\n"; } /// dump - Print global variable. void DIGlobalVariable::dump() const { - cerr << " ["; getGlobal()->dump(); cerr << "] "; + errs() << " ["; + getGlobal()->dump(); + errs() << "] "; } /// dump - Print variable. 
void DIVariable::dump() const { - std::string Res; - if (!getName(Res).empty()) - cerr << " [" << Res << "] "; + if (const char *Res = getName()) + errs() << " [" << Res << "] "; getCompileUnit().dump(); - cerr << " [" << getLineNumber() << "] "; + errs() << " [" << getLineNumber() << "] "; getType().dump(); - cerr << "\n"; + errs() << "\n"; + + // FIXME: Dump complex addresses } //===----------------------------------------------------------------------===// @@ -453,98 +590,46 @@ void DIVariable::dump() const { //===----------------------------------------------------------------------===// DIFactory::DIFactory(Module &m) - : M(m), StopPointFn(0), FuncStartFn(0), RegionStartFn(0), RegionEndFn(0), + : M(m), VMContext(M.getContext()), StopPointFn(0), FuncStartFn(0), + RegionStartFn(0), RegionEndFn(0), DeclareFn(0) { - EmptyStructPtr = PointerType::getUnqual(StructType::get()); -} - -/// getCastToEmpty - Return this descriptor as a Constant* with type '{}*'. -/// This is only valid when the descriptor is non-null. -Constant *DIFactory::getCastToEmpty(DIDescriptor D) { - if (D.isNull()) return Constant::getNullValue(EmptyStructPtr); - return ConstantExpr::getBitCast(D.getGV(), EmptyStructPtr); + EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext)); } Constant *DIFactory::GetTagConstant(unsigned TAG) { assert((TAG & LLVMDebugVersionMask) == 0 && "Tag too large for debug encoding!"); - return ConstantInt::get(Type::Int32Ty, TAG | LLVMDebugVersion); -} - -Constant *DIFactory::GetStringConstant(const std::string &String) { - // Check string cache for previous edition. - Constant *&Slot = StringCache[String]; - - // Return Constant if previously defined. - if (Slot) return Slot; - - const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty); - - // If empty string then use a i8* null instead. - if (String.empty()) - return Slot = ConstantPointerNull::get(DestTy); - - // Construct string as an llvm constant. - Constant *ConstStr = ConstantArray::get(String); - - // Otherwise create and return a new string global. - GlobalVariable *StrGV = new GlobalVariable(ConstStr->getType(), true, - GlobalVariable::InternalLinkage, - ConstStr, ".str", &M); - StrGV->setSection("llvm.metadata"); - return Slot = ConstantExpr::getBitCast(StrGV, DestTy); + return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion); } //===----------------------------------------------------------------------===// // DIFactory: Primary Constructors //===----------------------------------------------------------------------===// -/// GetOrCreateArray - Create an descriptor for an array of descriptors. +/// GetOrCreateArray - Create an descriptor for an array of descriptors. /// This implicitly uniques the arrays created. DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) { - SmallVector Elts; - - for (unsigned i = 0; i != NumTys; ++i) - Elts.push_back(getCastToEmpty(Tys[i])); - - Constant *Init = ConstantArray::get(ArrayType::get(EmptyStructPtr, - Elts.size()), - Elts.data(), Elts.size()); - // If we already have this array, just return the uniqued version. 
- DIDescriptor &Entry = SimpleConstantCache[Init]; - if (!Entry.isNull()) return DIArray(Entry.getGV()); - - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.array", &M); - GV->setSection("llvm.metadata"); - Entry = DIDescriptor(GV); - return DIArray(GV); + SmallVector Elts; + + if (NumTys == 0) + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + else + for (unsigned i = 0; i != NumTys; ++i) + Elts.push_back(Tys[i].getNode()); + + return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size())); } /// GetOrCreateSubrange - Create a descriptor for a value range. This /// implicitly uniques the values returned. DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subrange_type), - ConstantInt::get(Type::Int64Ty, Lo), - ConstantInt::get(Type::Int64Ty, Hi) + ConstantInt::get(Type::getInt64Ty(VMContext), Lo), + ConstantInt::get(Type::getInt64Ty(VMContext), Hi) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - // If we already have this range, just return the uniqued version. - DIDescriptor &Entry = SimpleConstantCache[Init]; - if (!Entry.isNull()) return DISubrange(Entry.getGV()); - - M.addTypeName("llvm.dbg.subrange.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.subrange", &M); - GV->setSection("llvm.metadata"); - Entry = DIDescriptor(GV); - return DISubrange(GV); + return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); } @@ -552,92 +637,69 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { /// CreateCompileUnit - Create a new descriptor for the specified compile /// unit. Note that this does not unique compile units within the module. DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, - const std::string &Filename, - const std::string &Directory, - const std::string &Producer, + StringRef Filename, + StringRef Directory, + StringRef Producer, bool isMain, bool isOptimized, const char *Flags, unsigned RunTimeVer) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_compile_unit), - Constant::getNullValue(EmptyStructPtr), - ConstantInt::get(Type::Int32Ty, LangID), - GetStringConstant(Filename), - GetStringConstant(Directory), - GetStringConstant(Producer), - ConstantInt::get(Type::Int1Ty, isMain), - ConstantInt::get(Type::Int1Ty, isOptimized), - GetStringConstant(Flags), - ConstantInt::get(Type::Int32Ty, RunTimeVer) + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + ConstantInt::get(Type::getInt32Ty(VMContext), LangID), + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + MDString::get(VMContext, Producer), + ConstantInt::get(Type::getInt1Ty(VMContext), isMain), + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + MDString::get(VMContext, Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.compile_unit.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::LinkOnceAnyLinkage, - Init, "llvm.dbg.compile_unit", &M); - GV->setSection("llvm.metadata"); - return DICompileUnit(GV); + + return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10)); } /// CreateEnumerator - Create a single enumerator value. 
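From here on the factory methods build each descriptor by handing MDNode::get a flat operand array, and the comments on GetOrCreateArray/GetOrCreateSubrange note that the result is implicitly uniqued: structurally identical operand lists come back as the same node. The toy interning factory below shows why no separate cache is needed once node creation itself uniques; std::map plus strings stand in for the real operands.

#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <vector>

// Toy structurally-uniqued node factory, modelling what MDNode::get does
// for the descriptor factories in this patch.
struct Node {
  std::vector<std::string> Operands;
};

Node *getNode(std::vector<std::string> Ops) {
  static std::map<std::vector<std::string>, std::unique_ptr<Node>> Interned;
  auto &Slot = Interned[Ops];      // key copied; slot is null on first use
  if (!Slot)
    Slot.reset(new Node{std::move(Ops)});
  return Slot.get();
}

int main() {
  // A "subrange descriptor": {tag, lo, hi}, all encoded as strings here.
  Node *A = getNode({"DW_TAG_subrange_type", "0", "9"});
  Node *B = getNode({"DW_TAG_subrange_type", "0", "9"});
  Node *C = getNode({"DW_TAG_subrange_type", "0", "15"});
  assert(A == B && A != C);  // same operands, same node
  return 0;
}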
-DIEnumerator DIFactory::CreateEnumerator(const std::string &Name, uint64_t Val){ - Constant *Elts[] = { +DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){ + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_enumerator), - GetStringConstant(Name), - ConstantInt::get(Type::Int64Ty, Val) + MDString::get(VMContext, Name), + ConstantInt::get(Type::getInt64Ty(VMContext), Val) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.enumerator.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.enumerator", &M); - GV->setSection("llvm.metadata"); - return DIEnumerator(GV); + return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3)); } /// CreateBasicType - Create a basic type like int, float, etc. DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, unsigned Encoding) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_base_type), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNumber), - ConstantInt::get(Type::Int64Ty, SizeInBits), - ConstantInt::get(Type::Int64Ty, AlignInBits), - ConstantInt::get(Type::Int64Ty, OffsetInBits), - ConstantInt::get(Type::Int32Ty, Flags), - ConstantInt::get(Type::Int32Ty, Encoding) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.basictype.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.basictype", &M); - GV->setSection("llvm.metadata"); - return DIBasicType(GV); + return DIBasicType(MDNode::get(VMContext, &Elts[0], 10)); } /// CreateDerivedType - Create a derived type like const qualified type, /// pointer, typedef, etc. 
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -645,33 +707,25 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(Tag), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNumber), - ConstantInt::get(Type::Int64Ty, SizeInBits), - ConstantInt::get(Type::Int64Ty, AlignInBits), - ConstantInt::get(Type::Int64Ty, OffsetInBits), - ConstantInt::get(Type::Int32Ty, Flags), - getCastToEmpty(DerivedFrom) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom.getNode(), }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.derivedtype.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.derivedtype", &M); - GV->setSection("llvm.metadata"); - return DIDerivedType(GV); + return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); } /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType DIFactory::CreateCompositeType(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -682,143 +736,143 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag, DIArray Elements, unsigned RuntimeLang) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(Tag), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNumber), - ConstantInt::get(Type::Int64Ty, SizeInBits), - ConstantInt::get(Type::Int64Ty, AlignInBits), - ConstantInt::get(Type::Int64Ty, OffsetInBits), - ConstantInt::get(Type::Int32Ty, Flags), - getCastToEmpty(DerivedFrom), - getCastToEmpty(Elements), - ConstantInt::get(Type::Int32Ty, RuntimeLang) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + DerivedFrom.getNode(), + Elements.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.composite.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.composite", &M); - GV->setSection("llvm.metadata"); - return DICompositeType(GV); + return DICompositeType(MDNode::get(VMContext, &Elts[0], 12)); } /// CreateSubprogram - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. This /// method does not unique the generated descriptors. 
-DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, - const std::string &Name, - const std::string &DisplayName, - const std::string &LinkageName, +DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, + StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, bool isDefinition) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), - Constant::getNullValue(EmptyStructPtr), - getCastToEmpty(Context), - GetStringConstant(Name), - GetStringConstant(DisplayName), - GetStringConstant(LinkageName), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNo), - getCastToEmpty(Type), - ConstantInt::get(Type::Int1Ty, isLocalToUnit), - ConstantInt::get(Type::Int1Ty, isDefinition) + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context.getNode(), + MDString::get(VMContext, Name), + MDString::get(VMContext, DisplayName), + MDString::get(VMContext, LinkageName), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Type.getNode(), + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition) }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.subprogram.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::LinkOnceAnyLinkage, - Init, "llvm.dbg.subprogram", &M); - GV->setSection("llvm.metadata"); - return DISubprogram(GV); + return DISubprogram(MDNode::get(VMContext, &Elts[0], 11)); } /// CreateGlobalVariable - Create a new descriptor for the specified global. DIGlobalVariable -DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name, - const std::string &DisplayName, - const std::string &LinkageName, +DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type,bool isLocalToUnit, bool isDefinition, llvm::GlobalVariable *Val) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_variable), - Constant::getNullValue(EmptyStructPtr), - getCastToEmpty(Context), - GetStringConstant(Name), - GetStringConstant(DisplayName), - GetStringConstant(LinkageName), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNo), - getCastToEmpty(Type), - ConstantInt::get(Type::Int1Ty, isLocalToUnit), - ConstantInt::get(Type::Int1Ty, isDefinition), - ConstantExpr::getBitCast(Val, EmptyStructPtr) + llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), + Context.getNode(), + MDString::get(VMContext, Name), + MDString::get(VMContext, DisplayName), + MDString::get(VMContext, LinkageName), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Type.getNode(), + ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), + ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), + Val }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.global_variable.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::LinkOnceAnyLinkage, - Init, "llvm.dbg.global_variable", &M); - GV->setSection("llvm.metadata"); - return DIGlobalVariable(GV); + + Value *const *Vs = &Elts[0]; + MDNode *Node = MDNode::get(VMContext,Vs, 12); + + // Create a named metadata so that we do not 
lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addElement(Node); + + return DIGlobalVariable(Node); } /// CreateVariable - Create a new descriptor for the specified variable. DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, - const std::string &Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNo, DIType Type) { - Constant *Elts[] = { + Value *Elts[] = { GetTagConstant(Tag), - getCastToEmpty(Context), - GetStringConstant(Name), - getCastToEmpty(CompileUnit), - ConstantInt::get(Type::Int32Ty, LineNo), - getCastToEmpty(Type) + Context.getNode(), + MDString::get(VMContext, Name), + CompileUnit.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + Type.getNode(), }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.variable.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.variable", &M); - GV->setSection("llvm.metadata"); - return DIVariable(GV); + return DIVariable(MDNode::get(VMContext, &Elts[0], 6)); +} + + +/// CreateComplexVariable - Create a new descriptor for the specified variable +/// which has a complex address expression for its address. +DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, + const std::string &Name, + DICompileUnit CompileUnit, + unsigned LineNo, + DIType Type, SmallVector &addr) { + SmallVector Elts; + Elts.push_back(GetTagConstant(Tag)); + Elts.push_back(Context.getNode()); + Elts.push_back(MDString::get(VMContext, Name)); + Elts.push_back(CompileUnit.getNode()); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)); + Elts.push_back(Type.getNode()); + Elts.insert(Elts.end(), addr.begin(), addr.end()); + + return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size())); } /// CreateBlock - This creates a descriptor for a lexical block with the -/// specified parent context. -DIBlock DIFactory::CreateBlock(DIDescriptor Context) { - Constant *Elts[] = { +/// specified parent VMContext. +DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) { + Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_lexical_block), - getCastToEmpty(Context) + Context.getNode() + }; + return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2)); +} + +/// CreateLocation - Creates a debug info location. +DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, + DIScope S, DILocation OrigLoc) { + Value *Elts[] = { + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo), + S.getNode(), + OrigLoc.getNode(), }; - - Constant *Init = ConstantStruct::get(Elts, sizeof(Elts)/sizeof(Elts[0])); - - M.addTypeName("llvm.dbg.block.type", Init->getType()); - GlobalVariable *GV = new GlobalVariable(Init->getType(), true, - GlobalValue::InternalLinkage, - Init, "llvm.dbg.block", &M); - GV->setSection("llvm.metadata"); - return DIBlock(GV); + return DILocation(MDNode::get(VMContext, &Elts[0], 4)); } @@ -830,17 +884,17 @@ DIBlock DIFactory::CreateBlock(DIDescriptor Context) { /// inserting it at the end of the specified basic block. void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo, unsigned ColNo, BasicBlock *BB) { - + // Lazily construct llvm.dbg.stoppoint function. 
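CreateGlobalVariable above registers each new descriptor under the named metadata llvm.dbg.gv so the node stays reachable from the module even though no instruction refers to it. Below is a toy module with just enough named-metadata machinery to show the anchoring idea; the types and method are stand-ins, not the LLVM classes.

#include <cassert>
#include <map>
#include <string>
#include <vector>

struct Node { /* debug info payload elided */ };

// Toy module: named metadata is just a name -> list-of-nodes table that
// the module owns, so anything appended to it stays reachable.
struct Module {
  std::map<std::string, std::vector<Node *>> NamedMD;

  std::vector<Node *> &getOrInsertNamedMetadata(const std::string &Name) {
    return NamedMD[Name];  // created empty on first use
  }
};

int main() {
  Module M;
  Node *GVDesc = new Node;
  // Mirrors the patch: anchor the descriptor under "llvm.dbg.gv" so it is
  // reachable from the module even with no other reference to it.
  M.getOrInsertNamedMetadata("llvm.dbg.gv").push_back(GVDesc);
  assert(M.NamedMD["llvm.dbg.gv"].size() == 1);
  delete GVDesc;
  return 0;
}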
if (!StopPointFn) - StopPointFn = llvm::Intrinsic::getDeclaration(&M, + StopPointFn = llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::dbg_stoppoint); - + // Invoke llvm.dbg.stoppoint Value *Args[] = { - llvm::ConstantInt::get(llvm::Type::Int32Ty, LineNo), - llvm::ConstantInt::get(llvm::Type::Int32Ty, ColNo), - getCastToEmpty(CU) + ConstantInt::get(llvm::Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(llvm::Type::getInt32Ty(VMContext), ColNo), + CU.getNode() }; CallInst::Create(StopPointFn, Args, Args+3, "", BB); } @@ -851,9 +905,9 @@ void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) { // Lazily construct llvm.dbg.func.start. if (!FuncStartFn) FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start); - + // Call llvm.dbg.func.start which also implicitly sets a stoppoint. - CallInst::Create(FuncStartFn, getCastToEmpty(SP), "", BB); + CallInst::Create(FuncStartFn, SP.getNode(), "", BB); } /// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to @@ -864,7 +918,7 @@ void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) { RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start); // Call llvm.dbg.func.start. - CallInst::Create(RegionStartFn, getCastToEmpty(D), "", BB); + CallInst::Create(RegionStartFn, D.getNode(), "", BB); } /// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to @@ -875,19 +929,220 @@ void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) { RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end); // Call llvm.dbg.region.end. - CallInst::Create(RegionEndFn, getCastToEmpty(D), "", BB); + CallInst::Create(RegionEndFn, D.getNode(), "", BB); } /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -void DIFactory::InsertDeclare(Value *Storage, DIVariable D, BasicBlock *BB) { +void DIFactory::InsertDeclare(Value *Storage, DIVariable D, + Instruction *InsertBefore) { // Cast the storage to a {}* for the call to llvm.dbg.declare. - Storage = new BitCastInst(Storage, EmptyStructPtr, "", BB); - + Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore); + if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { Storage, getCastToEmpty(D) }; - CallInst::Create(DeclareFn, Args, Args+2, "", BB); + Value *Args[] = { Storage, D.getNode() }; + CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); +} + +/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. +void DIFactory::InsertDeclare(Value *Storage, DIVariable D, + BasicBlock *InsertAtEnd) { + // Cast the storage to a {}* for the call to llvm.dbg.declare. + Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd); + + if (!DeclareFn) + DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); + + Value *Args[] = { Storage, D.getNode() }; + CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); +} + + +//===----------------------------------------------------------------------===// +// DebugInfoFinder implementations. +//===----------------------------------------------------------------------===// + +/// processModule - Process entire module and collect debug info. 
+void DebugInfoFinder::processModule(Module &M) { + +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MetadataContext &TheMetadata = M.getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); +#endif + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; + ++BI) { + if (DbgStopPointInst *SPI = dyn_cast(BI)) + processStopPoint(SPI); + else if (DbgFuncStartInst *FSI = dyn_cast(BI)) + processFuncStart(FSI); + else if (DbgRegionStartInst *DRS = dyn_cast(BI)) + processRegionStart(DRS); + else if (DbgRegionEndInst *DRE = dyn_cast(BI)) + processRegionEnd(DRE); + else if (DbgDeclareInst *DDI = dyn_cast(BI)) + processDeclare(DDI); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + else if (MDDbgKind) { + if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation Loc(L); + DIScope S(Loc.getScope().getNode()); + if (S.isCompileUnit()) + addCompileUnit(DICompileUnit(S.getNode())); + else if (S.isSubprogram()) + processSubprogram(DISubprogram(S.getNode())); + else if (S.isLexicalBlock()) + processLexicalBlock(DILexicalBlock(S.getNode())); + } + } +#endif + } + + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); + if (!NMD) + return; + + for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(cast(NMD->getElement(i))); + if (addGlobalVariable(DIG)) { + addCompileUnit(DIG.getCompileUnit()); + processType(DIG.getType()); + } + } +} + +/// processType - Process DIType. +void DebugInfoFinder::processType(DIType DT) { + if (!addType(DT)) + return; + + addCompileUnit(DT.getCompileUnit()); + if (DT.isCompositeType()) { + DICompositeType DCT(DT.getNode()); + processType(DCT.getTypeDerivedFrom()); + DIArray DA = DCT.getTypeArray(); + if (!DA.isNull()) + for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) { + DIDescriptor D = DA.getElement(i); + DIType TypeE = DIType(D.getNode()); + if (!TypeE.isNull()) + processType(TypeE); + else + processSubprogram(DISubprogram(D.getNode())); + } + } else if (DT.isDerivedType()) { + DIDerivedType DDT(DT.getNode()); + if (!DDT.isNull()) + processType(DDT.getTypeDerivedFrom()); + } +} + +/// processLexicalBlock +void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { + if (LB.isNull()) + return; + DIScope Context = LB.getContext(); + if (Context.isLexicalBlock()) + return processLexicalBlock(DILexicalBlock(Context.getNode())); + else + return processSubprogram(DISubprogram(Context.getNode())); +} + +/// processSubprogram - Process DISubprogram. +void DebugInfoFinder::processSubprogram(DISubprogram SP) { + if (SP.isNull()) + return; + if (!addSubprogram(SP)) + return; + addCompileUnit(SP.getCompileUnit()); + processType(SP.getType()); +} + +/// processStopPoint - Process DbgStopPointInst. +void DebugInfoFinder::processStopPoint(DbgStopPointInst *SPI) { + MDNode *Context = dyn_cast(SPI->getContext()); + addCompileUnit(DICompileUnit(Context)); +} + +/// processFuncStart - Process DbgFuncStartInst. +void DebugInfoFinder::processFuncStart(DbgFuncStartInst *FSI) { + MDNode *SP = dyn_cast(FSI->getSubprogram()); + processSubprogram(DISubprogram(SP)); +} + +/// processRegionStart - Process DbgRegionStart. +void DebugInfoFinder::processRegionStart(DbgRegionStartInst *DRS) { + MDNode *SP = dyn_cast(DRS->getContext()); + processSubprogram(DISubprogram(SP)); +} + +/// processRegionEnd - Process DbgRegionEnd. 
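DebugInfoFinder::processModule above walks module, function, basic block, instruction, classifies each debug intrinsic, and records what it finds; the addType/addCompileUnit/addSubprogram helpers just below make the collection idempotent by consulting a seen-set first. A compact model of that record-once pattern follows, with std::unordered_set standing in for SmallPtrSet.

#include <cassert>
#include <string>
#include <unordered_set>
#include <vector>

// Toy finder: record each distinct debug node exactly once, even when
// many instructions point at it.
struct Finder {
  std::unordered_set<const void *> NodesSeen;
  std::vector<const std::string *> CUs;

  bool addCompileUnit(const std::string *CU) {
    if (!CU) return false;
    if (!NodesSeen.insert(CU).second)  // already recorded
      return false;
    CUs.push_back(CU);
    return true;
  }
};

int main() {
  std::string CU = "a.c";
  Finder F;
  // Two "instructions" pointing at the same compile unit: one entry.
  assert(F.addCompileUnit(&CU));
  assert(!F.addCompileUnit(&CU));
  assert(F.CUs.size() == 1);
  return 0;
}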
+void DebugInfoFinder::processRegionEnd(DbgRegionEndInst *DRE) { + MDNode *SP = dyn_cast(DRE->getContext()); + processSubprogram(DISubprogram(SP)); +} + +/// processDeclare - Process DbgDeclareInst. +void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) { + DIVariable DV(cast(DDI->getVariable())); + if (DV.isNull()) + return; + + if (!NodesSeen.insert(DV.getNode())) + return; + + addCompileUnit(DV.getCompileUnit()); + processType(DV.getType()); +} + +/// addType - Add type into Tys. +bool DebugInfoFinder::addType(DIType DT) { + if (DT.isNull()) + return false; + + if (!NodesSeen.insert(DT.getNode())) + return false; + + TYs.push_back(DT.getNode()); + return true; +} + +/// addCompileUnit - Add compile unit into CUs. +bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) { + if (CU.isNull()) + return false; + + if (!NodesSeen.insert(CU.getNode())) + return false; + + CUs.push_back(CU.getNode()); + return true; +} + +/// addGlobalVariable - Add global variable into GVs. +bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) { + if (DIG.isNull()) + return false; + + if (!NodesSeen.insert(DIG.getNode())) + return false; + + GVs.push_back(DIG.getNode()); + return true; +} + +// addSubprogram - Add subprgoram into SPs. +bool DebugInfoFinder::addSubprogram(DISubprogram SP) { + if (SP.isNull()) + return false; + + if (!NodesSeen.insert(SP.getNode())) + return false; + + SPs.push_back(SP.getNode()); + return true; } namespace llvm { @@ -939,30 +1194,17 @@ namespace llvm { Value *findDbgGlobalDeclare(GlobalVariable *V) { const Module *M = V->getParent(); - const Type *Ty = M->getTypeByName("llvm.dbg.global_variable.type"); - if (!Ty) return 0; - - Ty = PointerType::get(Ty, 0); - - Value *Val = V->stripPointerCasts(); - for (Value::use_iterator I = Val->use_begin(), E = Val->use_end(); - I != E; ++I) { - if (ConstantExpr *CE = dyn_cast(I)) { - if (CE->getOpcode() == Instruction::BitCast) { - Value *VV = CE; - - while (VV->hasOneUse()) - VV = *VV->use_begin(); + NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); + if (!NMD) + return 0; - if (VV->getType() == Ty) - return VV; - } - } + for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(cast_or_null(NMD->getElement(i))); + if (DIG.isNull()) + continue; + if (DIG.getGlobal() == V) + return DIG.getNode(); } - - if (Val->getType() == Ty) - return Val; - return 0; } @@ -990,8 +1232,8 @@ namespace llvm { return 0; } - bool getLocationInfo(const Value *V, std::string &DisplayName, - std::string &Type, unsigned &LineNo, std::string &File, +bool getLocationInfo(const Value *V, std::string &DisplayName, + std::string &Type, unsigned &LineNo, std::string &File, std::string &Dir) { DICompileUnit Unit; DIType TypeD; @@ -999,81 +1241,56 @@ namespace llvm { if (GlobalVariable *GV = dyn_cast(const_cast(V))) { Value *DIGV = findDbgGlobalDeclare(GV); if (!DIGV) return false; - DIGlobalVariable Var(cast(DIGV)); + DIGlobalVariable Var(cast(DIGV)); - Var.getDisplayName(DisplayName); + if (const char *D = Var.getDisplayName()) + DisplayName = D; LineNo = Var.getLineNumber(); Unit = Var.getCompileUnit(); TypeD = Var.getType(); } else { const DbgDeclareInst *DDI = findDbgDeclare(V); if (!DDI) return false; - DIVariable Var(cast(DDI->getVariable())); + DIVariable Var(cast(DDI->getVariable())); - Var.getName(DisplayName); + if (const char *D = Var.getName()) + DisplayName = D; LineNo = Var.getLineNumber(); Unit = Var.getCompileUnit(); TypeD = Var.getType(); } - TypeD.getName(Type); - Unit.getFilename(File); - 
Unit.getDirectory(Dir); + if (const char *T = TypeD.getName()) + Type = T; + if (const char *F = Unit.getFilename()) + File = F; + if (const char *D = Unit.getDirectory()) + Dir = D; return true; } - /// CollectDebugInfoAnchors - Collect debugging information anchors. - void CollectDebugInfoAnchors(Module &M, - SmallVector &CUs, - SmallVector &GVs, - SmallVector &SPs) { - - for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); - GVI != E; GVI++) { - GlobalVariable *GV = GVI; - if (GV->hasName() && strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0 - && GV->isConstant() && GV->hasInitializer()) { - DICompileUnit C(GV); - if (C.isNull() == false) { - CUs.push_back(GV); - continue; - } - DIGlobalVariable G(GV); - if (G.isNull() == false) { - GVs.push_back(GV); - continue; - } - DISubprogram S(GV); - if (S.isNull() == false) { - SPs.push_back(GV); - continue; - } - } - } - } - - /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if SPI is a valid debug /// info intrinsic. - bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI, + bool isValidDebugInfoIntrinsic(DbgStopPointInst &SPI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(SPI.getContext(), OptLev); } - /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if FSI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgFuncStartInst &FSI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(FSI.getSubprogram(), OptLev); } - /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if RSI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgRegionStartInst &RSI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(RSI.getContext(), OptLev); } - /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if REI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgRegionEndInst &REI, CodeGenOpt::Level OptLev) { @@ -1081,14 +1298,14 @@ namespace llvm { } - /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug + /// isValidDebugInfoIntrinsic - Return true if DI is a valid debug /// info intrinsic. bool isValidDebugInfoIntrinsic(DbgDeclareInst &DI, CodeGenOpt::Level OptLev) { return DIDescriptor::ValidDebugInfo(DI.getVariable(), OptLev); } - /// ExtractDebugLocation - Extract debug location information + /// ExtractDebugLocation - Extract debug location information /// from llvm.dbg.stoppoint intrinsic. DebugLoc ExtractDebugLocation(DbgStopPointInst &SPI, DebugLocTracker &DebugLocInfo) { @@ -1096,7 +1313,7 @@ namespace llvm { Value *Context = SPI.getContext(); // If this location is already tracked then use it. - DebugLocTuple Tuple(cast(Context), SPI.getLine(), + DebugLocTuple Tuple(cast(Context), NULL, SPI.getLine(), SPI.getColumn()); DenseMap::iterator II = DebugLocInfo.DebugIdMap.find(Tuple); @@ -1107,23 +1324,48 @@ namespace llvm { unsigned Id = DebugLocInfo.DebugLocations.size(); DebugLocInfo.DebugLocations.push_back(Tuple); DebugLocInfo.DebugIdMap[Tuple] = Id; - + + return DebugLoc::get(Id); + } + + /// ExtractDebugLocation - Extract debug location information + /// from DILocation. 
+ DebugLoc ExtractDebugLocation(DILocation &Loc, + DebugLocTracker &DebugLocInfo) { + DebugLoc DL; + MDNode *Context = Loc.getScope().getNode(); + MDNode *InlinedLoc = NULL; + if (!Loc.getOrigLocation().isNull()) + InlinedLoc = Loc.getOrigLocation().getNode(); + // If this location is already tracked then use it. + DebugLocTuple Tuple(Context, InlinedLoc, Loc.getLineNumber(), + Loc.getColumnNumber()); + DenseMap::iterator II + = DebugLocInfo.DebugIdMap.find(Tuple); + if (II != DebugLocInfo.DebugIdMap.end()) + return DebugLoc::get(II->second); + + // Add a new location entry. + unsigned Id = DebugLocInfo.DebugLocations.size(); + DebugLocInfo.DebugLocations.push_back(Tuple); + DebugLocInfo.DebugIdMap[Tuple] = Id; + return DebugLoc::get(Id); } - /// ExtractDebugLocation - Extract debug location information + /// ExtractDebugLocation - Extract debug location information /// from llvm.dbg.func_start intrinsic. DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI, DebugLocTracker &DebugLocInfo) { DebugLoc DL; Value *SP = FSI.getSubprogram(); - DISubprogram Subprogram(cast(SP)); + DISubprogram Subprogram(cast(SP)); unsigned Line = Subprogram.getLineNumber(); DICompileUnit CU(Subprogram.getCompileUnit()); // If this location is already tracked then use it. - DebugLocTuple Tuple(CU.getGV(), Line, /* Column */ 0); + DebugLocTuple Tuple(CU.getNode(), NULL, Line, /* Column */ 0); DenseMap::iterator II = DebugLocInfo.DebugIdMap.find(Tuple); if (II != DebugLocInfo.DebugIdMap.end()) @@ -1133,13 +1375,13 @@ namespace llvm { unsigned Id = DebugLocInfo.DebugLocations.size(); DebugLocInfo.DebugLocations.push_back(Tuple); DebugLocInfo.DebugIdMap[Tuple] = Id; - + return DebugLoc::get(Id); } /// isInlinedFnStart - Return true if FSI is starting an inlined function. bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) { - DISubprogram Subprogram(cast(FSI.getSubprogram())); + DISubprogram Subprogram(cast(FSI.getSubprogram())); if (Subprogram.describes(CurrentFn)) return false; @@ -1148,11 +1390,10 @@ namespace llvm { /// isInlinedFnEnd - Return true if REI is ending an inlined function. 
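Both ExtractDebugLocation overloads above intern a (scope, inlined-at, line, column) tuple into DebugLocInfo and hand instructions a small integer id, reusing the id when the tuple has been seen before. Below is a self-contained imitation of that interning scheme; the container choices are mine.

#include <cassert>
#include <map>
#include <tuple>
#include <vector>

// Toy version of the DebugLocTuple -> id map: each distinct tuple gets
// one small integer id that instructions carry instead of the tuple.
using LocTuple = std::tuple<const void *, const void *, unsigned, unsigned>;

struct LocTracker {
  std::vector<LocTuple> Locations;
  std::map<LocTuple, unsigned> IdMap;

  unsigned getId(const LocTuple &T) {
    auto It = IdMap.find(T);
    if (It != IdMap.end())
      return It->second;             // already tracked: reuse the id
    unsigned Id = Locations.size();  // else append a new entry
    Locations.push_back(T);
    IdMap[T] = Id;
    return Id;
  }
};

int main() {
  int Scope;  // any distinct address will do for the sketch
  LocTracker T;
  unsigned A = T.getId({&Scope, nullptr, 10, 4});
  unsigned B = T.getId({&Scope, nullptr, 10, 4});
  unsigned C = T.getId({&Scope, nullptr, 11, 1});
  assert(A == B && A != C && T.Locations.size() == 2);
  return 0;
}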
bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) { - DISubprogram Subprogram(cast(REI.getContext())); + DISubprogram Subprogram(cast(REI.getContext())); if (Subprogram.isNull() || Subprogram.describes(CurrentFn)) return false; return true; } - } diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index 3fb65265472d5..1c9159dfbfcc1 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -60,9 +60,11 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/System/Atomic.h" @@ -84,7 +86,9 @@ #define FULL_UNIVERSAL 0 using namespace llvm; +#ifndef NDEBUG STATISTIC(NumIters , "Number of iterations to reach convergence"); +#endif STATISTIC(NumConstraints, "Number of constraints"); STATISTIC(NumNodes , "Number of nodes"); STATISTIC(NumUnified , "Number of variables unified"); @@ -507,7 +511,7 @@ namespace { #ifndef NDEBUG V->dump(); #endif - assert(0 && "Value does not have a node in the points-to graph!"); + llvm_unreachable("Value does not have a node in the points-to graph!"); } return I->second; } @@ -589,9 +593,12 @@ namespace { friend class InstVisitor; void visitReturnInst(ReturnInst &RI); void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); } - void visitCallInst(CallInst &CI) { visitCallSite(CallSite(&CI)); } + void visitCallInst(CallInst &CI) { + if (isMalloc(&CI)) visitAllocationInst(CI); + else visitCallSite(CallSite(&CI)); + } void visitCallSite(CallSite CS); - void visitAllocationInst(AllocationInst &AI); + void visitAllocationInst(Instruction &I); void visitLoadInst(LoadInst &LI); void visitStoreInst(StoreInst &SI); void visitGetElementPtrInst(GetElementPtrInst &GEP); @@ -606,7 +613,7 @@ namespace { //===------------------------------------------------------------------===// // Implement Analyize interface // - void print(std::ostream &O, const Module* M) const { + void print(raw_ostream &O, const Module*) const { PrintPointsToGraph(); } }; @@ -614,7 +621,8 @@ namespace { char Andersens::ID = 0; static RegisterPass -X("anders-aa", "Andersen's Interprocedural Alias Analysis", false, true); +X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)", + false, true); static RegisterAnalysisGroup Y(X); // Initialize Timestamp Counter (static). 
@@ -786,6 +794,8 @@ void Andersens::IdentifyObjects(Module &M) { ValueNodes[&*II] = NumObjects++; if (AllocationInst *AI = dyn_cast(&*II)) ObjectNodes[AI] = NumObjects++; + else if (isMalloc(&*II)) + ObjectNodes[&*II] = NumObjects++; } // Calls to inline asm need to be added as well because the callee isn't @@ -825,11 +835,11 @@ unsigned Andersens::getNodeForConstantPointer(Constant *C) { case Instruction::BitCast: return getNodeForConstantPointer(CE->getOperand(0)); default: - cerr << "Constant Expr not yet handled: " << *CE << "\n"; - assert(0); + errs() << "Constant Expr not yet handled: " << *CE << "\n"; + llvm_unreachable(0); } } else { - assert(0 && "Unknown constant pointer!"); + llvm_unreachable("Unknown constant pointer!"); } return 0; } @@ -852,11 +862,11 @@ unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { case Instruction::BitCast: return getNodeForConstantPointerTarget(CE->getOperand(0)); default: - cerr << "Constant Expr not yet handled: " << *CE << "\n"; - assert(0); + errs() << "Constant Expr not yet handled: " << *CE << "\n"; + llvm_unreachable(0); } } else { - assert(0 && "Unknown constant pointer!"); + llvm_unreachable("Unknown constant pointer!"); } return 0; } @@ -996,7 +1006,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) { if (!isa(V->getType())) return true; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (dyn_cast(*UI)) { + if (isa(*UI)) { return false; } else if (StoreInst *SI = dyn_cast(*UI)) { if (V == SI->getOperand(1)) { @@ -1027,7 +1037,7 @@ bool Andersens::AnalyzeUsesOfFunction(Value *V) { } else if (ICmpInst *ICI = dyn_cast(*UI)) { if (!isa(ICI->getOperand(1))) return true; // Allow comparison against null. - } else if (dyn_cast(*UI)) { + } else if (isa(*UI)) { return false; } else { return true; @@ -1060,7 +1070,7 @@ void Andersens::CollectConstraints(Module &M) { Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), ObjectIndex)); - if (I->hasInitializer()) { + if (I->hasDefinitiveInitializer()) { AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); } else { // If it doesn't have an initializer (i.e. it's defined in another @@ -1152,15 +1162,15 @@ void Andersens::visitInstruction(Instruction &I) { return; default: // Is this something we aren't handling yet? - cerr << "Unknown instruction: " << I; - abort(); + errs() << "Unknown instruction: " << I; + llvm_unreachable(0); } } -void Andersens::visitAllocationInst(AllocationInst &AI) { - unsigned ObjectIndex = getObject(&AI); - GraphNodes[ObjectIndex].setValue(&AI); - Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(AI), +void Andersens::visitAllocationInst(Instruction &I) { + unsigned ObjectIndex = getObject(&I); + GraphNodes[ObjectIndex].setValue(&I); + Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I), ObjectIndex)); } @@ -1243,7 +1253,7 @@ void Andersens::visitSelectInst(SelectInst &SI) { } void Andersens::visitVAArg(VAArgInst &I) { - assert(0 && "vaarg not handled yet!"); + llvm_unreachable("vaarg not handled yet!"); } /// AddConstraintsForCall - Add constraints for a call with actual arguments @@ -1395,12 +1405,6 @@ bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const { return Result; } -void dumpToDOUT(SparseBitVector<> *bitmap) { -#ifndef NDEBUG - dump(*bitmap, DOUT); -#endif -} - /// Clump together address taken variables so that the points-to sets use up /// less space and can be operated on faster. 
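The Andersens changes above drop the assumption that heap allocation always appears as a dedicated instruction: visitCallInst now asks isMalloc() and routes malloc calls to the allocation handler, everything else to the ordinary call-site handler. Here is a sketch of that dispatch shape, with a trivial stand-in predicate and print statements in place of constraint generation.

#include <iostream>
#include <string>

struct CallInst { std::string Callee; };

// Stand-in for MallocHelper's isMalloc(): treat a direct call to malloc
// as a heap allocation site.
static bool isMalloc(const CallInst &CI) { return CI.Callee == "malloc"; }

static void visitAllocation(const CallInst &CI) {
  std::cout << "new points-to object for call to " << CI.Callee << "\n";
}

static void visitCallSite(const CallInst &CI) {
  std::cout << "ordinary call-site constraints for " << CI.Callee << "\n";
}

// Mirrors the new visitCallInst: allocation sites and ordinary calls get
// different constraint-generation paths.
static void visitCallInst(const CallInst &CI) {
  if (isMalloc(CI))
    visitAllocation(CI);
  else
    visitCallSite(CI);
}

int main() {
  visitCallInst({"malloc"});
  visitCallInst({"printf"});
  return 0;
}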
@@ -1424,7 +1428,7 @@ void Andersens::ClumpAddressTaken() { unsigned Pos = NewPos++; Translate[i] = Pos; NewGraphNodes.push_back(GraphNodes[i]); - DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; + DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n"); } // I believe this ends up being faster than making two vectors and splicing @@ -1434,7 +1438,7 @@ void Andersens::ClumpAddressTaken() { unsigned Pos = NewPos++; Translate[i] = Pos; NewGraphNodes.push_back(GraphNodes[i]); - DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; + DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n"); } } @@ -1443,7 +1447,7 @@ void Andersens::ClumpAddressTaken() { unsigned Pos = NewPos++; Translate[i] = Pos; NewGraphNodes.push_back(GraphNodes[i]); - DOUT << "Renumbering node " << i << " to node " << Pos << "\n"; + DEBUG(errs() << "Renumbering node " << i << " to node " << Pos << "\n"); } } @@ -1515,7 +1519,7 @@ void Andersens::ClumpAddressTaken() { /// receive &D from E anyway. void Andersens::HVN() { - DOUT << "Beginning HVN\n"; + DEBUG(errs() << "Beginning HVN\n"); // Build a predecessor graph. This is like our constraint graph with the // edges going in the opposite direction, and there are edges for all the // constraints, instead of just copy constraints. We also build implicit @@ -1586,7 +1590,7 @@ void Andersens::HVN() { Node2DFS.clear(); Node2Deleted.clear(); Node2Visited.clear(); - DOUT << "Finished HVN\n"; + DEBUG(errs() << "Finished HVN\n"); } @@ -1710,7 +1714,7 @@ void Andersens::HVNValNum(unsigned NodeIndex) { /// and is equivalent to value numbering the collapsed constraint graph /// including evaluating unions. void Andersens::HU() { - DOUT << "Beginning HU\n"; + DEBUG(errs() << "Beginning HU\n"); // Build a predecessor graph. This is like our constraint graph with the // edges going in the opposite direction, and there are edges for all the // constraints, instead of just copy constraints. We also build implicit @@ -1790,7 +1794,7 @@ void Andersens::HU() { } // PEClass nodes will be deleted by the deleting of N->PointsTo in our caller. Set2PEClass.clear(); - DOUT << "Finished HU\n"; + DEBUG(errs() << "Finished HU\n"); } @@ -1968,12 +1972,12 @@ void Andersens::RewriteConstraints() { // to anything. if (LHSLabel == 0) { DEBUG(PrintNode(&GraphNodes[LHSNode])); - DOUT << " is a non-pointer, ignoring constraint.\n"; + DEBUG(errs() << " is a non-pointer, ignoring constraint.\n"); continue; } if (RHSLabel == 0) { DEBUG(PrintNode(&GraphNodes[RHSNode])); - DOUT << " is a non-pointer, ignoring constraint.\n"; + DEBUG(errs() << " is a non-pointer, ignoring constraint.\n"); continue; } // This constraint may be useless, and it may become useless as we translate @@ -2021,19 +2025,19 @@ void Andersens::PrintLabels() const { if (i < FirstRefNode) { PrintNode(&GraphNodes[i]); } else if (i < FirstAdrNode) { - DOUT << "REF("; + DEBUG(errs() << "REF("); PrintNode(&GraphNodes[i-FirstRefNode]); - DOUT <<")"; + DEBUG(errs() <<")"); } else { - DOUT << "ADR("; + DEBUG(errs() << "ADR("); PrintNode(&GraphNodes[i-FirstAdrNode]); - DOUT <<")"; + DEBUG(errs() <<")"); } - DOUT << " has pointer label " << GraphNodes[i].PointerEquivLabel + DEBUG(errs() << " has pointer label " << GraphNodes[i].PointerEquivLabel << " and SCC rep " << VSSCCRep[i] << " and is " << (GraphNodes[i].Direct ? 
"Direct" : "Not direct") - << "\n"; + << "\n"); } } @@ -2047,7 +2051,7 @@ void Andersens::PrintLabels() const { /// operation are stored in SDT and are later used in SolveContraints() /// and UniteNodes(). void Andersens::HCD() { - DOUT << "Starting HCD.\n"; + DEBUG(errs() << "Starting HCD.\n"); HCDSCCRep.resize(GraphNodes.size()); for (unsigned i = 0; i < GraphNodes.size(); ++i) { @@ -2096,7 +2100,7 @@ void Andersens::HCD() { Node2Visited.clear(); Node2Deleted.clear(); HCDSCCRep.clear(); - DOUT << "HCD complete.\n"; + DEBUG(errs() << "HCD complete.\n"); } // Component of HCD: @@ -2168,7 +2172,7 @@ void Andersens::Search(unsigned Node) { /// Optimize the constraints by performing offline variable substitution and /// other optimizations. void Andersens::OptimizeConstraints() { - DOUT << "Beginning constraint optimization\n"; + DEBUG(errs() << "Beginning constraint optimization\n"); SDTActive = false; @@ -2252,7 +2256,7 @@ void Andersens::OptimizeConstraints() { // HCD complete. - DOUT << "Finished constraint optimization\n"; + DEBUG(errs() << "Finished constraint optimization\n"); FirstRefNode = 0; FirstAdrNode = 0; } @@ -2260,7 +2264,7 @@ void Andersens::OptimizeConstraints() { /// Unite pointer but not location equivalent variables, now that the constraint /// graph is built. void Andersens::UnitePointerEquivalences() { - DOUT << "Uniting remaining pointer equivalences\n"; + DEBUG(errs() << "Uniting remaining pointer equivalences\n"); for (unsigned i = 0; i < GraphNodes.size(); ++i) { if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) { unsigned Label = GraphNodes[i].PointerEquivLabel; @@ -2269,7 +2273,7 @@ void Andersens::UnitePointerEquivalences() { UniteNodes(i, PENLEClass2Node[Label]); } } - DOUT << "Finished remaining pointer equivalences\n"; + DEBUG(errs() << "Finished remaining pointer equivalences\n"); PENLEClass2Node.clear(); } @@ -2425,7 +2429,7 @@ void Andersens::SolveConstraints() { std::vector RSV; #endif while( !CurrWL->empty() ) { - DOUT << "Starting iteration #" << ++NumIters << "\n"; + DEBUG(errs() << "Starting iteration #" << ++NumIters << "\n"); Node* CurrNode; unsigned CurrNodeIndex; @@ -2728,11 +2732,11 @@ unsigned Andersens::UniteNodes(unsigned First, unsigned Second, SecondNode->OldPointsTo = NULL; NumUnified++; - DOUT << "Unified Node "; + DEBUG(errs() << "Unified Node "); DEBUG(PrintNode(FirstNode)); - DOUT << " and Node "; + DEBUG(errs() << " and Node "); DEBUG(PrintNode(SecondNode)); - DOUT << "\n"; + DEBUG(errs() << "\n"); if (SDTActive) if (SDT[Second] >= 0) { @@ -2777,17 +2781,17 @@ unsigned Andersens::FindNode(unsigned NodeIndex) const { void Andersens::PrintNode(const Node *N) const { if (N == &GraphNodes[UniversalSet]) { - cerr << ""; + errs() << ""; return; } else if (N == &GraphNodes[NullPtr]) { - cerr << ""; + errs() << ""; return; } else if (N == &GraphNodes[NullObject]) { - cerr << ""; + errs() << ""; return; } if (!N->getValue()) { - cerr << "artificial" << (intptr_t) N; + errs() << "artificial" << (intptr_t) N; return; } @@ -2796,85 +2800,85 @@ void Andersens::PrintNode(const Node *N) const { if (Function *F = dyn_cast(V)) { if (isa(F->getFunctionType()->getReturnType()) && N == &GraphNodes[getReturnNode(F)]) { - cerr << F->getName() << ":retval"; + errs() << F->getName() << ":retval"; return; } else if (F->getFunctionType()->isVarArg() && N == &GraphNodes[getVarargNode(F)]) { - cerr << F->getName() << ":vararg"; + errs() << F->getName() << ":vararg"; return; } } if (Instruction *I = dyn_cast(V)) - cerr << 
I->getParent()->getParent()->getName() << ":"; + errs() << I->getParent()->getParent()->getName() << ":"; else if (Argument *Arg = dyn_cast(V)) - cerr << Arg->getParent()->getName() << ":"; + errs() << Arg->getParent()->getName() << ":"; if (V->hasName()) - cerr << V->getName(); + errs() << V->getName(); else - cerr << "(unnamed)"; + errs() << "(unnamed)"; - if (isa(V) || isa(V)) + if (isa(V) || isa(V) || isMalloc(V)) if (N == &GraphNodes[getObject(V)]) - cerr << ""; + errs() << ""; } void Andersens::PrintConstraint(const Constraint &C) const { if (C.Type == Constraint::Store) { - cerr << "*"; + errs() << "*"; if (C.Offset != 0) - cerr << "("; + errs() << "("; } PrintNode(&GraphNodes[C.Dest]); if (C.Type == Constraint::Store && C.Offset != 0) - cerr << " + " << C.Offset << ")"; - cerr << " = "; + errs() << " + " << C.Offset << ")"; + errs() << " = "; if (C.Type == Constraint::Load) { - cerr << "*"; + errs() << "*"; if (C.Offset != 0) - cerr << "("; + errs() << "("; } else if (C.Type == Constraint::AddressOf) - cerr << "&"; + errs() << "&"; PrintNode(&GraphNodes[C.Src]); if (C.Offset != 0 && C.Type != Constraint::Store) - cerr << " + " << C.Offset; + errs() << " + " << C.Offset; if (C.Type == Constraint::Load && C.Offset != 0) - cerr << ")"; - cerr << "\n"; + errs() << ")"; + errs() << "\n"; } void Andersens::PrintConstraints() const { - cerr << "Constraints:\n"; + errs() << "Constraints:\n"; for (unsigned i = 0, e = Constraints.size(); i != e; ++i) PrintConstraint(Constraints[i]); } void Andersens::PrintPointsToGraph() const { - cerr << "Points-to graph:\n"; + errs() << "Points-to graph:\n"; for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) { const Node *N = &GraphNodes[i]; if (FindNode(i) != i) { PrintNode(N); - cerr << "\t--> same as "; + errs() << "\t--> same as "; PrintNode(&GraphNodes[FindNode(i)]); - cerr << "\n"; + errs() << "\n"; } else { - cerr << "[" << (N->PointsTo->count()) << "] "; + errs() << "[" << (N->PointsTo->count()) << "] "; PrintNode(N); - cerr << "\t--> "; + errs() << "\t--> "; bool first = true; for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); bi != N->PointsTo->end(); ++bi) { if (!first) - cerr << ", "; + errs() << ", "; PrintNode(&GraphNodes[*bi]); first = false; } - cerr << "\n"; + errs() << "\n"; } } } diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 6dabcdb94bf11..e2b288d1ba96c 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -18,8 +18,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" -#include +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -54,7 +53,7 @@ public: CallsExternalNode = new CallGraphNode(0); Root = 0; - // Add every function to the call graph... + // Add every function to the call graph. 
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) addToCallGraph(I); @@ -68,30 +67,21 @@ public: AU.setPreservesAll(); } - void print(std::ostream *o, const Module *M) const { - if (o) print(*o, M); - } - - virtual void print(std::ostream &o, const Module *M) const { - o << "CallGraph Root is: "; + virtual void print(raw_ostream &OS, const Module *) const { + OS << "CallGraph Root is: "; if (Function *F = getRoot()->getFunction()) - o << F->getName() << "\n"; - else - o << "<>\n"; + OS << F->getName() << "\n"; + else { + OS << "<>\n"; + } - CallGraph::print(o, M); + CallGraph::print(OS, 0); } virtual void releaseMemory() { destroy(); } - /// dump - Print out this call graph. - /// - inline void dump() const { - print(cerr, Mod); - } - CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; } CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; } @@ -179,21 +169,20 @@ void CallGraph::initialize(Module &M) { } void CallGraph::destroy() { - if (!FunctionMap.empty()) { - for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); - I != E; ++I) - delete I->second; - FunctionMap.clear(); - } + if (FunctionMap.empty()) return; + + for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); + I != E; ++I) + delete I->second; + FunctionMap.clear(); } -void CallGraph::print(std::ostream &OS, const Module *M) const { +void CallGraph::print(raw_ostream &OS, Module*) const { for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } - void CallGraph::dump() const { - print(cerr, 0); + print(errs(), 0); } //===----------------------------------------------------------------------===// @@ -207,7 +196,7 @@ void CallGraph::dump() const { // is to dropAllReferences before calling this. // Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { - assert(CGN->CalledFunctions.empty() && "Cannot remove function from call " + assert(CGN->empty() && "Cannot remove function from call " "graph if it references other functions!"); Function *F = CGN->getFunction(); // Get the function for the call graph node delete CGN; // Delete the call graph node for this func @@ -217,20 +206,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { return F; } -// changeFunction - This method changes the function associated with this -// CallGraphNode, for use by transformations that need to change the prototype -// of a Function (thus they must create a new Function and move the old code -// over). -void CallGraph::changeFunction(Function *OldF, Function *NewF) { - iterator I = FunctionMap.find(OldF); - CallGraphNode *&New = FunctionMap[NewF]; - assert(I != FunctionMap.end() && I->second && !New && - "OldF didn't exist in CG or NewF already does!"); - New = I->second; - New->F = NewF; - FunctionMap.erase(I); -} - // getOrInsertFunction - This method is identical to calling operator[], but // it will insert a new CallGraphNode for the specified function if one does // not already exist. 
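A recurring theme in this file is moving the print/dump entry points from
std::ostream to raw_ostream, with dump() now writing through errs(). For an
analysis whose results should still come out under opt -analyze after this
change, the overriding pattern looks roughly like the sketch below (pass name
and output are hypothetical; pass registration is omitted):

    #include "llvm/Module.h"
    #include "llvm/Pass.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    namespace {
      struct DemoInfo : public ModulePass {
        static char ID;
        DemoInfo() : ModulePass(&ID) {}

        virtual bool runOnModule(Module &M) { return false; }

        // Note the raw_ostream parameter; the std::ostream overload is gone.
        virtual void print(raw_ostream &OS, const Module *) const {
          OS << "DemoInfo: nothing to report\n";
        }
      };
    }

    char DemoInfo::ID = 0;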
@@ -242,11 +217,13 @@ CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { return CGN = new CallGraphNode(const_cast(F)); } -void CallGraphNode::print(std::ostream &OS) const { +void CallGraphNode::print(raw_ostream &OS) const { if (Function *F = getFunction()) - OS << "Call graph node for function: '" << F->getName() <<"'\n"; + OS << "Call graph node for function: '" << F->getName() << "'"; else - OS << "Call graph node <>:\n"; + OS << "Call graph node <>"; + + OS << "<<0x" << this << ">> #uses=" << getNumReferences() << '\n'; for (const_iterator I = begin(), E = end(); I != E; ++I) if (Function *FI = I->second->getFunction()) @@ -256,7 +233,7 @@ void CallGraphNode::print(std::ostream &OS) const { OS << "\n"; } -void CallGraphNode::dump() const { print(cerr); } +void CallGraphNode::dump() const { print(errs()); } /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. Note that this method takes linear time, so it @@ -264,8 +241,10 @@ void CallGraphNode::dump() const { print(cerr); } void CallGraphNode::removeCallEdgeFor(CallSite CS) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); - if (I->first == CS) { - CalledFunctions.erase(I); + if (I->first == CS.getInstruction()) { + I->second->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); return; } } @@ -278,6 +257,7 @@ void CallGraphNode::removeCallEdgeFor(CallSite CS) { void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) { for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i) if (CalledFunctions[i].second == Callee) { + Callee->DropRef(); CalledFunctions[i] = CalledFunctions.back(); CalledFunctions.pop_back(); --i; --e; @@ -290,21 +270,27 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); CallRecord &CR = *I; - if (CR.second == Callee && !CR.first.getInstruction()) { - CalledFunctions.erase(I); + if (CR.second == Callee && CR.first == 0) { + Callee->DropRef(); + *I = CalledFunctions.back(); + CalledFunctions.pop_back(); return; } } } -/// replaceCallSite - Make the edge in the node for Old CallSite be for -/// New CallSite instead. Note that this method takes linear time, so it -/// should be used sparingly. -void CallGraphNode::replaceCallSite(CallSite Old, CallSite New) { +/// replaceCallEdge - This method replaces the edge in the node for the +/// specified call site with a new one. Note that this method takes linear +/// time, so it should be used sparingly. 
+void CallGraphNode::replaceCallEdge(CallSite CS, + CallSite NewCS, CallGraphNode *NewNode){ for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { - assert(I != CalledFunctions.end() && "Cannot find callsite to replace!"); - if (I->first == Old) { - I->first = New; + assert(I != CalledFunctions.end() && "Cannot find callsite to remove!"); + if (I->first == CS.getInstruction()) { + I->second->DropRef(); + I->first = NewCS.getInstruction(); + I->second = NewNode; + NewNode->AddRef(); return; } } diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 3880d0a10bb67..a96a5c591f831 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -15,22 +15,25 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "cgscc-passmgr" #include "llvm/CallGraphSCCPass.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/PassManagers.h" #include "llvm/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// // CGPassManager // -/// CGPassManager manages FPPassManagers and CalLGraphSCCPasses. +/// CGPassManager manages FPPassManagers and CallGraphSCCPasses. namespace { class CGPassManager : public ModulePass, public PMDataManager { - public: static char ID; explicit CGPassManager(int Depth) @@ -56,7 +59,7 @@ public: // Print passes managed by this manager void dumpPassStructure(unsigned Offset) { - llvm::cerr << std::string(Offset*2, ' ') << "Call Graph SCC Pass Manager\n"; + errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n"; for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); P->dumpPassStructure(Offset + 1); @@ -65,56 +68,275 @@ public: } Pass *getContainedPass(unsigned N) { - assert ( N < PassVector.size() && "Pass number out of range!"); - Pass *FP = static_cast(PassVector[N]); - return FP; + assert(N < PassVector.size() && "Pass number out of range!"); + return static_cast(PassVector[N]); } virtual PassManagerType getPassManagerType() const { return PMT_CallGraphPassManager; } + +private: + bool RunPassOnSCC(Pass *P, std::vector &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate); + void RefreshCallGraph(std::vector &CurSCC, CallGraph &CG, + bool IsCheckingMode); }; -} +} // end anonymous namespace. char CGPassManager::ID = 0; + +bool CGPassManager::RunPassOnSCC(Pass *P, std::vector &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate) { + bool Changed = false; + if (CallGraphSCCPass *CGSP = dynamic_cast(P)) { + if (!CallGraphUpToDate) { + RefreshCallGraph(CurSCC, CG, false); + CallGraphUpToDate = true; + } + + Timer *T = StartPassTimer(CGSP); + Changed = CGSP->runOnSCC(CurSCC); + StopPassTimer(CGSP, T); + + // After the CGSCCPass is done, when assertions are enabled, use + // RefreshCallGraph to verify that the callgraph was correctly updated. +#ifndef NDEBUG + if (Changed) + RefreshCallGraph(CurSCC, CG, true); +#endif + + return Changed; + } + + FPPassManager *FPP = dynamic_cast(P); + assert(FPP && "Invalid CGPassManager member"); + + // Run pass P on all functions in the current SCC. 
+ for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { + if (Function *F = CurSCC[i]->getFunction()) { + dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); + Timer *T = StartPassTimer(FPP); + Changed |= FPP->runOnFunction(*F); + StopPassTimer(FPP, T); + } + } + + // The function pass(es) modified the IR, they may have clobbered the + // callgraph. + if (Changed && CallGraphUpToDate) { + DEBUG(errs() << "CGSCCPASSMGR: Pass Dirtied SCC: " + << P->getPassName() << '\n'); + CallGraphUpToDate = false; + } + return Changed; +} + + +/// RefreshCallGraph - Scan the functions in the specified CFG and resync the +/// callgraph with the call sites found in it. This is used after +/// FunctionPasses have potentially munged the callgraph, and can be used after +/// CallGraphSCC passes to verify that they correctly updated the callgraph. +/// +void CGPassManager::RefreshCallGraph(std::vector &CurSCC, + CallGraph &CG, bool CheckingMode) { + DenseMap CallSites; + + DEBUG(errs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() + << " nodes:\n"; + for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) + CurSCC[i]->dump(); + ); + + bool MadeChange = false; + + // Scan all functions in the SCC. + for (unsigned sccidx = 0, e = CurSCC.size(); sccidx != e; ++sccidx) { + CallGraphNode *CGN = CurSCC[sccidx]; + Function *F = CGN->getFunction(); + if (F == 0 || F->isDeclaration()) continue; + + // Walk the function body looking for call sites. Sync up the call sites in + // CGN with those actually in the function. + + // Get the set of call sites currently in the function. + for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { + // If this call site is null, then the function pass deleted the call + // entirely and the WeakVH nulled it out. + if (I->first == 0 || + // If we've already seen this call site, then the FunctionPass RAUW'd + // one call with another, which resulted in two "uses" in the edge + // list of the same call. + CallSites.count(I->first) || + + // If the call edge is not from a call or invoke, then the function + // pass RAUW'd a call with another value. This can happen when + // constant folding happens of well known functions etc. + CallSite::get(I->first).getInstruction() == 0) { + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. + bool WasLast = I + 1 == E; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. + if (WasLast) + break; + E = CGN->end(); + continue; + } + + assert(!CallSites.count(I->first) && + "Call site occurs in node multiple times"); + CallSites.insert(std::make_pair(I->first, I->second)); + ++I; + } + + // Loop over all of the instructions in the function, getting the callsites. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS = CallSite::get(I); + if (!CS.getInstruction() || isa(I)) continue; + + // If this call site already existed in the callgraph, just verify it + // matches up to expectations and remove it from CallSites. 
+ DenseMap::iterator ExistingIt = + CallSites.find(CS.getInstruction()); + if (ExistingIt != CallSites.end()) { + CallGraphNode *ExistingNode = ExistingIt->second; + + // Remove from CallSites since we have now seen it. + CallSites.erase(ExistingIt); + + // Verify that the callee is right. + if (ExistingNode->getFunction() == CS.getCalledFunction()) + continue; + + // If we are in checking mode, we are not allowed to actually mutate + // the callgraph. If this is a case where we can infer that the + // callgraph is less precise than it could be (e.g. an indirect call + // site could be turned direct), don't reject it in checking mode, and + // don't tweak it to be more precise. + if (CheckingMode && CS.getCalledFunction() && + ExistingNode->getFunction() == 0) + continue; + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If not, we either went from a direct call to indirect, indirect to + // direct, or direct to different direct. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) + CalleeNode = CG.getOrInsertFunction(Callee); + else + CalleeNode = CG.getCallsExternalNode(); + + // Update the edge target in CGN. + for (CallGraphNode::iterator I = CGN->begin(); ; ++I) { + assert(I != CGN->end() && "Didn't find call entry"); + if (I->first == CS.getInstruction()) { + I->second = CalleeNode; + break; + } + } + MadeChange = true; + continue; + } + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If the call site didn't exist in the CGN yet, add it. We assume that + // newly introduced call sites won't be indirect. This could be fixed + // in the future. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) + CalleeNode = CG.getOrInsertFunction(Callee); + else + CalleeNode = CG.getCallsExternalNode(); + + CGN->addCalledFunction(CS, CalleeNode); + MadeChange = true; + } + + // After scanning this function, if we still have entries in callsites, then + // they are dangling pointers. WeakVH should save us for this, so abort if + // this happens. + assert(CallSites.empty() && "Dangling pointers found in call sites map"); + + // Periodically do an explicit clear to remove tombstones when processing + // large scc's. + if ((sccidx & 15) == 0) + CallSites.clear(); + } + + DEBUG(if (MadeChange) { + errs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; + for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) + CurSCC[i]->dump(); + } else { + errs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; + } + ); +} + /// run - Execute all of the passes scheduled for execution. Keep track of /// whether any of the passes modifies the module, and if so, return true. bool CGPassManager::runOnModule(Module &M) { CallGraph &CG = getAnalysis(); bool Changed = doInitialization(CG); - // Walk SCC - for (scc_iterator I = scc_begin(&CG), E = scc_end(&CG); - I != E; ++I) { - - // Run all passes on current SCC - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - Pass *P = getContainedPass(Index); - - dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, ""); + std::vector CurSCC; + + // Walk the callgraph in bottom-up SCC order. + for (scc_iterator CGI = scc_begin(&CG), E = scc_end(&CG); + CGI != E;) { + // Copy the current SCC and increment past it so that the pass can hack + // on the SCC if it wants to without invalidating our iterator. 
+ CurSCC = *CGI; + ++CGI; + + + // CallGraphUpToDate - Keep track of whether the callgraph is known to be + // up-to-date or not. The CGSSC pass manager runs two types of passes: + // CallGraphSCC Passes and other random function passes. Because other + // random function passes are not CallGraph aware, they may clobber the + // call graph by introducing new calls or deleting other ones. This flag + // is set to false when we run a function pass so that we know to clean up + // the callgraph when we need to run a CGSCCPass again. + bool CallGraphUpToDate = true; + + // Run all passes on current SCC. + for (unsigned PassNo = 0, e = getNumContainedPasses(); + PassNo != e; ++PassNo) { + Pass *P = getContainedPass(PassNo); + + // If we're in -debug-pass=Executions mode, construct the SCC node list, + // otherwise avoid constructing this string as it is expensive. + if (isPassDebuggingExecutionsOrMore()) { + std::string Functions; +#ifndef NDEBUG + raw_string_ostream OS(Functions); + for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) { + if (i) OS << ", "; + CurSCC[i]->print(OS); + } + OS.flush(); +#endif + dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions); + } dumpRequiredSet(P); initializeAnalysisImpl(P); - StartPassTimer(P); - if (CallGraphSCCPass *CGSP = dynamic_cast(P)) - Changed |= CGSP->runOnSCC(*I); // TODO : What if CG is changed ? - else { - FPPassManager *FPP = dynamic_cast(P); - assert (FPP && "Invalid CGPassManager member"); - - // Run pass P on all functions current SCC - std::vector &SCC = *I; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) { - Function *F = SCC[i]->getFunction(); - if (F) { - dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getNameStart()); - Changed |= FPP->runOnFunction(*F); - } - } - } - StopPassTimer(P); + // Actually run this pass on the current SCC. + Changed |= RunPassOnSCC(P, CurSCC, CG, CallGraphUpToDate); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, ""); @@ -125,6 +347,11 @@ bool CGPassManager::runOnModule(Module &M) { recordAvailableAnalysis(P); removeDeadPasses(P, "", ON_CG_MSG); } + + // If the callgraph was left out of date (because the last pass run was a + // functionpass), refresh it before we move on to the next SCC. + if (!CallGraphUpToDate) + RefreshCallGraph(CurSCC, CG, false); } Changed |= doFinalization(CG); return Changed; diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index 920ee374555f7..c4fb0b9a4e3dd 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -92,13 +92,12 @@ bool FindUsedTypes::runOnModule(Module &m) { // passed in, then the types are printed symbolically if possible, using the // symbol table from the module. 
// -void FindUsedTypes::print(std::ostream &OS, const Module *M) const { - raw_os_ostream RO(OS); - RO << "Types in use by this module:\n"; +void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { + OS << "Types in use by this module:\n"; for (std::set::const_iterator I = UsedTypes.begin(), E = UsedTypes.end(); I != E; ++I) { - RO << " "; - WriteTypeSymbolic(RO, *I, M); - RO << '\n'; + OS << " "; + WriteTypeSymbolic(OS, *I, M); + OS << '\n'; } } diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 2e9884aa01b40..f5c1108412924 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -23,6 +23,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InstIterator.h" @@ -236,6 +237,9 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } } else if (GetElementPtrInst *GEP = dyn_cast(*UI)) { if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; + } else if (BitCastInst *BCI = dyn_cast(*UI)) { + if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) + return true; } else if (CallInst *CI = dyn_cast(*UI)) { // Make sure that this is just the function being called, not that it is // passing into the function. @@ -299,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Check the value being stored. Value *Ptr = SI->getOperand(0)->getUnderlyingObject(); - if (isa(Ptr)) { + if (isa(Ptr) || isMalloc(Ptr)) { // Okay, easy case. } else if (CallInst *CI = dyn_cast(Ptr)) { Function *F = CI->getCalledFunction(); @@ -435,7 +439,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { if (cast(*II).isVolatile()) // Treat volatile stores as reading memory somewhere. FunctionEffect |= Ref; - } else if (isa(*II) || isa(*II)) { + } else if (isa(*II) || isa(*II) || + isMalloc(&cast(*II))) { FunctionEffect |= ModRef; } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index caeb14bef3739..543e017fc9dd7 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -19,7 +19,6 @@ #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/ADT/STLExtras.h" @@ -39,7 +38,7 @@ Pass *llvm::createIVUsersPass() { /// containsAddRecFromDifferentLoop - Determine whether expression S involves a /// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { +static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { // This is very common, put it first. if (isa(S)) return false; @@ -54,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfoBase::isNotAlreadyContainedIn(L, newLoop)) + if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) return false; } return true; @@ -80,10 +79,10 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { /// a mix of loop invariant and loop variant expressions. 
The start cannot, /// however, contain an AddRec from a different loop, unless that loop is an /// outer loop of the current loop. -static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop, - const SCEV* &Start, const SCEV* &Stride, +static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, + const SCEV *&Start, const SCEV *&Stride, ScalarEvolution *SE, DominatorTree *DT) { - const SCEV* TheAddRec = Start; // Initialize to zero. + const SCEV *TheAddRec = Start; // Initialize to zero. // If the outer level is an AddExpr, the operands are all start values except // for a nested AddRecExpr. @@ -109,9 +108,9 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop, // Use getSCEVAtScope to attempt to simplify other loops out of // the picture. - const SCEV* AddRecStart = AddRec->getStart(); + const SCEV *AddRecStart = AddRec->getStart(); AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - const SCEV* AddRecStride = AddRec->getStepRecurrence(*SE); + const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE); // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other // than an outer loop of the current loop, reject it. LSR has no concept of @@ -122,15 +121,15 @@ static bool getSCEVStartAndStride(const SCEV* &SH, Loop *L, Loop *UseLoop, Start = SE->getAddExpr(Start, AddRecStart); - // If stride is an instruction, make sure it dominates the loop preheader. + // If stride is an instruction, make sure it properly dominates the header. // Otherwise we could end up with a use before def situation. if (!isa(AddRecStride)) { - BasicBlock *Preheader = L->getLoopPreheader(); - if (!AddRecStride->dominates(Preheader, DT)) + BasicBlock *Header = L->getHeader(); + if (!AddRecStride->properlyDominates(Header, DT)) return false; - DOUT << "[" << L->getHeader()->getName() - << "] Variable stride: " << *AddRec << "\n"; + DEBUG(errs() << "[" << L->getHeader()->getName() + << "] Variable stride: " << *AddRec << "\n"); } Stride = AddRecStride; @@ -196,13 +195,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; // Instruction already handled. // Get the symbolic expression for this instruction. - const SCEV* ISE = SE->getSCEV(I); + const SCEV *ISE = SE->getSCEV(I); if (isa(ISE)) return false; // Get the start and stride for this expression. Loop *UseLoop = LI->getLoopFor(I->getParent()); - const SCEV* Start = SE->getIntegerSCEV(0, ISE->getType()); - const SCEV* Stride = Start; + const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); + const SCEV *Stride = Start; if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) return false; // Non-reducible symbolic expression, bail out. 
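For orientation, a worked example of the start/stride split this code relies
on (values illustrative, not taken from the patch): for an access to
A[3 + 4*i] governed by loop L, ScalarEvolution models the index as the affine
recurrence {3,+,4}<L>, and getSCEVStartAndStride yields Start = 3 and
Stride = 4. The post-increment handling in a following hunk then compensates
with NewStart = Start - Stride, i.e. -1 here, because a user of the
post-incremented value observes the induction variable one step further along.

    // Illustrative only, with hypothetical values:
    //   for (i = 0; i < n; ++i)
    //     ... A[3 + 4*i] ...            // index SCEV: {3,+,4}<L>
    //
    //   Start  = 3                      // loop-invariant part
    //   Stride = 4                      // step of the recurrence
    //   NewStart = Start - Stride = -1  // for post-increment users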
@@ -228,14 +227,14 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (LI->getLoopFor(User->getParent()) != L) { if (isa(User) || Processed.count(User) || !AddUsersIfInteresting(User)) { - DOUT << "FOUND USER in other loop: " << *User - << " OF SCEV: " << *ISE << "\n"; + DEBUG(errs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } } else if (Processed.count(User) || !AddUsersIfInteresting(User)) { - DOUT << "FOUND USER: " << *User - << " OF SCEV: " << *ISE << "\n"; + DEBUG(errs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } @@ -254,10 +253,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. - const SCEV* NewStart = SE->getMinusSCEV(Start, Stride); + const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); StrideUses->addUser(NewStart, User, I); StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); - DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n"; + DEBUG(errs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); } else { StrideUses->addUser(Start, User, I); } @@ -295,9 +294,9 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. -const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const { +const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { // Start with zero. - const SCEV* RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); // Create the basic add recurrence. RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); // Add the offset in a separate step, because it may be loop-variant. @@ -308,7 +307,7 @@ const SCEV* IVUsers::getReplacementExpr(const IVStrideUse &U) const { RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); // Evaluate the expression out of the loop, if possible. 
if (!L->contains(U.getUser()->getParent())) { - const SCEV* ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); + const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop()); if (ExitVal->isLoopInvariant(L)) RetVal = ExitVal; } @@ -325,7 +324,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << ":\n"; for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { - std::map::const_iterator SI = + std::map::const_iterator SI = IVUsesByStride.find(StrideOrder[Stride]); assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n"; @@ -340,15 +339,11 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { OS << " (post-inc)"; OS << " in "; UI->getUser()->print(OS); + OS << '\n'; } } } -void IVUsers::print(std::ostream &o, const Module *M) const { - raw_os_ostream OS(o); - print(OS, M); -} - void IVUsers::dump() const { print(errs()); } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp new file mode 100644 index 0000000000000..3b0d2c90aeb5f --- /dev/null +++ b/lib/Analysis/InlineCost.cpp @@ -0,0 +1,338 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" +#include "llvm/CallingConv.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +// CountCodeReductionForConstant - Figure out an approximation for how many +// instructions will be constant folded if the specified value is constant. +// +unsigned InlineCostAnalyzer::FunctionInfo:: + CountCodeReductionForConstant(Value *V) { + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) + if (isa(*UI)) + Reduction += 40; // Eliminating a conditional branch is a big win + else if (SwitchInst *SI = dyn_cast(*UI)) + // Eliminating a switch is a big win, proportional to the number of edges + // deleted. + Reduction += (SI->getNumSuccessors()-1) * 40; + else if (CallInst *CI = dyn_cast(*UI)) { + // Turning an indirect call into a direct call is a BIG win + Reduction += CI->getCalledValue() == V ? 500 : 0; + } else if (InvokeInst *II = dyn_cast(*UI)) { + // Turning an indirect call into a direct call is a BIG win + Reduction += II->getCalledValue() == V ? 500 : 0; + } else { + // Figure out if this instruction will be removed due to simple constant + // propagation. + Instruction &Inst = cast(**UI); + + // We can't constant propagate instructions which have effects or + // read memory. + // + // FIXME: It would be nice to capture the fact that a load from a + // pointer-to-constant-global is actually a *really* good thing to zap. + // Unfortunately, we don't know the pointer that may get propagated here, + // so we can't make this decision. 
+ if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + // We will get to remove this instruction... + Reduction += 7; + + // And any other instructions that use it which become constants + // themselves. + Reduction += CountCodeReductionForConstant(&Inst); + } + } + + return Reduction; +} + +// CountCodeReductionForAlloca - Figure out an approximation of how much smaller +// the function will be if it is inlined into a context where an argument +// becomes an alloca. +// +unsigned InlineCostAnalyzer::FunctionInfo:: + CountCodeReductionForAlloca(Value *V) { + if (!isa(V->getType())) return 0; // Not a pointer + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *I = cast(*UI); + if (isa(I) || isa(I)) + Reduction += 10; + else if (GetElementPtrInst *GEP = dyn_cast(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (!GEP->hasAllConstantIndices()) + Reduction += CountCodeReductionForAlloca(GEP)+15; + } else { + // If there is some other strange instruction, we're not going to be able + // to do much if we inline this. + return 0; + } + } + + return Reduction; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { + ++NumBlocks; + + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isa(II)) continue; // PHI nodes don't count. + + // Special handling for calls. + if (isa(II) || isa(II)) { + if (isa(II)) + continue; // Debug intrinsics don't count as size. + + CallSite CS = CallSite::get(const_cast(&*II)); + + // If this function contains a call to setjmp or _setjmp, never inline + // it. This is a hack because we depend on the user marking their local + // variables as volatile if they are live across a setjmp call, and they + // probably won't do this in callers. + if (Function *F = CS.getCalledFunction()) + if (F->isDeclaration() && + (F->getName() == "setjmp" || F->getName() == "_setjmp")) + NeverInline = true; + + // Calls often compile into many machine instructions. Bump up their + // cost to reflect this. + if (!isa(II)) + NumInsts += InlineConstants::CallPenalty; + } + + // These, too, are calls. + if (isa(II) || isa(II)) + NumInsts += InlineConstants::CallPenalty; + + if (const AllocaInst *AI = dyn_cast(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa(II) || isa(II->getType())) + ++NumVectorInsts; + + // Noop casts, including ptr <-> int, don't count. + if (const CastInst *CI = dyn_cast(II)) { + if (CI->isLosslessCast() || isa(CI) || + isa(CI)) + continue; + } else if (const GetElementPtrInst *GEPI = + dyn_cast(II)) { + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (GEPI->hasAllConstantIndices()) + continue; + } + + if (isa(II)) + ++NumRets; + + ++NumInsts; + } +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void CodeMetrics::analyzeFunction(Function *F) { + // Look at the size of the callee. 
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB); +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { + Metrics.analyzeFunction(F); + + // A function with exactly one return has it removed during the inlining + // process (see InlineFunction), so don't count it. + // FIXME: This knowledge should really be encoded outside of FunctionInfo. + if (Metrics.NumRets==1) + --Metrics.NumInsts; + + // Check out all of the arguments to the function, figuring out how much + // code can be eliminated if one of the arguments is a constant. + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ArgumentWeights.push_back(ArgInfo(CountCodeReductionForConstant(I), + CountCodeReductionForAlloca(I))); +} + +// getInlineCost - The heuristic used to determine if we should inline the +// function call or not. +// +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, + SmallPtrSet &NeverInline) { + Instruction *TheCall = CS.getInstruction(); + Function *Callee = CS.getCalledFunction(); + Function *Caller = TheCall->getParent()->getParent(); + + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline. + if (Callee->mayBeOverridden() || + Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee)) + return llvm::InlineCost::getNever(); + + // InlineCost - This value measures how good of an inline candidate this call + // site is to inline. A lower inline cost make is more likely for the call to + // be inlined. This value may go negative. + // + int InlineCost = 0; + + // If there is only one call of the function, and it has internal linkage, + // make it almost guaranteed to be inlined. + // + if (Callee->hasLocalLinkage() && Callee->hasOneUse()) + InlineCost += InlineConstants::LastCallToStaticBonus; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (Callee->getCallingConv() == CallingConv::Cold) + InlineCost += InlineConstants::ColdccPenalty; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this. + if (InvokeInst *II = dyn_cast(TheCall)) { + if (isa(II->getNormalDest()->begin())) + InlineCost += InlineConstants::NoreturnPenalty; + } else if (isa(++BasicBlock::iterator(TheCall))) + InlineCost += InlineConstants::NoreturnPenalty; + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.Metrics.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + // If we should never inline this, return a huge cost. + if (CalleeFI.Metrics.NeverInline) + return InlineCost::getNever(); + + // FIXME: It would be nice to kill off CalleeFI.NeverInline. Then we + // could move this up and avoid computing the FunctionInfo for + // things we are going to just return always inline for. This + // requires handling setjmp somewhere else, however. + if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) + return InlineCost::getAlways(); + + if (CalleeFI.Metrics.usesDynamicAlloca) { + // Get infomation about the caller... 
+ FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; + + // If we haven't calculated this information yet, do so now. + if (CallerFI.Metrics.NumBlocks == 0) + CallerFI.analyzeFunction(Caller); + + // Don't inline a callee with dynamic alloca into a caller without them. + // Functions containing dynamic alloca's are inefficient in various ways; + // don't create more inefficiency. + if (!CallerFI.Metrics.usesDynamicAlloca) + return InlineCost::getNever(); + } + + // Add to the inline quality for properties that make the call valuable to + // inline. This includes factors that indicate that the result of inlining + // the function will be optimizable. Currently this just looks at arguments + // passed into the function. + // + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I, ++ArgNo) { + // Each argument passed in has a cost at both the caller and the callee + // sides. This favors functions that take many arguments over functions + // that take few arguments. + InlineCost -= 20; + + // If this is a function being passed in, it is very likely that we will be + // able to turn an indirect function call into a direct function call. + if (isa(I)) + InlineCost -= 100; + + // If an alloca is passed in, inlining this function is likely to allow + // significant future optimization possibilities (like scalar promotion, and + // scalarization), so encourage the inlining of the function. + // + else if (isa(I)) { + if (ArgNo < CalleeFI.ArgumentWeights.size()) + InlineCost -= CalleeFI.ArgumentWeights[ArgNo].AllocaWeight; + + // If this is a constant being passed into the function, use the argument + // weights calculated for the callee to determine how much will be folded + // away with this information. + } else if (isa(I)) { + if (ArgNo < CalleeFI.ArgumentWeights.size()) + InlineCost -= CalleeFI.ArgumentWeights[ArgNo].ConstantWeight; + } + } + + // Now that we have considered all of the factors that make the call site more + // likely to be inlined, look at factors that make us not want to inline it. + + // Don't inline into something too big, which would make it bigger. + // "size" here is the number of basic blocks, not instructions. + // + InlineCost += Caller->size()/15; + + // Look at the size of the callee. Each instruction counts as 5. + InlineCost += CalleeFI.Metrics.NumInsts*5; + + return llvm::InlineCost::get(InlineCost); +} + +// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a +// higher threshold to determine if the function call should be inlined. +float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { + Function *Callee = CS.getCalledFunction(); + + // Get information about the callee... + FunctionInfo &CalleeFI = CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI.Metrics.NumBlocks == 0) + CalleeFI.analyzeFunction(Callee); + + float Factor = 1.0f; + // Single BB functions are often written to be inlined. + if (CalleeFI.Metrics.NumBlocks == 1) + Factor += 0.5f; + + // Be more aggressive if the function contains a good chunk (if it mades up + // at least 10% of the instructions) of vector instructions. 
+ if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2) + Factor += 2.0f; + else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10) + Factor += 1.5f; + return Factor; +} diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 2b34ad3b070de..83724caf52106 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -16,8 +16,9 @@ #include "llvm/Pass.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstVisitor.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -46,8 +47,8 @@ namespace { #include "llvm/Instruction.def" void visitInstruction(Instruction &I) { - cerr << "Instruction Count does not know about " << I; - abort(); + errs() << "Instruction Count does not know about " << I; + llvm_unreachable(0); } public: static char ID; // Pass identification, replacement for typeid @@ -58,7 +59,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } - virtual void print(std::ostream &O, const Module *M) const {} + virtual void print(raw_ostream &O, const Module *M) const {} }; } diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp index 16b1947230714..ca9cdcaf24648 100644 --- a/lib/Analysis/Interval.cpp +++ b/lib/Analysis/Interval.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/Interval.h" #include "llvm/BasicBlock.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -29,29 +30,29 @@ bool Interval::isLoop() const { // There is a loop in this interval iff one of the predecessors of the header // node lives in the interval. for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode); - I != E; ++I) { - if (contains(*I)) return true; - } + I != E; ++I) + if (contains(*I)) + return true; return false; } -void Interval::print(std::ostream &o) const { - o << "-------------------------------------------------------------\n" +void Interval::print(raw_ostream &OS) const { + OS << "-------------------------------------------------------------\n" << "Interval Contents:\n"; // Print out all of the basic blocks in the interval... 
for (std::vector::const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) - o << **I << "\n"; + OS << **I << "\n"; - o << "Interval Predecessors:\n"; + OS << "Interval Predecessors:\n"; for (std::vector::const_iterator I = Predecessors.begin(), E = Predecessors.end(); I != E; ++I) - o << **I << "\n"; + OS << **I << "\n"; - o << "Interval Successors:\n"; + OS << "Interval Successors:\n"; for (std::vector::const_iterator I = Successors.begin(), E = Successors.end(); I != E; ++I) - o << **I << "\n"; + OS << **I << "\n"; } diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index cb8a85da552a3..1f17b77a5b96f 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -32,7 +32,7 @@ void IntervalPartition::releaseMemory() { RootInterval = 0; } -void IntervalPartition::print(std::ostream &O, const Module*) const { +void IntervalPartition::print(raw_ostream &O, const Module*) const { for(unsigned i = 0, e = Intervals.size(); i != e; ++i) Intervals[i]->print(O); } diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index 971e6e7accb42..7419659298902 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -16,7 +16,6 @@ #include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Function.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" using namespace llvm; // Register this pass... @@ -37,7 +36,6 @@ LibCallAliasAnalysis::~LibCallAliasAnalysis() { void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired(); AU.setPreservesAll(); // Does not transform code } diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 29850471f7dc3..e0060c3e89b1a 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -57,9 +57,6 @@ const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const { } // Look up this function in the string map. - const char *ValueName = F->getNameStart(); - StringMap::iterator I = - Map->find(ValueName, ValueName+F->getNameLen()); - return I != Map->end() ? I->second : 0; + return Map->lookup(F->getName()); } diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index f6057839266f2..32d22662c3415 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -15,18 +15,33 @@ // // TODO: adapt as implementation progresses. 
// +// TODO: document lingo (pair, subscript, index) +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lda" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopDependenceAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Instructions.h" +#include "llvm/Operator.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" using namespace llvm; +STATISTIC(NumAnswered, "Number of dependence queries answered"); +STATISTIC(NumAnalysed, "Number of distinct dependence pairs analysed"); +STATISTIC(NumDependent, "Number of pairs with dependent accesses"); +STATISTIC(NumIndependent, "Number of pairs with independent accesses"); +STATISTIC(NumUnknown, "Number of pairs with unknown accesses"); + LoopPass *llvm::createLoopDependenceAnalysisPass() { return new LoopDependenceAnalysis(); } @@ -44,14 +59,14 @@ static inline bool IsMemRefInstr(const Value *V) { return I && (I->mayReadFromMemory() || I->mayWriteToMemory()); } -static void GetMemRefInstrs( - const Loop *L, SmallVectorImpl &memrefs) { +static void GetMemRefInstrs(const Loop *L, + SmallVectorImpl &Memrefs) { for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); - b != be; ++b) + b != be; ++b) for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end(); - i != ie; ++i) + i != ie; ++i) if (IsMemRefInstr(i)) - memrefs.push_back(i); + Memrefs.push_back(i); } static bool IsLoadOrStoreInst(Value *I) { @@ -63,53 +78,223 @@ static Value *GetPointerOperand(Value *I) { return i->getPointerOperand(); if (StoreInst *i = dyn_cast(I)) return i->getPointerOperand(); - assert(0 && "Value is no load or store instruction!"); + llvm_unreachable("Value is no load or store instruction!"); // Never reached. 
return 0; } +static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA, + const Value *A, + const Value *B) { + const Value *aObj = A->getUnderlyingObject(); + const Value *bObj = B->getUnderlyingObject(); + return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()), + bObj, AA->getTypeStoreSize(bObj->getType())); +} + +static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) { + return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L); +} + //===----------------------------------------------------------------------===// // Dependence Testing //===----------------------------------------------------------------------===// -bool LoopDependenceAnalysis::isDependencePair(const Value *x, - const Value *y) const { - return IsMemRefInstr(x) && - IsMemRefInstr(y) && - (cast(x)->mayWriteToMemory() || - cast(y)->mayWriteToMemory()); +bool LoopDependenceAnalysis::isDependencePair(const Value *A, + const Value *B) const { + return IsMemRefInstr(A) && + IsMemRefInstr(B) && + (cast(A)->mayWriteToMemory() || + cast(B)->mayWriteToMemory()); +} + +bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, + Value *B, + DependencePair *&P) { + void *insertPos = 0; + FoldingSetNodeID id; + id.AddPointer(A); + id.AddPointer(B); + + P = Pairs.FindNodeOrInsertPos(id, insertPos); + if (P) return true; + + P = PairAllocator.Allocate(); + new (P) DependencePair(id, A, B); + Pairs.InsertNode(P, insertPos); + return false; +} + +void LoopDependenceAnalysis::getLoops(const SCEV *S, + DenseSet* Loops) const { + // Refactor this into an SCEVVisitor, if efficiency becomes a concern. + for (const Loop *L = this->L; L != 0; L = L->getParentLoop()) + if (!S->isLoopInvariant(L)) + Loops->insert(L); +} + +bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const { + DenseSet loops; + getLoops(S, &loops); + return loops.empty(); +} + +bool LoopDependenceAnalysis::isAffine(const SCEV *S) const { + const SCEVAddRecExpr *rec = dyn_cast(S); + return isLoopInvariant(S) || (rec && rec->isAffine()); +} + +bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const { + return isLoopInvariant(A) && isLoopInvariant(B); +} + +bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const { + DenseSet loops; + getLoops(A, &loops); + getLoops(B, &loops); + return loops.size() == 1; +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseZIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!"); + return A == B ? Dependent : Independent; } -bool LoopDependenceAnalysis::depends(Value *src, Value *dst) { - assert(isDependencePair(src, dst) && "Values form no dependence pair!"); - DOUT << "== LDA test ==\n" << *src << *dst; +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseSIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} + +LoopDependenceAnalysis::DependenceResult +LoopDependenceAnalysis::analyseMIV(const SCEV *A, + const SCEV *B, + Subscript *S) const { + return Unknown; // TODO: Implement. +} - // We only analyse loads and stores; for possible memory accesses by e.g. - // free, call, or invoke instructions we conservatively assume dependence. 
-  if (!IsLoadOrStoreInst(src) || !IsLoadOrStoreInst(dst))
-    return true;
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSubscript(const SCEV *A,
+                                         const SCEV *B,
+                                         Subscript *S) const {
+  DEBUG(errs() << "  Testing subscript: " << *A << ", " << *B << "\n");

-  Value *srcPtr = GetPointerOperand(src);
-  Value *dstPtr = GetPointerOperand(dst);
-  const Value *srcObj = srcPtr->getUnderlyingObject();
-  const Value *dstObj = dstPtr->getUnderlyingObject();
-  AliasAnalysis::AliasResult alias = AA->alias(
-      srcObj, AA->getTargetData().getTypeStoreSize(srcObj->getType()),
-      dstObj, AA->getTargetData().getTypeStoreSize(dstObj->getType()));
+  if (A == B) {
+    DEBUG(errs() << "  -> [D] same SCEV\n");
+    return Dependent;
+  }

-  // If we don't know whether or not the two objects alias, assume dependence.
-  if (alias == AliasAnalysis::MayAlias)
-    return true;
+  if (!isAffine(A) || !isAffine(B)) {
+    DEBUG(errs() << "  -> [?] not affine\n");
+    return Unknown;
+  }

-  // If the objects noalias, they are distinct, accesses are independent.
-  if (alias == AliasAnalysis::NoAlias)
-    return false;
+  if (isZIVPair(A, B))
+    return analyseZIV(A, B, S);

-  // TODO: the underlying objects MustAlias, test for dependence
+  if (isSIVPair(A, B))
+    return analyseSIV(A, B, S);

-  // We couldn't establish a more precise result, so we have to conservatively
-  // assume full dependence.
-  return true;
+  return analyseMIV(A, B, S);
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+  DEBUG(errs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+  // We only analyse loads and stores but no possible memory accesses by e.g.
+  // free, call, or invoke instructions.
+  if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+    DEBUG(errs() << "--> [?] no load/store\n");
+    return Unknown;
+  }
+
+  Value *aPtr = GetPointerOperand(P->A);
+  Value *bPtr = GetPointerOperand(P->B);
+
+  switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+  case AliasAnalysis::MayAlias:
+    // We cannot analyse objects if we do not know about their aliasing.
+    DEBUG(errs() << "---> [?] may alias\n");
+    return Unknown;
+
+  case AliasAnalysis::NoAlias:
+    // If the objects noalias, they are distinct, accesses are independent.
+    DEBUG(errs() << "---> [I] no alias\n");
+    return Independent;
+
+  case AliasAnalysis::MustAlias:
+    break; // The underlying objects alias, test accesses for dependence.
+  }
+
+  const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+  const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+  if (!aGEP || !bGEP)
+    return Unknown;
+
+  // FIXME: Is filtering coupled subscripts necessary?
+
+  // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA), adding
+  // trailing zeroes to the smaller GEP, if needed.
+  typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+  GEPOpdPairsTy opds;
+  for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+                                     aEnd = aGEP->idx_end(),
+                                     bIdx = bGEP->idx_begin(),
+                                     bEnd = bGEP->idx_end();
+      aIdx != aEnd && bIdx != bEnd;
+      aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+    const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+    const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+    opds.push_back(std::make_pair(aSCEV, bSCEV));
+  }
+
+  if (!opds.empty() && opds[0].first != opds[0].second) {
+    // We cannot (yet) handle arbitrary GEP pointer offsets.
+    //
+    // TODO: this could be relaxed by adding the size of the underlying object
+    // to the first subscript. If we have e.g.
(GEP x,0,i; GEP x,2,-i) and we + // know that x is a [100 x i8]*, we could modify the first subscript to be + // (i, 200-i) instead of (i, -i). + return Unknown; + } + + // Now analyse the collected operand pairs (skipping the GEP ptr offsets). + for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end(); + i != end; ++i) { + Subscript subscript; + DependenceResult result = analyseSubscript(i->first, i->second, &subscript); + if (result != Dependent) { + // We either proved independence or failed to analyse this subscript. + // Further subscripts will not improve the situation, so abort early. + return result; + } + P->Subscripts.push_back(subscript); + } + // We successfully analysed all subscripts but failed to prove independence. + return Dependent; +} + +bool LoopDependenceAnalysis::depends(Value *A, Value *B) { + assert(isDependencePair(A, B) && "Values form no dependence pair!"); + ++NumAnswered; + + DependencePair *p; + if (!findOrInsertDependencePair(A, B, p)) { + // The pair is not cached, so analyse it. + ++NumAnalysed; + switch (p->Result = analysePair(p)) { + case Dependent: ++NumDependent; break; + case Independent: ++NumIndependent; break; + case Unknown: ++NumUnknown; break; + } + } + return p->Result != Independent; } //===----------------------------------------------------------------------===// @@ -123,14 +308,19 @@ bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) { return false; } +void LoopDependenceAnalysis::releaseMemory() { + Pairs.clear(); + PairAllocator.Reset(); +} + void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); } -static void PrintLoopInfo( - raw_ostream &OS, LoopDependenceAnalysis *LDA, const Loop *L) { +static void PrintLoopInfo(raw_ostream &OS, + LoopDependenceAnalysis *LDA, const Loop *L) { if (!L->empty()) return; // ignore non-innermost loops SmallVector memrefs; @@ -142,14 +332,14 @@ static void PrintLoopInfo( OS << " Load/store instructions: " << memrefs.size() << "\n"; for (SmallVector::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) - OS << "\t" << (x - memrefs.begin()) << ": " << **x; + end = memrefs.end(); x != end; ++x) + OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n"; OS << " Pairwise dependence results:\n"; for (SmallVector::const_iterator x = memrefs.begin(), - end = memrefs.end(); x != end; ++x) + end = memrefs.end(); x != end; ++x) for (SmallVector::const_iterator y = x + 1; - y != end; ++y) + y != end; ++y) if (LDA->isDependencePair(*x, *y)) OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin()) << ": " << (LDA->depends(*x, *y) ? 
"dependent" : "independent") @@ -160,8 +350,3 @@ void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const { // TODO: doc why const_cast is safe PrintLoopInfo(OS, const_cast(this), this->L); } - -void LoopDependenceAnalysis::print(std::ostream &OS, const Module *M) const { - raw_os_ostream os(OS); - print(os, M); -} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index bb535894efabd..ce2d29f331b6b 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -20,12 +20,22 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/CommandLine.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include using namespace llvm; +// Always verify loopinfo if expensive checking is enabled. +#ifdef XDEBUG +bool VerifyLoopInfo = true; +#else +bool VerifyLoopInfo = false; +#endif +static cl::opt +VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), + cl::desc("Verify loop info (time consuming)")); + char LoopInfo::ID = 0; static RegisterPass X("loops", "Natural Loop Information", true, true); @@ -34,6 +44,338 @@ X("loops", "Natural Loop Information", true, true); // Loop implementation // +/// isLoopInvariant - Return true if the specified value is loop invariant +/// +bool Loop::isLoopInvariant(Value *V) const { + if (Instruction *I = dyn_cast(V)) + return isLoopInvariant(I); + return true; // All non-instructions are loop invariant +} + +/// isLoopInvariant - Return true if the specified instruction is +/// loop-invariant. +/// +bool Loop::isLoopInvariant(Instruction *I) const { + return !contains(I->getParent()); +} + +/// makeLoopInvariant - If the given value is an instruciton inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the value after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Value *V, bool &Changed, + Instruction *InsertPt) const { + if (Instruction *I = dyn_cast(V)) + return makeLoopInvariant(I, Changed, InsertPt); + return true; // All non-instructions are loop-invariant. +} + +/// makeLoopInvariant - If the given instruction is inside of the +/// loop and it can be hoisted, do so to make it trivially loop-invariant. +/// Return true if the instruction after any hoisting is loop invariant. This +/// function can be used as a slightly more aggressive replacement for +/// isLoopInvariant. +/// +/// If InsertPt is specified, it is the point to hoist instructions to. +/// If null, the terminator of the loop preheader is used. +/// +bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, + Instruction *InsertPt) const { + // Test if the value is already loop-invariant. + if (isLoopInvariant(I)) + return true; + if (!I->isSafeToSpeculativelyExecute()) + return false; + if (I->mayReadFromMemory()) + return false; + // Determine the insertion point, unless one was given. + if (!InsertPt) { + BasicBlock *Preheader = getLoopPreheader(); + // Without a preheader, hoisting is not feasible. + if (!Preheader) + return false; + InsertPt = Preheader->getTerminator(); + } + // Don't hoist instructions with loop-variant operands. 
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt)) + return false; + // Hoist. + I->moveBefore(InsertPt); + Changed = true; + return true; +} + +/// getCanonicalInductionVariable - Check to see if the loop has a canonical +/// induction variable: an integer recurrence that starts at 0 and increments +/// by one each time through the loop. If so, return the phi node that +/// corresponds to it. +/// +/// The IndVarSimplify pass transforms loops to have a canonical induction +/// variable. +/// +PHINode *Loop::getCanonicalInductionVariable() const { + BasicBlock *H = getHeader(); + + BasicBlock *Incoming = 0, *Backedge = 0; + typedef GraphTraits > InvBlockTraits; + InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(H); + assert(PI != InvBlockTraits::child_end(H) && + "Loop must have at least one backedge!"); + Backedge = *PI++; + if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop + Incoming = *PI++; + if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges? + + if (contains(Incoming)) { + if (contains(Backedge)) + return 0; + std::swap(Incoming, Backedge); + } else if (!contains(Backedge)) + return 0; + + // Loop over all of the PHI nodes, looking for a canonical indvar. + for (BasicBlock::iterator I = H->begin(); isa(I); ++I) { + PHINode *PN = cast(I); + if (ConstantInt *CI = + dyn_cast(PN->getIncomingValueForBlock(Incoming))) + if (CI->isNullValue()) + if (Instruction *Inc = + dyn_cast(PN->getIncomingValueForBlock(Backedge))) + if (Inc->getOpcode() == Instruction::Add && + Inc->getOperand(0) == PN) + if (ConstantInt *CI = dyn_cast(Inc->getOperand(1))) + if (CI->equalsInt(1)) + return PN; + } + return 0; +} + +/// getCanonicalInductionVariableIncrement - Return the LLVM value that holds +/// the canonical induction variable value for the "next" iteration of the +/// loop. This always succeeds if getCanonicalInductionVariable succeeds. +/// +Instruction *Loop::getCanonicalInductionVariableIncrement() const { + if (PHINode *PN = getCanonicalInductionVariable()) { + bool P1InLoop = contains(PN->getIncomingBlock(1)); + return cast(PN->getIncomingValue(P1InLoop)); + } + return 0; +} + +/// getTripCount - Return a loop-invariant LLVM value indicating the number of +/// times the loop will be executed. Note that this means that the backedge +/// of the loop executes N-1 times. If the trip-count cannot be determined, +/// this returns null. +/// +/// The IndVarSimplify pass transforms loops to have a form that this +/// function easily understands. +/// +Value *Loop::getTripCount() const { + // Canonical loops will end with a 'cmp ne I, V', where I is the incremented + // canonical induction variable and V is the trip count of the loop. 
+  Instruction *Inc = getCanonicalInductionVariableIncrement();
+  if (Inc == 0) return 0;
+  PHINode *IV = cast<PHINode>(Inc->getOperand(0));
+
+  BasicBlock *BackedgeBlock =
+    IV->getIncomingBlock(contains(IV->getIncomingBlock(1)));
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+    if (BI->isConditional()) {
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+        if (ICI->getOperand(0) == Inc) {
+          if (BI->getSuccessor(0) == getHeader()) {
+            if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+              return ICI->getOperand(1);
+          } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+            return ICI->getOperand(1);
+          }
+        }
+      }
+    }
+
+  return 0;
+}
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
+/// or not constant. Will also return 0 if the trip count is very large
+/// (>= 2^32).
+unsigned Loop::getSmallConstantTripCount() const {
+  Value* TripCount = this->getTripCount();
+  if (TripCount) {
+    if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+      // Guard against huge trip counts.
+      if (TripCountC->getValue().getActiveBits() <= 32) {
+        return (unsigned)TripCountC->getZExtValue();
+      }
+    }
+  }
+  return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be a
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant; use getSmallConstantTripCount for that case). Will also return 1
+/// if the trip count is very large (>= 2^32).
+unsigned Loop::getSmallConstantTripMultiple() const {
+  Value* TripCount = this->getTripCount();
+  // This will hold the ConstantInt result, if any.
+  ConstantInt *Result = NULL;
+  if (TripCount) {
+    // See if the trip count is constant itself.
+    Result = dyn_cast<ConstantInt>(TripCount);
+    // If not, see if it is a multiplication.
+    if (!Result)
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+        switch (BO->getOpcode()) {
+        case BinaryOperator::Mul:
+          Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+          break;
+        default:
+          break;
+        }
+      }
+  }
+  // Guard against huge trip counts.
+  if (Result && Result->getValue().getActiveBits() <= 32) {
+    return (unsigned)Result->getZExtValue();
+  } else {
+    return 1;
+  }
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form.
+bool Loop::isLCSSAForm() const {
+  // Collect the loop's blocks in a set for fast membership tests.
+  SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+
+  for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+    BasicBlock *BB = *BI;
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+           ++UI) {
+        BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+        if (PHINode *P = dyn_cast<PHINode>(*UI)) {
+          UserBB = P->getIncomingBlock(UI);
+        }
+
+        // Check the current block, as a fast-path. Most values are used in
+        // the same block they are defined in.
+        if (UserBB != BB && !LoopBBs.count(UserBB))
+          return false;
+      }
+  }
+
+  return true;
+}
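// A rough usage sketch of the Loop queries added above -- hypothetical
// client code, with L a Loop* obtained from LoopInfo:
//
//   if (L->isLCSSAForm()) {
//     if (unsigned TC = L->getSmallConstantTripCount())
//       ;  // trip count is a known constant < 2^32
//     else if (L->getSmallConstantTripMultiple() == 4)
//       ;  // unknown trip count, but provably a multiple of 4,
//          // e.g. a loop stepping i from 0 to 4*n
//   }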
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+  // Normal-form loops have a preheader.
+  if (!getLoopPreheader())
+    return false;
+  // Normal-form loops have a single backedge.
+  if (!getLoopLatch())
+    return false;
+  // Each predecessor of each exit block of a normal loop is contained
+  // within the loop.
+  SmallVector<BasicBlock*, 4> ExitBlocks;
+  getExitBlocks(ExitBlocks);
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+         PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+      if (!contains(*PI))
+        return false;
+  // All the requirements are met.
+  return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop is in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock*> &ExitBlocks) const {
+  assert(isLoopSimplifyForm() &&
+         "getUniqueExitBlocks assumes the loop is in canonical form!");
+
+  // Sort the blocks vector so that we can use binary search to do quick
+  // lookups.
+  SmallVector<BasicBlock*, 128> LoopBBs(block_begin(), block_end());
+  std::sort(LoopBBs.begin(), LoopBBs.end());
+
+  SmallVector<BasicBlock*, 32> switchExitBlocks;
+
+  for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+    BasicBlock *current = *BI;
+    switchExitBlocks.clear();
+
+    typedef GraphTraits<BasicBlock*> BlockTraits;
+    typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits;
+    for (BlockTraits::ChildIteratorType I =
+         BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
+         I != E; ++I) {
+      // If a block is inside the loop then it is not an exit block.
+      if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+        continue;
+
+      InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(*I);
+      BasicBlock *firstPred = *PI;
+
+      // Only insert the exit block into the output ExitBlocks vector if the
+      // current basic block is this exit block's first predecessor. This
+      // ensures that the same exit block is not inserted twice into the
+      // ExitBlocks vector.
+      if (current != firstPred)
+        continue;
+
+      // If a terminator has more than two successors, for example SwitchInst,
+      // then it is possible that there are multiple edges from current block
+      // to one exit block.
+      if (std::distance(BlockTraits::child_begin(current),
+                        BlockTraits::child_end(current)) <= 2) {
+        ExitBlocks.push_back(*I);
+        continue;
+      }
+
+      // In case of multiple edges from current block to exit block, collect
+      // only one edge in ExitBlocks. Use switchExitBlocks to keep track of
+      // duplicate edges.
+      if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+          == switchExitBlocks.end()) {
+        switchExitBlocks.push_back(*I);
+        ExitBlocks.push_back(*I);
+      }
+    }
+  }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+  SmallVector<BasicBlock*, 8> UniqueExitBlocks;
+  getUniqueExitBlocks(UniqueExitBlocks);
+  if (UniqueExitBlocks.size() == 1)
+    return UniqueExitBlocks[0];
+  return 0;
+}
+
 //===----------------------------------------------------------------------===//
 // LoopInfo implementation
 //
@@ -43,7 +385,29 @@ bool LoopInfo::runOnFunction(Function &) {
   return false;
 }

+void LoopInfo::verifyAnalysis() const {
+  // LoopInfo is a FunctionPass, but verifying every loop in the function
+  // each time verifyAnalysis is called is very expensive. The
+  // -verify-loop-info option can enable this.
In order to perform some + // checking by default, LoopPass has been taught to call verifyLoop + // manually during loop pass sequences. + + if (!VerifyLoopInfo) return; + + for (iterator I = begin(), E = end(); I != E; ++I) { + assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); + (*I)->verifyLoopNest(); + } + + // TODO: check BBMap consistency. +} + void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); } + +void LoopInfo::print(raw_ostream &OS, const Module*) const { + LI.print(OS); +} + diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index ee03556f27412..43463cd8ef1cc 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -21,7 +21,6 @@ using namespace llvm; // char LPPassManager::ID = 0; -/// LPPassManager manages FPPassManagers and CalLGraphSCCPasses. LPPassManager::LPPassManager(int Depth) : FunctionPass(&ID), PMDataManager(Depth) { @@ -111,17 +110,21 @@ void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { else LI->addTopLevelLoop(L); + insertLoopIntoQueue(L); +} + +void LPPassManager::insertLoopIntoQueue(Loop *L) { // Insert L into loop queue if (L == CurrentLoop) redoLoop(L); - else if (!ParentLoop) + else if (!L->getParentLoop()) // This is top level loop. LQ.push_front(L); else { - // Insert L after ParentLoop + // Insert L after the parent loop. for (std::deque::iterator I = LQ.begin(), E = LQ.end(); I != E; ++I) { - if (*I == ParentLoop) { + if (*I == L->getParentLoop()) { // deque does not support insert after. ++I; LQ.insert(I, 1, L); @@ -217,41 +220,66 @@ bool LPPassManager::runOnFunction(Function &F) { skipThisLoop = false; redoThisLoop = false; - // Run all passes on current SCC + // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { Pass *P = getContainedPass(Index); - dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, ""); + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, + CurrentLoop->getHeader()->getNameStr()); dumpRequiredSet(P); initializeAnalysisImpl(P); LoopPass *LP = dynamic_cast(P); + assert(LP && "Invalid LPPassManager member"); { PassManagerPrettyStackEntry X(LP, *CurrentLoop->getHeader()); - StartPassTimer(P); - assert(LP && "Invalid LPPassManager member"); + Timer *T = StartPassTimer(P); Changed |= LP->runOnLoop(CurrentLoop, *this); - StopPassTimer(P); + StopPassTimer(P, T); } if (Changed) - dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, ""); + dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, + skipThisLoop ? "" : + CurrentLoop->getHeader()->getNameStr()); dumpPreservedSet(P); - verifyPreservedAnalysis(LP); + if (!skipThisLoop) { + // Manually check that this loop is still healthy. This is done + // instead of relying on LoopInfo::verifyLoop since LoopInfo + // is a function pass and it's really expensive to verify every + // loop in the function every time. That level of checking can be + // enabled with the -verify-loop-info option. + Timer *T = StartPassTimer(LI); + CurrentLoop->verifyLoop(); + StopPassTimer(LI, T); + + // Then call the regular verifyAnalysis functions. + verifyPreservedAnalysis(LP); + } + removeNotPreservedAnalysis(P); recordAvailableAnalysis(P); - removeDeadPasses(P, "", ON_LOOP_MSG); - - // If dominator information is available then verify the info if requested. - verifyDomInfo(*LP, F); + removeDeadPasses(P, + skipThisLoop ? "" : + CurrentLoop->getHeader()->getNameStr(), + ON_LOOP_MSG); if (skipThisLoop) // Do not run other passes on this loop. 
         break;
     }

+    // If the loop was deleted, release all the loop passes. This frees up
+    // some memory, and avoids trouble with the pass manager trying to call
+    // verifyAnalysis on them.
+    if (skipThisLoop)
+      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+        Pass *P = getContainedPass(Index);
+        freePass(P, "", ON_LOOP_MSG);
+      }
+
     // Pop the loop from queue after running all passes.
     LQ.pop_back();

@@ -272,7 +300,7 @@ bool LPPassManager::runOnFunction(Function &F) {

 /// Print passes managed by this manager
 void LPPassManager::dumpPassStructure(unsigned Offset) {
-  llvm::cerr << std::string(Offset*2, ' ') << "Loop Pass Manager\n";
+  errs().indent(Offset*2) << "Loop Pass Manager\n";
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     Pass *P = getContainedPass(Index);
     P->dumpPassStructure(Offset + 1);
diff --git a/lib/Analysis/MallocHelper.cpp b/lib/Analysis/MallocHelper.cpp
new file mode 100644
index 0000000000000..89051d1788387
--- /dev/null
+++ b/lib/Analysis/MallocHelper.cpp
@@ -0,0 +1,230 @@
+//===-- MallocHelper.cpp - Functions to identify malloc calls ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to malloc, bitcasts of malloc
+// calls, and the types and array sizes associated with them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MallocHelper.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value* I) {
+  return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+  if (!CI)
+    return false;
+
+  const Module* M = CI->getParent()->getParent()->getParent();
+  Function *MallocFunc = M->getFunction("malloc");
+
+  if (CI->getOperand(0) != MallocFunc)
+    return false;
+
+  // Check malloc prototype.
+  // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+  // attribute will exist.
+  const FunctionType *FTy = MallocFunc->getFunctionType();
+  if (FTy->getNumParams() != 1)
+    return false;
+  if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+    if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+      return false;
+    return true;
+  }
+
+  return false;
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst* llvm::extractMallocCall(const Value* I) {
+  const CallInst *CI = dyn_cast<CallInst>(I);
+  return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst* llvm::extractMallocCall(Value* I) {
+  CallInst *CI = dyn_cast<CallInst>(I);
+  return (isMallocCall(CI)) ? CI : NULL;
+}
CI : NULL; +} + +static bool isBitCastOfMallocCall(const BitCastInst* BCI) { + if (!BCI) + return false; + + return isMallocCall(dyn_cast(BCI->getOperand(0))); +} + +/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the +/// instruction is a bitcast of the result of a malloc call. +CallInst* llvm::extractMallocCallFromBitCast(Value* I) { + BitCastInst *BCI = dyn_cast(I); + return (isBitCastOfMallocCall(BCI)) ? cast(BCI->getOperand(0)) + : NULL; +} + +const CallInst* llvm::extractMallocCallFromBitCast(const Value* I) { + const BitCastInst *BCI = dyn_cast(I); + return (isBitCastOfMallocCall(BCI)) ? cast(BCI->getOperand(0)) + : NULL; +} + +static bool isArrayMallocHelper(const CallInst *CI, LLVMContext &Context, + const TargetData* TD) { + if (!CI) + return false; + + const Type* T = getMallocAllocatedType(CI); + + // We can only indentify an array malloc if we know the type of the malloc + // call. + if (!T) return false; + + Value* MallocArg = CI->getOperand(1); + Constant *ElementSize = ConstantExpr::getSizeOf(T); + ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, + MallocArg->getType()); + Constant *FoldedElementSize = ConstantFoldConstantExpression( + cast(ElementSize), + Context, TD); + + + if (isa(MallocArg)) + return (MallocArg != ElementSize); + + BinaryOperator *BI = dyn_cast(MallocArg); + if (!BI) + return false; + + if (BI->getOpcode() == Instruction::Mul) + // ArraySize * ElementSize + if (BI->getOperand(1) == ElementSize || + (FoldedElementSize && BI->getOperand(1) == FoldedElementSize)) + return true; + + // TODO: Detect case where MallocArg mul has been transformed to shl. + + return false; +} + +/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// matches the malloc call IR generated by CallInst::CreateMalloc(). This +/// means that it is a malloc call with one bitcast use AND the malloc call's +/// size argument is: +/// 1. a constant not equal to the malloc's allocated type +/// or +/// 2. the result of a multiplication by the malloc's allocated type +/// Otherwise it returns NULL. +/// The unique bitcast is needed to determine the type/size of the array +/// allocation. +CallInst* llvm::isArrayMalloc(Value* I, LLVMContext &Context, + const TargetData* TD) { + CallInst *CI = extractMallocCall(I); + return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL; +} + +const CallInst* llvm::isArrayMalloc(const Value* I, LLVMContext &Context, + const TargetData* TD) { + const CallInst *CI = extractMallocCall(I); + return (isArrayMallocHelper(CI, Context, TD)) ? CI : NULL; +} + +/// getMallocType - Returns the PointerType resulting from the malloc call. +/// This PointerType is the result type of the call's only bitcast use. +/// If there is no unique bitcast use, then return NULL. +const PointerType* llvm::getMallocType(const CallInst* CI) { + assert(isMalloc(CI) && "GetMallocType and not malloc call"); + + const BitCastInst* BCI = NULL; + + // Determine if CallInst has a bitcast use. + for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) + if ((BCI = dyn_cast(cast(*UI++)))) + break; + + // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's + // destination type. + if (BCI && CI->hasOneUse()) + return cast(BCI->getDestTy()); + + // Malloc call was not bitcast, so type is the malloc function's return type. + if (!BCI) + return cast(CI->getType()); + + // Type could not be determined. 
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
+const PointerType* llvm::getMallocType(const CallInst* CI) {
+  assert(isMalloc(CI) && "GetMallocType and not malloc call");
+
+  const BitCastInst* BCI = NULL;
+
+  // Determine if CallInst has a bitcast use.
+  for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
+       UI != E; )
+    if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
+      break;
+
+  // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
+  // destination type.
+  if (BCI && CI->hasOneUse())
+    return cast<PointerType>(BCI->getDestTy());
+
+  // Malloc call was not bitcast, so type is the malloc function's return type.
+  if (!BCI)
+    return cast<PointerType>(CI->getType());
+
+  // Type could not be determined.
+  return NULL;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
+/// Type is the result type of the call's only bitcast use. If there is no
+/// unique bitcast use, then return NULL.
+const Type* llvm::getMallocAllocatedType(const CallInst* CI) {
+  const PointerType* PT = getMallocType(CI);
+  return PT ? PT->getElementType() : NULL;
+}
+
+/// isConstantOne - Return true only if val is constant int 1.
+static bool isConstantOne(Value *val) {
+  return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call. The array
+/// size is computed in 1 of 3 ways:
+/// 1. If the element type is of size 1, then the array size is the argument
+///    to malloc.
+/// 2. Else if the malloc's argument is a constant, the array size is that
+///    argument divided by the element type's size.
+/// 3. Else the malloc argument must be a multiplication and the array size
+///    is the first operand of the multiplication.
+/// This function returns constant 1 if:
+/// 1. The malloc call's allocated type cannot be determined.
+/// 2. IR wasn't created by a call to CallInst::CreateMalloc() with a non-NULL
+///    ArraySize.
+Value* llvm::getMallocArraySize(CallInst* CI, LLVMContext &Context,
+                                const TargetData* TD) {
+  // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
+  if (!isArrayMalloc(CI, Context, TD))
+    return ConstantInt::get(CI->getOperand(1)->getType(), 1);
+
+  Value* MallocArg = CI->getOperand(1);
+  assert(getMallocAllocatedType(CI) && "getMallocArraySize and no type");
+  Constant *ElementSize = ConstantExpr::getSizeOf(getMallocAllocatedType(CI));
+  ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize,
+                                                MallocArg->getType());
+
+  Constant* CO = dyn_cast<Constant>(MallocArg);
+  BinaryOperator* BO = dyn_cast<BinaryOperator>(MallocArg);
+  assert((isConstantOne(ElementSize) || CO || BO) &&
+         "getMallocArraySize and malformed malloc IR");
+
+  if (isConstantOne(ElementSize))
+    return MallocArg;
+
+  if (CO)
+    return CO->getOperand(0);
+
+  // TODO: Detect case where MallocArg mul has been transformed to shl.
+
+  assert(BO && "getMallocArraySize not constant but not multiplication either");
+  return BO->getOperand(0);
+}
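To make the three cases above concrete with hypothetical IR shapes: for
malloc(%n) whose allocated type is i8, the element size is 1 and the array
size is %n itself (case 1); for a constant sizeof-style argument with
allocated type i32, the array size is that argument divided by 4 (case 2);
and for malloc(mul %n, 4) with allocated type i32, the array size is %n, the
first operand of the multiplication (case 3).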
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 3b2102955f337..d6400757a5131 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -16,16 +16,15 @@
 #define DEBUG_TYPE "memdep"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Function.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MallocHelper.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/PredIteratorCache.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
 using namespace llvm;

 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
@@ -71,12 +70,10 @@ void MemoryDependenceAnalysis::releaseMemory() {
 void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequiredTransitive<AliasAnalysis>();
-  AU.addRequiredTransitive<TargetData>();
 }

 bool MemoryDependenceAnalysis::runOnFunction(Function &) {
   AA = &getAnalysis<AliasAnalysis>();
-  TD = &getAnalysis<TargetData>();
   if (PredCache == 0)
     PredCache.reset(new PredIteratorCache());
   return false;
@@ -112,10 +109,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
     uint64_t PointerSize = 0;
     if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
       Pointer = S->getPointerOperand();
-      PointerSize = TD->getTypeStoreSize(S->getOperand(0)->getType());
+      PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
     } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
       Pointer = V->getOperand(0);
-      PointerSize = TD->getTypeStoreSize(V->getType());
+      PointerSize = AA->getTypeStoreSize(V->getType());
     } else if (FreeInst *F = dyn_cast<FreeInst>(Inst)) {
       Pointer = F->getPointerOperand();
@@ -185,7 +182,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     // a load depends on another must aliased load from the same value.
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
       Value *Pointer = LI->getPointerOperand();
-      uint64_t PointerSize = TD->getTypeStoreSize(LI->getType());
+      uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());

       // If we found a pointer, check if it could be the same as our pointer.
       AliasAnalysis::AliasResult R =
@@ -211,7 +208,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
       // Ok, this store might clobber the query pointer.  Check to see if it is
       // a must alias: in this case, we want to return this as a def.
       Value *Pointer = SI->getPointerOperand();
-      uint64_t PointerSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+      uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());

       // If we found a pointer, check if it could be the same as our pointer.
       AliasAnalysis::AliasResult R =
@@ -228,15 +225,19 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     // the allocation, return Def.  This means that there is no dependence and
     // the access can be optimized based on that.  For example, a load could
     // turn into undef.
-    if (AllocationInst *AI = dyn_cast<AllocationInst>(Inst)) {
+    // Note: Only determine this to be a malloc if Inst is the malloc call, not
+    // a subsequent bitcast of the malloc call result.  There can be stores to
+    // the malloced memory between the malloc call and its bitcast uses, and we
+    // need to continue scanning until the malloc call.
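// A hypothetical sequence illustrating the note above:
//   %m = call i8* @malloc(i64 4)
//   store i8 1, i8* %m
//   %b = bitcast i8* %m to i32*
//   %l = load i32* %b
// A dependence query for %l must not stop at %b; scanning continues past the
// bitcast so the store is correctly reported as the dependency.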
+    if (isa<AllocationInst>(Inst) || extractMallocCall(Inst)) {
       Value *AccessPtr = MemPtr->getUnderlyingObject();

-      if (AccessPtr == AI ||
-          AA->alias(AI, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
-        return MemDepResult::getDef(AI);
+      if (AccessPtr == Inst ||
+          AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+        return MemDepResult::getDef(Inst);
       continue;
     }
-
+
     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
     switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
     case AliasAnalysis::NoModRef:
@@ -302,7 +303,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
       LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
     else {
       MemPtr = SI->getPointerOperand();
-      MemSize = TD->getTypeStoreSize(SI->getOperand(0)->getType());
+      MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
     }
   } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
     // If this is a volatile load, don't mess around with it.  Just return the
@@ -311,7 +312,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
       LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
     else {
       MemPtr = LI->getPointerOperand();
-      MemSize = TD->getTypeStoreSize(LI->getType());
+      MemSize = AA->getTypeStoreSize(LI->getType());
     }
   } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
     CallSite QueryCS = CallSite::get(QueryInst);
@@ -513,7 +514,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
   // We know that the pointer value is live into FromBB; find the def/clobbers
   // from predecessors.
   const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
-  uint64_t PointeeSize = TD->getTypeStoreSize(EltTy);
+  uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);

   // This is the set of blocks we've inspected, and the pointer we consider in
   // each block.  Because of critical edges, we currently bail out if querying
@@ -599,6 +600,42 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
   return Dep;
 }

+/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered.  This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+                         unsigned NumSortedEntries) {
+  switch (Cache.size() - NumSortedEntries) {
+  case 0:
+    // Done, no new entries.
+    break;
+  case 2: {
+    // Two new entries, insert the last one into place.
+    MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+    Cache.pop_back();
+    MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+      std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+    Cache.insert(Entry, Val);
+    // FALL THROUGH.
+  }
+  case 1:
+    // One new entry, just insert the new value at the appropriate position.
+    if (Cache.size() != 1) {
+      MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+      Cache.pop_back();
+      MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+        std::upper_bound(Cache.begin(), Cache.end(), Val);
+      Cache.insert(Entry, Val);
+    }
+    break;
+  default:
+    // Added many values, do a full scale sort.
+    std::sort(Cache.begin(), Cache.end());
+    break;
+  }
+}
+
 /// getNonLocalPointerDepFromBB - Perform a dependency query based on
 /// pointer/pointeesize starting at the end of StartBB.
Add any clobber/def @@ -731,10 +768,22 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, // If we do need to do phi translation, then there are a bunch of different // cases, because we have to find a Value* live in the predecessor block. We // know that PtrInst is defined in this block at least. + + // We may have added values to the cache list before this PHI translation. + // If so, we haven't done anything to ensure that the cache remains sorted. + // Sort it now (if needed) so that recursive invocations of + // getNonLocalPointerDepFromBB and other routines that could reuse the cache + // value will only see properly sorted cache arrays. + if (Cache && NumSortedEntries != Cache->size()) { + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + NumSortedEntries = Cache->size(); + } // If this is directly a PHI node, just use the incoming values for each // pred as the phi translated version. if (PHINode *PtrPHI = dyn_cast(PtrInst)) { + Cache = 0; + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { BasicBlock *Pred = *PI; Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred); @@ -759,15 +808,6 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, goto PredTranslationFailure; } - // We may have added values to the cache list before this PHI - // translation. If so, we haven't done anything to ensure that the - // cache remains sorted. Sort it now (if needed) so that recursive - // invocations of getNonLocalPointerDepFromBB that could reuse the cache - // value will only see properly sorted cache arrays. - if (Cache && NumSortedEntries != Cache->size()) - std::sort(Cache->begin(), Cache->end()); - Cache = 0; - // FIXME: it is entirely possible that PHI translating will end up with // the same value. Consider PHI translating something like: // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* @@ -779,7 +819,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, Result, Visited)) goto PredTranslationFailure; } - + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->second; @@ -806,11 +846,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->second; NumSortedEntries = Cache->size(); - } else if (NumSortedEntries != Cache->size()) { - std::sort(Cache->begin(), Cache->end()); - NumSortedEntries = Cache->size(); } - + // Since we did phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all @@ -841,33 +878,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, } // Okay, we're done now. If we added new values to the cache, re-sort it. - switch (Cache->size()-NumSortedEntries) { - case 0: - // done, no new entries. - break; - case 2: { - // Two new entries, insert the last one into place. - NonLocalDepEntry Val = Cache->back(); - Cache->pop_back(); - NonLocalDepInfo::iterator Entry = - std::upper_bound(Cache->begin(), Cache->end()-1, Val); - Cache->insert(Entry, Val); - // FALL THROUGH. - } - case 1: - // One new entry, Just insert the new value at the appropriate position. 
- if (Cache->size() != 1) { - NonLocalDepEntry Val = Cache->back(); - Cache->pop_back(); - NonLocalDepInfo::iterator Entry = - std::upper_bound(Cache->begin(), Cache->end(), Val); - Cache->insert(Entry, Val); - } - break; - default: - // Added many values, do a full scale sort. - std::sort(Cache->begin(), Cache->end()); - } + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); DEBUG(AssertSorted(*Cache)); return false; } diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp new file mode 100644 index 0000000000000..43f4af36d81c3 --- /dev/null +++ b/lib/Analysis/PointerTracking.cpp @@ -0,0 +1,265 @@ +//===- PointerTracking.cpp - Pointer Bounds Tracking ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements tracking of pointer bounds. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MallocHelper.h" +#include "llvm/Analysis/PointerTracking.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Value.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +char PointerTracking::ID = 0; +PointerTracking::PointerTracking() : FunctionPass(&ID) {} + +bool PointerTracking::runOnFunction(Function &F) { + predCache.clear(); + assert(analyzing.empty()); + FF = &F; + TD = getAnalysisIfAvailable(); + SE = &getAnalysis(); + LI = &getAnalysis(); + DT = &getAnalysis(); + return false; +} + +void PointerTracking::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + AU.setPreservesAll(); +} + +bool PointerTracking::doInitialization(Module &M) { + const Type *PTy = Type::getInt8PtrTy(M.getContext()); + + // Find calloc(i64, i64) or calloc(i32, i32). + callocFunc = M.getFunction("calloc"); + if (callocFunc) { + const FunctionType *Ty = callocFunc->getFunctionType(); + + std::vector args, args2; + args.push_back(Type::getInt64Ty(M.getContext())); + args.push_back(Type::getInt64Ty(M.getContext())); + args2.push_back(Type::getInt32Ty(M.getContext())); + args2.push_back(Type::getInt32Ty(M.getContext())); + const FunctionType *Calloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Calloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Calloc1Type && Ty != Calloc2Type) + callocFunc = 0; // Give up + } + + // Find realloc(i8*, i64) or realloc(i8*, i32). 
+ reallocFunc = M.getFunction("realloc"); + if (reallocFunc) { + const FunctionType *Ty = reallocFunc->getFunctionType(); + std::vector args, args2; + args.push_back(PTy); + args.push_back(Type::getInt64Ty(M.getContext())); + args2.push_back(PTy); + args2.push_back(Type::getInt32Ty(M.getContext())); + + const FunctionType *Realloc1Type = + FunctionType::get(PTy, args, false); + const FunctionType *Realloc2Type = + FunctionType::get(PTy, args2, false); + if (Ty != Realloc1Type && Ty != Realloc2Type) + reallocFunc = 0; // Give up + } + return false; +} + +// Calculates the number of elements allocated for pointer P, +// the type of the element is stored in Ty. +const SCEV *PointerTracking::computeAllocationCount(Value *P, + const Type *&Ty) const { + Value *V = P->stripPointerCasts(); + if (AllocationInst *AI = dyn_cast(V)) { + Value *arraySize = AI->getArraySize(); + Ty = AI->getAllocatedType(); + // arraySize elements of type Ty. + return SE->getSCEV(arraySize); + } + + if (CallInst *CI = extractMallocCall(V)) { + Value *arraySize = getMallocArraySize(CI, P->getContext(), TD); + Ty = getMallocAllocatedType(CI); + if (!Ty || !arraySize) return SE->getCouldNotCompute(); + // arraySize elements of type Ty. + return SE->getSCEV(arraySize); + } + + if (GlobalVariable *GV = dyn_cast(V)) { + if (GV->hasDefinitiveInitializer()) { + Constant *C = GV->getInitializer(); + if (const ArrayType *ATy = dyn_cast(C->getType())) { + Ty = ATy->getElementType(); + return SE->getConstant(Type::getInt32Ty(P->getContext()), + ATy->getNumElements()); + } + } + Ty = GV->getType(); + return SE->getConstant(Type::getInt32Ty(P->getContext()), 1); + //TODO: implement more tracking for globals + } + + if (CallInst *CI = dyn_cast(V)) { + CallSite CS(CI); + Function *F = dyn_cast(CS.getCalledValue()->stripPointerCasts()); + const Loop *L = LI->getLoopFor(CI->getParent()); + if (F == callocFunc) { + Ty = Type::getInt8Ty(P->getContext()); + // calloc allocates arg0*arg1 bytes. + return SE->getSCEVAtScope(SE->getMulExpr(SE->getSCEV(CS.getArgument(0)), + SE->getSCEV(CS.getArgument(1))), + L); + } else if (F == reallocFunc) { + Ty = Type::getInt8Ty(P->getContext()); + // realloc allocates arg1 bytes. + return SE->getSCEVAtScope(CS.getArgument(1), L); + } + } + + return SE->getCouldNotCompute(); +} + +// Calculates the number of elements of type Ty allocated for P. +const SCEV *PointerTracking::computeAllocationCountForType(Value *P, + const Type *Ty) + const { + const Type *elementTy; + const SCEV *Count = computeAllocationCount(P, elementTy); + if (isa(Count)) + return Count; + if (elementTy == Ty) + return Count; + + if (!TD) // need TargetData from this point forward + return SE->getCouldNotCompute(); + + uint64_t elementSize = TD->getTypeAllocSize(elementTy); + uint64_t wantSize = TD->getTypeAllocSize(Ty); + if (elementSize == wantSize) + return Count; + if (elementSize % wantSize) //fractional counts not possible + return SE->getCouldNotCompute(); + return SE->getMulExpr(Count, SE->getConstant(Count->getType(), + elementSize/wantSize)); +} + +const SCEV *PointerTracking::getAllocationElementCount(Value *V) const { + // We only deal with pointers. 
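// A worked instance of the scaling in computeAllocationCountForType above,
// with hypothetical values: Count = 10 elements of elementTy i32, re-counted
// for Ty = i8, gives elementSize 4 and wantSize 1, hence 10 * (4/1) = 40;
// a hypothetical 3-byte wantSize leaves a remainder (4 % 3 != 0) and yields
// SCEVCouldNotCompute.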
+  const PointerType *PTy = cast<PointerType>(V->getType());
+  return computeAllocationCountForType(V, PTy->getElementType());
+}
+
+const SCEV *PointerTracking::getAllocationSizeInBytes(Value *V) const {
+  return computeAllocationCountForType(V, Type::getInt8Ty(V->getContext()));
+}
+
+// Helper for isLoopGuardedBy that checks the swapped and inverted predicate
+// too.
+enum SolverResult PointerTracking::isLoopGuardedBy(const Loop *L,
+                                                   Predicate Pred,
+                                                   const SCEV *A,
+                                                   const SCEV *B) const {
+  if (SE->isLoopGuardedByCond(L, Pred, A, B))
+    return AlwaysTrue;
+  Pred = ICmpInst::getSwappedPredicate(Pred);
+  if (SE->isLoopGuardedByCond(L, Pred, B, A))
+    return AlwaysTrue;
+
+  Pred = ICmpInst::getInversePredicate(Pred);
+  if (SE->isLoopGuardedByCond(L, Pred, B, A))
+    return AlwaysFalse;
+  Pred = ICmpInst::getSwappedPredicate(Pred);
+  if (SE->isLoopGuardedByCond(L, Pred, A, B))
+    return AlwaysFalse;
+  return Unknown;
+}
+
+enum SolverResult PointerTracking::checkLimits(const SCEV *Offset,
+                                               const SCEV *Limit,
+                                               BasicBlock *BB)
+{
+  //FIXME: merge implementation
+  return Unknown;
+}
+
+void PointerTracking::getPointerOffset(Value *Pointer, Value *&Base,
+                                       const SCEV *&Limit,
+                                       const SCEV *&Offset) const
+{
+  Pointer = Pointer->stripPointerCasts();
+  Base = Pointer->getUnderlyingObject();
+  Limit = getAllocationSizeInBytes(Base);
+  if (isa<SCEVCouldNotCompute>(Limit)) {
+    Base = 0;
+    Offset = Limit;
+    return;
+  }
+
+  Offset = SE->getMinusSCEV(SE->getSCEV(Pointer), SE->getSCEV(Base));
+  if (isa<SCEVCouldNotCompute>(Offset)) {
+    Base = 0;
+    Limit = Offset;
+  }
+}
+
+void PointerTracking::print(raw_ostream &OS, const Module* M) const {
+  // Calling some PT methods may cause caches to be updated, however
+  // this should be safe for the same reason it's safe for SCEV.
+  PointerTracking &PT = *const_cast<PointerTracking*>(this);
+  for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
+    if (!isa<PointerType>(I->getType()))
+      continue;
+    Value *Base;
+    const SCEV *Limit, *Offset;
+    getPointerOffset(&*I, Base, Limit, Offset);
+    if (!Base)
+      continue;
+
+    if (Base == &*I) {
+      const SCEV *S = getAllocationElementCount(Base);
+      OS << *Base << " ==> " << *S << " elements, ";
+      OS << *Limit << " bytes allocated\n";
+      continue;
+    }
+    OS << &*I << " -- base: " << *Base;
+    OS << " offset: " << *Offset;
+
+    enum SolverResult res = PT.checkLimits(Offset, Limit, I->getParent());
+    switch (res) {
+    case AlwaysTrue:
+      OS << " always safe\n";
+      break;
+    case AlwaysFalse:
+      OS << " always unsafe\n";
+      break;
+    case Unknown:
+      OS << " <<unknown>>\n";
+      break;
+    }
+  }
+}
+
+static RegisterPass<PointerTracking> X("pointertracking",
+                                       "Track pointer bounds", false, true);
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 4853c2ac87b79..69d6b47bbee49 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -33,15 +33,19 @@ F("postdomtree", "Post-Dominator Tree Construction", true, true);

 bool PostDominatorTree::runOnFunction(Function &F) {
   DT->recalculate(F);
-  DEBUG(DT->dump());
+  DEBUG(DT->print(errs()));
   return false;
 }

-PostDominatorTree::~PostDominatorTree()
-{
+PostDominatorTree::~PostDominatorTree() {
   delete DT;
 }

+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+  DT->print(OS);
+}
+
+
 FunctionPass* llvm::createPostDomTree() {
   return new PostDominatorTree();
 }
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 0000000000000..c585c1dced044
--- /dev/null
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,310 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// estimates the profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+    "profile-estimator-loop-weight", cl::init(10),
+    cl::value_desc("loop-weight"),
+    cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+  class VISIBILITY_HIDDEN ProfileEstimatorPass :
+      public FunctionPass, public ProfileInfo {
+    double ExecCount;
+    LoopInfo *LI;
+    std::set<BasicBlock*> BBToVisit;
+    std::map<Loop*,double> LoopExitWeights;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    explicit ProfileEstimatorPass(const double execcount = 0)
+      : FunctionPass(&ID), ExecCount(execcount) {
+      if (execcount == 0) ExecCount = LoopWeight;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<LoopInfo>();
+    }
+
+    virtual const char *getPassName() const {
+      return "Profiling information estimator";
+    }
+
+    /// run - Estimate the profiling information for the specified function.
+    virtual bool runOnFunction(Function &F);
+
+    virtual void recurseBasicBlock(BasicBlock *BB);
+
+    void inline printEdgeWeight(Edge);
+  };
+} // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+static RegisterPass<ProfileEstimatorPass>
+X("profile-estimator", "Estimate profiling information", false, true);
+
+static RegisterAnalysisGroup<ProfileInfo> Y(X);
+
+namespace llvm {
+  const PassInfo *ProfileEstimatorPassID = &X;
+
+  FunctionPass *createProfileEstimatorPass() {
+    return new ProfileEstimatorPass();
+  }
+
+  /// createProfileEstimatorPass - This function returns a Pass that estimates
+  /// profiling information using the given loop execution count.
+  Pass *createProfileEstimatorPass(const unsigned execcount) {
+    return new ProfileEstimatorPass(execcount);
+  }
+}
+
+static double ignoreMissing(double w) {
+  if (w == ProfileInfo::MissingValue) return 0;
+  return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+  DEBUG(errs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+  DEBUG(errs() << "-- Weight of Edge " << E << ":"
+               << format("%g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must be equal to the block weight, which must
+// in turn be equal to the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
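// A small worked instance of the flow condition, with hypothetical weights:
// in a diamond CFG entry -> {then, else} -> exit where entry has weight 1,
// the two outgoing edges receive 0.5 each, both branch blocks therefore get
// weight 0.5, and exit again sums to 0.5 + 0.5 = 1 -- incoming edge sums,
// block weights and outgoing edge sums all agree.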
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weight of the incoming edges must equal the block weight, which must in
+// turn be equal to the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again,
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+  // Break the recursion if this BasicBlock was already visited.
+  if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+  // Read the LoopInfo for this block.
+  bool  BBisHeader = LI->isLoopHeader(BB);
+  Loop* BBLoop     = LI->getLoopFor(BB);
+
+  // To get the block weight, read all incoming edges.
+  double BBWeight = 0;
+  std::set<BasicBlock*> ProcessedPreds;
+  for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+        bbi != bbe; ++bbi ) {
+    // If this block was not considered already, add weight.
+    Edge edge = getEdge(*bbi,BB);
+    double w = getEdgeWeight(edge);
+    if (ProcessedPreds.insert(*bbi).second) {
+      BBWeight += ignoreMissing(w);
+    }
+    // If this block is a loop header and the predecessor is contained in this
+    // loop, then the edge is a backedge; continue and do not check if the
+    // value is valid.
+    if (BBisHeader && BBLoop->contains(*bbi)) {
+      printEdgeError(edge, "but is backedge, continuing");
+      continue;
+    }
+    // If the edge's value is missing (and this is no loop header, and this is
+    // no backedge) return; this block is currently not estimable.
+    if (w == MissingValue) {
+      printEdgeError(edge, "returning");
+      return;
+    }
+  }
+  if (getExecutionCount(BB) != MissingValue) {
+    BBWeight = getExecutionCount(BB);
+  }
+
+  // Fetch all necessary information for current block.
+  SmallVector<Edge, 8> ExitEdges;
+  SmallVector<Edge, 8> Edges;
+  if (BBLoop) {
+    BBLoop->getExitEdges(ExitEdges);
+  }
+
+  // If this is a loop header, consider the following:
+  // Exactly the flow that is entering this block must exit this block too. So
+  // do the following:
+  // *) get all the exit edges, read the flow that is already leaving this
+  // loop, remember the edges that do not have any flow on them right now.
+  // (The edges that already have flow on them are most likely exiting edges of
+  // other loops, do not touch those flows because the previously calculated
+  // loop headers would not be exact anymore.)
+  // *) In case there is not a single exiting edge left, create one at the loop
+  // latch to prevent the flow from building up in the loop.
+  // *) Take the flow that is not leaving the loop already and distribute it on
+  // the remaining exiting edges.
+  // (This ensures that all flow that enters the loop also leaves it.)
+  // *) Increase the flow into the loop by increasing the weight of this block.
+  // There is at least one incoming backedge that will bring us this flow later
+  // on. (So that the flow condition in this node is valid again.)
+  if (BBisHeader) {
+    double incoming = BBWeight;
+    // Subtract the flow leaving the loop.
+    std::set<Edge> ProcessedExits;
+    for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+         ee = ExitEdges.end(); ei != ee; ++ei) {
+      if (ProcessedExits.insert(*ei).second) {
+        double w = getEdgeWeight(*ei);
+        if (w == MissingValue) {
+          Edges.push_back(*ei);
+        } else {
+          incoming -= w;
+        }
+      }
+    }
+    // If no exit edges, create one:
+    if (Edges.size() == 0) {
+      BasicBlock *Latch = BBLoop->getLoopLatch();
+      if (Latch) {
+        Edge edge = getEdge(Latch,0);
+        EdgeInformation[BB->getParent()][edge] = BBWeight;
+        printEdgeWeight(edge);
+        edge = getEdge(Latch, BB);
+        EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+        printEdgeWeight(edge);
+      }
+    }
+    // Distribute remaining weight onto the exit edges.
+    for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+         ei != ee; ++ei) {
+      EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size();
+      printEdgeWeight(*ei);
+    }
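The per-exit share computed by the loop above, together with the header scaling that follows, reduces to simple arithmetic. A sketch with illustrative names (these helper functions are not part of the patch):

// Flow entering a loop header must leave through its exit edges. Exits
// that already carry flow keep it; the rest split the remainder evenly.
double unassignedExitShare(double Incoming, double AlreadyAssigned,
                           unsigned UnassignedExits) {
  return UnassignedExits ? (Incoming - AlreadyAssigned) / UnassignedExits : 0;
}

// The header itself is weighted Incoming * (ExecCount + 1): one entry
// plus ExecCount expected trips around the backedge.
double headerWeight(double Incoming, double ExecCount) {
  return Incoming * (ExecCount + 1);
}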
+    // Increase flow into the loop.
+    BBWeight *= (ExecCount+1);
+  }
+
+  BlockInformation[BB->getParent()][BB] = BBWeight;
+  // Up until now we considered only the loop exiting edges, now we have a
+  // definite block weight and must distribute this onto the outgoing edges.
+  // Since there may be already flow attached to some of the edges, read this
+  // flow first and remember the edges that still have no flow attached.
+  Edges.clear();
+  std::set<BasicBlock*> ProcessedSuccs;
+
+  succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+  // Also check for (BB,0) edges that may already contain some flow. (But only
+  // in case there are no successors.)
+  if (bbi == bbe) {
+    Edge edge = getEdge(BB,0);
+    EdgeInformation[BB->getParent()][edge] = BBWeight;
+    printEdgeWeight(edge);
+  }
+  for ( ; bbi != bbe; ++bbi ) {
+    if (ProcessedSuccs.insert(*bbi).second) {
+      Edge edge = getEdge(BB,*bbi);
+      double w = getEdgeWeight(edge);
+      if (w != MissingValue) {
+        BBWeight -= getEdgeWeight(edge);
+      } else {
+        Edges.push_back(edge);
+      }
+    }
+  }
+
+  // Finally we know what flow is still not leaving the block, distribute this
+  // flow onto the empty edges.
+  for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+       ei != ee; ++ei) {
+    EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size();
+    printEdgeWeight(*ei);
+  }
+
+  // This block is visited, mark this before the recursion.
+  BBToVisit.erase(BB);
+
+  // Recurse into successors.
+  for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+       bbi != bbe; ++bbi) {
+    recurseBasicBlock(*bbi);
+  }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+  if (F.isDeclaration()) return false;
+
+  // Fetch LoopInfo and clear ProfileInfo for this function.
+  LI = &getAnalysis<LoopInfo>();
+  FunctionInformation.erase(&F);
+  BlockInformation[&F].clear();
+  EdgeInformation[&F].clear();
+
+  // Mark all blocks as to visit.
+  for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+    BBToVisit.insert(bi);
+
+  DEBUG(errs() << "Working on function " << F.getNameStr() << "\n");
+
+  // Since the entry block is the first one and has no predecessors, the edge
+  // (0,entry) is inserted with the starting weight of 1.
+  BasicBlock *entry = &F.getEntryBlock();
+  BlockInformation[&F][entry] = 1;
+  Edge edge = getEdge(0,entry);
+  EdgeInformation[&F][edge] = 1;
+  printEdgeWeight(edge);
+
+  // Since recurseBasicBlock() may return with a block that was not fully
+  // estimated, use recurseBasicBlock() until everything is calculated.
+  recurseBasicBlock(entry);
+  while (BBToVisit.size() > 0) {
+    // Remember number of open blocks, this is later used to check if progress
+    // was made.
+    unsigned size = BBToVisit.size();
+
+    // Try to calculate all blocks in turn.
+    for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+         be = BBToVisit.end(); bi != be; ++bi) {
+      recurseBasicBlock(*bi);
+      // If at least one block was finished, break because the iterator may be
+      // invalid.
+      if (BBToVisit.size() < size) break;
+    }
+
+    // If not a single block was resolved, make some assumptions.
+    if (BBToVisit.size() == size) {
+      BasicBlock *BB = *(BBToVisit.begin());
+      // Since this BB was not calculated because of missing incoming edges,
+      // set these edges to zero (the loop that follows does the zeroing).
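The zeroing just described is the deadlock-breaker of a larger fixed-point iteration. Its generic shape, sketched with an illustrative pseudo-API rather than the pass's real interface:

#include <cstddef>
#include <set>

// Re-run a partial solver until the set of open items stops shrinking;
// when a full sweep makes no progress, force an assumption on one item
// (the pass zeroes that block's missing in-edges) and sweep again. This
// terminates only if forcing eventually unblocks the solver, as it does
// above, where every forced block becomes estimable on the next sweep.
template <typename T, typename Solver, typename Forcer>
void solveToFixpoint(std::set<T> &Open, Solver solve, Forcer force) {
  while (!Open.empty()) {
    size_t Before = Open.size();
    for (typename std::set<T>::iterator I = Open.begin(), E = Open.end();
         I != E; ++I) {
      solve(*I);                        // may erase elements from Open
      if (Open.size() < Before) break;  // iterators may now be invalid
    }
    if (Open.size() == Before) force(*Open.begin());
  }
}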
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi) { + Edge e = getEdge(*bbi,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + EdgeInformation[&F][e] = 0; + DEBUG(errs() << "Assuming edge weight: "); + printEdgeWeight(e); + } + } + } + } + + return false; +} diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index a0965b66da815..9efdd23081c41 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -17,6 +17,9 @@ #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" #include using namespace llvm; @@ -26,56 +29,149 @@ char ProfileInfo::ID = 0; ProfileInfo::~ProfileInfo() {} -unsigned ProfileInfo::getExecutionCount(BasicBlock *BB) const { - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); +const double ProfileInfo::MissingValue = -1; + +double ProfileInfo::getExecutionCount(const BasicBlock *BB) { + std::map::iterator J = + BlockInformation.find(BB->getParent()); + if (J != BlockInformation.end()) { + BlockCounts::iterator I = J->second.find(BB); + if (I != J->second.end()) + return I->second; + } + + pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB); // Are there zero predecessors of this block? if (PI == PE) { // If this is the entry block, look for the Null -> Entry edge. if (BB == &BB->getParent()->getEntryBlock()) - return getEdgeWeight(0, BB); + return getEdgeWeight(getEdge(0, BB)); else return 0; // Otherwise, this is a dead block. } // Otherwise, if there are predecessors, the execution count of this block is - // the sum of the edge frequencies from the incoming edges. Note that if - // there are multiple edges from a predecessor to this block that we don't - // want to count its weight multiple times. For this reason, we keep track of - // the predecessors we've seen and only count them if we haven't run into them - // yet. - // - // We don't want to create an std::set unless we are dealing with a block that - // has a LARGE number of in-edges. Handle the common case of having only a - // few in-edges with special code. - // - BasicBlock *FirstPred = *PI; - unsigned Count = getEdgeWeight(FirstPred, BB); - ++PI; - if (PI == PE) return Count; // Quick exit for single predecessor blocks - - BasicBlock *SecondPred = *PI; - if (SecondPred != FirstPred) Count += getEdgeWeight(SecondPred, BB); - ++PI; - if (PI == PE) return Count; // Quick exit for two predecessor blocks - - BasicBlock *ThirdPred = *PI; - if (ThirdPred != FirstPred && ThirdPred != SecondPred) - Count += getEdgeWeight(ThirdPred, BB); - ++PI; - if (PI == PE) return Count; // Quick exit for three predecessor blocks - - std::set ProcessedPreds; - ProcessedPreds.insert(FirstPred); - ProcessedPreds.insert(SecondPred); - ProcessedPreds.insert(ThirdPred); + // the sum of the edge frequencies from the incoming edges. 
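The hunk continues below with exactly this sum over distinct predecessors; stated standalone (the -1 sentinel mirrors ProfileInfo::MissingValue, and the function name is illustrative):

#include <cstddef>
#include <vector>

static const double MissingValue = -1;  // mirrors ProfileInfo::MissingValue

// A block's count is the sum of its distinct in-edge weights; a single
// unknown in-edge makes the whole count unknown.
double blockCount(const std::vector<double> &InEdgeWeights) {
  double Count = 0;
  for (size_t i = 0, e = InEdgeWeights.size(); i != e; ++i) {
    if (InEdgeWeights[i] == MissingValue) return MissingValue;
    Count += InEdgeWeights[i];
  }
  return Count;
}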
+  std::set<const BasicBlock*> ProcessedPreds;
+  double Count = 0;
   for (; PI != PE; ++PI)
-    if (ProcessedPreds.insert(*PI).second)
-      Count += getEdgeWeight(*PI, BB);
+    if (ProcessedPreds.insert(*PI).second) {
+      double w = getEdgeWeight(getEdge(*PI, BB));
+      if (w == MissingValue) {
+        Count = MissingValue;
+        break;
+      }
+      Count += w;
+    }
+
+  if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+  return Count;
+}
+
+double ProfileInfo::getExecutionCount(const Function *F) {
+  std::map<const Function*, double>::iterator J =
+    FunctionInformation.find(F);
+  if (J != FunctionInformation.end())
+    return J->second;
+
+  // isDeclaration() is checked here and not at start of function to allow
+  // functions without a body still to have an execution count.
+  if (F->isDeclaration()) return MissingValue;
+
+  double Count = getExecutionCount(&F->getEntryBlock());
+  if (Count != MissingValue) FunctionInformation[F] = Count;
   return Count;
 }
 
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+void ProfileInfo::replaceAllUses(const BasicBlock *RmBB,
+                                 const BasicBlock *DestBB) {
+  DEBUG(errs() << "Replacing " << RmBB->getNameStr()
+               << " with " << DestBB->getNameStr() << "\n");
+  const Function *F = DestBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+       I != E; ++I) {
+    Edge e = I->first;
+    Edge newedge; bool foundedge = false;
+    if (e.first == RmBB) {
+      newedge = getEdge(DestBB, e.second);
+      foundedge = true;
+    }
+    if (e.second == RmBB) {
+      newedge = getEdge(e.first, DestBB);
+      foundedge = true;
+    }
+    if (foundedge) {
+      double w = getEdgeWeight(e);
+      EdgeInformation[F][newedge] = w;
+      DEBUG(errs() << "Replacing " << e << " with " << newedge << "\n");
+      J->second.erase(e);
+    }
+  }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it's possible that there is more than one edge in the CFG from
+/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
+void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
+                            const BasicBlock *SecondBB,
+                            const BasicBlock *NewBB,
+                            bool MergeIdenticalEdges) {
+  const Function *F = FirstBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  // Generate edges and read current weight.
+  Edge e  = getEdge(FirstBB, SecondBB);
+  Edge n1 = getEdge(FirstBB, NewBB);
+  Edge n2 = getEdge(NewBB, SecondBB);
+  EdgeWeights &ECs = J->second;
+  double w = ECs[e];
+
+  int succ_count = 0;
+  if (!MergeIdenticalEdges) {
+    // First count the edges from FirstBB to SecondBB, if there is more than
+    // one, only slice out a proportional part for NewBB.
+    for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+        BBI != BBE; ++BBI) {
+      if (*BBI == SecondBB) succ_count++;
+    }
+    // When the NewBB is completely new, increment the count by one so that
+    // the counts are properly distributed.
+    if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+  } else {
+    // When the edges are merged anyway, then redirect all flow.
+    succ_count = 1;
+  }
+  // We know now how many edges there are from FirstBB to SecondBB, reroute a
+  // proportional part of the edge weight over NewBB.
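The division performed on the next line (w / succ_count) is the whole trick; a worked instance with illustrative names, not the patch's API:

// With three parallel (FirstBB -> SecondBB) edges jointly carrying w = 6,
// routing one of them through NewBB moves w / 3 = 2 onto the new path and
// leaves 4 on the original edge.
struct SplitShare { double ViaNewBB, LeftOnOriginal; };

SplitShare splitShare(double W, int SuccCount) {
  double NewW = W / SuccCount;        // slice rerouted over NewBB
  SplitShare S = { NewW, W - NewW };  // remainder stays put
  return S;
}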
+ double neww = w / succ_count; + ECs[n1] += neww; + ECs[n2] += neww; + BlockInformation[F][NewBB] += neww; + if (succ_count == 1) { + ECs.erase(e); + } else { + ECs[e] -= neww; + } +} + +raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) { + O << "("; + O << (E.first ? E.first->getNameStr() : "0"); + O << ","; + O << (E.second ? E.second->getNameStr() : "0"); + return O << ")"; +} //===----------------------------------------------------------------------===// // NoProfile ProfileInfo implementation diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index adb2bdc425491..25481b2ee671e 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -16,7 +16,7 @@ #include "llvm/Analysis/ProfileInfoTypes.h" #include "llvm/Module.h" #include "llvm/InstrTypes.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -26,10 +26,17 @@ using namespace llvm; // static inline unsigned ByteSwap(unsigned Var, bool Really) { if (!Really) return Var; - return ((Var & (255<< 0)) << 24) | - ((Var & (255<< 8)) << 8) | - ((Var & (255<<16)) >> 8) | - ((Var & (255<<24)) >> 24); + return ((Var & (255U<< 0U)) << 24U) | + ((Var & (255U<< 8U)) << 8U) | + ((Var & (255U<<16U)) >> 8U) | + ((Var & (255U<<24U)) >> 24U); +} + +static unsigned AddCounts(unsigned A, unsigned B) { + // If either value is undefined, use the other. + if (A == ProfileInfoLoader::Uncounted) return B; + if (B == ProfileInfoLoader::Uncounted) return A; + return A + B; } static void ReadProfilingBlock(const char *ToolName, FILE *F, @@ -38,7 +45,7 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F, // Read the number of entries... unsigned NumEntries; if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) { - cerr << ToolName << ": data packet truncated!\n"; + errs() << ToolName << ": data packet truncated!\n"; perror(0); exit(1); } @@ -49,35 +56,41 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F, // Read in the block of data... if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) { - cerr << ToolName << ": data packet truncated!\n"; + errs() << ToolName << ": data packet truncated!\n"; perror(0); exit(1); } - // Make sure we have enough space... + // Make sure we have enough space... The space is initialised to -1 to + // facitiltate the loading of missing values for OptimalEdgeProfiling. if (Data.size() < NumEntries) - Data.resize(NumEntries); + Data.resize(NumEntries, ProfileInfoLoader::Uncounted); // Accumulate the data we just read into the data. if (!ShouldByteSwap) { - for (unsigned i = 0; i != NumEntries; ++i) - Data[i] += TempSpace[i]; + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(TempSpace[i], Data[i]); + } } else { - for (unsigned i = 0; i != NumEntries; ++i) - Data[i] += ByteSwap(TempSpace[i], true); + for (unsigned i = 0; i != NumEntries; ++i) { + Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]); + } } } +const unsigned ProfileInfoLoader::Uncounted = ~0U; + // ProfileInfoLoader ctor - Read the specified profiling data file, exiting the // program if the file is invalid or broken. 
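As an aside before the constructor that this comment introduces: the Uncounted sentinel logic added above can be restated standalone. ~0U means "no sample", so a missing slot is replaced rather than summed (names mirror the patch; the main() driver is just for illustration):

#include <cassert>
#include <stdint.h>

static const uint32_t Uncounted = 0xFFFFFFFFu;  // mirrors ~0U above

// Merge two counter samples: an Uncounted slot adopts the other value;
// two real samples accumulate.
uint32_t addCounts(uint32_t A, uint32_t B) {
  if (A == Uncounted) return B;
  if (B == Uncounted) return A;
  return A + B;
}

int main() {
  assert(addCounts(Uncounted, 7) == 7);  // missing + sample -> sample
  assert(addCounts(3, 4) == 7);          // sample  + sample -> sum
  assert(addCounts(Uncounted, Uncounted) == Uncounted);
  return 0;
}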
// ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, const std::string &Filename, - Module &TheModule) : - M(TheModule), Warned(false) { - FILE *F = fopen(Filename.c_str(), "r"); + Module &TheModule) : + Filename(Filename), + M(TheModule), Warned(false) { + FILE *F = fopen(Filename.c_str(), "rb"); if (F == 0) { - cerr << ToolName << ": Error opening '" << Filename << "': "; + errs() << ToolName << ": Error opening '" << Filename << "': "; perror(0); exit(1); } @@ -95,7 +108,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, case ArgumentInfo: { unsigned ArgLength; if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) { - cerr << ToolName << ": arguments packet truncated!\n"; + errs() << ToolName << ": arguments packet truncated!\n"; perror(0); exit(1); } @@ -106,7 +119,7 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, if (ArgLength) if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) { - cerr << ToolName << ": arguments packet truncated!\n"; + errs() << ToolName << ": arguments packet truncated!\n"; perror(0); exit(1); } @@ -126,12 +139,16 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); break; + case OptEdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts); + break; + case BBTraceInfo: ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace); break; default: - cerr << ToolName << ": Unknown packet type #" << PacketType << "!\n"; + errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n"; exit(1); } } @@ -139,139 +156,3 @@ ProfileInfoLoader::ProfileInfoLoader(const char *ToolName, fclose(F); } - -// getFunctionCounts - This method is used by consumers of function counting -// information. If we do not directly have function count information, we -// compute it from other, more refined, types of profile information. -// -void ProfileInfoLoader::getFunctionCounts(std::vector > &Counts) { - if (FunctionCounts.empty()) { - if (hasAccurateBlockCounts()) { - // Synthesize function frequency information from the number of times - // their entry blocks were executed. - std::vector > BlockCounts; - getBlockCounts(BlockCounts); - - for (unsigned i = 0, e = BlockCounts.size(); i != e; ++i) - if (&BlockCounts[i].first->getParent()->getEntryBlock() == - BlockCounts[i].first) - Counts.push_back(std::make_pair(BlockCounts[i].first->getParent(), - BlockCounts[i].second)); - } else { - cerr << "Function counts are not available!\n"; - } - return; - } - - unsigned Counter = 0; - for (Module::iterator I = M.begin(), E = M.end(); - I != E && Counter != FunctionCounts.size(); ++I) - if (!I->isDeclaration()) - Counts.push_back(std::make_pair(I, FunctionCounts[Counter++])); -} - -// getBlockCounts - This method is used by consumers of block counting -// information. If we do not directly have block count information, we -// compute it from other, more refined, types of profile information. -// -void ProfileInfoLoader::getBlockCounts(std::vector > &Counts) { - if (BlockCounts.empty()) { - if (hasAccurateEdgeCounts()) { - // Synthesize block count information from edge frequency information. - // The block execution frequency is equal to the sum of the execution - // frequency of all outgoing edges from a block. - // - // If a block has no successors, this will not be correct, so we have to - // special case it. 
:( - std::vector > EdgeCounts; - getEdgeCounts(EdgeCounts); - - std::map InEdgeFreqs; - - BasicBlock *LastBlock = 0; - TerminatorInst *TI = 0; - for (unsigned i = 0, e = EdgeCounts.size(); i != e; ++i) { - if (EdgeCounts[i].first.first != LastBlock) { - LastBlock = EdgeCounts[i].first.first; - TI = LastBlock->getTerminator(); - Counts.push_back(std::make_pair(LastBlock, 0)); - } - Counts.back().second += EdgeCounts[i].second; - unsigned SuccNum = EdgeCounts[i].first.second; - if (SuccNum >= TI->getNumSuccessors()) { - if (!Warned) { - cerr << "WARNING: profile info doesn't seem to match" - << " the program!\n"; - Warned = true; - } - } else { - // If this successor has no successors of its own, we will never - // compute an execution count for that block. Remember the incoming - // edge frequencies to add later. - BasicBlock *Succ = TI->getSuccessor(SuccNum); - if (Succ->getTerminator()->getNumSuccessors() == 0) - InEdgeFreqs[Succ] += EdgeCounts[i].second; - } - } - - // Now we have to accumulate information for those blocks without - // successors into our table. - for (std::map::iterator I = InEdgeFreqs.begin(), - E = InEdgeFreqs.end(); I != E; ++I) { - unsigned i = 0; - for (; i != Counts.size() && Counts[i].first != I->first; ++i) - /*empty*/; - if (i == Counts.size()) Counts.push_back(std::make_pair(I->first, 0)); - Counts[i].second += I->second; - } - - } else { - cerr << "Block counts are not available!\n"; - } - return; - } - - unsigned Counter = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - Counts.push_back(std::make_pair(BB, BlockCounts[Counter++])); - if (Counter == BlockCounts.size()) - return; - } -} - -// getEdgeCounts - This method is used by consumers of edge counting -// information. If we do not directly have edge count information, we compute -// it from other, more refined, types of profile information. -// -void ProfileInfoLoader::getEdgeCounts(std::vector > &Counts) { - if (EdgeCounts.empty()) { - cerr << "Edge counts not available, and no synthesis " - << "is implemented yet!\n"; - return; - } - - unsigned Counter = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (unsigned i = 0, e = BB->getTerminator()->getNumSuccessors(); - i != e; ++i) { - Counts.push_back(std::make_pair(Edge(BB, i), EdgeCounts[Counter++])); - if (Counter == EdgeCounts.size()) - return; - } -} - -// getBBTrace - This method is used by consumers of basic-block trace -// information. -// -void ProfileInfoLoader::getBBTrace(std::vector &Trace) { - if (BBTrace.empty ()) { - cerr << "Basic block trace is not available!\n"; - return; - } - cerr << "Basic block trace loading is not implemented yet!\n"; -} diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 0a8a87bd0f97a..89d90bca2166d 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -11,18 +11,27 @@ // loads the information from a profile dump file. 
// //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "profile-loader" #include "llvm/BasicBlock.h" #include "llvm/InstrTypes.h" +#include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" +#include using namespace llvm; +STATISTIC(NumEdgesRead, "The # of edges read."); + static cl::opt ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"), cl::value_desc("filename"), @@ -31,6 +40,9 @@ ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"), namespace { class VISIBILITY_HIDDEN LoaderPass : public ModulePass, public ProfileInfo { std::string Filename; + std::set SpanningTree; + std::set BBisUnvisited; + unsigned ReadCount; public: static char ID; // Class identification, replacement for typeinfo explicit LoaderPass(const std::string &filename = "") @@ -46,6 +58,12 @@ namespace { return "Profiling information loader"; } + // recurseBasicBlock() - Calculates the edge weights for as much basic + // blocks as possbile. + virtual void recurseBasicBlock(const BasicBlock *BB); + virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, unsigned &); + virtual void readEdge(ProfileInfo::Edge, std::vector&); + /// run - Load the profile information from the specified file. virtual bool runOnModule(Module &M); }; @@ -66,25 +84,210 @@ Pass *llvm::createProfileLoaderPass(const std::string &Filename) { return new LoaderPass(Filename); } +void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc, + unsigned &uncalc, unsigned &count) { + double w; + if ((w = getEdgeWeight(edge)) == MissingValue) { + tocalc = edge; + uncalc++; + } else { + count+=w; + } +} + +// recurseBasicBlock - Visits all neighbours of a block and then tries to +// calculate the missing edge values. 
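Before the implementation below: the recovery rule that readEdgeOrRemember feeds is plain flow conservation. If all but one edge incident to a block are known, the unknown one is determined. A sketch with an illustrative signature:

// If exactly one edge at a block is unmeasured, flow conservation
// (sum of in-weights == sum of out-weights) pins down its weight as the
// absolute difference of the measured sums.
double recoverMissingEdge(double MeasuredInSum, double MeasuredOutSum) {
  double D = MeasuredInSum - MeasuredOutSum;
  return D < 0 ? -D : D;
}
// e.g. with measured in-edges {10, 5} and measured out-edges {9}, the one
// unmeasured out-edge must carry |15 - 9| = 6 for the flow to balance.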
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) { + + // break recursion if already visited + if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return; + BBisUnvisited.erase(BB); + if (!BB) return; + + for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } + for (pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi) { + recurseBasicBlock(*bbi); + } + + Edge edgetocalc; + unsigned uncalculated = 0; + + // collect weights of all incoming and outgoing edges, rememer edges that + // have no value + unsigned incount = 0; + SmallSet pred_visited; + pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + if (bbi==bbe) { + readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount); + } + for (;bbi != bbe; ++bbi) { + if (pred_visited.insert(*bbi)) { + readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount); + } + } + + unsigned outcount = 0; + SmallSet succ_visited; + succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); + if (sbbi==sbbe) { + readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount); + } + for (;sbbi != sbbe; ++sbbi) { + if (succ_visited.insert(*sbbi)) { + readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount); + } + } + + // if exactly one edge weight was missing, calculate it and remove it from + // spanning tree + if (uncalculated == 1) { + if (incount < outcount) { + EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; + } else { + EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; + } + DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": " + << format("%g", getEdgeWeight(edgetocalc)) << "\n"); + SpanningTree.erase(edgetocalc); + } +} + +void LoaderPass::readEdge(ProfileInfo::Edge e, + std::vector &ECs) { + if (ReadCount < ECs.size()) { + double weight = ECs[ReadCount++]; + if (weight != ProfileInfoLoader::Uncounted) { + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also representable + // in double. + EdgeInformation[getFunction(e)][e] += (double)weight; + + DEBUG(errs() << "--Read Edge Counter for " << e + << " (# "<< (ReadCount-1) << "): " + << (unsigned)getEdgeWeight(e) << "\n"); + } else { + // This happens only if reading optimal profiling information, not when + // reading regular profiling information. 
+ SpanningTree.insert(e); + } + } +} + bool LoaderPass::runOnModule(Module &M) { ProfileInfoLoader PIL("profile-loader", Filename, M); - EdgeCounts.clear(); - bool PrintedWarning = false; - - std::vector > ECs; - PIL.getEdgeCounts(ECs); - for (unsigned i = 0, e = ECs.size(); i != e; ++i) { - BasicBlock *BB = ECs[i].first.first; - unsigned SuccNum = ECs[i].first.second; - TerminatorInst *TI = BB->getTerminator(); - if (SuccNum >= TI->getNumSuccessors()) { - if (!PrintedWarning) { - cerr << "WARNING: profile information is inconsistent with " + + EdgeInformation.clear(); + std::vector Counters = PIL.getRawEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(errs()<<"Working on "<getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " << "the current program!\n"; - PrintedWarning = true; + } + NumEdgesRead = ReadCount; + } + + Counters = PIL.getRawOptimalEdgeCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + DEBUG(errs()<<"Working on "<getNameStr()<<"\n"); + readEdge(getEdge(0,&F->getEntryBlock()), Counters); + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + readEdge(getEdge(BB,0), Counters); + } + for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { + readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); + } } - } else { - EdgeCounts[std::make_pair(BB, TI->getSuccessor(SuccNum))]+= ECs[i].second; + while (SpanningTree.size() > 0) { +#if 0 + unsigned size = SpanningTree.size(); +#endif + BBisUnvisited.clear(); + for (std::set::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + BBisUnvisited.insert(ei->first); + BBisUnvisited.insert(ei->second); + } + while (BBisUnvisited.size() > 0) { + recurseBasicBlock(*BBisUnvisited.begin()); + } +#if 0 + if (SpanningTree.size() == size) { + DEBUG(errs()<<"{"); + for (std::set::iterator ei = SpanningTree.begin(), + ee = SpanningTree.end(); ei != ee; ++ei) { + DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<"," + <<(ei->second?ei->second->getName():"0")<<"),"); + } + assert(0 && "No edge calculated!"); + } +#endif + } + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + NumEdgesRead = ReadCount; + } + + BlockInformation.clear(); + Counters = PIL.getRawBlockCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. 
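The claim in this comment, on which the assignment that follows relies, can be checked mechanically; a one-line compile-time sketch (C++11 static_assert, not part of the patch):

#include <limits>

// An IEEE-754 double carries a 53-bit significand, so every 32-bit
// unsigned profile counter converts to double without rounding.
static_assert(std::numeric_limits<double>::digits >= 32,
              "32-bit counters are exactly representable in double");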
+ BlockInformation[F][BB] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; + } + } + + FunctionInformation.clear(); + Counters = PIL.getRawFunctionCounts(); + if (Counters.size() > 0) { + ReadCount = 0; + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + if (ReadCount < Counters.size()) + // Here the data realm changes from the unsigned of the file to the + // double of the ProfileInfo. This conversion is save because we know + // that everything thats representable in unsinged is also + // representable in double. + FunctionInformation[F] = (double)Counters[ReadCount++]; + } + if (ReadCount != Counters.size()) { + errs() << "WARNING: profile information is inconsistent with " + << "the current program!\n"; } } diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp new file mode 100644 index 0000000000000..9766da5992df0 --- /dev/null +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -0,0 +1,343 @@ +//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that checks profiling information for +// plausibility. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "profile-verifier" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include +using namespace llvm; + +static cl::opt +ProfileVerifierDisableAssertions("profile-verifier-noassert", + cl::desc("Disable assertions")); + +namespace { + class VISIBILITY_HIDDEN ProfileVerifierPass : public FunctionPass { + + struct DetailedBlockInfo { + const BasicBlock *BB; + double BBWeight; + double inWeight; + int inCount; + double outWeight; + int outCount; + }; + + ProfileInfo *PI; + std::set BBisVisited; + std::set FisVisited; + bool DisableAssertions; + + // When debugging is enabled, the verifier prints a whole slew of debug + // information, otherwise its just the assert. These are all the helper + // functions. + bool PrintedDebugTree; + std::set BBisPrinted; + void debugEntry(DetailedBlockInfo*); + void printDebugInfo(const BasicBlock *BB); + + public: + static char ID; // Class identification, replacement for typeinfo + + explicit ProfileVerifierPass () : FunctionPass(&ID) { + DisableAssertions = ProfileVerifierDisableAssertions; + } + explicit ProfileVerifierPass (bool da) : FunctionPass(&ID), + DisableAssertions(da) { + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + } + + const char *getPassName() const { + return "Profiling information verifier"; + } + + /// run - Verify the profile information. 
+ bool runOnFunction(Function &F); + void recurseBasicBlock(const BasicBlock*); + + bool exitReachable(const Function*); + double ReadOrAssert(ProfileInfo::Edge); + void CheckValue(bool, const char*, DetailedBlockInfo*); + }; +} // End of anonymous namespace + +char ProfileVerifierPass::ID = 0; +static RegisterPass +X("profile-verifier", "Verify profiling information", false, true); + +namespace llvm { + FunctionPass *createProfileVerifierPass() { + return new ProfileVerifierPass(ProfileVerifierDisableAssertions); + } +} + +void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) { + + if (BBisPrinted.find(BB) != BBisPrinted.end()) return; + + double BBWeight = PI->getExecutionCount(BB); + if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; } + double inWeight = 0; + int inCount = 0; + std::set ProcessedPreds; + for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + ProfileInfo::Edge E = PI->getEdge(*bbi,BB); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; } + errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n"; + inWeight += EdgeWeight; + inCount++; + } + } + double outWeight = 0; + int outCount = 0; + std::set ProcessedSuccs; + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedSuccs.insert(*bbi).second) { + ProfileInfo::Edge E = PI->getEdge(BB,*bbi); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; } + errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n"; + outWeight += EdgeWeight; + outCount++; + } + } + errs()<<"Block "<getNameStr()<<" in "<getParent()->getNameStr() + <<",BBWeight="<BB->getNameStr() << " in " + << DI->BB->getParent()->getNameStr() << ":"; + errs() << "BBWeight=" << DI->BBWeight << ","; + errs() << "inWeight=" << DI->inWeight << ","; + errs() << "inCount=" << DI->inCount << ","; + errs() << "outWeight=" << DI->outWeight << ","; + errs() << "outCount=" << DI->outCount << "\n"; + if (!PrintedDebugTree) { + PrintedDebugTree = true; + printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); + } +} + +// This compares A and B but considering maybe small differences. +static bool Equals(double A, double B) { + double maxRelativeError = 0.0000001; + if (A == B) + return true; + double relativeError; + if (fabs(B) > fabs(A)) + relativeError = fabs((A - B) / B); + else + relativeError = fabs((A - B) / A); + if (relativeError <= maxRelativeError) return true; + return false; +} + +// This checks if the function "exit" is reachable from an given function +// via calls, this is necessary to check if a profile is valid despite the +// counts not fitting exactly. 
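An aside on Equals() above, with a borderline usage example; almostEqual is a hypothetical helper mirroring its relative-error test:

#include <algorithm>
#include <cmath>

// Accept counts that differ only within a small relative error, as the
// verifier's Equals() does with maxRelativeError = 1e-7.
bool almostEqual(double A, double B, double MaxRelErr = 1e-7) {
  if (A == B) return true;
  double Rel = std::fabs(A - B) / std::max(std::fabs(A), std::fabs(B));
  return Rel <= MaxRelErr;
}
// almostEqual(1.0e6, 1.0e6 + 1.0e-5) -> true  (accumulated float noise)
// almostEqual(1000.0, 1001.0)        -> false (a real mismatch)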
+bool ProfileVerifierPass::exitReachable(const Function *F) { + if (!F) return false; + + if (FisVisited.count(F)) return false; + + Function *Exit = F->getParent()->getFunction("exit"); + if (Exit == F) { + return true; + } + + FisVisited.insert(F); + bool exits = false; + for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (const CallInst *CI = dyn_cast(&*I)) { + exits |= exitReachable(CI->getCalledFunction()); + if (exits) break; + } + } + return exits; +} + +#define ASSERTMESSAGE(M) \ + errs() << (M) << "\n"; \ + if (!DisableAssertions) assert(0 && (M)); + +double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) { + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfo::MissingValue) { + errs() << "Edge " << E << " in Function " + << ProfileInfo::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("ASSERT:Edge has missing value"); + return 0; + } else { + return EdgeWeight; + } +} + +void ProfileVerifierPass::CheckValue(bool Error, const char *Message, + DetailedBlockInfo *DI) { + if (Error) { + DEBUG(debugEntry(DI)); + errs() << "Block " << DI->BB->getNameStr() << " in Function " + << DI->BB->getParent()->getNameStr() << ": "; + ASSERTMESSAGE(Message); + } + return; +} + +// This calculates the Information for a block and then recurses into the +// successors. +void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) { + + // Break the recursion by remembering all visited blocks. + if (BBisVisited.find(BB) != BBisVisited.end()) return; + + // Use a data structure to store all the information, this can then be handed + // to debug printers. + DetailedBlockInfo DI; + DI.BB = BB; + DI.outCount = DI.inCount = DI.inWeight = DI.outWeight = 0; + + // Read predecessors. + std::set ProcessedPreds; + pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB); + // If there are none, check for (0,BB) edge. + if (bpi == bpe) { + DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); + DI.inCount++; + } + for (;bpi != bpe; ++bpi) { + if (ProcessedPreds.insert(*bpi).second) { + DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + DI.inCount++; + } + } + + // Read successors. + std::set ProcessedSuccs; + succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // If there is an (0,BB) edge, consider it too. (This is done not only when + // there are no successors, but every time; not every function contains + // return blocks with no successors (think loop latch as return block)). + double w = PI->getEdgeWeight(PI->getEdge(BB,0)); + if (w != ProfileInfo::MissingValue) { + DI.outWeight += w; + DI.outCount++; + } + for (;bbi != bbe; ++bbi) { + if (ProcessedSuccs.insert(*bbi).second) { + DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); + DI.outCount++; + } + } + + // Read block weight. + DI.BBWeight = PI->getExecutionCount(BB); + CheckValue(DI.BBWeight == ProfileInfo::MissingValue, + "ASSERT:BasicBlock has missing value", &DI); + + // Check if this block is a setjmp target. + bool isSetJmpTarget = false; + if (DI.outWeight > DI.inWeight) { + for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast(&*i)) { + Function *F = CI->getCalledFunction(); + if (F && (F->getNameStr() == "_setjmp")) { + isSetJmpTarget = true; break; + } + } + } + } + // Check if this block is eventually reaching exit. 
+ bool isExitReachable = false; + if (DI.inWeight > DI.outWeight) { + for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast(&*i)) { + FisVisited.clear(); + isExitReachable |= exitReachable(CI->getCalledFunction()); + if (isExitReachable) break; + } + } + } + + if (DI.inCount > 0 && DI.outCount == 0) { + // If this is a block with no successors. + if (!isSetJmpTarget) { + CheckValue(!Equals(DI.inWeight,DI.BBWeight), + "ASSERT:inWeight and BBWeight do not match", &DI); + } + } else if (DI.inCount == 0 && DI.outCount > 0) { + // If this is a block with no predecessors. + if (!isExitReachable) + CheckValue(!Equals(DI.BBWeight,DI.outWeight), + "ASSERT:BBWeight and outWeight do not match", &DI); + } else { + // If this block has successors and predecessors. + if (DI.inWeight > DI.outWeight && !isExitReachable) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "ASSERT:inWeight and outWeight do not match", &DI); + if (DI.inWeight < DI.outWeight && !isSetJmpTarget) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "ASSERT:inWeight and outWeight do not match", &DI); + } + + + // Mark this block as visited, rescurse into successors. + BBisVisited.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + recurseBasicBlock(*bbi); + } +} + +bool ProfileVerifierPass::runOnFunction(Function &F) { + PI = &getAnalysis(); + + // Prepare global variables. + PrintedDebugTree = false; + BBisVisited.clear(); + + // Fetch entry block and recurse into it. + const BasicBlock *entry = &F.getEntryBlock(); + recurseBasicBlock(entry); + + if (!DisableAssertions) + assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) && + "Function count and entry block count do not match"); + return false; +} diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt new file mode 100644 index 0000000000000..c401090272994 --- /dev/null +++ b/lib/Analysis/README.txt @@ -0,0 +1,18 @@ +Analysis Opportunities: + +//===---------------------------------------------------------------------===// + +In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the +ScalarEvolution expression for %r is this: + + {1,+,3,+,2} + +Outside the loop, this could be evaluated simply as (%n * %n), however +ScalarEvolution currently evaluates it as + + (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n)) + +In addition to being much more complicated, it involves i65 arithmetic, +which is very inefficient when expanded into code. + +//===---------------------------------------------------------------------===// diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 408156265d242..62f3aa1dcae4d 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -14,9 +14,8 @@ // There are several aspects to this library. First is the representation of // scalar expressions, which are represented as subclasses of the SCEV class. // These classes are used to represent certain types of subexpressions that we -// can handle. These classes are reference counted, managed by the const SCEV* -// class. We only create one SCEV of a particular shape, so pointer-comparisons -// for equality are legal. +// can handle. We only create one SCEV of a particular shape, so +// pointer-comparisons for equality are legal. 
// // One important aspect of the SCEV objects is that they are never cyclic, even // if there is a cycle in the dataflow for an expression (ie, a PHI node). If @@ -64,7 +63,10 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" @@ -74,12 +76,14 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ConstantRange.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include using namespace llvm; @@ -118,11 +122,6 @@ void SCEV::dump() const { errs() << '\n'; } -void SCEV::print(std::ostream &o) const { - raw_os_ostream OS(o); - print(OS); -} - bool SCEV::isZero() const { if (const SCEVConstant *SC = dyn_cast(this)) return SC->getValue()->isZero(); @@ -142,33 +141,26 @@ bool SCEV::isAllOnesValue() const { } SCEVCouldNotCompute::SCEVCouldNotCompute() : - SCEV(scCouldNotCompute) {} - -void SCEVCouldNotCompute::Profile(FoldingSetNodeID &ID) const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); -} + SCEV(FoldingSetNodeID(), scCouldNotCompute) {} bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); return false; } const Type *SCEVCouldNotCompute::getType() const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); return 0; } bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const { - assert(0 && "Attempt to use a SCEVCouldNotCompute object!"); + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); return false; } -const SCEV * -SCEVCouldNotCompute::replaceSymbolicValuesWithConcrete( - const SCEV *Sym, - const SCEV *Conc, - ScalarEvolution &SE) const { - return this; +bool SCEVCouldNotCompute::hasOperand(const SCEV *) const { + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; } void SCEVCouldNotCompute::print(raw_ostream &OS) const { @@ -179,30 +171,26 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) { return S->getSCEVType() == scCouldNotCompute; } -const SCEV* ScalarEvolution::getConstant(ConstantInt *V) { +const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVConstant(V); + new (S) SCEVConstant(ID, V); UniqueSCEVs.InsertNode(S, IP); return S; } -const SCEV* ScalarEvolution::getConstant(const APInt& Val) { - return getConstant(ConstantInt::get(Val)); +const SCEV *ScalarEvolution::getConstant(const APInt& Val) { + return getConstant(ConstantInt::get(getContext(), Val)); } -const SCEV* +const SCEV * ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { - return getConstant(ConstantInt::get(cast(Ty), V, isSigned)); -} - -void SCEVConstant::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scConstant); 
- ID.AddPointer(V); + return getConstant( + ConstantInt::get(cast(Ty), V, isSigned)); } const Type *SCEVConstant::getType() const { return V->getType(); } @@ -211,22 +199,21 @@ void SCEVConstant::print(raw_ostream &OS) const { WriteAsOperand(OS, V, false); } -SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy, - const SCEV* op, const Type *ty) - : SCEV(SCEVTy), Op(op), Ty(ty) {} - -void SCEVCastExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(getSCEVType()); - ID.AddPointer(Op); - ID.AddPointer(Ty); -} +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeID &ID, + unsigned SCEVTy, const SCEV *op, const Type *ty) + : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return Op->dominates(BB, DT); } -SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty) - : SCEVCastExpr(scTruncate, op, ty) { +bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + return Op->properlyDominates(BB, DT); +} + +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isInteger() || isa(Op->getType())) && (Ty->isInteger() || isa(Ty)) && "Cannot truncate non-integer value!"); @@ -236,8 +223,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEV* op, const Type *ty) - : SCEVCastExpr(scZeroExtend, op, ty) { +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isInteger() || isa(Op->getType())) && (Ty->isInteger() || isa(Ty)) && "Cannot zero extend non-integer value!"); @@ -247,8 +235,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEV* op, const Type *ty) - : SCEVCastExpr(scSignExtend, op, ty) { +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isInteger() || isa(Op->getType())) && (Ty->isInteger() || isa(Ty)) && "Cannot sign extend non-integer value!"); @@ -267,46 +256,6 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const { OS << ")"; } -const SCEV * -SCEVCommutativeExpr::replaceSymbolicValuesWithConcrete( - const SCEV *Sym, - const SCEV *Conc, - ScalarEvolution &SE) const { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const SCEV* H = - getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (H != getOperand(i)) { - SmallVector NewOps; - NewOps.reserve(getNumOperands()); - for (unsigned j = 0; j != i; ++j) - NewOps.push_back(getOperand(j)); - NewOps.push_back(H); - for (++i; i != e; ++i) - NewOps.push_back(getOperand(i)-> - replaceSymbolicValuesWithConcrete(Sym, Conc, SE)); - - if (isa(this)) - return SE.getAddExpr(NewOps); - else if (isa(this)) - return SE.getMulExpr(NewOps); - else if (isa(this)) - return SE.getSMaxExpr(NewOps); - else if (isa(this)) - return SE.getUMaxExpr(NewOps); - else - assert(0 && "Unknown commutative expr!"); - } - } - return this; -} - -void SCEVNAryExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(getSCEVType()); - ID.AddInteger(Operands.size()); - for (unsigned i = 0, e = Operands.size(); i != e; ++i) - ID.AddPointer(Operands[i]); -} - bool 
SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (!getOperand(i)->dominates(BB, DT)) @@ -315,16 +264,22 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return true; } -void SCEVUDivExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scUDivExpr); - ID.AddPointer(LHS); - ID.AddPointer(RHS); +bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (!getOperand(i)->properlyDominates(BB, DT)) + return false; + } + return true; } bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { return LHS->dominates(BB, DT) && RHS->dominates(BB, DT); } +bool SCEVUDivExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + return LHS->properlyDominates(BB, DT) && RHS->properlyDominates(BB, DT); +} + void SCEVUDivExpr::print(raw_ostream &OS) const { OS << "(" << *LHS << " /u " << *RHS << ")"; } @@ -338,38 +293,6 @@ const Type *SCEVUDivExpr::getType() const { return RHS->getType(); } -void SCEVAddRecExpr::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scAddRecExpr); - ID.AddInteger(Operands.size()); - for (unsigned i = 0, e = Operands.size(); i != e; ++i) - ID.AddPointer(Operands[i]); - ID.AddPointer(L); -} - -const SCEV * -SCEVAddRecExpr::replaceSymbolicValuesWithConcrete(const SCEV *Sym, - const SCEV *Conc, - ScalarEvolution &SE) const { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const SCEV* H = - getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE); - if (H != getOperand(i)) { - SmallVector NewOps; - NewOps.reserve(getNumOperands()); - for (unsigned j = 0; j != i; ++j) - NewOps.push_back(getOperand(j)); - NewOps.push_back(H); - for (++i; i != e; ++i) - NewOps.push_back(getOperand(i)-> - replaceSymbolicValuesWithConcrete(Sym, Conc, SE)); - - return SE.getAddRecExpr(NewOps, L); - } - } - return this; -} - - bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { // Add recurrences are never invariant in the function-body (null loop). if (!QueryLoop) @@ -396,9 +319,13 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "}<" << L->getHeader()->getName() + ">"; } -void SCEVUnknown::Profile(FoldingSetNodeID &ID) const { - ID.AddInteger(scUnknown); - ID.AddPointer(V); +void SCEVFieldOffsetExpr::print(raw_ostream &OS) const { + // LLVM struct fields don't have names, so just print the field number. 
+ OS << "offsetof(" << *STy << ", " << FieldNo << ")"; +} + +void SCEVAllocSizeExpr::print(raw_ostream &OS) const { + OS << "sizeof(" << *AllocTy << ")"; } bool SCEVUnknown::isLoopInvariant(const Loop *L) const { @@ -417,6 +344,12 @@ bool SCEVUnknown::dominates(BasicBlock *BB, DominatorTree *DT) const { return true; } +bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + if (Instruction *I = dyn_cast(getValue())) + return DT->properlyDominates(I->getParent(), BB); + return true; +} + const Type *SCEVUnknown::getType() const { return V->getType(); } @@ -429,6 +362,41 @@ void SCEVUnknown::print(raw_ostream &OS) const { // SCEV Utilities //===----------------------------------------------------------------------===// +static bool CompareTypes(const Type *A, const Type *B) { + if (A->getTypeID() != B->getTypeID()) + return A->getTypeID() < B->getTypeID(); + if (const IntegerType *AI = dyn_cast(A)) { + const IntegerType *BI = cast(B); + return AI->getBitWidth() < BI->getBitWidth(); + } + if (const PointerType *AI = dyn_cast(A)) { + const PointerType *BI = cast(B); + return CompareTypes(AI->getElementType(), BI->getElementType()); + } + if (const ArrayType *AI = dyn_cast(A)) { + const ArrayType *BI = cast(B); + if (AI->getNumElements() != BI->getNumElements()) + return AI->getNumElements() < BI->getNumElements(); + return CompareTypes(AI->getElementType(), BI->getElementType()); + } + if (const VectorType *AI = dyn_cast(A)) { + const VectorType *BI = cast(B); + if (AI->getNumElements() != BI->getNumElements()) + return AI->getNumElements() < BI->getNumElements(); + return CompareTypes(AI->getElementType(), BI->getElementType()); + } + if (const StructType *AI = dyn_cast(A)) { + const StructType *BI = cast(B); + if (AI->getNumElements() != BI->getNumElements()) + return AI->getNumElements() < BI->getNumElements(); + for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i) + if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) || + CompareTypes(BI->getElementType(i), AI->getElementType(i))) + return CompareTypes(AI->getElementType(i), BI->getElementType(i)); + } + return false; +} + namespace { /// SCEVComplexityCompare - Return true if the complexity of the LHS is less /// than the complexity of the RHS. This comparator is used to canonicalize @@ -439,6 +407,10 @@ namespace { explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {} bool operator()(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return false; + // Primarily, sort the SCEVs by their getSCEVType(). if (LHS->getSCEVType() != RHS->getSCEVType()) return LHS->getSCEVType() < RHS->getSCEVType(); @@ -495,6 +467,8 @@ namespace { // Compare constant values. if (const SCEVConstant *LC = dyn_cast(LHS)) { const SCEVConstant *RC = cast(RHS); + if (LC->getValue()->getBitWidth() != RC->getValue()->getBitWidth()) + return LC->getValue()->getBitWidth() < RC->getValue()->getBitWidth(); return LC->getValue()->getValue().ult(RC->getValue()->getValue()); } @@ -539,7 +513,22 @@ namespace { return operator()(LC->getOperand(), RC->getOperand()); } - assert(0 && "Unknown SCEV kind!"); + // Compare offsetof expressions. 
+ if (const SCEVFieldOffsetExpr *LA = dyn_cast(LHS)) { + const SCEVFieldOffsetExpr *RA = cast(RHS); + if (CompareTypes(LA->getStructType(), RA->getStructType()) || + CompareTypes(RA->getStructType(), LA->getStructType())) + return CompareTypes(LA->getStructType(), RA->getStructType()); + return LA->getFieldNo() < RA->getFieldNo(); + } + + // Compare sizeof expressions by the allocation type. + if (const SCEVAllocSizeExpr *LA = dyn_cast(LHS)) { + const SCEVAllocSizeExpr *RA = cast(RHS); + return CompareTypes(LA->getAllocType(), RA->getAllocType()); + } + + llvm_unreachable("Unknown SCEV kind!"); return false; } }; @@ -555,7 +544,7 @@ namespace { /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. /// -static void GroupByComplexity(SmallVectorImpl &Ops, +static void GroupByComplexity(SmallVectorImpl &Ops, LoopInfo *LI) { if (Ops.size() < 2) return; // Noop if (Ops.size() == 2) { @@ -598,9 +587,9 @@ static void GroupByComplexity(SmallVectorImpl &Ops, /// BinomialCoefficient - Compute BC(It, K). The result has width W. /// Assume, K > 0. -static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K, - ScalarEvolution &SE, - const Type* ResultTy) { +static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, + ScalarEvolution &SE, + const Type* ResultTy) { // Handle the simplest case efficiently. if (K == 1) return SE.getTruncateOrZeroExtend(It, ResultTy); @@ -690,16 +679,17 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K, MultiplyFactor = MultiplyFactor.trunc(W); // Calculate the product, at width T+W - const IntegerType *CalculationTy = IntegerType::get(CalculationBits); - const SCEV* Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); + const IntegerType *CalculationTy = IntegerType::get(SE.getContext(), + CalculationBits); + const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); for (unsigned i = 1; i != K; ++i) { - const SCEV* S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); + const SCEV *S = SE.getMinusSCEV(It, SE.getIntegerSCEV(i, It->getType())); Dividend = SE.getMulExpr(Dividend, SE.getTruncateOrZeroExtend(S, CalculationTy)); } // Divide by 2^T - const SCEV* DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); + const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); // Truncate the result, and divide by K! / 2^T. @@ -716,14 +706,14 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K, /// /// where BC(It, k) stands for binomial coefficient. /// -const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It, - ScalarEvolution &SE) const { - const SCEV* Result = getStart(); +const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, + ScalarEvolution &SE) const { + const SCEV *Result = getStart(); for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { // The computation is correct in the face of overflow provided that the // multiplication is performed _after_ the evaluation of the binomial // coefficient. 
- const SCEV* Coeff = BinomialCoefficient(It, i, SE, getType()); + const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); if (isa(Coeff)) return Coeff; @@ -736,14 +726,21 @@ const SCEV* SCEVAddRecExpr::evaluateAtIteration(const SCEV* It, // SCEV Expression folder implementations //===----------------------------------------------------------------------===// -const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op, - const Type *Ty) { +const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && "This is not a truncating conversion!"); assert(isSCEVable(Ty) && "This is not a conversion to a SCEVable type!"); Ty = getEffectiveSCEVType(Ty); + FoldingSetNodeID ID; + ID.AddInteger(scTruncate); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( @@ -763,26 +760,23 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op, // If the input value is a chrec scev, truncate the chrec's operands. if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { - SmallVector Operands; + SmallVector Operands; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); return getAddRecExpr(Operands, AddRec->getLoop()); } - FoldingSetNodeID ID; - ID.AddInteger(scTruncate); - ID.AddPointer(Op); - ID.AddPointer(Ty); - void *IP = 0; + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVTruncateExpr(Op, Ty); + new (S) SCEVTruncateExpr(ID, Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } -const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, - const Type *Ty) { +const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -801,12 +795,33 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op, if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) return getZeroExtendExpr(SZ->getOperand(), Ty); + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scZeroExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can zero extend all of the // operands (often constants). This allows analysis of something like // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->hasNoUnsignedWrap()) + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + // Check whether the backedge-taken count is SCEVCouldNotCompute. 
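// A minimal sketch of the closed form evaluateAtIteration computes above:
// for an add recurrence {A,+,B,+,C}, the value at iteration n is
// A*BC(n,0) + B*BC(n,1) + C*BC(n,2). This toy version uses uint64_t and a
// naive exact binomial, ignoring the fixed-width overflow handling that
// BinomialCoefficient performs with the 2^T factorization.
#include <cassert>
#include <cstdint>
#include <vector>

static uint64_t BC(uint64_t N, unsigned K) {
  uint64_t R = 1;
  for (unsigned i = 0; i != K; ++i)
    R = R * (N - i) / (i + 1); // exact at each step for consecutive factors
  return R;
}

static uint64_t EvaluateAtIteration(const std::vector<uint64_t> &Ops,
                                    uint64_t It) {
  uint64_t Result = 0;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result += Ops[i] * BC(It, i);
  return Result;
}

int main() {
  // {0,+,1,+,1} enumerates the triangular numbers 0, 1, 3, 6, 10, ...
  std::vector<uint64_t> Chrec;
  Chrec.push_back(0); Chrec.push_back(1); Chrec.push_back(1);
  uint64_t V = 0, Step = 1;
  for (uint64_t n = 0; n != 20; ++n) {
    assert(EvaluateAtIteration(Chrec, n) == V);
    V += Step;  // advance the recurrence one iteration
    ++Step;     // the step is itself a recurrence {1,+,1}
  }
  return 0;
}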
       // Note that this serves two purposes: It filters out loops that are
       // simply not analyzable, and it covers the case where this code is
@@ -815,28 +830,25 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
       // in infinite recursion.  In the latter case, the analysis code will
       // cope with a conservative value, and it will take care to purge
       // that value once it has finished.
-      const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop());
+      const SCEV *MaxBECount = getMaxBackedgeTakenCount(L);
       if (!isa<SCEVCouldNotCompute>(MaxBECount)) {
         // Manually compute the final value for AR, checking for
         // overflow.
-        const SCEV* Start = AR->getStart();
-        const SCEV* Step = AR->getStepRecurrence(*this);
 
         // Check whether the backedge-taken count can be losslessly casted to
         // the addrec's type. The count is always unsigned.
-        const SCEV* CastedMaxBECount =
+        const SCEV *CastedMaxBECount =
           getTruncateOrZeroExtend(MaxBECount, Start->getType());
-        const SCEV* RecastedMaxBECount =
+        const SCEV *RecastedMaxBECount =
           getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
         if (MaxBECount == RecastedMaxBECount) {
-          const Type *WideTy =
-            IntegerType::get(getTypeSizeInBits(Start->getType()) * 2);
+          const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
           // Check whether Start+Step*MaxBECount has no unsigned overflow.
-          const SCEV* ZMul =
+          const SCEV *ZMul =
             getMulExpr(CastedMaxBECount,
                        getTruncateOrZeroExtend(Step, Start->getType()));
-          const SCEV* Add = getAddExpr(Start, ZMul);
-          const SCEV* OperandExtendedAdd =
+          const SCEV *Add = getAddExpr(Start, ZMul);
+          const SCEV *OperandExtendedAdd =
             getAddExpr(getZeroExtendExpr(Start, WideTy),
                        getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
                                   getZeroExtendExpr(Step, WideTy)));
@@ -844,11 +856,11 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getZeroExtendExpr(Step, Ty),
-                                 AR->getLoop());
+                                 L);
 
           // Similar to above, only this time treat the step value as signed.
           // This covers loops that count down.
-          const SCEV* SMul =
+          const SCEV *SMul =
             getMulExpr(CastedMaxBECount,
                        getTruncateOrSignExtend(Step, Start->getType()));
           Add = getAddExpr(Start, SMul);
@@ -860,25 +872,50 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
             // Return the expression with the addrec on the outside.
             return getAddRecExpr(getZeroExtendExpr(Start, Ty),
                                  getSignExtendExpr(Step, Ty),
-                                 AR->getLoop());
+                                 L);
+        }
+
+        // If the backedge is guarded by a comparison with the pre-inc value
+        // the addrec is safe. Also, if the entry is guarded by a comparison
+        // with the start value and the backedge is guarded by a comparison
+        // with the post-inc value, the addrec is safe.
+        if (isKnownPositive(Step)) {
+          const SCEV *N = getConstant(APInt::getMinValue(BitWidth) -
+                                      getUnsignedRange(Step).getUnsignedMax());
+          if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
+              (isLoopGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
+               isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
+                                           AR->getPostIncExpr(*this), N)))
+            // Return the expression with the addrec on the outside.
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) && + (isLoopGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) || + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); } } } - FoldingSetNodeID ID; - ID.AddInteger(scZeroExtend); - ID.AddPointer(Op); - ID.AddPointer(Ty); - void *IP = 0; + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVZeroExtendExpr(Op, Ty); + new (S) SCEVZeroExtendExpr(ID, Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } -const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, - const Type *Ty) { +const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -897,12 +934,33 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) return getSignExtendExpr(SS->getOperand(), Ty); + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scSignExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). This allows analysis of something like // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } if (const SCEVAddRecExpr *AR = dyn_cast(Op)) if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->hasNoSignedWrap()) + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are // simply not analyzable, and it covers the case where this code is @@ -911,28 +969,25 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, // in infinite recursion. In the later case, the analysis code will // cope with a conservative value, and it will take care to purge // that value once it has finished. - const SCEV* MaxBECount = getMaxBackedgeTakenCount(AR->getLoop()); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); if (!isa(MaxBECount)) { // Manually compute the final value for AR, checking for // overflow. - const SCEV* Start = AR->getStart(); - const SCEV* Step = AR->getStepRecurrence(*this); // Check whether the backedge-taken count can be losslessly casted to // the addrec's type. The count is always unsigned. 
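// A minimal sketch of the widening trick used above to prove that a
// zero-extended addrec cannot wrap: evaluate Start + Step*MaxBECount once
// in the narrow type and once with every operand zero-extended to twice
// the width; if extending the narrow result matches the wide result, the
// final (and, for a non-negative step, every) iteration stayed in range.
// The real code also re-checks with the step treated as signed to cover
// counting-down loops. Plain integers stand in for SCEV expressions.
#include <cassert>
#include <cstdint>

static bool NoUnsignedWrap(uint8_t Start, uint8_t Step, uint8_t MaxBECount) {
  uint8_t Narrow = static_cast<uint8_t>(Start + Step * MaxBECount);
  uint16_t Wide = static_cast<uint16_t>(Start) +
                  static_cast<uint16_t>(Step) *
                  static_cast<uint16_t>(MaxBECount);
  return static_cast<uint16_t>(Narrow) == Wide;
}

int main() {
  // {0,+,1}<i8> over 99 iterations stays below 256: the zext may be hoisted.
  assert(NoUnsignedWrap(0, 1, 99));
  // {200,+,1}<i8> over 99 iterations wraps past 255: the fold is rejected.
  assert(!NoUnsignedWrap(200, 1, 99));
  return 0;
}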
- const SCEV* CastedMaxBECount = + const SCEV *CastedMaxBECount = getTruncateOrZeroExtend(MaxBECount, Start->getType()); - const SCEV* RecastedMaxBECount = + const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); if (MaxBECount == RecastedMaxBECount) { - const Type *WideTy = - IntegerType::get(getTypeSizeInBits(Start->getType()) * 2); + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. - const SCEV* SMul = + const SCEV *SMul = getMulExpr(CastedMaxBECount, getTruncateOrSignExtend(Step, Start->getType())); - const SCEV* Add = getAddExpr(Start, SMul); - const SCEV* OperandExtendedAdd = + const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *OperandExtendedAdd = getAddExpr(getSignExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getSignExtendExpr(Step, WideTy))); @@ -940,19 +995,60 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - AR->getLoop()); + L); + + // Similar to above, only this time treat the step value as unsigned. + // This covers loops that count up with an unsigned step. + const SCEV *UMul = + getMulExpr(CastedMaxBECount, + getTruncateOrZeroExtend(Step, Start->getType())); + Add = getAddExpr(Start, UMul); + OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe. + if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - + getSignedRange(Step).getSignedMax()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || + (isLoopGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || + (isLoopGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); } } } - FoldingSetNodeID ID; - ID.AddInteger(scSignExtend); - ID.AddPointer(Op); - ID.AddPointer(Ty); - void *IP = 0; + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. 
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVSignExtendExpr(Op, Ty); + new (S) SCEVSignExtendExpr(ID, Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -960,8 +1056,8 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op, /// getAnyExtendExpr - Return a SCEV for the given operand extended with /// unspecified bits out to the given type. /// -const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op, - const Type *Ty) { +const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, + const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && "This is not an extending conversion!"); assert(isSCEVable(Ty) && @@ -975,19 +1071,19 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op, // Peel off a truncate cast. if (const SCEVTruncateExpr *T = dyn_cast(Op)) { - const SCEV* NewOp = T->getOperand(); + const SCEV *NewOp = T->getOperand(); if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) return getAnyExtendExpr(NewOp, Ty); return getTruncateOrNoop(NewOp, Ty); } // Next try a zext cast. If the cast is folded, use it. - const SCEV* ZExt = getZeroExtendExpr(Op, Ty); + const SCEV *ZExt = getZeroExtendExpr(Op, Ty); if (!isa(ZExt)) return ZExt; // Next try a sext cast. If the cast is folded, use it. - const SCEV* SExt = getSignExtendExpr(Op, Ty); + const SCEV *SExt = getSignExtendExpr(Op, Ty); if (!isa(SExt)) return SExt; @@ -1025,10 +1121,10 @@ const SCEV* ScalarEvolution::getAnyExtendExpr(const SCEV* Op, /// is also used as a check to avoid infinite recursion. /// static bool -CollectAddOperandsWithScales(DenseMap &M, - SmallVector &NewOps, +CollectAddOperandsWithScales(DenseMap &M, + SmallVector &NewOps, APInt &AccumulatedConstant, - const SmallVectorImpl &Ops, + const SmallVectorImpl &Ops, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; @@ -1049,9 +1145,9 @@ CollectAddOperandsWithScales(DenseMap &M, } else { // A multiplication of a constant with some other value. Update // the map. - SmallVector MulOps(Mul->op_begin()+1, Mul->op_end()); - const SCEV* Key = SE.getMulExpr(MulOps); - std::pair::iterator, bool> Pair = + SmallVector MulOps(Mul->op_begin()+1, Mul->op_end()); + const SCEV *Key = SE.getMulExpr(MulOps); + std::pair::iterator, bool> Pair = M.insert(std::make_pair(Key, NewScale)); if (Pair.second) { NewOps.push_back(Pair.first->first); @@ -1069,7 +1165,7 @@ CollectAddOperandsWithScales(DenseMap &M, AccumulatedConstant += Scale * C->getValue()->getValue(); } else { // An ordinary operand. Update the map. - std::pair::iterator, bool> Pair = + std::pair::iterator, bool> Pair = M.insert(std::make_pair(Ops[i], Scale)); if (Pair.second) { NewOps.push_back(Pair.first->first); @@ -1095,7 +1191,8 @@ namespace { /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. -const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { +const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, + bool HasNUW, bool HasNSW) { assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1139,13 +1236,13 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 // Found a match, merge the two values into a multiply, and add any // remaining values to the result. 
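// A minimal sketch of the scale-collection idea behind
// CollectAddOperandsWithScales above: walk a flat list of (coefficient,
// term) addends, accumulate coefficients per distinct term in a map, and
// rebuild a smaller operand list. Strings stand in for uniqued SCEV keys;
// this omits the recursion into nested adds that the real code performs.
#include <cassert>
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

typedef std::pair<long, std::string> Addend; // Scale * Term

static std::vector<Addend> Collect(const std::vector<Addend> &Ops) {
  long AccumulatedConstant = 0;
  std::map<std::string, long> M;
  for (std::size_t i = 0, e = Ops.size(); i != e; ++i)
    if (Ops[i].second.empty())   // a pure constant addend
      AccumulatedConstant += Ops[i].first;
    else                         // fold scales of the same term together
      M[Ops[i].second] += Ops[i].first;
  std::vector<Addend> NewOps;
  if (AccumulatedConstant != 0)
    NewOps.push_back(Addend(AccumulatedConstant, ""));
  for (std::map<std::string, long>::iterator I = M.begin(); I != M.end(); ++I)
    if (I->second != 0)          // 2*x + (-2)*x cancels away entirely
      NewOps.push_back(Addend(I->second, I->first));
  return NewOps;
}

int main() {
  std::vector<Addend> Ops;
  Ops.push_back(Addend(2, "x"));
  Ops.push_back(Addend(3, "x"));
  Ops.push_back(Addend(7, ""));
  Ops.push_back(Addend(1, "y"));
  std::vector<Addend> R = Collect(Ops); // 7 + 5*x + 1*y
  assert(R.size() == 3 && R[1].first == 5);
  return 0;
}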
- const SCEV* Two = getIntegerSCEV(2, Ty); - const SCEV* Mul = getMulExpr(Ops[i], Two); + const SCEV *Two = getIntegerSCEV(2, Ty); + const SCEV *Mul = getMulExpr(Ops[i], Two); if (Ops.size() == 2) return Mul; Ops.erase(Ops.begin()+i, Ops.begin()+i+2); Ops.push_back(Mul); - return getAddExpr(Ops); + return getAddExpr(Ops, HasNUW, HasNSW); } // Check for truncates. If all the operands are truncated from the same @@ -1156,7 +1253,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { const SCEVTruncateExpr *Trunc = cast(Ops[Idx]); const Type *DstType = Trunc->getType(); const Type *SrcType = Trunc->getOperand()->getType(); - SmallVector LargeOps; + SmallVector LargeOps; bool Ok = true; // Check all the operands to see if they can be represented in the // source type of the truncate. @@ -1172,7 +1269,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { // is much more likely to be foldable here. LargeOps.push_back(getSignExtendExpr(C, SrcType)); } else if (const SCEVMulExpr *M = dyn_cast(Ops[i])) { - SmallVector LargeMulOps; + SmallVector LargeMulOps; for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { if (const SCEVTruncateExpr *T = dyn_cast(M->getOperand(j))) { @@ -1200,7 +1297,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { } if (Ok) { // Evaluate the expression in the larger type. - const SCEV* Fold = getAddExpr(LargeOps); + const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW); // If it folds to something simple, use it. Otherwise, don't. if (isa(Fold) || isa(Fold)) return getTruncateExpr(Fold, DstType); @@ -1237,16 +1334,16 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { // operands multiplied by constant values. if (Idx < Ops.size() && isa(Ops[Idx])) { uint64_t BitWidth = getTypeSizeInBits(Ty); - DenseMap M; - SmallVector NewOps; + DenseMap M; + SmallVector NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Ops, APInt(BitWidth, 1), *this)) { // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. - std::map, APIntCompare> MulOpLists; - for (SmallVector::iterator I = NewOps.begin(), + std::map, APIntCompare> MulOpLists; + for (SmallVector::iterator I = NewOps.begin(), E = NewOps.end(); I != E; ++I) MulOpLists[M.find(*I)->second].push_back(*I); // Re-generate the operands list. @@ -1276,17 +1373,17 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) if (MulOpSCEV == Ops[AddOp] && !isa(Ops[AddOp])) { // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) - const SCEV* InnerMul = Mul->getOperand(MulOp == 0); + const SCEV *InnerMul = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { // If the multiply has more than two operands, we must get the // Y*Z term. 
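// A minimal sketch of why the truncate-hoisting fold above is sound:
// truncation to N bits is a ring homomorphism mod 2^N, so adding in the
// wide source type and truncating once yields the same value as adding
// the individually truncated operands. The code only keeps the rewrite
// when the wide sum folds to something simple; this sketch just checks
// the underlying identity with plain integers.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 1000; a += 37)
    for (uint32_t b = 0; b < 1000; b += 41) {
      uint8_t Narrow = static_cast<uint8_t>(a) + static_cast<uint8_t>(b);
      assert(Narrow == static_cast<uint8_t>(a + b)); // trunc(a)+trunc(b)==trunc(a+b)
    }
  return 0;
}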
- SmallVector MulOps(Mul->op_begin(), Mul->op_end()); + SmallVector MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul = getMulExpr(MulOps); } - const SCEV* One = getIntegerSCEV(1, Ty); - const SCEV* AddOne = getAddExpr(InnerMul, One); - const SCEV* OuterMul = getMulExpr(AddOne, Ops[AddOp]); + const SCEV *One = getIntegerSCEV(1, Ty); + const SCEV *AddOne = getAddExpr(InnerMul, One); + const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]); if (Ops.size() == 2) return OuterMul; if (AddOp < Idx) { Ops.erase(Ops.begin()+AddOp); @@ -1310,22 +1407,22 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { OMulOp != e; ++OMulOp) if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) - const SCEV* InnerMul1 = Mul->getOperand(MulOp == 0); + const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); if (Mul->getNumOperands() != 2) { SmallVector MulOps(Mul->op_begin(), Mul->op_end()); MulOps.erase(MulOps.begin()+MulOp); InnerMul1 = getMulExpr(MulOps); } - const SCEV* InnerMul2 = OtherMul->getOperand(OMulOp == 0); + const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); if (OtherMul->getNumOperands() != 2) { SmallVector MulOps(OtherMul->op_begin(), OtherMul->op_end()); MulOps.erase(MulOps.begin()+OMulOp); InnerMul2 = getMulExpr(MulOps); } - const SCEV* InnerMulSum = getAddExpr(InnerMul1,InnerMul2); - const SCEV* OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); if (Ops.size() == 2) return OuterMul; Ops.erase(Ops.begin()+Idx); Ops.erase(Ops.begin()+OtherMulIdx-1); @@ -1346,7 +1443,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this add and add them to the vector if // they are loop invariant w.r.t. the recurrence. - SmallVector LIOps; + SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1360,11 +1457,11 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} LIOps.push_back(AddRec->getStart()); - SmallVector AddRecOps(AddRec->op_begin(), + SmallVector AddRecOps(AddRec->op_begin(), AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); - const SCEV* NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop()); // If all of the other operands were loop invariant, we are done. 
if (Ops.size() == 1) return NewRec; @@ -1396,7 +1493,7 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { } NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i)); } - const SCEV* NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRec->getLoop()); if (Ops.size() == 2) return NewAddRec; @@ -1420,16 +1517,19 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVAddExpr(Ops); + SCEVAddExpr *S = SCEVAllocator.Allocate(); + new (S) SCEVAddExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); return S; } /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. -const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl &Ops) { +const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, + bool HasNUW, bool HasNSW) { assert(!Ops.empty() && "Cannot get empty mul!"); #ifndef NDEBUG for (unsigned i = 1, e = Ops.size(); i != e; ++i) @@ -1457,7 +1557,8 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl &Ops) { ++Idx; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() * + ConstantInt *Fold = ConstantInt::get(getContext(), + LHSC->getValue()->getValue() * RHSC->getValue()->getValue()); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element @@ -1510,7 +1611,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl &Ops) { for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { // Scan all of the other operands to this mul and add them to the vector if // they are loop invariant w.r.t. the recurrence. - SmallVector LIOps; + SmallVector LIOps; const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (Ops[i]->isLoopInvariant(AddRec->getLoop())) { @@ -1522,7 +1623,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl &Ops) { // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} - SmallVector NewOps; + SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); if (LIOps.size() == 1) { const SCEV *Scale = LIOps[0]; @@ -1530,13 +1631,13 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl &Ops) { NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); } else { for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - SmallVector MulOps(LIOps.begin(), LIOps.end()); + SmallVector MulOps(LIOps.begin(), LIOps.end()); MulOps.push_back(AddRec->getOperand(i)); NewOps.push_back(getMulExpr(MulOps)); } } - const SCEV* NewRec = getAddRecExpr(NewOps, AddRec->getLoop()); + const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop()); // If all of the other operands were loop invariant, we are done. 
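// A minimal sketch checking the fold above numerically: multiplying an
// add recurrence by a loop-invariant value scales the start and every
// step, i.e. C * {S,+,D} == {C*S,+,C*D}. Iterating with longs stands in
// for the symbolic reasoning.
#include <cassert>

int main() {
  const long C = 7, S = 3, D = 5;
  long AR = S;         // value of {S,+,D} at the current iteration
  long Scaled = C * S; // value of {C*S,+,C*D} at the same iteration
  for (int i = 0; i != 100; ++i) {
    assert(C * AR == Scaled);
    AR += D;           // advance {S,+,D}
    Scaled += C * D;   // advance {C*S,+,C*D}
  }
  return 0;
}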
if (Ops.size() == 1) return NewRec; @@ -1560,14 +1661,14 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl &Ops) { if (AddRec->getLoop() == OtherAddRec->getLoop()) { // F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D} const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; - const SCEV* NewStart = getMulExpr(F->getStart(), + const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); - const SCEV* B = F->getStepRecurrence(*this); - const SCEV* D = G->getStepRecurrence(*this); - const SCEV* NewStep = getAddExpr(getMulExpr(F, D), + const SCEV *B = F->getStepRecurrence(*this); + const SCEV *D = G->getStepRecurrence(*this); + const SCEV *NewStep = getAddExpr(getMulExpr(F, D), getMulExpr(G, B), getMulExpr(B, D)); - const SCEV* NewAddRec = getAddRecExpr(NewStart, NewStep, + const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, F->getLoop()); if (Ops.size() == 2) return NewAddRec; @@ -1591,14 +1692,16 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVMulExpr(Ops); + SCEVMulExpr *S = SCEVAllocator.Allocate(); + new (S) SCEVMulExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); return S; } -/// getUDivExpr - Get a canonical multiply expression, or something simpler if -/// possible. +/// getUDivExpr - Get a canonical unsigned division expression, or something +/// simpler if possible. const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *RHS) { assert(getEffectiveSCEVType(LHS->getType()) == @@ -1607,7 +1710,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEVConstant *RHSC = dyn_cast(RHS)) { if (RHSC->getValue()->equalsInt(1)) - return LHS; // X udiv 1 --> x + return LHS; // X udiv 1 --> x if (RHSC->isZero()) return getIntegerSCEV(0, LHS->getType()); // value is undefined @@ -1622,7 +1725,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (!RHSC->getValue()->getValue().isPowerOf2()) ++MaxShiftAmt; const IntegerType *ExtTy = - IntegerType::get(getTypeSizeInBits(Ty) + MaxShiftAmt); + IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) if (const SCEVConstant *Step = @@ -1633,24 +1736,24 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop())) { - SmallVector Operands; + SmallVector Operands; for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); return getAddRecExpr(Operands, AR->getLoop()); } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast(LHS)) { - SmallVector Operands; + SmallVector Operands; for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. 
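// A minimal sketch checking the F * G rule above: with F = {A,+,B} and
// G = {C,+,D}, the first difference of F*G at each iteration is
// F*D + G*B + B*D, which is exactly the (non-constant, recurrence-valued)
// step the fold installs in {A*C,+,F*D + G*B + B*D}. Iterating with longs
// stands in for the symbolic proof.
#include <cassert>

int main() {
  const long A = 2, B = 3, C = 5, D = 7;
  long F = A, G = C;
  for (int i = 0; i != 100; ++i) {
    long Step = F * D + G * B + B * D;         // claimed step of the product
    assert((F + B) * (G + D) - F * G == Step); // actual first difference
    F += B;
    G += D;
  }
  return 0;
}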
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - const SCEV* Op = M->getOperand(i); - const SCEV* Div = getUDivExpr(Op, RHSC); + const SCEV *Op = M->getOperand(i); + const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa(Div) && getMulExpr(Div, RHSC) == Op) { - const SmallVectorImpl &MOperands = M->getOperands(); - Operands = SmallVector(MOperands.begin(), + const SmallVectorImpl &MOperands = M->getOperands(); + Operands = SmallVector(MOperands.begin(), MOperands.end()); Operands[i] = Div; return getMulExpr(Operands); @@ -1659,13 +1762,13 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddRecExpr *A = dyn_cast(LHS)) { - SmallVector Operands; + SmallVector Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { - const SCEV* Op = getUDivExpr(A->getOperand(i), RHS); + const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); if (isa(Op) || getMulExpr(Op, RHS) != A->getOperand(i)) break; Operands.push_back(Op); @@ -1691,7 +1794,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVUDivExpr(LHS, RHS); + new (S) SCEVUDivExpr(ID, LHS, RHS); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1699,9 +1802,10 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. -const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start, - const SCEV* Step, const Loop *L) { - SmallVector Operands; +const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, + const SCEV *Step, const Loop *L, + bool HasNUW, bool HasNSW) { + SmallVector Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast(Step)) if (StepChrec->getLoop() == L) { @@ -1711,14 +1815,15 @@ const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start, } Operands.push_back(Step); - return getAddRecExpr(Operands, L); + return getAddRecExpr(Operands, L, HasNUW, HasNSW); } /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. const SCEV * -ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, - const Loop *L) { +ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, + const Loop *L, + bool HasNUW, bool HasNSW) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG for (unsigned i = 1, e = Operands.size(); i != e; ++i) @@ -1729,14 +1834,14 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, if (Operands.back()->isZero()) { Operands.pop_back(); - return getAddRecExpr(Operands, L); // {X,+,0} --> X + return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X } // Canonicalize nested AddRecs in by nesting them in order of loop depth. 
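// A minimal sketch of the "safely divisible" test used by the udiv folds
// above: an operand may be divided through by the constant only if the
// division is lossless, which the code checks by multiplying the quotient
// back and comparing. Unsigned longs stand in for SCEV constants.
#include <cassert>

static bool SafelyDivisible(unsigned long Op, unsigned long C) {
  unsigned long Div = Op / C; // candidate folded operand
  return Div * C == Op;       // lossless iff multiplying back round-trips
}

int main() {
  assert(SafelyDivisible(8, 2));  // (x*8)/2 may safely become x*4
  assert(!SafelyDivisible(5, 2)); // (x*5)/2 must remain a udiv
  return 0;
}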
if (const SCEVAddRecExpr *NestedAR = dyn_cast(Operands[0])) { const Loop* NestedLoop = NestedAR->getLoop(); if (L->getLoopDepth() < NestedLoop->getLoopDepth()) { - SmallVector NestedOperands(NestedAR->op_begin(), + SmallVector NestedOperands(NestedAR->op_begin(), NestedAR->op_end()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their @@ -1758,7 +1863,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, } if (AllInvariant) // Ok, both add recurrences are valid after the transformation. - return getAddRecExpr(NestedOperands, NestedLoop); + return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW); } // Reset Operands to its original state. Operands[0] = NestedAR; @@ -1773,22 +1878,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, ID.AddPointer(L); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVAddRecExpr(Operands, L); + SCEVAddRecExpr *S = SCEVAllocator.Allocate(); + new (S) SCEVAddRecExpr(ID, Operands, L); UniqueSCEVs.InsertNode(S, IP); + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); return S; } const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { - SmallVector Ops; + SmallVector Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getSMaxExpr(Ops); } -const SCEV* -ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { +const SCEV * +ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { assert(!Ops.empty() && "Cannot get empty smax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1808,7 +1915,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get( + ConstantInt *Fold = ConstantInt::get(getContext(), APIntOps::smax(LHSC->getValue()->getValue(), RHSC->getValue()->getValue())); Ops[0] = getConstant(Fold); @@ -1871,21 +1978,21 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVSMaxExpr(Ops); + new (S) SCEVSMaxExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); return S; } const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { - SmallVector Ops; + SmallVector Ops; Ops.push_back(LHS); Ops.push_back(RHS); return getUMaxExpr(Ops); } -const SCEV* -ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { +const SCEV * +ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { assert(!Ops.empty() && "Cannot get empty umax!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1905,7 +2012,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
- ConstantInt *Fold = ConstantInt::get( + ConstantInt *Fold = ConstantInt::get(getContext(), APIntOps::umax(LHSC->getValue()->getValue(), RHSC->getValue()->getValue())); Ops[0] = getConstant(Fold); @@ -1968,7 +2075,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = SCEVAllocator.Allocate(); - new (S) SCEVUMaxExpr(Ops); + new (S) SCEVUMaxExpr(ID, Ops); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1985,7 +2092,77 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV* ScalarEvolution::getUnknown(Value *V) { +const SCEV *ScalarEvolution::getFieldOffsetExpr(const StructType *STy, + unsigned FieldNo) { + // If we have TargetData we can determine the constant offset. + if (TD) { + const Type *IntPtrTy = TD->getIntPtrType(getContext()); + const StructLayout &SL = *TD->getStructLayout(STy); + uint64_t Offset = SL.getElementOffset(FieldNo); + return getIntegerSCEV(Offset, IntPtrTy); + } + + // Field 0 is always at offset 0. + if (FieldNo == 0) { + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + return getIntegerSCEV(0, Ty); + } + + // Okay, it looks like we really DO need an offsetof expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scFieldOffset); + ID.AddPointer(STy); + ID.AddInteger(FieldNo); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate(); + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); + new (S) SCEVFieldOffsetExpr(ID, Ty, STy, FieldNo); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getAllocSizeExpr(const Type *AllocTy) { + // If we have TargetData we can determine the constant size. + if (TD && AllocTy->isSized()) { + const Type *IntPtrTy = TD->getIntPtrType(getContext()); + return getIntegerSCEV(TD->getTypeAllocSize(AllocTy), IntPtrTy); + } + + // Expand an array size into the element size times the number + // of elements. + if (const ArrayType *ATy = dyn_cast(AllocTy)) { + const SCEV *E = getAllocSizeExpr(ATy->getElementType()); + return getMulExpr( + E, getConstant(ConstantInt::get(cast(E->getType()), + ATy->getNumElements()))); + } + + // Expand a vector size into the element size times the number + // of elements. + if (const VectorType *VTy = dyn_cast(AllocTy)) { + const SCEV *E = getAllocSizeExpr(VTy->getElementType()); + return getMulExpr( + E, getConstant(ConstantInt::get(cast(E->getType()), + VTy->getNumElements()))); + } + + // Okay, it looks like we really DO need a sizeof expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scAllocSize); + ID.AddPointer(AllocTy); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = SCEVAllocator.Allocate(); + const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + new (S) SCEVAllocSizeExpr(ID, Ty, AllocTy); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getUnknown(Value *V) { // Don't attempt to do anything other than create a SCEVUnknown object // here. 
createSCEV only calls getUnknown after checking for all other
 // interesting possibilities, and any other code that calls getUnknown
@@ -1997,7 +2174,7 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) {
   void *IP = 0;
   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
   SCEV *S = SCEVAllocator.Allocate<SCEVUnknown>();
-  new (S) SCEVUnknown(V);
+  new (S) SCEVUnknown(ID, V);
   UniqueSCEVs.InsertNode(S, IP);
   return S;
 }
@@ -2011,17 +2188,8 @@ const SCEV* ScalarEvolution::getUnknown(Value *V) {
 /// can optionally include pointer types if the ScalarEvolution class
 /// has access to target-specific information.
 bool ScalarEvolution::isSCEVable(const Type *Ty) const {
-  // Integers are always SCEVable.
-  if (Ty->isInteger())
-    return true;
-
-  // Pointers are SCEVable if TargetData information is available
-  // to provide pointer size information.
-  if (isa<PointerType>(Ty))
-    return TD != NULL;
-
-  // Otherwise it's not SCEVable.
-  return false;
+  // Integers and pointers are always SCEVable.
+  return Ty->isInteger() || isa<PointerType>(Ty);
 }
 
 /// getTypeSizeInBits - Return the size in bits of the specified type,
@@ -2033,9 +2201,14 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
   if (TD)
     return TD->getTypeSizeInBits(Ty);
 
-  // Otherwise, we support only integer types.
-  assert(Ty->isInteger() && "isSCEVable permitted a non-SCEVable type!");
-  return Ty->getPrimitiveSizeInBits();
+  // Integer types have fixed sizes.
+  if (Ty->isInteger())
+    return Ty->getPrimitiveSizeInBits();
+
+  // The only other supported type is a pointer. Without TargetData,
+  // conservatively assume pointers are 64-bit.
+  assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!");
+  return 64;
 }
 
 /// getEffectiveSCEVType - Return a type with the same bitwidth as
@@ -2048,58 +2221,60 @@ const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
   if (Ty->isInteger())
     return Ty;
 
+  // The only other supported type is a pointer.
   assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!");
-  return TD->getIntPtrType();
-}
+  if (TD) return TD->getIntPtrType(getContext());
 
-const SCEV* ScalarEvolution::getCouldNotCompute() {
-  return &CouldNotCompute;
+  // Without TargetData, conservatively assume pointers are 64-bit.
+  return Type::getInt64Ty(getContext());
 }
 
-/// hasSCEV - Return true if the SCEV for this value has already been
-/// computed.
-bool ScalarEvolution::hasSCEV(Value *V) const {
-  return Scalars.count(V);
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+  return &CouldNotCompute;
 }
 
 /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
 /// expression and create a new one.
-const SCEV* ScalarEvolution::getSCEV(Value *V) {
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
   assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
 
-  std::map<SCEVCallbackVH, const SCEV*>::iterator I = Scalars.find(V);
+  std::map<SCEVCallbackVH, const SCEV *>::iterator I = Scalars.find(V);
   if (I != Scalars.end()) return I->second;
-  const SCEV* S = createSCEV(V);
+  const SCEV *S = createSCEV(V);
   Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S));
   return S;
 }
 
 /// getIntegerSCEV - Given a SCEVable type, create a constant for the
 /// specified signed integer value and return a SCEV for the constant.
-const SCEV* ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
+const SCEV *ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
   const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
   return getConstant(ConstantInt::get(ITy, Val));
 }
 
 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
 ///
-const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) {
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-    return getConstant(cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+    return getConstant(
+               cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
 
   const Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
-  return getMulExpr(V, getConstant(ConstantInt::getAllOnesValue(Ty)));
+  return getMulExpr(V,
+             getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
 }
 
 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
-const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) {
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-    return getConstant(cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+    return getConstant(
+                cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
 
   const Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
-  const SCEV* AllOnes = getConstant(ConstantInt::getAllOnesValue(Ty));
+  const SCEV *AllOnes =
+             getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
   return getMinusSCEV(AllOnes, V);
 }
 
@@ -2114,12 +2289,12 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
 /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the
 /// input value to the specified type.  If the type must be extended, it is zero
 /// extended.
-const SCEV*
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V,
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
                                          const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
-         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+         (Ty->isInteger() || isa<PointerType>(Ty)) &&
          "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
@@ -2131,12 +2306,12 @@ ScalarEvolution::getTruncateOrZeroExtend(const SCEV* V,
 /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of the
 /// input value to the specified type.  If the type must be extended, it is sign
 /// extended.
-const SCEV*
-ScalarEvolution::getTruncateOrSignExtend(const SCEV* V,
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
                                          const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
-         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+         (Ty->isInteger() || isa<PointerType>(Ty)) &&
         "Cannot truncate or zero extend with non-integer arguments!");
   if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
     return V;  // No conversion
@@ -2148,11 +2323,11 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV* V,
 /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
 /// input value to the specified type.  If the type must be extended, it is zero
 /// extended.  The conversion must not be narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
-         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+         (Ty->isInteger() || isa<PointerType>(Ty)) &&
          "Cannot noop or zero extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrZeroExtend cannot truncate!");
@@ -2164,11 +2339,11 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV* V, const Type *Ty) {
 /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the
 /// input value to the specified type.  If the type must be extended, it is sign
 /// extended.  The conversion must not be narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
-         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+         (Ty->isInteger() || isa<PointerType>(Ty)) &&
          "Cannot noop or sign extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrSignExtend cannot truncate!");
@@ -2181,11 +2356,11 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV* V, const Type *Ty) {
 /// the input value to the specified type.  If the type must be extended,
 /// it is extended with unspecified bits.  The conversion must not be
 /// narrowing.
-const SCEV*
-ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
-         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+         (Ty->isInteger() || isa<PointerType>(Ty)) &&
         "Cannot noop or any extend with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
          "getNoopOrAnyExtend cannot truncate!");
@@ -2196,11 +2371,11 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV* V, const Type *Ty) {
 
 /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
 /// input value to the specified type.  The conversion must not be widening.
-const SCEV*
-ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
+const SCEV *
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
   const Type *SrcTy = V->getType();
-  assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) &&
-         (Ty->isInteger() || (TD && isa<PointerType>(Ty))) &&
+  assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) &&
+         (Ty->isInteger() || isa<PointerType>(Ty)) &&
          "Cannot truncate or noop with non-integer arguments!");
   assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
         "getTruncateOrNoop cannot extend!");
@@ -2214,8 +2389,8 @@ ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
 /// with them.
 const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
                                                         const SCEV *RHS) {
-  const SCEV* PromotedLHS = LHS;
-  const SCEV* PromotedRHS = RHS;
+  const SCEV *PromotedLHS = LHS;
+  const SCEV *PromotedRHS = RHS;
 
   if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
     PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
@@ -2230,8 +2405,8 @@ const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
 /// with them.
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS) { - const SCEV* PromotedLHS = LHS; - const SCEV* PromotedRHS = RHS; + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); @@ -2241,34 +2416,60 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, return getUMinExpr(PromotedLHS, PromotedRHS); } -/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for -/// the specified instruction and replaces any references to the symbolic value -/// SymName with the specified value. This is used during PHI resolution. +/// PushDefUseChildren - Push users of the given Instruction +/// onto the given Worklist. +static void +PushDefUseChildren(Instruction *I, + SmallVectorImpl &Worklist) { + // Push the def-use children onto the Worklist stack. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Worklist.push_back(cast(UI)); +} + +/// ForgetSymbolicValue - This looks up computed SCEV values for all +/// instructions that depend on the given instruction and removes them from +/// the Scalars map if they reference SymName. This is used during PHI +/// resolution. void -ScalarEvolution::ReplaceSymbolicValueWithConcrete(Instruction *I, - const SCEV *SymName, - const SCEV *NewVal) { - std::map::iterator SI = - Scalars.find(SCEVCallbackVH(I, this)); - if (SI == Scalars.end()) return; +ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) { + SmallVector Worklist; + PushDefUseChildren(I, Worklist); - const SCEV* NV = - SI->second->replaceSymbolicValuesWithConcrete(SymName, NewVal, *this); - if (NV == SI->second) return; // No change. + SmallPtrSet Visited; + Visited.insert(I); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; - SI->second = NV; // Update the scalars map! + std::map::iterator It = + Scalars.find(static_cast(I)); + if (It != Scalars.end()) { + // Short-circuit the def-use traversal if the symbolic name + // ceases to appear in expressions. + if (!It->second->hasOperand(SymName)) + continue; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, or it's a PHI that's in the progress of being computed + // by createNodeForPHI. In the former case, additional loop trip + // count information isn't going to change anything. In the later + // case, createNodeForPHI will perform the necessary updates on its + // own when it gets to that point. + if (!isa(I) || !isa(It->second)) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + } + } - // Any instruction values that use this instruction might also need to be - // updated! - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) - ReplaceSymbolicValueWithConcrete(cast(*UI), SymName, NewVal); + PushDefUseChildren(I, Worklist); + } } /// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in /// a loop header, making it a potential recurrence, or it doesn't. /// -const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (PN->getNumIncomingValues() == 2) // The loops have been canonicalized. 
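// A minimal sketch of the ForgetSymbolicName traversal above: a worklist
// walk over users with a visited set, processing each reachable node once
// (the real code also short-circuits branches whose cached value no longer
// mentions the symbolic name). A toy user graph of ints stands in for
// LLVM's def-use chains.
#include <cassert>
#include <map>
#include <set>
#include <vector>

int main() {
  // Users[v] lists the nodes that use v, like Value::use_iterator.
  std::map<int, std::vector<int> > Users;
  Users[0].push_back(1);
  Users[0].push_back(2);
  Users[1].push_back(2); // diamond: node 2 is reached twice, visited once
  Users[2].push_back(3);

  std::vector<int> Worklist(Users[0].begin(), Users[0].end());
  std::set<int> Visited;
  Visited.insert(0);
  unsigned Processed = 0;
  while (!Worklist.empty()) {
    int I = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(I).second) // analogous to SmallPtrSet::insert
      continue;
    ++Processed; // here the real code erases I's stale Scalars entry
    Worklist.insert(Worklist.end(), Users[I].begin(), Users[I].end());
  }
  assert(Processed == 3); // nodes 1, 2, 3, each exactly once
  return 0;
}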
if (const Loop *L = LI->getLoopFor(PN->getParent())) if (L->getHeader() == PN->getParent()) { @@ -2278,14 +2479,15 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { unsigned BackEdge = IncomingEdge^1; // While we are analyzing this PHI node, handle its value symbolically. - const SCEV* SymbolicName = getUnknown(PN); + const SCEV *SymbolicName = getUnknown(PN); assert(Scalars.find(PN) == Scalars.end() && "PHI node already processed?"); Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); // Using this symbolic name for the PHI, analyze the value coming around // the back-edge. - const SCEV* BEValue = getSCEV(PN->getIncomingValue(BackEdge)); + Value *BEValueV = PN->getIncomingValue(BackEdge); + const SCEV *BEValue = getSCEV(BEValueV); // NOTE: If BEValue is loop invariant, we know that the PHI node just // has a special value for the first iteration of the loop. @@ -2305,11 +2507,11 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { if (FoundIndex != Add->getNumOperands()) { // Create an add with everything but the specified operand. - SmallVector Ops; + SmallVector Ops; for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) if (i != FoundIndex) Ops.push_back(Add->getOperand(i)); - const SCEV* Accum = getAddExpr(Ops); + const SCEV *Accum = getAddExpr(Ops); // This is not a valid addrec if the step amount is varying each // loop iteration, but is not itself an addrec in this loop. @@ -2318,15 +2520,35 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { cast(Accum)->getLoop() == L)) { const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); - const SCEV *PHISCEV = - getAddRecExpr(StartVal, Accum, L); + const SCEVAddRecExpr *PHISCEV = + cast(getAddRecExpr(StartVal, Accum, L)); + + // If the increment doesn't overflow, then neither the addrec nor the + // post-increment will overflow. + if (const AddOperator *OBO = dyn_cast(BEValueV)) + if (OBO->getOperand(0) == PN && + getSCEV(OBO->getOperand(1)) == + PHISCEV->getStepRecurrence(*this)) { + const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this); + if (OBO->hasNoUnsignedWrap()) { + const_cast(PHISCEV) + ->setHasNoUnsignedWrap(true); + const_cast(PostInc) + ->setHasNoUnsignedWrap(true); + } + if (OBO->hasNoSignedWrap()) { + const_cast(PHISCEV) + ->setHasNoSignedWrap(true); + const_cast(PostInc) + ->setHasNoSignedWrap(true); + } + } // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and update all of the - // entries for the scalars that use the PHI (except for the PHI - // itself) to use the new analyzed value instead of the "symbolic" - // value. - ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV); + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + Scalars[SCEVCallbackVH(PN, this)] = PHISCEV; return PHISCEV; } } @@ -2338,21 +2560,20 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { // Because the other in-value of i (0) fits the evolution of BEValue // i really is an addrec evolution. if (AddRec->getLoop() == L && AddRec->isAffine()) { - const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); + const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge)); // If StartVal = j.start - j.stride, we can use StartVal as the // initial step of the addrec evolution. 
if (StartVal == getMinusSCEV(AddRec->getOperand(0), AddRec->getOperand(1))) { - const SCEV* PHISCEV = + const SCEV *PHISCEV = getAddRecExpr(StartVal, AddRec->getOperand(1), L); // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and update all of the - // entries for the scalars that use the PHI (except for the PHI - // itself) to use the new analyzed value instead of the "symbolic" - // value. - ReplaceSymbolicValueWithConcrete(PN, SymbolicName, PHISCEV); + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + Scalars[SCEVCallbackVH(PN, this)] = PHISCEV; return PHISCEV; } } @@ -2361,6 +2582,10 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { return SymbolicName; } + // It's tempting to recognize PHIs with a unique incoming value, however + // this leads passes like indvars to break LCSSA form. Fortunately, such + // PHIs are rare, as instcombine zaps them. + // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); } @@ -2368,14 +2593,14 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) { /// createNodeForGEP - Expand GEP instructions into add and multiply /// operations. This allows them to be analyzed by regular SCEV code. /// -const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) { +const SCEV *ScalarEvolution::createNodeForGEP(Operator *GEP) { - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. if (!cast(Base->getType())->getElementType()->isSized()) return getUnknown(GEP); - const SCEV* TotalOffset = getIntegerSCEV(0, IntPtrTy); + const SCEV *TotalOffset = getIntegerSCEV(0, IntPtrTy); gep_type_iterator GTI = gep_type_begin(GEP); for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()), E = GEP->op_end(); @@ -2384,22 +2609,16 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) { // Compute the (potentially symbolic) offset in bytes for this index. if (const StructType *STy = dyn_cast(*GTI++)) { // For a struct, add the member offset. - const StructLayout &SL = *TD->getStructLayout(STy); unsigned FieldNo = cast(Index)->getZExtValue(); - uint64_t Offset = SL.getElementOffset(FieldNo); TotalOffset = getAddExpr(TotalOffset, - getIntegerSCEV(Offset, IntPtrTy)); + getFieldOffsetExpr(STy, FieldNo)); } else { // For an array, add the element offset, explicitly scaled. - const SCEV* LocalOffset = getSCEV(Index); + const SCEV *LocalOffset = getSCEV(Index); if (!isa(LocalOffset->getType())) // Getelementptr indicies are signed. - LocalOffset = getTruncateOrSignExtend(LocalOffset, - IntPtrTy); - LocalOffset = - getMulExpr(LocalOffset, - getIntegerSCEV(TD->getTypeAllocSize(*GTI), - IntPtrTy)); + LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); + LocalOffset = getMulExpr(LocalOffset, getAllocSizeExpr(*GTI)); TotalOffset = getAddExpr(TotalOffset, LocalOffset); } } @@ -2411,7 +2630,7 @@ const SCEV* ScalarEvolution::createNodeForGEP(User *GEP) { /// the minimum number of times S is divisible by 2. For example, given {4,+,8} /// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S. 
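// A minimal sketch of the address arithmetic createNodeForGEP expands to
// above: each struct index contributes a field offset, and each array
// index contributes index * sizeof(element). The layout constants below
// are illustrative assumptions, not values queried from TargetData.
#include <cassert>
#include <cstdint>

int main() {
  // Model p[i].f for: struct S { int32_t a; int64_t f; }; S *p;
  const uint64_t SizeOfS = 16;  // assumed: 8-byte alignment pads 'a'
  const uint64_t OffsetOfF = 8; // assumed byte offset of field 'f'
  uint64_t i = 3;
  uint64_t TotalOffset = i * SizeOfS  // array index, explicitly scaled
                       + OffsetOfF;   // struct member offset
  assert(TotalOffset == 56); // byte offset added to the base pointer
  return 0;
}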
uint32_t -ScalarEvolution::GetMinTrailingZeros(const SCEV* S) { +ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVConstant *C = dyn_cast(S)) return C->getValue()->getValue().countTrailingZeros(); @@ -2487,18 +2706,100 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV* S) { return 0; } -uint32_t -ScalarEvolution::GetMinLeadingZeros(const SCEV* S) { - // TODO: Handle other SCEV expression types here. +/// getUnsignedRange - Determine the unsigned range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVConstant *C = dyn_cast(S)) - return C->getValue()->getValue().countLeadingZeros(); + return ConstantRange(C->getValue()->getValue()); + + if (const SCEVAddExpr *Add = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getUnsignedRange(Add->getOperand(i))); + return X; + } + + if (const SCEVMulExpr *Mul = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getUnsignedRange(Mul->getOperand(i))); + return X; + } + + if (const SCEVSMaxExpr *SMax = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getUnsignedRange(SMax->getOperand(i))); + return X; + } + + if (const SCEVUMaxExpr *UMax = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getUnsignedRange(UMax->getOperand(i))); + return X; + } - if (const SCEVZeroExtendExpr *C = dyn_cast(S)) { - // A zero-extension cast adds zero bits. - return GetMinLeadingZeros(C->getOperand()) + - (getTypeSizeInBits(C->getType()) - - getTypeSizeInBits(C->getOperand()->getType())); + if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(UDiv->getLHS()); + ConstantRange Y = getUnsignedRange(UDiv->getRHS()); + return X.udiv(Y); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(ZExt->getOperand()); + return X.zeroExtend(cast(ZExt->getType())->getBitWidth()); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(SExt->getOperand()); + return X.signExtend(cast(SExt->getType())->getBitWidth()); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast(S)) { + ConstantRange X = getUnsignedRange(Trunc->getOperand()); + return X.truncate(cast(Trunc->getType())->getBitWidth()); + } + + ConstantRange FullSet(getTypeSizeInBits(S->getType()), true); + + if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) { + const SCEV *T = getBackedgeTakenCount(AddRec->getLoop()); + const SCEVConstant *Trip = dyn_cast(T); + if (!Trip) return FullSet; + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + + // Check for overflow. + // TODO: This is very conservative. 
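The new getUnsignedRange folds ranges operand-by-operand: the range of an add is the sum of the operand ranges, and similarly for mul, smax, umax, and udiv. A toy stand-in for ConstantRange (plain C++; the real class also models wrapped ranges, which this sketch omits) shows the idea for the Add case:

    #include <cstdint>
    #include <limits>

    // Toy unsigned interval [Lo, Hi], a simplified stand-in for ConstantRange.
    struct URange { uint64_t Lo, Hi; };

    URange makeFull() {
      URange R;
      R.Lo = 0;
      R.Hi = std::numeric_limits<uint64_t>::max();
      return R;
    }

    // Range of a sum: add the bounds, giving up (full set) on overflow,
    // mirroring the SCEVAddExpr case above.
    URange addRanges(URange A, URange B) {
      URange R;
      R.Lo = A.Lo + B.Lo;
      R.Hi = A.Hi + B.Hi;
      if (R.Lo < A.Lo || R.Hi < A.Hi)
        return makeFull();
      return R;
    }

    int main() {
      URange X = {10, 20}, Y = {1, 5};
      URange S = addRanges(X, Y); // [11, 25]
      return (S.Lo == 11 && S.Hi == 25) ? 0 : 1;
    }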
+ if (!(Step->isOne() && + isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) && + !(Step->isAllOnesValue() && + isKnownPredicate(ICmpInst::ICMP_UGT, Start, End))) + return FullSet; + + ConstantRange StartRange = getUnsignedRange(Start); + ConstantRange EndRange = getUnsignedRange(End); + APInt Min = APIntOps::umin(StartRange.getUnsignedMin(), + EndRange.getUnsignedMin()); + APInt Max = APIntOps::umax(StartRange.getUnsignedMax(), + EndRange.getUnsignedMax()); + if (Min.isMinValue() && Max.isMaxValue()) + return FullSet; + return ConstantRange(Min, Max+1); + } + } } if (const SCEVUnknown *U = dyn_cast(S)) { @@ -2507,73 +2808,128 @@ ScalarEvolution::GetMinLeadingZeros(const SCEV* S) { APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); - return Zeros.countLeadingOnes(); + if (Ones == ~Zeros + 1) + return FullSet; + return ConstantRange(Ones, ~Zeros + 1); } - return 1; + return FullSet; } -uint32_t -ScalarEvolution::GetMinSignBits(const SCEV* S) { - // TODO: Handle other SCEV expression types here. +/// getSignedRange - Determine the signed range for a particular SCEV. +/// +ConstantRange +ScalarEvolution::getSignedRange(const SCEV *S) { - if (const SCEVConstant *C = dyn_cast(S)) { - const APInt &A = C->getValue()->getValue(); - return A.isNegative() ? A.countLeadingOnes() : - A.countLeadingZeros(); + if (const SCEVConstant *C = dyn_cast(S)) + return ConstantRange(C->getValue()->getValue()); + + if (const SCEVAddExpr *Add = dyn_cast(S)) { + ConstantRange X = getSignedRange(Add->getOperand(0)); + for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) + X = X.add(getSignedRange(Add->getOperand(i))); + return X; } - if (const SCEVSignExtendExpr *C = dyn_cast(S)) { - // A sign-extension cast adds sign bits. - return GetMinSignBits(C->getOperand()) + - (getTypeSizeInBits(C->getType()) - - getTypeSizeInBits(C->getOperand()->getType())); + if (const SCEVMulExpr *Mul = dyn_cast(S)) { + ConstantRange X = getSignedRange(Mul->getOperand(0)); + for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) + X = X.multiply(getSignedRange(Mul->getOperand(i))); + return X; } - if (const SCEVAddExpr *A = dyn_cast(S)) { - unsigned BitWidth = getTypeSizeInBits(A->getType()); - - // Special case decrementing a value (ADD X, -1): - if (const SCEVConstant *CRHS = dyn_cast(A->getOperand(0))) - if (CRHS->isAllOnesValue()) { - SmallVector OtherOps(A->op_begin() + 1, A->op_end()); - const SCEV *OtherOpsAdd = getAddExpr(OtherOps); - unsigned LZ = GetMinLeadingZeros(OtherOpsAdd); - - // If the input is known to be 0 or 1, the output is 0/-1, which is all - // sign bits set. - if (LZ == BitWidth - 1) - return BitWidth; - - // If we are subtracting one from a positive number, there is no carry - // out of the result. 
- if (LZ > 0) - return GetMinSignBits(OtherOpsAdd); - } + if (const SCEVSMaxExpr *SMax = dyn_cast(S)) { + ConstantRange X = getSignedRange(SMax->getOperand(0)); + for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) + X = X.smax(getSignedRange(SMax->getOperand(i))); + return X; + } + + if (const SCEVUMaxExpr *UMax = dyn_cast(S)) { + ConstantRange X = getSignedRange(UMax->getOperand(0)); + for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) + X = X.umax(getSignedRange(UMax->getOperand(i))); + return X; + } + + if (const SCEVUDivExpr *UDiv = dyn_cast(S)) { + ConstantRange X = getSignedRange(UDiv->getLHS()); + ConstantRange Y = getSignedRange(UDiv->getRHS()); + return X.udiv(Y); + } + + if (const SCEVZeroExtendExpr *ZExt = dyn_cast(S)) { + ConstantRange X = getSignedRange(ZExt->getOperand()); + return X.zeroExtend(cast(ZExt->getType())->getBitWidth()); + } + + if (const SCEVSignExtendExpr *SExt = dyn_cast(S)) { + ConstantRange X = getSignedRange(SExt->getOperand()); + return X.signExtend(cast(SExt->getType())->getBitWidth()); + } + + if (const SCEVTruncateExpr *Trunc = dyn_cast(S)) { + ConstantRange X = getSignedRange(Trunc->getOperand()); + return X.truncate(cast(Trunc->getType())->getBitWidth()); + } - // Add can have at most one carry bit. Thus we know that the output - // is, at worst, one more bit than the inputs. - unsigned Min = BitWidth; - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { - unsigned N = GetMinSignBits(A->getOperand(i)); - Min = std::min(Min, N) - 1; - if (Min == 0) return 1; + ConstantRange FullSet(getTypeSizeInBits(S->getType()), true); + + if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) { + const SCEV *T = getBackedgeTakenCount(AddRec->getLoop()); + const SCEVConstant *Trip = dyn_cast(T); + if (!Trip) return FullSet; + + // TODO: non-affine addrec + if (AddRec->isAffine()) { + const Type *Ty = AddRec->getType(); + const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop()); + if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) { + MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty); + + const SCEV *Start = AddRec->getStart(); + const SCEV *Step = AddRec->getStepRecurrence(*this); + const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this); + + // Check for overflow. + // TODO: This is very conservative. + if (!(Step->isOne() && + isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) && + !(Step->isAllOnesValue() && + isKnownPredicate(ICmpInst::ICMP_SGT, Start, End))) + return FullSet; + + ConstantRange StartRange = getSignedRange(Start); + ConstantRange EndRange = getSignedRange(End); + APInt Min = APIntOps::smin(StartRange.getSignedMin(), + EndRange.getSignedMin()); + APInt Max = APIntOps::smax(StartRange.getSignedMax(), + EndRange.getSignedMax()); + if (Min.isMinSignedValue() && Max.isMaxSignedValue()) + return FullSet; + return ConstantRange(Min, Max+1); + } } - return 1; } if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. - return ComputeNumSignBits(U->getValue(), TD); + unsigned BitWidth = getTypeSizeInBits(U->getType()); + unsigned NS = ComputeNumSignBits(U->getValue(), TD); + if (NS == 1) + return FullSet; + return + ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), + APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1); } - return 1; + return FullSet; } /// createSCEV - We know that there is no SCEV for the specified value. /// Analyze the expression. 
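The ConstantRange constructed from ComputeNumSignBits just above encodes a simple fact: NS known sign bits in a w-bit value means the value fits in w - NS + 1 signed bits. A tiny check of the arithmetic (plain C++; relies on arithmetic right shift of negative values, which is the behavior the APInt::ashr calls make explicit):

    #include <cassert>
    #include <cstdint>

    int main() {
      // With 8-bit width and NS = 3 known sign bits, the value fits in
      // 8 - 3 + 1 = 6 signed bits, i.e. [-32, 31]: exactly
      // getSignedMinValue(8).ashr(NS-1) through getSignedMaxValue(8).ashr(NS-1).
      const unsigned NS = 3;
      assert((INT8_MIN >> (NS - 1)) == -32);
      assert((INT8_MAX >> (NS - 1)) == 31);
      return 0;
    }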
/// -const SCEV* ScalarEvolution::createSCEV(Value *V) { +const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) return getUnknown(V); @@ -2588,15 +2944,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { return getIntegerSCEV(0, V->getType()); else if (isa(V)) return getIntegerSCEV(0, V->getType()); + else if (GlobalAlias *GA = dyn_cast(V)) + return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee()); else return getUnknown(V); - User *U = cast(V); + Operator *U = cast(V); switch (Opcode) { case Instruction::Add: + // Don't transfer the NSW and NUW bits from the Add instruction to the + // Add expression, because the Instruction may be guarded by control + // flow and the no-overflow bits may not be valid for the expression in + // any context. return getAddExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::Mul: + // Don't transfer the NSW and NUW bits from the Mul instruction to the + // Mul expression, as with Add. return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::UDiv: @@ -2630,7 +2994,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask)) return getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)), - IntegerType::get(BitWidth - LZ)), + IntegerType::get(getContext(), BitWidth - LZ)), U->getType()); } break; @@ -2643,11 +3007,23 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { // In order for this transformation to be safe, the LHS must be of the // form X*(2^n) and the Or constant must be less than 2^n. if (ConstantInt *CI = dyn_cast(U->getOperand(1))) { - const SCEV* LHS = getSCEV(U->getOperand(0)); + const SCEV *LHS = getSCEV(U->getOperand(0)); const APInt &CIVal = CI->getValue(); if (GetMinTrailingZeros(LHS) >= - (CIVal.getBitWidth() - CIVal.countLeadingZeros())) - return getAddExpr(LHS, getSCEV(U->getOperand(1))); + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast(S)) { + const SCEVAddRecExpr *OldAR = cast(LHS); + if (OldAR->hasNoUnsignedWrap()) + const_cast(NewAR)->setHasNoUnsignedWrap(true); + if (OldAR->hasNoSignedWrap()) + const_cast(NewAR)->setHasNoSignedWrap(true); + } + return S; + } } break; case Instruction::Xor: @@ -2673,7 +3049,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { if (const SCEVZeroExtendExpr *Z = dyn_cast(getSCEV(U->getOperand(0)))) { const Type *UTy = U->getType(); - const SCEV* Z0 = Z->getOperand(); + const SCEV *Z0 = Z->getOperand(); const Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); @@ -2699,7 +3075,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { // Turn shift left of a constant amount into a multiply. if (ConstantInt *SA = dyn_cast(U->getOperand(1))) { uint32_t BitWidth = cast(V->getType())->getBitWidth(); - Constant *X = ConstantInt::get( + Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } @@ -2709,7 +3085,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { // Turn logical shift right of a constant into an unsigned divide.
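The Or-to-Add rewrite above is easy to sanity-check: when all set bits of the constant lie below the known trailing zeros of the LHS, no bit positions overlap, so the or can never carry and behaves exactly like an add. A standalone check (plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      // LHS = x*8 has at least 3 trailing zero bits; 5 < 8 fits entirely
      // inside them, so (x*8) | 5 == x*8 + 5 for every x.
      for (uint64_t x = 0; x != 1000; ++x)
        assert(((x * 8) | 5) == x * 8 + 5);
      return 0;
    }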
if (ConstantInt *SA = dyn_cast(U->getOperand(1))) { uint32_t BitWidth = cast(V->getType())->getBitWidth(); - Constant *X = ConstantInt::get( + Constant *X = ConstantInt::get(getContext(), APInt(BitWidth, 1).shl(SA->getLimitedValue(BitWidth))); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } @@ -2729,7 +3105,7 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { return getIntegerSCEV(0, U->getType()); // value is undefined return getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)), - IntegerType::get(Amt)), + IntegerType::get(getContext(), Amt)), U->getType()); } break; @@ -2749,18 +3125,12 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; - case Instruction::IntToPtr: - if (!TD) break; // Without TD we can't analyze pointers. - return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)), - TD->getIntPtrType()); - - case Instruction::PtrToInt: - if (!TD) break; // Without TD we can't analyze pointers. - return getTruncateOrZeroExtend(getSCEV(U->getOperand(0)), - U->getType()); + // It's tempting to handle inttoptr and ptrtoint, however this can + // lead to pointer expressions which cannot be expanded to GEPs + // (because they may overflow). For now, the only pointer-typed + // expressions we handle are GEPs and address literals. case Instruction::GetElementPtr: - if (!TD) break; // Without TD we can't analyze pointers. return createNodeForGEP(U); case Instruction::PHI: @@ -2842,17 +3212,29 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) { /// loop-invariant backedge-taken count (see /// hasLoopInvariantBackedgeTakenCount). /// -const SCEV* ScalarEvolution::getBackedgeTakenCount(const Loop *L) { +const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).Exact; } /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except /// return the least SCEV value that is known never to be less than the /// actual backedge taken count. -const SCEV* ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { +const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenInfo(L).Max; } +/// PushLoopPHIs - Push PHI nodes in the header of the given loop +/// onto the given Worklist. +static void +PushLoopPHIs(const Loop *L, SmallVectorImpl &Worklist) { + BasicBlock *Header = L->getHeader(); + + // Push all Loop-header PHIs onto the Worklist stack. + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast(I); ++I) + Worklist.push_back(PN); +} + const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert a CouldNotCompute for this loop. If the insertion @@ -2883,10 +3265,39 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Now that we know more about the trip count for this loop, forget any // existing SCEV values for PHI nodes in this loop since they are only - // conservative estimates made without the benefit - // of trip count information. - if (ItCount.hasAnyInfo()) - forgetLoopPHIs(L); + // conservative estimates made without the benefit of trip count + // information. This is similar to the code in + // forgetLoopBackedgeTakenCount, except that it handles SCEVUnknown PHI + // nodes specially. 
+ if (ItCount.hasAnyInfo()) { + SmallVector Worklist; + PushLoopPHIs(L, Worklist); + + SmallPtrSet Visited; + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + std::map::iterator It = + Scalars.find(static_cast(I)); + if (It != Scalars.end()) { + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, or it's a PHI that's in the progress of being computed + // by createNodeForPHI. In the former case, additional loop trip + // count information isn't going to change anything. In the latter + // case, createNodeForPHI will perform the necessary updates on its + // own when it gets to that point. + if (!isa(I) || !isa(It->second)) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + } + if (PHINode *PN = dyn_cast(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } + } } return Pair.first->second; } @@ -2897,37 +3308,25 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { /// is deleted. void ScalarEvolution::forgetLoopBackedgeTakenCount(const Loop *L) { BackedgeTakenCounts.erase(L); - forgetLoopPHIs(L); -} -/// forgetLoopPHIs - Delete the memoized SCEVs associated with the -/// PHI nodes in the given loop. This is used when the trip count of -/// the loop may have changed. -void ScalarEvolution::forgetLoopPHIs(const Loop *L) { - BasicBlock *Header = L->getHeader(); - - // Push all Loop-header PHIs onto the Worklist stack, except those - // that are presently represented via a SCEVUnknown. SCEVUnknown for - // a PHI either means that it has an unrecognized structure, or it's - // a PHI that's in the progress of being computed by createNodeForPHI. - // In the former case, additional loop trip count information isn't - // going to change anything. In the later case, createNodeForPHI will - // perform the necessary updates on its own when it gets to that point. SmallVector Worklist; - for (BasicBlock::iterator I = Header->begin(); - PHINode *PN = dyn_cast(I); ++I) { - std::map::iterator It = - Scalars.find((Value*)I); - if (It != Scalars.end() && !isa(It->second)) - Worklist.push_back(PN); - } + PushLoopPHIs(L, Worklist); + SmallPtrSet Visited; while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); - if (Scalars.erase(I)) - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) - Worklist.push_back(cast(UI)); + if (!Visited.insert(I)) continue; + + std::map::iterator It = + Scalars.find(static_cast(I)); + if (It != Scalars.end()) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + if (PHINode *PN = dyn_cast(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); } } @@ -2939,8 +3338,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { L->getExitingBlocks(ExitingBlocks); // Examine all exits and pick the most conservative values.
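Both invalidation paths above use the same def-use walk: seed a worklist with the loop-header PHIs, erase each node's cached entry, and push its users, with a Visited set so shared users are processed once. An abstract sketch of that pattern (plain C++; Node stands in for Instruction, Users for the use lists, Cache for the Scalars map):

    #include <map>
    #include <set>
    #include <vector>

    typedef int Node;

    void invalidate(std::vector<Node> Worklist,
                    std::map<Node, std::vector<Node> > &Users,
                    std::map<Node, int> &Cache) {
      std::set<Node> Visited;
      while (!Worklist.empty()) {
        Node N = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(N).second) continue; // already processed
        Cache.erase(N);                          // drop the memoized entry
        std::vector<Node> &Us = Users[N];        // then walk downstream
        Worklist.insert(Worklist.end(), Us.begin(), Us.end());
      }
    }

    int main() {
      std::map<Node, std::vector<Node> > Users;
      Users[1].push_back(2);
      Users[2].push_back(3);
      std::map<Node, int> Cache;
      Cache[1] = Cache[2] = Cache[3] = 42;
      invalidate(std::vector<Node>(1, 1), Users, Cache); // seed with node 1
      return Cache.empty() ? 0 : 1;                      // all three erased
    }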
- const SCEV* BECount = getCouldNotCompute(); - const SCEV* MaxBECount = getCouldNotCompute(); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); bool CouldNotComputeBECount = false; for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BackedgeTakenInfo NewBTI = @@ -3049,8 +3448,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - const SCEV* BECount = getCouldNotCompute(); - const SCEV* MaxBECount = getCouldNotCompute(); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(TBB)) { // Both conditions must be true for the loop to continue executing. // Choose the less conservative count. @@ -3084,8 +3483,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB); BackedgeTakenInfo BTI1 = ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB); - const SCEV* BECount = getCouldNotCompute(); - const SCEV* MaxBECount = getCouldNotCompute(); + const SCEV *BECount = getCouldNotCompute(); + const SCEV *MaxBECount = getCouldNotCompute(); if (L->contains(FBB)) { // Both conditions must be false for the loop to continue executing. // Choose the less conservative count. @@ -3143,7 +3542,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast(ExitCond->getOperand(1))) { - const SCEV* ItCnt = + const SCEV *ItCnt = ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); if (!isa(ItCnt)) { unsigned BitWidth = getTypeSizeInBits(ItCnt->getType()); @@ -3153,8 +3552,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, } } - const SCEV* LHS = getSCEV(ExitCond->getOperand(0)); - const SCEV* RHS = getSCEV(ExitCond->getOperand(1)); + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); + const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); // Try to evaluate any dependencies out of the loop. 
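The And/Or handling above reduces to a simple observation about multi-condition exits: if the loop continues only while both conditions hold, it runs for the smaller of the two counts; if it continues while either holds, for the larger. A concrete check of the "both must be true" case (plain C++):

    #include <algorithm>
    #include <cassert>

    int main() {
      // Continue while (i < n0 && i < n1): the combined trip count is
      // min(n0, n1), the less conservative choice the code above makes.
      unsigned n0 = 7, n1 = 10, trips = 0;
      for (unsigned i = 0; i < n0 && i < n1; ++i)
        ++trips;
      assert(trips == std::min(n0, n1));
      return 0;
    }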
LHS = getSCEVAtScope(LHS, L); @@ -3177,20 +3576,20 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, ConstantRange CompRange( ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); - const SCEV* Ret = AddRec->getNumIterationsInRange(CompRange, *this); + const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa(Ret)) return Ret; } switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - const SCEV* TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); + const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); if (!isa(TC)) return TC; break; } - case ICmpInst::ICMP_EQ: { - // Convert to: while (X-Y == 0) // while (X == Y) - const SCEV* TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + case ICmpInst::ICMP_EQ: { // while (X == Y) + // Convert to: while (X-Y == 0) + const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); if (!isa(TC)) return TC; break; } @@ -3234,8 +3633,8 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, static ConstantInt * EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, ScalarEvolution &SE) { - const SCEV* InVal = SE.getConstant(C); - const SCEV* Val = AddRec->evaluateAtIteration(InVal, SE); + const SCEV *InVal = SE.getConstant(C); + const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); assert(isa(Val) && "Evaluation of SCEV at constant didn't fold correctly?"); return cast(Val)->getValue(); @@ -3246,7 +3645,7 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, /// the addressed element of the initializer or null if the index expression is /// invalid. static Constant * -GetAddressedElementFromGlobal(GlobalVariable *GV, +GetAddressedElementFromGlobal(LLVMContext &Context, GlobalVariable *GV, const std::vector &Indices) { Constant *Init = GV->getInitializer(); for (unsigned i = 0, e = Indices.size(); i != e; ++i) { @@ -3265,7 +3664,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, if (Idx >= ATy->getNumElements()) return 0; // Bogus program Init = Constant::getNullValue(ATy->getElementType()); } else { - assert(0 && "Unknown constant aggregate type!"); + llvm_unreachable("Unknown constant aggregate type!"); } return 0; } else { @@ -3293,7 +3692,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( // Make sure that it is really a constant global we are gepping, with an // initializer, and make sure the first IDX is really 0. GlobalVariable *GV = dyn_cast(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasInitializer() || + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || GEP->getNumOperands() < 3 || !isa(GEP->getOperand(1)) || !cast(GEP->getOperand(1))->isNullValue()) return getCouldNotCompute(); @@ -3314,7 +3713,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. 
- const SCEV* Idx = getSCEV(VarIdx); + const SCEV *Idx = getSCEV(VarIdx); Idx = getSCEVAtScope(Idx, L); // We can only recognize very limited forms of loop index expressions, in @@ -3327,14 +3726,14 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( unsigned MaxSteps = MaxBruteForceIterations; for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { - ConstantInt *ItCst = - ConstantInt::get(cast(IdxExpr->getType()), IterationNum); + ConstantInt *ItCst = ConstantInt::get( + cast(IdxExpr->getType()), IterationNum); ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); + Constant *Result = GetAddressedElementFromGlobal(getContext(), GV, Indexes); if (Result == 0) break; // Cannot compute! // Evaluate the condition for this iteration. @@ -3418,6 +3817,7 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) { if (Constant *C = dyn_cast(V)) return C; if (GlobalValue *GV = dyn_cast(V)) return GV; Instruction *I = cast(V); + LLVMContext &Context = I->getParent()->getContext(); std::vector Operands; Operands.resize(I->getNumOperands()); @@ -3429,10 +3829,12 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) { if (const CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), - &Operands[0], Operands.size()); + &Operands[0], Operands.size(), + Context); else return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size()); + &Operands[0], Operands.size(), + Context); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -3487,7 +3889,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, } } -/// ComputeBackedgeTakenCountExhaustively - If the trip is known to execute a +/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a /// constant number of times (the condition evolves only from constants), /// try to evaluate a few iterations of the loop until the exit /// condition gets a value of ExitWhen (true or false). If we cannot @@ -3526,7 +3928,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, if (CondVal->getValue() == uint64_t(ExitWhen)) { ++NumBruteForceTripCountsComputed; - return getConstant(Type::Int32Ty, IterationNum); + return getConstant(Type::getInt32Ty(getContext()), IterationNum); } // Compute the value of the PHI node for the next iteration. @@ -3540,7 +3942,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, return getCouldNotCompute(); } -/// getSCEVAtScope - Return a SCEV expression handle for the specified value +/// getSCEVAtScope - Return a SCEV expression for the specified value /// at the specified scope in the program. The L value specifies a loop /// nest to evaluate the expression at, where null is the top-level or a /// specified loop is immediately inside of the loop. @@ -3550,9 +3952,21 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, /// /// In the case that a relevant loop exit value cannot be computed, the /// original value V is returned. -const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { - // FIXME: this should be turned into a virtual method on SCEV! +const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + // Check to see if we've folded this expression at this loop before.
+ std::map &Values = ValuesAtScopes[V]; + std::pair::iterator, bool> Pair = + Values.insert(std::make_pair(L, static_cast(0))); + if (!Pair.second) + return Pair.first->second ? Pair.first->second : V; + // Otherwise compute it. + const SCEV *C = computeSCEVAtScope(V, L); + ValuesAtScopes[V][L] = C; + return C; +} + +const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (isa(V)) return V; // If this instruction is evolved from a constant-evolving PHI, compute the @@ -3567,7 +3981,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // to see if the loop that contains it has a known backedge-taken // count. If so, we may be able to force computation of the exit // value. - const SCEV* BackedgeTakenCount = getBackedgeTakenCount(LI); + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI); if (const SCEVConstant *BTCC = dyn_cast(BackedgeTakenCount)) { // Okay, we know how many times the containing loop executes. If @@ -3585,13 +3999,6 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // the arguments into constants, and if so, try to constant propagate the // result. This is particularly useful for computing loop exit values. if (CanConstantFold(I)) { - // Check to see if we've folded this instruction at this loop before. - std::map &Values = ValuesAtScopes[I]; - std::pair::iterator, bool> Pair = - Values.insert(std::make_pair(L, static_cast(0))); - if (!Pair.second) - return Pair.first->second ? &*getSCEV(Pair.first->second) : V; - std::vector Operands; Operands.reserve(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { @@ -3605,7 +4012,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (!isSCEVable(Op->getType())) return V; - const SCEV* OpV = getSCEVAtScope(getSCEV(Op), L); + const SCEV* OpV = getSCEVAtScope(Op, L); if (const SCEVConstant *SC = dyn_cast(OpV)) { Constant *C = SC->getValue(); if (C->getType() != Op->getType()) @@ -3634,11 +4041,12 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { Constant *C; if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - &Operands[0], Operands.size()); + &Operands[0], Operands.size(), + getContext()); else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size()); - Pair.first->second = C; + &Operands[0], Operands.size(), + getContext()); return getSCEV(C); } } @@ -3651,7 +4059,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // Avoid performing the look-up in the common case where the specified // expression has no loop-variant portions. for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) { - const SCEV* OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); if (OpAtScope != Comm->getOperand(i)) { // Okay, at least one of these operands is loop variant but might be // foldable. Build a new instance of the folded commutative expression. @@ -3671,7 +4079,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { return getSMaxExpr(NewOps); if (isa(Comm)) return getUMaxExpr(NewOps); - assert(0 && "Unknown commutative SCEV type!"); + llvm_unreachable("Unknown commutative SCEV type!"); } } // If we got here, all operands are loop invariant. 
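The caching code at the top of the new getSCEVAtScope is worth noting: it inserts a null placeholder before computing, so a re-entrant query for the same (value, loop) pair sees "in progress" and returns the input unsimplified rather than recursing forever. A reduced model of that idiom (plain C++; the key and payload types are illustrative):

    #include <map>

    // Return a cached answer if present; otherwise reserve the slot with a
    // null placeholder, compute, and memoize. A lookup that hits the
    // placeholder (mid-computation) falls back to Input.
    const int *getAtScope(std::map<int, const int *> &Cache, int Key,
                          const int *Input, const int *Computed) {
      std::pair<std::map<int, const int *>::iterator, bool> P =
          Cache.insert(std::make_pair(Key, static_cast<const int *>(0)));
      if (!P.second)
        return P.first->second ? P.first->second : Input;
      P.first->second = Computed; // memoize the real answer
      return Computed;
    }

    int main() {
      std::map<int, const int *> Cache;
      static const int In = 1, Out = 2;
      const int *First = getAtScope(Cache, 7, &In, &Out);
      const int *Again = getAtScope(Cache, 7, &In, &Out); // cache hit
      return (First == &Out && Again == &Out) ? 0 : 1;
    }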
@@ -3679,8 +4087,8 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { } if (const SCEVUDivExpr *Div = dyn_cast(V)) { - const SCEV* LHS = getSCEVAtScope(Div->getLHS(), L); - const SCEV* RHS = getSCEVAtScope(Div->getRHS(), L); + const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L); + const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L); if (LHS == Div->getLHS() && RHS == Div->getRHS()) return Div; // must be loop invariant return getUDivExpr(LHS, RHS); @@ -3692,7 +4100,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (!L || !AddRec->getLoop()->contains(L->getHeader())) { // To evaluate this recurrence, we need to know how many times the AddRec // loop iterates. Compute this now. - const SCEV* BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; // Then, evaluate the AddRec. @@ -3702,33 +4110,36 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { } if (const SCEVZeroExtendExpr *Cast = dyn_cast(V)) { - const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getZeroExtendExpr(Op, Cast->getType()); } if (const SCEVSignExtendExpr *Cast = dyn_cast(V)) { - const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getSignExtendExpr(Op, Cast->getType()); } if (const SCEVTruncateExpr *Cast = dyn_cast(V)) { - const SCEV* Op = getSCEVAtScope(Cast->getOperand(), L); + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); if (Op == Cast->getOperand()) return Cast; // must be loop invariant return getTruncateExpr(Op, Cast->getType()); } - assert(0 && "Unknown SCEV type!"); + if (isa(V)) + return V; + + llvm_unreachable("Unknown SCEV type!"); return 0; } /// getSCEVAtScope - This is a convenience function which does /// getSCEVAtScope(getSCEV(V), L). -const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { +const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { return getSCEVAtScope(getSCEV(V), L); } @@ -3741,7 +4152,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { /// A and B isn't important. /// /// If the equation does not have a solution, SCEVCouldNotCompute is returned. -static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B, +static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, ScalarEvolution &SE) { uint32_t BW = A.getBitWidth(); assert(BW == B.getBitWidth() && "Bit widths must be the same."); @@ -3784,7 +4195,7 @@ static const SCEV* SolveLinEquationWithOverflow(const APInt &A, const APInt &B, /// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which /// might be the same) or two SCEVCouldNotCompute objects. 
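For reference, the chrec {L,+,M,+,N} evaluated at iteration i has the binomial closed form L + M*C(i,1) + N*C(i,2), so asking where it reaches a particular value is a quadratic in i; that is the equation SolveQuadraticEquation below solves with the integer quadratic formula. A standalone evaluation of the closed form (plain C++):

    #include <cassert>
    #include <cstdint>

    // Value of {L,+,M,+,N} at iteration i: L + M*i + N*(i*(i-1)/2).
    uint64_t quadChrecAt(uint64_t L, uint64_t M, uint64_t N, uint64_t i) {
      return L + M * i + N * (i * (i - 1) / 2);
    }

    int main() {
      // {0,+,1,+,1} yields 0, 1, 3, 6, 10, ... (the triangular numbers).
      assert(quadChrecAt(0, 1, 1, 0) == 0);
      assert(quadChrecAt(0, 1, 1, 4) == 10);
      return 0;
    }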
/// -static std::pair +static std::pair SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); const SCEVConstant *LC = dyn_cast(AddRec->getOperand(0)); @@ -3833,8 +4244,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { return std::make_pair(CNC, CNC); } - ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA)); - ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA)); + LLVMContext &Context = SE.getContext(); + + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); return std::make_pair(SE.getConstant(Solution1), SE.getConstant(Solution2)); @@ -3843,7 +4258,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// HowFarToZero - Return the number of times a backedge comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. -const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant if (const SCEVConstant *C = dyn_cast(V)) { // If the value is already zero, the branch will execute zero times. @@ -3878,7 +4293,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // First, handle unitary steps. if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: - return getNegativeSCEV(Start); // N = -Start (as unsigned) + return getNegativeSCEV(Start); // N = -Start (as unsigned) if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: return Start; // N = Start (as unsigned) @@ -3891,7 +4306,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. - std::pair Roots = SolveQuadraticEquation(AddRec, + std::pair Roots = SolveQuadraticEquation(AddRec, *this); const SCEVConstant *R1 = dyn_cast(Roots.first); const SCEVConstant *R2 = dyn_cast(Roots.second); @@ -3910,7 +4325,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // We can only use this value if the chrec ends up with an exact zero // value at this index. When solving for "X*X != 5", for example, we // should not accept a root of 2. - const SCEV* Val = AddRec->evaluateAtIteration(R1, *this); + const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); if (Val->isZero()) return R1; // We found a quadratic root! } @@ -3923,7 +4338,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { /// HowFarToNonZero - Return the number of times a backedge checking the /// specified value for nonzero will execute. If not computable, return /// CouldNotCompute -const SCEV* ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { +const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // Loops that look like: while (X == 0) are very strange indeed. We don't // handle them yet except for the trivial case. This could be expanded in the // future as needed. @@ -3984,7 +4399,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { /// more general, since a front-end may have replicated the controlling /// expression. 
/// -static bool HasSameValue(const SCEV* A, const SCEV* B) { +static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; @@ -3994,19 +4409,142 @@ static bool HasSameValue(const SCEV* A, const SCEV* B) { if (const SCEVUnknown *BU = dyn_cast(B)) if (const Instruction *AI = dyn_cast(AU->getValue())) if (const Instruction *BI = dyn_cast(BU->getValue())) - if (AI->isIdenticalTo(BI)) + if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) return true; // Otherwise assume they may have a different value. return false; } -/// isLoopGuardedByCond - Test whether entry to the loop is protected by -/// a conditional between LHS and RHS. This is used to help avoid max -/// expressions in loop trip counts. -bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, - ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS) { +bool ScalarEvolution::isKnownNegative(const SCEV *S) { + return getSignedRange(S).getSignedMax().isNegative(); +} + +bool ScalarEvolution::isKnownPositive(const SCEV *S) { + return getSignedRange(S).getSignedMin().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { + return !getSignedRange(S).getSignedMin().isNegative(); +} + +bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { + return !getSignedRange(S).getSignedMax().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonZero(const SCEV *S) { + return isKnownNegative(S) || isKnownPositive(S); +} + +bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + + if (HasSameValue(LHS, RHS)) + return ICmpInst::isTrueWhenEqual(Pred); + + switch (Pred) { + default: + llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + break; + case ICmpInst::ICMP_SGT: + Pred = ICmpInst::ICMP_SLT; + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLT: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_SGE: + Pred = ICmpInst::ICMP_SLE; + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLE: { + ConstantRange LHSRange = getSignedRange(LHS); + ConstantRange RHSRange = getSignedRange(RHS); + if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin())) + return true; + if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGT: + Pred = ICmpInst::ICMP_ULT; + std::swap(LHS, RHS); + case ICmpInst::ICMP_ULT: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_UGE: + Pred = ICmpInst::ICMP_ULE; + std::swap(LHS, RHS); + case ICmpInst::ICMP_ULE: { + ConstantRange LHSRange = getUnsignedRange(LHS); + ConstantRange RHSRange = getUnsignedRange(RHS); + if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin())) + return true; + if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax())) + return false; + break; + } + case ICmpInst::ICMP_NE: { + if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet()) + return true; + if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet()) + return true; + + const SCEV *Diff = getMinusSCEV(LHS, RHS); + if 
(isKnownNonZero(Diff)) + return true; + break; + } + case ICmpInst::ICMP_EQ: + // The check at the top of the function catches the case where + // the values are known to be equal. + break; + } + return false; +} + +/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is +/// protected by a conditional between LHS and RHS. This is used to +/// eliminate casts. +bool +ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) return true; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + + BranchInst *LoopContinuePredicate = + dyn_cast(Latch->getTerminator()); + if (!LoopContinuePredicate || + LoopContinuePredicate->isUnconditional()) + return false; + + return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS, + LoopContinuePredicate->getSuccessor(0) != L->getHeader()); +} + +/// isLoopGuardedByCond - Test whether entry to the loop is protected +/// by a conditional between LHS and RHS. This is used to help avoid max +/// expressions in loop trip counts, and to eliminate casts. +bool +ScalarEvolution::isLoopGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { // Interpret a null as meaning no loop, where there is obviously no guard // (interprocedural conditions notwithstanding). if (!L) return false; @@ -4027,136 +4565,308 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L, LoopEntryPredicate->isUnconditional()) continue; - if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, - LoopEntryPredicate->getSuccessor(0) != PredecessorDest)) + if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS, + LoopEntryPredicate->getSuccessor(0) != PredecessorDest)) return true; } return false; } -/// isNecessaryCond - Test whether the given CondValue value is a condition -/// which is at least as strict as the one described by Pred, LHS, and RHS. -bool ScalarEvolution::isNecessaryCond(Value *CondValue, - ICmpInst::Predicate Pred, - const SCEV *LHS, const SCEV *RHS, - bool Inverse) { +/// isImpliedCond - Test whether the condition described by Pred, LHS, +/// and RHS is true whenever the given Cond value evaluates to true. +bool ScalarEvolution::isImpliedCond(Value *CondValue, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + bool Inverse) { // Recursively handle And and Or conditions. if (BinaryOperator *BO = dyn_cast(CondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) - return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || - isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || + isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); } else if (BO->getOpcode() == Instruction::Or) { if (Inverse) - return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || - isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); + return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) || + isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse); } } ICmpInst *ICI = dyn_cast(CondValue); if (!ICI) return false; + // Bail if the ICmp's operands' types are wider than the needed type + // before attempting to call getSCEV on them.
This avoids infinite + // recursion, since the analysis of widening casts can require loop + // exit condition information for overflow checking, which would + // lead back here. + if (getTypeSizeInBits(LHS->getType()) < + getTypeSizeInBits(ICI->getOperand(0)->getType())) + return false; + // Now that we found a conditional branch that dominates the loop, check to // see if it is the comparison we are looking for. - Value *PreCondLHS = ICI->getOperand(0); - Value *PreCondRHS = ICI->getOperand(1); - ICmpInst::Predicate Cond; + ICmpInst::Predicate FoundPred; if (Inverse) - Cond = ICI->getInversePredicate(); + FoundPred = ICI->getInversePredicate(); else - Cond = ICI->getPredicate(); + FoundPred = ICI->getPredicate(); + + const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); + const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); + + // Balance the types. The case where FoundLHS' type is wider than + // LHS' type is checked for above. + if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(FoundLHS->getType())) { + if (CmpInst::isSigned(Pred)) { + FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); + } else { + FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); + } + } - if (Cond == Pred) - ; // An exact match. - else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE) - ; // The actual condition is beyond sufficient. - else - // Check a few special cases. - switch (Cond) { + // Canonicalize the query to match the way instcombine will have + // canonicalized the comparison. + // First, put a constant operand on the right. + if (isa(LHS)) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + // Then, canonicalize comparisons with boundary cases. 
+ if (const SCEVConstant *RC = dyn_cast(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMinValue()) return true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMaxValue()) return true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMinSignedValue()) return true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + break; + } + if (RA.isMaxSignedValue()) return true; + break; case ICmpInst::ICMP_UGT: - if (Pred == ICmpInst::ICMP_ULT) { - std::swap(PreCondLHS, PreCondRHS); - Cond = ICmpInst::ICMP_ULT; + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; break; } - return false; + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMaxValue()) return false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMinValue()) return false; + break; case ICmpInst::ICMP_SGT: - if (Pred == ICmpInst::ICMP_SLT) { - std::swap(PreCondLHS, PreCondRHS); - Cond = ICmpInst::ICMP_SLT; + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; break; } - return false; - case ICmpInst::ICMP_NE: - // Expressions like (x >u 0) are often canonicalized to (x != 0), - // so check for this case by checking if the NE is comparing against - // a minimum or maximum constant. - if (!ICmpInst::isTrueWhenEqual(Pred)) - if (ConstantInt *CI = dyn_cast(PreCondRHS)) { - const APInt &A = CI->getValue(); - switch (Pred) { - case ICmpInst::ICMP_SLT: - if (A.isMaxSignedValue()) break; - return false; - case ICmpInst::ICMP_SGT: - if (A.isMinSignedValue()) break; - return false; - case ICmpInst::ICMP_ULT: - if (A.isMaxValue()) break; - return false; - case ICmpInst::ICMP_UGT: - if (A.isMinValue()) break; - return false; - default: - return false; - } - Cond = ICmpInst::ICMP_NE; - // NE is symmetric but the original comparison may not be. Swap - // the operands if necessary so that they match below. - if (isa(LHS)) - std::swap(PreCondLHS, PreCondRHS); - break; - } - return false; - default: - // We weren't able to reconcile the condition. - return false; + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + break; + } + if (RA.isMaxSignedValue()) return false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + break; + } + if (RA.isMinSignedValue()) return false; + break; + } + } + + // Check to see if we can make the LHS or RHS match. 
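Two of the boundary rewrites in the switch above can be checked exhaustively over 8-bit values (plain C++): x >=u 1 is the same predicate as x != 0, and x <=u 0 the same as x == 0, which is why the code replaces them before trying to match the found condition.

    #include <cassert>

    int main() {
      for (unsigned x = 0; x != 256; ++x) {
        assert((x >= 1u) == (x != 0u)); // ICMP_UGE with RA == 1 -> ICMP_NE 0
        assert((x <= 0u) == (x == 0u)); // ICMP_ULE with RA == 0 -> ICMP_EQ 0
      }
      return 0;
    }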
+ if (LHS == FoundRHS || RHS == FoundLHS) { + if (isa(RHS)) { + std::swap(FoundLHS, FoundRHS); + FoundPred = ICmpInst::getSwappedPredicate(FoundPred); + } else { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); } + } - if (!PreCondLHS->getType()->isInteger()) return false; + // Check whether the found predicate is the same as the desired predicate. + if (FoundPred == Pred) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); - const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS); - const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS); - return (HasSameValue(LHS, PreCondLHSSCEV) && - HasSameValue(RHS, PreCondRHSSCEV)) || - (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) && - HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))); + // Check whether swapping the found predicate makes it the same as the + // desired predicate. + if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { + if (isa(RHS)) + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + else + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), + RHS, LHS, FoundLHS, FoundRHS); + } + + // Check whether the actual condition is beyond sufficient. + if (FoundPred == ICmpInst::ICMP_EQ) + if (ICmpInst::isTrueWhenEqual(Pred)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + if (Pred == ICmpInst::ICMP_NE) + if (!ICmpInst::isTrueWhenEqual(FoundPred)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + + // Otherwise assume the worst. + return false; +} + +/// isImpliedCondOperands - Test whether the condition described by Pred, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, +/// and FoundRHS is true. +bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + return isImpliedCondOperandsHelper(Pred, LHS, RHS, + FoundLHS, FoundRHS) || + // ~x < ~y --> x > y + isImpliedCondOperandsHelper(Pred, LHS, RHS, + getNotSCEV(FoundRHS), + getNotSCEV(FoundLHS)); +} + +/// isImpliedCondOperandsHelper - Test whether the condition described by +/// Pred, LHS, and RHS is true whenever the condition described by Pred, +/// FoundLHS, and FoundRHS is true.
+bool +ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + if (isKnownPredicate(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + if (isKnownPredicate(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (isKnownPredicate(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (isKnownPredicate(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicate(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + return true; + break; + } + + return false; } /// getBECount - Subtract the end and start values and divide by the step, /// rounding up, to get the number of times the backedge is executed. Return /// CouldNotCompute if an intermediate computation overflows. -const SCEV* ScalarEvolution::getBECount(const SCEV* Start, - const SCEV* End, - const SCEV* Step) { +const SCEV *ScalarEvolution::getBECount(const SCEV *Start, + const SCEV *End, + const SCEV *Step, + bool NoWrap) { const Type *Ty = Start->getType(); - const SCEV* NegOne = getIntegerSCEV(-1, Ty); - const SCEV* Diff = getMinusSCEV(End, Start); - const SCEV* RoundUp = getAddExpr(Step, NegOne); + const SCEV *NegOne = getIntegerSCEV(-1, Ty); + const SCEV *Diff = getMinusSCEV(End, Start); + const SCEV *RoundUp = getAddExpr(Step, NegOne); // Add an adjustment to the difference between End and Start so that // the division will effectively round up. - const SCEV* Add = getAddExpr(Diff, RoundUp); - - // Check Add for unsigned overflow. - // TODO: More sophisticated things could be done here. - const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1); - const SCEV* OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Diff, WideTy), - getZeroExtendExpr(RoundUp, WideTy)); - if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) - return getCouldNotCompute(); + const SCEV *Add = getAddExpr(Diff, RoundUp); + + if (!NoWrap) { + // Check Add for unsigned overflow. + // TODO: More sophisticated things could be done here. + const Type *WideTy = IntegerType::get(getContext(), + getTypeSizeInBits(Ty) + 1); + const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); + const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); + const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp); + if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) + return getCouldNotCompute(); + } return getUDivExpr(Add, Step); } @@ -4174,10 +4884,14 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (!AddRec || AddRec->getLoop() != L) return getCouldNotCompute(); + // Check to see if we have a flag which makes analysis easy. + bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() : + AddRec->hasNoUnsignedWrap(); + if (AddRec->isAffine()) { // FORNOW: We only support unit strides. 
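getBECount above computes the round-up division (End - Start + (Step - 1)) / Step, which for an increasing induction variable counts the iterations satisfying i <u End; the new NoWrap parameter lets callers skip the widening overflow check when the addrec carries a no-wrap flag. A quick numeric check of the formula (plain C++):

    #include <cassert>
    #include <cstdint>

    // (End - Start + (Step - 1)) / Step, all unsigned: the number of
    // values Start, Start+Step, ... that are below End.
    uint64_t beCount(uint64_t Start, uint64_t End, uint64_t Step) {
      return (End - Start + (Step - 1)) / Step;
    }

    int main() {
      uint64_t n = 0;
      for (uint64_t i = 3; i < 20; i += 4)
        ++n;                          // i = 3, 7, 11, 15, 19
      assert(n == 5 && n == beCount(3, 20, 4));
      return 0;
    }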
unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); - const SCEV* Step = AddRec->getStepRecurrence(*this); + const SCEV *Step = AddRec->getStepRecurrence(*this); // TODO: handle non-constant strides. const SCEVConstant *CStep = dyn_cast(Step); @@ -4186,7 +4900,10 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, if (CStep->isOne()) { // With unit stride, the iteration never steps past the limit value. } else if (CStep->getValue()->getValue().isStrictlyPositive()) { - if (const SCEVConstant *CLimit = dyn_cast(RHS)) { + if (NoWrap) { + // We know the iteration won't step past the maximum value for its type. + ; + } else if (const SCEVConstant *CLimit = dyn_cast(RHS)) { // Test whether a positive iteration can step past the limit // value and past the maximum value for its type in a single step. if (isSigned) { @@ -4213,39 +4930,37 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // treat m-n as signed nor unsigned due to overflow possibility. // First, we get the value of the LHS in the first iteration: n - const SCEV* Start = AddRec->getOperand(0); + const SCEV *Start = AddRec->getOperand(0); // Determine the minimum constant start value. - const SCEV *MinStart = isa(Start) ? Start : - getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) : - APInt::getMinValue(BitWidth)); + const SCEV *MinStart = getConstant(isSigned ? + getSignedRange(Start).getSignedMin() : + getUnsignedRange(Start).getUnsignedMin()); // If we know that the condition is true in order to enter the loop, // then we know that it will run exactly (m-n)/s times. Otherwise, we // only know that it will execute (max(m,n)-n)/s times. In both cases, // the division must round up. - const SCEV* End = RHS; + const SCEV *End = RHS; if (!isLoopGuardedByCond(L, - isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + isSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, getMinusSCEV(Start, Step), RHS)) End = isSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); // Determine the maximum constant end value. - const SCEV* MaxEnd = - isa(End) ? End : - getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) - .ashr(GetMinSignBits(End) - 1) : - APInt::getMaxValue(BitWidth) - .lshr(GetMinLeadingZeros(End))); + const SCEV *MaxEnd = getConstant(isSigned ? + getSignedRange(End).getSignedMax() : + getUnsignedRange(End).getUnsignedMax()); // Finally, we subtract these two values and divide, rounding up, to get // the number of times the backedge is executed. - const SCEV* BECount = getBECount(Start, End, Step); + const SCEV *BECount = getBECount(Start, End, Step, NoWrap); // The maximum backedge count is similar, except using the minimum start // value and the maximum end value. - const SCEV* MaxBECount = getBECount(MinStart, MaxEnd, Step); + const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap); return BackedgeTakenInfo(BECount, MaxBECount); } @@ -4258,7 +4973,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, /// this is that it returns the first iteration number where the value is not in /// the condition, thus computing the exit count. If the iteration count can't /// be computed, an instance of SCEVCouldNotCompute is returned. -const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, +const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, ScalarEvolution &SE) const { if (Range.isFullSet()) // Infinite loop.
return SE.getCouldNotCompute(); @@ -4266,9 +4981,9 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // If the start is a non-zero constant, shift the range to simplify things. if (const SCEVConstant *SC = dyn_cast(getStart())) if (!SC->getValue()->isZero()) { - SmallVector Operands(op_begin(), op_end()); + SmallVector Operands(op_begin(), op_end()); Operands[0] = SE.getIntegerSCEV(0, SC->getType()); - const SCEV* Shifted = SE.getAddRecExpr(Operands, getLoop()); + const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop()); if (const SCEVAddRecExpr *ShiftedAddRec = dyn_cast(Shifted)) return ShiftedAddRec->getNumIterationsInRange( @@ -4307,7 +5022,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // The exit value should be (End+A)/A. APInt ExitVal = (End + A).udiv(A); - ConstantInt *ExitValue = ConstantInt::get(ExitVal); + ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); // Evaluate at the exit value. If we really did fall out of the valid // range, then we computed our trip count, otherwise wrap around or other @@ -4319,7 +5034,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // Ensure that the previous value is in the range. This is a sanity check. assert(Range.contains( EvaluateConstantChrecAtConstant(this, - ConstantInt::get(ExitVal - One), SE)->getValue()) && + ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); } else if (isQuadratic()) { @@ -4327,12 +5042,12 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // quadratic equation to solve it. To do this, we must frame our problem in // terms of figuring out when zero is crossed, instead of when // Range.getUpper() is crossed. - SmallVector NewOps(op_begin(), op_end()); + SmallVector NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); - const SCEV* NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); // Next, solve the constructed addrec - std::pair Roots = + std::pair Roots = SolveQuadraticEquation(cast(NewAddRec), SE); const SCEVConstant *R1 = dyn_cast(Roots.first); const SCEVConstant *R2 = dyn_cast(Roots.second); @@ -4340,7 +5055,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // Pick the smallest positive root value. if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, - R1->getValue(), R2->getValue()))) { + R1->getValue(), R2->getValue()))) { if (CB->getZExtValue() == false) std::swap(R1, R2); // R1 is the minimum root now. @@ -4352,7 +5067,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, SE); if (Range.contains(R1Val->getValue())) { // The next iteration must be out of the range... - ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()+1); + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (!Range.contains(R1Val->getValue())) @@ -4362,7 +5078,8 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // If R1 was not in the range, then it is a good return value. Make // sure that R1-1 WAS in the range though, just in case. 
- ConstantInt *NextVal = ConstantInt::get(R1->getValue()->getValue()-1); + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); if (Range.contains(R1Val->getValue())) return R1; @@ -4381,22 +5098,21 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, //===----------------------------------------------------------------------===// void ScalarEvolution::SCEVCallbackVH::deleted() { - assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!"); + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); if (PHINode *PN = dyn_cast(getValPtr())) SE->ConstantEvolutionLoopExitValue.erase(PN); - if (Instruction *I = dyn_cast(getValPtr())) - SE->ValuesAtScopes.erase(I); SE->Scalars.erase(getValPtr()); // this now dangles! } void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) { - assert(SE && "SCEVCallbackVH called with a non-null ScalarEvolution!"); + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); // Forget all the expressions associated with users of the old value, // so that future queries will recompute the expressions using the new // value. SmallVector Worklist; + SmallPtrSet Visited; Value *Old = getValPtr(); bool DeleteOld = false; for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); @@ -4410,20 +5126,19 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) { DeleteOld = true; continue; } + if (!Visited.insert(U)) + continue; if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); - if (Instruction *I = dyn_cast(U)) - SE->ValuesAtScopes.erase(I); - if (SE->Scalars.erase(U)) - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); - UI != UE; ++UI) - Worklist.push_back(*UI); + SE->Scalars.erase(U); + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); } + // Delete the Old value if it (indirectly) references itself. if (DeleteOld) { if (PHINode *PN = dyn_cast(Old)) SE->ConstantEvolutionLoopExitValue.erase(PN); - if (Instruction *I = dyn_cast(Old)) - SE->ValuesAtScopes.erase(I); SE->Scalars.erase(Old); // this now dangles! } @@ -4502,21 +5217,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't - // observable from outside the class though (the hasSCEV function - // notwithstanding), so casting away the const isn't dangerous. + // observable from outside the class though, so casting away the + // const isn't dangerous. 
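The allUsesReplacedWith walk above is a standard worklist traversal; the new Visited set guarantees each transitive user is erased once even when it is reachable along many def-use paths. The shape of the walk, reduced to a generic graph (an illustrative sketch; Node, UsersOf, and Erase are placeholders, not LLVM API):

#include <functional>
#include <set>
#include <vector>

// Erase every node transitively reachable through UsersOf, visiting each
// node once; without the visited set a diamond-shaped use graph would be
// re-walked once per path, which is what the patch avoids.
template <typename Node>
void invalidateTransitiveUsers(
    Node *Root,
    const std::function<std::vector<Node *>(Node *)> &UsersOf,
    const std::function<void(Node *)> &Erase) {
  std::vector<Node *> Worklist = UsersOf(Root);
  std::set<Node *> Visited;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue;                              // already handled
    Erase(N);
    const std::vector<Node *> Users = UsersOf(N);
    Worklist.insert(Worklist.end(), Users.begin(), Users.end());
  }
}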
ScalarEvolution &SE = *const_cast(this); OS << "Classifying expressions for: " << F->getName() << "\n"; for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) if (isSCEVable(I->getType())) { - OS << *I; + OS << *I << '\n'; OS << " --> "; - const SCEV* SV = SE.getSCEV(&*I); + const SCEV *SV = SE.getSCEV(&*I); SV->print(OS); const Loop *L = LI->getLoopFor((*I).getParent()); - const SCEV* AtUse = SE.getSCEVAtScope(SV, L); + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); if (AtUse != SV) { OS << " --> "; AtUse->print(OS); @@ -4524,7 +5239,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { if (L) { OS << "\t\t" "Exits: "; - const SCEV* ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) { OS << "<>"; } else { @@ -4540,7 +5255,3 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const { PrintLoopInfo(OS, &SE, *I); } -void ScalarEvolution::print(std::ostream &o, const Module *M) const { - raw_os_ostream OS(o); - print(OS, M); -} diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp new file mode 100644 index 0000000000000..cc79e6c3b1304 --- /dev/null +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -0,0 +1,133 @@ +//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a +// simple alias analysis implemented in terms of ScalarEvolution queries. +// +// ScalarEvolution has a more complete understanding of pointer arithmetic +// than BasicAliasAnalysis' collection of ad-hoc analyses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +using namespace llvm; + +namespace { + /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis + /// implementation that uses ScalarEvolution to answer queries. + class VISIBILITY_HIDDEN ScalarEvolutionAliasAnalysis : public FunctionPass, + public AliasAnalysis { + ScalarEvolution *SE; + + public: + static char ID; // Class identification, replacement for typeinfo + ScalarEvolutionAliasAnalysis() : FunctionPass(&ID), SE(0) {} + + private: + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); + virtual AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size); + + Value *GetUnderlyingIdentifiedObject(const SCEV *S); + }; +} // End of anonymous namespace + +// Register this pass... 
+char ScalarEvolutionAliasAnalysis::ID = 0;
+static RegisterPass<ScalarEvolutionAliasAnalysis>
+X("scev-aa", "ScalarEvolution-based Alias Analysis", false, true);
+
+// Declare that we implement the AliasAnalysis interface
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+  return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredTransitive<ScalarEvolution>();
+  AU.setPreservesAll();
+  AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+  InitializeAliasAnalysis(this);
+  SE = &getAnalysis<ScalarEvolution>();
+  return false;
+}
+
+/// GetUnderlyingIdentifiedObject - Given an expression, try to find an
+/// "identified object" (see AliasAnalysis::isIdentifiedObject) base
+/// value. Return null if none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetUnderlyingIdentifiedObject(const SCEV *S) {
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // In an addrec, assume that the base will be in the start, rather
+    // than the step.
+    return GetUnderlyingIdentifiedObject(AR->getStart());
+  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // If there's a pointer operand, it'll be sorted at the end of the list.
+    const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+    if (isa<PointerType>(Last->getType()))
+      return GetUnderlyingIdentifiedObject(Last);
+  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // Determine if we've found an Identified object.
+    Value *V = U->getValue();
+    if (isIdentifiedObject(V))
+      return V;
+  }
+  // No Identified object found.
+  return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
+                                    const Value *B, unsigned BSize) {
+  // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+  const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
+  const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
+
+  // If they evaluate to the same expression, it's a MustAlias.
+  if (AS == BS) return MustAlias;
+
+  // If something is known about the difference between the two addresses,
+  // see if it's enough to prove a NoAlias.
+  if (SE->getEffectiveSCEVType(AS->getType()) ==
+      SE->getEffectiveSCEVType(BS->getType())) {
+    unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+    APInt AI(BitWidth, ASize);
+    const SCEV *BA = SE->getMinusSCEV(BS, AS);
+    if (AI.ule(SE->getUnsignedRange(BA).getUnsignedMin())) {
+      APInt BI(BitWidth, BSize);
+      const SCEV *AB = SE->getMinusSCEV(AS, BS);
+      if (BI.ule(SE->getUnsignedRange(AB).getUnsignedMin()))
+        return NoAlias;
+    }
+  }
+
+  // If ScalarEvolution can find an underlying object, form a new query.
+  // The correctness of this depends on ScalarEvolution not recognizing
+  // inttoptr and ptrtoint operators.
+  Value *AO = GetUnderlyingIdentifiedObject(AS);
+  Value *BO = GetUnderlyingIdentifiedObject(BS);
+  if ((AO && AO != A) || (BO && BO != B))
+    if (alias(AO ? AO : A, AO ? ~0u : ASize,
+              BO ? BO : B, BO ? ~0u : BSize) == NoAlias)
+      return NoAlias;
+
+  // Forward the query to the next analysis.
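The range-based test above proves the two accesses disjoint when every possible value of B - A is at least ASize and every possible value of A - B is at least BSize, both taken as unsigned. With concrete numbers (an illustrative check, not the pass's own code):

#include <cassert>
#include <cstdint>

// A = &buf[0] and B = &buf[16], each access 8 bytes wide. The unsigned
// minimum of (B - A) is 16 >= 8, and (A - B) is -16, i.e. 0x...FFF0,
// also >= 8 unsigned, so [A, A+8) and [B, B+8) cannot overlap.
static bool provablyNoAlias(uint64_t MinUnsignedBA, uint64_t MinUnsignedAB,
                            uint64_t ASize, uint64_t BSize) {
  return ASize <= MinUnsignedBA && BSize <= MinUnsignedAB;
}

int main() {
  assert(provablyNoAlias(16, uint64_t(-16), 8, 8));
  return 0;
}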
+ return AliasAnalysis::alias(A, ASize, B, BSize); +} diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 729a0c3254489..d674ee847f116 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -52,10 +53,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { return CE->getOperand(0); } - // FIXME: keep track of the cast instruction. if (Constant *C = dyn_cast(V)) return ConstantExpr::getCast(Op, C, Ty); - + if (Argument *A = dyn_cast(V)) { // Check to see if there is already a cast! for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); @@ -155,55 +155,95 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and /// check to see if the divide was folded. -static bool FactorOutConstant(const SCEV* &S, - const SCEV* &Remainder, - const APInt &Factor, - ScalarEvolution &SE) { +static bool FactorOutConstant(const SCEV *&S, + const SCEV *&Remainder, + const SCEV *Factor, + ScalarEvolution &SE, + const TargetData *TD) { // Everything is divisible by one. - if (Factor == 1) + if (Factor->isOne()) + return true; + + // x/x == 1. + if (S == Factor) { + S = SE.getIntegerSCEV(1, S->getType()); return true; + } // For a Constant, check for a multiple of the given factor. if (const SCEVConstant *C = dyn_cast(S)) { - ConstantInt *CI = - ConstantInt::get(C->getValue()->getValue().sdiv(Factor)); - // If the quotient is zero and the remainder is non-zero, reject - // the value at this scale. It will be considered for subsequent - // smaller scales. - if (C->isZero() || !CI->isZero()) { - const SCEV* Div = SE.getConstant(CI); - S = Div; - Remainder = - SE.getAddExpr(Remainder, - SE.getConstant(C->getValue()->getValue().srem(Factor))); + // 0/x == 0. + if (C->isZero()) return true; + // Check for divisibility. + if (const SCEVConstant *FC = dyn_cast(Factor)) { + ConstantInt *CI = + ConstantInt::get(SE.getContext(), + C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + // If the quotient is zero and the remainder is non-zero, reject + // the value at this scale. It will be considered for subsequent + // smaller scales. + if (!CI->isZero()) { + const SCEV *Div = SE.getConstant(CI); + S = Div; + Remainder = + SE.getAddExpr(Remainder, + SE.getConstant(C->getValue()->getValue().srem( + FC->getValue()->getValue()))); + return true; + } } } // In a Mul, check if there is a constant operand which is a multiple // of the given factor. - if (const SCEVMulExpr *M = dyn_cast(S)) - if (const SCEVConstant *C = dyn_cast(M->getOperand(0))) - if (!C->getValue()->getValue().srem(Factor)) { - const SmallVectorImpl &MOperands = M->getOperands(); - SmallVector NewMulOps(MOperands.begin(), - MOperands.end()); - NewMulOps[0] = - SE.getConstant(C->getValue()->getValue().sdiv(Factor)); - S = SE.getMulExpr(NewMulOps); - return true; + if (const SCEVMulExpr *M = dyn_cast(S)) { + if (TD) { + // With TargetData, the size is known. Check if there is a constant + // operand which is a multiple of the given factor. If so, we can + // factor it. 
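An arithmetic analogue of the constant-factoring case above, with an affine expression C0 + C1*x standing in for a SCEV add (illustrative only; the real code works on SCEV trees and APInts):

#include <cstdint>

struct Affine { int64_t C0, C1; };           // models C0 + C1*x

// Factor a constant out of S: on success S == Factor*Q + R with R a
// constant, mirroring the sdiv/srem quotient-and-remainder split.
static bool factorOutConstant(Affine S, int64_t Factor,
                              Affine &Q, int64_t &R) {
  if (Factor == 1) { Q = S; R = 0; return true; }
  if (S.C1 % Factor != 0)
    return false;                            // multiply term not divisible
  Q.C1 = S.C1 / Factor;
  Q.C0 = S.C0 / Factor;                      // sdiv
  R    = S.C0 % Factor;                      // srem stays in the remainder
  return true;
}

// E.g. S = 4*x + 6 with Factor = 4 gives Q = x + 1 and R = 2.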
+ const SCEVConstant *FC = cast(Factor); + if (const SCEVConstant *C = dyn_cast(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + const SmallVectorImpl &MOperands = M->getOperands(); + SmallVector NewMulOps(MOperands.begin(), + MOperands.end()); + NewMulOps[0] = + SE.getConstant(C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; + } + } else { + // Without TargetData, check if Factor can be factored out of any of the + // Mul's operands. If so, we can just remove it. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *SOp = M->getOperand(i); + const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType()); + if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && + Remainder->isZero()) { + const SmallVectorImpl &MOperands = M->getOperands(); + SmallVector NewMulOps(MOperands.begin(), + MOperands.end()); + NewMulOps[i] = SOp; + S = SE.getMulExpr(NewMulOps); + return true; + } } + } + } // In an AddRec, check if both start and step are divisible. if (const SCEVAddRecExpr *A = dyn_cast(S)) { - const SCEV* Step = A->getStepRecurrence(SE); - const SCEV* StepRem = SE.getIntegerSCEV(0, Step->getType()); - if (!FactorOutConstant(Step, StepRem, Factor, SE)) + const SCEV *Step = A->getStepRecurrence(SE); + const SCEV *StepRem = SE.getIntegerSCEV(0, Step->getType()); + if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) return false; if (!StepRem->isZero()) return false; - const SCEV* Start = A->getStart(); - if (!FactorOutConstant(Start, Remainder, Factor, SE)) + const SCEV *Start = A->getStart(); + if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; S = SE.getAddRecExpr(Start, Step, A->getLoop()); return true; @@ -212,15 +252,81 @@ static bool FactorOutConstant(const SCEV* &S, return false; } -/// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP -/// instead of using ptrtoint+arithmetic+inttoptr. This helps -/// BasicAliasAnalysis analyze the result. However, it suffers from the -/// underlying bug described in PR2831. Addition in LLVM currently always -/// has two's complement wrapping guaranteed. However, the semantics for -/// getelementptr overflow are ambiguous. In the common case though, this -/// expansion gets used when a GEP in the original code has been converted -/// into integer arithmetic, in which case the resulting code will be no -/// more undefined than it was originally. +/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs +/// is the number of SCEVAddRecExprs present, which are kept at the end of +/// the list. +/// +static void SimplifyAddOperands(SmallVectorImpl &Ops, + const Type *Ty, + ScalarEvolution &SE) { + unsigned NumAddRecs = 0; + for (unsigned i = Ops.size(); i > 0 && isa(Ops[i-1]); --i) + ++NumAddRecs; + // Group Ops into non-addrecs and addrecs. + SmallVector NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs); + SmallVector AddRecs(Ops.end() - NumAddRecs, Ops.end()); + // Let ScalarEvolution sort and simplify the non-addrecs list. + const SCEV *Sum = NoAddRecs.empty() ? + SE.getIntegerSCEV(0, Ty) : + SE.getAddExpr(NoAddRecs); + // If it returned an add, use the operands. Otherwise it simplified + // the sum into a single value, so just use that. + if (const SCEVAddExpr *Add = dyn_cast(Sum)) + Ops = Add->getOperands(); + else { + Ops.clear(); + if (!Sum->isZero()) + Ops.push_back(Sum); + } + // Then append the addrecs. 
+  Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+}
+
+/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
+/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
+/// This helps expose more opportunities for folding parts of the expressions
+/// into GEP indices.
+///
+static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
+                         const Type *Ty,
+                         ScalarEvolution &SE) {
+  // Find the addrecs.
+  SmallVector AddRecs;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
+      const SCEV *Start = A->getStart();
+      if (Start->isZero()) break;
+      const SCEV *Zero = SE.getIntegerSCEV(0, Ty);
+      AddRecs.push_back(SE.getAddRecExpr(Zero,
+                                         A->getStepRecurrence(SE),
+                                         A->getLoop()));
+      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
+        Ops[i] = Zero;
+        Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+        e += Add->getNumOperands();
+      } else {
+        Ops[i] = Start;
+      }
+    }
+  if (!AddRecs.empty()) {
+    // Add the addrecs onto the end of the list.
+    Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+    // Resort the operand list, moving any constants to the front.
+    SimplifyAddOperands(Ops, Ty, SE);
+  }
+}
+
+/// expandAddToGEP - Expand an addition expression with a pointer type into
+/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
+/// BasicAliasAnalysis and other passes analyze the result. See the rules
+/// for getelementptr vs. inttoptr in
+/// http://llvm.org/docs/LangRef.html#pointeraliasing
+/// for details.
+///
+/// Design note: The correctness of using getelementptr here depends on
+/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
+/// they may introduce pointer arithmetic which may not be safely converted
+/// into getelementptr.
 ///
 /// Design note: It might seem desirable for this function to be more
 /// loop-aware. If some of the indices are loop-invariant while others
@@ -237,92 +343,130 @@ static bool FactorOutConstant(const SCEV* &S,
 /// loop-invariant portions of expressions, after considering what
 /// can be folded using target addressing modes.
 ///
-Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin,
-                                    const SCEV* const *op_end,
+Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
+                                    const SCEV *const *op_end,
                                     const PointerType *PTy,
                                     const Type *Ty,
                                     Value *V) {
   const Type *ElTy = PTy->getElementType();
   SmallVector GepIndices;
-  SmallVector Ops(op_begin, op_end);
+  SmallVector Ops(op_begin, op_end);
   bool AnyNonZeroIndices = false;
+  // Split AddRecs up into parts as either of the parts may be usable
+  // without the other.
+  SplitAddRecs(Ops, Ty, SE);
+
   // Descend down the pointer's type and attempt to convert the other
   // operands into GEP indices, at each level. The first index in a GEP
   // indexes into the array implied by the pointer operand; the rest of
   // the indices index into the element or field type selected by the
   // preceding index.
   for (;;) {
-    APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
-                         ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
-    SmallVector NewOps;
-    SmallVector ScaledOps;
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-      // Split AddRecs up into parts as either of the parts may be usable
-      // without the other.
- if (const SCEVAddRecExpr *A = dyn_cast(Ops[i])) - if (!A->getStart()->isZero()) { - const SCEV* Start = A->getStart(); - Ops.push_back(SE.getAddRecExpr(SE.getIntegerSCEV(0, A->getType()), - A->getStepRecurrence(SE), - A->getLoop())); - Ops[i] = Start; - ++e; - } - // If the scale size is not 0, attempt to factor out a scale. - if (ElSize != 0) { - const SCEV* Op = Ops[i]; - const SCEV* Remainder = SE.getIntegerSCEV(0, Op->getType()); - if (FactorOutConstant(Op, Remainder, ElSize, SE)) { - ScaledOps.push_back(Op); // Op now has ElSize factored out. - NewOps.push_back(Remainder); - continue; + const SCEV *ElSize = SE.getAllocSizeExpr(ElTy); + // If the scale size is not 0, attempt to factor out a scale for + // array indexing. + SmallVector ScaledOps; + if (ElTy->isSized() && !ElSize->isZero()) { + SmallVector NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getIntegerSCEV(0, Ty); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. + ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); } } - // If the operand was not divisible, add it to the list of operands - // we'll scan next iteration. - NewOps.push_back(Ops[i]); + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); + } } - Ops = NewOps; - AnyNonZeroIndices |= !ScaledOps.empty(); + + // Record the scaled array index for this level of the type. If + // we didn't find any operands that could be factored, tentatively + // assume that element zero was selected (since the zero offset + // would obviously be folded away). Value *Scaled = ScaledOps.empty() ? Constant::getNullValue(Ty) : expandCodeFor(SE.getAddExpr(ScaledOps), Ty); GepIndices.push_back(Scaled); // Collect struct field index operands. - if (!Ops.empty()) - while (const StructType *STy = dyn_cast(ElTy)) { + while (const StructType *STy = dyn_cast(ElTy)) { + bool FoundFieldNo = false; + // An empty struct has no fields. + if (STy->getNumElements() == 0) break; + if (SE.TD) { + // With TargetData, field offsets are known. See if a constant offset + // falls within any of the struct fields. + if (Ops.empty()) break; if (const SCEVConstant *C = dyn_cast(Ops[0])) if (SE.getTypeSizeInBits(C->getType()) <= 64) { const StructLayout &SL = *SE.TD->getStructLayout(STy); uint64_t FullOffset = C->getValue()->getZExtValue(); if (FullOffset < SL.getSizeInBytes()) { unsigned ElIdx = SL.getElementContainingOffset(FullOffset); - GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx)); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); ElTy = STy->getTypeAtIndex(ElIdx); Ops[0] = SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); AnyNonZeroIndices = true; - continue; + FoundFieldNo = true; } } - break; + } else { + // Without TargetData, just check for a SCEVFieldOffsetExpr of the + // appropriate struct type. 
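With TargetData, the constant part of the offset is mapped onto a struct field by StructLayout::getElementContainingOffset, and the residue is pushed back for the next level. The lookup itself reduces to a search over the field start offsets (a sketch; FieldOffsets is a stand-in for the real StructLayout):

#include <cstdint>
#include <vector>

// FieldOffsets holds each field's starting byte offset in ascending
// order. Returns the field containing Offset and rewrites Offset to the
// remaining offset within that field, as the code above does with Ops[0].
static unsigned fieldContainingOffset(const std::vector<uint64_t> &FieldOffsets,
                                      uint64_t &Offset) {
  unsigned Idx = 0;
  while (Idx + 1 < FieldOffsets.size() && FieldOffsets[Idx + 1] <= Offset)
    ++Idx;
  Offset -= FieldOffsets[Idx];
  return Idx;
}

// For struct { i32; i32; double } laid out at offsets {0, 4, 8}, an
// incoming byte offset of 12 selects field 2 with 4 bytes left over.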
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (const SCEVFieldOffsetExpr *FO = + dyn_cast(Ops[i])) + if (FO->getStructType() == STy) { + unsigned FieldNo = FO->getFieldNo(); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + FieldNo)); + ElTy = STy->getTypeAtIndex(FieldNo); + Ops[i] = SE.getConstant(Ty, 0); + AnyNonZeroIndices = true; + FoundFieldNo = true; + break; + } + } + // If no struct field offsets were found, tentatively assume that + // field zero was selected (since the zero offset would obviously + // be folded away). + if (!FoundFieldNo) { + ElTy = STy->getTypeAtIndex(0u); + GepIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); } + } - if (const ArrayType *ATy = dyn_cast(ElTy)) { + if (const ArrayType *ATy = dyn_cast(ElTy)) ElTy = ATy->getElementType(); - continue; - } - break; + else + break; } // If none of the operands were convertable to proper GEP indices, cast // the base to i8* and do an ugly getelementptr with that. It's still // better than ptrtoint+arithmetic+inttoptr at least. if (!AnyNonZeroIndices) { + // Cast the base to i8*. V = InsertNoopCastOfTo(V, - Type::Int8Ty->getPointerTo(PTy->getAddressSpace())); + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + + // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); // Fold a GEP with constant operands. @@ -345,12 +489,15 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, } } - Value *GEP = Builder.CreateGEP(V, Idx, "scevgep"); + // Emit a GEP. + Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); InsertedValues.insert(GEP); return GEP; } - // Insert a pretty getelementptr. + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, + // because ScalarEvolution may have changed the address arithmetic to + // compute a value which is beyond the end of the allocated object. Value *GEP = Builder.CreateGEP(V, GepIndices.begin(), GepIndices.end(), @@ -361,21 +508,37 @@ Value *SCEVExpander::expandAddToGEP(const SCEV* const *op_begin, } Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + int NumOperands = S->getNumOperands(); const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expand(S->getOperand(S->getNumOperands()-1)); + + // Find the index of an operand to start with. Choose the operand with + // pointer type, if there is one, or the last operand otherwise. + int PIdx = 0; + for (; PIdx != NumOperands - 1; ++PIdx) + if (isa(S->getOperand(PIdx)->getType())) break; + + // Expand code for the operand that we chose. + Value *V = expand(S->getOperand(PIdx)); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. - if (SE.TD) - if (const PointerType *PTy = dyn_cast(V->getType())) { - const SmallVectorImpl &Ops = S->getOperands(); - return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1], PTy, Ty, V); - } + if (const PointerType *PTy = dyn_cast(V->getType())) { + // Take the operand at PIdx out of the list. + const SmallVectorImpl &Ops = S->getOperands(); + SmallVector NewOps; + NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx); + NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end()); + // Make a GEP. + return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V); + } + // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer + // arithmetic. 
V = InsertNoopCastOfTo(V, Ty); // Emit a bunch of add instructions - for (int i = S->getNumOperands()-2; i >= 0; --i) { + for (int i = NumOperands-1; i >= 0; --i) { + if (i == PIdx) continue; Value *W = expandCodeFor(S->getOperand(i), Ty); V = InsertBinop(Instruction::Add, V, W); } @@ -422,7 +585,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { /// Move parts of Base into Rest to leave Base with the minimal /// expression that provides a pointer operand suitable for a /// GEP expansion. -static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest, +static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, ScalarEvolution &SE) { while (const SCEVAddRecExpr *A = dyn_cast(Base)) { Base = A->getStart(); @@ -433,7 +596,7 @@ static void ExposePointerBase(const SCEV* &Base, const SCEV* &Rest, } if (const SCEVAddExpr *A = dyn_cast(Base)) { Base = A->getOperand(A->getNumOperands()-1); - SmallVector NewAddOps(A->op_begin(), A->op_end()); + SmallVector NewAddOps(A->op_begin(), A->op_end()); NewAddOps.back() = Rest; Rest = SE.getAddExpr(NewAddOps); ExposePointerBase(Base, Rest, SE); @@ -457,11 +620,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty)) { - const SCEV *Start = SE.getAnyExtendExpr(S->getStart(), - CanonicalIV->getType()); - const SCEV *Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE), - CanonicalIV->getType()); - Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop())); + const SmallVectorImpl &Ops = S->getOperands(); + SmallVector NewOps(Ops.size()); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = @@ -475,28 +638,26 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - const SmallVectorImpl &SOperands = S->getOperands(); - SmallVector NewOps(SOperands.begin(), SOperands.end()); + const SmallVectorImpl &SOperands = S->getOperands(); + SmallVector NewOps(SOperands.begin(), SOperands.end()); NewOps[0] = SE.getIntegerSCEV(0, Ty); - const SCEV* Rest = SE.getAddRecExpr(NewOps, L); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. - if (SE.TD) { - const SCEV* Base = S->getStart(); - const SCEV* RestArray[1] = { Rest }; - // Dig into the expression to find the pointer base for a GEP. - ExposePointerBase(Base, RestArray[0], SE); - // If we found a pointer, expand the AddRec with a GEP. - if (const PointerType *PTy = dyn_cast(Base->getType())) { - // Make sure the Base isn't something exotic, such as a multiplied - // or divided pointer value. In those cases, the result type isn't - // actually a pointer type. - if (!isa(Base) && !isa(Base)) { - Value *StartV = expand(Base); - assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); - return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); - } + const SCEV *Base = S->getStart(); + const SCEV *RestArray[1] = { Rest }; + // Dig into the expression to find the pointer base for a GEP. + ExposePointerBase(Base, RestArray[0], SE); + // If we found a pointer, expand the AddRec with a GEP. 
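The {X,+,F} --> X + {0,+,F} rewrite above is an identity on the recurrence's value sequence; peeling the start out lets a pointer-typed X become a GEP base while the zero-started recurrence becomes the index. Checked on plain integers (illustrative only):

#include <cassert>
#include <cstdint>

// Value of the affine recurrence {Start,+,Step} at iteration i.
static int64_t chrecAt(int64_t Start, int64_t Step, uint64_t i) {
  return Start + Step * int64_t(i);
}

int main() {
  // {X,+,F}(i) == X + {0,+,F}(i) for every iteration i.
  for (uint64_t i = 0; i != 100; ++i)
    assert(chrecAt(7, 3, i) == 7 + chrecAt(0, 3, i));
  return 0;
}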
+ if (const PointerType *PTy = dyn_cast(Base->getType())) { + // Make sure the Base isn't something exotic, such as a multiplied + // or divided pointer value. In those cases, the result type isn't + // actually a pointer type. + if (!isa(Base) && !isa(Base)) { + Value *StartV = expand(Base); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); } } @@ -519,29 +680,22 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Create and insert the PHI node for the induction variable in the // specified loop. BasicBlock *Header = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin()); InsertedValues.insert(PN); - PN->addIncoming(Constant::getNullValue(Ty), Preheader); - pred_iterator HPI = pred_begin(Header); - assert(HPI != pred_end(Header) && "Loop with zero preds???"); - if (!L->contains(*HPI)) ++HPI; - assert(HPI != pred_end(Header) && L->contains(*HPI) && - "No backedge in loop?"); - - // Insert a unit add instruction right before the terminator corresponding - // to the back-edge. Constant *One = ConstantInt::get(Ty, 1); - Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", - (*HPI)->getTerminator()); - InsertedValues.insert(Add); - - pred_iterator PI = pred_begin(Header); - if (*PI == Preheader) - ++PI; - PN->addIncoming(Add, *PI); - return PN; + for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); + HPI != HPE; ++HPI) + if (L->contains(*HPI)) { + // Insert a unit add instruction right before the terminator corresponding + // to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", + (*HPI)->getTerminator()); + InsertedValues.insert(Add); + PN->addIncoming(Add, *HPI); + } else { + PN->addIncoming(Constant::getNullValue(Ty), *HPI); + } } // {0,+,F} --> {0,+,1} * F @@ -563,19 +717,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // folders, then expandCodeFor the closed form. This allows the folders to // simplify the expression without having to build a bunch of special code // into this folder. - const SCEV* IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. + const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. // Promote S up to the canonical IV type, if the cast is foldable. - const SCEV* NewS = S; - const SCEV* Ext = SE.getNoopOrAnyExtend(S, I->getType()); + const SCEV *NewS = S; + const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType()); if (isa(Ext)) NewS = Ext; - const SCEV* V = cast(NewS)->evaluateAtIteration(IH, SE); + const SCEV *V = cast(NewS)->evaluateAtIteration(IH, SE); //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; // Truncate the result down to the original type, if needed. - const SCEV* T = SE.getTruncateOrNoop(V, Ty); + const SCEV *T = SE.getTruncateOrNoop(V, Ty); return expand(T); } @@ -607,9 +761,15 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { } Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expandCodeFor(S->getOperand(0), Ty); - for (unsigned i = 1; i < S->getNumOperands(); ++i) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. 
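visitSMaxExpr above folds from the last operand backwards, emitting one icmp sgt plus select per operand, and inserts no-op casts so mixed pointer and integer operands are compared in the effective integer type. The fold order in plain C++ (a sketch, integers only):

#include <cstdint>
#include <vector>

// smax(Ops...) the way the expander emits it: seed with the last operand,
// then fold each earlier operand with compare-and-select
// (the C++ analogue of icmp sgt + select).
static int64_t expandSMax(const std::vector<int64_t> &Ops) {
  int64_t LHS = Ops.back();
  for (int i = int(Ops.size()) - 2; i >= 0; --i) {
    int64_t RHS = Ops[i];
    LHS = LHS > RHS ? LHS : RHS;
  }
  return LHS;
}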
+ if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } Value *RHS = expandCodeFor(S->getOperand(i), Ty); Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); InsertedValues.insert(ICmp); @@ -617,13 +777,23 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { InsertedValues.insert(Sel); LHS = Sel; } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); return LHS; } Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expandCodeFor(S->getOperand(0), Ty); - for (unsigned i = 1; i < S->getNumOperands(); ++i) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } Value *RHS = expandCodeFor(S->getOperand(i), Ty); Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); InsertedValues.insert(ICmp); @@ -631,10 +801,22 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { InsertedValues.insert(Sel); LHS = Sel; } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); return LHS; } -Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) { +Value *SCEVExpander::visitFieldOffsetExpr(const SCEVFieldOffsetExpr *S) { + return ConstantExpr::getOffsetOf(S->getStructType(), S->getFieldNo()); +} + +Value *SCEVExpander::visitAllocSizeExpr(const SCEVAllocSizeExpr *S) { + return ConstantExpr::getSizeOf(S->getAllocType()); +} + +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); if (Ty) { @@ -695,7 +877,7 @@ Value * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { assert(Ty->isInteger() && "Can only insert integer induction variables!"); - const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), + const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), SE.getIntegerSCEV(1, Ty), L); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index 543306854ceda..b7844f022765c 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -17,7 +17,9 @@ #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -27,7 +29,7 @@ using namespace llvm; AbstractLatticeFunction::~AbstractLatticeFunction() {} /// PrintValue - Render the specified lattice value to the specified stream. 
-void AbstractLatticeFunction::PrintValue(LatticeVal V, std::ostream &OS) { +void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) { if (V == UndefVal) OS << "undefined"; else if (V == OverdefinedVal) @@ -87,7 +89,7 @@ void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { - DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n"; + DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n"); BBExecutable.insert(BB); // Basic block is executable! BBWorkList.push_back(BB); // Add the block to the work list! } @@ -98,8 +100,8 @@ void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) return; // This edge is already known to be executable! - DOUT << "Marking Edge Executable: " << Source->getNameStart() - << " -> " << Dest->getNameStart() << "\n"; + DEBUG(errs() << "Marking Edge Executable: " << Source->getName() + << " -> " << Dest->getName() << "\n"); if (BBExecutable.count(Dest)) { // The destination is already executable, but we just made an edge @@ -153,7 +155,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, } // Constant condition variables mean the branch can only go a single way - Succs[C == ConstantInt::getFalse()] = true; + Succs[C == ConstantInt::getFalse(*Context)] = true; return; } @@ -221,6 +223,16 @@ void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { } void SparseSolver::visitPHINode(PHINode &PN) { + // The lattice function may store more information on a PHINode than could be + // computed from its incoming values. For example, SSI form stores its sigma + // functions as PHINodes with a single incoming value. + if (LatticeFunc->IsSpecialCasedPHI(&PN)) { + LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this); + if (IV != LatticeFunc->getUntrackedVal()) + UpdateState(PN, IV); + return; + } + LatticeVal PNIV = getOrInitValueState(&PN); LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); @@ -283,7 +295,7 @@ void SparseSolver::Solve(Function &F) { Instruction *I = InstWorkList.back(); InstWorkList.pop_back(); - DOUT << "\nPopped off I-WL: " << *I; + DEBUG(errs() << "\nPopped off I-WL: " << *I << "\n"); // "I" got into the work list because it made a transition. See if any // users are both live and in need of updating. @@ -300,7 +312,7 @@ void SparseSolver::Solve(Function &F) { BasicBlock *BB = BBWorkList.back(); BBWorkList.pop_back(); - DOUT << "\nPopped off BBWL: " << *BB; + DEBUG(errs() << "\nPopped off BBWL: " << *BB); // Notify all instructions in this basic block that they are newly // executable. 
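SparseSolver::Solve, shown in the next hunk, drains two worklists until a fixed point: values whose lattice state changed, and blocks newly proven executable. The overall control structure, stripped to a skeleton (illustrative; Visitor, visitUsersOf, and visitBlock are placeholders for the pass's real visitors):

#include <vector>

// Skeleton of the two-worklist fixed-point loop in SparseSolver::Solve.
template <typename Inst, typename Block, typename Visitor>
void solveSkeleton(std::vector<Inst *> &InstWorkList,
                   std::vector<Block *> &BBWorkList, Visitor &V) {
  while (!InstWorkList.empty() || !BBWorkList.empty()) {
    while (!InstWorkList.empty()) {
      Inst *I = InstWorkList.back();
      InstWorkList.pop_back();
      V.visitUsersOf(I);          // may push onto either worklist
    }
    while (!BBWorkList.empty()) {
      Block *BB = BBWorkList.back();
      BBWorkList.pop_back();
      V.visitBlock(BB);           // visit newly executable instructions
    }
  }
}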
@@ -310,7 +322,7 @@ void SparseSolver::Solve(Function &F) { } } -void SparseSolver::Print(Function &F, std::ostream &OS) const { +void SparseSolver::Print(Function &F, raw_ostream &OS) const { OS << "\nFUNCTION: " << F.getNameStr() << "\n"; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!BBExecutable.count(BB)) @@ -322,7 +334,7 @@ void SparseSolver::Print(Function &F, std::ostream &OS) const { OS << "; anon bb\n"; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { LatticeFunc->PrintValue(getLatticeState(I), OS); - OS << *I; + OS << *I << "\n"; } OS << "\n"; diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index 8f19fda953dd7..c9b303b48b28b 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -18,7 +18,7 @@ #include "llvm/Analysis/Trace.h" #include "llvm/Function.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; Function *Trace::getFunction() const { @@ -31,9 +31,9 @@ Module *Trace::getModule() const { /// print - Write trace to output stream. /// -void Trace::print(std::ostream &O) const { - Function *F = getFunction (); - O << "; Trace from function " << F->getName() << ", blocks:\n"; +void Trace::print(raw_ostream &O) const { + Function *F = getFunction(); + O << "; Trace from function " << F->getNameStr() << ", blocks:\n"; for (const_iterator i = begin(), e = end(); i != e; ++i) { O << "; "; WriteAsOperand(O, *i, true, getModule()); @@ -46,5 +46,5 @@ void Trace::print(std::ostream &O) const { /// output stream. /// void Trace::dump() const { - print(cerr); + print(errs()); } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 07a18fe4de42e..baa347a6638f9 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -16,25 +16,16 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include using namespace llvm; -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { - if (const Instruction *I = dyn_cast(V)) - return I->getOpcode(); - if (const ConstantExpr *CE = dyn_cast(V)) - return CE->getOpcode(); - // Use UserOp1 to mean there's no opcode. - return Instruction::UserOp1; -} - - /// ComputeMaskedBits - Determine which of the bits specified in Mask are /// known to be either zero or one and return them in the KnownZero/KnownOne /// bit sets. This code only analyzes bits in Mask, in order to short-circuit @@ -45,9 +36,15 @@ static unsigned getOpcode(const Value *V) { /// optimized based on the contradictory assumption that it is non-zero. /// Because instcombine aggressively folds operations with undef args anyway, /// this won't lose us code quality. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. 
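One of the simplest facts ComputeMaskedBits reports, and the case the global-variable change below refines: an address aligned to Align (a power of two) has its low log2(Align) bits known zero, and for globals that may be overridden at link time only the ABI alignment may be assumed, not the preferred one. The mask computation in isolation (a sketch):

#include <cstdint>

// For a power-of-two Align, the bits in (Align - 1) are known zero in
// the address; e.g. Align = 16 gives a known-zero mask of 0xF.
static uint64_t knownZeroLowBits(uint64_t Align) {
  return Align ? Align - 1 : 0;
}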
void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero, APInt &KnownOne, - TargetData *TD, unsigned Depth) { + const TargetData *TD, unsigned Depth) { const unsigned MaxDepth = 6; assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); @@ -91,8 +88,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); - if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) - Align = TD->getPrefTypeAlignment(GV->getType()->getElementType()); + if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { + const Type *ObjectType = GV->getType()->getElementType(); + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GV->isDeclaration() && !GV->mayBeOverridden()) + Align = TD->getPrefTypeAlignment(ObjectType); + else + Align = TD->getABITypeAlignment(ObjectType); + } if (Align > 0) KnownZero = Mask & APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); @@ -101,17 +106,28 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownOne.clear(); return; } + // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has + // the bits of its aliasee. + if (GlobalAlias *GA = dyn_cast(V)) { + if (GA->mayBeOverridden()) { + KnownZero.clear(); KnownOne.clear(); + } else { + ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne, + TD, Depth+1); + } + return; + } KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything. if (Depth == MaxDepth || Mask == 0) return; // Limit search depth. - User *I = dyn_cast(V); + Operator *I = dyn_cast(V); if (!I) return; APInt KnownZero2(KnownZero), KnownOne2(KnownOne); - switch (getOpcode(I)) { + switch (I->getOpcode()) { default: break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -228,12 +244,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { + const Type *SrcTy = I->getOperand(0)->getType(); + + unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - const Type *SrcTy = I->getOperand(0)->getType(); - unsigned SrcBitWidth = TD ? - TD->getTypeSizeInBits(SrcTy) : - SrcTy->getScalarSizeInBits(); + if (isa(SrcTy)) + SrcBitWidth = TD->getTypeSizeInBits(SrcTy); + else + SrcBitWidth = SrcTy->getScalarSizeInBits(); + APInt MaskIn(Mask); MaskIn.zextOrTrunc(SrcBitWidth); KnownZero.zextOrTrunc(SrcBitWidth); @@ -261,8 +281,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. - const IntegerType *SrcTy = cast(I->getOperand(0)->getType()); - unsigned SrcBitWidth = SrcTy->getBitWidth(); + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); APInt MaskIn(Mask); MaskIn.trunc(SrcBitWidth); @@ -382,7 +401,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Determine which operand has more trailing zeros, and use that // many bits from the other operand. 
if (LHSKnownZeroOut > RHSKnownZeroOut) { - if (getOpcode(I) == Instruction::Add) { + if (I->getOpcode() == Instruction::Add) { APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); KnownZero |= KnownZero2 & Mask; KnownOne |= KnownOne2 & Mask; @@ -462,10 +481,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, Align = TD->getABITypeAlignment(AI->getType()->getElementType()); Align = std::max(Align, - (unsigned)TD->getABITypeAlignment(Type::DoubleTy)); + (unsigned)TD->getABITypeAlignment( + Type::getDoubleTy(V->getContext()))); Align = std::max(Align, - (unsigned)TD->getABITypeAlignment(Type::Int64Ty)); + (unsigned)TD->getABITypeAlignment( + Type::getInt64Ty(V->getContext()))); } } @@ -522,10 +543,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, for (unsigned i = 0; i != 2; ++i) { Value *L = P->getIncomingValue(i); Value *R = P->getIncomingValue(!i); - User *LU = dyn_cast(L); + Operator *LU = dyn_cast(L); if (!LU) continue; - unsigned Opcode = getOpcode(LU); + unsigned Opcode = LU->getOpcode(); // Check for operations that have the property that if // both their operands have low zero bits, the result // will have low zero bits. @@ -608,8 +629,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use /// this predicate to simplify operations downstream. Mask is known to be zero /// for bits that V cannot have. +/// +/// This function is defined on values with integer type, values with pointer +/// type (but only if TD is non-null), and vectors of integers. In the case +/// where V is a vector, the mask, known zero, and known one values are the +/// same width as the vector element, and the bit is set only if it is true +/// for all of the elements in the vector. bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, - TargetData *TD, unsigned Depth) { + const TargetData *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); @@ -626,7 +653,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// /// 'Op' must have a scalar integer type. /// -unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { +unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, + unsigned Depth) { assert((TD || V->getType()->isIntOrIntVector()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); @@ -642,8 +670,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) { if (Depth == 6) return 1; // Limit search depth. - User *U = dyn_cast(V); - switch (getOpcode(V)) { + Operator *U = dyn_cast(V); + switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: Tmp = TyBits-cast(U->getOperand(0)->getType())->getBitWidth(); @@ -789,7 +817,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (Depth == 6) return 1; // Limit search depth. - const Instruction *I = dyn_cast(V); + const Operator *I = dyn_cast(V); if (I == 0) return false; // (add x, 0.0) is guaranteed to return +0.0, not -0.0. 
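The "(add x, 0.0)" remark above rests on IEEE 754 semantics: in round-to-nearest, the only addition of zeros that produces -0.0 is (-0.0) + (-0.0), so adding a literal +0.0 yields a result with a clear sign bit whenever x is a zero. A two-line check (illustrative):

#include <cmath>
#include <cstdio>

int main() {
  double X = -0.0;
  double Sum = X + 0.0;                      // IEEE 754: +0.0, not -0.0
  std::printf("%d %d\n", std::signbit(X) != 0, std::signbit(Sum) != 0); // 1 0
  return 0;
}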
@@ -810,15 +838,15 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
   if (const CallInst *CI = dyn_cast<CallInst>(I))
     if (const Function *F = CI->getCalledFunction()) {
       if (F->isDeclaration()) {
-        switch (F->getNameLen()) {
-        case 3:  // abs(x) != -0.0
-          if (!strcmp(F->getNameStart(), "abs")) return true;
-          break;
-        case 4:  // abs[lf](x) != -0.0
-          if (!strcmp(F->getNameStart(), "absf")) return true;
-          if (!strcmp(F->getNameStart(), "absl")) return true;
-          break;
-        }
+        // abs(x) != -0.0
+        if (F->getName() == "abs") return true;
+        // fabs[lf](x) != -0.0
+        if (F->getName() == "fabs") return true;
+        if (F->getName() == "fabsf") return true;
+        if (F->getName() == "fabsl") return true;
+        if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+            F->getName() == "sqrtl")
+          return CannotBeNegativeZero(CI->getOperand(1), Depth+1);
       }
     }
@@ -831,10 +859,11 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
 // indices from Idxs that should be left out when inserting into the resulting
 // struct. To is the result struct built so far, new insertvalue instructions
 // build on that.
-Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
-                         SmallVector &Idxs,
-                         unsigned IdxSkip,
-                         Instruction *InsertBefore) {
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+                                SmallVector &Idxs,
+                                unsigned IdxSkip,
+                                LLVMContext &Context,
+                                Instruction *InsertBefore) {
   const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
   if (STy) {
     // Save the original To argument so we can modify it
@@ -845,7 +874,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
       Idxs.push_back(i);
       Value *PrevTo = To;
       To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
-                             InsertBefore);
+                             Context, InsertBefore);
       Idxs.pop_back();
       if (!To) {
         // Couldn't find any inserted value for this index? Cleanup
@@ -868,7 +897,7 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
   // we might be able to find the complete struct somewhere.
   // Find the value that is at that particular spot
-  Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+  Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end(), Context);
   if (!V)
     return NULL;
@@ -890,8 +919,9 @@ Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
 // insertvalue instruction somewhere).
 //
 // All inserted insertvalue instructions are inserted before InsertBefore
-Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
-                         const unsigned *idx_end, Instruction *InsertBefore) {
+static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+                                const unsigned *idx_end, LLVMContext &Context,
+                                Instruction *InsertBefore) {
   assert(InsertBefore && "Must have someplace to insert!");
   const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
                                                              idx_begin,
@@ -900,7 +930,8 @@ Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
   SmallVector Idxs(idx_begin, idx_end);
   unsigned IdxSkip = Idxs.size();
-  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip,
+                           Context, InsertBefore);
 }
 /// FindInsertedValue - Given an aggregate and a sequence of indices, see if
 /// If InsertBefore is not null, this function will duplicate (modified)
 /// insertvalues when a part of a nested struct is extracted.
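FindInsertedValue, documented above, chases insertvalue and extractvalue chains to recover the scalar stored at an index path. Its core question reduces to a lookup keyed by index paths (a much simplified sketch; the real code also rebuilds partial sub-aggregates via BuildSubAggregate):

#include <map>
#include <vector>

// Records of insertvalue writes keyed by their full index path; a query
// succeeds only on an exact path match in this simplified model.
typedef std::vector<unsigned> IndexPath;

static const long *findInsertedValue(const std::map<IndexPath, long> &Writes,
                                     const IndexPath &Path) {
  std::map<IndexPath, long>::const_iterator I = Writes.find(Path);
  return I == Writes.end() ? 0 : &I->second;
}

// E.g. after recording a write of 11 at path {1,0}, querying {1,0}
// yields 11 and querying {0} yields null, mirroring a lookup through
// nested struct fields.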
Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, - const unsigned *idx_end, Instruction *InsertBefore) { + const unsigned *idx_end, LLVMContext &Context, + Instruction *InsertBefore) { // Nothing to index? Just return V then (this is useful at the end of our // recursion) if (idx_begin == idx_end) @@ -921,20 +953,20 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end) && "Invalid indices for type?"); const CompositeType *PTy = cast(V->getType()); - + if (isa(V)) return UndefValue::get(ExtractValueInst::getIndexedType(PTy, idx_begin, idx_end)); else if (isa(V)) return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy, - idx_begin, - idx_end)); + idx_begin, + idx_end)); else if (Constant *C = dyn_cast(V)) { if (isa(C) || isa(C)) // Recursively process this constant - return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, idx_end, - InsertBefore); + return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, + idx_end, Context, InsertBefore); } else if (InsertValueInst *I = dyn_cast(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices @@ -953,7 +985,8 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, // %C = insertvalue {i32, i32 } %A, i32 11, 1 // which allows the unused 0,0 element from the nested struct to be // removed. - return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore); + return BuildSubAggregate(V, idx_begin, req_idx, + Context, InsertBefore); else // We can't handle this without inserting insertvalues return 0; @@ -964,13 +997,13 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end, - InsertBefore); + Context, InsertBefore); } // If we end up here, the indices of the insertvalue match with those // requested (though possibly only partially). Now we recursively look at // the inserted value, passing any remaining indices. return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end, - InsertBefore); + Context, InsertBefore); } else if (ExtractValueInst *I = dyn_cast(V)) { // If we're extracting a value from an aggregrate that was extracted from // something else, we can extract from that something else directly instead. @@ -994,7 +1027,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, && "Number of indices added not correct?"); return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(), - InsertBefore); + Context, InsertBefore); } // Otherwise, we don't know (such as, extracting from a function return value // or load instruction) @@ -1035,7 +1068,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Make sure the index-ee is a pointer to array of i8. 
const PointerType *PT = cast(GEP->getOperand(0)->getType()); const ArrayType *AT = dyn_cast(PT->getElementType()); - if (AT == 0 || AT->getElementType() != Type::Int8Ty) + if (AT == 0 || AT->getElementType() != Type::getInt8Ty(V->getContext())) return false; // Check to make sure that the first operand of the GEP is an integer and @@ -1056,11 +1089,16 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, StopAtNul); } + if (MDString *MDStr = dyn_cast(V)) { + Str = MDStr->getString(); + return true; + } + // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. GlobalVariable* GV = dyn_cast(V); - if (!GV || !GV->isConstant() || !GV->hasInitializer()) + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return false; Constant *GlobalInit = GV->getInitializer(); @@ -1074,7 +1112,8 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Must be a Constant Array ConstantArray *Array = dyn_cast(GlobalInit); - if (Array == 0 || Array->getType()->getElementType() != Type::Int8Ty) + if (Array == 0 || + Array->getType()->getElementType() != Type::getInt8Ty(V->getContext())) return false; // Get the number of elements in the array diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp index bb5726293fe4b..00778d9983448 100644 --- a/lib/Archive/Archive.cpp +++ b/lib/Archive/Archive.cpp @@ -31,7 +31,7 @@ ArchiveMember::getMemberSize() const { // If it has a long filename, include the name length if (hasLongFilename()) - result += path.toString().length() + 1; + result += path.str().length() + 1; // If its now odd lengthed, include the padding byte if (result % 2 != 0 ) @@ -76,38 +76,38 @@ bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) { path = newFile; // SVR4 symbol tables have an empty name - if (path.toString() == ARFILE_SVR4_SYMTAB_NAME) + if (path.str() == ARFILE_SVR4_SYMTAB_NAME) flags |= SVR4SymbolTableFlag; else flags &= ~SVR4SymbolTableFlag; // BSD4.4 symbol tables have a special name - if (path.toString() == ARFILE_BSD4_SYMTAB_NAME) + if (path.str() == ARFILE_BSD4_SYMTAB_NAME) flags |= BSD4SymbolTableFlag; else flags &= ~BSD4SymbolTableFlag; // LLVM symbol tables have a very specific name - if (path.toString() == ARFILE_LLVM_SYMTAB_NAME) + if (path.str() == ARFILE_LLVM_SYMTAB_NAME) flags |= LLVMSymbolTableFlag; else flags &= ~LLVMSymbolTableFlag; // String table name - if (path.toString() == ARFILE_STRTAB_NAME) + if (path.str() == ARFILE_STRTAB_NAME) flags |= StringTableFlag; else flags &= ~StringTableFlag; // If it has a slash then it has a path - bool hasSlash = path.toString().find('/') != std::string::npos; + bool hasSlash = path.str().find('/') != std::string::npos; if (hasSlash) flags |= HasPathFlag; else flags &= ~HasPathFlag; // If it has a slash or its over 15 chars then its a long filename format - if (hasSlash || path.toString().length() > 15) + if (hasSlash || path.str().length() > 15) flags |= HasLongFilenameFlag; else flags &= ~HasLongFilenameFlag; @@ -126,8 +126,11 @@ bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) { return true; } - // Determine what kind of file it is + // Determine what kind of file it is. 
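// The hunk below adds a Bitcode_FileType case so that members whose magic
// identifies them as LLVM bitcode get BitcodeFlag set, rather than always
// falling through to the default that clears it. A freestanding sketch of
// the same set-or-clear shape (the enum value and helper are illustrative,
// not the patch's):
enum { BitcodeFlagBit = 1u << 3 };

static unsigned updateBitcodeFlag(unsigned Flags, bool IsBitcode) {
  return IsBitcode ? (Flags | BitcodeFlagBit) : (Flags & ~BitcodeFlagBit);
}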
switch (sys::IdentifyFileType(signature,4)) { + case sys::Bitcode_FileType: + flags |= BitcodeFlag; + break; default: flags &= ~BitcodeFlag; break; @@ -214,7 +217,7 @@ bool llvm::GetBitcodeSymbols(const sys::Path& fName, std::auto_ptr Buffer( MemoryBuffer::getFileOrSTDIN(fName.c_str())); if (!Buffer.get()) { - if (ErrMsg) *ErrMsg = "Could not open file '" + fName.toString() + "'"; + if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'"; return true; } diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 718d44608b1d2..74895d8a6f112 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -344,8 +344,8 @@ Archive::getAllModules(std::vector& Modules, for (iterator I=begin(), E=end(); I != E; ++I) { if (I->isBitcode()) { - std::string FullMemberName = archPath.toString() + - "(" + I->getPath().toString() + ")"; + std::string FullMemberName = archPath.str() + + "(" + I->getPath().str() + ")"; MemoryBuffer *Buffer = MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); @@ -484,8 +484,8 @@ Archive::findModuleDefiningSymbol(const std::string& symbol, return 0; // Now, load the bitcode module to get the ModuleProvider - std::string FullMemberName = archPath.toString() + "(" + - mbr->getPath().toString() + ")"; + std::string FullMemberName = archPath.str() + "(" + + mbr->getPath().str() + ")"; MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), FullMemberName.c_str()); memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); @@ -534,8 +534,8 @@ Archive::findModulesDefiningSymbols(std::set& symbols, if (mbr->isBitcode()) { // Get the symbols std::vector symbols; - std::string FullMemberName = archPath.toString() + "(" + - mbr->getPath().toString() + ")"; + std::string FullMemberName = archPath.str() + "(" + + mbr->getPath().str() + ")"; ModuleProvider* MP = GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(), FullMemberName, Context, symbols, error); @@ -552,7 +552,7 @@ Archive::findModulesDefiningSymbols(std::set& symbols, } else { if (error) *error = "Can't parse bitcode member: " + - mbr->getPath().toString() + ": " + *error; + mbr->getPath().str() + ": " + *error; delete mbr; return false; } @@ -612,7 +612,7 @@ bool Archive::isBitcodeArchive() { continue; std::string FullMemberName = - archPath.toString() + "(" + I->getPath().toString() + ")"; + archPath.str() + "(" + I->getPath().str() + ")"; MemoryBuffer *Buffer = MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 881d75b3ba8fa..d17f6b5036f38 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -95,7 +95,7 @@ Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, memcpy(hdr.date,buffer,12); // Get rid of trailing blanks in the name - std::string mbrPath = mbr.getPath().toString(); + std::string mbrPath = mbr.getPath().str(); size_t mbrLen = mbrPath.length(); while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { mbrPath.erase(mbrLen-1,1); @@ -173,10 +173,10 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where, mbr->info = *FSInfo; unsigned flags = 0; - bool hasSlash = filePath.toString().find('/') != std::string::npos; + bool hasSlash = filePath.str().find('/') != std::string::npos; if (hasSlash) flags |= ArchiveMember::HasPathFlag; - if (hasSlash || filePath.toString().length() > 15) + if (hasSlash || 
filePath.str().length() > 15) flags |= ArchiveMember::HasLongFilenameFlag; std::string magic; mbr->path.getMagicNumber(magic,4); @@ -223,8 +223,7 @@ Archive::writeMember( // symbol table if its a bitcode file. if (CreateSymbolTable && member.isBitcode()) { std::vector symbols; - std::string FullMemberName = archPath.toString() + "(" + - member.getPath().toString() + std::string FullMemberName = archPath.str() + "(" + member.getPath().str() + ")"; ModuleProvider* MP = GetBitcodeSymbols((const unsigned char*)data,fSize, @@ -249,7 +248,7 @@ Archive::writeMember( } else { delete mFile; if (ErrMsg) - *ErrMsg = "Can't parse bitcode member: " + member.getPath().toString() + *ErrMsg = "Can't parse bitcode member: " + member.getPath().str() + ": " + *ErrMsg; return true; } @@ -266,8 +265,8 @@ Archive::writeMember( // Write the long filename if its long if (writeLongName) { - ARFile.write(member.getPath().toString().data(), - member.getPath().toString().length()); + ARFile.write(member.getPath().str().data(), + member.getPath().str().length()); } // Write the (possibly compressed) member's content to the file. @@ -371,7 +370,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, if (TmpArchive.exists()) TmpArchive.eraseFromDisk(); if (ErrMsg) - *ErrMsg = "Error opening archive file: " + archPath.toString(); + *ErrMsg = "Error opening archive file: " + archPath.str(); return true; } @@ -425,7 +424,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, if (TmpArchive.exists()) TmpArchive.eraseFromDisk(); if (ErrMsg) - *ErrMsg = "Error opening archive file: " + FinalFilePath.toString(); + *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); return true; } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 741c5381fc78a..0e9f1a05fe3eb 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -14,11 +14,14 @@ #include "LLLexer.h" #include "llvm/DerivedTypes.h" #include "llvm/Instruction.h" +#include "llvm/LLVMContext.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Assembly/Parser.h" +#include #include #include using namespace llvm; @@ -180,8 +183,9 @@ static const char *isLabelTail(const char *CurPtr) { // Lexer definition. //===----------------------------------------------------------------------===// -LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err) - : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), APFloatVal(0.0) { +LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, + LLVMContext &C) + : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { CurPtr = CurBuf->getBufferStart(); } @@ -250,7 +254,7 @@ lltok::Kind LLLexer::LexToken() { case ';': SkipLineComment(); return LexToken(); - case '!': return lltok::Metadata; + case '!': return LexMetadata(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': @@ -418,7 +422,23 @@ static bool JustWhitespaceNewLine(const char *&Ptr) { return false; } +/// LexMetadata: +/// !{...} +/// !42 +/// !foo +lltok::Kind LLLexer::LexMetadata() { + if (isalpha(CurPtr[0])) { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + StrVal.assign(TokStart+1, CurPtr); // Skip ! 
+ return lltok::NamedOrCustomMD; + } + return lltok::Metadata; +} + /// LexIdentifier: Handle several related productions: /// Label [-a-zA-Z$._0-9]+: /// IntegerType i[0-9]+ @@ -452,7 +472,7 @@ lltok::Kind LLLexer::LexIdentifier() { Error("bitwidth for integer type out of range!"); return lltok::Error; } - TyVal = IntegerType::get(NumBits); + TyVal = IntegerType::get(Context, NumBits); return lltok::Type; } @@ -471,6 +491,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(global); KEYWORD(constant); KEYWORD(private); + KEYWORD(linker_private); KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); @@ -497,6 +518,10 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(deplibs); KEYWORD(datalayout); KEYWORD(volatile); + KEYWORD(nuw); + KEYWORD(nsw); + KEYWORD(exact); + KEYWORD(inbounds); KEYWORD(align); KEYWORD(addrspace); KEYWORD(section); @@ -504,6 +529,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(module); KEYWORD(asm); KEYWORD(sideeffect); + KEYWORD(msasm); KEYWORD(gc); KEYWORD(ccc); @@ -531,6 +557,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(readnone); KEYWORD(readonly); + KEYWORD(inlinehint); KEYWORD(noinline); KEYWORD(alwaysinline); KEYWORD(optsize); @@ -538,6 +565,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(sspreq); KEYWORD(noredzone); KEYWORD(noimplicitfloat); + KEYWORD(naked); KEYWORD(type); KEYWORD(opaque); @@ -554,14 +582,14 @@ lltok::Kind LLLexer::LexIdentifier() { #define TYPEKEYWORD(STR, LLVMTY) \ if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ TyVal = LLVMTY; return lltok::Type; } - TYPEKEYWORD("void", Type::VoidTy); - TYPEKEYWORD("float", Type::FloatTy); - TYPEKEYWORD("double", Type::DoubleTy); - TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty); - TYPEKEYWORD("fp128", Type::FP128Ty); - TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty); - TYPEKEYWORD("label", Type::LabelTy); - TYPEKEYWORD("metadata", Type::MetadataTy); + TYPEKEYWORD("void", Type::getVoidTy(Context)); + TYPEKEYWORD("float", Type::getFloatTy(Context)); + TYPEKEYWORD("double", Type::getDoubleTy(Context)); + TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context)); + TYPEKEYWORD("fp128", Type::getFP128Ty(Context)); + TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context)); + TYPEKEYWORD("label", Type::getLabelTy(Context)); + TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); #undef TYPEKEYWORD // Handle special forms for autoupgrading. Drop these in LLVM 3.0. 
This is @@ -589,7 +617,6 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); - INSTKEYWORD(vicmp, VICmp); INSTKEYWORD(vfcmp, VFCmp); INSTKEYWORD(phi, PHI); INSTKEYWORD(call, Call); @@ -635,7 +662,7 @@ lltok::Kind LLLexer::LexIdentifier() { TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; - APInt Tmp(bits, TokStart+3, len, 16); + APInt Tmp(bits, StringRef(TokStart+3, len), 16); uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < bits) Tmp.trunc(activeBits); @@ -698,7 +725,7 @@ lltok::Kind LLLexer::Lex0x() { uint64_t Pair[2]; switch (Kind) { - default: assert(0 && "Unknown kind!"); + default: llvm_unreachable("Unknown kind!"); case 'K': // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) FP80HexToIntPair(TokStart+3, CurPtr, Pair); @@ -761,7 +788,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { return Lex0x(); unsigned Len = CurPtr-TokStart; uint32_t numBits = ((Len * 64) / 19) + 2; - APInt Tmp(numBits, TokStart, Len, 10); + APInt Tmp(numBits, StringRef(TokStart, Len), 10); if (TokStart[0] == '-') { uint32_t minBits = Tmp.getMinSignedBits(); if (minBits > 0 && minBits < numBits) diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index b5e58f1418ec2..de39272f45e27 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -24,12 +24,14 @@ namespace llvm { class MemoryBuffer; class Type; class SMDiagnostic; + class LLVMContext; class LLLexer { const char *CurPtr; MemoryBuffer *CurBuf; SMDiagnostic &ErrorInfo; SourceMgr &SM; + LLVMContext &Context; // Information about the current token. const char *TokStart; @@ -42,7 +44,8 @@ namespace llvm { std::string TheError; public: - explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &); + explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &, + LLVMContext &C); ~LLLexer() {} lltok::Kind Lex() { @@ -72,6 +75,7 @@ namespace llvm { lltok::Kind LexDigitOrNegative(); lltok::Kind LexPositive(); lltok::Kind LexAt(); + lltok::Kind LexMetadata(); lltok::Kind LexPercent(); lltok::Kind LexQuote(); lltok::Kind Lex0x(); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 3966ab3b5fc6e..09bc5f736fc61 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -19,11 +19,13 @@ #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" -#include "llvm/MDNode.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/ValueSymbolTable.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -40,15 +42,17 @@ namespace llvm { t_Null, t_Undef, t_Zero, // No value. t_EmptyArray, // No value: [] t_Constant, // Value in ConstantVal. - t_InlineAsm // Value in StrVal/StrVal2/UIntVal. + t_InlineAsm, // Value in StrVal/StrVal2/UIntVal. + t_Metadata // Value in MetadataVal. 
} Kind; - + LLParser::LocTy Loc; unsigned UIntVal; std::string StrVal, StrVal2; APSInt APSIntVal; APFloat APFloatVal; Constant *ConstantVal; + MetadataBase *MetadataVal; ValID() : APFloatVal(0.0) {} }; } @@ -73,21 +77,29 @@ bool LLParser::ValidateEndOfModule() { return Error(ForwardRefTypeIDs.begin()->second.second, "use of undefined type '%" + utostr(ForwardRefTypeIDs.begin()->first) + "'"); - + if (!ForwardRefVals.empty()) return Error(ForwardRefVals.begin()->second.second, "use of undefined value '@" + ForwardRefVals.begin()->first + "'"); - + if (!ForwardRefValIDs.empty()) return Error(ForwardRefValIDs.begin()->second.second, "use of undefined value '@" + utostr(ForwardRefValIDs.begin()->first) + "'"); - + + if (!ForwardRefMDNodes.empty()) + return Error(ForwardRefMDNodes.begin()->second.second, + "use of undefined metadata '!" + + utostr(ForwardRefMDNodes.begin()->first) + "'"); + + // Look for intrinsic functions and CallInst that need to be upgraded for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove - + + // Check debug info intrinsics. + CheckDebugInfoIntrinsics(M); return false; } @@ -107,27 +119,31 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_target: if (ParseTargetDefinition()) return true; break; case lltok::kw_deplibs: if (ParseDepLibs()) return true; break; case lltok::kw_type: if (ParseUnnamedType()) return true; break; + case lltok::LocalVarID: if (ParseUnnamedType()) return true; break; case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0 case lltok::LocalVar: if (ParseNamedType()) return true; break; + case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break; case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; case lltok::Metadata: if (ParseStandaloneMetadata()) return true; break; + case lltok::NamedOrCustomMD: if (ParseNamedMetadata()) return true; break; // The Global variable production with no name can have many different // optional leading prefixes, the production is: // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal // OptionalAddrSpace ('constant'|'global') ... 
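// The new LocalVarID/GlobalID cases above route to ParseUnnamedType and
// ParseUnnamedGlobal, which accept explicitly numbered definitions
// ("%0 = type ...", "@0 = global ...") but insist each number matches the
// next available slot. A standalone sketch of that in-order check
// (hypothetical helper, true-on-error like the parser):
#include <cstdio>

static bool checkNumberedSlot(unsigned NextSlot, unsigned WrittenID) {
  if (WrittenID != NextSlot) {
    std::fprintf(stderr, "value expected to be numbered '%%%u'\n", NextSlot);
    return true;   // parse error
  }
  return false;    // ok, definition occupies slot NextSlot
}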
- case lltok::kw_private: // OptionalLinkage - case lltok::kw_internal: // OptionalLinkage - case lltok::kw_weak: // OptionalLinkage - case lltok::kw_weak_odr: // OptionalLinkage - case lltok::kw_linkonce: // OptionalLinkage - case lltok::kw_linkonce_odr: // OptionalLinkage - case lltok::kw_appending: // OptionalLinkage - case lltok::kw_dllexport: // OptionalLinkage - case lltok::kw_common: // OptionalLinkage - case lltok::kw_dllimport: // OptionalLinkage - case lltok::kw_extern_weak: // OptionalLinkage - case lltok::kw_external: { // OptionalLinkage + case lltok::kw_private : // OptionalLinkage + case lltok::kw_linker_private: // OptionalLinkage + case lltok::kw_internal: // OptionalLinkage + case lltok::kw_weak: // OptionalLinkage + case lltok::kw_weak_odr: // OptionalLinkage + case lltok::kw_linkonce: // OptionalLinkage + case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_appending: // OptionalLinkage + case lltok::kw_dllexport: // OptionalLinkage + case lltok::kw_common: // OptionalLinkage + case lltok::kw_dllimport: // OptionalLinkage + case lltok::kw_extern_weak: // OptionalLinkage + case lltok::kw_external: { // OptionalLinkage unsigned Linkage, Visibility; if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || @@ -144,7 +160,7 @@ bool LLParser::ParseTopLevelEntities() { return true; break; } - + case lltok::kw_thread_local: // OptionalThreadLocal case lltok::kw_addrspace: // OptionalAddrSpace case lltok::kw_constant: // GlobalType @@ -161,11 +177,11 @@ bool LLParser::ParseTopLevelEntities() { bool LLParser::ParseModuleAsm() { assert(Lex.getKind() == lltok::kw_module); Lex.Lex(); - - std::string AsmStr; + + std::string AsmStr; if (ParseToken(lltok::kw_asm, "expected 'module asm'") || ParseStringConstant(AsmStr)) return true; - + const std::string &AsmSoFar = M->getModuleInlineAsm(); if (AsmSoFar.empty()) M->setModuleInlineAsm(AsmStr); @@ -211,7 +227,7 @@ bool LLParser::ParseDepLibs() { if (EatIfPresent(lltok::rsquare)) return false; - + std::string Str; if (ParseStringConstant(Str)) return true; M->addLibrary(Str); @@ -224,32 +240,44 @@ bool LLParser::ParseDepLibs() { return ParseToken(lltok::rsquare, "expected ']' at end of list"); } -/// toplevelentity +/// ParseUnnamedType: /// ::= 'type' type +/// ::= LocalVarID '=' 'type' type bool LLParser::ParseUnnamedType() { + unsigned TypeID = NumberedTypes.size(); + + // Handle the LocalVarID form. + if (Lex.getKind() == lltok::LocalVarID) { + if (Lex.getUIntVal() != TypeID) + return Error(Lex.getLoc(), "type expected to be numbered '%" + + utostr(TypeID) + "'"); + Lex.Lex(); // eat LocalVarID; + + if (ParseToken(lltok::equal, "expected '=' after name")) + return true; + } + assert(Lex.getKind() == lltok::kw_type); LocTy TypeLoc = Lex.getLoc(); Lex.Lex(); // eat kw_type - PATypeHolder Ty(Type::VoidTy); + PATypeHolder Ty(Type::getVoidTy(Context)); if (ParseType(Ty)) return true; - - unsigned TypeID = NumberedTypes.size(); - + // See if this type was previously referenced. std::map >::iterator FI = ForwardRefTypeIDs.find(TypeID); if (FI != ForwardRefTypeIDs.end()) { if (FI->second.first.get() == Ty) return Error(TypeLoc, "self referential type is invalid"); - + cast(FI->second.first.get())->refineAbstractTypeTo(Ty); Ty = FI->second.first.get(); ForwardRefTypeIDs.erase(FI); } - + NumberedTypes.push_back(Ty); - + return false; } @@ -259,14 +287,14 @@ bool LLParser::ParseNamedType() { std::string Name = Lex.getStrVal(); LocTy NameLoc = Lex.getLoc(); Lex.Lex(); // eat LocalVar. 
- - PATypeHolder Ty(Type::VoidTy); - + + PATypeHolder Ty(Type::getVoidTy(Context)); + if (ParseToken(lltok::equal, "expected '=' after name") || ParseToken(lltok::kw_type, "expected 'type' after name") || ParseType(Ty)) return true; - + // Set the type name, checking for conflicts as we do so. bool AlreadyExists = M->addTypeName(Name, Ty); if (!AlreadyExists) return false; @@ -283,16 +311,16 @@ bool LLParser::ParseNamedType() { Ty = FI->second.first.get(); ForwardRefTypes.erase(FI); } - + // Inserting a name that is already defined, get the existing name. const Type *Existing = M->getTypeByName(Name); assert(Existing && "Conflict but no matching type?!"); - + // Otherwise, this is an attempt to redefine a type. That's okay if // the redefinition is identical to the original. // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0 if (Existing == Ty) return false; - + // Any other kind of (non-equivalent) redefinition is an error. return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" + Ty->getDescription() + "'"); @@ -304,7 +332,7 @@ bool LLParser::ParseNamedType() { bool LLParser::ParseDeclare() { assert(Lex.getKind() == lltok::kw_declare); Lex.Lex(); - + Function *F; return ParseFunctionHeader(F, false); } @@ -314,7 +342,7 @@ bool LLParser::ParseDeclare() { bool LLParser::ParseDefine() { assert(Lex.getKind() == lltok::kw_define); Lex.Lex(); - + Function *F; return ParseFunctionHeader(F, true) || ParseFunctionBody(*F); @@ -336,6 +364,38 @@ bool LLParser::ParseGlobalType(bool &IsConstant) { return false; } +/// ParseUnnamedGlobal: +/// OptionalVisibility ALIAS ... +/// OptionalLinkage OptionalVisibility ... -> global variable +/// GlobalID '=' OptionalVisibility ALIAS ... +/// GlobalID '=' OptionalLinkage OptionalVisibility ... -> global variable +bool LLParser::ParseUnnamedGlobal() { + unsigned VarID = NumberedVals.size(); + std::string Name; + LocTy NameLoc = Lex.getLoc(); + + // Handle the GlobalID form. + if (Lex.getKind() == lltok::GlobalID) { + if (Lex.getUIntVal() != VarID) + return Error(Lex.getLoc(), "variable expected to be numbered '%" + + utostr(VarID) + "'"); + Lex.Lex(); // eat GlobalID; + + if (ParseToken(lltok::equal, "expected '=' after name")) + return true; + } + + bool HasLinkage; + unsigned Linkage, Visibility; + if (ParseOptionalLinkage(Linkage, HasLinkage) || + ParseOptionalVisibility(Visibility)) + return true; + + if (HasLinkage || Lex.getKind() != lltok::kw_alias) + return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility); + return ParseAlias(Name, NameLoc, Visibility); +} + /// ParseNamedGlobal: /// GlobalVar '=' OptionalVisibility ALIAS ... /// GlobalVar '=' OptionalLinkage OptionalVisibility ... -> global variable @@ -344,21 +404,96 @@ bool LLParser::ParseNamedGlobal() { LocTy NameLoc = Lex.getLoc(); std::string Name = Lex.getStrVal(); Lex.Lex(); - + bool HasLinkage; unsigned Linkage, Visibility; if (ParseToken(lltok::equal, "expected '=' in global variable") || ParseOptionalLinkage(Linkage, HasLinkage) || ParseOptionalVisibility(Visibility)) return true; - + if (HasLinkage || Lex.getKind() != lltok::kw_alias) return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility); return ParseAlias(Name, NameLoc, Visibility); } +// MDString: +// ::= '!' STRINGCONSTANT +bool LLParser::ParseMDString(MetadataBase *&MDS) { + std::string Str; + if (ParseStringConstant(Str)) return true; + MDS = MDString::get(Context, Str); + return false; +} + +// MDNode: +// ::= '!' MDNodeNumber +bool LLParser::ParseMDNode(MetadataBase *&Node) { + // !{ ..., !42, ... 
} + unsigned MID = 0; + if (ParseUInt32(MID)) return true; + + // Check existing MDNode. + std::map::iterator I = MetadataCache.find(MID); + if (I != MetadataCache.end()) { + Node = I->second; + return false; + } + + // Check known forward references. + std::map >::iterator + FI = ForwardRefMDNodes.find(MID); + if (FI != ForwardRefMDNodes.end()) { + Node = FI->second.first; + return false; + } + + // Create MDNode forward reference + SmallVector Elts; + std::string FwdRefName = "llvm.mdnode.fwdref." + utostr(MID); + Elts.push_back(MDString::get(Context, FwdRefName)); + MDNode *FwdNode = MDNode::get(Context, Elts.data(), Elts.size()); + ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc()); + Node = FwdNode; + return false; +} + +///ParseNamedMetadata: +/// !foo = !{ !1, !2 } +bool LLParser::ParseNamedMetadata() { + assert(Lex.getKind() == lltok::NamedOrCustomMD); + Lex.Lex(); + std::string Name = Lex.getStrVal(); + + if (ParseToken(lltok::equal, "expected '=' here")) + return true; + + if (Lex.getKind() != lltok::Metadata) + return TokError("Expected '!' here"); + Lex.Lex(); + + if (Lex.getKind() != lltok::lbrace) + return TokError("Expected '{' here"); + Lex.Lex(); + SmallVector Elts; + do { + if (Lex.getKind() != lltok::Metadata) + return TokError("Expected '!' here"); + Lex.Lex(); + MetadataBase *N = 0; + if (ParseMDNode(N)) return true; + Elts.push_back(N); + } while (EatIfPresent(lltok::comma)); + + if (ParseToken(lltok::rbrace, "expected end of metadata node")) + return true; + + NamedMDNode::Create(Context, Name, Elts.data(), Elts.size(), M); + return false; +} + /// ParseStandaloneMetadata: -/// !42 = !{...} +/// !42 = !{...} bool LLParser::ParseStandaloneMetadata() { assert(Lex.getKind() == lltok::Metadata); Lex.Lex(); @@ -371,17 +506,32 @@ bool LLParser::ParseStandaloneMetadata() { return true; LocTy TyLoc; - bool IsConstant; - PATypeHolder Ty(Type::VoidTy); - if (ParseGlobalType(IsConstant) || - ParseType(Ty, TyLoc)) + PATypeHolder Ty(Type::getVoidTy(Context)); + if (ParseType(Ty, TyLoc)) return true; - - Constant *Init = 0; - if (ParseGlobalValue(Ty, Init)) - return true; + if (Lex.getKind() != lltok::Metadata) + return TokError("Expected metadata here"); + + Lex.Lex(); + if (Lex.getKind() != lltok::lbrace) + return TokError("Expected '{' here"); + + SmallVector Elts; + if (ParseMDNodeVector(Elts) + || ParseToken(lltok::rbrace, "expected end of metadata node")) + return true; + + MDNode *Init = MDNode::get(Context, Elts.data(), Elts.size()); MetadataCache[MetadataID] = Init; + std::map >::iterator + FI = ForwardRefMDNodes.find(MetadataID); + if (FI != ForwardRefMDNodes.end()) { + MDNode *FwdNode = cast(FI->second.first); + FwdNode->replaceAllUsesWith(Init); + ForwardRefMDNodes.erase(FI); + } + return false; } @@ -390,7 +540,7 @@ bool LLParser::ParseStandaloneMetadata() { /// Aliasee /// ::= TypeAndValue /// ::= 'bitcast' '(' TypeAndValue 'to' Type ')' -/// ::= 'getelementptr' '(' ... ')' +/// ::= 'getelementptr' 'inbounds'? '(' ... ')' /// /// Everything through visibility has already been parsed. 
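// ParseMDNode/ParseStandaloneMetadata above implement two-phase resolution:
// a use of !42 seen before its definition gets a placeholder MDNode recorded
// in ForwardRefMDNodes, and the later definition replaceAllUsesWith()s the
// placeholder. A container-level sketch of that bookkeeping with stand-in
// types (no LLVM dependencies; names are illustrative):
#include <map>

struct Node {};  // stand-in for MDNode

static Node *getOrCreateMDRef(unsigned ID,
                              std::map<unsigned, Node*> &Defined,
                              std::map<unsigned, Node*> &FwdRefs) {
  std::map<unsigned, Node*>::iterator I = Defined.find(ID);
  if (I != Defined.end()) return I->second;
  I = FwdRefs.find(ID);
  if (I != FwdRefs.end()) return I->second;
  Node *Placeholder = new Node();  // replaced wholesale once !ID is defined
  FwdRefs[ID] = Placeholder;
  return Placeholder;
}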
/// @@ -407,9 +557,10 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Linkage != GlobalValue::WeakAnyLinkage && Linkage != GlobalValue::WeakODRLinkage && Linkage != GlobalValue::InternalLinkage && - Linkage != GlobalValue::PrivateLinkage) + Linkage != GlobalValue::PrivateLinkage && + Linkage != GlobalValue::LinkerPrivateLinkage) return Error(LinkageLoc, "invalid linkage type for alias"); - + Constant *Aliasee; LocTy AliaseeLoc = Lex.getLoc(); if (Lex.getKind() != lltok::kw_bitcast && @@ -423,7 +574,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, return Error(AliaseeLoc, "invalid aliasee"); Aliasee = ID.ConstantVal; } - + if (!isa(Aliasee->getType())) return Error(AliaseeLoc, "alias must have pointer type"); @@ -432,7 +583,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, (GlobalValue::LinkageTypes)Linkage, Name, Aliasee); GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); - + // See if this value already exists in the symbol table. If so, it is either // a redefinition or a definition of a forward reference. if (GlobalValue *Val = @@ -449,18 +600,18 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, if (Val->getType() != GA->getType()) return Error(NameLoc, "forward reference and definition of alias have different types"); - + // If they agree, just RAUW the old value with the alias and remove the // forward ref info. Val->replaceAllUsesWith(GA); Val->eraseFromParent(); ForwardRefVals.erase(I); } - + // Insert into the module, we know its name won't collide now. M->getAliasList().push_back(GA); assert(GA->getNameStr() == Name && "Should not be a name conflict!"); - + return false; } @@ -478,14 +629,14 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned AddrSpace; bool ThreadLocal, IsConstant; LocTy TyLoc; - - PATypeHolder Ty(Type::VoidTy); + + PATypeHolder Ty(Type::getVoidTy(Context)); if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) || ParseOptionalAddrSpace(AddrSpace) || ParseGlobalType(IsConstant) || ParseType(Ty, TyLoc)) return true; - + // If the linkage is specified and is external, then no initializer is // present. Constant *Init = 0; @@ -496,9 +647,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, return true; } - if (isa(Ty) || Ty == Type::LabelTy) + if (isa(Ty) || Ty->isLabelTy()) return Error(TyLoc, "invalid type for global variable"); - + GlobalVariable *GV = 0; // See if the global was forward referenced, if so, use the global. @@ -516,20 +667,20 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, } if (GV == 0) { - GV = new GlobalVariable(Ty, false, GlobalValue::ExternalLinkage, 0, Name, - M, false, AddrSpace); + GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0, + Name, 0, false, AddrSpace); } else { if (GV->getType()->getElementType() != Ty) return Error(TyLoc, "forward reference and definition of global have different types"); - + // Move the forward-reference to the correct spot in the module. M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV); } if (Name.empty()) NumberedVals.push_back(GV); - + // Set the parsed properties on the global. if (Init) GV->setInitializer(Init); @@ -537,11 +688,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, GV->setLinkage((GlobalValue::LinkageTypes)Linkage); GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); GV->setThreadLocal(ThreadLocal); - + // Parse attributes on the global. 
while (Lex.getKind() == lltok::comma) { Lex.Lex(); - + if (Lex.getKind() == lltok::kw_section) { Lex.Lex(); GV->setSection(Lex.getStrVal()); @@ -555,7 +706,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, TokError("unknown global variable property!"); } } - + return false; } @@ -574,11 +725,11 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, Error(Loc, "global variable reference must have pointer type"); return 0; } - + // Look this name up in the normal function symbol table. GlobalValue *Val = cast_or_null(M->getValueSymbolTable().lookup(Name)); - + // If this is a forward reference for the value, see if we already created a // forward ref record. if (Val == 0) { @@ -587,7 +738,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, if (I != ForwardRefVals.end()) Val = I->second.first; } - + // If we have the value in the symbol table or fwd-ref table, return it. if (Val) { if (Val->getType() == Ty) return Val; @@ -595,7 +746,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, Val->getType()->getDescription() + "'"); return 0; } - + // Otherwise, create a new forward reference for this value and remember it. GlobalValue *FwdVal; if (const FunctionType *FT = dyn_cast(PTy->getElementType())) { @@ -604,13 +755,13 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, Error(Loc, "function may not return opaque type"); return 0; } - + FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); } else { - FwdVal = new GlobalVariable(PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, 0, Name, M); + FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, + GlobalValue::ExternalWeakLinkage, 0, Name); } - + ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -621,9 +772,9 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { Error(Loc, "global variable reference must have pointer type"); return 0; } - + GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0; - + // If this is a forward reference for the value, see if we already created a // forward ref record. if (Val == 0) { @@ -632,7 +783,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { if (I != ForwardRefValIDs.end()) Val = I->second.first; } - + // If we have the value in the symbol table or fwd-ref table, return it. if (Val) { if (Val->getType() == Ty) return Val; @@ -640,7 +791,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { Val->getType()->getDescription() + "'"); return 0; } - + // Otherwise, create a new forward reference for this value and remember it. 
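// Another mechanical migration visible in these hunks: GlobalVariable's
// constructor now takes the owning Module first, by reference, instead of a
// trailing Module*. A sketch against this revision's headers, mirroring the
// forward-reference stub created just below (helper name is illustrative):
#include <string>
#include "llvm/Module.h"
#include "llvm/GlobalVariable.h"

static llvm::GlobalVariable *makeExternWeakStub(llvm::Module &M,
                                                const llvm::Type *EltTy,
                                                const std::string &Name) {
  // Pre-patch form: new GlobalVariable(EltTy, false, Linkage, 0, Name, &M);
  return new llvm::GlobalVariable(M, EltTy, /*isConstant=*/false,
                                  llvm::GlobalValue::ExternalWeakLinkage,
                                  /*Initializer=*/0, Name);
}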
GlobalValue *FwdVal; if (const FunctionType *FT = dyn_cast(PTy->getElementType())) { @@ -651,10 +802,10 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { } FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M); } else { - FwdVal = new GlobalVariable(PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, 0, "", M); + FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, + GlobalValue::ExternalWeakLinkage, 0, ""); } - + ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -707,7 +858,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { return ParseToken(lltok::lparen, "expected '(' in address space") || ParseUInt32(AddrSpace) || ParseToken(lltok::rparen, "expected ')' in address space"); -} +} /// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind /// indicates what kind of attribute list this is: 0: function arg, 1: result, @@ -716,7 +867,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { Attrs = Attribute::None; LocTy AttrLoc = Lex.getLoc(); - + while (1) { switch (Lex.getKind()) { case lltok::kw_sext: @@ -737,10 +888,10 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { default: // End of attributes. if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly)) return Error(AttrLoc, "invalid use of function-only attribute"); - + if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly)) return Error(AttrLoc, "invalid use of parameter-only attribute"); - + return false; case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break; case lltok::kw_signext: Attrs |= Attribute::SExt; break; @@ -756,13 +907,15 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; + case lltok::kw_inlinehint: Attrs |= Attribute::InlineHint; break; case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break; case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break; case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break; case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break; case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break; case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; - + case lltok::kw_naked: Attrs |= Attribute::Naked; break; + case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -778,6 +931,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { /// ParseOptionalLinkage /// ::= /*empty*/ /// ::= 'private' +/// ::= 'linker_private' /// ::= 'internal' /// ::= 'weak' /// ::= 'weak_odr' @@ -792,22 +946,23 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { HasLinkage = false; switch (Lex.getKind()) { - default: Res = GlobalValue::ExternalLinkage; return false; - case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break; - case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; - case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; - case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; - case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break; - case lltok::kw_linkonce_odr: Res = 
GlobalValue::LinkOnceODRLinkage; break; + default: Res=GlobalValue::ExternalLinkage; return false; + case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break; + case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break; + case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; + case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; + case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; + case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break; + case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break; case lltok::kw_available_externally: Res = GlobalValue::AvailableExternallyLinkage; break; - case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break; - case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break; - case lltok::kw_common: Res = GlobalValue::CommonLinkage; break; - case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break; - case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break; - case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break; + case lltok::kw_appending: Res = GlobalValue::AppendingLinkage; break; + case lltok::kw_dllexport: Res = GlobalValue::DLLExportLinkage; break; + case lltok::kw_common: Res = GlobalValue::CommonLinkage; break; + case lltok::kw_dllimport: Res = GlobalValue::DLLImportLinkage; break; + case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break; + case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break; } Lex.Lex(); HasLinkage = true; @@ -819,7 +974,7 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { /// ::= 'default' /// ::= 'hidden' /// ::= 'protected' -/// +/// bool LLParser::ParseOptionalVisibility(unsigned &Res) { switch (Lex.getKind()) { default: Res = GlobalValue::DefaultVisibility; return false; @@ -843,7 +998,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'arm_aapcs_vfpcc' /// ::= 'cc' UINT /// -bool LLParser::ParseOptionalCallingConv(unsigned &CC) { +bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { switch (Lex.getKind()) { default: CC = CallingConv::C; return false; case lltok::kw_ccc: CC = CallingConv::C; break; @@ -854,9 +1009,47 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; - case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC); + case lltok::kw_cc: { + unsigned ArbitraryCC; + Lex.Lex(); + if (ParseUInt32(ArbitraryCC)) { + return true; + } else + CC = static_cast(ArbitraryCC); + return false; + } + break; } + + Lex.Lex(); + return false; +} + +/// ParseOptionalCustomMetadata +/// ::= /* empty */ +/// ::= !dbg !42 +bool LLParser::ParseOptionalCustomMetadata() { + + std::string Name; + if (Lex.getKind() == lltok::NamedOrCustomMD) { + Name = Lex.getStrVal(); + Lex.Lex(); + } else + return false; + + if (Lex.getKind() != lltok::Metadata) + return TokError("Expected '!' 
here"); Lex.Lex(); + + MetadataBase *Node; + if (ParseMDNode(Node)) return true; + + MetadataContext &TheMetadata = M->getContext().getMetadata(); + unsigned MDK = TheMetadata.getMDKind(Name.c_str()); + if (!MDK) + MDK = TheMetadata.RegisterMDKind(Name.c_str()); + MDsOnInst.push_back(std::make_pair(MDK, cast(Node))); + return false; } @@ -874,29 +1067,36 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { return false; } -/// ParseOptionalCommaAlignment -/// ::= /* empty */ -/// ::= ',' 'align' 4 -bool LLParser::ParseOptionalCommaAlignment(unsigned &Alignment) { - Alignment = 0; - if (!EatIfPresent(lltok::comma)) - return false; - return ParseToken(lltok::kw_align, "expected 'align'") || - ParseUInt32(Alignment); +/// ParseOptionalInfo +/// ::= OptionalInfo (',' OptionalInfo)+ +bool LLParser::ParseOptionalInfo(unsigned &Alignment) { + + // FIXME: Handle customized metadata info attached with an instruction. + do { + if (Lex.getKind() == lltok::NamedOrCustomMD) { + if (ParseOptionalCustomMetadata()) return true; + } else if (Lex.getKind() == lltok::kw_align) { + if (ParseOptionalAlignment(Alignment)) return true; + } else + return true; + } while (EatIfPresent(lltok::comma)); + + return false; } + /// ParseIndexList /// ::= (',' uint32)+ bool LLParser::ParseIndexList(SmallVectorImpl &Indices) { if (Lex.getKind() != lltok::comma) return TokError("expected ',' as start of index list"); - + while (EatIfPresent(lltok::comma)) { unsigned Idx; if (ParseUInt32(Idx)) return true; Indices.push_back(Idx); } - + return false; } @@ -908,14 +1108,14 @@ bool LLParser::ParseIndexList(SmallVectorImpl &Indices) { bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) { LocTy TypeLoc = Lex.getLoc(); if (ParseTypeRec(Result)) return true; - + // Verify no unresolved uprefs. if (!UpRefs.empty()) return Error(UpRefs.back().Loc, "invalid unresolved type up reference"); - - if (!AllowVoid && Result.get() == Type::VoidTy) + + if (!AllowVoid && Result.get()->isVoidTy()) return Error(TypeLoc, "void type only allowed for function results"); - + return false; } @@ -930,26 +1130,26 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) { // If Ty isn't abstract, or if there are no up-references in it, then there is // nothing to resolve here. if (!ty->isAbstract() || UpRefs.empty()) return ty; - + PATypeHolder Ty(ty); #if 0 errs() << "Type '" << Ty->getDescription() << "' newly formed. Resolving upreferences.\n" << UpRefs.size() << " upreferences active!\n"; #endif - + // If we find any resolvable upreferences (i.e., those whose NestingLevel goes // to zero), we resolve them all together before we resolve them to Ty. At // the end of the loop, if there is anything to resolve to Ty, it will be in // this variable. OpaqueType *TypeToResolve = 0; - + for (unsigned i = 0; i != UpRefs.size(); ++i) { // Determine if 'Ty' directly contains this up-references 'LastContainedTy'. bool ContainsType = std::find(Ty->subtype_begin(), Ty->subtype_end(), UpRefs[i].LastContainedTy) != Ty->subtype_end(); - + #if 0 errs() << " UR#" << i << " - TypeContains(" << Ty->getDescription() << ", " << UpRefs[i].LastContainedTy->getDescription() << ") = " @@ -958,15 +1158,15 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) { #endif if (!ContainsType) continue; - + // Decrement level of upreference unsigned Level = --UpRefs[i].NestingLevel; UpRefs[i].LastContainedTy = Ty; - + // If the Up-reference has a non-zero level, it shouldn't be resolved yet. 
if (Level != 0) continue; - + #if 0 errs() << " * Resolving upreference for " << UpRefs[i].UpRefTy << "\n"; #endif @@ -977,10 +1177,10 @@ PATypeHolder LLParser::HandleUpRefs(const Type *ty) { UpRefs.erase(UpRefs.begin()+i); // Remove from upreference list. --i; // Do not skip the next element. } - + if (TypeToResolve) TypeToResolve->refineAbstractTypeTo(Ty); - + return Ty; } @@ -994,11 +1194,11 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { case lltok::Type: // TypeRec ::= 'float' | 'void' (etc) Result = Lex.getTyVal(); - Lex.Lex(); + Lex.Lex(); break; case lltok::kw_opaque: // TypeRec ::= 'opaque' - Result = Context.getOpaqueType(); + Result = OpaqueType::get(Context); Lex.Lex(); break; case lltok::lbrace: @@ -1028,7 +1228,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (const Type *T = M->getTypeByName(Lex.getStrVal())) { Result = T; } else { - Result = Context.getOpaqueType(); + Result = OpaqueType::get(Context); ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(), std::make_pair(Result, Lex.getLoc()))); @@ -1036,7 +1236,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { } Lex.Lex(); break; - + case lltok::LocalVarID: // TypeRec ::= %4 if (Lex.getUIntVal() < NumberedTypes.size()) @@ -1047,7 +1247,7 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (I != ForwardRefTypeIDs.end()) Result = I->second.first; else { - Result = Context.getOpaqueType(); + Result = OpaqueType::get(Context); ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(), std::make_pair(Result, Lex.getLoc()))); @@ -1060,36 +1260,36 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { Lex.Lex(); unsigned Val; if (ParseUInt32(Val)) return true; - OpaqueType *OT = Context.getOpaqueType(); //Use temporary placeholder. + OpaqueType *OT = OpaqueType::get(Context); //Use temporary placeholder. UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT)); Result = OT; break; } } - - // Parse the type suffixes. + + // Parse the type suffixes. while (1) { switch (Lex.getKind()) { // End of type. 
- default: return false; + default: return false; // TypeRec ::= TypeRec '*' case lltok::star: - if (Result.get() == Type::LabelTy) + if (Result.get()->isLabelTy()) return TokError("basic block pointers are invalid"); - if (Result.get() == Type::VoidTy) + if (Result.get()->isVoidTy()) return TokError("pointers to void are invalid; use i8* instead"); if (!PointerType::isValidElementType(Result.get())) return TokError("pointer to this type is invalid"); - Result = HandleUpRefs(Context.getPointerTypeUnqual(Result.get())); + Result = HandleUpRefs(PointerType::getUnqual(Result.get())); Lex.Lex(); break; // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*' case lltok::kw_addrspace: { - if (Result.get() == Type::LabelTy) + if (Result.get()->isLabelTy()) return TokError("basic block pointers are invalid"); - if (Result.get() == Type::VoidTy) + if (Result.get()->isVoidTy()) return TokError("pointers to void are invalid; use i8* instead"); if (!PointerType::isValidElementType(Result.get())) return TokError("pointer to this type is invalid"); @@ -1098,10 +1298,10 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { ParseToken(lltok::star, "expected '*' in address space")) return true; - Result = HandleUpRefs(Context.getPointerType(Result.get(), AddrSpace)); + Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace)); break; } - + /// Types '(' ArgTypeListI ')' OptFuncAttrs case lltok::lparen: if (ParseFunctionType(Result)) @@ -1120,16 +1320,16 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, PerFunctionState &PFS) { if (ParseToken(lltok::lparen, "expected '(' in call")) return true; - + while (Lex.getKind() != lltok::rparen) { // If this isn't the first argument, we need a comma. if (!ArgList.empty() && ParseToken(lltok::comma, "expected ',' in argument list")) return true; - + // Parse the argument. LocTy ArgLoc; - PATypeHolder ArgTy(Type::VoidTy); + PATypeHolder ArgTy(Type::getVoidTy(Context)); unsigned ArgAttrs1, ArgAttrs2; Value *V; if (ParseType(ArgTy, ArgLoc) || @@ -1162,7 +1362,7 @@ bool LLParser::ParseArgumentList(std::vector &ArgList, isVarArg = false; assert(Lex.getKind() == lltok::lparen); Lex.Lex(); // eat the (. - + if (Lex.getKind() == lltok::rparen) { // empty } else if (Lex.getKind() == lltok::dotdotdot) { @@ -1170,19 +1370,19 @@ bool LLParser::ParseArgumentList(std::vector &ArgList, Lex.Lex(); } else { LocTy TypeLoc = Lex.getLoc(); - PATypeHolder ArgTy(Type::VoidTy); + PATypeHolder ArgTy(Type::getVoidTy(Context)); unsigned Attrs; std::string Name; - + // If we're parsing a type, use ParseTypeRec, because we allow recursive // types (such as a function returning a pointer to itself). If parsing a // function prototype, we require fully resolved types. if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) || ParseOptionalAttrs(Attrs, 0)) return true; - - if (ArgTy == Type::VoidTy) + + if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); - + if (Lex.getKind() == lltok::LocalVar || Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0 Name = Lex.getStrVal(); @@ -1191,22 +1391,22 @@ bool LLParser::ParseArgumentList(std::vector &ArgList, if (!FunctionType::isValidArgumentType(ArgTy)) return Error(TypeLoc, "invalid type for function argument"); - + ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); - + while (EatIfPresent(lltok::comma)) { // Handle ... at end of arg list. if (EatIfPresent(lltok::dotdotdot)) { isVarArg = true; break; } - + // Otherwise must be an argument type. TypeLoc = Lex.getLoc(); if ((inType ? 
ParseTypeRec(ArgTy) : ParseType(ArgTy)) || ParseOptionalAttrs(Attrs, 0)) return true; - if (ArgTy == Type::VoidTy) + if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); if (Lex.getKind() == lltok::LocalVar || @@ -1219,14 +1419,14 @@ bool LLParser::ParseArgumentList(std::vector &ArgList, if (!ArgTy->isFirstClassType() && !isa(ArgTy)) return Error(TypeLoc, "invalid type for function argument"); - + ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); } } - + return ParseToken(lltok::rparen, "expected ')' at end of argument list"); } - + /// ParseFunctionType /// ::= Type ArgumentList OptionalAttrs bool LLParser::ParseFunctionType(PATypeHolder &Result) { @@ -1234,7 +1434,7 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) { if (!FunctionType::isValidReturnType(Result)) return TokError("invalid function return type"); - + std::vector ArgList; bool isVarArg; unsigned Attrs; @@ -1243,7 +1443,7 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) { // FIXME: Remove in LLVM 3.0 ParseOptionalAttrs(Attrs, 2)) return true; - + // Reject names on the arguments lists. for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { if (!ArgList[i].Name.empty()) @@ -1254,12 +1454,12 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) { // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0 } } - + std::vector ArgListTy; for (unsigned i = 0, e = ArgList.size(); i != e; ++i) ArgListTy.push_back(ArgList[i].Type); - - Result = HandleUpRefs(Context.getFunctionType(Result.get(), + + Result = HandleUpRefs(FunctionType::get(Result.get(), ArgListTy, isVarArg)); return false; } @@ -1273,9 +1473,9 @@ bool LLParser::ParseFunctionType(PATypeHolder &Result) { bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { assert(Lex.getKind() == lltok::lbrace); Lex.Lex(); // Consume the '{' - + if (EatIfPresent(lltok::rbrace)) { - Result = Context.getStructType(Packed); + Result = StructType::get(Context, Packed); return false; } @@ -1283,62 +1483,62 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { LocTy EltTyLoc = Lex.getLoc(); if (ParseTypeRec(Result)) return true; ParamsList.push_back(Result); - - if (Result == Type::VoidTy) + + if (Result->isVoidTy()) return Error(EltTyLoc, "struct element can not have void type"); if (!StructType::isValidElementType(Result)) return Error(EltTyLoc, "invalid element type for struct"); - + while (EatIfPresent(lltok::comma)) { EltTyLoc = Lex.getLoc(); if (ParseTypeRec(Result)) return true; - - if (Result == Type::VoidTy) + + if (Result->isVoidTy()) return Error(EltTyLoc, "struct element can not have void type"); if (!StructType::isValidElementType(Result)) return Error(EltTyLoc, "invalid element type for struct"); - + ParamsList.push_back(Result); } - + if (ParseToken(lltok::rbrace, "expected '}' at end of struct")) return true; - + std::vector ParamsListTy; for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) ParamsListTy.push_back(ParamsList[i].get()); - Result = HandleUpRefs(Context.getStructType(ParamsListTy, Packed)); + Result = HandleUpRefs(StructType::get(Context, ParamsListTy, Packed)); return false; } /// ParseArrayVectorType - Parse an array or vector type, assuming the first /// token has already been consumed. 
-/// TypeRec +/// TypeRec /// ::= '[' APSINTVAL 'x' Types ']' /// ::= '<' APSINTVAL 'x' Types '>' bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) { if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() || Lex.getAPSIntVal().getBitWidth() > 64) return TokError("expected number in address space"); - + LocTy SizeLoc = Lex.getLoc(); uint64_t Size = Lex.getAPSIntVal().getZExtValue(); Lex.Lex(); - + if (ParseToken(lltok::kw_x, "expected 'x' after element count")) return true; LocTy TypeLoc = Lex.getLoc(); - PATypeHolder EltTy(Type::VoidTy); + PATypeHolder EltTy(Type::getVoidTy(Context)); if (ParseTypeRec(EltTy)) return true; - - if (EltTy == Type::VoidTy) + + if (EltTy->isVoidTy()) return Error(TypeLoc, "array and vector element type cannot be void"); if (ParseToken(isVector ? lltok::greater : lltok::rsquare, "expected end of sequential type")) return true; - + if (isVector) { if (Size == 0) return Error(SizeLoc, "zero element vector is illegal"); @@ -1346,11 +1546,11 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) { return Error(SizeLoc, "size too large for vector"); if (!VectorType::isValidElementType(EltTy)) return Error(TypeLoc, "vector element type must be fp or integer"); - Result = Context.getVectorType(EltTy, unsigned(Size)); + Result = VectorType::get(EltTy, unsigned(Size)); } else { if (!ArrayType::isValidElementType(EltTy)) return Error(TypeLoc, "invalid array element type"); - Result = HandleUpRefs(Context.getArrayType(EltTy, Size)); + Result = HandleUpRefs(ArrayType::get(EltTy, Size)); } return false; } @@ -1375,16 +1575,16 @@ LLParser::PerFunctionState::~PerFunctionState() { I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I) if (!isa(I->second.first)) { I->second.first->replaceAllUsesWith( - P.getContext().getUndef(I->second.first->getType())); + UndefValue::get(I->second.first->getType())); delete I->second.first; I->second.first = 0; } - + for (std::map >::iterator I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I) if (!isa(I->second.first)) { I->second.first->replaceAllUsesWith( - P.getContext().getUndef(I->second.first->getType())); + UndefValue::get(I->second.first->getType())); delete I->second.first; I->second.first = 0; } @@ -1410,7 +1610,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, const Type *Ty, LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = F.getValueSymbolTable().lookup(Name); - + // If this is a forward reference for the value, see if we already created a // forward ref record. if (Val == 0) { @@ -1419,31 +1619,32 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, if (I != ForwardRefVals.end()) Val = I->second.first; } - + // If we have the value in the symbol table or fwd-ref table, return it. if (Val) { if (Val->getType() == Ty) return Val; - if (Ty == Type::LabelTy) + if (Ty->isLabelTy()) P.Error(Loc, "'%" + Name + "' is not a basic block"); else P.Error(Loc, "'%" + Name + "' defined with type '" + Val->getType()->getDescription() + "'"); return 0; } - + // Don't make placeholders with invalid type. - if (!Ty->isFirstClassType() && !isa(Ty) && Ty != Type::LabelTy) { + if (!Ty->isFirstClassType() && !isa(Ty) && + Ty != Type::getLabelTy(F.getContext())) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } - + // Otherwise, create a new forward reference for this value and remember it. 
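// The placeholder creation just below now threads the function's context
// into BasicBlock::Create: a forward reference of label type materializes a
// block, anything else a free-standing Argument. A sketch against this
// revision's API (the helper name is illustrative, not from the patch):
#include <string>
#include "llvm/Argument.h"
#include "llvm/BasicBlock.h"
#include "llvm/Function.h"

static llvm::Value *makePlaceholder(llvm::Function &F, const llvm::Type *Ty,
                                    const std::string &Name) {
  if (Ty->isLabelTy())
    return llvm::BasicBlock::Create(F.getContext(), Name, &F);
  return new llvm::Argument(Ty, Name);
}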
   Value *FwdVal;
-  if (Ty == Type::LabelTy)
-    FwdVal = BasicBlock::Create(Name, &F);
+  if (Ty->isLabelTy())
+    FwdVal = BasicBlock::Create(F.getContext(), Name, &F);
   else
     FwdVal = new Argument(Ty, Name);
-
+
   ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
   return FwdVal;
 }
@@ -1452,7 +1653,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID,
                                           const Type *Ty, LocTy Loc) {
   // Look this name up in the normal function symbol table.
   Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
-
+
   // If this is a forward reference for the value, see if we already created a
   // forward ref record.
   if (Val == 0) {
@@ -1461,30 +1662,31 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
     if (I != ForwardRefValIDs.end())
       Val = I->second.first;
   }
-
+
   // If we have the value in the symbol table or fwd-ref table, return it.
   if (Val) {
     if (Val->getType() == Ty) return Val;
-    if (Ty == Type::LabelTy)
+    if (Ty->isLabelTy())
       P.Error(Loc, "'%" + utostr(ID) + "' is not a basic block");
     else
       P.Error(Loc, "'%" + utostr(ID) + "' defined with type '" +
              Val->getType()->getDescription() + "'");
     return 0;
   }
-
-  if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && Ty != Type::LabelTy) {
+
+  if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) &&
+      Ty != Type::getLabelTy(F.getContext())) {
     P.Error(Loc, "invalid use of a non-first-class type");
     return 0;
   }
-
+
   // Otherwise, create a new forward reference for this value and remember it.
   Value *FwdVal;
-  if (Ty == Type::LabelTy)
-    FwdVal = BasicBlock::Create("", &F);
+  if (Ty->isLabelTy())
+    FwdVal = BasicBlock::Create(F.getContext(), "", &F);
   else
     FwdVal = new Argument(Ty);
-
+
   ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
   return FwdVal;
 }
@@ -1495,30 +1697,31 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
                                              const std::string &NameStr,
                                              LocTy NameLoc, Instruction *Inst) {
   // If this instruction has void type, it cannot have a name or ID specified.
-  if (Inst->getType() == Type::VoidTy) {
+  if (Inst->getType()->isVoidTy()) {
     if (NameID != -1 || !NameStr.empty())
       return P.Error(NameLoc, "instructions returning void cannot have a name");
     return false;
   }
-
+
   // If this was a numbered instruction, verify that the instruction is the
   // expected value and resolve any forward references.
   if (NameStr.empty()) {
     // If neither a name nor an ID was specified, just use the next ID.
     if (NameID == -1)
       NameID = NumberedVals.size();
-
+
     if (unsigned(NameID) != NumberedVals.size())
       return P.Error(NameLoc, "instruction expected to be numbered '%" +
                      utostr(NumberedVals.size()) + "'");
-
+
     std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
       ForwardRefValIDs.find(NameID);
     if (FI != ForwardRefValIDs.end()) {
       if (FI->second.first->getType() != Inst->getType())
-        return P.Error(NameLoc, "instruction forward referenced with type '" +
+        return P.Error(NameLoc, "instruction forward referenced with type '" +
                        FI->second.first->getType()->getDescription() + "'");
       FI->second.first->replaceAllUsesWith(Inst);
+      delete FI->second.first;
       ForwardRefValIDs.erase(FI);
     }
@@ -1531,17 +1734,18 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
   FI = ForwardRefVals.find(NameStr);
   if (FI != ForwardRefVals.end()) {
     if (FI->second.first->getType() != Inst->getType())
-      return P.Error(NameLoc, "instruction forward referenced with type '" +
+      return P.Error(NameLoc, "instruction forward referenced with type '" +
                     FI->second.first->getType()->getDescription() + "'");
     FI->second.first->replaceAllUsesWith(Inst);
+    delete FI->second.first;
     ForwardRefVals.erase(FI);
   }
-
+
   // Set the name on the instruction.
   Inst->setName(NameStr);
-
+
   if (Inst->getNameStr() != NameStr)
-    return P.Error(NameLoc, "multiple definition of local value named '" +
+    return P.Error(NameLoc, "multiple definition of local value named '" +
                    NameStr + "'");
   return false;
 }
@@ -1550,11 +1754,13 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
 /// forward reference record if needed.
 BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
                                               LocTy Loc) {
-  return cast_or_null<BasicBlock>(GetVal(Name, Type::LabelTy, Loc));
+  return cast_or_null<BasicBlock>(GetVal(Name,
+                                        Type::getLabelTy(F.getContext()), Loc));
 }

 BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
-  return cast_or_null<BasicBlock>(GetVal(ID, Type::LabelTy, Loc));
+  return cast_or_null<BasicBlock>(GetVal(ID,
+                                        Type::getLabelTy(F.getContext()), Loc));
 }

 /// DefineBB - Define the specified basic block, which is either named or
@@ -1568,11 +1774,11 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
   else
     BB = GetBB(Name, Loc);
   if (BB == 0) return 0; // Already diagnosed error.
-
+
   // Move the block to the end of the function.  Forward ref'd blocks are
   // inserted wherever they happen to be referenced.
   F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB);
-
+
   // Remove the block from forward ref sets.
   if (Name.empty()) {
     ForwardRefValIDs.erase(NumberedVals.size());
@@ -1581,7 +1787,7 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
     // BB forward references are already in the function symbol table.
     ForwardRefVals.erase(Name);
   }
-
+
   return BB;
 }
@@ -1615,7 +1821,7 @@ bool LLParser::ParseValID(ValID &ID) {
     ID.Kind = ValID::t_LocalName;
     break;
   case lltok::Metadata: {  // !{...} MDNode, !"foo" MDString
-    ID.Kind = ValID::t_Constant;
+    ID.Kind = ValID::t_Metadata;
     Lex.Lex();
     if (Lex.getKind() == lltok::lbrace) {
       SmallVector<Value*, 16> Elts;
@@ -1623,31 +1829,23 @@ bool LLParser::ParseValID(ValID &ID) {
           ParseToken(lltok::rbrace, "expected end of metadata node"))
         return true;

-      ID.ConstantVal = Context.getMDNode(Elts.data(), Elts.size());
+      ID.MetadataVal = MDNode::get(Context, Elts.data(), Elts.size());
       return false;
     }

     // Standalone metadata reference
     // !{ ..., !42, ... }
-    unsigned MID = 0;
-    if (!ParseUInt32(MID)) {
-      std::map<unsigned, Constant*>::iterator I = MetadataCache.find(MID);
-      if (I == MetadataCache.end())
-        return TokError("Unknown metadata reference");
-      ID.ConstantVal = I->second;
+    if (!ParseMDNode(ID.MetadataVal))
       return false;
-    }
-
+
     // MDString:
     //   ::= '!' STRINGCONSTANT
-    std::string Str;
-    if (ParseStringConstant(Str)) return true;
-
-    ID.ConstantVal = Context.getMDString(Str.data(), Str.data() + Str.size());
+    if (ParseMDString(ID.MetadataVal)) return true;
+    ID.Kind = ValID::t_Metadata;
     return false;
   }
   case lltok::APSInt:
-    ID.APSIntVal = Lex.getAPSIntVal();
+    ID.APSIntVal = Lex.getAPSIntVal();
     ID.Kind = ValID::t_APSInt;
     break;
   case lltok::APFloat:
@@ -1655,17 +1853,17 @@ bool LLParser::ParseValID(ValID &ID) {
     ID.Kind = ValID::t_APFloat;
     break;
   case lltok::kw_true:
-    ID.ConstantVal = Context.getConstantIntTrue();
+    ID.ConstantVal = ConstantInt::getTrue(Context);
     ID.Kind = ValID::t_Constant;
     break;
   case lltok::kw_false:
-    ID.ConstantVal = Context.getConstantIntFalse();
+    ID.ConstantVal = ConstantInt::getFalse(Context);
     ID.Kind = ValID::t_Constant;
     break;
   case lltok::kw_null: ID.Kind = ValID::t_Null; break;
   case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
   case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
-
+
   case lltok::lbrace: {
     // ValID ::= '{' ConstVector '}'
     Lex.Lex();
@@ -1673,8 +1871,9 @@ bool LLParser::ParseValID(ValID &ID) {
     if (ParseGlobalValueVector(Elts) ||
         ParseToken(lltok::rbrace, "expected end of struct constant"))
       return true;
-
-    ID.ConstantVal = Context.getConstantStruct(Elts.data(), Elts.size(), false);
+
+    ID.ConstantVal = ConstantStruct::get(Context, Elts.data(),
+                                         Elts.size(), false);
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -1683,7 +1882,7 @@ bool LLParser::ParseValID(ValID &ID) {
     // ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct.
     Lex.Lex();
     bool isPackedStruct = EatIfPresent(lltok::lbrace);
-
+
     SmallVector<Constant*, 16> Elts;
     LocTy FirstEltLoc = Lex.getLoc();
     if (ParseGlobalValueVector(Elts) ||
@@ -1691,14 +1890,14 @@ bool LLParser::ParseValID(ValID &ID) {
         ParseToken(lltok::rbrace, "expected end of packed struct")) ||
         ParseToken(lltok::greater, "expected end of constant"))
       return true;
-
+
     if (isPackedStruct) {
       ID.ConstantVal =
-        Context.getConstantStruct(Elts.data(), Elts.size(), true);
+        ConstantStruct::get(Context, Elts.data(), Elts.size(), true);
       ID.Kind = ValID::t_Constant;
       return false;
     }
-
+
     if (Elts.empty())
       return Error(ID.Loc, "constant vector must not be empty");

@@ -1706,15 +1905,15 @@ bool LLParser::ParseValID(ValID &ID) {
         !Elts[0]->getType()->isFloatingPoint())
       return Error(FirstEltLoc,
                    "vector elements must have integer or floating point type");
-
+
     // Verify that all the vector elements have the same type.
     for (unsigned i = 1, e = Elts.size(); i != e; ++i)
       if (Elts[i]->getType() != Elts[0]->getType())
         return Error(FirstEltLoc,
                      "vector element #" + utostr(i) +
                      " is not of type '" + Elts[0]->getType()->getDescription());
-
-    ID.ConstantVal = Context.getConstantVector(Elts.data(), Elts.size());
+
+    ID.ConstantVal = ConstantVector::get(Elts.data(), Elts.size());
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -1733,13 +1932,13 @@ bool LLParser::ParseValID(ValID &ID) {
       ID.Kind = ValID::t_EmptyArray;
       return false;
     }
-
+
     if (!Elts[0]->getType()->isFirstClassType())
-      return Error(FirstEltLoc, "invalid array element type: " +
+      return Error(FirstEltLoc, "invalid array element type: " +
                    Elts[0]->getType()->getDescription());
-
-    ArrayType *ATy = Context.getArrayType(Elts[0]->getType(), Elts.size());
-
+
+    ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
+
     // Verify all elements are correct type!
     for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
       if (Elts[i]->getType() != Elts[0]->getType())
@@ -1747,33 +1946,34 @@ bool LLParser::ParseValID(ValID &ID) {
                      "array element #" + utostr(i) +
                      " is not of type '" +Elts[0]->getType()->getDescription());
     }
-
-    ID.ConstantVal = Context.getConstantArray(ATy, Elts.data(), Elts.size());
+
+    ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size());
     ID.Kind = ValID::t_Constant;
     return false;
   }
   case lltok::kw_c:  // c "foo"
     Lex.Lex();
-    ID.ConstantVal = Context.getConstantArray(Lex.getStrVal(), false);
+    ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false);
     if (ParseToken(lltok::StringConstant, "expected string")) return true;
     ID.Kind = ValID::t_Constant;
     return false;

   case lltok::kw_asm: {
-    // ValID ::= 'asm' SideEffect? STRINGCONSTANT ',' STRINGCONSTANT
-    bool HasSideEffect;
+    // ValID ::= 'asm' SideEffect? MsAsm? STRINGCONSTANT ',' STRINGCONSTANT
+    bool HasSideEffect, MsAsm;
     Lex.Lex();
     if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+        ParseOptionalToken(lltok::kw_msasm, MsAsm) ||
         ParseStringConstant(ID.StrVal) ||
         ParseToken(lltok::comma, "expected comma in inline asm expression") ||
         ParseToken(lltok::StringConstant, "expected constraint string"))
       return true;
     ID.StrVal2 = Lex.getStrVal();
-    ID.UIntVal = HasSideEffect;
+    ID.UIntVal = HasSideEffect | ((unsigned)MsAsm<<1);
     ID.Kind = ValID::t_InlineAsm;
     return false;
   }
-
+
   case lltok::kw_trunc:
   case lltok::kw_zext:
   case lltok::kw_sext:
@@ -1783,11 +1983,11 @@ bool LLParser::ParseValID(ValID &ID) {
   case lltok::kw_uitofp:
   case lltok::kw_sitofp:
   case lltok::kw_fptoui:
-  case lltok::kw_fptosi:
+  case lltok::kw_fptosi:
   case lltok::kw_inttoptr:
-  case lltok::kw_ptrtoint: {
+  case lltok::kw_ptrtoint: {
     unsigned Opc = Lex.getUIntVal();
-    PATypeHolder DestTy(Type::VoidTy);
+    PATypeHolder DestTy(Type::getVoidTy(Context));
     Constant *SrcVal;
     Lex.Lex();
     if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
@@ -1800,7 +2000,7 @@ bool LLParser::ParseValID(ValID &ID) {
       return Error(ID.Loc, "invalid cast opcode for cast from '" +
                    SrcVal->getType()->getDescription() + "' to '" +
                    DestTy->getDescription() + "'");
-    ID.ConstantVal = Context.getConstantExprCast((Instruction::CastOps)Opc,
+    ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
                                                  SrcVal, DestTy);
     ID.Kind = ValID::t_Constant;
     return false;
@@ -1820,7 +2020,7 @@ bool LLParser::ParseValID(ValID &ID) {
                                           Indices.end()))
       return Error(ID.Loc, "invalid indices for extractvalue");
     ID.ConstantVal =
-      Context.getConstantExprExtractValue(Val, Indices.data(), Indices.size());
+      ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size());
     ID.Kind = ValID::t_Constant;
     return false;
   }
@@ -1840,15 +2040,13 @@ bool LLParser::ParseValID(ValID &ID) {
     if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
                                           Indices.end()))
       return Error(ID.Loc, "invalid indices for insertvalue");
-    ID.ConstantVal = Context.getConstantExprInsertValue(Val0, Val1,
+    ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1,
                                                  Indices.data(), Indices.size());
     ID.Kind = ValID::t_Constant;
     return false;
   }
   case lltok::kw_icmp:
-  case lltok::kw_fcmp:
-  case lltok::kw_vicmp:
-  case lltok::kw_vfcmp: {
+  case lltok::kw_fcmp: {
     unsigned PredVal, Opc = Lex.getUIntVal();
     Constant *Val0, *Val1;
     Lex.Lex();
@@ -1859,38 +2057,27 @@ bool LLParser::ParseValID(ValID &ID) {
         ParseGlobalTypeAndValue(Val1) ||
         ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
       return true;
-
+
     if (Val0->getType() != Val1->getType())
       return Error(ID.Loc, "compare operands must have the same type");
-
+
     CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
-
+
     if (Opc == Instruction::FCmp) {
       if (!Val0->getType()->isFPOrFPVector())
         return Error(ID.Loc, "fcmp requires floating point operands");
-      ID.ConstantVal = Context.getConstantExprFCmp(Pred, Val0, Val1);
-    } else if (Opc == Instruction::ICmp) {
+      ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+    } else {
+      assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
       if (!Val0->getType()->isIntOrIntVector() &&
           !isa<PointerType>(Val0->getType()))
         return Error(ID.Loc, "icmp requires pointer or integer operands");
-      ID.ConstantVal = Context.getConstantExprICmp(Pred, Val0, Val1);
-    } else if (Opc == Instruction::VFCmp) {
-      // FIXME: REMOVE VFCMP Support
-      if (!Val0->getType()->isFPOrFPVector() ||
-          !isa<VectorType>(Val0->getType()))
-        return Error(ID.Loc, "vfcmp requires vector floating point operands");
-      ID.ConstantVal = Context.getConstantExprVFCmp(Pred, Val0, Val1);
-    } else if (Opc == Instruction::VICmp) {
-      // FIXME: REMOVE VICMP Support
-      if (!Val0->getType()->isIntOrIntVector() ||
-          !isa<VectorType>(Val0->getType()))
-        return Error(ID.Loc, "vicmp requires vector floating point operands");
-      ID.ConstantVal = Context.getConstantExprVICmp(Pred, Val0, Val1);
+      ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
     }
     ID.Kind = ValID::t_Constant;
     return false;
   }
-
+
   // Binary Operators.
   case lltok::kw_add:
   case lltok::kw_fadd:
@@ -1904,9 +2091,27 @@ bool LLParser::ParseValID(ValID &ID) {
   case lltok::kw_urem:
   case lltok::kw_srem:
   case lltok::kw_frem: {
+    bool NUW = false;
+    bool NSW = false;
+    bool Exact = false;
     unsigned Opc = Lex.getUIntVal();
     Constant *Val0, *Val1;
     Lex.Lex();
+    LocTy ModifierLoc = Lex.getLoc();
+    if (Opc == Instruction::Add ||
+        Opc == Instruction::Sub ||
+        Opc == Instruction::Mul) {
+      if (EatIfPresent(lltok::kw_nuw))
+        NUW = true;
+      if (EatIfPresent(lltok::kw_nsw)) {
+        NSW = true;
+        if (EatIfPresent(lltok::kw_nuw))
+          NUW = true;
+      }
+    } else if (Opc == Instruction::SDiv) {
+      if (EatIfPresent(lltok::kw_exact))
+        Exact = true;
+    }
     if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
         ParseGlobalTypeAndValue(Val0) ||
         ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
@@ -1915,14 +2120,27 @@ bool LLParser::ParseValID(ValID &ID) {
       return true;
     if (Val0->getType() != Val1->getType())
       return Error(ID.Loc, "operands of constexpr must have same type");
+    if (!Val0->getType()->isIntOrIntVector()) {
+      if (NUW)
+        return Error(ModifierLoc, "nuw only applies to integer operations");
+      if (NSW)
+        return Error(ModifierLoc, "nsw only applies to integer operations");
+    }
+    // API compatibility: Accept either integer or floating-point types with
+    // add, sub, and mul.
     if (!Val0->getType()->isIntOrIntVector() &&
         !Val0->getType()->isFPOrFPVector())
       return Error(ID.Loc,"constexpr requires integer, fp, or vector operands");
-    ID.ConstantVal = Context.getConstantExpr(Opc, Val0, Val1);
+    unsigned Flags = 0;
+    if (NUW)   Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+    if (NSW)   Flags |= OverflowingBinaryOperator::NoSignedWrap;
+    if (Exact) Flags |= SDivOperator::IsExact;
+    Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
+    ID.ConstantVal = C;
     ID.Kind = ValID::t_Constant;
     return false;
   }
-
+
   // Logical Operations
   case lltok::kw_shl:
   case lltok::kw_lshr:
@@ -1944,11 +2162,11 @@ bool LLParser::ParseValID(ValID &ID) {
     if (!Val0->getType()->isIntOrIntVector())
       return Error(ID.Loc,
                    "constexpr requires integer or integer vector operands");
-    ID.ConstantVal = Context.getConstantExpr(Opc, Val0, Val1);
+    ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
     ID.Kind = ValID::t_Constant;
     return false;
-  }
-
+  }
+
   case lltok::kw_getelementptr:
   case lltok::kw_shufflevector:
   case lltok::kw_insertelement:
@@ -1956,41 +2174,49 @@ bool LLParser::ParseValID(ValID &ID) {
   case lltok::kw_select: {
     unsigned Opc = Lex.getUIntVal();
     SmallVector<Constant*, 16> Elts;
+    bool InBounds = false;
     Lex.Lex();
+    if (Opc == Instruction::GetElementPtr)
+      InBounds = EatIfPresent(lltok::kw_inbounds);
     if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
         ParseGlobalValueVector(Elts) ||
         ParseToken(lltok::rparen, "expected ')' in constantexpr"))
       return true;
-
+
     if (Opc == Instruction::GetElementPtr) {
       if (Elts.size() == 0 || !isa<PointerType>(Elts[0]->getType()))
         return Error(ID.Loc, "getelementptr requires pointer operand");
-
+
       if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
-                                             (Value**)&Elts[1], Elts.size()-1))
+                                             (Value**)(Elts.data() + 1),
+                                             Elts.size() - 1))
         return Error(ID.Loc, "invalid indices for getelementptr");
-      ID.ConstantVal = Context.getConstantExprGetElementPtr(Elts[0],
-                                              &Elts[1], Elts.size()-1);
+      ID.ConstantVal = InBounds ?
+        ConstantExpr::getInBoundsGetElementPtr(Elts[0],
+                                               Elts.data() + 1,
+                                               Elts.size() - 1) :
+        ConstantExpr::getGetElementPtr(Elts[0],
+                                       Elts.data() + 1, Elts.size() - 1);
     } else if (Opc == Instruction::Select) {
       if (Elts.size() != 3)
         return Error(ID.Loc, "expected three operands to select");
       if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
                                                               Elts[2]))
         return Error(ID.Loc, Reason);
-      ID.ConstantVal = Context.getConstantExprSelect(Elts[0], Elts[1], Elts[2]);
+      ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
     } else if (Opc == Instruction::ShuffleVector) {
       if (Elts.size() != 3)
         return Error(ID.Loc, "expected three operands to shufflevector");
       if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
         return Error(ID.Loc, "invalid operands to shufflevector");
       ID.ConstantVal =
-                 Context.getConstantExprShuffleVector(Elts[0], Elts[1],Elts[2]);
+                 ConstantExpr::getShuffleVector(Elts[0], Elts[1],Elts[2]);
     } else if (Opc == Instruction::ExtractElement) {
       if (Elts.size() != 2)
         return Error(ID.Loc, "expected two operands to extractelement");
       if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
         return Error(ID.Loc, "invalid extractelement operands");
-      ID.ConstantVal = Context.getConstantExprExtractElement(Elts[0], Elts[1]);
+      ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
     } else {
       assert(Opc == Instruction::InsertElement && "Unknown opcode");
       if (Elts.size() != 3)
@@ -1998,14 +2224,14 @@ bool LLParser::ParseValID(ValID &ID) {
       if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
         return Error(ID.Loc, "invalid insertelement operands");
       ID.ConstantVal =
-                 Context.getConstantExprInsertElement(Elts[0], Elts[1],Elts[2]);
+                 ConstantExpr::getInsertElement(Elts[0], Elts[1],Elts[2]);
     }
-
+
     ID.Kind = ValID::t_Constant;
     return false;
   }
   }
-
+
   Lex.Lex();
   return false;
 }
@@ -2024,9 +2250,11 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
                                          Constant *&V) {
   if (isa<FunctionType>(Ty))
     return Error(ID.Loc, "functions are not values, refer to them as pointers");
-
+
   switch (ID.Kind) {
-  default: assert(0 && "Unknown ValID!");
+  default: llvm_unreachable("Unknown ValID!");
+  case ValID::t_Metadata:
+    return Error(ID.Loc, "invalid use of metadata");
   case ValID::t_LocalID:
   case ValID::t_LocalName:
     return Error(ID.Loc, "invalid use of function-local name");
@@ -2042,50 +2270,50 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
     if (!isa<IntegerType>(Ty))
       return Error(ID.Loc, "integer constant must have integer type");
     ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
-    V = Context.getConstantInt(ID.APSIntVal);
+    V = ConstantInt::get(Context, ID.APSIntVal);
     return false;
   case ValID::t_APFloat:
     if (!Ty->isFloatingPoint() ||
         !ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
       return Error(ID.Loc, "floating point constant invalid for type");
-
+
     // The lexer has no type info, so builds all float and double FP constants
     // as double.  Fix this here.  Long double does not need this.
     if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
-        Ty == Type::FloatTy) {
+        Ty->isFloatTy()) {
       bool Ignored;
       ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
                             &Ignored);
     }
-    V = Context.getConstantFP(ID.APFloatVal);
-
+    V = ConstantFP::get(Context, ID.APFloatVal);
+
     if (V->getType() != Ty)
       return Error(ID.Loc, "floating point constant does not have type '" +
                    Ty->getDescription() + "'");
-
+
     return false;
   case ValID::t_Null:
     if (!isa<PointerType>(Ty))
       return Error(ID.Loc, "null must be a pointer type");
-    V = Context.getConstantPointerNull(cast<PointerType>(Ty));
+    V = ConstantPointerNull::get(cast<PointerType>(Ty));
     return false;
   case ValID::t_Undef:
     // FIXME: LabelTy should not be a first-class type.
-    if ((!Ty->isFirstClassType() || Ty == Type::LabelTy) &&
+    if ((!Ty->isFirstClassType() || Ty->isLabelTy()) &&
        !isa<OpaqueType>(Ty))
      return Error(ID.Loc, "invalid type for undef constant");
-    V = Context.getUndef(Ty);
+    V = UndefValue::get(Ty);
    return false;
   case ValID::t_EmptyArray:
     if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0)
       return Error(ID.Loc, "invalid empty array initializer");
-    V = Context.getUndef(Ty);
+    V = UndefValue::get(Ty);
     return false;
   case ValID::t_Zero:
     // FIXME: LabelTy should not be a first-class type.
-    if (!Ty->isFirstClassType() || Ty == Type::LabelTy)
+    if (!Ty->isFirstClassType() || Ty->isLabelTy())
       return Error(ID.Loc, "invalid type for null constant");
-    V = Context.getNullValue(Ty);
+    V = Constant::getNullValue(Ty);
     return false;
   case ValID::t_Constant:
     if (ID.ConstantVal->getType() != Ty)
@@ -2094,12 +2322,12 @@ bool LLParser::ConvertGlobalValIDToValue(const Type *Ty, ValID &ID,
     return false;
   }
 }
-
+
 bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
-  PATypeHolder Type(Type::VoidTy);
+  PATypeHolder Type(Type::getVoidTy(Context));
   return ParseType(Type) ||
          ParseGlobalValue(Type, V);
-}
+}

 /// ParseGlobalValueVector
 ///   ::= /*empty*/
@@ -2111,16 +2339,16 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
       Lex.getKind() == lltok::greater ||
       Lex.getKind() == lltok::rparen)
     return false;
-
+
   Constant *C;
   if (ParseGlobalTypeAndValue(C)) return true;
   Elts.push_back(C);
-
+
   while (EatIfPresent(lltok::comma)) {
     if (ParseGlobalTypeAndValue(C)) return true;
     Elts.push_back(C);
   }
-
+
   return false;
 }
@@ -2141,8 +2369,10 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
       PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
     if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
       return Error(ID.Loc, "invalid type for inline asm constraint string");
-    V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal);
+    V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
     return false;
+  } else if (ID.Kind == ValID::t_Metadata) {
+    V = ID.MetadataVal;
   } else {
     Constant *C;
     if (ConvertGlobalValIDToValue(Ty, ID, C)) return true;
@@ -2161,7 +2391,7 @@ bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
 }

 bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
-  PATypeHolder T(Type::VoidTy);
+  PATypeHolder T(Type::getVoidTy(Context));
   return ParseType(T) ||
          ParseValue(T, V, PFS);
 }
@@ -2174,9 +2404,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   // Parse the linkage.
   LocTy LinkageLoc = Lex.getLoc();
   unsigned Linkage;
-
-  unsigned Visibility, CC, RetAttrs;
-  PATypeHolder RetType(Type::VoidTy);
+
+  unsigned Visibility, RetAttrs;
+  CallingConv::ID CC;
+  PATypeHolder RetType(Type::getVoidTy(Context));
   LocTy RetTypeLoc = Lex.getLoc();
   if (ParseOptionalLinkage(Linkage) ||
       ParseOptionalVisibility(Visibility) ||
@@ -2195,6 +2426,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
       return Error(LinkageLoc, "invalid linkage for function definition");
     break;
   case GlobalValue::PrivateLinkage:
+  case GlobalValue::LinkerPrivateLinkage:
   case GlobalValue::InternalLinkage:
   case GlobalValue::AvailableExternallyLinkage:
   case GlobalValue::LinkOnceAnyLinkage:
@@ -2210,11 +2442,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   case GlobalValue::CommonLinkage:
     return Error(LinkageLoc, "invalid function linkage type");
   }
-
+
   if (!FunctionType::isValidReturnType(RetType) ||
       isa<OpaqueType>(RetType))
     return Error(RetTypeLoc, "invalid function return type");
-
+
   LocTy NameLoc = Lex.getLoc();

   std::string FunctionName;
@@ -2229,12 +2461,12 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   } else {
     return TokError("expected function name");
   }
-
+
   Lex.Lex();
-
+
   if (Lex.getKind() != lltok::lparen)
     return TokError("expected '(' in function argument list");
-
+
   std::vector<ArgInfo> ArgList;
   bool isVarArg;
   unsigned FuncAttrs;
@@ -2256,22 +2488,22 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
     Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs);
     FuncAttrs &= ~Attribute::Alignment;
   }
-
+
   // Okay, if we got here, the function is syntactically valid.  Convert types
   // and do semantic checks.
   std::vector<const Type*> ParamTypeList;
   SmallVector<AttributeWithIndex, 8> Attrs;
-  // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function
+  // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function
   // attributes.
   unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
   if (FuncAttrs & ObsoleteFuncAttrs) {
     RetAttrs |= FuncAttrs & ObsoleteFuncAttrs;
     FuncAttrs &= ~ObsoleteFuncAttrs;
   }
-
+
   if (RetAttrs != Attribute::None)
     Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
-
+
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
     ParamTypeList.push_back(ArgList[i].Type);
     if (ArgList[i].Attrs != Attribute::None)
@@ -2282,14 +2514,14 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
     Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs));

   AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
-
+
   if (PAL.paramHasAttr(1, Attribute::StructRet) &&
-      RetType != Type::VoidTy)
-    return Error(RetTypeLoc, "functions with 'sret' argument must return void");
-
+      RetType != Type::getVoidTy(Context))
+    return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+
   const FunctionType *FT =
-    Context.getFunctionType(RetType, ParamTypeList, isVarArg);
-  const PointerType *PFT = Context.getPointerTypeUnqual(FT);
+    FunctionType::get(RetType, ParamTypeList, isVarArg);
+  const PointerType *PFT = PointerType::getUnqual(FT);

   Fn = 0;
   if (!FunctionName.empty()) {
@@ -2317,8 +2549,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
         AI->setName("");
       }
     }
-
-  } else if (FunctionName.empty()) {
+
+  } else {
     // If this is a definition of a forward referenced function, make sure the
     // types agree.
     std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator I
@@ -2339,7 +2571,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {

   if (FunctionName.empty())
     NumberedVals.push_back(Fn);
-
+
   Fn->setLinkage((GlobalValue::LinkageTypes)Linkage);
   Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
   Fn->setCallingConv(CC);
@@ -2347,21 +2579,21 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   Fn->setAlignment(Alignment);
   Fn->setSection(Section);
   if (!GC.empty()) Fn->setGC(GC.c_str());
-
+
   // Add all of the arguments we parsed to the function.
   Function::arg_iterator ArgIt = Fn->arg_begin();
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
     // If the argument has a name, insert it into the argument symbol table.
     if (ArgList[i].Name.empty()) continue;
-
+
     // Set the name, if it conflicted, it will be auto-renamed.
     ArgIt->setName(ArgList[i].Name);
-
+
     if (ArgIt->getNameStr() != ArgList[i].Name)
       return Error(ArgList[i].Loc, "redefinition of argument '%" +
                    ArgList[i].Name + "'");
   }
-
+
   return false;
 }
@@ -2374,15 +2606,15 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
   if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin)
     return TokError("expected '{' in function body");
   Lex.Lex();  // eat the {.
-
+
   PerFunctionState PFS(*this, Fn);
-
+
   while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
     if (ParseBasicBlock(PFS)) return true;
-
+
   // Eat the }.
   Lex.Lex();
-
+
   // Verify function is ok.
   return PFS.VerifyFunctionComplete();
 }
@@ -2397,12 +2629,12 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
     Name = Lex.getStrVal();
     Lex.Lex();
   }
-
+
   BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
   if (BB == 0) return true;
-
+
   std::string NameStr;
-
+
   // Parse the instructions in this block until we get a terminator.
   Instruction *Inst;
   do {
@@ -2411,7 +2643,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
     LocTy NameLoc = Lex.getLoc();
     int NameID = -1;
     NameStr = "";
-
+
     if (Lex.getKind() == lltok::LocalVarID) {
       NameID = Lex.getUIntVal();
       Lex.Lex();
@@ -2425,15 +2657,24 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
       if (ParseToken(lltok::equal, "expected '=' after instruction name"))
         return true;
     }
-
+
     if (ParseInstruction(Inst, BB, PFS)) return true;
-
+    if (EatIfPresent(lltok::comma))
+      ParseOptionalCustomMetadata();
+
+    // Set metadata attached with this instruction.
+    MetadataContext &TheMetadata = M->getContext().getMetadata();
+    for (SmallVector<std::pair<unsigned, MDNode*>, 2>::iterator
+           MDI = MDsOnInst.begin(), MDE = MDsOnInst.end(); MDI != MDE; ++MDI)
+      TheMetadata.addMD(MDI->first, MDI->second, Inst);
+    MDsOnInst.clear();
+
     BB->getInstList().push_back(Inst);

     // Set the name on the instruction.
     if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
   } while (!isa<TerminatorInst>(Inst));
-
+
   return false;
 }
@@ -2451,12 +2692,12 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   LocTy Loc = Lex.getLoc();
   unsigned KeywordVal = Lex.getUIntVal();
   Lex.Lex();  // Eat the keyword.
-
+
   switch (Token) {
   default:                    return Error(Loc, "expected instruction opcode");
   // Terminator Instructions.
-  case lltok::kw_unwind:      Inst = new UnwindInst(); return false;
-  case lltok::kw_unreachable: Inst = new UnreachableInst(); return false;
+  case lltok::kw_unwind:      Inst = new UnwindInst(Context); return false;
+  case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
   case lltok::kw_ret:         return ParseRet(Inst, BB, PFS);
   case lltok::kw_br:          return ParseBr(Inst, PFS);
   case lltok::kw_switch:      return ParseSwitch(Inst, PFS);
@@ -2464,15 +2705,49 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   // Binary Operators.
   case lltok::kw_add:
   case lltok::kw_sub:
-  case lltok::kw_mul:
+  case lltok::kw_mul: {
+    bool NUW = false;
+    bool NSW = false;
+    LocTy ModifierLoc = Lex.getLoc();
+    if (EatIfPresent(lltok::kw_nuw))
+      NUW = true;
+    if (EatIfPresent(lltok::kw_nsw)) {
+      NSW = true;
+      if (EatIfPresent(lltok::kw_nuw))
+        NUW = true;
+    }
     // API compatibility: Accept either integer or floating-point types.
-    return ParseArithmetic(Inst, PFS, KeywordVal, 0);
+    bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0);
+    if (!Result) {
+      if (!Inst->getType()->isIntOrIntVector()) {
+        if (NUW)
+          return Error(ModifierLoc, "nuw only applies to integer operations");
+        if (NSW)
+          return Error(ModifierLoc, "nsw only applies to integer operations");
+      }
+      if (NUW)
+        cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+      if (NSW)
+        cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+    }
+    return Result;
+  }
   case lltok::kw_fadd:
   case lltok::kw_fsub:
   case lltok::kw_fmul:    return ParseArithmetic(Inst, PFS, KeywordVal, 2);

+  case lltok::kw_sdiv: {
+    bool Exact = false;
+    if (EatIfPresent(lltok::kw_exact))
+      Exact = true;
+    bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 1);
+    if (!Result)
+      if (Exact)
+        cast<BinaryOperator>(Inst)->setIsExact(true);
+    return Result;
+  }
+
   case lltok::kw_udiv:
-  case lltok::kw_sdiv:
   case lltok::kw_urem:
   case lltok::kw_srem:   return ParseArithmetic(Inst, PFS, KeywordVal, 1);
   case lltok::kw_fdiv:
@@ -2484,9 +2759,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_or:
   case lltok::kw_xor:    return ParseLogical(Inst, PFS, KeywordVal);
   case lltok::kw_icmp:
-  case lltok::kw_fcmp:
-  case lltok::kw_vicmp:
-  case lltok::kw_vfcmp:  return ParseCompare(Inst, PFS, KeywordVal);
+  case lltok::kw_fcmp:   return ParseCompare(Inst, PFS, KeywordVal);
   // Casts.
   case lltok::kw_trunc:
   case lltok::kw_zext:
@@ -2497,7 +2770,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_uitofp:
   case lltok::kw_sitofp:
   case lltok::kw_fptoui:
-  case lltok::kw_fptosi:
+  case lltok::kw_fptosi:
   case lltok::kw_inttoptr:
   case lltok::kw_ptrtoint:       return ParseCast(Inst, PFS, KeywordVal);
   // Other.
@@ -2531,8 +2804,7 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,

 /// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
 bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
-  // FIXME: REMOVE vicmp/vfcmp!
-  if (Opc == Instruction::FCmp || Opc == Instruction::VFCmp) {
+  if (Opc == Instruction::FCmp) {
     switch (Lex.getKind()) {
     default: TokError("expected fcmp predicate (e.g. 'oeq')");
     case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break;
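
The added cases above teach the instruction parser the new optional qualifiers: nuw/nsw on add, sub, and mul, and exact on sdiv, each recorded on the instruction once ParseArithmetic succeeds. For orientation, a sketch of what the accepted syntax reduces to, using the setter calls visible in the hunk (illustrative only; the helper and its arguments are assumptions, not part of the patch):

    // Sketch: 'add nuw nsw i32 %a, %b' and 'sdiv exact i32 %s, %b'
    // turn into these calls on the created BinaryOperator.
    #include "llvm/Instructions.h"
    using namespace llvm;

    Value *buildFlagged(Value *A, Value *B) {
      BinaryOperator *Sum = BinaryOperator::CreateAdd(A, B, "sum");
      Sum->setHasNoUnsignedWrap(true);   // 'nuw': result undefined on unsigned wrap
      Sum->setHasNoSignedWrap(true);     // 'nsw': result undefined on signed wrap
      BinaryOperator *Quot = BinaryOperator::CreateSDiv(Sum, B, "quot");
      Quot->setIsExact(true);            // 'exact': division leaves no remainder
      return Quot;
    }
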
@@ -2576,42 +2848,57 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
 //===----------------------------------------------------------------------===//

 /// ParseRet - Parse a return instruction.
-///   ::= 'ret' void
-///   ::= 'ret' TypeAndValue
-///   ::= 'ret' TypeAndValue (',' TypeAndValue)+  [[obsolete: LLVM 3.0]]
+///   ::= 'ret' void (',' !dbg, !1)
+///   ::= 'ret' TypeAndValue (',' !dbg, !1)
+///   ::= 'ret' TypeAndValue (',' TypeAndValue)+  (',' !dbg, !1)
+///         [[obsolete: LLVM 3.0]]
 bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
                         PerFunctionState &PFS) {
-  PATypeHolder Ty(Type::VoidTy);
+  PATypeHolder Ty(Type::getVoidTy(Context));
   if (ParseType(Ty, true /*void allowed*/)) return true;
-
-  if (Ty == Type::VoidTy) {
-    Inst = ReturnInst::Create();
+
+  if (Ty->isVoidTy()) {
+    if (EatIfPresent(lltok::comma))
+      if (ParseOptionalCustomMetadata()) return true;
+    Inst = ReturnInst::Create(Context);
     return false;
   }
-
+
   Value *RV;
   if (ParseValue(Ty, RV, PFS)) return true;
-
-  // The normal case is one return value.
-  if (Lex.getKind() == lltok::comma) {
-    // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring use
-    // of 'ret {i32,i32} {i32 1, i32 2}'
-    SmallVector<Value*, 8> RVs;
-    RVs.push_back(RV);
-
-    while (EatIfPresent(lltok::comma)) {
-      if (ParseTypeAndValue(RV, PFS)) return true;
+
+  if (EatIfPresent(lltok::comma)) {
+    // Parse optional custom metadata, e.g. !dbg
+    if (Lex.getKind() == lltok::NamedOrCustomMD) {
+      if (ParseOptionalCustomMetadata()) return true;
+    } else {
+      // The normal case is one return value.
+      // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring use
+      // of 'ret {i32,i32} {i32 1, i32 2}'
+      SmallVector<Value*, 8> RVs;
       RVs.push_back(RV);
-    }
-    RV = Context.getUndef(PFS.getFunction().getReturnType());
-    for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
-      Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
-      BB->getInstList().push_back(I);
-      RV = I;
+
+      do {
+        // If optional custom metadata, e.g. !dbg is seen then this is the
+        // end of MRV.
+        if (Lex.getKind() == lltok::NamedOrCustomMD)
+          break;
+        if (ParseTypeAndValue(RV, PFS)) return true;
+        RVs.push_back(RV);
+      } while (EatIfPresent(lltok::comma));
+
+      RV = UndefValue::get(PFS.getFunction().getReturnType());
+      for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
+        Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
+        BB->getInstList().push_back(I);
+        RV = I;
+      }
     }
   }
-  Inst = ReturnInst::Create(RV);
+
+  if (EatIfPresent(lltok::comma))
+    if (ParseOptionalCustomMetadata()) return true;
+
+  Inst = ReturnInst::Create(Context, RV);
   return false;
 }
@@ -2623,26 +2910,26 @@ bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
   LocTy Loc, Loc2;
   Value *Op0, *Op1, *Op2;
   if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
-
+
   if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
     Inst = BranchInst::Create(BB);
     return false;
   }
-
-  if (Op0->getType() != Type::Int1Ty)
+
+  if (Op0->getType() != Type::getInt1Ty(Context))
     return Error(Loc, "branch condition must have 'i1' type");
-
+
   if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
       ParseTypeAndValue(Op1, Loc, PFS) ||
       ParseToken(lltok::comma, "expected ',' after true destination") ||
       ParseTypeAndValue(Op2, Loc2, PFS))
     return true;
-
+
   if (!isa<BasicBlock>(Op1))
     return Error(Loc, "true destination of branch must be a basic block");
   if (!isa<BasicBlock>(Op2))
     return Error(Loc2, "true destination of branch must be a basic block");
-
+
   Inst = BranchInst::Create(cast<BasicBlock>(Op1), cast<BasicBlock>(Op2), Op0);
   return false;
 }
@@ -2665,13 +2952,13 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
     return Error(CondLoc, "switch condition must have integer type");
   if (!isa<BasicBlock>(DefaultBB))
     return Error(BBLoc, "default destination must be a basic block");
-
+
   // Parse the jump table pairs.
   SmallPtrSet<Value*, 32> SeenCases;
   SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
   while (Lex.getKind() != lltok::rsquare) {
     Value *Constant, *DestBB;
-
+
     if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
         ParseToken(lltok::comma, "expected ',' after case value") ||
         ParseTypeAndValue(DestBB, BBLoc, PFS))
@@ -2683,13 +2970,13 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
       return Error(CondLoc, "case value is not a constant integer");
     if (!isa<BasicBlock>(DestBB))
       return Error(BBLoc, "case destination is not a basic block");
-
+
     Table.push_back(std::make_pair(cast<ConstantInt>(Constant),
                                    cast<BasicBlock>(DestBB)));
   }
-
+
   Lex.Lex();  // Eat the ']'.
-
+
   SwitchInst *SI = SwitchInst::Create(Cond, cast<BasicBlock>(DefaultBB),
                                       Table.size());
   for (unsigned i = 0, e = Table.size(); i != e; ++i)
@@ -2703,8 +2990,9 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
 ///       OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
 bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   LocTy CallLoc = Lex.getLoc();
-  unsigned CC, RetAttrs, FnAttrs;
-  PATypeHolder RetType(Type::VoidTy);
+  unsigned RetAttrs, FnAttrs;
+  CallingConv::ID CC;
+  PATypeHolder RetType(Type::getVoidTy(Context));
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
@@ -2721,12 +3009,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
       ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
       ParseTypeAndValue(UnwindBB, PFS))
     return true;
-
+
   if (!isa<BasicBlock>(NormalBB))
     return Error(CallLoc, "normal destination is not a basic block");
   if (!isa<BasicBlock>(UnwindBB))
     return Error(CallLoc, "unwind destination is not a basic block");
-
+
   // If RetType is a non-function pointer type, then this is the short syntax
Infer the // rest of the function argument types from the arguments that are present. @@ -2738,18 +3026,18 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { std::vector ParamTypes; for (unsigned i = 0, e = ArgList.size(); i != e; ++i) ParamTypes.push_back(ArgList[i].V->getType()); - + if (!FunctionType::isValidReturnType(RetType)) return Error(RetTypeLoc, "Invalid result type for LLVM function"); - - Ty = Context.getFunctionType(RetType, ParamTypes, false); - PFTy = Context.getPointerTypeUnqual(Ty); + + Ty = FunctionType::get(RetType, ParamTypes, false); + PFTy = PointerType::getUnqual(Ty); } - + // Look up the callee. Value *Callee; if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true; - + // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional // function attributes. unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg; @@ -2757,14 +3045,14 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { RetAttrs |= FnAttrs & ObsoleteFuncAttrs; FnAttrs &= ~ObsoleteFuncAttrs; } - + // Set up the Attributes for the function. SmallVector Attrs; if (RetAttrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(0, RetAttrs)); - + SmallVector Args; - + // Loop through FunctionType's arguments and ensure they are specified // correctly. Also, gather any parameter attributes. FunctionType::param_iterator I = Ty->param_begin(); @@ -2776,7 +3064,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { } else if (!Ty->isVarArg()) { return Error(ArgList[i].Loc, "too many arguments specified"); } - + if (ExpectedTy && ExpectedTy != ArgList[i].V->getType()) return Error(ArgList[i].Loc, "argument is not of expected type '" + ExpectedTy->getDescription() + "'"); @@ -2784,16 +3072,16 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { if (ArgList[i].Attrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); } - + if (I != E) return Error(CallLoc, "not enough parameters specified for call"); - + if (FnAttrs != Attribute::None) Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs)); - + // Finish off the Attributes and check them AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end()); - + InvokeInst *II = InvokeInst::Create(Callee, cast(NormalBB), cast(UnwindBB), Args.begin(), Args.end()); @@ -2824,7 +3112,7 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, bool Valid; switch (OperandType) { - default: assert(0 && "Unknown operand type!"); + default: llvm_unreachable("Unknown operand type!"); case 0: // int or FP. Valid = LHS->getType()->isIntOrIntVector() || LHS->getType()->isFPOrFPVector(); @@ -2832,10 +3120,10 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, case 1: Valid = LHS->getType()->isIntOrIntVector(); break; case 2: Valid = LHS->getType()->isFPOrFPVector(); break; } - + if (!Valid) return Error(Loc, "invalid operand type for instruction"); - + Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); return false; } @@ -2861,8 +3149,6 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS, /// ParseCompare /// ::= 'icmp' IPredicates TypeAndValue ',' Value /// ::= 'fcmp' FPredicates TypeAndValue ',' Value -/// ::= 'vicmp' IPredicates TypeAndValue ',' Value -/// ::= 'vfcmp' FPredicates TypeAndValue ',' Value bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc) { // Parse the integer/fp comparison predicate. 
@@ -2874,24 +3160,17 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
       ParseToken(lltok::comma, "expected ',' after compare value") ||
       ParseValue(LHS->getType(), RHS, PFS))
     return true;
-
+
   if (Opc == Instruction::FCmp) {
     if (!LHS->getType()->isFPOrFPVector())
       return Error(Loc, "fcmp requires floating point operands");
     Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
-  } else if (Opc == Instruction::ICmp) {
+  } else {
+    assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
     if (!LHS->getType()->isIntOrIntVector() &&
         !isa<PointerType>(LHS->getType()))
       return Error(Loc, "icmp requires integer operands");
     Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
-  } else if (Opc == Instruction::VFCmp) {
-    if (!LHS->getType()->isFPOrFPVector() || !isa<VectorType>(LHS->getType()))
-      return Error(Loc, "vfcmp requires vector floating point operands");
-    Inst = new VFCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
-  } else if (Opc == Instruction::VICmp) {
-    if (!LHS->getType()->isIntOrIntVector() || !isa<VectorType>(LHS->getType()))
-      return Error(Loc, "vicmp requires vector floating point operands");
-    Inst = new VICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
   }
   return false;
 }
@@ -2906,12 +3185,12 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
 bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
                          unsigned Opc) {
   LocTy Loc;  Value *Op;
-  PATypeHolder DestTy(Type::VoidTy);
+  PATypeHolder DestTy(Type::getVoidTy(Context));
   if (ParseTypeAndValue(Op, Loc, PFS) ||
       ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
       ParseType(DestTy))
     return true;
-
+
   if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
     CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy);
     return Error(Loc, "invalid cast opcode for cast from '" +
@@ -2933,10 +3212,10 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
       ParseToken(lltok::comma, "expected ',' after select value") ||
       ParseTypeAndValue(Op2, PFS))
     return true;
-
+
   if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
     return Error(Loc, Reason);
-
+
   Inst = SelectInst::Create(Op0, Op1, Op2);
   return false;
 }
@@ -2945,13 +3224,13 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
 ///   ::= 'va_arg' TypeAndValue ',' Type
 bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
   Value *Op;
-  PATypeHolder EltTy(Type::VoidTy);
+  PATypeHolder EltTy(Type::getVoidTy(Context));
   LocTy TypeLoc;
   if (ParseTypeAndValue(Op, PFS) ||
       ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
       ParseType(EltTy, TypeLoc))
     return true;
-
+
   if (!EltTy->isFirstClassType())
     return Error(TypeLoc, "va_arg requires operand with first class type");

@@ -2968,11 +3247,11 @@ bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
       ParseToken(lltok::comma, "expected ',' after extract value") ||
       ParseTypeAndValue(Op1, PFS))
     return true;
-
+
   if (!ExtractElementInst::isValidOperands(Op0, Op1))
     return Error(Loc, "invalid extractelement operands");
-
-  Inst = new ExtractElementInst(Op0, Op1);
+
+  Inst = ExtractElementInst::Create(Op0, Op1);
   return false;
 }
@@ -2987,10 +3266,10 @@ bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
       ParseToken(lltok::comma, "expected ',' after insertelement value") ||
       ParseTypeAndValue(Op2, PFS))
     return true;
-
+
   if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
-    return Error(Loc, "invalid extractelement operands");
-
+    return Error(Loc, "invalid insertelement operands");
+
   Inst = InsertElementInst::Create(Op0, Op1, Op2);
   return false;
 }
@@ -3006,10 +3285,10 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
       ParseToken(lltok::comma, "expected ',' after shuffle value") ||
       ParseTypeAndValue(Op2, PFS))
     return true;
-
+
   if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
     return Error(Loc, "invalid extractelement operands");
-
+
   Inst = new ShuffleVectorInst(Op0, Op1, Op2);
   return false;
 }
@@ -3017,33 +3296,33 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
 /// ParsePHI
 ///   ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
 bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
-  PATypeHolder Ty(Type::VoidTy);
+  PATypeHolder Ty(Type::getVoidTy(Context));
   Value *Op0, *Op1;
   LocTy TypeLoc = Lex.getLoc();
-
+
   if (ParseType(Ty) ||
       ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
       ParseValue(Ty, Op0, PFS) ||
       ParseToken(lltok::comma, "expected ',' after insertelement value") ||
-      ParseValue(Type::LabelTy, Op1, PFS) ||
+      ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
      ParseToken(lltok::rsquare, "expected ']' in phi value list"))
    return true;
-
+
   SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
   while (1) {
     PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
-
+
     if (!EatIfPresent(lltok::comma))
       break;

     if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
         ParseValue(Ty, Op0, PFS) ||
         ParseToken(lltok::comma, "expected ',' after insertelement value") ||
-        ParseValue(Type::LabelTy, Op1, PFS) ||
+        ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
         ParseToken(lltok::rsquare, "expected ']' in phi value list"))
       return true;
   }
-
+
   if (!Ty->isFirstClassType())
     return Error(TypeLoc, "phi node must have first class type");

@@ -3060,13 +3339,14 @@ bool LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
 ///       ParameterList OptionalAttrs
 bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
                          bool isTail) {
-  unsigned CC, RetAttrs, FnAttrs;
-  PATypeHolder RetType(Type::VoidTy);
+  unsigned RetAttrs, FnAttrs;
+  CallingConv::ID CC;
+  PATypeHolder RetType(Type::getVoidTy(Context));
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
   LocTy CallLoc = Lex.getLoc();
-
+
   if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
       ParseOptionalCallingConv(CC) ||
       ParseOptionalAttrs(RetAttrs, 1) ||
@@ -3075,7 +3355,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
       ParseParameterList(ArgList, PFS) ||
       ParseOptionalAttrs(FnAttrs, 2))
     return true;
-
+
   // If RetType is a non-function pointer type, then this is the short syntax
   // for the call, which means that RetType is just the return type.  Infer the
   // rest of the function argument types from the arguments that are present.
@@ -3087,18 +3367,18 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
     std::vector<const Type*> ParamTypes;
     for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
       ParamTypes.push_back(ArgList[i].V->getType());
-
+
     if (!FunctionType::isValidReturnType(RetType))
       return Error(RetTypeLoc, "Invalid result type for LLVM function");
-
-    Ty = Context.getFunctionType(RetType, ParamTypes, false);
-    PFTy = Context.getPointerTypeUnqual(Ty);
+
+    Ty = FunctionType::get(RetType, ParamTypes, false);
+    PFTy = PointerType::getUnqual(Ty);
   }
-
+
   // Look up the callee.
   Value *Callee;
   if (ConvertValIDToValue(PFTy, CalleeID, Callee, PFS)) return true;
-
+
   // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
   // function attributes.
   unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
@@ -3111,9 +3391,9 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   SmallVector<AttributeWithIndex, 8> Attrs;
   if (RetAttrs != Attribute::None)
     Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
-
+
   SmallVector<Value*, 8> Args;
-
+
   // Loop through FunctionType's arguments and ensure they are specified
   // correctly.  Also, gather any parameter attributes.
   FunctionType::param_iterator I = Ty->param_begin();
@@ -3125,7 +3405,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
     } else if (!Ty->isVarArg()) {
       return Error(ArgList[i].Loc, "too many arguments specified");
     }
-
+
     if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
       return Error(ArgList[i].Loc, "argument is not of expected type '" +
                    ExpectedTy->getDescription() + "'");
@@ -3133,7 +3413,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
     if (ArgList[i].Attrs != Attribute::None)
       Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
   }
-
+
   if (I != E)
     return Error(CallLoc, "not enough parameters specified for call");

@@ -3142,7 +3422,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,

   // Finish off the Attributes and check them
   AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
-
+
   CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end());
   CI->setTailCall(isTail);
   CI->setCallingConv(CC);
@@ -3156,26 +3436,28 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
 //===----------------------------------------------------------------------===//

 /// ParseAlloc
-///   ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalAlignment)?
-///   ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalAlignment)?
+///   ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalInfo)?
+///   ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
 bool LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
                           unsigned Opc) {
-  PATypeHolder Ty(Type::VoidTy);
+  PATypeHolder Ty(Type::getVoidTy(Context));
   Value *Size = 0;
   LocTy SizeLoc;
   unsigned Alignment = 0;
   if (ParseType(Ty)) return true;

   if (EatIfPresent(lltok::comma)) {
-    if (Lex.getKind() == lltok::kw_align) {
-      if (ParseOptionalAlignment(Alignment)) return true;
-    } else if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
-               ParseOptionalCommaAlignment(Alignment)) {
-      return true;
+    if (Lex.getKind() == lltok::kw_align
+        || Lex.getKind() == lltok::NamedOrCustomMD) {
+      if (ParseOptionalInfo(Alignment)) return true;
+    } else {
+      if (ParseTypeAndValue(Size, SizeLoc, PFS)) return true;
+      if (EatIfPresent(lltok::comma))
+        if (ParseOptionalInfo(Alignment)) return true;
     }
   }

-  if (Size && Size->getType() != Type::Int32Ty)
+  if (Size && Size->getType() != Type::getInt32Ty(Context))
     return Error(SizeLoc, "element count must be i32");

   if (Opc == Instruction::Malloc)
@@ -3197,19 +3479,20 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) {
 }

 /// ParseLoad
-///   ::= 'volatile'? 'load' TypeAndValue (',' 'align' i32)?
+///   ::= 'volatile'? 'load' TypeAndValue (',' OptionalInfo)?
 bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
                          bool isVolatile) {
   Value *Val; LocTy Loc;
-  unsigned Alignment;
-  if (ParseTypeAndValue(Val, Loc, PFS) ||
-      ParseOptionalCommaAlignment(Alignment))
-    return true;
+  unsigned Alignment = 0;
+  if (ParseTypeAndValue(Val, Loc, PFS)) return true;
+
+  if (EatIfPresent(lltok::comma))
+    if (ParseOptionalInfo(Alignment)) return true;

   if (!isa<PointerType>(Val->getType()) ||
       !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
     return Error(Loc, "load operand must be a pointer to a first class type");
-
+
   Inst = new LoadInst(Val, "", isVolatile, Alignment);
   return false;
 }
@@ -3219,20 +3502,22 @@ bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
 bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
                           bool isVolatile) {
   Value *Val, *Ptr; LocTy Loc, PtrLoc;
-  unsigned Alignment;
+  unsigned Alignment = 0;
   if (ParseTypeAndValue(Val, Loc, PFS) ||
       ParseToken(lltok::comma, "expected ',' after store operand") ||
-      ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
-      ParseOptionalCommaAlignment(Alignment))
+      ParseTypeAndValue(Ptr, PtrLoc, PFS))
     return true;
-
+
+  if (EatIfPresent(lltok::comma))
+    if (ParseOptionalInfo(Alignment)) return true;
+
   if (!isa<PointerType>(Ptr->getType()))
     return Error(PtrLoc, "store operand must be a pointer");
   if (!Val->getType()->isFirstClassType())
     return Error(Loc, "store operand must be a first class value");
   if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
     return Error(Loc, "stored value and pointer type do not match");
-
+
   Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
   return false;
 }
@@ -3247,7 +3532,7 @@ bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
       ParseToken(lltok::comma, "expected ',' after getresult operand") ||
       ParseUInt32(Element, EltLoc))
     return true;
-
+
   if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
     return Error(ValLoc, "getresult inst requires an aggregate operand");
   if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
@@ -3257,26 +3542,35 @@ bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
 }

 /// ParseGetElementPtr
-///   ::= 'getelementptr' TypeAndValue (',' TypeAndValue)*
+///   ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
 bool LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
   Value *Ptr, *Val; LocTy Loc, EltLoc;
+
+  bool InBounds = EatIfPresent(lltok::kw_inbounds);
+
   if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
-
+
   if (!isa<PointerType>(Ptr->getType()))
     return Error(Loc, "base of getelementptr must be a pointer");
-
+
   SmallVector<Value*, 16> Indices;
   while (EatIfPresent(lltok::comma)) {
+    if (Lex.getKind() == lltok::NamedOrCustomMD)
+      break;
     if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
     if (!isa<IntegerType>(Val->getType()))
       return Error(EltLoc, "getelementptr index must be an integer");
     Indices.push_back(Val);
   }
-
+
+  if (Lex.getKind() == lltok::NamedOrCustomMD)
+    if (ParseOptionalCustomMetadata()) return true;
+
   if (!GetElementPtrInst::getIndexedType(Ptr->getType(),
                                          Indices.begin(), Indices.end()))
     return Error(Loc, "invalid getelementptr indices");
   Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end());
+  if (InBounds)
+    cast<GEPOperator>(Inst)->setIsInBounds(true);
   return false;
 }
@@ -3309,10 +3603,10 @@ bool LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
       ParseTypeAndValue(Val1, Loc1, PFS) ||
       ParseIndexList(Indices))
     return true;
-
+
   if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType()))
     return Error(Loc0, "extractvalue operand must be array or struct");
-
+
   if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
                                         Indices.end()))
     return Error(Loc0, "invalid indices for insertvalue");
@@ -3332,14 +3626,28 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts) {
   assert(Lex.getKind() == lltok::lbrace);
   Lex.Lex();
   do {
-    Value *V;
+    Value *V = 0;
     if (Lex.getKind() == lltok::kw_null) {
       Lex.Lex();
       V = 0;
     } else {
-      Constant *C;
-      if (ParseGlobalTypeAndValue(C)) return true;
-      V = C;
+      PATypeHolder Ty(Type::getVoidTy(Context));
+      if (ParseType(Ty)) return true;
+      if (Lex.getKind() == lltok::Metadata) {
+        Lex.Lex();
+        MetadataBase *Node = 0;
+        if (!ParseMDNode(Node))
+          V = Node;
+        else {
+          MetadataBase *MDS = 0;
+          if (ParseMDString(MDS)) return true;
+          V = MDS;
+        }
+      } else {
+        Constant *C;
+        if (ParseGlobalValue(Ty, C)) return true;
+        V = C;
+      }
     }
     Elts.push_back(V);
   } while (EatIfPresent(lltok::comma));
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 6659620e6c939..97bf2f309f6d5 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -28,6 +28,7 @@ namespace llvm {
   class Instruction;
   class Constant;
   class GlobalValue;
+  class MetadataBase;
   class MDString;
   class MDNode;
   struct ValID;
@@ -45,7 +46,9 @@ namespace llvm {
     std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
     std::vector<PATypeHolder> NumberedTypes;
     /// MetadataCache - This map keeps track of parsed metadata constants.
-    std::map<unsigned, Constant*> MetadataCache;
+    std::map<unsigned, MetadataBase*> MetadataCache;
+    std::map<unsigned, std::pair<MetadataBase*, LocTy> > ForwardRefMDNodes;
+    SmallVector<std::pair<unsigned, MDNode*>, 2> MDsOnInst;
     struct UpRefRecord {
       /// Loc - This is the location of the upref.
LocTy Loc; @@ -74,7 +77,7 @@ namespace llvm { std::vector NumberedVals; public: LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : - Context(m->getContext()), Lex(F, SM, Err), M(m) {} + Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m) {} bool Run(); LLVMContext& getContext() { return Context; } @@ -123,9 +126,10 @@ namespace llvm { bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); } bool ParseOptionalVisibility(unsigned &Visibility); - bool ParseOptionalCallingConv(unsigned &CC); + bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); - bool ParseOptionalCommaAlignment(unsigned &Alignment); + bool ParseOptionalCustomMetadata(); + bool ParseOptionalInfo(unsigned &Alignment); bool ParseIndexList(SmallVectorImpl &Indices); // Top-Level Entities @@ -140,11 +144,15 @@ namespace llvm { bool ParseDefine(); bool ParseGlobalType(bool &IsConstant); + bool ParseUnnamedGlobal(); bool ParseNamedGlobal(); bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage, bool HasLinkage, unsigned Visibility); bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility); bool ParseStandaloneMetadata(); + bool ParseNamedMetadata(); + bool ParseMDString(MetadataBase *&S); + bool ParseMDNode(MetadataBase *&N); // Type Parsing. bool ParseType(PATypeHolder &Result, bool AllowVoid = false); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index cff89f8e472fc..b3c59ee9d3604 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -36,8 +36,9 @@ namespace lltok { kw_declare, kw_define, kw_global, kw_constant, - kw_private, kw_internal, kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, - kw_appending, kw_dllimport, kw_dllexport, kw_common,kw_available_externally, + kw_private, kw_linker_private, kw_internal, kw_linkonce, kw_linkonce_odr, + kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, + kw_available_externally, kw_default, kw_hidden, kw_protected, kw_extern_weak, kw_external, kw_thread_local, @@ -50,6 +51,10 @@ namespace lltok { kw_deplibs, kw_datalayout, kw_volatile, + kw_nuw, + kw_nsw, + kw_exact, + kw_inbounds, kw_align, kw_addrspace, kw_section, @@ -57,7 +62,9 @@ namespace lltok { kw_module, kw_asm, kw_sideeffect, + kw_msasm, kw_gc, + kw_dbg, kw_c, kw_cc, kw_ccc, kw_fastcc, kw_coldcc, @@ -77,6 +84,7 @@ namespace lltok { kw_readnone, kw_readonly, + kw_inlinehint, kw_noinline, kw_alwaysinline, kw_optsize, @@ -84,6 +92,7 @@ namespace lltok { kw_sspreq, kw_noredzone, kw_noimplicitfloat, + kw_naked, kw_type, kw_opaque, @@ -96,7 +105,7 @@ namespace lltok { kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul, kw_udiv, kw_sdiv, kw_fdiv, kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr, - kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, kw_vicmp, kw_vfcmp, + kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, kw_phi, kw_call, kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp, @@ -119,6 +128,7 @@ namespace lltok { GlobalVar, // @foo @"foo" LocalVar, // %foo %"foo" StringConstant, // "foo" + NamedOrCustomMD, // !foo // Metadata valued tokens. 
Metadata, // !"foo" !{i8 42} diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index d66c13d39c09f..331a23323b517 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -21,6 +21,24 @@ #include using namespace llvm; +Module *llvm::ParseAssembly(MemoryBuffer *F, + Module *M, + SMDiagnostic &Err, + LLVMContext &Context) { + SourceMgr SM; + SM.AddNewSourceBuffer(F, SMLoc()); + + // If we are parsing into an existing module, do it. + if (M) + return LLParser(F, SM, Err, M).Run() ? 0 : M; + + // Otherwise create a new module. + OwningPtr M2(new Module(F->getBufferIdentifier(), Context)); + if (LLParser(F, SM, Err, M2.get()).Run()) + return 0; + return M2.take(); +} + Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { std::string ErrorStr; @@ -31,13 +49,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, return 0; } - SourceMgr SM; - SM.AddNewSourceBuffer(F, SMLoc()); - - OwningPtr M(new Module(Filename, Context)); - if (LLParser(F, SM, Err, M.get()).Run()) - return 0; - return M.take(); + return ParseAssembly(F, 0, Err, Context); } Module *llvm::ParseAssemblyString(const char *AsmString, Module *M, @@ -45,17 +57,6 @@ Module *llvm::ParseAssemblyString(const char *AsmString, Module *M, MemoryBuffer *F = MemoryBuffer::getMemBuffer(AsmString, AsmString+strlen(AsmString), ""); - - SourceMgr SM; - SM.AddNewSourceBuffer(F, SMLoc()); - // If we are parsing into an existing module, do it. - if (M) - return LLParser(F, SM, Err, M).Run() ? 0 : M; - - // Otherwise create a new module. - OwningPtr M2(new Module("", Context)); - if (LLParser(F, SM, Err, M2.get()).Run()) - return 0; - return M2.take(); + return ParseAssembly(F, M, Err, Context); } diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index e5b8f7c7685a2..f513d41ce3b45 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -34,12 +34,12 @@ int LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, return 0; } -int LLVMParseBitcodeInContext(LLVMMemoryBufferRef MemBuf, - LLVMContextRef ContextRef, +int LLVMParseBitcodeInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage) { std::string Message; - *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef), + *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef), &Message)); if (!*OutModule) { if (OutMessage) @@ -70,13 +70,13 @@ int LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, return 0; } -int LLVMGetBitcodeModuleProviderInContext(LLVMMemoryBufferRef MemBuf, - LLVMContextRef ContextRef, +int LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage) { std::string Message; - *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef), + *OutMP = wrap(getBitcodeModuleProvider(unwrap(MemBuf), *unwrap(ContextRef), &Message)); if (!*OutMP) { if (OutMessage) diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 5943de2f81210..4eb12c69eb6ec 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -16,9 +16,11 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/MDNode.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include 
"llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/AutoUpgrade.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -32,7 +34,8 @@ void BitcodeReader::FreeState() { Buffer = 0; std::vector().swap(TypeList); ValueList.clear(); - + MDValueList.clear(); + std::vector().swap(MAttributes); std::vector().swap(FunctionBBs); std::vector().swap(FunctionsWithBodies); @@ -50,7 +53,7 @@ static bool ConvertToString(SmallVector &Record, unsigned Idx, StrTy &Result) { if (Idx > Record.size()) return true; - + for (unsigned i = Idx, e = Record.size(); i != e; ++i) Result += (char)Record[i]; return false; @@ -59,19 +62,20 @@ static bool ConvertToString(SmallVector &Record, unsigned Idx, static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { switch (Val) { default: // Map unknown/new linkages to external - case 0: return GlobalValue::ExternalLinkage; - case 1: return GlobalValue::WeakAnyLinkage; - case 2: return GlobalValue::AppendingLinkage; - case 3: return GlobalValue::InternalLinkage; - case 4: return GlobalValue::LinkOnceAnyLinkage; - case 5: return GlobalValue::DLLImportLinkage; - case 6: return GlobalValue::DLLExportLinkage; - case 7: return GlobalValue::ExternalWeakLinkage; - case 8: return GlobalValue::CommonLinkage; - case 9: return GlobalValue::PrivateLinkage; + case 0: return GlobalValue::ExternalLinkage; + case 1: return GlobalValue::WeakAnyLinkage; + case 2: return GlobalValue::AppendingLinkage; + case 3: return GlobalValue::InternalLinkage; + case 4: return GlobalValue::LinkOnceAnyLinkage; + case 5: return GlobalValue::DLLImportLinkage; + case 6: return GlobalValue::DLLExportLinkage; + case 7: return GlobalValue::ExternalWeakLinkage; + case 8: return GlobalValue::CommonLinkage; + case 9: return GlobalValue::PrivateLinkage; case 10: return GlobalValue::WeakODRLinkage; case 11: return GlobalValue::LinkOnceODRLinkage; case 12: return GlobalValue::AvailableExternallyLinkage; + case 13: return GlobalValue::LinkerPrivateLinkage; } } @@ -137,19 +141,19 @@ namespace { void *operator new(size_t s) { return User::operator new(s, 1); } - explicit ConstantPlaceHolder(const Type *Ty) + explicit ConstantPlaceHolder(const Type *Ty, LLVMContext& Context) : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) { - Op<0>() = UndefValue::get(Type::Int32Ty); + Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); } - + /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const ConstantPlaceHolder *) { return true; } static bool classof(const Value *V) { - return isa(V) && + return isa(V) && cast(V)->getOpcode() == Instruction::UserOp1; } - - + + /// Provide fast operand accessors //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); }; @@ -157,7 +161,7 @@ namespace { // FIXME: can we inherit this from ConstantExpr? template <> -struct OperandTraits : FixedNumOperandTraits<1> { +struct OperandTraits : public FixedNumOperandTraits<1> { }; } @@ -167,16 +171,16 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) { push_back(V); return; } - + if (Idx >= size()) resize(Idx+1); - + WeakVH &OldV = ValuePtrs[Idx]; if (OldV == 0) { OldV = V; return; } - + // Handle constants and non-constants (e.g. instrs) differently for // efficiency. 
if (Constant *PHC = dyn_cast(&*OldV)) { @@ -189,7 +193,7 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) { delete PrevVal; } } - + Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, const Type *Ty) { @@ -202,7 +206,7 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, } // Create and return a placeholder, which will later be RAUW'd. - Constant *C = new ConstantPlaceHolder(Ty); + Constant *C = new ConstantPlaceHolder(Ty, Context); ValuePtrs[Idx] = C; return C; } @@ -210,15 +214,15 @@ Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) { if (Idx >= size()) resize(Idx + 1); - + if (Value *V = ValuePtrs[Idx]) { assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!"); return V; } - + // No type specified, must be invalid reference. if (Ty == 0) return 0; - + // Create and return a placeholder, which will later be RAUW'd. Value *V = new Argument(Ty); ValuePtrs[Idx] = V; @@ -233,30 +237,30 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) { /// uses and rewrite all the place holders at once for any constant that uses /// a placeholder. void BitcodeReaderValueList::ResolveConstantForwardRefs() { - // Sort the values by-pointer so that they are efficient to look up with a + // Sort the values by-pointer so that they are efficient to look up with a // binary search. std::sort(ResolveConstants.begin(), ResolveConstants.end()); - + SmallVector NewOps; - + while (!ResolveConstants.empty()) { Value *RealVal = operator[](ResolveConstants.back().second); Constant *Placeholder = ResolveConstants.back().first; ResolveConstants.pop_back(); - + // Loop over all users of the placeholder, updating them to reference the // new value. If they reference more than one placeholder, update them all // at once. while (!Placeholder->use_empty()) { Value::use_iterator UI = Placeholder->use_begin(); - + // If the using object isn't uniqued, just update the operands. This // handles instructions and initializers for global variables. if (!isa(*UI) || isa(*UI)) { UI.getUse().set(RealVal); continue; } - + // Otherwise, we have a constant that uses the placeholder. Replace that // constant with a new constant that has *all* placeholder uses updated. Constant *UserC = cast(*UI); @@ -271,8 +275,8 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { NewOp = RealVal; } else { // Otherwise, look up the placeholder in ResolveConstants. - ResolveConstantsTy::iterator It = - std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(), + ResolveConstantsTy::iterator It = + std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(), std::pair(cast(*I), 0)); assert(It != ResolveConstants.end() && It->first == *I); @@ -285,10 +289,11 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { // Make the new constant. 
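ResolveConstantForwardRefs above avoids a per-lookup map by sorting the (placeholder, value-slot) pairs once and then binary-searching with std::lower_bound while it rewrites users. The sketch below isolates that pattern; plain int objects stand in for the placeholder constants, and the 0u in the probe works because std::pair orders by .first before .second.

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  // Pairs of (placeholder address, slot index), as in ResolveConstants.
  int A, B, C;  // stand-ins for placeholder constants
  std::vector<std::pair<int*, unsigned> > Work = {{&B, 1}, {&C, 2}, {&A, 0}};

  // Sort by pointer once, exactly as the reader does.
  std::sort(Work.begin(), Work.end());

  // Later lookups become O(log n): find the entry for a given placeholder.
  auto It = std::lower_bound(Work.begin(), Work.end(),
                             std::make_pair(&C, 0u));
  assert(It != Work.end() && It->first == &C);
  return 0;
}

The design choice pays off because each placeholder is looked up many times (once per use) but the set is built only once.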
Constant *NewC; if (ConstantArray *UserCA = dyn_cast(UserC)) { - NewC = ConstantArray::get(UserCA->getType(), &NewOps[0], NewOps.size()); + NewC = ConstantArray::get(UserCA->getType(), &NewOps[0], + NewOps.size()); } else if (ConstantStruct *UserCS = dyn_cast(UserC)) { - NewC = ConstantStruct::get(&NewOps[0], NewOps.size(), - UserCS->getType()->isPacked()); + NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(), + UserCS->getType()->isPacked()); } else if (isa(UserC)) { NewC = ConstantVector::get(&NewOps[0], NewOps.size()); } else { @@ -296,29 +301,67 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { NewC = cast(UserC)->getWithOperands(&NewOps[0], NewOps.size()); } - + UserC->replaceAllUsesWith(NewC); UserC->destroyConstant(); NewOps.clear(); } - + // Update all ValueHandles, they should be the only users at this point. Placeholder->replaceAllUsesWith(RealVal); delete Placeholder; } } +void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) { + if (Idx == size()) { + push_back(V); + return; + } + + if (Idx >= size()) + resize(Idx+1); + + WeakVH &OldV = MDValuePtrs[Idx]; + if (OldV == 0) { + OldV = V; + return; + } + + // If there was a forward reference to this value, replace it. + Value *PrevVal = OldV; + OldV->replaceAllUsesWith(V); + delete PrevVal; + // Deleting PrevVal sets Idx value in MDValuePtrs to null. Set new + // value for Idx. + MDValuePtrs[Idx] = V; +} + +Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { + if (Idx >= size()) + resize(Idx + 1); + + if (Value *V = MDValuePtrs[Idx]) { + assert(V->getType()->isMetadataTy() && "Type mismatch in value table!"); + return V; + } + + // Create and return a placeholder, which will later be RAUW'd. + Value *V = new Argument(Type::getMetadataTy(Context)); + MDValuePtrs[Idx] = V; + return V; +} const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) { // If the TypeID is in range, return it. if (ID < TypeList.size()) return TypeList[ID].get(); if (!isTypeTable) return 0; - + // The type table allows forward references. Push as many Opaque types as // needed to get up to ID. while (TypeList.size() <= ID) - TypeList.push_back(OpaqueType::get()); + TypeList.push_back(OpaqueType::get(Context)); return TypeList.back().get(); } @@ -329,14 +372,14 @@ const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) { bool BitcodeReader::ParseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) return Error("Malformed block record"); - + if (!MAttributes.empty()) return Error("Multiple PARAMATTR blocks found!"); - + SmallVector Record; - + SmallVector Attrs; - + // Read all the records. while (1) { unsigned Code = Stream.ReadCode(); @@ -345,7 +388,7 @@ bool BitcodeReader::ParseAttributeBlock() { return Error("Error at end of PARAMATTR block"); return false; } - + if (Code == bitc::ENTER_SUBBLOCK) { // No known subblocks, always skip them. Stream.ReadSubBlockID(); @@ -353,12 +396,12 @@ bool BitcodeReader::ParseAttributeBlock() { return Error("Malformed block record"); continue; } - + if (Code == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); continue; } - + // Read a record. 
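getTypeByID above handles forward type references by padding the type table with opaque placeholder types up to the requested ID and refining them to the real type later. The sketch below mimics only the padding half, with strings standing in for types; TypeTable and define are invented names, and the refinement step (refineAbstractTypeTo, which also rewrites existing users of the opaque type) is elided.

#include <cassert>
#include <string>
#include <vector>

// Sketch of a type table that tolerates forward references: unknown IDs
// are padded with placeholder entries that a later record overwrites.
struct TypeTable {
  std::vector<std::string> Types;  // "" acts as the opaque placeholder

  std::string &get(unsigned ID) {
    while (Types.size() <= ID)
      Types.push_back("");         // placeholder, like OpaqueType::get
    return Types[ID];
  }

  void define(unsigned ID, const std::string &Ty) { get(ID) = Ty; }
};

int main() {
  TypeTable T;
  T.get(3);                        // forward reference to type #3
  assert(T.Types.size() == 4 && T.Types[3].empty());
  T.define(3, "i32*");             // the TYPE_CODE record arrives later
  assert(T.get(3) == "i32*");
}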
Record.clear(); switch (Stream.ReadRecord(Code, Record)) { @@ -397,14 +440,14 @@ bool BitcodeReader::ParseAttributeBlock() { unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn| Attribute::ReadOnly|Attribute::ReadNone); - + if (FnAttribute == Attribute::None && RetAttribute != Attribute::None && (RetAttribute & OldRetAttrs) != 0) { if (FnAttribute == Attribute::None) { // add a slot so they get added. Record.push_back(~0U); Record.push_back(0); } - + FnAttribute |= RetAttribute & OldRetAttrs; RetAttribute &= ~OldRetAttrs; } @@ -432,7 +475,7 @@ bool BitcodeReader::ParseAttributeBlock() { bool BitcodeReader::ParseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID)) return Error("Malformed block record"); - + if (!TypeList.empty()) return Error("Multiple TYPE_BLOCKs found!"); @@ -449,7 +492,7 @@ bool BitcodeReader::ParseTypeTable() { return Error("Error at end of type table block"); return false; } - + if (Code == bitc::ENTER_SUBBLOCK) { // No known subblocks, always skip them. Stream.ReadSubBlockID(); @@ -457,12 +500,12 @@ bool BitcodeReader::ParseTypeTable() { return Error("Malformed block record"); continue; } - + if (Code == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); continue; } - + // Read a record. Record.clear(); const Type *ResultTy = 0; @@ -478,46 +521,47 @@ bool BitcodeReader::ParseTypeTable() { TypeList.reserve(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID - ResultTy = Type::VoidTy; + ResultTy = Type::getVoidTy(Context); break; case bitc::TYPE_CODE_FLOAT: // FLOAT - ResultTy = Type::FloatTy; + ResultTy = Type::getFloatTy(Context); break; case bitc::TYPE_CODE_DOUBLE: // DOUBLE - ResultTy = Type::DoubleTy; + ResultTy = Type::getDoubleTy(Context); break; case bitc::TYPE_CODE_X86_FP80: // X86_FP80 - ResultTy = Type::X86_FP80Ty; + ResultTy = Type::getX86_FP80Ty(Context); break; case bitc::TYPE_CODE_FP128: // FP128 - ResultTy = Type::FP128Ty; + ResultTy = Type::getFP128Ty(Context); break; case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 - ResultTy = Type::PPC_FP128Ty; + ResultTy = Type::getPPC_FP128Ty(Context); break; case bitc::TYPE_CODE_LABEL: // LABEL - ResultTy = Type::LabelTy; + ResultTy = Type::getLabelTy(Context); break; case bitc::TYPE_CODE_OPAQUE: // OPAQUE ResultTy = 0; break; case bitc::TYPE_CODE_METADATA: // METADATA - ResultTy = Type::MetadataTy; + ResultTy = Type::getMetadataTy(Context); break; case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] if (Record.size() < 1) return Error("Invalid Integer type record"); - - ResultTy = IntegerType::get(Record[0]); + + ResultTy = IntegerType::get(Context, Record[0]); break; - case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or + case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or // [pointee type, address space] if (Record.size() < 1) return Error("Invalid POINTER type record"); unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; - ResultTy = PointerType::get(getTypeByID(Record[0], true), AddressSpace); + ResultTy = PointerType::get(getTypeByID(Record[0], true), + AddressSpace); break; } case bitc::TYPE_CODE_FUNCTION: { @@ -528,7 +572,7 @@ bool BitcodeReader::ParseTypeTable() { std::vector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) ArgTys.push_back(getTypeByID(Record[i], true)); - + ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys, Record[0]); break; @@ -539,7 +583,7 @@ bool BitcodeReader::ParseTypeTable() { std::vector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) EltTys.push_back(getTypeByID(Record[i], 
true)); - ResultTy = StructType::get(EltTys, Record[0]); + ResultTy = StructType::get(Context, EltTys, Record[0]); break; } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] @@ -553,10 +597,10 @@ bool BitcodeReader::ParseTypeTable() { ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]); break; } - + if (NumRecords == TypeList.size()) { // If this is a new type slot, just append it. - TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get()); + TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get(Context)); ++NumRecords; } else if (ResultTy == 0) { // Otherwise, this was forward referenced, so an opaque type was created, @@ -568,14 +612,14 @@ bool BitcodeReader::ParseTypeTable() { // Resolve the opaque type to the real type now. assert(NumRecords < TypeList.size() && "Typelist imbalance"); const OpaqueType *OldTy = cast(TypeList[NumRecords++].get()); - + // Don't directly push the new type on the Tab. Instead we want to replace // the opaque type we previously inserted with the new concrete value. The // refinement from the abstract (opaque) type to the new type causes all // uses of the abstract type to use the concrete type (NewTy). This will // also cause the opaque type to be deleted. const_cast(OldTy)->refineAbstractTypeTo(ResultTy); - + // This should have replaced the old opaque type with the new type in the // value table... or with a preexisting type that was already in the // system. Let's just make sure it did. @@ -589,9 +633,9 @@ bool BitcodeReader::ParseTypeTable() { bool BitcodeReader::ParseTypeSymbolTable() { if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID)) return Error("Malformed block record"); - + SmallVector Record; - + // Read all the records for this type table. std::string TypeName; while (1) { @@ -601,7 +645,7 @@ bool BitcodeReader::ParseTypeSymbolTable() { return Error("Error at end of type symbol table block"); return false; } - + if (Code == bitc::ENTER_SUBBLOCK) { // No known subblocks, always skip them. Stream.ReadSubBlockID(); @@ -609,12 +653,12 @@ bool BitcodeReader::ParseTypeSymbolTable() { return Error("Malformed block record"); continue; } - + if (Code == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); continue; } - + // Read a record. Record.clear(); switch (Stream.ReadRecord(Code, Record)) { @@ -639,7 +683,7 @@ bool BitcodeReader::ParseValueSymbolTable() { return Error("Malformed block record"); SmallVector Record; - + // Read all the records for this value table. SmallString<128> ValueName; while (1) { @@ -648,7 +692,7 @@ bool BitcodeReader::ParseValueSymbolTable() { if (Stream.ReadBlockEnd()) return Error("Error at end of value symbol table block"); return false; - } + } if (Code == bitc::ENTER_SUBBLOCK) { // No known subblocks, always skip them. Stream.ReadSubBlockID(); @@ -656,12 +700,12 @@ bool BitcodeReader::ParseValueSymbolTable() { return Error("Malformed block record"); continue; } - + if (Code == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); continue; } - + // Read a record. 
Record.clear(); switch (Stream.ReadRecord(Code, Record)) { @@ -674,8 +718,8 @@ bool BitcodeReader::ParseValueSymbolTable() { if (ValueID >= ValueList.size()) return Error("Invalid Value ID in VST_ENTRY record"); Value *V = ValueList[ValueID]; - - V->setName(&ValueName[0], ValueName.size()); + + V->setName(StringRef(ValueName.data(), ValueName.size())); ValueName.clear(); break; } @@ -685,8 +729,8 @@ BasicBlock *BB = getBasicBlock(Record[0]); if (BB == 0) return Error("Invalid BB ID in VST_BBENTRY record"); - - BB->setName(&ValueName[0], ValueName.size()); + + BB->setName(StringRef(ValueName.data(), ValueName.size())); ValueName.clear(); break; } @@ -694,12 +738,121 @@ } } +bool BitcodeReader::ParseMetadata() { + unsigned NextValueNo = MDValueList.size(); + + if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector<uint64_t, 64> Record; + + // Read all the records. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of METADATA block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: ignore. + break; + case bitc::METADATA_NAME: { + // Read the name of the named metadata. + unsigned NameLength = Record.size(); + SmallString<8> Name; + Name.resize(NameLength); + for (unsigned i = 0; i != NameLength; ++i) + Name[i] = Record[i]; + Record.clear(); + Code = Stream.ReadCode(); + + // METADATA_NAME is always followed by METADATA_NAMED_NODE. + if (Stream.ReadRecord(Code, Record) != bitc::METADATA_NAMED_NODE) + assert(0 && "Invalid Named Metadata record"); + + // Read named metadata elements.
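The METADATA_NAME and METADATA_STRING cases here rebuild strings from records that store one character per 64-bit element, and METADATA_KIND does the same starting at offset 1 because element 0 carries the kind ID. A small stand-alone decoder for that layout (decodeString is an invented helper, not a bitcode-reader API):

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Bitcode string records carry one character per 64-bit element;
// decoding is a straight per-element narrowing copy.
static std::string decodeString(const std::vector<uint64_t> &Record,
                                unsigned From = 0) {
  std::string S;
  S.reserve(Record.size() - From);
  for (unsigned i = From, e = Record.size(); i != e; ++i)
    S += static_cast<char>(Record[i]);
  return S;
}

int main() {
  std::vector<uint64_t> Record = {'d', 'b', 'g'};
  assert(decodeString(Record) == "dbg");
  // METADATA_KIND puts the kind ID in Record[0], the name after it.
  std::vector<uint64_t> Kind = {7, 'd', 'b', 'g'};
  assert(decodeString(Kind, 1) == "dbg");
}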
+ unsigned Size = Record.size(); + SmallVector Elts; + for (unsigned i = 0; i != Size; ++i) { + Value *MD = MDValueList.getValueFwdRef(Record[i]); + if (MetadataBase *B = dyn_cast(MD)) + Elts.push_back(B); + } + Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(), + Elts.size(), TheModule); + MDValueList.AssignValue(V, NextValueNo++); + break; + } + case bitc::METADATA_NODE: { + if (Record.empty() || Record.size() % 2 == 1) + return Error("Invalid METADATA_NODE record"); + + unsigned Size = Record.size(); + SmallVector Elts; + for (unsigned i = 0; i != Size; i += 2) { + const Type *Ty = getTypeByID(Record[i], false); + if (Ty->isMetadataTy()) + Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); + else if (Ty != Type::getVoidTy(Context)) + Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty)); + else + Elts.push_back(NULL); + } + Value *V = MDNode::get(Context, &Elts[0], Elts.size()); + MDValueList.AssignValue(V, NextValueNo++); + break; + } + case bitc::METADATA_STRING: { + unsigned MDStringLength = Record.size(); + SmallString<8> String; + String.resize(MDStringLength); + for (unsigned i = 0; i != MDStringLength; ++i) + String[i] = Record[i]; + Value *V = MDString::get(Context, + StringRef(String.data(), String.size())); + MDValueList.AssignValue(V, NextValueNo++); + break; + } + case bitc::METADATA_KIND: { + unsigned RecordLength = Record.size(); + if (Record.empty() || RecordLength < 2) + return Error("Invalid METADATA_KIND record"); + SmallString<8> Name; + Name.resize(RecordLength-1); + unsigned Kind = Record[0]; + for (unsigned i = 1; i != RecordLength; ++i) + Name[i-1] = Record[i]; + MetadataContext &TheMetadata = Context.getMetadata(); + TheMetadata.MDHandlerNames[Name.str()] = Kind; + break; + } + } + } +} + /// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in /// the LSB for dense VBR encoding. static uint64_t DecodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; - if (V != 1) + if (V != 1) return -(V >> 1); // There is no such thing as -0 with integers. "-0" really means MININT. return 1ULL << 63; @@ -710,7 +863,7 @@ static uint64_t DecodeSignRotatedValue(uint64_t V) { bool BitcodeReader::ResolveGlobalAndAliasInits() { std::vector > GlobalInitWorklist; std::vector > AliasInitWorklist; - + GlobalInitWorklist.swap(GlobalInits); AliasInitWorklist.swap(AliasInits); @@ -725,7 +878,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() { else return Error("Global variable initializer is not a constant!"); } - GlobalInitWorklist.pop_back(); + GlobalInitWorklist.pop_back(); } while (!AliasInitWorklist.empty()) { @@ -738,26 +891,25 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() { else return Error("Alias initializer is not a constant!"); } - AliasInitWorklist.pop_back(); + AliasInitWorklist.pop_back(); } return false; } - bool BitcodeReader::ParseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) return Error("Malformed block record"); SmallVector Record; - + // Read all the records for this value table. - const Type *CurTy = Type::Int32Ty; + const Type *CurTy = Type::getInt32Ty(Context); unsigned NextCstNo = ValueList.size(); while (1) { unsigned Code = Stream.ReadCode(); if (Code == bitc::END_BLOCK) break; - + if (Code == bitc::ENTER_SUBBLOCK) { // No known subblocks, always skip them. 
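DecodeSignRotatedValue above undoes the writer's trick of rotating the sign bit into the least significant bit so that small negative integers stay small under VBR encoding. The encoder below is reconstructed from that decoder to show the round trip; it is a sketch, not the bitcode writer's actual routine, and INT64_MIN is the one value that needs the special "-0" encoding of 1.

#include <cassert>
#include <cstdint>

// Writer side of the scheme, reconstructed to match the decoder: LSB 0
// means non-negative, LSB 1 means negative, and INT64_MIN (whose
// magnitude cannot be negated) maps to the otherwise-unused value 1.
static uint64_t EncodeSignRotatedValue(int64_t V) {
  if (V >= 0) return uint64_t(V) << 1;
  if (V == INT64_MIN) return 1;              // "-0", see the decoder
  return (uint64_t(-V) << 1) | 1;
}

static uint64_t DecodeSignRotatedValue(uint64_t V) {  // as in the patch
  if ((V & 1) == 0) return V >> 1;
  if (V != 1) return -(V >> 1);
  return 1ULL << 63;                         // "-0" really means MININT
}

int main() {
  for (int64_t V : {0LL, 1LL, -1LL, 1234LL, -1234LL})
    assert((int64_t)DecodeSignRotatedValue(EncodeSignRotatedValue(V)) == V);
  assert(DecodeSignRotatedValue(1) == (1ULL << 63));  // INT64_MIN
}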
Stream.ReadSubBlockID(); @@ -765,16 +917,17 @@ bool BitcodeReader::ParseConstants() { return Error("Malformed block record"); continue; } - + if (Code == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); continue; } - + // Read a record. Record.clear(); Value *V = 0; - switch (Stream.ReadRecord(Code, Record)) { + unsigned BitCode = Stream.ReadRecord(Code, Record); + switch (BitCode) { default: // Default behavior: unknown constant case bitc::CST_CODE_UNDEF: // UNDEF V = UndefValue::get(CurTy); @@ -797,45 +950,46 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!isa(CurTy) || Record.empty()) return Error("Invalid WIDE_INTEGER record"); - + unsigned NumWords = Record.size(); SmallVector Words; Words.resize(NumWords); for (unsigned i = 0; i != NumWords; ++i) Words[i] = DecodeSignRotatedValue(Record[i]); - V = ConstantInt::get(APInt(cast(CurTy)->getBitWidth(), - NumWords, &Words[0])); + V = ConstantInt::get(Context, + APInt(cast(CurTy)->getBitWidth(), + NumWords, &Words[0])); break; } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) return Error("Invalid FLOAT record"); - if (CurTy == Type::FloatTy) - V = ConstantFP::get(APFloat(APInt(32, (uint32_t)Record[0]))); - else if (CurTy == Type::DoubleTy) - V = ConstantFP::get(APFloat(APInt(64, Record[0]))); - else if (CurTy == Type::X86_FP80Ty) { + if (CurTy->isFloatTy()) + V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0]))); + else if (CurTy->isDoubleTy()) + V = ConstantFP::get(Context, APFloat(APInt(64, Record[0]))); + else if (CurTy->isX86_FP80Ty()) { // Bits are not stored the same way as a normal i80 APInt, compensate. uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; - V = ConstantFP::get(APFloat(APInt(80, 2, Rearrange))); - } else if (CurTy == Type::FP128Ty) - V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0]), true)); - else if (CurTy == Type::PPC_FP128Ty) - V = ConstantFP::get(APFloat(APInt(128, 2, &Record[0]))); + V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange))); + } else if (CurTy->isFP128Ty()) + V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true)); + else if (CurTy->isPPC_FP128Ty()) + V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]))); else V = UndefValue::get(CurTy); break; } - + case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] if (Record.empty()) return Error("Invalid CST_AGGREGATE record"); - + unsigned Size = Record.size(); std::vector Elts; - + if (const StructType *STy = dyn_cast(CurTy)) { for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], @@ -862,7 +1016,7 @@ bool BitcodeReader::ParseConstants() { const ArrayType *ATy = cast(CurTy); const Type *EltTy = ATy->getElementType(); - + unsigned Size = Record.size(); std::vector Elts; for (unsigned i = 0; i != Size; ++i) @@ -873,10 +1027,10 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) return Error("Invalid CST_AGGREGATE record"); - + const ArrayType *ATy = cast(CurTy); const Type *EltTy = ATy->getElementType(); - + unsigned Size = Record.size(); std::vector Elts; for (unsigned i = 0; i != Size; ++i) @@ -893,10 +1047,24 @@ bool BitcodeReader::ParseConstants() { } else { Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy); Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy); - V = ConstantExpr::get(Opc, LHS, RHS); + unsigned Flags = 0; + if 
(Record.size() >= 4) { + if (Opc == Instruction::Add || + Opc == Instruction::Sub || + Opc == Instruction::Mul) { + if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP)) + Flags |= OverflowingBinaryOperator::NoSignedWrap; + if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) + Flags |= OverflowingBinaryOperator::NoUnsignedWrap; + } else if (Opc == Instruction::SDiv) { + if (Record[3] & (1 << bitc::SDIV_EXACT)) + Flags |= SDivOperator::IsExact; + } + } + V = ConstantExpr::get(Opc, LHS, RHS, Flags); } break; - } + } case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] if (Record.size() < 3) return Error("Invalid CE_CAST record"); int Opc = GetDecodedCastOpcode(Record[0]); @@ -909,7 +1077,8 @@ bool BitcodeReader::ParseConstants() { V = ConstantExpr::getCast(Opc, Op, CurTy); } break; - } + } + case bitc::CST_CODE_CE_INBOUNDS_GEP: case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] if (Record.size() & 1) return Error("Invalid CE_GEP record"); SmallVector Elts; @@ -918,23 +1087,28 @@ bool BitcodeReader::ParseConstants() { if (!ElTy) return Error("Invalid CE_GEP record"); Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy)); } - V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1], Elts.size()-1); + if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP) + V = ConstantExpr::getInBoundsGetElementPtr(Elts[0], &Elts[1], + Elts.size()-1); + else + V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1], + Elts.size()-1); break; } case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#] if (Record.size() < 3) return Error("Invalid CE_SELECT record"); V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], - Type::Int1Ty), + Type::getInt1Ty(Context)), ValueList.getConstantFwdRef(Record[1],CurTy), ValueList.getConstantFwdRef(Record[2],CurTy)); break; case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval] if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record"); - const VectorType *OpTy = + const VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); - Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); V = ConstantExpr::getExtractElement(Op0, Op1); break; } @@ -945,7 +1119,7 @@ bool BitcodeReader::ParseConstants() { Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); - Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::Int32Ty); + Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); V = ConstantExpr::getInsertElement(Op0, Op1, Op2); break; } @@ -955,7 +1129,8 @@ bool BitcodeReader::ParseConstants() { return Error("Invalid CE_SHUFFLEVEC record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); - const Type *ShufTy=VectorType::get(Type::Int32Ty, OpTy->getNumElements()); + const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + OpTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; @@ -967,7 +1142,8 @@ bool BitcodeReader::ParseConstants() { return Error("Invalid CE_SHUFVEC_EX record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); - 
const Type *ShufTy=VectorType::get(Type::Int32Ty, RTy->getNumElements()); + const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + RTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; @@ -981,72 +1157,43 @@ bool BitcodeReader::ParseConstants() { if (OpTy->isFloatingPoint()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); - else if (!isa(OpTy)) - V = ConstantExpr::getICmp(Record[3], Op0, Op1); - else if (OpTy->isFPOrFPVector()) - V = ConstantExpr::getVFCmp(Record[3], Op0, Op1); else - V = ConstantExpr::getVICmp(Record[3], Op0, Op1); + V = ConstantExpr::getICmp(Record[3], Op0, Op1); break; } case bitc::CST_CODE_INLINEASM: { if (Record.size() < 2) return Error("Invalid INLINEASM record"); std::string AsmStr, ConstrStr; - bool HasSideEffects = Record[0]; + bool HasSideEffects = Record[0] & 1; + bool IsMsAsm = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return Error("Invalid INLINEASM record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) return Error("Invalid INLINEASM record"); - + for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; const PointerType *PTy = cast(CurTy); V = InlineAsm::get(cast(PTy->getElementType()), - AsmStr, ConstrStr, HasSideEffects); - break; - } - case bitc::CST_CODE_MDSTRING: { - if (Record.size() < 2) return Error("Invalid MDSTRING record"); - unsigned MDStringLength = Record.size(); - SmallString<8> String; - String.resize(MDStringLength); - for (unsigned i = 0; i != MDStringLength; ++i) - String[i] = Record[i]; - V = MDString::get(String.c_str(), String.c_str() + MDStringLength); - break; - } - case bitc::CST_CODE_MDNODE: { - if (Record.empty() || Record.size() % 2 == 1) - return Error("Invalid CST_MDNODE record"); - - unsigned Size = Record.size(); - SmallVector Elts; - for (unsigned i = 0; i != Size; i += 2) { - const Type *Ty = getTypeByID(Record[i], false); - if (Ty != Type::VoidTy) - Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty)); - else - Elts.push_back(NULL); - } - V = MDNode::get(&Elts[0], Elts.size()); + AsmStr, ConstrStr, HasSideEffects, IsMsAsm); break; } } - + ValueList.AssignValue(V, NextCstNo); ++NextCstNo; } - + if (NextCstNo != ValueList.size()) return Error("Invalid constant reference!"); - + if (Stream.ReadBlockEnd()) return Error("Error at end of constants block"); - + // Once all the constants have been read, go through and resolve forward // references. ValueList.ResolveConstantForwardRefs(); @@ -1060,18 +1207,18 @@ bool BitcodeReader::RememberAndSkipFunctionBody() { // Get the function we are talking about. if (FunctionsWithBodies.empty()) return Error("Insufficient function protos"); - + Function *Fn = FunctionsWithBodies.back(); FunctionsWithBodies.pop_back(); - + // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); DeferredFunctionInfo[Fn] = std::make_pair(CurBit, Fn->getLinkage()); - + // Set the functions linkage to GhostLinkage so we know it is lazily // deserialized. Fn->setLinkage(GlobalValue::GhostLinkage); - + // Skip over the function block for now. if (Stream.SkipBlock()) return Error("Malformed block record"); @@ -1082,13 +1229,13 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { // Reject multiple MODULE_BLOCK's in a single bitstream. 
if (TheModule) return Error("Multiple MODULE_BLOCKs in same stream"); - + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error("Malformed block record"); // Otherwise, create the module. TheModule = new Module(ModuleID, Context); - + SmallVector Record; std::vector SectionTable; std::vector GCTable; @@ -1122,7 +1269,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { std::vector().swap(FunctionsWithBodies); return false; } - + if (Code == bitc::ENTER_SUBBLOCK) { switch (Stream.ReadSubBlockID()) { default: // Skip unknown content. @@ -1153,6 +1300,10 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { if (ParseConstants() || ResolveGlobalAndAliasInits()) return true; break; + case bitc::METADATA_BLOCK_ID: + if (ParseMetadata()) + return true; + break; case bitc::FUNCTION_BLOCK_ID: // If this is the first function body we've seen, reverse the // FunctionsWithBodies list. @@ -1160,19 +1311,19 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); HasReversedFunctionsWithBodies = true; } - + if (RememberAndSkipFunctionBody()) return true; break; } continue; } - + if (Code == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); continue; } - + // Read a record. switch (Stream.ReadRecord(Code, Record)) { default: break; // Default behavior, ignore unknown content. @@ -1235,7 +1386,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { return Error("Global not a pointer type!"); unsigned AddressSpace = cast(Ty)->getAddressSpace(); Ty = cast(Ty)->getElementType(); - + bool isConstant = Record[1]; GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]); unsigned Alignment = (1 << Record[4]) >> 1; @@ -1253,16 +1404,16 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { isThreadLocal = Record[7]; GlobalVariable *NewGV = - new GlobalVariable(Ty, isConstant, Linkage, 0, "", TheModule, + new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0, isThreadLocal, AddressSpace); NewGV->setAlignment(Alignment); if (!Section.empty()) NewGV->setSection(Section); NewGV->setVisibility(Visibility); NewGV->setThreadLocal(isThreadLocal); - + ValueList.push_back(NewGV); - + // Remember which value to use for the global initializer. if (unsigned InitID = Record[2]) GlobalInits.push_back(std::make_pair(NewGV, InitID-1)); @@ -1284,11 +1435,11 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); - Func->setCallingConv(Record[1]); + Func->setCallingConv(static_cast(Record[1])); bool isProto = Record[2]; Func->setLinkage(GetDecodedLinkage(Record[3])); Func->setAttributes(getAttributes(Record[4])); - + Func->setAlignment((1 << Record[5]) >> 1); if (Record[6]) { if (Record[6]-1 >= SectionTable.size()) @@ -1302,7 +1453,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { Func->setGC(GCTable[Record[8]-1].c_str()); } ValueList.push_back(Func); - + // If this is a function with a body, remember the prototype we are // creating now, so that we can match up the body with them later. if (!isProto) @@ -1317,7 +1468,7 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { const Type *Ty = getTypeByID(Record[0]); if (!isa(Ty)) return Error("Function not a pointer type!"); - + GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), "", 0, TheModule); // Old bitcode files didn't have visibility field. 
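Both the GLOBALVAR and FUNCTION records decoded above store alignment as log2(alignment) + 1, so that 0 can mean "no alignment specified"; the expression (1 << enc) >> 1 then maps 0 back to 0 and k+1 back to 1 << k. A round-trip sketch follows; encodeAlign and decodeAlign are invented names for what the writer (Log2_32(Alignment) + 1) and the reader do.

#include <cassert>

// Alignment is either 0 (unspecified) or a power of two.
static unsigned encodeAlign(unsigned Align) {
  unsigned Log2 = 0;
  while ((1u << Log2) < Align) ++Log2;
  return Align == 0 ? 0 : Log2 + 1;
}

// (1 << enc) >> 1 maps 0 -> 0 and k+1 -> 1 << k, exactly as the
// GLOBALVAR and FUNCTION record handling above does.
static unsigned decodeAlign(unsigned Enc) { return (1u << Enc) >> 1; }

int main() {
  assert(decodeAlign(0) == 0);               // unspecified stays 0
  assert(decodeAlign(encodeAlign(1)) == 1);
  assert(decodeAlign(encodeAlign(8)) == 8);
  assert(decodeAlign(encodeAlign(4096)) == 4096);
}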
@@ -1337,28 +1488,28 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { } Record.clear(); } - + return Error("Premature end of bitstream"); } bool BitcodeReader::ParseBitcode() { TheModule = 0; - + if (Buffer->getBufferSize() & 3) return Error("Bitcode stream should be a multiple of 4 bytes in length"); - + unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); - + // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. if (isBitcodeWrapper(BufPtr, BufEnd)) if (SkipBitcodeWrapperHeader(BufPtr, BufEnd)) return Error("Invalid bitcode wrapper header"); - + StreamFile.init(BufPtr, BufEnd); Stream.init(StreamFile); - + // Sniff for the signature. if (Stream.Read(8) != 'B' || Stream.Read(8) != 'C' || @@ -1367,17 +1518,17 @@ Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) return Error("Invalid bitcode signature"); - + // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (!Stream.AtEndOfStream()) { unsigned Code = Stream.ReadCode(); - + if (Code != bitc::ENTER_SUBBLOCK) return Error("Invalid record at top-level"); - + unsigned BlockID = Stream.ReadSubBlockID(); - + // We only know the MODULE subblock ID. switch (BlockID) { case bitc::BLOCKINFO_BLOCK_ID: @@ -1394,22 +1545,61 @@ break; } } - + return false; } +/// ParseMetadataAttachment - Parse metadata attachments. +bool BitcodeReader::ParseMetadataAttachment() { + if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) + return Error("Malformed block record"); + + MetadataContext &TheMetadata = Context.getMetadata(); + SmallVector<uint64_t, 64> Record; + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of METADATA_ATTACHMENT block"); + break; + } + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + // Read a metadata attachment record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: ignore. + break; + case bitc::METADATA_ATTACHMENT: { + unsigned RecordLength = Record.size(); + if (Record.empty() || (RecordLength - 1) % 2 == 1) + return Error("Invalid METADATA_ATTACHMENT record!"); + Instruction *Inst = InstructionList[Record[0]]; + for (unsigned i = 1; i != RecordLength; i = i+2) { + unsigned Kind = Record[i]; + Value *Node = MDValueList.getValueFwdRef(Record[i+1]); + TheMetadata.addMD(Kind, cast<MDNode>(Node), Inst); + } + break; + } + } + } + return false; +} /// ParseFunctionBody - Lazily parse the specified function body block. bool BitcodeReader::ParseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) return Error("Malformed block record"); - + unsigned ModuleValueListSize = ValueList.size(); - + // Add all the function arguments to the value table. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) ValueList.push_back(I); - + unsigned NextValueNo = ValueList.size(); BasicBlock *CurBB = 0; unsigned CurBBNo = 0; @@ -1423,7 +1613,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return Error("Error at end of function block"); break; } - + if (Code == bitc::ENTER_SUBBLOCK) { switch (Stream.ReadSubBlockID()) { default: // Skip unknown content.
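The signature check above reads 'B' and 'C' as two 8-bit fields and then 0x0, 0xC, 0xE, 0xD as four 4-bit fields; because the bitstream fills bytes from the least significant bit upward, the file's first four bytes come out as 'B', 'C', 0xC0, 0xDE. The wrapper variant instead begins with the little-endian magic 0x0B17C0DE. A byte-level sniffing sketch, assuming a little-endian host; these helpers mirror, but are not, the real isBitcodeWrapper/SkipBitcodeWrapperHeader:

#include <cassert>
#include <cstdint>
#include <cstring>

// Raw bitcode starts with 'B','C',0xC0,0xDE (the 0x0,0xC,0xE,0xD nibbles
// packed LSB-first into two bytes).
static bool isRawBitcode(const unsigned char *P, size_t N) {
  return N >= 4 && P[0] == 'B' && P[1] == 'C' &&
         P[2] == 0xC0 && P[3] == 0xDE;
}

// A wrapped file starts with the little-endian magic 0x0B17C0DE, which
// the reader strips before looking for 'BC' again.
static bool isBitcodeWrapper(const unsigned char *P, size_t N) {
  uint32_t Magic;
  if (N < 4) return false;
  std::memcpy(&Magic, P, 4);  // assumes a little-endian host
  return Magic == 0x0B17C0DEu;
}

int main() {
  const unsigned char Raw[] = {'B', 'C', 0xC0, 0xDE};
  const unsigned char Wrapped[] = {0xDE, 0xC0, 0x17, 0x0B};
  assert(isRawBitcode(Raw, 4) && !isRawBitcode(Wrapped, 4));
  assert(isBitcodeWrapper(Wrapped, 4) && !isBitcodeWrapper(Raw, 4));
}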
@@ -1437,19 +1627,23 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::VALUE_SYMTAB_BLOCK_ID: if (ParseValueSymbolTable()) return true; break; + case bitc::METADATA_ATTACHMENT_ID: + if (ParseMetadataAttachment()) return true; + break; } continue; } - + if (Code == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); continue; } - + // Read a record. Record.clear(); Instruction *I = 0; - switch (Stream.ReadRecord(Code, Record)) { + unsigned BitCode = Stream.ReadRecord(Code, Record); + switch (BitCode) { default: // Default behavior: reject return Error("Unknown instruction"); case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks] @@ -1458,21 +1652,35 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { // Create all the basic blocks for the function. FunctionBBs.resize(Record[0]); for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) - FunctionBBs[i] = BasicBlock::Create("", F); + FunctionBBs[i] = BasicBlock::Create(Context, "", F); CurBB = FunctionBBs[0]; continue; - + case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode] unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || getValue(Record, OpNum, LHS->getType(), RHS) || - OpNum+1 != Record.size()) + OpNum+1 > Record.size()) return Error("Invalid BINOP record"); - - int Opc = GetDecodedBinaryOpcode(Record[OpNum], LHS->getType()); + + int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); if (Opc == -1) return Error("Invalid BINOP record"); I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + InstructionList.push_back(I); + if (OpNum < Record.size()) { + if (Opc == Instruction::Add || + Opc == Instruction::Sub || + Opc == Instruction::Mul) { + if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP)) + cast(I)->setHasNoSignedWrap(true); + if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) + cast(I)->setHasNoUnsignedWrap(true); + } else if (Opc == Instruction::SDiv) { + if (Record[3] & (1 << bitc::SDIV_EXACT)) + cast(I)->setIsExact(true); + } + } break; } case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc] @@ -1481,14 +1689,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) return Error("Invalid CAST record"); - + const Type *ResTy = getTypeByID(Record[OpNum]); int Opc = GetDecodedCastOpcode(Record[OpNum+1]); if (Opc == -1 || ResTy == 0) return Error("Invalid CAST record"); I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + InstructionList.push_back(I); break; } + case bitc::FUNC_CODE_INST_INBOUNDS_GEP: case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands] unsigned OpNum = 0; Value *BasePtr; @@ -1504,9 +1714,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end()); + InstructionList.push_back(I); + if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP) + cast(I)->setIsInBounds(true); break; } - + case bitc::FUNC_CODE_INST_EXTRACTVAL: { // EXTRACTVAL: [opty, opval, n x indices] unsigned OpNum = 0; @@ -1525,9 +1738,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = ExtractValueInst::Create(Agg, EXTRACTVALIdx.begin(), EXTRACTVALIdx.end()); + InstructionList.push_back(I); break; } - + case bitc::FUNC_CODE_INST_INSERTVAL: { // INSERTVAL: [opty, opval, opty, opval, n x indices] unsigned OpNum = 0; @@ -1549,9 +1763,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = InsertValueInst::Create(Agg, Val, INSERTVALIdx.begin(), 
INSERTVALIdx.end()); + InstructionList.push_back(I); break; } - + case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval] // obsolete form of select // handles select i1 ... in old bitcode @@ -1559,13 +1774,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || getValue(Record, OpNum, TrueVal->getType(), FalseVal) || - getValue(Record, OpNum, Type::Int1Ty, Cond)) + getValue(Record, OpNum, Type::getInt1Ty(Context), Cond)) return Error("Invalid SELECT record"); - + I = SelectInst::Create(Cond, TrueVal, FalseVal); + InstructionList.push_back(I); break; } - + case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred] // new form of select // handles select i1 or select [N x i1] @@ -1580,40 +1796,43 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (const VectorType* vector_type = dyn_cast(Cond->getType())) { // expect - if (vector_type->getElementType() != Type::Int1Ty) + if (vector_type->getElementType() != Type::getInt1Ty(Context)) return Error("Invalid SELECT condition type"); } else { // expect i1 - if (Cond->getType() != Type::Int1Ty) + if (Cond->getType() != Type::getInt1Ty(Context)) return Error("Invalid SELECT condition type"); - } - + } + I = SelectInst::Create(Cond, TrueVal, FalseVal); + InstructionList.push_back(I); break; } - + case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval] unsigned OpNum = 0; Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - getValue(Record, OpNum, Type::Int32Ty, Idx)) + getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) return Error("Invalid EXTRACTELT record"); - I = new ExtractElementInst(Vec, Idx); + I = ExtractElementInst::Create(Vec, Idx); + InstructionList.push_back(I); break; } - + case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval] unsigned OpNum = 0; Value *Vec, *Elt, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - getValue(Record, OpNum, + getValue(Record, OpNum, cast(Vec->getType())->getElementType(), Elt) || - getValue(Record, OpNum, Type::Int32Ty, Idx)) + getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) return Error("Invalid INSERTELT record"); I = InsertElementInst::Create(Vec, Elt, Idx); + InstructionList.push_back(I); break; } - + case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval] unsigned OpNum = 0; Value *Vec1, *Vec2, *Mask; @@ -1624,44 +1843,32 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) return Error("Invalid SHUFFLEVEC record"); I = new ShuffleVectorInst(Vec1, Vec2, Mask); + InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_CMP: { // CMP: [opty, opval, opval, pred] - // VFCmp/VICmp - // or old form of ICmp/FCmp returning bool - unsigned OpNum = 0; - Value *LHS, *RHS; - if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - getValue(Record, OpNum, LHS->getType(), RHS) || - OpNum+1 != Record.size()) - return Error("Invalid CMP record"); - - if (LHS->getType()->isFloatingPoint()) - I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); - else if (!isa(LHS->getType())) - I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); - else if (LHS->getType()->isFPOrFPVector()) - I = new VFCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); - else - I = new VICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); - break; - } + case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, 
opval, opval, pred] + // Old form of ICmp/FCmp returning bool + // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were + // both legal on vectors but had different behaviour. case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred] - // Fcmp/ICmp returning bool or vector of bool + // FCmp/ICmp returning bool or vector of bool + unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || getValue(Record, OpNum, LHS->getType(), RHS) || OpNum+1 != Record.size()) - return Error("Invalid CMP2 record"); - + return Error("Invalid CMP record"); + if (LHS->getType()->isFPOrFPVector()) I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); - else + else I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + InstructionList.push_back(I); break; } + case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n] if (Record.size() != 2) return Error("Invalid GETRESULT record"); @@ -1670,14 +1877,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { getValueTypePair(Record, OpNum, NextValueNo, Op); unsigned Index = Record[1]; I = ExtractValueInst::Create(Op, Index); + InstructionList.push_back(I); break; } - + case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval] { unsigned Size = Record.size(); if (Size == 0) { - I = ReturnInst::Create(); + I = ReturnInst::Create(Context); + InstructionList.push_back(I); break; } @@ -1697,15 +1906,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *RV = UndefValue::get(ReturnType); for (unsigned i = 0, e = Vs.size(); i != e; ++i) { I = InsertValueInst::Create(RV, Vs[i], i, "mrv"); + InstructionList.push_back(I); CurBB->getInstList().push_back(I); ValueList.AssignValue(I, NextValueNo++); RV = I; } - I = ReturnInst::Create(RV); + I = ReturnInst::Create(Context, RV); + InstructionList.push_back(I); break; } - I = ReturnInst::Create(Vs[0]); + I = ReturnInst::Create(Context, Vs[0]); + InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] @@ -1715,14 +1927,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (TrueDest == 0) return Error("Invalid BR record"); - if (Record.size() == 1) + if (Record.size() == 1) { I = BranchInst::Create(TrueDest); + InstructionList.push_back(I); + } else { BasicBlock *FalseDest = getBasicBlock(Record[1]); - Value *Cond = getFnValueByID(Record[2], Type::Int1Ty); + Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context)); if (FalseDest == 0 || Cond == 0) return Error("Invalid BR record"); I = BranchInst::Create(TrueDest, FalseDest, Cond); + InstructionList.push_back(I); } break; } @@ -1736,8 +1951,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return Error("Invalid SWITCH record"); unsigned NumCases = (Record.size()-3)/2; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); + InstructionList.push_back(SI); for (unsigned i = 0, e = NumCases; i != e; ++i) { - ConstantInt *CaseVal = + ConstantInt *CaseVal = dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); if (CaseVal == 0 || DestBB == 0) { @@ -1749,7 +1965,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = SI; break; } - + case bitc::FUNC_CODE_INST_INVOKE: { // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] 
if (Record.size() < 4) return Error("Invalid INVOKE record"); @@ -1757,12 +1973,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned CCInfo = Record[1]; BasicBlock *NormalBB = getBasicBlock(Record[2]); BasicBlock *UnwindBB = getBasicBlock(Record[3]); - + unsigned OpNum = 4; Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return Error("Invalid INVOKE record"); - + const PointerType *CalleeTy = dyn_cast(Callee->getType()); const FunctionType *FTy = !CalleeTy ? 0 : dyn_cast(CalleeTy->getElementType()); @@ -1771,13 +1987,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 || Record.size() < OpNum+FTy->getNumParams()) return Error("Invalid INVOKE record"); - + SmallVector Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); if (Ops.back() == 0) return Error("Invalid INVOKE record"); } - + if (!FTy->isVarArg()) { if (Record.size() != OpNum) return Error("Invalid INVOKE record"); @@ -1790,28 +2006,33 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Ops.push_back(Op); } } - + I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops.begin(), Ops.end()); - cast(I)->setCallingConv(CCInfo); + InstructionList.push_back(I); + cast(I)->setCallingConv( + static_cast(CCInfo)); cast(I)->setAttributes(PAL); break; } case bitc::FUNC_CODE_INST_UNWIND: // UNWIND - I = new UnwindInst(); + I = new UnwindInst(Context); + InstructionList.push_back(I); break; case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE - I = new UnreachableInst(); + I = new UnreachableInst(Context); + InstructionList.push_back(I); break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.size() < 1 || ((Record.size()-1)&1)) return Error("Invalid PHI record"); const Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error("Invalid PHI record"); - + PHINode *PN = PHINode::Create(Ty); + InstructionList.push_back(PN); PN->reserveOperandSpace((Record.size()-1)/2); - + for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { Value *V = getFnValueByID(Record[1+i], Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); @@ -1821,16 +2042,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = PN; break; } - + case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align] if (Record.size() < 3) return Error("Invalid MALLOC record"); const PointerType *Ty = dyn_cast_or_null(getTypeByID(Record[0])); - Value *Size = getFnValueByID(Record[1], Type::Int32Ty); + Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context)); unsigned Align = Record[2]; if (!Ty || !Size) return Error("Invalid MALLOC record"); I = new MallocInst(Ty->getElementType(), Size, (1 << Align) >> 1); + InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty] @@ -1840,6 +2062,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { OpNum != Record.size()) return Error("Invalid FREE record"); I = new FreeInst(Op); + InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, op, align] @@ -1847,10 +2070,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return Error("Invalid ALLOCA record"); const PointerType *Ty = dyn_cast_or_null(getTypeByID(Record[0])); - Value *Size = getFnValueByID(Record[1], Type::Int32Ty); + Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context)); unsigned Align = Record[2]; if (!Ty || !Size) return Error("Invalid ALLOCA record"); I = new 
AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); + InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol] @@ -1859,20 +2083,22 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) return Error("Invalid LOAD record"); - + I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1); + InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || - getValue(Record, OpNum, + getValue(Record, OpNum, cast(Ptr->getType())->getElementType(), Val) || OpNum+2 != Record.size()) return Error("Invalid STORE record"); - + I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); + InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol] @@ -1880,32 +2106,34 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Val) || - getValue(Record, OpNum, PointerType::getUnqual(Val->getType()), Ptr)|| + getValue(Record, OpNum, + PointerType::getUnqual(Val->getType()), Ptr)|| OpNum+2 != Record.size()) return Error("Invalid STORE record"); - + I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); + InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] if (Record.size() < 3) return Error("Invalid CALL record"); - + AttrListPtr PAL = getAttributes(Record[0]); unsigned CCInfo = Record[1]; - + unsigned OpNum = 2; Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return Error("Invalid CALL record"); - + const PointerType *OpTy = dyn_cast(Callee->getType()); const FunctionType *FTy = 0; if (OpTy) FTy = dyn_cast(OpTy->getElementType()); if (!FTy || Record.size() < FTy->getNumParams()+OpNum) return Error("Invalid CALL record"); - + SmallVector Args; // Read the fixed params. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { @@ -1915,7 +2143,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); if (Args.back() == 0) return Error("Invalid CALL record"); } - + // Read type/value pairs for varargs params. if (!FTy->isVarArg()) { if (OpNum != Record.size()) @@ -1928,9 +2156,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Args.push_back(Op); } } - + I = CallInst::Create(Callee, Args.begin(), Args.end()); - cast(I)->setCallingConv(CCInfo>>1); + InstructionList.push_back(I); + cast(I)->setCallingConv( + static_cast(CCInfo>>1)); cast(I)->setTailCall(CCInfo & 1); cast(I)->setAttributes(PAL); break; @@ -1944,6 +2174,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (!OpTy || !Op || !ResTy) return Error("Invalid VAARG record"); I = new VAArgInst(Op, ResTy); + InstructionList.push_back(I); break; } } @@ -1955,18 +2186,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return Error("Invalid instruction with no BB"); } CurBB->getInstList().push_back(I); - + // If this was a terminator instruction, move to the next block. if (isa(I)) { ++CurBBNo; CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0; } - + // Non-void values get registered in the value table for future use. 
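
The LOAD, STORE, ALLOCA and MALLOC cases above all recover alignment with (1 << A) >> 1 because the writer stores log2(alignment)+1 and reserves 0 for "no alignment specified". A stand-alone round trip of that encoding; the helper names are invented:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Writer side: store log2(alignment) + 1, with 0 meaning "no alignment".
    static uint64_t encodeAlign(uint64_t Align) {
      if (Align == 0) return 0;
      unsigned Log2 = 0;
      while ((1ULL << Log2) < Align) ++Log2;    // Align is assumed a power of two
      return Log2 + 1;
    }

    // Reader side: (1 << A) >> 1 maps 0 back to 0 and k+1 back to 2^k.
    static uint64_t decodeAlign(uint64_t A) { return (1ULL << A) >> 1; }

    int main() {
      for (uint64_t Align : {0ULL, 1ULL, 4ULL, 16ULL, 4096ULL})
        assert(decodeAlign(encodeAlign(Align)) == Align);
      return 0;
    }
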
- if (I && I->getType() != Type::VoidTy) + if (I && I->getType() != Type::getVoidTy(Context)) ValueList.AssignValue(I, NextValueNo++); } - + // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (A->getParent() == 0) { @@ -1980,11 +2211,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return Error("Never resolved value found in function!"); } } - + // Trim the value list down to the size it was before we parsed this function. ValueList.shrinkTo(ModuleValueListSize); std::vector().swap(FunctionBBs); - + return false; } @@ -1996,16 +2227,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) { // If it already is material, ignore the request. if (!F->hasNotBeenReadFromBitcode()) return false; - - DenseMap >::iterator DFII = + + DenseMap >::iterator DFII = DeferredFunctionInfo.find(F); assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); - + // Move the bit stream to the saved position of the deferred function body and // restore the real linkage type for the function. Stream.JumpToBit(DFII->second.first); F->setLinkage((GlobalValue::LinkageTypes)DFII->second.second); - + if (ParseFunctionBody(F)) { if (ErrInfo) *ErrInfo = ErrorString; return true; @@ -2022,7 +2253,7 @@ bool BitcodeReader::materializeFunction(Function *F, std::string *ErrInfo) { } } } - + return false; } @@ -2030,9 +2261,9 @@ void BitcodeReader::dematerializeFunction(Function *F) { // If this function isn't materialized, or if it is a proto, this is a noop. if (F->hasNotBeenReadFromBitcode() || F->isDeclaration()) return; - + assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); - + // Just forget the function body, we can remat it later. F->deleteBody(); F->setLinkage(GlobalValue::GhostLinkage); @@ -2048,9 +2279,9 @@ Module *BitcodeReader::materializeModule(std::string *ErrInfo) { materializeFunction(F, ErrInfo)) return 0; - // Upgrade any intrinsic calls that slipped through (should not happen!) and - // delete the old functions to clean up. We can't do this unless the entire - // module is materialized because there could always be another function body + // Upgrade any intrinsic calls that slipped through (should not happen!) and + // delete the old functions to clean up. We can't do this unless the entire + // module is materialized because there could always be another function body // with calls to the old function. for (std::vector >::iterator I = UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) { @@ -2066,7 +2297,10 @@ Module *BitcodeReader::materializeModule(std::string *ErrInfo) { } } std::vector >().swap(UpgradedIntrinsics); - + + // Check debug info intrinsics. + CheckDebugInfoIntrinsics(TheModule); + return TheModule; } @@ -2096,7 +2330,7 @@ ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer, if (R->ParseBitcode()) { if (ErrMsg) *ErrMsg = R->getErrorString(); - + // Don't let the BitcodeReader dtor delete 'Buffer'. R->releaseMemoryBuffer(); delete R; @@ -2107,25 +2341,25 @@ ModuleProvider *llvm::getBitcodeModuleProvider(MemoryBuffer *Buffer, /// ParseBitcodeFile - Read the specified bitcode file, returning the module. /// If an error occurs, return null and fill in *ErrMsg if non-null. 
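
materializeFunction and dematerializeFunction above implement lazy deserialization: the reader records where each function body starts in the bitstream, jumps back and parses it on first use, and can later drop the body again (GhostLinkage marks it as re-readable). A sketch of that bookkeeping under those assumptions; every name here (LazyReader, Body, remember) is invented and stands in for DeferredFunctionInfo and friends:

    #include <cstdint>
    #include <map>
    #include <string>

    struct Body { uint64_t BitPos; bool Parsed; };   // saved stream position + state

    struct LazyReader {
      std::map<std::string, Body> Deferred;          // stand-in for DeferredFunctionInfo

      // Called while scanning the module: note the body's offset and skip it.
      void remember(const std::string &Fn, uint64_t BitPos) {
        Deferred[Fn] = Body{BitPos, false};
      }
      // Called on first use: jump back to the saved offset and parse for real.
      void materialize(const std::string &Fn) {
        Body &B = Deferred[Fn];
        if (B.Parsed) return;                        // already material: nothing to do
        // ... Stream.JumpToBit(B.BitPos) and ParseFunctionBody() would go here ...
        B.Parsed = true;
      }
      // Drop the body again; the saved offset lets us re-parse it later.
      void dematerialize(const std::string &Fn) { Deferred[Fn].Parsed = false; }
    };

    int main() {
      LazyReader R;
      R.remember("f", 1024);
      R.materialize("f");
      R.dematerialize("f");
      return 0;
    }
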
-Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, +Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, std::string *ErrMsg){ BitcodeReader *R; - R = static_cast(getBitcodeModuleProvider(Buffer, Context, + R = static_cast(getBitcodeModuleProvider(Buffer, Context, ErrMsg)); if (!R) return 0; - + // Read in the entire module. Module *M = R->materializeModule(ErrMsg); // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether // there was an error. R->releaseMemoryBuffer(); - + // If there was no error, tell ModuleProvider not to delete it when its dtor // is run. if (M) M = R->releaseModule(ErrMsg); - + delete R; return M; } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 662631bce9507..eefc7bdc28a8e 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -44,8 +44,9 @@ class BitcodeReaderValueList { /// number that holds the resolved value. typedef std::vector > ResolveConstantsTy; ResolveConstantsTy ResolveConstants; + LLVMContext& Context; public: - BitcodeReaderValueList() {} + BitcodeReaderValueList(LLVMContext& C) : Context(C) {} ~BitcodeReaderValueList() { assert(ResolveConstants.empty() && "Constants not resolved?"); } @@ -85,6 +86,41 @@ public: void ResolveConstantForwardRefs(); }; + +//===----------------------------------------------------------------------===// +// BitcodeReaderMDValueList Class +//===----------------------------------------------------------------------===// + +class BitcodeReaderMDValueList { + std::vector MDValuePtrs; + + LLVMContext& Context; +public: + BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {} + + // vector compatibility methods + unsigned size() const { return MDValuePtrs.size(); } + void resize(unsigned N) { MDValuePtrs.resize(N); } + void push_back(Value *V) { MDValuePtrs.push_back(V); } + void clear() { MDValuePtrs.clear(); } + Value *back() const { return MDValuePtrs.back(); } + void pop_back() { MDValuePtrs.pop_back(); } + bool empty() const { return MDValuePtrs.empty(); } + + Value *operator[](unsigned i) const { + assert(i < MDValuePtrs.size()); + return MDValuePtrs[i]; + } + + void shrinkTo(unsigned N) { + assert(N <= size() && "Invalid shrinkTo request!"); + MDValuePtrs.resize(N); + } + + Value *getValueFwdRef(unsigned Idx); + void AssignValue(Value *V, unsigned Idx); +}; + class BitcodeReader : public ModuleProvider { LLVMContext& Context; MemoryBuffer *Buffer; @@ -95,6 +131,9 @@ class BitcodeReader : public ModuleProvider { std::vector TypeList; BitcodeReaderValueList ValueList; + BitcodeReaderMDValueList MDValueList; + SmallVector InstructionList; + std::vector > GlobalInits; std::vector > AliasInits; @@ -126,7 +165,7 @@ class BitcodeReader : public ModuleProvider { DenseMap > DeferredFunctionInfo; public: explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext& C) - : Context(C), Buffer(buffer), ErrorString(0) { + : Context(C), Buffer(buffer), ErrorString(0), ValueList(C), MDValueList(C) { HasReversedFunctionsWithBodies = false; } ~BitcodeReader() { @@ -159,7 +198,10 @@ public: private: const Type *getTypeByID(unsigned ID, bool isTypeTable = false); Value *getFnValueByID(unsigned ID, const Type *Ty) { - return ValueList.getValueFwdRef(ID, Ty); + if (Ty == Type::getMetadataTy(Context)) + return MDValueList.getValueFwdRef(ID); + else + return ValueList.getValueFwdRef(ID, Ty); } BasicBlock *getBasicBlock(unsigned ID) const { if (ID >= FunctionBBs.size()) return 0; // Invalid ID @@ -209,6 
+251,8 @@ private: bool RememberAndSkipFunctionBody(); bool ParseFunctionBody(Function *F); bool ResolveGlobalAndAliasInits(); + bool ParseMetadata(); + bool ParseMetadataAttachment(); }; } // End llvm namespace diff --git a/lib/Bitcode/Reader/Deserialize.cpp b/lib/Bitcode/Reader/Deserialize.cpp index 06da6ce727219..67607efae08a5 100644 --- a/lib/Bitcode/Reader/Deserialize.cpp +++ b/lib/Bitcode/Reader/Deserialize.cpp @@ -12,11 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Bitcode/Deserialize.h" - -#ifdef DEBUG_BACKPATCH -#include "llvm/Support/Streams.h" -#endif - +#include "llvm/Support/raw_ostream.h" using namespace llvm; Deserializer::Deserializer(BitstreamReader& stream) @@ -357,7 +353,7 @@ void Deserializer::RegisterPtr(const SerializedPtrID& PtrId, assert (!HasFinalPtr(E) && "Pointer already registered."); #ifdef DEBUG_BACKPATCH - llvm::cerr << "RegisterPtr: " << PtrId << " => " << Ptr << "\n"; + errs() << "RegisterPtr: " << PtrId << " => " << Ptr << "\n"; #endif SetPtr(E,Ptr); @@ -377,8 +373,8 @@ void Deserializer::ReadUIntPtr(uintptr_t& PtrRef, PtrRef = GetFinalPtr(E); #ifdef DEBUG_BACKPATCH - llvm::cerr << "ReadUintPtr: " << PtrId - << " <-- " << (void*) GetFinalPtr(E) << '\n'; + errs() << "ReadUintPtr: " << PtrId + << " <-- " << (void*) GetFinalPtr(E) << '\n'; #endif } else { @@ -386,7 +382,7 @@ void Deserializer::ReadUIntPtr(uintptr_t& PtrRef, "Client forbids backpatching for this pointer."); #ifdef DEBUG_BACKPATCH - llvm::cerr << "ReadUintPtr: " << PtrId << " (NO PTR YET)\n"; + errs() << "ReadUintPtr: " << PtrId << " (NO PTR YET)\n"; #endif // Register backpatch. Check the freelist for a BPNode. diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 8834964b040ce..7ed651b77e2ef 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -9,43 +9,31 @@ #include "llvm-c/BitWriter.h" #include "llvm/Bitcode/ReaderWriter.h" -#include - +#include "llvm/Support/raw_ostream.h" using namespace llvm; /*===-- Operations on modules ---------------------------------------------===*/ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { - std::ofstream OS(Path, std::ios_base::out|std::ios::trunc|std::ios::binary); - - if (!OS.fail()) - WriteBitcodeToFile(unwrap(M), OS); + std::string ErrorInfo; + raw_fd_ostream OS(Path, ErrorInfo, + raw_fd_ostream::F_Binary); - if (OS.fail()) + if (!ErrorInfo.empty()) return -1; + WriteBitcodeToFile(unwrap(M), OS); return 0; } #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR >= 4) #include -// FIXME: Control this with configure? Provide some portable abstraction in -// libSystem? As is, the user will just get a linker error if they use this on -// non-GCC. Some C++ stdlibs even have ofstream::ofstream(int fd). 
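
From the caller's side, the rewritten LLVMWriteBitcodeToFile above keeps its contract: 0 on success, -1 if the raw_fd_ostream reported an error. A usage sketch, assuming an LLVM checkout of this vintage; the module name and error message are invented:

    #include "llvm-c/Core.h"
    #include "llvm-c/BitWriter.h"
    #include <stdio.h>

    int main() {
      LLVMModuleRef M = LLVMModuleCreateWithName("demo");  // empty module
      int rc = 0;
      if (LLVMWriteBitcodeToFile(M, "demo.bc") != 0) {     // -1 on stream error
        fprintf(stderr, "failed to write demo.bc\n");
        rc = 1;
      }
      LLVMDisposeModule(M);
      return rc;
    }
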
int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { - __gnu_cxx::stdio_filebuf Buffer(FileHandle, std::ios_base::out | - std::ios::trunc | - std::ios::binary); - std::ostream OS(&Buffer); - - if (!OS.fail()) - WriteBitcodeToFile(unwrap(M), OS); - - if (OS.fail()) - return -1; + raw_fd_ostream OS(FileHandle, false); + WriteBitcodeToFile(unwrap(M), OS); return 0; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 6dcddedef1caf..12a1f5ea5dc2e 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -19,12 +19,13 @@ #include "llvm/DerivedTypes.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" -#include "llvm/MDNode.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/TypeSymbolTable.h" #include "llvm/ValueSymbolTable.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Program.h" using namespace llvm; @@ -33,22 +34,23 @@ using namespace llvm; /// be kept in sync with the reader, but need to be consistent within this file. enum { CurVersion = 0, - + // VALUE_SYMTAB_BLOCK abbrev id's. VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV, VST_ENTRY_7_ABBREV, VST_ENTRY_6_ABBREV, VST_BBENTRY_6_ABBREV, - + // CONSTANTS_BLOCK abbrev id's. CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV, CONSTANTS_INTEGER_ABBREV, CONSTANTS_CE_CAST_Abbrev, CONSTANTS_NULL_Abbrev, - + // FUNCTION_BLOCK abbrev id's. FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV, FUNCTION_INST_BINOP_ABBREV, + FUNCTION_INST_BINOP_FLAGS_ABBREV, FUNCTION_INST_CAST_ABBREV, FUNCTION_INST_RET_VOID_ABBREV, FUNCTION_INST_RET_VAL_ABBREV, @@ -58,7 +60,7 @@ enum { static unsigned GetEncodedCastOpcode(unsigned Opcode) { switch (Opcode) { - default: assert(0 && "Unknown cast instruction!"); + default: llvm_unreachable("Unknown cast instruction!"); case Instruction::Trunc : return bitc::CAST_TRUNC; case Instruction::ZExt : return bitc::CAST_ZEXT; case Instruction::SExt : return bitc::CAST_SEXT; @@ -76,7 +78,7 @@ static unsigned GetEncodedCastOpcode(unsigned Opcode) { static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { switch (Opcode) { - default: assert(0 && "Unknown binary instruction!"); + default: llvm_unreachable("Unknown binary instruction!"); case Instruction::Add: case Instruction::FAdd: return bitc::BINOP_ADD; case Instruction::Sub: @@ -100,24 +102,24 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { -static void WriteStringRecord(unsigned Code, const std::string &Str, +static void WriteStringRecord(unsigned Code, const std::string &Str, unsigned AbbrevToUse, BitstreamWriter &Stream) { SmallVector Vals; - + // Code: [strchar x N] for (unsigned i = 0, e = Str.size(); i != e; ++i) Vals.push_back(Str[i]); - + // Emit the finished record. Stream.EmitRecord(Code, Vals, AbbrevToUse); } // Emit information about parameter attributes. 
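
GetEncodedCastOpcode and GetEncodedBinaryOpcode above exist because in-memory Instruction opcode numbers may be renumbered between releases, while the bitc::CAST_*/BINOP_* codes are frozen on disk. A stand-alone illustration of that indirection; both enums here are invented stand-ins, not the library's headers:

    #include <cstdlib>

    enum InMemoryOp { OpTrunc = 38, OpZExt = 39 };      // free to change per release
    enum OnDiskCode { CAST_TRUNC = 0, CAST_ZEXT = 1 };  // frozen: readers rely on it

    static unsigned encodeCast(unsigned Opcode) {
      switch (Opcode) {
      case OpTrunc: return CAST_TRUNC;
      case OpZExt:  return CAST_ZEXT;
      default: abort();   // the writer reaches llvm_unreachable() here instead
      }
    }

    int main() { return encodeCast(OpTrunc); }  // returns 0 (CAST_TRUNC)
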
-static void WriteAttributeTable(const ValueEnumerator &VE, +static void WriteAttributeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &Attrs = VE.getAttributes(); if (Attrs.empty()) return; - + Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); SmallVector Record; @@ -138,21 +140,21 @@ static void WriteAttributeTable(const ValueEnumerator &VE, Record.push_back(FauxAttr); } - + Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); } - + Stream.ExitBlock(); } /// WriteTypeTable - Write out the type table for a module. static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const ValueEnumerator::TypeList &TypeList = VE.getTypes(); - + Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */); SmallVector TypeVals; - + // Abbrev for TYPE_CODE_POINTER. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER)); @@ -160,7 +162,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0 unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv); - + // Abbrev for TYPE_CODE_FUNCTION. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); @@ -170,7 +172,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(VE.getTypes().size()+1))); unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv); - + // Abbrev for TYPE_CODE_STRUCT. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT)); @@ -179,7 +181,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(VE.getTypes().size()+1))); unsigned StructAbbrev = Stream.EmitAbbrev(Abbv); - + // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -187,20 +189,20 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(VE.getTypes().size()+1))); unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv); - + // Emit an entry count so the reader can reserve space. TypeVals.push_back(TypeList.size()); Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals); TypeVals.clear(); - + // Loop over all of the types, emitting each in turn. for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { const Type *T = TypeList[i].first; int AbbrevToUse = 0; unsigned Code = 0; - + switch (T->getTypeID()) { - default: assert(0 && "Unknown type!"); + default: llvm_unreachable("Unknown type!"); case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; @@ -272,33 +274,34 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.EmitRecord(Code, TypeVals, AbbrevToUse); TypeVals.clear(); } - + Stream.ExitBlock(); } static unsigned getEncodedLinkage(const GlobalValue *GV) { switch (GV->getLinkage()) { - default: assert(0 && "Invalid linkage!"); + default: llvm_unreachable("Invalid linkage!"); case GlobalValue::GhostLinkage: // Map ghost linkage onto external. 
- case GlobalValue::ExternalLinkage: return 0; - case GlobalValue::WeakAnyLinkage: return 1; - case GlobalValue::AppendingLinkage: return 2; - case GlobalValue::InternalLinkage: return 3; - case GlobalValue::LinkOnceAnyLinkage: return 4; - case GlobalValue::DLLImportLinkage: return 5; - case GlobalValue::DLLExportLinkage: return 6; - case GlobalValue::ExternalWeakLinkage: return 7; - case GlobalValue::CommonLinkage: return 8; - case GlobalValue::PrivateLinkage: return 9; - case GlobalValue::WeakODRLinkage: return 10; - case GlobalValue::LinkOnceODRLinkage: return 11; - case GlobalValue::AvailableExternallyLinkage: return 12; + case GlobalValue::ExternalLinkage: return 0; + case GlobalValue::WeakAnyLinkage: return 1; + case GlobalValue::AppendingLinkage: return 2; + case GlobalValue::InternalLinkage: return 3; + case GlobalValue::LinkOnceAnyLinkage: return 4; + case GlobalValue::DLLImportLinkage: return 5; + case GlobalValue::DLLExportLinkage: return 6; + case GlobalValue::ExternalWeakLinkage: return 7; + case GlobalValue::CommonLinkage: return 8; + case GlobalValue::PrivateLinkage: return 9; + case GlobalValue::WeakODRLinkage: return 10; + case GlobalValue::LinkOnceODRLinkage: return 11; + case GlobalValue::AvailableExternallyLinkage: return 12; + case GlobalValue::LinkerPrivateLinkage: return 13; } } static unsigned getEncodedVisibility(const GlobalValue *GV) { switch (GV->getVisibility()) { - default: assert(0 && "Invalid visibility!"); + default: llvm_unreachable("Invalid visibility!"); case GlobalValue::DefaultVisibility: return 0; case GlobalValue::HiddenVisibility: return 1; case GlobalValue::ProtectedVisibility: return 2; @@ -334,7 +337,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, GV != E; ++GV) { MaxAlignment = std::max(MaxAlignment, GV->getAlignment()); MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType())); - + if (!GV->hasSection()) continue; // Give section names unique ID's. unsigned &Entry = SectionMap[GV->getSection()]; @@ -364,10 +367,10 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, } } } - + // Emit abbrev for globals, now that we know # sections and max alignment. unsigned SimpleGVarAbbrev = 0; - if (!M->global_empty()) { + if (!M->global_empty()) { // Add an abbrev for common globals with no visibility or thread localness. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR)); @@ -391,14 +394,14 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Don't bother emitting vis + thread local. SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv); } - + // Emit the global variable information. SmallVector Vals; for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end(); GV != E; ++GV) { unsigned AbbrevToUse = 0; - // GLOBALVAR: [type, isconst, initid, + // GLOBALVAR: [type, isconst, initid, // linkage, alignment, section, visibility, threadlocal] Vals.push_back(VE.getTypeID(GV->getType())); Vals.push_back(GV->isConstant()); @@ -407,14 +410,14 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(getEncodedLinkage(GV)); Vals.push_back(Log2_32(GV->getAlignment())+1); Vals.push_back(GV->hasSection() ? 
SectionMap[GV->getSection()] : 0); - if (GV->isThreadLocal() || + if (GV->isThreadLocal() || GV->getVisibility() != GlobalValue::DefaultVisibility) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(GV->isThreadLocal()); } else { AbbrevToUse = SimpleGVarAbbrev; } - + Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse); Vals.clear(); } @@ -432,13 +435,13 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0); Vals.push_back(getEncodedVisibility(F)); Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0); - + unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse); Vals.clear(); } - - + + // Emit the alias information. for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end(); AI != E; ++AI) { @@ -452,20 +455,185 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, } } +static uint64_t GetOptimizationFlags(const Value *V) { + uint64_t Flags = 0; + + if (const OverflowingBinaryOperator *OBO = + dyn_cast(V)) { + if (OBO->hasNoSignedWrap()) + Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP; + if (OBO->hasNoUnsignedWrap()) + Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP; + } else if (const SDivOperator *Div = dyn_cast(V)) { + if (Div->isExact()) + Flags |= 1 << bitc::SDIV_EXACT; + } + + return Flags; +} + +static void WriteMDNode(const MDNode *N, + const ValueEnumerator &VE, + BitstreamWriter &Stream, + SmallVector &Record) { + for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) { + if (N->getElement(i)) { + Record.push_back(VE.getTypeID(N->getElement(i)->getType())); + Record.push_back(VE.getValueID(N->getElement(i))); + } else { + Record.push_back(VE.getTypeID(Type::getVoidTy(N->getContext()))); + Record.push_back(0); + } + } + Stream.EmitRecord(bitc::METADATA_NODE, Record, 0); + Record.clear(); +} + +static void WriteModuleMetadata(const ValueEnumerator &VE, + BitstreamWriter &Stream) { + const ValueEnumerator::ValueList &Vals = VE.getMDValues(); + bool StartedMetadataBlock = false; + unsigned MDSAbbrev = 0; + SmallVector Record; + for (unsigned i = 0, e = Vals.size(); i != e; ++i) { + + if (const MDNode *N = dyn_cast(Vals[i].first)) { + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + StartedMetadataBlock = true; + } + WriteMDNode(N, VE, Stream, Record); + } else if (const MDString *MDS = dyn_cast(Vals[i].first)) { + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + + // Abbrev for METADATA_STRING. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + MDSAbbrev = Stream.EmitAbbrev(Abbv); + StartedMetadataBlock = true; + } + + // Code: [strchar x N] + const char *StrBegin = MDS->begin(); + for (unsigned i = 0, e = MDS->length(); i != e; ++i) + Record.push_back(StrBegin[i]); + + // Emit the finished record. + Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev); + Record.clear(); + } else if (const NamedMDNode *NMD = dyn_cast(Vals[i].first)) { + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + StartedMetadataBlock = true; + } + + // Write name. 
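
GetOptimizationFlags above packs nsw/nuw/exact into single bits of one record word so binary operators and constant expressions can carry them compactly. A stand-alone round trip of that bit layout; the enum values mirror bitc::OBO_NO_SIGNED_WRAP and friends but are re-declared here purely for illustration:

    #include <cassert>
    #include <cstdint>

    enum { OBO_NO_UNSIGNED_WRAP = 0, OBO_NO_SIGNED_WRAP = 1 };  // bit positions

    int main() {
      bool NUW = false, NSW = true;   // e.g. an 'add nsw'
      uint64_t Flags = 0;
      if (NSW) Flags |= 1 << OBO_NO_SIGNED_WRAP;
      if (NUW) Flags |= 1 << OBO_NO_UNSIGNED_WRAP;
      assert(Flags == 2);                                // only the nsw bit is set
      assert(((Flags >> OBO_NO_SIGNED_WRAP) & 1) != 0);  // reader tests bits back
      return 0;
    }
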
+ std::string Str = NMD->getNameStr(); + const char *StrBegin = Str.c_str(); + for (unsigned i = 0, e = Str.length(); i != e; ++i) + Record.push_back(StrBegin[i]); + Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/); + Record.clear(); + + // Write named metadata elements. + for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) { + if (NMD->getElement(i)) + Record.push_back(VE.getValueID(NMD->getElement(i))); + else + Record.push_back(0); + } + Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0); + Record.clear(); + } + } + + if (StartedMetadataBlock) + Stream.ExitBlock(); +} + +static void WriteMetadataAttachment(const Function &F, + const ValueEnumerator &VE, + BitstreamWriter &Stream) { + bool StartedMetadataBlock = false; + SmallVector Record; + + // Write metadata attachments + // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] + MetadataContext &TheMetadata = F.getContext().getMetadata(); + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + const MetadataContext::MDMapTy *P = TheMetadata.getMDs(I); + if (!P) continue; + bool RecordedInstruction = false; + for (MetadataContext::MDMapTy::const_iterator PI = P->begin(), + PE = P->end(); PI != PE; ++PI) { + if (MDNode *ND = dyn_cast_or_null(PI->second)) { + if (RecordedInstruction == false) { + Record.push_back(VE.getInstructionID(I)); + RecordedInstruction = true; + } + Record.push_back(PI->first); + Record.push_back(VE.getValueID(ND)); + } + } + if (!Record.empty()) { + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3); + StartedMetadataBlock = true; + } + Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); + Record.clear(); + } + } + + if (StartedMetadataBlock) + Stream.ExitBlock(); +} + +static void WriteModuleMetadataStore(const Module *M, + const ValueEnumerator &VE, + BitstreamWriter &Stream) { + + bool StartedMetadataBlock = false; + SmallVector Record; + + // Write metadata kinds + // METADATA_KIND - [n x [id, name]] + MetadataContext &TheMetadata = M->getContext().getMetadata(); + const StringMap *Kinds = TheMetadata.getHandlerNames(); + for (StringMap::const_iterator + I = Kinds->begin(), E = Kinds->end(); I != E; ++I) { + Record.push_back(I->second); + StringRef KName = I->first(); + for (unsigned i = 0, e = KName.size(); i != e; ++i) + Record.push_back(KName[i]); + if (!StartedMetadataBlock) { + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + StartedMetadataBlock = true; + } + Stream.EmitRecord(bitc::METADATA_KIND, Record, 0); + Record.clear(); + } + + if (StartedMetadataBlock) + Stream.ExitBlock(); +} static void WriteConstants(unsigned FirstVal, unsigned LastVal, const ValueEnumerator &VE, BitstreamWriter &Stream, bool isGlobal) { if (FirstVal == LastVal) return; - + Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4); unsigned AggregateAbbrev = 0; unsigned String8Abbrev = 0; unsigned CString7Abbrev = 0; unsigned CString6Abbrev = 0; - unsigned MDString8Abbrev = 0; - unsigned MDString6Abbrev = 0; // If this is a constant pool for the module, emit module-specific abbrevs. if (isGlobal) { // Abbrev for CST_CODE_AGGREGATE. @@ -493,21 +661,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); CString6Abbrev = Stream.EmitAbbrev(Abbv); + } - // Abbrev for CST_CODE_MDSTRING. 
- Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); - MDString8Abbrev = Stream.EmitAbbrev(Abbv); - // Abbrev for CST_CODE_MDSTRING. - Abbv = new BitCodeAbbrev(); - Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); - MDString6Abbrev = Stream.EmitAbbrev(Abbv); - } - SmallVector Record; const ValueEnumerator::ValueList &Vals = VE.getValues(); @@ -522,16 +677,17 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, CONSTANTS_SETTYPE_ABBREV); Record.clear(); } - + if (const InlineAsm *IA = dyn_cast(V)) { - Record.push_back(unsigned(IA->hasSideEffects())); - + Record.push_back(unsigned(IA->hasSideEffects()) | + unsigned(IA->isMsAsm()) << 1); + // Add the asm string. const std::string &AsmStr = IA->getAsmString(); Record.push_back(AsmStr.size()); for (unsigned i = 0, e = AsmStr.size(); i != e; ++i) Record.push_back(AsmStr[i]); - + // Add the constraint string. const std::string &ConstraintStr = IA->getConstraintString(); Record.push_back(ConstraintStr.size()); @@ -558,11 +714,11 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Code = bitc::CST_CODE_INTEGER; AbbrevToUse = CONSTANTS_INTEGER_ABBREV; } else { // Wide integers, > 64 bits in size. - // We have an arbitrary precision integer value to write whose - // bit width is > 64. However, in canonical unsigned integer + // We have an arbitrary precision integer value to write whose + // bit width is > 64. However, in canonical unsigned integer // format it is likely that the high bits are going to be zero. // So, we only write the number of active words. - unsigned NWords = IV->getValue().getActiveWords(); + unsigned NWords = IV->getValue().getActiveWords(); const uint64_t *RawWords = IV->getValue().getRawData(); for (unsigned i = 0; i != NWords; ++i) { int64_t V = RawWords[i]; @@ -576,16 +732,16 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, } else if (const ConstantFP *CFP = dyn_cast(C)) { Code = bitc::CST_CODE_FLOAT; const Type *Ty = CFP->getType(); - if (Ty == Type::FloatTy || Ty == Type::DoubleTy) { + if (Ty->isFloatTy() || Ty->isDoubleTy()) { Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - } else if (Ty == Type::X86_FP80Ty) { + } else if (Ty->isX86_FP80Ty()) { // api needed to prevent premature destruction // bits are not in the same order as a normal i80 APInt, compensate. 
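
The x86_fp80 case that follows re-packs the 80-bit payload (p[0] = low 64 bits, p[1] = top 16 bits) into two record words so the reader can reverse the shifts exactly. A stand-alone round trip; the sample bit pattern is invented:

    #include <cassert>
    #include <cstdint>

    int main() {
      // An x86_fp80 APInt occupies two words: p0 = low 64 bits, p1 = top 16 bits.
      uint64_t p0 = 0x123456789ABCDEF0ULL, p1 = 0xCAFEULL;
      // Writer packs the record words:
      uint64_t hi = (p1 << 48) | (p0 >> 16);
      uint64_t lo = p0 & 0xFFFFULL;
      // Reader reverses the shuffle:
      uint64_t q0 = (hi << 16) | lo;
      uint64_t q1 = hi >> 48;
      assert(q0 == p0 && q1 == p1);
      return 0;
    }
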
APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back((p[1] << 48) | (p[0] >> 16)); Record.push_back(p[0] & 0xffffLL); - } else if (Ty == Type::FP128Ty || Ty == Type::PPC_FP128Ty) { + } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) { APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back(p[0]); @@ -610,10 +766,10 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, unsigned char V = cast(C->getOperand(i))->getZExtValue(); Record.push_back(V); isCStr7 &= (V & 128) == 0; - if (isCStrChar6) + if (isCStrChar6) isCStrChar6 = BitCodeAbbrevOp::isChar6(V); } - + if (isCStrChar6) AbbrevToUse = CString6Abbrev; else if (isCStr7) @@ -639,10 +795,15 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); + uint64_t Flags = GetOptimizationFlags(CE); + if (Flags != 0) + Record.push_back(Flags); } break; case Instruction::GetElementPtr: Code = bitc::CST_CODE_CE_GEP; + if (cast(C)->isInBounds()) + Code = bitc::CST_CODE_CE_INBOUNDS_GEP; for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { Record.push_back(VE.getTypeID(C->getOperand(i)->getType())); Record.push_back(VE.getValueID(C->getOperand(i))); @@ -683,45 +844,15 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, break; case Instruction::ICmp: case Instruction::FCmp: - case Instruction::VICmp: - case Instruction::VFCmp: - if (isa(C->getOperand(0)->getType()) - && (CE->getOpcode() == Instruction::ICmp - || CE->getOpcode() == Instruction::FCmp)) { - // compare returning vector of Int1Ty - assert(0 && "Unsupported constant!"); - } else { - Code = bitc::CST_CODE_CE_CMP; - } + Code = bitc::CST_CODE_CE_CMP; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(CE->getPredicate()); break; } - } else if (const MDString *S = dyn_cast(C)) { - Code = bitc::CST_CODE_MDSTRING; - AbbrevToUse = MDString6Abbrev; - for (unsigned i = 0, e = S->size(); i != e; ++i) { - char V = S->begin()[i]; - Record.push_back(V); - - if (!BitCodeAbbrevOp::isChar6(V)) - AbbrevToUse = MDString8Abbrev; - } - } else if (const MDNode *N = dyn_cast(C)) { - Code = bitc::CST_CODE_MDNODE; - for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) { - if (N->getElement(i)) { - Record.push_back(VE.getTypeID(N->getElement(i)->getType())); - Record.push_back(VE.getValueID(N->getElement(i))); - } else { - Record.push_back(VE.getTypeID(Type::VoidTy)); - Record.push_back(0); - } - } } else { - assert(0 && "Unknown constant!"); + llvm_unreachable("Unknown constant!"); } Stream.EmitRecord(Code, Record, AbbrevToUse); Record.clear(); @@ -733,7 +864,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, static void WriteModuleConstants(const ValueEnumerator &VE, BitstreamWriter &Stream) { const ValueEnumerator::ValueList &Vals = VE.getValues(); - + // Find the first constant to emit, which is the first non-globalvalue value. // We know globalvalues have been emitted by WriteModuleInfo. for (unsigned i = 0, e = Vals.size(); i != e; ++i) { @@ -753,7 +884,7 @@ static void WriteModuleConstants(const ValueEnumerator &VE, /// instruction ID, then it is a forward reference, and it also includes the /// type ID. 
static bool PushValueAndType(const Value *V, unsigned InstID, - SmallVector &Vals, + SmallVector &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); Vals.push_back(ValID); @@ -770,6 +901,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, SmallVector &Vals) { unsigned Code = 0; unsigned AbbrevToUse = 0; + VE.setInstructionID(&I); switch (I.getOpcode()) { default: if (Instruction::isCast(I.getOpcode())) { @@ -785,11 +917,19 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; Vals.push_back(VE.getValueID(I.getOperand(1))); Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode())); + uint64_t Flags = GetOptimizationFlags(&I); + if (Flags != 0) { + if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV) + AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV; + Vals.push_back(Flags); + } } break; case Instruction::GetElementPtr: Code = bitc::FUNC_CODE_INST_GEP; + if (cast(&I)->isInBounds()) + Code = bitc::FUNC_CODE_INST_INBOUNDS_GEP; for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) PushValueAndType(I.getOperand(i), InstID, Vals, VE); break; @@ -835,21 +975,14 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, break; case Instruction::ICmp: case Instruction::FCmp: - case Instruction::VICmp: - case Instruction::VFCmp: - if (I.getOpcode() == Instruction::ICmp - || I.getOpcode() == Instruction::FCmp) { - // compare returning Int1Ty or vector of Int1Ty - Code = bitc::FUNC_CODE_INST_CMP2; - } else { - Code = bitc::FUNC_CODE_INST_CMP; - } + // compare returning Int1Ty or vector of Int1Ty + Code = bitc::FUNC_CODE_INST_CMP2; PushValueAndType(I.getOperand(0), InstID, Vals, VE); Vals.push_back(VE.getValueID(I.getOperand(1))); Vals.push_back(cast(I).getPredicate()); break; - case Instruction::Ret: + case Instruction::Ret: { Code = bitc::FUNC_CODE_INST_RET; unsigned NumOperands = I.getNumOperands(); @@ -887,13 +1020,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const PointerType *PTy = cast(Callee->getType()); const FunctionType *FTy = cast(PTy->getElementType()); Code = bitc::FUNC_CODE_INST_INVOKE; - + Vals.push_back(VE.getAttributeID(II->getAttributes())); Vals.push_back(II->getCallingConv()); Vals.push_back(VE.getValueID(II->getNormalDest())); Vals.push_back(VE.getValueID(II->getUnwindDest())); PushValueAndType(Callee, InstID, Vals, VE); - + // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i+3))); // fixed param. @@ -913,38 +1046,38 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; break; - + case Instruction::PHI: Code = bitc::FUNC_CODE_INST_PHI; Vals.push_back(VE.getTypeID(I.getType())); for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i))); break; - + case Instruction::Malloc: Code = bitc::FUNC_CODE_INST_MALLOC; Vals.push_back(VE.getTypeID(I.getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. Vals.push_back(Log2_32(cast(I).getAlignment())+1); break; - + case Instruction::Free: Code = bitc::FUNC_CODE_INST_FREE; PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; - + case Instruction::Alloca: Code = bitc::FUNC_CODE_INST_ALLOCA; Vals.push_back(VE.getTypeID(I.getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. 
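
PushValueAndType above encodes the forward-reference rule: an operand whose value ID is at or past the current instruction's ID has not been emitted yet, so its type ID must travel with it in the record. A stand-alone sketch of that rule; the function and main() are invented:

    #include <cstdint>
    #include <vector>

    // Push an operand; forward references additionally carry their type ID.
    static bool pushValueAndType(unsigned ValID, unsigned TypeID, unsigned InstID,
                                 std::vector<uint64_t> &Vals) {
      Vals.push_back(ValID);
      if (ValID >= InstID) {   // not yet emitted: reader can't infer the type
        Vals.push_back(TypeID);
        return true;
      }
      return false;            // already emitted: type lookup is free
    }

    int main() {
      std::vector<uint64_t> Vals;
      bool Fwd = pushValueAndType(/*ValID=*/12, /*TypeID=*/3, /*InstID=*/10, Vals);
      return (Fwd && Vals.size() == 2) ? 0 : 1;
    }
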
Vals.push_back(Log2_32(cast(I).getAlignment())+1); break; - + case Instruction::Load: Code = bitc::FUNC_CODE_INST_LOAD; if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr AbbrevToUse = FUNCTION_INST_LOAD_ABBREV; - + Vals.push_back(Log2_32(cast(I).getAlignment())+1); Vals.push_back(cast(I).isVolatile()); break; @@ -960,16 +1093,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const FunctionType *FTy = cast(PTy->getElementType()); Code = bitc::FUNC_CODE_INST_CALL; - + const CallInst *CI = cast(&I); Vals.push_back(VE.getAttributeID(CI->getAttributes())); Vals.push_back((CI->getCallingConv() << 1) | unsigned(CI->isTailCall())); PushValueAndType(CI->getOperand(0), InstID, Vals, VE); // Callee - + // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i+1))); // fixed param. - + // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { unsigned NumVarargs = I.getNumOperands()-1-FTy->getNumParams(); @@ -986,7 +1119,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(VE.getTypeID(I.getType())); // restype. break; } - + Stream.EmitRecord(Code, Vals, AbbrevToUse); Vals.clear(); } @@ -1001,27 +1134,27 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST, // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. SmallVector NameVals; - + for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end(); SI != SE; ++SI) { - + const ValueName &Name = *SI; - + // Figure out the encoding to use for the name. bool is7Bit = true; bool isChar6 = true; for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength(); C != E; ++C) { - if (isChar6) + if (isChar6) isChar6 = BitCodeAbbrevOp::isChar6(*C); if ((unsigned char)*C & 128) { is7Bit = false; break; // don't bother scanning the rest. } } - + unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; - + // VST_ENTRY: [valueid, namechar x N] // VST_BBENTRY: [bbid, namechar x N] unsigned Code; @@ -1036,12 +1169,12 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST, else if (is7Bit) AbbrevToUse = VST_ENTRY_7_ABBREV; } - + NameVals.push_back(VE.getValueID(SI->getValue())); for (const char *P = Name.getKeyData(), *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P) NameVals.push_back((unsigned char)*P); - + // Emit the finished record. Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); @@ -1050,39 +1183,40 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST, } /// WriteFunction - Emit a function body to the module stream. -static void WriteFunction(const Function &F, ValueEnumerator &VE, +static void WriteFunction(const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); SmallVector Vals; - + // Emit the number of basic blocks, so the reader can create them ahead of // time. Vals.push_back(VE.getBasicBlocks().size()); Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals); Vals.clear(); - + // If there are function-local constants, emit them now. unsigned CstStart, CstEnd; VE.getFunctionConstantRange(CstStart, CstEnd); WriteConstants(CstStart, CstEnd, VE, Stream, false); - - // Keep a running idea of what the instruction ID is. + + // Keep a running idea of what the instruction ID is. unsigned InstID = CstEnd; - + // Finally, emit all the instructions, in order. 
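
The loop that follows only bumps InstID for instructions whose type is non-void, mirroring the reader, where only non-void results consume NextValueNo slots. A stand-alone sketch of the counting; the data is invented:

    #include <cstdio>

    int main() {
      unsigned InstID = 5;                    // CstEnd: constants filled IDs 0..4
      bool IsVoid[] = {false, true, false};   // e.g. add, store, load
      for (bool V : IsVoid) {
        if (!V)
          std::printf("instruction gets value id %u\n", InstID++);
        else
          std::printf("void instruction: no value id\n");
      }
      return 0;
    }
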
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { WriteInstruction(*I, InstID, VE, Stream, Vals); - if (I->getType() != Type::VoidTy) + if (I->getType() != Type::getVoidTy(F.getContext())) ++InstID; } - + // Emit names for all the instructions etc. WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream); - + + WriteMetadataAttachment(F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); } @@ -1092,9 +1226,9 @@ static void WriteTypeSymbolTable(const TypeSymbolTable &TST, const ValueEnumerator &VE, BitstreamWriter &Stream) { if (TST.empty()) return; - + Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3); - + // 7-bit fixed width VST_CODE_ENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); @@ -1103,14 +1237,14 @@ static void WriteTypeSymbolTable(const TypeSymbolTable &TST, Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); unsigned V7Abbrev = Stream.EmitAbbrev(Abbv); - + SmallVector NameVals; - - for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); + + for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); TI != TE; ++TI) { // TST_ENTRY: [typeid, namechar x N] NameVals.push_back(VE.getTypeID(TI->second)); - + const std::string &Str = TI->first; bool is7Bit = true; for (unsigned i = 0, e = Str.size(); i != e; ++i) { @@ -1118,12 +1252,12 @@ static void WriteTypeSymbolTable(const TypeSymbolTable &TST, if (Str[i] & 128) is7Bit = false; } - + // Emit the finished record. Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0); NameVals.clear(); } - + Stream.ExitBlock(); } @@ -1133,18 +1267,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. Other // blocks can define their abbrevs inline. Stream.EnterBlockInfoBlock(2); - + { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); - if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, + if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_8_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } - + { // 7-bit fixed width VST_ENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); @@ -1153,7 +1287,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_7_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_ENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -1163,7 +1297,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_6_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_BBENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -1173,11 +1307,11 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_BBENTRY_6_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } - - - + + + { // SETTYPE abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE)); @@ -1185,18 +1319,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_SETTYPE_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } - + { // INTEGER abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_INTEGER_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } - + { // CE_CAST abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST)); @@ -1207,18 +1341,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_CE_CAST_Abbrev) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } { // NULL abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_NULL_Abbrev) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } - + // FIXME: This should only use space for first class types! - + { // INST_LOAD abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD)); @@ -1227,7 +1361,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_LOAD_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_BINOP abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -1237,7 +1371,18 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_BINOP_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK. 
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_CAST abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -1248,15 +1393,15 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_CAST_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } - + { // INST_RET abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VOID_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_RET abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -1264,16 +1409,16 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VAL_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV) - assert(0 && "Unexpected abbrev ordering!"); + llvm_unreachable("Unexpected abbrev ordering!"); } - + Stream.ExitBlock(); } @@ -1281,44 +1426,50 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); - + // Emit the version number if it is non-zero. if (CurVersion) { SmallVector Vals; Vals.push_back(CurVersion); Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); } - + // Analyze the module, enumerating globals, functions, etc. ValueEnumerator VE(M); // Emit blockinfo, which defines the standard abbreviations etc. WriteBlockInfo(VE, Stream); - + // Emit information about parameter attributes. WriteAttributeTable(VE, Stream); - + // Emit information describing all of the types in the module. WriteTypeTable(VE, Stream); - + // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. WriteModuleInfo(M, VE, Stream); - + // Emit constants. WriteModuleConstants(VE, Stream); - + + // Emit metadata. + WriteModuleMetadata(VE, Stream); + // Emit function bodies. for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) if (!I->isDeclaration()) WriteFunction(*I, VE, Stream); - + + // Emit metadata. + WriteModuleMetadataStore(M, VE, Stream); + // Emit the type symbol table information. 
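
WriteBlockInfo above checks every EmitBlockInfoAbbrev result against an enum constant because abbrev IDs are handed out sequentially starting at bitc::FIRST_APPLICATION_ABBREV; registering a new abbrev such as FUNCTION_INST_BINOP_FLAGS_ABBREV out of order would silently shift every later ID. A stand-alone sketch of that invariant; the numeric values here are illustrative (the real constant lives in the bitc namespace):

    #include <cassert>

    enum { FIRST_APPLICATION_ABBREV = 4 };   // illustrative; really bitc::...
    enum { INST_LOAD_ABBREV = FIRST_APPLICATION_ABBREV,
           INST_BINOP_ABBREV, INST_BINOP_FLAGS_ABBREV };

    int main() {
      unsigned NextID = FIRST_APPLICATION_ABBREV;   // what the stream hands out
      assert(NextID == INST_LOAD_ABBREV);  ++NextID;  // registered first
      assert(NextID == INST_BINOP_ABBREV); ++NextID;  // registered second
      assert(NextID == INST_BINOP_FLAGS_ABBREV);      // the new abbrev is third
      return 0;
    }
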
WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream); - + // Emit names for globals/functions etc. WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); - + Stream.ExitBlock(); } @@ -1326,7 +1477,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { /// header and trailer to make it compatible with the system archiver. To do /// this we emit the following header, and then emit a trailer that pads the /// file out to be a multiple of 16 bytes. -/// +/// /// struct bc_header { /// uint32_t Magic; // 0x0B17C0DE /// uint32_t Version; // Version, currently always 0. @@ -1343,7 +1494,7 @@ enum { static void EmitDarwinBCHeader(BitstreamWriter &Stream, const std::string &TT) { unsigned CPUType = ~0U; - + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a // magic number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. @@ -1352,7 +1503,7 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, DARWIN_CPU_TYPE_X86 = 7, DARWIN_CPU_TYPE_POWERPC = 18 }; - + if (TT.find("x86_64-") == 0) CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64; else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && @@ -1362,10 +1513,10 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, CPUType = DARWIN_CPU_TYPE_POWERPC; else if (TT.find("powerpc64-") == 0) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; - + // Traditional Bitcode starts after header. unsigned BCOffset = DarwinBCHeaderSize; - + Stream.Emit(0x0B17C0DE, 32); Stream.Emit(0 , 32); // Version. Stream.Emit(BCOffset , 32); @@ -1378,7 +1529,7 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) { // Update the size field in the header. Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize); - + // If the file is not a multiple of 16 bytes, insert dummy padding. while (BufferSize & 15) { Stream.Emit(0, 8); @@ -1387,33 +1538,23 @@ static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) { } -/// WriteBitcodeToFile - Write the specified module to the specified output -/// stream. -void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) { - raw_os_ostream RawOut(Out); - // If writing to stdout, set binary mode. - if (llvm::cout == Out) - sys::Program::ChangeStdoutToBinary(); - WriteBitcodeToFile(M, RawOut); -} - /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { std::vector Buffer; BitstreamWriter Stream(Buffer); - + Buffer.reserve(256*1024); WriteBitcodeToStream( M, Stream ); - + // If writing to stdout, set binary mode. if (&llvm::outs() == &Out) sys::Program::ChangeStdoutToBinary(); // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); - + // Make sure it hits disk now. Out.flush(); } @@ -1425,7 +1566,7 @@ void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) { bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos; if (isDarwin) EmitDarwinBCHeader(Stream, M->getTargetTriple()); - + // Emit the file header. 
Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 209cf0980d2d3..3a0d3ce0be994 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -17,24 +17,16 @@ using namespace llvm; namespace { class WriteBitcodePass : public ModulePass { - // FIXME: Kill off std::ostream - std::ostream *Out; - raw_ostream *RawOut; // raw_ostream to print on + raw_ostream &OS; // raw_ostream to print on public: static char ID; // Pass identification, replacement for typeid - explicit WriteBitcodePass(std::ostream &o) - : ModulePass(&ID), Out(&o), RawOut(0) {} explicit WriteBitcodePass(raw_ostream &o) - : ModulePass(&ID), Out(0), RawOut(&o) {} + : ModulePass(&ID), OS(o) {} const char *getPassName() const { return "Bitcode Writer"; } bool runOnModule(Module &M) { - if (Out) { - WriteBitcodeToFile(&M, *Out); - } else { - WriteBitcodeToFile(&M, *RawOut); - } + WriteBitcodeToFile(&M, OS); return false; } }; @@ -42,13 +34,6 @@ namespace { char WriteBitcodePass::ID = 0; -/// CreateBitcodeWriterPass - Create and return a pass that writes the module -/// to the specified ostream. -ModulePass *llvm::CreateBitcodeWriterPass(std::ostream &Str) { - return new WriteBitcodePass(Str); -} - - /// createBitcodeWriterPass - Create and return a pass that writes the module /// to the specified ostream. ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) { diff --git a/lib/Bitcode/Writer/Serialize.cpp b/lib/Bitcode/Writer/Serialize.cpp index 79464a61be46a..a6beb1789e1e1 100644 --- a/lib/Bitcode/Writer/Serialize.cpp +++ b/lib/Bitcode/Writer/Serialize.cpp @@ -12,11 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Bitcode/Serialize.h" -#include "string.h" - -#ifdef DEBUG_BACKPATCH -#include "llvm/Support/Streams.h" -#endif +#include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -86,7 +83,7 @@ SerializedPtrID Serializer::getPtrId(const void* ptr) { if (I == PtrMap.end()) { unsigned id = PtrMap.size()+1; #ifdef DEBUG_BACKPATCH - llvm::cerr << "Registered PTR: " << ptr << " => " << id << "\n"; + errs() << "Registered PTR: " << ptr << " => " << id << "\n"; #endif PtrMap[ptr] = id; return id; diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 32b2819762db5..60253ad91e6ec 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -14,7 +14,7 @@ #include "ValueEnumerator.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" -#include "llvm/MDNode.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/TypeSymbolTable.h" #include "llvm/ValueSymbolTable.h" @@ -40,6 +40,8 @@ static bool CompareByFrequency(const std::pairglobal_begin(), E = M->global_end(); I != E; ++I) @@ -55,10 +57,10 @@ ValueEnumerator::ValueEnumerator(const Module *M) { for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) EnumerateValue(I); - + // Remember what is the cutoff between globalvalue's and other constants. unsigned FirstConstant = Values.size(); - + // Enumerate the global variable initializers. 
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) @@ -69,24 +71,25 @@ ValueEnumerator::ValueEnumerator(const Module *M) { for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) EnumerateValue(I->getAliasee()); - + // Enumerate types used by the type symbol table. EnumerateTypeSymbolTable(M->getTypeSymbolTable()); // Insert constants that are named at module level into the slot pool so that // the module symbol table can refer to them... EnumerateValueSymbolTable(M->getValueSymbolTable()); - + // Enumerate types used by function bodies and argument lists. for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { - + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) EnumerateType(I->getType()); - + + MetadataContext &TheMetadata = F->getContext().getMetadata(); for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){ - for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) EnumerateOperandType(*OI); EnumerateType(I->getType()); @@ -94,16 +97,24 @@ ValueEnumerator::ValueEnumerator(const Module *M) { EnumerateAttributes(CI->getAttributes()); else if (const InvokeInst *II = dyn_cast(I)) EnumerateAttributes(II->getAttributes()); + + // Enumerate metadata attached with this instruction. + const MetadataContext::MDMapTy *MDs = TheMetadata.getMDs(I); + if (MDs) + for (MetadataContext::MDMapTy::const_iterator MI = MDs->begin(), + ME = MDs->end(); MI != ME; ++MI) + if (MDNode *MDN = dyn_cast_or_null(MI->second)) + EnumerateMetadata(MDN); } } - + // Optimize constant ordering. OptimizeConstants(FirstConstant, Values.size()); - + // Sort the type table by frequency so that most commonly used types are early // in the table (have low bit-width). std::stable_sort(Types.begin(), Types.end(), CompareByFrequency); - + // Partition the Type ID's so that the single-value types occur before the // aggregate types. This allows the aggregate types to be dropped from the // type table after parsing the global variable initializers. @@ -114,6 +125,28 @@ ValueEnumerator::ValueEnumerator(const Module *M) { TypeMap[Types[i].first] = i+1; } +unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { + InstructionMapType::const_iterator I = InstructionMap.find(Inst); + assert (I != InstructionMap.end() && "Instruction is not mapped!"); + return I->second; +} + +void ValueEnumerator::setInstructionID(const Instruction *I) { + InstructionMap[I] = InstructionCount++; +} + +unsigned ValueEnumerator::getValueID(const Value *V) const { + if (isa(V)) { + ValueMapType::const_iterator I = MDValueMap.find(V); + assert(I != MDValueMap.end() && "Value not in slotcalculator!"); + return I->second-1; + } + + ValueMapType::const_iterator I = ValueMap.find(V); + assert(I != ValueMap.end() && "Value not in slotcalculator!"); + return I->second-1; +} + // Optimize constant ordering. namespace { struct CstSortPredicate { @@ -123,7 +156,7 @@ namespace { const std::pair &RHS) { // Sort by plane. if (LHS.first->getType() != RHS.first->getType()) - return VE.getTypeID(LHS.first->getType()) < + return VE.getTypeID(LHS.first->getType()) < VE.getTypeID(RHS.first->getType()); // Then by frequency. 
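// getValueID/getInstructionID above lean on DenseMap's value-initialised
// mapped type: stored ids are offset by one so that 0 means "not
// enumerated yet". Minimal self-contained model of the idiom, with
// std::map standing in for DenseMap:
#include <cassert>
#include <map>

class SlotTable {
  std::map<const void *, unsigned> Map;
  unsigned Count;
public:
  SlotTable() : Count(0) {}
  unsigned enumerate(const void *V) {
    unsigned &ID = Map[V];         // default-constructed to 0 if V is new
    if (!ID) ID = ++Count;         // first visit: hand out the next 1-based id
    return ID - 1;                 // callers see 0-based ids, as getValueID does
  }
  unsigned getID(const void *V) const {
    std::map<const void *, unsigned>::const_iterator I = Map.find(V);
    assert(I != Map.end() && "Value not in slotcalculator!");
    return I->second - 1;
  }
};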
return LHS.second > RHS.second; @@ -134,15 +167,15 @@ namespace { /// OptimizeConstants - Reorder constant pool for denser encoding. void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { if (CstStart == CstEnd || CstStart+1 == CstEnd) return; - + CstSortPredicate P(*this); std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P); - + // Ensure that integer constants are at the start of the constant pool. This // is important so that GEP structure indices come before gep constant exprs. std::partition(Values.begin()+CstStart, Values.begin()+CstEnd, isIntegerValue); - + // Rebuild the modified portion of ValueMap. for (; CstStart != CstEnd; ++CstStart) ValueMap[Values[CstStart].first] = CstStart+1; @@ -152,7 +185,7 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { /// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol /// table. void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) { - for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); + for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); TI != TE; ++TI) EnumerateType(TI->second); } @@ -160,14 +193,57 @@ void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) { /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol /// table into the values table. void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { - for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); + for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); VI != VE; ++VI) EnumerateValue(VI->getValue()); } +void ValueEnumerator::EnumerateMetadata(const MetadataBase *MD) { + // Check to see if it's already in! + unsigned &MDValueID = MDValueMap[MD]; + if (MDValueID) { + // Increment use count. + MDValues[MDValueID-1].second++; + return; + } + + // Enumerate the type of this value. + EnumerateType(MD->getType()); + + if (const MDNode *N = dyn_cast(MD)) { + MDValues.push_back(std::make_pair(MD, 1U)); + MDValueMap[MD] = MDValues.size(); + MDValueID = MDValues.size(); + for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end(); + I != E; ++I) { + if (*I) + EnumerateValue(*I); + else + EnumerateType(Type::getVoidTy(MD->getContext())); + } + return; + } else if (const NamedMDNode *N = dyn_cast(MD)) { + for(NamedMDNode::const_elem_iterator I = N->elem_begin(), + E = N->elem_end(); I != E; ++I) { + MetadataBase *M = *I; + EnumerateValue(M); + } + MDValues.push_back(std::make_pair(MD, 1U)); + MDValueMap[MD] = Values.size(); + return; + } + + // Add the value. + MDValues.push_back(std::make_pair(MD, 1U)); + MDValueID = MDValues.size(); +} + void ValueEnumerator::EnumerateValue(const Value *V) { - assert(V->getType() != Type::VoidTy && "Can't insert void values!"); - + assert(V->getType() != Type::getVoidTy(V->getContext()) && + "Can't insert void values!"); + if (const MetadataBase *MB = dyn_cast(V)) + return EnumerateMetadata(MB); + // Check to see if it's already in! unsigned &ValueID = ValueMap[V]; if (ValueID) { @@ -178,7 +254,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) { // Enumerate the type of this value. EnumerateType(V->getType()); - + if (const Constant *C = dyn_cast(V)) { if (isa(C)) { // Initializers for globals are handled explicitly elsewhere. @@ -190,7 +266,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) { // If a constant has operands, enumerate them. 
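// Toy model of OptimizeConstants above: stable-sort a subrange of
// (constant, use count) pairs by descending use count, then move integer
// constants to the front so GEP structure indices get small ids:
#include <algorithm>
#include <vector>

struct Cst { bool IsInteger; unsigned Uses; };
static bool moreUsed(const Cst &A, const Cst &B) { return A.Uses > B.Uses; }
static bool isInteger(const Cst &C) { return C.IsInteger; }

void optimizeConstants(std::vector<Cst> &V, size_t Start, size_t End) {
  if (End - Start < 2) return;     // nothing to reorder
  std::stable_sort(V.begin() + Start, V.begin() + End, moreUsed);
  std::partition(V.begin() + Start, V.begin() + End, isInteger);
}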
This makes sure that if a // constant has uses (for example an array of const ints), that they are // inserted also. - + // We prefer to enumerate them with values before we enumerate the user // itself. This makes it more likely that we can avoid forward references // in the reader. We know that there can be no cycles in the constants @@ -198,27 +274,15 @@ void ValueEnumerator::EnumerateValue(const Value *V) { for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) EnumerateValue(*I); - + // Finally, add the value. Doing this could make the ValueID reference be // dangling, don't reuse it. - Values.push_back(std::make_pair(V, 1U)); - ValueMap[V] = Values.size(); - return; - } else if (const MDNode *N = dyn_cast(C)) { - for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end(); - I != E; ++I) { - if (*I) - EnumerateValue(*I); - else - EnumerateType(Type::VoidTy); - } - Values.push_back(std::make_pair(V, 1U)); ValueMap[V] = Values.size(); return; } } - + // Add the value. Values.push_back(std::make_pair(V, 1U)); ValueID = Values.size(); @@ -227,17 +291,17 @@ void ValueEnumerator::EnumerateValue(const Value *V) { void ValueEnumerator::EnumerateType(const Type *Ty) { unsigned &TypeID = TypeMap[Ty]; - + if (TypeID) { // If we've already seen this type, just increase its occurrence count. Types[TypeID-1].second++; return; } - + // First time we saw this type, add it. Types.push_back(std::make_pair(Ty, 1U)); TypeID = Types.size(); - + // Enumerate subtypes. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) @@ -259,10 +323,14 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { EnumerateOperandType(C->getOperand(i)); if (const MDNode *N = dyn_cast(V)) { - for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) - EnumerateOperandType(N->getElement(i)); + for (unsigned i = 0, e = N->getNumElements(); i != e; ++i) { + Value *Elem = N->getElement(i); + if (Elem) + EnumerateOperandType(Elem); + } } - } + } else if (isa(V) || isa(V)) + EnumerateValue(V); } void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { @@ -279,18 +347,18 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { void ValueEnumerator::incorporateFunction(const Function &F) { NumModuleValues = Values.size(); - + // Adding function arguments to the value table. for(Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) EnumerateValue(I); FirstFuncConstantID = Values.size(); - + // Add all function-level constants to the value table. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) - for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { if ((isa(*OI) && !isa(*OI)) || isa(*OI)) @@ -299,20 +367,20 @@ void ValueEnumerator::incorporateFunction(const Function &F) { BasicBlocks.push_back(BB); ValueMap[BB] = BasicBlocks.size(); } - + // Optimize the constant layout. OptimizeConstants(FirstFuncConstantID, Values.size()); - + // Add the function's parameter attributes so they are available for use in // the function's instruction. EnumerateAttributes(F.getAttributes()); FirstInstID = Values.size(); - + // Add all of the instructions. 
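// The "operands before the user" rule above amounts to a post-order walk:
// because the constant graph is acyclic, numbering children first gives
// every operand a smaller id than its user, so the reader needs no forward
// references. Minimal model:
#include <set>
#include <vector>

struct CNode { std::vector<CNode *> Ops; unsigned Id; };

void enumeratePostOrder(CNode *N, std::set<CNode *> &Seen, unsigned &Next) {
  if (!Seen.insert(N).second) return;           // already numbered
  for (size_t i = 0; i != N->Ops.size(); ++i)
    enumeratePostOrder(N->Ops[i], Seen, Next);  // operands first
  N->Id = Next++;                               // then the user itself
}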
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { - if (I->getType() != Type::VoidTy) + if (I->getType() != Type::getVoidTy(F.getContext())) EnumerateValue(I); } } @@ -324,8 +392,7 @@ void ValueEnumerator::purgeFunction() { ValueMap.erase(Values[i].first); for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) ValueMap.erase(BasicBlocks[i]); - + Values.resize(NumModuleValues); BasicBlocks.clear(); } - diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 40eeabb2b6aa7..da63dde2a2799 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -22,9 +22,11 @@ namespace llvm { class Type; class Value; +class Instruction; class BasicBlock; class Function; class Module; +class MetadataBase; class AttrListPtr; class TypeSymbolTable; class ValueSymbolTable; @@ -44,11 +46,17 @@ private: typedef DenseMap ValueMapType; ValueMapType ValueMap; ValueList Values; + ValueList MDValues; + ValueMapType MDValueMap; typedef DenseMap AttributeMapType; AttributeMapType AttributeMap; std::vector Attributes; + typedef DenseMap InstructionMapType; + InstructionMapType InstructionMap; + unsigned InstructionCount; + /// BasicBlocks - This contains all the basic blocks for the currently /// incorporated function. Their reverse mapping is stored in ValueMap. std::vector BasicBlocks; @@ -64,18 +72,17 @@ private: public: ValueEnumerator(const Module *M); - unsigned getValueID(const Value *V) const { - ValueMapType::const_iterator I = ValueMap.find(V); - assert(I != ValueMap.end() && "Value not in slotcalculator!"); - return I->second-1; - } - + unsigned getValueID(const Value *V) const; + unsigned getTypeID(const Type *T) const { TypeMapType::const_iterator I = TypeMap.find(T); assert(I != TypeMap.end() && "Type not in ValueEnumerator!"); return I->second-1; } - + + unsigned getInstructionID(const Instruction *I) const; + void setInstructionID(const Instruction *I); + unsigned getAttributeID(const AttrListPtr &PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. 
AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer()); @@ -91,6 +98,7 @@ public: } const ValueList &getValues() const { return Values; } + const ValueList &getMDValues() const { return MDValues; } const TypeList &getTypes() const { return Types; } const std::vector &getBasicBlocks() const { return BasicBlocks; @@ -108,6 +116,7 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + void EnumerateMetadata(const MetadataBase *MD); void EnumerateValue(const Value *V); void EnumerateType(const Type *T); void EnumerateOperandType(const Value *V); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6d125810d9275..8bc5ef91cdf4a 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,16 +18,25 @@ #include "llvm/Module.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,12 +50,17 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), cl::init(cl::BOU_UNSET)); char AsmPrinter::ID = 0; -AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm, - const TargetAsmInfo *T, bool VDef) +AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm, + const MCAsmInfo *T, bool VDef) : MachineFunctionPass(&ID), FunctionNumber(0), O(o), - TM(tm), TAI(T), TRI(tm.getRegisterInfo()), - IsInTextSection(false), LastMI(0), LastFn(0), Counter(~0U), - PrevDLT(0, ~0U, ~0U) { + TM(tm), MAI(T), TRI(tm.getRegisterInfo()), + + OutContext(*new MCContext()), + // FIXME: Pass instprinter to streamer. + OutStreamer(*createAsmStreamer(OutContext, O, *T, 0)), + + LastMI(0), LastFn(0), Counter(~0U), + PrevDLT(0, 0, ~0U, ~0U) { DW = 0; MMI = 0; switch (AsmVerbose) { case cl::BOU_UNSET: VerboseAsm = VDef; break; @@ -59,188 +73,124 @@ AsmPrinter::~AsmPrinter() { for (gcp_iterator I = GCMetadataPrinters.begin(), E = GCMetadataPrinters.end(); I != E; ++I) delete I->second; -} - -/// SwitchToTextSection - Switch to the specified text section of the executable -/// if we are not already in it! -/// -void AsmPrinter::SwitchToTextSection(const char *NewSection, - const GlobalValue *GV) { - std::string NS; - if (GV && GV->hasSection()) - NS = TAI->getSwitchToSectionDirective() + GV->getSection(); - else - NS = NewSection; - // If we're already in this section, we're done. - if (CurrentSection == NS) return; - - // Close the current section, if applicable. 
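// The constructor above now builds and owns the MC objects; done by hand
// it looks like this, mirroring the call in the hunk (the trailing 0 is
// the instruction printer the FIXME says is not wired up yet). Teardown
// follows ~AsmPrinter: streamer first, then context.
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/FormattedStream.h"

void demoAsmStreamer(llvm::formatted_raw_ostream &O,
                     const llvm::MCAsmInfo &MAI) {
  llvm::MCContext *Ctx = new llvm::MCContext();
  llvm::MCStreamer *Streamer = llvm::createAsmStreamer(*Ctx, O, MAI, 0);
  // ... emit sections, labels and data through *Streamer ...
  delete Streamer;
  delete Ctx;
}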
- if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - CurrentSection = NS; - - if (!CurrentSection.empty()) - O << CurrentSection << TAI->getTextSectionStartSuffix() << '\n'; - - IsInTextSection = true; + delete &OutStreamer; + delete &OutContext; } -/// SwitchToDataSection - Switch to the specified data section of the executable -/// if we are not already in it! -/// -void AsmPrinter::SwitchToDataSection(const char *NewSection, - const GlobalValue *GV) { - std::string NS; - if (GV && GV->hasSection()) - NS = TAI->getSwitchToSectionDirective() + GV->getSection(); - else - NS = NewSection; - - // If we're already in this section, we're done. - if (CurrentSection == NS) return; - - // Close the current section, if applicable. - if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - CurrentSection = NS; - - if (!CurrentSection.empty()) - O << CurrentSection << TAI->getDataSectionStartSuffix() << '\n'; - - IsInTextSection = false; +TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { + return TM.getTargetLowering()->getObjFileLowering(); } -/// SwitchToSection - Switch to the specified section of the executable if we -/// are not already in it! -void AsmPrinter::SwitchToSection(const Section* NS) { - const std::string& NewSection = NS->getName(); - - // If we're already in this section, we're done. - if (CurrentSection == NewSection) return; - - // Close the current section, if applicable. - if (TAI->getSectionEndDirectiveSuffix() && !CurrentSection.empty()) - O << CurrentSection << TAI->getSectionEndDirectiveSuffix() << '\n'; - - // FIXME: Make CurrentSection a Section* in the future - CurrentSection = NewSection; - CurrentSection_ = NS; - - if (!CurrentSection.empty()) { - // If section is named we need to switch into it via special '.section' - // directive and also append funky flags. Otherwise - section name is just - // some magic assembler directive. - if (NS->isNamed()) - O << TAI->getSwitchToSectionDirective() - << CurrentSection - << TAI->getSectionFlags(NS->getFlags()); - else - O << CurrentSection; - O << TAI->getDataSectionStartSuffix() << '\n'; - } - - IsInTextSection = (NS->getFlags() & SectionFlags::Code); +/// getCurrentSection() - Return the current section we are emitting to. +const MCSection *AsmPrinter::getCurrentSection() const { + return OutStreamer.getCurrentSection(); } + void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired(); + if (VerboseAsm) + AU.addRequired(); } bool AsmPrinter::doInitialization(Module &M) { - Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix()); + // Initialize TargetLoweringObjectFile. + const_cast(getObjFileLowering()) + .Initialize(OutContext, TM); - if (TAI->doesAllowQuotesInName()) + Mang = new Mangler(M, MAI->getGlobalPrefix(), MAI->getPrivateGlobalPrefix(), + MAI->getLinkerPrivateGlobalPrefix()); + + if (MAI->doesAllowQuotesInName()) Mang->setUseQuotes(true); + + if (MAI->doesAllowNameToStartWithDigit()) + Mang->setSymbolsCanStartWithDigit(true); - GCModuleInfo *MI = getAnalysisIfAvailable(); - assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + // Allow the target to emit any magic that it wants at the start of the file. 
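// Section switching now funnels through that streamer: MCStreamer tracks
// the current MCSection and prints the directive only on a real change,
// which is exactly what the deleted string comparison did by hand. The
// new calling pattern, in isolation:
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"

void emitInSection(llvm::MCStreamer &OutStreamer, const llvm::MCSection *Sec) {
  OutStreamer.SwitchSection(Sec);  // no-op if Sec is already current
  // ... emit labels/data destined for this section ...
}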
+ EmitStartOfAsmFile(M); - if (TAI->hasSingleParameterDotFile()) { + if (MAI->hasSingleParameterDotFile()) { /* Very minimal debug info. It is ignored if we emit actual - debug info. If we don't, this at helps the user find where + debug info. If we don't, this at least helps the user find where a function came from. */ O << "\t.file\t\"" << M.getModuleIdentifier() << "\"\n"; } + GCModuleInfo *MI = getAnalysisIfAvailable(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) - MP->beginAssembly(O, *this, *TAI); + MP->beginAssembly(O, *this, *MAI); if (!M.getModuleInlineAsm().empty()) - O << TAI->getCommentString() << " Start of file scope inline assembly\n" + O << MAI->getCommentString() << " Start of file scope inline assembly\n" << M.getModuleInlineAsm() - << '\n' << TAI->getCommentString() + << '\n' << MAI->getCommentString() << " End of file scope inline assembly\n"; - SwitchToDataSection(""); // Reset back to no section. - - if (TAI->doesSupportDebugInformation() || - TAI->doesSupportExceptionHandling()) { - MMI = getAnalysisIfAvailable(); - if (MMI) - MMI->AnalyzeModule(M); - DW = getAnalysisIfAvailable(); - if (DW) - DW->BeginModule(&M, MMI, O, this, TAI); - } + MMI = getAnalysisIfAvailable(); + if (MMI) + MMI->AnalyzeModule(M); + DW = getAnalysisIfAvailable(); + if (DW) + DW->BeginModule(&M, MMI, O, this, MAI); return false; } bool AsmPrinter::doFinalization(Module &M) { + // Emit global variables. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + PrintGlobalVariable(I); + // Emit final debug information. - if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) + if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) DW->EndModule(); // If the target wants to know about weak references, print them all. - if (TAI->getWeakRefDirective()) { + if (MAI->getWeakRefDirective()) { // FIXME: This is not lazy, it would be nice to only print weak references // to stuff that is actually used. Note that doing so would require targets // to notice uses in operands (due to constant exprs etc). This should // happen with the MC stuff eventually. - SwitchToDataSection(""); // Print out module-level global variables here. 
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (I->hasExternalWeakLinkage()) - O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n'; + O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n'; } - for (Module::const_iterator I = M.begin(), E = M.end(); - I != E; ++I) { + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (I->hasExternalWeakLinkage()) - O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n'; + O << MAI->getWeakRefDirective() << Mang->getMangledName(I) << '\n'; } } - if (TAI->getSetDirective()) { - if (!M.alias_empty()) - SwitchToSection(TAI->getTextSection()); - + if (MAI->getSetDirective()) { O << '\n'; for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - std::string Name = Mang->getValueName(I); - std::string Target; + std::string Name = Mang->getMangledName(I); const GlobalValue *GV = cast(I->getAliasedGlobal()); - Target = Mang->getValueName(GV); + std::string Target = Mang->getMangledName(GV); - if (I->hasExternalLinkage() || !TAI->getWeakRefDirective()) + if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) O << "\t.globl\t" << Name << '\n'; else if (I->hasWeakLinkage()) - O << TAI->getWeakRefDirective() << Name << '\n'; + O << MAI->getWeakRefDirective() << Name << '\n'; else if (!I->hasLocalLinkage()) - assert(0 && "Invalid alias linkage"); + llvm_unreachable("Invalid alias linkage"); printVisibility(Name, I->getVisibility()); - O << TAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; + O << MAI->getSetDirective() << ' ' << Name << ", " << Target << '\n'; } } @@ -248,45 +198,43 @@ bool AsmPrinter::doFinalization(Module &M) { assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) - MP->finishAssembly(O, *this, *TAI); + MP->finishAssembly(O, *this, *MAI); // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) - if (TAI->getNonexecutableStackDirective()) - O << TAI->getNonexecutableStackDirective() << '\n'; + if (MAI->getNonexecutableStackDirective()) + O << MAI->getNonexecutableStackDirective() << '\n'; + + // Allow the target to emit any magic that it wants at the end of the file, + // after everything else has gone out. + EmitEndOfAsmFile(M); + delete Mang; Mang = 0; DW = 0; MMI = 0; + + OutStreamer.Finish(); return false; } -const std::string & -AsmPrinter::getCurrentFunctionEHName(const MachineFunction *MF, - std::string &Name) const { - assert(MF && "No machine function?"); - Name = MF->getFunction()->getName(); - if (Name.empty()) - Name = Mang->getValueName(MF->getFunction()); - Name = Mang->makeNameProper(TAI->getEHGlobalPrefix() + - Name + ".eh", TAI->getGlobalPrefix()); - return Name; -} - void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { // What's my mangled name? - CurrentFnName = Mang->getValueName(MF.getFunction()); + CurrentFnName = Mang->getMangledName(MF.getFunction()); IncrementFunctionNumber(); + + if (VerboseAsm) + LI = &getAnalysis(); } namespace { // SectionCPs - Keep track the alignment, constpool entries per Section. 
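// What the alias loop above prints, in isolation: an externally visible
// alias gets ".globl", a weak one gets the target's weak-ref directive,
// and either way a set directive binds the alias name to its target.
// Directive spellings below are illustrative; the real ones come from
// MAI->getWeakRefDirective() and MAI->getSetDirective().
#include <string>
#include "llvm/Support/raw_ostream.h"

void printAliasDirectives(llvm::raw_ostream &O, const std::string &Name,
                          const std::string &Target, bool Weak) {
  if (!Weak)
    O << "\t.globl\t" << Name << '\n';
  else
    O << "\t.weak_reference " << Name << '\n';  // e.g. Darwin's spelling
  O << "\t.set " << Name << ", " << Target << '\n';
}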
struct SectionCPs { - const Section *S; + const MCSection *S; unsigned Alignment; SmallVector CPEs; - SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {}; + SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}; }; } @@ -303,9 +251,27 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { // the same section together to reduce amount of section switch statements. SmallVector CPSections; for (unsigned i = 0, e = CP.size(); i != e; ++i) { - MachineConstantPoolEntry CPE = CP[i]; + const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - const Section* S = TAI->SelectSectionForMachineConst(CPE.getType()); + + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16();break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); + // The number of sections are small, just do a linear search from the // last section to the first. bool Found = false; @@ -328,7 +294,7 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { // Now print stuff into the calculated sections. for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { - SwitchToSection(CPSections[i].S); + OutStreamer.SwitchSection(CPSections[i].S); EmitAlignment(Log2_32(CPSections[i].Alignment)); unsigned Offset = 0; @@ -344,11 +310,12 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) { const Type *Ty = CPE.getType(); Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty); - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' - << CPI << ":\t\t\t\t\t"; + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << CPI << ':'; if (VerboseAsm) { - O << TAI->getCommentString() << ' '; - WriteTypeSymbolic(O, CPE.getType(), 0); + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " constant "; + WriteTypeSymbolic(O, CPE.getType(), MF->getFunction()->getParent()); } O << '\n'; if (CPE.isMachineConstantPoolEntry()) @@ -373,20 +340,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // the appropriate section. TargetLowering *LoweringInfo = TM.getTargetLowering(); - const char* JumpTableDataSection = TAI->getJumpTableDataSection(); const Function *F = MF.getFunction(); - unsigned SectionFlags = TAI->SectionFlagsForGlobal(F); bool JTInDiffSection = false; - if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) || - !JumpTableDataSection || - SectionFlags & SectionFlags::Linkonce) { + if (F->isWeakForLinker() || + (IsPic && !LoweringInfo->usesGlobalOffsetTable())) { // In PIC mode, we need to emit the jump table to the same section as the // function body itself, otherwise the label differences won't make sense. // We should also do if the section name is NULL or function is declared in // discardable section. - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, + TM)); } else { - SwitchToDataSection(JumpTableDataSection); + // Otherwise, drop it in the readonly section. 
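// The SectionKind switch above, in words: relocation info 2 means the
// constant needs arbitrary relocations, 1 means only target-local ones,
// and 0 lets the entry live in a mergeable section bucketed by allocated
// size. Standalone model of the classification:
enum CPKind {
  ReadOnlyWithRel, ReadOnlyWithRelLocal,
  MergeableConst4, MergeableConst8, MergeableConst16, MergeableConst
};

CPKind classifyConstantPoolEntry(unsigned RelocInfo, unsigned AllocSize) {
  switch (RelocInfo) {
  case 2: return ReadOnlyWithRel;
  case 1: return ReadOnlyWithRelLocal;
  default:                              // 0: no relocations at all
    switch (AllocSize) {
    case 4:  return MergeableConst4;
    case 8:  return MergeableConst8;
    case 16: return MergeableConst16;
    default: return MergeableConst;
    }
  }
}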
+ const MCSection *ReadOnlySection = + getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); + OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } @@ -402,21 +370,21 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, // the number of relocations the assembler will generate for the jump table. // Set directives are all printed before the jump table itself. SmallPtrSet<MachineBasicBlock*, 16> EmittedSets; - if (TAI->getSetDirective() && IsPic) + if (MAI->getSetDirective() && IsPic) for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) if (EmittedSets.insert(JTBBs[ii])) printPICJumpTableSetLabel(i, JTBBs[ii]); - // On some targets (e.g. darwin) we want to emit two consecutive labels + // On some targets (e.g. Darwin) we want to emit two consecutive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection) { - if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix()) - O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n"; + if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0]) { + O << MAI->getLinkerPrivateGlobalPrefix() + << "JTI" << getFunctionNumber() << '_' << i << ":\n"; } - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << i << ":\n"; for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { @@ -429,15 +397,15 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI, void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid) const { - bool IsPic = TM.getRelocationModel() == Reloc::PIC_; + bool isPIC = TM.getRelocationModel() == Reloc::PIC_; // Use JumpTableDirective otherwise honor the entry size from the jump table // info. - const char *JTEntryDirective = TAI->getJumpTableDirective(); + const char *JTEntryDirective = MAI->getJumpTableDirective(isPIC); bool HadJTEntryDirective = JTEntryDirective != NULL; if (!HadJTEntryDirective) { JTEntryDirective = MJTI->getEntrySize() == 4 ? - TAI->getData32bitsDirective() : TAI->getData64bitsDirective(); + MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); } O << JTEntryDirective << ' '; @@ -447,20 +415,18 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, // emit the table entries as differences between two text section labels. // If we're emitting non-PIC code, then emit the entries as direct // references to the target basic blocks.
- if (IsPic) { - if (TAI->getSetDirective()) { - O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() - << '_' << uid << "_set_" << MBB->getNumber(); - } else { - printBasicBlockLabel(MBB, false, false, false); - // If the arch uses custom Jump Table directives, don't calc relative to - // JT - if (!HadJTEntryDirective) - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" - << getFunctionNumber() << '_' << uid; - } + if (!isPIC) { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + } else if (MAI->getSetDirective()) { + O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); } else { - printBasicBlockLabel(MBB, false, false, false); + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + // If the arch uses custom Jump Table directives, don't calc relative to + // JT + if (!HadJTEntryDirective) + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid; } } @@ -470,12 +436,12 @@ void AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, /// do nothing and return false. bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getName() == "llvm.used") { - if (TAI->getUsedDirective() != 0) // No need to emit this at all. + if (MAI->getUsedDirective() != 0) // No need to emit this at all. EmitLLVMUsedList(GV->getInitializer()); return true; } - // Ignore debug and non-emitted data. + // Ignore debug and non-emitted data. This handles llvm.compiler.used. if (GV->getSection() == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; @@ -487,14 +453,14 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { const TargetData *TD = TM.getTargetData(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { - SwitchToDataSection(TAI->getStaticCtorsSection()); + OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); EmitAlignment(Align, 0); EmitXXStructorList(GV->getInitializer()); return true; } if (GV->getName() == "llvm.global_dtors") { - SwitchToDataSection(TAI->getStaticDtorsSection()); + OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); EmitAlignment(Align, 0); EmitXXStructorList(GV->getInitializer()); return true; @@ -503,45 +469,20 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { return false; } -/// findGlobalValue - if CV is an expression equivalent to a single -/// global value, return that value. -const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) { - if (const GlobalValue *GV = dyn_cast(CV)) - return GV; - else if (const ConstantExpr *CE = dyn_cast(CV)) { - const TargetData *TD = TM.getTargetData(); - unsigned Opcode = CE->getOpcode(); - switch (Opcode) { - case Instruction::GetElementPtr: { - const Constant *ptrVal = CE->getOperand(0); - SmallVector idxVec(CE->op_begin()+1, CE->op_end()); - if (TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], idxVec.size())) - return 0; - return findGlobalValue(ptrVal); - } - case Instruction::BitCast: - return findGlobalValue(CE->getOperand(0)); - default: - return 0; - } - } - return 0; -} - -/// EmitLLVMUsedList - For targets that define a TAI::UsedDirective, mark each +/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each /// global in the specified llvm.used list for which emitUsedDirectiveFor /// is true, as being used with this directive. 
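// The deleted findGlobalValue walked bitcasts and zero-offset GEPs by
// hand; the replacement below leans on Value::stripPointerCasts(), which
// performs the same walk. The whole lookup shrinks to one line:
#include "llvm/Constants.h"
#include "llvm/GlobalValue.h"
#include "llvm/Support/Casting.h"

const llvm::GlobalValue *usedGlobal(llvm::Constant *Op) {
  return llvm::dyn_cast<llvm::GlobalValue>(Op->stripPointerCasts());
}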
- void AsmPrinter::EmitLLVMUsedList(Constant *List) { - const char *Directive = TAI->getUsedDirective(); + const char *Directive = MAI->getUsedDirective(); // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast(List); if (InitList == 0) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - const GlobalValue *GV = findGlobalValue(InitList->getOperand(i)); - if (TAI->emitUsedDirectiveFor(GV, Mang)) { + const GlobalValue *GV = + dyn_cast(InitList->getOperand(i)->stripPointerCasts()); + if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) { O << Directive; EmitConstantValueOnly(InitList->getOperand(i)); O << '\n'; @@ -567,32 +508,6 @@ void AsmPrinter::EmitXXStructorList(Constant *List) { } } -/// getGlobalLinkName - Returns the asm/link name of of the specified -/// global variable. Should be overridden by each target asm printer to -/// generate the appropriate value. -const std::string &AsmPrinter::getGlobalLinkName(const GlobalVariable *GV, - std::string &LinkName) const { - if (isa(GV)) { - LinkName += TAI->getFunctionAddrPrefix(); - LinkName += Mang->getValueName(GV); - LinkName += TAI->getFunctionAddrSuffix(); - } else { - LinkName += TAI->getGlobalVarAddrPrefix(); - LinkName += Mang->getValueName(GV); - LinkName += TAI->getGlobalVarAddrSuffix(); - } - - return LinkName; -} - -/// EmitExternalGlobal - Emit the external reference to a global variable. -/// Should be overridden if an indirect reference should be used. -void AsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) { - std::string GLN; - O << getGlobalLinkName(GV, GLN); -} - - //===----------------------------------------------------------------------===// /// LEB 128 number encoding. @@ -646,8 +561,8 @@ void AsmPrinter::EOL() const { void AsmPrinter::EOL(const std::string &Comment) const { if (VerboseAsm && !Comment.empty()) { - O << '\t' - << TAI->getCommentString() + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' ' << Comment; } @@ -656,22 +571,72 @@ void AsmPrinter::EOL(const std::string &Comment) const { void AsmPrinter::EOL(const char* Comment) const { if (VerboseAsm && *Comment) { - O << '\t' - << TAI->getCommentString() + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' ' << Comment; } O << '\n'; } +static const char *DecodeDWARFEncoding(unsigned Encoding) { + switch (Encoding) { + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: + return "indirect pcrel udata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: + return "indirect pcrel sdata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: + return "indirect pcrel udata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: + return "indirect pcrel sdata8"; + } + + return 0; +} + +void 
AsmPrinter::EOL(const char *Comment, unsigned Encoding) const { + if (VerboseAsm && *Comment) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << ' ' + << Comment; + + if (const char *EncStr = DecodeDWARFEncoding(Encoding)) + O << " (" << EncStr << ')'; + } + O << '\n'; +} + /// EmitULEB128Bytes - Emit an assembler byte data directive to compose an /// unsigned leb128 value. void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { - if (TAI->hasLEB128()) { + if (MAI->hasLEB128()) { O << "\t.uleb128\t" << Value; } else { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintULEB128(Value); } } @@ -679,11 +644,11 @@ void AsmPrinter::EmitULEB128Bytes(unsigned Value) const { /// EmitSLEB128Bytes - print an assembler byte data directive to compose a /// signed leb128 value. void AsmPrinter::EmitSLEB128Bytes(int Value) const { - if (TAI->hasLEB128()) { + if (MAI->hasLEB128()) { O << "\t.sleb128\t" << Value; } else { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintSLEB128(Value); } } @@ -691,29 +656,29 @@ void AsmPrinter::EmitSLEB128Bytes(int Value) const { /// EmitInt8 - Emit a byte directive and value. /// void AsmPrinter::EmitInt8(int Value) const { - O << TAI->getData8bitsDirective(); + O << MAI->getData8bitsDirective(); PrintHex(Value & 0xFF); } /// EmitInt16 - Emit a short directive and value. /// void AsmPrinter::EmitInt16(int Value) const { - O << TAI->getData16bitsDirective(); + O << MAI->getData16bitsDirective(); PrintHex(Value & 0xFFFF); } /// EmitInt32 - Emit a long directive and value. /// void AsmPrinter::EmitInt32(int Value) const { - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); PrintHex(Value); } /// EmitInt64 - Emit a long long directive and value. /// void AsmPrinter::EmitInt64(uint64_t Value) const { - if (TAI->getData64bitsDirective()) { - O << TAI->getData64bitsDirective(); + if (MAI->getData64bitsDirective()) { + O << MAI->getData64bitsDirective(); PrintHex(Value); } else { if (TM.getTargetData()->isBigEndian()) { @@ -734,7 +699,7 @@ static inline char toOctal(int X) { /// printStringChar - Print a char, escaped if necessary. /// -static void printStringChar(raw_ostream &O, unsigned char C) { +static void printStringChar(formatted_raw_ostream &O, unsigned char C) { if (C == '"') { O << "\\\""; } else if (C == '\\') { @@ -766,11 +731,11 @@ void AsmPrinter::EmitString(const std::string &String) const { } void AsmPrinter::EmitString(const char *String, unsigned Size) const { - const char* AscizDirective = TAI->getAscizDirective(); + const char* AscizDirective = MAI->getAscizDirective(); if (AscizDirective) O << AscizDirective; else - O << TAI->getAsciiDirective(); + O << MAI->getAsciiDirective(); O << '\"'; for (unsigned i = 0; i < Size; ++i) printStringChar(O, String[i]); @@ -813,31 +778,26 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, NumBits = std::max(NumBits, ForcedAlignBits); if (NumBits == 0) return; // No need to emit alignment. - if (TAI->getAlignmentIsInBytes()) NumBits = 1 << NumBits; - O << TAI->getAlignDirective() << NumBits; - - unsigned FillValue = TAI->getTextAlignFillValue(); - UseFillExpr &= IsInTextSection && FillValue; - if (UseFillExpr) { - O << ','; - PrintHex(FillValue); - } - O << '\n'; + + unsigned FillValue = 0; + if (getCurrentSection()->getKind().isText()) + FillValue = MAI->getTextAlignFillValue(); + + OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0); } - /// EmitZeros - Emit a block of zeros. 
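// Reference encoders for the byte sequences EmitULEB128Bytes and
// EmitSLEB128Bytes spell out as .byte directives when the assembler has
// no .uleb128/.sleb128 (standard DWARF LEB128: seven payload bits per
// byte, high bit set while more bytes follow):
#include <stdint.h>
#include <vector>

void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value) Byte |= 0x80;       // more bytes follow
    Out.push_back(Byte);
  } while (Value);
}

void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;                   // arithmetic shift preserves the sign
    More = !((Value == 0 && !(Byte & 0x40)) ||
             (Value == -1 && (Byte & 0x40)));
    if (More) Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}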
/// void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const { if (NumZeros) { - if (TAI->getZeroDirective()) { - O << TAI->getZeroDirective() << NumZeros; - if (TAI->getZeroDirectiveSuffix()) - O << TAI->getZeroDirectiveSuffix(); + if (MAI->getZeroDirective()) { + O << MAI->getZeroDirective() << NumZeros; + if (MAI->getZeroDirectiveSuffix()) + O << MAI->getZeroDirectiveSuffix(); O << '\n'; } else { for (; NumZeros; --NumZeros) - O << TAI->getData8bitsDirective(AddrSpace) << "0\n"; + O << MAI->getData8bitsDirective(AddrSpace) << "0\n"; } } } @@ -851,22 +811,22 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { O << CI->getZExtValue(); } else if (const GlobalValue *GV = dyn_cast(CV)) { // This is a constant address for a global variable or function. Use the - // name of the variable or function as the address value, possibly - // decorating it with GlobalVarAddrPrefix/Suffix or - // FunctionAddrPrefix/Suffix (these all default to "" ) - if (isa(GV)) { - O << TAI->getFunctionAddrPrefix() - << Mang->getValueName(GV) - << TAI->getFunctionAddrSuffix(); - } else { - O << TAI->getGlobalVarAddrPrefix() - << Mang->getValueName(GV) - << TAI->getGlobalVarAddrSuffix(); - } + // name of the variable or function as the address value. + O << Mang->getMangledName(GV); } else if (const ConstantExpr *CE = dyn_cast(CV)) { const TargetData *TD = TM.getTargetData(); unsigned Opcode = CE->getOpcode(); switch (Opcode) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + llvm_unreachable("FIXME: Don't support this constant cast expr"); case Instruction::GetElementPtr: { // generate a symbolic expression for the byte address const Constant *ptrVal = CE->getOperand(0); @@ -891,17 +851,6 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { } break; } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - assert(0 && "FIXME: Don't yet support this kind of constant cast expr"); - break; case Instruction::BitCast: return EmitConstantValueOnly(CE->getOperand(0)); @@ -909,7 +858,8 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(), false/*ZExt*/); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); return EmitConstantValueOnly(Op); } @@ -922,16 +872,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { // We can emit the pointer value into this slot if the slot is an // integer slot greater or equal to the size of the pointer. 
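// The GetElementPtr case above prints a folded constant GEP as
// "(base) + byte-offset", with TargetData::getIndexedOffset collapsing
// the index list into a byte count. Toy model for the flat-array case,
// where the offset is just index times element size (names illustrative;
// struct paths add field offsets instead):
#include <stdint.h>
#include <sstream>
#include <string>

std::string gepExpr(const std::string &Base, uint64_t Index,
                    uint64_t EltSize) {
  if (Index == 0) return Base;               // zero offset folds away
  std::ostringstream OS;
  OS << '(' << Base << ") + " << Index * EltSize;
  return OS.str();
}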
- if (TD->getTypeAllocSize(Ty) >= TD->getTypeAllocSize(Op->getType())) + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) return EmitConstantValueOnly(Op); O << "(("; EmitConstantValueOnly(Op); - APInt ptrMask = APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Ty)); + APInt ptrMask = + APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType())); SmallString<40> S; ptrMask.toStringUnsigned(S); - O << ") & " << S.c_str() << ')'; + O << ") & " << S.str() << ')'; break; } case Instruction::Add: @@ -966,17 +917,17 @@ void AsmPrinter::EmitConstantValueOnly(const Constant *CV) { O << ')'; break; default: - assert(0 && "Unsupported operator!"); + llvm_unreachable("Unsupported operator!"); } } else { - assert(0 && "Unknown constant value!"); + llvm_unreachable("Unknown constant value!"); } } /// printAsCString - Print the specified array as a C compatible string, only if /// the predicate isString is true. /// -static void printAsCString(raw_ostream &O, const ConstantArray *CVA, +static void printAsCString(formatted_raw_ostream &O, const ConstantArray *CVA, unsigned LastElt) { assert(CVA->isString() && "Array is not string compatible!"); @@ -993,12 +944,12 @@ static void printAsCString(raw_ostream &O, const ConstantArray *CVA, /// void AsmPrinter::EmitString(const ConstantArray *CVA) const { unsigned NumElts = CVA->getNumOperands(); - if (TAI->getAscizDirective() && NumElts && + if (MAI->getAscizDirective() && NumElts && cast(CVA->getOperand(NumElts-1))->getZExtValue() == 0) { - O << TAI->getAscizDirective(); + O << MAI->getAscizDirective(); printAsCString(O, CVA, NumElts-1); } else { - O << TAI->getAsciiDirective(); + O << MAI->getAsciiDirective(); printAsCString(O, CVA, NumElts); } O << '\n'; @@ -1053,48 +1004,65 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace) { // FP Constants are printed as integer constants to avoid losing // precision... 
+ LLVMContext &Context = CFP->getContext(); const TargetData *TD = TM.getTargetData(); - if (CFP->getType() == Type::DoubleTy) { + if (CFP->getType()->isDoubleTy()) { double Val = CFP->getValueAPF().convertToDouble(); // for comment only uint64_t i = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (TAI->getData64bitsDirective(AddrSpace)) { - O << TAI->getData64bitsDirective(AddrSpace) << i; - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " double value: " << Val; + if (MAI->getData64bitsDirective(AddrSpace)) { + O << MAI->getData64bitsDirective(AddrSpace) << i; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " double " << Val; + } O << '\n'; } else if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word of double " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of double " << Val; + } O << '\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of double " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " double most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(i >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word of double " << Val; + } O << '\n'; } return; - } else if (CFP->getType() == Type::FloatTy) { + } + + if (CFP->getType()->isFloatTy()) { float Val = CFP->getValueAPF().convertToFloat(); // for comment only - O << TAI->getData32bitsDirective(AddrSpace) + O << MAI->getData32bitsDirective(AddrSpace) << CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " float " << Val; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " float " << Val; + } O << '\n'; return; - } else if (CFP->getType() == Type::X86_FP80Ty) { + } + + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); @@ -1105,110 +1073,148 @@ void AsmPrinter::EmitGlobalConstantFP(const ConstantFP *CFP, DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); if (TD->isBigEndian()) { - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant halfword of ~" + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) { + 
O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant halfword of x86_fp80 ~" << DoubleVal.convertToDouble(); + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant halfword"; + } O << '\n'; } else { - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant halfword of ~" + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant halfword of x86_fp80 ~" << DoubleVal.convertToDouble(); + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 16); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[0] >> 48); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next halfword"; + } O << '\n'; - O << TAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant halfword"; + O << MAI->getData16bitsDirective(AddrSpace) << uint16_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant halfword"; + } O << '\n'; } - 
EmitZeros(TD->getTypeAllocSize(Type::X86_FP80Ty) - - TD->getTypeStoreSize(Type::X86_FP80Ty), AddrSpace); + EmitZeros(TD->getTypeAllocSize(Type::getX86_FP80Ty(Context)) - + TD->getTypeStoreSize(Type::getX86_FP80Ty(Context)), AddrSpace); return; - } else if (CFP->getType() == Type::PPC_FP128Ty) { + } + + if (CFP->getType()->isPPC_FP128Ty()) { // all long double variants are printed as hex // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word of ppc_fp128"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word"; + } O << '\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double least significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant word of ppc_fp128"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[1] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double next word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0]); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " next word"; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " long double most significant word"; + O << MAI->getData32bitsDirective(AddrSpace) << uint32_t(p[0] >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant word"; + } O << '\n'; } return; - } else assert(0 && "Floating point constant type 
not handled"); + } else llvm_unreachable("Floating point constant type not handled"); } void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, @@ -1229,29 +1235,37 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI, else Val = RawData[i]; - if (TAI->getData64bitsDirective(AddrSpace)) - O << TAI->getData64bitsDirective(AddrSpace) << Val << '\n'; + if (MAI->getData64bitsDirective(AddrSpace)) + O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n'; else if (TD->isBigEndian()) { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant half of i64 " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant half of i64 " << Val; + } O << '\n'; } else { - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word least significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " least significant half of i64 " << Val; + } O << '\n'; - O << TAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); - if (VerboseAsm) - O << '\t' << TAI->getCommentString() - << " Double-word most significant word " << Val; + O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() + << " most significant half of i64 " << Val; + } O << '\n'; } } @@ -1292,7 +1306,8 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (const ConstantInt *CI = dyn_cast(CV)) { SmallString<40> S; CI->getValue().toStringUnsigned(S, 16); - O << "\t\t\t" << TAI->getCommentString() << " 0x" << S.c_str(); + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " 0x" << S.str(); } } O << '\n'; @@ -1300,7 +1315,7 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // Target doesn't support this yet! - abort(); + llvm_unreachable("Target does not support EmitMachineConstantPoolValue"); } /// PrintSpecial - Print information related to the specified machine instr @@ -1311,10 +1326,10 @@ void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { if (!strcmp(Code, "private")) { - O << TAI->getPrivateGlobalPrefix(); + O << MAI->getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { if (VerboseAsm) - O << TAI->getCommentString(); + O << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { // Comparing the address of MI isn't sufficient, because machineinstrs may // be allocated to the same address across functions. 
@@ -1328,23 +1343,38 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const { } O << Counter; } else { - cerr << "Unknown special formatter '" << Code + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unknown special formatter '" << Code << "' for machine instr: " << *MI; - exit(1); + llvm_report_error(Msg.str()); } } /// processDebugLoc - Processes the debug information of each machine /// instruction's DebugLoc. -void AsmPrinter::processDebugLoc(DebugLoc DL) { - if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { +void AsmPrinter::processDebugLoc(const MachineInstr *MI, + bool BeforePrintingInsn) { + if (!MAI || !DW) + return; + DebugLoc DL = MI->getDebugLoc(); + if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) { if (!DL.isUnknown()) { DebugLocTuple CurDLT = MF->getDebugLocTuple(DL); - - if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT) - printLabel(DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, - DICompileUnit(CurDLT.CompileUnit))); - + if (BeforePrintingInsn) { + if (CurDLT.Scope != 0 && PrevDLT != CurDLT) { + unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col, + CurDLT.Scope); + printLabel(L); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeBeginLabels(MI, L); +#endif + } else { +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->SetDbgScopeEndLabels(MI, 0); +#endif + } + } PrevDLT = CurDLT; } } @@ -1369,14 +1399,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { // If this asmstr is empty, just print the #APP/#NOAPP markers. // These are useful to see where empty asm's wound up. if (AsmStr[0] == 0) { - O << TAI->getInlineAsmStart() << "\n\t" << TAI->getInlineAsmEnd() << '\n'; + O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t"; + O << MAI->getCommentString() << MAI->getInlineAsmEnd() << '\n'; return; } - O << TAI->getInlineAsmStart() << "\n\t"; + O << MAI->getCommentString() << MAI->getInlineAsmStart() << "\n\t"; // The variant of the current asmprinter. - int AsmPrinterVariant = TAI->getAssemblerDialect(); + int AsmPrinterVariant = MAI->getAssemblerDialect(); int CurVariant = -1; // The number of the {.|.|.} region we are in. const char *LastEmitted = AsmStr; // One past the last character emitted. @@ -1413,9 +1444,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { case '(': // $( -> same as GCC's { character. ++LastEmitted; // Consume '(' character. if (CurVariant != -1) { - cerr << "Nested variants found in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Nested variants found in inline asm string: '" + + std::string(AsmStr) + "'"); } CurVariant = 0; // We're in the first variant now. break; @@ -1450,9 +1480,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (StrEnd == 0) { - cerr << "Unterminated ${:foo} operand in inline asm string: '" - << AsmStr << "'\n"; - exit(1); + llvm_report_error("Unterminated ${:foo} operand in inline asm string: '" + + std::string(AsmStr) + "'"); } std::string Val(StrStart, StrEnd); @@ -1466,9 +1495,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const { errno = 0; long Val = strtol(IDStart, &IDEnd, 10); // We only accept numbers for IDs. 
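The "$N" operand references in the inline-asm string are parsed above with strtol plus an errno check, and validated immediately after. The same parse-and-validate step in isolation (the helper name is illustrative, not from the source):

#include <cctype>
#include <cerrno>
#include <cstdlib>

// Returns true and advances IDEnd past the digits on success. Mirrors the
// check above: only digits are accepted, and Val == 0 with errno == EINVAL
// signals that no number was parsed at all.
bool parseDollarOperand(const char *IDStart, long &Val, const char *&IDEnd) {
  errno = 0;
  char *End;
  Val = std::strtol(IDStart, &End, 10);
  if (!std::isdigit((unsigned char)*IDStart) || (Val == 0 && errno == EINVAL))
    return false;
  IDEnd = End;
  return true;
}
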
        if (!isdigit(*IDStart) || (Val == 0 && errno == EINVAL)) {
-          cerr << "Bad $ operand number in inline asm string: '"
-               << AsmStr << "'\n";
-          exit(1);
+          llvm_report_error("Bad $ operand number in inline asm string: '"
+                            + std::string(AsmStr) + "'");
         }
         LastEmitted = IDEnd;
@@ -1480,9 +1508,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
         if (*LastEmitted == ':') {
           ++LastEmitted;    // Consume ':' character.
           if (*LastEmitted == 0) {
-            cerr << "Bad ${:} expression in inline asm string: '"
-                 << AsmStr << "'\n";
-            exit(1);
+            llvm_report_error("Bad ${:} expression in inline asm string: '"
+                              + std::string(AsmStr) + "'");
           }
 
           Modifier[0] = *LastEmitted;
@@ -1490,17 +1517,15 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
         }
 
         if (*LastEmitted != '}') {
-          cerr << "Bad ${} expression in inline asm string: '"
-               << AsmStr << "'\n";
-          exit(1);
+          llvm_report_error("Bad ${} expression in inline asm string: '"
+                            + std::string(AsmStr) + "'");
         }
         ++LastEmitted;    // Consume '}' character.
       }
 
       if ((unsigned)Val >= NumOperands-1) {
-        cerr << "Invalid $ operand number in inline asm string: '"
-             << AsmStr << "'\n";
-        exit(1);
+        llvm_report_error("Invalid $ operand number in inline asm string: '"
+                          + std::string(AsmStr) + "'");
      }
 
       // Okay, we finally have a value number.  Ask the target to print this
@@ -1524,8 +1549,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
         ++OpNo;  // Skip over the ID number.
 
         if (Modifier[0]=='l')  // labels are target independent
-          printBasicBlockLabel(MI->getOperand(OpNo).getMBB(),
-                               false, false, false);
+          GetMBBSymbol(MI->getOperand(OpNo).getMBB()
+                         ->getNumber())->print(O, MAI);
         else {
           AsmPrinter *AP = const_cast<AsmPrinter*>(this);
           if ((OpFlags & 7) == 4) {
@@ -1538,25 +1563,28 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
           }
         }
       }
       if (Error) {
-        cerr << "Invalid operand found in inline asm: '"
+        std::string msg;
+        raw_string_ostream Msg(msg);
+        Msg << "Invalid operand found in inline asm: '"
             << AsmStr << "'\n";
-        MI->dump();
-        exit(1);
+        MI->print(Msg);
+        llvm_report_error(Msg.str());
       }
     }
     break;
     }
   }
   }
-  O << "\n\t" << TAI->getInlineAsmEnd() << '\n';
+  O << "\n\t" << MAI->getCommentString() << MAI->getInlineAsmEnd();
 }
 
 /// printImplicitDef - This method prints the specified machine instruction
 /// that is an implicit def.
 void AsmPrinter::printImplicitDef(const MachineInstr *MI) const {
-  if (VerboseAsm)
-    O << '\t' << TAI->getCommentString() << " implicit-def: "
-      << TRI->getAsmName(MI->getOperand(0).getReg()) << '\n';
+  if (!VerboseAsm) return;
+  O.PadToColumn(MAI->getCommentColumn());
+  O << MAI->getCommentString() << " implicit-def: "
+    << TRI->getName(MI->getOperand(0).getReg());
 }
 
 /// printLabel - This method prints a local label used by debug and
@@ -1566,17 +1594,7 @@ void AsmPrinter::printLabel(const MachineInstr *MI) const {
 }
 
 void AsmPrinter::printLabel(unsigned Id) const {
-  O << TAI->getPrivateGlobalPrefix() << "label" << Id << ":\n";
-}
-
-/// printDeclare - This method prints a local variable declaration used by
-/// debug tables.
-/// FIXME: It doesn't really print anything rather it inserts a DebugVariable
-/// entry into dwarf table.
-void AsmPrinter::printDeclare(const MachineInstr *MI) const { - unsigned FI = MI->getOperand(0).getIndex(); - GlobalValue *GV = MI->getOperand(1).getGlobal(); - DW->RecordVariable(cast(GV), FI, MI); + O << MAI->getPrivateGlobalPrefix() << "label" << Id << ':'; } /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM @@ -1595,51 +1613,69 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -/// printBasicBlockLabel - This method prints the label for the specified -/// MachineBasicBlock -void AsmPrinter::printBasicBlockLabel(const MachineBasicBlock *MBB, - bool printAlign, - bool printColon, - bool printComment) const { - if (printAlign) { - unsigned Align = MBB->getAlignment(); - if (Align) - EmitAlignment(Log2_32(Align)); - } +MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BB" + << getFunctionNumber() << '_' << MBBID; + + return OutContext.GetOrCreateSymbol(Name.str()); +} + - O << TAI->getPrivateGlobalPrefix() << "BB" << getFunctionNumber() << '_' - << MBB->getNumber(); - if (printColon) +/// EmitBasicBlockStart - This method prints the label for the specified +/// MachineBasicBlock, an alignment (if present) and a comment describing +/// it if appropriate. +void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { + if (unsigned Align = MBB->getAlignment()) + EmitAlignment(Log2_32(Align)); + + if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) { + if (VerboseAsm) + O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':'; + } else { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); O << ':'; - if (printComment && MBB->getBasicBlock()) - O << '\t' << TAI->getCommentString() << ' ' - << MBB->getBasicBlock()->getNameStart(); + if (!VerboseAsm) + O << '\n'; + } + + if (VerboseAsm) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, BB, /*PrintType=*/false); + } + + EmitComments(*MBB); + O << '\n'; + } } /// printPICJumpTableSetLabel - This method prints a set label for the /// specified MachineBasicBlock for a jumptable entry. 
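GetMBBSymbol above interns names of the form <PrivateGlobalPrefix>BB<FunctionNumber>_<BlockNumber>, and EmitBasicBlockStart then skips the label entirely for blocks only reachable by fallthrough. A sketch of the naming scheme, with an illustrative prefix and numbers:

#include <cstdio>
#include <string>

// Compose the private block label GetMBBSymbol interns.
std::string mbbLabel(const std::string &PrivatePrefix,
                     unsigned FnNum, unsigned BlockNum) {
  char Buf[64];
  std::snprintf(Buf, sizeof(Buf), "%sBB%u_%u",
                PrivatePrefix.c_str(), FnNum, BlockNum);
  return Buf;
}

// mbbLabel(".L", 2, 7) yields ".LBB2_7", matching labels such as ".LBB2_7:".
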
void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, const MachineBasicBlock *MBB) const { - if (!TAI->getSetDirective()) + if (!MAI->getSetDirective()) return; - O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; - printBasicBlockLabel(MBB, false, false, false); - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '\n'; } void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const { - if (!TAI->getSetDirective()) + if (!MAI->getSetDirective()) return; - O << TAI->getSetDirective() << ' ' << TAI->getPrivateGlobalPrefix() + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << uid << '_' << uid2 << "_set_" << MBB->getNumber() << ','; - printBasicBlockLabel(MBB, false, false, false); - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2 << '\n'; } @@ -1648,73 +1684,51 @@ void AsmPrinter::printPICJumpTableSetLabel(unsigned uid, unsigned uid2, void AsmPrinter::printDataDirective(const Type *type, unsigned AddrSpace) { const TargetData *TD = TM.getTargetData(); switch (type->getTypeID()) { + case Type::FloatTyID: case Type::DoubleTyID: + case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: + assert(0 && "Should have already output floating point constant."); + default: + assert(0 && "Can't handle printing this type of thing"); case Type::IntegerTyID: { unsigned BitWidth = cast(type)->getBitWidth(); if (BitWidth <= 8) - O << TAI->getData8bitsDirective(AddrSpace); + O << MAI->getData8bitsDirective(AddrSpace); else if (BitWidth <= 16) - O << TAI->getData16bitsDirective(AddrSpace); + O << MAI->getData16bitsDirective(AddrSpace); else if (BitWidth <= 32) - O << TAI->getData32bitsDirective(AddrSpace); + O << MAI->getData32bitsDirective(AddrSpace); else if (BitWidth <= 64) { - assert(TAI->getData64bitsDirective(AddrSpace) && + assert(MAI->getData64bitsDirective(AddrSpace) && "Target cannot handle 64-bit constant exprs!"); - O << TAI->getData64bitsDirective(AddrSpace); + O << MAI->getData64bitsDirective(AddrSpace); } else { - assert(0 && "Target cannot handle given data directive width!"); + llvm_unreachable("Target cannot handle given data directive width!"); } break; } case Type::PointerTyID: if (TD->getPointerSize() == 8) { - assert(TAI->getData64bitsDirective(AddrSpace) && + assert(MAI->getData64bitsDirective(AddrSpace) && "Target cannot handle 64-bit pointer exprs!"); - O << TAI->getData64bitsDirective(AddrSpace); + O << MAI->getData64bitsDirective(AddrSpace); } else if (TD->getPointerSize() == 2) { - O << TAI->getData16bitsDirective(AddrSpace); + O << MAI->getData16bitsDirective(AddrSpace); } else if (TD->getPointerSize() == 1) { - O << TAI->getData8bitsDirective(AddrSpace); + O << MAI->getData8bitsDirective(AddrSpace); } else { - O << TAI->getData32bitsDirective(AddrSpace); + O << MAI->getData32bitsDirective(AddrSpace); } break; - case Type::FloatTyID: case Type::DoubleTyID: - case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - assert (0 && "Should have already 
output floating point constant."); - default: - assert (0 && "Can't handle printing this type of thing"); - break; } } -void AsmPrinter::printSuffixedName(const char *Name, const char *Suffix, - const char *Prefix) { - if (Name[0]=='\"') - O << '\"'; - O << TAI->getPrivateGlobalPrefix(); - if (Prefix) O << Prefix; - if (Name[0]=='\"') - O << '\"'; - if (Name[0]=='\"') - O << Name[1]; - else - O << Name; - O << Suffix; - if (Name[0]=='\"') - O << '\"'; -} - -void AsmPrinter::printSuffixedName(const std::string &Name, const char* Suffix) { - printSuffixedName(Name.c_str(), Suffix); -} - void AsmPrinter::printVisibility(const std::string& Name, unsigned Visibility) const { if (Visibility == GlobalValue::HiddenVisibility) { - if (const char *Directive = TAI->getHiddenDirective()) + if (const char *Directive = MAI->getHiddenDirective()) O << Directive << Name << '\n'; } else if (Visibility == GlobalValue::ProtectedVisibility) { - if (const char *Directive = TAI->getProtectedDirective()) + if (const char *Directive = MAI->getProtectedDirective()) O << Directive << Name << '\n'; } } @@ -1746,6 +1760,104 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { return GMP; } - cerr << "no GCMetadataPrinter registered for GC: " << Name << "\n"; - abort(); + errs() << "no GCMetadataPrinter registered for GC: " << Name << "\n"; + llvm_unreachable(0); +} + +/// EmitComments - Pretty-print comments for instructions +void AsmPrinter::EmitComments(const MachineInstr &MI) const { + assert(VerboseAsm && !MI.getDebugLoc().isUnknown()); + + DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc()); + + // Print source line info. + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " SrcLine "; + if (DLT.Scope) { + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + O << CU.getFilename() << " "; + } + O << DLT.Line; + if (DLT.Col != 0) + O << ":" << DLT.Col; +} + +/// PrintChildLoopComment - Print comments about child loops within +/// the loop for this basic block, with nesting. +/// +static void PrintChildLoopComment(formatted_raw_ostream &O, + const MachineLoop *loop, + const MCAsmInfo *MAI, + int FunctionNumber) { + // Add child loop information + for(MachineLoop::iterator cl = loop->begin(), + clend = loop->end(); + cl != clend; + ++cl) { + MachineBasicBlock *Header = (*cl)->getHeader(); + assert(Header && "No header for loop"); + + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + + O << MAI->getCommentString(); + O.indent(((*cl)->getLoopDepth()-1)*2) + << " Child Loop BB" << FunctionNumber << "_" + << Header->getNumber() << " Depth " << (*cl)->getLoopDepth(); + + PrintChildLoopComment(O, *cl, MAI, FunctionNumber); + } +} + +/// EmitComments - Pretty-print comments for basic blocks +void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const +{ + if (VerboseAsm) { + // Add loop depth information + const MachineLoop *loop = LI->getLoopFor(&MBB); + + if (loop) { + // Print a newline after bb# annotation. 
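PrintChildLoopComment above indents each child-loop annotation by twice its nesting depth before recursing into grandchildren. A reduced sketch of that recursion over a stand-in loop tree (LoopNode replaces MachineLoop; "#" stands in for the comment string):

#include <cstdio>
#include <vector>

struct LoopNode {                    // stand-in for MachineLoop
  unsigned Depth, HeaderBB;
  std::vector<const LoopNode*> Children;
};

void printChildLoops(const LoopNode *L, unsigned FnNum) {
  for (const LoopNode *C : L->Children) {
    // Indent by (depth - 1) * 2 spaces, as O.indent() does above.
    std::printf("# %*sChild Loop BB%u_%u Depth %u\n",
                int((C->Depth - 1) * 2), "", FnNum, C->HeaderBB, C->Depth);
    printChildLoops(C, FnNum);
  }
}
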
+ O << "\n"; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Loop Depth " << loop->getLoopDepth() + << '\n'; + + O.PadToColumn(MAI->getCommentColumn()); + + MachineBasicBlock *Header = loop->getHeader(); + assert(Header && "No header for loop"); + + if (Header == &MBB) { + O << MAI->getCommentString() << " Loop Header"; + PrintChildLoopComment(O, loop, MAI, getFunctionNumber()); + } + else { + O << MAI->getCommentString() << " Loop Header is BB" + << getFunctionNumber() << "_" << loop->getHeader()->getNumber(); + } + + if (loop->empty()) { + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << " Inner Loop"; + } + + // Add parent loop information + for (const MachineLoop *CurLoop = loop->getParentLoop(); + CurLoop; + CurLoop = CurLoop->getParentLoop()) { + MachineBasicBlock *Header = CurLoop->getHeader(); + assert(Header && "No header for loop"); + + O << '\n'; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString(); + O.indent((CurLoop->getLoopDepth()-1)*2) + << " Inside Loop BB" << getFunctionNumber() << "_" + << Header->getNumber() << " Depth " << CurLoop->getLoopDepth(); + } + } + } } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 01c431c849a3c..ecf00077fc314 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -14,9 +14,10 @@ #include "DIE.h" #include "DwarfPrinter.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" -#include +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -75,24 +76,24 @@ void DIEAbbrev::Emit(const AsmPrinter *Asm) const { } #ifndef NDEBUG -void DIEAbbrev::print(std::ostream &O) { +void DIEAbbrev::print(raw_ostream &O) { O << "Abbreviation @" - << std::hex << (intptr_t)this << std::dec + << format("0x%lx", (long)(intptr_t)this) << " " << dwarf::TagString(Tag) << " " << dwarf::ChildrenString(ChildrenFlag) - << "\n"; + << '\n'; for (unsigned i = 0, N = Data.size(); i < N; ++i) { O << " " << dwarf::AttributeString(Data[i].getAttribute()) << " " << dwarf::FormEncodingString(Data[i].getForm()) - << "\n"; + << '\n'; } } -void DIEAbbrev::dump() { print(cerr); } +void DIEAbbrev::dump() { print(errs()); } #endif //===----------------------------------------------------------------------===// @@ -125,7 +126,7 @@ void DIE::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIE::print(std::ostream &O, unsigned IncIndent) { +void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; const std::string Indent(IndentCount, ' '); bool isBlock = Abbrev.getTag() == 0; @@ -133,7 +134,7 @@ void DIE::print(std::ostream &O, unsigned IncIndent) { if (!isBlock) { O << Indent << "Die: " - << "0x" << std::hex << (intptr_t)this << std::dec + << format("0x%lx", (long)(intptr_t)this) << ", Offset: " << Offset << ", Size: " << Size << "\n"; @@ -175,14 +176,14 @@ void DIE::print(std::ostream &O, unsigned IncIndent) { } void DIE::dump() { - print(cerr); + print(errs()); } #endif #ifndef NDEBUG void DIEValue::dump() { - print(cerr); + print(errs()); } #endif @@ -206,7 +207,7 @@ void DIEInteger::EmitValue(Dwarf *D, unsigned Form) const { case dwarf::DW_FORM_data8: Asm->EmitInt64(Integer); break; case dwarf::DW_FORM_udata: Asm->EmitULEB128Bytes(Integer); break; case dwarf::DW_FORM_sdata: 
Asm->EmitSLEB128Bytes(Integer); break; - default: assert(0 && "DIE Value form not supported yet"); break; + default: llvm_unreachable("DIE Value form not supported yet"); } } @@ -223,9 +224,9 @@ unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const { case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru case dwarf::DW_FORM_data8: return sizeof(int64_t); - case dwarf::DW_FORM_udata: return TargetAsmInfo::getULEB128Size(Integer); - case dwarf::DW_FORM_sdata: return TargetAsmInfo::getSLEB128Size(Integer); - default: assert(0 && "DIE Value form not supported yet"); break; + case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + default: llvm_unreachable("DIE Value form not supported yet"); break; } return 0; } @@ -241,9 +242,9 @@ void DIEInteger::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEInteger::print(std::ostream &O) { +void DIEInteger::print(raw_ostream &O) { O << "Int: " << (int64_t)Integer - << " 0x" << std::hex << Integer << std::dec; + << format(" 0x%llx", (unsigned long long)Integer); } #endif @@ -268,7 +269,7 @@ void DIEString::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEString::print(std::ostream &O) { +void DIEString::print(raw_ostream &O) { O << "Str: \"" << Str << "\""; } #endif @@ -302,7 +303,7 @@ void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEDwarfLabel::print(std::ostream &O) { +void DIEDwarfLabel::print(raw_ostream &O) { O << "Lbl: "; Label.print(O); } @@ -337,7 +338,7 @@ void DIEObjectLabel::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEObjectLabel::print(std::ostream &O) { +void DIEObjectLabel::print(raw_ostream &O) { O << "Obj: " << Label; } #endif @@ -377,7 +378,7 @@ void DIESectionOffset::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIESectionOffset::print(std::ostream &O) { +void DIESectionOffset::print(raw_ostream &O) { O << "Off: "; Label.print(O); O << "-"; @@ -417,7 +418,7 @@ void DIEDelta::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEDelta::print(std::ostream &O) { +void DIEDelta::print(raw_ostream &O) { O << "Del: "; LabelHi.print(O); O << "-"; @@ -451,8 +452,8 @@ void DIEEntry::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEEntry::print(std::ostream &O) { - O << "Die: 0x" << std::hex << (intptr_t)Entry << std::dec; +void DIEEntry::print(raw_ostream &O) { + O << format("Die: 0x%lx", (long)(intptr_t)Entry); } #endif @@ -481,7 +482,7 @@ void DIEBlock::EmitValue(Dwarf *D, unsigned Form) const { case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; case dwarf::DW_FORM_block: Asm->EmitULEB128Bytes(Size); break; - default: assert(0 && "Improper form for block"); break; + default: llvm_unreachable("Improper form for block"); break; } const SmallVector &AbbrevData = Abbrev.getData(); @@ -498,8 +499,8 @@ unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const { case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); - case dwarf::DW_FORM_block: return Size + TargetAsmInfo::getULEB128Size(Size); - default: assert(0 && "Improper form for block"); break; + case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); break; } return 0; } @@ -510,7 +511,7 @@ void 
DIEBlock::Profile(FoldingSetNodeID &ID) { } #ifndef NDEBUG -void DIEBlock::print(std::ostream &O) { +void DIEBlock::print(raw_ostream &O) { O << "Blk: "; DIE::print(O, 5); } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 5b60327f9036c..62b51ecd18ac6 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -19,8 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/raw_ostream.h" -#include +#include namespace llvm { class AsmPrinter; @@ -103,10 +102,7 @@ namespace llvm { void Emit(const AsmPrinter *Asm) const; #ifndef NDEBUG - void print(std::ostream *O) { - if (O) print(*O); - } - void print(std::ostream &O); + void print(raw_ostream &O); void dump(); #endif }; @@ -198,10 +194,7 @@ namespace llvm { void Profile(FoldingSetNodeID &ID) ; #ifndef NDEBUG - void print(std::ostream *O, unsigned IncIndent = 0) { - if (O) print(*O, IncIndent); - } - void print(std::ostream &O, unsigned IncIndent = 0); + void print(raw_ostream &O, unsigned IncIndent = 0); void dump(); #endif }; @@ -248,10 +241,7 @@ namespace llvm { static bool classof(const DIEValue *) { return true; } #ifndef NDEBUG - void print(std::ostream *O) { - if (O) print(*O); - } - virtual void print(std::ostream &O) = 0; + virtual void print(raw_ostream &O) = 0; void dump(); #endif }; @@ -297,7 +287,7 @@ namespace llvm { static bool classof(const DIEValue *I) { return I->getType() == isInteger; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -329,7 +319,7 @@ namespace llvm { static bool classof(const DIEValue *S) { return S->getType() == isString; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -359,7 +349,7 @@ namespace llvm { static bool classof(const DIEValue *L) { return L->getType() == isLabel; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -392,7 +382,7 @@ namespace llvm { } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -431,7 +421,7 @@ namespace llvm { } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -464,7 +454,7 @@ namespace llvm { static bool classof(const DIEValue *D) { return D->getType() == isDelta; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -500,7 +490,7 @@ namespace llvm { static bool classof(const DIEValue *E) { return E->getType() == isEntry; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; @@ -544,7 +534,7 @@ namespace llvm { static bool classof(const DIEValue *E) { return E->getType() == isBlock; } #ifndef NDEBUG - virtual void print(std::ostream &O); + virtual void print(raw_ostream &O); #endif }; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 547140fa52174..4394ec08ef227 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -10,16 +10,24 @@ // This file contains support for writing dwarf debug info into asm files. 
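The DIEInteger::SizeOf and DIEBlock::SizeOf hunks above defer to MCAsmInfo::getULEB128Size for the variable-length DWARF forms. ULEB128 spends one byte per 7 bits of payload, so the size computation reduces to a shift loop; a sketch:

#include <cstdint>

// Sketch of the size computation behind MCAsmInfo::getULEB128Size.
unsigned ulebSize(uint64_t Value) {
  unsigned Size = 0;
  do {
    Value >>= 7;   // each ULEB128 byte carries 7 payload bits
    ++Size;
  } while (Value != 0);
  return Size;
}

// ulebSize(127) == 1, ulebSize(128) == 2, ulebSize(16384) == 3.
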
// //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "llvm/Module.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Timer.h" -#include "llvm/System/Path.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/Timer.h" +#include "llvm/System/Path.h" using namespace llvm; static TimerGroup &getDwarfTimerGroup() { @@ -51,11 +59,13 @@ class VISIBILITY_HIDDEN CompileUnit { /// GVToDieMap - Tracks the mapping of unit level debug informaton /// variables to debug information entries. - std::map GVToDieMap; + /// FIXME : Rename GVToDieMap -> NodeToDieMap + std::map GVToDieMap; /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton /// descriptors to debug information entries using a DIEEntry proxy. - std::map GVToDIEEntryMap; + /// FIXME : Rename + std::map GVToDIEEntryMap; /// Globals - A map of globally visible named entities for this unit. /// @@ -84,12 +94,12 @@ public: /// getDieMapSlotFor - Returns the debug information entry map slot for the /// specified debug variable. - DIE *&getDieMapSlotFor(GlobalVariable *GV) { return GVToDieMap[GV]; } + DIE *&getDieMapSlotFor(MDNode *N) { return GVToDieMap[N]; } - /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for the - /// specified debug variable. - DIEEntry *&getDIEEntrySlotFor(GlobalVariable *GV) { - return GVToDIEEntryMap[GV]; + /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for + /// the specified debug variable. + DIEEntry *&getDIEEntrySlotFor(MDNode *N) { + return GVToDIEEntryMap[N]; } /// AddDie - Adds or interns the DIE to the compile unit. @@ -138,15 +148,18 @@ class VISIBILITY_HIDDEN DbgScope { // Either subprogram or block. unsigned StartLabelID; // Label ID of the beginning of scope. unsigned EndLabelID; // Label ID of the end of scope. + const MachineInstr *LastInsn; // Last instruction of this scope. + const MachineInstr *FirstInsn; // First instruction of this scope. SmallVector Scopes; // Scopes defined in scope. SmallVector Variables;// Variables declared in scope. SmallVector ConcreteInsts;// Concrete insts of funcs. - + // Private state for dump() mutable unsigned IndentLevel; public: DbgScope(DbgScope *P, DIDescriptor D) - : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), IndentLevel(0) {} + : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), LastInsn(0), + FirstInsn(0), IndentLevel(0) {} virtual ~DbgScope(); // Accessors. @@ -159,7 +172,10 @@ public: SmallVector &getConcreteInsts() { return ConcreteInsts; } void setStartLabelID(unsigned S) { StartLabelID = S; } void setEndLabelID(unsigned E) { EndLabelID = E; } - + void setLastInsn(const MachineInstr *MI) { LastInsn = MI; } + const MachineInstr *getLastInsn() { return LastInsn; } + void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; } + const MachineInstr *getFirstInsn() { return FirstInsn; } /// AddScope - Add a scope to the scope. 
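The CompileUnit maps above are now keyed by MDNode*, and getDieMapSlotFor hands back a reference into the map so callers can test for an existing DIE and fill the slot with a single lookup. The idiom in isolation, with stand-in types:

#include <map>

struct MDNode;                       // stand-ins for the LLVM types
struct DIE;

struct CompileUnitSketch {
  std::map<MDNode*, DIE*> NodeToDie;
  // operator[] default-constructs a null slot on first use, so a null
  // result doubles as "not yet created".
  DIE *&getDieMapSlotFor(MDNode *N) { return NodeToDie[N]; }
};

// Usage pattern, as in the AddType pre-existence check later in this patch:
//   DIE *&Slot = CU.getDieMapSlotFor(N);
//   if (!Slot) Slot = buildNewDie(N);   // hypothetical factory
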
/// void AddScope(DbgScope *S) { Scopes.push_back(S); } @@ -172,6 +188,21 @@ public: /// void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); } + void FixInstructionMarkers() { + assert (getFirstInsn() && "First instruction is missing!"); + if (getLastInsn()) + return; + + // If a scope does not have an instruction to mark an end then use + // the end of last child scope. + SmallVector &Scopes = getScopes(); + assert (!Scopes.empty() && "Inner most scope does not have last insn!"); + DbgScope *L = Scopes.back(); + if (!L->getLastInsn()) + L->FixInstructionMarkers(); + setLastInsn(L->getLastInsn()); + } + #ifndef NDEBUG void dump() const; #endif @@ -179,10 +210,10 @@ public: #ifndef NDEBUG void DbgScope::dump() const { - std::string Indent(IndentLevel, ' '); - - cerr << Indent; Desc.dump(); - cerr << " [" << StartLabelID << ", " << EndLabelID << "]\n"; + raw_ostream &err = errs(); + err.indent(IndentLevel); + Desc.dump(); + err << " [" << StartLabelID << ", " << EndLabelID << "]\n"; IndentLevel += 2; @@ -220,10 +251,10 @@ DbgScope::~DbgScope() { } // end llvm namespace -DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T) +DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) : Dwarf(OS, A, T, "dbg"), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), - ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionMap(), + ValuesSet(InitValuesSetSize), Values(), StringPool(), SectionSourceLines(), didInitial(false), shouldEmit(false), FunctionDbgScope(0), DebugTimer(0) { if (TimePassesIsEnabled) @@ -234,7 +265,7 @@ DwarfDebug::~DwarfDebug() { for (unsigned j = 0, M = Values.size(); j < M; ++j) delete Values[j]; - for (DenseMap::iterator + for (DenseMap::iterator I = AbstractInstanceRootMap.begin(), E = AbstractInstanceRootMap.end(); I != E;++I) delete I->second; @@ -479,6 +510,27 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) { AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } + +/// AddSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) { + // If there is no compile unit specified, don't add a line #. + if (SP->getCompileUnit().isNull()) + return; + // If the line number is 0, don't add it. + if (SP->getLineNumber() == 0) + return; + + + unsigned Line = SP->getLineNumber(); + unsigned FileID = FindCompileUnit(SP->getCompileUnit()).getID(); + assert(FileID && "Invalid file id"); + AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// AddSourceLine - Add location information to specified debug information +/// entry. void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { // If there is no compile unit specified, don't add a line #. DICompileUnit CU = Ty->getCompileUnit(); @@ -492,6 +544,270 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } +/* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. 
+ + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function AddBlockByrefType does this. */ + +/// Find the type the programmer originally declared the variable to be +/// and return that type. +/// +DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) { + + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty.getNode()); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType.getNode()); + + DIArray Elements = blockStruct.getTypeArray(); + + if (Elements.isNull()) + return Ty; + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element.getNode()); + if (strcmp(Name.c_str(), DT.getName()) == 0) + return (DT.getTypeDerivedFrom()); + } + + return Ty; +} + +/// AddComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + const DIVariable &VD = DV->getVariable(); + DIType Ty = VD.getType(); + + // Decode the original location, and use that as the start of the byref + // variable's location. + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + } else { + Reg = Reg - dwarf::DW_OP_reg0; + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) { + uint64_t Element = VD.getAddrElement(i); + + if (Element == DIFactory::OpPlus) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i)); + } else if (Element == DIFactory::OpDeref) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIFactory Opcode"); + } + + // Now attach the location information to the DIE. 
+ AddBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function GetBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... + struct __Block_byref_x_VarName *forwarding; + ... + SomeType VarName; + ... + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst + DW_OP_deref + DW_OP_plus_uconst + + That is what this function does. */ + +/// AddBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. +/// +void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + const DIVariable &VD = DV->getVariable(); + DIType Ty = VD.getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + const char *varName = VD.getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty.getNode()); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy.getNode()); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. 
+ DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element.getNode()); + const char *fieldName = DT.getName(); + if (strcmp(fieldName, "__forwarding") == 0) + forwardingField = Element; + else if (strcmp(fieldName, varName) == 0) + varField = Element; + } + + assert(!varField.isNull() && "Can't find byref variable in Block struct"); + assert(!forwardingField.isNull() + && "Can't find forwarding field in Block struct"); + + // Get the offsets for the forwarding field and the variable field. + unsigned int forwardingFieldOffset = + DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3; + unsigned int varFieldOffset = + DIDerivedType(varField.getNode()).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + Reg = Reg - dwarf::DW_OP_reg0; + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + AddUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + AddBlock(Die, Attribute, 0, Block); +} + /// AddAddress - Add an address attribute to a die based on the location /// provided. void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute, @@ -526,7 +842,7 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { return; // Check for pre-existence. - DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getGV()); + DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getNode()); // If it exists then use the existing value. if (Slot) { @@ -539,20 +855,20 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { // Construct type. 
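AddBlockByrefAddress above appends exactly the operator sequence the long comment promises: an optional dereference of the starting pointer, a hop through the __forwarding field, then the offset of the variable's own field. The appended tail as raw DWARF opcodes (offsets shown as single bytes for brevity; the real emitter ULEB128-encodes them):

#include <cstdint>
#include <vector>

enum { DW_OP_deref = 0x06, DW_OP_plus_uconst = 0x23 };

void appendByrefOps(std::vector<uint8_t> &Expr, bool StartsAsPointer,
                    uint8_t ForwardingOff, uint8_t VarOff) {
  if (StartsAsPointer)
    Expr.push_back(DW_OP_deref);          // follow pointer to the struct
  if (ForwardingOff) {                    // skip the add when offset is 0
    Expr.push_back(DW_OP_plus_uconst);
    Expr.push_back(ForwardingOff);
  }
  Expr.push_back(DW_OP_deref);            // follow __forwarding
  if (VarOff) {
    Expr.push_back(DW_OP_plus_uconst);    // land on the variable's field
    Expr.push_back(VarOff);
  }
}
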
DIE Buffer(dwarf::DW_TAG_base_type); - if (Ty.isBasicType(Ty.getTag())) - ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getGV())); - else if (Ty.isDerivedType(Ty.getTag())) - ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getGV())); + if (Ty.isBasicType()) + ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getNode())); + else if (Ty.isCompositeType()) + ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getNode())); else { - assert(Ty.isCompositeType(Ty.getTag()) && "Unknown kind of DIType"); - ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getGV())); + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getNode())); } // Add debug information entry to entity and appropriate context. DIE *Die = NULL; DIDescriptor Context = Ty.getContext(); if (!Context.isNull()) - Die = DW_Unit->getDieMapSlotFor(Context.getGV()); + Die = DW_Unit->getDieMapSlotFor(Context.getNode()); if (Die) { DIE *Child = new DIE(Buffer); @@ -571,14 +887,13 @@ void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIBasicType BTy) { // Get core information. - std::string Name; - BTy.getName(Name); + const char *Name = BTy.getName(); Buffer.setTag(dwarf::DW_TAG_base_type); AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); // Add name if not anonymous or intermediate type. - if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); uint64_t Size = BTy.getSizeInBits() >> 3; AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -588,8 +903,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIDerivedType DTy) { // Get core information. - std::string Name; - DTy.getName(Name); + const char *Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; unsigned Tag = DTy.getTag(); @@ -603,7 +917,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, AddType(DW_Unit, &Buffer, FromTy); // Add name if not anonymous or intermediate type. - if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add size if non-zero (derived types might be zero-sized.) @@ -619,8 +933,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DICompositeType CTy) { // Get core information. - std::string Name; - CTy.getName(Name); + const char *Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; unsigned Tag = CTy.getTag(); @@ -637,9 +950,11 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add enumerators to enumeration type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIE *ElemDie = NULL; - DIEnumerator Enum(Elements.getElement(i).getGV()); - ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); - Buffer.AddChild(ElemDie); + DIEnumerator Enum(Elements.getElement(i).getNode()); + if (!Enum.isNull()) { + ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); + Buffer.AddChild(ElemDie); + } } } break; @@ -647,7 +962,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add return type. DIArray Elements = CTy.getTypeArray(); DIDescriptor RTy = Elements.getElement(0); - AddType(DW_Unit, &Buffer, DIType(RTy.getGV())); + AddType(DW_Unit, &Buffer, DIType(RTy.getNode())); // Add prototype flag. 
AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); @@ -656,7 +971,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - AddType(DW_Unit, Arg, DIType(Ty.getGV())); + AddType(DW_Unit, Arg, DIType(Ty.getNode())); Buffer.AddChild(Arg); } } @@ -674,20 +989,19 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add elements to structure type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); + if (Element.isNull()) + continue; DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) ElemDie = CreateSubprogramDIE(DW_Unit, - DISubprogram(Element.getGV())); + DISubprogram(Element.getNode())); else ElemDie = CreateMemberDIE(DW_Unit, - DIDerivedType(Element.getGV())); + DIDerivedType(Element.getNode())); Buffer.AddChild(ElemDie); } - // FIXME: We'd like an API to register additional attributes for the - // frontend to use while synthesizing, and then we'd use that api in clang - // instead of this. - if (Name == "__block_literal_generic") + if (CTy.isAppleBlockExtension()) AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); unsigned RLang = CTy.getRunTimeLang(); @@ -701,7 +1015,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } // Add name if not anonymous or intermediate type. - if (!Name.empty()) + if (Name) AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); if (Tag == dwarf::DW_TAG_enumeration_type || @@ -729,12 +1043,11 @@ void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ int64_t H = SR.getHi(); DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - if (L != H) { - AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + if (L) + AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + if (H) AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - } Buffer.AddChild(DW_Subrange); } @@ -761,15 +1074,14 @@ void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) - ConstructSubrangeDIE(Buffer, DISubrange(Element.getGV()), IndexTy); + ConstructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IndexTy); } } /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. 
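The subrange hunk above drops the old "L != H" gate: the DW_AT_type reference is now always attached, and the lower and upper bounds are each emitted whenever they are nonzero. The resulting attribute choice, in brief:

#include <cstdint>
#include <cstdio>

// Sketch: which bounds a DW_TAG_subrange_type gets for a range [Lo, Hi].
void describeSubrange(int64_t Lo, int64_t Hi) {
  std::printf("DW_TAG_subrange_type (DW_AT_type always present)\n");
  if (Lo) std::printf("  DW_AT_lower_bound %lld\n", (long long)Lo);
  if (Hi) std::printf("  DW_AT_upper_bound %lld\n", (long long)Hi);
  // A C array int[4] typically yields no lower bound and upper bound 3.
}
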
DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - std::string Name; - ETy->getName(Name); + const char *Name = ETy->getName(); AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); int64_t Value = ETy->getEnumValue(); AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); @@ -780,27 +1092,39 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, const DIGlobalVariable &GV) { DIE *GVDie = new DIE(dwarf::DW_TAG_variable); - std::string Name; - GV.getDisplayName(Name); - AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - std::string LinkageName; - GV.getLinkageName(LinkageName); - if (!LinkageName.empty()) + AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); + + const char *LinkageName = GV.getLinkageName(); + if (LinkageName) { + // Skip special LLVM prefix that is used to inform the asm printer to not + // emit usual symbol prefix before the symbol name. This happens for + // Objective-C symbol names and symbol whose name is replaced using GCC's + // __asm__ attribute. + if (LinkageName[0] == 1) + LinkageName = &LinkageName[1]; AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); + } AddType(DW_Unit, GVDie, GV.getType()); if (!GV.isLocalToUnit()) AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); AddSourceLine(GVDie, &GV); + + // Add address. + DIEBlock *Block = new DIEBlock(); + AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getMangledName(GV.getGlobal())); + AddBlock(GVDie, dwarf::DW_AT_location, 0, Block); + return GVDie; } /// CreateMemberDIE - Create new member DIE. DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ DIE *MemberDie = new DIE(DT.getTag()); - std::string Name; - DT.getName(Name); - if (!Name.empty()) + if (const char *Name = DT.getName()) AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); @@ -849,17 +1173,19 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, bool IsInlined) { DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); - std::string Name; - SP.getName(Name); + const char * Name = SP.getName(); AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - std::string LinkageName; - SP.getLinkageName(LinkageName); - - if (!LinkageName.empty()) + const char *LinkageName = SP.getLinkageName(); + if (LinkageName) { + // Skip special LLVM prefix that is used to inform the asm printer to not emit + // usual symbol prefix before the symbol name. This happens for Objective-C + // symbol names and symbol whose name is replaced using GCC's __asm__ attribute. 
+ if (LinkageName[0] == 1) + LinkageName = &LinkageName[1]; AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); - + } AddSourceLine(SPDie, &SP); DICompositeType SPTy = SP.getType(); @@ -877,7 +1203,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) AddType(DW_Unit, SPDie, SPTy); else - AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getGV())); + AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode())); } if (!SP.isDefinition()) { @@ -888,7 +1214,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - AddType(DW_Unit, Arg, DIType(Args.getElement(i).getGV())); + AddType(DW_Unit, Arg, DIType(Args.getElement(i).getNode())); AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? SPDie->AddChild(Arg); } @@ -898,7 +1224,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // DW_TAG_inlined_subroutine may refer to this DIE. - DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getGV()); + DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode()); Slot = SPDie; return SPDie; } @@ -907,7 +1233,7 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, /// CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const { DenseMap::const_iterator I = - CompileUnitMap.find(Unit.getGV()); + CompileUnitMap.find(Unit.getNode()); assert(I != CompileUnitMap.end() && "Missing compile unit."); return *I->second; } @@ -935,15 +1261,18 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { // Define variable debug information entry. DIE *VariableDie = new DIE(Tag); - std::string Name; - VD.getName(Name); + const char *Name = VD.getName(); AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add source line info if available. AddSourceLine(VariableDie, &VD); // Add variable type. - AddType(Unit, VariableDie, VD.getType()); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead. + if (VD.isBlockByrefVariable()) + AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); + else + AddType(Unit, VariableDie, VD.getType()); // Add variable address. if (!DV->isInlinedFnVar()) { @@ -952,7 +1281,14 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { MachineLocation Location; Location.set(RI->getFrameRegister(*MF), RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); - AddAddress(VariableDie, dwarf::DW_AT_location, Location); + + + if (VD.hasComplexAddress()) + AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + AddAddress(VariableDie, dwarf::DW_AT_location, Location); } return VariableDie; @@ -960,26 +1296,64 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { /// getOrCreateScope - Returns the scope associated with the given descriptor. 
/// -DbgScope *DwarfDebug::getOrCreateScope(GlobalVariable *V) { - DbgScope *&Slot = DbgScopeMap[V]; +DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI) { + DbgScope *&Slot = DbgScopeMap[N]; + if (Slot) return Slot; + + DbgScope *Parent = NULL; + + DIDescriptor Scope(N); + if (Scope.isCompileUnit()) { + return NULL; + } else if (Scope.isSubprogram()) { + DISubprogram SP(N); + DIDescriptor ParentDesc = SP.getContext(); + if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit()) + Parent = getDbgScope(ParentDesc.getNode(), MI); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(N); + DIDescriptor ParentDesc = DB.getContext(); + if (!ParentDesc.isNull()) + Parent = getDbgScope(ParentDesc.getNode(), MI); + } else + assert (0 && "Unexpected scope info"); + + Slot = new DbgScope(Parent, DIDescriptor(N)); + Slot->setFirstInsn(MI); + + if (Parent) + Parent->AddScope(Slot); + else + // First function is top level function. + if (!FunctionDbgScope) + FunctionDbgScope = Slot; + + return Slot; +} + + +/// getOrCreateScope - Returns the scope associated with the given descriptor. +/// FIXME - Remove this method. +DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) { + DbgScope *&Slot = DbgScopeMap[N]; if (Slot) return Slot; DbgScope *Parent = NULL; - DIBlock Block(V); + DILexicalBlock Block(N); // Don't create a new scope if we already created one for an inlined function. - DenseMap::iterator - II = AbstractInstanceRootMap.find(V); + DenseMap::iterator + II = AbstractInstanceRootMap.find(N); if (II != AbstractInstanceRootMap.end()) return LexicalScopeStack.back(); if (!Block.isNull()) { DIDescriptor ParentDesc = Block.getContext(); Parent = - ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getGV()); + ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getNode()); } - Slot = new DbgScope(Parent, DIDescriptor(V)); + Slot = new DbgScope(Parent, DIDescriptor(N)); if (Parent) Parent->AddScope(Slot); @@ -1088,10 +1462,14 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, return; // Get the subprogram debug information entry. - DISubprogram SPD(Desc.getGV()); + DISubprogram SPD(Desc.getNode()); // Get the subprogram die. - DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getGV()); + DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + if (!SPDie) { + ConstructSubprogram(SPD.getNode()); + SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode()); + } assert(SPDie && "Missing subprogram descriptor"); if (!AbstractScope) { @@ -1105,23 +1483,33 @@ void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope, } ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU); + // If there are global variables at this scope then add their dies. + for (SmallVector::iterator SGI = ScopedGVs.begin(), + SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { + MDNode *N = dyn_cast_or_null(*SGI); + if (!N) continue; + DIGlobalVariable GV(N); + if (GV.getContext().getNode() == RootScope->getDesc().getNode()) { + DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); + SPDie->AddChild(ScopedGVDie); + } + } } /// ConstructDefaultDbgScope - Construct a default scope for the subprogram. /// void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { - const char *FnName = MF->getFunction()->getNameStart(); StringMap &Globals = ModuleCU->getGlobals(); - StringMap::iterator GI = Globals.find(FnName); + StringMap::iterator GI = Globals.find(MF->getFunction()->getName()); if (GI != Globals.end()) { DIE *SPDie = GI->second; - + // Add the function bounds. 
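For orientation, the parent-chasing in getDbgScope above can be sketched with simplified stand-in types (an illustration only; Desc, Scope, and getScope are hypothetical, not the patch's classes): scopes are memoized per descriptor node, the parent comes from recursing on the lexical context, and a compile unit terminates the chain.

#include <map>
#include <memory>
#include <string>
#include <vector>

// Simplified stand-ins for MDNode/DbgScope, only to show the recursion shape.
struct Desc { std::string Kind; Desc *Context; };
struct Scope {
  Scope *Parent;
  Desc *D;
  std::vector<Scope *> Children;
};

static std::map<Desc *, std::unique_ptr<Scope>> ScopeMap;

// Mirrors getDbgScope: memoize one Scope per descriptor, recurse on the
// lexical context to find the parent, and stop at the compile unit.
static Scope *getScope(Desc *D) {
  if (!D || D->Kind == "compile_unit")
    return nullptr;
  std::unique_ptr<Scope> &Slot = ScopeMap[D]; // std::map refs stay valid
  if (Slot)
    return Slot.get();
  Scope *Parent = getScope(D->Context);
  Slot.reset(new Scope{Parent, D, {}});
  if (Parent)
    Parent->Children.push_back(Slot.get());
  return Slot.get();
}

int main() {
  Desc CU{"compile_unit", nullptr}, Fn{"subprogram", &CU};
  Desc Blk{"lexical_block", &Fn};
  Scope *S = getScope(&Blk);
  return (S && S->Parent && !S->Parent->Parent) ? 0 : 1;
}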
AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("func_begin", SubprogramCount)); AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, DWLabel("func_end", SubprogramCount)); - + MachineLocation Location(RI->getFrameRegister(*MF)); AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); } @@ -1131,8 +1519,8 @@ void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, - const std::string &FileName) { +unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName, + const char *FileName) { unsigned DId; StringMap::iterator DI = DirectoryIdMap.find(DirName); if (DI != DirectoryIdMap.end()) { @@ -1165,30 +1553,28 @@ unsigned DwarfDebug::GetOrCreateSourceID(const std::string &DirName, return SrcId; } -void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { - DICompileUnit DIUnit(GV); - std::string Dir, FN, Prod; - unsigned ID = GetOrCreateSourceID(DIUnit.getDirectory(Dir), - DIUnit.getFilename(FN)); +void DwarfDebug::ConstructCompileUnit(MDNode *N) { + DICompileUnit DIUnit(N); + const char *FN = DIUnit.getFilename(); + const char *Dir = DIUnit.getDirectory(); + unsigned ID = GetOrCreateSourceID(Dir, FN); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, DWLabel("section_line", 0), DWLabel("section_line", 0), false); AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer(Prod)); + DIUnit.getProducer()); AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - if (!Dir.empty()) + if (Dir) AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); - std::string Flags; - DIUnit.getFlags(Flags); - if (!Flags.empty()) + if (const char *Flags = DIUnit.getFlags()) AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); unsigned RVer = DIUnit.getRunTimeVersion(); @@ -1203,28 +1589,24 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) { ModuleCU = Unit; } - CompileUnitMap[DIUnit.getGV()] = Unit; + CompileUnitMap[DIUnit.getNode()] = Unit; CompileUnits.push_back(Unit); } -void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { - DIGlobalVariable DI_GV(GV); +void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) { + DIGlobalVariable DI_GV(N); + + // If debug information is malformed then ignore it. + if (DI_GV.Verify() == false) + return; // Check for pre-existence. - DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getGV()); + DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getNode()); if (Slot) return; DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV); - // Add address. - DIEBlock *Block = new DIEBlock(); - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - std::string GLN; - AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->getGlobalLinkName(DI_GV.getGlobal(), GLN)); - AddBlock(VariableDie, dwarf::DW_AT_location, 0, Block); - // Add to map. Slot = VariableDie; @@ -1232,16 +1614,15 @@ void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) { ModuleCU->getDie()->AddChild(VariableDie); // Expose as global. FIXME - need to check external flag. 
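The directory/file uniquing in GetOrCreateSourceID above can likewise be pictured independently (a sketch with hypothetical names; the real code also keeps id-to-name tables for .file emission): directory and file names are interned separately, and the resulting id pair is interned once more to yield the source id.

#include <map>
#include <string>
#include <utility>

// Hypothetical sketch of GetOrCreateSourceID's two-level interning: each
// directory and each file name gets a small integer id (starting at 1, as
// DWARF expects), and the (dir, file) pair is interned once more so that
// identical locations share one source id.
struct SourceIds {
  std::map<std::string, unsigned> DirIds, FileIds;
  std::map<std::pair<unsigned, unsigned>, unsigned> SrcIds;

  static unsigned intern(std::map<std::string, unsigned> &M,
                         const std::string &S) {
    return M.insert({S, (unsigned)M.size() + 1}).first->second;
  }

  unsigned get(const std::string &Dir, const std::string &File) {
    std::pair<unsigned, unsigned> Key(intern(DirIds, Dir),
                                      intern(FileIds, File));
    return SrcIds.insert({Key, (unsigned)SrcIds.size() + 1}).first->second;
  }
};

int main() {
  SourceIds Ids;
  return Ids.get("/src", "a.c") == Ids.get("/src", "a.c") ? 0 : 1;
}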
- std::string Name; - ModuleCU->AddGlobal(DI_GV.getName(Name), VariableDie); + ModuleCU->AddGlobal(DI_GV.getName(), VariableDie); return; } -void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { - DISubprogram SP(GV); +void DwarfDebug::ConstructSubprogram(MDNode *N) { + DISubprogram SP(N); // Check for pre-existence. - DIE *&Slot = ModuleCU->getDieMapSlotFor(GV); + DIE *&Slot = ModuleCU->getDieMapSlotFor(N); if (Slot) return; @@ -1259,28 +1640,25 @@ void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) { ModuleCU->getDie()->AddChild(SubprogramDie); // Expose as global. - std::string Name; - ModuleCU->AddGlobal(SP.getName(Name), SubprogramDie); + ModuleCU->AddGlobal(SP.getName(), SubprogramDie); return; } - /// BeginModule - Emit all Dwarf sections that should come prior to the - /// content. Create global DIEs and emit initial debug info sections. - /// This is inovked by the target AsmPrinter. +/// BeginModule - Emit all Dwarf sections that should come prior to the +/// content. Create global DIEs and emit initial debug info sections. +/// This is invoked by the target AsmPrinter. void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { this->M = M; if (TimePassesIsEnabled) DebugTimer->startTimer(); - SmallVector CUs; - SmallVector GVs; - SmallVector SPs; - CollectDebugInfoAnchors(*M, CUs, GVs, SPs); + DebugInfoFinder DbgFinder; + DbgFinder.processModule(*M); // Create all the compile unit DIEs. - for (SmallVector::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) ConstructCompileUnit(*I); if (CompileUnits.empty()) { @@ -1295,23 +1673,19 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { if (!ModuleCU) ModuleCU = CompileUnits[0]; - // If there is not any debug info available for any global variables and any - // subprograms then there is not any debug info to emit. - if (GVs.empty() && SPs.empty()) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return; - } - // Create DIEs for each of the externally visible global variables. - for (SmallVector::iterator I = GVs.begin(), - E = GVs.end(); I != E; ++I) - ConstructGlobalVariableDIE(*I); + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) { + DIGlobalVariable GV(*I); + if (GV.getContext().getNode() != GV.getCompileUnit().getNode()) + ScopedGVs.push_back(*I); + else + ConstructGlobalVariableDIE(*I); + } // Create DIEs for each of the externally visible subprograms. - for (SmallVector::iterator I = SPs.begin(), - E = SPs.end(); I != E; ++I) + for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), + E = DbgFinder.subprogram_end(); I != E; ++I) ConstructSubprogram(*I); MMI = mmi; @@ -1319,11 +1693,11 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { MMI->setDebugInfoAvailability(true); // Prime section data. - SectionMap.insert(TAI->getTextSection()); + SectionMap.insert(Asm->getObjFileLowering().getTextSection()); // Print out .file directives to specify files for .loc directives. These are // printed out early so that they precede any .loc directives. - if (TAI->hasDotLocAndDotFile()) { + if (MAI->hasDotLocAndDotFile()) { for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) { // Remember source id starts at 1.
std::pair Id = getSourceDirectoryAndFileIds(i); @@ -1332,7 +1706,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { FullPath.appendComponent(getSourceFileName(Id.second)); assert(AppendOk && "Could not append filename to directory!"); AppendOk = false; - Asm->EmitFile(i, FullPath.toString()); + Asm->EmitFile(i, FullPath.str()); Asm->EOL(); } } @@ -1347,21 +1721,21 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { /// EndModule - Emit all Dwarf sections that should come after the content. /// void DwarfDebug::EndModule() { - if (!ShouldEmitDwarfDebug()) + if (!ModuleCU) return; if (TimePassesIsEnabled) DebugTimer->startTimer(); // Standard sections final addresses. - Asm->SwitchToSection(TAI->getTextSection()); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); EmitLabel("text_end", 0); - Asm->SwitchToSection(TAI->getDataSection()); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection()); EmitLabel("data_end", 0); // End text sections. for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) { - Asm->SwitchToSection(SectionMap[i]); + Asm->OutStreamer.SwitchSection(SectionMap[i]); EmitLabel("section_end", i); } @@ -1410,6 +1784,135 @@ void DwarfDebug::EndModule() { DebugTimer->stopTimer(); } +/// CollectVariableInfo - Populate DbgScope entries with variables' info. +void DwarfDebug::CollectVariableInfo() { + if (!MMI) return; + MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); + for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), + VE = VMap.end(); VI != VE; ++VI) { + MetadataBase *MB = VI->first; + MDNode *Var = dyn_cast_or_null(MB); + DIVariable DV (Var); + if (DV.isNull()) continue; + unsigned VSlot = VI->second; + DbgScope *Scope = getDbgScope(DV.getContext().getNode(), NULL); + Scope->AddVariable(new DbgVariable(DV, VSlot, false)); + } +} + +/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that +/// start with this machine instruction. +void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); + if (I == DbgScopeBeginMap.end()) + return; + SmallVector &SD = I->second; + for (SmallVector::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setStartLabelID(Label); +} + +/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that +/// end with this machine instruction. +void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) { + InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); + if (I == DbgScopeEndMap.end()) + return; + SmallVector &SD = I->second; + for (SmallVector::iterator SDI = SD.begin(), SDE = SD.end(); + SDI != SDE; ++SDI) + (*SDI)->setEndLabelID(Label); +} + +/// ExtractScopeInformation - Scan machine instructions in this function +/// and collect DbgScopes. Return true if at least one scope was found. +bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { + // If scope information was extracted using .dbg intrinsics then there is no + // need to extract this information by scanning each instruction. + if (!DbgScopeMap.empty()) + return false; + + // Scan each instruction and create scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *MInsn = II; + DebugLoc DL = MInsn->getDebugLoc(); + if (DL.isUnknown()) + continue; + DebugLocTuple DLT = MF->getDebugLocTuple(DL); + if (!DLT.Scope) + continue; + // There is no need to create another DIE for the compile unit. For all + // other scopes, create one DbgScope now. This will be translated + // into a scope DIE at the end. + DIDescriptor D(DLT.Scope); + if (!D.isCompileUnit()) { + DbgScope *Scope = getDbgScope(DLT.Scope, MInsn); + Scope->setLastInsn(MInsn); + } + } + } + + // If a scope's last instruction is not set then use its child scope's + // last instruction as this scope's last instruction. + for (DenseMap::iterator DI = DbgScopeMap.begin(), + DE = DbgScopeMap.end(); DI != DE; ++DI) { + assert (DI->second->getFirstInsn() && "Invalid first instruction!"); + DI->second->FixInstructionMarkers(); + assert (DI->second->getLastInsn() && "Invalid last instruction!"); + } + + // Each scope has first instruction and last instruction to mark beginning + // and end of a scope respectively. Create an inverse map that lists the + // scopes that start (and end) with an instruction. One instruction may + // start (or end) multiple scopes. + for (DenseMap::iterator DI = DbgScopeMap.begin(), + DE = DbgScopeMap.end(); DI != DE; ++DI) { + DbgScope *S = DI->second; + assert (S && "DbgScope is missing!"); + const MachineInstr *MI = S->getFirstInsn(); + assert (MI && "DbgScope does not have first instruction!"); + + InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI); + if (IDI != DbgScopeBeginMap.end()) + IDI->second.push_back(S); + else + DbgScopeBeginMap.insert(std::make_pair(MI, + SmallVector(2, S))); + + MI = S->getLastInsn(); + assert (MI && "DbgScope does not have last instruction!"); + IDI = DbgScopeEndMap.find(MI); + if (IDI != DbgScopeEndMap.end()) + IDI->second.push_back(S); + else + DbgScopeEndMap.insert(std::make_pair(MI, + SmallVector(2, S))); + } + + return !DbgScopeMap.empty(); +} + +static DISubprogram getDISubprogram(MDNode *N) { + + DIDescriptor D(N); + if (D.isNull()) + return DISubprogram(); + + if (D.isCompileUnit()) + return DISubprogram(); + + if (D.isSubprogram()) + return DISubprogram(N); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(N).getContext().getNode()); + + llvm_unreachable("Unexpected Descriptor!"); +} + /// BeginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. void DwarfDebug::BeginFunction(MachineFunction *MF) { @@ -1420,6 +1923,12 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (!ExtractScopeInformation(MF)) + return; + CollectVariableInfo(); +#endif + // Begin accumulating function debug information. MMI->BeginFunction(MF); @@ -1428,14 +1937,28 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { // Emit label for the implicitly defined dbg.stoppoint at the start of the // function.
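The inverse maps built at the end of ExtractScopeInformation above reduce to a small pattern worth seeing in isolation (editor's sketch; Insn, ScopeRange, and buildBeginEndMaps are hypothetical stand-ins for the MachineInstr/DbgScope types): once every scope knows its first and last instruction, one pass produces instruction-to-scopes multimaps so begin/end labels can be planted during emission.

#include <map>
#include <vector>

struct Insn {};              // stand-in for MachineInstr
struct ScopeRange {          // stand-in for DbgScope's [first, last] insns
  const Insn *First;
  const Insn *Last;
};

using InsnScopeMap = std::map<const Insn *, std::vector<ScopeRange *>>;

// Mirrors the loop above: one instruction may open (or close) several
// scopes, so both maps carry vectors rather than single scope pointers.
static void buildBeginEndMaps(std::vector<ScopeRange> &Scopes,
                              InsnScopeMap &BeginMap, InsnScopeMap &EndMap) {
  for (ScopeRange &S : Scopes) {
    BeginMap[S.First].push_back(&S);
    EndMap[S.Last].push_back(&S);
  }
}

int main() {
  Insn I0, I1, I2;
  std::vector<ScopeRange> Scopes = {{&I0, &I2}, {&I0, &I1}};
  InsnScopeMap Begin, End;
  buildBeginEndMaps(Scopes, Begin, End);
  return Begin[&I0].size() == 2 ? 0 : 1; // one insn opened two scopes
}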
+#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN DebugLoc FDL = MF->getDefaultDebugLoc(); if (!FDL.isUnknown()) { DebugLocTuple DLT = MF->getDebugLocTuple(FDL); - unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, - DICompileUnit(DLT.CompileUnit)); + unsigned LabelID = 0; + DISubprogram SP = getDISubprogram(DLT.Scope); + if (!SP.isNull()) + LabelID = RecordSourceLine(SP.getLineNumber(), 0, DLT.Scope); + else + LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); Asm->printLabel(LabelID); + O << '\n'; } - +#else + DebugLoc FDL = MF->getDefaultDebugLoc(); + if (!FDL.isUnknown()) { + DebugLocTuple DLT = MF->getDebugLocTuple(FDL); + unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); + Asm->printLabel(LabelID); + O << '\n'; + } +#endif if (TimePassesIsEnabled) DebugTimer->stopTimer(); } @@ -1448,13 +1971,17 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + if (DbgScopeMap.empty()) + return; +#endif // Define end label for subprogram. EmitLabel("func_end", SubprogramCount); // Get function line info. if (!Lines.empty()) { // Get section line info. - unsigned ID = SectionMap.insert(Asm->CurrentSection_); + unsigned ID = SectionMap.insert(Asm->getCurrentSection()); if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID); std::vector &SectionLineInfos = SectionSourceLines[ID-1]; // Append the function info to section info. @@ -1489,9 +2016,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { if (FunctionDbgScope) { delete FunctionDbgScope; DbgScopeMap.clear(); + DbgScopeBeginMap.clear(); + DbgScopeEndMap.clear(); DbgAbstractScopeMap.clear(); DbgConcreteScopeMap.clear(); - InlinedVariableScopes.clear(); FunctionDbgScope = NULL; LexicalScopeStack.clear(); AbstractInstanceRootList.clear(); @@ -1507,32 +2035,34 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { /// RecordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. -unsigned DwarfDebug::RecordSourceLine(Value *V, unsigned Line, unsigned Col) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - CompileUnit *Unit = CompileUnitMap[V]; - assert(Unit && "Unable to find CompileUnit"); - unsigned ID = MMI->NextLabelID(); - Lines.push_back(SrcLineInfo(Line, Col, Unit->getID(), ID)); +unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, + MDNode *S) { + if (!MMI) + return 0; - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return ID; -} - -/// RecordSourceLine - Records location information and associates it with a -/// label. Returns a unique label ID used to generate a label and provide -/// correspondence to the source line list. 
-unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, - DICompileUnit CU) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - std::string Dir, Fn; - unsigned Src = GetOrCreateSourceID(CU.getDirectory(Dir), - CU.getFilename(Fn)); + const char *Dir = NULL; + const char *Fn = NULL; + + DIDescriptor Scope(S); + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Dir = CU.getDirectory(); + Fn = CU.getFilename(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Dir = SP.getDirectory(); + Fn = SP.getFilename(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Dir = DB.getDirectory(); + Fn = DB.getFilename(); + } else + assert (0 && "Unexpected scope info"); + + unsigned Src = GetOrCreateSourceID(Dir, Fn); unsigned ID = MMI->NextLabelID(); Lines.push_back(SrcLineInfo(Line, Col, Src, ID)); @@ -1552,7 +2082,7 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, if (TimePassesIsEnabled) DebugTimer->startTimer(); - unsigned SrcId = GetOrCreateSourceID(DirName, FileName); + unsigned SrcId = GetOrCreateSourceID(DirName.c_str(), FileName.c_str()); if (TimePassesIsEnabled) DebugTimer->stopTimer(); @@ -1561,11 +2091,11 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, } /// RecordRegionStart - Indicate the start of a region. -unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) { +unsigned DwarfDebug::RecordRegionStart(MDNode *N) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DbgScope *Scope = getOrCreateScope(V); + DbgScope *Scope = getOrCreateScope(N); unsigned ID = MMI->NextLabelID(); if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID); LexicalScopeStack.push_back(Scope); @@ -1577,11 +2107,11 @@ unsigned DwarfDebug::RecordRegionStart(GlobalVariable *V) { } /// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { +unsigned DwarfDebug::RecordRegionEnd(MDNode *N) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DbgScope *Scope = getOrCreateScope(V); + DbgScope *Scope = getOrCreateScope(N); unsigned ID = MMI->NextLabelID(); Scope->setEndLabelID(ID); // FIXME : region.end() may not be in the last basic block. @@ -1598,62 +2128,36 @@ unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { } /// RecordVariable - Indicate the declaration of a local variable. -void DwarfDebug::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI) { +void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - DIDescriptor Desc(GV); + DIDescriptor Desc(N); DbgScope *Scope = NULL; bool InlinedFnVar = false; - if (Desc.getTag() == dwarf::DW_TAG_variable) { - // GV is a global variable. - DIGlobalVariable DG(GV); - Scope = getOrCreateScope(DG.getContext().getGV()); - } else { - DenseMap::iterator - SI = InlinedVariableScopes.find(MI); - - if (SI != InlinedVariableScopes.end()) { - // or GV is an inlined local variable. - Scope = SI->second; - } else { - DIVariable DV(GV); - GlobalVariable *V = DV.getContext().getGV(); - - // FIXME: The code that checks for the inlined local variable is a hack! - DenseMap::iterator - AI = AbstractInstanceRootMap.find(V); - - if (AI != AbstractInstanceRootMap.end()) { - // This method is called each time a DECLARE node is encountered. For an - // inlined function, this could be many, many times. We don't want to - // re-add variables to that DIE for each time. We just want to add them - // once. Check to make sure that we haven't added them already. 
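As a rough picture of the new RecordSourceLine contract above (hypothetical flattened types, not the real DI* wrappers): every scope kind that can appear here, whether compile unit, subprogram, or lexical block, can answer which directory and file it belongs to, so a line entry needs only the scope node plus line and column.

#include <string>
#include <utility>

// Hypothetical flattened scope record; the real code distinguishes the
// DICompileUnit, DISubprogram and DILexicalBlock wrappers around an MDNode.
enum class ScopeKind { CompileUnit, Subprogram, LexicalBlock };
struct ScopeDesc {
  ScopeKind Kind;
  std::string Dir, File;
};

// Mirrors the dispatch in RecordSourceLine: whatever the kind, the scope
// yields a (directory, filename) pair that feeds GetOrCreateSourceID.
static std::pair<std::string, std::string> fileOfScope(const ScopeDesc &S) {
  switch (S.Kind) {
  case ScopeKind::CompileUnit:
  case ScopeKind::Subprogram:
  case ScopeKind::LexicalBlock:
    return std::make_pair(S.Dir, S.File);
  }
  return std::make_pair(std::string(), std::string());
}

int main() {
  ScopeDesc Blk{ScopeKind::LexicalBlock, "/src", "a.c"};
  return fileOfScope(Blk).second == "a.c" ? 0 : 1;
}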
- DenseMap >::iterator - IP = InlinedParamMap.find(V); - - if (IP != InlinedParamMap.end() && IP->second.count(GV) > 0) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - return; - } - - // or GV is an inlined local variable. - Scope = AI->second; - InlinedParamMap[V].insert(GV); - InlinedFnVar = true; - } else { - // or GV is a local variable. - Scope = getOrCreateScope(V); + if (Desc.getTag() == dwarf::DW_TAG_variable) + Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode()); + else { + bool InlinedVar = false; + MDNode *Context = DIVariable(N).getContext().getNode(); + DISubprogram SP(Context); + if (!SP.isNull()) { + // SP is inserted into DbgAbstractScopeMap when inlined function + // start was recorded by RecordInlineFnStart. + DenseMap::iterator + I = DbgAbstractScopeMap.find(SP.getNode()); + if (I != DbgAbstractScopeMap.end()) { + InlinedVar = true; + Scope = I->second; } } + if (!InlinedVar) + Scope = getOrCreateScope(Context); } assert(Scope && "Unable to find the variable's scope"); - DbgVariable *DV = new DbgVariable(DIVariable(GV), FrameIndex, InlinedFnVar); + DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar); Scope->AddVariable(DV); if (TimePassesIsEnabled) @@ -1665,23 +2169,23 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, unsigned Line, unsigned Col) { unsigned LabelID = MMI->NextLabelID(); - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return LabelID; if (TimePassesIsEnabled) DebugTimer->startTimer(); - GlobalVariable *GV = SP.getGV(); - DenseMap::iterator - II = AbstractInstanceRootMap.find(GV); + MDNode *Node = SP.getNode(); + DenseMap::iterator + II = AbstractInstanceRootMap.find(Node); if (II == AbstractInstanceRootMap.end()) { // Create an abstract instance entry for this inlined function if it doesn't // already exist. - DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV)); + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node)); // Get the compile unit context. - DIE *SPDie = ModuleCU->getDieMapSlotFor(GV); + DIE *SPDie = ModuleCU->getDieMapSlotFor(Node); if (!SPDie) SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true); @@ -1693,18 +2197,18 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined); // Keep track of the abstract scope for this function. - DbgAbstractScopeMap[GV] = Scope; + DbgAbstractScopeMap[Node] = Scope; - AbstractInstanceRootMap[GV] = Scope; + AbstractInstanceRootMap[Node] = Scope; AbstractInstanceRootList.push_back(Scope); } // Create a concrete inlined instance for this inlined function. - DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV)); + DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node)); DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine); ScopeDie->setAbstractCompileUnit(ModuleCU); - DIE *Origin = ModuleCU->getDieMapSlotFor(GV); + DIE *Origin = ModuleCU->getDieMapSlotFor(Node); AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, Origin); AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); @@ -1718,20 +2222,20 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, LexicalScopeStack.back()->AddConcreteInst(ConcreteScope); // Keep track of the concrete scope that's inlined into this function. 
- DenseMap >::iterator - SI = DbgConcreteScopeMap.find(GV); + DenseMap >::iterator + SI = DbgConcreteScopeMap.find(Node); if (SI == DbgConcreteScopeMap.end()) - DbgConcreteScopeMap[GV].push_back(ConcreteScope); + DbgConcreteScopeMap[Node].push_back(ConcreteScope); else SI->second.push_back(ConcreteScope); // Track the start label for this inlined function. - DenseMap >::iterator - I = InlineInfo.find(GV); + DenseMap >::iterator + I = InlineInfo.find(Node); if (I == InlineInfo.end()) - InlineInfo[GV].push_back(LabelID); + InlineInfo[Node].push_back(LabelID); else I->second.push_back(LabelID); @@ -1743,15 +2247,15 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return 0; if (TimePassesIsEnabled) DebugTimer->startTimer(); - GlobalVariable *GV = SP.getGV(); - DenseMap >::iterator - I = DbgConcreteScopeMap.find(GV); + MDNode *Node = SP.getNode(); + DenseMap >::iterator + I = DbgConcreteScopeMap.find(Node); if (I == DbgConcreteScopeMap.end()) { // FIXME: Can this situation actually happen? And if so, should it? @@ -1781,33 +2285,6 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) { return ID; } -/// RecordVariableScope - Record scope for the variable declared by -/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes -/// for only inlined subroutine variables. Other variables's scopes are -/// determined during RecordVariable(). -void DwarfDebug::RecordVariableScope(DIVariable &DV, - const MachineInstr *DeclareMI) { - if (TimePassesIsEnabled) - DebugTimer->startTimer(); - - DISubprogram SP(DV.getContext().getGV()); - - if (SP.isNull()) { - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); - - return; - } - - DenseMap::iterator - I = DbgAbstractScopeMap.find(SP.getGV()); - if (I != DbgAbstractScopeMap.end()) - InlinedVariableScopes[DeclareMI] = I->second; - - if (TimePassesIsEnabled) - DebugTimer->stopTimer(); -} - //===----------------------------------------------------------------------===// // Emit Methods //===----------------------------------------------------------------------===// @@ -1832,7 +2309,7 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) { Die->setOffset(Offset); // Start the size with the size of abbreviation code. - Offset += TargetAsmInfo::getULEB128Size(AbbrevNumber); + Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); const SmallVector &Values = Die->getValues(); const SmallVector &AbbrevData = Abbrev->getData(); @@ -1879,38 +2356,40 @@ void DwarfDebug::EmitInitial() { if (didInitial) return; didInitial = true; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Dwarf sections base addresses. 
- if (TAI->doesDwarfRequireFrameSection()) { - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + if (MAI->doesDwarfRequireFrameSection()) { + Asm->OutStreamer.SwitchSection(TLOF.getDwarfFrameSection()); EmitLabel("section_debug_frame", 0); } - Asm->SwitchToDataSection(TAI->getDwarfInfoSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfInfoSection()); EmitLabel("section_info", 0); - Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfAbbrevSection()); EmitLabel("section_abbrev", 0); - Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfARangesSection()); EmitLabel("section_aranges", 0); - if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) { - Asm->SwitchToDataSection(LineInfoDirective); + if (const MCSection *LineInfoDirective = TLOF.getDwarfMacroInfoSection()) { + Asm->OutStreamer.SwitchSection(LineInfoDirective); EmitLabel("section_macinfo", 0); } - Asm->SwitchToDataSection(TAI->getDwarfLineSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfLineSection()); EmitLabel("section_line", 0); - Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfLocSection()); EmitLabel("section_loc", 0); - Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection()); EmitLabel("section_pubnames", 0); - Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection()); EmitLabel("section_str", 0); - Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection()); EmitLabel("section_ranges", 0); - Asm->SwitchToSection(TAI->getTextSection()); + Asm->OutStreamer.SwitchSection(TLOF.getTextSection()); EmitLabel("text_begin", 0); - Asm->SwitchToSection(TAI->getDataSection()); + Asm->OutStreamer.SwitchSection(TLOF.getDataSection()); EmitLabel("data_begin", 0); } @@ -2012,7 +2491,8 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) { void DwarfDebug::EmitDebugInfo() { // Start debug info section. - Asm->SwitchToDataSection(TAI->getDwarfInfoSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfInfoSection()); EmitDebugInfoPerCU(ModuleCU); } @@ -2023,7 +2503,8 @@ void DwarfDebug::EmitAbbreviations() const { // Check to see if it is worth the effort. if (!Abbreviations.empty()) { // Start the debug abbrev section. - Asm->SwitchToDataSection(TAI->getDwarfAbbrevSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAbbrevSection()); EmitLabel("abbrev_begin", 0); @@ -2071,7 +2552,7 @@ void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) { void DwarfDebug::EmitDebugLines() { // If the target is using .loc/.file, the assembler will be emitting the // .debug_line table automatically. - if (TAI->hasDotLocAndDotFile()) + if (MAI->hasDotLocAndDotFile()) return; // Minimum line delta, thus ranging from -10..(255-10). @@ -2080,7 +2561,8 @@ void DwarfDebug::EmitDebugLines() { const int MaxLineDelta = 255 + MinLineDelta; // Start the dwarf line section. - Asm->SwitchToDataSection(TAI->getDwarfLineSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLineSection()); // Construct the section header. EmitDifference("line_end", 0, "line_begin", 0, true); @@ -2147,13 +2629,12 @@ void DwarfDebug::EmitDebugLines() { // Isolate current sections line info. 
const std::vector &LineInfos = SectionSourceLines[j]; - if (Asm->isVerbose()) { - const Section* S = SectionMap[j + 1]; - O << '\t' << TAI->getCommentString() << " Section" + /*if (Asm->isVerbose()) { + const MCSection *S = SectionMap[j + 1]; + O << '\t' << MAI->getCommentString() << " Section" << S->getName() << '\n'; - } else { - Asm->EOL(); - } + }*/ + Asm->EOL(); // Dwarf assumes we start with first line of first source file. unsigned Source = 1; @@ -2165,12 +2646,14 @@ void DwarfDebug::EmitDebugLines() { unsigned LabelID = MMI->MappedLabel(LineInfo.getLabelID()); if (!LabelID) continue; + if (LineInfo.getLine() == 0) continue; + if (!Asm->isVerbose()) Asm->EOL(); else { std::pair SourceID = getSourceDirectoryAndFileIds(LineInfo.getSourceID()); - O << '\t' << TAI->getCommentString() << ' ' + O << '\t' << MAI->getCommentString() << ' ' << getSourceDirectoryName(SourceID.first) << ' ' << getSourceFileName(SourceID.second) <<" :" << utostr_32(LineInfo.getLine()) << '\n'; @@ -2231,7 +2714,7 @@ void DwarfDebug::EmitDebugLines() { /// EmitCommonDebugFrame - Emit common frame info into a debug frame section. /// void DwarfDebug::EmitCommonDebugFrame() { - if (!TAI->doesDwarfRequireFrameSection()) + if (!MAI->doesDwarfRequireFrameSection()) return; int stackGrowth = @@ -2240,7 +2723,8 @@ void DwarfDebug::EmitCommonDebugFrame() { TD->getPointerSize() : -TD->getPointerSize(); // Start the dwarf frame section. - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); EmitLabel("debug_frame_common", 0); EmitDifference("debug_frame_common_end", 0, @@ -2276,11 +2760,12 @@ void DwarfDebug::EmitCommonDebugFrame() { /// section. void DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ - if (!TAI->doesDwarfRequireFrameSection()) + if (!MAI->doesDwarfRequireFrameSection()) return; // Start the dwarf frame section. - Asm->SwitchToDataSection(TAI->getDwarfFrameSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfFrameSection()); EmitDifference("debug_frame_end", DebugFrameInfo.Number, "debug_frame_begin", DebugFrameInfo.Number, true); @@ -2344,7 +2829,8 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) { /// void DwarfDebug::EmitDebugPubNames() { // Start the dwarf pubnames section. - Asm->SwitchToDataSection(TAI->getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); EmitDebugPubNamesPerCU(ModuleCU); } @@ -2355,7 +2841,8 @@ void DwarfDebug::EmitDebugStr() { // Check to see if it is worth the effort. if (!StringPool.empty()) { // Start the dwarf str section. - Asm->SwitchToDataSection(TAI->getDwarfStrSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfStrSection()); // For each of strings in the string pool. for (unsigned StringID = 1, N = StringPool.size(); @@ -2376,7 +2863,8 @@ void DwarfDebug::EmitDebugStr() { /// void DwarfDebug::EmitDebugLoc() { // Start the dwarf loc section. - Asm->SwitchToDataSection(TAI->getDwarfLocSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); Asm->EOL(); } @@ -2384,7 +2872,8 @@ void DwarfDebug::EmitDebugLoc() { /// void DwarfDebug::EmitDebugARanges() { // Start the dwarf aranges section. 
- Asm->SwitchToDataSection(TAI->getDwarfARangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); // FIXME - Mock up #if 0 @@ -2420,16 +2909,18 @@ void DwarfDebug::EmitDebugARanges() { /// void DwarfDebug::EmitDebugRanges() { // Start the dwarf ranges section. - Asm->SwitchToDataSection(TAI->getDwarfRangesSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); Asm->EOL(); } /// EmitDebugMacInfo - Emit visible names into a debug macinfo section. /// void DwarfDebug::EmitDebugMacInfo() { - if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) { + if (const MCSection *LineInfo = + Asm->getObjFileLowering().getDwarfMacroInfoSection()) { // Start the dwarf macinfo section. - Asm->SwitchToDataSection(LineInfoDirective); + Asm->OutStreamer.SwitchSection(LineInfo); Asm->EOL(); } } @@ -2453,13 +2944,14 @@ void DwarfDebug::EmitDebugMacInfo() { /// __debug_info section, and the low_pc is the starting address for the /// inlining instance. void DwarfDebug::EmitDebugInlineInfo() { - if (!TAI->doesDwarfUsesInlineInfoSection()) + if (!MAI->doesDwarfUsesInlineInfoSection()) return; if (!ModuleCU) return; - Asm->SwitchToDataSection(TAI->getDwarfDebugInlineSection()); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfDebugInlineSection()); Asm->EOL(); EmitDifference("debug_inlined_end", 1, "debug_inlined_begin", 1, true); @@ -2470,18 +2962,25 @@ void DwarfDebug::EmitDebugInlineInfo() { Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version"); Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)"); - for (DenseMap >::iterator + for (DenseMap >::iterator I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { - GlobalVariable *GV = I->first; + MDNode *Node = I->first; SmallVector &Labels = I->second; - DISubprogram SP(GV); - std::string Name; - std::string LName; - - SP.getLinkageName(LName); - SP.getName(Name); + DISubprogram SP(Node); + const char *LName = SP.getLinkageName(); + const char *Name = SP.getName(); - Asm->EmitString(LName.empty() ? Name : LName); + if (!LName) + Asm->EmitString(Name); + else { + // Skip special LLVM prefix that is used to inform the asm printer to not + // emit the usual symbol prefix before the symbol name. This happens for + // Objective-C symbol names and symbols whose names are replaced using GCC's + // __asm__ attribute.
+ if (LName[0] == 1) + LName = &LName[1]; + Asm->EmitString(LName); + } Asm->EOL("MIPS linkage name"); Asm->EmitString(Name); Asm->EOL("Function name"); @@ -2490,13 +2989,13 @@ void DwarfDebug::EmitDebugInlineInfo() { for (SmallVector::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { - DIE *SP = ModuleCU->getDieMapSlotFor(GV); + DIE *SP = ModuleCU->getDieMapSlotFor(Node); Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset"); if (TD->getPointerSize() == sizeof(int32_t)) - O << TAI->getData32bitsDirective(); else - O << TAI->getData64bitsDirective(); + O << MAI->getData32bitsDirective(); else + O << MAI->getData64bitsDirective(); PrintLabelName("label", *LI); Asm->EOL("low_pc"); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 101dc705d3b0c..bd377c5593ccf 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -35,7 +35,7 @@ class DbgScope; class DbgConcreteScope; class MachineFrameInfo; class MachineModuleInfo; -class TargetAsmInfo; +class MCAsmInfo; class Timer; //===----------------------------------------------------------------------===// @@ -120,7 +120,7 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// SectionMap - Provides a unique id per text section. /// - UniqueVector SectionMap; + UniqueVector SectionMap; /// SectionSourceLines - Tracks line numbers per text section. /// @@ -139,34 +139,38 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { DbgScope *FunctionDbgScope; /// DbgScopeMap - Tracks the scopes in the current function. - DenseMap DbgScopeMap; + DenseMap DbgScopeMap; + + /// ScopedGVs - Tracks global variables that are not at file scope. + /// For example, void f() { static int b = 42; } + SmallVector ScopedGVs; + + typedef DenseMap > + InsnToDbgScopeMapTy; + + /// DbgScopeBeginMap - Maps an instruction to the list of DbgScopes it starts. + InsnToDbgScopeMapTy DbgScopeBeginMap; + + /// DbgScopeEndMap - Maps an instruction to the list of DbgScopes it ends. + InsnToDbgScopeMapTy DbgScopeEndMap; /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current /// function. - DenseMap DbgAbstractScopeMap; + DenseMap DbgAbstractScopeMap; /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current /// function. - DenseMap > DbgConcreteScopeMap; /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. - DenseMap > InlineInfo; - - /// InlinedVariableScopes - Scopes information for the inlined subroutine - /// variables. - DenseMap InlinedVariableScopes; + DenseMap > InlineInfo; /// AbstractInstanceRootMap - Map of abstract instance roots of inlined /// functions. These are subroutine entries that contain a DW_AT_inline /// attribute. - DenseMap AbstractInstanceRootMap; - - /// InlinedParamMap - A map keeping track of which parameters are assigned to - /// which abstract instance. - DenseMap > InlinedParamMap; + DenseMap AbstractInstanceRootMap; /// AbstractInstanceRootList - List of abstract instance roots of inlined /// functions. These are subroutine entries that contain a DW_AT_inline @@ -284,11 +288,8 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// AddSourceLine - Add location information to specified debug information /// entry. void AddSourceLine(DIE *Die, const DIVariable *V); - - /// AddSourceLine - Add location information to specified debug information - /// entry.
void AddSourceLine(DIE *Die, const DIGlobal *G); - + void AddSourceLine(DIE *Die, const DISubprogram *SP); void AddSourceLine(DIE *Die, const DIType *Ty); /// AddAddress - Add an address attribute to a die based on the location @@ -296,6 +297,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { void AddAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location); + /// AddComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void AddComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of AddComplexAddress. + /// AddBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use AddComplexAddress instead. + /// + void AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + /// AddType - Add a new type attribute to the specified entity. void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); @@ -342,9 +361,10 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); - /// getOrCreateScope - Returns the scope associated with the given descriptor. + /// getDbgScope - Returns the scope associated with the given descriptor. /// - DbgScope *getOrCreateScope(GlobalVariable *V); + DbgScope *getOrCreateScope(MDNode *N); + DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI); /// ConstructDbgScope - Construct the components of a scope. /// @@ -454,20 +474,26 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps /// as well. - unsigned GetOrCreateSourceID(const std::string &DirName, - const std::string &FileName); + unsigned GetOrCreateSourceID(const char *DirName, + const char *FileName); - void ConstructCompileUnit(GlobalVariable *GV); + void ConstructCompileUnit(MDNode *N); - void ConstructGlobalVariableDIE(GlobalVariable *GV); + void ConstructGlobalVariableDIE(MDNode *N); - void ConstructSubprogram(GlobalVariable *GV); + void ConstructSubprogram(MDNode *N); + + // FIXME: This should go away in favor of complex addresses. + /// Find the type the programmer originally declared the variable to be + /// and return that type. Obsolete, use GetComplexAddrType instead. + /// + DIType GetBlockByrefType(DIType Ty, std::string Name); public: //===--------------------------------------------------------------------===// // Main entry points. // - DwarfDebug(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T); + DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T); virtual ~DwarfDebug(); /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should @@ -493,12 +519,7 @@ public: /// RecordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. 
- unsigned RecordSourceLine(Value *V, unsigned Line, unsigned Col); - - /// RecordSourceLine - Records location information and associates it with a - /// label. Returns a unique label ID used to generate a label and provide - /// correspondence to the source line list. - unsigned RecordSourceLine(unsigned Line, unsigned Col, DICompileUnit CU); + unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); /// getRecordSourceLineCount - Return the number of source lines in the debug /// info. @@ -515,14 +536,13 @@ public: const std::string &FileName); /// RecordRegionStart - Indicate the start of a region. - unsigned RecordRegionStart(GlobalVariable *V); + unsigned RecordRegionStart(MDNode *N); /// RecordRegionEnd - Indicate the end of a region. - unsigned RecordRegionEnd(GlobalVariable *V); + unsigned RecordRegionEnd(MDNode *N); /// RecordVariable - Indicate the declaration of a local variable. - void RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI); + void RecordVariable(MDNode *N, unsigned FrameIndex); //// RecordInlinedFnStart - Indicate the start of inlined subroutine. unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU, @@ -531,11 +551,20 @@ public: /// RecordInlinedFnEnd - Indicate the end of inlined subroutine. unsigned RecordInlinedFnEnd(DISubprogram &SP); - /// RecordVariableScope - Record scope for the variable declared by - /// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. Record scopes - /// for only inlined subroutine variables. Other variables's scopes are - /// determined during RecordVariable(). - void RecordVariableScope(DIVariable &DV, const MachineInstr *DeclareMI); + /// ExtractScopeInformation - Scan machine instructions in this function + /// and collect DbgScopes. Return true if at least one scope was found. + bool ExtractScopeInformation(MachineFunction *MF); + + /// CollectVariableInfo - Populate DbgScope entries with variables' info. + void CollectVariableInfo(); + + /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that + /// start with this machine instruction. + void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label); + + /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that + /// end with this machine instruction. + void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 37466ab39a234..626523b820f63 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf exception info into asm files. +// This file contains support for writing DWARF exception info into asm files.
// //===----------------------------------------------------------------------===// @@ -15,30 +15,38 @@ #include "llvm/Module.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLocation.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; static TimerGroup &getDwarfTimerGroup() { - static TimerGroup DwarfTimerGroup("Dwarf Exception"); + static TimerGroup DwarfTimerGroup("DWARF Exception"); return DwarfTimerGroup; } DwarfException::DwarfException(raw_ostream &OS, AsmPrinter *A, - const TargetAsmInfo *T) + const MCAsmInfo *T) : Dwarf(OS, A, T, "eh"), shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false), shouldEmitMovesModule(false), ExceptionTimer(0) { - if (TimePassesIsEnabled) - ExceptionTimer = new Timer("Dwarf Exception Writer", + if (TimePassesIsEnabled) + ExceptionTimer = new Timer("DWARF Exception Writer", getDwarfTimerGroup()); } @@ -46,21 +54,45 @@ DwarfException::~DwarfException() { delete ExceptionTimer; } -void DwarfException::EmitCommonEHFrame(const Function *Personality, - unsigned Index) { +/// SizeOfEncodedValue - Return the size of the encoding in bytes. +unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + case dwarf::DW_EH_PE_absptr: + return TD->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } + + assert(0 && "Invalid encoded value."); + return 0; +} + +/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that +/// is shared among many Frame Description Entries. There is at least one CIE +/// in every non-empty .debug_frame section. +void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // Size and sign of stack growth. int stackGrowth = Asm->TM.getFrameInfo()->getStackGrowthDirection() == TargetFrameInfo::StackGrowsUp ? TD->getPointerSize() : -TD->getPointerSize(); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + // Begin eh frame section. - Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); - - if (!TAI->doesRequireNonLocalEHFrameLabel()) - O << TAI->getEHGlobalPrefix(); + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if (MAI->is_EHSymbolPrivate()) + O << MAI->getPrivateGlobalPrefix(); O << "EH_frame" << Index << ":\n"; + EmitLabel("section_eh_frame", Index); // Define base labels. @@ -79,8 +111,53 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, Asm->EOL("CIE Version"); // The personality presence indicates that language specific information will - // show up in the eh frame. 
- Asm->EmitString(Personality ? "zPLR" : "zR"); + // show up in the eh frame. Find out how we are supposed to lower the + // personality function reference: + const MCExpr *PersonalityRef = 0; + bool IsPersonalityIndirect = false, IsPersonalityPCRel = false; + if (PersonalityFn) { + // FIXME: HANDLE STATIC CODEGEN MODEL HERE. + + // In non-static mode, ask the object file how to represent this reference. + PersonalityRef = + TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang, + Asm->MMI, + IsPersonalityIndirect, + IsPersonalityPCRel); + } + + unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + if (IsPersonalityIndirect) + PerEncoding |= dwarf::DW_EH_PE_indirect; + unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + char Augmentation[5] = { 0 }; + unsigned AugmentationSize = 0; + char *APtr = Augmentation + 1; + + if (PersonalityRef) { + // There is a personality function. + *APtr++ = 'P'; + AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding); + } + + if (UsesLSDA[Index]) { + // An LSDA pointer is in the FDE augmentation. + *APtr++ = 'L'; + ++AugmentationSize; + } + + if (FDEEncoding != dwarf::DW_EH_PE_absptr) { + // A non-default pointer encoding for the FDE. + *APtr++ = 'R'; + ++AugmentationSize; + } + + if (APtr != Augmentation + 1) + Augmentation[0] = 'z'; + + Asm->EmitString(Augmentation); Asm->EOL("CIE Augmentation"); // Round out reader. @@ -91,39 +168,41 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); Asm->EOL("CIE Return Address Column"); - // If there is a personality, we need to indicate the functions location. - if (Personality) { - Asm->EmitULEB128Bytes(7); - Asm->EOL("Augmentation Size"); - - if (TAI->getNeedsIndirectEncoding()) { - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 | - dwarf::DW_EH_PE_indirect); - Asm->EOL("Personality (pcrel sdata4 indirect)"); - } else { - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("Personality (pcrel sdata4)"); + Asm->EmitULEB128Bytes(AugmentationSize); + Asm->EOL("Augmentation Size"); + + Asm->EmitInt8(PerEncoding); + Asm->EOL("Personality", PerEncoding); + + // If there is a personality, we need to indicate the function's location. + if (PersonalityRef) { + // If the reference to the personality function symbol is not already + // pc-relative, then we need to subtract our current address from it. Do + // this by emitting a label and subtracting it from the expression we + // already have. This is equivalent to emitting "foo - .", but we have to + // emit the label for "." directly. 
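The augmentation-string construction above follows a fixed rule that is easy to state on its own (editor's sketch; buildAugmentation is a hypothetical name, and the real code also emits the encoded operands after the size byte): 'P' marks a personality routine, 'L' an LSDA pointer in the FDE, 'R' a non-default FDE pointer encoding, and the presence of any of these forces a leading 'z' so a ULEB128 augmentation-data length follows the string, which is how the old "zPLR"/"zR" constants arose.

#include <cstdio>
#include <string>

// Mirrors the CIE logic above: 'P' marks a personality routine, 'L' an LSDA
// pointer in the FDE, 'R' a non-default FDE pointer encoding; if any appear,
// the string must start with 'z' so a ULEB128 augmentation-data length
// follows it in the CIE. (buildAugmentation is a hypothetical helper.)
static std::string buildAugmentation(bool HasPersonality, bool HasLSDA,
                                     bool NonDefaultFDEEncoding) {
  std::string Aug;
  if (HasPersonality)        Aug += 'P';
  if (HasLSDA)               Aug += 'L';
  if (NonDefaultFDEEncoding) Aug += 'R';
  if (!Aug.empty())
    Aug.insert(Aug.begin(), 'z');
  return Aug;
}

int main() {
  std::printf("%s\n", buildAugmentation(true, true, true).c_str());   // zPLR
  std::printf("%s\n", buildAugmentation(false, false, true).c_str()); // zR
  return 0;
}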
+ if (!IsPersonalityPCRel) { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index; + MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + Asm->OutStreamer.EmitLabel(DotSym); + + PersonalityRef = + MCBinaryExpr::CreateSub(PersonalityRef, + MCSymbolRefExpr::Create(DotSym,Asm->OutContext), + Asm->OutContext); } - - PrintRelDirective(true); - O << TAI->getPersonalityPrefix(); - Asm->EmitExternalGlobal((const GlobalVariable *)(Personality)); - O << TAI->getPersonalitySuffix(); - if (strcmp(TAI->getPersonalitySuffix(), "+4@GOTPCREL")) - O << "-" << TAI->getPCSymbol(); + + O << MAI->getData32bitsDirective(); + PersonalityRef->print(O, MAI); Asm->EOL("Personality"); - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("LSDA Encoding (pcrel sdata4)"); + Asm->EmitInt8(LSDAEncoding); + Asm->EOL("LSDA Encoding", LSDAEncoding); - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("FDE Encoding (pcrel sdata4)"); - } else { - Asm->EmitULEB128Bytes(1); - Asm->EOL("Augmentation Size"); - - Asm->EmitInt8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - Asm->EOL("FDE Encoding (pcrel sdata4)"); + Asm->EmitInt8(FDEEncoding); + Asm->EOL("FDE Encoding", FDEEncoding); } // Indicate locations of general callee saved registers in frame. @@ -134,55 +213,44 @@ void DwarfException::EmitCommonEHFrame(const Function *Personality, // On Darwin the linker honors the alignment of eh_frame, which means it must // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get // holes which confuse readers of eh_frame. - Asm->EmitAlignment(TD->getPointerSize() == sizeof(int32_t) ? 2 : 3, - 0, 0, false); + Asm->EmitAlignment(TD->getPointerSize() == 4 ? 2 : 3, 0, 0, false); EmitLabel("eh_frame_common_end", Index); Asm->EOL(); } -/// EmitEHFrame - Emit function exception frame information. -/// -void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { - assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && +/// EmitFDE - Emit the Frame Description Entry (FDE) for the function. +void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { + assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() && "Should not emit 'available externally' functions at all"); - Function::LinkageTypes linkage = EHFrameInfo.function->getLinkage(); - Asm->SwitchToTextSection(TAI->getDwarfEHFrameSection()); + const Function *TheFunc = EHFrameInfo.function; + + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection()); // Externally visible entry into the functions eh frame info. If the // corresponding function is static, this should not be externally visible. - if (linkage != Function::InternalLinkage && - linkage != Function::PrivateLinkage) { - if (const char *GlobalEHDirective = TAI->getGlobalEHDirective()) + if (!TheFunc->hasLocalLinkage()) + if (const char *GlobalEHDirective = MAI->getGlobalEHDirective()) O << GlobalEHDirective << EHFrameInfo.FnName << "\n"; - } // If corresponding function is weak definition, this should be too. 
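Numerically, the "foo - ." rewrite above amounts to emitting the difference between the target symbol's address and the address of the emission point (a sketch only; pcRelWord and the addresses are invented):

#include <cassert>
#include <cstdint>

// Numeric picture of the label-subtraction trick: emitting the difference
// between the target symbol and a label planted at the emission point
// converts an absolute reference into the pc-relative form expected here.
static int32_t pcRelWord(uint64_t TargetAddr, uint64_t EmissionPointAddr) {
  return (int32_t)(TargetAddr - EmissionPointAddr);
}

int main() {
  const uint64_t Personality = 0x400800, Here = 0x400100; // made-up addresses
  assert(pcRelWord(Personality, Here) == 0x700);
  return 0;
}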
- if ((linkage == Function::WeakAnyLinkage || - linkage == Function::WeakODRLinkage || - linkage == Function::LinkOnceAnyLinkage || - linkage == Function::LinkOnceODRLinkage) && - TAI->getWeakDefDirective()) - O << TAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; + if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective()) + O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n"; // If there are no calls then you can't unwind. This may mean we can omit the // EH Frame, but some environments do not handle weak absolute symbols. If // UnwindTablesMandatory is set we cannot do this optimization; the unwind // info is to be available for non-EH uses. - if (!EHFrameInfo.hasCalls && - !UnwindTablesMandatory && - ((linkage != Function::WeakAnyLinkage && - linkage != Function::WeakODRLinkage && - linkage != Function::LinkOnceAnyLinkage && - linkage != Function::LinkOnceODRLinkage) || - !TAI->getWeakDefDirective() || - TAI->getSupportsWeakOmittedEHFrame())) { + if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory && + (!TheFunc->isWeakForLinker() || + !MAI->getWeakDefDirective() || + MAI->getSupportsWeakOmittedEHFrame())) { O << EHFrameInfo.FnName << " = 0\n"; // This name has no connection to the function, so it might get // dead-stripped when the function is not, erroneously. Prohibit // dead-stripping unconditionally. - if (const char *UsedDirective = TAI->getUsedDirective()) + if (const char *UsedDirective = MAI->getUsedDirective()) O << UsedDirective << EHFrameInfo.FnName << "\n\n"; } else { O << EHFrameInfo.FnName << ":\n"; @@ -194,17 +262,9 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { EmitLabel("eh_frame_begin", EHFrameInfo.Number); - if (TAI->doesRequireNonLocalEHFrameLabel()) { - PrintRelDirective(true, true); - PrintLabelName("eh_frame_begin", EHFrameInfo.Number); - - if (!TAI->isAbsoluteEHSectionOffsets()) - O << "-EH_frame" << EHFrameInfo.PersonalityIndex; - } else { - EmitSectionOffset("eh_frame_begin", "eh_frame_common", - EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, - true, true, false); - } + EmitSectionOffset("eh_frame_begin", "eh_frame_common", + EHFrameInfo.Number, EHFrameInfo.PersonalityIndex, + true, true, false); Asm->EOL("FDE CIE offset"); @@ -216,14 +276,20 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { // If there is a personality and landing pads then point to the language // specific data area in the exception table. - if (EHFrameInfo.PersonalityIndex) { - Asm->EmitULEB128Bytes(4); + if (MMI->getPersonalities()[0] != NULL) { + bool is4Byte = TD->getPointerSize() == sizeof(int32_t); + + Asm->EmitULEB128Bytes(is4Byte ? 4 : 8); Asm->EOL("Augmentation size"); if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->EmitInt32((int)0); + EmitReference("exception", EHFrameInfo.Number, true, false); + else { + if (is4Byte) + Asm->EmitInt32((int)0); + else + Asm->EmitInt64((int)0); + } Asm->EOL("Language Specific Data Area"); } else { Asm->EmitULEB128Bytes(0); @@ -231,7 +297,7 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { } // Indicate locations of function specific callee saved registers in frame. 
- EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, + EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves, true); // On Darwin the linker honors the alignment of eh_frame, which means it @@ -246,32 +312,13 @@ void DwarfException::EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo) { // retains the function in this case, and there is code around that depends // on unused functions (calling undefined externals) being dead-stripped to // link correctly. Yes, there really is. - if (MMI->getUsedFunctions().count(EHFrameInfo.function)) - if (const char *UsedDirective = TAI->getUsedDirective()) + if (MMI->isUsedFunction(EHFrameInfo.function)) + if (const char *UsedDirective = MAI->getUsedDirective()) O << UsedDirective << EHFrameInfo.FnName << "\n\n"; } -} -/// EmitExceptionTable - Emit landing pads and actions. -/// -/// The general organization of the table is complex, but the basic concepts are -/// easy. First there is a header which describes the location and organization -/// of the three components that follow. -/// -/// 1. The landing pad site information describes the range of code covered by -/// the try. In our case it's an accumulation of the ranges covered by the -/// invokes in the try. There is also a reference to the landing pad that -/// handles the exception once processed. Finally an index into the actions -/// table. -/// 2. The action table, in our case, is composed of pairs of type ids and next -/// action offset. Starting with the action index from the landing pad -/// site, each type Id is checked for a match to the current exception. If -/// it matches then the exception and type id are passed on to the landing -/// pad. Otherwise the next action is looked up. This chain is terminated -/// with a next action of zero. If no type id is found the the frame is -/// unwound and handling continues. -/// 3. Type id table contains references to all the C++ typeinfo for all -/// catches in the function. This tables is reversed indexed base 1. + Asm->EOL(); +} /// SharedTypeIds - How many leading type ids two landing pads have in common. unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, @@ -301,51 +348,58 @@ bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { return LSize < RSize; } -void DwarfException::EmitExceptionTable() { - const std::vector &TypeInfos = MMI->getTypeInfos(); - const std::vector &FilterIds = MMI->getFilterIds(); - const std::vector &PadInfos = MMI->getLandingPads(); - if (PadInfos.empty()) return; - - // Sort the landing pads in order of their type ids. This is used to fold - // duplicate actions. - SmallVector LandingPads; - LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); - - // Negative type ids index into FilterIds, positive type ids index into - // TypeInfos. The value written for a positive type id is just the type id - // itself. For a negative type id, however, the value written is the +/// ComputeActionsTable - Compute the actions table and gather the first action +/// index for each landing pad site. +unsigned DwarfException:: +ComputeActionsTable(const SmallVectorImpl &LandingPads, + SmallVectorImpl &Actions, + SmallVectorImpl &FirstActions) { + + // The action table follows the call-site table in the LSDA. 
The individual + // records are of two types: + // + // * Catch clause + // * Exception specification + // + // The two record kinds have the same format, with only small differences. + // They are distinguished by the "switch value" field: Catch clauses + // (TypeInfos) have strictly positive switch values, and exception + // specifications (FilterIds) have strictly negative switch values. Value 0 + // indicates a catch-all clause. + // + // Negative type IDs index into FilterIds. Positive type IDs index into + // TypeInfos. The value written for a positive type ID is just the type ID + // itself. For a negative type ID, however, the value written is the // (negative) byte offset of the corresponding FilterIds entry. The byte - // offset is usually equal to the type id, because the FilterIds entries are - // written using a variable width encoding which outputs one byte per entry as - // long as the value written is not too large, but can differ. This kind of - // complication does not occur for positive type ids because type infos are + // offset is usually equal to the type ID (because the FilterIds entries are + // written using a variable width encoding, which outputs one byte per entry + // as long as the value written is not too large) but can differ. This kind + // of complication does not occur for positive type IDs because type infos are // output using a fixed width encoding. FilterOffsets[i] holds the byte // offset corresponding to FilterIds[i]. + + const std::vector &FilterIds = MMI->getFilterIds(); SmallVector FilterOffsets; FilterOffsets.reserve(FilterIds.size()); int Offset = -1; - for(std::vector::const_iterator I = FilterIds.begin(), - E = FilterIds.end(); I != E; ++I) { + + for (std::vector::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { FilterOffsets.push_back(Offset); - Offset -= TargetAsmInfo::getULEB128Size(*I); + Offset -= MCAsmInfo::getULEB128Size(*I); } - // Compute the actions table and gather the first action index for each - // landing pad site. - SmallVector Actions; - SmallVector FirstActions; FirstActions.reserve(LandingPads.size()); int FirstAction = 0; unsigned SizeActions = 0; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LP = LandingPads[i]; - const std::vector &TypeIds = LP->TypeIds; - const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; + const LandingPadInfo *PrevLPI = 0; + + for (SmallVectorImpl::const_iterator + I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { + const LandingPadInfo *LPI = *I; + const std::vector &TypeIds = LPI->TypeIds; + const unsigned NumShared = PrevLPI ? 
SharedTypeIds(LPI, PrevLPI) : 0; unsigned SizeSiteActions = 0; if (NumShared < TypeIds.size()) { @@ -353,34 +407,33 @@ void DwarfException::EmitExceptionTable() { ActionEntry *PrevAction = 0; if (NumShared) { - const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); + const unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); PrevAction = &Actions.back(); - SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) + - TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + + MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { SizeAction -= - TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); SizeAction += -PrevAction->NextAction; PrevAction = PrevAction->Previous; } } // Compute the actions. - for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { - int TypeID = TypeIds[I]; - assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); + for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { + int TypeID = TypeIds[J]; + assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID); + unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction); + SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); SizeSiteActions += SizeAction; - ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; + ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); - PrevAction = &Actions.back(); } @@ -388,35 +441,34 @@ void DwarfException::EmitExceptionTable() { FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; } // else identical - re-use previous FirstAction + // Information used when creating the call-site table. The action record + // field of the call site record is the offset of the first associated + // action record, relative to the start of the actions table. This value is + // biased by 1 (1 indicating the start of the actions table), and 0 + // indicates that there are no actions. FirstActions.push_back(FirstAction); // Compute this sites contribution to size. SizeActions += SizeSiteActions; - } - - // Compute the call-site table. The entry for an invoke has a try-range - // containing the call, a non-zero landing pad and an appropriate action. The - // entry for an ordinary call has a try-range containing the call and zero for - // the landing pad and the action. Calls marked 'nounwind' have no entry and - // must not be contained in the try-range of any entry - they form gaps in the - // table. Entries must be ordered by try-range address. - SmallVector CallSites; - - RangeMapType PadMap; - // Invokes and nounwind calls have entries in PadMap (due to being bracketed - // by try-range labels when lowered). Ordinary calls do not, so appropriate - // try-ranges for them need be deduced.
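All of the size bookkeeping above reduces to one question: how many bytes does a value occupy under LEB128? That is also why a FilterIds byte offset usually equals the type ID itself yet can diverge for large values. The helper below is a self-contained sketch of what MCAsmInfo::getULEB128Size computes; it is illustrative only and not part of the patch:

    #include <cassert>

    // Bytes needed to encode Value as unsigned LEB128: one byte per
    // 7-bit group, always at least one.
    static unsigned ulebSize(unsigned long long Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;
        ++Size;
      } while (Value);
      return Size;
    }

    int main() {
      assert(ulebSize(0) == 1);     // values up to 127 take one byte,
      assert(ulebSize(127) == 1);   // so small filter ids and their byte
      assert(ulebSize(128) == 2);   // offsets coincide...
      assert(ulebSize(16384) == 3); // ...but larger values drift apart.
      return 0;
    }
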
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LandingPad = LandingPads[i]; - for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { - unsigned BeginLabel = LandingPad->BeginLabels[j]; - assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); - PadRange P = { i, j }; - PadMap[BeginLabel] = P; - } + PrevLPI = LPI; } + return SizeActions; +} + +/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke +/// has a try-range containing the call, a non-zero landing pad, and an +/// appropriate action. The entry for an ordinary call has a try-range +/// containing the call and zero for the landing pad and the action. Calls +/// marked 'nounwind' have no entry and must not be contained in the try-range +/// of any entry - they form gaps in the table. Entries must be ordered by +/// try-range address. +void DwarfException:: +ComputeCallSiteTable(SmallVectorImpl &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl &LandingPads, + const SmallVectorImpl &FirstActions) { // The end label of the previous invoke or nounwind try-range. unsigned LastLabel = 0; @@ -424,7 +476,7 @@ void DwarfException::EmitExceptionTable() { // an ordinary call) between the end of the previous try-range and now. bool SawPotentiallyThrowing = false; - // Whether the last callsite entry was for an invoke. + // Whether the last CallSite entry was for an invoke. bool PreviousIsInvoke = false; // Visit all instructions in order of address. @@ -450,17 +502,18 @@ void DwarfException::EmitExceptionTable() { // Nope, it was just some random label. continue; - PadRange P = L->second; + const PadRange &P = L->second; const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && "Inconsistent landing pad map!"); - // If some instruction between the previous try-range and this one may - // throw, create a call-site entry with no landing pad for the region - // between the try-ranges. - if (SawPotentiallyThrowing) { - CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; + // For Dwarf exception handling (SjLj handling doesn't use this): if some + // instruction between the previous try-range and this one may throw, + // create a call-site entry with no landing pad for the region between the + // try-ranges. + if (SawPotentiallyThrowing && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 }; CallSites.push_back(Site); PreviousIsInvoke = false; } @@ -470,12 +523,16 @@ void DwarfException::EmitExceptionTable() { if (LandingPad->LandingPadLabel) { // This try-range is for an invoke. - CallSiteEntry Site = {BeginLabel, LastLabel, - LandingPad->LandingPadLabel, - FirstActions[P.PadIndex]}; - - // Try to merge with the previous call-site. - if (PreviousIsInvoke) { + CallSiteEntry Site = { + BeginLabel, + LastLabel, + LandingPad->LandingPadLabel, + FirstActions[P.PadIndex] + }; + + // Try to merge with the previous call-site. SJLJ doesn't do this. + if (PreviousIsInvoke && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { CallSiteEntry &Prev = CallSites.back(); if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { // Extend the range of the previous entry.
@@ -497,128 +554,363 @@ void DwarfException::EmitExceptionTable() { // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. - if (SawPotentiallyThrowing) { - CallSiteEntry Site = {LastLabel, 0, 0, 0}; + if (SawPotentiallyThrowing && + MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) { + CallSiteEntry Site = { LastLabel, 0, 0, 0 }; CallSites.push_back(Site); } +} + +/// EmitExceptionTable - Emit landing pads and actions. +/// +/// The general organization of the table is complex, but the basic concepts are +/// easy. First there is a header which describes the location and organization +/// of the three components that follow. +/// +/// 1. The landing pad site information describes the range of code covered by +/// the try. In our case it's an accumulation of the ranges covered by the +/// invokes in the try. There is also a reference to the landing pad that +/// handles the exception once processed. Finally an index into the actions +/// table. +/// 2. The action table, in our case, is composed of pairs of type IDs and next +/// action offset. Starting with the action index from the landing pad +/// site, each type ID is checked for a match to the current exception. If +/// it matches then the exception and type ID are passed on to the landing +/// pad. Otherwise the next action is looked up. This chain is terminated +/// with a next action of zero. If no type ID is found then the frame is +/// unwound and handling continues. +/// 3. Type ID table contains references to all the C++ typeinfo for all +/// catches in the function. This table is reverse indexed, base 1. +void DwarfException::EmitExceptionTable() { + const std::vector &TypeInfos = MMI->getTypeInfos(); + const std::vector &FilterIds = MMI->getFilterIds(); + const std::vector &PadInfos = MMI->getLandingPads(); + if (PadInfos.empty()) return; + + // Sort the landing pads in order of their type ids. This is used to fold + // duplicate actions. + SmallVector LandingPads; + LandingPads.reserve(PadInfos.size()); + + for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) + LandingPads.push_back(&PadInfos[i]); + + std::sort(LandingPads.begin(), LandingPads.end(), PadLT); + + // Compute the actions table and gather the first action index for each + // landing pad site. + SmallVector Actions; + SmallVector FirstActions; + unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, + FirstActions); + + // Invokes and nounwind calls have entries in PadMap (due to being bracketed + // by try-range labels when lowered). Ordinary calls do not, so appropriate + // try-ranges for them need be deduced when using DWARF exception handling. + RangeMapType PadMap; + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + unsigned BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } + + // Compute the call-site table. + SmallVector CallSites; + ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); // Final tallies. // Call sites.
- const unsigned SiteStartSize = sizeof(int32_t); // DW_EH_PE_udata4 - const unsigned SiteLengthSize = sizeof(int32_t); // DW_EH_PE_udata4 - const unsigned LandingPadSize = sizeof(int32_t); // DW_EH_PE_udata4 - unsigned SizeSites = CallSites.size() * (SiteStartSize + - SiteLengthSize + - LandingPadSize); - for (unsigned i = 0, e = CallSites.size(); i < e; ++i) - SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + const unsigned SiteStartSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + const unsigned SiteLengthSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); + bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; + unsigned SizeSites; + + if (IsSJLJ) + SizeSites = 0; + else + SizeSites = CallSites.size() * + (SiteStartSize + SiteLengthSize + LandingPadSize); + + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { + SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); + if (IsSJLJ) + SizeSites += MCAsmInfo::getULEB128Size(i); + } // Type infos. - const unsigned TypeInfoSize = TD->getPointerSize(); // DW_EH_PE_absptr - unsigned SizeTypes = TypeInfos.size() * TypeInfoSize; - - unsigned TypeOffset = sizeof(int8_t) + // Call site format - TargetAsmInfo::getULEB128Size(SizeSites) + // Call-site table length - SizeSites + SizeActions + SizeTypes; - - unsigned TotalSize = sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset - TypeOffset; + const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + unsigned TTypeFormat; + unsigned TypeFormatSize; + + if (!HaveTTData) { + // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say + // that we're omitting that bit. + TTypeFormat = dwarf::DW_EH_PE_omit; + TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr); + } else { + // Okay, we have actual filters or typeinfos to emit. As such, we need to + // pick a type encoding for them. We're about to emit a list of pointers to + // typeinfo objects at the end of the LSDA. However, unless we're in static + // mode, this reference will require a relocation by the dynamic linker. + // + // Because of this, we have a couple of options: + // + // 1) If we are in -static mode, we can always use an absolute reference + // from the LSDA, because the static linker will resolve it. + // + // 2) Otherwise, if the LSDA section is writable, we can output the direct + // reference to the typeinfo and allow the dynamic linker to relocate + // it. Since it is in a writable section, the dynamic linker won't + // have a problem. + // + // 3) Finally, if we're in PIC mode and the LSDA section isn't writable, + // we need to use some form of indirection. For example, on Darwin, + // we can output a statically-relocatable reference to a dyld stub. The + // offset to the stub is constant, but the contents are in a section + // that is updated by the dynamic linker. This is easy enough, but we + // need to tell the personality function of the unwinder to indirect + // through the dyld stub. + // + // FIXME: When (3) is actually implemented, we'll have to emit the stubs + // somewhere. This predicate should be moved to a shared location that is + // in target-independent code.
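For readers decoding the encodings chosen here: a DW_EH_PE byte combines a value format in its low nibble with application modifiers in its high bits. The throwaway decoder below is not part of the patch; the enum values are the standard DWARF EH constants this file relies on:

    #include <cstdio>

    // Subset of the DWARF EH pointer-encoding constants used above.
    enum {
      DW_EH_PE_absptr   = 0x00,
      DW_EH_PE_sdata4   = 0x0B,
      DW_EH_PE_pcrel    = 0x10,
      DW_EH_PE_indirect = 0x80
    };

    static void describe(unsigned Enc) {
      std::printf("0x%02X:%s%s%s\n", Enc,
                  (Enc & 0x70) == DW_EH_PE_pcrel ? " pcrel" : " abs",
                  (Enc & 0x0F) == DW_EH_PE_sdata4 ? " sdata4" : " ptr-sized",
                  (Enc & DW_EH_PE_indirect) ? " indirect" : "");
    }

    int main() {
      // Case (1)/(2): static mode or writable LSDA section.
      describe(DW_EH_PE_absptr);
      // Case (3): PIC with a read-only LSDA section.
      describe(DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4);
      return 0;
    }
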
+ // + if (LSDASection->getKind().isWriteable() || + Asm->TM.getRelocationModel() == Reloc::Static) + TTypeFormat = dwarf::DW_EH_PE_absptr; + else + TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | + dwarf::DW_EH_PE_sdata4; - unsigned SizeAlign = (4 - TotalSize) & 3; + TypeFormatSize = SizeOfEncodedValue(TTypeFormat); + } // Begin the exception table. - Asm->SwitchToDataSection(TAI->getDwarfExceptionSection()); + Asm->OutStreamer.SwitchSection(LSDASection); Asm->EmitAlignment(2, 0, 0, false); + O << "GCC_except_table" << SubprogramCount << ":\n"; + // The type infos need to be aligned. GCC does this by inserting padding just + // before the type infos. However, this changes the size of the exception + // table, so you need to take this into account when you output the exception + // table size. However, the size is output using a variable length encoding. + // So by increasing the size by inserting padding, you may increase the number + // of bytes used for writing the size. If it increases, say by one byte, then + // you now need to output one less byte of padding to get the type infos + // aligned. However this decreases the size of the exception table. This + // changes the value you have to output for the exception table size. Due to + // the variable length encoding, the number of bytes used for writing the + // length may decrease. If so, you then have to increase the amount of + // padding. And so on. If you look carefully at the GCC code you will see that + // it indeed does this in a loop, going on and on until the values stabilize. + // We chose another solution: don't output padding inside the table like GCC + // does, instead output it before the table. + unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; + unsigned TyOffset = sizeof(int8_t) + // Call site format + MCAsmInfo::getULEB128Size(SizeSites) + // Call-site table length + SizeSites + SizeActions + SizeTypes; + unsigned TotalSize = sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? + MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset + TyOffset; + unsigned SizeAlign = (4 - TotalSize) & 3; + for (unsigned i = 0; i != SizeAlign; ++i) { Asm->EmitInt8(0); Asm->EOL("Padding"); - } + } EmitLabel("exception", SubprogramCount); + if (IsSJLJ) { + SmallString<16> LSDAName; + raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + "_LSDA_" << Asm->getFunctionNumber(); + O << LSDAName.str() << ":\n"; + } + // Emit the header. Asm->EmitInt8(dwarf::DW_EH_PE_omit); - Asm->EOL("LPStart format (DW_EH_PE_omit)"); - Asm->EmitInt8(dwarf::DW_EH_PE_absptr); - Asm->EOL("TType format (DW_EH_PE_absptr)"); - Asm->EmitULEB128Bytes(TypeOffset); - Asm->EOL("TType base offset"); - Asm->EmitInt8(dwarf::DW_EH_PE_udata4); - Asm->EOL("Call site format (DW_EH_PE_udata4)"); - Asm->EmitULEB128Bytes(SizeSites); - Asm->EOL("Call-site table length"); - - // Emit the landing pad site information. 
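Returning to the padding discussion above: it describes a feedback loop between the ULEB128-encoded table length and the alignment padding, and emitting the padding before the table breaks that loop. A worked example with made-up byte counts (illustrative only, mirroring the TyOffset/TotalSize arithmetic above):

    #include <cstdio>

    static unsigned ulebSize(unsigned V) { // bytes for ULEB128(V)
      unsigned N = 0;
      do { V >>= 7; ++N; } while (V);
      return N;
    }

    int main() {
      // Hypothetical byte counts, chosen only for the arithmetic.
      unsigned SizeSites = 130, SizeActions = 9, SizeTypes = 8;
      unsigned TyOffset  = 1 + ulebSize(SizeSites) +
                           SizeSites + SizeActions + SizeTypes; // = 150
      unsigned TotalSize = 1 + 1 + ulebSize(TyOffset) + TyOffset; // = 154
      unsigned SizeAlign = (4 - TotalSize) & 3; // = 2, emitted *before* the table
      std::printf("TyOffset=%u TotalSize=%u pad=%u\n",
                  TyOffset, TotalSize, SizeAlign);
      // Padding inside the table could grow ulebSize(TyOffset), changing
      // TotalSize and hence the padding again; padding first avoids the
      // fixed-point iteration GCC performs.
      return 0;
    }
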
- for (unsigned i = 0; i < CallSites.size(); ++i) { - CallSiteEntry &S = CallSites[i]; - const char *BeginTag; - unsigned BeginNumber; - - if (!S.BeginLabel) { - BeginTag = "eh_func_begin"; - BeginNumber = SubprogramCount; - } else { - BeginTag = "label"; - BeginNumber = S.BeginLabel; - } + Asm->EOL("@LPStart format", dwarf::DW_EH_PE_omit); - EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount, - true, true); - Asm->EOL("Region start"); + Asm->EmitInt8(TTypeFormat); + Asm->EOL("@TType format", TTypeFormat); - if (!S.EndLabel) - EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber, - true); - else - EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true); + if (HaveTTData) { + Asm->EmitULEB128Bytes(TyOffset); + Asm->EOL("@TType base offset"); + } - Asm->EOL("Region length"); + // SjLj Exception handling + if (IsSJLJ) { + Asm->EmitInt8(dwarf::DW_EH_PE_udata4); + Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call site table length"); + + // Emit the landing pad site information. + unsigned idx = 0; + for (SmallVectorImpl::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { + const CallSiteEntry &S = *I; + + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + Asm->EmitULEB128Bytes(idx); + Asm->EOL("Landing pad"); + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + Asm->EmitULEB128Bytes(S.Action); + Asm->EOL("Action"); + } + } else { + // DWARF Exception handling + assert(MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf); + + // The call-site table is a list of all call sites that may throw an + // exception (including C++ 'throw' statements) in the procedure + // fragment. It immediately follows the LSDA header. Each entry indicates, + // for a given call, the first corresponding action record and corresponding + // landing pad. + // + // The table begins with the number of bytes, stored as an LEB128 + // compressed, unsigned integer. The records immediately follow the record + // count. They are sorted in increasing call-site address. Each record + // indicates: + // + // * The position of the call-site. + // * The position of the landing pad. + // * The first action record for that call site. + // + // A missing entry in the call-site table indicates that a call is not + // supposed to throw. + + // Emit the landing pad call site table. + Asm->EmitInt8(dwarf::DW_EH_PE_udata4); + Asm->EOL("Call site format", dwarf::DW_EH_PE_udata4); + Asm->EmitULEB128Bytes(SizeSites); + Asm->EOL("Call site table size"); + + for (SmallVectorImpl::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { + const CallSiteEntry &S = *I; + const char *BeginTag; + unsigned BeginNumber; + + if (!S.BeginLabel) { + BeginTag = "eh_func_begin"; + BeginNumber = SubprogramCount; + } else { + BeginTag = "label"; + BeginNumber = S.BeginLabel; + } - if (!S.PadLabel) - Asm->EmitInt32(0); - else - EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, + // Offset of the call site relative to the previous call site, counted in + // number of 16-byte bundles. The first call site is counted relative to + // the start of the procedure fragment. 
+ EmitSectionOffset(BeginTag, "eh_func_begin", BeginNumber, SubprogramCount, true, true); + Asm->EOL("Region start"); + + if (!S.EndLabel) + EmitDifference("eh_func_end", SubprogramCount, BeginTag, BeginNumber, + true); + else + EmitDifference("label", S.EndLabel, BeginTag, BeginNumber, true); + + Asm->EOL("Region length"); - Asm->EOL("Landing pad"); + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + if (!S.PadLabel) + Asm->EmitInt32(0); + else + EmitSectionOffset("label", "eh_func_begin", S.PadLabel, SubprogramCount, + true, true); + + Asm->EOL("Landing pad"); - Asm->EmitULEB128Bytes(S.Action); - Asm->EOL("Action"); + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + Asm->EmitULEB128Bytes(S.Action); + Asm->EOL("Action"); + } } - // Emit the actions. - for (unsigned I = 0, N = Actions.size(); I != N; ++I) { - ActionEntry &Action = Actions[I]; + // Emit the Action Table. + for (SmallVectorImpl::const_iterator + I = Actions.begin(), E = Actions.end(); I != E; ++I) { + const ActionEntry &Action = *I; + + // Type Filter + // + // Used by the runtime to match the type of the thrown exception to the + // type of the catch clauses or the types in the exception specification. Asm->EmitSLEB128Bytes(Action.ValueForTypeID); Asm->EOL("TypeInfo index"); + + // Action Record + // + // Self-relative signed displacement in bytes of the next action record, + // or 0 if there is no next action record. + Asm->EmitSLEB128Bytes(Action.NextAction); Asm->EOL("Next action"); } - // Emit the type ids. - for (unsigned M = TypeInfos.size(); M; --M) { - GlobalVariable *GV = TypeInfos[M - 1]; + // Emit the Catch Clauses. The code for the catch clauses following the same + // try is similar to a switch statement. The catch clause action record + // informs the runtime about the type of a catch clause and about the + // associated switch value. + // + // Action Record Fields: + // + // * Filter Value + // Positive value, starting at 1. Index in the types table of the + // __typeinfo for the catch-clause type. 1 is the first word preceding + // TTBase, 2 is the second word, and so on. Used by the runtime to check + // if the thrown exception type matches the catch-clause type. Back-end + // generated switch statements check against this value. + // + // * Next + // Signed offset, in bytes from the start of this field, to the next + // chained action record, or zero if none. + // + // The order of the action records determined by the next field is the order + // of the catch clauses as they appear in the source code, and must be kept in + // the same order. As a result, changing the order of the catch clause would + // change the semantics of the program. + for (std::vector::const_reverse_iterator + I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { + const GlobalVariable *GV = *I; PrintRelDirective(); if (GV) { - std::string GLN; - O << Asm->getGlobalLinkName(GV, GLN); + O << Asm->Mang->getMangledName(GV); } else { - O << "0"; + O << "0x0"; } Asm->EOL("TypeInfo"); } - // Emit the filter typeids. - for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { - unsigned TypeID = FilterIds[j]; + // Emit the Type Table. 
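Before the type table is emitted below, a mental model may help: the call-site records written above are searched by the unwinder for the faulting PC, and a gap means the call was not supposed to throw. The sketch is hypothetical (already-decoded records, illustrative field values) and is not how any particular runtime literally implements the lookup:

    #include <cstdio>
    #include <vector>

    // A *decoded* call-site record; the real table stores these as
    // udata4/ULEB128 fields, as emitted above.
    struct CallSite { unsigned Start, Len, Pad, Action; };

    // Unwinder-style lookup: find the record covering PCOffset.
    static const CallSite *findSite(const std::vector<CallSite> &Sites,
                                    unsigned PCOffset) {
      for (std::vector<CallSite>::const_iterator I = Sites.begin(),
             E = Sites.end(); I != E; ++I)
        if (PCOffset >= I->Start && PCOffset < I->Start + I->Len)
          return &*I;
      return 0; // a gap: this call is not supposed to throw
    }

    int main() {
      std::vector<CallSite> Sites;
      CallSite Plain  = { 0, 16, 0, 0 };  // ordinary call: no landing pad
      CallSite Invoke = { 16, 8, 64, 1 }; // invoke: pad at +64, action 1
      Sites.push_back(Plain);
      Sites.push_back(Invoke);
      if (const CallSite *S = findSite(Sites, 20))
        std::printf("landing pad at +%u, first action %u\n",
                    S->Pad, S->Action);
      return 0;
    }
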
+ for (std::vector::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; Asm->EmitULEB128Bytes(TypeID); Asm->EOL("Filter TypeInfo index"); } @@ -629,48 +921,53 @@ void DwarfException::EmitExceptionTable() { /// EndModule - Emit all exception information that should come after the /// content. void DwarfException::EndModule() { + if (MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf) + return; + + if (!shouldEmitMovesModule && !shouldEmitTableModule) + return; + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); - if (shouldEmitMovesModule || shouldEmitTableModule) { - const std::vector Personalities = MMI->getPersonalities(); - for (unsigned i = 0; i < Personalities.size(); ++i) - EmitCommonEHFrame(Personalities[i], i); + const std::vector Personalities = MMI->getPersonalities(); - for (std::vector::iterator I = EHFrames.begin(), - E = EHFrames.end(); I != E; ++I) - EmitEHFrame(*I); - } + for (unsigned I = 0, E = Personalities.size(); I < E; ++I) + EmitCIE(Personalities[I], I); + + for (std::vector::iterator + I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I) + EmitFDE(*I); if (TimePassesIsEnabled) ExceptionTimer->stopTimer(); } -/// BeginFunction - Gather pre-function exception information. Assumes being -/// emitted immediately after the function entry point. +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. void DwarfException::BeginFunction(MachineFunction *MF) { + if (!MMI || !MAI->doesSupportExceptionHandling()) return; + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); this->MF = MF; shouldEmitTable = shouldEmitMoves = false; - if (MMI && TAI->doesSupportExceptionHandling()) { - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); - // If any landing pads survive, we need an EH table. - if (MMI->getLandingPads().size()) - shouldEmitTable = true; + // If any landing pads survive, we need an EH table. + if (!MMI->getLandingPads().empty()) + shouldEmitTable = true; - // See if we need frame move info. - if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) - shouldEmitMoves = true; + // See if we need frame move info. + if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + shouldEmitMoves = true; - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - EmitLabel("eh_func_begin", ++SubprogramCount); - } + if (shouldEmitMoves || shouldEmitTable) + // Assumes in correct section after the entry point. + EmitLabel("eh_func_begin", ++SubprogramCount); shouldEmitTableModule |= shouldEmitTable; shouldEmitMovesModule |= shouldEmitMoves; @@ -682,25 +979,29 @@ void DwarfException::BeginFunction(MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. 
/// void DwarfException::EndFunction() { - if (TimePassesIsEnabled) + if (!shouldEmitMoves && !shouldEmitTable) return; + + if (TimePassesIsEnabled) ExceptionTimer->startTimer(); - if (shouldEmitMoves || shouldEmitTable) { - EmitLabel("eh_func_end", SubprogramCount); - EmitExceptionTable(); - - // Save EH frame information - std::string Name; - EHFrames.push_back( - FunctionEHFrameInfo(getAsm()->getCurrentFunctionEHName(MF, Name), - SubprogramCount, - MMI->getPersonalityIndex(), - MF->getFrameInfo()->hasCalls(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - MF->getFunction())); - } + EmitLabel("eh_func_end", SubprogramCount); + EmitExceptionTable(); - if (TimePassesIsEnabled) + std::string FunctionEHName = + Asm->Mang->getMangledName(MF->getFunction(), ".eh", + Asm->MAI->is_EHSymbolPrivate()); + + // Save EH frame information + EHFrames.push_back(FunctionEHFrameInfo(FunctionEHName, SubprogramCount, + MMI->getPersonalityIndex(), + MF->getFrameInfo()->hasCalls(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + MF->getFunction())); + + // Record if this personality index uses a landing pad. + UsesLSDA[MMI->getPersonalityIndex()] |= !MMI->getLandingPads().empty(); + + if (TimePassesIsEnabled) ExceptionTimer->stopTimer(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f1c3e56423595..f6f50255f2e7a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ -#define CODEGEN_ASMPRINTER_DWARFEXCEPTION_H__ +#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #include "DIE.h" #include "DwarfPrinter.h" @@ -24,7 +24,7 @@ namespace llvm { struct LandingPadInfo; class MachineModuleInfo; -class TargetAsmInfo; +class MCAsmInfo; class Timer; class raw_ostream; @@ -51,6 +51,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { std::vector EHFrames; + /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index + /// uses an LSDA. If so, then we need to encode that information in the CIE's + /// augmentation. + DenseMap UsesLSDA; + /// shouldEmitTable - Per-function flag to indicate if EH tables should /// be emitted. bool shouldEmitTable; @@ -70,13 +75,16 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { /// ExceptionTimer - Timer for the Dwarf exception writer. Timer *ExceptionTimer; - /// EmitCommonEHFrame - Emit the common eh unwind frame. - /// - void EmitCommonEHFrame(const Function *Personality, unsigned Index); + /// SizeOfEncodedValue - Return the size of the encoding in bytes. + unsigned SizeOfEncodedValue(unsigned Encoding); - /// EmitEHFrame - Emit function exception frame information. - /// - void EmitEHFrame(const FunctionEHFrameInfo &EHFrameInfo); + /// EmitCIE - Emit a Common Information Entry (CIE). This holds information + /// that is shared among many Frame Description Entries. There is at least + /// one CIE in every non-empty .debug_frame section. + void EmitCIE(const Function *Personality, unsigned Index); + + /// EmitFDE - Emit the Frame Description Entry (FDE) for the function. + void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo); /// EmitExceptionTable - Emit landing pads and actions. 
/// @@ -113,13 +121,6 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { static bool isPod() { return true; } }; - /// ActionEntry - Structure describing an entry in the actions table. - struct ActionEntry { - int ValueForTypeID; // The value to write - may not be equal to the type id. - int NextAction; - struct ActionEntry *Previous; - }; - /// PadRange - Structure holding a try-range and the associated landing pad. struct PadRange { // The index of the landing pad. @@ -130,23 +131,48 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf { typedef DenseMap RangeMapType; + /// ActionEntry - Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + struct ActionEntry *Previous; + }; + /// CallSiteEntry - Structure describing an entry in the call-site table. struct CallSiteEntry { // The 'try-range' is BeginLabel .. EndLabel. unsigned BeginLabel; // zero indicates the start of the function. unsigned EndLabel; // zero indicates the end of the function. + // The landing pad starts at PadLabel. unsigned PadLabel; // zero indicates that there is no landing pad. unsigned Action; }; + /// ComputeActionsTable - Compute the actions table and gather the first + /// action index for each landing pad site. + unsigned ComputeActionsTable(const SmallVectorImpl&LPs, + SmallVectorImpl &Actions, + SmallVectorImpl &FirstActions); + + /// ComputeCallSiteTable - Compute the call-site table. The entry for an + /// invoke has a try-range containing the call, a non-zero landing pad and an + /// appropriate action. The entry for an ordinary call has a try-range + /// containing the call and zero for the landing pad and the action. Calls + /// marked 'nounwind' have no entry and must not be contained in the try-range + /// of any entry - they form gaps in the table. Entries must be ordered by + /// try-range address. + void ComputeCallSiteTable(SmallVectorImpl &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl &LPs, + const SmallVectorImpl &FirstActions); void EmitExceptionTable(); public: //===--------------------------------------------------------------------===// // Main entry points. // - DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T); + DwarfException(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T); virtual ~DwarfException(); /// BeginModule - Emit all exception information that should come prior to the diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp index 8021b7c97bb09..6e9293a03bd54 100644 --- a/lib/CodeGen/AsmPrinter/DwarfLabel.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.cpp @@ -13,7 +13,7 @@ #include "DwarfLabel.h" #include "llvm/ADT/FoldingSet.h" -#include +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -25,10 +25,7 @@ void DWLabel::Profile(FoldingSetNodeID &ID) const { } #ifndef NDEBUG -void DWLabel::print(std::ostream *O) const { - if (O) print(*O); -} -void DWLabel::print(std::ostream &O) const { +void DWLabel::print(raw_ostream &O) const { O << "." 
<< Tag; if (Number) O << Number; } diff --git a/lib/CodeGen/AsmPrinter/DwarfLabel.h b/lib/CodeGen/AsmPrinter/DwarfLabel.h index b49390334bd26..0c0cc4bdc3c6b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfLabel.h +++ b/lib/CodeGen/AsmPrinter/DwarfLabel.h @@ -14,19 +14,16 @@ #ifndef CODEGEN_ASMPRINTER_DWARFLABEL_H__ #define CODEGEN_ASMPRINTER_DWARFLABEL_H__ -#include "llvm/Support/Compiler.h" -#include -#include - namespace llvm { class FoldingSetNodeID; + class raw_ostream; //===--------------------------------------------------------------------===// /// DWLabel - Labels are used to track locations in the assembler file. /// Labels appear in the form @verbatim @endverbatim, /// where the tag is a category of label (Ex. location) and number is a value /// unique in that category. - class VISIBILITY_HIDDEN DWLabel { + class DWLabel { /// Tag - Label category tag. Should always be a statically declared C /// string. /// @@ -47,8 +44,7 @@ namespace llvm { void Profile(FoldingSetNodeID &ID) const; #ifndef NDEBUG - void print(std::ostream *O) const; - void print(std::ostream &O) const; + void print(raw_ostream &O) const; #endif }; } // end llvm namespace diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index a1b97df82afce..20b959b914fcc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -15,39 +15,41 @@ #include "llvm/Module.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/StringExtras.h" using namespace llvm; -Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, +Dwarf::Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, const char *flavor) -: O(OS), Asm(A), TAI(T), TD(Asm->TM.getTargetData()), +: O(OS), Asm(A), MAI(T), TD(Asm->TM.getTargetData()), RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), SubprogramCount(0), Flavor(flavor), SetCounter(1) {} void Dwarf::PrintRelDirective(bool Force32Bit, bool isInSection) const { - if (isInSection && TAI->getDwarfSectionOffsetDirective()) - O << TAI->getDwarfSectionOffsetDirective(); + if (isInSection && MAI->getDwarfSectionOffsetDirective()) + O << MAI->getDwarfSectionOffsetDirective(); else if (Force32Bit || TD->getPointerSize() == sizeof(int32_t)) - O << TAI->getData32bitsDirective(); + O << MAI->getData32bitsDirective(); else - O << TAI->getData64bitsDirective(); + O << MAI->getData64bitsDirective(); } /// PrintLabelName - Print label name in form used by Dwarf writer. 
/// void Dwarf::PrintLabelName(const char *Tag, unsigned Number) const { - O << TAI->getPrivateGlobalPrefix() << Tag; + O << MAI->getPrivateGlobalPrefix() << Tag; if (Number) O << Number; } void Dwarf::PrintLabelName(const char *Tag, unsigned Number, const char *Suffix) const { - O << TAI->getPrivateGlobalPrefix() << Tag; + O << MAI->getPrivateGlobalPrefix() << Tag; if (Number) O << Number; O << Suffix; } @@ -65,13 +67,13 @@ void Dwarf::EmitReference(const char *Tag, unsigned Number, bool IsPCRelative, bool Force32Bit) const { PrintRelDirective(Force32Bit); PrintLabelName(Tag, Number); - if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, bool Force32Bit) const { PrintRelDirective(Force32Bit); O << Name; - if (IsPCRelative) O << "-" << TAI->getPCSymbol(); + if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } /// EmitDifference - Emit the difference between two labels. Some assemblers do @@ -80,7 +82,7 @@ void Dwarf::EmitReference(const std::string &Name, bool IsPCRelative, void Dwarf::EmitDifference(const char *TagHi, unsigned NumberHi, const char *TagLo, unsigned NumberLo, bool IsSmall) { - if (TAI->needsSet()) { + if (MAI->needsSet()) { O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -106,11 +108,11 @@ void Dwarf::EmitSectionOffset(const char* Label, const char* Section, bool useSet) { bool printAbsolute = false; if (isEH) - printAbsolute = TAI->isAbsoluteEHSectionOffsets(); + printAbsolute = MAI->isAbsoluteEHSectionOffsets(); else - printAbsolute = TAI->isAbsoluteDebugSectionOffsets(); + printAbsolute = MAI->isAbsoluteDebugSectionOffsets(); - if (TAI->needsSet() && useSet) { + if (MAI->needsSet() && useSet) { O << "\t.set\t"; PrintLabelName("set", SetCounter, Flavor); O << ","; @@ -190,7 +192,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, Asm->EmitULEB128Bytes(Offset); Asm->EOL("Offset"); } else { - assert(0 && "Machine move not supported yet."); + llvm_unreachable("Machine move not supported yet."); } } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { @@ -200,7 +202,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID, Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH)); Asm->EOL("Register"); } else { - assert(0 && "Machine move not supported yet."); + llvm_unreachable("Machine move not supported yet."); } } else { unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH); diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 6e75992cb07c2..33ebb3bd0eb5d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -25,7 +25,7 @@ namespace llvm { class MachineFunction; class MachineModuleInfo; class Module; - class TargetAsmInfo; + class MCAsmInfo; class TargetData; class TargetRegisterInfo; @@ -43,9 +43,9 @@ namespace llvm { /// AsmPrinter *Asm; - /// TAI - Target asm information. + /// MAI - Target asm information. /// - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// TD - Target data. 
/// @@ -80,7 +80,7 @@ namespace llvm { /// unsigned SetCounter; - Dwarf(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T, + Dwarf(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, const char *flavor); public: //===------------------------------------------------------------------===// @@ -88,7 +88,7 @@ namespace llvm { // const AsmPrinter *getAsm() const { return Asm; } MachineModuleInfo *getMMI() const { return MMI; } - const TargetAsmInfo *getTargetAsmInfo() const { return TAI; } + const MCAsmInfo *getMCAsmInfo() const { return MAI; } const TargetData *getTargetData() const { return TD; } void PrintRelDirective(bool Force32Bit = false, diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index 89084989b8751..0638d35685490 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -39,7 +39,7 @@ DwarfWriter::~DwarfWriter() { void DwarfWriter::BeginModule(Module *M, MachineModuleInfo *MMI, raw_ostream &OS, AsmPrinter *A, - const TargetAsmInfo *T) { + const MCAsmInfo *T) { DE = new DwarfException(OS, A, T); DD = new DwarfDebug(OS, A, T); DE->BeginModule(M, MMI); @@ -51,6 +51,8 @@ void DwarfWriter::BeginModule(Module *M, void DwarfWriter::EndModule() { DE->EndModule(); DD->EndModule(); + delete DD; DD = 0; + delete DE; DE = 0; } /// BeginFunction - Gather pre-function debug information. Assumes being @@ -75,18 +77,18 @@ void DwarfWriter::EndFunction(MachineFunction *MF) { /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, - DICompileUnit CU) { - return DD->RecordSourceLine(Line, Col, CU); + MDNode *Scope) { + return DD->RecordSourceLine(Line, Col, Scope); } /// RecordRegionStart - Indicate the start of a region. -unsigned DwarfWriter::RecordRegionStart(GlobalVariable *V) { - return DD->RecordRegionStart(V); +unsigned DwarfWriter::RecordRegionStart(MDNode *N) { + return DD->RecordRegionStart(N); } /// RecordRegionEnd - Indicate the end of a region. -unsigned DwarfWriter::RecordRegionEnd(GlobalVariable *V) { - return DD->RecordRegionEnd(V); +unsigned DwarfWriter::RecordRegionEnd(MDNode *N) { + return DD->RecordRegionEnd(N); } /// getRecordSourceLineCount - Count source lines. @@ -96,9 +98,8 @@ unsigned DwarfWriter::getRecordSourceLineCount() { /// RecordVariable - Indicate the declaration of a local variable. /// -void DwarfWriter::RecordVariable(GlobalVariable *GV, unsigned FrameIndex, - const MachineInstr *MI) { - DD->RecordVariable(GV, FrameIndex, MI); +void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) { + DD->RecordVariable(N, FrameIndex); } /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should @@ -107,8 +108,7 @@ bool DwarfWriter::ShouldEmitDwarfDebug() const { return DD && DD->ShouldEmitDwarfDebug(); } -//// RecordInlinedFnStart - Global variable GV is inlined at the location marked -//// by LabelID label. +//// RecordInlinedFnStart unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU, unsigned Line, unsigned Col) { return DD->RecordInlinedFnStart(SP, CU, Line, Col); @@ -119,9 +119,9 @@ unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) { return DD->RecordInlinedFnEnd(SP); } -/// RecordVariableScope - Record scope for the variable declared by -/// DeclareMI. DeclareMI must describe TargetInstrInfo::DECLARE. 
-void DwarfWriter::RecordVariableScope(DIVariable &DV, - const MachineInstr *DeclareMI) { - DD->RecordVariableScope(DV, DeclareMI); +void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeEndLabels(MI, L); +} +void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) { + DD->SetDbgScopeBeginLabels(MI, L); } diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 8ba903a65d79f..06b92b7294b62 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -15,12 +15,14 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/Module.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" - +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -28,10 +30,10 @@ namespace { class VISIBILITY_HIDDEN OcamlGCMetadataPrinter : public GCMetadataPrinter { public: void beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); void finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI); + const MCAsmInfo &MAI); }; } @@ -42,11 +44,11 @@ Y("ocaml", "ocaml 3.10-compatible collector"); void llvm::linkOcamlGCPrinter() { } static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI, const char *Id) { + const MCAsmInfo &MAI, const char *Id) { const std::string &MId = M.getModuleIdentifier(); std::string Mangled; - Mangled += TAI.getGlobalPrefix(); + Mangled += MAI.getGlobalPrefix(); Mangled += "caml"; size_t Letter = Mangled.size(); Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); @@ -56,18 +58,18 @@ static void EmitCamlGlobal(const Module &M, raw_ostream &OS, AsmPrinter &AP, // Capitalize the first letter of the module name. Mangled[Letter] = toupper(Mangled[Letter]); - if (const char *GlobalDirective = TAI.getGlobalDirective()) + if (const char *GlobalDirective = MAI.getGlobalDirective()) OS << GlobalDirective << Mangled << "\n"; OS << Mangled << ":\n"; } void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { - AP.SwitchToSection(TAI.getTextSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin"); + const MCAsmInfo &MAI) { + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "code_begin"); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "data_begin"); } /// emitAssembly - Print the frametable. The ocaml frametable format is thus: @@ -87,55 +89,59 @@ void OcamlGCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, /// either condition is detected in a function which uses the GC. 
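Ahead of the frametable emission below: the two "too large for the ocaml GC" checks in the body exist because frametable entries are 16-bit fields (the frame size is written with EmitInt16 further down). A toy version of the guard, with illustrative names:

    #include <cstdio>

    // Anything >= 65536 cannot be represented in a 16-bit frametable
    // field and must be rejected up front.
    static bool fitsFrametable(unsigned long FrameSize,
                               unsigned long LiveCount) {
      return FrameSize < (1UL << 16) && LiveCount < (1UL << 16);
    }

    int main() {
      std::printf("%d\n", (int)fitsFrametable(65535, 10)); // 1: fits
      std::printf("%d\n", (int)fitsFrametable(65536, 10)); // 0: rejected
      return 0;
    }
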
/// void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { const char *AddressDirective; int AddressAlignLog; if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) { - AddressDirective = TAI.getData32bitsDirective(); + AddressDirective = MAI.getData32bitsDirective(); AddressAlignLog = 2; } else { - AddressDirective = TAI.getData64bitsDirective(); + AddressDirective = MAI.getData64bitsDirective(); AddressAlignLog = 3; } - AP.SwitchToSection(TAI.getTextSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "code_end"); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "data_end"); OS << AddressDirective << 0; // FIXME: Why does ocaml emit this?? AP.EOL(); - AP.SwitchToSection(TAI.getDataSection()); - EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable"); + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(getModule(), OS, AP, MAI, "frametable"); for (iterator I = begin(), IE = end(); I != IE; ++I) { GCFunctionInfo &FI = **I; uint64_t FrameSize = FI.getFrameSize(); if (FrameSize >= 1<<16) { - cerr << "Function '" << FI.getFunction().getNameStart() + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Function '" << FI.getFunction().getName() << "' is too large for the ocaml GC! " << "Frame size " << FrameSize << " >= 65536.\n"; - cerr << "(" << uintptr_t(&FI) << ")\n"; - abort(); // Very rude! + Msg << "(" << uintptr_t(&FI) << ")"; + llvm_report_error(Msg.str()); // Very rude! } - OS << "\t" << TAI.getCommentString() << " live roots for " - << FI.getFunction().getNameStart() << "\n"; + OS << "\t" << MAI.getCommentString() << " live roots for " + << FI.getFunction().getName() << "\n"; for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { size_t LiveCount = FI.live_size(J); if (LiveCount >= 1<<16) { - cerr << "Function '" << FI.getFunction().getNameStart() + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Function '" << FI.getFunction().getName() << "' is too large for the ocaml GC! " - << "Live root count " << LiveCount << " >= 65536.\n"; - abort(); // Very rude! + << "Live root count " << LiveCount << " >= 65536."; + llvm_report_error(Msg.str()); // Very rude! 
} OS << AddressDirective - << TAI.getPrivateGlobalPrefix() << "label" << J->Num; + << MAI.getPrivateGlobalPrefix() << "label" << J->Num; AP.EOL("call return address"); AP.EmitInt16(FrameSize); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 26353035ae2f8..f9abeacbdbb3d 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "branchfolding" +#include "BranchFolding.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -27,6 +28,8 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -44,70 +47,35 @@ TailMergeThreshold("tail-merge-threshold", cl::desc("Max number of predecessors to consider tail merging"), cl::init(150), cl::Hidden); -namespace { - struct VISIBILITY_HIDDEN BranchFolder : public MachineFunctionPass { - static char ID; - explicit BranchFolder(bool defaultEnableTailMerge) : - MachineFunctionPass(&ID) { - switch (FlagEnableTailMerge) { - case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; - case cl::BOU_TRUE: EnableTailMerge = true; break; - case cl::BOU_FALSE: EnableTailMerge = false; break; - } - } - virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Control Flow Optimizer"; } - const TargetInstrInfo *TII; - MachineModuleInfo *MMI; - bool MadeChange; - private: - // Tail Merging. - bool EnableTailMerge; - bool TailMergeBlocks(MachineFunction &MF); - bool TryMergeBlocks(MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); - void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, - MachineBasicBlock *NewDest); - MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); - unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength); - void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, - MachineBasicBlock* PredBB); - unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength); - - typedef std::pair MergePotentialsElt; - typedef std::vector::iterator MPIterator; - std::vector MergePotentials; - - typedef std::pair SameTailElt; - std::vector SameTails; - - const TargetRegisterInfo *RegInfo; - RegScavenger *RS; - // Branch optzn. 
-    bool OptimizeBranches(MachineFunction &MF);
-    void OptimizeBlock(MachineBasicBlock *MBB);
-    void RemoveDeadBlock(MachineBasicBlock *MBB);
-    bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
-
-    bool CanFallThrough(MachineBasicBlock *CurBB);
-    bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
-                        MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-                        const SmallVectorImpl<MachineOperand> &Cond);
-  };
-  char BranchFolder::ID = 0;
-}
+char BranchFolderPass::ID = 0;
 
 FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
-  return new BranchFolder(DefaultEnableTailMerge); }
+  return new BranchFolderPass(DefaultEnableTailMerge);
+}
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+  return OptimizeFunction(MF,
+                          MF.getTarget().getInstrInfo(),
+                          MF.getTarget().getRegisterInfo(),
+                          getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
+  switch (FlagEnableTailMerge) {
+  case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+  case cl::BOU_TRUE: EnableTailMerge = true; break;
+  case cl::BOU_FALSE: EnableTailMerge = false; break;
+  }
+}
 
 /// RemoveDeadBlock - Remove the specified dead machine basic block from the
 /// function, updating the CFG.
 void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
   assert(MBB->pred_empty() && "MBB must be dead!");
-  DOUT << "\nRemoving MBB: " << *MBB;
+  DEBUG(errs() << "\nRemoving MBB: " << *MBB);
 
   MachineFunction *MF = MBB->getParent();
   // drop all successors.
@@ -146,7 +114,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
       break;
     unsigned Reg = I->getOperand(0).getReg();
     ImpDefRegs.insert(Reg);
-    for (const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
          unsigned SubReg = *SubRegs; ++SubRegs)
       ImpDefRegs.insert(SubReg);
     ++I;
@@ -180,32 +148,37 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
   return true;
 }
 
-bool BranchFolder::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  if (!TII) return false;
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+                                    const TargetInstrInfo *tii,
+                                    const TargetRegisterInfo *tri,
+                                    MachineModuleInfo *mmi) {
+  if (!tii) return false;
+
+  TII = tii;
+  TRI = tri;
+  MMI = mmi;
 
-  RegInfo = MF.getTarget().getRegisterInfo();
+  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
 
   // Fix CFG. The later algorithms expect it to be right.
-  bool EverMadeChange = false;
+  bool MadeChange = false;
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
     MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
     SmallVector<MachineOperand, 4> Cond;
     if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
-      EverMadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
-    EverMadeChange |= OptimizeImpDefsBlock(MBB);
+      MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+    MadeChange |= OptimizeImpDefsBlock(MBB);
   }
 
-  RS = RegInfo->requiresRegisterScavenging(MF) ?
new RegScavenger() : NULL; - - MMI = getAnalysisIfAvailable(); bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = false; MadeChangeThisIteration |= TailMergeBlocks(MF); MadeChangeThisIteration |= OptimizeBranches(MF); - EverMadeChange |= MadeChangeThisIteration; + MadeChange |= MadeChangeThisIteration; } // See if any jump tables have become mergable or dead as the code generator @@ -222,8 +195,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { // Scan the jump tables, seeing if there are any duplicates. Note that this // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + for (unsigned i = 1, e = JTs.size(); i != e; ++i) { + if (JTs[i].MBBs.empty()) + JTMapping.push_back(i); + else + JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); + } // If a jump table was merge with another one, walk the function rewriting // references to jump tables to reference the new JT ID's. Keep track of @@ -250,12 +227,12 @@ bool BranchFolder::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) if (!JTIsLive.test(i)) { JTI->RemoveJumpTable(i); - EverMadeChange = true; + MadeChange = true; } } - + delete RS; - return EverMadeChange; + return MadeChange; } //===----------------------------------------------------------------------===// @@ -395,9 +372,9 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, RS->enterBasicBlock(&CurMBB); if (!CurMBB.empty()) RS->forward(prior(CurMBB.end())); - BitVector RegsLiveAtExit(RegInfo->getNumRegs()); + BitVector RegsLiveAtExit(TRI->getNumRegs()); RS->getRegsUsed(RegsLiveAtExit, false); - for (unsigned int i=0, e=RegInfo->getNumRegs(); i!=e; i++) + for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++) if (RegsLiveAtExit[i]) NewMBB->addLiveIn(i); } @@ -461,7 +438,7 @@ static bool MergeCompare(const std::pair &p, // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing // an object with itself. #ifndef _GLIBCXX_DEBUG - assert(0 && "Predecessor appears twice"); + llvm_unreachable("Predecessor appears twice"); #endif return false; } @@ -567,8 +544,8 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second; MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; - DOUT << "\nSplitting " << MBB->getNumber() << ", size " << - maxCommonTailLength; + DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size " + << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); SameTails[commonTailIndex].first->second = newMBB; @@ -590,13 +567,14 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock* PredBB) { + bool MadeChange = false; + // It doesn't make sense to save a single instruction since tail merging // will add a jump. // FIXME: Ask the target to provide the threshold? unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1; - MadeChange = false; - DOUT << "\nTryMergeBlocks " << MergePotentials.size() << '\n'; + DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n'); // Sort by hash value so that blocks with identical end sequences sort // together. 
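The duplicate scan above maps every jump table to the lowest index whose contents match, and the new guard keeps already-emptied tables mapped to themselves instead of resurrecting them through getJumpTableIndex. A self-contained sketch of that mapping (simplified types, not the patch's API; a map also sidesteps the N^2 scan the comment above concedes):

#include <map>
#include <vector>

// Map each jump table to the first table with identical contents; empty
// tables keep their own index.
static std::vector<unsigned>
buildJumpTableMapping(const std::vector<std::vector<unsigned> > &Tables) {
  std::map<std::vector<unsigned>, unsigned> FirstSeen;
  std::vector<unsigned> Mapping(Tables.size());
  for (unsigned i = 0, e = Tables.size(); i != e; ++i) {
    if (Tables[i].empty()) {
      Mapping[i] = i;               // dead table: never merge into it
      continue;
    }
    Mapping[i] = FirstSeen.insert(std::make_pair(Tables[i], i)).first->second;
  }
  return Mapping;
}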
@@ -643,17 +621,17 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB, MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second; // MBB is common tail. Adjust all other BB's to jump to this one. // Traversal must be forwards so erases work. - DOUT << "\nUsing common tail " << MBB->getNumber() << " for "; + DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for "); for (unsigned int i=0; isecond->getNumber() << ","; + DEBUG(errs() << SameTails[i].first->second->getNumber() << ","); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].second, MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. MergePotentials.erase(SameTails[i].first); } - DOUT << "\n"; + DEBUG(errs() << "\n"); // We leave commonTailIndex in the worklist in case there are other blocks // that match it with a smaller number of instructions. MadeChange = true; @@ -665,7 +643,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (!EnableTailMerge) return false; - MadeChange = false; + bool MadeChange = false; // First find blocks with no successors. MergePotentials.clear(); @@ -699,6 +677,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + SmallPtrSet UniquePreds; MachineBasicBlock *IBB = I; MachineBasicBlock *PredBB = prior(I); MergePotentials.clear(); @@ -709,6 +688,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Skip blocks that loop to themselves, can't tail merge these. if (PBB==IBB) continue; + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { @@ -772,14 +754,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { //===----------------------------------------------------------------------===// bool BranchFolder::OptimizeBranches(MachineFunction &MF) { - MadeChange = false; + bool MadeChange = false; // Make sure blocks are numbered in order MF.RenumberBlocks(); for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; - OptimizeBlock(MBB); + MadeChange |= OptimizeBlock(MBB); // If it is dead, remove it. if (MBB->pred_empty()) { @@ -873,7 +855,9 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. -void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { +bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { + bool MadeChange = false; + MachineFunction::iterator FallThrough = MBB; ++FallThrough; @@ -882,7 +866,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // points to this block. if (MBB->empty() && !MBB->isLandingPad()) { // Dead block? Leave for cleanup later. - if (MBB->pred_empty()) return; + if (MBB->pred_empty()) return MadeChange; if (FallThrough == MBB->getParent()->end()) { // TODO: Simplify preds to not branch here if possible! @@ -893,14 +877,13 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineBasicBlock *Pred = *(MBB->pred_end()-1); Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); } - // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. 
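The UniquePreds guard above relies on SmallPtrSet::insert returning false when the pointer is already in the set, so a block that appears several times in a predecessor list is only considered once. The same idiom with the standard library, where insert().second plays that role:

#include <cstddef>
#include <set>
#include <vector>

// Visit each value once, in first-seen order, skipping duplicates.
static std::vector<int> visitOnce(const std::vector<int> &Preds) {
  std::set<int> Seen;
  std::vector<int> Order;
  for (std::size_t i = 0; i != Preds.size(); ++i)
    if (Seen.insert(Preds[i]).second)   // false -> already visited
      Order.push_back(Preds[i]);
  return Order;
}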
MBB->getParent()->getJumpTableInfo()-> ReplaceMBBInJumpTables(MBB, FallThrough); MadeChange = true; } - return; + return MadeChange; } // Check to see if we can simplify the terminator of the block before this @@ -1004,8 +987,8 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Reverse the branch so we will fall through on the previous true cond. SmallVector NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { - DOUT << "\nMoving MBB: " << *MBB; - DOUT << "To make fallthrough to: " << *PriorTBB << "\n"; + DEBUG(errs() << "\nMoving MBB: " << *MBB + << "To make fallthrough to: " << *PriorTBB << "\n"); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); @@ -1014,7 +997,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MBB->moveAfter(--MBB->getParent()->end()); MadeChange = true; ++NumBranchOpts; - return; + return MadeChange; } } } @@ -1116,7 +1099,7 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { if (DidChange) { ++NumBranchOpts; MadeChange = true; - if (!HasBranchToSelf) return; + if (!HasBranchToSelf) return MadeChange; } } } @@ -1197,8 +1180,10 @@ void BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { PrevBB.isSuccessor(FallThrough)) { MBB->moveAfter(--MBB->getParent()->end()); MadeChange = true; - return; + return MadeChange; } } } + + return MadeChange; } diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h new file mode 100644 index 0000000000000..9763e3339a20a --- /dev/null +++ b/lib/CodeGen/BranchFolding.h @@ -0,0 +1,84 @@ +//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <vector>
+
+namespace llvm {
+  class MachineFunction;
+  class MachineModuleInfo;
+  class RegScavenger;
+  class TargetInstrInfo;
+  class TargetRegisterInfo;
+
+  class BranchFolder {
+  public:
+    explicit BranchFolder(bool defaultEnableTailMerge);
+
+    bool OptimizeFunction(MachineFunction &MF,
+                          const TargetInstrInfo *tii,
+                          const TargetRegisterInfo *tri,
+                          MachineModuleInfo *mmi);
+  private:
+    typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
+    typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+    std::vector<MergePotentialsElt> MergePotentials;
+
+    typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+    std::vector<SameTailElt> SameTails;
+
+    bool EnableTailMerge;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineModuleInfo *MMI;
+    RegScavenger *RS;
+
+    bool TailMergeBlocks(MachineFunction &MF);
+    bool TryMergeBlocks(MachineBasicBlock* SuccBB,
+                        MachineBasicBlock* PredBB);
+    void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                 MachineBasicBlock *NewDest);
+    MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+                                  MachineBasicBlock::iterator BBI1);
+    unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+    void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+                              MachineBasicBlock* PredBB);
+    unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+                                       unsigned maxCommonTailLength);
+
+    bool OptimizeBranches(MachineFunction &MF);
+    bool OptimizeBlock(MachineBasicBlock *MBB);
+    void RemoveDeadBlock(MachineBasicBlock *MBB);
+    bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+
+    bool CanFallThrough(MachineBasicBlock *CurBB);
+    bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable,
+                        MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+                        const SmallVectorImpl<MachineOperand> &Cond);
+  };
+
+
+  /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass, + public BranchFolder { + public: + static char ID; + explicit BranchFolderPass(bool defaultEnableTailMerge) + : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Control Flow Optimizer"; } + }; +} + +#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 48f17d0d04c10..713c30c7d4ab2 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMCodeGen DwarfEHPrepare.cpp ELFCodeEmitter.cpp ELFWriter.cpp + ExactHazardRecognizer.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp @@ -12,7 +13,6 @@ add_llvm_library(LLVMCodeGen IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp - LazyLiveness.cpp LiveInterval.cpp LiveIntervalAnalysis.cpp LiveStackAnalysis.cpp @@ -23,27 +23,28 @@ add_llvm_library(LLVMCodeGen MachineBasicBlock.cpp MachineDominators.cpp MachineFunction.cpp + MachineFunctionAnalysis.cpp + MachineFunctionPass.cpp MachineInstr.cpp MachineLICM.cpp MachineLoopInfo.cpp MachineModuleInfo.cpp + MachineModuleInfoImpls.cpp MachinePassRegistry.cpp MachineRegisterInfo.cpp MachineSink.cpp MachineVerifier.cpp + ObjectCodeEmitter.cpp OcamlGC.cpp - PBQP.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp - RegAllocBigBlock.cpp RegAllocLinearScan.cpp RegAllocLocal.cpp RegAllocPBQP.cpp - RegAllocSimple.cpp RegisterCoalescer.cpp RegisterScavenging.cpp ScheduleDAG.cpp @@ -53,6 +54,7 @@ add_llvm_library(LLVMCodeGen ShadowStackGC.cpp ShrinkWrapping.cpp SimpleRegisterCoalescing.cpp + SjLjEHPrepare.cpp Spiller.cpp StackProtector.cpp StackSlotColoring.cpp diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 383098e11efdc..932fae4f316cf 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -95,11 +95,11 @@ FunctionPass *llvm::createCodePlacementOptPass() { /// ... /// jmp B /// -/// C: --> new loop header +/// C: /// ... /// /// -/// B: +/// B: --> loop header /// ... /// jcc C, [exit] /// diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 4832a5ee9ae04..078ed3d31b1c3 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -36,7 +37,7 @@ namespace { DeadMachineInstructionElim() : MachineFunctionPass(&ID) {} private: - bool isDead(MachineInstr *MI) const; + bool isDead(const MachineInstr *MI) const; }; } char DeadMachineInstructionElim::ID = 0; @@ -49,10 +50,10 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() { return new DeadMachineInstructionElim(); } -bool DeadMachineInstructionElim::isDead(MachineInstr *MI) const { +bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, SawStore)) + if (!MI->isSafeToMove(TII, SawStore, 0)) return false; // Examine each operand. 
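The header above captures a common refactoring: the optimization engine (BranchFolder) is a plain class that takes its target hooks as explicit arguments, and the pass (BranchFolderPass) is a thin adapter that fetches those hooks from the pass framework. A minimal sketch of the shape under hypothetical names (the framework base class is elided to keep it self-contained):

// Engine: reusable outside any pass manager.
class Engine {
  bool Enable;
public:
  explicit Engine(bool E) : Enable(E) {}
  bool optimize(int &Data) {     // stand-in for OptimizeFunction(MF, tii, ...)
    if (!Enable) return false;
    Data = 0;                    // "do the work"
    return true;
  }
};

// Adapter: inherits the engine and merely forwards, the way
// BranchFolderPass::runOnMachineFunction calls OptimizeFunction above.
class EnginePass : public Engine {
public:
  explicit EnginePass(bool E) : Engine(E) {}
  bool runOnFunction(int &Data) { return optimize(Data); }
};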
@@ -110,7 +111,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // If the instruction is dead, delete it! if (isDead(MI)) { - DOUT << "DeadMachineInstructionElim: DELETING: " << *MI; + DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI); AnyChanges = true; MI->eraseFromParent(); MIE = MBB->rend(); diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 720e3d19b7594..72b3f92d326e1 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -107,7 +107,9 @@ FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) { /// NormalizeLandingPads - Normalize and discover landing pads, noting them /// in the LandingPads set. A landing pad is normal if the only CFG edges -/// that end at it are unwind edges from invoke instructions. +/// that end at it are unwind edges from invoke instructions. If we inlined +/// through an invoke we could have a normal branch from the previous +/// unwind block through to the landing pad for the original invoke. /// Abnormal landing pads are fixed up by redirecting all unwind edges to /// a new basic block which falls through to the original. bool DwarfEHPrepare::NormalizeLandingPads() { @@ -132,6 +134,7 @@ bool DwarfEHPrepare::NormalizeLandingPads() { break; } } + if (OnlyUnwoundTo) { // Only unwind edges lead to the landing pad. Remember the landing pad. LandingPads.insert(LPad); @@ -142,7 +145,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() { // edges to a new basic block which falls through into this one. // Create the new basic block. - BasicBlock *NewBB = BasicBlock::Create(LPad->getName() + "_unwind_edge"); + BasicBlock *NewBB = BasicBlock::Create(F->getContext(), + LPad->getName() + "_unwind_edge"); // Insert it into the function right before the original landing pad. LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); @@ -218,28 +222,43 @@ bool DwarfEHPrepare::NormalizeLandingPads() { /// at runtime if there is no such exception: using unwind to throw a new /// exception is currently not supported. bool DwarfEHPrepare::LowerUnwinds() { - bool Changed = false; + SmallVector UnwindInsts; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); - if (!isa(TI)) - continue; + if (isa(TI)) + UnwindInsts.push_back(TI); + } + + if (UnwindInsts.empty()) return false; + + // Find the rewind function if we didn't already. + if (!RewindFunction) { + LLVMContext &Ctx = UnwindInsts[0]->getContext(); + std::vector + Params(1, Type::getInt8PtrTy(Ctx)); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), + Params, false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + } + + bool Changed = false; + + for (SmallVectorImpl::iterator + I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) { + TerminatorInst *TI = *I; // Replace the unwind instruction with a call to _Unwind_Resume (or the // appropriate target equivalent) followed by an UnreachableInst. - // Find the rewind function if we didn't already. - if (!RewindFunction) { - std::vector Params(1, PointerType::getUnqual(Type::Int8Ty)); - FunctionType *FTy = FunctionType::get(Type::VoidTy, Params, false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); - } - // Create the call... 
- CallInst::Create(RewindFunction, CreateReadOfExceptionValue(I), "", TI); + CallInst *CI = CallInst::Create(RewindFunction, + CreateReadOfExceptionValue(TI->getParent()), + "", TI); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); // ...followed by an UnreachableInst. - new UnreachableInst(TI); + new UnreachableInst(TI->getContext(), TI); // Nuke the unwind instruction. TI->eraseFromParent(); @@ -314,7 +333,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() { if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { // Turn the exception temporary into registers and phi nodes if possible. std::vector Allocas(1, ExceptionValueVar); - PromoteMemToReg(Allocas, *DT, *DF); + PromoteMemToReg(Allocas, *DT, *DF, ExceptionValueVar->getContext()); return true; } return false; @@ -354,8 +373,8 @@ Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) { // Create the temporary if we didn't already. if (!ExceptionValueVar) { - ExceptionValueVar = new AllocaInst(PointerType::getUnqual(Type::Int8Ty), - "eh.value", F->begin()->begin()); + ExceptionValueVar = new AllocaInst(PointerType::getUnqual( + Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin()); ++NumStackTempsIntroduced; } diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index 7e983a4d0512c..b466e89cb2612 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -52,6 +52,159 @@ namespace llvm { EV_CURRENT = 1 }; + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + + // ELF symbols are related to llvm ones by being one of the two llvm + // types, for the other ones (section, file, func) a null pointer is + // assumed by default. + union { + const GlobalValue *GV; // If this is a pointer to a GV + const char *Ext; // If this is a pointer to a named symbol + } Source; + + // Describes from which source type this ELF symbol comes from, + // they can be GlobalValue, ExternalSymbol or neither. + enum { + isGV, // The Source.GV field is valid. + isExtSym, // The Source.ExtSym field is valid. + isOther // Not a GlobalValue or External Symbol + }; + unsigned SourceType; + + bool isGlobalValue() const { return SourceType == isGV; } + bool isExternalSym() const { return SourceType == isExtSym; } + + // getGlobalValue - If this is a global value which originated the + // elf symbol, return a reference to it. + const GlobalValue *getGlobalValue() const { + assert(SourceType == isGV && "This is not a global value"); + return Source.GV; + }; + + // getExternalSym - If this is an external symbol which originated the + // elf symbol, return a reference to it. 
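LowerUnwinds above now gathers every unwind terminator in a first pass and only then rewrites them, so the rewriting cannot invalidate the iteration that found them, and the _Unwind_Resume declaration is created once before the loop. The same two-phase shape in miniature (standard containers, toy predicate):

#include <cstddef>
#include <list>
#include <vector>

// Phase 1 collects iterators to the victims; phase 2 mutates. list::erase
// only invalidates the erased iterator, so the saved ones stay valid.
static std::size_t eraseNegatives(std::list<int> &Values) {
  std::vector<std::list<int>::iterator> Doomed;
  for (std::list<int>::iterator I = Values.begin(), E = Values.end();
       I != E; ++I)
    if (*I < 0)
      Doomed.push_back(I);
  for (std::size_t i = 0; i != Doomed.size(); ++i)
    Values.erase(Doomed[i]);
  return Doomed.size();
}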
+ const char *getExternalSymbol() const { + assert(SourceType == isExtSym && "This is not an external symbol"); + return Source.Ext; + }; + + // getGV - From a global value return a elf symbol to represent it + static ELFSym *getGV(const GlobalValue *GV, unsigned Bind, + unsigned Type, unsigned Visibility) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(Type); + Sym->setVisibility(Visibility); + Sym->SourceType = isGV; + return Sym; + } + + // getExtSym - Create and return an elf symbol to represent an + // external symbol + static ELFSym *getExtSym(const char *Ext) { + ELFSym *Sym = new ELFSym(); + Sym->Source.Ext = Ext; + Sym->setBind(STB_GLOBAL); + Sym->setType(STT_NOTYPE); + Sym->setVisibility(STV_DEFAULT); + Sym->SourceType = isExtSym; + return Sym; + } + + // getSectionSym - Returns a elf symbol to represent an elf section + static ELFSym *getSectionSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(STB_LOCAL); + Sym->setType(STT_SECTION); + Sym->setVisibility(STV_DEFAULT); + Sym->SourceType = isOther; + return Sym; + } + + // getFileSym - Returns a elf symbol to represent the module identifier + static ELFSym *getFileSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(STB_LOCAL); + Sym->setType(STT_FILE); + Sym->setVisibility(STV_DEFAULT); + Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; + Sym->SourceType = isOther; + return Sym; + } + + // getUndefGV - Returns a STT_NOTYPE symbol + static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(STT_NOTYPE); + Sym->setVisibility(STV_DEFAULT); + Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; + Sym->SourceType = isGV; + return Sym; + } + + // ELF specific fields + unsigned NameIdx; // Index in .strtab of name, once emitted. + uint64_t Value; + unsigned Size; + uint8_t Info; + uint8_t Other; + unsigned short SectionIdx; + + // Symbol index into the Symbol table + unsigned SymTabIdx; + + enum { + STB_LOCAL = 0, // Local sym, not visible outside obj file containing def + STB_GLOBAL = 1, // Global sym, visible to all object files being combined + STB_WEAK = 2 // Weak symbol, like global but lower-precedence + }; + + enum { + STT_NOTYPE = 0, // Symbol's type is not specified + STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.) + STT_FUNC = 2, // Symbol is executable code (function, etc.) 
+ STT_SECTION = 3, // Symbol refers to a section + STT_FILE = 4 // Local, absolute symbol that refers to a file + }; + + enum { + STV_DEFAULT = 0, // Visibility is specified by binding type + STV_INTERNAL = 1, // Defined by processor supplements + STV_HIDDEN = 2, // Not visible to other components + STV_PROTECTED = 3 // Visible in other components but not preemptable + }; + + ELFSym() : SourceType(isOther), NameIdx(0), Value(0), + Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0), + SymTabIdx(0) {} + + unsigned getBind() const { return (Info >> 4) & 0xf; } + unsigned getType() const { return Info & 0xf; } + bool isLocalBind() const { return getBind() == STB_LOCAL; } + bool isFileType() const { return getType() == STT_FILE; } + + void setBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + + void setType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + + void setVisibility(unsigned V) { + assert(V == (V & 0x3) && "Visibility value out of range!"); + Other = V; + } + }; + /// ELFSection - This struct contains information about each section that is /// emitted to the file. This is eventually turned into the section header /// table at the end of the file. @@ -117,78 +270,19 @@ namespace llvm { /// SectionIdx - The number of the section in the Section Table. unsigned short SectionIdx; - ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) - : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), - Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0) {} - }; - - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - // The global value this corresponds to. Global symbols can be on of the - // 3 types : if this symbol has a zero initializer, it is common or should - // be placed in bss section otherwise it's a constant. - const GlobalValue *GV; - bool IsCommon; - bool IsBss; - bool IsConstant; - - // ELF specific fields - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - uint8_t Info; - uint8_t Other; - unsigned short SectionIdx; + /// Sym - The symbol to represent this section if it has one. + ELFSym *Sym; - // Symbol index into the Symbol table - unsigned SymTabIdx; - - enum { - STB_LOCAL = 0, - STB_GLOBAL = 1, - STB_WEAK = 2 - }; - - enum { - STT_NOTYPE = 0, - STT_OBJECT = 1, - STT_FUNC = 2, - STT_SECTION = 3, - STT_FILE = 4 - }; - - enum { - STV_DEFAULT = 0, // Visibility is specified by binding type - STV_INTERNAL = 1, // Defined by processor supplements - STV_HIDDEN = 2, // Not visible to other components - STV_PROTECTED = 3 // Visible in other components but not preemptable - }; - - ELFSym(const GlobalValue *gv) : GV(gv), IsCommon(false), IsBss(false), - IsConstant(false), NameIdx(0), Value(0), - Size(0), Info(0), Other(STV_DEFAULT), - SectionIdx(ELFSection::SHN_UNDEF), - SymTabIdx(0) {} - - unsigned getBind() { return (Info >> 4) & 0xf; } - unsigned getType() { return Info & 0xf; } - - void setBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); + /// getSymIndex - Returns the symbol table index of the symbol + /// representing this section. 
+ unsigned getSymbolTableIndex() const { + assert(Sym && "section not present in the symbol table"); + return Sym->SymTabIdx; } - void setType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - - void setVisibility(unsigned V) { - assert(V == (V & 0x3) && "Type value out of range!"); - Other = V; - } + ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) + : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), + Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {} }; /// ELFRelocation - This class contains all the information necessary to diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 691f19408d478..a6429f70001ad 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -17,12 +17,16 @@ #include "llvm/Function.h" #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRelocation.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" //===----------------------------------------------------------------------===// // ELFCodeEmitter Implementation @@ -33,84 +37,75 @@ namespace llvm { /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. void ELFCodeEmitter::startFunction(MachineFunction &MF) { - // Get the ELF Section that this function belongs in. - ES = &EW.getTextSection(); + DEBUG(errs() << "processing function: " + << MF.getFunction()->getName() << "\n"); - DOUT << "processing function: " << MF.getFunction()->getName() << "\n"; + // Get the ELF Section that this function belongs in. + ES = &EW.getTextSection(MF.getFunction()); - // FIXME: better memory management, this will be replaced by BinaryObjects - BinaryData &BD = ES->getData(); - BD.reserve(4096); - BufferBegin = &BD[0]; - BufferEnd = BufferBegin + BD.capacity(); + // Set the desired binary object to be used by the code emitters + setBinaryObject(ES); // Get the function alignment in bytes unsigned Align = (1 << MF.getAlignment()); - // Align the section size with the function alignment, so the function can - // start in a aligned offset, also update the section alignment if needed. - if (ES->Align < Align) ES->Align = Align; - ES->Size = (ES->Size + (Align-1)) & (-Align); - - // Snaity check on allocated space for text section - assert( ES->Size < 4096 && "no more space in TextSection" ); - - // FIXME: Using ES->Size directly here instead of calculating it from the - // output buffer size (impossible because the code emitter deals only in raw - // bytes) forces us to manually synchronize size and write padding zero bytes - // to the output buffer for all non-text sections. For text sections, we do - // not synchonize the output buffer, and we just blow up if anyone tries to - // write non-code to it. An assert should probably be added to - // AddSymbolToSection to prevent calling it on the text section. 
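For reference, the Info byte that setBind and setType manipulate above is the standard ELF st_info field: binding in the high nibble, type in the low nibble, i.e. the spec's ELF32_ST_INFO(bind, type). A self-contained illustration of the packing:

#include <cassert>
#include <cstdint>

struct SymInfo {
  uint8_t Info;
  SymInfo() : Info(0) {}
  void setBind(unsigned B) {
    assert(B == (B & 0xF) && "Bind value out of range!");
    Info = uint8_t((Info & 0x0F) | (B << 4));   // binding: high four bits
  }
  void setType(unsigned T) {
    assert(T == (T & 0xF) && "Type value out of range!");
    Info = uint8_t((Info & 0xF0) | T);          // type: low four bits
  }
  unsigned getBind() const { return (Info >> 4) & 0xF; }
  unsigned getType() const { return Info & 0xF; }
};
// setBind(1 /*STB_GLOBAL*/) then setType(2 /*STT_FUNC*/) yields Info == 0x12.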
- CurBufferPtr = BufferBegin + ES->Size; - - // Record function start address relative to BufferBegin - FnStartPtr = CurBufferPtr; + // The function must start on its required alignment + ES->emitAlignment(Align); + + // Update the section alignment if needed. + ES->Align = std::max(ES->Align, Align); + + // Record the function start offset + FnStartOff = ES->getCurrentPCOffset(); + + // Emit constant pool and jump tables to their appropriate sections. + // They need to be emitted before the function because in some targets + // the later may reference JT or CP entry address. + emitConstantPool(MF.getConstantPool()); + emitJumpTables(MF.getJumpTableInfo()); } /// finishFunction - This callback is invoked after the function is completely /// finished. bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { - // Update Section Size - ES->Size = CurBufferPtr - BufferBegin; - // Add a symbol to represent the function. const Function *F = MF.getFunction(); - ELFSym FnSym(F); - FnSym.setType(ELFSym::STT_FUNC); - FnSym.setBind(EW.getGlobalELFLinkage(F)); - FnSym.setVisibility(EW.getGlobalELFVisibility(F)); - FnSym.SectionIdx = ES->SectionIdx; - FnSym.Size = CurBufferPtr-FnStartPtr; + ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC, + EW.getGlobalELFVisibility(F)); + FnSym->SectionIdx = ES->SectionIdx; + FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; + EW.AddPendingGlobalSymbol(F, true); // Offset from start of Section - FnSym.Value = FnStartPtr-BufferBegin; - - // Locals should go on the symbol list front - if (!F->hasPrivateLinkage()) { - if (FnSym.getBind() == ELFSym::STB_LOCAL) - EW.SymbolList.push_front(FnSym); - else - EW.SymbolList.push_back(FnSym); + FnSym->Value = FnStartOff; + + if (!F->hasPrivateLinkage()) + EW.SymbolList.push_back(FnSym); + + // Patch up Jump Table Section relocations to use the real MBBs offsets + // now that the MBB label offsets inside the function are known. + if (!MF.getJumpTableInfo()->isEmpty()) { + ELFSection &JTSection = EW.getJumpTableSection(); + for (std::vector::iterator MRI = JTRelocations.begin(), + MRE = JTRelocations.end(); MRI != MRE; ++MRI) { + MachineRelocation &MR = *MRI; + unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); + MR.setResultPointer((void*)MBBOffset); + MR.setConstantVal(ES->SectionIdx); + JTSection.addRelocation(MR); + } } - // Emit constant pool to appropriate section(s) - emitConstantPool(MF.getConstantPool()); - - // Emit jump tables to appropriate section - emitJumpTables(MF.getJumpTableInfo()); - - // Relocations - // ----------- // If we have emitted any relocations to function-specific objects such as // basic blocks, constant pools entries, or jump tables, record their - // addresses now so that we can rewrite them with the correct addresses - // later. 
+ // addresses now so that we can rewrite them with the correct addresses later for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { MachineRelocation &MR = Relocations[i]; intptr_t Addr; if (MR.isGlobalValue()) { - EW.PendingGlobals.insert(MR.getGlobalValue()); + EW.AddPendingGlobalSymbol(MR.getGlobalValue()); + } else if (MR.isExternalSymbol()) { + EW.AddPendingExternalSymbol(MR.getExternalSymbol()); } else if (MR.isBasicBlock()) { Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setConstantVal(ES->SectionIdx); @@ -120,16 +115,18 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); MR.setResultPointer((void*)Addr); } else if (MR.isJumpTableIndex()) { + ELFSection &JTSection = EW.getJumpTableSection(); Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); + MR.setConstantVal(JTSection.SectionIdx); MR.setResultPointer((void*)Addr); - MR.setConstantVal(JumpTableSectionIdx); } else { - assert(0 && "Unhandled relocation type"); + llvm_unreachable("Unhandled relocation type"); } ES->addRelocation(MR); } // Clear per-function data structures. + JTRelocations.clear(); Relocations.clear(); CPLocations.clear(); CPSections.clear(); @@ -148,25 +145,19 @@ void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { assert(TM.getRelocationModel() != Reloc::PIC_ && "PIC codegen not yet handled for elf constant pools!"); - const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); for (unsigned i = 0, e = CP.size(); i != e; ++i) { MachineConstantPoolEntry CPE = CP[i]; - // Get the right ELF Section for this constant pool entry - std::string CstPoolName = - TAI->SelectSectionForMachineConst(CPE.getType())->getName(); - ELFSection &CstPoolSection = - EW.getConstantPoolSection(CstPoolName, CPE.getAlignment()); - // Record the constant pool location and the section index - CPLocations.push_back(CstPoolSection.size()); - CPSections.push_back(CstPoolSection.SectionIdx); + ELFSection &CstPool = EW.getConstantPoolSection(CPE); + CPLocations.push_back(CstPool.size()); + CPSections.push_back(CstPool.SectionIdx); if (CPE.isMachineConstantPoolEntry()) assert("CPE.isMachineConstantPoolEntry not supported yet"); // Emit the constant to constant pool section - EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPoolSection); + EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool); } } @@ -180,44 +171,32 @@ void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { assert(TM.getRelocationModel() != Reloc::PIC_ && "PIC codegen not yet handled for elf jump tables!"); - const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); + const TargetELFWriterInfo *TEW = TM.getELFWriterInfo(); + unsigned EntrySize = MJTI->getEntrySize(); // Get the ELF Section to emit the jump table - unsigned Align = TM.getTargetData()->getPointerABIAlignment(); - std::string JTName(TAI->getJumpTableDataSection()); - ELFSection &JTSection = EW.getJumpTableSection(JTName, Align); - JumpTableSectionIdx = JTSection.SectionIdx; - - // Entries in the JT Section are relocated against the text section - ELFSection &TextSection = EW.getTextSection(); + ELFSection &JTSection = EW.getJumpTableSection(); // For each JT, record its offset from the start of the section for (unsigned i = 0, e = JT.size(); i != e; ++i) { const std::vector &MBBs = JT[i].MBBs; - DOUT << "JTSection.size(): " << JTSection.size() << "\n"; - DOUT << "JTLocations.size: " << JTLocations.size() << "\n"; - // Record JT 'i' offset in the JT section JTLocations.push_back(JTSection.size()); // Each MBB entry in the Jump 
table section has a relocation entry // against the current text section. for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy(); MachineRelocation MR = - MachineRelocation::getBB(JTSection.size(), - MachineRelocation::VANILLA, - MBBs[mi]); - - // Offset of JT 'i' in JT section - MR.setResultPointer((void*)getMachineBasicBlockAddress(MBBs[mi])); - MR.setConstantVal(TextSection.SectionIdx); + MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]); // Add the relocation to the Jump Table section - JTSection.addRelocation(MR); + JTRelocations.push_back(MR); // Output placeholder for MBB in the JT section - JTSection.emitWord(0); + for (unsigned s=0; s < EntrySize; ++s) + JTSection.emitByte(0); } } } diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h index 982aebf8fcc02..b5e9c844ec691 100644 --- a/lib/CodeGen/ELFCodeEmitter.h +++ b/lib/CodeGen/ELFCodeEmitter.h @@ -10,7 +10,7 @@ #ifndef ELFCODEEMITTER_H #define ELFCODEEMITTER_H -#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" #include namespace llvm { @@ -19,7 +19,7 @@ namespace llvm { /// ELFCodeEmitter - This class is used by the ELFWriter to /// emit the code for functions to the ELF file. - class ELFCodeEmitter : public MachineCodeEmitter { + class ELFCodeEmitter : public ObjectCodeEmitter { ELFWriter &EW; /// Target machine description @@ -28,102 +28,48 @@ namespace llvm { /// Section containing code for functions ELFSection *ES; - /// Relocations - These are the relocations that the function needs, as - /// emitted. + /// Relocations - Record relocations needed by the current function std::vector Relocations; - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector CPLocations; + /// JTRelocations - Record relocations needed by the relocation + /// section. + std::vector JTRelocations; - /// CPSections - This is a map of constant pool indices to the MachOSection - /// containing the constant pool entry for that index. - std::vector CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector JTLocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. 
- std::vector MBBLocations; - - /// FnStartPtr - Pointer to the start location of the current function - /// in the buffer - uint8_t *FnStartPtr; - - /// JumpTableSectionIdx - Holds the index of the Jump Table Section - unsigned JumpTableSectionIdx; + /// FnStartPtr - Function offset from the beginning of ELFSection 'ES' + uintptr_t FnStartOff; public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), - JumpTableSectionIdx(0) {} - - void startFunction(MachineFunction &F); - bool finishFunction(MachineFunction &F); + explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} + /// addRelocation - Register new relocations for this function void addRelocation(const MachineRelocation &MR) { Relocations.push_back(MR); } - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); - } + /// emitConstantPool - For each constant pool entry, figure out which + /// section the constant should live in and emit data to it + void emitConstantPool(MachineConstantPool *MCP); - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; - } + /// emitJumpTables - Emit all the jump tables for a given jump table + /// info and record them to the appropriate section. + void emitJumpTables(MachineJumpTableInfo *MJTI); - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; - } + void startFunction(MachineFunction &F); + bool finishFunction(MachineFunction &F); - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; + /// emitLabel - Emits a label + virtual void emitLabel(uint64_t LabelID) { + assert("emitLabel not implemented"); } + /// getLabelAddress - Return the address of the specified LabelID, + /// only usable after the LabelID has been emitted. virtual uintptr_t getLabelAddress(uint64_t Label) const { - assert(0 && "Label address not implementated yet!"); - abort(); + assert("getLabelAddress not implemented"); return 0; } - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "emit Label not implementated yet!"); - abort(); - } - - /// emitConstantPool - For each constant pool entry, figure out which section - /// the constant should live in and emit the constant. - void emitConstantPool(MachineConstantPool *MCP); - - /// emitJumpTables - Emit all the jump tables for a given jump table info - /// record to the appropriate section. - void emitJumpTables(MachineJumpTableInfo *MJTI); - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {} - /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! 
- void startGVStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment = 1) { - assert(0 && "JIT specific function called!"); - abort(); - } - void startGVStub(const GlobalValue* F, void *Buffer, unsigned StubSize) { - assert(0 && "JIT specific function called!"); - abort(); - } - void *finishGVStub(const GlobalValue *F) { - assert(0 && "JIT specific function called!"); - abort(); - return 0; - } }; // end class ELFCodeEmitter } // end namespace llvm diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 9e915245525a3..3e1ee11b21660 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -29,7 +29,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "elfwriter" - #include "ELF.h" #include "ELFWriter.h" #include "ELFCodeEmitter.h" @@ -40,26 +39,33 @@ #include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" + using namespace llvm; char ELFWriter::ID = 0; -/// AddELFWriter - Concrete function to add the ELF writer to the function pass -/// manager. -MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, - raw_ostream &O, - TargetMachine &TM) { + +/// AddELFWriter - Add the ELF writer to the function pass manager +ObjectCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, + raw_ostream &O, + TargetMachine &TM) { ELFWriter *EW = new ELFWriter(O, TM); PM.add(EW); - return &EW->getMachineCodeEmitter(); + return EW->getObjectCodeEmitter(); } //===----------------------------------------------------------------------===// @@ -68,27 +74,51 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(&ID), O(o), TM(tm), + OutContext(*new MCContext()), + TLOF(TM.getTargetLowering()->getObjFileLowering()), is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), isLittleEndian(TM.getTargetData()->isLittleEndian()), ElfHdr(isLittleEndian, is64Bit) { - TAI = TM.getTargetAsmInfo(); + MAI = TM.getMCAsmInfo(); TEW = TM.getELFWriterInfo(); - // Create the machine code emitter object for this target. - MCE = new ELFCodeEmitter(*this); + // Create the object code emitter object for this target. + ElfCE = new ELFCodeEmitter(*this); // Inital number of sections NumSections = 0; } ELFWriter::~ELFWriter() { - delete MCE; + delete ElfCE; + delete &OutContext; + + while(!SymbolList.empty()) { + delete SymbolList.back(); + SymbolList.pop_back(); + } + + while(!PrivateSyms.empty()) { + delete PrivateSyms.back(); + PrivateSyms.pop_back(); + } + + while(!SectionList.empty()) { + delete SectionList.back(); + SectionList.pop_back(); + } + + // Release the name mangler object. 
+ delete Mang; Mang = 0; } // doInitialization - Emit the file header and all of the global variables for // the module to the ELF file. bool ELFWriter::doInitialization(Module &M) { + // Initialize TargetLoweringObjectFile. + const_cast(TLOF).Initialize(OutContext, TM); + Mang = new Mangler(M); // ELF Header @@ -138,13 +168,115 @@ bool ELFWriter::doInitialization(Module &M) { // Add the null section, which is required to be first in the file. getNullSection(); + // The first entry in the symtab is the null symbol and the second + // is a local symbol containing the module/file name + SymbolList.push_back(new ELFSym()); + SymbolList.push_back(ELFSym::getFileSym()); + return false; } +// AddPendingGlobalSymbol - Add a global to be processed and to +// the global symbol lookup, use a zero index because the table +// index will be determined later. +void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV, + bool AddToLookup /* = false */) { + PendingGlobals.insert(GV); + if (AddToLookup) + GblSymLookup[GV] = 0; +} + +// AddPendingExternalSymbol - Add the external to be processed +// and to the external symbol lookup, use a zero index because +// the symbol table index will be determined later. +void ELFWriter::AddPendingExternalSymbol(const char *External) { + PendingExternals.insert(External); + ExtSymLookup[External] = 0; +} + +ELFSection &ELFWriter::getDataSection() { + const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); + return getSection(Data->getSectionName(), Data->getType(), + Data->getFlags(), 4); +} + +ELFSection &ELFWriter::getBSSSection() { + const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); + return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); +} + +// getCtorSection - Get the static constructor section +ELFSection &ELFWriter::getCtorSection() { + const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); + return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); +} + +// getDtorSection - Get the static destructor section +ELFSection &ELFWriter::getDtorSection() { + const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); + return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); +} + +// getTextSection - Get the text section for the specified function +ELFSection &ELFWriter::getTextSection(Function *F) { + const MCSectionELF *Text = + (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); + return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); +} + +// getJumpTableSection - Get a read only section for constants when +// emitting jump tables. TODO: add PIC support +ELFSection &ELFWriter::getJumpTableSection() { + const MCSectionELF *JT = + (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); + return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), + TM.getTargetData()->getPointerABIAlignment()); +} + +// getConstantPoolSection - Get a constant pool section based on the machine +// constant pool entry type and relocation info. 
+ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16(); break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSectionELF *CPSect = + (const MCSectionELF *)TLOF.getSectionForConstant(Kind); + return getSection(CPSect->getSectionName(), CPSect->getType(), + CPSect->getFlags(), CPE.getAlignment()); +} + +// getRelocSection - Return the relocation section of section 'S'. 'RelA' +// is true if the relocation section contains entries with addends. +ELFSection &ELFWriter::getRelocSection(ELFSection &S) { + unsigned SectionType = TEW->hasRelocationAddend() ? + ELFSection::SHT_RELA : ELFSection::SHT_REL; + + std::string SectionName(".rel"); + if (TEW->hasRelocationAddend()) + SectionName.append("a"); + SectionName.append(S.getName()); + + return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment()); +} + +// getGlobalELFVisibility - Returns the ELF specific visibility type unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { switch (GV->getVisibility()) { default: - assert(0 && "unknown visibility type"); + llvm_unreachable("unknown visibility type"); case GlobalValue::DefaultVisibility: return ELFSym::STV_DEFAULT; case GlobalValue::HiddenVisibility: @@ -152,134 +284,132 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) { case GlobalValue::ProtectedVisibility: return ELFSym::STV_PROTECTED; } - return 0; } -unsigned ELFWriter::getGlobalELFLinkage(const GlobalValue *GV) { +// getGlobalELFBinding - Returns the ELF specific binding type +unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) { if (GV->hasInternalLinkage()) return ELFSym::STB_LOCAL; - if (GV->hasWeakLinkage()) + if (GV->isWeakForLinker() && !GV->hasCommonLinkage()) return ELFSym::STB_WEAK; return ELFSym::STB_GLOBAL; } -// getElfSectionFlags - Get the ELF Section Header based on the -// flags defined in ELFTargetAsmInfo. -unsigned ELFWriter::getElfSectionFlags(unsigned Flags) { - unsigned ElfSectionFlags = ELFSection::SHF_ALLOC; - - if (Flags & SectionFlags::Code) - ElfSectionFlags |= ELFSection::SHF_EXECINSTR; - if (Flags & SectionFlags::Writeable) - ElfSectionFlags |= ELFSection::SHF_WRITE; - if (Flags & SectionFlags::Mergeable) - ElfSectionFlags |= ELFSection::SHF_MERGE; - if (Flags & SectionFlags::TLS) - ElfSectionFlags |= ELFSection::SHF_TLS; - if (Flags & SectionFlags::Strings) - ElfSectionFlags |= ELFSection::SHF_STRINGS; - - return ElfSectionFlags; -} - -// For global symbols without a section, return the Null section as a -// placeholder -ELFSection &ELFWriter::getGlobalSymELFSection(const GlobalVariable *GV, - ELFSym &Sym) { - // If this is a declaration, the symbol does not have a section. 
- if (!GV->hasInitializer()) { - Sym.SectionIdx = ELFSection::SHN_UNDEF; - return getNullSection(); - } +// getGlobalELFType - Returns the ELF specific type for a global +unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) { + if (GV->isDeclaration()) + return ELFSym::STT_NOTYPE; - // Get the name and flags of the section for the global - const Section *S = TAI->SectionForGlobal(GV); - unsigned SectionType = ELFSection::SHT_PROGBITS; - unsigned SectionFlags = getElfSectionFlags(S->getFlags()); - DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n"; + if (isa<Function>(GV)) + return ELFSym::STT_FUNC; - const TargetData *TD = TM.getTargetData(); - unsigned Align = TD->getPreferredAlignment(GV); - Constant *CV = GV->getInitializer(); - - // If this global has a zero initializer, go to .bss or common section. - // Variables are part of the common block if they are zero initialized - // and allowed to be merged with other symbols. - if (CV->isNullValue() || isa(CV)) { - SectionType = ELFSection::SHT_NOBITS; - ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); - if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || - GV->hasCommonLinkage()) { - Sym.SectionIdx = ELFSection::SHN_COMMON; - Sym.IsCommon = true; - ElfS.Align = 1; - return ElfS; - } - Sym.IsBss = true; - Sym.SectionIdx = ElfS.SectionIdx; - if (Align) ElfS.Size = (ElfS.Size + Align-1) & ~(Align-1); - ElfS.Align = std::max(ElfS.Align, Align); - return ElfS; - } - - Sym.IsConstant = true; - ELFSection &ElfS = getSection(S->getName(), SectionType, SectionFlags); - Sym.SectionIdx = ElfS.SectionIdx; - ElfS.Align = std::max(ElfS.Align, Align); - return ElfS; + return ELFSym::STT_OBJECT; } -void ELFWriter::EmitFunctionDeclaration(const Function *F) { - ELFSym GblSym(F); - GblSym.setBind(ELFSym::STB_GLOBAL); - GblSym.setType(ELFSym::STT_NOTYPE); - GblSym.setVisibility(ELFSym::STV_DEFAULT); - GblSym.SectionIdx = ELFSection::SHN_UNDEF; - SymbolList.push_back(GblSym); +// IsELFUndefSym - True if the global value must be marked as a symbol +// which points to a SHN_UNDEF section. This means that the symbol has +// no definition in the module. +static bool IsELFUndefSym(const GlobalValue *GV) { + return GV->isDeclaration() || (isa<Function>(GV)); } -void ELFWriter::EmitGlobalVar(const GlobalVariable *GV) { - unsigned SymBind = getGlobalELFLinkage(GV); - unsigned Align=0, Size=0; - ELFSym GblSym(GV); - GblSym.setBind(SymBind); - GblSym.setVisibility(getGlobalELFVisibility(GV)); - - if (GV->hasInitializer()) { - GblSym.setType(ELFSym::STT_OBJECT); - const TargetData *TD = TM.getTargetData(); - Align = TD->getPreferredAlignment(GV); - Size = TD->getTypeAllocSize(GV->getInitializer()->getType()); - GblSym.Size = Size; +// AddToSymbolList - Update the symbol lookup and, if the symbol is +// private, add it to the PrivateSyms list, otherwise to SymbolList. +void ELFWriter::AddToSymbolList(ELFSym *GblSym) { + assert(GblSym->isGlobalValue() && "Symbol must be a global value"); + + const GlobalValue *GV = GblSym->getGlobalValue(); + if (GV->hasPrivateLinkage()) { + // For private symbols, keep track of the index inside + // the private list since it will never go to the symbol + // table and won't be patched up later. + PrivateSyms.push_back(GblSym); + GblSymLookup[GV] = PrivateSyms.size()-1; } else { - GblSym.setType(ELFSym::STT_NOTYPE); + // Non-private symbols are left with zero indices until + // they are patched up during the symbol table emission + // (where the indices are created).
+ SymbolList.push_back(GblSym); + GblSymLookup[GV] = 0; } +} - ELFSection &GblSection = getGlobalSymELFSection(GV, GblSym); - - if (GblSym.IsCommon) { - GblSym.Value = Align; - } else if (GblSym.IsBss) { - GblSym.Value = GblSection.Size; - GblSection.Size += Size; - } else if (GblSym.IsConstant){ - // GblSym.Value should contain the symbol index inside the section, - // and all symbols should start on their required alignment boundary - GblSym.Value = (GblSection.size() + (Align-1)) & (-Align); - GblSection.emitAlignment(Align); - EmitGlobalConstant(GV->getInitializer(), GblSection); - } +// EmitGlobal - Choose the right section for global and emit it +void ELFWriter::EmitGlobal(const GlobalValue *GV) { - // Local symbols should come first on the symbol table. - if (!GV->hasPrivateLinkage()) { - if (SymBind == ELFSym::STB_LOCAL) - SymbolList.push_front(GblSym); - else - SymbolList.push_back(GblSym); + // Check if the referenced symbol is already emitted + if (GblSymLookup.find(GV) != GblSymLookup.end()) + return; + + // Handle ELF Bind, Visibility and Type for the current symbol + unsigned SymBind = getGlobalELFBinding(GV); + unsigned SymType = getGlobalELFType(GV); + bool IsUndefSym = IsELFUndefSym(GV); + + ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind) + : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV)); + + if (!IsUndefSym) { + assert(isa(GV) && "GV not a global variable!"); + const GlobalVariable *GVar = dyn_cast(GV); + + // Handle special llvm globals + if (EmitSpecialLLVMGlobal(GVar)) + return; + + // Get the ELF section where this global belongs from TLOF + const MCSectionELF *S = + (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM); + ELFSection &ES = + getSection(S->getSectionName(), S->getType(), S->getFlags()); + SectionKind Kind = S->getKind(); + + // The symbol align should update the section alignment if needed + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPreferredAlignment(GVar); + unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType()); + GblSym->Size = Size; + + if (S->HasCommonSymbols()) { // Symbol must go to a common section + GblSym->SectionIdx = ELFSection::SHN_COMMON; + + // A new linkonce section is created for each global in the + // common section, the default alignment is 1 and the symbol + // value contains its alignment. + ES.Align = 1; + GblSym->Value = Align; + + } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS. + GblSym->SectionIdx = ES.SectionIdx; + + // Update the size with alignment and the next object can + // start in the right offset in the section + if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1); + ES.Align = std::max(ES.Align, Align); + + // GblSym->Value should contain the virtual offset inside the section. 
+ // Virtual because the BSS space is not allocated on ELF objects + GblSym->Value = ES.Size; + ES.Size += Size; + + } else { // The symbol must go to some kind of data section + GblSym->SectionIdx = ES.SectionIdx; + + // GblSym->Value should contain the symbol offset inside the section, + // and all symbols should start on their required alignment boundary + ES.Align = std::max(ES.Align, Align); + ES.emitAlignment(Align); + GblSym->Value = ES.size(); + + // Emit the global to the data section 'ES' + EmitGlobalConstant(GVar->getInitializer(), ES); + } } + + AddToSymbolList(GblSym); } void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, @@ -305,8 +435,7 @@ void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. - for (unsigned p=0; p < padSize; p++) - GblS.emitByte(0); + GblS.emitZeros(padSize); } assert(sizeSoFar == cvsLayout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); @@ -317,65 +446,242 @@ void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { unsigned Size = TD->getTypeAllocSize(CV->getType()); if (const ConstantArray *CVA = dyn_cast(CV)) { - if (CVA->isString()) { - std::string GblStr = CVA->getAsString(); - GblStr.resize(GblStr.size()-1); - GblS.emitString(GblStr); - } else { // Not a string. Print the values in successive locations - for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) - EmitGlobalConstant(CVA->getOperand(i), GblS); - } + for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) + EmitGlobalConstant(CVA->getOperand(i), GblS); + return; + } else if (isa(CV)) { + GblS.emitZeros(Size); return; } else if (const ConstantStruct *CVS = dyn_cast(CV)) { EmitGlobalConstantStruct(CVS, GblS); return; } else if (const ConstantFP *CFP = dyn_cast(CV)) { - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - if (CFP->getType() == Type::DoubleTy) - GblS.emitWord64(Val); - else if (CFP->getType() == Type::FloatTy) - GblS.emitWord32(Val); - else if (CFP->getType() == Type::X86_FP80Ty) { - assert(0 && "X86_FP80Ty global emission not implemented"); - } else if (CFP->getType() == Type::PPC_FP128Ty) - assert(0 && "PPC_FP128Ty global emission not implemented"); + APInt Val = CFP->getValueAPF().bitcastToAPInt(); + if (CFP->getType()->isDoubleTy()) + GblS.emitWord64(Val.getZExtValue()); + else if (CFP->getType()->isFloatTy()) + GblS.emitWord32(Val.getZExtValue()); + else if (CFP->getType()->isX86_FP80Ty()) { + unsigned PadSize = TD->getTypeAllocSize(CFP->getType())- + TD->getTypeStoreSize(CFP->getType()); + GblS.emitWordFP80(Val.getRawData(), PadSize); + } else if (CFP->getType()->isPPC_FP128Ty()) + llvm_unreachable("PPC_FP128Ty global emission not implemented"); return; } else if (const ConstantInt *CI = dyn_cast(CV)) { - if (Size == 4) + if (Size == 1) + GblS.emitByte(CI->getZExtValue()); + else if (Size == 2) + GblS.emitWord16(CI->getZExtValue()); + else if (Size == 4) GblS.emitWord32(CI->getZExtValue()); - else if (Size == 8) - GblS.emitWord64(CI->getZExtValue()); - else - assert(0 && "LargeInt global emission not implemented"); + else + EmitGlobalConstantLargeInt(CI, GblS); return; } else if (const ConstantVector *CP = dyn_cast(CV)) { const VectorType *PTy = CP->getType(); for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) EmitGlobalConstant(CP->getOperand(I), GblS); 
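The (Size + Align-1) & ~(Align-1) rounding used when growing the BSS section above is the usual power-of-two align-up idiom. A tiny standalone illustration (the helper name is ours, not from this patch):

    #include <cstdint>

    // Round Offset up to the next multiple of Align; Align must be a
    // power of two, e.g. alignUp(13, 8) == 16 and alignUp(16, 8) == 16.
    static uint64_t alignUp(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) & ~(Align - 1);
    }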
return; + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Resolve a constant expression which returns a (Constant, Offset) + // pair. If 'Res.first' is a GlobalValue, emit a relocation with + // the offset 'Res.second', otherwise emit a global constant as is + // always done for non-constant-expression types. + CstExprResTy Res = ResolveConstantExpr(CE); + const Constant *Op = Res.first; + + if (isa<GlobalValue>(Op)) + EmitGlobalDataRelocation(cast<GlobalValue>(Op), + TD->getTypeAllocSize(Op->getType()), + GblS, Res.second); + else + EmitGlobalConstant(Op, GblS); + + return; + } else if (CV->getType()->getTypeID() == Type::PointerTyID) { + // Fill the data entry with zeros or emit a relocation entry + if (isa<ConstantPointerNull>(CV)) + GblS.emitZeros(Size); + else + EmitGlobalDataRelocation(cast<GlobalValue>(CV), + Size, GblS); + return; + } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + // This is a constant address for a global variable or function and + // therefore must be referenced using a relocation entry. + EmitGlobalDataRelocation(GV, Size, GblS); + return; + } + + std::string msg; + raw_string_ostream ErrorMsg(msg); + ErrorMsg << "Constant unimp for type: " << *CV->getType(); + llvm_report_error(ErrorMsg.str()); +} + +// ResolveConstantExpr - Resolve the constant expression until it stops +// yielding other constant expressions. +CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) { + const TargetData *TD = TM.getTargetData(); + + // There isn't a constant expression nested inside others anymore + if (!isa<ConstantExpr>(CV)) + return std::make_pair(CV, 0); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + switch (CE->getOpcode()) { + case Instruction::BitCast: + return ResolveConstantExpr(CE->getOperand(0)); + + case Instruction::GetElementPtr: { + const Constant *ptrVal = CE->getOperand(0); + SmallVector idxVec(CE->op_begin()+1, CE->op_end()); + int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0], + idxVec.size()); + return std::make_pair(ptrVal, Offset); + } + case Instruction::IntToPtr: { + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()), + false/*ZExt*/); + return ResolveConstantExpr(Op); + } + case Instruction::PtrToInt: { + Constant *Op = CE->getOperand(0); + const Type *Ty = CE->getType(); + + // We can emit the pointer value into this slot if the slot is an + // integer slot greater than or equal to the size of the pointer. + if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType())) + return ResolveConstantExpr(Op); + + llvm_unreachable("Integer size less than pointer size"); + } + case Instruction::Add: + case Instruction::Sub: { + // Only handle cases where there's a constant expression with GlobalValue + // as first operand and ConstantInt as second, which are the cases we can + // solve directly using a relocation entry.
GlobalValue=Op0, CstInt=Op1 + // 1) Instruction::Add => (global) + CstInt + // 2) Instruction::Sub => (global) + -CstInt + const Constant *Op0 = CE->getOperand(0); + const Constant *Op1 = CE->getOperand(1); + assert(isa(Op1) && "Op1 must be a ConstantInt"); + + CstExprResTy Res = ResolveConstantExpr(Op0); + assert(isa(Res.first) && "Op0 must be a GlobalValue"); + + const APInt &RHS = cast(Op1)->getValue(); + switch (CE->getOpcode()) { + case Instruction::Add: + return std::make_pair(Res.first, RHS.getSExtValue()); + case Instruction::Sub: + return std::make_pair(Res.first, (-RHS).getSExtValue()); + } + } + } + + std::string msg(CE->getOpcodeName()); + raw_string_ostream ErrorMsg(msg); + ErrorMsg << ": Unsupported ConstantExpr type"; + llvm_report_error(ErrorMsg.str()); + + return std::make_pair(CV, 0); // silence warning +} + +void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, + ELFSection &GblS, int64_t Offset) { + // Create the relocation entry for the global value + MachineRelocation MR = + MachineRelocation::getGV(GblS.getCurrentPCOffset(), + TEW->getAbsoluteLabelMachineRelTy(), + const_cast(GV), + Offset); + + // Fill the data entry with zeros + GblS.emitZeros(Size); + + // Add the relocation entry for the current data section + GblS.addRelocation(MR); +} + +void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI, + ELFSection &S) { + const TargetData *TD = TM.getTargetData(); + unsigned BitWidth = CI->getBitWidth(); + assert(isPowerOf2_32(BitWidth) && + "Non-power-of-2-sized integers not handled!"); + + const uint64_t *RawData = CI->getValue().getRawData(); + uint64_t Val = 0; + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i]; + S.emitWord64(Val); } - assert(0 && "unknown global constant"); } +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") + llvm_unreachable("not implemented yet"); + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. + if (GV->getSection() == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPointerPrefAlignment(); + if (GV->getName() == "llvm.global_ctors") { + ELFSection &Ctor = getCtorSection(); + Ctor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Ctor); + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + ELFSection &Dtor = getDtorSection(); + Dtor.emitAlignment(Align); + EmitXXStructorList(GV->getInitializer(), Dtor); + return true; + } + + return false; +} + +/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the +/// function pointers, ignoring the init priority. +void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority, which we ignore. + if (!isa(List)) return; + ConstantArray *InitList = cast(List); + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (ConstantStruct *CS = dyn_cast(InitList->getOperand(i))){ + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. 
+ + if (CS->getOperand(1)->isNullValue()) + return; // Found a null terminator, exit printing. + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1), Xtor); + } +} bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { - // Nothing to do here, this is all done through the MCE object above. + // Nothing to do here, this is all done through the ElfCE object above. return false; } /// doFinalization - Now that the module has been completely processed, emit /// the ELF file to 'O'. bool ELFWriter::doFinalization(Module &M) { - /// FIXME: This should be removed when moving to ObjectCodeEmiter. Since the - /// current ELFCodeEmiter uses CurrBuff, ... it doesn't update S.Data - /// vector size for .text sections, so this is a quick dirty fix - ELFSection &TS = getTextSection(); - if (TS.Size) { - BinaryData &BD = TS.getData(); - for (unsigned e=0; e::const_iterator I = PendingGlobals.begin(), - E = PendingGlobals.end(); I != E; ++I) { + for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end(); + I != E; ++I) + EmitGlobal(*I); - // No need to emit the symbol again - if (GblSymLookup.find(*I) != GblSymLookup.end()) - continue; - - if (GlobalVariable *GV = dyn_cast(*I)) { - EmitGlobalVar(GV); - } else if (Function *F = dyn_cast(*I)) { - // If function is not in GblSymLookup, it doesn't have a body, - // so emit the symbol as a function declaration (no section associated) - EmitFunctionDeclaration(F); - } else { - assert("unknown howto handle pending global"); - } - GblSymLookup[*I] = 0; - } + // Emit all pending externals + for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end(); + I != E; ++I) + SymbolList.push_back(ELFSym::getExtSym(*I)); // Emit non-executable stack note - if (TAI->getNonexecutableStackDirective()) + if (MAI->getNonexecutableStackDirective()) getNonExecStackSection(); - // Emit a symbol for each section created until now - for (std::map::iterator I = SectionLookup.begin(), - E = SectionLookup.end(); I != E; ++I) { - ELFSection *ES = I->second; - - // Skip null section - if (ES->SectionIdx == 0) continue; - - ELFSym SectionSym(0); - SectionSym.SectionIdx = ES->SectionIdx; - SectionSym.Size = 0; - SectionSym.setBind(ELFSym::STB_LOCAL); - SectionSym.setType(ELFSym::STT_SECTION); - SectionSym.setVisibility(ELFSym::STV_DEFAULT); - - // Local symbols go in the list front - SymbolList.push_front(SectionSym); + // Emit a symbol for each section created until now, skip null section + for (unsigned i = 1, e = SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + ELFSym *SectionSym = ELFSym::getSectionSym(); + SectionSym->SectionIdx = ES.SectionIdx; + SymbolList.push_back(SectionSym); + ES.Sym = SymbolList.back(); } // Emit string table - EmitStringTable(); + EmitStringTable(M.getModuleIdentifier()); // Emit the symbol table now, if non-empty. EmitSymbolTable(); @@ -448,77 +731,106 @@ bool ELFWriter::doFinalization(Module &M) { // Dump the sections and section table to the .o file. OutputSectionsAndSectionTable(); - // We are done with the abstract symbols. - SectionList.clear(); - NumSections = 0; - - // Release the name mangler object. 
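As the comment above EmitXXStructorList says, llvm.global_ctors and llvm.global_dtors hold '{ int, void ()* }' pairs; the C++-equivalent shape of each element is roughly (illustrative only):

    #include <cstdint>

    struct Structor {
      int32_t Priority;  // init priority, ignored by this writer
      void (*Fn)();      // function pointer, the only field emitted
    };

Only Fn is written to the .ctors/.dtors section, one pointer per element, and a null Fn acts as a terminator.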
- delete Mang; Mang = 0; return false; } +// RelocateField - Patch relocatable field with 'Offset' in 'BO' +// using a 'Value' of known 'Size' +void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset, + int64_t Value, unsigned Size) { + if (Size == 32) + BO.fixWord32(Value, Offset); + else if (Size == 64) + BO.fixWord64(Value, Offset); + else + llvm_unreachable("don't know howto patch relocatable field"); +} + /// EmitRelocations - Emit relocations void ELFWriter::EmitRelocations() { + // True if the target uses the relocation entry to hold the addend, + // otherwise the addend is written directly to the relocatable field. + bool HasRelA = TEW->hasRelocationAddend(); + // Create Relocation sections for each section which needs it. - for (std::list::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { + for (unsigned i=0, e=SectionList.size(); i != e; ++i) { + ELFSection &S = *SectionList[i]; // This section does not have relocations - if (!I->hasRelocations()) continue; - - // Get the relocation section for section 'I' - bool HasRelA = TEW->hasRelocationAddend(); - ELFSection &RelSec = getRelocSection(I->getName(), HasRelA, - TEW->getPrefELFAlignment()); + if (!S.hasRelocations()) continue; + ELFSection &RelSec = getRelocSection(S); // 'Link' - Section hdr idx of the associated symbol table // 'Info' - Section hdr idx of the section to which the relocation applies ELFSection &SymTab = getSymbolTableSection(); RelSec.Link = SymTab.SectionIdx; - RelSec.Info = I->SectionIdx; + RelSec.Info = S.SectionIdx; RelSec.EntSize = TEW->getRelocationEntrySize(); // Get the relocations from Section - std::vector Relos = I->getRelocations(); + std::vector Relos = S.getRelocations(); for (std::vector::iterator MRI = Relos.begin(), MRE = Relos.end(); MRI != MRE; ++MRI) { MachineRelocation &MR = *MRI; - // Offset from the start of the section containing the symbol - unsigned Offset = MR.getMachineCodeOffset(); + // Relocatable field offset from the section start + unsigned RelOffset = MR.getMachineCodeOffset(); // Symbol index in the symbol table unsigned SymIdx = 0; - // Target specific ELF relocation type + // Target specific relocation field type and size unsigned RelType = TEW->getRelocationType(MR.getRelocationType()); - - // Constant addend used to compute the value to be stored - // into the relocatable field + unsigned RelTySize = TEW->getRelocationTySize(RelType); int64_t Addend = 0; // There are several machine relocations types, and each one of // them needs a different approach to retrieve the symbol table index. if (MR.isGlobalValue()) { const GlobalValue *G = MR.getGlobalValue(); + int64_t GlobalOffset = MR.getConstantVal(); SymIdx = GblSymLookup[G]; - Addend = TEW->getAddendForRelTy(RelType); + if (G->hasPrivateLinkage()) { + // If the target uses a section offset in the relocation: + // SymIdx + Addend = section sym for global + section offset + unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx; + Addend = PrivateSyms[SymIdx]->Value + GlobalOffset; + SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); + } else { + Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset); + } + } else if (MR.isExternalSymbol()) { + const char *ExtSym = MR.getExternalSymbol(); + SymIdx = ExtSymLookup[ExtSym]; + Addend = TEW->getDefaultAddendForRelTy(RelType); } else { + // Get the symbol index for the section symbol unsigned SectionIdx = MR.getConstantVal(); - // TODO: use a map for this. 
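RelocateField above, together with the HasRelA flag, applies the usual convention: on SHT_RELA targets the relocatable field is zeroed and the addend travels in the relocation entry, while SHT_REL targets pre-store the addend in the field itself. A compressed sketch of that decision (the helper name is ours; the BinaryObject calls are the ones used in this patch):

    // Patch the relocatable field: zero it when the relocation entry
    // carries the addend (RELA), otherwise store the addend in the
    // field itself (REL).
    void patchRelocField(BinaryObject &BO, uint32_t Offset, int64_t Addend,
                         unsigned Bits, bool HasRelA) {
      int64_t FieldValue = HasRelA ? 0 : Addend;
      if (Bits == 32)
        BO.fixWord32(FieldValue, Offset);
      else
        BO.fixWord64(FieldValue, Offset);
    }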
- for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I) - if ((SectionIdx == I->SectionIdx) && - (I->getType() == ELFSym::STT_SECTION)) { - SymIdx = I->SymTabIdx; - break; - } - Addend = (uint64_t)MR.getResultPointer(); + SymIdx = SectionList[SectionIdx]->getSymbolTableIndex(); + + // The symbol offset inside the section + int64_t SymOffset = (int64_t)MR.getResultPointer(); + + // For PC-relative relocations whose symbols are defined in the same + // section in which they are referenced, ignore the relocation entry and + // patch the relocatable field with the symbol offset directly. + if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) { + int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType); + RelocateField(S, RelOffset, Value, RelTySize); + continue; + } + + Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset); } + // On targets that don't store the addend in the relocation entry, the + // addend must be patched into the relocatable field itself; otherwise + // write zeros to make sure there is no garbage there + RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize); + // Get the relocation entry and emit to the relocation section - ELFRelocation Rel(Offset, SymIdx, RelType, HasRelA, Addend); + ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend); EmitRelocation(RelSec, Rel, HasRelA); } } @@ -554,7 +866,7 @@ void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { /// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' /// Section Header Table -void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, +void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr) { SHdrTab.emitWord32(SHdr.NameIdx); SHdrTab.emitWord32(SHdr.Type); @@ -581,27 +893,30 @@ void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, /// EmitStringTable - If the current symbol table is non-empty, emit the string /// table for it -void ELFWriter::EmitStringTable() { +void ELFWriter::EmitStringTable(const std::string &ModuleName) { if (!SymbolList.size()) return; // Empty symbol table. ELFSection &StrTab = getStringTableSection(); // Set the zero'th symbol to a null byte, as required. StrTab.emitByte(0); - // Walk on the symbol list and write symbol names into the - // string table. + // Walk the symbol list and write symbol names into the string table. unsigned Index = 1; - for (std::list<ELFSym>::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I) { + for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { + ELFSym &Sym = *(*I); - // Use the name mangler to uniquify the LLVM symbol. std::string Name; - if (I->GV) Name.append(Mang->getValueName(I->GV)); + if (Sym.isGlobalValue()) + Name.append(Mang->getMangledName(Sym.getGlobalValue())); + else if (Sym.isExternalSym()) + Name.append(Sym.getExternalSymbol()); + else if (Sym.isFileType()) + Name.append(ModuleName); if (Name.empty()) { - I->NameIdx = 0; + Sym.NameIdx = 0; } else { - I->NameIdx = Index; + Sym.NameIdx = Index; StrTab.emitString(Name); // Keep track of the number of bytes emitted to this section. @@ -612,11 +927,38 @@ void ELFWriter::EmitStringTable() { StrTab.Size = Index; } +// SortSymbols - In the symbol table, local symbols must come before +// all other symbols with non-local bindings. The return value is +// the position of the first non-local symbol.
+unsigned ELFWriter::SortSymbols() { + unsigned FirstNonLocalSymbol; + std::vector LocalSyms, OtherSyms; + + for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { + if ((*I)->isLocalBind()) + LocalSyms.push_back(*I); + else + OtherSyms.push_back(*I); + } + SymbolList.clear(); + FirstNonLocalSymbol = LocalSyms.size(); + + for (unsigned i = 0; i < FirstNonLocalSymbol; ++i) + SymbolList.push_back(LocalSyms[i]); + + for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I) + SymbolList.push_back(*I); + + LocalSyms.clear(); + OtherSyms.clear(); + + return FirstNonLocalSymbol; +} + /// EmitSymbolTable - Emit the symbol table itself. void ELFWriter::EmitSymbolTable() { if (!SymbolList.size()) return; // Empty symbol table. - unsigned FirstNonLocalSymbol = 1; // Now that we have emitted the string table and know the offset into the // string table of each symbol, emit the symbol table itself. ELFSection &SymTab = getSymbolTableSection(); @@ -628,30 +970,27 @@ void ELFWriter::EmitSymbolTable() { // Size of each symtab entry. SymTab.EntSize = TEW->getSymTabEntrySize(); - // The first entry in the symtab is the null symbol - ELFSym NullSym = ELFSym(0); - EmitSymbol(SymTab, NullSym); + // Reorder the symbol table with local symbols first! + unsigned FirstNonLocalSymbol = SortSymbols(); - // Emit all the symbols to the symbol table. Skip the null - // symbol, cause it's emitted already - unsigned Index = 1; - for (std::list::iterator I = SymbolList.begin(), - E = SymbolList.end(); I != E; ++I, ++Index) { - // Keep track of the first non-local symbol - if (I->getBind() == ELFSym::STB_LOCAL) - FirstNonLocalSymbol++; + // Emit all the symbols to the symbol table. + for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) { + ELFSym &Sym = *SymbolList[i]; // Emit symbol to the symbol table - EmitSymbol(SymTab, *I); + EmitSymbol(SymTab, Sym); - // Record the symbol table index for each global value - if (I->GV) - GblSymLookup[I->GV] = Index; + // Record the symbol table index for each symbol + if (Sym.isGlobalValue()) + GblSymLookup[Sym.getGlobalValue()] = i; + else if (Sym.isExternalSym()) + ExtSymLookup[Sym.getExternalSymbol()] = i; // Keep track on the symbol index into the symbol table - I->SymTabIdx = Index; + Sym.SymTabIdx = i; } + // One greater than the symbol table index of the last local symbol SymTab.Info = FirstNonLocalSymbol; SymTab.Size = SymTab.size(); } @@ -671,15 +1010,15 @@ void ELFWriter::EmitSectionTableStringTable() { // the string table. unsigned Index = 0; - for (std::list::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { + for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { + ELFSection &S = *(*I); // Set the index into the table. Note if we have lots of entries with // common suffixes, we could memoize them here if we cared. - I->NameIdx = Index; - SHStrTab.emitString(I->getName()); + S.NameIdx = Index; + SHStrTab.emitString(S.getName()); // Keep track of the number of bytes emitted to this section. - Index += I->getName().size()+1; + Index += S.getName().size()+1; } // Set the size of .shstrtab now that we know what it is. @@ -694,29 +1033,24 @@ void ELFWriter::OutputSectionsAndSectionTable() { // Pass #1: Compute the file offset for each section. size_t FileOff = ElfHdr.size(); // File header first. - // Adjust alignment of all section if needed. 
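SortSymbols above is effectively a stable partition: local symbols first, relative order preserved, with the returned index feeding the symbol table's sh_info field. Assuming SymbolList is a std::vector<ELFSym*> and isLocalBind() is a const query, as in this patch, an equivalent using <algorithm> would be:

    #include <algorithm>

    // Predicate: keep symbols with local binding in front.
    static bool isLocalSymbol(const ELFSym *S) { return S->isLocalBind(); }

    unsigned ELFWriter::SortSymbols() {
      ELFSymIter FirstNonLocal =
          std::stable_partition(SymbolList.begin(), SymbolList.end(),
                                isLocalSymbol);
      // Index of the first symbol with non-local binding.
      return FirstNonLocal - SymbolList.begin();
    }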
- for (std::list::iterator I = SectionList.begin(), - E = SectionList.end(); I != E; ++I) { - - // Section idx 0 has 0 offset - if (!I->SectionIdx) - continue; - - if (!I->size()) { - I->Offset = FileOff; + // Adjust alignment of all section if needed, skip the null section. + for (unsigned i=1, e=SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + if (!ES.size()) { + ES.Offset = FileOff; continue; } // Update Section size - if (!I->Size) - I->Size = I->size(); + if (!ES.Size) + ES.Size = ES.size(); // Align FileOff to whatever the alignment restrictions of the section are. - if (I->Align) - FileOff = (FileOff+I->Align-1) & ~(I->Align-1); + if (ES.Align) + FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1); - I->Offset = FileOff; - FileOff += I->Size; + ES.Offset = FileOff; + FileOff += ES.Size; } // Align Section Header. @@ -740,11 +1074,11 @@ void ELFWriter::OutputSectionsAndSectionTable() { BinaryObject SHdrTable(isLittleEndian, is64Bit); // Emit all of sections to the file and build the section header table. - while (!SectionList.empty()) { - ELFSection &S = *SectionList.begin(); - DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() - << ", Size: " << S.Size << ", Offset: " << S.Offset - << ", SectionData Size: " << S.size() << "\n"; + for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { + ELFSection &S = *(*I); + DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() + << ", Size: " << S.Size << ", Offset: " << S.Offset + << ", SectionData Size: " << S.size() << "\n"); // Align FileOff to whatever the alignment restrictions of the section are. if (S.size()) { @@ -758,7 +1092,6 @@ void ELFWriter::OutputSectionsAndSectionTable() { } EmitSectionHeader(SHdrTable, S); - SectionList.pop_front(); } // Align output for the section table. diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index bab118c6e3568..b61b4848b654d 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -16,23 +16,35 @@ #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include #include namespace llvm { class BinaryObject; class Constant; + class ConstantInt; class ConstantStruct; class ELFCodeEmitter; + class ELFRelocation; + class ELFSection; + struct ELFSym; class GlobalVariable; + class JITDebugRegisterer; class Mangler; class MachineCodeEmitter; - class TargetAsmInfo; + class MachineConstantPoolEntry; + class ObjectCodeEmitter; + class MCAsmInfo; class TargetELFWriterInfo; + class TargetLoweringObjectFile; class raw_ostream; - class ELFSection; - class ELFSym; - class ELFRelocation; + class SectionKind; + class MCContext; + + typedef std::vector::iterator ELFSymIter; + typedef std::vector::iterator ELFSectionIter; + typedef SetVector::const_iterator PendingGblsIter; + typedef SetVector::const_iterator PendingExtsIter; + typedef std::pair CstExprResTy; /// ELFWriter - This class implements the common target-independent code for /// writing ELF files. 
Targets should derive a class from this to @@ -40,18 +52,18 @@ namespace llvm { /// class ELFWriter : public MachineFunctionPass { friend class ELFCodeEmitter; + friend class JITDebugRegisterer; public: static char ID; - MachineCodeEmitter &getMachineCodeEmitter() const { - return *(MachineCodeEmitter*)MCE; + /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter + ObjectCodeEmitter *getObjectCodeEmitter() { + return reinterpret_cast(ElfCE); } ELFWriter(raw_ostream &O, TargetMachine &TM); ~ELFWriter(); - typedef std::vector DataBuffer; - protected: /// Output stream to send the resultant object file to. raw_ostream &O; @@ -59,6 +71,9 @@ namespace llvm { /// Target machine description. TargetMachine &TM; + /// Context object for machine code objects. + MCContext &OutContext; + /// Target Elf Writer description. const TargetELFWriterInfo *TEW; @@ -67,11 +82,15 @@ namespace llvm { /// MCE - The MachineCodeEmitter object that we are exposing to emit machine /// code for functions to the .o file. - ELFCodeEmitter *MCE; + ELFCodeEmitter *ElfCE; - /// TAI - Target Asm Info, provide information about section names for + /// TLOF - Target Lowering Object File, provide section names for globals + /// and other object file specific stuff + const TargetLoweringObjectFile &TLOF; + + /// MAI - Target Asm Info, provide information about section names for /// globals and other target specific stuff. - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; //===------------------------------------------------------------------===// // Properties inferred automatically from the target machine. @@ -95,59 +114,49 @@ namespace llvm { BinaryObject ElfHdr; /// SectionList - This is the list of sections that we have emitted to the - /// file. Once the file has been completely built, the section header table + /// file. Once the file has been completely built, the section header table /// is constructed from this info. - std::list SectionList; + std::vector SectionList; unsigned NumSections; // Always = SectionList.size() /// SectionLookup - This is a mapping from section name to section number in - /// the SectionList. + /// the SectionList. Used to quickly gather the Section Index from MAI names std::map SectionLookup; + /// PendingGlobals - Globals not processed as symbols yet. + SetVector PendingGlobals; + /// GblSymLookup - This is a mapping from global value to a symbol index - /// in the symbol table. This is useful since relocations symbol references - /// must be quickly mapped to a symbol table index + /// in the symbol table or private symbols list. This is useful since reloc + /// symbol references must be quickly mapped to their indices on the lists. std::map GblSymLookup; - /// SymbolList - This is the list of symbols emitted to the symbol table - /// Local symbols go to the front and Globals to the back. - std::list SymbolList; - - /// PendingGlobals - List of externally defined symbols that we have been - /// asked to emit, but have not seen a reference to. When a reference - /// is seen, the symbol will move from this list to the SymbolList. - SetVector PendingGlobals; - - // Remove tab from section name prefix. This is necessary becase TAI - // sometimes return a section name prefixed with a "\t" char. This is - // a little bit dirty. FIXME: find a better approach, maybe add more - // methods to TAI to get the clean name? 
- void fixNameForSection(std::string &Name) { - size_t Pos = Name.find("\t"); - if (Pos != std::string::npos) - Name.erase(Pos, 1); - - Pos = Name.find(".section "); - if (Pos != std::string::npos) - Name.erase(Pos, 9); - - Pos = Name.find("\n"); - if (Pos != std::string::npos) - Name.erase(Pos, 1); - } + /// PendingExternals - Externals not processed as symbols yet. + SetVector PendingExternals; + + /// ExtSymLookup - This is a mapping from externals to a symbol index + /// in the symbol table list. This is useful since reloc symbol references + /// must be quickly mapped to their symbol table indices. + std::map ExtSymLookup; + + /// SymbolList - This is the list of symbols emitted to the symbol table. + /// When the SymbolList is finally built, local symbols must be placed in + /// the beginning while non-locals at the end. + std::vector SymbolList; + + /// PrivateSyms - Record private symbols, every symbol here must never be + /// present in the SymbolList. + std::vector PrivateSyms; /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. ELFSection &getSection(const std::string &Name, unsigned Type, unsigned Flags = 0, unsigned Align = 0) { - std::string SectionName(Name); - fixNameForSection(SectionName); - - ELFSection *&SN = SectionLookup[SectionName]; + ELFSection *&SN = SectionLookup[Name]; if (SN) return *SN; - SectionList.push_back(ELFSection(SectionName, isLittleEndian, is64Bit)); - SN = &SectionList.back(); + SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit)); + SN = SectionList.back(); SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; @@ -156,37 +165,6 @@ namespace llvm { return *SN; } - /// TODO: support mangled names here to emit the right .text section - /// for c++ object files. - ELFSection &getTextSection() { - return getSection(".text", ELFSection::SHT_PROGBITS, - ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC); - } - - /// Get jump table section on the section name returned by TAI - ELFSection &getJumpTableSection(std::string SName, unsigned Align) { - return getSection(SName, ELFSection::SHT_PROGBITS, - ELFSection::SHF_ALLOC, Align); - } - - /// Get a constant pool section based on the section name returned by TAI - ELFSection &getConstantPoolSection(std::string SName, unsigned Align) { - return getSection(SName, ELFSection::SHT_PROGBITS, - ELFSection::SHF_MERGE | ELFSection::SHF_ALLOC, Align); - } - - /// Return the relocation section of section 'S'. 'RelA' is true - /// if the relocation section contains entries with addends. - ELFSection &getRelocSection(std::string SName, bool RelA, unsigned Align) { - std::string RelSName(".rel"); - unsigned SHdrTy = RelA ? 
ELFSection::SHT_RELA : ELFSection::SHT_REL; - - if (RelA) RelSName.append("a"); - RelSName.append(SName); - - return getSection(RelSName, SHdrTy, 0, Align); - } - ELFSection &getNonExecStackSection() { return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1); } @@ -203,24 +181,38 @@ namespace llvm { return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1); } - ELFSection &getDataSection() { - return getSection(".data", ELFSection::SHT_PROGBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); - } - - ELFSection &getBSSSection() { - return getSection(".bss", ELFSection::SHT_NOBITS, - ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 4); - } - ELFSection &getNullSection() { return getSection("", ELFSection::SHT_NULL, 0); } + ELFSection &getDataSection(); + ELFSection &getBSSSection(); + ELFSection &getCtorSection(); + ELFSection &getDtorSection(); + ELFSection &getJumpTableSection(); + ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE); + ELFSection &getTextSection(Function *F); + ELFSection &getRelocSection(ELFSection &S); + // Helpers for obtaining ELF specific info. - unsigned getGlobalELFLinkage(const GlobalValue *GV); + unsigned getGlobalELFBinding(const GlobalValue *GV); + unsigned getGlobalELFType(const GlobalValue *GV); unsigned getGlobalELFVisibility(const GlobalValue *GV); - unsigned getElfSectionFlags(unsigned Flags); + + // AddPendingGlobalSymbol - Add a global to be processed and to + // the global symbol lookup, use a zero index because the table + // index will be determined later. + void AddPendingGlobalSymbol(const GlobalValue *GV, + bool AddToLookup = false); + + // AddPendingExternalSymbol - Add the external to be processed + // and to the external symbol lookup, use a zero index because + // the symbol table index will be determined later. + void AddPendingExternalSymbol(const char *External); + + // AddToSymbolList - Update the symbol lookup and If the symbol is + // private add it to PrivateSyms list, otherwise to SymbolList. + void AddToSymbolList(ELFSym *GblSym); // As we complete the ELF file, we need to update fields in the ELF header // (e.g. the location of the section table). These members keep track of @@ -231,20 +223,27 @@ namespace llvm { unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. 
private: - void EmitFunctionDeclaration(const Function *F); - void EmitGlobalVar(const GlobalVariable *GV); + void EmitGlobal(const GlobalValue *GV); void EmitGlobalConstant(const Constant *C, ELFSection &GblS); void EmitGlobalConstantStruct(const ConstantStruct *CVS, ELFSection &GblS); - ELFSection &getGlobalSymELFSection(const GlobalVariable *GV, ELFSym &Sym); + void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S); + void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size, + ELFSection &GblS, int64_t Offset = 0); + bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); + void EmitXXStructorList(Constant *List, ELFSection &Xtor); void EmitRelocations(); void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA); void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); void EmitSectionTableStringTable(); void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); void EmitSymbolTable(); - void EmitStringTable(); + void EmitStringTable(const std::string &ModuleName); void OutputSectionsAndSectionTable(); + void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value, + unsigned Size); + unsigned SortSymbols(); + CstExprResTy ResolveConstantExpr(const Constant *CV); }; } diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp new file mode 100644 index 0000000000000..4f32c2b78b1f3 --- /dev/null +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -0,0 +1,160 @@ +//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a hazard recognizer using the instruction itineraries +// defined for the current target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "exact-hazards" +#include "ExactHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrItineraries.h" + +using namespace llvm; + +ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData) : + ScheduleHazardRecognizer(), ItinData(LItinData) +{ + // Determine the maximum depth of any itinerary. This determines the + // depth of the scoreboard. We always make the scoreboard at least 1 + // cycle deep to avoid dealing with the boundary condition.
+ ScoreboardDepth = 1; + if (!ItinData.isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData.isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData.beginStage(idx); + const InstrStage *E = ItinData.endStage(idx); + unsigned ItinDepth = 0; + for (; IS != E; ++IS) + ItinDepth += IS->getCycles(); + + ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + } + } + + Scoreboard = new unsigned[ScoreboardDepth]; + ScoreboardHead = 0; + + DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = " + << ScoreboardDepth << '\n'); +} + +ExactHazardRecognizer::~ExactHazardRecognizer() { + delete [] Scoreboard; +} + +void ExactHazardRecognizer::Reset() { + memset(Scoreboard, 0, ScoreboardDepth * sizeof(unsigned)); + ScoreboardHead = 0; +} + +unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) { + return (ScoreboardHead + offset) % ScoreboardDepth; +} + +void ExactHazardRecognizer::dumpScoreboard() { + errs() << "Scoreboard:\n"; + + unsigned last = ScoreboardDepth - 1; + while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = Scoreboard[getFutureIndex(i)]; + errs() << "\t"; + for (int j = 31; j >= 0; j--) + errs() << ((FUs & (1 << j)) ? '1' : '0'); + errs() << '\n'; + } +} + +ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) { + if (ItinData.isEmpty()) + return NoHazard; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + if (!freeUnits) { + DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(errs() << "SU(" << SU->NodeNum << "): "); + DEBUG(SU->getInstr()->dump()); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { + if (ItinData.isEmpty()) + return; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. 
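Two details of the scoreboard are worth spelling out. First, getFutureIndex above wraps modulo ScoreboardDepth, so with depth 4 and head 2 the offsets 0..3 map to slots 2, 3, 0, 1, which is why AdvanceCycle only has to clear one slot and bump the head. Second, the do/while reduction in EmitInstruction below repeatedly applies x & (x - 1), the classic clear-lowest-set-bit trick, until a single bit remains, i.e. it selects the highest set bit of the free-unit mask. A standalone sketch of that reduction (the helper name is ours):

    // Reduce a function-unit mask to its highest set bit, as the do/while
    // loop below does: clear the lowest set bit until one bit remains.
    static unsigned highestSetBit(unsigned Units) {
      unsigned Bit = 0;
      while (Units) {
        Bit = Units;         // last non-zero value seen
        Units &= Units - 1;  // clear the lowest set bit
      }
      return Bit;            // e.g. 0b0110 -> 0b0100, 0 -> 0
    }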
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + assert(freeUnit && "No function unit available!"); + Scoreboard[index] |= freeUnit; + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + DEBUG(dumpScoreboard()); +} + +void ExactHazardRecognizer::AdvanceCycle() { + Scoreboard[ScoreboardHead] = 0; + ScoreboardHead = getFutureIndex(1); +} diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h new file mode 100644 index 0000000000000..71ac979e6cd8d --- /dev/null +++ b/lib/CodeGen/ExactHazardRecognizer.h @@ -0,0 +1,61 @@ +//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ExactHazardRecognizer class, which +// implements hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetInstrItineraries.h" + +namespace llvm { + class ExactHazardRecognizer : public ScheduleHazardRecognizer { + // Itinerary data for the target. + const InstrItineraryData &ItinData; + + // Scoreboard to track function unit usage. Scoreboard[0] is a + // mask of the FUs in use in the cycle currently being + // scheduled. Scoreboard[1] is a mask for the next cycle. The + // Scoreboard is used as a circular buffer with the current cycle + // indicated by ScoreboardHead. + unsigned *Scoreboard; + + // The maximum number of cycles monitored by the Scoreboard. This + // value is determined based on the target itineraries to ensure + // that all hazards can be tracked. + unsigned ScoreboardDepth; + + // Index into the Scoreboard that represents the current cycle. + unsigned ScoreboardHead; + + // Return the scoreboard index to use for 'offset' cycles in the + // future. 'offset' of 0 returns ScoreboardHead. + unsigned getFutureIndex(unsigned offset); + + // Print the scoreboard.
+ void dumpScoreboard(); + + public: + ExactHazardRecognizer(const InstrItineraryData &ItinData); + ~ExactHazardRecognizer(); + + virtual HazardType getHazardType(SUnit *SU); + virtual void Reset(); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + }; +} + +#endif diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index cf2ebb39ad823..a57296c2a67f4 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -18,17 +18,20 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" - +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { class VISIBILITY_HIDDEN Printer : public FunctionPass { static char ID; - std::ostream &OS; + raw_ostream &OS; public: - explicit Printer(std::ostream &OS = *cerr); + Printer() : FunctionPass(&ID), OS(errs()) {} + explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {} + const char *getPassName() const; void getAnalysisUsage(AnalysisUsage &AU) const; @@ -74,27 +77,24 @@ GCModuleInfo::~GCModuleInfo() { GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, const std::string &Name) { - const char *Start = Name.c_str(); - - strategy_map_type::iterator NMI = - StrategyMap.find(Start, Start + Name.size()); + strategy_map_type::iterator NMI = StrategyMap.find(Name); if (NMI != StrategyMap.end()) return NMI->getValue(); for (GCRegistry::iterator I = GCRegistry::begin(), E = GCRegistry::end(); I != E; ++I) { - if (strcmp(Start, I->getName()) == 0) { + if (Name == I->getName()) { GCStrategy *S = I->instantiate(); S->M = M; S->Name = Name; - StrategyMap.GetOrCreateValue(Start, Start + Name.size()).setValue(S); + StrategyMap.GetOrCreateValue(Name).setValue(S); StrategyList.push_back(S); return S; } } - - cerr << "unsupported GC: " << Name << "\n"; - abort(); + + errs() << "unsupported GC: " << Name << "\n"; + llvm_unreachable(0); } GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { @@ -124,12 +124,10 @@ void GCModuleInfo::clear() { char Printer::ID = 0; -FunctionPass *llvm::createGCInfoPrinter(std::ostream &OS) { +FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { return new Printer(OS); } -Printer::Printer(std::ostream &OS) - : FunctionPass(&ID), OS(OS) {} const char *Printer::getPassName() const { return "Print Garbage Collector Information"; @@ -143,7 +141,7 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const { static const char *DescKind(GC::PointKind Kind) { switch (Kind) { - default: assert(0 && "Unknown GC point kind"); + default: llvm_unreachable("Unknown GC point kind"); case GC::Loop: return "loop"; case GC::Return: return "return"; case GC::PreCall: return "pre-call"; @@ -155,12 +153,12 @@ bool Printer::runOnFunction(Function &F) { if (!F.hasGC()) { GCFunctionInfo *FD = &getAnalysis().getFunctionInfo(F); - OS << "GC roots for " << FD->getFunction().getNameStart() << ":\n"; + OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), RE = FD->roots_end(); RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - OS << "GC safe points for " << FD->getFunction().getNameStart() << ":\n"; + OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n"; for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp index 5a5ef84fa4ebd..9cd2925e2d281 100644 --- 
a/lib/CodeGen/GCMetadataPrinter.cpp +++ b/lib/CodeGen/GCMetadataPrinter.cpp @@ -20,11 +20,11 @@ GCMetadataPrinter::GCMetadataPrinter() { } GCMetadataPrinter::~GCMetadataPrinter() { } void GCMetadataPrinter::beginAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { // Default is no action. } void GCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP, - const TargetAsmInfo &TAI) { + const MCAsmInfo &MAI) { // Default is no action. } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index ad7421abc2117..6d0de41e2c31b 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -28,6 +28,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -70,7 +72,8 @@ namespace { void FindSafePoints(MachineFunction &MF); void VisitCallPoint(MachineBasicBlock::iterator MI); unsigned InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; + MachineBasicBlock::iterator MI, + DebugLoc DL) const; void FindStackOffsets(MachineFunction &MF); @@ -107,8 +110,8 @@ GCStrategy::~GCStrategy() { bool GCStrategy::initializeCustomLowering(Module &M) { return false; } bool GCStrategy::performCustomLowering(Function &F) { - cerr << "gc " << getName() << " must override performCustomLowering.\n"; - abort(); + errs() << "gc " << getName() << " must override performCustomLowering.\n"; + llvm_unreachable(0); return 0; } @@ -327,11 +330,13 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { } unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { + MachineBasicBlock::iterator MI, + DebugLoc DL) const { unsigned Label = MMI->NextLabelID(); - // N.B. we assume that MI is *not* equal to the "end()" iterator. 
- BuildMI(MBB, MI, MI->getDebugLoc(), + + BuildMI(MBB, MI, DL, TII->get(TargetInstrInfo::GC_LABEL)).addImm(Label); + return Label; } @@ -342,10 +347,12 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { ++RAI; if (FI->getStrategy().needsSafePoint(GC::PreCall)) - FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI)); + FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI, + CI->getDebugLoc())); if (FI->getStrategy().needsSafePoint(GC::PostCall)) - FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI)); + FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI, + CI->getDebugLoc())); } void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index d5e7ea59a7459..7b613ff25013b 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ifcvt" +#include "BranchFolding.h" #include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -21,6 +22,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -226,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); if (!TII) return false; - DOUT << "\nIfcvt: function (" << ++FnNum << ") \'" - << MF.getFunction()->getName() << "\'"; + DEBUG(errs() << "\nIfcvt: function (" << ++FnNum << ") \'" + << MF.getFunction()->getName() << "\'"); if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { - DOUT << " skipped\n"; + DEBUG(errs() << " skipped\n"); return false; } - DOUT << "\n"; + DEBUG(errs() << "\n"); MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); @@ -278,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DOUT << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") - << "): BB#" << BBI.BB->getNumber() << " (" - << ((Kind == ICSimpleFalse) - ? BBI.FalseBB->getNumber() - : BBI.TrueBB->getNumber()) << ") "; + DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "); RetVal = IfConvertSimple(BBI, Kind); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? "succeeded!" 
: "failed!") << "\n"); if (RetVal) { if (isFalse) NumSimpleFalse++; else NumSimple++; @@ -301,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (DisableTriangleR && !isFalse && isRev) break; if (DisableTriangleF && isFalse && !isRev) break; if (DisableTriangleFR && isFalse && isRev) break; - DOUT << "Ifcvt (Triangle"; + DEBUG(errs() << "Ifcvt (Triangle"); if (isFalse) - DOUT << " false"; + DEBUG(errs() << " false"); if (isRev) - DOUT << " rev"; - DOUT << "): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "; + DEBUG(errs() << " rev"); + DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertTriangle(BBI, Kind); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { if (isRev) NumTriangleFRev++; @@ -324,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } case ICDiamond: { if (DisableDiamond) break; - DOUT << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" - << BBI.TrueBB->getNumber() << ",F:" - << BBI.FalseBB->getNumber() << ") "; + DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); - DOUT << (RetVal ? "succeeded!" : "failed!") << "\n"; + DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) NumDiamonds++; break; } @@ -358,6 +361,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { Roots.clear(); BBAnalysis.clear(); + if (MadeChange) { + BranchFolder BF(false); + BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); + } + return MadeChange; } @@ -1130,8 +1140,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI, if (TII->isPredicated(I)) continue; if (!TII->PredicateInstruction(I, Cond)) { - cerr << "Unable to predicate " << *I << "!\n"; - abort(); +#ifndef NDEBUG + errs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(0); } } @@ -1164,8 +1176,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, if (!isPredicated) if (!TII->PredicateInstruction(MI, Cond)) { - cerr << "Unable to predicate " << *MI << "!\n"; - abort(); +#ifndef NDEBUG + errs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(0); } } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 052334a05ba59..3e3b28a8109b9 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -16,7 +16,9 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" using namespace llvm; @@ -39,11 +41,11 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, switch((int)Fn->arg_begin()->getType()->getTypeID()) { case Type::FloatTyID: EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), - Type::FloatTy); + Type::getFloatTy(M.getContext())); break; case Type::DoubleTyID: EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), - Type::DoubleTy); + Type::getDoubleTy(M.getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: @@ 
-82,39 +84,43 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, } void IntrinsicLowering::AddPrototypes(Module &M) { + LLVMContext &Context = M.getContext(); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->isDeclaration() && !I->use_empty()) switch (I->getIntrinsicID()) { default: break; case Intrinsic::setjmp: EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), - Type::Int32Ty); + Type::getInt32Ty(M.getContext())); break; case Intrinsic::longjmp: EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), - Type::VoidTy); + Type::getVoidTy(M.getContext())); break; case Intrinsic::siglongjmp: EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), - Type::VoidTy); + Type::getVoidTy(M.getContext())); break; case Intrinsic::memcpy: - M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memcpy", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memmove: - M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memmove", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::memset: - M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty), - PointerType::getUnqual(Type::Int8Ty), - Type::Int32Ty, - TD.getIntPtrType(), (Type *)0); + M.getOrInsertFunction("memset", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt32Ty(M.getContext()), + TD.getIntPtrType(Context), (Type *)0); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); @@ -148,7 +154,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) { /// LowerBSWAP - Emit the code to lower bswap of V before the specified /// instruction IP. 
-static Value *LowerBSWAP(Value *V, Instruction *IP) { +static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); @@ -156,7 +162,7 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { IRBuilder<> Builder(IP->getParent(), IP); switch(BitSize) { - default: assert(0 && "Unhandled type size of value to byteswap!"); + default: llvm_unreachable("Unhandled type size of value to byteswap!"); case 16: { Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), "bswap.2"); @@ -172,11 +178,13 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { "bswap.3"); Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), "bswap.2"); - Value *Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), "bswap.1"); - Tmp3 = Builder.CreateAnd(Tmp3, ConstantInt::get(Type::Int32Ty, 0xFF0000), + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), "bswap.and3"); - Tmp2 = Builder.CreateAnd(Tmp2, ConstantInt::get(Type::Int32Ty, 0xFF00), + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), "bswap.and2"); Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); @@ -194,31 +202,38 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { "bswap.5"); Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), "bswap.4"); - Value* Tmp3 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 24), + Value* Tmp3 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 24), "bswap.3"); - Value* Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 40), + Value* Tmp2 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 40), "bswap.2"); - Value* Tmp1 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 56), + Value* Tmp1 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 56), "bswap.1"); Tmp7 = Builder.CreateAnd(Tmp7, - ConstantInt::get(Type::Int64Ty, + ConstantInt::get(Type::getInt64Ty(Context), 0xFF000000000000ULL), "bswap.and7"); Tmp6 = Builder.CreateAnd(Tmp6, - ConstantInt::get(Type::Int64Ty, + ConstantInt::get(Type::getInt64Ty(Context), 0xFF0000000000ULL), "bswap.and6"); Tmp5 = Builder.CreateAnd(Tmp5, - ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00000000ULL), "bswap.and5"); Tmp4 = Builder.CreateAnd(Tmp4, - ConstantInt::get(Type::Int64Ty, 0xFF000000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000ULL), "bswap.and4"); Tmp3 = Builder.CreateAnd(Tmp3, - ConstantInt::get(Type::Int64Ty, 0xFF0000ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000ULL), "bswap.and3"); Tmp2 = Builder.CreateAnd(Tmp2, - ConstantInt::get(Type::Int64Ty, 0xFF00ULL), + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00ULL), "bswap.and2"); Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); @@ -235,7 +250,7 @@ static Value *LowerBSWAP(Value *V, Instruction *IP) { /// LowerCTPOP - Emit the code to lower ctpop of V before the specified /// instruction IP. 
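The LowerBSWAP hunk above lowers llvm.bswap into plain shifts, masks, and ors. For the 16-bit case the decomposition is small enough to check by hand; the following self-contained C++ mirrors the same bit math with host arithmetic (an illustration only, not LLVM API):

```cpp
#include <cassert>
#include <cstdint>

// Same structure as the i16 case in LowerBSWAP: one shift left, one
// logical shift right, then an or to recombine the two bytes.
uint16_t bswap16(uint16_t V) {
  uint16_t Hi = uint16_t(V << 8);   // low byte moves to the high position
  uint16_t Lo = uint16_t(V >> 8);   // high byte moves to the low position
  return uint16_t(Hi | Lo);
}

int main() {
  assert(bswap16(0x1234) == 0x3412);  // bytes swapped
  return 0;
}
```

The 32- and 64-bit cases in the hunk follow the same pattern, adding CreateAnd masks (0xFF00, 0xFF0000, and so on) to clear the bits that the wider shifts drag along.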
-static Value *LowerCTPOP(Value *V, Instruction *IP) { +static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); static const uint64_t MaskValues[6] = { @@ -257,7 +272,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) { Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]); Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); Value *VShift = Builder.CreateLShr(PartValue, - ConstantInt::get(V->getType(), i), + ConstantInt::get(V->getType(), i), "ctpop.sh"); Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2"); PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step"); @@ -275,7 +290,7 @@ static Value *LowerCTPOP(Value *V, Instruction *IP) { /// LowerCTLZ - Emit the code to lower ctlz of V before the specified /// instruction IP. -static Value *LowerCTLZ(Value *V, Instruction *IP) { +static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { IRBuilder<> Builder(IP->getParent(), IP); @@ -287,353 +302,21 @@ static Value *LowerCTLZ(Value *V, Instruction *IP) { } V = Builder.CreateNot(V); - return LowerCTPOP(V, IP); -} - -/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes -/// three integer arguments. The first argument is the Value from which the -/// bits will be selected. It may be of any bit width. The second and third -/// arguments specify a range of bits to select with the second argument -/// specifying the low bit and the third argument specifying the high bit. Both -/// must be type i32. The result is the corresponding selected bits from the -/// Value in the same width as the Value (first argument). If the low bit index -/// is higher than the high bit index then the inverse selection is done and -/// the bits are returned in inverse order. -/// @brief Lowering of llvm.part.select intrinsic. -static Instruction *LowerPartSelect(CallInst *CI) { - IRBuilder<> Builder; - - // Make sure we're dealing with a part select intrinsic here - Function *F = CI->getCalledFunction(); - const FunctionType *FT = F->getFunctionType(); - if (!F->isDeclaration() || !FT->getReturnType()->isInteger() || - FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() || - !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger()) - return CI; - - // Get the intrinsic implementation function by converting all the . to _ - // in the intrinsic's function name and then reconstructing the function - // declaration. - std::string Name(F->getName()); - for (unsigned i = 4; i < Name.length(); ++i) - if (Name[i] == '.') - Name[i] = '_'; - Module* M = F->getParent(); - F = cast(M->getOrInsertFunction(Name, FT)); - F->setLinkage(GlobalValue::WeakAnyLinkage); - - // If we haven't defined the impl function yet, do so now - if (F->isDeclaration()) { - - // Get the arguments to the function - Function::arg_iterator args = F->arg_begin(); - Value* Val = args++; Val->setName("Val"); - Value* Lo = args++; Lo->setName("Lo"); - Value* Hi = args++; Hi->setName("High"); - - // We want to select a range of bits here such that [Hi, Lo] is shifted - // down to the low bits. However, it is quite possible that Hi is smaller - // than Lo in which case the bits have to be reversed. 
- - // Create the blocks we will need for the two cases (forward, reverse) - BasicBlock* CurBB = BasicBlock::Create("entry", F); - BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent()); - BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent()); - BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent()); - BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent()); - BasicBlock *RsltBlk = BasicBlock::Create("result", CurBB->getParent()); - - Builder.SetInsertPoint(CurBB); - - // Cast Hi and Lo to the size of Val so the widths are all the same - if (Hi->getType() != Val->getType()) - Hi = Builder.CreateIntCast(Hi, Val->getType(), /* isSigned */ false, - "tmp"); - if (Lo->getType() != Val->getType()) - Lo = Builder.CreateIntCast(Lo, Val->getType(), /* isSigned */ false, - "tmp"); - - // Compute a few things that both cases will need, up front. - Constant* Zero = ConstantInt::get(Val->getType(), 0); - Constant* One = ConstantInt::get(Val->getType(), 1); - Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType()); - - // Compare the Hi and Lo bit positions. This is used to determine - // which case we have (forward or reverse) - Value *Cmp = Builder.CreateICmpULT(Hi, Lo, "less"); - Builder.CreateCondBr(Cmp, RevSize, FwdSize); - - // First, compute the number of bits in the forward case. - Builder.SetInsertPoint(FwdSize); - Value* FBitSize = Builder.CreateSub(Hi, Lo, "fbits"); - Builder.CreateBr(Compute); - - // Second, compute the number of bits in the reverse case. - Builder.SetInsertPoint(RevSize); - Value* RBitSize = Builder.CreateSub(Lo, Hi, "rbits"); - Builder.CreateBr(Compute); - - // Now, compute the bit range. Start by getting the bitsize and the shift - // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for - // the number of bits we want in the range. We shift the bits down to the - // least significant bits, apply the mask to zero out unwanted high bits, - // and we have computed the "forward" result. It may still need to be - // reversed. - Builder.SetInsertPoint(Compute); - - // Get the BitSize from one of the two subtractions - PHINode *BitSize = Builder.CreatePHI(Val->getType(), "bits"); - BitSize->reserveOperandSpace(2); - BitSize->addIncoming(FBitSize, FwdSize); - BitSize->addIncoming(RBitSize, RevSize); - - // Get the ShiftAmount as the smaller of Hi/Lo - PHINode *ShiftAmt = Builder.CreatePHI(Val->getType(), "shiftamt"); - ShiftAmt->reserveOperandSpace(2); - ShiftAmt->addIncoming(Lo, FwdSize); - ShiftAmt->addIncoming(Hi, RevSize); - - // Increment the bit size - Value *BitSizePlusOne = Builder.CreateAdd(BitSize, One, "bits"); - - // Create a Mask to zero out the high order bits. - Value* Mask = Builder.CreateShl(AllOnes, BitSizePlusOne, "mask"); - Mask = Builder.CreateNot(Mask, "mask"); - - // Shift the bits down and apply the mask - Value* FRes = Builder.CreateLShr(Val, ShiftAmt, "fres"); - FRes = Builder.CreateAnd(FRes, Mask, "fres"); - Builder.CreateCondBr(Cmp, Reverse, RsltBlk); - - // In the Reverse block we have the mask already in FRes but we must reverse - // it by shifting FRes bits right and putting them in RRes by shifting them - // in from left. - Builder.SetInsertPoint(Reverse); - - // First set up our loop counters - PHINode *Count = Builder.CreatePHI(Val->getType(), "count"); - Count->reserveOperandSpace(2); - Count->addIncoming(BitSizePlusOne, Compute); - - // Next, get the value that we are shifting. 
- PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); - BitsToShift->reserveOperandSpace(2); - BitsToShift->addIncoming(FRes, Compute); - - // Finally, get the result of the last computation - PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); - RRes->reserveOperandSpace(2); - RRes->addIncoming(Zero, Compute); - - // Decrement the counter - Value *Decr = Builder.CreateSub(Count, One, "decr"); - Count->addIncoming(Decr, Reverse); - - // Compute the Bit that we want to move - Value *Bit = Builder.CreateAnd(BitsToShift, One, "bit"); - - // Compute the new value for next iteration. - Value *NewVal = Builder.CreateLShr(BitsToShift, One, "rshift"); - BitsToShift->addIncoming(NewVal, Reverse); - - // Shift the bit into the low bits of the result. - Value *NewRes = Builder.CreateShl(RRes, One, "lshift"); - NewRes = Builder.CreateOr(NewRes, Bit, "addbit"); - RRes->addIncoming(NewRes, Reverse); - - // Terminate loop if we've moved all the bits. - Value *Cond = Builder.CreateICmpEQ(Decr, Zero, "cond"); - Builder.CreateCondBr(Cond, RsltBlk, Reverse); - - // Finally, in the result block, select one of the two results with a PHI - // node and return the result; - Builder.SetInsertPoint(RsltBlk); - PHINode *BitSelect = Builder.CreatePHI(Val->getType(), "part_select"); - BitSelect->reserveOperandSpace(2); - BitSelect->addIncoming(FRes, Compute); - BitSelect->addIncoming(NewRes, Reverse); - Builder.CreateRet(BitSelect); - } - - // Return a call to the implementation function - Builder.SetInsertPoint(CI->getParent(), CI); - CallInst *NewCI = Builder.CreateCall3(F, CI->getOperand(1), - CI->getOperand(2), CI->getOperand(3)); - NewCI->setName(CI->getName()); - return NewCI; -} - -/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes -/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High) -/// The first two arguments can be any bit width. The result is the same width -/// as %Value. The operation replaces bits between %Low and %High with the value -/// in %Replacement. If %Replacement is not the same width, it is truncated or -/// zero extended as appropriate to fit the bits being replaced. If %Low is -/// greater than %High then the inverse set of bits are replaced. -/// @brief Lowering of llvm.bit.part.set intrinsic. -static Instruction *LowerPartSet(CallInst *CI) { - IRBuilder<> Builder; - - // Make sure we're dealing with a part select intrinsic here - Function *F = CI->getCalledFunction(); - const FunctionType *FT = F->getFunctionType(); - if (!F->isDeclaration() || !FT->getReturnType()->isInteger() || - FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() || - !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() || - !FT->getParamType(3)->isInteger()) - return CI; - - // Get the intrinsic implementation function by converting all the . to _ - // in the intrinsic's function name and then reconstructing the function - // declaration. - std::string Name(F->getName()); - for (unsigned i = 4; i < Name.length(); ++i) - if (Name[i] == '.') - Name[i] = '_'; - Module* M = F->getParent(); - F = cast(M->getOrInsertFunction(Name, FT)); - F->setLinkage(GlobalValue::WeakAnyLinkage); - - // If we haven't defined the impl function yet, do so now - if (F->isDeclaration()) { - // Get the arguments for the function. 
- Function::arg_iterator args = F->arg_begin(); - Value* Val = args++; Val->setName("Val"); - Value* Rep = args++; Rep->setName("Rep"); - Value* Lo = args++; Lo->setName("Lo"); - Value* Hi = args++; Hi->setName("Hi"); - - // Get some types we need - const IntegerType* ValTy = cast(Val->getType()); - const IntegerType* RepTy = cast(Rep->getType()); - uint32_t RepBits = RepTy->getBitWidth(); - - // Constant Definitions - ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits); - ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy); - ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy); - ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1); - ConstantInt* ValOne = ConstantInt::get(ValTy, 1); - ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0); - ConstantInt* ValZero = ConstantInt::get(ValTy, 0); - - // Basic blocks we fill in below. - BasicBlock* entry = BasicBlock::Create("entry", F, 0); - BasicBlock* large = BasicBlock::Create("large", F, 0); - BasicBlock* small = BasicBlock::Create("small", F, 0); - BasicBlock* reverse = BasicBlock::Create("reverse", F, 0); - BasicBlock* result = BasicBlock::Create("result", F, 0); - - // BASIC BLOCK: entry - Builder.SetInsertPoint(entry); - // First, get the number of bits that we're placing as an i32 - Value* is_forward = Builder.CreateICmpULT(Lo, Hi); - Value* Hi_pn = Builder.CreateSelect(is_forward, Hi, Lo); - Value* Lo_pn = Builder.CreateSelect(is_forward, Lo, Hi); - Value* NumBits = Builder.CreateSub(Hi_pn, Lo_pn); - NumBits = Builder.CreateAdd(NumBits, One); - // Now, convert Lo and Hi to ValTy bit width - Lo = Builder.CreateIntCast(Lo_pn, ValTy, /* isSigned */ false); - // Determine if the replacement bits are larger than the number of bits we - // are replacing and deal with it. - Value* is_large = Builder.CreateICmpULT(NumBits, RepBitWidth); - Builder.CreateCondBr(is_large, large, small); - - // BASIC BLOCK: large - Builder.SetInsertPoint(large); - Value* MaskBits = Builder.CreateSub(RepBitWidth, NumBits); - MaskBits = Builder.CreateIntCast(MaskBits, RepMask->getType(), - /* isSigned */ false); - Value* Mask1 = Builder.CreateLShr(RepMask, MaskBits); - Value* Rep2 = Builder.CreateAnd(Mask1, Rep); - Builder.CreateBr(small); - - // BASIC BLOCK: small - Builder.SetInsertPoint(small); - PHINode* Rep3 = Builder.CreatePHI(RepTy); - Rep3->reserveOperandSpace(2); - Rep3->addIncoming(Rep2, large); - Rep3->addIncoming(Rep, entry); - Value* Rep4 = Builder.CreateIntCast(Rep3, ValTy, /* isSigned */ false); - Builder.CreateCondBr(is_forward, result, reverse); - - // BASIC BLOCK: reverse (reverses the bits of the replacement) - Builder.SetInsertPoint(reverse); - // Set up our loop counter as a PHI so we can decrement on each iteration. - // We will loop for the number of bits in the replacement value. - PHINode *Count = Builder.CreatePHI(Type::Int32Ty, "count"); - Count->reserveOperandSpace(2); - Count->addIncoming(NumBits, small); - - // Get the value that we are shifting bits out of as a PHI because - // we'll change this with each iteration. 
- PHINode *BitsToShift = Builder.CreatePHI(Val->getType(), "val"); - BitsToShift->reserveOperandSpace(2); - BitsToShift->addIncoming(Rep4, small); - - // Get the result of the last computation or zero on first iteration - PHINode *RRes = Builder.CreatePHI(Val->getType(), "rres"); - RRes->reserveOperandSpace(2); - RRes->addIncoming(ValZero, small); - - // Decrement the loop counter by one - Value *Decr = Builder.CreateSub(Count, One); - Count->addIncoming(Decr, reverse); - - // Get the bit that we want to move into the result - Value *Bit = Builder.CreateAnd(BitsToShift, ValOne); - - // Compute the new value of the bits to shift for the next iteration. - Value *NewVal = Builder.CreateLShr(BitsToShift, ValOne); - BitsToShift->addIncoming(NewVal, reverse); - - // Shift the bit we extracted into the low bit of the result. - Value *NewRes = Builder.CreateShl(RRes, ValOne); - NewRes = Builder.CreateOr(NewRes, Bit); - RRes->addIncoming(NewRes, reverse); - - // Terminate loop if we've moved all the bits. - Value *Cond = Builder.CreateICmpEQ(Decr, Zero); - Builder.CreateCondBr(Cond, result, reverse); - - // BASIC BLOCK: result - Builder.SetInsertPoint(result); - PHINode *Rplcmnt = Builder.CreatePHI(Val->getType()); - Rplcmnt->reserveOperandSpace(2); - Rplcmnt->addIncoming(NewRes, reverse); - Rplcmnt->addIncoming(Rep4, small); - Value* t0 = Builder.CreateIntCast(NumBits, ValTy, /* isSigned */ false); - Value* t1 = Builder.CreateShl(ValMask, Lo); - Value* t2 = Builder.CreateNot(t1); - Value* t3 = Builder.CreateShl(t1, t0); - Value* t4 = Builder.CreateOr(t2, t3); - Value* t5 = Builder.CreateAnd(t4, Val); - Value* t6 = Builder.CreateShl(Rplcmnt, Lo); - Value* Rslt = Builder.CreateOr(t5, t6, "part_set"); - Builder.CreateRet(Rslt); - } - - // Return a call to the implementation function - Builder.SetInsertPoint(CI->getParent(), CI); - CallInst *NewCI = Builder.CreateCall4(F, CI->getOperand(1), - CI->getOperand(2), CI->getOperand(3), - CI->getOperand(4)); - NewCI->setName(CI->getName()); - return NewCI; + return LowerCTPOP(Context, V, IP); } static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname) { switch (CI->getOperand(1)->getType()->getTypeID()) { - default: assert(0 && "Invalid type in intrinsic"); abort(); + default: llvm_unreachable("Invalid type in intrinsic"); case Type::FloatTyID: ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), - Type::FloatTy); + Type::getFloatTy(CI->getContext())); break; case Type::DoubleTyID: ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), - Type::DoubleTy); + Type::getDoubleTy(CI->getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: @@ -646,19 +329,18 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { IRBuilder<> Builder(CI->getParent(), CI); + LLVMContext &Context = CI->getContext(); Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: - cerr << "Cannot lower a call to a non-intrinsic function '" - << Callee->getName() << "'!\n"; - abort(); + llvm_report_error("Cannot lower a call to a non-intrinsic function '"+ + Callee->getName() + "'!"); default: - cerr << "Error: Code generator does not support intrinsic function '" - << Callee->getName() << "'!\n"; - abort(); + llvm_report_error("Code generator does not support intrinsic function '"+ + Callee->getName()+"'!"); // The setjmp/longjmp 
intrinsics should only exist in the code if it was // never optimized (ie, right out of the CFE), or if it has been hacked on @@ -666,38 +348,38 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { // convert the call to an explicit setjmp or longjmp call. case Intrinsic::setjmp: { Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), - Type::Int32Ty); - if (CI->getType() != Type::VoidTy) + Type::getInt32Ty(Context)); + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(V); break; } case Intrinsic::sigsetjmp: - if (CI->getType() != Type::VoidTy) + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; case Intrinsic::longjmp: { ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(), - Type::VoidTy); + Type::getVoidTy(Context)); break; } case Intrinsic::siglongjmp: { // Insert the call to abort ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), - Type::VoidTy); + Type::getVoidTy(Context)); break; } case Intrinsic::ctpop: - CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI)); break; case Intrinsic::bswap: - CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI)); break; case Intrinsic::ctlz: - CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI)); break; case Intrinsic::cttz: { @@ -707,24 +389,16 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { NotSrc->setName(Src->getName() + ".not"); Value *SrcM1 = ConstantInt::get(Src->getType(), 1); SrcM1 = Builder.CreateSub(Src, SrcM1); - Src = LowerCTPOP(Builder.CreateAnd(NotSrc, SrcM1), CI); + Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI); CI->replaceAllUsesWith(Src); break; } - case Intrinsic::part_select: - CI->replaceAllUsesWith(LowerPartSelect(CI)); - break; - - case Intrinsic::part_set: - CI->replaceAllUsesWith(LowerPartSet(CI)); - break; - case Intrinsic::stacksave: case Intrinsic::stackrestore: { if (!Warned) - cerr << "WARNING: this target does not support the llvm.stack" - << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? "save" : "restore") << " intrinsic.\n"; Warned = true; if (Callee->getIntrinsicID() == Intrinsic::stacksave) @@ -734,8 +408,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::returnaddress: case Intrinsic::frameaddress: - cerr << "WARNING: this target does not support the llvm." - << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? "return" : "frame") << "address intrinsic.\n"; CI->replaceAllUsesWith(ConstantPointerNull::get( cast(CI->getType()))); @@ -747,9 +421,9 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::pcmarker: break; // Simply strip out pcmarker on unsupported architectures case Intrinsic::readcyclecounter: { - cerr << "WARNING: this target does not support the llvm.readcyclecoun" - << "ter intrinsic. It is being lowered to a constant 0\n"; - CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0)); + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. 
It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); break; } @@ -761,13 +435,11 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Simply strip out debugging intrinsics case Intrinsic::eh_exception: - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: + case Intrinsic::eh_selector: CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); break; - case Intrinsic::eh_typeid_for_i32: - case Intrinsic::eh_typeid_for_i64: + case Intrinsic::eh_typeid_for: // Return something different to eh_selector. CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; @@ -776,7 +448,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; // Strip out annotate intrinsic case Intrinsic::memcpy: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -787,7 +459,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memmove: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; @@ -798,13 +470,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - const IntegerType *IntPtr = TD.getIntPtrType(); + const IntegerType *IntPtr = TD.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, /* isSigned */ false); Value *Ops[3]; Ops[0] = CI->getOperand(1); // Extend the amount to i32. - Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty, + Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType()); @@ -840,7 +512,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { } case Intrinsic::flt_rounds: // Lower to "round to the nearest" - if (CI->getType() != Type::VoidTy) + if (CI->getType() != Type::getVoidTy(Context)) CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); break; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index a163cac75b0f4..4e713a6ed3165 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -15,14 +15,16 @@ #include "llvm/PassManager.h" #include "llvm/Pass.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" using namespace llvm; namespace llvm { @@ -37,26 +39,31 @@ static cl::opt PrintEmittedAsm("print-emitted-asm", cl::Hidden, cl::desc("Dump emitter generated instructions as assembly")); static cl::opt PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); +static cl::opt HoistConstants("hoist-constants", cl::Hidden, + cl::desc("Hoist constants out of loops")); static cl::opt VerifyMachineCode("verify-machineinstrs", 
cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); -// When this works it will be on by default. -static cl::opt -DisablePostRAScheduler("disable-post-RA-scheduler", - cl::desc("Disable scheduling after register allocation"), - cl::init(true)); - // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be -// able to enable or disable fast-isel independently from -fast. +// able to enable or disable fast-isel independently from -O0. static cl::opt EnableFastISelOption("fast-isel", cl::Hidden, - cl::desc("Enable the experimental \"fast\" instruction selector")); + cl::desc("Enable the \"fast\" instruction selector")); + + +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + const std::string &TargetTriple) + : TargetMachine(T) { + AsmInfo = T.createAsmInfo(TargetTriple); +} + + FileModel::Model LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - raw_ostream &Out, + formatted_raw_ostream &Out, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { // Add common CodeGen passes. @@ -67,10 +74,10 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(createDebugLabelFoldingPass()); if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (OptLevel != CodeGenOpt::None) PM.add(createCodePlacementOptPass()); @@ -92,6 +99,19 @@ LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return FileModel::Error; } +bool LLVMTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + formatted_raw_ostream &Out) { + FunctionPass *Printer = + getTarget().createAsmPrinter(Out, *this, getMCAsmInfo(), Verbose); + if (!Printer) + return true; + + PM.add(Printer); + return false; +} + /// addPassesToEmitFileFinish - If the passes to emit the specified file had to /// be split up (e.g., to add an object writer pass), this method can be used to /// finish up adding passes to emit the file, if necessary. @@ -99,13 +119,12 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, MachineCodeEmitter *MCE, CodeGenOpt::Level OptLevel) { if (MCE) - addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *MCE); + addSimpleCodeEmitter(PM, OptLevel, *MCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } @@ -116,12 +135,27 @@ bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, JITCodeEmitter *JCE, CodeGenOpt::Level OptLevel) { if (JCE) - addSimpleCodeEmitter(PM, OptLevel, PrintEmittedAsm, *JCE); + addSimpleCodeEmitter(PM, OptLevel, *JCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); + return false; // success! +} + +/// addPassesToEmitFileFinish - If the passes to emit the specified file had to +/// be split up (e.g., to add an object writer pass), this method can be used to +/// finish up adding passes to emit the file, if necessary. 
+bool LLVMTargetMachine::addPassesToEmitFileFinish(PassManagerBase &PM, + ObjectCodeEmitter *OCE, + CodeGenOpt::Level OptLevel) { + if (OCE) + addSimpleCodeEmitter(PM, OptLevel, *OCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); + + PM.add(createGCInfoDeleter()); return false; // success! } @@ -140,15 +174,14 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); - addCodeEmitter(PM, OptLevel, PrintEmittedAsm, MCE); + addCodeEmitter(PM, OptLevel, MCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } @@ -166,22 +199,21 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; if (addPreEmitPass(PM, OptLevel) && PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); - addCodeEmitter(PM, OptLevel, PrintEmittedAsm, JCE); + addCodeEmitter(PM, OptLevel, JCE); + if (PrintEmittedAsm) + addAssemblyEmitter(PM, OptLevel, true, ferrs()); PM.add(createGCInfoDeleter()); - // Delete machine code for this function - PM.add(createMachineCodeDeleter()); - return false; // success! } static void printAndVerify(PassManagerBase &PM, bool allowDoubleDefs = false) { if (PrintMachineCode) - PM.add(createMachineFunctionPrinterPass(cerr)); + PM.add(createMachineFunctionPrinterPass(errs())); if (VerifyMachineCode) PM.add(createMachineVerifierPass(allowDoubleDefs)); @@ -203,18 +235,31 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Turn exception handling constructs into something the code generators can // handle. - if (!getTargetAsmInfo()->doesSupportExceptionHandling()) - PM.add(createLowerInvokePass(getTargetLowering())); - else + switch (getMCAsmInfo()->getExceptionHandlingType()) + { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + PM.add(createSjLjEHPass(getTargetLowering())); + break; + case ExceptionHandling::Dwarf: + PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None)); + break; + case ExceptionHandling::None: + PM.add(createLowerInvokePass(getTargetLowering())); + break; + } PM.add(createGCLoweringPass()); // Make sure that no unreachable blocks are instruction selected. PM.add(createUnreachableBlockEliminationPass()); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { + if (HoistConstants) + PM.add(createCodeGenLICMPass()); PM.add(createCodeGenPreparePass(getTargetLowering())); + } PM.add(createStackProtectorPass(getTargetLowering())); @@ -225,6 +270,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Standard Lower-Level Passes. + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + // Enable FastISel with -fast, but allow that to be overridden. 
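The override mentioned in the comment above works because EnableFastISelOption is a cl::opt<cl::boolOrDefault> rather than a plain bool: the flag can be explicitly on, explicitly off, or unset, and only the unset state defers to the -O0 default, as the condition that follows shows. A self-contained sketch of the same tri-state pattern (the flag name here is invented for illustration):

```cpp
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Tri-state option: stays BOU_UNSET until the user passes the flag.
static cl::opt<cl::boolOrDefault>
UseThing("use-thing", cl::Hidden, cl::desc("Hypothetical tri-state flag"));

static bool shouldUseThing(bool AtOptNone) {
  if (UseThing == cl::BOU_TRUE)  return true;   // explicitly enabled
  if (UseThing == cl::BOU_FALSE) return false;  // explicitly disabled
  return AtOptNone;                             // unset: follow the -O0 default
}
```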
if (EnableFastISelOption == cl::BOU_TRUE || (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) @@ -240,19 +288,21 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { PM.add(createMachineLICMPass()); PM.add(createMachineSinkingPass()); - printAndVerify(PM, /* allowDoubleDefs= */ false); + printAndVerify(PM, /* allowDoubleDefs= */ true); } // Run pre-ra passes. if (addPreRegAlloc(PM, OptLevel)) - printAndVerify(PM); + printAndVerify(PM, /* allowDoubleDefs= */ true); // Perform register allocation. PM.add(createRegisterAllocator()); // Perform stack slot coloring. if (OptLevel != CodeGenOpt::None) - PM.add(createStackSlotColoringPass(OptLevel >= CodeGenOpt::Aggressive)); + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + PM.add(createStackSlotColoringPass(false)); printAndVerify(PM); // Print the register-allocated code @@ -267,8 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createPrologEpilogCodeInserter()); printAndVerify(PM); + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM); + // Second pass scheduler. - if (OptLevel != CodeGenOpt::None && !DisablePostRAScheduler) { + if (OptLevel != CodeGenOpt::None) { PM.add(createPostRAScheduler()); printAndVerify(PM); } @@ -283,7 +337,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM); if (PrintGCInfo) - PM.add(createGCInfoPrinter(*cerr)); + PM.add(createGCInfoPrinter(errs())); return false; } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 26722a3ca11a4..a02a4a6c83a1a 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -23,12 +23,16 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> -#include <ostream> using namespace llvm; +// Print a LiveIndex to a raw_ostream. +void LiveIndex::print(raw_ostream &os) const { + os << (index & ~PHI_BIT); +} + // An example for liveAt(): // // this = [1,4), liveAt(0) will return false. The instruction defining this @@ -36,7 +40,7 @@ using namespace llvm; // variable it represents. This is because slot 1 is used (def slot) and spans // up to slot 3 (store slot). // -bool LiveInterval::liveAt(unsigned I) const { +bool LiveInterval::liveAt(LiveIndex I) const { Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); if (r == ranges.begin()) @@ -49,7 +53,7 @@ bool LiveInterval::liveAt(unsigned I) const { // liveBeforeAndAt - Check if the interval is live at the index and the index // just before it. If index is liveAt, check if it starts a new live range. // If it does, then check if the previous live range ends at index-1. -bool LiveInterval::liveBeforeAndAt(unsigned I) const { +bool LiveInterval::liveBeforeAndAt(LiveIndex I) const { Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I); if (r == ranges.begin()) @@ -127,7 +131,7 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, /// overlaps - Return true if the live interval overlaps a range specified /// by [Start, End).
-bool LiveInterval::overlaps(unsigned Start, unsigned End) const { +bool LiveInterval::overlaps(LiveIndex Start, LiveIndex End) const { assert(Start < End && "Invalid range"); const_iterator I = begin(); const_iterator E = end(); @@ -145,10 +149,10 @@ bool LiveInterval::overlaps(unsigned Start, unsigned End) const { /// specified by I to end at the specified endpoint. To do this, we should /// merge and eliminate all ranges that this will overlap with. The iterator is /// not invalidated. -void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { +void LiveInterval::extendIntervalEndTo(Ranges::iterator I, LiveIndex NewEnd) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; - unsigned OldEnd = I->end; + LiveIndex OldEnd = I->end; // Search for the first interval that we can't merge with. Ranges::iterator MergeTo = next(I); @@ -163,7 +167,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { ranges.erase(next(I), MergeTo); // Update kill info. - removeKills(ValNo, OldEnd, I->end-1); + ValNo->removeKills(OldEnd, I->end.prevSlot_()); // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. @@ -179,7 +183,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, unsigned NewEnd) { /// specified by I to start at the specified endpoint. To do this, we should /// merge and eliminate all ranges that this will overlap with. LiveInterval::Ranges::iterator -LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { +LiveInterval::extendIntervalStartTo(Ranges::iterator I, LiveIndex NewStart) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; @@ -212,7 +216,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, unsigned NewStart) { LiveInterval::iterator LiveInterval::addRangeFrom(LiveRange LR, iterator From) { - unsigned Start = LR.start, End = LR.end; + LiveIndex Start = LR.start, End = LR.end; iterator it = std::upper_bound(From, ranges.end(), Start); // If the inserted interval starts in the middle or right at the end of @@ -246,7 +250,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { extendIntervalEndTo(it, End); else if (End < it->end) // Overlapping intervals, there might have been a kill here. - removeKill(it->valno, End); + it->valno->removeKill(End); return it; } } else { @@ -262,33 +266,32 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } -/// isInOneLiveRange - Return true if the range specified is entirely in the +/// isInOneLiveRange - Return true if the range specified is entirely in /// a single LiveRange of the live interval. -bool LiveInterval::isInOneLiveRange(unsigned Start, unsigned End) { +bool LiveInterval::isInOneLiveRange(LiveIndex Start, LiveIndex End) { Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); if (I == ranges.begin()) return false; --I; - return I->contains(Start) && I->contains(End-1); + return I->containsRange(Start, End); } /// removeRange - Remove the specified range from this interval. Note that /// the range must be in a single LiveRange in its entirety. -void LiveInterval::removeRange(unsigned Start, unsigned End, +void LiveInterval::removeRange(LiveIndex Start, LiveIndex End, bool RemoveDeadValNo) { // Find the LiveRange containing this span. 
Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start); assert(I != ranges.begin() && "Range is not in interval!"); --I; - assert(I->contains(Start) && I->contains(End-1) && - "Range is not entirely in interval!"); + assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); // If the span we are removing is at the start of the LiveRange, adjust it. VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { - removeKills(I->valno, Start, End); + ValNo->removeKills(Start, End); if (RemoveDeadValNo) { // Check if val# is dead. bool isDead = true; @@ -322,13 +325,13 @@ void LiveInterval::removeRange(unsigned Start, unsigned End, // Otherwise if the span we are removing is at the end of the LiveRange, // adjust the other way. if (I->end == End) { - removeKills(ValNo, Start, End); + ValNo->removeKills(Start, End); I->end = Start; return; } // Otherwise, we are splitting the LiveRange into two pieces. - unsigned OldEnd = I->end; + LiveIndex OldEnd = I->end; I->end = Start; // Trim the old interval. // Insert the new one. @@ -362,11 +365,12 @@ void LiveInterval::removeValNo(VNInfo *ValNo) { /// scaleNumbering - Renumber VNI and ranges to provide gaps for new /// instructions. + void LiveInterval::scaleNumbering(unsigned factor) { // Scale ranges. for (iterator RI = begin(), RE = end(); RI != RE; ++RI) { - RI->start = InstrSlots::scale(RI->start, factor); - RI->end = InstrSlots::scale(RI->end, factor); + RI->start = RI->start.scale(factor); + RI->end = RI->end.scale(factor); } // Scale VNI info. @@ -374,19 +378,20 @@ void LiveInterval::scaleNumbering(unsigned factor) { VNInfo *vni = *VNI; if (vni->isDefAccurate()) - vni->def = InstrSlots::scale(vni->def, factor); + vni->def = vni->def.scale(factor); for (unsigned i = 0; i < vni->kills.size(); ++i) { - if (vni->kills[i] != 0) - vni->kills[i] = InstrSlots::scale(vni->kills[i], factor); + if (!vni->kills[i].isPHIIndex()) + vni->kills[i] = vni->kills[i].scale(factor); } } } + /// getLiveRangeContaining - Return the live range that contains the /// specified index, or null if there is none. LiveInterval::const_iterator -LiveInterval::FindLiveRangeContaining(unsigned Idx) const { const_iterator It = std::upper_bound(begin(), end(), Idx); if (It != ranges.begin()) { --It; @@ -398,7 +403,7 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) const { } LiveInterval::iterator -LiveInterval::FindLiveRangeContaining(unsigned Idx) { +LiveInterval::FindLiveRangeContaining(LiveIndex Idx) { iterator It = std::upper_bound(begin(), end(), Idx); if (It != begin()) { --It; @@ -409,17 +414,27 @@ LiveInterval::FindLiveRangeContaining(unsigned Idx) { return end(); } -/// findDefinedVNInfo - Find the VNInfo that's defined at the specified index -/// (register interval) or defined by the specified register (stack inteval). -VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const { - VNInfo *VNI = NULL; +/// findDefinedVNInfo - Find the VNInfo defined by the specified +/// index (register interval). +VNInfo *LiveInterval::findDefinedVNInfoForRegInt(LiveIndex Idx) const { for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); - i != e; ++i) - if ((*i)->def == DefIdxOrReg) { - VNI = *i; - break; - } - return VNI; + i != e; ++i) { + if ((*i)->def == Idx) + return *i; + } + + return 0; +} + +/// findDefinedVNInfo - Find the VNInfo defined by the specified +/// register (stack interval).
+VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const { + for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end(); + i != e; ++i) { + if ((*i)->getReg() == reg) + return *i; + } + return 0; } /// join - Join two live intervals (this, and other) together. This applies @@ -502,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, InsertPos = addRangeFrom(*I, InsertPos); } - weight += Other.weight; + ComputeJoinedWeight(Other); // Update regalloc hint if currently there isn't one. if (TargetRegisterInfo::isVirtualRegister(reg) && @@ -546,7 +561,7 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { if (I->valno != RHSValNo) continue; - unsigned Start = I->start, End = I->end; + LiveIndex Start = I->start, End = I->end; IP = std::upper_bound(IP, end(), Start); // If the start of this range overlaps with an existing liverange, trim it. if (IP != begin() && IP[-1].end > Start) { @@ -622,20 +637,21 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, else if (UnusedValNo) ClobberValNo = UnusedValNo; else { - UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); + UnusedValNo = ClobberValNo = + getNextValue(LiveIndex(), 0, false, VNInfoAllocator); ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo)); } bool Done = false; - unsigned Start = I->start, End = I->end; + LiveIndex Start = I->start, End = I->end; // If a clobber range starts before an existing range and ends after // it, the clobber range will need to be split into multiple ranges. // Loop until the entire clobber range is handled. while (!Done) { Done = true; IP = std::upper_bound(IP, end(), Start); - unsigned SubRangeStart = Start; - unsigned SubRangeEnd = End; + LiveIndex SubRangeStart = Start; + LiveIndex SubRangeEnd = End; // If the start of this range overlaps with an existing liverange, trim it. if (IP != begin() && IP[-1].end > SubRangeStart) { @@ -671,11 +687,13 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers, } } -void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End, +void LiveInterval::MergeInClobberRange(LiveIndex Start, + LiveIndex End, BumpPtrAllocator &VNInfoAllocator) { // Find a value # to use for the clobber ranges. If there is already a value# // for unknown values, use it. - VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator); + VNInfo *ClobberValNo = + getNextValue(LiveIndex(), 0, false, VNInfoAllocator); iterator IP = begin(); IP = std::upper_bound(IP, end(), Start); @@ -711,7 +729,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { // Make sure V2 is smaller than V1. if (V1->id < V2->id) { - copyValNumInfo(V1, V2); + V1->copyFrom(*V2); std::swap(V1, V2); } @@ -788,20 +806,42 @@ void LiveInterval::Copy(const LiveInterval &RHS, unsigned LiveInterval::getSize() const { unsigned Sum = 0; for (const_iterator I = begin(), E = end(); I != E; ++I) - Sum += I->end - I->start; + Sum += I->start.distance(I->end); return Sum; } -std::ostream& llvm::operator<<(std::ostream& os, const LiveRange &LR) { +/// ComputeJoinedWeight - Set the weight of a live interval Joined +/// after Other has been merged into it. +void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) { + // If either of these intervals was spilled, the weight is the + // weight of the non-spilled interval. This can only happen with + // iterative coalescers. 
+ + if (Other.weight != HUGE_VALF) { + weight += Other.weight; + } + else if (weight == HUGE_VALF && + !TargetRegisterInfo::isPhysicalRegister(reg)) { + // Remove this assert if you have an iterative coalescer + assert(0 && "Joining to spilled interval"); + weight = Other.weight; + } + else { + // Otherwise the weight stays the same + // Remove this assert if you have an iterative coalescer + assert(0 && "Joining from spilled interval"); + } +} + +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; } void LiveRange::dump() const { - cerr << *this << "\n"; + errs() << *this << "\n"; } -void LiveInterval::print(std::ostream &OS, - const TargetRegisterInfo *TRI) const { +void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { if (isStackSlot()) OS << "SS#" << getStackSlotIndex(); else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg)) @@ -841,6 +881,8 @@ void LiveInterval::print(std::ostream &OS, OS << "-("; for (unsigned j = 0; j != ee; ++j) { OS << vni->kills[j]; + if (vni->kills[j].isPHIIndex()) + OS << "*"; if (j != ee-1) OS << " "; } @@ -857,10 +899,10 @@ void LiveInterval::print(std::ostream &OS, } void LiveInterval::dump() const { - cerr << *this << "\n"; + errs() << *this << "\n"; } -void LiveRange::print(std::ostream &os) const { +void LiveRange::print(raw_ostream &os) const { os << *this; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 52a30bc067955..93d3d4c83896c 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -34,6 +35,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -47,24 +50,24 @@ using namespace llvm; static cl::opt<bool> DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); -static cl::opt<bool> SplitAtBB("split-intervals-at-bb", - cl::init(true), cl::Hidden); -static cl::opt<int> SplitLimit("split-limit", - cl::init(-1), cl::Hidden); - -static cl::opt<bool> EnableAggressiveRemat("aggressive-remat", cl::Hidden); - static cl::opt<bool> EnableFastSpilling("fast-spill", cl::init(false), cl::Hidden); -STATISTIC(numIntervals, "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); +static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false)); + +static cl::opt<int> CoalescingLimit("early-coalescing-limit", + cl::init(-1), cl::Hidden); + +STATISTIC(numIntervals , "Number of original intervals"); +STATISTIC(numFolds , "Number of loads/stores folded into instructions"); +STATISTIC(numSplits , "Number of intervals split"); +STATISTIC(numCoalescing, "Number of early coalescing performed"); char LiveIntervals::ID = 0; static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis"); void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>();
AU.addPreserved(); @@ -92,15 +95,32 @@ void LiveIntervals::releaseMemory() { mi2iMap_.clear(); i2miMap_.clear(); r2iMap_.clear(); + terminatorGaps.clear(); + phiJoinCopies.clear(); + // Release VNInfo memory regions after all VNInfo objects are dtor'd. VNInfoAllocator.Reset(); - while (!ClonedMIs.empty()) { - MachineInstr *MI = ClonedMIs.back(); - ClonedMIs.pop_back(); + while (!CloneMIs.empty()) { + MachineInstr *MI = CloneMIs.back(); + CloneMIs.pop_back(); mf_->DeleteMachineInstr(MI); } } +static bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, + unsigned OpIdx, const TargetInstrInfo *tii_){ + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg) + return true; + + if (OpIdx == 2 && MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return true; + if (OpIdx == 1 && MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + return true; + return false; +} + /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure /// there is one implicit_def for each use. Add isUndef marker to /// implicit_def defs and their uses. @@ -119,16 +139,33 @@ void LiveIntervals::processImplicitDefs() { ++I; if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { unsigned Reg = MI->getOperand(0).getReg(); - MI->getOperand(0).setIsUndef(); ImpDefRegs.insert(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS) + ImpDefRegs.insert(*SS); + } ImpDefMIs.push_back(MI); continue; } + if (MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) { + MachineOperand &MO = MI->getOperand(2); + if (ImpDefRegs.count(MO.getReg())) { + // %reg1032 = INSERT_SUBREG %reg1032, undef, 2 + // This is an identity copy, eliminate it now. + if (MO.isKill()) { + LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); + vi.removeKill(MI); + } + MI->eraseFromParent(); + continue; + } + } + bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -136,22 +173,30 @@ if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg) { + if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) { bool isKill = MO.isKill(); MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); - if (isKill) + if (isKill) { ImpDefRegs.erase(Reg); + LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + vi.removeKill(MI); + } ChangedToImpDef = true; break; } MO.setIsUndef(); - if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { + // Make sure other uses of the same register are also marked undef. + for (unsigned j = i+1; j != e; ++j) { + MachineOperand &MOJ = MI->getOperand(j); + if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + MOJ.setIsUndef(); + } ImpDefRegs.erase(Reg); + } } if (ChangedToImpDef) { @@ -171,11 +216,13 @@ void LiveIntervals::processImplicitDefs() { for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { MachineInstr *MI = ImpDefMIs[i]; unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - // Physical registers are not liveout (yet).
-      continue;
-    if (!ImpDefRegs.count(Reg))
+      if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+          !ImpDefRegs.count(Reg)) {
+        // Delete all "local" implicit_def's. That includes those which define
+        // physical registers since they cannot be liveout.
+        MI->eraseFromParent();
         continue;
+      }
 
       // If there are multiple defs of the same register and at least one
       // is not an implicit_def, do not insert implicit_def's before the
@@ -191,6 +238,10 @@ void LiveIntervals::processImplicitDefs() {
       if (Skip)
         continue;
 
+      // The only implicit_defs we want to keep are those that are live
+      // out of their block.
+      MI->eraseFromParent();
+
       for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
              UE = mri_->use_end(); UI != UE; ) {
         MachineOperand &RMO = UI.getOperand();
@@ -199,12 +250,19 @@ void LiveIntervals::processImplicitDefs() {
         MachineBasicBlock *RMBB = RMI->getParent();
         if (RMBB == MBB)
           continue;
+
+        // Turn a copy use into an implicit_def.
+        unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+        if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+            Reg == SrcReg) {
+          RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+          for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
+            RMI->RemoveOperand(j);
+          continue;
+        }
+
         const TargetRegisterClass* RC = mri_->getRegClass(Reg);
         unsigned NewVReg = mri_->createVirtualRegister(RC);
-        MachineInstrBuilder MIB =
-          BuildMI(*RMBB, RMI, RMI->getDebugLoc(),
-                  tii_->get(TargetInstrInfo::IMPLICIT_DEF), NewVReg);
-        (*MIB).getOperand(0).setIsUndef();
         RMO.setReg(NewVReg);
         RMO.setIsUndef();
         RMO.setIsKill();
@@ -215,6 +273,7 @@ void LiveIntervals::processImplicitDefs() {
   }
 }
 
+
 void LiveIntervals::computeNumbering() {
   Index2MiMap OldI2MI = i2miMap_;
   std::vector<IdxMBBPair> OldI2MBB = Idx2MBBMap;
@@ -223,44 +282,79 @@ void LiveIntervals::computeNumbering() {
   MBB2IdxMap.clear();
   mi2iMap_.clear();
   i2miMap_.clear();
+  terminatorGaps.clear();
+  phiJoinCopies.clear();
   FunctionSize = 0;
 
   // Number MachineInstrs and MachineBasicBlocks.
   // Initialize MBB indexes to a sentinel.
-  MBB2IdxMap.resize(mf_->getNumBlockIDs(), std::make_pair(~0U,~0U));
+  MBB2IdxMap.resize(mf_->getNumBlockIDs(),
+                    std::make_pair(LiveIndex(),LiveIndex()));
 
-  unsigned MIIndex = 0;
+  LiveIndex MIIndex;
   for (MachineFunction::iterator MBB = mf_->begin(), E = mf_->end();
        MBB != E; ++MBB) {
-    unsigned StartIdx = MIIndex;
+    LiveIndex StartIdx = MIIndex;
 
     // Insert an empty slot at the beginning of each block.
-    MIIndex += InstrSlots::NUM;
+    MIIndex = getNextIndex(MIIndex);
    i2miMap_.push_back(0);
 
     for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
          I != E; ++I) {
+
+      if (I == MBB->getFirstTerminator()) {
+        // Leave a gap before terminators; this is where we will point
+        // PHI kills.
+        LiveIndex tGap(true, MIIndex);
+        bool inserted =
+          terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second;
+        assert(inserted &&
+               "Multiple 'first' terminators encountered during numbering.");
+        inserted = inserted; // Avoid compiler warning if assertions turned off.
+        i2miMap_.push_back(0);
+
+        MIIndex = getNextIndex(MIIndex);
+      }
+
       bool inserted = mi2iMap_.insert(std::make_pair(I, MIIndex)).second;
       assert(inserted && "multiple MachineInstr -> index mappings");
       inserted = true;
       i2miMap_.push_back(I);
-      MIIndex += InstrSlots::NUM;
+      MIIndex = getNextIndex(MIIndex);
       FunctionSize++;
 
       // Insert max(1, numdefs) empty slots after every instruction.
unsigned Slots = I->getDesc().getNumDefs(); if (Slots == 0) Slots = 1; - MIIndex += InstrSlots::NUM * Slots; - while (Slots--) + while (Slots--) { + MIIndex = getNextIndex(MIIndex); i2miMap_.push_back(0); + } + + } + + if (MBB->getFirstTerminator() == MBB->end()) { + // Leave a gap for before terminators, this is where we will point + // PHI kills. + LiveIndex tGap(true, MIIndex); + bool inserted = + terminatorGaps.insert(std::make_pair(&*MBB, tGap)).second; + assert(inserted && + "Multiple 'first' terminators encountered during numbering."); + inserted = inserted; // Avoid compiler warning if assertions turned off. + i2miMap_.push_back(0); + + MIIndex = getNextIndex(MIIndex); } // Set the MBB2IdxMap entry for this MBB. - MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, MIIndex - 1); + MBB2IdxMap[MBB->getNumber()] = std::make_pair(StartIdx, getPrevSlot(MIIndex)); Idx2MBBMap.push_back(std::make_pair(StartIdx, MBB)); } + std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare()); if (!OldI2MI.empty()) @@ -272,9 +366,9 @@ void LiveIntervals::computeNumbering() { // number, or our best guess at what it _should_ correspond to if the // original instruction has been erased. This is either the following // instruction or its predecessor. - unsigned index = LI->start / InstrSlots::NUM; - unsigned offset = LI->start % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + unsigned index = LI->start.getVecIndex(); + LiveIndex::Slot offset = LI->start.getSlot(); + if (LI->start.isLoad()) { std::vector::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->start); // Take the pair containing the index @@ -283,29 +377,34 @@ void LiveIntervals::computeNumbering() { LI->start = getMBBStartIdx(J->second); } else { - LI->start = mi2iMap_[OldI2MI[index]] + offset; + LI->start = LiveIndex( + LiveIndex(mi2iMap_[OldI2MI[index]]), + (LiveIndex::Slot)offset); } // Remap the ending index in the same way that we remapped the start, // except for the final step where we always map to the immediately // following instruction. - index = (LI->end - 1) / InstrSlots::NUM; - offset = LI->end % InstrSlots::NUM; - if (offset == InstrSlots::LOAD) { + index = (getPrevSlot(LI->end)).getVecIndex(); + offset = LI->end.getSlot(); + if (LI->end.isLoad()) { // VReg dies at end of block. std::vector::const_iterator I = std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), LI->end); --I; - LI->end = getMBBEndIdx(I->second) + 1; + LI->end = getNextSlot(getMBBEndIdx(I->second)); } else { unsigned idx = index; while (index < OldI2MI.size() && !OldI2MI[index]) ++index; if (index != OldI2MI.size()) - LI->end = mi2iMap_[OldI2MI[index]] + (idx == index ? offset : 0); + LI->end = + LiveIndex(mi2iMap_[OldI2MI[index]], + (idx == index ? offset : LiveIndex::LOAD)); else - LI->end = InstrSlots::NUM * i2miMap_.size(); + LI->end = + LiveIndex(LiveIndex::NUM * i2miMap_.size()); } } @@ -317,9 +416,9 @@ void LiveIntervals::computeNumbering() { // start indices above. VN's with special sentinel defs // don't need to be remapped. 
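// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// A self-contained model of the numbering scheme built by computeNumbering
// above (toy types, not the real LiveIndex): each block gets an empty slot
// at its start, every instruction gets max(1, numdefs) trailing empty
// slots, and one extra empty slot is reserved immediately before the first
// terminator so PHI kills have a distinct "terminator gap" index to point
// at.
#include <algorithm>
#include <vector>

struct ToyMI { unsigned NumDefs; bool IsTerminator; };

// Returns each instruction's assigned index; Gap receives the index of the
// pre-terminator gap for the block.
std::vector<unsigned> numberBlock(const std::vector<ToyMI> &Block,
                                  unsigned &Gap) {
  std::vector<unsigned> Index;
  unsigned Cur = 1;                       // slot 0: empty slot at block start
  bool GapPlaced = false;
  for (const ToyMI &MI : Block) {
    if (MI.IsTerminator && !GapPlaced) {  // gap before the first terminator
      Gap = Cur++;
      GapPlaced = true;
    }
    Index.push_back(Cur++);
    Cur += std::max(1u, MI.NumDefs);      // empty slots after the instruction
  }
  if (!GapPlaced)                         // block without a terminator
    Gap = Cur++;
  return Index;
}
// ---- end editor's aside ----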
       if (vni->isDefAccurate() && !vni->isUnused()) {
-        unsigned index = vni->def / InstrSlots::NUM;
-        unsigned offset = vni->def % InstrSlots::NUM;
-        if (offset == InstrSlots::LOAD) {
+        unsigned index = vni->def.getVecIndex();
+        LiveIndex::Slot offset = vni->def.getSlot();
+        if (vni->def.isLoad()) {
           std::vector<IdxMBBPair>::const_iterator I =
                   std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->def);
           // Take the pair containing the index
@@ -328,25 +427,36 @@ void LiveIntervals::computeNumbering() {
           vni->def = getMBBStartIdx(J->second);
         } else {
-          vni->def = mi2iMap_[OldI2MI[index]] + offset;
+          vni->def = LiveIndex(mi2iMap_[OldI2MI[index]], offset);
         }
       }
 
       // Remap the VNInfo kill indices, which works the same as
       // the end indices above.
       for (size_t i = 0; i < vni->kills.size(); ++i) {
-        // PHI kills don't need to be remapped.
-        if (!vni->kills[i]) continue;
-
-        unsigned index = (vni->kills[i]-1) / InstrSlots::NUM;
-        unsigned offset = vni->kills[i] % InstrSlots::NUM;
-        if (offset == InstrSlots::LOAD) {
-          std::vector<IdxMBBPair>::const_iterator I =
+        unsigned index = getPrevSlot(vni->kills[i]).getVecIndex();
+        LiveIndex::Slot offset = vni->kills[i].getSlot();
+
+        if (vni->kills[i].isLoad()) {
+          assert(0 && "Value killed at a load slot.");
+          /*std::vector<IdxMBBPair>::const_iterator I =
            std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
           --I;
 
-          vni->kills[i] = getMBBEndIdx(I->second);
+          vni->kills[i] = getMBBEndIdx(I->second);*/
         } else {
+          if (vni->kills[i].isPHIIndex()) {
+            std::vector<IdxMBBPair>::const_iterator I =
+             std::lower_bound(OldI2MBB.begin(), OldI2MBB.end(), vni->kills[i]);
+            --I;
+            vni->kills[i] = terminatorGaps[I->second];
+          } else {
+            assert(OldI2MI[index] != 0 &&
+                   "Kill refers to instruction not present in index maps.");
+            vni->kills[i] = LiveIndex(mi2iMap_[OldI2MI[index]], offset);
+          }
+
+          /*
           unsigned idx = index;
           while (index < OldI2MI.size() && !OldI2MI[index])
             ++index;
@@ -355,6 +465,7 @@ void LiveIntervals::computeNumbering() {
                             (idx == index ? offset : 0);
           else
             vni->kills[i] = InstrSlots::NUM * i2miMap_.size();
+          */
         }
       }
     }
@@ -372,13 +483,20 @@ void LiveIntervals::scaleNumbering(int factor) {
   Idx2MBBMap.clear();
   for (MachineFunction::iterator MBB = mf_->begin(), MBBE = mf_->end();
        MBB != MBBE; ++MBB) {
-    std::pair<unsigned, unsigned> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
-    mbbIndices.first = InstrSlots::scale(mbbIndices.first, factor);
-    mbbIndices.second = InstrSlots::scale(mbbIndices.second, factor);
+    std::pair<LiveIndex, LiveIndex> &mbbIndices = MBB2IdxMap[MBB->getNumber()];
+    mbbIndices.first = mbbIndices.first.scale(factor);
+    mbbIndices.second = mbbIndices.second.scale(factor);
     Idx2MBBMap.push_back(std::make_pair(mbbIndices.first, MBB)); 
   }
   std::sort(Idx2MBBMap.begin(), Idx2MBBMap.end(), Idx2MBBCompare());
 
+  // Scale terminator gaps.
+  for (DenseMap<MachineBasicBlock*, LiveIndex>::iterator
+         TGI = terminatorGaps.begin(), TGE = terminatorGaps.end();
+       TGI != TGE; ++TGI) {
+    terminatorGaps[TGI->first] = TGI->second.scale(factor);
+  }
+
   // Scale the intervals.
   for (iterator LI = begin(), LE = end(); LI != LE; ++LI) {
     LI->second->scaleNumbering(factor);
@@ -386,19 +504,20 @@ void LiveIntervals::scaleNumbering(int factor) {
 
   // Scale MachineInstrs.
Mi2IndexMap oldmi2iMap = mi2iMap_; - unsigned highestSlot = 0; + LiveIndex highestSlot; for (Mi2IndexMap::iterator MI = oldmi2iMap.begin(), ME = oldmi2iMap.end(); MI != ME; ++MI) { - unsigned newSlot = InstrSlots::scale(MI->second, factor); + LiveIndex newSlot = MI->second.scale(factor); mi2iMap_[MI->first] = newSlot; highestSlot = std::max(highestSlot, newSlot); } + unsigned highestVIndex = highestSlot.getVecIndex(); i2miMap_.clear(); - i2miMap_.resize(highestSlot + 1); + i2miMap_.resize(highestVIndex + 1); for (Mi2IndexMap::iterator MI = mi2iMap_.begin(), ME = mi2iMap_.end(); MI != ME; ++MI) { - i2miMap_[MI->second] = MI->first; + i2miMap_[MI->second.getVecIndex()] = const_cast(MI->first); } } @@ -419,6 +538,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { processImplicitDefs(); computeNumbering(); computeIntervals(); + performEarlyCoalescing(); numIntervals += getNumIntervals(); @@ -427,36 +547,45 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { } /// print - Implement the dump method. -void LiveIntervals::print(std::ostream &O, const Module* ) const { - O << "********** INTERVALS **********\n"; +void LiveIntervals::print(raw_ostream &OS, const Module* ) const { + OS << "********** INTERVALS **********\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second->print(O, tri_); - O << "\n"; + I->second->print(OS, tri_); + OS << "\n"; } - O << "********** MACHINEINSTRS **********\n"; + printInstrs(OS); +} + +void LiveIntervals::printInstrs(raw_ostream &OS) const { + OS << "********** MACHINEINSTRS **********\n"; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); mbbi != mbbe; ++mbbi) { - O << ((Value*)mbbi->getBasicBlock())->getName() << ":\n"; + OS << ((Value*)mbbi->getBasicBlock())->getName() << ":\n"; for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { - O << getInstructionIndex(mii) << '\t' << *mii; + OS << getInstructionIndex(mii) << '\t' << *mii; } } } +void LiveIntervals::dumpInstrs() const { + printInstrs(errs()); +} + /// conflictsWithPhysRegDef - Returns true if the specified register /// is defined during the duration of the specified interval. bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li, VirtRegMap &vrm, unsigned reg) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (unsigned index = getBaseIndex(I->start), - end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end; - index += InstrSlots::NUM) { + for (LiveIndex index = getBaseIndex(I->start), + end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end; + index = getNextIndex(index)) { // skip deleted instructions while (index != end && !getInstructionFromIndex(index)) - index += InstrSlots::NUM; + index = getNextIndex(index); if (index == end) break; MachineInstr *MI = getInstructionFromIndex(index); @@ -492,16 +621,16 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, SmallPtrSet &JoinedCopies) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (unsigned index = getBaseIndex(I->start), - end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end; - index += InstrSlots::NUM) { + for (LiveIndex index = getBaseIndex(I->start), + end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); index != end; + index = getNextIndex(index)) { // Skip deleted instructions. 
MachineInstr *MI = 0; while (index != end) { MI = getInstructionFromIndex(index); if (MI) break; - index += InstrSlots::NUM; + index = getNextIndex(index); } if (index == end) break; @@ -525,35 +654,36 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li, return false; } - -void LiveIntervals::printRegName(unsigned reg) const { +#ifndef NDEBUG +static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) { if (TargetRegisterInfo::isPhysicalRegister(reg)) - cerr << tri_->getName(reg); + errs() << tri_->getName(reg); else - cerr << "%reg" << reg; + errs() << "%reg" << reg; } +#endif void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineBasicBlock::iterator mi, - unsigned MIIdx, MachineOperand& MO, + LiveIndex MIIdx, + MachineOperand& MO, unsigned MOIdx, LiveInterval &interval) { - DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); - LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); - - if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) { - DOUT << "is a implicit_def\n"; - return; - } + DEBUG({ + errs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); // Virtual registers may be defined multiple times (due to phi // elimination and 2-addr elimination). Much of what we do only has to be // done once for the vreg. We use an empty interval to detect the first // time we see a vreg. + LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - unsigned defIndex = getDefIndex(MIIdx); - // Earlyclobbers move back one. + LiveIndex defIndex = getDefIndex(MIIdx); + // Earlyclobbers move back one, so that they overlap the live range + // of inputs. if (MO.isEarlyClobber()) defIndex = getUseIndex(MIIdx); VNInfo *ValNo; @@ -575,11 +705,16 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // will be a single kill, in MBB, which comes after the definition. if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) { // FIXME: what about dead vars? - unsigned killIdx; + LiveIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getUseIndex(getInstructionIndex(vi.Kills[0]))+1; + killIdx = getNextSlot(getUseIndex(getInstructionIndex(vi.Kills[0]))); + else if (MO.isEarlyClobber()) + // Earlyclobbers that die in this instruction move up one extra, to + // compensate for having the starting point moved back one. This + // gets them to overlap the live range of other outputs. + killIdx = getNextSlot(getNextSlot(defIndex)); else - killIdx = defIndex+1; + killIdx = getNextSlot(defIndex); // If the kill happens after the definition, we have an intra-block // live range. @@ -588,8 +723,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, "Shouldn't be alive across any blocks!"); LiveRange LR(defIndex, killIdx, ValNo); interval.addRange(LR); - DOUT << " +" << LR << "\n"; - interval.addKill(ValNo, killIdx); + DEBUG(errs() << " +" << LR << "\n"); + ValNo->addKill(killIdx); return; } } @@ -598,8 +733,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // of the defining block, potentially live across some blocks, then is // live into some number of blocks, but gets killed. Start by adding a // range that goes from this definition to the end of the defining block. 
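// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// The earlyclobber slot arithmetic used above, under the assumption that
// the new LiveIndex::Slot keeps the old InstrSlots ordering
// LOAD < USE < DEF < STORE within one instruction index. An earlyclobber
// def is started at the USE slot so it overlaps the instruction's inputs;
// if that def also dies here, its end must advance two slots instead of
// one to cover the same extent. Names below are made up for illustration.
enum Slot { LOAD = 0, USE = 1, DEF = 2, STORE = 3 };

struct ToyIndex { unsigned Instr; Slot S; };

ToyIndex defIndex(unsigned Instr, bool EarlyClobber) {
  // earlyclobbers move back one, from the DEF slot to the USE slot
  return ToyIndex{Instr, EarlyClobber ? USE : DEF};
}

ToyIndex deadEnd(ToyIndex Def, bool EarlyClobber) {
  // one slot past the def; earlyclobbers compensate with one extra step
  unsigned Steps = EarlyClobber ? 2u : 1u;
  return ToyIndex{Def.Instr, static_cast<Slot>(Def.S + Steps)};
}
// ---- end editor's aside ----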
- LiveRange NewLR(defIndex, getMBBEndIdx(mbb)+1, ValNo); - DOUT << " +" << NewLR; + LiveRange NewLR(defIndex, getNextSlot(getMBBEndIdx(mbb)), ValNo); + DEBUG(errs() << " +" << NewLR); interval.addRange(NewLR); // Iterate over all of the blocks that the variable is completely @@ -608,22 +743,22 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), E = vi.AliveBlocks.end(); I != E; ++I) { LiveRange LR(getMBBStartIdx(*I), - getMBBEndIdx(*I)+1, // MBB ends at -1. + getNextSlot(getMBBEndIdx(*I)), // MBB ends at -1. ValNo); interval.addRange(LR); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); } // Finally, this virtual register is live from the start of any killing // block to the 'use' slot of the killing instruction. for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; - unsigned killIdx = getUseIndex(getInstructionIndex(Kill))+1; - LiveRange LR(getMBBStartIdx(Kill->getParent()), - killIdx, ValNo); + LiveIndex killIdx = + getNextSlot(getUseIndex(getInstructionIndex(Kill))); + LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo); interval.addRange(LR); - interval.addKill(ValNo, killIdx); - DOUT << " +" << LR; + ValNo->addKill(killIdx); + DEBUG(errs() << " +" << LR); } } else { @@ -638,12 +773,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // need to take the LiveRegion that defines this register and split it // into two values. assert(interval.containsOneValue()); - unsigned DefIndex = getDefIndex(interval.getValNumInfo(0)->def); - unsigned RedefIndex = getDefIndex(MIIdx); + LiveIndex DefIndex = getDefIndex(interval.getValNumInfo(0)->def); + LiveIndex RedefIndex = getDefIndex(MIIdx); if (MO.isEarlyClobber()) RedefIndex = getUseIndex(MIIdx); - const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex-1); + const LiveRange *OldLR = + interval.getLiveRangeContaining(getPrevSlot(RedefIndex)); VNInfo *OldValNo = OldLR->valno; // Delete the initial value, which should be short and continuous, @@ -656,68 +792,85 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // The new value number (#1) is defined by the instruction we claimed // defined value #0. - VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy, + VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(), false, // update at * VNInfoAllocator); ValNo->setFlags(OldValNo->getFlags()); // * <- updating here // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; - OldValNo->copy = 0; + OldValNo->setCopy(0); if (MO.isEarlyClobber()) OldValNo->setHasRedefByEC(true); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); - DOUT << " replace range with " << LR; + DEBUG(errs() << " replace range with " << LR); interval.addRange(LR); - interval.addKill(ValNo, RedefIndex); + ValNo->addKill(RedefIndex); // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex+1, OldValNo)); - - DOUT << " RESULT: "; - interval.print(DOUT, tri_); - + interval.addRange( + LiveRange(RedefIndex, MO.isEarlyClobber() ? + getNextSlot(getNextSlot(RedefIndex)) : + getNextSlot(RedefIndex), OldValNo)); + + DEBUG({ + errs() << " RESULT: "; + interval.print(errs(), tri_); + }); } else { // Otherwise, this must be because of phi elimination. 
If this is the
       // first redefinition of the vreg that we have seen, go back and change
       // the live range in the PHI block to be a different value number.
       if (interval.containsOneValue()) {
-        assert(vi.Kills.size() == 1 &&
-               "PHI elimination vreg should have one kill, the PHI itself!");
-
         // Remove the old range that we now know has an incorrect number.
         VNInfo *VNI = interval.getValNumInfo(0);
         MachineInstr *Killer = vi.Kills[0];
-        unsigned Start = getMBBStartIdx(Killer->getParent());
-        unsigned End = getUseIndex(getInstructionIndex(Killer))+1;
-        DOUT << " Removing [" << Start << "," << End << "] from: ";
-        interval.print(DOUT, tri_); DOUT << "\n";
-        interval.removeRange(Start, End);
+        phiJoinCopies.push_back(Killer);
+        LiveIndex Start = getMBBStartIdx(Killer->getParent());
+        LiveIndex End =
+          getNextSlot(getUseIndex(getInstructionIndex(Killer)));
+        DEBUG({
+            errs() << " Removing [" << Start << "," << End << "] from: ";
+            interval.print(errs(), tri_);
+            errs() << "\n";
+          });
+        interval.removeRange(Start, End);
+        assert(interval.ranges.size() == 1 &&
+               "Newly discovered PHI interval has >1 ranges.");
+        MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex());
+        VNI->addKill(terminatorGaps[killMBB]);
         VNI->setHasPHIKill(true);
-        DOUT << " RESULT: "; interval.print(DOUT, tri_);
+        DEBUG({
+            errs() << " RESULT: ";
+            interval.print(errs(), tri_);
+          });
 
         // Replace the interval with one of a NEW value number.  Note that this
         // value number isn't actually defined by an instruction, weird huh? :)
         LiveRange LR(Start, End,
-                     interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator));
+                     interval.getNextValue(LiveIndex(mbb->getNumber()),
+                       0, false, VNInfoAllocator));
         LR.valno->setIsPHIDef(true);
-        DOUT << " replace range with " << LR;
+        DEBUG(errs() << " replace range with " << LR);
         interval.addRange(LR);
-        interval.addKill(LR.valno, End);
-        DOUT << " RESULT: "; interval.print(DOUT, tri_);
+        LR.valno->addKill(End);
+        DEBUG({
+            errs() << " RESULT: ";
+            interval.print(errs(), tri_);
+          });
       }
 
       // In the case of PHI elimination, each variable definition is only
       // live until the end of the block.  We've already taken care of the
       // rest of the live range.
-      unsigned defIndex = getDefIndex(MIIdx);
+      LiveIndex defIndex = getDefIndex(MIIdx);
       if (MO.isEarlyClobber())
         defIndex = getUseIndex(MIIdx);
-
+
       VNInfo *ValNo;
       MachineInstr *CopyMI = NULL;
       unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
@@ -728,55 +881,63 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
         CopyMI = mi;
       ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
 
-      unsigned killIndex = getMBBEndIdx(mbb) + 1;
+      LiveIndex killIndex = getNextSlot(getMBBEndIdx(mbb));
       LiveRange LR(defIndex, killIndex, ValNo);
       interval.addRange(LR);
-      interval.addKill(ValNo, killIndex);
+      ValNo->addKill(terminatorGaps[mbb]);
       ValNo->setHasPHIKill(true);
-      DOUT << " +" << LR;
+      DEBUG(errs() << " +" << LR);
     }
   }
 
-  DOUT << '\n';
+  DEBUG(errs() << '\n');
 }
 
 void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
                                               MachineBasicBlock::iterator mi,
-                                              unsigned MIIdx,
+                                              LiveIndex MIIdx,
                                               MachineOperand& MO,
                                               LiveInterval &interval,
                                               MachineInstr *CopyMI) {
   // A physical register cannot be live across basic blocks, so its
   // lifetime must end somewhere in its defining basic block.
- DOUT << "\t\tregister: "; DEBUG(printRegName(interval.reg)); + DEBUG({ + errs() << "\t\tregister: "; + printRegName(interval.reg, tri_); + }); - unsigned baseIndex = MIIdx; - unsigned start = getDefIndex(baseIndex); + LiveIndex baseIndex = MIIdx; + LiveIndex start = getDefIndex(baseIndex); // Earlyclobbers move back one. if (MO.isEarlyClobber()) start = getUseIndex(MIIdx); - unsigned end = start; + LiveIndex end = start; // If it is not used after definition, it is considered dead at // the instruction defining it. Hence its interval is: // [defSlot(def), defSlot(def)+1) + // For earlyclobbers, the defSlot was pushed back one; the extra + // advance below compensates. if (MO.isDead()) { - DOUT << " dead"; - end = start + 1; + DEBUG(errs() << " dead"); + if (MO.isEarlyClobber()) + end = getNextSlot(getNextSlot(start)); + else + end = getNextSlot(start); goto exit; } // If it is not dead on definition, it must be killed by a // subsequent instruction. Hence its interval is: // [defSlot(def), useSlot(kill)+1) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); while (++mi != MBB->end()) { - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); if (mi->killsRegister(interval.reg, tri_)) { - DOUT << " killed"; - end = getUseIndex(baseIndex) + 1; + DEBUG(errs() << " killed"); + end = getNextSlot(getUseIndex(baseIndex)); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_); @@ -791,21 +952,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DOUT << " dead"; - end = start + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(start); } goto exit; } } - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); } // The only case we should have a dead physreg here without a killing or // instruction where we know it's dead is if it is live-in to the function // and never used. Another possible case is the implicit use of the // physical register has been deleted by two-address pass. - end = start + 1; + end = getNextSlot(start); exit: assert(start < end && "did not find end of interval?"); @@ -819,13 +980,13 @@ exit: ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); - interval.addKill(LR.valno, end); - DOUT << " +" << LR << '\n'; + LR.valno->addKill(end); + DEBUG(errs() << " +" << LR << '\n'); } void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, MachineBasicBlock::iterator MI, - unsigned MIIdx, + LiveIndex MIIdx, MachineOperand& MO, unsigned MOIdx) { if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) @@ -852,25 +1013,28 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, } void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, - unsigned MIIdx, + LiveIndex MIIdx, LiveInterval &interval, bool isAlias) { - DOUT << "\t\tlivein register: "; DEBUG(printRegName(interval.reg)); + DEBUG({ + errs() << "\t\tlivein register: "; + printRegName(interval.reg, tri_); + }); // Look for kills, if it reaches a def before it's killed, then it shouldn't // be considered a livein. 
MachineBasicBlock::iterator mi = MBB->begin(); - unsigned baseIndex = MIIdx; - unsigned start = baseIndex; - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + LiveIndex baseIndex = MIIdx; + LiveIndex start = baseIndex; + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; - unsigned end = baseIndex; + baseIndex = getNextIndex(baseIndex); + LiveIndex end = baseIndex; bool SeenDefUse = false; while (mi != MBB->end()) { if (mi->killsRegister(interval.reg, tri_)) { - DOUT << " killed"; - end = getUseIndex(baseIndex) + 1; + DEBUG(errs() << " killed"); + end = getNextSlot(getUseIndex(baseIndex)); SeenDefUse = true; break; } else if (mi->modifiesRegister(interval.reg, tri_)) { @@ -878,40 +1042,167 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Then the register is essentially dead at the instruction that defines // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) - DOUT << " dead"; - end = getDefIndex(start) + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(getDefIndex(start)); SeenDefUse = true; break; } - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); ++mi; if (mi != MBB->end()) { - while (baseIndex / InstrSlots::NUM < i2miMap_.size() && + while (baseIndex.getVecIndex() < i2miMap_.size() && getInstructionFromIndex(baseIndex) == 0) - baseIndex += InstrSlots::NUM; + baseIndex = getNextIndex(baseIndex); } } // Live-in register might not be used at all. if (!SeenDefUse) { if (isAlias) { - DOUT << " dead"; - end = getDefIndex(MIIdx) + 1; + DEBUG(errs() << " dead"); + end = getNextSlot(getDefIndex(MIIdx)); } else { - DOUT << " live through"; + DEBUG(errs() << " live through"); end = baseIndex; } } VNInfo *vni = - interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator); + interval.getNextValue(LiveIndex(MBB->getNumber()), + 0, false, VNInfoAllocator); vni->setIsPHIDef(true); LiveRange LR(start, end, vni); interval.addRange(LR); - interval.addKill(LR.valno, end); - DOUT << " +" << LR << '\n'; + LR.valno->addKill(end); + DEBUG(errs() << " +" << LR << '\n'); +} + +bool +LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt, + SmallVector &IdentCopies, + SmallVector &OtherCopies) { + bool HaveConflict = false; + unsigned NumIdent = 0; + for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg), + re = mri_->def_end(); ri != re; ++ri) { + MachineInstr *MI = &*ri; + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + return false; + if (SrcReg != DstInt.reg) { + OtherCopies.push_back(MI); + HaveConflict |= DstInt.liveAt(getInstructionIndex(MI)); + } else { + IdentCopies.push_back(MI); + ++NumIdent; + } + } + + if (!HaveConflict) + return false; // Let coalescer handle it + return IdentCopies.size() > OtherCopies.size(); +} + +void LiveIntervals::performEarlyCoalescing() { + if (!EarlyCoalescing) + return; + + /// Perform early coalescing: eliminate copies which feed into phi joins + /// and whose sources are defined by the phi joins. 
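// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// The decision rule implemented by isProfitableToCoalesce above, restated
// over plain counters (names below are made up): walk every def of the phi
// source; defs that copy from the phi destination are "identity" copies,
// anything else is a conflicting copy. Early coalescing only pays off when
// a conflict exists at all (some non-identity copy is live inside the
// destination interval) and identity copies are in the majority; with no
// conflict the regular coalescer will catch the join anyway.
bool earlyCoalescingProfitable(unsigned NumIdentityCopies,
                               unsigned NumConflictingCopies,
                               bool HaveConflict) {
  if (!HaveConflict)
    return false;   // let the coalescer handle it
  return NumIdentityCopies > NumConflictingCopies;
}
// ---- end editor's aside ----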
+ for (unsigned i = 0, e = phiJoinCopies.size(); i != e; ++i) { + MachineInstr *Join = phiJoinCopies[i]; + if (CoalescingLimit != -1 && (int)numCoalescing == CoalescingLimit) + break; + + unsigned PHISrc, PHIDst, SrcSubReg, DstSubReg; + bool isMove= tii_->isMoveInstr(*Join, PHISrc, PHIDst, SrcSubReg, DstSubReg); +#ifndef NDEBUG + assert(isMove && "PHI join instruction must be a move!"); +#else + isMove = isMove; +#endif + + LiveInterval &DstInt = getInterval(PHIDst); + LiveInterval &SrcInt = getInterval(PHISrc); + SmallVector IdentCopies; + SmallVector OtherCopies; + if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies)) + continue; + + DEBUG(errs() << "PHI Join: " << *Join); + assert(DstInt.containsOneValue() && "PHI join should have just one val#!"); + VNInfo *VNI = DstInt.getValNumInfo(0); + + // Change the non-identity copies to directly target the phi destination. + for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) { + MachineInstr *PHICopy = OtherCopies[i]; + DEBUG(errs() << "Moving: " << *PHICopy); + + LiveIndex MIIndex = getInstructionIndex(PHICopy); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); + LiveIndex StartIndex = SLR->start; + LiveIndex EndIndex = SLR->end; + + // Delete val# defined by the now identity copy and add the range from + // beginning of the mbb to the end of the range. + SrcInt.removeValNo(SLR->valno); + DEBUG(errs() << " added range [" << StartIndex << ',' + << EndIndex << "] to reg" << DstInt.reg << '\n'); + if (DstInt.liveAt(StartIndex)) + DstInt.removeRange(StartIndex, EndIndex); + VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true, + VNInfoAllocator); + NewVNI->setHasPHIKill(true); + DstInt.addRange(LiveRange(StartIndex, EndIndex, NewVNI)); + for (unsigned j = 0, ee = PHICopy->getNumOperands(); j != ee; ++j) { + MachineOperand &MO = PHICopy->getOperand(j); + if (!MO.isReg() || MO.getReg() != PHISrc) + continue; + MO.setReg(PHIDst); + } + } + + // Now let's eliminate all the would-be identity copies. + for (unsigned i = 0, e = IdentCopies.size(); i != e; ++i) { + MachineInstr *PHICopy = IdentCopies[i]; + DEBUG(errs() << "Coalescing: " << *PHICopy); + + LiveIndex MIIndex = getInstructionIndex(PHICopy); + LiveIndex DefIndex = getDefIndex(MIIndex); + LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); + LiveIndex StartIndex = SLR->start; + LiveIndex EndIndex = SLR->end; + + // Delete val# defined by the now identity copy and add the range from + // beginning of the mbb to the end of the range. + SrcInt.removeValNo(SLR->valno); + RemoveMachineInstrFromMaps(PHICopy); + PHICopy->eraseFromParent(); + DEBUG(errs() << " added range [" << StartIndex << ',' + << EndIndex << "] to reg" << DstInt.reg << '\n'); + DstInt.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } + + // Remove the phi join and update the phi block liveness. 
+    LiveIndex MIIndex = getInstructionIndex(Join);
+    LiveIndex UseIndex = getUseIndex(MIIndex);
+    LiveIndex DefIndex = getDefIndex(MIIndex);
+    LiveRange *SLR = SrcInt.getLiveRangeContaining(UseIndex);
+    LiveRange *DLR = DstInt.getLiveRangeContaining(DefIndex);
+    DLR->valno->setCopy(0);
+    DLR->valno->setIsDefAccurate(false);
+    DstInt.addRange(LiveRange(SLR->start, SLR->end, DLR->valno));
+    SrcInt.removeRange(SLR->start, SLR->end);
+    assert(SrcInt.empty());
+    removeInterval(PHISrc);
+    RemoveMachineInstrFromMaps(Join);
+    Join->eraseFromParent();
+
+    ++numCoalescing;
+  }
+}
 
 /// computeIntervals - computes the live intervals for virtual
@@ -919,17 +1210,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
 /// live interval is an interval [i, j) where 1 <= i <= j < N for
 /// which a variable is live
 void LiveIntervals::computeIntervals() { 
+  DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n"
+               << "********** Function: "
+               << ((Value*)mf_->getFunction())->getName() << '\n');
 
-  DOUT << "********** COMPUTING LIVE INTERVALS **********\n"
-       << "********** Function: "
-       << ((Value*)mf_->getFunction())->getName() << '\n';
-
+  SmallVector UndefUses;
   for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
        MBBI != E; ++MBBI) {
     MachineBasicBlock *MBB = MBBI;
     // Track the index of the current machine instr.
-    unsigned MIIndex = getMBBStartIdx(MBB);
-    DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n";
+    LiveIndex MIIndex = getMBBStartIdx(MBB);
+    DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n");
 
     MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
@@ -945,37 +1236,52 @@ void LiveIntervals::computeIntervals() {
     }
 
     // Skip over empty initial indices.
-    while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+    while (MIIndex.getVecIndex() < i2miMap_.size() &&
           getInstructionFromIndex(MIIndex) == 0)
-      MIIndex += InstrSlots::NUM;
+      MIIndex = getNextIndex(MIIndex);
 
     for (; MI != miEnd; ++MI) {
-      DOUT << MIIndex << "\t" << *MI;
+      DEBUG(errs() << MIIndex << "\t" << *MI);
 
       // Handle defs.
       for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
         MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.getReg())
+          continue;
+
         // handle register defs - build intervals
-        if (MO.isReg() && MO.getReg() && MO.isDef()) {
+        if (MO.isDef())
           handleRegisterDef(MBB, MI, MIIndex, MO, i);
-        }
+        else if (MO.isUndef())
+          UndefUses.push_back(MO.getReg());
       }
 
       // Skip over the empty slots after each instruction.
       unsigned Slots = MI->getDesc().getNumDefs();
       if (Slots == 0)
         Slots = 1;
-      MIIndex += InstrSlots::NUM * Slots;
+
+      while (Slots--)
+        MIIndex = getNextIndex(MIIndex);
 
       // Skip over empty indices.
-      while (MIIndex / InstrSlots::NUM < i2miMap_.size() &&
+      while (MIIndex.getVecIndex() < i2miMap_.size() &&
             getInstructionFromIndex(MIIndex) == 0)
-        MIIndex += InstrSlots::NUM;
+        MIIndex = getNextIndex(MIIndex);
     }
   }
+
+  // Create empty intervals for registers defined by implicit_def's (except
+  // for those implicit_defs that define values which are live out of their
+  // blocks).
+ for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) { + unsigned UndefReg = UndefUses[i]; + (void)getOrCreateInterval(UndefReg); + } } -bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End, +bool LiveIntervals::findLiveInMBBs( + LiveIndex Start, LiveIndex End, SmallVectorImpl &MBBs) const { std::vector::const_iterator I = std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); @@ -991,7 +1297,8 @@ bool LiveIntervals::findLiveInMBBs(unsigned Start, unsigned End, return ResVal; } -bool LiveIntervals::findReachableMBBs(unsigned Start, unsigned End, +bool LiveIntervals::findReachableMBBs( + LiveIndex Start, LiveIndex End, SmallVectorImpl &MBBs) const { std::vector::const_iterator I = std::lower_bound(Idx2MBBMap.begin(), Idx2MBBMap.end(), Start); @@ -1028,23 +1335,23 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { /// getVNInfoSourceReg - Helper function that parses the specified VNInfo /// copy field and returns the source register that defines it. unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { - if (!VNI->copy) + if (!VNI->getCopy()) return 0; - if (VNI->copy->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { + if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { // If it's extracting out of a physical register, return the sub-register. - unsigned Reg = VNI->copy->getOperand(1).getReg(); + unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - Reg = tri_->getSubReg(Reg, VNI->copy->getOperand(2).getImm()); + Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); return Reg; - } else if (VNI->copy->getOpcode() == TargetInstrInfo::INSERT_SUBREG || - VNI->copy->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) - return VNI->copy->getOperand(2).getReg(); + } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG || + VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return VNI->getCopy()->getOperand(2).getReg(); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*VNI->copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg)) return SrcReg; - assert(0 && "Unrecognized copy instruction!"); + llvm_unreachable("Unrecognized copy instruction!"); return 0; } @@ -1083,8 +1390,8 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li, /// isValNoAvailableAt - Return true if the val# of the specified interval /// which reaches the given instruction also reaches the specified use index. bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, - unsigned UseIdx) const { - unsigned Index = getInstructionIndex(MI); + LiveIndex UseIdx) const { + LiveIndex Index = getInstructionIndex(MI); VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno; LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx); return UI != li.end() && UI->valno == ValNo; @@ -1099,102 +1406,19 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li, if (DisableReMat) return false; - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - return true; - - int FrameIdx = 0; - if (tii_->isLoadFromStackSlot(MI, FrameIdx) && - mf_->getFrameInfo()->isImmutableObjectIndex(FrameIdx)) - // FIXME: Let target specific isReallyTriviallyReMaterializable determines - // this but remember this is not safe to fold into a two-address - // instruction. - // This is a load from fixed stack slot. It can be rematerialized. 
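// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// The Idx2MBBMap query pattern behind findLiveInMBBs / findReachableMBBs
// above, over simplified types (pairs of plain integers instead of the
// real IdxMBBPair): the map is kept sorted by block start index, so a
// std::lower_bound finds the first candidate block and a linear walk then
// collects every block whose start lies inside [Start, End).
#include <algorithm>
#include <utility>
#include <vector>

using ToyIdxMBBPair = std::pair<unsigned, int>;   // (start index, block id)

bool findBlocksIn(const std::vector<ToyIdxMBBPair> &Idx2MBB,
                  unsigned Start, unsigned End, std::vector<int> &Out) {
  std::vector<ToyIdxMBBPair>::const_iterator I =
    std::lower_bound(Idx2MBB.begin(), Idx2MBB.end(),
                     std::make_pair(Start, 0));
  bool Found = false;
  for (; I != Idx2MBB.end() && I->first < End; ++I) {
    Out.push_back(I->second);
    Found = true;
  }
  return Found;
}
// ---- end editor's aside ----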
- return true; - - // If the target-specific rules don't identify an instruction as - // being trivially rematerializable, use some target-independent - // rules. - if (!MI->getDesc().isRematerializable() || - !tii_->isTriviallyReMaterializable(MI)) { - if (!EnableAggressiveRemat) - return false; - - // If the instruction accesses memory but the memoperands have been lost, - // we can't analyze it. - const TargetInstrDesc &TID = MI->getDesc(); - if ((TID.mayLoad() || TID.mayStore()) && MI->memoperands_empty()) - return false; - - // Avoid instructions obviously unsafe for remat. - if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable()) - return false; - - // If the instruction accesses memory and the memory could be non-constant, - // assume the instruction is not rematerializable. - for (std::list::const_iterator - I = MI->memoperands_begin(), E = MI->memoperands_end(); I != E; ++I){ - const MachineMemOperand &MMO = *I; - if (MMO.isVolatile() || MMO.isStore()) - return false; - const Value *V = MMO.getValue(); - if (!V) - return false; - if (const PseudoSourceValue *PSV = dyn_cast(V)) { - if (!PSV->isConstant(mf_->getFrameInfo())) - return false; - } else if (!aa_->pointsToConstantMemory(V)) - return false; - } - - // If any of the registers accessed are non-constant, conservatively assume - // the instruction is not rematerializable. - unsigned ImpUse = 0; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg()) { - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; - - // Only allow one def, and that in the first operand. - if (MO.isDef() != (i == 0)) - return false; - - // Only allow constant-valued registers. - bool IsLiveIn = mri_->isLiveIn(Reg); - MachineRegisterInfo::def_iterator I = mri_->def_begin(Reg), - E = mri_->def_end(); - - // For the def, it should be the only def of that register. - if (MO.isDef() && (next(I) != E || IsLiveIn)) - return false; - - if (MO.isUse()) { - // Only allow one use other register use, as that's all the - // remat mechanisms support currently. - if (Reg != li.reg) { - if (ImpUse == 0) - ImpUse = Reg; - else if (Reg != ImpUse) - return false; - } - // For the use, there should be only one associated def. - if (I != E && (next(I) != E || IsLiveIn)) - return false; - } - } - } - } + if (!tii_->isTriviallyReMaterializable(MI, aa_)) + return false; + // Target-specific code can mark an instruction as being rematerializable + // if it has one virtual reg use, though it had better be something like + // a PIC base register which is likely to be live everywhere. unsigned ImpUse = getReMatImplicitUse(li, MI); if (ImpUse) { const LiveInterval &ImpLi = getInterval(ImpUse); for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg), re = mri_->use_end(); ri != re; ++ri) { MachineInstr *UseMI = &*ri; - unsigned UseIdx = getInstructionIndex(UseMI); + LiveIndex UseIdx = getInstructionIndex(UseMI); if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo) continue; if (!isValNoAvailableAt(ImpLi, MI, UseIdx)) @@ -1279,7 +1503,7 @@ static bool FilterFoldedOps(MachineInstr *MI, /// returns true. bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm, MachineInstr *DefMI, - unsigned InstrIdx, + LiveIndex InstrIdx, SmallVector &Ops, bool isSS, int Slot, unsigned Reg) { // If it is an implicit def instruction, just delete it. 
@@ -1318,7 +1542,7 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
     vrm.transferRestorePts(MI, fmi);
     vrm.transferEmergencySpills(MI, fmi);
     mi2iMap_.erase(MI);
-    i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+    i2miMap_[InstrIdx.getVecIndex()] = fmi;
     mi2iMap_[fmi] = InstrIdx;
     MI = MBB.insert(MBB.erase(MI), fmi);
     ++numFolds;
@@ -1391,7 +1615,8 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
 /// for addIntervalsForSpills to rewrite uses / defs for the given live range.
 bool LiveIntervals::
 rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
-                 bool TrySplit, unsigned index, unsigned end,  MachineInstr *MI,
+                 bool TrySplit, LiveIndex index, LiveIndex end,
+                 MachineInstr *MI,
                  MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
                  unsigned Slot, int LdSlot,
                  bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
@@ -1422,8 +1647,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
     // If this is the rematerializable definition MI itself and
     // all of its uses are rematerialized, simply delete it.
     if (MI == ReMatOrigDefMI && CanDelete) {
-      DOUT << "\t\t\t\tErasing re-materlizable def: ";
-      DOUT << MI << '\n';
+      DEBUG(errs() << "\t\t\t\tErasing re-materializable def: "
+                   << MI << '\n');
       RemoveMachineInstrFromMaps(MI);
       vrm.RemoveMachineInstrFromMaps(MI);
       MI->eraseFromParent();
@@ -1465,23 +1690,13 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
         continue;
       if (RegJ == RegI) {
         Ops.push_back(j);
-        HasUse |= MOj.isUse();
-        HasDef |= MOj.isDef();
+        if (!MOj.isUndef()) {
+          HasUse |= MOj.isUse();
+          HasDef |= MOj.isDef();
+        }
       }
     }
 
-    if (HasUse && !li.liveAt(getUseIndex(index)))
-      // Must be defined by an implicit def. It should not be spilled. Note,
-      // this is for correctness reason. e.g.
-      // 8   %reg1024 = IMPLICIT_DEF
-      // 12  %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2
-      // The live range [12, 14) are not part of the r1024 live interval since
-      // it's defined by an implicit def. It will not conflicts with live
-      // interval of r1025. Now suppose both registers are spilled, you can
-      // easily see a situation where both registers are reloaded before
-      // the INSERT_SUBREG and both target registers that would overlap.
-      HasUse = false;
-
     // Create a new virtual register for the spill interval.
     // Create the new register now so we can map the fold instruction
     // to the new register so when it is unfolded we get the correct
@@ -1537,7 +1752,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
 
     if (CreatedNewVReg) {
       if (DefIsReMat) {
-        vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
+        vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
         if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
           // Each valnum may have its own remat id.
           ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
@@ -1577,38 +1792,46 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
 
     if (HasUse) {
       if (CreatedNewVReg) {
-        LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
-                     nI.getNextValue(0, 0, false, VNInfoAllocator));
-        DOUT << " +" << LR;
+        LiveRange LR(getLoadIndex(index), getNextSlot(getUseIndex(index)),
+                     nI.getNextValue(LiveIndex(), 0, false,
+                                     VNInfoAllocator));
+        DEBUG(errs() << " +" << LR);
         nI.addRange(LR);
       } else {
         // Extend the split live interval to this def / use.
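// ---- Editor's aside (illustrative sketch, not part of the patch) ----
// The shape of the ranges created for a spilled use/def above, expressed
// with the per-instruction sub-slots (assumed LOAD < USE < DEF < STORE,
// four slots per instruction index; all names below are made up): a reload
// covers [LOAD, USE + 1) and a spill store covers [DEF, STORE), so each new
// interval is live exactly across the instruction it serves.
enum SpillSlot { SLOT_LOAD, SLOT_USE, SLOT_DEF, SLOT_STORE };

struct ToyRange { unsigned Begin, End; };  // half-open [Begin, End)

// Linear index of instruction I's sub-slot S.
inline unsigned slotIdx(unsigned I, SpillSlot S) { return 4 * I + S; }

inline ToyRange reloadRange(unsigned I) {
  return ToyRange{slotIdx(I, SLOT_LOAD), slotIdx(I, SLOT_USE) + 1};
}

inline ToyRange spillRange(unsigned I) {
  return ToyRange{slotIdx(I, SLOT_DEF), slotIdx(I, SLOT_STORE)};
}
// ---- end editor's aside ----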
- unsigned End = getUseIndex(index)+1; + LiveIndex End = getNextSlot(getUseIndex(index)); LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, nI.getValNumInfo(nI.getNumValNums()-1)); - DOUT << " +" << LR; + DEBUG(errs() << " +" << LR); nI.addRange(LR); } } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(0, 0, false, VNInfoAllocator)); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + VNInfoAllocator)); + DEBUG(errs() << " +" << LR); nI.addRange(LR); } - DOUT << "\t\t\t\tAdded new interval: "; - nI.print(DOUT, tri_); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tAdded new interval: "; + nI.print(errs(), tri_); + errs() << '\n'; + }); } return CanFold; } bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI, - MachineBasicBlock *MBB, unsigned Idx) const { - unsigned End = getMBBEndIdx(MBB); + MachineBasicBlock *MBB, + LiveIndex Idx) const { + LiveIndex End = getMBBEndIdx(MBB); for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) { - unsigned KillIdx = VNI->kills[j]; + if (VNI->kills[j].isPHIIndex()) + continue; + + LiveIndex KillIdx = VNI->kills[j]; if (KillIdx > Idx && KillIdx < End) return true; } @@ -1619,11 +1842,11 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, /// during spilling. namespace { struct RewriteInfo { - unsigned Index; + LiveIndex Index; MachineInstr *MI; bool HasUse; bool HasDef; - RewriteInfo(unsigned i, MachineInstr *mi, bool u, bool d) + RewriteInfo(LiveIndex i, MachineInstr *mi, bool u, bool d) : Index(i), MI(mi), HasUse(u), HasDef(d) {} }; @@ -1652,8 +1875,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, std::vector &NewLIs) { bool AllCanFold = true; unsigned NewVReg = 0; - unsigned start = getBaseIndex(I->start); - unsigned end = getBaseIndex(I->end-1) + InstrSlots::NUM; + LiveIndex start = getBaseIndex(I->start); + LiveIndex end = getNextIndex(getBaseIndex(getPrevSlot(I->end))); // First collect all the def / use in this live range that will be rewritten. // Make sure they are sorted according to instruction index. @@ -1664,10 +1887,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineOperand &O = ri.getOperand(); ++ri; assert(!O.isImplicit() && "Spilling register that's used as implicit use?"); - unsigned index = getInstructionIndex(MI); + LiveIndex index = getInstructionIndex(MI); if (index < start || index >= end) continue; - if (O.isUse() && !li.liveAt(getUseIndex(index))) + + if (O.isUndef()) // Must be defined by an implicit def. It should not be spilled. Note, // this is for correctness reason. e.g. // 8 %reg1024 = IMPLICIT_DEF @@ -1687,7 +1911,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { RewriteInfo &rwi = RewriteMIs[i]; ++i; - unsigned index = rwi.Index; + LiveIndex index = rwi.Index; bool MIHasUse = rwi.HasUse; bool MIHasDef = rwi.HasDef; MachineInstr *MI = rwi.MI; @@ -1773,7 +1997,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, getDefIndex(index)); else { // If this is a two-address code, then this index starts a new VNInfo. 
- const VNInfo *VNI = li.findDefinedVNInfo(getDefIndex(index)); + const VNInfo *VNI = li.findDefinedVNInfoForRegInt(getDefIndex(index)); if (VNI) HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, getDefIndex(index)); } @@ -1786,7 +2010,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillIdxes.insert(std::make_pair(MBBId, S)); } else if (SII->second.back().vreg != NewVReg) { SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if ((int)index > SII->second.back().index) { + } else if (index > SII->second.back().index) { // If there is an earlier def and this is a two-address // instruction, then it's not possible to fold the store (which // would also fold the load). @@ -1797,7 +2021,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillMBBs.set(MBBId); } else if (SII != SpillIdxes.end() && SII->second.back().vreg == NewVReg && - (int)index > SII->second.back().index) { + index > SII->second.back().index) { // There is an earlier def that's not killed (must be two-address). // The spill is no longer needed. SII->second.pop_back(); @@ -1814,7 +2038,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, SpillIdxes.find(MBBId); if (SII != SpillIdxes.end() && SII->second.back().vreg == NewVReg && - (int)index > SII->second.back().index) + index > SII->second.back().index) // Use(s) following the last def, it's not safe to fold the spill. SII->second.back().canFold = false; DenseMap >::iterator RII = @@ -1848,8 +2072,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, } } -bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr, - BitVector &RestoreMBBs, +bool LiveIntervals::alsoFoldARestore(int Id, LiveIndex index, + unsigned vr, BitVector &RestoreMBBs, DenseMap > &RestoreIdxes) { if (!RestoreMBBs[Id]) return false; @@ -1862,15 +2086,15 @@ bool LiveIntervals::alsoFoldARestore(int Id, int index, unsigned vr, return false; } -void LiveIntervals::eraseRestoreInfo(int Id, int index, unsigned vr, - BitVector &RestoreMBBs, +void LiveIntervals::eraseRestoreInfo(int Id, LiveIndex index, + unsigned vr, BitVector &RestoreMBBs, DenseMap > &RestoreIdxes) { if (!RestoreMBBs[Id]) return; std::vector &Restores = RestoreIdxes[Id]; for (unsigned i = 0, e = Restores.size(); i != e; ++i) if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = -1; + Restores[i].index = LiveIndex(); } /// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being @@ -1920,9 +2144,11 @@ addIntervalsForSpillsFast(const LiveInterval &li, assert(li.weight != HUGE_VALF && "attempt to spill already spilled interval!"); - DOUT << "\t\t\t\tadding intervals for spills for interval: "; - DEBUG(li.dump()); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadding intervals for spills for interval: "; + li.dump(); + errs() << '\n'; + }); const TargetRegisterClass* rc = mri_->getRegClass(li.reg); @@ -1967,27 +2193,31 @@ addIntervalsForSpillsFast(const LiveInterval &li, } // Fill in the new live interval. 
- unsigned index = getInstructionIndex(MI); + LiveIndex index = getInstructionIndex(MI); if (HasUse) { LiveRange LR(getLoadIndex(index), getUseIndex(index), - nI.getNextValue(0, 0, false, getVNInfoAllocator())); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + getVNInfoAllocator())); + DEBUG(errs() << " +" << LR); nI.addRange(LR); vrm.addRestorePoint(NewVReg, MI); } if (HasDef) { LiveRange LR(getDefIndex(index), getStoreIndex(index), - nI.getNextValue(0, 0, false, getVNInfoAllocator())); - DOUT << " +" << LR; + nI.getNextValue(LiveIndex(), 0, false, + getVNInfoAllocator())); + DEBUG(errs() << " +" << LR); nI.addRange(LR); vrm.addSpillPoint(NewVReg, true, MI); } added.push_back(&nI); - DOUT << "\t\t\t\tadded new interval: "; - DEBUG(nI.dump()); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadded new interval: "; + nI.dump(); + errs() << '\n'; + }); } @@ -2008,9 +2238,11 @@ addIntervalsForSpills(const LiveInterval &li, assert(li.weight != HUGE_VALF && "attempt to spill already spilled interval!"); - DOUT << "\t\t\t\tadding intervals for spills for interval: "; - li.print(DOUT, tri_); - DOUT << '\n'; + DEBUG({ + errs() << "\t\t\t\tadding intervals for spills for interval: "; + li.print(errs(), tri_); + errs() << '\n'; + }); // Each bit specify whether a spill is required in the MBB. BitVector SpillMBBs(mf_->getNumBlockIDs()); @@ -2036,8 +2268,8 @@ addIntervalsForSpills(const LiveInterval &li, if (vrm.getPreSplitReg(li.reg)) { vrm.setIsSplitFromReg(li.reg, 0); // Unset the split kill marker on the last use. - unsigned KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx) { + LiveIndex KillIdx = vrm.getKillPoint(li.reg); + if (KillIdx != LiveIndex()) { MachineInstr *KillMI = getInstructionFromIndex(KillIdx); assert(KillMI && "Last use disappeared?"); int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); @@ -2081,9 +2313,7 @@ addIntervalsForSpills(const LiveInterval &li, return NewLIs; } - bool TrySplit = SplitAtBB && !intervalIsInOneMBB(li); - if (SplitLimit != -1 && (int)numSplits >= SplitLimit) - TrySplit = false; + bool TrySplit = !intervalIsInOneMBB(li); if (TrySplit) ++numSplits; bool NeedStackSlot = false; @@ -2102,7 +2332,7 @@ addIntervalsForSpills(const LiveInterval &li, ReMatOrigDefs[VN] = ReMatDefMI; // Original def may be modified so we have to make a copy here. MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - ClonedMIs.push_back(Clone); + CloneMIs.push_back(Clone); ReMatDefs[VN] = Clone; bool CanDelete = true; @@ -2165,7 +2395,7 @@ addIntervalsForSpills(const LiveInterval &li, while (Id != -1) { std::vector &spills = SpillIdxes[Id]; for (unsigned i = 0, e = spills.size(); i != e; ++i) { - int index = spills[i].index; + LiveIndex index = spills[i].index; unsigned VReg = spills[i].vreg; LiveInterval &nI = getOrCreateInterval(VReg); bool isReMat = vrm.isReMaterialized(VReg); @@ -2203,7 +2433,7 @@ addIntervalsForSpills(const LiveInterval &li, if (FoundUse) { // Also folded uses, do not issue a load. 
eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(getLoadIndex(index), getUseIndex(index)+1); + nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index))); } nI.removeRange(getDefIndex(index), getStoreIndex(index)); } @@ -2228,8 +2458,8 @@ addIntervalsForSpills(const LiveInterval &li, while (Id != -1) { std::vector &restores = RestoreIdxes[Id]; for (unsigned i = 0, e = restores.size(); i != e; ++i) { - int index = restores[i].index; - if (index == -1) + LiveIndex index = restores[i].index; + if (index == LiveIndex()) continue; unsigned VReg = restores[i].vreg; LiveInterval &nI = getOrCreateInterval(VReg); @@ -2284,7 +2514,7 @@ addIntervalsForSpills(const LiveInterval &li, // If folding is not possible / failed, then tell the spiller to issue a // load / rematerialization for us. if (Folded) - nI.removeRange(getLoadIndex(index), getUseIndex(index)+1); + nI.removeRange(getLoadIndex(index), getNextSlot(getUseIndex(index))); else vrm.addRestorePoint(VReg, MI); } @@ -2300,7 +2530,7 @@ addIntervalsForSpills(const LiveInterval &li, LI->weight /= InstrSlots::NUM * getApproximateInstructionCount(*LI); if (!AddedKill.count(LI)) { LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - unsigned LastUseIdx = getBaseIndex(LR->end); + LiveIndex LastUseIdx = getBaseIndex(LR->end); MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); assert(UseIdx != -1); @@ -2351,7 +2581,7 @@ unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); MachineInstr *MI = O.getParent(); - unsigned Index = getInstructionIndex(MI); + LiveIndex Index = getInstructionIndex(MI); if (pli.liveAt(Index)) ++NumConflicts; } @@ -2382,29 +2612,31 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, if (SeenMIs.count(MI)) continue; SeenMIs.insert(MI); - unsigned Index = getInstructionIndex(MI); + LiveIndex Index = getInstructionIndex(MI); if (pli.liveAt(Index)) { vrm.addEmergencySpill(SpillReg, MI); - unsigned StartIdx = getLoadIndex(Index); - unsigned EndIdx = getStoreIndex(Index)+1; + LiveIndex StartIdx = getLoadIndex(Index); + LiveIndex EndIdx = getNextSlot(getStoreIndex(Index)); if (pli.isInOneLiveRange(StartIdx, EndIdx)) { pli.removeRange(StartIdx, EndIdx); Cut = true; } else { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), tm_); + MI->print(Msg, tm_); } - exit(1); + llvm_report_error(Msg.str()); } for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS) { if (!hasInterval(*AS)) continue; LiveInterval &spli = getInterval(*AS); if (spli.liveAt(Index)) - spli.removeRange(getLoadIndex(Index), getStoreIndex(Index)+1); + spli.removeRange(getLoadIndex(Index), getNextSlot(getStoreIndex(Index))); } } } @@ -2412,16 +2644,18 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, } LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { + MachineInstr* startInst) { LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( - getInstructionIndex(startInst) + InstrSlots::DEF, - 
startInst, true, getVNInfoAllocator()); + LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF), + startInst, true, getVNInfoAllocator()); VN->setHasPHIKill(true); - VN->kills.push_back(getMBBEndIdx(startInst->getParent())); - LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF, - getMBBEndIdx(startInst->getParent()) + 1, VN); + VN->kills.push_back(terminatorGaps[startInst->getParent()]); + LiveRange LR( + LiveIndex(getInstructionIndex(startInst), LiveIndex::DEF), + getNextSlot(getMBBEndIdx(startInst->getParent())), VN); Interval.addRange(LR); return LR; } + diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 86f7ea20c9be7..a7bea1fd4f986 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include <limits> using namespace llvm; @@ -52,15 +53,16 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) { } /// print - Implement the dump method. -void LiveStacks::print(std::ostream &O, const Module*) const { - O << "********** INTERVALS **********\n"; +void LiveStacks::print(raw_ostream &OS, const Module*) const { + + OS << "********** INTERVALS **********\n"; for (const_iterator I = begin(), E = end(); I != E; ++I) { - I->second.print(O); + I->second.print(OS); int Slot = I->first; const TargetRegisterClass *RC = getIntervalRegClass(Slot); if (RC) - O << " [" << RC->getName() << "]\n"; + OS << " [" << RC->getName() << "]\n"; else - O << " [Unknown]\n"; + OS << " [Unknown]\n"; } } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index bd845085bbf5c..139e0291ea7a8 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -37,7 +37,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Config/alloca.h" #include <algorithm> using namespace llvm; @@ -48,20 +47,21 @@ static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis"); void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(UnreachableMachineBlockElimID); AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } void LiveVariables::VarInfo::dump() const { - cerr << " Alive in blocks: "; + errs() << " Alive in blocks: "; for (SparseBitVector<>::iterator I = AliveBlocks.begin(), E = AliveBlocks.end(); I != E; ++I) - cerr << *I << ", "; - cerr << "\n Killed by:"; + errs() << *I << ", "; + errs() << "\n Killed by:"; if (Kills.empty()) - cerr << " No instructions.\n"; + errs() << " No instructions.\n"; else { for (unsigned i = 0, e = Kills.size(); i != e; ++i) - cerr << "\n #" << i << ": " << *Kills[i]; - cerr << "\n"; + errs() << "\n #" << i << ": " << *Kills[i]; + errs() << "\n"; } } @@ -180,9 +180,9 @@ void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) { } /// FindLastPartialDef - Return the last partial def of the specified register. -/// Also returns the sub-register that's defined. +/// Also returns the sub-registers that are defined by the instruction.
MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, - unsigned &PartDefReg) { + SmallSet<unsigned,8> &PartDefRegs) { unsigned LastDefReg = 0; unsigned LastDefDist = 0; MachineInstr *LastDef = NULL; @@ -198,7 +198,23 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, LastDefDist = Dist; } } - PartDefReg = LastDefReg; + + if (!LastDef) + return 0; + + PartDefRegs.insert(LastDefReg); + for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) { + MachineOperand &MO = LastDef->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) + continue; + unsigned DefReg = MO.getReg(); + if (TRI->isSubRegister(Reg, DefReg)) { + PartDefRegs.insert(DefReg); + for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg); + unsigned SubReg = *SubRegs; ++SubRegs) + PartDefRegs.insert(SubReg); + } + } return LastDef; } @@ -216,8 +232,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { // ... // = EAX // All of the sub-registers must have been defined before the use of Reg! - unsigned PartDefReg = 0; - MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefReg); + SmallSet<unsigned,8> PartDefRegs; + MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs); // If LastPartialDef is NULL, it must be using a livein register. if (LastPartialDef) { LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, @@ -228,7 +244,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { unsigned SubReg = *SubRegs; ++SubRegs) { if (Processed.count(SubReg)) continue; - if (SubReg == PartDefReg || TRI->isSubRegister(PartDefReg, SubReg)) + if (PartDefRegs.count(SubReg)) continue; // This part of Reg was defined before the last partial def. It's killed // here. @@ -249,78 +265,13 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { PhysRegUse[SubReg] = MI; } -/// hasRegisterUseBelow - Return true if the specified register is used after -/// the current instruction and before it's next definition. -bool LiveVariables::hasRegisterUseBelow(unsigned Reg, - MachineBasicBlock::iterator I, - MachineBasicBlock *MBB) { - if (I == MBB->end()) - return false; - - // First find out if there are any uses / defs below. - bool hasDistInfo = true; - unsigned CurDist = DistanceMap[I]; - SmallVector<MachineInstr*, 4> Uses; - SmallVector<MachineInstr*, 4> Defs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineOperand &UDO = RI.getOperand(); - MachineInstr *UDMI = &*RI; - if (UDMI->getParent() != MBB) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI); - bool isBelow = false; - if (DI == DistanceMap.end()) { - // Must be below if it hasn't been assigned a distance yet. - isBelow = true; - hasDistInfo = false; - } else if (DI->second > CurDist) - isBelow = true; - if (isBelow) { - if (UDO.isUse()) - Uses.push_back(UDMI); - if (UDO.isDef()) - Defs.push_back(UDMI); - } - } - - if (Uses.empty()) - // No uses below. - return false; - else if (!Uses.empty() && Defs.empty()) - // There are uses below but no defs below. - return true; - // There are both uses and defs below. We need to know which comes first. - if (!hasDistInfo) { - // Complete DistanceMap for this MBB. This information is computed only - // once per MBB.
- ++I; - ++CurDist; - for (MachineBasicBlock::iterator E = MBB->end(); I != E; ++I, ++CurDist) - DistanceMap.insert(std::make_pair(I, CurDist)); - } - - unsigned EarliestUse = DistanceMap[Uses[0]]; - for (unsigned i = 1, e = Uses.size(); i != e; ++i) { - unsigned Dist = DistanceMap[Uses[i]]; - if (Dist < EarliestUse) - EarliestUse = Dist; - } - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Dist = DistanceMap[Defs[i]]; - if (Dist < EarliestUse) - // The register is defined before its first use below. - return false; - } - return true; -} - bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { - if (!PhysRegUse[Reg] && !PhysRegDef[Reg]) + MachineInstr *LastDef = PhysRegDef[Reg]; + MachineInstr *LastUse = PhysRegUse[Reg]; + if (!LastDef && !LastUse) return false; - MachineInstr *LastRefOrPartRef = PhysRegUse[Reg] - ? PhysRegUse[Reg] : PhysRegDef[Reg]; + MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; // The whole register is used. // AL = @@ -339,9 +290,22 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // AX = AL // = AL // AX = + MachineInstr *LastPartDef = 0; + unsigned LastPartDefDist = 0; SmallSet PartUses; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { + MachineInstr *Def = PhysRegDef[SubReg]; + if (Def && Def != LastDef) { + // There was a def of this sub-register in between. This is a partial + // def, keep track of the last one. + unsigned Dist = DistanceMap[Def]; + if (Dist > LastPartDefDist) { + LastPartDefDist = Dist; + LastPartDef = Def; + } + continue; + } if (MachineInstr *Use = PhysRegUse[SubReg]) { PartUses.insert(SubReg); for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) @@ -354,35 +318,47 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } } - if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) - // If the last reference is the last def, then it's not used at all. - // That is, unless we are currently processing the last reference itself. - LastRefOrPartRef->addRegisterDead(Reg, TRI, true); - - // Partial uses. Mark register def dead and add implicit def of - // sub-registers which are used. - // EAX = op AL - // That is, EAX def is dead but AL def extends pass it. - // Enable this after live interval analysis is fixed to improve codegen! - else if (!PhysRegUse[Reg]) { + if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { + if (LastPartDef) + // The last partial def kills the register. + LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/, true/*IsKill*/)); + else + // If the last reference is the last def, then it's not used at all. + // That is, unless we are currently processing the last reference itself. + LastRefOrPartRef->addRegisterDead(Reg, TRI, true); + } else if (!PhysRegUse[Reg]) { + // Partial uses. Mark register def dead and add implicit def of + // sub-registers which are used. + // EAX = op AL + // That is, EAX def is dead but AL def extends pass it. 
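The LastPartDef bookkeeping added above makes a two-way decision when a register's last reference is its own def: if some sub-register was partially redefined later, that partial def must carry an implicit kill of the super-register; otherwise the full def is plainly dead. A toy, self-contained restatement of the decision (not the LiveVariables API; the register names are just labels):

#include <iostream>

int main() {
  // Instruction positions in block order, as DistanceMap records them.
  unsigned FullDefDist = 1;  // EAX = ...
  unsigned PartDefDist = 3;  // AL = ...   (partial def of EAX)
  if (PartDefDist > FullDefDist)
    std::cout << "add an implicit kill of EAX on the AL def\n";
  else
    std::cout << "mark the EAX def dead\n";
  return 0;
}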
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) { - if (PartUses.count(SubReg)) { - PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, - true, true)); - LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) - PartUses.erase(*SS); + if (!PartUses.count(SubReg)) + continue; + bool NeedDef = true; + if (PhysRegDef[Reg] == PhysRegDef[SubReg]) { + MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg); + if (MO) { + NeedDef = false; + assert(!MO->isDead()); + } } + if (NeedDef) + PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, + true/*IsDef*/, true/*IsImp*/)); + LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); + for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) + PartUses.erase(*SS); } - } - else + } else LastRefOrPartRef->addRegisterKilled(Reg, TRI, true); return true; } -void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { +void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, + SmallVector &Defs) { // What parts of the register are previously defined? SmallSet Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { @@ -398,6 +374,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { // AL = // AH = // = AX + if (Live.count(SubReg)) + continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { Live.insert(SubReg); for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) @@ -408,68 +386,25 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { // Start from the largest piece, find the last time any part of the register // is referenced. - if (!HandlePhysRegKill(Reg, MI)) { - // Only some of the sub-registers are used. - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) { - if (!Live.count(SubReg)) - // Skip if this sub-register isn't defined. - continue; - if (HandlePhysRegKill(SubReg, MI)) { - Live.erase(SubReg); - for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) - Live.erase(*SS); - } - } - assert(Live.empty() && "Not all defined registers are killed / dead?"); + HandlePhysRegKill(Reg, MI); + // Only some of the sub-registers are used. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + if (!Live.count(SubReg)) + // Skip if this sub-register isn't defined. + continue; + HandlePhysRegKill(SubReg, MI); } - if (MI) { - // Does this extend the live range of a super-register? - SmallSet Processed; - for (const unsigned *SuperRegs = TRI->getSuperRegisters(Reg); - unsigned SuperReg = *SuperRegs; ++SuperRegs) { - if (Processed.count(SuperReg)) - continue; - MachineInstr *LastRef = PhysRegUse[SuperReg] - ? PhysRegUse[SuperReg] : PhysRegDef[SuperReg]; - if (LastRef && LastRef != MI) { - // The larger register is previously defined. Now a smaller part is - // being re-defined. Treat it as read/mod/write if there are uses - // below. - // EAX = - // AX = EAX, EAX - // ... 
- /// = EAX - if (hasRegisterUseBelow(SuperReg, MI, MI->getParent())) { - MI->addOperand(MachineOperand::CreateReg(SuperReg, false/*IsDef*/, - true/*IsImp*/,true/*IsKill*/)); - MI->addOperand(MachineOperand::CreateReg(SuperReg, true/*IsDef*/, - true/*IsImp*/)); - PhysRegDef[SuperReg] = MI; - PhysRegUse[SuperReg] = NULL; - Processed.insert(SuperReg); - for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { - PhysRegDef[*SS] = MI; - PhysRegUse[*SS] = NULL; - Processed.insert(*SS); - } - } else { - // Otherwise, the super register is killed. - if (HandlePhysRegKill(SuperReg, MI)) { - PhysRegDef[SuperReg] = NULL; - PhysRegUse[SuperReg] = NULL; - for (const unsigned *SS = TRI->getSubRegisters(SuperReg); *SS; ++SS) { - PhysRegDef[*SS] = NULL; - PhysRegUse[*SS] = NULL; - Processed.insert(*SS); - } - } - } - } - } + if (MI) + Defs.push_back(Reg); // Remember this def. +} - // Remember this def. +void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, + SmallVector &Defs) { + while (!Defs.empty()) { + unsigned Reg = Defs.back(); + Defs.pop_back(); PhysRegDef[Reg] = MI; PhysRegUse[Reg] = NULL; for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); @@ -480,6 +415,21 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI) { } } +namespace { + struct RegSorter { + const TargetRegisterInfo *TRI; + + RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { } + bool operator()(unsigned A, unsigned B) { + if (TRI->isSubRegister(A, B)) + return true; + else if (TRI->isSubRegister(B, A)) + return false; + return A < B; + } + }; +} + bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MRI = &mf.getRegInfo(); @@ -512,11 +462,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *MBB = *DFI; // Mark live-in registers as live-in. + SmallVector Defs; for (MachineBasicBlock::const_livein_iterator II = MBB->livein_begin(), EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, 0); + HandlePhysRegDef(*II, 0, Defs); } // Loop over all of the instructions, processing them. @@ -563,8 +514,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { if (TargetRegisterInfo::isVirtualRegister(MOReg)) HandleVirtRegDef(MOReg, MI); else if (!ReservedRegisters[MOReg]) - HandlePhysRegDef(MOReg, MI); + HandlePhysRegDef(MOReg, MI, Defs); } + UpdatePhysRegDefs(MI, Defs); } // Handle any virtual assignments from PHI nodes which might be at the @@ -603,7 +555,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // available at the end of the basic block. 
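With hasRegisterUseBelow gone, HandlePhysRegDef now only queues the defined register, and the new UpdatePhysRegDefs applies the queued updates after every operand of MI has been scanned; each operand is therefore still analyzed against the pre-instruction PhysRegDef/PhysRegUse state. A generic sketch of that decide-then-commit split (hypothetical names, not the LiveVariables members):

#include <map>
#include <vector>

struct DefTracker {
  std::map<unsigned, int> LastDef;  // reg -> id of its latest defining instr
  std::vector<unsigned> Pending;    // defs noticed while scanning one instr

  void handleDef(unsigned Reg) { Pending.push_back(Reg); }  // decide only
  void commit(int InstrId) {                                // apply afterwards
    for (unsigned i = 0, e = Pending.size(); i != e; ++i)
      LastDef[Pending[i]] = InstrId;
    Pending.clear();
  }
};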
for (unsigned i = 0; i != NumRegs; ++i) if (PhysRegDef[i] || PhysRegUse[i]) - HandlePhysRegDef(i, 0); + HandlePhysRegDef(i, 0, Defs); std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 14acb71eeb40f..8486bb084fe9a 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -19,12 +19,14 @@ #include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -38,6 +40,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -53,7 +56,8 @@ namespace { void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, const TargetRegisterInfo &TRI); void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo &TRI); + const TargetRegisterInfo &TRI, + bool AddIfNotFound = false); }; char LowerSubregsInstructionPass::ID = 0; @@ -85,10 +89,11 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, void LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, unsigned SrcReg, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + bool AddIfNotFound) { for (MachineBasicBlock::iterator MII = prior(MachineBasicBlock::iterator(MI)); ; --MII) { - if (MII->addRegisterKilled(SrcReg, &TRI)) + if (MII->addRegisterKilled(SrcReg, &TRI, AddIfNotFound)) break; assert(MII != MI->getParent()->begin() && "copyRegToReg output doesn't reference source register!"); @@ -100,7 +105,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { MachineFunction &MF = *MBB->getParent(); const TargetRegisterInfo &TRI = *MF.getTarget().getRegisterInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - + assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && MI->getOperand(1).isReg() && MI->getOperand(1).isUse() && MI->getOperand(2).isImm() && "Malformed extract_subreg"); @@ -114,41 +119,41 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { "Extract supperg source must be a physical register"); assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && "Extract destination must be in a physical register"); - - DOUT << "subreg: CONVERTING: " << *MI; + assert(SrcReg && "invalid subregister index for register"); + + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (SrcReg == DstReg) { - // No need to insert an identify copy instruction. - DOUT << "subreg: eliminated!"; - // Find the kill of the destination register's live range, and insert - // a kill of the source register at that point. - if (MI->getOperand(1).isKill() && !MI->getOperand(0).isDead()) - for (MachineBasicBlock::iterator MII = - next(MachineBasicBlock::iterator(MI)); - MII != MBB->end(); ++MII) - if (MII->killsRegister(DstReg, &TRI)) { - MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true); - break; - } + // No need to insert an identity copy instruction. 
+ if (MI->getOperand(1).isKill()) { + // We must make sure the super-register gets killed. Replace the + // instruction with KILL. + MI->setDesc(TII.get(TargetInstrInfo::KILL)); + MI->RemoveOperand(2); // SubIdx + DEBUG(errs() << "subreg: replace by: " << *MI); + return true; + } + + DEBUG(errs() << "subreg: eliminated!"); } else { // Insert copy - const TargetRegisterClass *TRC = TRI.getPhysicalRegisterRegClass(DstReg); - assert(TRC == TRI.getPhysicalRegisterRegClass(SrcReg) && - "Extract subreg and Dst must be of same register class"); - TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRC, TRC); + const TargetRegisterClass *TRCS = TRI.getPhysicalRegisterRegClass(DstReg); + const TargetRegisterClass *TRCD = TRI.getPhysicalRegisterRegClass(SrcReg); + bool Emitted = TII.copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS); + (void)Emitted; + assert(Emitted && "Subreg and Dst must be of compatible register class"); // Transfer the kill/dead flags, if needed. if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstReg, TRI); if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, SrcReg, TRI); - -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + TransferKillFlag(MI, SuperReg, TRI, true); + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI); + }); } - DOUT << "\n"; + DEBUG(errs() << '\n'); MBB->erase(MI); return true; } @@ -176,7 +181,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DOUT << "subreg: CONVERTING: " << *MI; + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg && InsSIdx == 0) { // No need to insert an identify copy instruction. @@ -185,7 +190,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { // %RAX = SUBREG_TO_REG 0, %EAX:3, 3 // The first def is defining RAX, not EAX so the top bits were not // zero extended. - DOUT << "subreg: eliminated!"; + DEBUG(errs() << "subreg: eliminated!"); } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); @@ -196,14 +201,13 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { TransferDeadFlag(MI, DstSubReg, TRI); if (MI->getOperand(2).isKill()) TransferKillFlag(MI, InsReg, TRI); - -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI); + }); } - DOUT << "\n"; + DEBUG(errs() << '\n'); MBB->erase(MI); return true; } @@ -228,49 +232,79 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?"); assert(SubIdx != 0 && "Invalid index for insert_subreg"); unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx); - + assert(DstSubReg && "invalid subregister index for register"); assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && "Insert superreg source must be in a physical register"); assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && "Inserted value must be in a physical register"); - DOUT << "subreg: CONVERTING: " << *MI; + DEBUG(errs() << "subreg: CONVERTING: " << *MI); if (DstSubReg == InsReg) { - // No need to insert an identify copy instruction. - DOUT << "subreg: eliminated!"; + // No need to insert an identity copy instruction. 
If the SrcReg was + // , we need to make sure it is alive by inserting a KILL + if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { + MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), + TII.get(TargetInstrInfo::KILL), DstReg); + if (MI->getOperand(2).isUndef()) + MIB.addReg(InsReg, RegState::Undef); + else + MIB.addReg(InsReg, RegState::Kill); + } else { + DEBUG(errs() << "subreg: eliminated!\n"); + MBB->erase(MI); + return true; + } } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); - TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + if (MI->getOperand(2).isUndef()) + // If the source register being inserted is undef, then this becomes a + // KILL. + BuildMI(*MBB, MI, MI->getDebugLoc(), + TII.get(TargetInstrInfo::KILL), DstSubReg); + else + TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + + // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg. + if (!MI->getOperand(1).isUndef()) + CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true)); + // Transfer the kill/dead flags, if needed. - if (MI->getOperand(0).isDead()) + if (MI->getOperand(0).isDead()) { TransferDeadFlag(MI, DstSubReg, TRI); - if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, InsReg, TRI); + } else { + // Make sure the full DstReg is live after this replacement. + CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true)); + } -#ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); -#endif + // Make sure the inserted register gets killed + if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef()) + TransferKillFlag(MI, InsReg, TRI); } - DOUT << "\n"; + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + errs() << "subreg: " << *(--dMI) << "\n"; + }); + MBB->erase(MI); - return true; + return true; } /// runOnMachineFunction - Reduce subregister inserts and extracts to register /// copies. /// bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DOUT << "Machine Function\n"; - - bool MadeChange = false; + DEBUG(errs() << "Machine Function\n" + << "********** LOWERING SUBREG INSTRS **********\n" + << "********** Function: " + << MF.getFunction()->getName() << '\n'); - DOUT << "********** LOWERING SUBREG INSTRS **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + bool MadeChange = false; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { diff --git a/lib/CodeGen/MachO.h b/lib/CodeGen/MachO.h index bd9bd61e9ede9..f2b40fe58e21f 100644 --- a/lib/CodeGen/MachO.h +++ b/lib/CodeGen/MachO.h @@ -14,17 +14,15 @@ #ifndef MACHO_H #define MACHO_H -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/MachineRelocation.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/CodeGen/BinaryObject.h" #include #include namespace llvm { -typedef std::vector DataBuffer; - +class GlobalValue; +class MCAsmInfo; + /// MachOSym - This struct contains information about each symbol that is /// added to logical symbol table for the module. This is eventually /// turned into a real symbol table in the file. 
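The LowerExtract change above deserves a note: instead of searching downstream for a place to re-attach the kill flag, an identity extract_subreg whose source is killed is now rewritten in place into the KILL pseudo-instruction (setDesc plus RemoveOperand), so the super-register's kill point survives. A toy model of the rewrite, not the MachineInstr API:

#include <cassert>
#include <string>
#include <vector>

struct Instr {
  std::string Opcode;
  std::vector<std::string> Operands;
  bool SrcIsKill;
};

// An identity sub-register copy cannot always just be erased: if its source
// operand carried a kill flag, the instruction becomes a KILL marker instead.
void lowerIdentityExtract(Instr &I) {
  assert(I.Opcode == "EXTRACT_SUBREG" && I.Operands.size() == 3);
  if (I.SrcIsKill) {
    I.Opcode = "KILL";      // keep the super-register's kill point
    I.Operands.pop_back();  // drop the sub-register index operand
  }
  // otherwise the caller simply erases the instruction
}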
@@ -70,7 +68,7 @@ struct MachOSym { }; MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - const TargetAsmInfo *TAI); + const MCAsmInfo *MAI); struct SymCmp { // FIXME: this does not appear to be sorting 'f' after 'F' @@ -110,7 +108,7 @@ struct MachOHeader { /// HeaderData - The actual data for the header which we are building /// up for emission to the file. - DataBuffer HeaderData; + std::vector HeaderData; // Constants for the filetype field // see for additional info on the various types @@ -180,8 +178,8 @@ struct MachOHeader { }; MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0), - reserved(0) { } - + reserved(0) {} + /// cmdSize - This routine returns the size of the MachOSection as written /// to disk, depending on whether the destination is a 64 bit Mach-O file. unsigned cmdSize(bool is64Bit) const { @@ -203,7 +201,7 @@ struct MachOHeader { } }; // end struct MachOHeader - + /// MachOSegment - This struct contains the necessary information to /// emit the load commands for each section in the file. struct MachOSegment { @@ -245,13 +243,13 @@ struct MachOSegment { SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE, SEG_VM_PROT_ALL = VM_PROT_ALL }; - + // Constants for the cmd field // see enum { LC_SEGMENT = 0x01, // segment of this file to be mapped LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped }; - + /// cmdSize - This routine returns the size of the MachOSection as written /// to disk, depending on whether the destination is a 64 bit Mach-O file. unsigned cmdSize(bool is64Bit) const { @@ -272,11 +270,10 @@ struct MachOSegment { /// turned into the SectionCommand in the load command for a particlar /// segment. -struct MachOSection { +struct MachOSection : public BinaryObject { std::string sectname; // name of this section, std::string segname; // segment this section goes in uint64_t addr; // memory address of this section - uint64_t size; // size in bytes of this section uint32_t offset; // file offset of this section uint32_t align; // section alignment (power of 2) uint32_t reloff; // file offset of relocation entries @@ -285,24 +282,15 @@ struct MachOSection { uint32_t reserved1; // reserved (for offset or index) uint32_t reserved2; // reserved (for count or sizeof) uint32_t reserved3; // reserved (64 bit only) - + /// A unique number for this section, which will be used to match symbols /// to the correct section. uint32_t Index; - - /// SectionData - The actual data for this section which we are building - /// up for emission to the file. - DataBuffer SectionData; /// RelocBuffer - A buffer to hold the mach-o relocations before we write /// them out at the appropriate location in the file. - DataBuffer RelocBuffer; - - /// Relocations - The relocations that we have encountered so far in this - /// section that we will need to convert to MachORelocation entries when - /// the file is written. 
- std::vector Relocations; - + std::vector RelocBuffer; + // Constants for the section types (low 8 bits of flags field) // see enum { S_REGULAR = 0, @@ -374,48 +362,49 @@ struct MachOSection { } MachOSection(const std::string &seg, const std::string §) - : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2), - reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), + : BinaryObject(), sectname(sect), segname(seg), addr(0), offset(0), + align(2), reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), reserved3(0) { } }; // end struct MachOSection - /// MachOSymTab - This struct contains information about the offsets and - /// size of symbol table information. - /// segment. - struct MachODySymTab { - uint32_t cmd; // LC_DYSYMTAB - uint32_t cmdsize; // sizeof( MachODySymTab ) - uint32_t ilocalsym; // index to local symbols - uint32_t nlocalsym; // number of local symbols - uint32_t iextdefsym; // index to externally defined symbols - uint32_t nextdefsym; // number of externally defined symbols - uint32_t iundefsym; // index to undefined symbols - uint32_t nundefsym; // number of undefined symbols - uint32_t tocoff; // file offset to table of contents - uint32_t ntoc; // number of entries in table of contents - uint32_t modtaboff; // file offset to module table - uint32_t nmodtab; // number of module table entries - uint32_t extrefsymoff; // offset to referenced symbol table - uint32_t nextrefsyms; // number of referenced symbol table entries - uint32_t indirectsymoff; // file offset to the indirect symbol table - uint32_t nindirectsyms; // number of indirect symbol table entries - uint32_t extreloff; // offset to external relocation entries - uint32_t nextrel; // number of external relocation entries - uint32_t locreloff; // offset to local relocation entries - uint32_t nlocrel; // number of local relocation entries - - // Constants for the cmd field - // see - enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info - }; - - MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), - ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), - iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), - nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), - nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { } - }; +/// MachOSymTab - This struct contains information about the offsets and +/// size of symbol table information. +/// segment. 
+struct MachODySymTab { + uint32_t cmd; // LC_DYSYMTAB + uint32_t cmdsize; // sizeof(MachODySymTab) + uint32_t ilocalsym; // index to local symbols + uint32_t nlocalsym; // number of local symbols + uint32_t iextdefsym; // index to externally defined symbols + uint32_t nextdefsym; // number of externally defined symbols + uint32_t iundefsym; // index to undefined symbols + uint32_t nundefsym; // number of undefined symbols + uint32_t tocoff; // file offset to table of contents + uint32_t ntoc; // number of entries in table of contents + uint32_t modtaboff; // file offset to module table + uint32_t nmodtab; // number of module table entries + uint32_t extrefsymoff; // offset to referenced symbol table + uint32_t nextrefsyms; // number of referenced symbol table entries + uint32_t indirectsymoff; // file offset to the indirect symbol table + uint32_t nindirectsyms; // number of indirect symbol table entries + uint32_t extreloff; // offset to external relocation entries + uint32_t nextrel; // number of external relocation entries + uint32_t locreloff; // offset to local relocation entries + uint32_t nlocrel; // number of local relocation entries + + // Constants for the cmd field + // see + enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info + }; + + MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), + ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), + iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), + nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), + nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) {} + +}; // end struct MachODySymTab } // end namespace llvm diff --git a/lib/CodeGen/MachOCodeEmitter.cpp b/lib/CodeGen/MachOCodeEmitter.cpp index 02b02de9ec36c..13184772cdb45 100644 --- a/lib/CodeGen/MachOCodeEmitter.cpp +++ b/lib/CodeGen/MachOCodeEmitter.cpp @@ -7,22 +7,37 @@ // //===----------------------------------------------------------------------===// +#include "MachO.h" +#include "MachOWriter.h" #include "MachOCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/OutputBuffer.h" +#include //===----------------------------------------------------------------------===// // MachOCodeEmitter Implementation //===----------------------------------------------------------------------===// namespace llvm { - + +MachOCodeEmitter::MachOCodeEmitter(MachOWriter &mow, MachOSection &mos) : + ObjectCodeEmitter(&mos), MOW(mow), TM(MOW.TM) { + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); + MAI = TM.getMCAsmInfo(); +} + /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. @@ -39,28 +54,18 @@ void MachOCodeEmitter::startFunction(MachineFunction &MF) { // Get the Mach-O Section that this function belongs in. 
MachOSection *MOS = MOW.getTextSection(); - // FIXME: better memory management - MOS->SectionData.reserve(4096); - BufferBegin = &MOS->SectionData[0]; - BufferEnd = BufferBegin + MOS->SectionData.capacity(); - // Upgrade the section alignment if required. if (MOS->align < Align) MOS->align = Align; - // Round the size up to the correct alignment for starting the new function. - if ((MOS->size & ((1 << Align) - 1)) != 0) { - MOS->size += (1 << Align); - MOS->size &= ~((1 << Align) - 1); - } + MOS->emitAlignment(Align); + + // Create symbol for function entry + const GlobalValue *FuncV = MF.getFunction(); + MachOSym FnSym(FuncV, MOW.Mang->getMangledName(FuncV), MOS->Index, MAI); + FnSym.n_value = getCurrentPCOffset(); - // FIXME: Using MOS->size directly here instead of calculating it from the - // output buffer size (impossible because the code emitter deals only in raw - // bytes) forces us to manually synchronize size and write padding zero bytes - // to the output buffer for all non-text sections. For text sections, we do - // not synchonize the output buffer, and we just blow up if anyone tries to - // write non-code to it. An assert should probably be added to - // AddSymbolToSection to prevent calling it on the text section. - CurBufferPtr = BufferBegin + MOS->size; + // add it to the symtab. + MOW.SymbolTable.push_back(FnSym); } /// finishFunction - This callback is invoked after the function is completely @@ -71,15 +76,6 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { // Get the Mach-O Section that this function belongs in. MachOSection *MOS = MOW.getTextSection(); - // Get a symbol for the function to add to the symbol table - // FIXME: it seems like we should call something like AddSymbolToSection - // in startFunction rather than changing the section size and symbol n_value - // here. - const GlobalValue *FuncV = MF.getFunction(); - MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TAI); - FnSym.n_value = MOS->size; - MOS->size = CurBufferPtr - BufferBegin; - // Emit constant pool to appropriate section(s) emitConstantPool(MF.getConstantPool()); @@ -110,14 +106,11 @@ bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { // FIXME: This should be a set or something that uniques MOW.PendingGlobals.push_back(MR.getGlobalValue()); } else { - assert(0 && "Unhandled relocation type"); + llvm_unreachable("Unhandled relocation type"); } - MOS->Relocations.push_back(MR); + MOS->addRelocation(MR); } Relocations.clear(); - - // Finally, add it to the symtab. - MOW.SymbolTable.push_back(FnSym); // Clear per-function data structures. CPLocations.clear(); @@ -151,13 +144,10 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal); - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); - CPLocations.push_back(Sec->SectionData.size()); + CPLocations.push_back(Sec->size()); CPSections.push_back(Sec->Index); - - // FIXME: remove when we have unified size + output buffer - Sec->size += Size; // Allocate space in the section for the global. // FIXME: need alignment? 
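The emitConstantPool rewrite continuing below retires the "unified size + output buffer" FIXMEs by treating the section itself as the buffer: an entry's offset is simply the section's current size, a zeroed slot is emitted, and InitMem patches the real bytes and relocations in afterwards. A self-contained sketch of that reserve-then-fill protocol over plain bytes (no Mach-O types):

#include <cstring>
#include <vector>

static std::vector<unsigned char> SectionData;  // stands in for BinaryObject

size_t reserveEntry(size_t Size) {
  size_t Offset = SectionData.size();                // what CPLocations records
  SectionData.resize(SectionData.size() + Size, 0);  // zero-filled placeholder
  return Offset;
}

void patchEntry(size_t Offset, const void *Bytes, size_t Size) {
  std::memcpy(&SectionData[Offset], Bytes, Size);    // InitMem's role, minus relocs
}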
@@ -165,14 +155,13 @@ void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { for (unsigned j = 0; j < Size; ++j) SecDataOut.outbyte(0); - MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i], - TM.getTargetData(), Sec->Relocations); + MachOWriter::InitMem(CP[i].Val.ConstVal, CPLocations[i], + TM.getTargetData(), Sec); } } /// emitJumpTables - Emit all the jump tables for a given jump table info /// record to the appropriate section. - void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return; @@ -183,24 +172,21 @@ void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { MachOSection *Sec = MOW.getJumpTableSection(); unsigned TextSecIndex = MOW.getTextSection()->Index; - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); for (unsigned i = 0, e = JT.size(); i != e; ++i) { // For each jump table, record its offset from the start of the section, // reserve space for the relocations to the MBBs, and add the relocations. const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; - JTLocations.push_back(Sec->SectionData.size()); + JTLocations.push_back(Sec->size()); for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(), - MBBs[mi])); + MachineRelocation MR(MOW.GetJTRelocation(Sec->size(), MBBs[mi])); MR.setResultPointer((void *)JTLocations[i]); MR.setConstantVal(TextSecIndex); - Sec->Relocations.push_back(MR); + Sec->addRelocation(MR); SecDataOut.outaddr(0); } } - // FIXME: remove when we have unified size + output buffer - Sec->size = Sec->SectionData.size(); } } // end namespace llvm diff --git a/lib/CodeGen/MachOCodeEmitter.h b/lib/CodeGen/MachOCodeEmitter.h index 0a6e4e4d19ec3..475244646bd13 100644 --- a/lib/CodeGen/MachOCodeEmitter.h +++ b/lib/CodeGen/MachOCodeEmitter.h @@ -10,16 +10,17 @@ #ifndef MACHOCODEEMITTER_H #define MACHOCODEEMITTER_H -#include "MachOWriter.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include <vector> +#include "llvm/CodeGen/ObjectCodeEmitter.h" +#include <map> namespace llvm { +class MachOWriter; + /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code /// for functions to the Mach-O file. -class MachOCodeEmitter : public MachineCodeEmitter { +class MachOCodeEmitter : public ObjectCodeEmitter { MachOWriter &MOW; /// Target machine description. @@ -29,36 +30,16 @@ class MachOCodeEmitter : public MachineCodeEmitter { /// machine directly, indicating what header values and flags to set. bool is64Bit, isLittleEndian; - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// Relocations - These are the relocations that the function needs, as /// emitted. std::vector<MachineRelocation> Relocations; - - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector<uintptr_t> CPLocations; - - /// CPSections - This is a map of constant pool indices to the MachOSection - /// containing the constant pool entry for that index. - std::vector<unsigned> CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector<uintptr_t> JTLocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback.
- std::vector<uintptr_t> MBBLocations; - + + std::map<uint64_t, uintptr_t> Labels; + public: - MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) - { - is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; - isLittleEndian = TM.getTargetData()->isLittleEndian(); - TAI = TM.getTargetAsmInfo(); - } + MachOCodeEmitter(MachOWriter &mow, MachOSection &mos); virtual void startFunction(MachineFunction &MF); virtual bool finishFunction(MachineFunction &MF); @@ -66,61 +47,20 @@ public: virtual void addRelocation(const MachineRelocation &MR) { Relocations.push_back(MR); } - + void emitConstantPool(MachineConstantPool *MCP); void emitJumpTables(MachineJumpTableInfo *MJTI); - - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; - } - - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; + virtual void emitLabel(uint64_t LabelID) { + Labels[LabelID] = getCurrentPCOffset(); } virtual uintptr_t getLabelAddress(uint64_t Label) const { - assert(0 && "get Label not implemented"); - abort(); - return 0; - } - - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "emit Label not implemented"); - abort(); + return Labels.find(Label)->second; } virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { } - /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
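emitLabel and getLabelAddress graduate from abort() stubs to a real map above. One caveat worth noting: the new getLabelAddress dereferences Labels.find(Label) unchecked, so querying a label that was never emitted is silent misbehavior. A guarded sketch of the same bookkeeping:

#include <cassert>
#include <map>
#include <stdint.h>

static std::map<uint64_t, uintptr_t> Labels;

void emitLabel(uint64_t LabelID, uintptr_t PCOffset) {
  Labels[LabelID] = PCOffset;  // record where the label fell in the section
}

uintptr_t getLabelAddress(uint64_t LabelID) {
  std::map<uint64_t, uintptr_t>::const_iterator I = Labels.find(LabelID);
  assert(I != Labels.end() && "label was never emitted");
  return I->second;
}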
- virtual void startGVStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment = 1) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void startGVStub(const GlobalValue* F, void *Buffer, - unsigned StubSize) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void *finishGVStub(const GlobalValue* F) { - assert(0 && "JIT specific function called!"); - abort(); - return 0; - } - }; // end class MachOCodeEmitter } // end namespace llvm diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp index 163df6994aa62..73b15edba37ff 100644 --- a/lib/CodeGen/MachOWriter.cpp +++ b/lib/CodeGen/MachOWriter.cpp @@ -22,36 +22,32 @@ // //===----------------------------------------------------------------------===// +#include "MachO.h" #include "MachOWriter.h" #include "MachOCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/CodeGen/FileWriters.h" -#include "llvm/CodeGen/MachineCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetJITInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetMachOWriterInfo.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/OutputBuffer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include -#include namespace llvm { /// AddMachOWriter - Concrete function to add the Mach-O writer to the function /// pass manager. -MachineCodeEmitter *AddMachOWriter(PassManagerBase &PM, +ObjectCodeEmitter *AddMachOWriter(PassManagerBase &PM, raw_ostream &O, TargetMachine &TM) { MachOWriter *MOW = new MachOWriter(O, TM); PM.add(MOW); - return &MOW->getMachineCodeEmitter(); + return MOW->getObjectCodeEmitter(); } //===----------------------------------------------------------------------===// @@ -65,15 +61,14 @@ MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm) is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; isLittleEndian = TM.getTargetData()->isLittleEndian(); - TAI = TM.getTargetAsmInfo(); + MAI = TM.getMCAsmInfo(); // Create the machine code emitter object for this target. - - MCE = new MachOCodeEmitter(*this); + MachOCE = new MachOCodeEmitter(*this, *getTextSection(true)); } MachOWriter::~MachOWriter() { - delete MCE; + delete MachOCE; } bool MachOWriter::doInitialization(Module &M) { @@ -97,9 +92,9 @@ bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { /// the Mach-O file to 'O'. bool MachOWriter::doFinalization(Module &M) { // FIXME: we don't handle debug info yet, we should probably do that. - // Okay, the.text section has been completed, build the .data, .bss, and // "common" sections next. 
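getSection, added a few hunks below, is a get-or-create cache keyed by the concatenated segment and section names (safe for the fixed Mach-O names used here, though in general seg+sect concatenation could alias). A generic sketch of the pattern, folded into a single map probe:

#include <map>
#include <string>
#include <vector>

struct Section { std::string Seg, Sect; unsigned Index; };

static std::map<std::string, Section*> Lookup;
static std::vector<Section*> List;

Section *getOrCreateSection(const std::string &Seg, const std::string &Sect) {
  Section *&Slot = Lookup[Seg + Sect];  // inserts a null entry if missing
  if (!Slot) {
    Slot = new Section();
    Slot->Seg = Seg;
    Slot->Sect = Sect;
    List.push_back(Slot);
    Slot->Index = List.size();          // 1-based index, as the writer uses it
  }
  return Slot;
}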
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) EmitGlobal(I); @@ -125,6 +120,89 @@ bool MachOWriter::doFinalization(Module &M) { return false; } +// getConstSection - Get constant section for Constant 'C' +MachOSection *MachOWriter::getConstSection(Constant *C) { + const ConstantArray *CVA = dyn_cast<ConstantArray>(C); + if (CVA && CVA->isCString()) + return getSection("__TEXT", "__cstring", + MachOSection::S_CSTRING_LITERALS); + + const Type *Ty = C->getType(); + if (Ty->isPrimitiveType() || Ty->isInteger()) { + unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); + switch(Size) { + default: break; // Fall through to __TEXT,__const + case 4: + return getSection("__TEXT", "__literal4", + MachOSection::S_4BYTE_LITERALS); + case 8: + return getSection("__TEXT", "__literal8", + MachOSection::S_8BYTE_LITERALS); + case 16: + return getSection("__TEXT", "__literal16", + MachOSection::S_16BYTE_LITERALS); + } + } + return getSection("__TEXT", "__const"); +} + +// getJumpTableSection - Select the Jump Table section +MachOSection *MachOWriter::getJumpTableSection() { + if (TM.getRelocationModel() == Reloc::PIC_) + return getTextSection(false); + else + return getSection("__TEXT", "__const"); +} + +// getSection - Return the section with the specified name, creating a new +// section if one does not already exist. +MachOSection *MachOWriter::getSection(const std::string &seg, + const std::string &sect, + unsigned Flags /* = 0 */ ) { + MachOSection *MOS = SectionLookup[seg+sect]; + if (MOS) return MOS; + + MOS = new MachOSection(seg, sect); + SectionList.push_back(MOS); + MOS->Index = SectionList.size(); + MOS->flags = MachOSection::S_REGULAR | Flags; + SectionLookup[seg+sect] = MOS; + return MOS; +} + +// getTextSection - Return text section with different flags for code/data +MachOSection *MachOWriter::getTextSection(bool isCode /* = true */ ) { + if (isCode) + return getSection("__TEXT", "__text", + MachOSection::S_ATTR_PURE_INSTRUCTIONS | + MachOSection::S_ATTR_SOME_INSTRUCTIONS); + else + return getSection("__TEXT", "__text"); +} + +MachOSection *MachOWriter::getBSSSection() { + return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL); +} + +// GetJTRelocation - Get a new BB relocation based +// on target information. +MachineRelocation MachOWriter::GetJTRelocation(unsigned Offset, + MachineBasicBlock *MBB) const { + return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB); +} + +// GetTargetRelocation - Returns the number of relocations. +unsigned MachOWriter::GetTargetRelocation(MachineRelocation &MR, + unsigned FromIdx, unsigned ToAddr, + unsigned ToIndex, OutputBuffer &RelocOut, + OutputBuffer &SecOut, bool Scattered, + bool Extern) { + return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr, + ToIndex, RelocOut, + SecOut, Scattered, + Extern); +} + void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { const Type *Ty = GV->getType()->getElementType(); unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); @@ -133,37 +211,31 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { // Reserve space in the .bss section for this symbol while maintaining the // desired section alignment, which must be at least as much as required by // this symbol.
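The hunk that follows swaps AddSymbolToSection's hand-rolled size rounding, and the compensating zero-byte padding loop, for a single BinaryObject::emitAlignment call. The arithmetic being hidden is the usual power-of-two mask trick, with the alignment stored as log2 (the form Log2_32 produces):

#include <stdint.h>

uint64_t alignTo(uint64_t Offset, unsigned Log2Align) {
  uint64_t Mask = (uint64_t(1) << Log2Align) - 1;
  return (Offset + Mask) & ~Mask;  // e.g. alignTo(13, 3) == 16
}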
- OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); + OutputBuffer SecDataOut(Sec->getData(), is64Bit, isLittleEndian); if (Align) { - uint64_t OrigSize = Sec->size; Align = Log2_32(Align); Sec->align = std::max(unsigned(Sec->align), Align); - Sec->size = (Sec->size + Align - 1) & ~(Align-1); - // Add alignment padding to buffer as well. - // FIXME: remove when we have unified size + output buffer - unsigned AlignedSize = Sec->size - OrigSize; - for (unsigned i = 0; i < AlignedSize; ++i) - SecDataOut.outbyte(0); + Sec->emitAlignment(Sec->align); } // Globals without external linkage apparently do not go in the symbol table. if (!GV->hasLocalLinkage()) { - MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TAI); - Sym.n_value = Sec->size; + MachOSym Sym(GV, Mang->getMangledName(GV), Sec->Index, MAI); + Sym.n_value = Sec->size(); SymbolTable.push_back(Sym); } // Record the offset of the symbol, and then allocate space for it. // FIXME: remove when we have unified size + output buffer - Sec->size += Size; // Now that we know what section the GlovalVariable is going to be emitted // into, update our mappings. // FIXME: We may also need to update this when outputting non-GlobalVariable // GlobalValues such as functions. + GVSection[GV] = Sec; - GVOffset[GV] = Sec->SectionData.size(); + GVOffset[GV] = Sec->size(); // Allocate space in the section for the global. for (unsigned i = 0; i < Size; ++i) @@ -183,8 +255,8 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { // merged with other symbols. if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || GV->hasCommonLinkage()) { - MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), - MachOSym::NO_SECT, TAI); + MachOSym ExtOrCommonSym(GV, Mang->getMangledName(GV), + MachOSym::NO_SECT, MAI); // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in // bytes of the symbol. ExtOrCommonSym.n_value = Size; @@ -205,8 +277,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : getDataSection(); AddSymbolToSection(Sec, GV); - InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV], - TM.getTargetData(), Sec->Relocations); + InitMem(GV->getInitializer(), GVOffset[GV], TM.getTargetData(), Sec); } @@ -214,6 +285,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { void MachOWriter::EmitHeaderAndLoadCommands() { // Step #0: Fill in the segment load command size, since we need it to figure // out the rest of the header fields + MachOSegment SEG("", is64Bit); SEG.nsects = SectionList.size(); SEG.cmdsize = SEG.cmdSize(is64Bit) + @@ -231,7 +303,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { // Step #3: write the header to the file // Local alias to shortenify coming code. - DataBuffer &FH = Header.HeaderData; + std::vector &FH = Header.HeaderData; OutputBuffer FHOut(FH, is64Bit, isLittleEndian); FHOut.outword(Header.magic); @@ -247,7 +319,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { // Step #4: Finish filling in the segment load command and write it out for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) - SEG.filesize += (*I)->size; + SEG.filesize += (*I)->size(); SEG.vmsize = SEG.filesize; SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; @@ -271,9 +343,8 @@ void MachOWriter::EmitHeaderAndLoadCommands() { MachOSection *MOS = *I; MOS->addr = currentAddr; MOS->offset = currentAddr + SEG.fileoff; - // FIXME: do we need to do something with alignment here? 
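The section layout loop around this hunk is a single running cursor: one accumulating address places each section in memory and, shifted by the segment's file offset, in the file. A self-contained sketch; note that no padding is inserted between sections, which presumably relies on each section having already aligned its own size via emitAlignment:

#include <stdint.h>
#include <vector>

struct Sec { uint64_t addr, offset, size; };

void layoutSections(std::vector<Sec> &Sections, uint64_t SegFileOff) {
  uint64_t Cur = 0;
  for (size_t i = 0, e = Sections.size(); i != e; ++i) {
    Sections[i].addr = Cur;                 // VM address within the segment
    Sections[i].offset = Cur + SegFileOff;  // position in the output file
    Cur += Sections[i].size;                // no inter-section padding added
  }
}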
- currentAddr += MOS->size; + currentAddr += MOS->size(); } // Step #6: Emit the symbol table to temporary buffers, so that we know the @@ -288,6 +359,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { MachOSection *MOS = *I; + // Convert the relocations to target-specific relocations, and fill in the // relocation offset for this section. CalculateRelocations(*MOS); @@ -298,7 +370,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { FHOut.outstring(MOS->sectname, 16); FHOut.outstring(MOS->segname, 16); FHOut.outaddr(MOS->addr); - FHOut.outaddr(MOS->size); + FHOut.outaddr(MOS->size()); FHOut.outword(MOS->offset); FHOut.outword(MOS->align); FHOut.outword(MOS->reloff); @@ -351,24 +423,26 @@ void MachOWriter::EmitHeaderAndLoadCommands() { /// EmitSections - Now that we have constructed the file header and load /// commands, emit the data for each section to the file. - void MachOWriter::EmitSections() { for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the contents of each section - O.write((char*)&(*I)->SectionData[0], (*I)->size); + if ((*I)->size()) + O.write((char*)&(*I)->getData()[0], (*I)->size()); } + +/// EmitRelocations - emit relocation data from buffer. void MachOWriter::EmitRelocations() { for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the relocation entry data for each section. - O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); + if ((*I)->RelocBuffer.size()) + O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); } /// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them /// each a string table index so that they appear in the correct order in the /// output file. - void MachOWriter::BufferSymbolAndStringTable() { // The order of the symbol table is: // 1. local symbols @@ -377,11 +451,10 @@ void MachOWriter::BufferSymbolAndStringTable() { // Before sorting the symbols, check the PendingGlobals for any undefined // globals that need to be put in the symbol table. - for (std::vector::iterator I = PendingGlobals.begin(), E = PendingGlobals.end(); I != E; ++I) { if (GVOffset[*I] == 0 && GVSection[*I] == 0) { - MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TAI); + MachOSym UndfSym(*I, Mang->getMangledName(*I), MachOSym::NO_SECT, MAI); SymbolTable.push_back(UndfSym); GVOffset[*I] = -1; } @@ -389,19 +462,16 @@ void MachOWriter::BufferSymbolAndStringTable() { // Sort the symbols by name, so that when we partition the symbols by scope // of definition, we won't have to sort by name within each partition. - std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSym::SymCmp()); // Parition the symbol table entries so that all local symbols come before // all symbols with external linkage. { 1 | 2 3 } - std::partition(SymbolTable.begin(), SymbolTable.end(), MachOSym::PartitionByLocal); // Advance iterator to beginning of external symbols and partition so that // all external symbols defined in this module come before all external // symbols defined elsewhere. { 1 | 2 | 3 } - for (std::vector::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { if (!MachOSym::PartitionByLocal(*I)) { @@ -413,7 +483,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // Calculate the starting index for each of the local, extern defined, and // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB // load command. 
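The ordering contract spelled out in the comments above is { locals | externals defined here | undefined externals }, name-sorted within each group. A self-contained sketch of sort-then-partition; it uses stable_partition, whereas the code above calls std::partition, which is not guaranteed to preserve the name order the preceding sort established:

#include <algorithm>
#include <string>
#include <vector>

struct Sym { std::string Name; bool Local; bool DefinedHere; };

static bool byName(const Sym &A, const Sym &B) { return A.Name < B.Name; }
static bool isLocal(const Sym &S) { return S.Local; }
static bool isDefinedHere(const Sym &S) { return S.DefinedHere; }

void orderSymbolTable(std::vector<Sym> &Syms) {
  std::sort(Syms.begin(), Syms.end(), byName);
  // { locals | all external }
  std::vector<Sym>::iterator Ext =
      std::stable_partition(Syms.begin(), Syms.end(), isLocal);
  // { locals | external defined | external undefined }
  std::stable_partition(Ext, Syms.end(), isDefinedHere);
}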
- for (std::vector::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { if (MachOSym::PartitionByLocal(*I)) { @@ -430,7 +499,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // Write out a leading zero byte when emitting string table, for n_strx == 0 // which means an empty string. - OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian); StrTOut.outbyte(0); @@ -439,7 +507,6 @@ void MachOWriter::BufferSymbolAndStringTable() { // 2. strings for local symbols // Since this is the opposite order from the symbol table, which we have just // sorted, we can walk the symbol table backwards to output the string table. - for (std::vector::reverse_iterator I = SymbolTable.rbegin(), E = SymbolTable.rend(); I != E; ++I) { if (I->GVName == "") { @@ -478,24 +545,22 @@ void MachOWriter::BufferSymbolAndStringTable() { /// and the offset into that section. From this information, create the /// appropriate target-specific MachORelocation type and add buffer it to be /// written out after we are finished writing out sections. - void MachOWriter::CalculateRelocations(MachOSection &MOS) { - for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) { - MachineRelocation &MR = MOS.Relocations[i]; + std::vector Relocations = MOS.getRelocations(); + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; unsigned TargetSection = MR.getConstantVal(); unsigned TargetAddr = 0; unsigned TargetIndex = 0; // This is a scattered relocation entry if it points to a global value with // a non-zero offset. - bool Scattered = false; bool Extern = false; // Since we may not have seen the GlobalValue we were interested in yet at // the time we emitted the relocation for it, fix it up now so that it // points to the offset into the correct section. - if (MR.isGlobalValue()) { GlobalValue *GV = MR.getGlobalValue(); MachOSection *MOSPtr = GVSection[GV]; @@ -503,7 +568,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // If we have never seen the global before, it must be to a symbol // defined in another module (N_UNDF). - if (!MOSPtr) { // FIXME: need to append stub suffix Extern = true; @@ -518,7 +582,6 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // If the symbol is locally defined, pass in the address of the section and // the section index to the code which will generate the target relocation. - if (!Extern) { MachOSection &To = *SectionList[TargetSection - 1]; TargetAddr = To.addr; @@ -526,7 +589,7 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { } OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian); - OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian); + OutputBuffer SecOut(MOS.getData(), is64Bit, isLittleEndian); MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex, RelocOut, SecOut, Scattered, Extern); @@ -535,12 +598,11 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // InitMem - Write the value of a Constant to the specified memory location, // converting it into bytes and relocations. 
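InitMem, whose new signature follows, flattens arbitrarily nested constant initializers with an explicit worklist of (constant, destination) pairs rather than recursion: aggregates push their members back with adjusted offsets, scalars write bytes or record a relocation. A generic sketch of the traversal (hypothetical Node type, not llvm::Constant):

#include <cstddef>
#include <utility>
#include <vector>

struct Node {
  bool Leaf;
  std::vector<std::pair<Node*, size_t> > Members;  // child + relative offset
};

void initMem(Node *Root, size_t RootOff) {
  typedef std::pair<Node*, size_t> Item;
  std::vector<Item> Work(1, Item(Root, RootOff));
  while (!Work.empty()) {
    Item Cur = Work.back();
    Work.pop_back();
    if (Cur.first->Leaf) {
      // write scalar bytes / record a relocation at offset Cur.second
    } else {
      for (size_t i = 0, e = Cur.first->Members.size(); i != e; ++i)
        Work.push_back(Item(Cur.first->Members[i].first,
                            Cur.second + Cur.first->Members[i].second));
    }
  }
}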
- -void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, - const TargetData *TD, - std::vector &MRs) { +void MachOWriter::InitMem(const Constant *C, uintptr_t Offset, + const TargetData *TD, MachOSection* mos) { typedef std::pair CPair; std::vector WorkList; + uint8_t *Addr = &mos->getData()[0]; WorkList.push_back(CPair(C,(intptr_t)Addr + Offset)); @@ -572,9 +634,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, } case Instruction::Add: default: - cerr << "ConstantExpr not handled as global var init: " << *CE << "\n"; - abort(); - break; + errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n"; + llvm_unreachable(0); } } else if (PC->getType()->isSingleValueType()) { unsigned char *ptr = (unsigned char *)PA; @@ -608,7 +669,7 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, ptr[6] = val >> 48; ptr[7] = val >> 56; } else { - assert(0 && "Not implemented: bit widths > 64"); + llvm_unreachable("Not implemented: bit widths > 64"); } break; } @@ -643,17 +704,19 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, memset(ptr, 0, TD->getPointerSize()); else if (const GlobalValue* GV = dyn_cast(PC)) { // FIXME: what about function stubs? - MRs.push_back(MachineRelocation::getGV(PA-(intptr_t)Addr, + mos->addRelocation(MachineRelocation::getGV(PA-(intptr_t)Addr, MachineRelocation::VANILLA, const_cast(GV), ScatteredOffset)); ScatteredOffset = 0; } else - assert(0 && "Unknown constant pointer type!"); + llvm_unreachable("Unknown constant pointer type!"); break; default: - cerr << "ERROR: Constant unimp for type: " << *PC->getType() << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "ERROR: Constant unimp for type: " << *PC->getType(); + llvm_report_error(Msg.str()); } } else if (isa(PC)) { memset((void*)PA, 0, (size_t)TD->getTypeAllocSize(PC->getType())); @@ -669,8 +732,8 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, WorkList.push_back(CPair(CPS->getOperand(i), PA+SL->getElementOffset(i))); } else { - cerr << "Bad Type: " << *PC->getType() << "\n"; - assert(0 && "Unknown constant type to initialize memory with!"); + errs() << "Bad Type: " << *PC->getType() << "\n"; + llvm_unreachable("Unknown constant type to initialize memory with!"); } } } @@ -680,13 +743,14 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, //===----------------------------------------------------------------------===// MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - const TargetAsmInfo *TAI) : + const MCAsmInfo *MAI) : GV(gv), n_strx(0), n_type(sect == NO_SECT ? N_UNDF : N_SECT), n_sect(sect), n_desc(0), n_value(0) { + // FIXME: This is completely broken, it should use the mangler interface. switch (GV->getLinkage()) { default: - assert(0 && "Unexpected linkage type!"); + llvm_unreachable("Unexpected linkage type!"); break; case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: @@ -695,17 +759,19 @@ MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, case GlobalValue::CommonLinkage: assert(!isa(gv) && "Unexpected linkage type for Function!"); case GlobalValue::ExternalLinkage: - GVName = TAI->getGlobalPrefix() + name; + GVName = MAI->getGlobalPrefix() + name; n_type |= GV->hasHiddenVisibility() ? 
N_PEXT : N_EXT; break; case GlobalValue::PrivateLinkage: - GVName = TAI->getPrivateGlobalPrefix() + name; + GVName = MAI->getPrivateGlobalPrefix() + name; + break; + case GlobalValue::LinkerPrivateLinkage: + GVName = MAI->getLinkerPrivateGlobalPrefix() + name; break; case GlobalValue::InternalLinkage: - GVName = TAI->getGlobalPrefix() + name; + GVName = MAI->getGlobalPrefix() + name; break; } } } // end namespace llvm - diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h index 3af2b0af4b78f..9273f3854863c 100644 --- a/lib/CodeGen/MachOWriter.h +++ b/lib/CodeGen/MachOWriter.h @@ -14,22 +14,28 @@ #ifndef MACHOWRITER_H #define MACHOWRITER_H -#include "MachO.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetMachOWriterInfo.h" +#include #include namespace llvm { + class Constant; class GlobalVariable; class Mangler; - class MachineCodeEmitter; + class MachineBasicBlock; + class MachineRelocation; class MachOCodeEmitter; + struct MachODySymTab; + struct MachOHeader; + struct MachOSection; + struct MachOSym; + class TargetData; + class TargetMachine; + class MCAsmInfo; + class ObjectCodeEmitter; class OutputBuffer; class raw_ostream; - /// MachOWriter - This class implements the common target-independent code for /// writing Mach-O files. Targets should derive a class from this to /// parameterize the output format. @@ -38,8 +44,9 @@ namespace llvm { friend class MachOCodeEmitter; public: static char ID; - MachineCodeEmitter &getMachineCodeEmitter() const { - return *(MachineCodeEmitter*)MCE; + + ObjectCodeEmitter *getObjectCodeEmitter() { + return reinterpret_cast(MachOCE); } MachOWriter(raw_ostream &O, TargetMachine &TM); @@ -61,36 +68,30 @@ namespace llvm { /// Mang - The object used to perform name mangling for this module. /// Mangler *Mang; - - /// MCE - The MachineCodeEmitter object that we are exposing to emit machine - /// code for functions to the .o file. - MachOCodeEmitter *MCE; + /// MachOCE - The MachineCodeEmitter object that we are exposing to emit + /// machine code for functions to the .o file. + MachOCodeEmitter *MachOCE; /// is64Bit/isLittleEndian - This information is inferred from the target /// machine directly, indicating what header values and flags to set. - bool is64Bit, isLittleEndian; // Target Asm Info - - const TargetAsmInfo *TAI; + const MCAsmInfo *MAI; /// Header - An instance of MachOHeader that we will update while we build /// the file, and then emit during finalization. - MachOHeader Header; /// doInitialization - Emit the file header and all of the global variables /// for the module to the Mach-O file. - bool doInitialization(Module &M); bool runOnMachineFunction(MachineFunction &MF); /// doFinalization - Now that the module has been completely processed, emit /// the Mach-O file to 'O'. - bool doFinalization(Module &M); private: @@ -98,85 +99,37 @@ namespace llvm { /// SectionList - This is the list of sections that we have emitted to the /// file. Once the file has been completely built, the segment load command /// SectionCommands are constructed from this info. - std::vector SectionList; /// SectionLookup - This is a mapping from section name to SectionList entry - std::map SectionLookup; - + /// GVSection - This is a mapping from a GlobalValue to a MachOSection, /// to aid in emitting relocations. 
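Looking back at the MachOSym constructor above: beyond setting N_PEXT/N_EXT, the linkage switch only selects a name prefix. Condensed into a hypothetical helper (SymbolPrefixFor is invented; the three accessors are the MCAsmInfo calls the patch switches to):

    #include "llvm/GlobalValue.h"
    #include "llvm/MC/MCAsmInfo.h"
    #include <string>
    using namespace llvm;

    static std::string SymbolPrefixFor(const GlobalValue *GV,
                                       const MCAsmInfo *MAI) {
      switch (GV->getLinkage()) {
      case GlobalValue::PrivateLinkage:
        return MAI->getPrivateGlobalPrefix();       // assembler-private labels
      case GlobalValue::LinkerPrivateLinkage:
        return MAI->getLinkerPrivateGlobalPrefix(); // kept in the .o, dropped by ld
      default:                                      // external/internal/weak/common
        return MAI->getGlobalPrefix();
      }
    }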
- std::map GVSection; - /// GVOffset - This is a mapping from a GlobalValue to an offset from the + /// GVOffset - This is a mapping from a GlobalValue to an offset from the /// start of the section in which the GV resides, to aid in emitting /// relocations. - std::map GVOffset; /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. - MachOSection *getSection(const std::string &seg, const std::string §, - unsigned Flags = 0) { - MachOSection *MOS = SectionLookup[seg+sect]; - if (MOS) return MOS; - - MOS = new MachOSection(seg, sect); - SectionList.push_back(MOS); - MOS->Index = SectionList.size(); - MOS->flags = MachOSection::S_REGULAR | Flags; - SectionLookup[seg+sect] = MOS; - return MOS; - } - MachOSection *getTextSection(bool isCode = true) { - if (isCode) - return getSection("__TEXT", "__text", - MachOSection::S_ATTR_PURE_INSTRUCTIONS | - MachOSection::S_ATTR_SOME_INSTRUCTIONS); - else - return getSection("__TEXT", "__text"); - } - MachOSection *getBSSSection() { - return getSection("__DATA", "__bss", MachOSection::S_ZEROFILL); - } + unsigned Flags = 0); + + /// getTextSection - Return text section with different flags for code/data + MachOSection *getTextSection(bool isCode = true); + MachOSection *getDataSection() { return getSection("__DATA", "__data"); } - MachOSection *getConstSection(Constant *C) { - const ConstantArray *CVA = dyn_cast(C); - if (CVA && CVA->isCString()) - return getSection("__TEXT", "__cstring", - MachOSection::S_CSTRING_LITERALS); - - const Type *Ty = C->getType(); - if (Ty->isPrimitiveType() || Ty->isInteger()) { - unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); - switch(Size) { - default: break; // Fall through to __TEXT,__const - case 4: - return getSection("__TEXT", "__literal4", - MachOSection::S_4BYTE_LITERALS); - case 8: - return getSection("__TEXT", "__literal8", - MachOSection::S_8BYTE_LITERALS); - case 16: - return getSection("__TEXT", "__literal16", - MachOSection::S_16BYTE_LITERALS); - } - } - return getSection("__TEXT", "__const"); - } - MachOSection *getJumpTableSection() { - if (TM.getRelocationModel() == Reloc::PIC_) - return getTextSection(false); - else - return getSection("__TEXT", "__const"); - } - - /// MachOSymTab - This struct contains information about the offsets and + + MachOSection *getBSSSection(); + MachOSection *getConstSection(Constant *C); + MachOSection *getJumpTableSection(); + + /// MachOSymTab - This struct contains information about the offsets and /// size of symbol table information. /// segment. struct MachOSymTab { @@ -191,43 +144,42 @@ namespace llvm { // see enum { LC_SYMTAB = 0x02 // link-edit stab symbol table info }; - + MachOSymTab() : cmd(LC_SYMTAB), cmdsize(6 * sizeof(uint32_t)), symoff(0), nsyms(0), stroff(0), strsize(0) { } }; - + /// SymTab - The "stab" style symbol table information - MachOSymTab SymTab; + MachOSymTab SymTab; /// DySymTab - symbol table info for the dynamic link editor MachODySymTab DySymTab; protected: - + /// SymbolTable - This is the list of symbols we have emitted to the file. /// This actually gets rearranged before emission to the file (to put the /// local symbols first in the list). std::vector SymbolTable; - + /// SymT - A buffer to hold the symbol table before we write it out at the /// appropriate location in the file. - DataBuffer SymT; - + std::vector SymT; + /// StrT - A buffer to hold the string table before we write it out at the /// appropriate location in the file. 
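getSection, whose inline body is deleted above, is a memoized factory: one map keyed on the segment and section names, creating, indexing, and flagging a fresh MachOSection on first request. A sketch of what the new out-of-line definition presumably looks like, folding the original pair of map lookups into a single one (assuming SectionLookup maps std::string to MachOSection*, as the removed code implies):

    MachOSection *MachOWriter::getSection(const std::string &seg,
                                          const std::string &sect,
                                          unsigned Flags) {
      MachOSection *&Slot = SectionLookup[seg + sect]; // one lookup, by reference
      if (!Slot) {
        Slot = new MachOSection(seg, sect);
        SectionList.push_back(Slot);
        Slot->Index = SectionList.size();             // section indices are 1-based
        Slot->flags = MachOSection::S_REGULAR | Flags;
      }
      return Slot;
    }

Keying on the raw concatenation could in principle collide ("__TEXT"+"Xfoo" vs. "__TEXTX"+"foo"), but the fixed Mach-O segment names keep that theoretical.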
- DataBuffer StrT; - + std::vector StrT; + /// PendingSyms - This is a list of externally defined symbols that we have /// been asked to emit, but have not seen a reference to. When a reference /// is seen, the symbol will move from this list to the SymbolTable. std::vector PendingGlobals; - + /// DynamicSymbolTable - This is just a vector of indices into /// SymbolTable to aid in emitting the DYSYMTAB load command. std::vector DynamicSymbolTable; - - static void InitMem(const Constant *C, void *Addr, intptr_t Offset, - const TargetData *TD, - std::vector &MRs); + + static void InitMem(const Constant *C, uintptr_t Offset, + const TargetData *TD, MachOSection* mos); private: void AddSymbolToSection(MachOSection *MOS, GlobalVariable *GV); @@ -238,25 +190,16 @@ namespace llvm { void BufferSymbolAndStringTable(); void CalculateRelocations(MachOSection &MOS); + // GetJTRelocation - Get a relocation a new BB relocation based + // on target information. MachineRelocation GetJTRelocation(unsigned Offset, - MachineBasicBlock *MBB) const { - return TM.getMachOWriterInfo()->GetJTRelocation(Offset, MBB); - } + MachineBasicBlock *MBB) const; /// GetTargetRelocation - Returns the number of relocations. - unsigned GetTargetRelocation(MachineRelocation &MR, - unsigned FromIdx, - unsigned ToAddr, - unsigned ToIndex, - OutputBuffer &RelocOut, - OutputBuffer &SecOut, - bool Scattered, - bool Extern) { - return TM.getMachOWriterInfo()->GetTargetRelocation(MR, FromIdx, ToAddr, - ToIndex, RelocOut, - SecOut, Scattered, - Extern); - } + unsigned GetTargetRelocation(MachineRelocation &MR, unsigned FromIdx, + unsigned ToAddr, unsigned ToIndex, + OutputBuffer &RelocOut, OutputBuffer &SecOut, + bool Scattered, bool Extern); }; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 71e6b3e4d0f86..b3eb2da762812 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -31,7 +32,7 @@ MachineBasicBlock::~MachineBasicBlock() { LeakDetector::removeGarbageObject(this); } -std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) { +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { MBB.print(OS); return OS; } @@ -43,7 +44,7 @@ std::ostream& llvm::operator<<(std::ostream &OS, const MachineBasicBlock &MBB) { /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it /// gets the next available unique MBB number. If it is removed from a /// MachineFunction, it goes back to being #-1. -void ilist_traits::addNodeToList(MachineBasicBlock* N) { +void ilist_traits::addNodeToList(MachineBasicBlock *N) { MachineFunction &MF = *N->getParent(); N->Number = MF.addToMBBNumbering(N); @@ -55,7 +56,7 @@ void ilist_traits::addNodeToList(MachineBasicBlock* N) { LeakDetector::removeGarbageObject(N); } -void ilist_traits::removeNodeFromList(MachineBasicBlock* N) { +void ilist_traits::removeNodeFromList(MachineBasicBlock *N) { N->getParent()->removeFromMBBNumbering(N->Number); N->Number = -1; LeakDetector::addGarbageObject(N); @@ -65,7 +66,7 @@ void ilist_traits::removeNodeFromList(MachineBasicBlock* N) { /// addNodeToList (MI) - When we add an instruction to a basic block /// list, we update its parent pointer and add its operands from reg use/def /// lists if appropriate. 
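The two ilist hooks above implement a small numbering protocol: insertion hands the block the next slot in the function's numbering table, and removal retires the slot and resets the block's number to -1. Modeled with invented stand-ins (MiniFunc and MiniBlock are not LLVM types; the real table lives in MachineFunction):

    #include <cassert>
    #include <vector>

    struct MiniBlock { int Number; MiniBlock() : Number(-1) {} };

    struct MiniFunc {
      std::vector<MiniBlock*> Numbering;
      int add(MiniBlock *B) {             // cf. addToMBBNumbering
        Numbering.push_back(B);
        return (int)Numbering.size() - 1; // hand out the next slot
      }
      void retire(int N) {                // cf. removeFromMBBNumbering
        assert(Numbering[N] && "block was not numbered");
        Numbering[N] = 0;                 // slot stays until a renumbering pass
      }
    };

    static void onInsert(MiniFunc &F, MiniBlock &B) { B.Number = F.add(&B); }
    static void onRemove(MiniFunc &F, MiniBlock &B) {
      F.retire(B.Number);
      B.Number = -1;                      // "not in any function"
    }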
-void ilist_traits::addNodeToList(MachineInstr* N) { +void ilist_traits::addNodeToList(MachineInstr *N) { assert(N->getParent() == 0 && "machine instruction already in a basic block"); N->setParent(Parent); @@ -80,7 +81,7 @@ void ilist_traits::addNodeToList(MachineInstr* N) { /// removeNodeFromList (MI) - When we remove an instruction from a basic block /// list, we update its parent pointer and remove its operands from reg use/def /// lists if appropriate. -void ilist_traits::removeNodeFromList(MachineInstr* N) { +void ilist_traits::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. @@ -94,10 +95,10 @@ void ilist_traits::removeNodeFromList(MachineInstr* N) { /// transferNodesFromList (MI) - When moving a range of instructions from one /// MBB list to another, we need to update the parent pointers and the use/def /// lists. -void ilist_traits::transferNodesFromList( - ilist_traits& fromList, - MachineBasicBlock::iterator first, - MachineBasicBlock::iterator last) { +void ilist_traits:: +transferNodesFromList(ilist_traits &fromList, + MachineBasicBlock::iterator first, + MachineBasicBlock::iterator last) { assert(Parent->getParent() == fromList.Parent->getParent() && "MachineInstr parent mismatch!"); @@ -123,21 +124,41 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { return I; } -bool -MachineBasicBlock::isOnlyReachableByFallthrough() const { - return !isLandingPad() && - !pred_empty() && - next(pred_begin()) == pred_end() && - (*pred_begin())->isLayoutSuccessor(this) && - ((*pred_begin())->empty() || - !(*pred_begin())->back().getDesc().isBarrier()); +/// isOnlyReachableViaFallthough - Return true if this basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool MachineBasicBlock::isOnlyReachableByFallthrough() const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (isLandingPad() || pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + const_pred_iterator PI = pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(this)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. 
+ const MachineInstr &LastInst = Pred->back(); + return !LastInst.getDesc().isBarrier(); } void MachineBasicBlock::dump() const { - print(*cerr.stream()); + print(errs()); } -static inline void OutputReg(std::ostream &os, unsigned RegNo, +static inline void OutputReg(raw_ostream &os, unsigned RegNo, const TargetRegisterInfo *TRI = 0) { if (!RegNo || TargetRegisterInfo::isPhysicalRegister(RegNo)) { if (TRI) @@ -148,16 +169,16 @@ static inline void OutputReg(std::ostream &os, unsigned RegNo, os << " %reg" << RegNo; } -void MachineBasicBlock::print(std::ostream &OS) const { +void MachineBasicBlock::print(raw_ostream &OS) const { const MachineFunction *MF = getParent(); - if(!MF) { + if (!MF) { OS << "Can't print out MachineBasicBlock because parent MachineFunction" << " is null\n"; return; } const BasicBlock *LBB = getBasicBlock(); - OS << "\n"; + OS << '\n'; if (LBB) OS << LBB->getName() << ": "; OS << (const void*)this << ", LLVM BB @" << (const void*) LBB << ", ID#" << getNumber(); @@ -170,18 +191,18 @@ void MachineBasicBlock::print(std::ostream &OS) const { OS << "Live Ins:"; for (const_livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) OutputReg(OS, *I, TRI); - OS << "\n"; + OS << '\n'; } // Print the preds of this block according to the CFG. if (!pred_empty()) { OS << " Predecessors according to CFG:"; for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) - OS << " " << *PI << " (#" << (*PI)->getNumber() << ")"; - OS << "\n"; + OS << ' ' << *PI << " (#" << (*PI)->getNumber() << ')'; + OS << '\n'; } for (const_iterator I = begin(); I != end(); ++I) { - OS << "\t"; + OS << '\t'; I->print(OS, &getParent()->getTarget()); } @@ -189,8 +210,8 @@ void MachineBasicBlock::print(std::ostream &OS) const { if (!succ_empty()) { OS << " Successors according to CFG:"; for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) - OS << " " << *SI << " (#" << (*SI)->getNumber() << ")"; - OS << "\n"; + OS << ' ' << *SI << " (#" << (*SI)->getNumber() << ')'; + OS << '\n'; } } @@ -245,16 +266,15 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { Predecessors.erase(I); } -void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) -{ +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - for(MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(), - end = fromMBB->succ_end(); iter != end; ++iter) { - addSuccessor(*iter); - } - while(!fromMBB->succ_empty()) + for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(), + E = fromMBB->succ_end(); I != E; ++I) + addSuccessor(*I); + + while (!fromMBB->succ_empty()) fromMBB->removeSuccessor(fromMBB->succ_begin()); } diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 37c86019d4a22..0f796f3952c31 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -51,3 +51,7 @@ MachineDominatorTree::~MachineDominatorTree() { void MachineDominatorTree::releaseMemory() { DT->releaseMemory(); } + +void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { + DT->print(OS); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 599efb8bd276c..b0ec809c69291 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Config/config.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include 
"llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" @@ -32,89 +33,56 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include -#include using namespace llvm; -bool MachineFunctionPass::runOnFunction(Function &F) { - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. - if (F.hasAvailableExternallyLinkage()) - return false; - - return runOnMachineFunction(MachineFunction::get(&F)); -} - namespace { struct VISIBILITY_HIDDEN Printer : public MachineFunctionPass { static char ID; - std::ostream *OS; + raw_ostream &OS; const std::string Banner; - Printer (std::ostream *os, const std::string &banner) + Printer(raw_ostream &os, const std::string &banner) : MachineFunctionPass(&ID), OS(os), Banner(banner) {} const char *getPassName() const { return "MachineFunction Printer"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) { - (*OS) << Banner; - MF.print (*OS); + OS << Banner; + MF.print(OS); return false; } }; char Printer::ID = 0; } -/// Returns a newly-created MachineFunction Printer pass. The default output -/// stream is std::cerr; the default banner is empty. +/// Returns a newly-created MachineFunction Printer pass. The default banner is +/// empty. /// -FunctionPass *llvm::createMachineFunctionPrinterPass(std::ostream *OS, +FunctionPass *llvm::createMachineFunctionPrinterPass(raw_ostream &OS, const std::string &Banner){ return new Printer(OS, Banner); } -namespace { - struct VISIBILITY_HIDDEN Deleter : public MachineFunctionPass { - static char ID; - Deleter() : MachineFunctionPass(&ID) {} - - const char *getPassName() const { return "Machine Code Deleter"; } - - bool runOnMachineFunction(MachineFunction &MF) { - // Delete the annotation from the function now. - MachineFunction::destruct(MF.getFunction()); - return true; - } - }; - char Deleter::ID = 0; -} - -/// MachineCodeDeletion Pass - This pass deletes all of the machine code for -/// the current function, which should happen after the function has been -/// emitted to a .s file or to memory. -FunctionPass *llvm::createMachineCodeDeleter() { - return new Deleter(); -} - - - //===---------------------------------------------------------------------===// // MachineFunction implementation //===---------------------------------------------------------------------===// +// Out of line virtual method. +MachineFunctionInfo::~MachineFunctionInfo() {} + void ilist_traits::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); } -MachineFunction::MachineFunction(const Function *F, +MachineFunction::MachineFunction(Function *F, const TargetMachine &TM) - : Annotation(AnnotationManager::getID("CodeGen::MachineCodeForFunction")), - Fn(F), Target(TM) { + : Fn(F), Target(TM) { if (TM.getRegisterInfo()) RegInfo = new (Allocator.Allocate()) MachineRegisterInfo(*TM.getRegisterInfo()); @@ -131,7 +99,8 @@ MachineFunction::MachineFunction(const Function *F, const TargetData &TD = *TM.getTargetData(); bool IsPic = TM.getRelocationModel() == Reloc::PIC_; unsigned EntrySize = IsPic ? 4 : TD.getPointerSize(); - unsigned TyAlignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty) + unsigned TyAlignment = IsPic ? 
+ TD.getABITypeAlignment(Type::getInt32Ty(F->getContext())) : TD.getPointerABIAlignment(); JumpTableInfo = new (Allocator.Allocate()) MachineJumpTableInfo(EntrySize, TyAlignment); @@ -221,11 +190,6 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { /// void MachineFunction::DeleteMachineInstr(MachineInstr *MI) { - // Clear the instructions memoperands. This must be done manually because - // the instruction's parent pointer is now null, so it can't properly - // deallocate them on its own. - MI->clearMemOperands(*this); - MI->~MachineInstr(); InstructionRecycler.Deallocate(Allocator, MI); } @@ -248,12 +212,99 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { BasicBlockRecycler.Deallocate(Allocator, MBB); } +MachineMemOperand * +MachineFunction::getMachineMemOperand(const Value *v, unsigned f, + int64_t o, uint64_t s, + unsigned base_alignment) { + return new (Allocator.Allocate()) + MachineMemOperand(v, f, o, s, base_alignment); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + int64_t Offset, uint64_t Size) { + return new (Allocator.Allocate()) + MachineMemOperand(MMO->getValue(), MMO->getFlags(), + int64_t(uint64_t(MMO->getOffset()) + + uint64_t(Offset)), + Size, MMO->getBaseAlignment()); +} + +MachineInstr::mmo_iterator +MachineFunction::allocateMemRefsArray(unsigned long Num) { + return Allocator.Allocate(Num); +} + +std::pair +MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isLoad()) + ++Num; + + // Allocate a new array and populate it with the load information. + MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isLoad()) { + if (!(*I)->isStore()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the store flag. + MachineMemOperand *JustLoad = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOStore, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustLoad; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +std::pair +MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isStore()) + ++Num; + + // Allocate a new array and populate it with the store information. + MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isStore()) { + if (!(*I)->isLoad()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the load flag. 
+ MachineMemOperand *JustStore = + getMachineMemOperand((*I)->getValue(), + (*I)->getFlags() & ~MachineMemOperand::MOLoad, + (*I)->getOffset(), (*I)->getSize(), + (*I)->getBaseAlignment()); + Result[Index] = JustStore; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + void MachineFunction::dump() const { - print(*cerr.stream()); + print(errs()); } -void MachineFunction::print(std::ostream &OS) const { - OS << "# Machine code for " << Fn->getName () << "():\n"; +void MachineFunction::print(raw_ostream &OS) const { + OS << "# Machine code for " << Fn->getName() << "():\n"; // Print Frame Information FrameInfo->print(*this, OS); @@ -262,10 +313,7 @@ void MachineFunction::print(std::ostream &OS) const { JumpTableInfo->print(OS); // Print Constant Pool - { - raw_os_ostream OSS(OS); - ConstantPool->print(OSS); - } + ConstantPool->print(OS); const TargetRegisterInfo *TRI = getTarget().getRegisterInfo(); @@ -279,32 +327,32 @@ void MachineFunction::print(std::ostream &OS) const { OS << " Reg #" << I->first; if (I->second) - OS << " in VR#" << I->second << " "; + OS << " in VR#" << I->second << ' '; } - OS << "\n"; + OS << '\n'; } if (RegInfo && !RegInfo->liveout_empty()) { OS << "Live Outs:"; for (MachineRegisterInfo::liveout_iterator I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) if (TRI) - OS << " " << TRI->getName(*I); + OS << ' ' << TRI->getName(*I); else OS << " Reg #" << *I; - OS << "\n"; + OS << '\n'; } - for (const_iterator BB = begin(); BB != end(); ++BB) + for (const_iterator BB = begin(), E = end(); BB != E; ++BB) BB->print(OS); - OS << "\n# End machine code for " << Fn->getName () << "().\n\n"; + OS << "\n# End machine code for " << Fn->getName() << "().\n\n"; } namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getName() + "' function"; + return "CFG for '" + F->getFunction()->getNameStr() + "' function"; } static std::string getNodeLabel(const MachineBasicBlock *Node, @@ -312,17 +360,18 @@ namespace llvm { bool ShortNames) { if (ShortNames && Node->getBasicBlock() && !Node->getBasicBlock()->getName().empty()) - return Node->getBasicBlock()->getName() + ":"; - - std::ostringstream Out; - if (ShortNames) { - Out << Node->getNumber() << ':'; - return Out.str(); + return Node->getBasicBlock()->getNameStr() + ":"; + + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (ShortNames) + OSS << Node->getNumber() << ':'; + else + Node->print(OSS); } - Node->print(Out); - - std::string OutStr = Out.str(); if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); // Process string output to make it nicer... 
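The getNodeLabel rewrite above leans on a raw_string_ostream idiom worth spelling out: the stream buffers its output, and leaving the scope runs the destructor, which flushes the buffer into the backing string. In isolation:

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static std::string DemoLabel(int Number) {
      std::string OutStr;
      {
        llvm::raw_string_ostream OSS(OutStr);
        OSS << Number << ':';    // buffered; OutStr may still be empty here
      }                          // destructor flushes into OutStr
      return OutStr;
    }

Calling OSS.str() flushes and returns the string too, so either form works; the scoped form keeps the flush implicit.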
@@ -339,59 +388,23 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName()); + ViewGraph(this, "mf" + getFunction()->getNameStr()); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getName(), true); + ViewGraph(this, "mf" + getFunction()->getNameStr(), true); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } -// The next two methods are used to construct and to retrieve -// the MachineCodeForFunction object for the given function. -// construct() -- Allocates and initializes for a given function and target -// get() -- Returns a handle to the object. -// This should not be called before "construct()" -// for a given Function. -// -MachineFunction& -MachineFunction::construct(const Function *Fn, const TargetMachine &Tar) -{ - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - assert(Fn->getAnnotation(MF_AID) == 0 && - "Object already exists for this function!"); - MachineFunction* mcInfo = new MachineFunction(Fn, Tar); - Fn->addAnnotation(mcInfo); - return *mcInfo; -} - -void MachineFunction::destruct(const Function *Fn) { - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - bool Deleted = Fn->deleteAnnotation(MF_AID); - assert(Deleted && "Machine code did not exist for function!"); - Deleted = Deleted; // silence warning when no assertions. -} - -MachineFunction& MachineFunction::get(const Function *F) -{ - AnnotationID MF_AID = - AnnotationManager::getID("CodeGen::MachineCodeForFunction"); - MachineFunction *mc = (MachineFunction*)F->getAnnotation(MF_AID); - assert(mc && "Call construct() method first to allocate the object"); - return *mc; -} - /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, @@ -402,23 +415,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, return VReg; } -/// getOrCreateDebugLocID - Look up the DebugLocTuple index with the given -/// source file, line, and column. If none currently exists, create a new -/// DebugLocTuple, and insert it into the DebugIdMap. -unsigned MachineFunction::getOrCreateDebugLocID(GlobalVariable *CompileUnit, - unsigned Line, unsigned Col) { - DebugLocTuple Tuple(CompileUnit, Line, Col); - DenseMap::iterator II - = DebugLocInfo.DebugIdMap.find(Tuple); - if (II != DebugLocInfo.DebugIdMap.end()) - return II->second; - // Add a new tuple. - unsigned Id = DebugLocInfo.DebugLocations.size(); - DebugLocInfo.DebugLocations.push_back(Tuple); - DebugLocInfo.DebugIdMap[Tuple] = Id; - return Id; -} - /// getDebugLocTuple - Get the DebugLocTuple for a given DebugLoc object. 
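The addLiveIn hunk above shows only the signature and the trailing return. Going by its doc comment, a plausible body would pair a freshly created virtual register with the physical one; this is a reconstruction for illustration, not text from the patch:

    // Presumed shape of MachineFunction::addLiveIn.
    unsigned MachineFunction::addLiveIn(unsigned PReg,
                                        const TargetRegisterClass *RC) {
      unsigned VReg = RegInfo->createVirtualRegister(RC); // fresh vreg of class RC
      RegInfo->addLiveIn(PReg, VReg); // record the physreg -> vreg pairing
      return VReg;
    }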
DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const { unsigned Idx = DL.getIndex(); @@ -444,7 +440,38 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, } -void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{ +BitVector +MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { + assert(MBB && "MBB must be valid"); + const MachineFunction *MF = MBB->getParent(); + assert(MF && "MBB must be part of a MachineFunction"); + const TargetMachine &TM = MF->getTarget(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + BV.set(*CSR); + + // The entry MBB always has all CSRs pristine. + if (MBB == &MF->front()) + return BV; + + // On other MBBs the saved CSRs are not pristine. + const std::vector &CSI = getCalleeSavedInfo(); + for (std::vector::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) + BV.reset(I->getReg()); + + return BV; +} + + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ const TargetFrameInfo *FI = MF.getTarget().getFrameInfo(); int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); @@ -481,10 +508,9 @@ void MachineFrameInfo::print(const MachineFunction &MF, std::ostream &OS) const{ } void MachineFrameInfo::dump(const MachineFunction &MF) const { - print(MF, *cerr.stream()); + print(MF, errs()); } - //===----------------------------------------------------------------------===// // MachineJumpTableInfo implementation //===----------------------------------------------------------------------===// @@ -521,7 +547,7 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, return MadeChange; } -void MachineJumpTableInfo::print(std::ostream &OS) const { +void MachineJumpTableInfo::print(raw_ostream &OS) const { // FIXME: this is lame, maybe we could print out the MBB numbers or something // like {1, 2, 4, 5, 3, 0} for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { @@ -530,7 +556,7 @@ void MachineJumpTableInfo::print(std::ostream &OS) const { } } -void MachineJumpTableInfo::dump() const { print(*cerr.stream()); } +void MachineJumpTableInfo::dump() const { print(errs()); } //===----------------------------------------------------------------------===// @@ -539,10 +565,17 @@ void MachineJumpTableInfo::dump() const { print(*cerr.stream()); } const Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) - return Val.MachineCPVal->getType(); + return Val.MachineCPVal->getType(); return Val.ConstVal->getType(); } + +unsigned MachineConstantPoolEntry::getRelocationInfo() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getRelocationInfo(); + return Val.ConstVal->getRelocationInfo(); +} + MachineConstantPool::~MachineConstantPool() { for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (Constants[i].isMachineConstantPoolEntry()) diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp new file mode 100644 index 0000000000000..56294d90398f7 --- /dev/null +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -0,0 +1,50 @@ +//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionAnalysis members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +using namespace llvm; + +// Register this pass with PassInfo directly to avoid having to define +// a default constructor. +static PassInfo +X("Machine Function Analysis", "machine-function-analysis", + intptr_t(&MachineFunctionAnalysis::ID), 0, + /*CFGOnly=*/false, /*is_analysis=*/true); + +char MachineFunctionAnalysis::ID = 0; + +MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm, + CodeGenOpt::Level OL) : + FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) { +} + +MachineFunctionAnalysis::~MachineFunctionAnalysis() { + releaseMemory(); + assert(!MF && "MachineFunctionAnalysis left initialized!"); +} + +bool MachineFunctionAnalysis::runOnFunction(Function &F) { + assert(!MF && "MachineFunctionAnalysis already initialized!"); + MF = new MachineFunction(&F, TM); + return false; +} + +void MachineFunctionAnalysis::releaseMemory() { + delete MF; + MF = 0; +} + +void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp new file mode 100644 index 0000000000000..2f8d4c9e7aa4d --- /dev/null +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -0,0 +1,50 @@ +//===-- MachineFunctionPass.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionPass members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +bool MachineFunctionPass::runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + MachineFunction &MF = getAnalysis().getMF(); + return runOnMachineFunction(MF); +} + +void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); + + // MachineFunctionPass preserves all LLVM IR passes, but there's no + // high-level way to express this. Instead, just list a bunch of + // passes explicitly. This does not include setPreservesCFG, + // because CodeGen overloads that to mean preserving the MachineBasicBlock + // CFG in addition to the LLVM IR CFG. 
+ AU.addPreserved(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("memdep"); + AU.addPreserved("live-values"); + AU.addPreserved("domtree"); + AU.addPreserved("domfrontier"); + AU.addPreserved("loops"); + AU.addPreserved("lda"); + + FunctionPass::getAnalysisUsage(AU); +} diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index d44305f333387..cbe5c7cb51e31 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -15,17 +15,20 @@ #include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/Value.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrDesc.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" using namespace llvm; @@ -156,7 +159,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return false; switch (getType()) { - default: assert(0 && "Unrecognized operand type"); + default: llvm_unreachable("Unrecognized operand type"); case MachineOperand::MO_Register: return getReg() == Other.getReg() && isDef() == Other.isDef() && getSubReg() == Other.getSubReg(); @@ -182,11 +185,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { /// print - Print the specified machine operand. /// -void MachineOperand::print(std::ostream &OS, const TargetMachine *TM) const { - raw_os_ostream RawOS(OS); - print(RawOS, TM); -} - void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { switch (getType()) { case MachineOperand::MO_Register: @@ -242,7 +240,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << getImm(); break; case MachineOperand::MO_FPImmediate: - if (getFPImm()->getType() == Type::FloatTy) + if (getFPImm()->getType()->isFloatTy()) OS << getFPImm()->getValueAPF().convertToFloat(); else OS << getFPImm()->getValueAPF().convertToDouble(); @@ -274,7 +272,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { OS << '>'; break; default: - assert(0 && "Unrecognized operand type"); + llvm_unreachable("Unrecognized operand type"); } if (unsigned TF = getTargetFlags()) @@ -289,7 +287,7 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, unsigned int a) : Offset(o), Size(s), V(v), Flags((f & 7) | ((Log2_32(a) + 1) << 3)) { - assert(isPowerOf2_32(a) && "Alignment is not a power of 2!"); + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -302,6 +300,66 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Flags); } +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. 
+ assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3); + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. + V = MMO->getValue(); + Offset = MMO->getOffset(); + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. +uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { + assert((MMO.isLoad() || MMO.isStore()) && + "SV has to be a load, store or both."); + + if (MMO.isVolatile()) + OS << "Volatile "; + + if (MMO.isLoad()) + OS << "LD"; + if (MMO.isStore()) + OS << "ST"; + OS << MMO.getSize(); + + // Print the address information. + OS << "["; + if (!MMO.getValue()) + OS << ""; + else + WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false); + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (MMO.getBaseAlignment() != MMO.getAlignment()) + OS << "(align=" << MMO.getBaseAlignment() << ")"; + + if (MMO.getOffset() != 0) + OS << "+" << MMO.getOffset(); + OS << "]"; + + // Print the alignment of the reference. + if (MMO.getBaseAlignment() != MMO.getAlignment() || + MMO.getBaseAlignment() != MMO.getSize()) + OS << "(align=" << MMO.getAlignment() << ")"; + + return OS; +} + //===----------------------------------------------------------------------===// // MachineInstr Implementation //===----------------------------------------------------------------------===// @@ -309,7 +367,8 @@ void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { + : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); } @@ -328,7 +387,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// TargetInstrDesc or the numOperands if it is not zero. (for /// instructions with variable number of operands). MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), Parent(0), + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -346,7 +405,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { if (!NoImp && TID->getImplicitDefs()) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) NumImplicitOps++; @@ -365,7 +425,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// basic block. 
/// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Parent(0), + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(DebugLoc::getUnknownLoc()) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) @@ -385,7 +445,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), + Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); if (TID->ImplicitDefs) for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs) @@ -403,8 +464,9 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), Parent(0), - debugLoc(MI.getDebugLoc()) { + : TID(&MI.getDesc()), NumImplicitOps(0), + MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); // Add operands @@ -412,11 +474,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) addOperand(MI.getOperand(i)); NumImplicitOps = MI.NumImplicitOps; - // Add memory operands. - for (std::list::const_iterator i = MI.memoperands_begin(), - j = MI.memoperands_end(); i != j; ++i) - addMemOperand(MF, *i); - // Set parent to null. Parent = 0; @@ -425,8 +482,6 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) MachineInstr::~MachineInstr() { LeakDetector::removeGarbageObject(this); - assert(MemOperands.empty() && - "MachineInstr being deleted with live memoperands!"); #ifndef NDEBUG for (unsigned i = 0, e = Operands.size(); i != e; ++i) { assert(Operands[i].ParentMI == this && "ParentMI mismatch!"); @@ -587,18 +642,24 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { } } -/// addMemOperand - Add a MachineMemOperand to the machine instruction, -/// referencing arbitrary storage. +/// addMemOperand - Add a MachineMemOperand to the machine instruction. +/// This function should be used only occasionally. The setMemRefs function +/// is the primary method for setting up a MachineInstr's MemRefs list. void MachineInstr::addMemOperand(MachineFunction &MF, - const MachineMemOperand &MO) { - MemOperands.push_back(MO); -} + MachineMemOperand *MO) { + mmo_iterator OldMemRefs = MemRefs; + mmo_iterator OldMemRefsEnd = MemRefsEnd; -/// clearMemOperands - Erase all of this MachineInstr's MachineMemOperands. -void MachineInstr::clearMemOperands(MachineFunction &MF) { - MemOperands.clear(); -} + size_t NewNum = (MemRefsEnd - MemRefs) + 1; + mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); + mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; + + std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + NewMemRefs[NewNum - 1] = MO; + MemRefs = NewMemRefs; + MemRefsEnd = NewMemRefsEnd; +} /// removeFromParent - This method unlinks 'this' from the containing basic /// block, and returns it, but does not delete it. @@ -657,7 +718,7 @@ bool MachineInstr::isDebugLabel() const { } /// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of -/// the specific register or -1 if it is not found. 
It further tightening +/// the specific register or -1 if it is not found. It further tightens /// the search criteria to a use that kills the register if isKill is true. int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const { @@ -731,7 +792,9 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { unsigned DefPart = 0; for (unsigned i = 1, e = getNumOperands(); i < e; ) { const MachineOperand &FMO = getOperand(i); - assert(FMO.isImm()); + // After the normal asm operands there may be additional imp-def regs. + if (!FMO.isImm()) + return false; // Skip over this def. unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm()); unsigned PrevDef = i + 1; @@ -782,16 +845,22 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { const MachineOperand &MO = getOperand(UseOpIdx); if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) return false; - int FlagIdx = UseOpIdx - 1; - if (FlagIdx < 1) - return false; - while (!getOperand(FlagIdx).isImm()) { - if (--FlagIdx == 0) + + // Find the flag operand corresponding to UseOpIdx + unsigned FlagIdx, NumOps=0; + for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + const MachineOperand &UFMO = getOperand(FlagIdx); + // After the normal asm operands there may be additional imp-def regs. + if (!UFMO.isImm()) return false; + NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm()); + assert(NumOps < getNumOperands() && "Invalid inline asm flag"); + if (UseOpIdx < FlagIdx+NumOps+1) + break; } - const MachineOperand &UFMO = getOperand(FlagIdx); - if (FlagIdx + InlineAsm::getNumOperandRegisters(UFMO.getImm()) < UseOpIdx) + if (FlagIdx >= UseOpIdx) return false; + const MachineOperand &UFMO = getOperand(FlagIdx); unsigned DefNo; if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { if (!DefOpIdx) @@ -864,7 +933,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) { /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, - bool &SawStore) const { + bool &SawStore, + AliasAnalysis *AA) const { // Ignore stuff that we obviously can't move. if (TID->mayStore() || TID->isCall()) { SawStore = true; @@ -878,9 +948,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, // destination. The check for isInvariantLoad gives the targe the chance to // classify the load as always returning a constant, e.g. a constant pool // load. - if (TID->mayLoad() && !TII->isInvariantLoad(this)) + if (TID->mayLoad() && !isInvariantLoad(AA)) // Otherwise, this is a real load. If there is a store between the load and - // end of block, or if the laod is volatile, we can't move it. + // end of block, or if the load is volatile, we can't move it. return !SawStore && !hasVolatileMemoryRef(); return true; @@ -889,11 +959,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, /// isSafeToReMat - Return true if it's safe to rematerialize the specified /// instruction which defined the specified register instead of copying it. 
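Both isRegTiedToUseOperand and isRegTiedToDefOperand above now walk the INLINEASM operand list group by group: operand 0 is the asm string, repeated [flag-immediate][register operands...] groups follow, and any trailing imp-def registers carry no flag, which is why hitting a non-immediate where a flag should be means "stop". The traversal skeleton in isolation (WalkAsmGroups is an invented name):

    #include "llvm/InlineAsm.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Visit each flag/register group of an INLINEASM MachineInstr.
    static void WalkAsmGroups(const MachineInstr &MI) {
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ) {
        const MachineOperand &FMO = MI.getOperand(i);
        if (!FMO.isImm())
          return;               // trailing imp-def regs: no more groups
        unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
        // Operands i+1 .. i+NumOps belong to this group; inspect them here.
        i += NumOps + 1;
      }
    }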
bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - unsigned DstReg) const { + unsigned DstReg, + AliasAnalysis *AA) const { bool SawStore = false; - if (!getDesc().isRematerializable() || - !TII->isTriviallyReMaterializable(this) || - !isSafeToMove(TII, SawStore)) + if (!TII->isTriviallyReMaterializable(this, AA) || + !isSafeToMove(TII, SawStore, AA)) return false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -930,21 +1000,55 @@ bool MachineInstr::hasVolatileMemoryRef() const { return true; // Check the memory reference information for volatile references. - for (std::list::const_iterator I = memoperands_begin(), - E = memoperands_end(); I != E; ++I) - if (I->isVolatile()) + for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) + if ((*I)->isVolatile()) return true; return false; } -void MachineInstr::dump() const { - cerr << " " << *this; +/// isInvariantLoad - Return true if this instruction is loading from a +/// location whose value is invariant across the function. For example, +/// loading a value from the constant pool or from from the argument area +/// of a function if it does not change. This should only return true of +/// *all* loads the instruction does are invariant (if it does multiple loads). +bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { + // If the instruction doesn't load at all, it isn't an invariant load. + if (!TID->mayLoad()) + return false; + + // If the instruction has lost its memoperands, conservatively assume that + // it may not be an invariant load. + if (memoperands_empty()) + return false; + + const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + + for (mmo_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) { + if ((*I)->isVolatile()) return false; + if ((*I)->isStore()) return false; + + if (const Value *V = (*I)->getValue()) { + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = dyn_cast(V)) + if (PSV->isConstant(MFI)) + continue; + // If we have an AliasAnalysis, ask it whether the memory is constant. + if (AA && AA->pointsToConstantMemory(V)) + continue; + } + + // Otherwise assume conservatively. + return false; + } + + // Everything checks out. 
+ return true; } -void MachineInstr::print(std::ostream &OS, const TargetMachine *TM) const { - raw_os_ostream RawOS(OS); - print(RawOS, TM); +void MachineInstr::dump() const { + errs() << " " << *this; } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { @@ -967,46 +1071,23 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (!memoperands_empty()) { OS << ", Mem:"; - for (std::list::const_iterator i = memoperands_begin(), - e = memoperands_end(); i != e; ++i) { - const MachineMemOperand &MRO = *i; - const Value *V = MRO.getValue(); - - assert((MRO.isLoad() || MRO.isStore()) && - "SV has to be a load, store or both."); - - if (MRO.isVolatile()) - OS << "Volatile "; - - if (MRO.isLoad()) - OS << "LD"; - if (MRO.isStore()) - OS << "ST"; - - OS << "(" << MRO.getSize() << "," << MRO.getAlignment() << ") ["; - - if (!V) - OS << ""; - else if (!V->getName().empty()) - OS << V->getName(); - else if (const PseudoSourceValue *PSV = dyn_cast(V)) { - PSV->print(OS); - } else - OS << V; - - OS << " + " << MRO.getOffset() << "]"; + for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); + i != e; ++i) { + OS << **i; + if (next(i) != e) + OS << " "; } } if (!debugLoc.isUnknown()) { const MachineFunction *MF = getParent()->getParent(); DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); - DICompileUnit CU(DLT.CompileUnit); - std::string Dir, Fn; - OS << " [dbg: " - << CU.getDirectory(Dir) << '/' << CU.getFilename(Fn) << "," - << DLT.Line << "," - << DLT.Col << "]"; + DICompileUnit CU(DLT.Scope); + if (!CU.isNull()) + OS << " [dbg: " + << CU.getDirectory() << '/' << CU.getFilename() << "," + << DLT.Line << "," + << DLT.Col << "]"; } OS << "\n"; @@ -1021,7 +1102,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, SmallVector DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); - if (!MO.isReg() || !MO.isUse()) + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -1032,6 +1113,9 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, if (MO.isKill()) // The register is already marked kill. return true; + if (isPhysReg && isRegTiedToDefOperand(i)) + // Two-address uses of physregs must not be marked kill. + return true; MO.setIsKill(); Found = true; } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index aaa4de4b2c156..f92ddb2b908ad 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -28,11 +28,12 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -43,8 +44,11 @@ namespace { class VISIBILITY_HIDDEN MachineLICM : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + BitVector AllocatableSet; // Various analyses that we use... + AliasAnalysis *AA; // Alias analysis info. 
MachineLoopInfo *LI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop MachineRegisterInfo *RegInfo; // Machine register information @@ -70,6 +74,7 @@ namespace { AU.setPreservesCFG(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineDominatorTree>(); + AU.addRequired<AliasAnalysis>(); AU.addPreserved<MachineLoopInfo>(); AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -126,20 +131,19 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { /// loop. /// bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { - const Function *F = MF.getFunction(); - if (F->hasFnAttr(Attribute::OptimizeForSize)) - return false; - - DOUT << "******** Machine LICM ********\n"; + DEBUG(errs() << "******** Machine LICM ********\n"); Changed = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); RegInfo = &MF.getRegInfo(); + AllocatableSet = TRI->getAllocatableSet(MF); // Get our Loop information... LI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { @@ -210,7 +214,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // Okay, this instruction does a load. As a refinement, we allow the target // to decide whether the loaded value is actually a constant. If so, we can // actually use it as a load. - if (!TII->isInvariantLoad(&I)) + if (!I.isInvariantLoad(AA)) // FIXME: we should be able to sink loads with no other side effects if // there is nothing that can change memory from here until the end of // block. This is a trivial form of alias analysis. @@ -218,28 +222,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } DEBUG({ - DOUT << "--- Checking if we can hoist " << I; + errs() << "--- Checking if we can hoist " << I; if (I.getDesc().getImplicitUses()) { - DOUT << " * Instruction has implicit uses:\n"; + errs() << " * Instruction has implicit uses:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); *ImpUses; ++ImpUses) - DOUT << " -> " << TRI->getName(*ImpUses) << "\n"; + errs() << " -> " << TRI->getName(*ImpUses) << "\n"; } if (I.getDesc().getImplicitDefs()) { - DOUT << " * Instruction has implicit defines:\n"; + errs() << " * Instruction has implicit defines:\n"; const TargetRegisterInfo *TRI = TM->getRegisterInfo(); for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); *ImpDefs; ++ImpDefs) - DOUT << " -> " << TRI->getName(*ImpDefs) << "\n"; + errs() << " -> " << TRI->getName(*ImpDefs) << "\n"; } }); if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { - DOUT << "Cannot hoist with implicit defines or uses\n"; + DEBUG(errs() << "Cannot hoist with implicit defines or uses\n"); return false; } @@ -254,8 +258,30 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { if (Reg == 0) continue; // Don't hoist an instruction that uses or defines a physical register. - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!RegInfo->def_empty(Reg)) + return false; + if (AllocatableSet.test(Reg)) + return false; + // Check for a def among the register's aliases too.
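
The "ambient physreg" rule this hunk introduces (and whose alias walk continues just below) can be stated in isolation. A hedged sketch: hasDef and isAllocatable are assumptions standing in for MachineRegisterInfo::def_empty and the AllocatableSet bit test, and the null-terminated array mirrors what TargetRegisterInfo::getAliasSet returns.

static bool isAmbientPhysRegUse(unsigned Reg, const unsigned *AliasSet,
                                bool (*hasDef)(unsigned),
                                bool (*isAllocatable)(unsigned)) {
  if (hasDef(Reg) || isAllocatable(Reg))
    return false;                       // may be written somewhere; unsafe
  for (const unsigned *A = AliasSet; *A; ++A) // null-terminated array
    if (hasDef(*A) || isAllocatable(*A))
      return false;                     // an alias may be written; unsafe
  return true;                          // ambient: free to move its uses
}
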
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!RegInfo->def_empty(AliasReg)) + return false; + if (AllocatableSet.test(AliasReg)) + return false; + } + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return false; + } + } if (!MO.isUse()) continue; @@ -291,13 +317,10 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.getOpcode() == TargetInstrInfo::IMPLICIT_DEF) return false; - const TargetInstrDesc &TID = MI.getDesc(); - // FIXME: For now, only hoist re-materializable instructions. LICM will // increase register pressure. We want to make sure it doesn't increase // spilling. - if (!TID.mayLoad() && (!TID.isRematerializable() || - !TII->isTriviallyReMaterializable(&MI))) + if (!TII->isTriviallyReMaterializable(&MI, AA)) return false; // If result(s) of this instruction is used by PHIs, then don't hoist it. @@ -355,14 +378,14 @@ void MachineLICM::Hoist(MachineInstr &MI) { // Now move the instructions to the predecessor, inserting it before any // terminator instructions. DEBUG({ - DOUT << "Hoisting " << MI; + errs() << "Hoisting " << MI; if (CurPreheader->getBasicBlock()) - DOUT << " to MachineBasicBlock " - << CurPreheader->getBasicBlock()->getName(); + errs() << " to MachineBasicBlock " + << CurPreheader->getBasicBlock()->getName(); if (MI.getParent()->getBasicBlock()) - DOUT << " from MachineBasicBlock " - << MI.getParent()->getBasicBlock()->getName(); - DOUT << "\n"; + errs() << " from MachineBasicBlock " + << MI.getParent()->getBasicBlock()->getName(); + errs() << "\n"; }); // Look for opportunity to CSE the hoisted instruction. @@ -374,8 +397,7 @@ void MachineLICM::Hoist(MachineInstr &MI) { if (CI != CSEMap.end()) { const MachineInstr *Dup = LookForDuplicate(&MI, CI->second, RegInfo); if (Dup) { - DOUT << "CSEing " << MI; - DOUT << " with " << *Dup; + DEBUG(errs() << "CSEing " << MI << " with " << *Dup); for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef()) diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index ff56f4de5906b..2da8e3760e9ae 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -19,8 +19,12 @@ #include "llvm/CodeGen/Passes.h" using namespace llvm; -TEMPLATE_INSTANTIATION(class LoopBase<MachineBasicBlock, MachineLoop>); -TEMPLATE_INSTANTIATION(class LoopInfoBase<MachineBasicBlock, MachineLoop>); +#define MLB class LoopBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLB); +#undef MLB +#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop> +TEMPLATE_INSTANTIATION(MLIB); +#undef MLIB char MachineLoopInfo::ID = 0; static RegisterPass @@ -37,4 +41,5 @@ bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 1d8109eb8d995..b62803f105e4c 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -23,7 +23,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Support/Dwarf.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; using namespace llvm::dwarf; @@ -32,23 +32,23 @@ static RegisterPass<MachineModuleInfo> X("machinemoduleinfo", "Module Information"); char MachineModuleInfo::ID = 0; +// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {} + //===----------------------------------------------------------------------===// - + MachineModuleInfo::MachineModuleInfo() : ImmutablePass(&ID) -, LabelIDList() -, FrameMoves() -, LandingPads() -, Personalities() +, ObjFileMMI(0) , CallsEHReturn(0) , CallsUnwindInit(0) -, DbgInfoAvailable(false) -{ - // Always emit "no personality" info +, DbgInfoAvailable(false) { + // Always emit some info, by default "no personality" info. Personalities.push_back(NULL); } -MachineModuleInfo::~MachineModuleInfo() { +MachineModuleInfo::~MachineModuleInfo() { + delete ObjFileMMI; } /// doInitialization - Initialize the state for a new module. @@ -63,18 +63,12 @@ bool MachineModuleInfo::doFinalization() { return false; } -/// BeginFunction - Begin gathering function meta information. -/// -void MachineModuleInfo::BeginFunction(MachineFunction *MF) { - // Coming soon. -} - /// EndFunction - Discard function meta information. /// void MachineModuleInfo::EndFunction() { // Clean up frame info. FrameMoves.clear(); - + // Clean up exception info. LandingPads.clear(); TypeInfos.clear(); @@ -82,12 +76,16 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = 0; CallsUnwindInit = 0; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + VariableDbgInfo.clear(); +#endif } /// AnalyzeModule - Scan the module for global debug information. /// void MachineModuleInfo::AnalyzeModule(Module &M) { - // Insert functions in the llvm.used array into UsedFunctions. + // Insert functions in the llvm.used array (but not llvm.compiler.used) into + // UsedFunctions. GlobalVariable *GV = M.getGlobalVariable("llvm.used"); if (!GV || !GV->hasInitializer()) return; @@ -95,12 +93,10 @@ void MachineModuleInfo::AnalyzeModule(Module &M) { ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); if (InitList == 0) return; - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InitList->getOperand(i))) - if (CE->getOpcode() == Instruction::BitCast) - if (Function *F = dyn_cast<Function>(CE->getOperand(0))) - UsedFunctions.insert(F); - } + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (Function *F = + dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts())) + UsedFunctions.insert(F); } //===-EH-------------------------------------------------------------------===// @@ -115,7 +111,7 @@ LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo if (LP.LandingPadBlock == LandingPad) return LP; } - + LandingPads.push_back(LandingPadInfo(LandingPad)); return LandingPads[N]; } @@ -134,7 +130,7 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { unsigned LandingPadLabel = NextLabelID(); LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.LandingPadLabel = LandingPadLabel; + LP.LandingPadLabel = LandingPadLabel; return LandingPadLabel; } @@ -148,8 +144,13 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; - - Personalities.push_back(Personality); + + // If this is the first personality we're adding, go + // ahead and add it at the beginning. + if (Personalities[0] == NULL) + Personalities[0] = Personality; + else + Personalities.push_back(Personality); } /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
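
The slot-0 convention addPersonality maintains is easy to see in isolation. A runnable miniature, assuming only that the list is seeded with a single NULL entry, as the constructor above does:

#include <cstddef>
#include <vector>

// Personalities[0] starts out NULL; the first real personality fills that
// slot, so "no personality" and "first personality" share index zero.
static void addPersonality(std::vector<const void*> &Personalities,
                           const void *P) {
  for (size_t i = 0; i < Personalities.size(); ++i)
    if (Personalities[i] == P)
      return;                           // already recorded
  if (Personalities[0] == NULL)
    Personalities[0] = P;
  else
    Personalities.push_back(P);
}
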
@@ -224,7 +225,7 @@ void MachineModuleInfo::TidyLandingPads() { } } -/// getTypeIDFor - Return the type id for the specified typeinfo. This is +/// getTypeIDFor - Return the type id for the specified typeinfo. This is /// function wide. unsigned MachineModuleInfo::getTypeIDFor(GlobalVariable *TI) { for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) @@ -273,24 +274,24 @@ Function *MachineModuleInfo::getPersonality() const { } /// getPersonalityIndex - Return unique index for current personality -/// function. NULL personality function should always get zero index. +/// function. NULL/first personality function should always get zero index. unsigned MachineModuleInfo::getPersonalityIndex() const { const Function* Personality = NULL; - + // Scan landing pads. If there is at least one non-NULL personality - use it. for (unsigned i = 0; i != LandingPads.size(); ++i) if (LandingPads[i].Personality) { Personality = LandingPads[i].Personality; break; } - + for (unsigned i = 0; i < Personalities.size(); ++i) { if (Personalities[i] == Personality) return i; } - // This should never happen - assert(0 && "Personality function should be set!"); + // This will happen if the current personality function is + // in the zero index. return 0; } @@ -306,6 +307,7 @@ struct DebugLabelFolder : public MachineFunctionPass { DebugLabelFolder() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -321,12 +323,12 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { // Get machine module info. MachineModuleInfo *MMI = getAnalysisIfAvailable(); if (!MMI) return false; - + // Track if change is made. bool MadeChange = false; // No prior label to begin. unsigned PriorLabel = 0; - + // Iterate through basic blocks. for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) { @@ -336,7 +338,7 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){ // The label ID # is always operand #0, an immediate. unsigned NextLabel = I->getOperand(0).getImm(); - + // If there was an immediate prior label. if (PriorLabel) { // Remap the current label to prior label. @@ -354,15 +356,14 @@ bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { // No consecutive labels. PriorLabel = 0; } - + ++I; } } - + return MadeChange; } FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); } } - diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp new file mode 100644 index 0000000000000..7a6292910f4b5 --- /dev/null +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -0,0 +1,45 @@ +//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements object-file format specific implementations of +// MachineModuleInfoImpl. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCSymbol.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineModuleInfoMachO +//===----------------------------------------------------------------------===// + +// Out of line virtual method. +void MachineModuleInfoMachO::Anchor() {} + + +static int SortSymbolPair(const void *LHS, const void *RHS) { + const MCSymbol *LHSS = + ((const std::pair<MCSymbol*, const MCSymbol*>*)LHS)->first; + const MCSymbol *RHSS = + ((const std::pair<MCSymbol*, const MCSymbol*>*)RHS)->first; + return LHSS->getName().compare(RHSS->getName()); +} + +/// GetSortedStubs - Return the entries from a DenseMap in a deterministic +/// sorted order. +MachineModuleInfoMachO::SymbolListTy +MachineModuleInfoMachO::GetSortedStubs(const DenseMap<MCSymbol*, const MCSymbol*> &Map) { + MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end()); + if (!List.empty()) + qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + return List; +} + diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 544d83a33f7f9..b31973e04fd9f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -110,11 +110,9 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() && "Invalid vreg!"); - for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) { - // Since we are in SSA form, we can stop at the first definition. - if (I.getOperand().isDef()) - return &*I; - } + // Since we are in SSA form, we can use the first definition. + if (!def_empty(Reg)) + return &*def_begin(Reg); return 0; } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 0e18fa742f5b3..0f3b33f54d461 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This pass +// This pass moves instructions into successor blocks, when possible, so that +// they aren't executed on paths where their results aren't needed. +// +// This pass is not intended to be a replacement or a complete alternative +// for an LLVM-IR-level sinking pass. It is only designed to sink simple +// constructs that are not exposed before lowering and instruction selection.
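
What the new header comment describes, in a plain-C++ miniature (ordinary source code, not MIR): sinking moves work onto the only path that needs its result.

int before(int a, int b, bool p) {
  int t = a * b;        // computed on every path, even when p is false
  return p ? t : 0;
}

int after(int a, int b, bool p) {
  if (p)
    return a * b;       // "sunk": computed only on the path that uses it
  return 0;
}
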
// //===----------------------------------------------------------------------===// @@ -15,12 +20,14 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumSunk, "Number of machine instructions sunk"); @@ -29,9 +36,12 @@ namespace { class VISIBILITY_HIDDEN MachineSinking : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; MachineFunction *CurMF; // Current MachineFunction MachineRegisterInfo *RegInfo; // Machine register information - MachineDominatorTree *DT; // Machine dominator tree for the current Loop + MachineDominatorTree *DT; // Machine dominator tree + AliasAnalysis *AA; + BitVector AllocatableSet; // Which physregs are allocatable? public: static char ID; // Pass identification @@ -40,7 +50,9 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -63,10 +75,8 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); - for (MachineRegisterInfo::reg_iterator I = RegInfo->reg_begin(Reg), - E = RegInfo->reg_end(); I != E; ++I) { - if (I.getOperand().isDef()) continue; // ignore def. - + for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg), + E = RegInfo->use_end(); I != E; ++I) { // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); @@ -85,13 +95,16 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { - DOUT << "******** Machine Sinking ********\n"; + DEBUG(errs() << "******** Machine Sinking ********\n"); CurMF = &MF; TM = &CurMF->getTarget(); TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); RegInfo = &CurMF->getRegInfo(); DT = &getAnalysis(); + AA = &getAnalysis(); + AllocatableSet = TRI->getAllocatableSet(*CurMF); bool EverMadeChange = false; @@ -142,7 +155,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. - if (!MI->isSafeToMove(TII, SawStore)) + if (!MI->isSafeToMove(TII, SawStore, AA)) return false; // FIXME: This should include support for sinking instructions within the @@ -151,7 +164,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // also sink them down before their first use in the block. This xform has to // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y - // and z only the shrink the live range of x. + // and z and only shrink the live range of x. // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. 
@@ -169,10 +182,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (Reg == 0) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // If this is a physical register use, we can't move it. If it is a def, - // we can move it, but only if the def is dead. - if (MO.isUse() || !MO.isDead()) + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!RegInfo->def_empty(Reg)) + return false; + if (AllocatableSet.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!RegInfo->def_empty(AliasReg)) + return false; + if (AllocatableSet.test(AliasReg)) + return false; + } + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. return false; + } } else { // Virtual register uses are always safe to sink. if (MO.isUse()) continue; @@ -232,15 +261,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { if (MI->getParent() == SuccToSinkTo) return false; - DEBUG(cerr << "Sink instr " << *MI); - DEBUG(cerr << "to block " << *SuccToSinkTo); + DEBUG(errs() << "Sink instr " << *MI); + DEBUG(errs() << "to block " << *SuccToSinkTo); // If the block has multiple predecessors, this would introduce computation on // a path that it doesn't already exist. We could split the critical edge, // but for now we just punt. // FIXME: Split critical edges if not backedges. if (SuccToSinkTo->pred_size() > 1) { - DEBUG(cerr << " *** PUNTING: Critical edge found\n"); + DEBUG(errs() << " *** PUNTING: Critical edge found\n"); return false; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index be1396c7a8101..18a3ead3bc180 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,21 +23,23 @@ // the verifier errors. 
//===----------------------------------------------------------------------===// -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Function.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include - +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -53,6 +55,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF); @@ -61,7 +64,7 @@ namespace { const bool allowPhysDoubleDefs; const char *const OutFileName; - std::ostream *OS; + raw_ostream *OS; const MachineFunction *MF; const TargetMachine *TM; const TargetRegisterInfo *TRI; @@ -75,7 +78,8 @@ namespace { BitVector regsReserved; RegSet regsLive; - RegVector regsDefined, regsImpDefined, regsDead, regsKilled; + RegVector regsDefined, regsDead, regsKilled; + RegSet regsLiveInButUnused; // Add Reg and any sub-registers to RV void addRegWithSubRegs(RegVector &RV, unsigned Reg) { @@ -85,14 +89,6 @@ namespace { RV.push_back(*R); } - // Does RS contain any super-registers of Reg? - bool anySuperRegisters(const RegSet &RS, unsigned Reg) { - for (const unsigned *R = TRI->getSuperRegisters(Reg); *R; R++) - if (RS.count(*R)) - return true; - return false; - } - struct BBInfo { // Is this MBB reachable from the MF entry point? 
bool reachable; @@ -148,7 +144,7 @@ namespace { DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; bool isReserved(unsigned Reg) { - return Reg < regsReserved.size() && regsReserved[Reg]; + return Reg < regsReserved.size() && regsReserved.test(Reg); } void visitMachineFunctionBefore(); @@ -176,21 +172,24 @@ static RegisterPass<MachineVerifier> MachineVer("machineverifier", "Verify generated machine code"); static const PassInfo *const MachineVerifyID = &MachineVer; -FunctionPass * -llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) -{ +FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { return new MachineVerifier(allowPhysDoubleDefs); } -bool -MachineVerifier::runOnMachineFunction(MachineFunction &MF) -{ - std::ofstream OutFile; +bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { + raw_ostream *OutFile = 0; if (OutFileName) { - OutFile.open(OutFileName, std::ios::out | std::ios::app); - OS = &OutFile; + std::string ErrorInfo; + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, + raw_fd_ostream::F_Append); + if (!ErrorInfo.empty()) { + errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; + exit(1); + } + + OS = OutFile; } else { - OS = cerr.stream(); + OS = &errs(); } foundErrors = 0; @@ -215,51 +214,48 @@ MachineVerifier::runOnMachineFunction(MachineFunction &MF) } visitMachineFunctionAfter(); - if (OutFileName) - OutFile.close(); + if (OutFile) + delete OutFile; + else if (foundErrors) + llvm_report_error("Found "+Twine(foundErrors)+" machine code errors."); - if (foundErrors) { - cerr << "\nStopping with " << foundErrors << " machine code errors.\n"; - exit(1); - } + // Clean up. + regsLive.clear(); + regsDefined.clear(); + regsDead.clear(); + regsKilled.clear(); + regsLiveInButUnused.clear(); + MBBInfoMap.clear(); return false; // no changes } -void -MachineVerifier::report(const char *msg, const MachineFunction *MF) -{ +void MachineVerifier::report(const char *msg, const MachineFunction *MF) { assert(MF); - *OS << "\n"; + *OS << '\n'; if (!foundErrors++) - MF->print(OS); + MF->print(*OS); *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getName() << "\n"; + << "- function: " << MF->getFunction()->getNameStr() << "\n"; } -void -MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) -{ +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getBasicBlock()->getName() + *OS << "- basic block: " << MBB->getBasicBlock()->getNameStr() << " " << (void*)MBB << " (#" << MBB->getNumber() << ")\n"; } -void -MachineVerifier::report(const char *msg, const MachineInstr *MI) -{ +void MachineVerifier::report(const char *msg, const MachineInstr *MI) { assert(MI); report(msg, MI->getParent()); *OS << "- instruction: "; - MI->print(OS, TM); + MI->print(*OS, TM); } -void -MachineVerifier::report(const char *msg, - const MachineOperand *MO, unsigned MONum) -{ +void MachineVerifier::report(const char *msg, + const MachineOperand *MO, unsigned MONum) { assert(MO); report(msg, MO->getParent()); *OS << "- operand " << MONum << ": "; @@ -267,9 +263,7 @@ MachineVerifier::report, *OS << "\n"; } -void -MachineVerifier::markReachable(const MachineBasicBlock *MBB) -{ +void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { MInfo.reachable = true; @@ -279,16 +273,158 @@ MachineVerifier::markReachable(const MachineBasicBlock *MBB) } } -void
-MachineVerifier::visitMachineFunctionBefore() -{ +void MachineVerifier::visitMachineFunctionBefore() { regsReserved = TRI->getReservedRegs(*MF); + + // A sub-register of a reserved register is also reserved + for (int Reg = regsReserved.find_first(); Reg>=0; + Reg = regsReserved.find_next(Reg)) { + for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) { + // FIXME: This should probably be: + // assert(regsReserved.test(*Sub) && "Non-reserved sub-register"); + regsReserved.set(*Sub); + } + } markReachable(&MF->front()); } -void -MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) -{ +void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Start with minimal CFG sanity checks. + MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI != MF->end()) { + // Block is not last in function. + if (!MBB->isSuccessor(MBBI)) { + // Block does not fall through. + if (MBB->empty()) { + report("MBB doesn't fall through but is empty!", MBB); + } + } + if (TII->BlockHasNoFallThrough(*MBB)) { + if (MBB->empty()) { + report("TargetInstrInfo says the block has no fall through, but the " + "block is empty!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("TargetInstrInfo says the block has no fall through, but the " + "block does not end in a barrier!", MBB); + } + } + } else { + // Block is last in function. + if (MBB->empty()) { + report("MBB is last in function but is empty!", MBB); + } + } + + // Call AnalyzeBranch. If it succeeds, there are several more conditions to check. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock*>(MBB), + TBB, FBB, Cond)) { + // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's + // check whether its answers match up with reality. + if (!TBB && !FBB) { + // Block falls through to its successor. + MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI == MF->end()) { + // It's possible that the block legitimately ends with a noreturn + // call or an unreachable, in which case it won't actually fall + // out the bottom of the function. + } else if (MBB->succ_empty()) { + // It's possible that the block legitimately ends with a noreturn + // call or an unreachable, in which case it won't actually fall + // out of the block. + } else if (MBB->succ_size() != 1) { + report("MBB exits via unconditional fall-through but doesn't have " + "exactly one CFG successor!", MBB); + } else if (MBB->succ_begin()[0] != MBBI) { + report("MBB exits via unconditional fall-through but its successor " + "differs from its CFG successor!", MBB); + } + if (!MBB->empty() && MBB->back().getDesc().isBarrier()) { + report("MBB exits via unconditional fall-through but ends with a " + "barrier instruction!", MBB); + } + if (!Cond.empty()) { + report("MBB exits via unconditional fall-through but has a condition!", + MBB); + } + } else if (TBB && !FBB && Cond.empty()) { + // Block unconditionally branches somewhere.
+ if (MBB->succ_size() != 1) { + report("MBB exits via unconditional branch but doesn't have " + "exactly one CFG successor!", MBB); + } else if (MBB->succ_begin()[0] != TBB) { + report("MBB exits via unconditional branch but the CFG " + "successor doesn't match the actual successor!", MBB); + } + if (MBB->empty()) { + report("MBB exits via unconditional branch but doesn't contain " + "any instructions!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("MBB exits via unconditional branch but doesn't end with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via unconditional branch but the branch isn't a " + "terminator instruction!", MBB); + } + } else if (TBB && !FBB && !Cond.empty()) { + // Block conditionally branches somewhere, otherwise falls through. + MachineFunction::const_iterator MBBI = MBB; + ++MBBI; + if (MBBI == MF->end()) { + report("MBB conditionally falls through out of function!", MBB); + } if (MBB->succ_size() != 2) { + report("MBB exits via conditional branch/fall-through but doesn't have " + "exactly two CFG successors!", MBB); + } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) || + (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) { + report("MBB exits via conditional branch/fall-through but the CFG " + "successors don't match the actual successors!", MBB); + } + if (MBB->empty()) { + report("MBB exits via conditional branch/fall-through but doesn't " + "contain any instructions!", MBB); + } else if (MBB->back().getDesc().isBarrier()) { + report("MBB exits via conditional branch/fall-through but ends with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via conditional branch/fall-through but the branch " + "isn't a terminator instruction!", MBB); + } + } else if (TBB && FBB) { + // Block conditionally branches somewhere, otherwise branches + // somewhere else. 
+ if (MBB->succ_size() != 2) { + report("MBB exits via conditional branch/branch but doesn't have " + "exactly two CFG successors!", MBB); + } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) || + (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) { + report("MBB exits via conditional branch/branch but the CFG " + "successors don't match the actual successors!", MBB); + } + if (MBB->empty()) { + report("MBB exits via conditional branch/branch but doesn't " + "contain any instructions!", MBB); + } else if (!MBB->back().getDesc().isBarrier()) { + report("MBB exits via conditional branch/branch but doesn't end with a " + "barrier instruction!", MBB); + } else if (!MBB->back().getDesc().isTerminator()) { + report("MBB exits via conditional branch/branch but the branch " + "isn't a terminator instruction!", MBB); + } + if (Cond.empty()) { + report("MBB exits via conditional branch/branch but there's no " + "condition!", MBB); + } + } else { + report("AnalyzeBranch returned invalid data!", MBB); + } + } + regsLive.clear(); for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), E = MBB->livein_end(); I != E; ++I) { @@ -300,32 +436,41 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++) regsLive.insert(*R); } + regsLiveInButUnused = regsLive; + + const MachineFrameInfo *MFI = MF->getFrameInfo(); + assert(MFI && "Function has no frame info"); + BitVector PR = MFI->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { + regsLive.insert(I); + for (const unsigned *R = TRI->getSubRegisters(I); *R; R++) + regsLive.insert(*R); + } + regsKilled.clear(); regsDefined.clear(); - regsImpDefined.clear(); } -void -MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) -{ +void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { const TargetInstrDesc &TI = MI->getDesc(); - if (MI->getNumExplicitOperands() < TI.getNumOperands()) { + if (MI->getNumOperands() < TI.getNumOperands()) { report("Too few operands", MI); *OS << TI.getNumOperands() << " operands expected, but " << MI->getNumExplicitOperands() << " given.\n"; } - if (!TI.isVariadic()) { - if (MI->getNumExplicitOperands() > TI.getNumOperands()) { - report("Too many operands", MI); - *OS << TI.getNumOperands() << " operands expected, but " - << MI->getNumExplicitOperands() << " given.\n"; - } + + // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + if ((*I)->isLoad() && !TI.mayLoad()) + report("Missing mayLoad flag", MI); + if ((*I)->isStore() && !TI.mayStore()) + report("Missing mayStore flag", MI); + } } void -MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) -{ +MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const TargetInstrDesc &TI = MI->getDesc(); @@ -337,6 +482,16 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) report("Explicit definition marked as use", MO, MONum); else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); + } else if (MONum < TI.getNumOperands()) { + if (MO->isReg()) { + if (MO->isDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + } else { + if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic()) + report("Extra explicit operand on non-variadic instruction", MO, MONum); } switch (MO->getType()) { @@ -346,18 +501,26 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) return; // Check Live Variables. - if (MO->isUse()) { + if (MO->isUndef()) { + // An <undef> doesn't refer to any register, so just skip it. + } else if (MO->isUse()) { + regsLiveInButUnused.erase(Reg); + if (MO->isKill()) { addRegWithSubRegs(regsKilled, Reg); + // Tied operands on two-address instructions MUST NOT have a <kill> flag. + if (MI->isRegTiedToDefOperand(MONum)) + report("Illegal kill flag on two-address instruction operand", + MO, MONum); } else { - // TwoAddress instr modyfying a reg is treated as kill+def. + // TwoAddress instr modifying a reg is treated as kill+def. unsigned defIdx; if (MI->isRegTiedToDefOperand(MONum, &defIdx) && MI->getOperand(defIdx).getReg() == Reg) addRegWithSubRegs(regsKilled, Reg); } - // Explicit use of a dead register. - if (!MO->isImplicit() && !regsLive.count(Reg)) { + // Use of a dead register. + if (!regsLive.count(Reg)) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // Reserved registers may be used even when 'dead'. if (!isReserved(Reg)) @@ -374,15 +537,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } } } else { + assert(MO->isDef()); // Register defined. // TODO: verify that earlyclobber ops are not used. - if (MO->isImplicit()) - addRegWithSubRegs(regsImpDefined, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - if (MO->isDead()) addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); } // Check register classes.
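
The per-instruction liveness update the verifier performs has a fixed order: kills leave the live set before defs enter it, and a def of a still-live register is the error reported below. A runnable miniature of that rule, with plain register numbers standing in for the verifier's RegSet machinery:

#include <set>

// Returns false in exactly the situation the verifier reports as
// "Redefining a live physical register".
static bool stepLiveness(std::set<unsigned> &Live,
                         const std::set<unsigned> &Killed, unsigned Def) {
  for (std::set<unsigned>::const_iterator I = Killed.begin(),
         E = Killed.end(); I != E; ++I)
    Live.erase(*I);                     // kills take effect first
  if (Live.count(Def))
    return false;                       // def of a still-live register
  Live.insert(Def);
  return true;
}
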
@@ -401,8 +562,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } sr = s; } - if (TOI.RegClass) { - const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { if (!DRC->contains(sr)) { report("Illegal physical register for instruction", MO, MONum); *OS << TRI->getName(sr) << " is not a " @@ -419,8 +579,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } RC = *(RC->subregclasses_begin()+SubIdx); } - if (TOI.RegClass) { - const TargetRegisterClass *DRC = TRI->getRegClass(TOI.RegClass); + if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) { if (RC != DRC && !RC->hasSuperClass(DRC)) { report("Illegal virtual register for instruction", MO, MONum); *OS << "Expected a " << DRC->getName() << " register, but got a " @@ -431,34 +590,35 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) } break; } - // Can PHI instrs refer to MBBs not in the CFG? X86 and ARM do. - // case MachineOperand::MO_MachineBasicBlock: - // if (MI->getOpcode() == TargetInstrInfo::PHI) { - // if (!MO->getMBB()->isSuccessor(MI->getParent())) - // report("PHI operand is not in the CFG", MO, MONum); - // } - // break; + + case MachineOperand::MO_MachineBasicBlock: + if (MI->getOpcode() == TargetInstrInfo::PHI) { + if (!MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); + } + break; + default: break; } } -void -MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) -{ +void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); set_subtract(regsLive, regsKilled); regsKilled.clear(); - for (RegVector::const_iterator I = regsDefined.begin(), - E = regsDefined.end(); I != E; ++I) { + // Verify that both <def> and <def,dead> operands refer to dead registers. + RegVector defs(regsDefined); + defs.append(regsDead.begin(), regsDead.end()); + + for (RegVector::const_iterator I = defs.begin(), E = defs.end(); + I != E; ++I) { if (regsLive.count(*I)) { if (TargetRegisterInfo::isPhysicalRegister(*I)) { - // We allow double defines to physical registers with live - // super-registers. if (!allowPhysDoubleDefs && !isReserved(*I) && - !anySuperRegisters(regsLive, *I)) { + !regsLiveInButUnused.count(*I)) { report("Redefining a live physical register", MI); *OS << "Register " << TRI->getName(*I) << " was defined but already live.\n"; @@ -478,14 +638,12 @@ MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) } } - set_union(regsLive, regsDefined); regsDefined.clear(); - set_union(regsLive, regsImpDefined); regsImpDefined.clear(); set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); } void -MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) -{ +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { MBBInfoMap[MBB].regsLiveOut = regsLive; regsLive.clear(); } @@ -493,9 +651,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) // Calculate the largest possible vregsPassed sets. These are the registers that // can pass through an MBB live, but may not be live every time. It is assumed // that all vregsPassed sets are empty before the call. -void -MachineVerifier::calcMaxRegsPassed() -{ +void MachineVerifier::calcMaxRegsPassed() { // First push live-out regs to successors' vregsPassed.
Remember the MBBs that // have any vregsPassed. DenseSet<const MachineBasicBlock*> todo; @@ -533,9 +689,7 @@ MachineVerifier::calcMaxRegsPassed() // Calculate the minimum vregsPassed set. These are the registers that always // pass live through an MBB. The calculation assumes that calcMaxRegsPassed has // been called earlier. -void -MachineVerifier::calcMinRegsPassed() -{ +void MachineVerifier::calcMinRegsPassed() { DenseSet<const MachineBasicBlock*> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) @@ -570,9 +724,7 @@ MachineVerifier::calcMinRegsPassed() // Check PHI instructions at the beginning of MBB. It is assumed that // calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. -void -MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) -{ +void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { DenseSet<const MachineBasicBlock*> seen; @@ -601,9 +753,7 @@ MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) } } -void -MachineVerifier::visitMachineFunctionAfter() -{ +void MachineVerifier::visitMachineFunctionAfter() { calcMaxRegsPassed(); // With the maximal set of vregsPassed we can verify dead-in registers. diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp new file mode 100644 index 0000000000000..cf05275d7a315 --- /dev/null +++ b/lib/CodeGen/ObjectCodeEmitter.cpp @@ -0,0 +1,141 @@ +//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BinaryObject.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" + +//===----------------------------------------------------------------------===// +// ObjectCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + +ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {} +ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {} +ObjectCodeEmitter::~ObjectCodeEmitter() {} + +/// setBinaryObject - set the BinaryObject we are writing to +void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; } + +/// emitByte - This callback is invoked when a byte needs to be +/// written to the data stream, without buffer overflow testing. +void ObjectCodeEmitter::emitByte(uint8_t B) { + BO->emitByte(B); +} + +/// emitWordLE - This callback is invoked when a 32-bit word needs to be +/// written to the data stream in little-endian format. +void ObjectCodeEmitter::emitWordLE(uint32_t W) { + BO->emitWordLE(W); +} + +/// emitWordBE - This callback is invoked when a 32-bit word needs to be +/// written to the data stream in big-endian format. +void ObjectCodeEmitter::emitWordBE(uint32_t W) { + BO->emitWordBE(W); +} + +/// emitDWordLE - This callback is invoked when a 64-bit word needs to be +/// written to the data stream in little-endian format. +void ObjectCodeEmitter::emitDWordLE(uint64_t W) { + BO->emitDWordLE(W); +} + +/// emitDWordBE - This callback is invoked when a 64-bit word needs to be +/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitDWordBE(uint64_t W) { + BO->emitDWordBE(W); +} + +/// emitAlignment - Align 'BO' to the necessary alignment boundary. +void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */, + uint8_t fill /* 0 */) { + BO->emitAlignment(Alignment, fill); +} + +/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) { + BO->emitULEB128Bytes(Value); +} + +/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) { + BO->emitSLEB128Bytes(Value); +} + +/// emitString - This callback is invoked when a String needs to be +/// written to the data stream. +void ObjectCodeEmitter::emitString(const std::string &String) { + BO->emitString(String); +} + +/// getCurrentPCValue - This returns the address that the next emitted byte +/// will be output to. +uintptr_t ObjectCodeEmitter::getCurrentPCValue() const { + return BO->getCurrentPCOffset(); +} + +/// getCurrentPCOffset - Return the offset from the start of the emitted +/// buffer that we are currently writing to. +uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { + return BO->getCurrentPCOffset(); +} + +/// addRelocation - Whenever a relocatable address is needed, it should be +/// noted with this interface. +void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { + BO->addRelocation(relocation); +} + +/// StartMachineBasicBlock - This should be called by the target when a new +/// basic block is about to be emitted. This way the MCE knows where the +/// start of the block is, and can implement getMachineBasicBlockAddress. +void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); +} + +/// getMachineBasicBlockAddress - Return the address of the specified +/// MachineBasicBlock, only usable after the label for the MBB has been +/// emitted. +uintptr_t +ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; +} + +/// getJumpTableEntryAddress - Return the address of the jump table with index +/// 'Index' in the function that last called initJumpTableInfo. +uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { + assert(JTLocations.size() > Index && "JT not emitted!"); + return JTLocations[Index]; +} + +/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. +uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { + assert(CPLocations.size() > Index && "CP not emitted!"); + return CPLocations[Index]; +} + +/// getConstantPoolEntrySection - Return the section of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. 
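
For reference, the byte layout that emitULEB128Bytes above delegates to BinaryObject is the standard ULEB128 scheme: seven payload bits per byte, with the high bit set on every byte except the last. A self-contained encoder, independent of BinaryObject's actual implementation:

#include <stdint.h>
#include <vector>

static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;        // low seven bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                     // continuation bit
    Out.push_back(Byte);
  } while (Value != 0);
}
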
+uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { + assert(CPSections.size() > Index && "CP not emitted!"); + return CPSections[Index]; +} + +} // end namespace llvm + diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h new file mode 100644 index 0000000000000..904061ca4fbc0 --- /dev/null +++ b/lib/CodeGen/PBQP/AnnotatedGraph.h @@ -0,0 +1,184 @@ +//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Annotated PBQP Graph class. This class is used internally by the PBQP solver +// to cache information to speed up reduction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H +#define LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H + +#include "GraphBase.h" + +namespace PBQP { + + +template class AnnotatedEdge; + +template +class AnnotatedNode : public NodeBase, + AnnotatedEdge > { +private: + + NodeData nodeData; + +public: + + AnnotatedNode(const Vector &costs, const NodeData &nodeData) : + NodeBase, + AnnotatedEdge >(costs), + nodeData(nodeData) {} + + NodeData& getNodeData() { return nodeData; } + const NodeData& getNodeData() const { return nodeData; } + +}; + +template +class AnnotatedEdge : public EdgeBase, + AnnotatedEdge > { +private: + + typedef typename GraphBase, + AnnotatedEdge >::NodeIterator + NodeIterator; + + EdgeData edgeData; + +public: + + + AnnotatedEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr, + const Matrix &costs, const EdgeData &edgeData) : + EdgeBase, + AnnotatedEdge >(node1Itr, node2Itr, costs), + edgeData(edgeData) {} + + EdgeData& getEdgeData() { return edgeData; } + const EdgeData& getEdgeData() const { return edgeData; } + +}; + +template +class AnnotatedGraph : public GraphBase, + AnnotatedEdge > { +private: + + typedef GraphBase, + AnnotatedEdge > PGraph; + + typedef AnnotatedNode NodeEntry; + typedef AnnotatedEdge EdgeEntry; + + + void copyFrom(const AnnotatedGraph &other) { + if (!other.areNodeIDsValid()) { + other.assignNodeIDs(); + } + std::vector newNodeItrs(other.getNumNodes()); + + for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd(); + nItr != nEnd; ++nItr) { + newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr)); + } + + for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = other.getNodeID(other.getEdgeNode1(eItr)), + node2ID = other.getNodeID(other.getEdgeNode2(eItr)); + + addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + other.getEdgeCosts(eItr), other.getEdgeData(eItr)); + } + + } + +public: + + typedef typename PGraph::NodeIterator NodeIterator; + typedef typename PGraph::ConstNodeIterator ConstNodeIterator; + typedef typename PGraph::EdgeIterator EdgeIterator; + typedef typename PGraph::ConstEdgeIterator ConstEdgeIterator; + + AnnotatedGraph() {} + + AnnotatedGraph(const AnnotatedGraph &other) { + copyFrom(other); + } + + AnnotatedGraph& operator=(const AnnotatedGraph &other) { + PGraph::clear(); + copyFrom(other); + return *this; + } + + NodeIterator addNode(const Vector &costs, const NodeData &data) { + return PGraph::addConstructedNode(NodeEntry(costs, data)); + } + + EdgeIterator addEdge(const NodeIterator 
&node1Itr, + const NodeIterator &node2Itr, + const Matrix &costs, const EdgeData &data) { + return PGraph::addConstructedEdge(EdgeEntry(node1Itr, node2Itr, + costs, data)); + } + + NodeData& getNodeData(const NodeIterator &nodeItr) { + return getNodeEntry(nodeItr).getNodeData(); + } + + const NodeData& getNodeData(const NodeIterator &nodeItr) const { + return getNodeEntry(nodeItr).getNodeData(); + } + + EdgeData& getEdgeData(const EdgeIterator &edgeItr) { + return getEdgeEntry(edgeItr).getEdgeData(); + } + + const EdgeEntry& getEdgeData(const EdgeIterator &edgeItr) const { + return getEdgeEntry(edgeItr).getEdgeData(); + } + + SimpleGraph toSimpleGraph() const { + SimpleGraph g; + + if (!PGraph::areNodeIDsValid()) { + PGraph::assignNodeIDs(); + } + std::vector newNodeItrs(PGraph::getNumNodes()); + + for (ConstNodeIterator nItr = PGraph::nodesBegin(), + nEnd = PGraph::nodesEnd(); + nItr != nEnd; ++nItr) { + + newNodeItrs[getNodeID(nItr)] = g.addNode(getNodeCosts(nItr)); + } + + for (ConstEdgeIterator + eItr = PGraph::edgesBegin(), eEnd = PGraph::edgesEnd(); + eItr != eEnd; ++eItr) { + + unsigned node1ID = getNodeID(getEdgeNode1(eItr)), + node2ID = getNodeID(getEdgeNode2(eItr)); + + g.addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID], + getEdgeCosts(eItr)); + } + + return g; + } + +}; + + +} + +#endif // LLVM_CODEGEN_PBQP_ANNOTATEDGRAPH_H diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h new file mode 100644 index 0000000000000..b2f2e6f620fdb --- /dev/null +++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h @@ -0,0 +1,110 @@ +//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Uses a trivial brute force algorithm to solve a PBQP problem. +// PBQP is NP-HARD - This solver should only be used for debugging small +// problems. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H +#define LLVM_CODEGEN_PBQP_EXHAUSTIVESOLVER_H + +#include "Solver.h" + +namespace PBQP { + +/// A brute force PBQP solver. This solver takes exponential time. It should +/// only be used for debugging purposes. 
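
The enumeration strategy solve() uses below is an odometer over the per-node selections. Isolated and runnable, with each node's number of options (its cost-vector length) standing in for the graph queries:

#include <vector>

// Advances Sel to the next assignment; returns false once every
// combination has been visited, mirroring solve()'s "finished" flag.
static bool nextSelection(std::vector<unsigned> &Sel,
                          const std::vector<unsigned> &DomainSize) {
  for (size_t i = 0; i < Sel.size(); ++i) {
    if (Sel[i] + 1 < DomainSize[i]) {
      ++Sel[i];                         // advance this digit
      return true;
    }
    Sel[i] = 0;                         // wrap and carry to the next digit
  }
  return false;
}
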
+class ExhaustiveSolverImpl { +private: + + const SimpleGraph &g; + + PBQPNum getSolutionCost(const Solution &solution) const { + PBQPNum cost = 0.0; + + for (SimpleGraph::ConstNodeIterator + nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + unsigned nodeId = g.getNodeID(nodeItr); + + cost += g.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + } + + for (SimpleGraph::ConstEdgeIterator + edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + SimpleGraph::ConstNodeIterator n1 = g.getEdgeNode1Itr(edgeItr), + n2 = g.getEdgeNode2Itr(edgeItr); + unsigned sol1 = solution.getSelection(g.getNodeID(n1)), + sol2 = solution.getSelection(g.getNodeID(n2)); + + cost += g.getEdgeCosts(edgeItr)[sol1][sol2]; + } + + return cost; + } + +public: + + ExhaustiveSolverImpl(const SimpleGraph &g) : g(g) {} + + Solution solve() const { + Solution current(g.getNumNodes(), true), optimal(current); + + PBQPNum bestCost = std::numeric_limits::infinity(); + bool finished = false; + + while (!finished) { + PBQPNum currentCost = getSolutionCost(current); + + if (currentCost < bestCost) { + optimal = current; + bestCost = currentCost; + } + + // assume we're done. + finished = true; + + for (unsigned i = 0; i < g.getNumNodes(); ++i) { + if (current.getSelection(i) == + (g.getNodeCosts(g.getNodeItr(i)).getLength() - 1)) { + current.setSelection(i, 0); + } + else { + current.setSelection(i, current.getSelection(i) + 1); + finished = false; + break; + } + } + + } + + optimal.setSolutionCost(bestCost); + + return optimal; + } + +}; + +class ExhaustiveSolver : public Solver { +public: + ~ExhaustiveSolver() {} + Solution solve(const SimpleGraph &g) const { + ExhaustiveSolverImpl solver(g); + return solver.solve(); + } +}; + +} + +#endif // LLVM_CODGEN_PBQP_EXHAUSTIVESOLVER_HPP diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h new file mode 100644 index 0000000000000..cc3e017adda13 --- /dev/null +++ b/lib/CodeGen/PBQP/GraphBase.h @@ -0,0 +1,582 @@ +//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Base class for PBQP Graphs. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CODEGEN_PBQP_GRAPHBASE_H +#define LLVM_CODEGEN_PBQP_GRAPHBASE_H + +#include "PBQPMath.h" + +#include +#include + +namespace PBQP { + +// UGLY, but I'm not sure there's a good way around this: We need to be able to +// look up a Node's "adjacent edge list" structure type before the Node type is +// fully constructed. We can enable this by pushing the choice of data type +// out into this traits class. +template +class NodeBaseTraits { + public: + typedef std::list AdjEdgeList; + typedef typename AdjEdgeList::iterator AdjEdgeIterator; + typedef typename AdjEdgeList::const_iterator ConstAdjEdgeIterator; +}; + +/// \brief Base for concrete graph classes. Provides a basic set of graph +/// operations which are useful for PBQP solvers. 
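
getSolutionCost above evaluates the PBQP objective: the sum of each node's selected cost-vector entry plus, for every edge, the matrix entry indexed by the two endpoint selections. The same arithmetic in a standalone sketch, with plain containers standing in for PBQP::Vector and PBQP::Matrix:

#include <vector>

struct Edge { unsigned n1, n2; std::vector<std::vector<double> > costs; };

static double solutionCost(const std::vector<std::vector<double> > &NodeCosts,
                           const std::vector<Edge> &Edges,
                           const std::vector<unsigned> &Sel) {
  double Cost = 0.0;
  for (unsigned i = 0; i < NodeCosts.size(); ++i)
    Cost += NodeCosts[i][Sel[i]];       // node term
  for (unsigned e = 0; e < Edges.size(); ++e)
    Cost += Edges[e].costs[Sel[Edges[e].n1]][Sel[Edges[e].n2]]; // edge term
  return Cost;
}
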
diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h
new file mode 100644
index 0000000000000..cc3e017adda13
--- /dev/null
+++ b/lib/CodeGen/PBQP/GraphBase.h
@@ -0,0 +1,582 @@
+//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Base class for PBQP Graphs.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PBQP_GRAPHBASE_H
+#define LLVM_CODEGEN_PBQP_GRAPHBASE_H
+
+#include "PBQPMath.h"
+
+#include <list>
+#include <vector>
+
+namespace PBQP {
+
+// UGLY, but I'm not sure there's a good way around this: We need to be able to
+// look up a Node's "adjacent edge list" structure type before the Node type is
+// fully constructed. We can enable this by pushing the choice of data type
+// out into this traits class.
+template <typename GraphT>
+class NodeBaseTraits {
+  public:
+    typedef std::list<typename GraphT::EdgeIterator> AdjEdgeList;
+    typedef typename AdjEdgeList::iterator AdjEdgeIterator;
+    typedef typename AdjEdgeList::const_iterator ConstAdjEdgeIterator;
+};
+
+/// \brief Base for concrete graph classes. Provides a basic set of graph
+/// operations which are useful for PBQP solvers.
+template <typename NodeEntry, typename EdgeEntry>
+class GraphBase {
+private:
+
+  typedef GraphBase<NodeEntry, EdgeEntry> ThisGraphT;
+
+  typedef std::list<NodeEntry> NodeList;
+  typedef std::list<EdgeEntry> EdgeList;
+
+  NodeList nodeList;
+  unsigned nodeListSize;
+
+  EdgeList edgeList;
+  unsigned edgeListSize;
+
+  GraphBase(const ThisGraphT &other) { abort(); }
+  void operator=(const ThisGraphT &other) { abort(); }
+
+public:
+
+  /// \brief Iterates over the nodes of a graph.
+  typedef typename NodeList::iterator NodeIterator;
+  /// \brief Iterates over the nodes of a const graph.
+  typedef typename NodeList::const_iterator ConstNodeIterator;
+  /// \brief Iterates over the edges of a graph.
+  typedef typename EdgeList::iterator EdgeIterator;
+  /// \brief Iterates over the edges of a const graph.
+  typedef typename EdgeList::const_iterator ConstEdgeIterator;
+
+  /// \brief Iterates over the edges attached to a node.
+  typedef typename NodeBaseTraits<ThisGraphT>::AdjEdgeIterator
+    AdjEdgeIterator;
+
+  /// \brief Iterates over the edges attached to a node in a const graph.
+  typedef typename NodeBaseTraits<ThisGraphT>::ConstAdjEdgeIterator
+    ConstAdjEdgeIterator;
+
+private:
+
+  typedef std::vector<NodeIterator> IDToNodeMap;
+
+  IDToNodeMap idToNodeMap;
+  bool nodeIDsValid;
+
+  void invalidateNodeIDs() {
+    if (nodeIDsValid) {
+      idToNodeMap.clear();
+      nodeIDsValid = false;
+    }
+  }
+
+  template <typename ItrT>
+  bool iteratorInRange(ItrT itr, const ItrT &begin, const ItrT &end) {
+    for (ItrT t = begin; t != end; ++t) {
+      if (itr == t)
+        return true;
+    }
+
+    return false;
+  }
+
+protected:
+
+  GraphBase() : nodeListSize(0), edgeListSize(0), nodeIDsValid(false) {}
+
+  NodeEntry& getNodeEntry(const NodeIterator &nodeItr) { return *nodeItr; }
+  const NodeEntry& getNodeEntry(const ConstNodeIterator &nodeItr) const {
+    return *nodeItr;
+  }
+
+  EdgeEntry& getEdgeEntry(const EdgeIterator &edgeItr) { return *edgeItr; }
+  const EdgeEntry& getEdgeEntry(const ConstEdgeIterator &edgeItr) const {
+    return *edgeItr;
+  }
+
+  NodeIterator addConstructedNode(const NodeEntry &nodeEntry) {
+    ++nodeListSize;
+
+    invalidateNodeIDs();
+
+    NodeIterator newNodeItr = nodeList.insert(nodeList.end(), nodeEntry);
+
+    return newNodeItr;
+  }
+
+  EdgeIterator addConstructedEdge(const EdgeEntry &edgeEntry) {
+
+    assert((findEdge(edgeEntry.getNode1Itr(), edgeEntry.getNode2Itr())
+            == edgeList.end()) && "Attempt to add duplicate edge.");
+
+    ++edgeListSize;
+
+    // Add the edge to the graph.
+    EdgeIterator edgeItr = edgeList.insert(edgeList.end(), edgeEntry);
+
+    // Get a reference to the version in the graph.
+    EdgeEntry &newEdgeEntry = getEdgeEntry(edgeItr);
+
+    // Node entries:
+    NodeEntry &node1Entry = getNodeEntry(newEdgeEntry.getNode1Itr()),
+              &node2Entry = getNodeEntry(newEdgeEntry.getNode2Itr());
+
+    // Sanity check on matrix dimensions.
+    assert((node1Entry.getCosts().getLength() ==
+            newEdgeEntry.getCosts().getRows()) &&
+           (node2Entry.getCosts().getLength() ==
+            newEdgeEntry.getCosts().getCols()) &&
+           "Matrix dimensions do not match cost vector dimensions.");
+
+    // Create links between nodes and edges.
+    newEdgeEntry.setNode1ThisEdgeItr(
+      node1Entry.addAdjEdge(edgeItr));
+    newEdgeEntry.setNode2ThisEdgeItr(
+      node2Entry.addAdjEdge(edgeItr));
+
+    return edgeItr;
+  }
+
+public:
+
+  /// \brief Returns the number of nodes in this graph.
+  unsigned getNumNodes() const { return nodeListSize; }
+
+  /// \brief Returns the number of edges in this graph.
+  unsigned getNumEdges() const { return edgeListSize; }
+
+  /// \brief Return the cost vector for the given node.
+  Vector& getNodeCosts(const NodeIterator &nodeItr) {
+    return getNodeEntry(nodeItr).getCosts();
+  }
+
+  /// \brief Return the cost vector for the given node.
+  const Vector& getNodeCosts(const ConstNodeIterator &nodeItr) const {
+    return getNodeEntry(nodeItr).getCosts();
+  }
+
+  /// \brief Return the degree of the given node.
+  unsigned getNodeDegree(const NodeIterator &nodeItr) const {
+    return getNodeEntry(nodeItr).getDegree();
+  }
+
+  /// \brief Assigns sequential IDs to the nodes, starting at 0, which
+  /// remain valid until the next addition or removal of a node.
+  void assignNodeIDs() {
+    unsigned curID = 0;
+    idToNodeMap.resize(getNumNodes());
+    for (NodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+         nodeItr != nodeEnd; ++nodeItr, ++curID) {
+      getNodeEntry(nodeItr).setID(curID);
+      idToNodeMap[curID] = nodeItr;
+    }
+    nodeIDsValid = true;
+  }
+
+  /// \brief Assigns sequential IDs to the nodes using the ordering of the
+  /// given vector.
+  void assignNodeIDs(const std::vector<NodeIterator> &nodeOrdering) {
+    assert((getNumNodes() == nodeOrdering.size()) &&
+           "Wrong number of nodes in node ordering.");
+    idToNodeMap = nodeOrdering;
+    for (unsigned nodeID = 0; nodeID < idToNodeMap.size(); ++nodeID) {
+      getNodeEntry(idToNodeMap[nodeID]).setID(nodeID);
+    }
+    nodeIDsValid = true;
+  }
+
+  /// \brief Returns true if valid node IDs are assigned, false otherwise.
+  bool areNodeIDsValid() const { return nodeIDsValid; }
+
+  /// \brief Return the numeric ID of the given node.
+  ///
+  /// Calls to this method will result in an assertion failure if there have
+  /// been any node additions or removals since the last call to
+  /// assignNodeIDs().
+  unsigned getNodeID(const ConstNodeIterator &nodeItr) const {
+    assert(nodeIDsValid && "Attempt to retrieve invalid ID.");
+    return getNodeEntry(nodeItr).getID();
+  }
+
+  /// \brief Returns the iterator associated with the given node ID.
+  NodeIterator getNodeItr(unsigned nodeID) {
+    assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID.");
+    return idToNodeMap[nodeID];
+  }
+
+  /// \brief Returns the iterator associated with the given node ID.
+  ConstNodeIterator getNodeItr(unsigned nodeID) const {
+    assert(nodeIDsValid && "Attempt to retrieve iterator with invalid ID.");
+    return idToNodeMap[nodeID];
+  }
+
+  /// \brief Removes the given node (and all attached edges) from the graph.
+  void removeNode(const NodeIterator &nodeItr) {
+    assert(iteratorInRange(nodeItr, nodeList.begin(), nodeList.end()) &&
+           "Iterator does not belong to this graph!");
+
+    invalidateNodeIDs();
+
+    NodeEntry &nodeEntry = getNodeEntry(nodeItr);
+
+    // We need to copy this out because it will be destroyed as the edges are
+    // removed.
+    typedef std::vector<EdgeIterator> AdjEdgeList;
+    typedef typename AdjEdgeList::iterator AdjEdgeListItr;
+
+    AdjEdgeList adjEdges;
+    adjEdges.reserve(nodeEntry.getDegree());
+    std::copy(nodeEntry.adjEdgesBegin(), nodeEntry.adjEdgesEnd(),
+              std::back_inserter(adjEdges));
+
+    // Iterate over the copied out edges and remove them from the graph.
+    for (AdjEdgeListItr itr = adjEdges.begin(), end = adjEdges.end();
+         itr != end; ++itr) {
+      removeEdge(*itr);
+    }
+
+    // Erase the node from the nodelist.
+    nodeList.erase(nodeItr);
+    --nodeListSize;
+  }
+
+  NodeIterator nodesBegin() { return nodeList.begin(); }
+  ConstNodeIterator nodesBegin() const { return nodeList.begin(); }
+  NodeIterator nodesEnd() { return nodeList.end(); }
+  ConstNodeIterator nodesEnd() const { return nodeList.end(); }
+
+  AdjEdgeIterator adjEdgesBegin(const NodeIterator &nodeItr) {
+    return getNodeEntry(nodeItr).adjEdgesBegin();
+  }
+
+  ConstAdjEdgeIterator adjEdgesBegin(const ConstNodeIterator &nodeItr) const {
+    return getNodeEntry(nodeItr).adjEdgesBegin();
+  }
+
+  AdjEdgeIterator adjEdgesEnd(const NodeIterator &nodeItr) {
+    return getNodeEntry(nodeItr).adjEdgesEnd();
+  }
+
+  ConstAdjEdgeIterator adjEdgesEnd(const ConstNodeIterator &nodeItr) const {
+    return getNodeEntry(nodeItr).adjEdgesEnd();
+  }
+
+  EdgeIterator findEdge(const NodeIterator &node1Itr,
+                        const NodeIterator &node2Itr) {
+
+    for (AdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr),
+         adjEdgeEnd = adjEdgesEnd(node1Itr);
+         adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) {
+      if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) ||
+          (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) {
+        return *adjEdgeItr;
+      }
+    }
+
+    return edgeList.end();
+  }
+
+  ConstEdgeIterator findEdge(const ConstNodeIterator &node1Itr,
+                             const ConstNodeIterator &node2Itr) const {
+
+    for (ConstAdjEdgeIterator adjEdgeItr = adjEdgesBegin(node1Itr),
+         adjEdgeEnd = adjEdgesEnd(node1Itr);
+         adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) {
+      if ((getEdgeNode1Itr(*adjEdgeItr) == node2Itr) ||
+          (getEdgeNode2Itr(*adjEdgeItr) == node2Itr)) {
+        return *adjEdgeItr;
+      }
+    }
+
+    return edgeList.end();
+  }
+
+  Matrix& getEdgeCosts(const EdgeIterator &edgeItr) {
+    return getEdgeEntry(edgeItr).getCosts();
+  }
+
+  const Matrix& getEdgeCosts(const ConstEdgeIterator &edgeItr) const {
+    return getEdgeEntry(edgeItr).getCosts();
+  }
+
+  NodeIterator getEdgeNode1Itr(const EdgeIterator &edgeItr) {
+    return getEdgeEntry(edgeItr).getNode1Itr();
+  }
+
+  ConstNodeIterator getEdgeNode1Itr(const ConstEdgeIterator &edgeItr) const {
+    return getEdgeEntry(edgeItr).getNode1Itr();
+  }
+
+  NodeIterator getEdgeNode2Itr(const EdgeIterator &edgeItr) {
+    return getEdgeEntry(edgeItr).getNode2Itr();
+  }
+
+  ConstNodeIterator getEdgeNode2Itr(const ConstEdgeIterator &edgeItr) const {
+    return getEdgeEntry(edgeItr).getNode2Itr();
+  }
+
+  NodeIterator getEdgeOtherNode(const EdgeIterator &edgeItr,
+                                const NodeIterator &nodeItr) {
+
+    EdgeEntry &edgeEntry = getEdgeEntry(edgeItr);
+    if (nodeItr == edgeEntry.getNode1Itr()) {
+      return edgeEntry.getNode2Itr();
+    }
+    // else
+    return edgeEntry.getNode1Itr();
+  }
+
+  ConstNodeIterator getEdgeOtherNode(const ConstEdgeIterator &edgeItr,
+                                     const ConstNodeIterator &nodeItr) const {
+
+    const EdgeEntry &edgeEntry = getEdgeEntry(edgeItr);
+    if (nodeItr == edgeEntry.getNode1Itr()) {
+      return edgeEntry.getNode2Itr();
+    }
+    // else
+    return edgeEntry.getNode1Itr();
+  }
+
+  void removeEdge(const EdgeIterator &edgeItr) {
+    assert(iteratorInRange(edgeItr, edgeList.begin(), edgeList.end()) &&
+           "Iterator does not belong to this graph!");
+
+    --edgeListSize;
+
+    // Get the edge entry.
+    EdgeEntry &edgeEntry = getEdgeEntry(edgeItr);
+
+    // Get the node entries.
+    NodeEntry &node1Entry(getNodeEntry(edgeEntry.getNode1Itr())),
+              &node2Entry(getNodeEntry(edgeEntry.getNode2Itr()));
+
+    // Disconnect the edge from the nodes.
+    node1Entry.removeAdjEdge(edgeEntry.getNode1ThisEdgeItr());
+    node2Entry.removeAdjEdge(edgeEntry.getNode2ThisEdgeItr());
+
+    // Remove the edge from the graph.
+    edgeList.erase(edgeItr);
+  }
+
+  EdgeIterator edgesBegin() { return edgeList.begin(); }
+  ConstEdgeIterator edgesBegin() const { return edgeList.begin(); }
+  EdgeIterator edgesEnd() { return edgeList.end(); }
+  ConstEdgeIterator edgesEnd() const { return edgeList.end(); }
+
+  void clear() {
+    nodeList.clear();
+    nodeListSize = 0;
+    edgeList.clear();
+    edgeListSize = 0;
+    idToNodeMap.clear();
+  }
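For reference, printDot below renders the graph as Graphviz source. For the two-node instance sketched earlier the output is shaped roughly like this (illustrative; the labels come from the node and edge cost values):

    graph {
      node0 [ label="0: [ 0, 0, 0 ]" ]
      node1 [ label="1: [ 0, 0, 0 ]" ]
      edge [ len=2 ]
      node0 -- node1 [ label="[ 10, 0, 0 ]\n[ 0, 10, 0 ]\n[ 0, 0, 10 ]\n" ]
    }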
+  template <typename OStream>
+  void printDot(OStream &os) const {
+
+    assert(areNodeIDsValid() &&
+           "Cannot print a .dot of a graph unless IDs have been assigned.");
+
+    os << "graph {\n";
+
+    for (ConstNodeIterator nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+         nodeItr != nodeEnd; ++nodeItr) {
+
+      os << "  node" << getNodeID(nodeItr) << " [ label=\""
+         << getNodeID(nodeItr) << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+    }
+
+    os << "  edge [ len=" << getNumNodes() << " ]\n";
+
+    for (ConstEdgeIterator edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+         edgeItr != edgeEnd; ++edgeItr) {
+
+      os << "  node" << getNodeID(getEdgeNode1Itr(edgeItr))
+         << " -- node" << getNodeID(getEdgeNode2Itr(edgeItr))
+         << " [ label=\"";
+
+      const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+      for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+        os << edgeCosts.getRowAsVector(i) << "\\n";
+      }
+
+      os << "\" ]\n";
+    }
+
+    os << "}\n";
+  }
+
+  template <typename OStream>
+  void printDot(OStream &os) {
+    if (!areNodeIDsValid()) {
+      assignNodeIDs();
+    }
+
+    const_cast<const ThisGraphT*>(this)->printDot(os);
+  }
+
+  template <typename OStream>
+  void dumpTo(OStream &os) const {
+    typedef ConstNodeIterator ConstNodeID;
+
+    assert(areNodeIDsValid() &&
+           "Cannot dump a graph unless IDs have been assigned.");
+
+    for (ConstNodeIterator nItr = nodesBegin(), nEnd = nodesEnd();
+         nItr != nEnd; ++nItr) {
+      os << getNodeID(nItr) << "\n";
+    }
+
+    unsigned edgeNumber = 1;
+    for (ConstEdgeIterator eItr = edgesBegin(), eEnd = edgesEnd();
+         eItr != eEnd; ++eItr) {
+
+      os << edgeNumber++ << ": { "
+         << getNodeID(getEdgeNode1Itr(eItr)) << ", "
+         << getNodeID(getEdgeNode2Itr(eItr)) << " }\n";
+    }
+
+  }
+
+  template <typename OStream>
+  void dumpTo(OStream &os) {
+    if (!areNodeIDsValid()) {
+      assignNodeIDs();
+    }
+
+    const_cast<const ThisGraphT*>(this)->dumpTo(os);
+  }
+
+};
+
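GraphBase never instantiates node or edge entries itself; a concrete graph ties the knot by deriving its entry types from the NodeBase/EdgeBase helpers below and feeding them back in as template arguments. In miniature (hypothetical names, mirroring what SimpleGraph.h later in this patch does):

    #include "GraphBase.h"

    class MyEdge;

    class MyNode : public PBQP::NodeBase<MyNode, MyEdge> {
    public:
      MyNode(const PBQP::Vector &costs)
        : PBQP::NodeBase<MyNode, MyEdge>(costs) {}
    };

    class MyEdge : public PBQP::EdgeBase<MyNode, MyEdge> {
    public:
      MyEdge(const NodeIterator &n1, const NodeIterator &n2,
             const PBQP::Matrix &costs)
        : PBQP::EdgeBase<MyNode, MyEdge>(n1, n2, costs) {}
    };

    class MyGraph : public PBQP::GraphBase<MyNode, MyEdge> {};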
+/// \brief Provides a base from which to derive nodes for GraphBase.
+template <typename NodeImpl, typename EdgeImpl>
+class NodeBase {
+private:
+
+  typedef GraphBase<NodeImpl, EdgeImpl> GraphBaseT;
+  typedef NodeBaseTraits<GraphBaseT> ThisNodeBaseTraits;
+
+public:
+  typedef typename GraphBaseT::EdgeIterator EdgeIterator;
+
+private:
+  typedef typename ThisNodeBaseTraits::AdjEdgeList AdjEdgeList;
+
+  unsigned degree, id;
+  Vector costs;
+  AdjEdgeList adjEdges;
+
+  void operator=(const NodeBase& other) {
+    assert(false && "Can't assign NodeEntrys.");
+  }
+
+public:
+
+  typedef typename ThisNodeBaseTraits::AdjEdgeIterator AdjEdgeIterator;
+  typedef typename ThisNodeBaseTraits::ConstAdjEdgeIterator
+    ConstAdjEdgeIterator;
+
+  NodeBase(const Vector &costs) : degree(0), costs(costs) {
+    assert((costs.getLength() > 0) && "Can't have zero-length cost vector.");
+  }
+
+  Vector& getCosts() { return costs; }
+  const Vector& getCosts() const { return costs; }
+
+  unsigned getDegree() const { return degree; }
+
+  void setID(unsigned id) { this->id = id; }
+  unsigned getID() const { return id; }
+
+  AdjEdgeIterator addAdjEdge(const EdgeIterator &edgeItr) {
+    ++degree;
+    return adjEdges.insert(adjEdges.end(), edgeItr);
+  }
+
+  void removeAdjEdge(const AdjEdgeIterator &adjEdgeItr) {
+    --degree;
+    adjEdges.erase(adjEdgeItr);
+  }
+
+  AdjEdgeIterator adjEdgesBegin() { return adjEdges.begin(); }
+  ConstAdjEdgeIterator adjEdgesBegin() const { return adjEdges.begin(); }
+  AdjEdgeIterator adjEdgesEnd() { return adjEdges.end(); }
+  ConstAdjEdgeIterator adjEdgesEnd() const { return adjEdges.end(); }
+
+};
+
+template <typename NodeImpl, typename EdgeImpl>
+class EdgeBase {
+public:
+  typedef typename GraphBase<NodeImpl, EdgeImpl>::NodeIterator NodeIterator;
+  typedef typename GraphBase<NodeImpl, EdgeImpl>::EdgeIterator EdgeIterator;
+
+  typedef typename NodeImpl::AdjEdgeIterator NodeAdjEdgeIterator;
+
+private:
+
+  NodeIterator node1Itr, node2Itr;
+  NodeAdjEdgeIterator node1ThisEdgeItr, node2ThisEdgeItr;
+  Matrix costs;
+
+  void operator=(const EdgeBase &other) {
+    assert(false && "Can't assign EdgeEntrys.");
+  }
+
+public:
+
+  EdgeBase(const NodeIterator &node1Itr, const NodeIterator &node2Itr,
+           const Matrix &costs) :
+    node1Itr(node1Itr), node2Itr(node2Itr), costs(costs) {
+
+    assert((costs.getRows() > 0) && (costs.getCols() > 0) &&
+           "Can't have zero-dimensioned cost matrices");
+  }
+
+  Matrix& getCosts() { return costs; }
+  const Matrix& getCosts() const { return costs; }
+
+  const NodeIterator& getNode1Itr() const { return node1Itr; }
+  const NodeIterator& getNode2Itr() const { return node2Itr; }
+
+  void setNode1ThisEdgeItr(const NodeAdjEdgeIterator &node1ThisEdgeItr) {
+    this->node1ThisEdgeItr = node1ThisEdgeItr;
+  }
+
+  const NodeAdjEdgeIterator& getNode1ThisEdgeItr() const {
+    return node1ThisEdgeItr;
+  }
+
+  void setNode2ThisEdgeItr(const NodeAdjEdgeIterator &node2ThisEdgeItr) {
+    this->node2ThisEdgeItr = node2ThisEdgeItr;
+  }
+
+  const NodeAdjEdgeIterator& getNode2ThisEdgeItr() const {
+    return node2ThisEdgeItr;
+  }
+
+};
+
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPHBASE_H
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
new file mode 100644
index 0000000000000..e786246b4e051
--- /dev/null
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -0,0 +1,789 @@
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Heuristic PBQP solver. This solver is able to perform optimal reductions for
+// nodes of degree 0, 1 or 2. For nodes of degree >2 a pluggable heuristic is
+// used to select a node for reduction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+
+#include "Solver.h"
+#include "AnnotatedGraph.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
+
+namespace PBQP {
+
+/// \brief Important types for the HeuristicSolverImpl.
+///
+/// Declared separately to allow access to heuristic classes before the solver
+/// is fully constructed.
+template <typename HeuristicNodeData, typename HeuristicEdgeData>
+class HSITypes {
+public:
+
+  class NodeData;
+  class EdgeData;
+
+  typedef AnnotatedGraph<NodeData, EdgeData> SolverGraph;
+  typedef typename SolverGraph::NodeIterator GraphNodeIterator;
+  typedef typename SolverGraph::EdgeIterator GraphEdgeIterator;
+  typedef typename SolverGraph::AdjEdgeIterator GraphAdjEdgeIterator;
+
+  typedef std::list<GraphNodeIterator> NodeList;
+  typedef typename NodeList::iterator NodeListIterator;
+
+  typedef std::vector<GraphNodeIterator> NodeStack;
+  typedef typename NodeStack::iterator NodeStackIterator;
+
+  class NodeData {
+    friend class EdgeData;
+
+  private:
+
+    typedef std::list<GraphEdgeIterator> LinksList;
+
+    unsigned numLinks;
+    LinksList links, solvedLinks;
+    NodeListIterator bucketItr;
+    HeuristicNodeData heuristicData;
+
+  public:
+
+    typedef typename LinksList::iterator AdjLinkIterator;
+
+  private:
+
+    AdjLinkIterator addLink(const GraphEdgeIterator &edgeItr) {
+      ++numLinks;
+      return links.insert(links.end(), edgeItr);
+    }
+
+    void delLink(const AdjLinkIterator &adjLinkItr) {
+      --numLinks;
+      links.erase(adjLinkItr);
+    }
+
+  public:
+
+    NodeData() : numLinks(0) {}
+
+    unsigned getLinkDegree() const { return numLinks; }
+
+    HeuristicNodeData& getHeuristicData() { return heuristicData; }
+    const HeuristicNodeData& getHeuristicData() const {
+      return heuristicData;
+    }
+
+    void setBucketItr(const NodeListIterator &bucketItr) {
+      this->bucketItr = bucketItr;
+    }
+
+    const NodeListIterator& getBucketItr() const {
+      return bucketItr;
+    }
+
+    AdjLinkIterator adjLinksBegin() {
+      return links.begin();
+    }
+
+    AdjLinkIterator adjLinksEnd() {
+      return links.end();
+    }
+
+    void addSolvedLink(const GraphEdgeIterator &solvedLinkItr) {
+      solvedLinks.push_back(solvedLinkItr);
+    }
+
+    AdjLinkIterator solvedLinksBegin() {
+      return solvedLinks.begin();
+    }
+
+    AdjLinkIterator solvedLinksEnd() {
+      return solvedLinks.end();
+    }
+
+  };
+
+  class EdgeData {
+  private:
+
+    SolverGraph &g;
+    GraphNodeIterator node1Itr, node2Itr;
+    HeuristicEdgeData heuristicData;
+    typename NodeData::AdjLinkIterator node1ThisEdgeItr, node2ThisEdgeItr;
+
+  public:
+
+    EdgeData(SolverGraph &g) : g(g) {}
+
+    HeuristicEdgeData& getHeuristicData() { return heuristicData; }
+    const HeuristicEdgeData& getHeuristicData() const {
+      return heuristicData;
+    }
+
+    void setup(const GraphEdgeIterator &thisEdgeItr) {
+      node1Itr = g.getEdgeNode1Itr(thisEdgeItr);
+      node2Itr = g.getEdgeNode2Itr(thisEdgeItr);
+
+      node1ThisEdgeItr = g.getNodeData(node1Itr).addLink(thisEdgeItr);
+      node2ThisEdgeItr = g.getNodeData(node2Itr).addLink(thisEdgeItr);
+    }
+
+    void unlink() {
+      g.getNodeData(node1Itr).delLink(node1ThisEdgeItr);
+      g.getNodeData(node2Itr).delLink(node2ThisEdgeItr);
+    }
+
+  };
+
+};
+
+template <typename Heuristic>
+class HeuristicSolverImpl {
+public:
+  // Typedefs to make life easier:
+  typedef HSITypes<typename Heuristic::NodeData,
+                   typename Heuristic::EdgeData> HSIT;
+  typedef typename HSIT::SolverGraph SolverGraph;
+  typedef typename HSIT::NodeData NodeData;
+  typedef typename HSIT::EdgeData EdgeData;
+
typedef typename HSIT::GraphNodeIterator GraphNodeIterator; + typedef typename HSIT::GraphEdgeIterator GraphEdgeIterator; + typedef typename HSIT::GraphAdjEdgeIterator GraphAdjEdgeIterator; + + typedef typename HSIT::NodeList NodeList; + typedef typename HSIT::NodeListIterator NodeListIterator; + + typedef std::vector NodeStack; + typedef typename NodeStack::iterator NodeStackIterator; + + /// \brief Constructor, which performs all the actual solver work. + HeuristicSolverImpl(const SimpleGraph &orig) : + solution(orig.getNumNodes(), true) + { + copyGraph(orig); + simplify(); + setup(); + computeSolution(); + computeSolutionCost(orig); + } + + /// \brief Returns the graph for this solver. + SolverGraph& getGraph() { return g; } + + /// \brief Return the solution found by this solver. + const Solution& getSolution() const { return solution; } + +private: + + /// \brief Add the given node to the appropriate bucket for its link + /// degree. + void addToBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + switch (nodeData.getLinkDegree()) { + case 0: nodeData.setBucketItr( + r0Bucket.insert(r0Bucket.end(), nodeItr)); + break; + case 1: nodeData.setBucketItr( + r1Bucket.insert(r1Bucket.end(), nodeItr)); + break; + case 2: nodeData.setBucketItr( + r2Bucket.insert(r2Bucket.end(), nodeItr)); + break; + default: heuristic.addToRNBucket(nodeItr); + break; + } + } + + /// \brief Remove the given node from the appropriate bucket for its link + /// degree. + void removeFromBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + switch (nodeData.getLinkDegree()) { + case 0: r0Bucket.erase(nodeData.getBucketItr()); break; + case 1: r1Bucket.erase(nodeData.getBucketItr()); break; + case 2: r2Bucket.erase(nodeData.getBucketItr()); break; + default: heuristic.removeFromRNBucket(nodeItr); break; + } + } + +public: + + /// \brief Add a link. + void addLink(const GraphEdgeIterator &edgeItr) { + g.getEdgeData(edgeItr).setup(edgeItr); + + if ((g.getNodeData(g.getEdgeNode1Itr(edgeItr)).getLinkDegree() > 2) || + (g.getNodeData(g.getEdgeNode2Itr(edgeItr)).getLinkDegree() > 2)) { + heuristic.handleAddLink(edgeItr); + } + } + + /// \brief Remove link, update info for node. + /// + /// Only updates information for the given node, since usually the other + /// is about to be removed. + void removeLink(const GraphEdgeIterator &edgeItr, + const GraphNodeIterator &nodeItr) { + + if (g.getNodeData(nodeItr).getLinkDegree() > 2) { + heuristic.handleRemoveLink(edgeItr, nodeItr); + } + g.getEdgeData(edgeItr).unlink(); + } + + /// \brief Remove link, update info for both nodes. Useful for R2 only. + void removeLinkR2(const GraphEdgeIterator &edgeItr) { + GraphNodeIterator node1Itr = g.getEdgeNode1Itr(edgeItr); + + if (g.getNodeData(node1Itr).getLinkDegree() > 2) { + heuristic.handleRemoveLink(edgeItr, node1Itr); + } + removeLink(edgeItr, g.getEdgeNode2Itr(edgeItr)); + } + + /// \brief Removes all links connected to the given node. + void unlinkNode(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g.getNodeData(nodeItr); + + typedef std::vector TempEdgeList; + + TempEdgeList edgesToUnlink; + edgesToUnlink.reserve(nodeData.getLinkDegree()); + + // Copy adj edges into a temp vector. We want to destroy them during + // the unlink, and we can't do that while we're iterating over them. 
+ std::copy(nodeData.adjLinksBegin(), nodeData.adjLinksEnd(), + std::back_inserter(edgesToUnlink)); + + for (typename TempEdgeList::iterator + edgeItr = edgesToUnlink.begin(), edgeEnd = edgesToUnlink.end(); + edgeItr != edgeEnd; ++edgeItr) { + + GraphNodeIterator otherNode = g.getEdgeOtherNode(*edgeItr, nodeItr); + + removeFromBucket(otherNode); + removeLink(*edgeItr, otherNode); + addToBucket(otherNode); + } + } + + /// \brief Push the given node onto the stack to be solved with + /// backpropagation. + void pushStack(const GraphNodeIterator &nodeItr) { + stack.push_back(nodeItr); + } + + /// \brief Set the solution of the given node. + void setSolution(const GraphNodeIterator &nodeItr, unsigned solIndex) { + solution.setSelection(g.getNodeID(nodeItr), solIndex); + + for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), + adjEdgeEnd = g.adjEdgesEnd(nodeItr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + GraphEdgeIterator edgeItr(*adjEdgeItr); + GraphNodeIterator adjNodeItr(g.getEdgeOtherNode(edgeItr, nodeItr)); + g.getNodeData(adjNodeItr).addSolvedLink(edgeItr); + } + } + +private: + + SolverGraph g; + Heuristic heuristic; + Solution solution; + + NodeList r0Bucket, + r1Bucket, + r2Bucket; + + NodeStack stack; + + // Copy the SimpleGraph into an annotated graph which we can use for reduction. + void copyGraph(const SimpleGraph &orig) { + + assert((g.getNumEdges() == 0) && (g.getNumNodes() == 0) && + "Graph should be empty prior to solver setup."); + + assert(orig.areNodeIDsValid() && + "Cannot copy from a graph with invalid node IDs."); + + std::vector newNodeItrs; + + for (unsigned nodeID = 0; nodeID < orig.getNumNodes(); ++nodeID) { + newNodeItrs.push_back( + g.addNode(orig.getNodeCosts(orig.getNodeItr(nodeID)), NodeData())); + } + + for (SimpleGraph::ConstEdgeIterator + origEdgeItr = orig.edgesBegin(), origEdgeEnd = orig.edgesEnd(); + origEdgeItr != origEdgeEnd; ++origEdgeItr) { + + unsigned id1 = orig.getNodeID(orig.getEdgeNode1Itr(origEdgeItr)), + id2 = orig.getNodeID(orig.getEdgeNode2Itr(origEdgeItr)); + + g.addEdge(newNodeItrs[id1], newNodeItrs[id2], + orig.getEdgeCosts(origEdgeItr), EdgeData(g)); + } + + // Assign IDs to the new nodes using the ordering from the old graph, + // this will lead to nodes in the new graph getting the same ID as the + // corresponding node in the old graph. + g.assignNodeIDs(newNodeItrs); + } + + // Simplify the annotated graph by eliminating independent edges and trivial + // nodes. + void simplify() { + disconnectTrivialNodes(); + eliminateIndependentEdges(); + } + + // Eliminate trivial nodes. 
+ void disconnectTrivialNodes() { + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + if (g.getNodeCosts(nodeItr).getLength() == 1) { + + std::vector edgesToRemove; + + for (GraphAdjEdgeIterator adjEdgeItr = g.adjEdgesBegin(nodeItr), + adjEdgeEnd = g.adjEdgesEnd(nodeItr); + adjEdgeItr != adjEdgeEnd; ++adjEdgeItr) { + + GraphEdgeIterator edgeItr = *adjEdgeItr; + + if (g.getEdgeNode1Itr(edgeItr) == nodeItr) { + GraphNodeIterator otherNodeItr = g.getEdgeNode2Itr(edgeItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(edgeItr).getRowAsVector(0); + } + else { + GraphNodeIterator otherNodeItr = g.getEdgeNode1Itr(edgeItr); + g.getNodeCosts(otherNodeItr) += + g.getEdgeCosts(edgeItr).getColAsVector(0); + } + + edgesToRemove.push_back(edgeItr); + } + + while (!edgesToRemove.empty()) { + g.removeEdge(edgesToRemove.back()); + edgesToRemove.pop_back(); + } + } + } + } + + void eliminateIndependentEdges() { + std::vector edgesToProcess; + + for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + edgesToProcess.push_back(edgeItr); + } + + while (!edgesToProcess.empty()) { + tryToEliminateEdge(edgesToProcess.back()); + edgesToProcess.pop_back(); + } + } + + void tryToEliminateEdge(const GraphEdgeIterator &edgeItr) { + if (tryNormaliseEdgeMatrix(edgeItr)) { + g.removeEdge(edgeItr); + } + } + + bool tryNormaliseEdgeMatrix(const GraphEdgeIterator &edgeItr) { + + Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + Vector &uCosts = g.getNodeCosts(g.getEdgeNode1Itr(edgeItr)), + &vCosts = g.getNodeCosts(g.getEdgeNode2Itr(edgeItr)); + + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + PBQPNum rowMin = edgeCosts.getRowMin(r); + uCosts[r] += rowMin; + if (rowMin != std::numeric_limits::infinity()) { + edgeCosts.subFromRow(r, rowMin); + } + else { + edgeCosts.setRow(r, 0); + } + } + + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + PBQPNum colMin = edgeCosts.getColMin(c); + vCosts[c] += colMin; + if (colMin != std::numeric_limits::infinity()) { + edgeCosts.subFromCol(c, colMin); + } + else { + edgeCosts.setCol(c, 0); + } + } + + return edgeCosts.isZero(); + } + + void setup() { + setupLinks(); + heuristic.initialise(*this); + setupBuckets(); + } + + void setupLinks() { + for (GraphEdgeIterator edgeItr = g.edgesBegin(), edgeEnd = g.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + g.getEdgeData(edgeItr).setup(edgeItr); + } + } + + void setupBuckets() { + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + addToBucket(nodeItr); + } + } + + void computeSolution() { + assert(g.areNodeIDsValid() && + "Nodes cannot be added/removed during reduction."); + + reduce(); + computeTrivialSolutions(); + backpropagate(); + } + + void printNode(const GraphNodeIterator &nodeItr) { + llvm::errs() << "Node " << g.getNodeID(nodeItr) << " (" << &*nodeItr << "):\n" + << " costs = " << g.getNodeCosts(nodeItr) << "\n" + << " link degree = " << g.getNodeData(nodeItr).getLinkDegree() << "\n" + << " links = [ "; + + for (typename HSIT::NodeData::AdjLinkIterator + aeItr = g.getNodeData(nodeItr).adjLinksBegin(), + aeEnd = g.getNodeData(nodeItr).adjLinksEnd(); + aeItr != aeEnd; ++aeItr) { + llvm::errs() << "(" << g.getNodeID(g.getEdgeNode1Itr(*aeItr)) + << ", " << g.getNodeID(g.getEdgeNode2Itr(*aeItr)) + << ") "; + } + llvm::errs() << "]\n"; + } + + void dumpState() { + llvm::errs() << "\n"; + + for (GraphNodeIterator nodeItr = g.nodesBegin(), nodeEnd = 
g.nodesEnd();
+         nodeItr != nodeEnd; ++nodeItr) {
+      printNode(nodeItr);
+    }
+
+    NodeList* buckets[] = { &r0Bucket, &r1Bucket, &r2Bucket };
+
+    for (unsigned b = 0; b < 3; ++b) {
+      NodeList &bucket = *buckets[b];
+
+      llvm::errs() << "Bucket " << b << ": [ ";
+
+      for (NodeListIterator nItr = bucket.begin(), nEnd = bucket.end();
+           nItr != nEnd; ++nItr) {
+        llvm::errs() << g.getNodeID(*nItr) << " ";
+      }
+
+      llvm::errs() << "]\n";
+    }
+
+    llvm::errs() << "Stack: [ ";
+    for (NodeStackIterator nsItr = stack.begin(), nsEnd = stack.end();
+         nsItr != nsEnd; ++nsItr) {
+      llvm::errs() << g.getNodeID(*nsItr) << " ";
+    }
+    llvm::errs() << "]\n";
+  }
+
+  void reduce() {
+    bool reductionFinished = r1Bucket.empty() && r2Bucket.empty() &&
+                             heuristic.rNBucketEmpty();
+
+    while (!reductionFinished) {
+
+      if (!r1Bucket.empty()) {
+        processR1();
+      }
+      else if (!r2Bucket.empty()) {
+        processR2();
+      }
+      else if (!heuristic.rNBucketEmpty()) {
+        solution.setProvedOptimal(false);
+        solution.incRNReductions();
+        heuristic.processRN();
+      }
+      else reductionFinished = true;
+    }
+
+  }
+
+  void processR1() {
+
+    // Remove the first node from the R1 bucket:
+    GraphNodeIterator xNodeItr = r1Bucket.front();
+    r1Bucket.pop_front();
+
+    solution.incR1Reductions();
+
+    //llvm::errs() << "Applying R1 to " << g.getNodeID(xNodeItr) << "\n";
+
+    assert((g.getNodeData(xNodeItr).getLinkDegree() == 1) &&
+           "Node in R1 bucket has degree != 1");
+
+    GraphEdgeIterator edgeItr = *g.getNodeData(xNodeItr).adjLinksBegin();
+
+    const Matrix &edgeCosts = g.getEdgeCosts(edgeItr);
+
+    const Vector &xCosts = g.getNodeCosts(xNodeItr);
+    unsigned xLen = xCosts.getLength();
+
+    // Duplicate a little code to avoid transposing matrices:
+    if (xNodeItr == g.getEdgeNode1Itr(edgeItr)) {
+      GraphNodeIterator yNodeItr = g.getEdgeNode2Itr(edgeItr);
+      Vector &yCosts = g.getNodeCosts(yNodeItr);
+      unsigned yLen = yCosts.getLength();
+
+      for (unsigned j = 0; j < yLen; ++j) {
+        PBQPNum min = edgeCosts[0][j] + xCosts[0];
+        for (unsigned i = 1; i < xLen; ++i) {
+          PBQPNum c = edgeCosts[i][j] + xCosts[i];
+          if (c < min)
+            min = c;
+        }
+        yCosts[j] += min;
+      }
+    }
+    else {
+      GraphNodeIterator yNodeItr = g.getEdgeNode1Itr(edgeItr);
+      Vector &yCosts = g.getNodeCosts(yNodeItr);
+      unsigned yLen = yCosts.getLength();
+
+      for (unsigned i = 0; i < yLen; ++i) {
+        PBQPNum min = edgeCosts[i][0] + xCosts[0];
+
+        for (unsigned j = 1; j < xLen; ++j) {
+          PBQPNum c = edgeCosts[i][j] + xCosts[j];
+          if (c < min)
+            min = c;
+        }
+        yCosts[i] += min;
+      }
+    }
+
+    unlinkNode(xNodeItr);
+    pushStack(xNodeItr);
+  }
+
+  void processR2() {
+
+    GraphNodeIterator xNodeItr = r2Bucket.front();
+    r2Bucket.pop_front();
+
+    solution.incR2Reductions();
+
+    // Unlink is unsafe here. At some point it may optimistically move a node
+    // to a lower-degree list when its degree will later rise, or vice versa,
+    // violating the assumption that node degrees monotonically decrease
+    // during the reduction phase. Instead we'll bucket shuffle manually.
+ pushStack(xNodeItr); + + assert((g.getNodeData(xNodeItr).getLinkDegree() == 2) && + "Node in R2 bucket has degree != 2"); + + const Vector &xCosts = g.getNodeCosts(xNodeItr); + + typename NodeData::AdjLinkIterator tempItr = + g.getNodeData(xNodeItr).adjLinksBegin(); + + GraphEdgeIterator yxEdgeItr = *tempItr, + zxEdgeItr = *(++tempItr); + + GraphNodeIterator yNodeItr = g.getEdgeOtherNode(yxEdgeItr, xNodeItr), + zNodeItr = g.getEdgeOtherNode(zxEdgeItr, xNodeItr); + + removeFromBucket(yNodeItr); + removeFromBucket(zNodeItr); + + removeLink(yxEdgeItr, yNodeItr); + removeLink(zxEdgeItr, zNodeItr); + + // Graph some of the costs: + bool flipEdge1 = (g.getEdgeNode1Itr(yxEdgeItr) == xNodeItr), + flipEdge2 = (g.getEdgeNode1Itr(zxEdgeItr) == xNodeItr); + + const Matrix *yxCosts = flipEdge1 ? + new Matrix(g.getEdgeCosts(yxEdgeItr).transpose()) : + &g.getEdgeCosts(yxEdgeItr), + *zxCosts = flipEdge2 ? + new Matrix(g.getEdgeCosts(zxEdgeItr).transpose()) : + &g.getEdgeCosts(zxEdgeItr); + + unsigned xLen = xCosts.getLength(), + yLen = yxCosts->getRows(), + zLen = zxCosts->getRows(); + + // Compute delta: + Matrix delta(yLen, zLen); + + for (unsigned i = 0; i < yLen; ++i) { + for (unsigned j = 0; j < zLen; ++j) { + PBQPNum min = (*yxCosts)[i][0] + (*zxCosts)[j][0] + xCosts[0]; + for (unsigned k = 1; k < xLen; ++k) { + PBQPNum c = (*yxCosts)[i][k] + (*zxCosts)[j][k] + xCosts[k]; + if (c < min) { + min = c; + } + } + delta[i][j] = min; + } + } + + if (flipEdge1) + delete yxCosts; + + if (flipEdge2) + delete zxCosts; + + // Deal with the potentially induced yz edge. + GraphEdgeIterator yzEdgeItr = g.findEdge(yNodeItr, zNodeItr); + if (yzEdgeItr == g.edgesEnd()) { + yzEdgeItr = g.addEdge(yNodeItr, zNodeItr, delta, EdgeData(g)); + } + else { + // There was an edge, but we're going to screw with it. Delete the old + // link, update the costs. We'll re-link it later. + removeLinkR2(yzEdgeItr); + g.getEdgeCosts(yzEdgeItr) += + (yNodeItr == g.getEdgeNode1Itr(yzEdgeItr)) ? + delta : delta.transpose(); + } + + bool nullCostEdge = tryNormaliseEdgeMatrix(yzEdgeItr); + + // Nulled the edge, remove it entirely. + if (nullCostEdge) { + g.removeEdge(yzEdgeItr); + } + else { + // Edge remains - re-link it. + addLink(yzEdgeItr); + } + + addToBucket(yNodeItr); + addToBucket(zNodeItr); + } + + void computeTrivialSolutions() { + + for (NodeListIterator r0Itr = r0Bucket.begin(), r0End = r0Bucket.end(); + r0Itr != r0End; ++r0Itr) { + GraphNodeIterator nodeItr = *r0Itr; + + solution.incR0Reductions(); + setSolution(nodeItr, g.getNodeCosts(nodeItr).minIndex()); + } + + } + + void backpropagate() { + while (!stack.empty()) { + computeSolution(stack.back()); + stack.pop_back(); + } + } + + void computeSolution(const GraphNodeIterator &nodeItr) { + + NodeData &nodeData = g.getNodeData(nodeItr); + + Vector v(g.getNodeCosts(nodeItr)); + + // Solve based on existing links. 
+ for (typename NodeData::AdjLinkIterator + solvedLinkItr = nodeData.solvedLinksBegin(), + solvedLinkEnd = nodeData.solvedLinksEnd(); + solvedLinkItr != solvedLinkEnd; ++solvedLinkItr) { + + GraphEdgeIterator solvedEdgeItr(*solvedLinkItr); + Matrix &edgeCosts = g.getEdgeCosts(solvedEdgeItr); + + if (nodeItr == g.getEdgeNode1Itr(solvedEdgeItr)) { + GraphNodeIterator adjNode(g.getEdgeNode2Itr(solvedEdgeItr)); + unsigned adjSolution = + solution.getSelection(g.getNodeID(adjNode)); + v += edgeCosts.getColAsVector(adjSolution); + } + else { + GraphNodeIterator adjNode(g.getEdgeNode1Itr(solvedEdgeItr)); + unsigned adjSolution = + solution.getSelection(g.getNodeID(adjNode)); + v += edgeCosts.getRowAsVector(adjSolution); + } + + } + + setSolution(nodeItr, v.minIndex()); + } + + void computeSolutionCost(const SimpleGraph &orig) { + PBQPNum cost = 0.0; + + for (SimpleGraph::ConstNodeIterator + nodeItr = orig.nodesBegin(), nodeEnd = orig.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + unsigned nodeId = orig.getNodeID(nodeItr); + + cost += orig.getNodeCosts(nodeItr)[solution.getSelection(nodeId)]; + } + + for (SimpleGraph::ConstEdgeIterator + edgeItr = orig.edgesBegin(), edgeEnd = orig.edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + SimpleGraph::ConstNodeIterator n1 = orig.getEdgeNode1Itr(edgeItr), + n2 = orig.getEdgeNode2Itr(edgeItr); + unsigned sol1 = solution.getSelection(orig.getNodeID(n1)), + sol2 = solution.getSelection(orig.getNodeID(n2)); + + cost += orig.getEdgeCosts(edgeItr)[sol1][sol2]; + } + + solution.setSolutionCost(cost); + } + +}; + +template +class HeuristicSolver : public Solver { +public: + Solution solve(const SimpleGraph &g) const { + HeuristicSolverImpl solverImpl(g); + return solverImpl.getSolution(); + } +}; + +} + +#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h new file mode 100644 index 0000000000000..3ac9e707bab46 --- /dev/null +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -0,0 +1,383 @@ +//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the Briggs test for "allocability" of nodes in a +// PBQP graph representing a register allocation problem. Nodes which can be +// proven allocable (by a safe and relatively accurate test) are removed from +// the PBQP graph first. If no provably allocable node is present in the graph +// then the node with the minimal spill-cost to degree ratio is removed. 
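The allocability test that this heuristic maintains incrementally boils down to a single comparison; in spirit (a sketch of the predicate kept up to date by NodeData::addRemoveLink in the class below):

    // A node stays allocable while its neighbours cannot possibly deny it
    // every register option (numDenied < numRegOptions), or while at least
    // one option is never involved in an infinite-cost interference.
    inline bool briggsAllocable(unsigned numRegOptions,
                                unsigned numDenied,
                                unsigned numSafe) {
      return (numDenied < numRegOptions) || (numSafe > 0);
    }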
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+
+#include "../HeuristicSolver.h"
+
+#include <set>
+
+namespace PBQP {
+namespace Heuristics {
+
+class Briggs {
+  public:
+
+    class NodeData;
+    class EdgeData;
+
+  private:
+
+    typedef HeuristicSolverImpl<Briggs> Solver;
+    typedef HSITypes<NodeData, EdgeData> HSIT;
+    typedef HSIT::SolverGraph SolverGraph;
+    typedef HSIT::GraphNodeIterator GraphNodeIterator;
+    typedef HSIT::GraphEdgeIterator GraphEdgeIterator;
+
+    class LinkDegreeComparator {
+      public:
+        LinkDegreeComparator() : g(0) {}
+        LinkDegreeComparator(SolverGraph *g) : g(g) {}
+
+        bool operator()(const GraphNodeIterator &node1Itr,
+                        const GraphNodeIterator &node2Itr) const {
+          assert((g != 0) && "Graph object not set, cannot access node data.");
+          unsigned n1Degree = g->getNodeData(node1Itr).getLinkDegree(),
+                   n2Degree = g->getNodeData(node2Itr).getLinkDegree();
+          if (n1Degree > n2Degree) {
+            return true;
+          }
+          else if (n1Degree < n2Degree) {
+            return false;
+          }
+          // else they're "equal" by degree, differentiate based on ID.
+          return g->getNodeID(node1Itr) < g->getNodeID(node2Itr);
+        }
+
+      private:
+        SolverGraph *g;
+    };
+
+    class SpillPriorityComparator {
+      public:
+        SpillPriorityComparator() : g(0) {}
+        SpillPriorityComparator(SolverGraph *g) : g(g) {}
+
+        bool operator()(const GraphNodeIterator &node1Itr,
+                        const GraphNodeIterator &node2Itr) const {
+          assert((g != 0) && "Graph object not set, cannot access node data.");
+          PBQPNum cost1 =
+            g->getNodeCosts(node1Itr)[0] /
+            g->getNodeData(node1Itr).getLinkDegree(),
+                  cost2 =
+            g->getNodeCosts(node2Itr)[0] /
+            g->getNodeData(node2Itr).getLinkDegree();
+
+          if (cost1 < cost2) {
+            return true;
+          }
+          else if (cost1 > cost2) {
+            return false;
+          }
+          // else they're "equal" again, differentiate based on ID again.
+          return g->getNodeID(node1Itr) < g->getNodeID(node2Itr);
+        }
+
+      private:
+        SolverGraph *g;
+    };
+
+    typedef std::set<GraphNodeIterator, LinkDegreeComparator>
+      RNAllocableNodeList;
+    typedef RNAllocableNodeList::iterator RNAllocableNodeListIterator;
+
+    typedef std::set<GraphNodeIterator, SpillPriorityComparator>
+      RNUnallocableNodeList;
+    typedef RNUnallocableNodeList::iterator RNUnallocableNodeListIterator;
+
+  public:
+
+    class NodeData {
+      private:
+        RNAllocableNodeListIterator rNAllocableNodeListItr;
+        RNUnallocableNodeListIterator rNUnallocableNodeListItr;
+        unsigned numRegOptions, numDenied, numSafe;
+        std::vector<unsigned> unsafeDegrees;
+        bool allocable;
+
+        void addRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr,
+                           const GraphEdgeIterator &edgeItr, bool add) {
+
+          // Assume we're adding...
+ unsigned udTarget = 0, dir = 1; + + if (!add) { + udTarget = 1; + dir = ~0; + } + + EdgeData &linkEdgeData = g.getEdgeData(edgeItr).getHeuristicData(); + + EdgeData::ConstUnsafeIterator edgeUnsafeBegin, edgeUnsafeEnd; + + if (nodeItr == g.getEdgeNode1Itr(edgeItr)) { + numDenied += (dir * linkEdgeData.getWorstDegree()); + edgeUnsafeBegin = linkEdgeData.unsafeBegin(); + edgeUnsafeEnd = linkEdgeData.unsafeEnd(); + } + else { + numDenied += (dir * linkEdgeData.getReverseWorstDegree()); + edgeUnsafeBegin = linkEdgeData.reverseUnsafeBegin(); + edgeUnsafeEnd = linkEdgeData.reverseUnsafeEnd(); + } + + assert((unsafeDegrees.size() == + static_cast( + std::distance(edgeUnsafeBegin, edgeUnsafeEnd))) + && "Unsafe array size mismatch."); + + std::vector::iterator unsafeDegreesItr = + unsafeDegrees.begin(); + + for (EdgeData::ConstUnsafeIterator edgeUnsafeItr = edgeUnsafeBegin; + edgeUnsafeItr != edgeUnsafeEnd; + ++edgeUnsafeItr, ++unsafeDegreesItr) { + + if ((*edgeUnsafeItr == 1) && (*unsafeDegreesItr == udTarget)) { + numSafe -= dir; + } + *unsafeDegreesItr += (dir * (*edgeUnsafeItr)); + } + + allocable = (numDenied < numRegOptions) || (numSafe > 0); + } + + public: + + void setup(SolverGraph &g, const GraphNodeIterator &nodeItr) { + + numRegOptions = g.getNodeCosts(nodeItr).getLength() - 1; + + numSafe = numRegOptions; // Optimistic, correct below. + numDenied = 0; // Also optimistic. + unsafeDegrees.resize(numRegOptions, 0); + + HSIT::NodeData &nodeData = g.getNodeData(nodeItr); + + for (HSIT::NodeData::AdjLinkIterator + adjLinkItr = nodeData.adjLinksBegin(), + adjLinkEnd = nodeData.adjLinksEnd(); + adjLinkItr != adjLinkEnd; ++adjLinkItr) { + + addRemoveLink(g, nodeItr, *adjLinkItr, true); + } + } + + bool isAllocable() const { return allocable; } + + void handleAddLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &adjEdge) { + addRemoveLink(g, nodeItr, adjEdge, true); + } + + void handleRemoveLink(SolverGraph &g, const GraphNodeIterator &nodeItr, + const GraphEdgeIterator &adjEdge) { + addRemoveLink(g, nodeItr, adjEdge, false); + } + + void setRNAllocableNodeListItr( + const RNAllocableNodeListIterator &rNAllocableNodeListItr) { + + this->rNAllocableNodeListItr = rNAllocableNodeListItr; + } + + RNAllocableNodeListIterator getRNAllocableNodeListItr() const { + return rNAllocableNodeListItr; + } + + void setRNUnallocableNodeListItr( + const RNUnallocableNodeListIterator &rNUnallocableNodeListItr) { + + this->rNUnallocableNodeListItr = rNUnallocableNodeListItr; + } + + RNUnallocableNodeListIterator getRNUnallocableNodeListItr() const { + return rNUnallocableNodeListItr; + } + + + }; + + class EdgeData { + private: + + typedef std::vector UnsafeArray; + + unsigned worstDegree, + reverseWorstDegree; + UnsafeArray unsafe, reverseUnsafe; + + public: + + EdgeData() : worstDegree(0), reverseWorstDegree(0) {} + + typedef UnsafeArray::const_iterator ConstUnsafeIterator; + + void setup(SolverGraph &g, const GraphEdgeIterator &edgeItr) { + const Matrix &edgeCosts = g.getEdgeCosts(edgeItr); + unsigned numRegs = edgeCosts.getRows() - 1, + numReverseRegs = edgeCosts.getCols() - 1; + + unsafe.resize(numRegs, 0); + reverseUnsafe.resize(numReverseRegs, 0); + + std::vector rowInfCounts(numRegs, 0), + colInfCounts(numReverseRegs, 0); + + for (unsigned i = 0; i < numRegs; ++i) { + for (unsigned j = 0; j < numReverseRegs; ++j) { + if (edgeCosts[i + 1][j + 1] == + std::numeric_limits::infinity()) { + unsafe[i] = 1; + reverseUnsafe[j] = 1; + ++rowInfCounts[i]; + ++colInfCounts[j]; + + if 
(colInfCounts[j] > worstDegree) { + worstDegree = colInfCounts[j]; + } + + if (rowInfCounts[i] > reverseWorstDegree) { + reverseWorstDegree = rowInfCounts[i]; + } + } + } + } + } + + unsigned getWorstDegree() const { return worstDegree; } + unsigned getReverseWorstDegree() const { return reverseWorstDegree; } + ConstUnsafeIterator unsafeBegin() const { return unsafe.begin(); } + ConstUnsafeIterator unsafeEnd() const { return unsafe.end(); } + ConstUnsafeIterator reverseUnsafeBegin() const { + return reverseUnsafe.begin(); + } + ConstUnsafeIterator reverseUnsafeEnd() const { + return reverseUnsafe.end(); + } + }; + + void initialise(Solver &solver) { + this->s = &solver; + g = &s->getGraph(); + rNAllocableBucket = RNAllocableNodeList(LinkDegreeComparator(g)); + rNUnallocableBucket = + RNUnallocableNodeList(SpillPriorityComparator(g)); + + for (GraphEdgeIterator + edgeItr = g->edgesBegin(), edgeEnd = g->edgesEnd(); + edgeItr != edgeEnd; ++edgeItr) { + + g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); + } + + for (GraphNodeIterator + nodeItr = g->nodesBegin(), nodeEnd = g->nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + + g->getNodeData(nodeItr).getHeuristicData().setup(*g, nodeItr); + } + } + + void addToRNBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + + if (nodeData.isAllocable()) { + nodeData.setRNAllocableNodeListItr( + rNAllocableBucket.insert(rNAllocableBucket.begin(), nodeItr)); + } + else { + nodeData.setRNUnallocableNodeListItr( + rNUnallocableBucket.insert(rNUnallocableBucket.begin(), nodeItr)); + } + } + + void removeFromRNBucket(const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + + if (nodeData.isAllocable()) { + rNAllocableBucket.erase(nodeData.getRNAllocableNodeListItr()); + } + else { + rNUnallocableBucket.erase(nodeData.getRNUnallocableNodeListItr()); + } + } + + void handleAddLink(const GraphEdgeIterator &edgeItr) { + // We assume that if we got here this edge is attached to at least + // one high degree node. 
+ g->getEdgeData(edgeItr).getHeuristicData().setup(*g, edgeItr); + + GraphNodeIterator n1Itr = g->getEdgeNode1Itr(edgeItr), + n2Itr = g->getEdgeNode2Itr(edgeItr); + + HSIT::NodeData &n1Data = g->getNodeData(n1Itr), + &n2Data = g->getNodeData(n2Itr); + + if (n1Data.getLinkDegree() > 2) { + n1Data.getHeuristicData().handleAddLink(*g, n1Itr, edgeItr); + } + if (n2Data.getLinkDegree() > 2) { + n2Data.getHeuristicData().handleAddLink(*g, n2Itr, edgeItr); + } + } + + void handleRemoveLink(const GraphEdgeIterator &edgeItr, + const GraphNodeIterator &nodeItr) { + NodeData &nodeData = g->getNodeData(nodeItr).getHeuristicData(); + nodeData.handleRemoveLink(*g, nodeItr, edgeItr); + } + + void processRN() { + + if (!rNAllocableBucket.empty()) { + GraphNodeIterator selectedNodeItr = *rNAllocableBucket.begin(); + //std::cerr << "RN safely pushing " << g->getNodeID(selectedNodeItr) << "\n"; + rNAllocableBucket.erase(rNAllocableBucket.begin()); + s->pushStack(selectedNodeItr); + s->unlinkNode(selectedNodeItr); + } + else { + GraphNodeIterator selectedNodeItr = *rNUnallocableBucket.begin(); + //std::cerr << "RN optimistically pushing " << g->getNodeID(selectedNodeItr) << "\n"; + rNUnallocableBucket.erase(rNUnallocableBucket.begin()); + s->pushStack(selectedNodeItr); + s->unlinkNode(selectedNodeItr); + } + + } + + bool rNBucketEmpty() const { + return (rNAllocableBucket.empty() && rNUnallocableBucket.empty()); + } + +private: + + Solver *s; + SolverGraph *g; + RNAllocableNodeList rNAllocableBucket; + RNUnallocableNodeList rNUnallocableBucket; +}; + + + +} +} + + +#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h new file mode 100644 index 0000000000000..11f4b4b4e34c0 --- /dev/null +++ b/lib/CodeGen/PBQP/PBQPMath.h @@ -0,0 +1,288 @@ +//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PBQP_PBQPMATH_H +#define LLVM_CODEGEN_PBQP_PBQPMATH_H + +#include +#include +#include + +namespace PBQP { + +typedef double PBQPNum; + +/// \brief PBQP Vector class. +class Vector { + public: + + /// \brief Construct a PBQP vector of the given size. + explicit Vector(unsigned length) : + length(length), data(new PBQPNum[length]) { + } + + /// \brief Construct a PBQP vector with initializer. + Vector(unsigned length, PBQPNum initVal) : + length(length), data(new PBQPNum[length]) { + std::fill(data, data + length, initVal); + } + + /// \brief Copy construct a PBQP vector. + Vector(const Vector &v) : + length(v.length), data(new PBQPNum[length]) { + std::copy(v.data, v.data + length, data); + } + + /// \brief Destroy this vector, return its memory. + ~Vector() { delete[] data; } + + /// \brief Assignment operator. + Vector& operator=(const Vector &v) { + delete[] data; + length = v.length; + data = new PBQPNum[length]; + std::copy(v.data, v.data + length, data); + return *this; + } + + /// \brief Return the length of the vector + unsigned getLength() const { + return length; + } + + /// \brief Element access. + PBQPNum& operator[](unsigned index) { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Const element access. 
+ const PBQPNum& operator[](unsigned index) const { + assert(index < length && "Vector element access out of bounds."); + return data[index]; + } + + /// \brief Add another vector to this one. + Vector& operator+=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::plus()); + return *this; + } + + /// \brief Subtract another vector from this one. + Vector& operator-=(const Vector &v) { + assert(length == v.length && "Vector length mismatch."); + std::transform(data, data + length, v.data, data, std::minus()); + return *this; + } + + /// \brief Returns the index of the minimum value in this vector + unsigned minIndex() const { + return std::min_element(data, data + length) - data; + } + + private: + unsigned length; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given vector on the given +/// output stream. +template +OStream& operator<<(OStream &os, const Vector &v) { + assert((v.getLength() != 0) && "Zero-length vector badness."); + + os << "[ " << v[0]; + for (unsigned i = 1; i < v.getLength(); ++i) { + os << ", " << v[i]; + } + os << " ]"; + + return os; +} + + +/// \brief PBQP Matrix class +class Matrix { + public: + + /// \brief Construct a PBQP Matrix with the given dimensions. + Matrix(unsigned rows, unsigned cols) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + } + + /// \brief Construct a PBQP Matrix with the given dimensions and initial + /// value. + Matrix(unsigned rows, unsigned cols, PBQPNum initVal) : + rows(rows), cols(cols), data(new PBQPNum[rows * cols]) { + std::fill(data, data + (rows * cols), initVal); + } + + /// \brief Copy construct a PBQP matrix. + Matrix(const Matrix &m) : + rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) { + std::copy(m.data, m.data + (rows * cols), data); + } + + /// \brief Destroy this matrix, return its memory. + ~Matrix() { delete[] data; } + + /// \brief Assignment operator. + Matrix& operator=(const Matrix &m) { + delete[] data; + rows = m.rows; cols = m.cols; + data = new PBQPNum[rows * cols]; + std::copy(m.data, m.data + (rows * cols), data); + return *this; + } + + /// \brief Return the number of rows in this matrix. + unsigned getRows() const { return rows; } + + /// \brief Return the number of cols in this matrix. + unsigned getCols() const { return cols; } + + /// \brief Matrix element access. + PBQPNum* operator[](unsigned r) { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Matrix element access. + const PBQPNum* operator[](unsigned r) const { + assert(r < rows && "Row out of bounds."); + return data + (r * cols); + } + + /// \brief Returns the given row as a vector. + Vector getRowAsVector(unsigned r) const { + Vector v(cols); + for (unsigned c = 0; c < cols; ++c) + v[c] = (*this)[r][c]; + return v; + } + + /// \brief Returns the given column as a vector. + Vector getColAsVector(unsigned c) const { + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][c]; + return v; + } + + /// \brief Reset the matrix to the given value. + Matrix& reset(PBQPNum val = 0) { + std::fill(data, data + (rows * cols), val); + return *this; + } + + /// \brief Set a single row of this matrix to the given value. + Matrix& setRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds."); + std::fill(data + (r * cols), data + ((r + 1) * cols), val); + return *this; + } + + /// \brief Set a single column of this matrix to the given value. 
+ Matrix& setCol(unsigned c, PBQPNum val) { + assert(c < cols && "Column out of bounds."); + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] = val; + return *this; + } + + /// \brief Matrix transpose. + Matrix transpose() const { + Matrix m(cols, rows); + for (unsigned r = 0; r < rows; ++r) + for (unsigned c = 0; c < cols; ++c) + m[c][r] = (*this)[r][c]; + return m; + } + + /// \brief Returns the diagonal of the matrix as a vector. + /// + /// Matrix must be square. + Vector diagonalize() const { + assert(rows == cols && "Attempt to diagonalize non-square matrix."); + + Vector v(rows); + for (unsigned r = 0; r < rows; ++r) + v[r] = (*this)[r][r]; + return v; + } + + /// \brief Add the given matrix to this one. + Matrix& operator+=(const Matrix &m) { + assert(rows == m.rows && cols == m.cols && + "Matrix dimensions mismatch."); + std::transform(data, data + (rows * cols), m.data, data, + std::plus()); + return *this; + } + + /// \brief Returns the minimum of the given row + PBQPNum getRowMin(unsigned r) const { + assert(r < rows && "Row out of bounds"); + return *std::min_element(data + (r * cols), data + ((r + 1) * cols)); + } + + /// \brief Returns the minimum of the given column + PBQPNum getColMin(unsigned c) const { + PBQPNum minElem = (*this)[0][c]; + for (unsigned r = 1; r < rows; ++r) + if ((*this)[r][c] < minElem) minElem = (*this)[r][c]; + return minElem; + } + + /// \brief Subtracts the given scalar from the elements of the given row. + Matrix& subFromRow(unsigned r, PBQPNum val) { + assert(r < rows && "Row out of bounds"); + std::transform(data + (r * cols), data + ((r + 1) * cols), + data + (r * cols), + std::bind2nd(std::minus(), val)); + return *this; + } + + /// \brief Subtracts the given scalar from the elements of the given column. + Matrix& subFromCol(unsigned c, PBQPNum val) { + for (unsigned r = 0; r < rows; ++r) + (*this)[r][c] -= val; + return *this; + } + + /// \brief Returns true if this is a zero matrix. + bool isZero() const { + return find_if(data, data + (rows * cols), + std::bind2nd(std::not_equal_to(), 0)) == + data + (rows * cols); + } + + private: + unsigned rows, cols; + PBQPNum *data; +}; + +/// \brief Output a textual representation of the given matrix on the given +/// output stream. +template +OStream& operator<<(OStream &os, const Matrix &m) { + + assert((m.getRows() != 0) && "Zero-row matrix badness."); + + for (unsigned i = 0; i < m.getRows(); ++i) { + os << m.getRowAsVector(i); + } + + return os; +} + +} + +#endif // LLVM_CODEGEN_PBQP_PBQPMATH_HPP diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h new file mode 100644 index 0000000000000..1ca9caee3467f --- /dev/null +++ b/lib/CodeGen/PBQP/SimpleGraph.h @@ -0,0 +1,100 @@ +//===-- SimpleGraph.h - Simple PBQP Graph ----------------------*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simple PBQP graph class representing a PBQP problem. Graphs of this type +// can be passed to a PBQPSolver instance to solve the PBQP problem. 
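Putting the pieces together: typical use is to populate a SimpleGraph, hand it to one of the solvers above, and read selections back by node ID. A minimal sketch (assuming the headers added in this patch):

    #include "SimpleGraph.h"
    #include "HeuristicSolver.h"
    #include "Heuristics/Briggs.h"

    PBQP::Solution solveProblem(PBQP::SimpleGraph &problem) {
      // The heuristic solver copies the graph and requires valid node IDs.
      if (!problem.areNodeIDsValid())
        problem.assignNodeIDs();

      PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver;
      PBQP::Solution solution = solver.solve(problem);

      // solution.getSelection(nodeID) now gives the chosen option per node.
      return solution;
    }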
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H
+#define LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H
+
+#include "GraphBase.h"
+
+namespace PBQP {
+
+class SimpleEdge;
+
+class SimpleNode : public NodeBase<SimpleNode, SimpleEdge> {
+public:
+  SimpleNode(const Vector &costs) :
+    NodeBase<SimpleNode, SimpleEdge>(costs) {}
+};
+
+class SimpleEdge : public EdgeBase<SimpleNode, SimpleEdge> {
+public:
+  SimpleEdge(const NodeIterator &node1Itr, const NodeIterator &node2Itr,
+             const Matrix &costs) :
+    EdgeBase<SimpleNode, SimpleEdge>(node1Itr, node2Itr, costs) {}
+};
+
+class SimpleGraph : public GraphBase<SimpleNode, SimpleEdge> {
+private:
+
+  typedef GraphBase<SimpleNode, SimpleEdge> PGraph;
+
+  void copyFrom(const SimpleGraph &other) {
+    assert(other.areNodeIDsValid() &&
+           "Cannot copy from another graph unless IDs have been assigned.");
+
+    std::vector<NodeIterator> newNodeItrs(other.getNumNodes());
+
+    for (ConstNodeIterator nItr = other.nodesBegin(), nEnd = other.nodesEnd();
+         nItr != nEnd; ++nItr) {
+      newNodeItrs[other.getNodeID(nItr)] = addNode(other.getNodeCosts(nItr));
+    }
+
+    for (ConstEdgeIterator eItr = other.edgesBegin(), eEnd = other.edgesEnd();
+         eItr != eEnd; ++eItr) {
+
+      unsigned node1ID = other.getNodeID(other.getEdgeNode1Itr(eItr)),
+               node2ID = other.getNodeID(other.getEdgeNode2Itr(eItr));
+
+      addEdge(newNodeItrs[node1ID], newNodeItrs[node2ID],
+              other.getEdgeCosts(eItr));
+    }
+  }
+
+  void copyFrom(SimpleGraph &other) {
+    if (!other.areNodeIDsValid()) {
+      other.assignNodeIDs();
+    }
+    copyFrom(const_cast<const SimpleGraph&>(other));
+  }
+
+public:
+
+  SimpleGraph() {}
+
+
+  SimpleGraph(const SimpleGraph &other) : PGraph() {
+    copyFrom(other);
+  }
+
+  SimpleGraph& operator=(const SimpleGraph &other) {
+    clear();
+    copyFrom(other);
+    return *this;
+  }
+
+  NodeIterator addNode(const Vector &costs) {
+    return PGraph::addConstructedNode(SimpleNode(costs));
+  }
+
+  EdgeIterator addEdge(const NodeIterator &node1Itr,
+                       const NodeIterator &node2Itr,
+                       const Matrix &costs) {
+    return PGraph::addConstructedEdge(SimpleEdge(node1Itr, node2Itr, costs));
+  }
+
+};
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SIMPLEGRAPH_H
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
new file mode 100644
index 0000000000000..c91e2fa560a08
--- /dev/null
+++ b/lib/CodeGen/PBQP/Solution.h
@@ -0,0 +1,88 @@
+//===-- Solution.h ------- PBQP Solution -----------------------*- C++ --*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Solution class. Records the selection made for each node, the cost
+// of the solution, and statistics about the reductions used to reach it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
+#define LLVM_CODEGEN_PBQP_SOLUTION_H
+
+#include "PBQPMath.h"
+
+namespace PBQP {
+
+class Solution {
+
+  friend class SolverImplementation;
+
+private:
+
+  std::vector<unsigned> selections;
+  PBQPNum solutionCost;
+  bool provedOptimal;
+  unsigned r0Reductions, r1Reductions,
+           r2Reductions, rNReductions;
+
+public:
+
+  Solution() :
+    solutionCost(0.0), provedOptimal(false),
+    r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
+
+  Solution(unsigned length, bool assumeOptimal) :
+    selections(length), solutionCost(0.0), provedOptimal(assumeOptimal),
+    r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
+
+  void setProvedOptimal(bool provedOptimal) {
+    this->provedOptimal = provedOptimal;
+  }
+
+  void setSelection(unsigned nodeID, unsigned selection) {
+    selections[nodeID] = selection;
+  }
+
+  void setSolutionCost(PBQPNum solutionCost) {
+    this->solutionCost = solutionCost;
+  }
+
+  void incR0Reductions() { ++r0Reductions; }
+  void incR1Reductions() { ++r1Reductions; }
+  void incR2Reductions() { ++r2Reductions; }
+  void incRNReductions() { ++rNReductions; }
+
+  unsigned numNodes() const { return selections.size(); }
+
+  unsigned getSelection(unsigned nodeID) const {
+    return selections[nodeID];
+  }
+
+  PBQPNum getCost() const { return solutionCost; }
+
+  bool isProvedOptimal() const { return provedOptimal; }
+
+  unsigned getR0Reductions() const { return r0Reductions; }
+  unsigned getR1Reductions() const { return r1Reductions; }
+  unsigned getR2Reductions() const { return r2Reductions; }
+  unsigned getRNReductions() const { return rNReductions; }
+
+  bool operator==(const Solution &other) const {
+    return (selections == other.selections);
+  }
+
+  bool operator!=(const Solution &other) const {
+    return !(*this == other);
+  }
+
+};
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h
new file mode 100644
index 0000000000000..a9c5f837c453e
--- /dev/null
+++ b/lib/CodeGen/PBQP/Solver.h
@@ -0,0 +1,31 @@
+//===-- Solver.h ------- PBQP solver interface -----------------*- C++ --*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PBQP_SOLVER_H
+#define LLVM_CODEGEN_PBQP_SOLVER_H
+
+#include "SimpleGraph.h"
+#include "Solution.h"
+
+namespace PBQP {
+
+/// \brief Interface for solver classes.
+class Solver {
+public:
+
+  virtual ~Solver() = 0;
+  virtual Solution solve(const SimpleGraph &orig) const = 0;
+};
+
+Solver::~Solver() {}
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLVER_H
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index c5c76fc79467d..8071b0a81a89b 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "phielim"
+#include "PHIElimination.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Instructions.h"
 #include "llvm/CodeGen/LiveVariables.h"
@@ -22,7 +23,6 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
@@ -34,78 +34,25 @@ using namespace llvm;
 
 STATISTIC(NumAtomic, "Number of atomic phis lowered");
 
-namespace {
-  class VISIBILITY_HIDDEN PNE : public MachineFunctionPass {
-    MachineRegisterInfo *MRI; // Machine register information
-
-  public:
-    static char ID; // Pass identification, replacement for typeid
-    PNE() : MachineFunctionPass(&ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &Fn);
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addPreserved<LiveVariables>();
-      AU.addPreservedID(MachineLoopInfoID);
-      AU.addPreservedID(MachineDominatorsID);
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-
-  private:
-    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
-    /// in predecessor basic blocks.
-    ///
-    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
-    void LowerAtomicPHINode(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator AfterPHIsIt);
-
-    /// analyzePHINodes - Gather information about the PHI nodes in
-    /// here. In particular, we want to map the number of uses of a virtual
-    /// register which is used in a PHI node. We map that to the BB the
-    /// vreg is coming from. This is used later to determine when the vreg
-    /// is killed in the BB.
-    ///
-    void analyzePHINodes(const MachineFunction& Fn);
-
-    // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
-    // SrcReg. This needs to be after any def or uses of SrcReg, but before
-    // any subsequent point where control flow might jump out of the basic
-    // block.
-    MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
-                                                    unsigned SrcReg);
-
-    // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
-    // also after any exception handling labels: in landing pads execution
-    // starts at the label, so any copies placed before it won't be executed!
-    MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I) {
-      // Rather than assuming that EH labels come before other kinds of labels,
-      // just skip all labels.
-      while (I != MBB.end() &&
-             (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel()))
-        ++I;
-      return I;
-    }
-
-    typedef std::pair<unsigned, unsigned> BBVRegPair;
-    typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
-
-    VRegPHIUse VRegPHIUseCount;
-
-    // Defs of PHI sources which are implicit_def.
-    SmallPtrSet<MachineInstr*, 4> ImpDefs;
-  };
-}
-
-char PNE::ID = 0;
-static RegisterPass<PNE>
+char PHIElimination::ID = 0;
+static RegisterPass<PHIElimination>
 X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
 
 const PassInfo *const llvm::PHIEliminationID = &X;
 
-bool PNE::runOnMachineFunction(MachineFunction &Fn) {
+void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addPreserved<LiveVariables>();
+  AU.addPreservedID(MachineLoopInfoID);
+  AU.addPreservedID(MachineDominatorsID);
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
   MRI = &Fn.getRegInfo();
 
+  PHIDefs.clear();
+  PHIKills.clear();
   analyzePHINodes(Fn);
 
   bool Changed = false;
@@ -132,7 +79,8 @@ bool PNE::runOnMachineFunction(MachineFunction &Fn) {
 /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
 /// predecessor basic blocks.
 ///
-bool PNE::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB) {
+bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF,
+                                             MachineBasicBlock &MBB) {
   if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
     return false;   // Quick exit for basic blocks without PHIs.
 
@@ -162,8 +110,9 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
 // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
 // This needs to be after any def or uses of SrcReg, but before any subsequent
 // point where control flow might jump out of the basic block.
-MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB,
-                                                     unsigned SrcReg) {
+MachineBasicBlock::iterator
+llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
+                                          unsigned SrcReg) {
   // Handle the trivial case trivially.
   if (MBB.empty())
     return MBB.begin();
@@ -206,9 +155,10 @@ MachineBasicBlock::iterator PNE::FindCopyInsertPoint(MachineBasicBlock &MBB,
 /// under the assumption that it needs to be lowered in a way that supports
 /// atomic execution of PHIs. This lowering method is always correct.
-///
-void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
-                             MachineBasicBlock::iterator AfterPHIsIt) {
+///
+void llvm::PHIElimination::LowerAtomicPHINode(
+                                      MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator AfterPHIsIt) {
   // Unlink the PHI node from the basic block, but don't delete the PHI yet.
   MachineInstr *MPhi = MBB.remove(MBB.begin());
 
@@ -235,6 +185,10 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
     TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
   }
 
+  // Record PHI def.
+  assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
+  PHIDefs[DestReg] = &MBB;
+
   // Update live variable information if there is any.
   LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
   if (LV) {
@@ -276,6 +230,13 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
     assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
            "Machine PHI Operands must all be virtual registers!");
 
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path of the PHI.
+    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+    // Record the kill.
+    PHIKills[SrcReg].insert(&opBlock);
+
     // If source is defined by an implicit def, there is no need to insert a
     // copy.
     MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
@@ -284,10 +245,6 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
       continue;
     }
 
-    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
-    // source path of the PHI.
-    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
-
     // Check to make sure we haven't already emitted the copy for this block.
     // This can happen because PHI nodes may have multiple entries for the same
     // basic block.
@@ -420,7 +377,7 @@ void PNE::LowerAtomicPHINode(MachineBasicBlock &MBB,
 /// used in a PHI node. We map that to the BB the vreg is coming from. This is
 /// used later to determine when the vreg is killed in the BB.
 ///
-void PNE::analyzePHINodes(const MachineFunction& Fn) {
+void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) {
   for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
        I != E; ++I)
     for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
new file mode 100644
index 0000000000000..3d02dfdcddba2
--- /dev/null
+++ b/lib/CodeGen/PHIElimination.h
@@ -0,0 +1,125 @@
+//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP
+#define LLVM_CODEGEN_PHIELIMINATION_HPP
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <map>
+
+namespace llvm {
+
+  /// Lower PHI instructions to copies.
+  class PHIElimination : public MachineFunctionPass {
+    MachineRegisterInfo *MRI; // Machine register information
+
+  private:
+
+    typedef SmallSet<MachineBasicBlock*, 2> PHIKillList;
+    typedef DenseMap<unsigned, PHIKillList> PHIKillMap;
+    typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap;
+
+  public:
+
+    typedef PHIKillList::iterator phi_kill_iterator;
+    typedef PHIKillList::const_iterator const_phi_kill_iterator;
+
+    static char ID; // Pass identification, replacement for typeid
+    PHIElimination() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    /// Return true if the given vreg was defined by a PHI instr prior to
+    /// lowering.
+    bool hasPHIDef(unsigned vreg) const {
+      return PHIDefs.count(vreg);
+    }
+
+    /// Returns the block in which the PHI instruction which defined the
+    /// given vreg used to reside.
+    MachineBasicBlock* getPHIDefBlock(unsigned vreg) {
+      PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg);
+      assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def.");
+      return phiDefItr->second;
+    }
+
+    /// Returns true if the given vreg was killed by a PHI instr.
+    bool hasPHIKills(unsigned vreg) const {
+      return PHIKills.count(vreg);
+    }
+
+    /// Returns an iterator over the BasicBlocks which contained PHI
+    /// kills of this register prior to lowering.
+    phi_kill_iterator phiKillsBegin(unsigned vreg) {
+      PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
+      assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
+      return phiKillItr->second.begin();
+    }
+    phi_kill_iterator phiKillsEnd(unsigned vreg) {
+      PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
+      assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
+      return phiKillItr->second.end();
+    }
+
+  private:
+    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+    /// in predecessor basic blocks.
+    ///
+    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+    void LowerAtomicPHINode(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in
+    /// here. In particular, we want to map the number of uses of a virtual
+    /// register which is used in a PHI node. We map that to the BB the
+    /// vreg is coming from. This is used later to determine when the vreg
+    /// is killed in the BB.
+    ///
+    void analyzePHINodes(const MachineFunction& Fn);
+
+    // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+    // SrcReg. This needs to be after any def or uses of SrcReg, but before
+    // any subsequent point where control flow might jump out of the basic
+    // block.
+    MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+                                                    unsigned SrcReg);
+
+    // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
+    // also after any exception handling labels: in landing pads execution
+    // starts at the label, so any copies placed before it won't be executed!
+    MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
+                                                  MachineBasicBlock::iterator I) {
+      // Rather than assuming that EH labels come before other kinds of labels,
+      // just skip all labels.
+      while (I != MBB.end() &&
+             (I->getOpcode() == TargetInstrInfo::PHI || I->isLabel()))
+        ++I;
+      return I;
+    }
+
+    typedef std::pair<unsigned, unsigned> BBVRegPair;
+    typedef std::map<BBVRegPair, unsigned> VRegPHIUse;
+
+    VRegPHIUse VRegPHIUseCount;
+    PHIDefMap PHIDefs;
+    PHIKillMap PHIKills;
+
+    // Defs of PHI sources which are implicit_def.
+    SmallPtrSet<MachineInstr*, 4> ImpDefs;
+  };
+
+}
+
+#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index de7746855b3f4..e52158cfeb4e7 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -19,45 +19,73 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "post-RA-sched"
+#include "ExactHazardRecognizer.h"
+#include "SimpleHazardRecognizer.h"
 #include "ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
 #include <map>
+#include <set>
 using namespace llvm;
 
 STATISTIC(NumNoops, "Number of noops inserted");
 STATISTIC(NumStalls, "Number of pipeline stalls");
 
+// Post-RA scheduling is enabled with
+// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
+// override the target.
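[Editorial aside, not part of the upstream patch: the flag declared just below follows a common "explicit command-line setting wins, otherwise defer to the target" pattern, resolved later in runOnMachineFunction via getPosition(). A hedged sketch of that pattern; the helper name is hypothetical, and only cl::opt<bool> and Option::getPosition() from llvm/Support/CommandLine.h are assumed:]

    // Hypothetical sketch of the override pattern used below: an explicit
    // command-line occurrence wins; otherwise the target's preference holds.
    // getPosition() returns 0 when the flag never appeared on the command line.
    #include "llvm/Support/CommandLine.h"

    bool shouldRunPostRASched(const llvm::cl::opt<bool> &Flag,
                              bool TargetDefault) {
      if (Flag.getPosition() > 0)   // flag given explicitly
        return Flag;
      return TargetDefault;         // defer to the subtarget's answer
    }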
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+                      cl::desc("Enable scheduling after register allocation"),
+                      cl::init(false), cl::Hidden);
 static cl::opt<bool>
 EnableAntiDepBreaking("break-anti-dependencies",
                       cl::desc("Break post-RA scheduling anti-dependencies"),
                       cl::init(true), cl::Hidden);
-
 static cl::opt<bool>
 EnablePostRAHazardAvoidance("avoid-hazards",
-                      cl::desc("Enable simple hazard-avoidance"),
+                      cl::desc("Enable exact hazard avoidance"),
                       cl::init(true), cl::Hidden);
 
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+         cl::desc("Debug control MBBs that are scheduled"),
+         cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+         cl::desc("Debug control MBBs that are scheduled"),
+         cl::init(0), cl::Hidden);
+
 namespace {
   class VISIBILITY_HIDDEN PostRAScheduler : public MachineFunctionPass {
+    AliasAnalysis *AA;
+
   public:
     static char ID;
     PostRAScheduler() : MachineFunctionPass(&ID) {}
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<AliasAnalysis>();
       AU.addRequired<MachineDominatorTree>();
       AU.addPreserved<MachineDominatorTree>();
       AU.addRequired<MachineLoopInfo>();
@@ -95,6 +123,9 @@ namespace {
     /// HazardRec - The hazard recognizer to use.
     ScheduleHazardRecognizer *HazardRec;
 
+    /// AA - AliasAnalysis for making memory reference queries.
+    AliasAnalysis *AA;
+
     /// Classes - For live regs that are only used in one register class in a
     /// live range, the register class. If the register is not live, the
     /// corresponding value is null. If the register is live but used in
@@ -106,22 +137,27 @@ namespace {
     /// RegRegs - Map registers to all their references within a live range.
     std::multimap<unsigned, MachineOperand *> RegRefs;
 
-    /// The index of the most recent kill (proceeding bottom-up), or ~0u if
-    /// the register is not live.
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+    /// or ~0u if the register is not live.
     unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
 
-    /// The index of the most recent complete def (proceeding bottom up), or
-    /// ~0u if the register is live.
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom up), or ~0u if the register is live.
     unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
 
+    /// KeepRegs - A set of registers which are live and cannot be changed to
+    /// break anti-dependencies.
+    SmallSet<unsigned, 4> KeepRegs;
+
   public:
     SchedulePostRATDList(MachineFunction &MF,
                          const MachineLoopInfo &MLI,
                          const MachineDominatorTree &MDT,
-                         ScheduleHazardRecognizer *HR)
+                         ScheduleHazardRecognizer *HR,
+                         AliasAnalysis *aa)
       : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
        AllocatableSet(TRI->getAllocatableSet(MF)),
-        HazardRec(HR) {}
+        HazardRec(HR), AA(aa) {}
 
     ~SchedulePostRATDList() {
       delete HazardRec;
@@ -135,6 +171,11 @@ namespace {
     /// Schedule - Schedule the instruction range using list scheduling.
     ///
     void Schedule();
+
+    /// FixupKills - Fix register kill flags that have been made
+    /// invalid due to scheduling
+    ///
+    void FixupKills(MachineBasicBlock *MBB);
 
     /// Observe - Update liveness information to account for the current
     /// instruction, which will not be scheduled.
@@ -153,62 +194,15 @@ namespace {
     void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
     void ListScheduleTopDown();
     bool BreakAntiDependencies();
-  };
-
-  /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
-  /// a coarse classification and attempts to avoid grouping instructions
-  /// of a given class too densely together.
-  class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
-    /// Class - A simple classification for SUnits.
-    enum Class {
-      Other, Load, Store
-    };
-
-    /// Window - The Class values of the most recently issued
-    /// instructions.
-    Class Window[8];
-
-    /// getClass - Classify the given SUnit.
-    Class getClass(const SUnit *SU) {
-      const MachineInstr *MI = SU->getInstr();
-      const TargetInstrDesc &TID = MI->getDesc();
-      if (TID.mayLoad())
-        return Load;
-      if (TID.mayStore())
-        return Store;
-      return Other;
-    }
-
-    /// Step - Rotate the existing entries in Window and insert the
-    /// given class value in position as the most recent.
-    void Step(Class C) {
-      std::copy(Window+1, array_endof(Window), Window);
-      Window[array_lengthof(Window)-1] = C;
-    }
-
-  public:
-    SimpleHazardRecognizer() : Window() {}
-
-    virtual HazardType getHazardType(SUnit *SU) {
-      Class C = getClass(SU);
-      if (C == Other)
-        return NoHazard;
-      unsigned Score = 0;
-      for (unsigned i = 0; i != array_lengthof(Window); ++i)
-        if (Window[i] == C)
-          Score += i + 1;
-      if (Score > array_lengthof(Window) * 2)
-        return Hazard;
-      return NoHazard;
-    }
-
-    virtual void EmitInstruction(SUnit *SU) {
-      Step(getClass(SU));
-    }
-
-    virtual void AdvanceCycle() {
-      Step(Other);
-    }
+    unsigned findSuitableFreeRegister(unsigned AntiDepReg,
+                                      unsigned LastNewReg,
+                                      const TargetRegisterClass *);
+    void StartBlockForKills(MachineBasicBlock *BB);
+
+    // ToggleKillFlag - Toggle a register operand kill flag. Other
+    // adjustments may be made to the instruction if necessary. Return
+    // true if the operand has been deleted, false if not.
+    bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
   };
 }
 
@@ -235,19 +229,44 @@ static bool isSchedulingBoundary(const MachineInstr *MI,
 }
 
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
-  DOUT << "PostRAScheduler\n";
+  AA = &getAnalysis<AliasAnalysis>();
+
+  // Check for explicit enable/disable of post-ra scheduling.
+  if (EnablePostRAScheduler.getPosition() > 0) {
+    if (!EnablePostRAScheduler)
+      return true;
+  } else {
+    // Check that post-RA scheduling is enabled for this function.
+    const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+    if (!ST.enablePostRAScheduler())
+      return true;
+  }
+
+  DEBUG(errs() << "PostRAScheduler\n");
 
   const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
   ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
-    new SimpleHazardRecognizer :
-    new ScheduleHazardRecognizer();
+    (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
+    (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
 
-  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR);
+  SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, AA);
 
   // Loop over all of the basic blocks
   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
       MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+    if (DebugDiv > 0) {
+      static int bbcnt = 0;
+      if (bbcnt++ % DebugDiv != DebugMod)
+        continue;
+      errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+        ":MBB ID#" << MBB->getNumber() << " ***\n";
+    }
+#endif
+
    // Initialize register live-range state for scheduling in this block.
    Scheduler.StartBlock(MBB);
 
@@ -259,7 +278,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       MachineInstr *MI = prior(I);
       if (isSchedulingBoundary(MI, Fn)) {
         Scheduler.Run(MBB, I, Current, CurrentCount);
-        Scheduler.EmitSchedule();
+        Scheduler.EmitSchedule(0);
         Current = MI;
         CurrentCount = Count - 1;
         Scheduler.Observe(MI, CurrentCount);
@@ -271,10 +290,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
     assert((MBB->begin() == Current || CurrentCount != 0) &&
            "Instruction count mismatch!");
     Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
-    Scheduler.EmitSchedule();
+    Scheduler.EmitSchedule(0);
 
     // Clean up register live-range state.
     Scheduler.FinishBlock();
+
+    // Update register kills
+    Scheduler.FixupKills(MBB);
   }
 
   return true;
@@ -287,6 +309,9 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
   // Call the superclass.
   ScheduleDAGInstrs::StartBlock(BB);
 
+  // Reset the hazard recognizer.
+  HazardRec->Reset();
+
   // Clear out the register class data.
   std::fill(Classes, array_endof(Classes),
             static_cast<const TargetRegisterClass *>(0));
@@ -295,8 +320,13 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
   std::fill(KillIndices, array_endof(KillIndices), ~0u);
   std::fill(DefIndices, array_endof(DefIndices), BB->size());
 
+  // Clear "do not change" set.
+  KeepRegs.clear();
+
+  bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+
   // Determine the live-out physregs for this block.
-  if (!BB->empty() && BB->back().getDesc().isReturn())
+  if (IsReturnBlock) {
     // In a return block, examine the function live-out regs.
     for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
          E = MRI.liveout_end(); I != E; ++I) {
@@ -312,7 +342,7 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
       DefIndices[AliasReg] = ~0u;
     }
   }
-  else
+  } else {
    // In a non-return block, examine the live-in regs of all successors.
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
         SE = BB->succ_end(); SI != SE; ++SI)
@@ -330,18 +360,16 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
       DefIndices[AliasReg] = ~0u;
     }
   }
+  }
 
-  // Consider callee-saved registers as live-out, since we're running after
-  // prologue/epilogue insertion so there's no way to add additional
-  // saved registers.
-  //
-  // TODO: If the callee saves and restores these, then we can potentially
-  // use them between the save and the restore. To do that, we could scan
-  // the exit blocks to see which of these registers are defined.
-  // Alternatively, callee-saved registers that aren't saved and restored
-  // could be marked live-in in every block.
+  // Mark live-out callee-saved registers. In a return block this is
+  // all callee-saved registers. In a non-return block it is any
+  // callee-saved register that is not saved in the prolog.
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  BitVector Pristine = MFI->getPristineRegs(BB);
   for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
     unsigned Reg = *I;
+    if (!IsReturnBlock && !Pristine.test(Reg)) continue;
     Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
     KillIndices[Reg] = BB->size();
     DefIndices[Reg] = ~0u;
@@ -358,10 +386,10 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
 /// Schedule - Schedule the instruction range using list scheduling.
 ///
 void SchedulePostRATDList::Schedule() {
-  DOUT << "********** List Scheduling **********\n";
+  DEBUG(errs() << "********** List Scheduling **********\n");
 
  // Build the scheduling graph.
-  BuildSchedGraph();
+  BuildSchedGraph(AA);
 
   if (EnableAntiDepBreaking) {
     if (BreakAntiDependencies()) {
@@ -374,10 +402,13 @@ void SchedulePostRATDList::Schedule() {
       SUnits.clear();
       EntrySU = SUnit();
       ExitSU = SUnit();
-      BuildSchedGraph();
+      BuildSchedGraph(AA);
     }
   }
 
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
   AvailableQueue.initNodes(SUnits);
 
   ListScheduleTopDown();
@@ -448,8 +479,10 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
     if (!MO.isReg()) continue;
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
-    const TargetRegisterClass *NewRC =
-      getInstrOperandRegClass(TRI, MI->getDesc(), i);
+    const TargetRegisterClass *NewRC = 0;
+
+    if (i < MI->getDesc().getNumOperands())
+      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
@@ -473,6 +506,16 @@ void SchedulePostRATDList::PrescanInstruction(MachineInstr *MI) {
     // If we're still willing to consider this register, note the reference.
     if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
       RegRefs.insert(std::make_pair(Reg, &MO));
+
+    // It's not safe to change register allocation for source operands
+    // that have special allocation requirements.
+    if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
+      if (KeepRegs.insert(Reg)) {
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg)
+          KeepRegs.insert(*Subreg);
+      }
+    }
   }
 }
 
@@ -492,9 +535,10 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
       DefIndices[Reg] = Count;
       KillIndices[Reg] = ~0u;
-      assert(((KillIndices[Reg] == ~0u) !=
-              (DefIndices[Reg] == ~0u)) &&
-             "Kill and Def maps aren't consistent for Reg!");
+      assert(((KillIndices[Reg] == ~0u) !=
+              (DefIndices[Reg] == ~0u)) &&
+             "Kill and Def maps aren't consistent for Reg!");
+      KeepRegs.erase(Reg);
       Classes[Reg] = 0;
       RegRefs.erase(Reg);
       // Repeat, for all subregs.
@@ -503,6 +547,7 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
         unsigned SubregReg = *Subreg;
         DefIndices[SubregReg] = Count;
         KillIndices[SubregReg] = ~0u;
+        KeepRegs.erase(SubregReg);
         Classes[SubregReg] = 0;
         RegRefs.erase(SubregReg);
       }
@@ -520,8 +565,9 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
     if (Reg == 0) continue;
     if (!MO.isUse()) continue;
 
-    const TargetRegisterClass *NewRC =
-      getInstrOperandRegClass(TRI, MI->getDesc(), i);
+    const TargetRegisterClass *NewRC = 0;
+    if (i < MI->getDesc().getNumOperands())
+      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
 
     // For now, only allow the register to be changed if its register
     // class is consistent across all uses.
@@ -551,6 +597,36 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
   }
 }
 
+unsigned
+SchedulePostRATDList::findSuitableFreeRegister(unsigned AntiDepReg,
+                                               unsigned LastNewReg,
+                                               const TargetRegisterClass *RC) {
+  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+       RE = RC->allocation_order_end(MF); R != RE; ++R) {
+    unsigned NewReg = *R;
+    // Don't replace a register with itself.
+    if (NewReg == AntiDepReg) continue;
+    // Don't replace a register with one that was recently used to repair
+    // an anti-dependence with this AntiDepReg, because that would
+    // re-introduce that anti-dependence.
+    if (NewReg == LastNewReg) continue;
+    // If NewReg is dead and NewReg's most recent def is not before
+    // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
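[Editorial aside, not part of the upstream patch: the legality test that follows immediately below can be read as a small standalone predicate. This sketch is illustrative only; it mirrors this file's ~0u sentinel convention for KillIndices/DefIndices, and omits the additional check the real code makes against poisoned register-class state:]

    // Hypothetical restatement of the renaming test below. NewReg may
    // stand in for AntiDepReg only if NewReg is currently dead and its
    // most recent complete def does not precede AntiDepReg's kill
    // (indices count bottom-up; KillIndices[R] == ~0u means R is dead).
    static bool canRenameTo(unsigned NewReg, unsigned AntiDepReg,
                            const unsigned *KillIndices,
                            const unsigned *DefIndices) {
      return KillIndices[NewReg] == ~0u &&
             KillIndices[AntiDepReg] <= DefIndices[NewReg];
    }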
+    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
+           "Kill and Def maps aren't consistent for AntiDepReg!");
+    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
+           "Kill and Def maps aren't consistent for NewReg!");
+    if (KillIndices[NewReg] != ~0u ||
+        Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+        KillIndices[AntiDepReg] > DefIndices[NewReg])
+      continue;
+    return NewReg;
+  }
+
+  // No registers are free and available!
+  return 0;
+}
+
 /// BreakAntiDependencies - Identify anti-dependencies along the critical path
 /// of the ScheduleDAG and break them by renaming registers.
 ///
@@ -567,8 +643,18 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
       Max = SU;
   }
 
-  DOUT << "Critical path has total latency "
-       << (Max->getDepth() + Max->Latency) << "\n";
+#ifndef NDEBUG
+  {
+    DEBUG(errs() << "Critical path has total latency "
+          << (Max->getDepth() + Max->Latency) << "\n");
+    DEBUG(errs() << "Available regs:");
+    for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+      if (KillIndices[Reg] == ~0u)
+        DEBUG(errs() << " " << TRI->getName(Reg));
+    }
+    DEBUG(errs() << '\n');
+  }
+#endif
 
   // Track progress along the critical path through the SUnit graph as we walk
   // the instructions.
@@ -598,7 +684,7 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
   // isn't A which is free. This re-introduces anti-dependencies
   // at all but one of the original anti-dependencies that we were
   // trying to break. To avoid this, keep track of the most recent
-  // register that each register was replaced with, avoid avoid
+  // register that each register was replaced with, and avoid
   // using it to repair an anti-dependence on the same register.
   // This lets us produce this:
   // A = ...
@@ -627,13 +713,6 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
        I != E; --Count) {
     MachineInstr *MI = --I;
 
-    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
-    // dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
-    // is left behind appearing to clobber the super-register, while the
-    // subregister needs to remain live. So we just ignore them.
-    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
-      continue;
-
     // Check if this instruction has a dependence on the critical path that
     // is an anti-dependence that we may be able to break. If it is, set
     // AntiDepReg to the non-zero register associated with the anti-dependence.
@@ -656,8 +735,12 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
       if (Edge->getKind() == SDep::Anti) {
         AntiDepReg = Edge->getReg();
         assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
-        // Don't break anti-dependencies on non-allocatable registers.
         if (!AllocatableSet.test(AntiDepReg))
+          // Don't break anti-dependencies on non-allocatable registers.
+          AntiDepReg = 0;
+        else if (KeepRegs.count(AntiDepReg))
+          // Don't break anti-dependencies if a use down below requires
+          // this exact register.
          AntiDepReg = 0;
        else {
          // If the SUnit has other dependencies on the SUnit that it
@@ -689,16 +772,22 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
 
     PrescanInstruction(MI);
 
-    // If this instruction has a use of AntiDepReg, breaking it
-    // is invalid.
-    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI->getOperand(i);
-      if (!MO.isReg()) continue;
-      unsigned Reg = MO.getReg();
-      if (Reg == 0) continue;
-      if (MO.isUse() && AntiDepReg == Reg) {
-        AntiDepReg = 0;
-        break;
+    if (MI->getDesc().hasExtraDefRegAllocReq())
+      // If this instruction's defs have special allocation requirements, don't
+      // break this anti-dependency.
+      AntiDepReg = 0;
+    else if (AntiDepReg) {
+      // If this instruction has a use of AntiDepReg, breaking it
+      // is invalid.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg()) continue;
+        unsigned Reg = MO.getReg();
+        if (Reg == 0) continue;
+        if (MO.isUse() && AntiDepReg == Reg) {
+          AntiDepReg = 0;
+          break;
+        }
       }
     }
 
@@ -715,60 +804,43 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
     // TODO: Instead of picking the first free register, consider which might
     // be the best.
     if (AntiDepReg != 0) {
-      for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
-           RE = RC->allocation_order_end(MF); R != RE; ++R) {
-        unsigned NewReg = *R;
-        // Don't replace a register with itself.
-        if (NewReg == AntiDepReg) continue;
-        // Don't replace a register with one that was recently used to repair
-        // an anti-dependence with this AntiDepReg, because that would
-        // re-introduce that anti-dependence.
-        if (NewReg == LastNewReg[AntiDepReg]) continue;
-        // If NewReg is dead and NewReg's most recent def is not before
-        // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
-        assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
-               "Kill and Def maps aren't consistent for AntiDepReg!");
-        assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
-               "Kill and Def maps aren't consistent for NewReg!");
-        if (KillIndices[NewReg] == ~0u &&
-            Classes[NewReg] != reinterpret_cast<TargetRegisterClass *>(-1) &&
-            KillIndices[AntiDepReg] <= DefIndices[NewReg]) {
-          DOUT << "Breaking anti-dependence edge on "
-               << TRI->getName(AntiDepReg)
-               << " with " << RegRefs.count(AntiDepReg) << " references"
-               << " using " << TRI->getName(NewReg) << "!\n";
-
-          // Update the references to the old register to refer to the new
-          // register.
-          std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
-                    std::multimap<unsigned, MachineOperand *>::iterator>
-             Range = RegRefs.equal_range(AntiDepReg);
-          for (std::multimap<unsigned, MachineOperand *>::iterator
-               Q = Range.first, QE = Range.second; Q != QE; ++Q)
-            Q->second->setReg(NewReg);
-
-          // We just went back in time and modified history; the
-          // liveness information for the anti-dependence reg is now
-          // inconsistent. Set the state as if it were dead.
-          Classes[NewReg] = Classes[AntiDepReg];
-          DefIndices[NewReg] = DefIndices[AntiDepReg];
-          KillIndices[NewReg] = KillIndices[AntiDepReg];
-          assert(((KillIndices[NewReg] == ~0u) !=
-                  (DefIndices[NewReg] == ~0u)) &&
-               "Kill and Def maps aren't consistent for NewReg!");
-
-          Classes[AntiDepReg] = 0;
-          DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
-          KillIndices[AntiDepReg] = ~0u;
-          assert(((KillIndices[AntiDepReg] == ~0u) !=
-                  (DefIndices[AntiDepReg] == ~0u)) &&
-               "Kill and Def maps aren't consistent for AntiDepReg!");
-
-          RegRefs.erase(AntiDepReg);
-          Changed = true;
-          LastNewReg[AntiDepReg] = NewReg;
-          break;
-        }
+      if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
+                                                     LastNewReg[AntiDepReg],
+                                                     RC)) {
+        DEBUG(errs() << "Breaking anti-dependence edge on "
+              << TRI->getName(AntiDepReg)
+              << " with " << RegRefs.count(AntiDepReg) << " references"
+              << " using " << TRI->getName(NewReg) << "!\n");
+
+        // Update the references to the old register to refer to the new
+        // register.
+        std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+                  std::multimap<unsigned, MachineOperand *>::iterator>
+           Range = RegRefs.equal_range(AntiDepReg);
+        for (std::multimap<unsigned, MachineOperand *>::iterator
+             Q = Range.first, QE = Range.second; Q != QE; ++Q)
+          Q->second->setReg(NewReg);
+
+        // We just went back in time and modified history; the
+        // liveness information for the anti-dependence reg is now
+        // inconsistent. Set the state as if it were dead.
+        Classes[NewReg] = Classes[AntiDepReg];
+        DefIndices[NewReg] = DefIndices[AntiDepReg];
+        KillIndices[NewReg] = KillIndices[AntiDepReg];
+        assert(((KillIndices[NewReg] == ~0u) !=
+                (DefIndices[NewReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for NewReg!");
+
+        Classes[AntiDepReg] = 0;
+        DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
+        KillIndices[AntiDepReg] = ~0u;
+        assert(((KillIndices[AntiDepReg] == ~0u) !=
+                (DefIndices[AntiDepReg] == ~0u)) &&
+               "Kill and Def maps aren't consistent for AntiDepReg!");
+
+        RegRefs.erase(AntiDepReg);
+        Changed = true;
+        LastNewReg[AntiDepReg] = NewReg;
      }
    }
 
@@ -778,6 +850,177 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
   return Changed;
 }
 
+/// StartBlockForKills - Initialize register live-range state for updating kills
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+  // Initialize the indices to indicate that no registers are live.
+  std::fill(KillIndices, array_endof(KillIndices), ~0u);
+
+  // Determine the live-out physregs for this block.
+  if (!BB->empty() && BB->back().getDesc().isReturn()) {
+    // In a return block, examine the function live-out regs.
+    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+         E = MRI.liveout_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      KillIndices[Reg] = BB->size();
+      // Repeat, for all subregs.
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        KillIndices[*Subreg] = BB->size();
+      }
+    }
+  }
+  else {
+    // In a non-return block, examine the live-in regs of all successors.
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         SE = BB->succ_end(); SI != SE; ++SI) {
+      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+           E = (*SI)->livein_end(); I != E; ++I) {
+        unsigned Reg = *I;
+        KillIndices[Reg] = BB->size();
+        // Repeat, for all subregs.
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg) {
+          KillIndices[*Subreg] = BB->size();
+        }
+      }
+    }
+  }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+                                          MachineOperand &MO) {
+  // Setting kill flag...
+  if (!MO.isKill()) {
+    MO.setIsKill(true);
+    return false;
+  }
+
+  // If MO itself is live, clear the kill flag...
+  if (KillIndices[MO.getReg()] != ~0u) {
+    MO.setIsKill(false);
+    return false;
+  }
+
+  // If any subreg of MO is live, then create an imp-def for that
+  // subreg and keep MO marked as killed.
+  MO.setIsKill(false);
+  bool AllDead = true;
+  const unsigned SuperReg = MO.getReg();
+  for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+       *Subreg; ++Subreg) {
+    if (KillIndices[*Subreg] != ~0u) {
+      MI->addOperand(MachineOperand::CreateReg(*Subreg,
+                                               true  /*IsDef*/,
+                                               true  /*IsImp*/,
+                                               false /*IsKill*/,
+                                               false /*IsDead*/));
+      AllDead = false;
+    }
+  }
+
+  if (AllDead)
+    MO.setIsKill(true);
+  return false;
+}
+
+/// FixupKills - Fix the register kill flags, they may have been made
+/// incorrect by instruction reordering.
+///
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+  DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');
+
+  std::set<unsigned> killedRegs;
+  BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+  StartBlockForKills(MBB);
+
+  // Examine block from end to start...
+  unsigned Count = MBB->size();
+  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+       I != E; --Count) {
+    MachineInstr *MI = --I;
+
+    // Update liveness. Registers that are defed but not used in this
+    // instruction are now dead. Mark register and all subregs as they
+    // are completely defined.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+      if (!MO.isDef()) continue;
+      // Ignore two-addr defs.
+      if (MI->isRegTiedToUseOperand(i)) continue;
+
+      KillIndices[Reg] = ~0u;
+
+      // Repeat for all subregs.
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        KillIndices[*Subreg] = ~0u;
+      }
+    }
+
+    // Examine all used registers and set/clear kill flag. When a
+    // register is used multiple times we only set the kill flag on
+    // the first use.
+    killedRegs.clear();
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse()) continue;
+      unsigned Reg = MO.getReg();
+      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+      bool kill = false;
+      if (killedRegs.find(Reg) == killedRegs.end()) {
+        kill = true;
+        // A register is not killed if any subregs are live...
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg) {
+          if (KillIndices[*Subreg] != ~0u) {
+            kill = false;
+            break;
+          }
+        }
+
+        // If subreg is not live, then register is killed if it became
+        // live in this instruction
+        if (kill)
+          kill = (KillIndices[Reg] == ~0u);
+      }
+
+      if (MO.isKill() != kill) {
+        bool removed = ToggleKillFlag(MI, MO);
+        if (removed) {
+          DEBUG(errs() << "Fixed in ");
+        } else {
+          DEBUG(errs() << "Fixed " << MO << " in ");
+        }
+        DEBUG(MI->dump());
+      }
+
+      killedRegs.insert(Reg);
+    }
+
+    // Mark any used register (that is not using undef) and subregs as
+    // now live...
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+      unsigned Reg = MO.getReg();
+      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+      KillIndices[Reg] = Count;
+
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        KillIndices[*Subreg] = Count;
+      }
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //  Top-Down Scheduling
 //===----------------------------------------------------------------------===//
@@ -786,17 +1029,17 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
 /// the PendingQueue if the count reaches zero. Also update its cycle bound.
 void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
   SUnit *SuccSU = SuccEdge->getSUnit();
-  --SuccSU->NumPredsLeft;
-
+
 #ifndef NDEBUG
-  if (SuccSU->NumPredsLeft < 0) {
-    cerr << "*** Scheduling failed! ***\n";
+  if (SuccSU->NumPredsLeft == 0) {
+    errs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
-    cerr << " has been released too many times!\n";
-    assert(0);
+    errs() << " has been released too many times!\n";
+    llvm_unreachable(0);
   }
 #endif
-
+  --SuccSU->NumPredsLeft;
+
   // Compute how many cycles it will be before this actually becomes
   // available. This is the max of the start time of all predecessors plus
   // their latencies.
@@ -819,7 +1062,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
 void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
-  DOUT << "*** Scheduling [" << CurCycle << "]: ";
+  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
   Sequence.push_back(SU);
@@ -848,6 +1091,10 @@ void SchedulePostRATDList::ListScheduleTopDown() {
     }
   }
 
+  // In any cycle where we can't schedule any instructions, we must
+  // stall or emit a noop, depending on the target.
+  bool CycleHasInsts = false;
+
   // While Available queue is not empty, grab the node with the highest
   // priority. If it is not ready put it back. Schedule the node.
   std::vector<SUnit*> NotReady;
@@ -866,13 +1113,14 @@ void SchedulePostRATDList::ListScheduleTopDown() {
       } else if (PendingQueue[i]->getDepth() < MinDepth)
         MinDepth = PendingQueue[i]->getDepth();
     }
-
-    // If there are no instructions available, don't try to issue anything, and
-    // don't advance the hazard recognizer.
-    if (AvailableQueue.empty()) {
-      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
-      continue;
-    }
+
+    DEBUG(errs() << "\n*** Examining Available\n";
+          LatencyPriorityQueue q = AvailableQueue;
+          while (!q.empty()) {
+            SUnit *su = q.pop();
+            errs() << "Height " << su->getHeight() << ": ";
+            su->dump(this);
+          });
 
     SUnit *FoundSUnit = 0;
 
@@ -903,27 +1151,38 @@ void SchedulePostRATDList::ListScheduleTopDown() {
     if (FoundSUnit) {
       ScheduleNodeTopDown(FoundSUnit, CurCycle);
       HazardRec->EmitInstruction(FoundSUnit);
-
-      // If this is a pseudo-op node, we don't want to increment the current
-      // cycle.
-      if (FoundSUnit->Latency)  // Don't increment CurCycle for pseudo-ops!
-        ++CurCycle;
-    } else if (!HasNoopHazards) {
-      // Otherwise, we have a pipeline stall, but no other problem, just advance
-      // the current cycle and try again.
- DOUT << "*** Advancing cycle, no work to do\n"; - HazardRec->AdvanceCycle(); - ++NumStalls; - ++CurCycle; + CycleHasInsts = true; + + // If we are using the target-specific hazards, then don't + // advance the cycle time just because we schedule a node. If + // the target allows it we can schedule multiple nodes in the + // same cycle. + if (!EnablePostRAHazardAvoidance) { + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } } else { - // Otherwise, we have no instructions to issue and we have instructions - // that will fault if we don't do this right. This is the case for - // processors without pipeline interlocks and other cases. - DOUT << "*** Emitting noop\n"; - HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop - ++NumNoops; + if (CycleHasInsts) { + DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, + // just advance the current cycle and try again. + DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++NumStalls; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + } + ++CurCycle; + CycleHasInsts = false; } } diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index ae60c86c3d7c0..8fa07d4d9afc2 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -67,7 +68,7 @@ namespace { MachineBasicBlock *BarrierMBB; // Barrier - Current barrier index. - unsigned BarrierIdx; + LiveIndex BarrierIdx; // CurrLI - Current live interval being split. LiveInterval *CurrLI; @@ -82,7 +83,7 @@ namespace { DenseMap IntervalSSMap; // Def2SpillMap - A map from a def instruction index to spill index. - DenseMap Def2SpillMap; + DenseMap Def2SpillMap; public: static char ID; @@ -91,6 +92,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -119,33 +121,31 @@ namespace { } /// print - Implement the dump method. 
-    virtual void print(std::ostream &O, const Module* M = 0) const {
+    virtual void print(raw_ostream &O, const Module* M = 0) const {
       LIs->print(O, M);
     }
 
-    void print(std::ostream *O, const Module* M = 0) const {
-      if (O) print(*O, M);
-    }
-
   private:
     MachineBasicBlock::iterator
-      findNextEmptySlot(MachineBasicBlock*, MachineInstr*, unsigned&);
+      findNextEmptySlot(MachineBasicBlock*, MachineInstr*, LiveIndex&);
 
     MachineBasicBlock::iterator
       findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
-                     SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+                     SmallPtrSet<MachineInstr*, 4>&, LiveIndex&);
 
     MachineBasicBlock::iterator
-      findRestorePoint(MachineBasicBlock*, MachineInstr*, unsigned,
-                       SmallPtrSet<MachineInstr*, 4>&, unsigned&);
+      findRestorePoint(MachineBasicBlock*, MachineInstr*, LiveIndex,
+                       SmallPtrSet<MachineInstr*, 4>&, LiveIndex&);
 
     int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
 
-    bool IsAvailableInStack(MachineBasicBlock*, unsigned, unsigned, unsigned,
-                            unsigned&, int&) const;
+    bool IsAvailableInStack(MachineBasicBlock*, unsigned,
+                            LiveIndex, LiveIndex,
+                            LiveIndex&, int&) const;
 
-    void UpdateSpillSlotInterval(VNInfo*, unsigned, unsigned);
+    void UpdateSpillSlotInterval(VNInfo*, LiveIndex, LiveIndex);
 
     bool SplitRegLiveInterval(LiveInterval*);
 
@@ -157,7 +157,7 @@ namespace {
     bool Rematerialize(unsigned vreg, VNInfo* ValNo,
                        MachineInstr* DefMI,
                        MachineBasicBlock::iterator RestorePt,
-                       unsigned RestoreIdx,
+                       LiveIndex RestoreIdx,
                        SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
     MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
                             MachineInstr* DefMI,
@@ -209,11 +209,12 @@ const PassInfo *const llvm::PreAllocSplittingID = &X;
 /// instruction index map. If there isn't one, return end().
 MachineBasicBlock::iterator
 PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
-                                     unsigned &SpotIndex) {
+                                     LiveIndex &SpotIndex) {
   MachineBasicBlock::iterator MII = MI;
   if (++MII != MBB->end()) {
-    unsigned Index = LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
-    if (Index) {
+    LiveIndex Index =
+      LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
+    if (Index != LiveIndex()) {
       SpotIndex = Index;
       return MII;
     }
@@ -229,7 +230,7 @@ MachineBasicBlock::iterator
 PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
                                   MachineInstr *DefMI,
                                   SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
-                                  unsigned &SpillIndex) {
+                                  LiveIndex &SpillIndex) {
   MachineBasicBlock::iterator Pt = MBB->begin();
 
   MachineBasicBlock::iterator MII = MI;
@@ -242,7 +243,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
     if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
 
     while (MII != EndPt && !RefsInMBB.count(MII)) {
-      unsigned Index = LIs->getInstructionIndex(MII);
+      LiveIndex Index = LIs->getInstructionIndex(MII);
 
       // We can't insert the spill between the barrier (a call), and its
      // corresponding call frame setup.
@@ -275,9 +276,9 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
 /// found.
 MachineBasicBlock::iterator
 PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
-                                    unsigned LastIdx,
+                                    LiveIndex LastIdx,
                                     SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
-                                    unsigned &RestoreIndex) {
+                                    LiveIndex &RestoreIndex) {
   // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
   // begin index accordingly.
   MachineBasicBlock::iterator Pt = MBB->end();
@@ -298,10 +299,10 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
   // FIXME: Limit the number of instructions to examine to reduce
  // compile time?
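[Editorial aside, not part of the upstream patch; the function continues below. The unsigned-to-LiveIndex migration running through this file swaps raw integers for an opaque index type, so arithmetic like EndIdx+1 must instead go through the LiveIntervals API (for example LIs->getNextSlot, used later in this file). A hedged sketch of the pattern, with a hypothetical class name:]

    // Hypothetical sketch of the opaque-index idea behind LiveIndex.
    // Raw arithmetic like 'EndIdx + 1' no longer compiles; stepping an
    // index is the owning analysis's job, not the client's.
    class OpaqueIndex {
      unsigned idx;
    public:
      OpaqueIndex() : idx(~0u) {}            // default-constructed == invalid
      explicit OpaqueIndex(unsigned i) : idx(i) {}
      bool operator==(const OpaqueIndex &o) const { return idx == o.idx; }
      bool operator!=(const OpaqueIndex &o) const { return idx != o.idx; }
      bool operator<(const OpaqueIndex &o) const  { return idx < o.idx; }
      // Deliberately no operator+ or operator++.
    };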
   while (MII != EndPt) {
-    unsigned Index = LIs->getInstructionIndex(MII);
+    LiveIndex Index = LIs->getInstructionIndex(MII);
     if (Index > LastIdx)
       break;
-    unsigned Gap = LIs->findGapBeforeInstr(Index);
+    LiveIndex Gap = LIs->findGapBeforeInstr(Index);
 
     // We can't insert a restore between the barrier (a call) and its
     // corresponding call frame teardown.
@@ -310,7 +311,7 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
         if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
         ++MII;
       } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
-    } else if (Gap) {
+    } else if (Gap != LiveIndex()) {
       Pt = MII;
       RestoreIndex = Gap;
     }
@@ -343,7 +344,8 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
   if (CurrSLI->hasAtLeastOneValue())
     CurrSValNo = CurrSLI->getValNumInfo(0);
   else
-    CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
+    CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false,
+                                       LSs->getVNInfoAllocator());
   return SS;
 }
 
@@ -351,8 +353,9 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
 /// slot at the specified index.
 bool
 PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
-                                      unsigned Reg, unsigned DefIndex,
-                                      unsigned RestoreIndex, unsigned &SpillIndex,
+                                      unsigned Reg, LiveIndex DefIndex,
+                                      LiveIndex RestoreIndex,
+                                      LiveIndex &SpillIndex,
                                      int& SS) const {
   if (!DefMBB)
     return false;
 
   DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
   if (I == IntervalSSMap.end())
     return false;
-  DenseMap<unsigned, unsigned>::iterator II = Def2SpillMap.find(DefIndex);
+  DenseMap<LiveIndex, LiveIndex>::iterator
+    II = Def2SpillMap.find(DefIndex);
   if (II == Def2SpillMap.end())
     return false;
@@ -380,8 +384,8 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
 /// interval being split, and the spill and restore indices, update the live
 /// interval of the spill stack slot.
 void
-PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
-                                           unsigned RestoreIndex) {
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, LiveIndex SpillIndex,
+                                           LiveIndex RestoreIndex) {
   assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
          "Expect restore in the barrier mbb");
 
@@ -394,8 +398,8 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
   }
 
   SmallPtrSet<MachineBasicBlock*, 4> Processed;
-  unsigned EndIdx = LIs->getMBBEndIdx(MBB);
-  LiveRange SLR(SpillIndex, EndIdx+1, CurrSValNo);
+  LiveIndex EndIdx = LIs->getMBBEndIdx(MBB);
+  LiveRange SLR(SpillIndex, LIs->getNextSlot(EndIdx), CurrSValNo);
   CurrSLI->addRange(SLR);
   Processed.insert(MBB);
 
@@ -414,7 +418,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
     WorkList.pop_back();
     if (Processed.count(MBB))
       continue;
-    unsigned Idx = LIs->getMBBStartIdx(MBB);
+    LiveIndex Idx = LIs->getMBBStartIdx(MBB);
     LR = CurrLI->getLiveRangeContaining(Idx);
     if (LR && LR->valno == ValNo) {
       EndIdx = LIs->getMBBEndIdx(MBB);
@@ -424,7 +428,7 @@ PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, unsigned SpillIndex,
         CurrSLI->addRange(SLR);
       } else if (LR->end > EndIdx) {
        // Live range extends beyond end of mbb, process successors.
-        LiveRange SLR(Idx, EndIdx+1, CurrSValNo);
+        LiveRange SLR(Idx, LIs->getNextIndex(EndIdx), CurrSValNo);
         CurrSLI->addRange(SLR);
         for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
              SE = MBB->succ_end(); SI != SE; ++SI)
@@ -487,12 +491,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
     }
 
     // Once we've found it, extend its VNInfo to our instruction.
-    unsigned DefIndex = LIs->getInstructionIndex(Walker);
-    DefIndex = LiveIntervals::getDefIndex(DefIndex);
-    unsigned EndIndex = LIs->getMBBEndIdx(MBB);
+    LiveIndex DefIndex = LIs->getInstructionIndex(Walker);
+    DefIndex = LIs->getDefIndex(DefIndex);
+    LiveIndex EndIndex = LIs->getMBBEndIdx(MBB);
 
     RetVNI = NewVNs[Walker];
-    LI->addRange(LiveRange(DefIndex, EndIndex+1, RetVNI));
+    LI->addRange(LiveRange(DefIndex, LIs->getNextSlot(EndIndex), RetVNI));
   } else if (!ContainsDefs && ContainsUses) {
     SmallPtrSet<MachineInstr*, 4>& BlockUses = Uses[MBB];
 
@@ -524,12 +528,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
                                       IsTopLevel, IsIntraBlock);
     }
 
-    unsigned UseIndex = LIs->getInstructionIndex(Walker);
-    UseIndex = LiveIntervals::getUseIndex(UseIndex);
-    unsigned EndIndex = 0;
+    LiveIndex UseIndex = LIs->getInstructionIndex(Walker);
+    UseIndex = LIs->getUseIndex(UseIndex);
+    LiveIndex EndIndex;
     if (IsIntraBlock) {
       EndIndex = LIs->getInstructionIndex(UseI);
-      EndIndex = LiveIntervals::getUseIndex(EndIndex);
+      EndIndex = LIs->getUseIndex(EndIndex);
     } else
       EndIndex = LIs->getMBBEndIdx(MBB);
 
@@ -538,12 +542,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
     RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
                                     NewVNs, LiveOut, Phis, false, true);
 
-    LI->addRange(LiveRange(UseIndex, EndIndex+1, RetVNI));
+    LI->addRange(LiveRange(UseIndex, LIs->getNextSlot(EndIndex), RetVNI));
 
     // FIXME: Need to set kills properly for inter-block stuff.
-    if (LI->isKill(RetVNI, UseIndex)) LI->removeKill(RetVNI, UseIndex);
+    if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex);
     if (IsIntraBlock)
-      LI->addKill(RetVNI, EndIndex);
+      RetVNI->addKill(EndIndex);
   } else if (ContainsDefs && ContainsUses) {
     SmallPtrSet<MachineInstr*, 4>& BlockDefs = Defs[MBB];
     SmallPtrSet<MachineInstr*, 4>& BlockUses = Uses[MBB];
@@ -584,13 +588,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
                                       IsTopLevel, IsIntraBlock);
     }
 
-    unsigned StartIndex = LIs->getInstructionIndex(Walker);
-    StartIndex = foundDef ? LiveIntervals::getDefIndex(StartIndex) :
-                            LiveIntervals::getUseIndex(StartIndex);
-    unsigned EndIndex = 0;
+    LiveIndex StartIndex = LIs->getInstructionIndex(Walker);
+    StartIndex = foundDef ?
LIs->getDefIndex(StartIndex) : + LIs->getUseIndex(StartIndex); + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); @@ -600,12 +604,12 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses, NewVNs, LiveOut, Phis, false, true); - LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI)); - if (foundUse && LI->isKill(RetVNI, StartIndex)) - LI->removeKill(RetVNI, StartIndex); + if (foundUse && RetVNI->isKill(StartIndex)) + RetVNI->removeKill(StartIndex); if (IsIntraBlock) { - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); } } @@ -636,9 +640,10 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us // assume that we are not intrablock here. if (Phis.count(MBB)) return Phis[MBB]; - unsigned StartIndex = LIs->getMBBStartIdx(MBB); + LiveIndex StartIndex = LIs->getMBBStartIdx(MBB); VNInfo *RetVNI = Phis[MBB] = - LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator()); + LI->getNextValue(LiveIndex(), /*FIXME*/ 0, false, + LIs->getVNInfoAllocator()); if (!IsIntraBlock) LiveOut[MBB] = RetVNI; @@ -680,21 +685,21 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us for (DenseMap::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { I->second->setHasPHIKill(true); - unsigned KillIndex = LIs->getMBBEndIdx(I->first); - if (!LiveInterval::isKill(I->second, KillIndex)) - LI->addKill(I->second, KillIndex); + LiveIndex KillIndex = LIs->getMBBEndIdx(I->first); + if (!I->second->isKill(KillIndex)) + I->second->addKill(KillIndex); } } - unsigned EndIndex = 0; + LiveIndex EndIndex; if (IsIntraBlock) { EndIndex = LIs->getInstructionIndex(UseI); - EndIndex = LiveIntervals::getUseIndex(EndIndex); + EndIndex = LIs->getUseIndex(EndIndex); } else EndIndex = LIs->getMBBEndIdx(MBB); - LI->addRange(LiveRange(StartIndex, EndIndex+1, RetVNI)); + LI->addRange(LiveRange(StartIndex, LIs->getNextSlot(EndIndex), RetVNI)); if (IsIntraBlock) - LI->addKill(RetVNI, EndIndex); + RetVNI->addKill(EndIndex); // Memoize results so we don't have to recompute them. if (!IsIntraBlock) @@ -728,8 +733,8 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { DE = MRI->def_end(); DI != DE; ++DI) { Defs[(*DI).getParent()].insert(&*DI); - unsigned DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = LiveIntervals::getDefIndex(DefIdx); + LiveIndex DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LIs->getDefIndex(DefIdx); assert(DI->getOpcode() != TargetInstrInfo::PHI && "Following NewVN isPHIDef flag incorrect. 
Fix me!"); @@ -739,7 +744,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) if (DstReg == LI->reg) - NewVN->copy = &*DI; + NewVN->setCopy(&*DI); NewVNs[&*DI] = NewVN; } @@ -764,14 +769,32 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { // Add ranges for dead defs for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg), DE = MRI->def_end(); DI != DE; ++DI) { - unsigned DefIdx = LIs->getInstructionIndex(&*DI); - DefIdx = LiveIntervals::getDefIndex(DefIdx); + LiveIndex DefIdx = LIs->getInstructionIndex(&*DI); + DefIdx = LIs->getDefIndex(DefIdx); if (LI->liveAt(DefIdx)) continue; VNInfo* DeadVN = NewVNs[&*DI]; - LI->addRange(LiveRange(DefIdx, DefIdx+1, DeadVN)); - LI->addKill(DeadVN, DefIdx); + LI->addRange(LiveRange(DefIdx, LIs->getNextSlot(DefIdx), DeadVN)); + DeadVN->addKill(DefIdx); + } + + // Update kill markers. + for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); + VI != VE; ++VI) { + VNInfo* VNI = *VI; + for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) { + LiveIndex KillIdx = VNI->kills[i]; + if (KillIdx.isPHIIndex()) + continue; + MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx); + if (KillMI) { + MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg); + if (KillMO) + // It could be a dead def. + KillMO->setIsKill(); + } + } } } @@ -801,14 +824,16 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { VNsToCopy.push_back(OldVN); // Locate two-address redefinitions - for (SmallVector::iterator KI = OldVN->kills.begin(), + for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(), KE = OldVN->kills.end(); KI != KE; ++KI) { + assert(!KI->isPHIIndex() && + "VN previously reported having no PHI kills."); MachineInstr* MI = LIs->getInstructionFromIndex(*KI); unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg); if (DefIdx == ~0U) continue; if (MI->isRegTiedToUseOperand(DefIdx)) { VNInfo* NextVN = - CurrLI->findDefinedVNInfo(LiveIntervals::getDefIndex(*KI)); + CurrLI->findDefinedVNInfoForRegInt(LIs->getDefIndex(*KI)); if (NextVN == OldVN) continue; Stack.push_back(NextVN); } @@ -840,10 +865,10 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg), E = MRI->reg_end(); I != E; ++I) { MachineOperand& MO = I.getOperand(); - unsigned InstrIdx = LIs->getInstructionIndex(&*I); + LiveIndex InstrIdx = LIs->getInstructionIndex(&*I); - if ((MO.isUse() && NewLI.liveAt(LiveIntervals::getUseIndex(InstrIdx))) || - (MO.isDef() && NewLI.liveAt(LiveIntervals::getDefIndex(InstrIdx)))) + if ((MO.isUse() && NewLI.liveAt(LIs->getUseIndex(InstrIdx))) || + (MO.isDef() && NewLI.liveAt(LIs->getDefIndex(InstrIdx)))) OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo())); } @@ -865,15 +890,15 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { NumRenumbers++; } -bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo, +bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, MachineInstr* DefMI, MachineBasicBlock::iterator RestorePt, - unsigned RestoreIdx, + LiveIndex RestoreIdx, SmallPtrSet& RefsInMBB) { MachineBasicBlock& MBB = *RestorePt->getParent(); MachineBasicBlock::iterator KillPt = BarrierMBB->end(); - unsigned KillIdx = 0; + LiveIndex KillIdx; if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB) KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx); 
else @@ -882,13 +907,13 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo, if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, vreg, DefMI); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI); LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx); ReconstructLiveInterval(CurrLI); - unsigned RematIdx = LIs->getInstructionIndex(prior(RestorePt)); - RematIdx = LiveIntervals::getDefIndex(RematIdx); - RenumberValno(CurrLI->findDefinedVNInfo(RematIdx)); + LiveIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt)); + RematIdx = LIs->getDefIndex(RematIdx); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx)); ++NumSplits; ++NumRemats; @@ -943,7 +968,8 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, if (CurrSLI->hasAtLeastOneValue()) CurrSValNo = CurrSLI->getValNumInfo(0); else - CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator()); + CurrSValNo = CurrSLI->getNextValue(LiveIndex(), 0, false, + LSs->getVNInfoAllocator()); } return FMI; @@ -1033,11 +1059,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx)); VNInfo *ValNo = LR->valno; - if (ValNo->isUnused()) { - // Defined by a dead def? How can this be? - assert(0 && "Val# is defined by a dead def?"); - abort(); - } + assert(!ValNo->isUnused() && "Val# is defined by a dead def?"); MachineInstr *DefMI = ValNo->isDefAccurate() ? LIs->getInstructionFromIndex(ValNo->def) : NULL; @@ -1056,7 +1078,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Find a point to restore the value after the barrier. - unsigned RestoreIndex = 0; + LiveIndex RestoreIndex; MachineBasicBlock::iterator RestorePt = findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex); if (RestorePt == BarrierMBB->end()) @@ -1070,7 +1092,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { // Add a spill either before the barrier or after the definition. MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL; const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg); - unsigned SpillIndex = 0; + LiveIndex SpillIndex; MachineInstr *SpillMI = NULL; int SS = -1; if (!ValNo->isDefAccurate()) { @@ -1098,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { return false; // Def is dead. Do nothing. if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier, - BarrierMBB, SS, RefsInMBB))) { + BarrierMBB, SS, RefsInMBB))) { SpillIndex = LIs->getInstructionIndex(SpillMI); } else { // Check if it's possible to insert a spill after the def MI. @@ -1114,11 +1136,9 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { if (SpillPt == DefMBB->end()) return false; // No gap to insert spill. } - // Add spill. The store instruction kills the register if def is before - // the barrier in the barrier block. + // Add spill. SS = CreateSpillStackSlot(CurrLI->reg, RC); - TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, - DefMBB == BarrierMBB, SS, RC); + TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC); SpillMI = prior(SpillPt); LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex); } @@ -1142,15 +1162,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) { } // Update spill stack slot live interval. 
- UpdateSpillSlotInterval(ValNo, LIs->getUseIndex(SpillIndex)+1, + UpdateSpillSlotInterval(ValNo, LIs->getNextSlot(LIs->getUseIndex(SpillIndex)), LIs->getDefIndex(RestoreIndex)); ReconstructLiveInterval(CurrLI); - + if (!FoldedRestore) { - unsigned RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); - RestoreIdx = LiveIntervals::getDefIndex(RestoreIdx); - RenumberValno(CurrLI->findDefinedVNInfo(RestoreIdx)); + LiveIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt)); + RestoreIdx = LIs->getDefIndex(RestoreIdx); + RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx)); } ++NumSplits; @@ -1189,8 +1209,6 @@ PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs, while (!Intervals.empty()) { if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit) break; - else if (NumSplits == 4) - Change |= Change; LiveInterval *LI = Intervals.back(); Intervals.pop_back(); bool result = SplitRegLiveInterval(LI); @@ -1236,8 +1254,8 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet& split) { // reaching definition (VNInfo). for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg), UE = MRI->use_end(); UI != UE; ++UI) { - unsigned index = LIs->getInstructionIndex(&*UI); - index = LiveIntervals::getUseIndex(index); + LiveIndex index = LIs->getInstructionIndex(&*UI); + index = LIs->getUseIndex(index); const LiveRange* LR = (*LI)->getLiveRangeContaining(index); VNUseCount[LR->valno].insert(&*UI); @@ -1386,7 +1404,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR, if (LR->valno->hasPHIKill()) return false; - unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB); + LiveIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB); if (LR->end < MBBEnd) return false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 9e7ad6752a73d..8793df7705fa9 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -31,7 +31,9 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" #include @@ -51,22 +53,26 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } /// frame indexes with appropriate references. /// bool PEI::runOnMachineFunction(MachineFunction &Fn) { + const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Get MachineModuleInfo so that we can track the construction of the // frame. if (MachineModuleInfo *MMI = getAnalysisIfAvailable()) Fn.getFrameInfo()->setMachineModuleInfo(MMI); + // Calculate the MaxCallFrameSize and HasCalls variables for the function's + // frame information. Also eliminates call frame pseudo instructions. + calculateCallsInformation(Fn); + // Allow the target machine to make some adjustments to the function // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. TRI->processFunctionBeforeCalleeSavedScan(Fn, RS); - // Scan the function for modified callee saved registers and insert spill - // code for any callee saved registers that are modified. Also calculate - // the MaxCallFrameSize and HasCalls variables for the function's frame - // information and eliminates call frame pseudo instructions. 
+ // Scan the function for modified callee saved registers and insert spill code + // for any callee saved registers that are modified. calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: @@ -78,7 +84,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - insertCSRSpillsAndRestores(Fn); + if (!F->hasFnAttr(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -92,13 +99,20 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters // must be called before this function in order to set the HasCalls // and MaxCallFrameSize variables. - insertPrologEpilogCode(Fn); + if (!F->hasFnAttr(Attribute::Naked)) + insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references // and actual offsets. // replaceFrameIndices(Fn); + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimination + // inserted. + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + delete RS; clearAllSets(); return true; @@ -117,35 +131,24 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { } #endif -/// calculateCalleeSavedRegisters - Scan the function for modified callee saved -/// registers. Also calculate the MaxCallFrameSize and HasCalls variables for -/// the function's frame information and eliminates call frame pseudo -/// instructions. -/// -void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { +/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void PEI::calculateCallsInformation(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); - // Get the callee saved register list... - const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + unsigned MaxCallFrameSize = 0; + bool HasCalls = false; // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); - // These are used to keep track the callee-save area. Initialize them. - MinCSFrameIndex = INT_MAX; - MaxCSFrameIndex = 0; - - // Early exit for targets which have no callee saved registers and no call - // frame setup/destroy pseudo instructions. - if ((CSRegs == 0 || CSRegs[0] == 0) && - FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions.
+ if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) return; - unsigned MaxCallFrameSize = 0; - bool HasCalls = false; - std::vector FrameSDOps; for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) @@ -157,31 +160,57 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; HasCalls = true; FrameSDOps.push_back(I); + } else if (I->getOpcode() == TargetInstrInfo::INLINEASM) { + // An InlineAsm might be a call; assume it is to get the stack frame + // aligned correctly for calls. + HasCalls = true; } MachineFrameInfo *FFI = Fn.getFrameInfo(); FFI->setHasCalls(HasCalls); FFI->setMaxCallFrameSize(MaxCallFrameSize); - for (unsigned i = 0, e = FrameSDOps.size(); i != e; ++i) { - MachineBasicBlock::iterator I = FrameSDOps[i]; - // If call frames are not being included as part of the stack frame, - // and there is no dynamic allocation (therefore referencing frame slots - // off sp), leave the pseudo ops alone. We'll eliminate them later. + for (std::vector::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // there is no dynamic allocation (therefore referencing frame slots off + // sp), leave the pseudo ops alone. We'll eliminate them later. if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } +} + + +/// calculateCalleeSavedRegisters - Scan the function for modified callee saved +/// registers. +void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo(); + MachineFrameInfo *FFI = Fn.getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + // Early exit for targets which have no callee saved registers. + if (CSRegs == 0 || CSRegs[0] == 0) + return; - // Now figure out which *callee saved* registers are modified by the current + // Figure out which *callee saved* registers are modified by the current // function, thus needing to be saved and restored in the prolog/epilog. - // - const TargetRegisterClass* const *CSRegClasses = + const TargetRegisterClass * const *CSRegClasses = RegInfo->getCalleeSavedRegClasses(&Fn); + std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { - // If the reg is modified, save it! + // If the reg is modified, save it! CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); @@ -198,39 +227,47 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // Early exit if no callee saved registers are modified! unsigned NumFixedSpillSlots; - const std::pair *FixedSpillSlots = + const TargetFrameInfo::SpillSlot *FixedSpillSlots = TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); // Now that we know which registers need to be saved and restored, allocate // stack slots for them. 
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = CSI[i].getRegClass(); + for (std::vector::iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = I->getRegClass(); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } // Check to see if this physreg must be spilled to a particular stack slot // on this target. - const std::pair *FixedSlot = FixedSpillSlots; + const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots; while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->first != Reg) + FixedSlot->Reg != Reg) ++FixedSlot; - int FrameIdx; - if (FixedSlot == FixedSpillSlots+NumFixedSpillSlots) { + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { // Nope, just spill it anywhere convenient. unsigned Align = RC->getAlignment(); unsigned StackAlign = TFI->getStackAlignment(); - // We may not be able to sastify the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. - // Use the min. + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. Align = std::min(Align, StackAlign); FrameIdx = FFI->CreateStackObject(RC->getSize(), Align); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->second); + FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset); } - CSI[i].setFrameIdx(FrameIdx); + + I->setFrameIdx(FrameIdx); } FFI->setCalleeSavedInfo(CSI); @@ -244,6 +281,8 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineFrameInfo *FFI = Fn.getFrameInfo(); const std::vector &CSI = FFI->getCalleeSavedInfo(); + FFI->setCalleeSavedInfoValid(true); + // Early exit if no callee saved registers are modified! if (CSI.empty()) return; @@ -403,8 +442,7 @@ static inline void AdjustStackOffset(MachineFrameInfo *FFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { - // If stack grows down, we need to add size of find the lowest address of the - // object. + // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += FFI->getObjectSize(FrameIdx); @@ -437,16 +475,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *FFI = Fn.getFrameInfo(); - unsigned MaxAlign = FFI->getMaxAlignment(); + unsigned MaxAlign = 1; // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. - int64_t Offset = TFI.getOffsetOfLocalArea(); + int LocalAreaOffset = TFI.getOffsetOfLocalArea(); if (StackGrowsDown) - Offset = -Offset; - assert(Offset >= 0 + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. 
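Two alignment rules meet in this area: each callee-saved spill slot's requested alignment is capped by the stack alignment (`Align = std::min(Align, StackAlign)` above), and the hunk that follows rounds the total frame size up with the mask expression `(Offset + AlignMask) & ~AlignMask`. A small self-contained check of that power-of-two arithmetic, using made-up alignment values rather than any real target's:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Round Offset up to the next multiple of Align; Align must be a power
// of two, which is what makes the mask trick below legal.
uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  uint64_t AlignMask = Align - 1;
  return (Offset + AlignMask) & ~AlignMask;
}

int main() {
  // A register class may ask for more alignment than the stack provides;
  // the spill-slot code takes the minimum of the two.
  unsigned RegClassAlign = 16, StackAlign = 8;
  unsigned SlotAlign = std::min(RegClassAlign, StackAlign);
  assert(SlotAlign == 8);

  // Frame-size rounding: 52 bytes of objects, 16-byte target alignment.
  assert(alignTo(52, 16) == 64);
  assert(alignTo(64, 16) == 64); // already aligned, unchanged
  return 0;
}

The same arithmetic explains the new StackAlign selection below: leaf functions only need the (typically smaller) TransientStackAlignment, so the rounding costs them less stack.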
@@ -538,32 +577,38 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { AdjustStackOffset(FFI, SFI, StackGrowsDown, Offset, MaxAlign); } - // Round up the size to a multiple of the alignment, but only if there are - // calls or alloca's in the function. This ensures that any calls to - // subroutines have their stack frames suitable aligned. - // Also do this if we need runtime alignment of the stack. In this case - // offsets will be relative to SP not FP; round up the stack size so this - // works. - if (!RegInfo->targetHandlesStackFrameRounding() && - (FFI->hasCalls() || FFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(Fn) && - FFI->getObjectIndexEnd() != 0))) { + if (!RegInfo->targetHandlesStackFrameRounding()) { // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (RegInfo->hasReservedCallFrame(Fn)) + if (FFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) Offset += FFI->getMaxCallFrameSize(); - unsigned AlignMask = std::max(TFI.getStackAlignment(),MaxAlign) - 1; + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (FFI->hasCalls() || FFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && FFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + // If the frame pointer is eliminated, all frame offsets will be relative + // to SP not FP; align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); } // Update frame info to pretend that this is part of the stack... - FFI->setStackSize(Offset+TFI.getOffsetOfLocalArea()); + FFI->setStackSize(Offset - LocalAreaOffset); // Remember the required stack alignment in case targets need it to perform // dynamic stack alignment. - FFI->setMaxAlignment(MaxAlign); + if (MaxAlign > FFI->getMaxAlignment()) + FFI->setMaxAlignment(MaxAlign); } @@ -604,14 +649,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { int SPAdj = 0; // SP offset due to call frame setup / destroy. - if (RS) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == TargetInstrInfo::DECLARE) { - // Ignore it. - ++I; - continue; - } if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { @@ -654,8 +694,16 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - - TRI.eliminateFrameIndex(MI, SPAdj, RS); + int Value; + unsigned VReg = + TRI.eliminateFrameIndex(MI, SPAdj, &Value, + FrameIndexVirtualScavenging ? NULL : RS); + if (VReg) { + assert (FrameIndexVirtualScavenging && + "Not scavenging, but virtual returned from " + "eliminateFrameIndex()!"); + FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj); + } // Reset the iterator if we were at the beginning of the BB. 
if (AtBeginning) { @@ -670,10 +718,170 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (DoIncr && I != BB->end()) ++I; // Update register states. - if (RS && MI) RS->forward(MI); + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); } } +/// findLastUseReg - find the killing use of the specified register within +/// the instruction range. Return the operand number of the kill in Operand. +static MachineBasicBlock::iterator +findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME, + unsigned Reg, unsigned *Operand) { + // Scan forward to find the last use of this virtual register + for (++I; I != ME; ++I) { + MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isReg()) { + unsigned OpReg = MI->getOperand(i).getReg(); + if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg)) + continue; + assert (OpReg == Reg + && "overlapping use of scavenged index register!"); + // If this is the killing use, we're done + if (MI->getOperand(i).isKill()) { + if (Operand) + *Operand = i; + return I; + } + } + } + // If we hit the end of the basic block, there was no kill of + // the virtual register, which is wrong. + assert (0 && "scavenged index register never killed!"); + return ME; +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + unsigned CurrentVirtReg = 0; + unsigned CurrentScratchReg = 0; + bool havePrevValue = false; + unsigned PrevScratchReg = 0; + int PrevValue; + MachineInstr *PrevLastUseMI = NULL; + unsigned PrevLastUseOp = 0; + bool trackingCurrentValue = false; + int SPAdj = 0; + int Value = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + MachineInstr *MI = I; + // Likewise, call getNumOperands() each iteration, as the MI may change + // inside the loop (with 'i' updated accordingly). + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // If we have an active scavenged register, we shouldn't be + // seeing any references to it. + assert (Reg != CurrentScratchReg + && "overlapping use of scavenged frame index register!"); + + // If we have a previous scratch reg, check and see if anything + // here kills whatever value is in there. + if (Reg == PrevScratchReg) { + if (MO.isUse()) { + // Two-address operands implicitly kill + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { + havePrevValue = false; + PrevScratchReg = 0; + } + } else { + assert (MO.isDef()); + havePrevValue = false; + PrevScratchReg = 0; + } + } + continue; + } + + // Have we already allocated a scratch register for this virtual? + if (Reg != CurrentVirtReg) { + // When we first encounter a new virtual register, it + // must be a definition.
+ assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // We can't have nested virtual register live ranges because + // there's only a guarantee of one scavenged register at a time. + assert (CurrentVirtReg == 0 && + "overlapping frame index virtual registers!"); + + // If the target gave us information about what's in the register, + // we can use that to re-use scratch regs. + DenseMap::iterator Entry = + FrameConstantRegMap.find(Reg); + trackingCurrentValue = Entry != FrameConstantRegMap.end(); + if (trackingCurrentValue) { + SPAdj = (*Entry).second.second; + Value = (*Entry).second.first; + } else + SPAdj = Value = 0; + + // If the scratch register from the last allocation is still + // available, see if the value matches. If it does, just re-use it. + if (trackingCurrentValue && havePrevValue && PrevValue == Value) { + // FIXME: This assumes that the instructions in the live range + // for the virtual register are exclusively for the purpose + // of populating the value in the register. That's reasonable + // for these frame index registers, but it's still a very, very + // strong assumption. Perhaps this implies that the frame index + // elimination should be before register allocation, with + // conservative heuristics since we'll know less then, and + // the reuse calculations done directly when doing the code-gen? + + // Find the last use of the new virtual register. Remove all + // instruction between here and there, and update the current + // instruction to reference the last use insn instead. + MachineBasicBlock::iterator LastUseMI = + findLastUseReg(I, BB->end(), Reg, &i); + // Remove all instructions up 'til the last use, since they're + // just calculating the value we already have. + BB->erase(I, LastUseMI); + MI = I = LastUseMI; + + CurrentScratchReg = PrevScratchReg; + // Extend the live range of the register + PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false); + RS->setUsed(CurrentScratchReg); + } else { + CurrentVirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + CurrentScratchReg = RS->FindUnusedReg(RC); + if (CurrentScratchReg == 0) + // No register is "free". Scavenge a register. + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); + + PrevValue = Value; + } + } + assert (CurrentScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(CurrentScratchReg); + + // If this is the last use of the register, stop tracking it. + if (MI->getOperand(i).isKill()) { + PrevScratchReg = CurrentScratchReg; + PrevLastUseMI = MI; + PrevLastUseOp = i; + CurrentScratchReg = CurrentVirtReg = 0; + havePrevValue = trackingCurrentValue; + } + } + RS->forward(MI); + } + } +} diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index c158dd8ac2322..931f1eb231b27 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class RegScavenger; @@ -93,6 +94,17 @@ namespace llvm { // functions. bool ShrinkWrapThisFunction; + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging; + + // When using the scavenger post-pass to resolve frame reference + // materialization registers, maintain a map of the registers to + // the constant value and SP adjustment associated with it. + typedef std::pair FrameConstantEntry; + DenseMap FrameConstantRegMap; + #ifndef NDEBUG // Machine function handle. MachineFunction* MF; @@ -118,10 +130,12 @@ namespace llvm { CSRegBlockMap &prevRestores); void placeSpillsAndRestores(MachineFunction &Fn); void placeCSRSpillsAndRestores(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); void calculateCalleeSavedRegisters(MachineFunction &Fn); void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); // Initialize DFA sets, called before iterations. diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index b4c20e6bfd311..00c5d46d21a14 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/DerivedTypes.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" #include @@ -38,15 +39,16 @@ static const char *const PSVNames[] = { "ConstantPool" }; +// FIXME: THIS IS A HACK!!!! +// Eventually these should be uniqued on LLVMContext rather than in a managed +// static. For now, we can safely use the global context for the time being to +// squeak by. PseudoSourceValue::PseudoSourceValue() : - Value(PointerType::getUnqual(Type::Int8Ty), PseudoSourceValueVal) {} + Value(Type::getInt8PtrTy(getGlobalContext()), + PseudoSourceValueVal) {} -void PseudoSourceValue::dump() const { - print(errs()); errs() << '\n'; -} - -void PseudoSourceValue::print(raw_ostream &OS) const { - OS << PSVNames[this - *PSVs]; +void PseudoSourceValue::printCustom(raw_ostream &O) const { + O << PSVNames[this - *PSVs]; } namespace { @@ -61,7 +63,7 @@ namespace { virtual bool isConstant(const MachineFrameInfo *MFI) const; - virtual void print(raw_ostream &OS) const { + virtual void printCustom(raw_ostream &OS) const { OS << "FixedStack" << FI; } }; @@ -83,7 +85,7 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { this == getConstantPool() || this == getJumpTable()) return true; - assert(0 && "Unknown PseudoSourceValue!"); + llvm_unreachable("Unknown PseudoSourceValue!"); return false; } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index 64374ce137fd7..b655dda411530 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -30,44 +30,6 @@ It also increase the likelyhood the store may become dead. //===---------------------------------------------------------------------===// -I think we should have a "hasSideEffects" flag (which is automatically set for -stuff that "isLoad" "isCall" etc), and the remat pass should eventually be able -to remat any instruction that has no side effects, if it can handle it and if -profitable. - -For now, I'd suggest having the remat stuff work like this: - -1. I need to spill/reload this thing. -2. Check to see if it has side effects. -3. Check to see if it is simple enough: e.g. it only has one register -destination and no register input. -4. If so, clone the instruction, do the xform, etc. 
- -Advantages of this are: - -1. the .td file describes the behavior of the instructions, not the way the algorithm should work. -2. as remat gets smarter in the future, we shouldn't have to be changing the .td files. -3. it is easier to explain what the flag means in the .td file, because you don't have to pull in the explanation of how the current remat algo works. - -Some potential added complexities: - -1. Some instructions have to be glued to it's predecessor or successor. All of the PC relative instructions and condition code setting instruction. We could mark them as hasSideEffects, but that's not quite right. PC relative loads from constantpools can be remat'ed, for example. But it requires more than just cloning the instruction. Some instructions can be remat'ed but it expands to more than one instruction. But allocator will have to make a decision. - -4. As stated in 3, not as simple as cloning in some cases. The target will have to decide how to remat it. For example, an ARM 2-piece constant generation instruction is remat'ed as a load from constantpool. - -//===---------------------------------------------------------------------===// - bb27 ... ... %reg1037 = ADDri %reg1039, 1 @@ -206,3 +168,32 @@ Stack coloring improvments: not spill slots. 2. Reorder objects to fill in gaps between objects. e.g. 4, 1, , 4, 1, 1, 1, , 4 => 4, 1, 1, 1, 1, 4, 4 + +//===---------------------------------------------------------------------===// + +The scheduler should be able to sort nearby instructions by their address. For example, in an expanded memset sequence it's not uncommon to see code like this: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +Each of the stores is independent, and the scheduler is currently making an arbitrary decision about the order. + +//===---------------------------------------------------------------------===// + +Another opportunity in this code is that the $0 could be moved to a register: + + movl $0, 4(%rdi) + movl $0, 8(%rdi) + movl $0, 12(%rdi) + movl $0, 0(%rdi) + +This would save substantial code size, especially for longer sequences like this. It would be easy to have a rule telling isel to avoid matching MOV32mi if the immediate has more than some fixed number of uses. It's more involved to teach the register allocator how to do late folding to recover from excessive register pressure. + diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 904b4cb2a46f8..5d58ea984f215 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -33,8 +33,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -142,6 +144,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); @@ -173,11 +176,11 @@ namespace { /// processActiveIntervals - expire old intervals and move non-overlapping /// ones to the inactive list. - void processActiveIntervals(unsigned CurPoint); + void processActiveIntervals(LiveIndex CurPoint); /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list.
- void processInactiveIntervals(unsigned CurPoint); + void processInactiveIntervals(LiveIndex CurPoint); /// hasNextReloadInterval - Return the next liveinterval that's being /// defined by a reload from the same SS as the specified one. @@ -230,12 +233,12 @@ namespace { bool Error = false; for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { if (regUse_[i] != 0) { - cerr << tri_->getName(i) << " is still in use!\n"; + errs() << tri_->getName(i) << " is still in use!\n"; Error = true; } } if (Error) - abort(); + llvm_unreachable(0); #endif regUse_.clear(); regUseBackUp_.clear(); @@ -295,15 +298,20 @@ namespace { template void printIntervals(const char* const str, ItTy i, ItTy e) const { - if (str) DOUT << str << " intervals:\n"; - for (; i != e; ++i) { - DOUT << "\t" << *i->first << " -> "; - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) { - reg = vrm_->getPhys(reg); - } - DOUT << tri_->getName(reg) << '\n'; - } + DEBUG({ + if (str) + errs() << str << " intervals:\n"; + + for (; i != e; ++i) { + errs() << "\t" << *i->first << " -> "; + + unsigned reg = i->first->reg; + if (TargetRegisterInfo::isVirtualRegister(reg)) + reg = vrm_->getPhys(reg); + + errs() << tri_->getName(reg) << '\n'; + } + }); } }; char RALinScan::ID = 0; @@ -358,7 +366,8 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { return Reg; VNInfo *vni = cur.begin()->valno; - if (!vni->def || vni->isUnused() || !vni->isDefAccurate()) + if ((vni->def == LiveIndex()) || + vni->isUnused() || !vni->isDefAccurate()) return Reg; MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg; @@ -380,18 +389,18 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { // Try to coalesce. if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) { - DOUT << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) - << '\n'; + DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg) + << '\n'); vrm_->clearVirt(cur.reg); vrm_->assignVirt2Phys(cur.reg, PhysReg); // Remove unnecessary kills since a copy does not clobber the register. 
if (li_->hasInterval(SrcReg)) { LiveInterval &SrcLI = li_->getInterval(SrcReg); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(cur.reg), - E = mri_->reg_end(); I != E; ++I) { + for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg), + E = mri_->use_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); - if (!O.isUse() || !O.isKill()) + if (!O.isKill()) continue; MachineInstr *MI = &*I; if (SrcLI.liveAt(li_->getDefIndex(li_->getInstructionIndex(MI)))) @@ -478,24 +487,25 @@ void RALinScan::initIntervalSets() } } -void RALinScan::linearScan() -{ +void RALinScan::linearScan() { // linear scan algorithm - DOUT << "********** LINEAR SCAN **********\n"; - DOUT << "********** Function: " << mf_->getFunction()->getName() << '\n'; - - DEBUG(printIntervals("fixed", fixed_.begin(), fixed_.end())); + DEBUG({ + errs() << "********** LINEAR SCAN **********\n" + << "********** Function: " + << mf_->getFunction()->getName() << '\n'; + printIntervals("fixed", fixed_.begin(), fixed_.end()); + }); while (!unhandled_.empty()) { // pick the interval with the earliest start point LiveInterval* cur = unhandled_.top(); unhandled_.pop(); ++NumIters; - DOUT << "\n*** CURRENT ***: " << *cur << '\n'; + DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n'); if (!cur->empty()) { - processActiveIntervals(cur->beginNumber()); - processInactiveIntervals(cur->beginNumber()); + processActiveIntervals(cur->beginIndex()); + processInactiveIntervals(cur->beginIndex()); assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && "Can only allocate virtual registers!"); @@ -506,15 +516,17 @@ void RALinScan::linearScan() // assign it one. assignRegOrStackSlotAtInterval(cur); - DEBUG(printIntervals("active", active_.begin(), active_.end())); - DEBUG(printIntervals("inactive", inactive_.begin(), inactive_.end())); + DEBUG({ + printIntervals("active", active_.begin(), active_.end()); + printIntervals("inactive", inactive_.begin(), inactive_.end()); + }); } // Expire any remaining active intervals while (!active_.empty()) { IntervalPtr &IP = active_.back(); unsigned reg = IP.first->reg; - DOUT << "\tinterval " << *IP.first << " expired\n"; + DEBUG(errs() << "\tinterval " << *IP.first << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -523,9 +535,11 @@ void RALinScan::linearScan() } // Expire any remaining inactive intervals - DEBUG(for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - DOUT << "\tinterval " << *i->first << " expired\n"); + DEBUG({ + for (IntervalPtrs::reverse_iterator + i = inactive_.rbegin(); i != inactive_.rend(); ++i) + errs() << "\tinterval " << *i->first << " expired\n"; + }); inactive_.clear(); // Add live-ins to every BB except for entry. Also perform trivial coalescing. @@ -560,7 +574,7 @@ void RALinScan::linearScan() } } - DOUT << *vrm_; + DEBUG(errs() << *vrm_); // Look for physical registers that end up not being allocated even though // register allocator had to spill other registers in its register class. @@ -572,9 +586,9 @@ void RALinScan::linearScan() /// processActiveIntervals - expire old intervals and move non-overlapping ones /// to the inactive list. 
-void RALinScan::processActiveIntervals(unsigned CurPoint) +void RALinScan::processActiveIntervals(LiveIndex CurPoint) { - DOUT << "\tprocessing active intervals:\n"; + DEBUG(errs() << "\tprocessing active intervals:\n"); for (unsigned i = 0, e = active_.size(); i != e; ++i) { LiveInterval *Interval = active_[i].first; @@ -584,7 +598,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // Remove expired intervals. - DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -597,7 +611,7 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) } else if (IntervalPos->start > CurPoint) { // Move inactive intervals to inactive list. - DOUT << "\t\tinterval " << *Interval << " inactive\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -618,9 +632,9 @@ void RALinScan::processActiveIntervals(unsigned CurPoint) /// processInactiveIntervals - expire old intervals and move overlapping /// ones to the active list. -void RALinScan::processInactiveIntervals(unsigned CurPoint) +void RALinScan::processInactiveIntervals(LiveIndex CurPoint) { - DOUT << "\tprocessing inactive intervals:\n"; + DEBUG(errs() << "\tprocessing inactive intervals:\n"); for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { LiveInterval *Interval = inactive_[i].first; @@ -630,7 +644,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); if (IntervalPos == Interval->end()) { // remove expired intervals. - DOUT << "\t\tinterval " << *Interval << " expired\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n"); // Pop off the end of the list. inactive_[i] = inactive_.back(); @@ -638,7 +652,7 @@ void RALinScan::processInactiveIntervals(unsigned CurPoint) --i; --e; } else if (IntervalPos->start <= CurPoint) { // move re-activated intervals in active list - DOUT << "\t\tinterval " << *Interval << " active\n"; + DEBUG(errs() << "\t\tinterval " << *Interval << " active\n"); assert(TargetRegisterInfo::isVirtualRegister(reg) && "Can only allocate virtual registers!"); reg = vrm_->getPhys(reg); @@ -699,7 +713,7 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { return IP.end(); } -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, unsigned Point){ +static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, LiveIndex Point){ for (unsigned i = 0, e = V.size(); i != e; ++i) { RALinScan::IntervalPtr &IP = V[i]; LiveInterval::iterator I = std::upper_bound(IP.first->begin(), @@ -725,7 +739,8 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_, if (SI.hasAtLeastOneValue()) VNI = SI.getValNumInfo(0); else - VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator()); + VNI = SI.getNextValue(LiveIndex(), 0, false, + ls_->getVNInfoAllocator()); LiveInterval &RI = li_->getInterval(cur->reg); // FIXME: This may be overly conservative. 
@@ -764,10 +779,12 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; SmallVector SLIs[3]; - DOUT << "\tConsidering " << NumCands << " candidates: "; - DEBUG(for (unsigned i = 0; i != NumCands; ++i) - DOUT << tri_->getName(Candidates[i].first) << " "; - DOUT << "\n";); + DEBUG({ + errs() << "\tConsidering " << NumCands << " candidates: "; + for (unsigned i = 0; i != NumCands; ++i) + errs() << tri_->getName(Candidates[i].first) << " "; + errs() << "\n"; + }); // Calculate the number of conflicts of each candidate. for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { @@ -865,16 +882,15 @@ void RALinScan::UpgradeRegister(unsigned Reg) { namespace { struct LISorter { bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginNumber() < B->beginNumber(); + return A->beginIndex() < B->beginIndex(); } }; } /// assignRegOrStackSlotAtInterval - assign a register if one is available, or /// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) -{ - DOUT << "\tallocating current interval: "; +void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { + DEBUG(errs() << "\tallocating current interval: "); // This is an implicitly defined live interval, just assign any register. const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); @@ -882,7 +898,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) unsigned physReg = vrm_->getRegAllocPref(cur->reg); if (!physReg) physReg = *RC->allocation_order_begin(*mf_); - DOUT << tri_->getName(physReg) << '\n'; + DEBUG(errs() << tri_->getName(physReg) << '\n'); // Note the register is not really in use. vrm_->assignVirt2Phys(cur->reg, physReg); return; @@ -891,7 +907,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) backUpRegUses(); std::vector > SpillWeightsToAdd; - unsigned StartPosition = cur->beginNumber(); + LiveIndex StartPosition = cur->beginIndex(); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); // If start of this live interval is defined by a move instruction and its @@ -901,7 +917,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // one, e.g. X86::mov32to32_. These move instructions are not coalescable. if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if (vni->def && !vni->isUnused() && vni->isDefAccurate()) { + if ((vni->def != LiveIndex()) && !vni->isUnused() && + vni->isDefAccurate()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (CopyMI && @@ -963,7 +980,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // Okay, this reg is on the fixed list. Check to see if we actually // conflict. 
LiveInterval *I = IP.first; - if (I->endNumber() > StartPosition) { + if (I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) @@ -988,7 +1005,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endNumber() > StartPosition) { + I->endIndex() > StartPosition) { LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); IP.second = II; if (II != I->begin() && II->start > StartPosition) @@ -1015,7 +1032,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // the free physical register and add this interval to the active // list. if (physReg) { - DOUT << tri_->getName(physReg) << '\n'; + DEBUG(errs() << tri_->getName(physReg) << '\n'); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); @@ -1031,7 +1048,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) } return; } - DOUT << "no free registers\n"; + DEBUG(errs() << "no free registers\n"); // Compile the spill weights into an array that is better for scanning. std::vector SpillWeights(tri_->getNumRegs(), 0.0f); @@ -1049,7 +1066,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) updateSpillWeights(SpillWeights, reg, i->first->weight, RC); } - DOUT << "\tassigning stack slot at interval "<< *cur << ":\n"; + DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n"); // Find a register to spill. float minWeight = HUGE_VALF; @@ -1102,8 +1119,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) DowngradedRegs.clear(); assignRegOrStackSlotAtInterval(cur); } else { - cerr << "Ran out of registers during register allocation!\n"; - exit(1); + llvm_report_error("Ran out of registers during register allocation!"); } return; } @@ -1117,16 +1133,19 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) --LastCandidate; } - DOUT << "\t\tregister(s) with min weight(s): "; - DEBUG(for (unsigned i = 0; i != LastCandidate; ++i) - DOUT << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"); + DEBUG({ + errs() << "\t\tregister(s) with min weight(s): "; + + for (unsigned i = 0; i != LastCandidate; ++i) + errs() << tri_->getName(RegsWeights[i].first) + << " (" << RegsWeights[i].second << ")\n"; + }); // If the current has the minimum weight, we need to spill it and // add any added intervals back to unhandled, and restart // linearscan. if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DOUT << "\t\t\tspilling(c): " << *cur << '\n'; + DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector spillIs; std::vector added; @@ -1154,14 +1173,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *ReloadLi = added[i]; if (ReloadLi->weight == HUGE_VALF && li_->getApproximateInstructionCount(*ReloadLi) == 0) { - unsigned ReloadIdx = ReloadLi->beginNumber(); + LiveIndex ReloadIdx = ReloadLi->beginIndex(); MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { // Last reload of same SS is in the same MBB. We want to try to // allocate both reloads the same register and make sure the reg // isn't clobbered in between if at all possible. 
- assert(LastReload->beginNumber() < ReloadIdx); + assert(LastReload->beginIndex() < ReloadIdx); NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); } LastReloadMBB = ReloadMBB; @@ -1206,12 +1225,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // mark our rollback point. std::vector added; while (!spillIs.empty()) { - bool epicFail = false; LiveInterval *sli = spillIs.back(); spillIs.pop_back(); - DOUT << "\t\t\tspilling(a): " << *sli << '\n'; + DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n'); earliestStartInterval = - (earliestStartInterval->beginNumber() < sli->beginNumber()) ? + (earliestStartInterval->beginIndex() < sli->beginIndex()) ? earliestStartInterval : sli; std::vector newIs; @@ -1223,15 +1241,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) addStackInterval(sli, ls_, li_, mri_, *vrm_); std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); - - if (epicFail) { - //abort(); - } } - unsigned earliestStart = earliestStartInterval->beginNumber(); + LiveIndex earliestStart = earliestStartInterval->beginIndex(); - DOUT << "\t\trolling back to: " << earliestStart << '\n'; + DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n'); // Scan handled in reverse order up to the earliest start of a // spilled live interval and undo each one, restoring the state of @@ -1239,9 +1253,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) while (!handled_.empty()) { LiveInterval* i = handled_.back(); // If this interval starts before t we are done. - if (i->beginNumber() < earliestStart) + if (i->beginIndex() < earliestStart) break; - DOUT << "\t\t\tundo changes for: " << *i << '\n'; + DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n'); handled_.pop_back(); // When undoing a live interval allocation we must know if it is active or @@ -1290,8 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) for (unsigned i = 0, e = handled_.size(); i != e; ++i) { LiveInterval *HI = handled_[i]; if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginNumber())) { - DOUT << "\t\t\tundo changes for: " << *HI << '\n'; + HI->expiredAt(cur->beginIndex())) { + DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n'); active_.push_back(std::make_pair(HI, HI->begin())); assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); addRegUse(vrm_->getPhys(HI->reg)); @@ -1310,14 +1324,14 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *ReloadLi = added[i]; if (ReloadLi->weight == HUGE_VALF && li_->getApproximateInstructionCount(*ReloadLi) == 0) { - unsigned ReloadIdx = ReloadLi->beginNumber(); + LiveIndex ReloadIdx = ReloadLi->beginIndex(); MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { // Last reload of same SS is in the same MBB. We want to try to // allocate both reloads the same register and make sure the reg // isn't clobbered in between if at all possible. - assert(LastReload->beginNumber() < ReloadIdx); + assert(LastReload->beginIndex() < ReloadIdx); NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); } LastReloadMBB = ReloadMBB; @@ -1420,7 +1434,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { // available first. 
unsigned Preference = vrm_->getRegAllocPref(cur->reg); if (Preference) { - DOUT << "(preferred: " << tri_->getName(Preference) << ") "; + DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") "); if (isRegAvail(Preference) && RC->contains(Preference)) return Preference; diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index e1cc20cf4fb11..6caa2d3b824fb 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -25,6 +25,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" @@ -151,6 +153,7 @@ namespace { } virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequiredID(PHIEliminationID); AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); @@ -291,11 +294,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, assert(VirtReg && "Spilling a physical register is illegal!" " Must not have appropriate kill for the register or use exists beyond" " the intended one."); - DOUT << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg; + DEBUG(errs() << " Spilling register " << TRI->getName(PhysReg) + << " containing %reg" << VirtReg); if (!isVirtRegModified(VirtReg)) { - DOUT << " which has not been modified, so no store necessary!"; + DEBUG(errs() << " which has not been modified, so no store necessary!"); std::pair &LastUse = getVirtRegLastUse(VirtReg); if (LastUse.first) LastUse.first->getOperand(LastUse.second).setIsKill(); @@ -305,7 +308,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, // modified. const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DOUT << " to stack slot #" << FrameIndex; + DEBUG(errs() << " to stack slot #" << FrameIndex); // If the instruction reads the register that's spilled, (e.g. this can // happen if it is a move to a physical register), then the spill // instruction is not a kill. 
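The spillVirtReg path above only emits a store when the virtual register's modified bit is set; a clean value is already in its stack slot, so the physical register can simply be released and the last use marked as a kill. The decision in miniature (the VRegState record and the printf tracing are stand-ins for the pass's real bookkeeping):

    #include <cstdio>

    struct VRegState { bool modified; int frameIndex; };

    static void spillVirt(unsigned physReg, unsigned virtReg,
                          const VRegState &st) {
      if (st.modified) {
        // Value changed since it was reloaded: write it back.
        std::printf("store %%reg%u from physreg %u to stack slot #%d\n",
                    virtReg, physReg, st.frameIndex);
      } else {
        // Stack slot already holds the current value: no store needed,
        // just free the physical register.
        std::printf("%%reg%u not modified, no store necessary\n", virtReg);
      }
    }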
@@ -316,7 +319,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB, getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - DOUT << "\n"; + DEBUG(errs() << '\n'); removePhysReg(PhysReg); } @@ -505,8 +508,8 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - DOUT << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"; + DEBUG(errs() << " Reloading %reg" << VirtReg << " into " + << TRI->getName(PhysReg) << "\n"); // Add move instruction(s) TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC); @@ -517,24 +520,28 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); if (!ReloadedRegs.insert(PhysReg)) { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), TM); + MI->print(Msg, TM); } - exit(1); + llvm_report_error(Msg.str()); } for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (!ReloadedRegs.insert(*SubRegs)) { - cerr << "Ran out of registers during register allocation!\n"; + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; if (MI->getOpcode() == TargetInstrInfo::INLINEASM) { - cerr << "Please check your inline asm statement for invalid " + Msg << "\nPlease check your inline asm statement for invalid " << "constraints:\n"; - MI->print(cerr.stream(), TM); + MI->print(Msg, TM); } - exit(1); + llvm_report_error(Msg.str()); } } @@ -707,8 +714,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { // loop over each instruction MachineBasicBlock::iterator MII = MBB.begin(); - DEBUG(const BasicBlock *LBB = MBB.getBasicBlock(); - if (LBB) DOUT << "\nStarting RegAlloc of BB: " << LBB->getName()); + DEBUG({ + const BasicBlock *LBB = MBB.getBasicBlock(); + if (LBB) + errs() << "\nStarting RegAlloc of BB: " << LBB->getName(); + }); // Add live-in registers as active. for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), @@ -733,13 +743,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { while (MII != MBB.end()) { MachineInstr *MI = MII++; const TargetInstrDesc &TID = MI->getDesc(); - DEBUG(DOUT << "\nStarting RegAlloc of: " << *MI; - DOUT << " Regs have values: "; - for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) - DOUT << "[" << TRI->getName(i) - << ",%reg" << PhysRegsUsed[i] << "] "; - DOUT << "\n"); + DEBUG({ + errs() << "\nStarting RegAlloc of: " << *MI; + errs() << " Regs have values: "; + for (unsigned i = 0; i != TRI->getNumRegs(); ++i) + if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) + errs() << "[" << TRI->getName(i) + << ",%reg" << PhysRegsUsed[i] << "] "; + errs() << '\n'; + }); // Loop over the implicit uses, making sure that they are at the head of the // use order list, so they don't get reallocated. 
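The out-of-registers path below no longer prints to cerr and calls exit(1); it accumulates the message in a raw_string_ostream and hands it to llvm_report_error, which routes through any installed fatal-error handler. The shape of the pattern, condensed (the Culprit parameter is illustrative):

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    static void reportOutOfRegs(unsigned Culprit) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "Ran out of registers during register allocation!";
      Msg << "\nOffending virtual register: %reg" << Culprit;
      llvm_report_error(Msg.str());  // does not return
    }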
@@ -783,8 +795,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DOUT << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"; + DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the earlyclobber register } else { unsigned Reg = MO.getReg(); @@ -849,15 +861,15 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DOUT << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"; + DEBUG(errs() << " Last use of " << TRI->getName(PhysReg) + << "[%reg" << VirtReg <<"], removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); *SubRegs; ++SubRegs) { if (PhysRegsUsed[*SubRegs] != -2) { - DOUT << " Last use of " - << TRI->getName(*SubRegs) - << "[%reg" << VirtReg <<"], removing it from live set\n"; + DEBUG(errs() << " Last use of " + << TRI->getName(*SubRegs) << "[%reg" << VirtReg + <<"], removing it from live set\n"); removePhysReg(*SubRegs); } } @@ -942,8 +954,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { MF->getRegInfo().setPhysRegUsed(DestPhysReg); markVirtRegModified(DestVirtReg); getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DOUT << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"; + DEBUG(errs() << " Assigning " << TRI->getName(DestPhysReg) + << " to %reg" << DestVirtReg << "\n"); MO.setReg(DestPhysReg); // Assign the output register } } @@ -965,16 +977,16 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { } if (PhysReg) { - DOUT << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"; + DEBUG(errs() << " Register " << TRI->getName(PhysReg) + << " [%reg" << VirtReg + << "] is never used, removing it from live set\n"); removePhysReg(PhysReg); for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); *AliasSet; ++AliasSet) { if (PhysRegsUsed[*AliasSet] != -2) { - DOUT << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"; + DEBUG(errs() << " Register " << TRI->getName(*AliasSet) + << " [%reg" << *AliasSet + << "] is never used, removing it from live set\n"); removePhysReg(*AliasSet); } } @@ -1022,7 +1034,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { /// runOnMachineFunction - Register allocate the whole function /// bool RALocal::runOnMachineFunction(MachineFunction &Fn) { - DOUT << "Machine Function " << "\n"; + DEBUG(errs() << "Machine Function\n"); MF = &Fn; TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 89e2c59fe805d..bee5d931319eb 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,7 +31,9 @@ #define DEBUG_TYPE "regalloc" -#include "PBQP.h" +#include "PBQP/HeuristicSolver.h" +#include "PBQP/SimpleGraph.h" +#include "PBQP/Heuristics/Briggs.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -42,6 +44,7 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include 
"llvm/Target/TargetMachine.h" #include @@ -53,32 +56,38 @@ using namespace llvm; static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator", - createPBQPRegisterAllocator); +registerPBQPRepAlloc("pbqp", "PBQP register allocator.", + llvm::createPBQPRegisterAllocator); + +static cl::opt +pbqpCoalescing("pbqp-coalescing", + cl::desc("Attempt coalescing during PBQP register allocation."), + cl::init(false), cl::Hidden); namespace { - //! - //! PBQP based allocators solve the register allocation problem by mapping - //! register allocation problems to Partitioned Boolean Quadratic - //! Programming problems. + /// + /// PBQP based allocators solve the register allocation problem by mapping + /// register allocation problems to Partitioned Boolean Quadratic + /// Programming problems. class VISIBILITY_HIDDEN PBQPRegAlloc : public MachineFunctionPass { public: static char ID; - //! Construct a PBQP register allocator. - PBQPRegAlloc() : MachineFunctionPass((intptr_t)&ID) {} + /// Construct a PBQP register allocator. + PBQPRegAlloc() : MachineFunctionPass(&ID) {} - //! Return the pass name. - virtual const char* getPassName() const throw() { + /// Return the pass name. + virtual const char* getPassName() const { return "PBQP Register Allocator"; } - //! PBQP analysis usage. + /// PBQP analysis usage. virtual void getAnalysisUsage(AnalysisUsage &au) const { au.addRequired(); - au.addRequiredTransitive(); + //au.addRequiredID(SplitCriticalEdgesID); + au.addRequired(); au.addRequired(); au.addPreserved(); au.addRequired(); @@ -87,7 +96,7 @@ namespace { MachineFunctionPass::getAnalysisUsage(au); } - //! Perform register allocation + /// Perform register allocation virtual bool runOnMachineFunction(MachineFunction &MF); private: @@ -97,7 +106,7 @@ namespace { typedef std::vector AllowedSetMap; typedef std::set RegSet; typedef std::pair RegPair; - typedef std::map CoalesceMap; + typedef std::map CoalesceMap; typedef std::set LiveIntervalSet; @@ -119,60 +128,60 @@ namespace { emptyVRegIntervals; - //! Builds a PBQP cost vector. + /// Builds a PBQP cost vector. template - PBQPVector* buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &cealesces, - PBQPNum spillCost) const; - - //! \brief Builds a PBQP interference matrix. - //! - //! @return Either a pointer to a non-zero PBQP matrix representing the - //! allocation option costs, or a null pointer for a zero matrix. - //! - //! Expects allowed sets for two interfering LiveIntervals. These allowed - //! sets should contain only allocable registers from the LiveInterval's - //! register class, with any interfering pre-colored registers removed. + PBQP::Vector buildCostVector(unsigned vReg, + const RegContainer &allowed, + const CoalesceMap &cealesces, + PBQP::PBQPNum spillCost) const; + + /// \brief Builds a PBQP interference matrix. + /// + /// @return Either a pointer to a non-zero PBQP matrix representing the + /// allocation option costs, or a null pointer for a zero matrix. + /// + /// Expects allowed sets for two interfering LiveIntervals. These allowed + /// sets should contain only allocable registers from the LiveInterval's + /// register class, with any interfering pre-colored registers removed. template - PBQPMatrix* buildInterferenceMatrix(const RegContainer &allowed1, - const RegContainer &allowed2) const; - - //! - //! Expects allowed sets for two potentially coalescable LiveIntervals, - //! and an estimated benefit due to coalescing. The allowed sets should - //! 
contain only allocable registers from the LiveInterval's register - //! classes, with any interfering pre-colored registers removed. + PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1, + const RegContainer &allowed2) const; + + /// + /// Expects allowed sets for two potentially coalescable LiveIntervals, + /// and an estimated benefit due to coalescing. The allowed sets should + /// contain only allocable registers from the LiveInterval's register + /// classes, with any interfering pre-colored registers removed. template - PBQPMatrix* buildCoalescingMatrix(const RegContainer &allowed1, - const RegContainer &allowed2, - PBQPNum cBenefit) const; - - //! \brief Finds coalescing opportunities and returns them as a map. - //! - //! Any entries in the map are guaranteed coalescable, even if their - //! corresponding live intervals overlap. + PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1, + const RegContainer &allowed2, + PBQP::PBQPNum cBenefit) const; + + /// \brief Finds coalescing opportunities and returns them as a map. + /// + /// Any entries in the map are guaranteed coalescable, even if their + /// corresponding live intervals overlap. CoalesceMap findCoalesces(); - //! \brief Finds the initial set of vreg intervals to allocate. + /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - //! \brief Constructs a PBQP problem representation of the register - //! allocation problem for this function. - //! - //! @return a PBQP solver object for the register allocation problem. - pbqp* constructPBQPProblem(); + /// \brief Constructs a PBQP problem representation of the register + /// allocation problem for this function. + /// + /// @return a PBQP solver object for the register allocation problem. + PBQP::SimpleGraph constructPBQPProblem(); - //! \brief Adds a stack interval if the given live interval has been - //! spilled. Used to support stack slot coloring. + /// \brief Adds a stack interval if the given live interval has been + /// spilled. Used to support stack slot coloring. void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - //! \brief Given a solved PBQP problem maps this solution back to a register - //! assignment. - bool mapPBQPToRegAlloc(pbqp *problem); + /// \brief Given a solved PBQP problem maps this solution back to a register + /// assignment. + bool mapPBQPToRegAlloc(const PBQP::Solution &solution); - //! \brief Postprocessing before final spilling. Sets basic block "live in" - //! variables. + /// \brief Postprocessing before final spilling. Sets basic block "live in" + /// variables. void finalizeAlloc() const; }; @@ -182,17 +191,17 @@ namespace { template -PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg, - const RegContainer &allowed, - const CoalesceMap &coalesces, - PBQPNum spillCost) const { +PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg, + const RegContainer &allowed, + const CoalesceMap &coalesces, + PBQP::PBQPNum spillCost) const { typedef typename RegContainer::const_iterator AllowedItr; // Allocate vector. Additional element (0th) used for spill option - PBQPVector *v = new PBQPVector(allowed.size() + 1); + PBQP::Vector v(allowed.size() + 1, 0); - (*v)[0] = spillCost; + v[0] = spillCost; // Iterate over the allowed registers inserting coalesce benefits if there // are any. @@ -210,14 +219,14 @@ PBQPVector* PBQPRegAlloc::buildCostVector(unsigned vReg, continue; // We have a coalesce - insert the benefit. 
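As the rewritten doxygen comments explain, every virtual register's cost vector carries one extra leading element: index 0 holds the spill cost, and index ai + 1 holds the cost of picking the ai'th allowed physical register, reduced by any coalescing benefit. A freestanding sketch of that layout over a plain std::vector (the benefit callback is a stand-in for the CoalesceMap lookup):

    #include <vector>

    typedef double PBQPNum;

    static std::vector<PBQPNum>
    buildCosts(const std::vector<unsigned> &allowed, PBQPNum spillCost,
               PBQPNum (*benefit)(unsigned)) {
      std::vector<PBQPNum> v(allowed.size() + 1, 0);
      v[0] = spillCost;                      // element 0 is the spill option
      for (unsigned ai = 0; ai != allowed.size(); ++ai)
        v[ai + 1] = -benefit(allowed[ai]);   // coalescing enters as a credit
      return v;
    }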
- (*v)[ai + 1] = -cmItr->second; + v[ai + 1] = -cmItr->second; } return v; } template -PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( +PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix( const RegContainer &allowed1, const RegContainer &allowed2) const { typedef typename RegContainer::const_iterator RegContainerIterator; @@ -230,7 +239,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( // that the spill option (element 0,0) has zero cost, since we can allocate // both intervals to memory safely (the cost for each individual allocation // to memory is accounted for by the cost vectors for each live interval). - PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + PBQP::Matrix *m = + new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); // Assume this is a zero matrix until proven otherwise. Zero matrices occur // between interfering live ranges with non-overlapping register sets (e.g. @@ -259,8 +269,8 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( unsigned reg2 = *a2Itr; // If the row/column regs are identical or alias insert an infinity. - if ((reg1 == reg2) || tri->areAliases(reg1, reg2)) { - (*m)[ri][ci] = std::numeric_limits::infinity(); + if (tri->regsOverlap(reg1, reg2)) { + (*m)[ri][ci] = std::numeric_limits::infinity(); isZeroMatrix = false; } @@ -282,9 +292,9 @@ PBQPMatrix* PBQPRegAlloc::buildInterferenceMatrix( } template -PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix( +PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix( const RegContainer &allowed1, const RegContainer &allowed2, - PBQPNum cBenefit) const { + PBQP::PBQPNum cBenefit) const { typedef typename RegContainer::const_iterator RegContainerIterator; @@ -293,7 +303,8 @@ PBQPMatrix* PBQPRegAlloc::buildCoalescingMatrix( // for the LiveIntervals which are (potentially) to be coalesced. The amount // -cBenefit will be placed in any element representing the same register // for both intervals. - PBQPMatrix *m = new PBQPMatrix(allowed1.size() + 1, allowed2.size() + 1); + PBQP::Matrix *m = + new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0); // Reset costs to zero. m->reset(0); @@ -442,7 +453,7 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { vniItr != vniEnd; ++vniItr) { // We want to make sure we skip the copy instruction itself. - if ((*vniItr)->copy == instr) + if ((*vniItr)->getCopy() == instr) continue; if (srcLI->liveAt((*vniItr)->def)) { @@ -495,10 +506,11 @@ void PBQPRegAlloc::findVRegIntervalsToAlloc() { } } -pbqp* PBQPRegAlloc::constructPBQPProblem() { +PBQP::SimpleGraph PBQPRegAlloc::constructPBQPProblem() { typedef std::vector LIVector; typedef std::vector RegVector; + typedef std::vector NodeVector; // This will store the physical intervals for easy reference. LIVector physIntervals; @@ -530,10 +542,15 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { } // Get the set of potential coalesces. - CoalesceMap coalesces(findCoalesces()); + CoalesceMap coalesces; + + if (pbqpCoalescing) { + coalesces = findCoalesces(); + } // Construct a PBQP solver for this problem - pbqp *solver = alloc_pbqp(vregIntervalsToAlloc.size()); + PBQP::SimpleGraph problem; + NodeVector problemNodes(vregIntervalsToAlloc.size()); // Resize allowedSets container appropriately. allowedSets.resize(vregIntervalsToAlloc.size()); @@ -594,13 +611,13 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { // Set the spill cost to the interval weight, or epsilon if the // interval weight is zero - PBQPNum spillCost = (li->weight != 0.0) ? 
- li->weight : std::numeric_limits::min(); + PBQP::PBQPNum spillCost = (li->weight != 0.0) ? + li->weight : std::numeric_limits::min(); // Build a cost vector for this interval. - add_pbqp_nodecosts(solver, node, - buildCostVector(li->reg, allowedSets[node], coalesces, - spillCost)); + problemNodes[node] = + problem.addNode( + buildCostVector(li->reg, allowedSets[node], coalesces, spillCost)); } @@ -616,7 +633,7 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { CoalesceMap::const_iterator cmItr = coalesces.find(RegPair(li->reg, li2->reg)); - PBQPMatrix *m = 0; + PBQP::Matrix *m = 0; if (cmItr != coalesces.end()) { m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2], @@ -627,14 +644,29 @@ pbqp* PBQPRegAlloc::constructPBQPProblem() { } if (m != 0) { - add_pbqp_edgecosts(solver, node1, node2, m); + problem.addEdge(problemNodes[node1], + problemNodes[node2], + *m); + delete m; } } } + problem.assignNodeIDs(); + + assert(problem.getNumNodes() == allowedSets.size()); + for (unsigned i = 0; i < allowedSets.size(); ++i) { + assert(problem.getNodeItr(i) == problemNodes[i]); + } +/* + std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, " + << problem.getNumEdges() << " edges.\n"; + + problem.printDot(std::cerr); +*/ // We're done, PBQP problem constructed - return it. - return solver; + return problem; } void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, @@ -651,14 +683,14 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled, if (stackInterval.getNumValNums() != 0) vni = stackInterval.getValNumInfo(0); else - vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator()); + vni = stackInterval.getNextValue( + LiveIndex(), 0, false, lss->getVNInfoAllocator()); LiveInterval &rhsInterval = lis->getInterval(spilled->reg); stackInterval.MergeRangesInAsValue(rhsInterval, vni); } -bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { - +bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) { // Set to true if we have any spills bool anotherRoundNeeded = false; @@ -668,14 +700,16 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { // Iterate over the nodes mapping the PBQP solution to a register assignment. for (unsigned node = 0; node < node2LI.size(); ++node) { unsigned virtReg = node2LI[node]->reg, - allocSelection = get_pbqp_solution(problem, node); + allocSelection = solution.getSelection(node); + // If the PBQP solution is non-zero it's a physical register... if (allocSelection != 0) { // Get the physical reg, subtracting 1 to account for the spill option. unsigned physReg = allowedSets[node][allocSelection - 1]; - DOUT << "VREG " << virtReg << " -> " << tri->getName(physReg) << "\n"; + DEBUG(errs() << "VREG " << virtReg << " -> " + << tri->getName(physReg) << "\n"); assert(physReg != 0); @@ -697,8 +731,9 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); addStackInterval(spillInterval, mri); - DOUT << "VREG " << virtReg << " -> SPILLED (Cost: " - << oldSpillWeight << ", New vregs: "; + (void) oldSpillWeight; + DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: " + << oldSpillWeight << ", New vregs: "); // Copy any newly inserted live intervals into the list of regs to // allocate. 
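mapPBQPToRegAlloc above decodes the solver's per-node answer: selection 0 means spill, anything else indexes the node's allowed register set, shifted by one to skip the spill slot. Decoded in isolation (the names are illustrative):

    #include <vector>

    // Returns 0 for "spill this interval", otherwise the physical register.
    static unsigned decodeSelection(unsigned selection,
                                    const std::vector<unsigned> &allowed) {
      if (selection == 0)
        return 0;                    // spill option chosen
      return allowed[selection - 1]; // account for the spill slot at index 0
    }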
@@ -708,12 +743,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(pbqp *problem) { assert(!(*itr)->empty() && "Empty spill range."); - DOUT << (*itr)->reg << " "; + DEBUG(errs() << (*itr)->reg << " "); vregIntervalsToAlloc.insert(*itr); } - DOUT << ")\n"; + DEBUG(errs() << ")\n"); // We need another round if spill intervals were added. anotherRoundNeeded |= !newSpills.empty(); @@ -734,6 +769,7 @@ void PBQPRegAlloc::finalizeAlloc() const { LiveInterval *li = *itr; unsigned physReg = vrm->getRegAllocPref(li->reg); + if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); physReg = *liRC->allocation_order_begin(*mf); @@ -764,8 +800,8 @@ void PBQPRegAlloc::finalizeAlloc() const { continue; } - // Ignore unallocated vregs: if (reg == 0) { + // Filter out zero regs - they're for intervals that were spilled. continue; } @@ -804,7 +840,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { vrm = &getAnalysis(); - DOUT << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"; + DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n"); // Allocator main loop: // @@ -829,15 +865,14 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { unsigned round = 0; while (!pbqpAllocComplete) { - DOUT << " PBQP Regalloc round " << round << ":\n"; - - pbqp *problem = constructPBQPProblem(); - - solve_pbqp(problem); + DEBUG(errs() << " PBQP Regalloc round " << round << ":\n"); - pbqpAllocComplete = mapPBQPToRegAlloc(problem); + PBQP::SimpleGraph problem = constructPBQPProblem(); + PBQP::HeuristicSolver solver; + problem.assignNodeIDs(); + PBQP::Solution solution = solver.solve(problem); - free_pbqp(problem); + pbqpAllocComplete = mapPBQPToRegAlloc(solution); ++round; } @@ -852,7 +887,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { node2LI.clear(); allowedSets.clear(); - DOUT << "Post alloc VirtRegMap:\n" << *vrm << "\n"; + DEBUG(errs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter std::auto_ptr rewriter(createVirtRegRewriter()); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index d7fe7a2d54543..5f1c4e2594c2b 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -16,46 +16,21 @@ #define DEBUG_TYPE "reg-scavenging" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" using namespace llvm; -/// RedefinesSuperRegPart - Return true if the specified register is redefining -/// part of a super-register. 
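The rewritten allocator loop above owns the PBQP problem by value: each round rebuilds the graph, solves it with the heuristic solver, maps the solution back, and goes around again if spilling created new intervals. The round structure, reduced to a runnable skeleton (Solution and solveOnce are dummies standing in for the PBQP types):

    #include <cstdio>

    struct Solution { bool anotherRoundNeeded; };

    static Solution solveOnce(unsigned round) {
      std::printf("PBQP regalloc round %u\n", round);
      Solution s;
      s.anotherRoundNeeded = (round < 2);  // pretend two rounds add spills
      return s;
    }

    int main() {
      bool complete = false;
      for (unsigned round = 0; !complete; ++round)
        complete = !solveOnce(round).anotherRoundNeeded;
      return 0;
    }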
-static bool RedefinesSuperRegPart(const MachineInstr *MI, unsigned SubReg, - const TargetRegisterInfo *TRI) { - bool SeenSuperUse = false; - bool SeenSuperDef = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.isUndef()) - continue; - if (TRI->isSuperRegister(SubReg, MO.getReg())) { - if (MO.isUse()) - SeenSuperUse = true; - else if (MO.isImplicit()) - SeenSuperDef = true; - } - } - - return SeenSuperDef && SeenSuperUse; -} - -static bool RedefinesSuperRegPart(const MachineInstr *MI, - const MachineOperand &MO, - const TargetRegisterInfo *TRI) { - assert(MO.isReg() && MO.isDef() && "Not a register def!"); - return RedefinesSuperRegPart(MI, MO.getReg(), TRI); -} - /// setUsed - Set the register and its sub-registers as being used. void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(Reg); @@ -65,14 +40,38 @@ void RegScavenger::setUsed(unsigned Reg) { RegsAvailable.reset(SubReg); } -/// setUnused - Set the register and its sub-registers as being unused. -void RegScavenger::setUnused(unsigned Reg, const MachineInstr *MI) { - RegsAvailable.set(Reg); +bool RegScavenger::isAliasUsed(unsigned Reg) const { + if (isUsed(Reg)) + return true; + for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + if (isUsed(*R)) + return true; + return false; +} - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - if (!RedefinesSuperRegPart(MI, Reg, TRI)) - RegsAvailable.set(SubReg); +void RegScavenger::initRegState() { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + + // All registers started out unused. + RegsAvailable.set(); + + // Reserved registers are always used. + RegsAvailable ^= ReservedRegs; + + if (!MBB) + return; + + // Live-in registers are in use. + for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + setUsed(*I); + + // Pristine CSRs are also unavailable. + BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setUsed(I); } void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { @@ -85,6 +84,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); RegsAvailable.resize(NumPhysRegs); @@ -100,73 +100,26 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { CalleeSavedRegs.set(CSRegs[i]); } - MBB = mbb; - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; - CurrDist = 0; - DistanceMap.clear(); - - // All registers started out unused. - RegsAvailable.set(); - - // Reserved registers are always used. - RegsAvailable ^= ReservedRegs; - - // Live-in registers are in use. 
- if (!MBB->livein_empty()) - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - setUsed(*I); + // RS used within emit{Pro,Epi}logue() + if (mbb != MBB) { + MBB = mbb; + initRegState(); + } Tracking = false; } -void RegScavenger::restoreScavengedReg() { - TII->loadRegFromStackSlot(*MBB, MBBI, ScavengedReg, - ScavengingFrameIndex, ScavengedRC); - MachineBasicBlock::iterator II = prior(MBBI); - TRI->eliminateFrameIndex(II, 0, this); - setUsed(ScavengedReg); - ScavengedReg = 0; - ScavengedRC = NULL; +void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + BV.set(*R); } -#ifndef NDEBUG -/// isLiveInButUnusedBefore - Return true if register is livein the MBB not -/// not used before it reaches the MI that defines register. -static bool isLiveInButUnusedBefore(unsigned Reg, MachineInstr *MI, - MachineBasicBlock *MBB, - const TargetRegisterInfo *TRI, - MachineRegisterInfo* MRI) { - // First check if register is livein. - bool isLiveIn = false; - for (MachineBasicBlock::const_livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - if (Reg == *I || TRI->isSuperRegister(Reg, *I)) { - isLiveIn = true; - break; - } - if (!isLiveIn) - return false; - - // Is there any use of it before the specified MI? - SmallPtrSet UsesInMBB; - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->getParent() == MBB) - UsesInMBB.insert(UseMI); - } - if (UsesInMBB.empty()) - return true; - - for (MachineBasicBlock::iterator I = MBB->begin(), E = MI; I != E; ++I) - if (UsesInMBB.count(&*I)) - return false; - return true; +void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + BV.set(*R); } -#endif void RegScavenger::forward() { // Move ptr forward. @@ -179,7 +132,6 @@ void RegScavenger::forward() { } MachineInstr *MI = MBBI; - DistanceMap.insert(std::make_pair(MI, CurrDist++)); if (MI == ScavengeRestore) { ScavengedReg = 0; @@ -187,153 +139,63 @@ void RegScavenger::forward() { ScavengeRestore = NULL; } -#if 0 - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - return; -#endif - - // Separate register operands into 3 classes: uses, defs, earlyclobbers. - SmallVector, 4> UseMOs; - SmallVector, 4> DefMOs; - SmallVector, 4> EarlyClobberMOs; + // Find out which registers are early clobbered, killed, defined, and marked + // def-dead in this instruction. + BitVector EarlyClobberRegs(NumPhysRegs); + BitVector KillRegs(NumPhysRegs); + BitVector DefRegs(NumPhysRegs); + BitVector DeadRegs(NumPhysRegs); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) + if (!MO.isReg() || MO.isUndef()) continue; - if (MO.isUse()) - UseMOs.push_back(std::make_pair(&MO,i)); - else if (MO.isEarlyClobber()) - EarlyClobberMOs.push_back(std::make_pair(&MO,i)); - else - DefMOs.push_back(std::make_pair(&MO,i)); - } - - // Process uses first. - BitVector KillRegs(NumPhysRegs); - for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { - const MachineOperand MO = *UseMOs[i].first; unsigned Reg = MO.getReg(); - - assert(isUsed(Reg) && "Using an undefined register!"); - - if (MO.isKill() && !isReserved(Reg)) { - KillRegs.set(Reg); - - // Mark sub-registers as used. 
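The rewritten forward() above makes one pass over the instruction's operands, bucketing each register into kill, dead, def, and early-clobber sets, and only then commits the state changes, so an instruction that both reads and redefines a register is handled consistently. The bucketing step, sketched with std::bitset in place of llvm::BitVector and a toy operand record (register numbers assumed below 64 here):

    #include <bitset>

    static const unsigned NumPhysRegs = 64;  // illustrative register count

    struct Operand { unsigned reg; bool isUse, isKill, isDead; };

    static void classify(const Operand *ops, unsigned n,
                         std::bitset<NumPhysRegs> &kills,
                         std::bitset<NumPhysRegs> &deads,
                         std::bitset<NumPhysRegs> &defs) {
      for (unsigned i = 0; i != n; ++i) {
        const Operand &MO = ops[i];
        if (MO.isUse) {
          if (MO.isKill)
            kills.set(MO.reg);   // last use: the register becomes free
        } else if (MO.isDead) {
          deads.set(MO.reg);     // def whose value is never read
        } else {
          defs.set(MO.reg);      // live def: the register becomes used
        }
      }
      // Callers apply kills/deads as "unused" and defs as "used" only
      // after the whole operand list has been scanned.
    }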
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - KillRegs.set(SubReg); - } - } - - // Change states of all registers after all the uses are processed to guard - // against multiple uses. - setUnused(KillRegs); - - // Process early clobber defs then process defs. We can have a early clobber - // that is dead, it should not conflict with a def that happens one "slot" - // (see InstrSlots in LiveIntervalAnalysis.h) later. - unsigned NumECs = EarlyClobberMOs.size(); - unsigned NumDefs = DefMOs.size(); - - for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) { - const MachineOperand &MO = (i < NumECs) - ? *EarlyClobberMOs[i].first : *DefMOs[i-NumECs].first; - unsigned Idx = (i < NumECs) - ? EarlyClobberMOs[i].second : DefMOs[i-NumECs].second; - unsigned Reg = MO.getReg(); - if (MO.isUndef()) + if (!Reg || isReserved(Reg)) continue; - // If it's dead upon def, then it is now free. - if (MO.isDead()) { - setUnused(Reg, MI); - continue; - } - - // Skip two-address destination operand. - unsigned UseIdx; - if (MI->isRegTiedToUseOperand(Idx, &UseIdx) && - !MI->getOperand(UseIdx).isUndef()) { - assert(isUsed(Reg) && "Using an undefined register!"); - continue; + if (MO.isUse()) { + // Two-address operands implicitly kill. + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + addRegWithSubRegs(KillRegs, Reg); + } else { + assert(MO.isDef()); + if (MO.isDead()) + addRegWithSubRegs(DeadRegs, Reg); + else + addRegWithSubRegs(DefRegs, Reg); + if (MO.isEarlyClobber()) + addRegWithAliases(EarlyClobberRegs, Reg); } - - // Skip if this is merely redefining part of a super-register. - if (RedefinesSuperRegPart(MI, MO, TRI)) - continue; - - // Implicit def is allowed to "re-define" any register. Similarly, - // implicitly defined registers can be clobbered. - assert((isReserved(Reg) || isUnused(Reg) || - isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && - "Re-defining a live register!"); - setUsed(Reg); } -} - -void RegScavenger::backward() { - assert(Tracking && "Not tracking states!"); - assert(MBBI != MBB->begin() && "Already at start of basic block!"); - // Move ptr backward. - MBBI = prior(MBBI); - - MachineInstr *MI = MBBI; - DistanceMap.erase(MI); - --CurrDist; - // Separate register operands into 3 classes: uses, defs, earlyclobbers. - SmallVector, 4> UseMOs; - SmallVector, 4> DefMOs; - SmallVector, 4> EarlyClobberMOs; + // Verify uses and defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || MO.isUndef()) - continue; - if (MO.isUse()) - UseMOs.push_back(std::make_pair(&MO,i)); - else if (MO.isEarlyClobber()) - EarlyClobberMOs.push_back(std::make_pair(&MO,i)); - else - DefMOs.push_back(std::make_pair(&MO,i)); - } - - - // Process defs first. - unsigned NumECs = EarlyClobberMOs.size(); - unsigned NumDefs = DefMOs.size(); - for (unsigned i = 0, e = NumECs + NumDefs; i != e; ++i) { - const MachineOperand &MO = (i < NumDefs) - ? *DefMOs[i].first : *EarlyClobberMOs[i-NumDefs].first; - unsigned Idx = (i < NumECs) - ? DefMOs[i].second : EarlyClobberMOs[i-NumDefs].second; - if (MO.isUndef()) - continue; - - // Skip two-address destination operand. 
- if (MI->isRegTiedToUseOperand(Idx)) + if (!MO.isReg() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - assert(isUsed(Reg)); - if (!isReserved(Reg)) - setUnused(Reg, MI); + if (!Reg || isReserved(Reg)) + continue; + if (MO.isUse()) { + assert(isUsed(Reg) && "Using an undefined register!"); + assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && + "Using an early clobbered register!"); + } else { + assert(MO.isDef()); +#if 0 + // FIXME: Enable this once we've figured out how to correctly transfer + // implicit kills during codegen passes like the coalescer. + assert((KillRegs.test(Reg) || isUnused(Reg) || + isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && + "Re-defining a live register!"); +#endif + } } - // Process uses. - BitVector UseRegs(NumPhysRegs); - for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { - const MachineOperand MO = *UseMOs[i].first; - unsigned Reg = MO.getReg(); - assert(isUnused(Reg) || isReserved(Reg)); - UseRegs.set(Reg); - - // Set the sub-registers as "used". - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - unsigned SubReg = *SubRegs; ++SubRegs) - UseRegs.set(SubReg); - } - setUsed(UseRegs); + // Commit the changes. + setUnused(KillRegs); + setUnused(DeadRegs); + setUsed(DefRegs); } void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { @@ -351,129 +213,110 @@ static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) { Mask.set(*I); } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass, - const BitVector &Candidates) const { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector RegsAvailableCopy(NumPhysRegs, false); - CreateRegClassMask(RegClass, RegsAvailableCopy); - RegsAvailableCopy &= RegsAvailable; - - // Restrict the search to candidates. - RegsAvailableCopy &= Candidates; - - // Returns the first unused (bit is set) register, or 0 is none is found. - int Reg = RegsAvailableCopy.find_first(); - return (Reg == -1) ? 0 : Reg; +unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isAliasUsed(*I)) + return *I; + return 0; } -unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RegClass, - bool ExCalleeSaved) const { - // Mask off the registers which are not in the TargetRegisterClass. - BitVector RegsAvailableCopy(NumPhysRegs, false); - CreateRegClassMask(RegClass, RegsAvailableCopy); - RegsAvailableCopy &= RegsAvailable; - - // If looking for a non-callee-saved register, mask off all the callee-saved - // registers. - if (ExCalleeSaved) - RegsAvailableCopy &= ~CalleeSavedRegs; - - // Returns the first unused (bit is set) register, or 0 is none is found. - int Reg = RegsAvailableCopy.find_first(); - return (Reg == -1) ? 0 : Reg; -} +/// findSurvivorReg - Return the candidate register that is unused for the +/// longest after MBBI. UseMI is set to the instruction where the search +/// stopped. +/// +/// No more than InstrLimit instructions are inspected. 
+/// +unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator MI, + BitVector &Candidates, + unsigned InstrLimit, + MachineBasicBlock::iterator &UseMI) { + int Survivor = Candidates.find_first(); + assert(Survivor > 0 && "No candidates for scavenging"); + + MachineBasicBlock::iterator ME = MBB->getFirstTerminator(); + assert(MI != ME && "MI already at terminator"); + + for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { + // Remove any candidates touched by instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + Candidates.reset(MO.getReg()); + for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++) + Candidates.reset(*R); + } -/// findFirstUse - Calculate the distance to the first use of the -/// specified register. -MachineInstr* -RegScavenger::findFirstUse(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, unsigned Reg, - unsigned &Dist) { - MachineInstr *UseMI = 0; - Dist = ~0U; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->getParent() != MBB) + // Was our survivor untouched by this instruction? + if (Candidates.test(Survivor)) continue; - DenseMap::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) { - // If it's not in map, it's below current MI, let's initialize the - // map. - I = next(I); - unsigned Dist = CurrDist + 1; - while (I != MBB->end()) { - DistanceMap.insert(std::make_pair(I, Dist++)); - I = next(I); - } - } - DI = DistanceMap.find(UDMI); - if (DI->second > CurrDist && DI->second < Dist) { - Dist = DI->second; - UseMI = UDMI; - } + + // All candidates gone? + if (Candidates.none()) + break; + + Survivor = Candidates.find_first(); } - return UseMI; + + // We ran out of candidates, so stop the search. + UseMI = MI; + return Survivor; } unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { - assert(ScavengingFrameIndex >= 0 && - "Cannot scavenge a register without an emergency spill slot!"); - // Mask off the registers which are not in the TargetRegisterClass. BitVector Candidates(NumPhysRegs, false); CreateRegClassMask(RC, Candidates); - Candidates ^= ReservedRegs; // Do not include reserved registers. + // Do not include reserved registers. + Candidates ^= ReservedRegs & Candidates; // Exclude all the registers being used by the instruction. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand &MO = I->getOperand(i); - if (MO.isReg()) + if (MO.isReg() && MO.getReg() != 0 && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) Candidates.reset(MO.getReg()); } // Find the register whose use is furthest away. 
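findSurvivorReg above replaces the old DistanceMap bookkeeping with a bounded forward walk: every candidate a subsequent instruction touches is struck out, and the register that stays untouched longest wins. A self-contained approximation over toy instructions, each just a list of registers it touches (the real code also strikes aliases and stops at the first terminator):

    #include <bitset>
    #include <vector>

    typedef std::vector<unsigned> Instr;  // registers touched by one instr

    // Assumes at least one candidate bit is set, as the real code asserts.
    static unsigned findSurvivor(std::bitset<64> candidates,
                                 const std::vector<Instr> &instrs,
                                 unsigned instrLimit) {
      unsigned survivor = 0;
      while (!candidates.test(survivor)) ++survivor;

      for (unsigned i = 0; i != instrs.size() && i != instrLimit; ++i) {
        for (unsigned j = 0; j != instrs[i].size(); ++j)
          candidates.reset(instrs[i][j]);  // touched: out of the running
        if (candidates.test(survivor))
          continue;                        // current survivor still clean
        if (candidates.none())
          break;                           // keep the last one standing
        survivor = 0;                      // pick a fresh survivor
        while (!candidates.test(survivor)) ++survivor;
      }
      return survivor;
    }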
- unsigned SReg = 0; - unsigned MaxDist = 0; - MachineInstr *MaxUseMI = 0; - int Reg = Candidates.find_first(); - while (Reg != -1) { - unsigned Dist; - MachineInstr *UseMI = findFirstUse(MBB, I, Reg, Dist); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - unsigned AsDist; - MachineInstr *AsUseMI = findFirstUse(MBB, I, *AS, AsDist); - if (AsDist < Dist) { - Dist = AsDist; - UseMI = AsUseMI; - } - } - if (Dist >= MaxDist) { - MaxDist = Dist; - MaxUseMI = UseMI; - SReg = Reg; - } - Reg = Candidates.find_next(Reg); - } + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); - if (ScavengedReg != 0) { - assert(0 && "Scavenger slot is live, unable to scavenge another register!"); - abort(); - } + // If we found an unused register there is no reason to spill it. We have + // probably found a callee-saved register that has been saved in the + // prologue, but happens to be unused at this point. + if (!isAliasUsed(SReg)) + return SReg; - // Spill the scavenged register before I. - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); - MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + assert(ScavengedReg == 0 && + "Scavenger slot is live, unable to scavenge another register!"); - // Restore the scavenged register before its use (or first terminator). - II = MaxUseMI - ? MachineBasicBlock::iterator(MaxUseMI) : MBB->getFirstTerminator(); - TII->loadRegFromStackSlot(*MBB, II, SReg, ScavengingFrameIndex, RC); - ScavengeRestore = prior(II); + // Avoid infinite regress ScavengedReg = SReg; + + // If the target knows how to save/restore the register, let it do so; + // otherwise, use the emergency stack spill slot. + if (!TRI->saveScavengerRegister(*MBB, I, RC, SReg)) { + // Spill the scavenged register before I. + assert(ScavengingFrameIndex >= 0 && + "Cannot scavenge register without an emergency spill slot!"); + TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC); + MachineBasicBlock::iterator II = prior(I); + TRI->eliminateFrameIndex(II, SPAdj, NULL, this); + + // Restore the scavenged register before its use (or first terminator). + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC); + } else + TRI->restoreScavengerRegister(*MBB, UseMI, RC, SReg); + + ScavengeRestore = prior(UseMI); + + // Doing this here leads to infinite regress. + // ScavengedReg = SReg; ScavengedRC = RC; return SReg; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index a8452dff272b8..5a59862090b14 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -19,6 +19,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -40,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const { if (SUnit *SU = Sequence[i]) SU->dump(this); else - cerr << "**** NOOP ****\n"; + errs() << "**** NOOP ****\n"; } } @@ -59,9 +60,11 @@ void ScheduleDAG::Run(MachineBasicBlock *bb, Schedule(); - DOUT << "*** Final schedule ***\n"; - DEBUG(dumpSchedule()); - DOUT << "\n"; + DEBUG({ + errs() << "*** Final schedule ***\n"; + dumpSchedule(); + errs() << '\n'; + }); } /// addPred - This adds the specified edge as a pred of the current node if @@ -79,13 +82,19 @@ void SUnit::addPred(const SDep &D) { SUnit *N = D.getSUnit(); // Update the bookkeeping. 
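The addPred/removePred hunks that follow wrap every counter update in overflow and underflow asserts: NumPredsLeft and NumSuccsLeft drive the schedulers' ready queues, and a count that silently wraps fails far from the bug. The checked update shape, lifted out on its own:

    #include <cassert>
    #include <climits>

    static void addDataPred(unsigned &numPreds, unsigned &numPredsLeft,
                            bool predScheduled) {
      assert(numPreds < UINT_MAX && "NumPreds will overflow!");
      ++numPreds;
      if (!predScheduled) {
        assert(numPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
        ++numPredsLeft;
      }
    }

    static void removeDataPred(unsigned &numPreds, unsigned &numPredsLeft,
                               bool predScheduled) {
      assert(numPreds > 0 && "NumPreds will underflow!");
      --numPreds;
      if (!predScheduled) {
        assert(numPredsLeft > 0 && "NumPredsLeft will underflow!");
        --numPredsLeft;
      }
    }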
if (D.getKind() == SDep::Data) { + assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); + assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); ++NumPreds; ++N->NumSuccs; } - if (!N->isScheduled) + if (!N->isScheduled) { + assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); ++NumPredsLeft; - if (!isScheduled) + } + if (!isScheduled) { + assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++N->NumSuccsLeft; + } Preds.push_back(D); N->Succs.push_back(P); if (P.getLatency() != 0) { @@ -118,13 +127,19 @@ void SUnit::removePred(const SDep &D) { Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { + assert(NumPreds > 0 && "NumPreds will underflow!"); + assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); --NumPreds; --N->NumSuccs; } - if (!N->isScheduled) + if (!N->isScheduled) { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); --NumPredsLeft; - if (!isScheduled) + } + if (!isScheduled) { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); --N->NumSuccsLeft; + } if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); @@ -256,56 +271,58 @@ void SUnit::ComputeHeight() { /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. void SUnit::dump(const ScheduleDAG *G) const { - cerr << "SU(" << NodeNum << "): "; + errs() << "SU(" << NodeNum << "): "; G->dumpNode(this); } void SUnit::dumpAll(const ScheduleDAG *G) const { dump(G); - cerr << " # preds left : " << NumPredsLeft << "\n"; - cerr << " # succs left : " << NumSuccsLeft << "\n"; - cerr << " Latency : " << Latency << "\n"; - cerr << " Depth : " << Depth << "\n"; - cerr << " Height : " << Height << "\n"; + errs() << " # preds left : " << NumPredsLeft << "\n"; + errs() << " # succs left : " << NumSuccsLeft << "\n"; + errs() << " Latency : " << Latency << "\n"; + errs() << " Depth : " << Depth << "\n"; + errs() << " Height : " << Height << "\n"; if (Preds.size() != 0) { - cerr << " Predecessors:\n"; + errs() << " Predecessors:\n"; for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { - cerr << " "; + errs() << " "; switch (I->getKind()) { - case SDep::Data: cerr << "val "; break; - case SDep::Anti: cerr << "anti"; break; - case SDep::Output: cerr << "out "; break; - case SDep::Order: cerr << "ch "; break; + case SDep::Data: errs() << "val "; break; + case SDep::Anti: errs() << "anti"; break; + case SDep::Output: errs() << "out "; break; + case SDep::Order: errs() << "ch "; break; } - cerr << "#"; - cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + errs() << "#"; + errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - cerr << " *"; - cerr << "\n"; + errs() << " *"; + errs() << ": Latency=" << I->getLatency(); + errs() << "\n"; } } if (Succs.size() != 0) { - cerr << " Successors:\n"; + errs() << " Successors:\n"; for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); I != E; ++I) { - cerr << " "; + errs() << " "; switch (I->getKind()) { - case SDep::Data: cerr << "val "; break; - case SDep::Anti: cerr << "anti"; break; - case SDep::Output: cerr << "out "; break; - case SDep::Order: cerr << "ch "; break; + case SDep::Data: errs() << "val "; break; + case SDep::Anti: errs() << "anti"; break; + case SDep::Output: errs() << "out "; break; + case SDep::Order: errs() << "ch "; break; } - cerr << "#"; - cerr << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + errs() << "#"; + 
errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; if (I->isArtificial()) - cerr << " *"; - cerr << "\n"; + errs() << " *"; + errs() << ": Latency=" << I->getLatency(); + errs() << "\n"; } } - cerr << "\n"; + errs() << "\n"; } #ifndef NDEBUG @@ -323,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { continue; } if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has not been scheduled!\n"; + errs() << "has not been scheduled!\n"; AnyNotSched = true; } if (SUnits[i].isScheduled && (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getHeight()) > unsigned(INT_MAX)) { if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has an unexpected " + errs() << "has an unexpected " << (isBottomUp ? "Height" : "Depth") << " value!\n"; AnyNotSched = true; } if (isBottomUp) { if (SUnits[i].NumSuccsLeft != 0) { if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has successors left!\n"; + errs() << "has successors left!\n"; AnyNotSched = true; } } else { if (SUnits[i].NumPredsLeft != 0) { if (!AnyNotSched) - cerr << "*** Scheduling failed! ***\n"; + errs() << "*** Scheduling failed! ***\n"; SUnits[i].dump(this); - cerr << "has predecessors left!\n"; + errs() << "has predecessors left!\n"; AnyNotSched = true; } } diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index 770f5bbbdbb1d..0d15c02141250 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -28,10 +28,6 @@ #include "llvm/Support/MathExtras.h" using namespace llvm; -void ScheduleDAG::AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO) { - MI->addMemOperand(MF, MO); -} - void ScheduleDAG::EmitNoop() { TII->insertNoop(*BB, InsertPos); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 8e18b3d17fda2..44e9296661aa6 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -14,8 +14,10 @@ #define DEBUG_TYPE "sched-instrs" #include "ScheduleDAGInstrs.h" +#include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetMachine.h" @@ -45,35 +47,24 @@ void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, ScheduleDAG::Run(bb, end); } -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { - if (const Instruction *I = dyn_cast(V)) - return I->getOpcode(); - if (const ConstantExpr *CE = dyn_cast(V)) - return CE->getOpcode(); - // Use UserOp1 to mean there's no opcode. - return Instruction::UserOp1; -} - /// getUnderlyingObjectFromInt - This is the function that does the work of /// looking through basic ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObjectFromInt(const Value *V) { do { - if (const User *U = dyn_cast(V)) { + if (const Operator *U = dyn_cast(V)) { // If we find a ptrtoint, we can transfer control back to the // regular getUnderlyingObjectFromInt. 
- if (getOpcode(U) == Instruction::PtrToInt) + if (U->getOpcode() == Instruction::PtrToInt) return U->getOperand(0); // If we find an add of a constant or a multiplied value, it's // likely that the other operand will lead us to the base // object. We don't have to worry about the case where the - // object address is somehow being computed bt the multiply, + // object address is somehow being computed by the multiply, // because our callers only care when the result is an // identifiable object. - if (getOpcode(U) != Instruction::Add || + if (U->getOpcode() != Instruction::Add || (!isa<ConstantInt>(U->getOperand(1)) && - getOpcode(U->getOperand(1)) != Instruction::Mul)) + Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) return V; V = U->getOperand(0); } else { @@ -90,7 +81,7 @@ static const Value *getUnderlyingObject(const Value *V) { do { V = V->getUnderlyingObject(); // If it found an inttoptr, use special code to continue climbing. - if (getOpcode(V) != Instruction::IntToPtr) + if (Operator::getOpcode(V) != Instruction::IntToPtr) break; const Value *O = getUnderlyingObjectFromInt(cast<Operator>(V)->getOperand(0)); // If that succeeded in finding a pointer, continue the search. @@ -106,11 +97,11 @@ static const Value *getUnderlyingObject(const Value *V) { /// object, return the Value for that object. Otherwise return null. static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI) { if (!MI->hasOneMemOperand() || - !MI->memoperands_begin()->getValue() || - MI->memoperands_begin()->isVolatile() + !(*MI->memoperands_begin())->getValue() || + (*MI->memoperands_begin())->isVolatile()) return 0; - const Value *V = MI->memoperands_begin()->getValue(); + const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return 0; @@ -132,7 +123,7 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { } } -void ScheduleDAGInstrs::BuildSchedGraph() { +void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // We'll be allocating one SUnit for each instruction, plus one for // the region exit node. SUnits.reserve(BB->size()); @@ -155,8 +146,8 @@ void ScheduleDAGInstrs::BuildSchedGraph() { bool UnitLatencies = ForceUnitLatencies(); // Ask the target if address-backscheduling is desirable, and if so how much. - unsigned SpecialAddressLatency = - TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency(); + const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); // Walk the list of instructions, from bottom moving up. for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; @@ -184,16 +175,20 @@ void ScheduleDAGInstrs::BuildSchedGraph() { assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); std::vector<SUnit*> &UseList = Uses[Reg]; std::vector<SUnit*> &DefList = Defs[Reg]; - // Optionally add output and anti dependencies. - // TODO: Using a latency of 1 here assumes there's no cost for - // reusing registers. + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + unsigned AOLatency = (Kind == SDep::Anti) ?
0 : 1; for (unsigned i = 0, e = DefList.size(); i != e; ++i) { SUnit *DefSU = DefList[i]; if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/Reg)); + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector &DefList = Defs[*Alias]; @@ -202,7 +197,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() { if (DefSU != SU && (Kind != SDep::Output || !MO.isDead() || !DefSU->getInstr()->registerDefIsDead(Reg))) - DefSU->addPred(SDep(SU, Kind, /*Latency=*/1, /*Reg=*/ *Alias)); + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); } } @@ -216,6 +211,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() { // Optionally add in a special extra latency for nodes that // feed addresses. // TODO: Do this for register aliases too. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about + // it. if (SpecialAddressLatency != 0 && !UnitLatencies) { MachineInstr *UseMI = UseSU->getInstr(); const TargetInstrDesc &UseTID = UseMI->getDesc(); @@ -226,15 +225,29 @@ void ScheduleDAGInstrs::BuildSchedGraph() { UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) LDataLatency += SpecialAddressLatency; } - UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg)); + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. + const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + } + UseSU->addPred(dep); } } for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { std::vector &UseList = Uses[*Alias]; for (unsigned i = 0, e = UseList.size(); i != e; ++i) { SUnit *UseSU = UseList[i]; - if (UseSU != SU) - UseSU->addPred(SDep(SU, SDep::Data, DataLatency, *Alias)); + if (UseSU != SU) { + const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, (SDep &)dep); + ST.adjustSchedDependency(SU, UseSU, (SDep &)dep); + } + UseSU->addPred(dep); + } } } @@ -323,10 +336,10 @@ void ScheduleDAGInstrs::BuildSchedGraph() { if (!ChainTID.isCall() && !ChainTID.hasUnmodeledSideEffects() && ChainMI->hasOneMemOperand() && - !ChainMI->memoperands_begin()->isVolatile() && - ChainMI->memoperands_begin()->getValue()) + !(*ChainMI->memoperands_begin())->isVolatile() && + (*ChainMI->memoperands_begin())->getValue()) // We know that the Chain accesses one specific memory location. - ChainMMO = &*ChainMI->memoperands_begin(); + ChainMMO = *ChainMI->memoperands_begin(); else // Unknown memory accesses. Assume the worst. ChainMMO = 0; @@ -362,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph() { // Treat all other stores conservatively. goto new_chain; } else if (TID.mayLoad()) { - if (TII->isInvariantLoad(MI)) { + if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else if (const Value *V = getUnderlyingObjectForInstr(MI)) { // A load from a specific PseudoSourceValue. Add precise dependencies. @@ -409,10 +422,9 @@ void ScheduleDAGInstrs::FinishBlock() { void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - // Compute the latency for the node. 
We use the sum of the latencies for - all nodes flagged together into this SUnit. + // Compute the latency for the node. SU->Latency = - InstrItins.getLatency(SU->getInstr()->getDesc().getSchedClass()); + InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass()); // Simplistic target-independent heuristic: assume that loads take // extra time. @@ -421,6 +433,50 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { SU->Latency += 2; } +void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + if (InstrItins.isEmpty()) + return; + + // For a data dependency with a known register... + if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) + return; + + const unsigned Reg = dep.getReg(); + + // ... find the definition of the register in the defining + // instruction + MachineInstr *DefMI = Def->getInstr(); + int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); + if (DefIdx != -1) { + int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx); + if (DefCycle >= 0) { + MachineInstr *UseMI = Use->getInstr(); + const unsigned UseClass = UseMI->getDesc().getSchedClass(); + + // For all uses of the register, calculate the maximum latency + int Latency = -1; + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + int UseCycle = InstrItins.getOperandCycle(UseClass, i); + if (UseCycle >= 0) + Latency = std::max(Latency, DefCycle - UseCycle + 1); + } + + // If we found a latency, then replace the existing dependence latency. + if (Latency >= 0) + dep.setLatency(Latency); + } + } +} + void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { SU->getInstr()->dump(); } @@ -438,7 +494,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { } // EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { +MachineBasicBlock *ScheduleDAGInstrs:: +EmitSchedule(DenseMap *EM) { // For MachineInstr-based scheduling, we're rescheduling the instructions in // the block, so start by removing them from the block. while (Begin != InsertPos) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index 00d6268d1a14a..29e1c98cb31f7 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -15,12 +15,13 @@ #ifndef SCHEDULEDAGINSTRS_H #define SCHEDULEDAGINSTRS_H -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include namespace llvm { @@ -120,7 +121,6 @@ namespace llvm { SmallSet LoopLiveInRegs; public: - MachineBasicBlock *BB; // Current basic block MachineBasicBlock::iterator Begin; // The beginning of the range to // be scheduled. The range extends // to InsertPos. @@ -154,13 +154,20 @@ namespace llvm { /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are /// input. - virtual void BuildSchedGraph(); + virtual void BuildSchedGraph(AliasAnalysis *AA); /// ComputeLatency - Compute node latency.
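The Latency = DefCycle - UseCycle + 1 arithmetic in ComputeOperandLatency above is worth a worked example. A stand-alone sketch follows; the cycle numbers are invented for illustration, and the real values come from the target's itinerary tables:

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  // DefCycle: cycle in which the defining instruction produces the register.
  // UseCycles: cycles in which each matching use operand reads it (negative
  // means "no itinerary data for that operand"). Returns -1 when nothing
  // usable was found, in which case the edge keeps its existing latency.
  int operandLatency(int DefCycle, const std::vector<int> &UseCycles) {
    int Latency = -1;
    for (int UseCycle : UseCycles)
      if (UseCycle >= 0)
        Latency = std::max(Latency, DefCycle - UseCycle + 1);
    return Latency;
  }

  int main() {
    // Value ready in cycle 3, read in cycles 1 and 2:
    // max(3-1+1, 3-2+1) = 3 cycles must separate def and use issue.
    std::printf("latency = %d\n", operandLatency(3, {1, 2}));
  }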
/// virtual void ComputeLatency(SUnit *SU); - virtual MachineBasicBlock *EmitSchedule(); + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const; + + virtual MachineBasicBlock* + EmitSchedule(DenseMap*); /// StartBlock - Prepare to perform scheduling in the given block. /// diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 5efd274eea503..95ad05e7d784a 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -86,14 +86,14 @@ void ScheduleDAG::viewGraph() { // This code is only for debugging! #ifndef NDEBUG if (BB->getBasicBlock()) - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' + - BB->getBasicBlock()->getName()); + ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + + ":" + BB->getBasicBlock()->getNameStr()); else - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName()); + ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); #else - cerr << "ScheduleDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 4ffe88fda5a59..c766859ae9c87 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMSelectionDAG CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp + InstrEmitter.cpp LegalizeDAG.cpp LegalizeFloatTypes.cpp LegalizeIntegerTypes.cpp @@ -9,13 +10,12 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp - ScheduleDAGSDNodes.cpp - ScheduleDAGSDNodesEmit.cpp ScheduleDAGFast.cpp ScheduleDAGList.cpp ScheduleDAGRRList.cpp - SelectionDAGBuild.cpp + ScheduleDAGSDNodes.cpp SelectionDAG.cpp + SelectionDAGBuild.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp TargetLowering.cpp diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp index 7cd2b73e8704d..fbe40b678639b 100644 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp @@ -13,15 +13,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; -CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, - SmallVector &locs) +CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, + SmallVector &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs) { + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { // No stack is used. 
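The BuildSchedGraph hunks above distinguish three edge latencies: anti edges get 0, output edges keep 1, and data edges start from the def's latency before the itinerary and target hooks refine them. A minimal sketch of that decision, with Edge and the two hooks as invented stand-ins for SDep and the LLVM callbacks:

  #include <functional>

  enum class Kind { Data, Anti, Output };

  struct Edge { Kind K; unsigned Latency; };

  Edge makeEdge(Kind K, unsigned DefLatency, bool UnitLatencies,
                const std::function<void(Edge &)> &RefineFromItineraries,
                const std::function<void(Edge &)> &TargetAdjust) {
    Edge E{K, 0};
    switch (K) {
    case Kind::Anti:   E.Latency = 0; break; // allow same-cycle reissue
    case Kind::Output: E.Latency = 1; break; // register reuse assumed cheap
    case Kind::Data:
      E.Latency = DefLatency;
      if (!UnitLatencies) {          // unit-latency mode skips refinement
        RefineFromItineraries(E);    // ComputeOperandLatency analogue
        TargetAdjust(E);             // adjustSchedDependency analogue
      }
      break;
    }
    return E;
  }

  int main() {
    auto noop = [](Edge &) {};
    return makeEdge(Kind::Anti, 3, false, noop, noop).Latency; // 0
  }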
StackOffset = 0; @@ -31,8 +33,8 @@ CCState::CCState(unsigned CC, bool isVarArg, const TargetMachine &tm, // HandleByVal - Allocate a stack slot large enough to pass an argument by // value. The size and alignment information of the argument is encoded in its // parameter attribute. -void CCState::HandleByVal(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, +void CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags) { unsigned Align = ArgFlags.getByValAlign(); @@ -55,94 +57,107 @@ void CCState::MarkAllocated(unsigned Reg) { UsedRegs[Reg/32] |= 1 << (Reg&31); } -/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node, +/// AnalyzeFormalArguments - Analyze an array of argument values, /// incorporating info about the formals into this state. -void CCState::AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn) { - unsigned NumArgs = TheArgs->getNumValues()-1; - +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = TheArgs->getValueType(i); - ISD::ArgFlagsTy ArgFlags = - cast(TheArgs->getOperand(3+i))->getArgFlags(); + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Formal argument #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeReturn - Analyze the returned values of an ISD::RET node, +/// AnalyzeReturn - Analyze the returned values of a return, /// incorporating info about the result values into this state. -void CCState::AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn) { +void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { // Determine which register each value should be copied into. - for (unsigned i = 0, e = TheRet->getNumOperands() / 2; i != e; ++i) { - MVT VT = TheRet->getOperand(i*2+1).getValueType(); - ISD::ArgFlagsTy ArgFlags = - cast(TheRet->getOperand(i*2+2))->getArgFlags(); - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)){ - cerr << "Return operand #" << i << " has unhandled type " - << VT.getMVTString() << "\n"; - abort(); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].Val.getValueType(); + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + errs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info -/// about the passed values into this state. -void CCState::AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn) { - unsigned NumOps = TheCall->getNumArgs(); +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. 
+void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = TheCall->getArg(i).getValueType(); - ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + EVT ArgVT = Outs[i].Val.getValueType(); + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Call operand #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } /// AnalyzeCallOperands - Same as above except it takes vectors of types /// and argument flags. -void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, +void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, SmallVectorImpl &Flags, CCAssignFn Fn) { unsigned NumOps = ArgVTs.size(); for (unsigned i = 0; i != NumOps; ++i) { - MVT ArgVT = ArgVTs[i]; + EVT ArgVT = ArgVTs[i]; ISD::ArgFlagsTy ArgFlags = Flags[i]; if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { - cerr << "Call operand #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); } } } -/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node, +/// AnalyzeCallResult - Analyze the return values of a call, /// incorporating info about the passed values into this state. -void CCState::AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn) { - for (unsigned i = 0, e = TheCall->getNumRetVals(); i != e; ++i) { - MVT VT = TheCall->getRetValType(i); - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (TheCall->isInreg()) - Flags.setInReg(); +void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { - cerr << "Call result #" << i << " has unhandled type " - << VT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } } /// AnalyzeCallResult - Same as above except it's specialized for calls which /// produce a single value. 
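All of the Analyze* loops above now share one shape: walk a flag-carrying argument array instead of decoding SDNode operand layouts, and report an unhandled type via llvm_unreachable. A compressed model of that shape (ArgInfo and the callback are stand-ins, not the LLVM types):

  #include <cstdio>
  #include <cstdlib>
  #include <functional>
  #include <vector>

  // ArgInfo bundles what the old code dug out of SDNode operands: a value
  // type plus its argument flags.
  struct ArgInfo { int VT; unsigned Flags; };

  // Fn returns true when it could NOT assign a location, the same
  // convention as LLVM's CCAssignFn.
  void analyzeArgs(const std::vector<ArgInfo> &Args,
                   const std::function<bool(unsigned, int, unsigned)> &Fn) {
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      if (Fn(i, Args[i].VT, Args[i].Flags)) {
        std::fprintf(stderr, "argument #%u has unhandled type %d\n",
                     i, Args[i].VT);
        std::abort(); // plays the role of llvm_unreachable(0)
      }
  }

  int main() {
    analyzeArgs({{1, 0}, {2, 0}},
                [](unsigned, int, unsigned) { return false; });
  }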
-void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { +void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { - cerr << "Call result has unhandled type " - << VT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call result has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 609ec82c5ad19..1ed3082152012 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -19,6 +19,7 @@ #define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" @@ -33,7 +34,9 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; @@ -213,12 +216,12 @@ namespace { SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); - SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); - SDValue CombineConsecutiveLoads(SDNode *N, MVT VT); - SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildUDIV(SDNode *N); SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); @@ -236,14 +239,17 @@ namespace { /// overlap. bool isAlias(SDValue Ptr1, int64_t Size1, const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, SDValue Ptr2, int64_t Size2, - const Value *SrcValue2, int SrcValueOffset2) const; + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2) const; /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, int &SrcValueOffset) const; + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -251,7 +257,7 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. - MVT getShiftAmountTy() { + EVT getShiftAmountTy() { return LegalTypes ? 
TLI.getShiftAmountTy() : TLI.getPointerTy(); } @@ -392,7 +398,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); switch (Op.getOpcode()) { - default: assert(0 && "Unknown code"); + default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { APFloat V = cast(Op)->getValueAPF(); V.changeSign(); @@ -495,7 +501,7 @@ static bool isOneUseSetCC(SDValue N) { SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, SDValue N0, SDValue N1) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa(N0.getOperand(1))) { if (isa(N1)) { // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) @@ -537,10 +543,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo) { assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); ++NodesCombined; - DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG)); - DOUT << " and " << NumTo-1 << " other values\n"; - DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i) + DEBUG(errs() << "\nReplacing.1 "; + N->dump(&DAG); + errs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + errs() << " and " << NumTo-1 << " other values\n"; + for (unsigned i = 0, e = NumTo; i != e; ++i) assert(N->getValueType(i) == To[i].getValueType() && "Cannot combine value to value of different type!")); WorkListRemover DeadNodes(*this); @@ -612,9 +620,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { // Replace the old value with the new one. ++NodesCombined; - DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + errs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + errs() << '\n'); CommitTargetLoweringOpt(TLO); return true; @@ -680,9 +690,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.3 "; + N->dump(&DAG); + errs() << "\nWith: "; + RV.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); @@ -800,7 +812,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // Expose the DAG combiner to the target combiner impls. TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, Level == Unrestricted, false, this); + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); RV = TLI.PerformDAGCombine(N, DagCombineInfo); } @@ -877,7 +889,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { break; case ISD::TokenFactor: - if ((CombinerAA || Op.hasOneUse()) && + if (Op.hasOneUse() && std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { // Queue up for processing. TFs.push_back(Op.getNode()); @@ -898,7 +910,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } } } - + SDValue Result; // If we've changed things around then replace token factor. @@ -922,9 +934,14 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { /// MERGE_VALUES can always be eliminated.
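The visitMERGE_VALUES hunk just below turns a single replacement pass into a do/while: replacing results can let CSE fold another MERGE_VALUES into N, handing it brand-new uses mid-replacement. A toy model of why one sweep can miss uses (Node and the callback are invented stand-ins, not the SelectionDAG types):

  #include <functional>
  #include <vector>

  struct Node { std::vector<Node *> Uses; };

  // ReplaceUse must drop U from N's use list, but (as with CSE in the hunk
  // below) it is allowed to append new users to N while doing so.
  void replaceAllUsesUntilDead(
      Node &N, const std::function<void(Node &, Node *)> &ReplaceUse) {
    do {
      std::vector<Node *> Snapshot = N.Uses; // iterate over a stable copy
      for (Node *U : Snapshot)
        ReplaceUse(N, U);
    } while (!N.Uses.empty()); // keep sweeping until N is genuinely dead
  }

  int main() {
    Node N, A, B;
    N.Uses = {&A};
    bool Resurrected = false;
    replaceAllUsesUntilDead(N, [&](Node &M, Node *) {
      M.Uses.clear();
      if (!Resurrected) {      // model CSE handing N a surprise new user
        Resurrected = true;
        M.Uses.push_back(&B);
      }
    });
    return N.Uses.empty() ? 0 : 1;
  }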
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { WorkListRemover DeadNodes(*this); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), - &DeadNodes); + // Replacing results may cause a different MERGE_VALUES to suddenly + // be CSE'd with N, and carry its uses with it. Iterate until no + // uses remain, to ensure that the node can be safely deleted. + do { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i), + &DeadNodes); + } while (!N->use_empty()); removeFromWorkList(N); DAG.DeleteNode(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -933,7 +950,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { static SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); ConstantSDNode *N01C = dyn_cast(N01); @@ -957,7 +974,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1080,7 +1097,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. if (N->hasNUsesOfValue(0, 1)) @@ -1142,7 +1159,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1215,7 +1232,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1308,7 +1325,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1395,7 +1412,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -1415,7 +1432,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { - MVT ADDVT = N1.getOperand(1).getValueType(); + EVT ADDVT = N1.getOperand(1).getValueType(); SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -1447,7 +1464,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1489,7 +1506,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N1 = 
N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 if (N0C && N1C && !N1C->isNullValue()) @@ -1541,7 +1558,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1562,7 +1579,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -1665,7 +1682,7 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) { /// two operands of the same opcode, try to simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); // For each of OP in AND/OR/XOR: @@ -1677,7 +1694,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { N0.getOpcode() == ISD::SIGN_EXTEND || (N0.getOpcode() == ISD::TRUNCATE && !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && - N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + (!LegalOperations || + TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); @@ -1709,7 +1728,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); unsigned BitWidth = VT.getSizeInBits(); // fold vector ops @@ -1820,18 +1839,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1842,18 +1861,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
unsigned BitWidth = N1.getValueSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getSizeInBits())) && + BitWidth - MemVT.getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -1869,24 +1888,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LN0->isUnindexed() && N0.hasOneUse() && // Do not change the width of a volatile load. !LN0->isVolatile()) { - MVT EVT = MVT::Other; + EVT ExtVT = MVT::Other; uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) - EVT = MVT::getIntegerVT(ActiveBits); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - MVT LoadedVT = LN0->getMemoryVT(); + EVT LoadedVT = LN0->getMemoryVT(); // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) { - MVT PtrType = N0.getOperand(1).getValueType(); + if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to add an offset to the pointer to // load the correct bytes. For little endian systems, we merely need to // read fewer bytes from the same pointer. - unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8; - unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8; + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; unsigned Alignment = LN0->getAlignment(); SDValue NewPtr = LN0->getBasePtr(); @@ -1901,7 +1920,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - EVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), Alignment); AddToWorkList(N); CombineTo(N0.getNode(), Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1918,7 +1937,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { SDValue LL, LR, RL, RR, CC0, CC1; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N1.getValueType(); + EVT VT = N1.getValueType(); // fold vector ops if (VT.isVector()) { @@ -1928,7 +1947,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, undef) -> -1 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) - return DAG.getConstant(~0ULL, VT); + return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); // fold (or c1, c2) -> c1|c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); @@ -2058,7 +2077,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // a rot[lr]. SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. 
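The (or x, undef) fold above swaps ~0ULL for APInt::getAllOnesValue for a width reason: a 64-bit literal is not all-ones for a wider type such as i128. A two-word toy integer makes the difference visible:

  #include <cassert>
  #include <cstdint>

  // Wrong and right ways to build an all-ones constant for a wide type,
  // modeled with a tiny two-word "APInt". ~0ULL only fills the low word.
  struct Int128 { uint64_t Lo, Hi; };

  Int128 fromU64(uint64_t v) { return {v, 0}; } // the old ~0ULL path
  Int128 allOnes() { return {~0ULL, ~0ULL}; }   // getAllOnesValue analogue

  int main() {
    assert(fromU64(~0ULL).Hi == 0);  // not all-ones for i128!
    assert(allOnes().Hi == ~0ULL);   // correct for the full width
  }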
- MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; // The target must have at least one rotate flavor. @@ -2219,7 +2238,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold vector ops if (VT.isVector()) { @@ -2258,8 +2277,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { switch (N0.getOpcode()) { default: - assert(0 && "Unhandled SetCC Equivalent!"); - abort(); + llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: @@ -2388,7 +2406,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { !isa(BinOpLHSVal->getOperand(1))) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is a signed shift right, and the high bit is modified by the // logical operation, do not perform the transformation. The highBitSet @@ -2418,7 +2436,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getSizeInBits(); // fold (shl c1, c2) -> c1<(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2474,20 +2492,33 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SRL && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast(N0.getOperand(1))->getZExtValue(); - uint64_t c2 = N1C->getZExtValue(); - SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << c1, VT)); - if (c2 > c1) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c2-c1, N1.getValueType())); - else - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, - DAG.getConstant(c1-c2, N1.getValueType())); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - c1), + VT); + SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, + N0.getOperand(0), + HiBitsMask); + if (c2 > c1) + return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c2-c1, N1.getValueType())); + else + return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, + DAG.getConstant(c1-c2, N1.getValueType())); + } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) - if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), + VT.getSizeInBits() - + N1C->getZExtValue()), + VT); return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getConstant(~0ULL << N1C->getZExtValue(), VT)); + HiBitsMask); + } return N1C ? 
visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); } @@ -2497,7 +2528,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) @@ -2518,7 +2549,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // sext_inreg. if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue(); - MVT EVT = MVT::getIntegerVT(LowBits); + EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT))) return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0.getOperand(0), DAG.getValueType(EVT)); @@ -2545,8 +2576,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N01C && N1C) { // Determine what the truncate's result bitsize and type would be. unsigned VTValSize = VT.getSizeInBits(); - MVT TruncVT = - MVT::getIntegerVT(VTValSize - N1C->getZExtValue()); + EVT TruncVT = + EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue()); // Determine the residual right-shift amount. signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); @@ -2576,7 +2607,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2607,7 +2638,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); - MVT VT = N0.getValueType(); + EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getSizeInBits(); // fold (srl c1, c2) -> c1 >>u c2 @@ -2641,7 +2672,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? 
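The guarded (shl (srl x, c1), c2) rewrite above is easy to sanity-check at a fixed width. A throwaway test with uint32_t standing in for APInt; the assert mirrors the new c1 < VT.getSizeInBits() guard, since shifting by the full width is undefined:

  #include <cassert>
  #include <cstdint>
  #include <initializer_list>

  uint32_t foldShlSrl(uint32_t x, unsigned c1, unsigned c2) {
    assert(c1 < 32 && c2 < 32);
    uint32_t HiBitsMask = 0xFFFFFFFFu << c1; // high 32-c1 bits, as in the hunk
    uint32_t Masked = x & HiBitsMask;
    return c2 > c1 ? Masked << (c2 - c1)     // net left shift
                   : Masked >> (c1 - c2);    // net right shift
  }

  int main() {
    for (uint32_t x : {0u, 1u, 0xDEADBEEFu})
      for (unsigned c1 : {0u, 3u, 31u})
        for (unsigned c2 : {0u, 5u, 31u})
          assert(foldShlSrl(x, c1, c2) == ((x >> c1) << c2));
  }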
- MVT SmallVT = N0.getOperand(0).getValueType(); + EVT SmallVT = N0.getOperand(0).getValueType(); if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); @@ -2700,7 +2731,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { SDValue N101 = N1.getOperand(0).getOperand(1); if (ConstantSDNode *N101C = dyn_cast(N101)) { - MVT TruncVT = N1.getValueType(); + EVT TruncVT = N1.getValueType(); SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC.trunc(TruncVT.getSizeInBits()); @@ -2724,7 +2755,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctlz c1) -> c2 if (isa(N0)) @@ -2734,7 +2765,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (cttz c1) -> c2 if (isa(N0)) @@ -2744,7 +2775,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { SDValue DAGCombiner::visitCTPOP(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (ctpop c1) -> c2 if (isa(N0)) @@ -2759,8 +2790,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); - MVT VT = N->getValueType(0); - MVT VT0 = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); // fold (select C, X, X) -> X if (N1 == N2) @@ -2825,7 +2856,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Check against MVT::Other for SELECT_CC, which is a workaround for targets // having to say they don't support SELECT_CC on every type the DAG knows // about, since there is no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -2945,7 +2977,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (sext c1) -> c1 if (isa(N0)) @@ -3054,13 +3086,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3071,14 +3103,34 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) if (N0.getOpcode() == ISD::SETCC) { + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. 
+ if (VT.isVector() && + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. + VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() && + + // Only do this before legalize for now. + !LegalOperations) { + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + } + + // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); SDValue SCC = SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), - DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT), + NegOne, DAG.getConstant(0, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; } + + // fold (sext x) -> (zext x) if the sign bit is known zero. if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && @@ -3090,7 +3142,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (zext c1) -> c1 if (isa(N0)) @@ -3194,13 +3246,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3225,7 +3277,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (aext c1) -> c1 if (isa(N0)) @@ -3330,11 +3382,11 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); - MVT EVT = LN0->getMemoryVT(); + EVT MemVT = LN0->getMemoryVT(); SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), EVT, + LN0->getSrcValueOffset(), MemVT, LN0->isVolatile(), LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), @@ -3400,8 +3452,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = VT; + EVT VT = N->getValueType(0); + EVT ExtVT = VT; // This transformation isn't valid for vector loads. if (VT.isVector()) @@ -3411,20 +3463,21 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // extended to VT. 
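The NegOne operand in the sext(setcc) fold above encodes the key identity: sign-extending a true i1 yields the all-ones value, so select_cc(x, y, -1, 0, cc) computes exactly sext(setcc(x, y, cc)). A two-line check at 32 bits:

  #include <cassert>
  #include <cstdint>

  // "select cc, -1, 0" and "sign-extend the i1" compute the same bits.
  int32_t selectForm(bool cc) { return cc ? -1 : 0; }
  int32_t sextForm(bool cc) { return -static_cast<int32_t>(cc); }

  int main() {
    assert(selectForm(true) == sextForm(true));   // both all-ones
    assert(selectForm(false) == sextForm(false)); // both zero
  }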
if (Opc == ISD::SIGN_EXTEND_INREG) { ExtType = ISD::SEXTLOAD; - EVT = cast(N->getOperand(1))->getVT(); - if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) + ExtVT = cast(N->getOperand(1))->getVT(); + if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); } - unsigned EVTBits = EVT.getSizeInBits(); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned ShAmt = 0; - if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { ShAmt = N01->getZExtValue(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); - if (N0.getValueType().getSizeInBits() <= EVTBits) + // Is the load width a multiple of size of VT? + if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) return SDValue(); } } @@ -3432,18 +3485,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). - if (isa(N0) && N0.hasOneUse() && EVT.isRound() && + if (isa(N0) && N0.hasOneUse() && ExtVT.isRound() && cast(N0)->getMemoryVT().getSizeInBits() > EVTBits && // Do not change the width of a volatile load. !cast(N0)->isVolatile()) { LoadSDNode *LN0 = cast(N0); - MVT PtrType = N0.getOperand(1).getValueType(); + EVT PtrType = N0.getOperand(1).getValueType(); // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. if (TLI.isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); - unsigned EVTStoreBits = EVT.getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } @@ -3460,7 +3513,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LN0->isVolatile(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - EVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), NewAlign); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -3477,8 +3530,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - MVT VT = N->getValueType(0); - MVT EVT = cast(N1)->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast(N1)->getVT(); unsigned VTBits = VT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); @@ -3573,7 +3626,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -3623,14 +3676,14 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { /// CombineConsecutiveLoads - build_pair (load, load) -> load /// if load locations are consecutive. 
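ReduceLoadWidth above gained two divisibility guards: the shift amount and the original load width must both be multiples of the narrow type's size. A stand-alone model, with widths assumed to be powers of two (matching the ExtVT.isRound() check); the final fit condition is this sketch's own simplification, not a line from the patch:

  #include <cassert>

  // LoadBits/ExtBits are the widths of the original load and of ExtVT.
  bool canNarrow(unsigned LoadBits, unsigned ExtBits, unsigned ShAmt) {
    if (ShAmt & (ExtBits - 1))
      return false;             // shift must land on an ExtVT boundary
    if (LoadBits & (ExtBits - 1))
      return false;             // load must split into whole ExtVT pieces
    return ShAmt + ExtBits <= LoadBits; // the slice must exist in the load
  }

  int main() {
    assert(canNarrow(32, 8, 16));  // take byte 2 of an i32 load
    assert(!canNarrow(32, 8, 12)); // shift is not byte aligned
    assert(!canNarrow(8, 16, 0));  // an i16 slice cannot come from i8
  }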
-SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); LoadSDNode *LD1 = dyn_cast(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast(getBuildPairElt(N, 1)); if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); - MVT LD1VT = LD1->getValueType(0); + EVT LD1VT = LD1->getValueType(0); const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && @@ -3642,7 +3695,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) @@ -3656,7 +3709,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If the input is a BUILD_VECTOR with all constant elements, fold this now. // Only do this before legalize, since afterward the target may be depending @@ -3674,7 +3727,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { break; } - MVT DestEltVT = N->getValueType(0).getVectorElementType(); + EVT DestEltVT = N->getValueType(0).getVectorElementType(); assert(!DestEltVT.isVector() && "Element type of vector ValueType must not be vector!"); if (isSimple) @@ -3684,7 +3737,18 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa(N0) || isa(N0)) { SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); - if (Res.getNode() != N) return Res; + if (Res.getNode() != N) { + if (!LegalOperations || + TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) + return Res; + + // Folding it resulted in an illegal node, and it's too late to + // do that. Clean up the old node and forego the transformation. + // Ideally this won't happen very often, because instcombine + // and the earlier dagcombine runs (where illegal nodes are + // permitted) should have folded most of them already. 
+ DAG.DeleteNode(Res.getNode()); + } } // (conv (conv x, t1), t2) -> (conv x, t2) @@ -3700,7 +3764,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast(N0); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(VT.getTypeForMVT()); + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -3743,7 +3807,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); - MVT IntXVT = MVT::getIntegerVT(OrigXWidth); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (TLI.isTypeLegal(IntXVT) || !LegalTypes) { SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), IntXVT, N0.getOperand(1)); @@ -3791,7 +3855,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { } SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); return CombineConsecutiveLoads(N, VT); } @@ -3799,8 +3863,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the /// destination element value type. SDValue DAGCombiner:: -ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { - MVT SrcEltVT = BV->getValueType(0).getVectorElementType(); +ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); // If this is already the right type, we're done. if (SrcEltVT == DstEltVT) return SDValue(BV, 0); @@ -3822,7 +3886,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - MVT VT = MVT::getVectorVT(DstEltVT, + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, BV->getValueType(0).getVectorNumElements()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); @@ -3835,7 +3899,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // Convert the input float vector to a int vector where the elements are the // same sizes. assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); - MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; } @@ -3844,7 +3908,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // convert to integer first, then to FP of the right size. if (DstEltVT.isFloatingPoint()) { assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); - MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits()); + EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode(); // Next, convert to FP elements of the same size. @@ -3880,7 +3944,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); } - MVT VT = MVT::getVectorVT(DstEltVT, Ops.size()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -3889,7 +3953,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) { // turns into multiple outputs. 
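The DAG.DeleteNode call above is the interesting half of the visitBIT_CONVERT change: once operations have been legalized, a speculative fold that produced an illegal node must be destroyed rather than left dangling in the graph. The shape of that guard, with Node and the legality callback as stand-ins for the LLVM types:

  #include <functional>
  #include <memory>

  struct Node { int Opcode; };

  // Keep a speculatively folded node only when it is still allowed;
  // IsOperationLegal stands in for the TLI.isOperationLegal query.
  std::unique_ptr<Node>
  keepIfLegal(std::unique_ptr<Node> Folded, bool LegalOperations,
              const std::function<bool(int)> &IsOperationLegal) {
    if (!LegalOperations || IsOperationLegal(Folded->Opcode))
      return Folded;  // use the fold
    return nullptr;   // unique_ptr frees it, like DAG.DeleteNode
  }

  int main() {
    auto onlyOpcodeZero = [](int Op) { return Op == 0; };
    auto Kept = keepIfLegal(std::make_unique<Node>(Node{0}), true,
                            onlyOpcodeZero);
    auto Dropped = keepIfLegal(std::make_unique<Node>(Node{1}), true,
                               onlyOpcodeZero);
    return (Kept && !Dropped) ? 0 : 1;
  }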
bool isS2V = ISD::isScalarToVector(BV); unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands()); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); SmallVector Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { @@ -3926,7 +3991,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -3967,7 +4032,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4001,7 +4066,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4024,7 +4089,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); - // fold (fmul X, (fneg 1.0)) -> (fneg X) + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); @@ -4056,7 +4121,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold vector ops if (VT.isVector()) { @@ -4089,7 +4154,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP && VT != MVT::ppcf128) @@ -4103,7 +4168,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); @@ -4151,8 +4216,8 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4173,8 +4238,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantSDNode *N0C = dyn_cast(N0); - MVT VT = N->getValueType(0); - MVT OpVT = N0.getValueType(); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128) @@ -4195,7 +4260,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); 
- MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_sint c1fp) -> c1 if (N0CFP) @@ -4207,7 +4272,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_to_uint c1fp) -> c1 if (N0CFP && VT != MVT::ppcf128) @@ -4220,7 +4285,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp if (N0CFP && N0.getValueType() != MVT::ppcf128) @@ -4253,8 +4318,8 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); - MVT VT = N->getValueType(0); - MVT EVT = cast(N->getOperand(1))->getVT(); + EVT VT = N->getValueType(0); + EVT EVT = cast(N->getOperand(1))->getVT(); ConstantFPSDNode *N0CFP = dyn_cast(N0); // fold (fp_round_inreg c1fp) -> c1fp @@ -4269,7 +4334,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && @@ -4326,7 +4391,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4342,7 +4407,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast(N0); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // fold (fabs c1) -> fabs(c1) if (N0CFP && VT != MVT::ppcf128) @@ -4361,7 +4426,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); - MVT IntVT = Int.getValueType(); + EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); @@ -4419,7 +4484,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant) { - SDValue AndOp0 = Op0.getOperand(0); SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { @@ -4491,7 +4555,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; @@ -4579,9 +4643,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.4 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover 
DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4616,7 +4682,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { bool isLoad = true; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast(N)) { if (LD->isIndexed()) return false; @@ -4652,7 +4718,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { SDValue Offset; ISD::MemIndexedMode AM = ISD::UNINDEXED; if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { - if (Ptr == Offset) + if (Ptr == Offset && Op->getOpcode() == ISD::ADD) std::swap(BasePtr, Offset); if (Ptr != BasePtr) continue; @@ -4711,9 +4777,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG)); - DOUT << '\n'; + DEBUG(errs() << "\nReplacing.5 "; + N->dump(&DAG); + errs() << "\nWith: "; + Result.getNode()->dump(&DAG); + errs() << '\n'); WorkListRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0), @@ -4815,9 +4883,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG)); - DOUT << "\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + errs() << "\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes); @@ -4833,9 +4903,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG)); - DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG)); - DOUT << " and 2 other values\n"; + DEBUG(errs() << "\nReplacing.6 "; + N->dump(&DAG); + errs() << "\nWith: "; + Undef.getNode()->dump(&DAG); + errs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), @@ -4890,7 +4962,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Create token factor to keep old chain connected. SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplLoad.getValue(1)); - + + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Replace uses with load result and token factor. Don't add users // to work list. 
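// Illustrative aside on the post-indexed check a few hunks above: the added
// "&& Op->getOpcode() == ISD::ADD" guard matters because swapping base and
// offset is only sound for a commutative add; for SUB, base - off and
// off - base differ. A minimal standalone sketch of that normalization
// (toy types, not the SelectionDAG API):
#include <utility>
namespace sketch {
enum Opcode { Add, Sub };
struct Node { Opcode Op; int Base, Offset; };
// Returns true if Ptr can be matched as the base operand after normalizing.
static bool normalizeIndexedParts(const Node &N, int Ptr, int &Base, int &Off) {
  Base = N.Base; Off = N.Offset;
  if (Ptr == Off && N.Op == Add)  // the swap is only valid for a commutative add
    std::swap(Base, Off);
  return Ptr == Base;
}
} // namespace sketch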
return CombineTo(N, ReplLoad.getValue(0), Token, false); @@ -4917,7 +4992,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue Chain = ST->getChain(); SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); - MVT VT = Value.getValueType(); + EVT VT = Value.getValueType(); if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) return SDValue(); @@ -4944,12 +5019,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; unsigned NewBW = NextPowerOf2(MSB - ShAmt); - MVT NewVT = MVT::getIntegerVT(NewBW); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); while (NewBW < BitWidth && !(TLI.isOperationLegalOrCustom(Opc, NewVT) && TLI.isNarrowingProfitable(VT, NewVT))) { NewBW = NextPowerOf2(NewBW); - NewVT = MVT::getIntegerVT(NewBW); + NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); } if (NewBW >= BitWidth) return SDValue(); @@ -4971,7 +5046,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); if (NewAlign < - TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT())) + TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext()))) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), @@ -5024,9 +5099,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); - MVT SVT = Value.getOperand(0).getValueType(); + EVT SVT = Value.getOperand(0).getValueType(); unsigned Align = TLI.getTargetData()-> - getABITypeAlignment(SVT.getTypeForMVT()); + getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -5043,8 +5118,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // transform should not be done in this case. if (Value.getOpcode() != ISD::TargetConstantFP) { SDValue Tmp; - switch (CFP->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unknown FP type"); + switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP type"); case MVT::f80: // We don't do this for these yet. case MVT::f128: case MVT::ppcf128: @@ -5111,8 +5186,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If there is a better chain. if (Chain != BetterChain) { - // Replace the chain to avoid dependency. SDValue ReplStore; + + // Replace the chain to avoid dependency. if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), @@ -5128,6 +5204,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, Chain, ReplStore); + // Make sure the new and old chains are cleaned up. + AddToWorkList(Token.getNode()); + // Don't add users to work list. return CombineTo(N, Token, false); } @@ -5211,10 +5290,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // BUILD_VECTOR with undef elements and the inserted element. 
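// The width computation in ReduceLoadOpStoreWidth above boils down to bit
// arithmetic on the constant mask: the lowest and highest changed bits bound
// the affected range, and the next power of two covering that range becomes
// the candidate narrow type. A standalone sketch of the arithmetic, assuming
// a nonzero 32-bit mask and GCC/Clang builtins (the real code uses APInt and
// llvm::NextPowerOf2):
#include <cstdint>
namespace sketch {
static unsigned nextPow2(unsigned V) {
  unsigned P = 1;
  while (P < V) P <<= 1;
  return P;
}
static unsigned narrowStoreWidth(uint32_t Imm) {
  unsigned ShAmt = __builtin_ctz(Imm);       // lowest changed bit
  unsigned MSB   = 31 - __builtin_clz(Imm);  // highest changed bit
  return nextPow2(MSB - ShAmt + 1);          // smallest covering power-of-two width
}
} // namespace sketch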
   if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
       isa<ConstantSDNode>(EltNo)) {
-    MVT VT = InVec.getValueType();
-    MVT EVT = VT.getVectorElementType();
+    EVT VT = InVec.getValueType();
+    EVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
-    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
+    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));

     unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
     if (Elt < Ops.size())
@@ -5232,7 +5311,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
     // If the operand is wider than the vector element type then it is implicitly
     // truncated. Make that explicit here.
-    MVT EltVT = InVec.getValueType().getVectorElementType();
+    EVT EltVT = InVec.getValueType().getVectorElementType();
     SDValue InOp = InVec.getOperand(0);
     if (InOp.getValueType() != EltVT)
       return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp);
@@ -5252,18 +5331,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
     bool NewLoad = false;
     bool BCNumEltsChanged = false;
-    MVT VT = InVec.getValueType();
-    MVT EVT = VT.getVectorElementType();
-    MVT LVT = EVT;
+    EVT VT = InVec.getValueType();
+    EVT ExtVT = VT.getVectorElementType();
+    EVT LVT = ExtVT;

     if (InVec.getOpcode() == ISD::BIT_CONVERT) {
-      MVT BCVT = InVec.getOperand(0).getValueType();
-      if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType()))
+      EVT BCVT = InVec.getOperand(0).getValueType();
+      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
         return SDValue();
       if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
         BCNumEltsChanged = true;
       InVec = InVec.getOperand(0);
-      EVT = BCVT.getVectorElementType();
+      ExtVT = BCVT.getVectorElementType();
       NewLoad = true;
     }

@@ -5272,7 +5351,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     if (ISD::isNormalLoad(InVec.getNode())) {
       LN0 = cast<LoadSDNode>(InVec);
     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-               InVec.getOperand(0).getValueType() == EVT &&
+               InVec.getOperand(0).getValueType() == ExtVT &&
                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
@@ -5306,7 +5385,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     // Check the resultant load doesn't need a higher alignment than the
     // original load.
     unsigned NewAlign =
-      TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT());
+      TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));

     if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
       return SDValue();
@@ -5317,7 +5396,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     SDValue NewPtr = LN0->getBasePtr();
     if (Elt) {
       unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
-      MVT PtrType = NewPtr.getValueType();
+      EVT PtrType = NewPtr.getValueType();
       if (TLI.isBigEndian())
         PtrOff = VT.getSizeInBits() / 8 - PtrOff;
       NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
@@ -5334,8 +5413,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {

 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   unsigned NumInScalars = N->getNumOperands();
-  MVT VT = N->getValueType(0);
-  MVT EltType = VT.getVectorElementType();
+  EVT VT = N->getValueType(0);

   // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   // operations.
If so, and if the EXTRACT_VECTOR_ELT vector inputs come from @@ -5432,11 +5510,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); @@ -5494,7 +5571,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -5517,14 +5594,14 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { } // Let's see if the target supports this vector_shuffle. - MVT RVT = RHS.getValueType(); + EVT RVT = RHS.getValueType(); if (!TLI.isVectorClearMaskLegal(Indices, RVT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. - MVT EVT = RVT.getVectorElementType(); + EVT EltVT = RVT.getVectorElementType(); SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, EVT)); + DAG.getConstant(0, EltVT)); SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), RVT, &ZeroOps[0], ZeroOps.size()); LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS); @@ -5543,10 +5620,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { // things. Simplifying them may result in a loss of legality. if (LegalOperations) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); - MVT EltType = VT.getVectorElementType(); + EVT EltType = VT.getVectorElementType(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Shuffle = XformToShuffleWithZero(N); @@ -5589,7 +5666,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) { - MVT VT = LHS.getValueType(); + EVT VT = LHS.getValueType(); return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Ops[0], Ops.size()); } @@ -5728,7 +5805,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // (x ? y : y) -> y. if (N2 == N3) return N2; - MVT VT = N2.getValueType(); + EVT VT = N2.getValueType(); ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); ConstantSDNode *N3C = dyn_cast(N3.getNode()); @@ -5820,8 +5897,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 - MVT XType = N0.getValueType(); - MVT AType = N2.getValueType(); + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); if (XType.bitsGE(AType)) { // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a // single-bit constant. @@ -5900,7 +5977,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // FIXME: Turn all of these into setcc if setcc if setcc is legal // otherwise, go ahead with the folds. 
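// The XformToShuffleWithZero fold above turns a vector AND whose mask is
// all-ones or all-zeros per lane into a shuffle that picks either the
// original lane or a lane of an all-zero vector. A standalone sketch of the
// index construction (indices >= NumElts select from the second, zero
// operand; any zero-operand lane works, so the patch's <0, 4, 2, 4> example
// simply reuses index 4):
#include <cstdint>
#include <vector>
namespace sketch {
static std::vector<int> maskToShuffleIndices(const std::vector<uint32_t> &Mask) {
  const int NumElts = static_cast<int>(Mask.size());
  std::vector<int> Idx(Mask.size());
  for (int i = 0; i != NumElts; ++i)
    Idx[i] = (Mask[i] == ~0u) ? i            // keep this lane from the LHS
                              : NumElts + i; // take a zero lane from the RHS
  return Idx;
}
} // namespace sketch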
   if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
-    MVT XType = N0.getValueType();
+    EVT XType = N0.getValueType();
     if (!LegalOperations ||
         TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
       SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
@@ -5942,7 +6019,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
   if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
       N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
       N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
-    MVT XType = N0.getValueType();
+    EVT XType = N0.getValueType();
     SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
                                 DAG.getConstant(XType.getSizeInBits()-1,
                                                 getShiftAmountTy()));
@@ -5957,7 +6034,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
   if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
       N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
     if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
-      MVT XType = N0.getValueType();
+      EVT XType = N0.getValueType();
       if (SubC->isNullValue() && XType.isInteger()) {
         SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
                                     N0,
@@ -5976,11 +6053,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
 }

 /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
-SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
                                    SDValue N1, ISD::CondCode Cond,
                                    DebugLoc DL, bool foldBooleans) {
   TargetLowering::DAGCombinerInfo
-    DagCombineInfo(DAG, Level == Unrestricted, false, this);
+    DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
 }

@@ -6012,11 +6089,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   return S;
 }

-/// FindBaseOffset - Return true if base is known not to alias with anything
-/// but itself. Provides base object and offset as results.
-static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself. Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+                           GlobalValue *&GV, void *&CV) {
   // Assume it is a primitive operation.
-  Base = Ptr; Offset = 0;
+  Base = Ptr; Offset = 0; GV = 0; CV = 0;

   // If it's adding a simple constant then integrate the offset.
   if (Base.getOpcode() == ISD::ADD) {
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
       Offset += C->getZExtValue();
     }
   }
+
+  // Return the underlying GlobalValue, and update the Offset. Return false
+  // for GlobalAddressSDNode since the same GlobalAddress may be represented
+  // by multiple nodes with different offsets.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+    GV = G->getGlobal();
+    Offset += G->getOffset();
+    return false;
+  }
+
+  // Return the underlying Constant value, and update the Offset. Return false
+  // for ConstantSDNodes since the same constant pool entry may be represented
+  // by multiple nodes with different offsets.
+  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+                                         : (void *)C->getConstVal();
+    Offset += C->getOffset();
+    return false;
+  }

   // If it's any of the following then it can't alias with anything but itself.
-  return isa<FrameIndexSDNode>(Base) ||
-         isa<ConstantPoolSDNode>(Base) ||
-         isa<GlobalAddressSDNode>(Base);
+  return isa<FrameIndexSDNode>(Base);
 }

 /// isAlias - Return true if there is any possibility that the two addresses
 /// overlap.
 bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
                           const Value *SrcValue1, int SrcValueOffset1,
+                          unsigned SrcValueAlign1,
                           SDValue Ptr2, int64_t Size2,
-                          const Value *SrcValue2, int SrcValueOffset2) const {
+                          const Value *SrcValue2, int SrcValueOffset2,
+                          unsigned SrcValueAlign2) const {
   // If they are the same then they must be aliases.
   if (Ptr1 == Ptr2) return true;

   // Gather base node and offset information.
   SDValue Base1, Base2;
   int64_t Offset1, Offset2;
-  bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
-  bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+  GlobalValue *GV1, *GV2;
+  void *CV1, *CV2;
+  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);

-  // If they have the same base address then...
-  if (Base1 == Base2)
-    // Check to see if the addresses overlap.
+  // If they have the same base address then check to see if they overlap.
+  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
     return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);

-  // If we know both bases then they can't alias.
-  if (KnownBase1 && KnownBase2) return false;
+  // If we know what the bases are, and they aren't identical, then we know
+  // they cannot alias.
+  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+    return false;

+  // If we know that SrcValue1 and SrcValue2 have relatively large alignment
+  // compared to the size and offset of the access, we may be able to prove
+  // they do not alias. This check is conservative for now to catch cases
+  // created by splitting vector types.
+  if ((SrcValueAlign1 == SrcValueAlign2) &&
+      (SrcValueOffset1 != SrcValueOffset2) &&
+      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+    // There is no overlap between these relatively aligned accesses of
+    // similar size; return no alias.
+    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+      return false;
+  }
+
   if (CombinerGlobalAA) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
@@ -6074,20 +6189,24 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
 /// node. Returns true if the operand was a load.
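// The alignment check above reasons entirely modulo the (shared) alignment:
// two same-size accesses whose offsets land in disjoint intervals within one
// aligned block can never overlap. A standalone sketch of the predicate,
// assuming non-negative offsets (the patch has the same precondition):
#include <cstdint>
namespace sketch {
static bool provablyNoOverlap(int64_t Off1, int64_t Off2,
                              int64_t Size, int64_t Align) {
  if (Align <= Size || Off1 == Off2)
    return false;                       // too small or identical: can't prove it
  int64_t A1 = Off1 % Align, A2 = Off2 % Align;
  // Disjoint intervals within one aligned block => no overlap anywhere.
  return (A1 + Size) <= A2 || (A2 + Size) <= A1;
}
} // namespace sketch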
 bool DAGCombiner::FindAliasInfo(SDNode *N, SDValue &Ptr, int64_t &Size,
-                                const Value *&SrcValue, int &SrcValueOffset) const {
+                                const Value *&SrcValue,
+                                int &SrcValueOffset,
+                                unsigned &SrcValueAlign) const {
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
     Ptr = LD->getBasePtr();
     Size = LD->getMemoryVT().getSizeInBits() >> 3;
     SrcValue = LD->getSrcValue();
     SrcValueOffset = LD->getSrcValueOffset();
+    SrcValueAlign = LD->getOriginalAlignment();
     return true;
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     Ptr = ST->getBasePtr();
     Size = ST->getMemoryVT().getSizeInBits() >> 3;
     SrcValue = ST->getSrcValue();
     SrcValueOffset = ST->getSrcValueOffset();
+    SrcValueAlign = ST->getOriginalAlignment();
   } else {
-    assert(0 && "FindAliasInfo expected a memory operand");
+    llvm_unreachable("FindAliasInfo expected a memory operand");
   }

   return false;
@@ -6098,28 +6217,45 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                    SmallVector<SDValue, 8> &Aliases) {
   SmallVector<SDValue, 8> Chains;  // List of chains to visit.
-  std::set<SDNode *> Visited;      // Visited node set.
+  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

   // Get alias information for node.
   SDValue Ptr;
-  int64_t Size = 0;
-  const Value *SrcValue = 0;
-  int SrcValueOffset = 0;
-  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+  int64_t Size;
+  const Value *SrcValue;
+  int SrcValueOffset;
+  unsigned SrcValueAlign;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+                              SrcValueAlign);

   // Starting off.
   Chains.push_back(OriginalChain);
-
+  unsigned Depth = 0;
+
   // Look at each chain and determine if it is an alias. If so, add it to the
   // aliases list. If not, then continue up the chain looking for the next
   // candidate.
   while (!Chains.empty()) {
     SDValue Chain = Chains.back();
     Chains.pop_back();
+
+    // For TokenFactor nodes, look at each operand and only continue up the
+    // chain until we find two aliases. If we've seen two aliases, assume we'll
+    // find more and revert to original chain since the xform is unlikely to be
+    // profitable.
+    //
+    // FIXME: The depth check could be made to return the last non-aliasing
+    // chain we found before we hit a tokenfactor rather than the original
+    // chain.
+    if (Depth > 6 || Aliases.size() == 2) {
+      Aliases.clear();
+      Aliases.push_back(OriginalChain);
+      break;
+    }

-    // Don't bother if we've been here before.
-    if (Visited.find(Chain.getNode()) != Visited.end()) continue;
-    Visited.insert(Chain.getNode());
+    // Don't bother if we've been here before.
+    if (!Visited.insert(Chain.getNode()))
+      continue;

     switch (Chain.getOpcode()) {
     case ISD::EntryToken:
@@ -6130,35 +6266,40 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
     case ISD::STORE: {
       // Get alias information for Chain.
       SDValue OpPtr;
-      int64_t OpSize = 0;
-      const Value *OpSrcValue = 0;
-      int OpSrcValueOffset = 0;
+      int64_t OpSize;
+      const Value *OpSrcValue;
+      int OpSrcValueOffset;
+      unsigned OpSrcValueAlign;
       bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
-                                    OpSrcValue, OpSrcValueOffset);
+                                    OpSrcValue, OpSrcValueOffset,
+                                    OpSrcValueAlign);

       // If chain is alias then stop here.
       if (!(IsLoad && IsOpLoad) &&
-          isAlias(Ptr, Size, SrcValue, SrcValueOffset,
-                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+                  OpSrcValueAlign)) {
         Aliases.push_back(Chain);
       } else {
         // Look further up the chain.
         Chains.push_back(Chain.getOperand(0));
-        // Clean up old chain.
- AddToWorkList(Chain.getNode()); + ++Depth; } break; } case ISD::TokenFactor: - // We have to check each of the operands of the token factor, so we queue - // then up. Adding the operands to the queue (stack) in reverse order - // maintains the original order and increases the likelihood that getNode - // will find a matching token factor (CSE.) + // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } for (unsigned n = Chain.getNumOperands(); n;) Chains.push_back(Chain.getOperand(--n)); - // Eliminate the token factor if we can. - AddToWorkList(Chain.getNode()); + ++Depth; break; default: @@ -6184,15 +6325,10 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // If a single operand then chain to it. We don't need to revisit it. return Aliases[0]; } - + // Construct a custom tailored token factor. - SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, - &Aliases[0], Aliases.size()); - - // Make sure the old chain gets cleaned up. - if (NewChain != OldChain) AddToWorkList(OldChain.getNode()); - - return NewChain; + return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, + &Aliases[0], Aliases.size()); } // SelectionDAG::Combine - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index cd2d5ac8ec23f..8e955aff98fef 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -57,7 +57,7 @@ using namespace llvm; unsigned FastISel::getRegForValue(Value *V) { - MVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) return 0; @@ -65,11 +65,11 @@ unsigned FastISel::getRegForValue(Value *V) { // Ignore illegal types. We must do this before looking up the value // in ValueMap because Arguments are given virtual registers regardless // of whether FastISel can handle them. - MVT::SimpleValueType VT = RealVT.getSimpleVT(); + MVT VT = RealVT.getSimpleVT(); if (!TLI.isTypeLegal(VT)) { // Promote MVT::i1 to a legal type though, because it's common and easy. if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(VT).getSimpleVT(); + VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else return 0; } @@ -92,13 +92,14 @@ unsigned FastISel::getRegForValue(Value *V) { } else if (isa(V)) { // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. 
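// A standalone sketch of the bail-out discipline GatherAllAliases adopts
// above: walk a chain worklist, but once the walk gets too deep or has
// already produced two aliases, give up and fall back to the unmodified
// original chain. Toy graph rather than SDNodes; the visited set is omitted
// for brevity:
#include <vector>
namespace sketch {
struct ChainNode { std::vector<ChainNode *> Preds; bool IsAlias; };
static std::vector<ChainNode *> gatherAliases(ChainNode *Origin,
                                              unsigned MaxDepth) {
  std::vector<ChainNode *> Work{Origin}, Found;
  unsigned Depth = 0;
  while (!Work.empty()) {
    if (Depth > MaxDepth || Found.size() == 2)  // xform unlikely to pay off
      return {Origin};                          // revert to the original chain
    ChainNode *C = Work.back(); Work.pop_back();
    if (C->IsAlias) { Found.push_back(C); continue; }
    for (ChainNode *P : C->Preds) Work.push_back(P);
    ++Depth;
  }
  return Found;
}
} // namespace sketch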
- Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType())); + Reg = + getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (ConstantFP *CF = dyn_cast(V)) { Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { const APFloat &Flt = CF->getValueAPF(); - MVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -108,7 +109,8 @@ unsigned FastISel::getRegForValue(Value *V) { if (isExact) { APInt IntVal(IntBitWidth, 2, x); - unsigned IntegerReg = getRegForValue(ConstantInt::get(IntVal)); + unsigned IntegerReg = + getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); } @@ -174,13 +176,11 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { // If the index is smaller or larger than intptr_t, truncate or extend it. MVT PtrVT = TLI.getPointerTy(); - MVT IdxVT = MVT::getMVT(Idx->getType(), /*HandleUnknown=*/false); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), - ISD::SIGN_EXTEND, IdxN); + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); else if (IdxVT.bitsGT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT.getSimpleVT(), - ISD::TRUNCATE, IdxN); + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN); return IdxN; } @@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) { /// which has an opcode which directly corresponds to the given ISD opcode. /// bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { - MVT VT = MVT::getMVT(I->getType(), /*HandleUnknown=*/true); + EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -203,7 +203,7 @@ bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) { if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || ISDOpcode == ISD::XOR)) - VT = TLI.getTypeToTransformTo(VT); + VT = TLI.getTypeToTransformTo(I->getContext(), VT); else return false; } @@ -260,7 +260,7 @@ bool FastISel::SelectGetElementPtr(User *I) { return false; const Type *Ty = I->getOperand(0)->getType(); - MVT::SimpleValueType VT = TLI.getPointerTy().getSimpleVT(); + MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::op_iterator OI = I->op_begin()+1, E = I->op_end(); OI != E; ++OI) { Value *Idx = *OI; @@ -335,7 +335,7 @@ bool FastISel::SelectCall(User *I) { if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW && DW->ShouldEmitDwarfDebug()) { unsigned ID = - DW->RecordRegionStart(cast(RSI->getContext())); + DW->RecordRegionStart(RSI->getContext()); const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); BuildMI(MBB, DL, II).addImm(ID); } @@ -346,7 +346,7 @@ bool FastISel::SelectCall(User *I) { if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW && DW->ShouldEmitDwarfDebug()) { unsigned ID = 0; - DISubprogram Subprogram(cast(REI->getContext())); + DISubprogram Subprogram(REI->getContext()); if (isInlinedFnEnd(*REI, MF.getFunction())) { // This is end of an inlined function. 
const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); @@ -359,7 +359,7 @@ bool FastISel::SelectCall(User *I) { BuildMI(MBB, DL, II).addImm(ID); } else { const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); - ID = DW->RecordRegionEnd(cast(REI->getContext())); + ID = DW->RecordRegionEnd(REI->getContext()); BuildMI(MBB, DL, II).addImm(ID); } } @@ -384,11 +384,10 @@ bool FastISel::SelectCall(User *I) { setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(cast(FSI->getSubprogram())); - unsigned LabelID = DW->RecordInlinedFnStart(SP, - DICompileUnit(PrevLocTpl.CompileUnit), - PrevLocTpl.Line, - PrevLocTpl.Col); + DISubprogram SP(FSI->getSubprogram()); + unsigned LabelID = + DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope), + PrevLocTpl.Line, PrevLocTpl.Col); const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL); BuildMI(MBB, DL, II).addImm(LabelID); return true; @@ -398,7 +397,7 @@ bool FastISel::SelectCall(User *I) { MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo())); // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(cast(FSI->getSubprogram())); + DW->RecordRegionStart(FSI->getSubprogram()); return true; } case Intrinsic::dbg_declare: { @@ -407,7 +406,6 @@ bool FastISel::SelectCall(User *I) { || !DW->ShouldEmitDwarfDebug()) return true; - Value *Variable = DI->getVariable(); Value *Address = DI->getAddress(); if (BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); @@ -418,20 +416,15 @@ bool FastISel::SelectCall(User *I) { StaticAllocaMap.find(AI); if (SI == StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; - - // Determine the debug globalvariable. - GlobalValue *GV = cast(Variable); - - // Build the DECLARE instruction. - const TargetInstrDesc &II = TII.get(TargetInstrInfo::DECLARE); - MachineInstr *DeclareMI - = BuildMI(MBB, DL, II).addFrameIndex(FI).addGlobalAddress(GV); - DIVariable DV(cast(GV)); - DW->RecordVariableScope(DV, DeclareMI); + if (MMI) + MMI->setVariableDbgInfo(DI->getVariable(), FI); +#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN + DW->RecordVariable(DI->getVariable(), FI); +#endif return true; } case Intrinsic::eh_exception: { - MVT VT = TLI.getValueType(I->getType()); + EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { default: break; case TargetLowering::Expand: { @@ -449,15 +442,11 @@ bool FastISel::SelectCall(User *I) { } break; } - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: { - MVT VT = TLI.getValueType(I->getType()); + case Intrinsic::eh_selector: { + EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - MVT VT = (IID == Intrinsic::eh_selector_i32 ? 
- MVT::i32 : MVT::i64); - if (MMI) { if (MBB->isLandingPad()) AddCatchInfo(*cast(I), MMI, MBB); @@ -471,12 +460,25 @@ bool FastISel::SelectCall(User *I) { } unsigned Reg = TLI.getExceptionSelectorRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC); + bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, + RC, RC); assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; + + // Cast the register to the type of the selector. + if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + UpdateValueMap(I, ResultReg); } else { unsigned ResultReg = @@ -493,8 +495,8 @@ bool FastISel::SelectCall(User *I) { } bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) @@ -524,14 +526,14 @@ bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) { // If the operand is i1, arrange for the high bits in the register to be zero. if (SrcVT == MVT::i1) { - SrcVT = TLI.getTypeToTransformTo(SrcVT); + SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); if (!InputReg) return false; } // If the result is i1, truncate to the target's type for i1 first. if (DstVT == MVT::i1) - DstVT = TLI.getTypeToTransformTo(DstVT); + DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT); unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), @@ -555,8 +557,8 @@ bool FastISel::SelectBitCast(User *I) { } // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators. - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple() || @@ -616,6 +618,49 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { MBB->addSuccessor(MSucc); } +/// SelectFNeg - Emit an FNeg operation. +/// +bool +FastISel::SelectFNeg(User *I) { + unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); + if (OpReg == 0) return false; + + // If the target has ISD::FNEG, use it. + EVT VT = TLI.getValueType(I->getType()); + unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), + ISD::FNEG, OpReg); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + + // Bitcast the value to integer, twiddle the sign bit with xor, + // and then bitcast it back to floating-point. 
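// (Standalone illustration of the bit-twiddling SelectFNeg describes above,
// for a 32-bit float: copy the bits to an integer, flip the top bit, copy
// back. The patch performs the same dance with FastEmit'd BIT_CONVERT and
// XOR nodes instead of memcpy.)
#include <cstdint>
#include <cstring>
namespace sketch {
static float fnegViaXor(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));  // bitcast float -> i32
  Bits ^= UINT32_C(1) << 31;             // twiddle the sign bit
  std::memcpy(&F, &Bits, sizeof(Bits));  // bitcast back to floating-point
  return F;
}
} // namespace sketch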
+ if (VT.getSizeInBits() > 64) return false; + EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); + if (!TLI.isTypeLegal(IntVT)) + return false; + + unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::BIT_CONVERT, OpReg); + if (IntReg == 0) + return false; + + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg, + UINT64_C(1) << (VT.getSizeInBits()-1), + IntVT.getSimpleVT()); + if (IntResultReg == 0) + return false; + + ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), + ISD::BIT_CONVERT, IntResultReg); + if (ResultReg == 0) + return false; + + UpdateValueMap(I, ResultReg); + return true; +} + bool FastISel::SelectOperator(User *I, unsigned Opcode) { switch (Opcode) { @@ -626,6 +671,9 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { case Instruction::Sub: return SelectBinaryOp(I, ISD::SUB); case Instruction::FSub: + // FNeg is currently represented in LLVM IR as a special case of FSub. + if (BinaryOperator::isFNeg(I)) + return SelectFNeg(I); return SelectBinaryOp(I, ISD::FSUB); case Instruction::Mul: return SelectBinaryOp(I, ISD::MUL); @@ -709,8 +757,8 @@ FastISel::SelectOperator(User *I, unsigned Opcode) { case Instruction::IntToPtr: // Deliberate fall-through. case Instruction::PtrToInt: { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) return SelectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) @@ -758,45 +806,44 @@ FastISel::FastISel(MachineFunction &mf, FastISel::~FastISel() {} -unsigned FastISel::FastEmit_(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_(MVT, MVT, ISD::NodeType) { return 0; } -unsigned FastISel::FastEmit_r(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_r(MVT, MVT, ISD::NodeType, unsigned /*Op0*/) { return 0; } -unsigned FastISel::FastEmit_rr(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rr(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, unsigned /*Op0*/) { return 0; } -unsigned FastISel::FastEmit_i(MVT::SimpleValueType, MVT::SimpleValueType, - ISD::NodeType, uint64_t /*Imm*/) { +unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_f(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_f(MVT, MVT, ISD::NodeType, ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_ri(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_ri(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_rf(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rf(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, +unsigned FastISel::FastEmit_rri(MVT, MVT, ISD::NodeType, unsigned /*Op0*/, unsigned /*Op1*/, uint64_t /*Imm*/) { @@ -807,9 +854,9 @@ unsigned FastISel::FastEmit_rri(MVT::SimpleValueType, MVT::SimpleValueType, /// to emit an instruction with an immediate operand using FastEmit_ri. /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. 
-unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode, unsigned Op0, uint64_t Imm, - MVT::SimpleValueType ImmType) { + MVT ImmType) { // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); if (ResultReg != 0) @@ -824,9 +871,9 @@ unsigned FastISel::FastEmit_ri_(MVT::SimpleValueType VT, ISD::NodeType Opcode, /// to emit an instruction with a floating-point immediate operand using /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, +unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode, unsigned Op0, ConstantFP *FPImm, - MVT::SimpleValueType ImmType) { + MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); if (ResultReg != 0) @@ -842,7 +889,7 @@ unsigned FastISel::FastEmit_rf_(MVT::SimpleValueType VT, ISD::NodeType Opcode, // be replaced by code that creates a load from a constant-pool entry, // which will require some target-specific work. const APFloat &Flt = FPImm->getValueAPF(); - MVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -987,7 +1034,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, +unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, uint32_t Idx) { const TargetRegisterClass* RC = MRI.getRegClass(Op0); @@ -1008,6 +1055,6 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT::SimpleValueType RetVT, /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT::SimpleValueType VT, unsigned Op) { +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { return FastEmit_ri(VT, VT, ISD::AND, Op, 1); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp new file mode 100644 index 0000000000000..d3ffb2a22d932 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -0,0 +1,693 @@ +//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "instr-emitter" +#include "InstrEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional flag operands (which do +/// not go into the resulting MachineInstr). +unsigned InstrEmitter::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Flag) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// CountOperands - The inputs to target nodes have any actual inputs first, +/// followed by an optional chain operand, then an optional flag operand. +/// Compute the number of actual operands that will go into the resulting +/// MachineInstr. +unsigned InstrEmitter::CountOperands(SDNode *Node) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + return N; +} + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. +void InstrEmitter:: +EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, + unsigned SrcReg, DenseMap &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. 
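// CountResults and CountOperands above both peel optional trailing sentinels
// in a fixed order: any number of Flag (glue) values, then at most one chain
// (Other). A standalone sketch of that trimming over a plain vector of value
// kinds:
#include <vector>
namespace sketch {
enum Kind { Value, Chain, Flag };
static unsigned countRealValues(const std::vector<Kind> &Vals) {
  unsigned N = static_cast<unsigned>(Vals.size());
  while (N && Vals[N - 1] == Flag)
    --N;                 // glue results never reach the MachineInstr
  if (N && Vals[N - 1] == Chain)
    --N;                 // neither does the chain result
  return N;
}
} // namespace sketch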
+ bool MatchReg = true; + const TargetRegisterClass *UseRC = NULL; + if (!IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + EVT VT = Node->getValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Flag) + continue; + Match = false; + if (User->isMachineOpcode()) { + const TargetInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = 0; + if (i+II.getNumDefs() < II.getNumOperands()) + RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI); + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC); + // If multiple uses expect disjoint register classes, we emit + // copies in AddRegisterOperand. + if (ComRC) + UseRC = ComRC; + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + EVT VT = Node->getValueType(ResNo); + const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI->getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. + VRBase = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, + DstRC, SrcRC); + + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. 
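// EmitCopyFromReg above folds the register-class constraints of every use
// into one class by repeatedly taking the common subclass, and only keeps
// the copy when some use really demands something different. A sketch of
// that folding step, with classes modelled as plain bitsets of allowed
// registers (the real TargetRegisterClass is richer than this):
#include <bitset>
namespace sketch {
using RegClass = std::bitset<64>;
// Empty result models the disjoint case, where the patch falls back to
// emitting copies in AddRegisterOperand.
static RegClass commonSubClass(const RegClass &A, const RegClass &B) {
  return A & B;
}
static RegClass foldUseConstraints(const RegClass *Uses, unsigned N) {
  RegClass Acc; Acc.set();  // start unconstrained
  for (unsigned i = 0; i != N; ++i)
    Acc = commonSubClass(Acc, Uses[i]);
  return Acc;
}
} // namespace sketch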
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetInstrInfo::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI); + if (II.OpInfo[i].isOptionalDef()) { + // Optional def must be a physical register. + unsigned NumResults = CountResults(Node); + VRBase = cast(Node->getOperand(i-NumResults))->getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + if (!VRBase && !IsClone && !IsCloned) + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MI->addOperand(MachineOperand::CreateReg(Reg, true)); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI->createVirtualRegister(RC); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + } + + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned InstrEmitter::getVR(SDValue Op, + DenseMap &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetInstrInfo::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. + unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc + // does not include operand register class info. + if (!VReg) { + const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + VReg = MRI->createVirtualRegister(RC); + } + BuildMI(MBB, Op.getDebugLoc(), + TII->get(TargetInstrInfo::IMPLICIT_DEF), VReg); + return VReg; + } + + DenseMap::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. 
Insert register copies if the register is +/// not in the required register class. +void +InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap &VRBaseMap) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const TargetInstrDesc &TID = MI->getDesc(); + bool isOptDef = IIOpNum < TID.getNumOperands() && + TID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it. + if (II) { + const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + const TargetRegisterClass *DstRC = 0; + if (IIOpNum < II->getNumOperands()) + DstRC = II->OpInfo[IIOpNum].getRegClass(TRI); + assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) && + "Don't have operand info for this instruction!"); + if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && "Unable to issue a copy instruction!\n"); + (void) Emitted; + VReg = NewVReg; + } + } + + MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. IIOpNum and II are used for +/// assertions only. +void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap &VRBaseMap) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } else if (ConstantSDNode *C = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); + } else if (ConstantFPSDNode *F = dyn_cast(Op)) { + const ConstantFP *CFP = F->getConstantFPValue(); + MI->addOperand(MachineOperand::CreateFPImm(CFP)); + } else if (RegisterSDNode *R = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (GlobalAddressSDNode *TGA = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags())); + } else if (BasicBlockSDNode *BBNode = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + } else if (FrameIndexSDNode *FI = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + } else if (JumpTableSDNode *JT = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(), + JT->getTargetFlags())); + } else if (ConstantPoolSDNode *CP = dyn_cast(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + const Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = TM->getTargetData()->getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! 
+ Align = TM->getTargetData()->getTypeAllocSize(Type); + } + } + + unsigned Idx; + MachineConstantPool *MCP = MF->getConstantPool(); + if (CP->isMachineConstantPoolEntry()) + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, + CP->getTargetFlags())); + } else if (ExternalSymbolSDNode *ES = dyn_cast(Op)) { + MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), + ES->getTargetFlags())); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Flag && + "Chain and flag operands should occur at end of operand list!"); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap); + } +} + +/// getSuperRegisterRegClass - Returns the register class of a superreg A whose +/// "SubIdx"'th sub-register class is the specified register class and whose +/// type matches the specified type. +static const TargetRegisterClass* +getSuperRegisterRegClass(const TargetRegisterClass *TRC, + unsigned SubIdx, EVT VT) { + // Pick the register class of the superegister for this type + for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(), + E = TRC->superregclasses_end(); I != E; ++I) + if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC) + return *I; + assert(false && "Couldn't find the register class"); + return 0; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap &VRBaseMap){ + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetInstrInfo::EXTRACT_SUBREG) { + unsigned SubIdx = cast(Node->getOperand(1))->getZExtValue(); + + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetInstrInfo::EXTRACT_SUBREG)); + + // Figure out the register class to create for the destreg. + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(VReg); + const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx); + assert(SRC && "Invalid subregister index in EXTRACT_SUBREG"); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. 
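// The reuse test right below this comment is deliberately an exact-class
// comparison, not a compatibility check. A sketch of the decision, with
// register classes reduced to opaque ids and vreg creation injected as a
// callback (names here are illustrative, not the patch's API):
namespace sketch {
static unsigned chooseDestReg(unsigned ExistingVReg, int ExistingClass,
                              int RequiredClass,
                              unsigned (*createVReg)(int)) {
  // Reuse only a register of precisely the required class; a subclass is
  // not good enough here. Otherwise create a fresh one.
  if (ExistingVReg != 0 && ExistingClass == RequiredClass)
    return ExistingVReg;
  return createVReg(RequiredClass);
}
} // namespace sketch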
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Add def, source, and subreg index + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + MBB->insert(InsertPos, MI); + } else if (Opc == TargetInstrInfo::INSERT_SUBREG || + Opc == TargetInstrInfo::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubReg = getVR(N1, VRBaseMap); + unsigned SubIdx = cast(N2)->getZExtValue(); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, + Node->getValueType(0)); + + // Figure out the register class to create for the destreg. + // Note that if we're going to directly use an existing register, + // it must be precisely the required class, and not a subclass + // thereof. + if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) { + // Create the reg + assert(SRC && "Couldn't find source register class"); + VRBase = MRI->createVirtualRegister(SRC); + } + + // Create the insert_subreg or subreg_to_reg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); + MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetInstrInfo::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast(N0); + MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + } else + AddOperand(MI, N0, 0, 0, VRBaseMap); + // Add the subregster being inserted + AddOperand(MI, N1, 0, 0, VRBaseMap); + MI->addOperand(MachineOperand::CreateImm(SubIdx)); + MBB->insert(InsertPos, MI); + } else + llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. +/// COPY_TO_REGCLASS is just a normal copy, except that the destination +/// register is constrained to be in a particular register class. +/// +void +InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, + DenseMap &VRBaseMap) { + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + + unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); + + // Create the new VReg in the destination class and emit a copy. + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, + DstRC, SrcRC); + assert(Emitted && + "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); + (void) Emitted; + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + isNew = isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitNode - Generate machine code for an node and needed dependencies. 
+
+/// EmitNode - Generate machine code for a node and needed dependencies.
+///
+void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+                            DenseMap<SDValue, unsigned> &VRBaseMap,
+                        DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+  // If machine instruction
+  if (Node->isMachineOpcode()) {
+    unsigned Opc = Node->getMachineOpcode();
+
+    // Handle subreg insert/extract specially
+    if (Opc == TargetInstrInfo::EXTRACT_SUBREG ||
+        Opc == TargetInstrInfo::INSERT_SUBREG ||
+        Opc == TargetInstrInfo::SUBREG_TO_REG) {
+      EmitSubregNode(Node, VRBaseMap);
+      return;
+    }
+
+    // Handle COPY_TO_REGCLASS specially.
+    if (Opc == TargetInstrInfo::COPY_TO_REGCLASS) {
+      EmitCopyToRegClassNode(Node, VRBaseMap);
+      return;
+    }
+
+    if (Opc == TargetInstrInfo::IMPLICIT_DEF)
+      // We want a unique VR for each IMPLICIT_DEF use.
+      return;
+
+    const TargetInstrDesc &II = TII->get(Opc);
+    unsigned NumResults = CountResults(Node);
+    unsigned NodeOperands = CountOperands(Node);
+    bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
+                          II.getImplicitDefs() != 0;
+#ifndef NDEBUG
+    unsigned NumMIOperands = NodeOperands + NumResults;
+    assert((II.getNumOperands() == NumMIOperands ||
+            HasPhysRegOuts || II.isVariadic()) &&
+           "#operands for dag node doesn't match .td file!");
+#endif
+
+    // Create the new machine instruction.
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+    // Add result register values for things that are defined by this
+    // instruction.
+    if (NumResults)
+      CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+    // Emit all of the actual operands of this instruction, adding them to the
+    // instruction as appropriate.
+    bool HasOptPRefs = II.getNumDefs() > NumResults;
+    assert((!HasOptPRefs || !HasPhysRegOuts) &&
+           "Unable to cope with optional defs and phys regs defs!");
+    unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+    for (unsigned i = NumSkip; i != NodeOperands; ++i)
+      AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+                 VRBaseMap);
+
+    // Transfer all of the memory reference descriptions of this instruction.
+    MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+                   cast<MachineSDNode>(Node)->memoperands_end());
+
+    if (II.usesCustomDAGSchedInsertionHook()) {
+      // Insert this instruction into the basic block using a target
+      // specific inserter which may return a new basic block.
+      MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+      InsertPos = MBB->end();
+    } else {
+      MBB->insert(InsertPos, MI);
+    }
+
+    // Additional results must be physical register defs.
+    if (HasPhysRegOuts) {
+      for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+        unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+        if (Node->hasAnyUseOfValue(i))
+          EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+      }
+    }
+    return;
+  }
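The bookkeeping in the machine-opcode path above is easy to misread: when the
.td description declares more defs than the node produces results (optional
defs), the first NumSkip SDNode operands are skipped, and node operand i then
appears to land in instruction operand slot i - NumSkip + II.getNumDefs(). A
small self-contained check of that index arithmetic (the concrete numbers are
made up for illustration):

    #include <cassert>

    int main() {
      unsigned NumResults = 1;     // SDNode results that are real defs
      unsigned NumDefs = 2;        // defs in the TargetInstrDesc (.td file)
      unsigned NodeOperands = 3;   // SDNode operands to emit
      unsigned NumSkip = NumDefs > NumResults ? NumDefs - NumResults : 0;
      for (unsigned i = NumSkip; i != NodeOperands; ++i) {
        unsigned IIOpNum = i - NumSkip + NumDefs; // MI operand slot
        // Emitted operands always land after the instruction's defs.
        assert(IIOpNum >= NumDefs);
      }
      assert(NumSkip == 1);
      return 0;
    }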
+
+  switch (Node->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    Node->dump();
+#endif
+    llvm_unreachable("This target-independent node should have been selected!");
+    break;
+  case ISD::EntryToken:
+    llvm_unreachable("EntryToken should have been excluded from the schedule!");
+    break;
+  case ISD::MERGE_VALUES:
+  case ISD::TokenFactor: // fall thru
+    break;
+  case ISD::CopyToReg: {
+    unsigned SrcReg;
+    SDValue SrcVal = Node->getOperand(2);
+    if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
+      SrcReg = R->getReg();
+    else
+      SrcReg = getVR(SrcVal, VRBaseMap);
+
+    unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+    if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
+      break;
+
+    const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
+    // Get the register classes of the src/dst.
+    if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+      SrcTRC = MRI->getRegClass(SrcReg);
+    else
+      SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg, SrcVal.getValueType());
+
+    if (TargetRegisterInfo::isVirtualRegister(DestReg))
+      DstTRC = MRI->getRegClass(DestReg);
+    else
+      DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
+                                          Node->getOperand(1).getValueType());
+
+    bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
+                                     DstTRC, SrcTRC);
+    assert(Emitted && "Unable to issue a copy instruction!\n");
+    (void) Emitted;
+    break;
+  }
+  case ISD::CopyFromReg: {
+    unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+    EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
+    break;
+  }
+  case ISD::INLINEASM: {
+    unsigned NumOps = Node->getNumOperands();
+    if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+      --NumOps;  // Ignore the flag operand.
+
+    // Create the inline asm machine instruction.
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                               TII->get(TargetInstrInfo::INLINEASM));
+
+    // Add the asm string as an external symbol operand.
+    const char *AsmStr =
+      cast<ExternalSymbolSDNode>(Node->getOperand(1))->getSymbol();
+    MI->addOperand(MachineOperand::CreateES(AsmStr));
+
+    // Add all of the operand registers to the instruction.
+    for (unsigned i = 2; i != NumOps;) {
+      unsigned Flags =
+        cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+      unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+      MI->addOperand(MachineOperand::CreateImm(Flags));
+      ++i;  // Skip the ID value.
+
+      switch (Flags & 7) {
+      default: llvm_unreachable("Bad flags!");
+      case 2:   // Def of register.
+        for (; NumVals; --NumVals, ++i) {
+          unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+          MI->addOperand(MachineOperand::CreateReg(Reg, true));
+        }
+        break;
+      case 6:   // Def of earlyclobber register.
+        for (; NumVals; --NumVals, ++i) {
+          unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+          MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
+                                                   false, false, true));
+        }
+        break;
+      case 1:  // Use of register.
+      case 3:  // Immediate.
+      case 4:  // Addressing mode.
+        // The addressing mode has been selected, just add all of the
+        // operands to the machine instruction.
+        for (; NumVals; --NumVals, ++i)
+          AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+        break;
+      }
+    }
+    MBB->insert(InsertPos, MI);
+    break;
+  }
+  }
+}
+
+/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+/// at the given position in the given block.
+InstrEmitter::InstrEmitter(MachineBasicBlock *mbb,
+                           MachineBasicBlock::iterator insertpos)
+  : MF(mbb->getParent()),
+    MRI(&MF->getRegInfo()),
+    TM(&MF->getTarget()),
+    TII(TM->getInstrInfo()),
+    TRI(TM->getRegisterInfo()),
+    TLI(TM->getTargetLowering()),
+    MBB(mbb), InsertPos(insertpos) {
+}
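The INLINEASM case walks a flat operand list: each group starts with a flags
immediate whose low three bits encode the operand kind (1 = use, 2 = def,
3 = immediate, 4 = memory/addressing mode, 6 = earlyclobber def, per the
switch above), and which also carries the number of registers that follow,
recovered via InlineAsm::getNumOperandRegisters. A standalone decoder for
that packing, assuming the count sits in the bits above the kind field (the
exact shift and mask here are an illustrative assumption, not a quote of the
LLVM header):

    #include <cstdio>

    int main() {
      unsigned Flags = (3u << 3) | 2u;          // 3 registers, kind 2 (def)
      unsigned Kind = Flags & 7;                // low 3 bits: operand kind
      unsigned NumVals = (Flags >> 3) & 0x1fff; // assumed count field
      std::printf("kind=%u regs=%u\n", Kind, NumVals); // prints kind=2 regs=3
      return 0;
    }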
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
new file mode 100644
index 0000000000000..bb4634d04b2a1
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -0,0 +1,119 @@
+//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INSTREMITTER_H
+#define INSTREMITTER_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class TargetInstrDesc;
+
+class InstrEmitter {
+  MachineFunction *MF;
+  MachineRegisterInfo *MRI;
+  const TargetMachine *TM;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const TargetLowering *TLI;
+
+  MachineBasicBlock *MBB;
+  MachineBasicBlock::iterator InsertPos;
+
+  /// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+  /// implicit physical register output.
+  void EmitCopyFromReg(SDNode *Node, unsigned ResNo,
+                       bool IsClone, bool IsCloned,
+                       unsigned SrcReg,
+                       DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// getDstOfOnlyCopyToRegUse - If the only use of the specified result number
+  /// of node is a CopyToReg, return its destination register. Return 0
+  /// otherwise.
+  unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
+                                    unsigned ResNo) const;
+
+  void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+                              const TargetInstrDesc &II,
+                              bool IsClone, bool IsCloned,
+                              DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// getVR - Return the virtual register corresponding to the specified result
+  /// of the specified node.
+  unsigned getVR(SDValue Op,
+                 DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// AddRegisterOperand - Add the specified register as an operand to the
+  /// specified machine instr. Insert register copies if the register is
+  /// not in the required register class.
+  void AddRegisterOperand(MachineInstr *MI, SDValue Op,
+                          unsigned IIOpNum,
+                          const TargetInstrDesc *II,
+                          DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// AddOperand - Add the specified operand to the specified machine instr. II
+  /// specifies the instruction information for the node, and IIOpNum is the
+  /// operand number (in the II) that we are adding. IIOpNum and II are used
+  /// for assertions only.
+  void AddOperand(MachineInstr *MI, SDValue Op,
+                  unsigned IIOpNum,
+                  const TargetInstrDesc *II,
+                  DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// EmitSubregNode - Generate machine code for subreg nodes.
+  ///
+  void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+
+  /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS
+  /// nodes. COPY_TO_REGCLASS is just a normal copy, except that the
+  /// destination register is constrained to be in a particular register class.
+  ///
+  void EmitCopyToRegClassNode(SDNode *Node,
+                              DenseMap<SDValue, unsigned> &VRBaseMap);
+
+public:
+  /// CountResults - The results of target nodes have register or immediate
+  /// operands first, then an optional chain, and optional flag operands
+  /// (which do not go into the machine instrs.)
+  static unsigned CountResults(SDNode *Node);
+
+  /// CountOperands - The inputs to target nodes have any actual inputs first,
+  /// followed by an optional chain operand, then flag operands. Compute
+  /// the number of actual operands that will go into the resulting
+  /// MachineInstr.
+  static unsigned CountOperands(SDNode *Node);
+
+  /// EmitNode - Generate machine code for a node and needed dependencies.
+  ///
+  void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
+                DenseMap<SDValue, unsigned> &VRBaseMap,
+                DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+
+  /// getBlock - Return the current basic block.
+  MachineBasicBlock *getBlock() { return MBB; }
+
+  /// getInsertPos - Return the current insertion position.
+  MachineBasicBlock::iterator getInsertPos() { return InsertPos; }
+
+  /// InstrEmitter - Construct an InstrEmitter and set it to start inserting
+  /// at the given position in the given block.
+  InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+};
+
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 1413d9552d0e1..fc01b07f65a2c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -30,9 +30,12 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -98,13 +101,14 @@ public:
   /// getTypeAction - Return how we should legalize values of this type, either
   /// it is already legal or we need to expand it into multiple registers of
   /// smaller integer type, or we need to promote it to a larger type.
-  LegalizeAction getTypeAction(MVT VT) const {
-    return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+  LegalizeAction getTypeAction(EVT VT) const {
+    return
+      (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT);
   }
 
   /// isTypeLegal - Return true if this type is legal on this target.
   ///
-  bool isTypeLegal(MVT VT) const {
+  bool isTypeLegal(EVT VT) const {
     return getTypeAction(VT) == Legal;
   }
 
@@ -131,14 +135,14 @@ private:
   /// performs the same shuffle in terms of order or result bytes, but on a
   /// type whose vector element type is narrower than the original shuffle
   /// type. e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
-  SDValue ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
                                      SDValue N1, SDValue N2,
                                      SmallVectorImpl<int> &Mask) const;
 
   bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
                                     SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
 
-  void LegalizeSetCCCondCode(MVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+  void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
                              DebugLoc dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
@@ -149,18 +153,18 @@ private:
                              RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
                              RTLIB::Libcall Call_I128);
 
-  SDValue EmitStackConvert(SDValue SrcOp, MVT SlotVT, MVT DestVT, DebugLoc dl);
+  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
   SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
   SDValue ExpandDBG_STOPPOINT(SDNode *Node);
   void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results);
   SDValue ExpandFCOPYSIGN(SDNode *Node);
-  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, MVT DestVT,
+  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
                                DebugLoc dl);
-  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, MVT DestVT, bool isSigned,
+  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
                                 DebugLoc dl);
-  SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, MVT DestVT, bool isSigned,
+  SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
                                 DebugLoc dl);
 
   SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
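The LegalizeDAG changes that follow are dominated by one mechanical
migration: MVT (a plain enum of machine value types) gives way to EVT, which
can also describe extended types and therefore needs the LLVMContext. The
recurring rewrite pattern, taken from the hunks themselves:

    // before this import:
    MVT VT = Val.getValueType();
    MVT IntVT = MVT::getIntegerVT(Bits);
    const Type *Ty = VT.getTypeForMVT();
    switch (VT.getSimpleVT()) { /* ... */ }

    // after this import:
    EVT VT = Val.getValueType();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
    const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
    switch (VT.getSimpleVT().SimpleTy) { /* ... */ }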
@@ -179,10 +183,10 @@ private:
 /// whose vector element type is narrower than the original shuffle type.
 /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
 SDValue
-SelectionDAGLegalize::ShuffleWithNarrowerEltType(MVT NVT, MVT VT, DebugLoc dl,
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
                                                  SDValue N1, SDValue N2,
                                              SmallVectorImpl<int> &Mask) const {
-  MVT EltVT = NVT.getVectorElementType();
+  EVT EltVT = NVT.getVectorElementType();
   unsigned NumMaskElts = VT.getVectorNumElements();
   unsigned NumDestElts = NVT.getVectorNumElements();
   unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -342,7 +346,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
   // double.  This shrinks FP constants and canonicalizes them for targets where
   // an FP extending load is the same cost as a normal load (such as on the x87
   // fp stack or PPC FP unit).
-  MVT VT = CFP->getValueType(0);
+  EVT VT = CFP->getValueType(0);
   ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue());
   if (!UseCP) {
     assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion");
@@ -350,16 +354,16 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
                            (VT == MVT::f64) ? MVT::i64 : MVT::i32);
   }
 
-  MVT OrigVT = VT;
-  MVT SVT = VT;
+  EVT OrigVT = VT;
+  EVT SVT = VT;
   while (SVT != MVT::f32) {
-    SVT = (MVT::SimpleValueType)(SVT.getSimpleVT() - 1);
+    SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
     if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
         // Only do this if the target has a native EXTLOAD instruction from
         // smaller type.
         TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
         TLI.ShouldShrinkFPConstant(OrigVT)) {
-      const Type *SType = SVT.getTypeForMVT();
+      const Type *SType = SVT.getTypeForEVT(*DAG.getContext());
       LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
       VT = SVT;
       Extend = true;
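The unaligned-store expansion updated in the next hunks copies the value
through a stack slot in register-sized pieces; the piece count rounds up and
the tail is written with a narrower truncating store. The arithmetic, checked
standalone (f80/i32 is just an illustrative combination):

    #include <cassert>

    int main() {
      unsigned StoredBytes = 10;  // e.g. an f80 store
      unsigned RegBytes = 4;      // 32-bit registers
      unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // round up
      assert(NumRegs == 3);
      // Full registers cover (NumRegs - 1) * RegBytes bytes; the last store
      // is partial and uses an integer type of 8 * (StoredBytes - Offset) bits.
      unsigned Offset = (NumRegs - 1) * RegBytes;
      assert(8 * (StoredBytes - Offset) == 16); // final i16 truncating store
      return 0;
    }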
@@ -384,13 +388,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
   SDValue Chain = ST->getChain();
   SDValue Ptr = ST->getBasePtr();
   SDValue Val = ST->getValue();
-  MVT VT = Val.getValueType();
+  EVT VT = Val.getValueType();
   int Alignment = ST->getAlignment();
   int SVOffset = ST->getSrcValueOffset();
   DebugLoc dl = ST->getDebugLoc();
   if (ST->getMemoryVT().isFloatingPoint() ||
       ST->getMemoryVT().isVector()) {
-    MVT intVT = MVT::getIntegerVT(VT.getSizeInBits());
+    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
     if (TLI.isTypeLegal(intVT)) {
       // Expand to a bitconvert of the value to the integer type of the
       // same size, then a (misaligned) int store.
@@ -401,9 +405,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
     } else {
       // Do a (aligned) store to a stack slot, then copy from the stack slot
       // to the final destination using (unaligned) integer loads and stores.
-      MVT StoredVT = ST->getMemoryVT();
-      MVT RegVT =
-        TLI.getRegisterType(MVT::getIntegerVT(StoredVT.getSizeInBits()));
+      EVT StoredVT = ST->getMemoryVT();
+      EVT RegVT =
+        TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits()));
       unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
       unsigned RegBytes = RegVT.getSizeInBits() / 8;
       unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
@@ -437,7 +441,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
       // The last store may be partial.  Do a truncating store.  On big-endian
       // machines this requires an extending load from the stack slot to ensure
       // that the bits are in the right place.
-      MVT MemVT = MVT::getIntegerVT(8 * (StoredBytes - Offset));
+      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
 
       // Load from the stack slot.
       SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
@@ -456,8 +460,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
          !ST->getMemoryVT().isVector() &&
          "Unaligned store of unknown type.");
   // Get the half-size VT
-  MVT NewStoredVT =
-    (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT() - 1);
+  EVT NewStoredVT =
+    (MVT::SimpleValueType)(ST->getMemoryVT().getSimpleVT().SimpleTy - 1);
   int NumBits = NewStoredVT.getSizeInBits();
   int IncrementSize = NumBits / 8;
@@ -488,11 +492,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
   int SVOffset = LD->getSrcValueOffset();
   SDValue Chain = LD->getChain();
   SDValue Ptr = LD->getBasePtr();
-  MVT VT = LD->getValueType(0);
-  MVT LoadedVT = LD->getMemoryVT();
+  EVT VT = LD->getValueType(0);
+  EVT LoadedVT = LD->getMemoryVT();
   DebugLoc dl = LD->getDebugLoc();
   if (VT.isFloatingPoint() || VT.isVector()) {
-    MVT intVT = MVT::getIntegerVT(LoadedVT.getSizeInBits());
+    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
     if (TLI.isTypeLegal(intVT)) {
       // Expand to a (misaligned) integer load of the same size,
       // then bitconvert to floating point or vector.
@@ -508,7 +512,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
     } else {
       // Copy the value to a (aligned) stack slot using (unaligned) integer
      // loads and stores, then do a (aligned) load from the stack slot.
-      MVT RegVT = TLI.getRegisterType(intVT);
+      EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
       unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
       unsigned RegBytes = RegVT.getSizeInBits() / 8;
       unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
@@ -538,7 +542,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
       }
 
       // The last copy may be partial.  Do an extending load.
-      MVT MemVT = MVT::getIntegerVT(8 * (LoadedBytes - Offset));
+      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset));
       SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                                     LD->getSrcValue(), SVOffset + Offset,
                                     MemVT, LD->isVolatile(),
@@ -568,8 +572,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
   // Compute the new VT that is half the size of the old one.  This is an
   // integer MVT.
   unsigned NumBits = LoadedVT.getSizeInBits();
-  MVT NewLoadedVT;
-  NewLoadedVT = MVT::getIntegerVT(NumBits/2);
+  EVT NewLoadedVT;
+  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
   NumBits >>= 1;
 
   unsigned Alignment = LD->getAlignment();
@@ -629,10 +633,10 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
   // with a "move to register" or "extload into register" instruction, then
   // permute it into place, if the idx is a constant and if the idx is
   // supported by the target.
-  MVT VT    = Tmp1.getValueType();
-  MVT EltVT = VT.getVectorElementType();
-  MVT IdxVT = Tmp3.getValueType();
-  MVT PtrVT = TLI.getPointerTy();
+  EVT VT    = Tmp1.getValueType();
+  EVT EltVT = VT.getVectorElementType();
+  EVT IdxVT = Tmp3.getValueType();
+  EVT PtrVT = TLI.getPointerTy();
   SDValue StackPtr = DAG.CreateStackTemporary(VT);
 
   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -663,7 +667,7 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
   // SCALAR_TO_VECTOR requires that the type of the value being inserted
   // match the element type of the vector being created, except for
   // integers in which case the inserted value can be over width.
- MVT EltVT = Vec.getValueType().getVectorElementType(); + EVT EltVT = Vec.getValueType().getVectorElementType(); if (Val.getValueType() == EltVT || (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, @@ -785,7 +789,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { - MVT InnerType = cast(Node->getOperand(1))->getVT(); + EVT InnerType = cast(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } @@ -795,7 +799,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : Node->getOpcode() == ISD::SETCC ? 2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; - MVT OpVT = Node->getOperand(CompareOperand).getValueType(); + EVT OpVT = Node->getOperand(CompareOperand).getValueType(); ISD::CondCode CCCode = cast(Node->getOperand(CCOperand))->get(); Action = TLI.getCondCodeAction(CCCode, OpVT); @@ -821,11 +825,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // special case should be done as part of making LegalizeDAG non-recursive. SimpleFinishLegalizing = false; break; - case ISD::CALL: - // FIXME: Legalization for calls requires custom-lowering the call before - // legalizing the operands! (I haven't looked into precisely why.) - SimpleFinishLegalizing = false; - break; case ISD::EXTRACT_ELEMENT: case ISD::FLT_ROUNDS_: case ISD::SADDO: @@ -847,7 +846,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::TRAMPOLINE: case ISD::FRAMEADDR: case ISD::RETURNADDR: - case ISD::FORMAL_ARGUMENTS: // These operations lie about being legal: when they claim to be legal, // they should actually be custom-lowered. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -885,7 +883,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - case ISD::RET: // Branches tweak the chain to include LastCALLSEQ_END Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], LastCALLSEQ_END); @@ -902,6 +899,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (!Ops[1].getValueType().isVector()) Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); break; + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[2].getValueType().isVector()) + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2])); + break; } Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), @@ -946,44 +951,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { switch (Node->getOpcode()) { default: #ifndef NDEBUG - cerr << "NODE: "; Node->dump(&DAG); cerr << "\n"; + errs() << "NODE: "; + Node->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); - abort(); - case ISD::CALL: - // The only option for this is to custom lower it. - Tmp3 = TLI.LowerOperation(Result.getValue(0), DAG); - assert(Tmp3.getNode() && "Target didn't custom lower this node!"); - // A call within a calling sequence must be legalized to something - // other than the normal CALLSEQ_END. Violating this gets Legalize - // into an infinite loop. 
- assert ((!IsLegalizingCall || - Node->getOpcode() != ISD::CALL || - Tmp3.getNode()->getOpcode() != ISD::CALLSEQ_END) && - "Nested CALLSEQ_START..CALLSEQ_END not supported."); - - // The number of incoming and outgoing values should match; unless the final - // outgoing value is a flag. - assert((Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() || - (Tmp3.getNode()->getNumValues() == Result.getNode()->getNumValues() + 1 && - Tmp3.getNode()->getValueType(Tmp3.getNode()->getNumValues() - 1) == - MVT::Flag)) && - "Lowering call/formal_arguments produced unexpected # results!"); - - // Since CALL/FORMAL_ARGUMENTS nodes produce multiple values, make sure to - // remember that we legalized all of them, so it doesn't get relegalized. - for (unsigned i = 0, e = Tmp3.getNode()->getNumValues(); i != e; ++i) { - if (Tmp3.getNode()->getValueType(i) == MVT::Flag) - continue; - Tmp1 = LegalizeOp(Tmp3.getValue(i)); - if (Op.getResNo() == i) - Tmp2 = Tmp1; - AddLegalizedOperand(SDValue(Node, i), Tmp1); - } - return Tmp2; + llvm_unreachable("Do not know how to legalize this operator!"); + case ISD::BUILD_VECTOR: switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { @@ -1094,22 +1070,22 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); Tmp3 = Result.getValue(0); Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedLoad(cast(Result.getNode()), + DAG, TLI); Tmp3 = Result.getOperand(0); Tmp4 = Result.getOperand(1); Tmp3 = LegalizeOp(Tmp3); @@ -1128,7 +1104,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Only promote a load of vector type to another. assert(VT.isVector() && "Cannot promote this load!"); // Change base type to a different vector type. - MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), @@ -1144,7 +1120,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { AddLegalizedOperand(SDValue(Node, 1), Tmp4); return Op.getResNo() ? 
Tmp4 : Tmp3; } else { - MVT SrcVT = LD->getMemoryVT(); + EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); @@ -1163,7 +1139,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. unsigned NewWidth = SrcVT.getStoreSizeInBits(); - MVT NVT = MVT::getIntegerVT(NewWidth); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); SDValue Ch; // The extra bits are guaranteed to be zero, since we stored them that @@ -1201,8 +1177,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Load size not an integral number of bytes!"); - MVT RoundVT = MVT::getIntegerVT(RoundWidth); - MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi, Ch; unsigned IncrementSize; @@ -1269,7 +1245,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1287,12 +1263,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { // If this is an unaligned load and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedLoad(cast(Result.getNode()), + DAG, TLI); Tmp1 = Result.getOperand(0); Tmp2 = Result.getOperand(1); Tmp1 = LegalizeOp(Tmp1); @@ -1303,10 +1279,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; case TargetLowering::Expand: // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND - if (SrcVT == MVT::f32 && Node->getValueType(0) == MVT::f64) { + // f128 = EXTLOAD {f32,f64} too + if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 || + Node->getValueType(0) == MVT::f128)) || + (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), - LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->getSrcValueOffset(), + LD->isVolatile(), LD->getAlignment()); Result = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. @@ -1359,18 +1338,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, ST->getOffset()); - MVT VT = Tmp3.getValueType(); + EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedStore(cast(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: @@ -1391,14 +1370,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { Tmp3 = LegalizeOp(ST->getValue()); - MVT StVT = ST->getMemoryVT(); + EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); if (StWidth != StVT.getStoreSizeInBits()) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - MVT NVT = MVT::getIntegerVT(StVT.getStoreSizeInBits()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, NVT, isVolatile, Alignment); @@ -1412,8 +1391,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(ExtraWidth < RoundWidth); assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && "Store size not an integral number of bytes!"); - MVT RoundVT = MVT::getIntegerVT(RoundWidth); - MVT ExtraVT = MVT::getIntegerVT(ExtraWidth); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi; unsigned IncrementSize; @@ -1460,16 +1439,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsUnalignedMemoryAccesses()) { - unsigned ABIAlignment = TLI.getTargetData()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForMVT()); + if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), DAG, - TLI); + Result = ExpandUnalignedStore(cast(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: @@ -1522,7 +1501,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + if (Op.getValueType().isVector()) + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + else + return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + NULL, 0, Vec.getValueType().getVectorElementType()); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1530,8 +1513,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // aligned object on the stack, store each element into it, then load // the result as a vector. // Create the stack frame object. 
-  MVT VT = Node->getValueType(0);
-  MVT OpVT = Node->getOperand(0).getValueType();
+  EVT VT = Node->getValueType(0);
+  EVT OpVT = Node->getOperand(0).getValueType();
   DebugLoc dl = Node->getDebugLoc();
   SDValue FIPtr = DAG.CreateStackTemporary(VT);
   int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
@@ -1574,7 +1557,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
          "Ugly special-cased code!");
   // Get the sign bit of the RHS.
   SDValue SignBit;
-  MVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+  EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
   if (isTypeLegal(IVT)) {
     SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
   } else {
@@ -1613,9 +1596,8 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
   bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other);
 
   const DbgStopPointSDNode *DSP = cast<DbgStopPointSDNode>(Node);
-  GlobalVariable *CU_GV = cast<GlobalVariable>(DSP->getCompileUnit());
-  if (DW && (useDEBUG_LOC || useLABEL) && !CU_GV->isDeclaration()) {
-    DICompileUnit CU(cast<GlobalVariable>(DSP->getCompileUnit()));
+  MDNode *CU_Node = DSP->getCompileUnit();
+  if (DW && (useDEBUG_LOC || useLABEL)) {
     unsigned Line = DSP->getLine();
     unsigned Col = DSP->getColumn();
 
@@ -1627,9 +1609,9 @@ SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) {
       return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0),
                          DAG.getConstant(Line, MVT::i32),
                          DAG.getConstant(Col, MVT::i32),
-                         DAG.getSrcValue(CU.getGV()));
+                         DAG.getSrcValue(CU_Node));
     } else {
-      unsigned ID = DW->RecordSourceLine(Line, Col, CU);
+      unsigned ID = DW->RecordSourceLine(Line, Col, CU_Node);
       return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID);
     }
   }
@@ -1643,7 +1625,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
   assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
          " not tell us which reg is the stack pointer!");
   DebugLoc dl = Node->getDebugLoc();
-  MVT VT = Node->getValueType(0);
+  EVT VT = Node->getValueType(0);
   SDValue Tmp1 = SDValue(Node, 0);
   SDValue Tmp2 = SDValue(Node, 1);
   SDValue Tmp3 = Node->getOperand(2);
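LegalizeSetCCCondCode, updated in the next hunks, rewrites a setcc whose
condition code is illegal on the target into two legal comparisons joined by
AND or OR: the "ordered" predicates pair the base comparison with SETO
(neither operand is NaN), the "unordered" ones with SETUO. The same
decomposition in plain, runnable C++:

    #include <cassert>
    #include <cmath>
    #include <limits>

    // SETOEQ(a, b) == (SETEQ(a, b) AND SETO(a, b))
    bool setoeq(double a, double b) {
      bool seteq = (a == b);
      bool seto  = !std::isunordered(a, b); // no NaN on either side
      return seteq && seto;
    }

    int main() {
      double NaN = std::numeric_limits<double>::quiet_NaN();
      assert(setoeq(1.0, 1.0));
      assert(!setoeq(1.0, NaN));
      return 0;
    }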
@@ -1676,14 +1658,14 @@
 /// condition code CC on the current target. This routine assumes LHS and RHS
 /// have already been legalized by LegalizeSetCCOperands. It expands SETCC with
 /// illegal condition code into AND / OR of multiple SETCC values.
-void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
                                                  SDValue &LHS, SDValue &RHS,
                                                  SDValue &CC,
                                                  DebugLoc dl) {
-  MVT OpVT = LHS.getValueType();
+  EVT OpVT = LHS.getValueType();
   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
   switch (TLI.getCondCodeAction(CCCode, OpVT)) {
-  default: assert(0 && "Unknown condition code action!");
+  default: llvm_unreachable("Unknown condition code action!");
   case TargetLowering::Legal:
     // Nothing to do.
     break;
@@ -1691,7 +1673,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
-   default: assert(0 && "Don't know how to expand this condition!"); abort();
+   default: llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO;  Opc = ISD::AND; break;
    case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
    case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
@@ -1722,13 +1704,13 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(MVT VT,
 /// a load from the stack slot to DestVT, extending it if needed.
 /// The resultant code need not be legal.
 SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
-                                               MVT SlotVT,
-                                               MVT DestVT,
+                                               EVT SlotVT,
+                                               EVT DestVT,
                                                DebugLoc dl) {
   // Create the stack frame object.
   unsigned SrcAlign =
     TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
-                                              getTypeForMVT());
+                                              getTypeForEVT(*DAG.getContext()));
   SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
 
   FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
@@ -1739,7 +1721,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
   unsigned SlotSize = SlotVT.getSizeInBits();
   unsigned DestSize = DestVT.getSizeInBits();
   unsigned DestAlign =
-    TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForMVT());
+    TLI.getTargetData()->getPrefTypeAlignment(DestVT.getTypeForEVT(*DAG.getContext()));
 
   // Emit a store to the stack slot.  Use a truncstore if the input value is
   // larger than DestVT.
@@ -1787,9 +1769,9 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
   unsigned NumElems = Node->getNumOperands();
   SDValue Value1, Value2;
   DebugLoc dl = Node->getDebugLoc();
-  MVT VT = Node->getValueType(0);
-  MVT OpVT = Node->getOperand(0).getValueType();
-  MVT EltVT = VT.getVectorElementType();
+  EVT VT = Node->getValueType(0);
+  EVT OpVT = Node->getOperand(0).getValueType();
+  EVT EltVT = VT.getVectorElementType();
 
   // If the only non-undef value is the low element, turn this into a
   // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
@@ -1833,7 +1815,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
       CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
     } else {
       assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
-      const Type *OpNTy = OpVT.getTypeForMVT();
+      const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext());
       CV.push_back(UndefValue::get(OpNTy));
     }
   }
@@ -1886,8 +1868,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
   for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
-    MVT ArgVT = Node->getOperand(i).getValueType();
-    const Type *ArgTy = ArgVT.getTypeForMVT();
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
     Entry.Node = Node->getOperand(i);
     Entry.Ty = ArgTy;
     Entry.isSExt = isSigned;
    Entry.isZExt = !isSigned;
@@ -1897,10 +1879,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
                                          TLI.getPointerTy());
 
   // Splice the libcall in wherever FindInputOutputChains tells us to.
- const Type *RetTy = Node->getValueType(0).getTypeForMVT(); + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, CallingConv::C, false, Callee, Args, DAG, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance @@ -1916,8 +1900,8 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected request for libcall!"); + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1932,8 +1916,8 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; - switch (Node->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected request for libcall!"); + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; case MVT::i64: LC = Call_I64; break; @@ -1948,7 +1932,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, /// legal for the target. SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, - MVT DestVT, + EVT DestVT, DebugLoc dl) { if (Op0.getValueType() == MVT::i32) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2018,15 +2002,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // as a negative number. To counteract this, the dynamic code adds an // offset depending on the data type. uint64_t FF; - switch (Op0.getValueType().getSimpleVT()) { - default: assert(0 && "Unsupported integer type!"); + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) } if (TLI.isLittleEndian()) FF <<= 32; - Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF); + Constant *FudgeFactor = ConstantInt::get( + Type::getInt64Ty(*DAG.getContext()), FF); SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); @@ -2054,17 +2039,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, - MVT DestVT, + EVT DestVT, bool isSigned, DebugLoc dl) { // First step, figure out the appropriate *INT_TO_FP operation to use. - MVT NewInTy = LegalOp.getValueType(); + EVT NewInTy = LegalOp.getValueType(); unsigned OpToUse = 0; // Scan for the appropriate larger type to use. 
while (1) { - NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. @@ -2096,17 +2081,17 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, - MVT DestVT, + EVT DestVT, bool isSigned, DebugLoc dl) { // First step, figure out the appropriate FP_TO*INT operation to use. - MVT NewOutTy = DestVT; + EVT NewOutTy = DestVT; unsigned OpToUse = 0; // Scan for the appropriate larger type to use. while (1) { - NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT()+1); + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { @@ -2134,11 +2119,11 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. /// SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { - MVT VT = Op.getValueType(); - MVT SHVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; - switch (VT.getSimpleVT()) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); abort(); + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2183,15 +2168,15 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { static const uint64_t mask[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL }; - MVT VT = Op.getValueType(); - MVT ShVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8]) @@ -2217,8 +2202,8 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // return popcount(~x); // // but see also: http://www.hackersdelight.org/HDcode/nlz.cc - MVT VT = Op.getValueType(); - MVT ShVT = TLI.getShiftAmountTy(); + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); @@ -2233,7 +2218,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // see also http://www.hackersdelight.org/HDcode/ntz.cc - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, @@ -2272,7 +2257,6 @@ void 
SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(DAG.getConstant(1, Node->getValueType(0))); break; case ISD::EH_RETURN: - case ISD::DECLARE: case ISD::DBG_LABEL: case ISD::EH_LABEL: case ISD::PREFETCH: @@ -2291,21 +2275,22 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Node->getOperand(i)); break; case ISD::UNDEF: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); if (VT.isInteger()) Results.push_back(DAG.getConstant(0, VT)); else if (VT.isFloatingPoint()) Results.push_back(DAG.getConstantFP(0, VT)); else - assert(0 && "Unknown value type!"); + llvm_unreachable("Unknown value type!"); break; } case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; std::pair CallResult = - TLI.LowerCallTo(Node->getOperand(0), Type::VoidTy, + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -2326,7 +2311,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::SIGN_EXTEND_INREG: { // NOTE: we could fall back on load/store here too for targets without // SAR. However, it is doubtful that any exist. - MVT ExtraVT = cast(Node->getOperand(1))->getVT(); + EVT ExtraVT = cast(Node->getOperand(1))->getVT(); unsigned BitsDiff = Node->getValueType(0).getSizeInBits() - ExtraVT.getSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy()); @@ -2343,7 +2328,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create // new ones, as reuse may inhibit scheduling. 
- MVT ExtraVT = cast(Node->getOperand(1))->getVT(); + EVT ExtraVT = cast(Node->getOperand(1))->getVT(); Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, Node->getValueType(0), dl); Results.push_back(Tmp1); @@ -2357,8 +2342,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; case ISD::FP_TO_UINT: { SDValue True, False; - MVT VT = Node->getOperand(0).getValueType(); - MVT NVT = Node->getValueType(0); + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); const uint64_t zero[] = {0, 0}; APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero)); APInt x = APInt::getSignBit(NVT.getSizeInBits()); @@ -2379,14 +2364,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::VAARG: { const Value *V = cast(Node->getOperand(2))->getValue(); - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0); // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> - getTypeAllocSize(VT.getTypeForMVT()), + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); @@ -2434,8 +2419,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SmallVector Mask; cast(Node)->getMask(Mask); - MVT VT = Node->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); SmallVector Ops; for (unsigned i = 0; i != NumElems; ++i) { @@ -2458,7 +2443,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::EXTRACT_ELEMENT: { - MVT OpTy = Node->getOperand(0).getValueType(); + EVT OpTy = Node->getOperand(0).getValueType(); if (cast(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), @@ -2507,7 +2492,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; case ISD::FABS: { // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = DAG.getConstantFP(0.0, VT); Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), @@ -2622,7 +2607,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::SUB: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); @@ -2634,7 +2619,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::UREM: case ISD::SREM: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; @@ -2662,7 +2647,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), @@ -2680,7 +2665,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::MULHS: { unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI; - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && "If this wasn't legal, it shouldn't have been created!"); @@ -2690,7 +2675,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::MUL: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); // See if multiply or divide can be lowered using two-result operations. // We just need the low half of the multiply; try both the signed @@ -2729,7 +2714,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - MVT OType = Node->getValueType(1); + EVT OType = Node->getValueType(1); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -2770,7 +2755,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::UMULO: case ISD::SMULO: { - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -2786,8 +2771,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) { - MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -2800,7 +2785,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // type in some cases cases. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. 
- assert(0 && "Don't know how to expand this operation yet!"); + llvm_unreachable("Don't know how to expand this operation yet!"); } if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy()); @@ -2816,7 +2801,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, break; } case ISD::BUILD_PAIR: { - MVT PairTy = Node->getValueType(0); + EVT PairTy = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, @@ -2845,14 +2830,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); - MVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(); MachineFunction &MF = DAG.getMachineFunction(); unsigned EntrySize = MF.getJumpTableInfo()->getEntrySize(); Index= DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); - MVT MemVT = MVT::getIntegerVT(EntrySize * 8); + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, PseudoSourceValue::getJumpTable(), 0, MemVT); Addr = LD; @@ -2899,7 +2884,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); Results.push_back(Tmp1); @@ -2958,12 +2943,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } void SelectionDAGLegalize::PromoteNode(SDNode *Node, SmallVectorImpl &Results) { - MVT OVT = Node->getValueType(0); + EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || - Node->getOpcode() == ISD::SINT_TO_FP) { + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC) { OVT = Node->getOperand(0).getValueType(); } - MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { @@ -2973,10 +2959,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, // Zero extend the argument. Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); // Perform the larger operation. 
- Tmp1 = DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Tmp1); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); if (Node->getOpcode() == ISD::CTTZ) { //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, @@ -2987,7 +2973,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, DAG.getConstant(NVT.getSizeInBits() - OVT.getSizeInBits(), NVT)); } - Results.push_back(Tmp1); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); @@ -3012,16 +2998,26 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; case ISD::AND: case ISD::OR: - case ISD::XOR: - assert(OVT.isVector() && "Don't know how to promote scalar logic ops"); - // Bit convert each of the values to the new type. - Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1)); + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BIT_CONVERT; + TruncOp = ISD::BIT_CONVERT; + } else if (OVT.isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + llvm_report_error("Cannot promote logic operation"); + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); - // Bit convert the result back the original type. - Results.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1)); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); break; - case ISD::SELECT: + } + case ISD::SELECT: { unsigned ExtOp, TruncOp; if (Node->getValueType(0).isVector()) { ExtOp = ISD::BIT_CONVERT; @@ -3046,6 +3042,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, DAG.getIntPtrConstant(0)); Results.push_back(Tmp1); break; + } case ISD::VECTOR_SHUFFLE: { SmallVector Mask; cast(Node)->getMask(Mask); @@ -3061,31 +3058,14 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } case ISD::SETCC: { - // First step, figure out the appropriate operation to use. - // Allow SETCC to not be supported for all legal data types - // Mostly this targets FP - MVT NewInTy = Node->getOperand(0).getValueType(); - MVT OldVT = NewInTy; OldVT = OldVT; - - // Scan for the appropriate larger type to use. - while (1) { - NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT()+1); - - assert(NewInTy.isInteger() == OldVT.isInteger() && - "Fell off of the edge of the integer world"); - assert(NewInTy.isFloatingPoint() == OldVT.isFloatingPoint() && - "Fell off of the edge of the floating point world"); - - // If the target supports SETCC of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::SETCC, NewInTy)) - break; - } - if (NewInTy.isInteger()) - assert(0 && "Cannot promote Legal Integer SETCC yet"); - else { - Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp1); - Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NewInTy, Tmp2); + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1, Tmp2, Node->getOperand(2))); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c3c1beabd5f09..84e39b4803966 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -20,10 +20,12 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// GetFPLibCall - Return the right libcall for the given floating point type. -static RTLIB::Libcall GetFPLibCall(MVT VT, +static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, @@ -41,18 +43,17 @@ static RTLIB::Libcall GetFPLibCall(MVT VT, //===----------------------------------------------------------------------===// void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Soften float result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + errs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SoftenFloatResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to soften the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to soften the result of this operator!"); case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; @@ -107,14 +108,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. 
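// A sketch (not from the patch) of the promotion rules the PromoteNode
// changes above encode, using i8 promoted to i32. Bitwise ops may use any
// extension because only the low bits survive the final truncate; a setcc
// must extend according to the signedness of its condition code.
#include <cstdint>

uint8_t and8(uint8_t a, uint8_t b) {
  uint32_t wa = a, wb = b;    // ANY_EXTEND (zero extension chosen here)
  return (uint8_t)(wa & wb);  // operate in the wide type, TRUNCATE back
}

bool setlt8(int8_t a, int8_t b) {
  int32_t wa = a, wb = b;     // SIGN_EXTEND for a signed condition code
  return wa < wb;             // SETLT in the promoted type
}

bool setult8(uint8_t a, uint8_t b) {
  uint32_t wa = a, wb = b;    // ZERO_EXTEND for an unsigned condition code
  return wa < wb;             // SETULT in the promoted type
}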
return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), BitConvertToInteger(N->getOperand(1))); } SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), - TLI.getTypeToTransformTo(N->getValueType(0))); + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -125,7 +126,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) @@ -136,7 +137,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -148,7 +149,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, @@ -163,8 +164,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue RHS = BitConvertToInteger(N->getOperand(1)); DebugLoc dl = N->getDebugLoc(); - MVT LVT = LHS.getValueType(); - MVT RVT = RHS.getValueType(); + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); unsigned LSize = LVT.getSizeInBits(); unsigned RSize = RVT.getSizeInBits(); @@ -199,7 +200,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, @@ -210,7 +211,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -222,7 +223,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, @@ -233,7 +234,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = 
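// The SoftenFloatRes_FABS mask trick above, sketched as standalone C++
// (illustrative only): bitcast the float to an integer and clear the sign
// bit with Mask = ~(1 << (Size-1)).
#include <cstdint>
#include <cstring>

float soft_fabs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits); // BitConvertToInteger
  bits &= ~(1u << 31);                 // AND with the sign-clearing mask
  std::memcpy(&x, &bits, sizeof x);    // convert back
  return x;
}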
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, @@ -244,7 +245,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, @@ -255,7 +256,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, @@ -266,7 +267,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, @@ -277,7 +278,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, @@ -288,7 +289,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -300,7 +301,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, @@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; @@ -324,7 +325,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); @@ -332,7 +333,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { } 
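// The GetFPLibCall pattern threaded through every SoftenFloatRes_F* routine
// above: one libcall per supported float width, chosen by the value type.
// The enum and names below are illustrative stand-ins, not LLVM's RTLIB
// entries.
enum Libcall { ADD_F32, ADD_F64, ADD_F80, ADD_PPCF128, UNKNOWN_LIBCALL };

Libcall getAddLibcall(unsigned bits) {
  switch (bits) {
  case 32:  return ADD_F32;     // lowers to a call such as __addsf3
  case 64:  return ADD_F64;     // __adddf3
  case 80:  return ADD_F80;
  case 128: return ADD_PPCF128;
  default:  return UNKNOWN_LIBCALL;
  }
}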
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); @@ -340,7 +341,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -354,7 +355,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { assert(N->getOperand(1).getValueType() == MVT::i32 && "Unsupported power type!"); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, @@ -365,7 +366,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -377,7 +378,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, @@ -388,7 +389,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, @@ -399,7 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, @@ -410,7 +411,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; return MakeLibCall(GetFPLibCall(N->getValueType(0), @@ -422,7 +423,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); return MakeLibCall(GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, @@ -434,8 +435,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast(N); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); SDValue NewL; @@ -479,19 +480,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); SDValue NewVAARG; NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); - + // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); @@ -500,9 +501,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { bool Signed = N->getOpcode() == ISD::SINT_TO_FP; - MVT SVT = N->getOperand(0).getValueType(); - MVT RVT = N->getValueType(0); - MVT NVT = MVT(); + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT NVT = EVT(); DebugLoc dl = N->getDebugLoc(); // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to @@ -521,7 +522,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(RVT), &Op, 1, false, dl); + return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), &Op, 1, false, dl); } @@ -530,18 +531,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Soften float operand " << OpNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SoftenFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to soften this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; @@ -574,7 +574,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, DebugLoc dl) { SDValue LHSInt = GetSoftenedFloat(NewLHS); SDValue RHSInt = GetSoftenedFloat(NewRHS); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); @@ -637,7 +637,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, } } - MVT RetVT = MVT::i32; // FIXME: is this the correct return type? + EVT RetVT = MVT::i32; // FIXME: is this the correct return type? SDValue Ops[2] = { LHSInt, RHSInt }; NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); NewRHS = DAG.getConstant(0, RetVT); @@ -659,8 +659,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { - MVT SVT = N->getOperand(0).getValueType(); - MVT RVT = N->getValueType(0); + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); @@ -688,7 +688,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -696,7 +696,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -767,7 +767,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. 
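// How a softened f32 setcc ends up looking, per SoftenSetCCOperands above:
// the comparison becomes an integer libcall and the condition is re-tested
// against the constant 0 (the NewRHS). my_ltsf2 is a hypothetical local
// stand-in for the runtime's comparison routine, defined here so the sketch
// is self-contained.
int my_ltsf2(float a, float b) {       // < 0 iff a < b (NaNs ignored here)
  return a < b ? -1 : (a == b ? 0 : 1);
}

bool soft_setlt(float a, float b) {
  return my_ltsf2(a, b) < 0;           // NewLHS = libcall result, SETLT 0
}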
void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Expand float result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -778,11 +778,10 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandFloatResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to expand the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; @@ -830,7 +829,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast(N)->getValueAPF().bitcastToAPInt(); @@ -982,7 +981,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); } @@ -1067,7 +1066,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, SDValue Ptr = LD->getBasePtr(); DebugLoc dl = N->getDebugLoc(); - MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); @@ -1090,10 +1089,10 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Src = N->getOperand(0); - MVT SrcVT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; DebugLoc dl = N->getDebugLoc(); @@ -1135,7 +1134,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; const uint64_t *Parts = 0; - switch (SrcVT.getSimpleVT()) { + switch (SrcVT.getSimpleVT().SimpleTy) { default: assert(false && "Unsupported UINT_TO_FP!"); case MVT::i32: @@ -1167,7 +1166,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, /// types of the node are known to be legal, but other operands of the node may /// need promotion or expansion as well as the specified one. 
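// The TwoE32/TwoE64/TwoE128 fixup above, demonstrated with ordinary doubles:
// an unsigned source is converted as if signed, then the power of two that
// was lost to the sign bit is added back.
#include <cstdint>

double u32_to_double(uint32_t x) {
  double d = (double)(int32_t)x; // SINT_TO_FP on the same bits
  if ((int32_t)x < 0)            // sign bit was set: result came out 2^32 low
    d += 4294967296.0;           // add the TwoE32 constant back
  return d;
}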
bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Expand float operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType()) @@ -1178,11 +1177,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandFloatOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to expand this operator's operand!"); case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; @@ -1224,7 +1222,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); assert(VT == MVT::ppcf128 && "Unsupported setcc type!"); // FIXME: This generated code sucks. We want to generate @@ -1276,7 +1274,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on @@ -1297,7 +1295,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { - MVT RVT = N->getValueType(0); + EVT RVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on @@ -1374,7 +1372,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - MVT NVT = TLI.getTypeToTransformTo(ST->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ST->getValue().getValueType()); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0c826f67c24a4..8ac8063be9ffe 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,6 +20,8 @@ #include "LegalizeTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -31,7 +33,7 @@ using namespace llvm; /// may also have invalid operands or may have other results that need /// expansion, we just know that (at least) one result needs promotion. void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Promote integer result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. 
@@ -41,11 +43,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "PromoteIntegerResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to promote this operator!"); - abort(); + llvm_unreachable("Do not know how to promote this operator!"); case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break; @@ -161,10 +162,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); - MVT NInVT = TLI.getTypeToTransformTo(InVT); - MVT OutVT = N->getValueType(0); - MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { @@ -201,7 +202,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, - MVT::getIntegerVT(NOutVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), NOutVT.getSizeInBits()), JoinIntegers(Lo, Hi)); return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp); } @@ -211,24 +212,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) { return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp)); } - // Otherwise, lower the bit-convert to a store/load from the stack. - // Create the stack frame object. Make sure it is aligned for both - // the source and destination types. - SDValue FIPtr = DAG.CreateStackTemporary(InVT, OutVT); - int FI = cast(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is an extending load from the stack slot. - return DAG.getExtLoad(ISD::EXTLOAD, dl, NOutVT, Store, FIPtr, SV, 0, OutVT); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); @@ -240,18 +231,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), JoinIntegers(N->getOperand(0), N->getOperand(1))); } SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // FIXME there is no actual debug info here DebugLoc dl = N->getDebugLoc(); // Zero extend things like i1, sign extend everything else. 
It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(VT), + SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT), SDValue(N, 0)); assert(isa(Result) && "Didn't constant fold ext?"); return Result; @@ -263,7 +254,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && "can only promote integers"); - MVT OutVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); @@ -273,8 +264,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); DebugLoc dl = N->getDebugLoc(); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode(ISD::SUB, dl, NVT, Op, @@ -290,8 +281,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - MVT OVT = N->getValueType(0); - MVT NVT = Op.getValueType(); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off @@ -303,63 +294,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { - MVT OldVT = N->getValueType(0); - SDValue OldVec = N->getOperand(0); - if (getTypeAction(OldVec.getValueType()) == WidenVector) - OldVec = GetWidenedVector(N->getOperand(0)); - unsigned OldElts = OldVec.getValueType().getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); - - if (OldElts == 1) { - assert(!isTypeLegal(OldVec.getValueType()) && - "Legal one-element vector of a type needing promotion!"); - // It is tempting to follow GetScalarizedVector by a call to - // GetPromotedInteger, but this would be wrong because the - // scalarized value may not yet have been processed. - return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), - GetScalarizedVector(OldVec)); - } - - // Convert to a vector half as long with an element type of twice the width, - // for example <4 x i16> -> <2 x i32>. - assert(!(OldElts & 1) && "Odd length vectors not supported!"); - MVT NewVT = MVT::getIntegerVT(2 * OldVT.getSizeInBits()); - assert(OldVT.isSimple() && NewVT.isSimple()); - - SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getVectorVT(NewVT, OldElts / 2), - OldVec); - - // Extract the element at OldIdx / 2 from the new vector. - SDValue OldIdx = N->getOperand(1); - SDValue NewIdx = DAG.getNode(ISD::SRL, dl, OldIdx.getValueType(), OldIdx, - DAG.getConstant(1, TLI.getPointerTy())); - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, NewIdx); - - // Select the appropriate half of the element: Lo if OldIdx was even, - // Hi if it was odd. 
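// A sketch of the promoted ctlz/cttz corrections above (i8 carried in i32),
// assuming the GCC/Clang __builtin_clz/__builtin_ctz intrinsics.
#include <cstdint>

unsigned ctlz8(uint8_t x) {
  uint32_t wide = x;                            // zero extend to NVT
  unsigned n = wide ? __builtin_clz(wide) : 32; // count in the wide type
  return n - (32 - 8);   // subtract the extra leading bits of the wide type
}

unsigned cttz8(uint8_t x) {
  uint32_t wide = (uint32_t)x | (1u << 8); // set the bit just off the top,
  return __builtin_ctz(wide);              // so a zero input yields 8
}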
- SDValue Lo = Elt; - SDValue Hi = DAG.getNode(ISD::SRL, dl, NewVT, Elt, - DAG.getConstant(OldVT.getSizeInBits(), - TLI.getPointerTy())); - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - - // Extend to the promoted type. - SDValue Odd = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, OldIdx); - SDValue Res = DAG.getNode(ISD::SELECT, dl, NewVT, Odd, Hi, Lo); - return DAG.getNode(ISD::ANY_EXTEND, dl, TLI.getTypeToTransformTo(OldVT), Res); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), + N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NewOpc = N->getOpcode(); DebugLoc dl = N->getDebugLoc(); // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT // and SINT conversions are Custom, there is no way to tell which is preferable. - // We choose SINT because that's the right thing on PPC.) + // We choose SINT because that's the right thing on PPC.) if (N->getOpcode() == ISD::FP_TO_UINT && !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) @@ -376,7 +325,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { @@ -403,7 +352,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); @@ -421,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(1)); - MVT ValueVTs[] = { N->getValueType(0), NVT }; + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), DAG.getVTList(ValueVTs, 2), Ops, 2); @@ -442,8 +391,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { // sign extension of its truncation to the original type. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = LHS.getValueType(); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); DebugLoc dl = N->getDebugLoc(); // Do the arithmetic in the larger type. 
@@ -487,7 +436,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - MVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); assert(isTypeLegal(SVT) && "Illegal SetCC type!"); DebugLoc dl = N->getDebugLoc(); @@ -496,14 +445,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { N->getOperand(1), N->getOperand(2)); // Convert to the expected type. - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(N->getValueType(0)), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); } @@ -532,18 +481,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { // The input value must be properly zero extended. - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Res = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; switch (getTypeAction(N->getOperand(0).getValueType())) { - default: assert(0 && "Unknown type action!"); + default: llvm_unreachable("Unknown type action!"); case Legal: case ExpandInteger: Res = N->getOperand(0); @@ -565,8 +514,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { // zero extension of its truncation to the original type. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = LHS.getValueType(); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); DebugLoc dl = N->getDebugLoc(); // Do the arithmetic in the larger type. @@ -594,17 +543,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(TLI.getTypeToTransformTo(N->getValueType(0))); + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); } SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - MVT RegVT = TLI.getRegisterType(VT); - unsigned NumRegs = TLI.getNumRegisters(VT); + EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); // The argument is passed as NumRegs registers of type RegVT. SmallVector Parts(NumRegs); @@ -618,7 +567,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { std::reverse(Parts.begin(), Parts.end()); // Assemble the parts in the promoted type. 
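// Why PromoteIntRes_SRL zero-extends (and SRA sign-extends) above: the high
// bits of the promoted register are garbage, and a right shift drags them
// down into the result unless they first hold the correct extension. Sketch
// for i8 carried in a 32-bit register, assuming the usual arithmetic right
// shift on signed ints.
#include <cstdint>

uint8_t srl8(uint32_t promoted, unsigned amt) {
  uint32_t z = promoted & 0xFF;  // ZExtPromotedInteger
  return (uint8_t)(z >> amt);    // safe: only the real bits shift down
}

int8_t sra8(uint32_t promoted, unsigned amt) {
  int32_t s = (int8_t)promoted;  // SExtPromotedInteger
  return (int8_t)(s >> amt);     // arithmetic shift sees the true sign
}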
- MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); for (unsigned i = 1; i < NumRegs; ++i) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); @@ -650,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Promote integer operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -659,11 +608,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "PromoteIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to promote this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to promote this operator's operand!"); case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break; @@ -719,7 +667,7 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // insert sign extends for ALL conditions, but zero extend is cheaper on // many machines (an AND instead of two shifts), so prefer it. switch (CCCode) { - default: assert(0 && "Unknown integer comparison!"); + default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: case ISD::SETNE: case ISD::SETUGE: @@ -770,7 +718,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - MVT SVT = TLI.getSetCCResultType(MVT::Other); + EVT SVT = TLI.getSetCCResultType(MVT::Other); SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. @@ -780,7 +728,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { // Since the result type is legal, the operands must promote to it. - MVT OVT = N->getOperand(0).getValueType(); + EVT OVT = N->getOperand(0).getValueType(); SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); SDValue Hi = GetPromotedInteger(N->getOperand(1)); assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); @@ -795,7 +743,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // The vector type is legal but the element type is not. This implies // that the vector is a power-of-two in length and that the element // type does not have a strange size (eg: it is not i1). - MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); assert(!(NumElts & 1) && "Legal vector of one illegal element?"); @@ -871,7 +819,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote condition"); // Promote all the way up to the canonical SetCC type. 
- MVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); + EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, @@ -962,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Expand integer result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -973,11 +921,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandIntegerResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to expand the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; @@ -1043,10 +990,10 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); - MVT NVT = InL.getValueType(); + EVT NVT = InL.getValueType(); unsigned VTBits = N->getValueType(0).getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); - MVT ShTy = N->getOperand(1).getValueType(); + EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { if (Amt > VTBits) { @@ -1060,7 +1007,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, Hi = InL; } else if (Amt == 1 && TLI.isOperationLegalOrCustom(ISD::ADDC, - TLI.getTypeToExpandTo(NVT))) { + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { // Emit this X << 1 as X+X. SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); SDValue LoOps[2] = { InL, InL }; @@ -1130,8 +1077,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, bool DAGTypeLegalizer:: ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); - MVT ShTy = Amt.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); unsigned ShBits = ShTy.getSizeInBits(); unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && @@ -1158,7 +1105,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(~HighBitMask, ShTy)); switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: Lo = DAG.getConstant(0, NVT); // Low part is zero. Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. 
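// The "X << 1 as X+X" expansion above, written out for a 64-bit value held
// in two 32-bit halves: the ADDC carry out of the low half is exactly the
// bit that ADDE shifts into the high half.
#include <cstdint>

void shl1(uint32_t &lo, uint32_t &hi) {
  uint32_t sum = lo + lo;               // ADDC: doubling is shifting left
  uint32_t carry = sum < lo ? 1u : 0u;  // carry out of the low half
  hi = hi + hi + carry;                 // ADDE: add with carry in
  lo = sum;
}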
@@ -1186,7 +1133,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Amt); unsigned Op1, Op2; switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; case ISD::SRL: case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; @@ -1208,8 +1155,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { bool DAGTypeLegalizer:: ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); - MVT ShTy = Amt.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); @@ -1226,7 +1173,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Lo1, Hi1, Lo2, Hi2; switch (N->getOpcode()) { - default: assert(0 && "Unknown shift"); + default: llvm_unreachable("Unknown shift"); case ISD::SHL: // ShAmt < NVTBits Lo1 = DAG.getConstant(0, NVT); // Low part is zero. @@ -1283,7 +1230,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); - MVT NVT = LHSL.getValueType(); + EVT NVT = LHSL.getValueType(); SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; @@ -1295,7 +1242,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, bool hasCarry = TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::ADDC : ISD::SUBC, - TLI.getTypeToExpandTo(NVT)); + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (hasCarry) { SDVTList VTList = DAG.getVTList(NVT, MVT::Flag); @@ -1384,7 +1331,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1408,14 +1355,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); - MVT EVT = cast(N->getOperand(1))->getVT(); + EVT NVT = Lo.getValueType(); + EVT EVT = cast(N->getOperand(1))->getVT(); unsigned NVTBits = NVT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, - DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. 
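// The Lo/Hi decomposition performed by ExpandShiftByConstant and
// ExpandShiftWithKnownAmountBit above, as a plain double-word left shift
// (32-bit parts, amt < 64 assumed).
#include <cstdint>

void shl64(uint32_t &lo, uint32_t &hi, unsigned amt) {
  if (amt == 0) return;
  if (amt >= 32) {                        // NVTBits and up: Lo becomes zero
    hi = lo << (amt - 32);                // high part comes entirely from Lo
    lo = 0;
  } else {                                // amt < NVTBits: bits cross halves
    hi = (hi << amt) | (lo >> (32 - amt));
    lo <<= amt;
  }
}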
@@ -1428,14 +1375,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); - MVT EVT = cast(N->getOperand(1))->getVT(); + EVT NVT = Lo.getValueType(); + EVT EVT = cast(N->getOperand(1))->getVT(); unsigned NVTBits = NVT.getSizeInBits(); unsigned EVTBits = EVT.getSizeInBits(); if (NVTBits < EVTBits) { Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, - DAG.getValueType(MVT::getIntegerVT(EVTBits - NVTBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), EVTBits - NVTBits))); } else { Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part must be zero, make it explicit. @@ -1453,7 +1400,7 @@ void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); const APInt &Cst = cast(N)->getAPIntValue(); Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT); @@ -1465,7 +1412,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, DebugLoc dl = N->getDebugLoc(); // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); @@ -1484,7 +1431,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, DebugLoc dl = N->getDebugLoc(); // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); Hi = DAG.getConstant(0, NVT); @@ -1495,7 +1442,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, DebugLoc dl = N->getDebugLoc(); // cttz (HiLo) -> Lo != 0 ? 
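// The split-register bit counts above in plain C++ (GCC/Clang builtins
// assumed): ctlz looks at the high half first, cttz at the low half, and
// ctpop is simply the sum of the two halves' counts.
#include <cstdint>

unsigned ctlz64(uint32_t lo, uint32_t hi) {
  if (hi != 0) return __builtin_clz(hi);         // Hi != 0 ? ctlz(Hi)
  return lo != 0 ? __builtin_clz(lo) + 32 : 64;  // : ctlz(Lo) + 32
}

unsigned cttz64(uint32_t lo, uint32_t hi) {
  if (lo != 0) return __builtin_ctz(lo);         // Lo != 0 ? cttz(Lo)
  return hi != 0 ? __builtin_ctz(hi) + 32 : 64;  // : cttz(Hi) + 32
}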
cttz(Lo) : (cttz(Hi)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT NVT = Lo.getValueType(); + EVT NVT = Lo.getValueType(); SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); @@ -1512,7 +1459,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); @@ -1522,7 +1469,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); @@ -1538,8 +1485,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); @@ -1551,10 +1498,10 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, assert(NVT.isByteSized() && "Expanded type not byte sized!"); if (N->getMemoryVT().bitsLE(NVT)) { - MVT EVT = N->getMemoryVT(); + EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - EVT, isVolatile, Alignment); + MemVT, isVolatile, Alignment); // Remember the chain. Ch = Lo.getValue(1); @@ -1580,7 +1527,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); - MVT NEVT = MVT::getIntegerVT(ExcessBits); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; @@ -1597,14 +1544,15 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else { // Big-endian - high bits are at low addresses. Favor aligned loads at // the cost of some bit-fiddling. - MVT EVT = N->getMemoryVT(); - unsigned EBytes = EVT.getStoreSizeInBits()/8; + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), isVolatile, Alignment); // Increment the pointer to the other half. @@ -1613,7 +1561,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Load the rest of the low bits. 
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - MVT::getIntegerVT(ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1652,8 +1600,8 @@ void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); @@ -1742,7 +1690,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1762,7 +1710,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // If we can emit an efficient shift operation, do so now. Check to see if @@ -1788,7 +1736,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // Next check to see if the target supports this SHL_PARTS operation or if it // will custom expand it. - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || Action == TargetLowering::Custom) { @@ -1797,7 +1745,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, GetExpandedInteger(N->getOperand(0), LHSL, LHSH); SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; - MVT VT = LHSL.getValueType(); + EVT VT = LHSL.getValueType(); Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); Hi = Lo.getValue(1); return; @@ -1838,7 +1786,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, else if (VT == MVT::i128) LC = RTLIB::SRA_I128; } - + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); @@ -1846,12 +1794,12 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, } if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) - assert(0 && "Unsupported shift!"); + llvm_unreachable("Unsupported shift!"); } void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1874,7 +1822,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); } } @@ -1882,7 +1830,7 @@ void DAGTypeLegalizer:: ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { DebugLoc dl = N->getDebugLoc(); GetExpandedInteger(N->getOperand(0), Lo, Hi); - MVT EVT = 
cast<VTSDNode>(N->getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); if (EVT.bitsLE(Lo.getValueType())) { // sext_inreg the low part if needed. @@ -1900,13 +1848,13 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, - DAG.getValueType(MVT::getIntegerVT(ExcessBits))); + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits))); } } void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1926,7 +1874,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, @@ -1937,7 +1885,7 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1957,7 +1905,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1977,7 +1925,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { @@ -1996,7 +1944,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SplitInteger(Res, Lo, Hi); unsigned ExcessBits = Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); - Hi = DAG.getZeroExtendInReg(Hi, dl, MVT::getIntegerVT(ExcessBits)); + Hi = DAG.getZeroExtendInReg(Hi, dl, EVT::getIntegerVT(*DAG.getContext(), ExcessBits)); } } @@ -2010,7 +1958,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Expand integer operand: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -2019,11 +1967,10 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ExpandIntegerOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); errs() << "\n"; #endif - assert(0 && "Do not know how to expand this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to expand this operator's operand!"); case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break; case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; @@ -2070,7 +2017,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); - MVT VT = NewLHS.getValueType(); + EVT VT = NewLHS.getValueType(); if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { if (RHSLo == RHSHi) { @@ -2105,7 +2052,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // FIXME: This generated code sucks. ISD::CondCode LowCC; switch (CCCode) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETLT: case ISD::SETULT: LowCC = ISD::SETULT; break; case ISD::SETGT: @@ -2122,7 +2069,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL); SDValue Tmp1, Tmp2; Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); @@ -2228,7 +2175,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); - MVT DstVT = N->getValueType(0); + EVT DstVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); @@ -2242,8 +2189,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); assert(OpNo == 1 && "Can only expand the stored value so far"); - MVT VT = N->getOperand(1).getValueType(); - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getOperand(1).getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); int SVOffset = N->getSrcValueOffset(); @@ -2267,7 +2214,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); - MVT NEVT = MVT::getIntegerVT(ExcessBits); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; @@ -2282,11 +2229,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // the cost of some bit-fiddling. 
GetExpandedInteger(N->getValue(), Lo, Hi); - MVT EVT = N->getMemoryVT(); - unsigned EBytes = EVT.getStoreSizeInBits()/8; + EVT ExtVT = N->getMemoryVT(); + unsigned EBytes = ExtVT.getStoreSize(); unsigned IncrementSize = NVT.getSizeInBits()/8; unsigned ExcessBits = (EBytes - IncrementSize)*8; - MVT HiVT = MVT::getIntegerVT(EVT.getSizeInBits() - ExcessBits); + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), ExtVT.getSizeInBits() - ExcessBits); if (ExcessBits < NVT.getSizeInBits()) { // Transfer high bits from the top of Lo to the bottom of Hi. @@ -2309,7 +2256,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - MVT::getIntegerVT(ExcessBits), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2324,8 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); - MVT SrcVT = Op.getValueType(); - MVT DstVT = N->getValueType(0); + EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ @@ -2360,7 +2307,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. - SDValue FudgePtr = DAG.getConstantPool(ConstantInt::get(FF.zext(64)), + SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), TLI.getPointerTy()); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. 
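The UINT_TO_FP expansion above uses the classic signed-conversion trick: convert the operand with SINT_TO_FP, then add back 2^64 (the constant-pool "fudge factor", selected by the SETCC on the sign bit) whenever the value was treated as negative. A minimal scalar C++ sketch of the same identity, independent of the SelectionDAG machinery and not part of this patch (rounding subtleties of the two-step conversion aside):

    #include <cstdint>

    // uint64 -> double via a signed conversion, mirroring the expanded DAG:
    // the signed convert is off by exactly 2^64 when the sign bit of X is
    // set, and the fudge constant repairs that case.
    double UIntToFP(uint64_t X) {
      double D = static_cast<double>(static_cast<int64_t>(X));
      if (static_cast<int64_t>(X) < 0) // the SETLT-against-zero above
        D += 18446744073709551616.0;   // 2^64, the "FF" fudge factor
      return D;
    }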
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 3135a445431ec..5992f5d534da4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -15,9 +15,11 @@ #include "LegalizeTypes.h" #include "llvm/CallingConv.h" +#include "llvm/Target/TargetData.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<bool> @@ -113,43 +115,43 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (I->getNodeId() != Processed) { if (Mapped != 0) { - cerr << "Unprocessed value in a map!"; + errs() << "Unprocessed value in a map!"; Failed = true; } } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { if (Mapped > 1) { - cerr << "Value with legal type was transformed!"; + errs() << "Value with legal type was transformed!"; Failed = true; } } else { if (Mapped == 0) { - cerr << "Processed value not in any map!"; + errs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { - cerr << "Value in multiple maps!"; + errs() << "Value in multiple maps!"; Failed = true; } } if (Failed) { if (Mapped & 1) - cerr << " ReplacedValues"; + errs() << " ReplacedValues"; if (Mapped & 2) - cerr << " PromotedIntegers"; + errs() << " PromotedIntegers"; if (Mapped & 4) - cerr << " SoftenedFloats"; + errs() << " SoftenedFloats"; if (Mapped & 8) - cerr << " ScalarizedVectors"; + errs() << " ScalarizedVectors"; if (Mapped & 16) - cerr << " ExpandedIntegers"; + errs() << " ExpandedIntegers"; if (Mapped & 32) - cerr << " ExpandedFloats"; + errs() << " ExpandedFloats"; if (Mapped & 64) - cerr << " SplitVectors"; + errs() << " SplitVectors"; if (Mapped & 128) - cerr << " WidenedVectors"; - cerr << "\n"; - abort(); + errs() << " WidenedVectors"; + errs() << "\n"; + llvm_unreachable(0); } } } @@ -210,7 +212,7 @@ bool DAGTypeLegalizer::run() { // Scan the values produced by the node, checking to see if any result // types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { - MVT ResultVT = N->getValueType(i); + EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { default: assert(false && "Unknown action!"); @@ -263,7 +265,7 @@ ScanOperands: if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - MVT OpVT = N->getOperand(i).getValueType(); + EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { default: assert(false && "Unknown action!"); @@ -336,7 +338,7 @@ ScanOperands: } if (i == NumOperands) { - DEBUG(cerr << "Legally typed node: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n"); } } NodeDone: @@ -405,7 +407,7 @@ NodeDone: if (!IgnoreNodeResults(I)) for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) if (!isTypeLegal(I->getValueType(i))) { - cerr << "Result type " << i << " illegal!\n"; + errs() << "Result type " << i << " illegal!\n"; Failed = true; } @@ -413,25 +415,25 @@ NodeDone: for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(I->getOperand(i).getNode()) && !isTypeLegal(I->getOperand(i).getValueType())) { - cerr << "Operand type " << i << " illegal!\n"; + errs() << "Operand type " << i << " illegal!\n"; Failed = true; } if (I->getNodeId() != Processed) { if (I->getNodeId() == NewNode) - cerr << "New node not analyzed?\n"; + errs() << "New node not analyzed?\n"; else if (I->getNodeId() == Unanalyzed) - cerr << "Unanalyzed node not noticed?\n"; + errs() << "Unanalyzed node not noticed?\n"; else if (I->getNodeId() > 0) - cerr << "Operand not processed?\n"; + errs() << "Operand not processed?\n"; else if (I->getNodeId() == ReadyToProcess) - cerr << "Not added to worklist?\n"; + errs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); cerr << "\n"; - abort(); + I->dump(&DAG); errs() << "\n"; + llvm_unreachable(0); } } #endif @@ -479,8 +481,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. 
- for (unsigned j = 0; j < i; ++j) - NewOps.push_back(N->getOperand(j)); + NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } @@ -732,6 +733,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted integer"); AnalyzeNewValue(Result); SDValue &OpEntry = PromotedIntegers[Op]; @@ -740,6 +743,8 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; @@ -748,6 +753,8 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + "Invalid type for scalarized vector"); AnalyzeNewValue(Result); SDValue &OpEntry = ScalarizedVectors[Op]; @@ -767,6 +774,9 @@ void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded integer"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -790,6 +800,9 @@ void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded float"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -813,6 +826,12 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi) { + assert(Lo.getValueType().getVectorElementType() == + Op.getValueType().getVectorElementType() && + 2*Lo.getValueType().getVectorNumElements() == + Op.getValueType().getVectorNumElements() && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for split vector"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. 
AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); @@ -825,6 +844,8 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for widened vector"); AnalyzeNewValue(Result); SDValue &OpEntry = WidenedVectors[Op]; @@ -841,7 +862,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { unsigned BitWidth = Op.getValueType().getSizeInBits(); return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), - MVT::getIntegerVT(BitWidth), Op); + EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); } /// BitConvertVectorToIntegerVector - Convert to a vector of integers of the @@ -849,14 +870,14 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); - MVT EltNVT = MVT::getIntegerVT(EltWidth); + EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = Op.getValueType().getVectorNumElements(); return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(), - MVT::getVectorVT(EltNVT, NumElts), Op); + EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, - MVT DestVT) { + EVT DestVT) { DebugLoc dl = Op.getDebugLoc(); // Create the stack frame object. Make sure it is aligned for both // the source and destination types. @@ -875,7 +896,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, /// The last parameter being TRUE means we are dealing with a /// node with illegal result types. The second parameter denotes the type of /// illegal ResNo in that case. -bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { +bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // See if the target wants to custom lower this node. if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) return false; @@ -900,21 +921,14 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult) { /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { +void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { + // Currently all types are split in half. if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(InVT); + LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); } else { - MVT NewEltVT = InVT.getVectorElementType(); unsigned NumElements = InVT.getVectorNumElements(); - if ((NumElements & (NumElements-1)) == 0) { // Simple power of two vector. - NumElements >>= 1; - LoVT = HiVT = MVT::getVectorVT(NewEltVT, NumElements); - } else { // Non-power-of-two vectors. 
- unsigned NewNumElts_Lo = 1 << Log2_32(NumElements); - unsigned NewNumElts_Hi = NumElements - NewNumElts_Lo; - LoVT = MVT::getVectorVT(NewEltVT, NewNumElts_Lo); - HiVT = MVT::getVectorVT(NewEltVT, NewNumElts_Hi); - } + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), NumElements/2); } } @@ -923,14 +937,14 @@ void DAGTypeLegalizer::GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT) { void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi) { DebugLoc dl = Pair.getDebugLoc(); - MVT NVT = TLI.getTypeToTransformTo(Pair.getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(1)); } -SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, MVT EltVT, +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { DebugLoc dl = Index.getDebugLoc(); // Make sure the index type is big enough to compute in. @@ -952,9 +966,9 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { // Arbitrarily use dlHi for result DebugLoc DebugLoc dlHi = Hi.getDebugLoc(); DebugLoc dlLo = Lo.getDebugLoc(); - MVT LVT = Lo.getValueType(); - MVT HVT = Hi.getValueType(); - MVT NVT = MVT::getIntegerVT(LVT.getSizeInBits() + HVT.getSizeInBits()); + EVT LVT = Lo.getValueType(); + EVT HVT = Hi.getValueType(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); @@ -986,7 +1000,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, /// MakeLibCall - Generate a libcall taking the given operands as arguments and /// returning a result of type RetVT. -SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, +SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl) { TargetLowering::ArgListTy Args; @@ -995,7 +1009,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); @@ -1003,17 +1017,19 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, MVT RetVT, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - const Type *RetTy = RetVT.getTypeForMVT(); + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, CallingConv::C, false, Callee, Args, DAG, dl); + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); return CallInfo.first; } /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents.
-SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { DebugLoc dl = Bool.getDebugLoc(); ISD::NodeType ExtendCode; switch (TLI.getBooleanContents()) { @@ -1039,7 +1055,7 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, MVT VT) { /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT /// bits in Hi. void DAGTypeLegalizer::SplitInteger(SDValue Op, - MVT LoVT, MVT HiVT, + EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi) { DebugLoc dl = Op.getDebugLoc(); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == @@ -1054,7 +1070,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, /// type half the size of Op's. void DAGTypeLegalizer::SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - MVT HalfVT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Op.getValueType().getSizeInBits()/2); SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 02b073221f6a3..859c65668da44 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -64,7 +64,7 @@ private: SoftenFloat, // Convert this float type to a same size integer type. ExpandFloat, // Split this float type into two of half the size. ScalarizeVector, // Replace this one-element vector with its element type. - SplitVector, // This vector type should be split into smaller vectors. + SplitVector, // Split this vector type into two of half the size. WidenVector // This vector type should be widened into a larger vector. }; @@ -74,8 +74,8 @@ private: TargetLowering::ValueTypeActionImpl ValueTypeActions; /// getTypeAction - Return how we should legalize values of this type. - LegalizeAction getTypeAction(MVT VT) const { - switch (ValueTypeActions.getTypeAction(VT)) { + LegalizeAction getTypeAction(EVT VT) const { + switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) { default: assert(false && "Unknown legalize action!"); case TargetLowering::Legal: @@ -96,7 +96,7 @@ private: if (VT.isInteger()) return ExpandInteger; else if (VT.getSizeInBits() == - TLI.getTypeToTransformTo(VT).getSizeInBits()) + TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits()) return SoftenFloat; else return ExpandFloat; @@ -109,8 +109,9 @@ private: } /// isTypeLegal - Return true if this type is legal on this target. - bool isTypeLegal(MVT VT) const { - return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal; + bool isTypeLegal(EVT VT) const { + return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) == + TargetLowering::Legal); } /// IgnoreNodeResults - Pretend all of this node's results are legal. @@ -185,19 +186,19 @@ private: // Common routines. 
SDValue BitConvertToInteger(SDValue Op); SDValue BitConvertVectorToIntegerVector(SDValue Op); - SDValue CreateStackStoreLoad(SDValue Op, MVT DestVT); - bool CustomLowerNode(SDNode *N, MVT VT, bool LegalizeResult); - SDValue GetVectorElementPointer(SDValue VecPtr, MVT EltVT, SDValue Index); + SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); + bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); + SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - SDValue MakeLibCall(RTLIB::Libcall LC, MVT RetVT, + SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, DebugLoc dl); - SDValue PromoteTargetBoolean(SDValue Bool, MVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); void ReplaceValueWithHelper(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); - void SplitInteger(SDValue Op, MVT LoVT, MVT HiVT, + void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); //===--------------------------------------------------------------------===// @@ -224,7 +225,7 @@ private: /// SExtPromotedInteger - Get a promoted operand and sign extend it to the /// final size. SDValue SExtPromotedInteger(SDValue Op) { - MVT OldVT = Op.getValueType(); + EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, @@ -234,7 +235,7 @@ private: /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the /// final size. SDValue ZExtPromotedInteger(SDValue Op) { - MVT OldVT = Op.getValueType(); + EVT OldVT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); Op = GetPromotedInteger(Op); return DAG.getZeroExtendInReg(Op, dl, OldVT); @@ -506,7 +507,6 @@ private: // Vector Result Scalarization: <1 x ty> -> ty. void ScalarizeVectorResult(SDNode *N, unsigned OpNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); - SDValue ScalarizeVecRes_ShiftOp(SDNode *N); SDValue ScalarizeVecRes_UnaryOp(SDNode *N); SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N); @@ -518,6 +518,7 @@ private: SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); SDValue ScalarizeVecRes_VSETCC(SDNode *N); @@ -533,8 +534,8 @@ private: // Vector Splitting Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// - /// GetSplitVector - Given a processed vector Op which was split into smaller - /// vectors, this method returns the smaller vectors. The first elements of + /// GetSplitVector - Given a processed vector Op which was split into vectors + /// of half the size, this method returns the halves. The first elements of /// Op coincide with the elements of Lo; the remaining elements of Op coincide /// with the elements of Hi: Op is what you would get by concatenating Lo and /// Hi. 
For example, if Op is a v8i32 that was split into two v4i32's, then @@ -558,10 +559,10 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_VSETCC(SDNode *N, SDValue &Lo, SDValue &Hi); // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); @@ -641,7 +642,7 @@ private: SDValue BasePtr, const Value *SV, int SVOffset, unsigned Alignment, bool isVolatile, unsigned LdWidth, - MVT ResType, DebugLoc dl); + EVT ResType, DebugLoc dl); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory @@ -664,7 +665,7 @@ private: /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - SDValue ModifyToType(SDValue InOp, MVT WidenVT); + SDValue ModifyToType(SDValue InOp, EVT WidenVT); //===--------------------------------------------------------------------===// @@ -686,7 +687,7 @@ private: /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split (or expanded) into two not necessarily identical pieces. - void GetSplitDestVTs(MVT InVT, MVT &LoVT, MVT &HiVT); + void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 6e5adee84c34f..0eafe62b8576a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -11,9 +11,11 @@ // The routines here perform legalization when the details of the type (such as // whether it is an integer or a float) do not matter. // Expansion is the act of changing a computation in an illegal type to be a -// computation in two identical registers of a smaller type. +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. // Splitting is the act of changing a computation in an illegal type to be a // computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. // //===----------------------------------------------------------------------===// @@ -32,10 +34,10 @@ using namespace llvm; void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT OutVT = N->getValueType(0); - MVT NOutVT = TLI.getTypeToTransformTo(OutVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Handle some special cases efficiently. 
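The header comment above draws the expansion/splitting distinction precisely; as a rough model of the expansion contract (hypothetical helper names, not part of this patch), Lo always holds the least significant half and only the in-memory order depends on endianness:

    #include <cstdint>

    // Model of expanding an illegal i64 into two legal i32 registers.
    struct ExpandedI64 { uint32_t Lo, Hi; }; // Lo = least significant half

    ExpandedI64 Expand(uint64_t V) {
      return ExpandedI64{static_cast<uint32_t>(V),
                         static_cast<uint32_t>(V >> 32)};
    }

    // The Lo part is stored first on little-endian targets, the Hi part
    // first on big-endian targets, so a plain i64 load of the two words
    // reconstructs the original value either way.
    void StoreExpanded(const ExpandedI64 &P, bool IsBigEndian, uint32_t Mem[2]) {
      Mem[0] = IsBigEndian ? P.Hi : P.Lo;
      Mem[1] = IsBigEndian ? P.Lo : P.Hi;
    }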
@@ -59,16 +61,12 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); return; case SplitVector: - // Convert the split parts of the input if it was split in two. GetSplitVector(InOp, Lo, Hi); - if (Lo.getValueType() == Hi.getValueType()) { - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); - return; - } - break; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi); + return; case ScalarizeVector: // Convert the element instead. SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); @@ -78,7 +76,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, case WidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT"); InOp = GetWidenedVector(InOp); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), InVT.getVectorNumElements()/2); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -95,7 +93,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (InVT.isVector() && OutVT.isInteger()) { // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand // is legal but the result is not. - MVT NVT = MVT::getVectorVT(NOutVT, 2); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2); if (isTypeLegal(NVT)) { SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp); @@ -106,7 +104,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, if (TLI.isBigEndian()) std::swap(Lo, Hi); - + return; } } @@ -117,7 +115,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); @@ -169,11 +167,11 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Convert to a vector of the expanded element type, for example // <3 x i64> -> <6 x i32>. - MVT OldVT = N->getValueType(0); - MVT NewVT = TLI.getTypeToTransformTo(OldVT); + EVT OldVT = N->getValueType(0); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getVectorVT(NewVT, 2*OldElts), + EVT::getVectorVT(*DAG.getContext(), NewVT, 2*OldElts), OldVec); // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
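The <3 x i64> -> <6 x i32> comment above is the whole trick for ExpandRes_EXTRACT_VECTOR_ELT: after the bitconvert, the two halves of element Idx sit at positions 2*Idx and 2*Idx+1 of the new vector. A little-endian C++ sketch (illustrative only; the DAG code returns the halves as the expanded Lo/Hi pair rather than rejoining them):

    #include <cstdint>

    // Read element Idx of a conceptual <N x i64> whose storage is viewed
    // as <2N x i32>, little-endian layout assumed.
    uint64_t ExtractExpandedElt(const uint32_t *NewVec, unsigned Idx) {
      uint32_t Lo = NewVec[2 * Idx];     // low half of the old element
      uint32_t Hi = NewVec[2 * Idx + 1]; // high half
      return (static_cast<uint64_t>(Hi) << 32) | Lo;
    }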
@@ -200,7 +198,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); - MVT NVT = TLI.getTypeToTransformTo(LD->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); int SVOffset = LD->getSrcValueOffset(); @@ -235,7 +233,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT NVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); @@ -265,8 +263,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32. - MVT OVT = N->getOperand(0).getValueType(); - MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OVT), 2); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), 2); if (isTypeLegal(NVT)) { SDValue Parts[2]; @@ -286,10 +284,10 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { // The vector type is legal but the element type needs expansion. - MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - MVT OldVT = N->getOperand(0).getValueType(); - MVT NewVT = TLI.getTypeToTransformTo(OldVT); + EVT OldVT = N->getOperand(0).getValueType(); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); DebugLoc dl = N->getDebugLoc(); assert(OldVT == VecVT.getVectorElementType() && @@ -310,7 +308,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { } SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::getVectorVT(NewVT, NewElts.size()), + EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), &NewElts[0], NewElts.size()); // Convert the new vector to the old vector type. @@ -325,20 +323,20 @@ SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. - MVT VecVT = N->getValueType(0); + EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); DebugLoc dl = N->getDebugLoc(); SDValue Val = N->getOperand(1); - MVT OldEVT = Val.getValueType(); - MVT NewEVT = TLI.getTypeToTransformTo(OldEVT); + EVT OldEVT = Val.getValueType(); + EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); assert(OldEVT == VecVT.getVectorElementType() && "Inserted element type doesn't match vector element type!"); // Bitconvert to a vector of twice the length with elements of the expanded // type, insert the expanded vector elements, and then convert back.
- MVT NewVecVT = MVT::getVectorVT(NewEVT, NumElts*2); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, N->getOperand(0)); @@ -360,7 +358,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); unsigned NumElts = VT.getVectorNumElements(); @@ -378,7 +376,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *St = cast<StoreSDNode>(N); - MVT NVT = TLI.getTypeToTransformTo(St->getValue().getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), St->getValue().getValueType()); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); int SVOffset = St->getSrcValueOffset(); @@ -464,7 +462,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getUNDEF(LoVT); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 335c73cd59648..ca194305d9898 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -20,8 +20,8 @@ // type i8 which must be promoted. // // This does not legalize vector manipulations like ISD::BUILD_VECTOR, -// or operations that happen to take a vector which are custom-lowered like -// ISD::CALL; the legalization for such operations never produces nodes +// or operations that happen to take a vector which are custom-lowered; +// the legalization for such operations never produces nodes // with illegal types, so it's okay to put off legalizing them until // SelectionDAG::Legalize runs. // @@ -129,7 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); - MVT QueryType; + EVT QueryType; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); @@ -231,10 +231,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { // Vector "promotion" is basically just bitcasting and doing the operation // in a different type. For example, x86 promotes ISD::AND on v2i32 to // v1i64.
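The promotion comment above works because bitwise operations commute with bitcasts, so the same v2i32 AND can be computed as one i64 AND. A self-contained sketch of that equivalence (hypothetical helper, not part of this patch):

    #include <cstdint>
    #include <cstring>

    // AND on v2i32 performed as a single i64 operation, as x86 does when
    // promoting ISD::AND from v2i32 to v1i64; lane contents are unchanged
    // because AND is purely bitwise.
    void AndV2I32(const uint32_t A[2], const uint32_t B[2], uint32_t Out[2]) {
      uint64_t WideA, WideB;
      std::memcpy(&WideA, A, sizeof(WideA));   // "bitcast" v2i32 -> i64
      std::memcpy(&WideB, B, sizeof(WideB));
      const uint64_t WideR = WideA & WideB;    // the promoted operation
      std::memcpy(Out, &WideR, sizeof(WideR)); // "bitcast" back
    }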
- MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -260,11 +260,11 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { } SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); - MVT TmpEltVT = LHS.getValueType().getVectorElementType(); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { @@ -287,11 +287,11 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { /// the operation be expanded. "Unroll" the vector, splitting out the scalars /// and operating on each element individually. SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); unsigned NE = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SmallVector<SDValue, 8> Scalars; @@ -299,10 +299,10 @@ for (unsigned i = 0; i != NE; ++i) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { SDValue Operand = Op.getOperand(j); - MVT OperandVT = Operand.getValueType(); + EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. - MVT OperandEltVT = OperandVT.getVectorElementType(); + EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 68967cc638fd7..a03f825a9f043 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -15,14 +15,16 @@ // eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 // types. // Splitting is the act of changing a computation in an invalid vector type to -be a computation in multiple vectors of a smaller type. For example, -implementing <128 x f32> operations in terms of two <64 x f32> operations. +be a computation in two vectors of half the size. For example, implementing +<128 x f32> operations in terms of two <64 x f32> operations.
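The revised header pins splitting down to exactly two halves. In miniature, with small concrete sizes (illustrative only, not part of this patch):

    // An add on an illegal <8 x float> done as the same add on two legal
    // <4 x float> halves; Lo covers elements [0,4) and Hi elements [4,8),
    // matching the GetSplitVector Lo/Hi convention.
    void AddV8F32(const float A[8], const float B[8], float Out[8]) {
      for (int i = 0; i != 4; ++i) Out[i] = A[i] + B[i];             // Lo
      for (int i = 0; i != 4; ++i) Out[4 + i] = A[4 + i] + B[4 + i]; // Hi
    }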
// //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -30,18 +32,19 @@ using namespace llvm; //===----------------------------------------------------------------------===// void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ScalarizeVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to scalarize the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to scalarize the result of this operator!"); case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: R = N->getOperand(0); break; @@ -53,6 +56,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; @@ -72,9 +76,14 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCEIL: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: R = ScalarizeVecRes_UnaryOp(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + R = ScalarizeVecRes_UnaryOp(N); + break; case ISD::ADD: case ISD::AND: @@ -91,11 +100,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::UDIV: case ISD::UREM: - case ISD::XOR: R = ScalarizeVecRes_BinOp(N); break; - + case ISD::XOR: case ISD::SHL: case ISD::SRA: - case ISD::SRL: R = ScalarizeVecRes_ShiftOp(N); break; + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; } // If R is null, the sub-method took care of registering the result. 
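All of the ScalarizeVecRes_* routines above follow one pattern: an operation on a <1 x ty> vector becomes the corresponding scalar operation on its only element. In miniature (illustrative only):

    // Scalarizing a binary op on <1 x float>: operate on element 0; the
    // one-element result vector is implicit, which is what
    // ScalarizeVecRes_BinOp does with its GetScalarizedVector operands.
    float ScalarizedFAdd(const float A[1], const float B[1]) {
      return A[0] + B[0];
    }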
@@ -110,21 +120,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_ShiftOp(SDNode *N) { - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue ShiftAmt = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), - LHS.getValueType(), LHS, ShiftAmt); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) { - MVT NewVT = N->getValueType(0).getVectorElementType(); + EVT NewVT = N->getValueType(0).getVectorElementType(); return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), NewVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { - MVT NewVT = N->getValueType(0).getVectorElementType(); + EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), Op0, DAG.getValueType(NewVT), @@ -150,7 +153,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { // The value to insert may have a wider type than the vector element type, // so be sure to truncate it to the element type if necessary. SDValue Op = N->getOperand(1); - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); if (Op.getValueType() != EltVT) // FIXME: Can this happen for floating point types? Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); @@ -167,7 +170,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -177,7 +180,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. - MVT DestVT = N->getValueType(0).getVectorElementType(); + EVT DestVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); } @@ -185,7 +188,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { // If the operand is wider than the vector element type then it is implicitly // truncated. Make that explicit here. - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); SDValue InOp = N->getOperand(0); if (InOp.getValueType() != EltVT) return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); @@ -207,6 +210,15 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { N->getOperand(4)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + DebugLoc DL = N->getDebugLoc(); + + // Turn it into a scalar SETCC. 
+ return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); } @@ -223,12 +235,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - MVT NVT = N->getValueType(0).getVectorElementType(); - MVT SVT = TLI.getSetCCResultType(LHS.getValueType()); - DebugLoc dl = N->getDebugLoc(); + EVT NVT = N->getValueType(0).getVectorElementType(); + EVT SVT = TLI.getSetCCResultType(LHS.getValueType()); + DebugLoc DL = N->getDebugLoc(); // Turn it into a scalar SETCC. - SDValue Res = DAG.getNode(ISD::SETCC, dl, SVT, LHS, RHS, N->getOperand(2)); + SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2)); // VSETCC always returns a sign-extended value, while SETCC may not. The // SETCC result type may not match the vector element type. Correct these. @@ -237,19 +249,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { // Ensure the SETCC result is sign-extended. if (TLI.getBooleanContents() != TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, SVT, Res, + Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res, DAG.getValueType(MVT::i1)); // Truncate to the final type. - return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); - } else { - // The SETCC result type is smaller than the vector element type. - // If the SetCC result is not sign-extended, chop it down to MVT::i1. - if (TLI.getBooleanContents() != - TargetLowering::ZeroOrNegativeOneBooleanContent) - Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Res); - // Sign extend to the final type. - return DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, Res); + return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res); } + + // The SETCC result type is smaller than the vector element type. + // If the SetCC result is not sign-extended, chop it down to MVT::i1. + if (TLI.getBooleanContents() != + TargetLowering::ZeroOrNegativeOneBooleanContent) + Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res); + // Sign extend to the final type. 
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res); } @@ -258,31 +270,32 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(cerr << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Scalarize node operand " << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); if (Res.getNode() == 0) { switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "ScalarizeVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to scalarize this operator's operand!"); - abort(); - + llvm_unreachable("Do not know how to scalarize this operator's operand!"); case ISD::BIT_CONVERT: - Res = ScalarizeVecOp_BIT_CONVERT(N); break; - + Res = ScalarizeVecOp_BIT_CONVERT(N); + break; case ISD::CONCAT_VECTORS: - Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; - + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; case ISD::EXTRACT_VECTOR_ELT: - Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; - + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; case ISD::STORE: - Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; } } @@ -323,7 +336,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { /// be scalarized, it must be <1 x ty>, so just return the element, ignoring the /// index. SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - return GetScalarizedVector(N->getOperand(0)); + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), + Res); + return Res; } /// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be @@ -343,7 +360,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->getOriginalAlignment()); } @@ -357,17 +374,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ /// legalization, we just know that (at least) one result needs vector /// splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Split node result: "; N->dump(&DAG); cerr << "\n"); + DEBUG(errs() << "Split node result: "; + N->dump(&DAG); + errs() << "\n"); SDValue Lo, Hi; switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "SplitVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to split the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to split the result of this operator!"); case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; @@ -382,10 +401,16 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; - case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);break; + case ISD::LOAD: + SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); + break; + case ISD::SETCC: + case ISD::VSETCC: + SplitVecRes_SETCC(N, Lo, Hi); + break; case ISD::VECTOR_SHUFFLE: - SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); break; - case ISD::VSETCC: SplitVecRes_VSETCC(N, Lo, Hi); break; + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); + break; case ISD::CTTZ: case ISD::CTLZ: @@ -403,8 +428,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + SplitVecRes_UnaryOp(N, Lo, Hi); + break; case ISD::ADD: case ISD::SUB: @@ -424,7 +454,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SRL: case ISD::UREM: case ISD::SREM: - case ISD::FREM: SplitVecRes_BinOp(N, Lo, Hi); break; + case ISD::FREM: + SplitVecRes_BinOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -448,12 +480,12 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi) { // We know the result is a vector. The input may be either a vector or a // scalar value. - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); DebugLoc dl = N->getDebugLoc(); SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); // Handle some special cases efficiently. switch (getTypeAction(InVT)) { @@ -488,8 +520,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, } // In the general case, convert the input to an integer and split it by hand.
- MVT LoIntVT = MVT::getIntegerVT(LoVT.getSizeInBits()); - MVT HiIntVT = MVT::getIntegerVT(HiVT.getSizeInBits()); + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); if (TLI.isBigEndian()) std::swap(LoIntVT, HiIntVT); @@ -503,7 +535,7 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); unsigned LoNumElts = LoVT.getVectorNumElements(); @@ -525,7 +557,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, return; } - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); SmallVector LoOps(N->op_begin(), N->op_begin()+NumSubvectors); @@ -537,7 +569,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); @@ -550,12 +582,11 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, // Split the input. SDValue VLo, VHi; - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { - default: assert(0 && "Unexpected type action!"); + default: llvm_unreachable("Unexpected type action!"); case Legal: { - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); @@ -570,9 +601,8 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. Use the widened result // and extract from it to do the split. - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -595,14 +625,11 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - MVT IdxVT = Idx.getValueType(); + EVT IdxVT = Idx.getValueType(); DebugLoc dl = N->getDebugLoc(); - MVT LoVT, HiVT; + EVT LoVT, HiVT; GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // The indices are not guaranteed to be a multiple of the new vector - // size unless the original vector type was split in two. - assert(LoVT == HiVT && "Non power-of-two vectors not supported!"); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx, @@ -639,8 +666,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, } // Spill the vector to the stack. 
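
The stack-spill path that follows addresses individual elements inside the temporary slot. A rough standalone sketch of the address arithmetic GetVectorElementPointer performs; the <4 x i32> slot and index are hypothetical.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Element address = slot base + Idx * sizeof(element).
static std::uint8_t *elementPointer(std::uint8_t *SlotBase,
                                    std::size_t EltBytes, std::size_t Idx) {
  return SlotBase + Idx * EltBytes;
}

int main() {
  std::uint8_t Slot[16] = {};          // a <4 x i32> spilled to the stack
  std::uint32_t Elt = 42;              // element being inserted at index 2
  std::memcpy(elementPointer(Slot, sizeof(Elt), 2), &Elt, sizeof(Elt));
  std::uint32_t Back;                  // reload to check the slot contents
  std::memcpy(&Back, Slot + 2 * sizeof(Back), sizeof(Back));
  std::printf("%u\n", Back);
}
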
- MVT VecVT = Vec.getValueType(); - MVT EltVT = VecVT.getVectorElementType(); + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); @@ -648,7 +675,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // so use a truncating store. SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); unsigned Alignment = - TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForMVT()); + TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); // Load the Lo part from the stack slot. @@ -666,7 +693,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); @@ -676,7 +703,7 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = LD->getDebugLoc(); GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); @@ -686,11 +713,11 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); const Value *SV = LD->getSrcValue(); int SVOffset = LD->getSrcValueOffset(); - MVT MemoryVT = LD->getMemoryVT(); - unsigned Alignment = LD->getAlignment(); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); - MVT LoMemVT, HiMemVT; + EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, @@ -700,7 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; - Alignment = MinAlign(Alignment, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, SV, SVOffset, HiMemVT, isVolatile, Alignment); @@ -714,20 +740,43 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + DebugLoc DL = N->getDebugLoc(); + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + + // Split the input. 
+ EVT InVT = N->getOperand(0).getValueType(); + SDValue LL, LH, RL, RH; + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), + LoVT.getVectorNumElements()); + LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(0)); + LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(0)); + RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), + DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { // Get the dest types - they may not match the input types, e.g. int_to_fp. - MVT LoVT, HiVT; + EVT LoVT, HiVT; DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); // Split the input. - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); switch (getTypeAction(InVT)) { - default: assert(0 && "Unexpected type action!"); + default: llvm_unreachable("Unexpected type action!"); case Legal: { - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); @@ -742,9 +791,8 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // If the result needs to be split and the input needs to be widened, // the two types must have different lengths. Use the widened result // and extract from it to do the split. - assert(LoVT == HiVT && "Legal non-power-of-two vector type?"); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InNVT = MVT::getVectorVT(InVT.getVectorElementType(), + EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, DAG.getIntPtrConstant(0)); @@ -765,10 +813,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, DebugLoc dl = N->getDebugLoc(); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); - MVT NewVT = Inputs[0].getValueType(); + EVT NewVT = Inputs[0].getValueType(); unsigned NewElts = NewVT.getVectorNumElements(); - assert(NewVT == Inputs[1].getValueType() && - "Non power-of-two vectors not supported!"); // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the @@ -825,7 +871,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } if (useBuildVector) { - MVT EltVT = NewVT.getVectorElementType(); + EVT EltVT = NewVT.getVectorElementType(); SmallVector SVOps; // Extract the input elements by hand. 
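
To make the new SplitVecRes_SETCC concrete, here is a standalone model that slices both operands at the midpoint exactly as the EXTRACT_SUBVECTOR pairs above do; SETLT and the element values are illustrative only.

#include <array>
#include <cstddef>
#include <cstdio>

// Compare one half of the operands, starting at element First.
template <std::size_t N>
static std::array<bool, N / 2> setccHalf(const std::array<int, N> &L,
                                         const std::array<int, N> &R,
                                         std::size_t First) {
  std::array<bool, N / 2> Out{};
  for (std::size_t i = 0; i != N / 2; ++i)
    Out[i] = L[First + i] < R[First + i]; // SETLT as the sample predicate
  return Out;
}

int main() {
  std::array<int, 4> L{1, 5, 3, 7}, R{2, 4, 6, 6};
  std::array<bool, 2> Lo = setccHalf(L, R, 0); // elements 0..1
  std::array<bool, 2> Hi = setccHalf(L, R, 2); // elements 2..3
  std::printf("%d %d %d %d\n", Lo[0], Lo[1], Hi[0], Hi[1]);
}
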
@@ -868,20 +914,6 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
   }
 }
 
-void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
-                                          SDValue &Hi) {
-  MVT LoVT, HiVT;
-  DebugLoc dl = N->getDebugLoc();
-  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
-
-  SDValue LL, LH, RL, RH;
-  GetSplitVector(N->getOperand(0), LL, LH);
-  GetSplitVector(N->getOperand(1), RL, RH);
-
-  Lo = DAG.getNode(ISD::VSETCC, dl, LoVT, LL, RL, N->getOperand(2));
-  Hi = DAG.getNode(ISD::VSETCC, dl, HiVT, LH, RH, N->getOperand(2));
-}
-
 
 //===----------------------------------------------------------------------===//
 //  Operand Vector Splitting
@@ -892,24 +924,27 @@ void DAGTypeLegalizer::SplitVecRes_VSETCC(SDNode *N, SDValue &Lo,
 /// result types of the node are known to be legal, but other operands of the
 /// node may need legalization as well as the specified one.
 bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(cerr << "Split node operand: "; N->dump(&DAG); cerr << "\n");
+  DEBUG(errs() << "Split node operand: ";
+        N->dump(&DAG);
+        errs() << "\n");
   SDValue Res = SDValue();
 
   if (Res.getNode() == 0) {
     switch (N->getOpcode()) {
     default:
 #ifndef NDEBUG
-      cerr << "SplitVectorOperand Op #" << OpNo << ": ";
-      N->dump(&DAG); cerr << "\n";
+      errs() << "SplitVectorOperand Op #" << OpNo << ": ";
+      N->dump(&DAG);
+      errs() << "\n";
 #endif
-      assert(0 && "Do not know how to split this operator's operand!");
-      abort();
+      llvm_unreachable("Do not know how to split this operator's operand!");
 
    case ISD::BIT_CONVERT:       Res = SplitVecOp_BIT_CONVERT(N); break;
    case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
    case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
-    case ISD::STORE:             Res = SplitVecOp_STORE(cast<StoreSDNode>(N),
-                                                        OpNo); break;
+    case ISD::STORE:
+      Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+      break;
 
    case ISD::CTTZ:
    case ISD::CTLZ:
@@ -917,8 +952,13 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
    case ISD::SINT_TO_FP:
+    case ISD::UINT_TO_FP:
    case ISD::TRUNCATE:
-    case ISD::UINT_TO_FP: Res = SplitVecOp_UnaryOp(N); break;
+    case ISD::SIGN_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::ANY_EXTEND:
+      Res = SplitVecOp_UnaryOp(N);
+      break;
    }
  }
 
@@ -939,15 +979,13 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
 SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   // The result has a legal vector type, but the input needs splitting.
-  MVT ResVT = N->getValueType(0);
+  EVT ResVT = N->getValueType(0);
   SDValue Lo, Hi;
   DebugLoc dl = N->getDebugLoc();
   GetSplitVector(N->getOperand(0), Lo, Hi);
-  assert(Lo.getValueType() == Hi.getValueType() &&
-         "Returns legal non-power-of-two vector type?");
-  MVT InVT = Lo.getValueType();
+  EVT InVT = Lo.getValueType();
 
-  MVT OutVT = MVT::getVectorVT(ResVT.getVectorElementType(),
+  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
                                InVT.getVectorNumElements());
 
   Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
@@ -975,7 +1013,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
 SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   // We know that the extracted result type is legal. For now, assume the index
   // is a constant.
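
In SplitVecOp_UnaryOp the result type is legal and only the input is split, so the operation runs once per half at a half-length type and the halves are concatenated afterwards. A standalone sketch of that flow, with sint_to_fp as the sample operation:

#include <array>
#include <cstdio>

// Apply the unary op to one <2 x i32> half, producing <2 x float>.
static std::array<float, 2> sintToFp(const std::array<int, 2> &Half) {
  return {static_cast<float>(Half[0]), static_cast<float>(Half[1])};
}

int main() {
  std::array<int, 2> Lo{1, 2}, Hi{3, 4};          // the two split halves
  std::array<float, 2> FLo = sintToFp(Lo), FHi = sintToFp(Hi);
  std::array<float, 4> Res{FLo[0], FLo[1], FHi[0], FHi[1]}; // CONCAT_VECTORS
  std::printf("%g %g %g %g\n", Res[0], Res[1], Res[2], Res[3]);
}
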
-  MVT SubVT = N->getValueType(0);
+  EVT SubVT = N->getValueType(0);
   SDValue Idx = N->getOperand(1);
   DebugLoc dl = N->getDebugLoc();
   SDValue Lo, Hi;
@@ -997,7 +1035,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
 SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue Vec = N->getOperand(0);
   SDValue Idx = N->getOperand(1);
-  MVT VecVT = Vec.getValueType();
+  EVT VecVT = Vec.getValueType();
 
   if (isa<ConstantSDNode>(Idx)) {
     uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
@@ -1010,14 +1048,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 
     if (IdxVal < LoElts)
       return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
-    else
-      return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
-                                    DAG.getConstant(IdxVal - LoElts,
-                                                    Idx.getValueType()));
+    return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+                                  DAG.getConstant(IdxVal - LoElts,
+                                                  Idx.getValueType()));
   }
 
   // Store the vector to the stack.
-  MVT EltVT = VecVT.getVectorElementType();
+  EVT EltVT = VecVT.getVectorElementType();
   DebugLoc dl = N->getDebugLoc();
   SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -1026,7 +1063,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 
   // Load back the required element.
   StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
-  return DAG.getLoad(EltVT, dl, Store, StackPtr, SV, 0);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+                        SV, 0, EltVT);
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1038,13 +1076,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
   SDValue Ch  = N->getChain();
   SDValue Ptr = N->getBasePtr();
   int SVOffset = N->getSrcValueOffset();
-  MVT MemoryVT = N->getMemoryVT();
-  unsigned Alignment = N->getAlignment();
+  EVT MemoryVT = N->getMemoryVT();
+  unsigned Alignment = N->getOriginalAlignment();
   bool isVol = N->isVolatile();
   SDValue Lo, Hi;
   GetSplitVector(N->getOperand(1), Lo, Hi);
 
-  MVT LoMemVT, HiMemVT;
+  EVT LoMemVT, HiMemVT;
   GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
 
   unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
@@ -1059,15 +1097,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
 
   // Increment the pointer to the other half.
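
The Lo/Hi halves of a split memory access sit a fixed byte increment apart, and the patch now carries the node's original alignment to both halves rather than MinAlign'ing the high one. A sketch of the offset arithmetic, with hypothetical sizes:

#include <cstdio>

int main() {
  unsigned LoMemBits = 64;                // LoMemVT for, say, <2 x i32>
  unsigned IncrementSize = LoMemBits / 8; // 8 bytes to the high half
  unsigned SVOffset = 16;                 // source-value offset of the access
  unsigned HiOffset = SVOffset + IncrementSize;
  std::printf("Lo at base+%u, Hi at base+%u\n", SVOffset, HiOffset);
}
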
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); + SVOffset += IncrementSize; if (isTruncating) - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, - N->getSrcValue(), SVOffset+IncrementSize, - HiMemVT, - isVol, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + HiMemVT, isVol, Alignment); else - Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, - isVol, MinAlign(Alignment, IncrementSize)); + Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, + isVol, Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1078,18 +1115,19 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Widen node result " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Widen node result " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - cerr << "WidenVectorResult #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to widen the result of this operator!"); - abort(); + llvm_unreachable("Do not know how to widen the result of this operator!"); case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; @@ -1102,9 +1140,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; - case ISD::VECTOR_SHUFFLE: - Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); break; - case ISD::VSETCC: Res = WidenVecRes_VSETCC(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); + break; + case ISD::VSETCC: + Res = WidenVecRes_VSETCC(N); + break; case ISD::ADD: case ISD::AND: @@ -1126,21 +1167,27 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIV: case ISD::UREM: case ISD::SUB: - case ISD::XOR: Res = WidenVecRes_Binary(N); break; + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; case ISD::SHL: case ISD::SRA: - case ISD::SRL: Res = WidenVecRes_Shift(N); break; + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; - case ISD::ANY_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::UINT_TO_FP: Res = WidenVecRes_Convert(N); break; + case ISD::ANY_EXTEND: + Res = WidenVecRes_Convert(N); + break; case ISD::CTLZ: case ISD::CTPOP: @@ -1149,7 +1196,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOS: case ISD::FNEG: case ISD::FSIN: - case ISD::FSQRT: Res = WidenVecRes_Unary(N); break; + case ISD::FSQRT: + Res = WidenVecRes_Unary(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1159,7 +1208,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. 
- MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); @@ -1169,12 +1218,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); DebugLoc dl = N->getDebugLoc(); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); - MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); @@ -1216,7 +1265,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. SmallVector Ops(WidenNumElts); - MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) @@ -1232,16 +1281,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); - MVT ShVT = ShOp.getValueType(); + EVT ShVT = ShOp.getValueType(); if (getTypeAction(ShVT) == WidenVector) { ShOp = GetWidenedVector(ShOp); ShVT = ShOp.getValueType(); } - MVT ShWidenVT = MVT::getVectorVT(ShVT.getVectorElementType(), + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), ShVT.getVectorElementType(), WidenVT.getVectorNumElements()); if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); @@ -1251,16 +1300,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { SDValue InOp = N->getOperand(0); - MVT InVT = InOp.getValueType(); - MVT VT = N->getValueType(0); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT InVT = InOp.getValueType(); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { @@ -1300,13 +1349,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { // Determine new input vector type. The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. 
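
When the input element count cannot be matched to the widened count, WidenVecRes_Convert unrolls into scalar converts and pads the result. A standalone model of that fallback; the <3 x i32> to <4 x float> shapes are hypothetical.

#include <array>
#include <cstddef>
#include <cstdio>

int main() {
  std::array<int, 3> In{1, 2, 3};   // a <3 x i32> input being widened
  std::array<float, 4> Widened{};   // widened result type <4 x float>
  for (std::size_t i = 0; i != In.size(); ++i)
    Widened[i] = static_cast<float>(In[i]); // per-element convert
  // Widened[3] plays the role of the trailing UNDEF lane.
  std::printf("%g %g %g %g\n", Widened[0], Widened[1], Widened[2], Widened[3]);
}
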
- MVT NewInVT; + EVT NewInVT; unsigned NewNumElts = WidenSize / InSize; if (InVT.isVector()) { - MVT InEltVT = InVT.getVectorElementType(); - NewInVT= MVT::getVectorVT(InEltVT, WidenSize / InEltVT.getSizeInBits()); + EVT InEltVT = InVT.getVectorElementType(); + NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { - NewInVT = MVT::getVectorVT(InVT, NewNumElts); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } if (TLI.isTypeLegal(NewInVT)) { @@ -1332,28 +1381,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { } } - // This should occur rarely. Lower the bit-convert to a store/load - // from the stack. Create the stack frame object. Make sure it is aligned - // for both the source and destination types. - SDValue FIPtr = DAG.CreateStackTemporary(InVT, WidenVT); - int FI = cast(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is a load from the stack slot. - return DAG.getLoad(WidenVT, dl, Store, FIPtr, SV, 0); + return CreateStackStoreLoad(InOp, WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { DebugLoc dl = N->getDebugLoc(); // Build a vector with undefined for the new nodes. - MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector NewOps(N->op_begin(), N->op_end()); @@ -1365,8 +1403,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { - MVT InVT = N->getOperand(0).getValueType(); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT InVT = N->getOperand(0).getValueType(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); DebugLoc dl = N->getDebugLoc(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); @@ -1387,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } } else { InputWidened = true; - if (WidenVT == TLI.getTypeToTransformTo(InVT)) { + if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { // The inputs and the result are widen to the same value. unsigned i; for (i=1; i < NumOperands; ++i) @@ -1406,7 +1444,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { MaskOps[i] = i; MaskOps[i+WidenNumElts/2] = i+WidenNumElts; } - return DAG.getVectorShuffle(WidenVT, dl, + return DAG.getVectorShuffle(WidenVT, dl, GetWidenedVector(N->getOperand(0)), GetWidenedVector(N->getOperand(1)), &MaskOps[0]); @@ -1415,7 +1453,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } // Fall back to use extracts and build vector. 
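
The shuffle mask assembled above stitches two widened inputs together by numbering the second input's lanes after the first's. The same construction as a standalone loop, for a hypothetical WidenNumElts of 8:

#include <cstdio>

int main() {
  const unsigned WidenNumElts = 8;
  int MaskOps[WidenNumElts];
  for (unsigned i = 0; i != WidenNumElts / 2; ++i) {
    MaskOps[i] = static_cast<int>(i);                   // input 0, lane i
    MaskOps[i + WidenNumElts / 2] =
        static_cast<int>(i + WidenNumElts);             // input 1, lane i
  }
  for (unsigned i = 0; i != WidenNumElts; ++i)
    std::printf("%d ", MaskOps[i]); // 0 1 2 3 8 9 10 11
  std::printf("\n");
}
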
- MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); unsigned NumInElts = InVT.getVectorNumElements(); SmallVector Ops(WidenNumElts); unsigned Idx = 0; @@ -1439,12 +1477,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); - MVT InWidenVT = MVT::getVectorVT(InEltVT, WidenNumElts); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); SDValue DTyOp = DAG.getValueType(WidenVT); SDValue STyOp = DAG.getValueType(InWidenVT); @@ -1491,7 +1529,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. SmallVector Ops(WidenNumElts); - MVT EltVT = WidenVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); DTyOp = DAG.getValueType(EltVT); STyOp = DAG.getValueType(InEltVT); @@ -1512,8 +1550,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - MVT VT = N->getValueType(0); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); @@ -1522,7 +1560,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { if (getTypeAction(InOp.getValueType()) == WidenVector) InOp = GetWidenedVector(InOp); - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); ConstantSDNode *CIdx = dyn_cast(Idx); if (CIdx) { @@ -1540,8 +1578,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. SmallVector Ops(WidenNumElts); - MVT EltVT = VT.getVectorElementType(); - MVT IdxVT = Idx.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Idx.getValueType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; if (CIdx) { @@ -1573,8 +1611,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast(N); - MVT WidenVT = TLI.getTypeToTransformTo(LD->getValueType(0)); - MVT LdVT = LD->getMemoryVT(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); DebugLoc dl = N->getDebugLoc(); assert(LdVT.isVector() && WidenVT.isVector()); @@ -1593,8 +1631,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { // For extension loads, we can not play the tricks of chopping legal // vector types and bit cast it to the right type. Instead, we unroll // the load and build a vector. 
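
The extension-load unrolling described in the comment above loads each narrow element separately and extends it into the widened vector. A standalone model; the <3 x i8> source widened to i32 elements is hypothetical.

#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  std::uint8_t Mem[3] = {10, 20, 30};    // a <3 x i8> sitting in memory
  std::array<std::int32_t, 4> Widened{}; // widened to four i32 lanes
  for (unsigned i = 0; i != 3; ++i)
    Widened[i] = static_cast<std::int32_t>(Mem[i]); // per-element ext-load
  std::printf("%d %d %d\n", Widened[0], Widened[1], Widened[2]);
}
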
- MVT EltVT = WidenVT.getVectorElementType(); - MVT LdEltVT = LdVT.getVectorElementType(); + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); unsigned NumElts = LdVT.getVectorNumElements(); // Load each element and widen @@ -1638,26 +1676,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { // Modified the chain - switch anything that used the old chain to use // the new one. - ReplaceValueWith(SDValue(N, 1), Chain); + ReplaceValueWith(SDValue(N, 1), NewChain); return Result; } SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(), WidenVT, N->getOperand(0)); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue Cond1 = N->getOperand(0); - MVT CondVT = Cond1.getValueType(); + EVT CondVT = Cond1.getValueType(); if (CondVT.isVector()) { - MVT CondEltVT = CondVT.getVectorElementType(); - MVT CondWidenVT = MVT::getVectorVT(CondEltVT, WidenNumElts); + EVT CondEltVT = CondVT.getVectorElementType(); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenNumElts); if (getTypeAction(CondVT) == WidenVector) Cond1 = GetWidenedVector(Cond1); @@ -1681,15 +1719,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); } SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - MVT WidenVT = TLI.getTypeToTransformTo(VT); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned NumElts = VT.getVectorNumElements(); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1711,13 +1749,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { - MVT WidenVT = TLI.getTypeToTransformTo(N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp1 = N->getOperand(0); - MVT InVT = InOp1.getValueType(); + EVT InVT = InOp1.getValueType(); assert(InVT.isVector() && "can not widen non vector type"); - MVT WidenInVT = MVT::getVectorVT(InVT.getVectorElementType(), WidenNumElts); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenNumElts); InOp1 = GetWidenedVector(InOp1); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -1735,18 +1773,19 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { - DEBUG(cerr << "Widen node operand " << ResNo << ": "; N->dump(&DAG); - cerr << "\n"); + DEBUG(errs() << "Widen node operand " << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG - 
cerr << "WidenVectorOperand op #" << ResNo << ": "; - N->dump(&DAG); cerr << "\n"; + errs() << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); + errs() << "\n"; #endif - assert(0 && "Do not know how to widen this operator's operand!"); - abort(); + llvm_unreachable("Do not know how to widen this operator's operand!"); case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; @@ -1757,8 +1796,13 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::UINT_TO_FP: Res = WidenVecOp_Convert(N); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = WidenVecOp_Convert(N); + break; } // If Res is null, the sub-method took care of registering the result. @@ -1781,15 +1825,15 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert // into some scalar code and create a nasty build vector. - MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); if (getTypeAction(InOp.getValueType()) == WidenVector) InOp = GetWidenedVector(InOp); - MVT InVT = InOp.getValueType(); - MVT InEltVT = InVT.getVectorElementType(); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); unsigned Opcode = N->getOpcode(); SmallVector Ops(NumElts); @@ -1802,9 +1846,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT InWidenVT = InOp.getValueType(); + EVT InWidenVT = InOp.getValueType(); DebugLoc dl = N->getDebugLoc(); // Check if we can convert between two legal vector types and extract. @@ -1812,7 +1856,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { unsigned Size = VT.getSizeInBits(); if (InWidenSize % Size == 0 && !VT.isVector()) { unsigned NewNumElts = InWidenSize / Size; - MVT NewVT = MVT::getVectorVT(VT, NewNumElts); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, @@ -1820,31 +1864,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { } } - // Lower the bit-convert to a store/load from the stack. Create the stack - // frame object. Make sure it is aligned for both the source and destination - // types. - SDValue FIPtr = DAG.CreateStackTemporary(InWidenVT, VT); - int FI = cast(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, FIPtr, SV, 0); - - // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, Store, FIPtr, SV, 0); + return CreateStackStoreLoad(InOp, VT); } SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { // If the input vector is not legal, it is likely that we will not find a // legal vector of the same size. 
Replace the concatenate vector with a // nasty build vector. - MVT VT = N->getValueType(0); - MVT EltVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = N->getDebugLoc(); unsigned NumElts = VT.getVectorNumElements(); SmallVector Ops(NumElts); - MVT InVT = N->getOperand(0).getValueType(); + EVT InVT = N->getOperand(0).getValueType(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned Idx = 0; @@ -1862,9 +1895,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - MVT EltVT = InOp.getValueType().getVectorElementType(); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), - EltVT, InOp, N->getOperand(1)); + N->getValueType(0), InOp, N->getOperand(1)); } SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { @@ -1880,8 +1912,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = N->getDebugLoc(); - MVT StVT = ST->getMemoryVT(); - MVT ValVT = ValOp.getValueType(); + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ValOp.getValueType(); // It must be true that we the widen vector type is bigger than where // we need to store. assert(StVT.isVector() && ValOp.getValueType().isVector()); @@ -1892,8 +1924,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // For truncating stores, we can not play the tricks of chopping legal // vector types and bit cast it to the right type. Instead, we unroll // the store. - MVT StEltVT = StVT.getVectorElementType(); - MVT ValEltVT = ValVT.getVectorElementType(); + EVT StEltVT = StVT.getVectorElementType(); + EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, @@ -1938,9 +1970,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // VecVT: Vector value type whose size we must match. // Returns NewVecVT and NewEltVT - the vector type and its associated // element type. 
-static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, - MVT VecVT, - MVT& NewEltVT, MVT& NewVecVT) { +static void FindAssocWidenVecType(SelectionDAG& DAG, + const TargetLowering &TLI, unsigned Width, + EVT VecVT, + EVT& NewEltVT, EVT& NewVecVT) { unsigned EltWidth = Width + 1; if (TLI.isTypeLegal(VecVT)) { // We start with the preferred with, making it a power of 2 and find a @@ -1950,9 +1983,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, do { assert(EltWidth > 0); EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = MVT::getIntegerVT(EltWidth); + NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = MVT::getVectorVT(NewEltVT, NumElts); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); } while (!TLI.isTypeLegal(NewVecVT) || VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); } else { @@ -1965,9 +1998,9 @@ static void FindAssocWidenVecType(const TargetLowering &TLI, unsigned Width, do { assert(EltWidth > 0); EltWidth = 1 << Log2_32(EltWidth - 1); - NewEltVT = MVT::getIntegerVT(EltWidth); + NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); unsigned NumElts = VecVT.getSizeInBits() / EltWidth; - NewVecVT = MVT::getVectorVT(NewEltVT, NumElts); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts); } while (!TLI.isTypeLegal(NewEltVT) || VecVT.getSizeInBits() != NewVecVT.getSizeInBits()); } @@ -1981,7 +2014,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector& LdChain, unsigned Alignment, bool isVolatile, unsigned LdWidth, - MVT ResType, + EVT ResType, DebugLoc dl) { // The strategy assumes that we can efficiently load powers of two widths. // The routines chops the vector into the largest power of 2 load and @@ -1992,9 +2025,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector& LdChain, // the load is nonvolatile, we an use a wider load for the value. // Find the vector type that can load from. - MVT NewEltVT, NewVecVT; + EVT NewEltVT, NewVecVT; unsigned NewEltVTWidth; - FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset, @@ -2021,7 +2054,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector& LdChain, // Our current type we are using is too large, use a smaller size by // using a smaller power of 2 unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(TLI, LdWidth, ResType, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); // Readjust position and vector position based on new load type Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); @@ -2056,10 +2089,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector& StChain, // want to store. This avoids requiring a stack convert. 
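
The EltWidth = 1 << Log2_32(EltWidth - 1) step above walks down through successively smaller powers of two until a usable element width is found. A standalone model of the descent; the real loop tests TLI.isTypeLegal on each candidate, for which plain divisibility stands in here.

#include <cassert>
#include <cstdio>

// Floor of log2, the role Log2_32 plays in the patch.
static unsigned log2Floor(unsigned V) {
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

int main() {
  unsigned VecBits = 96;          // total vector width to cover
  unsigned EltWidth = VecBits + 1;
  do {
    assert(EltWidth > 0);
    EltWidth = 1u << log2Floor(EltWidth - 1); // 64 first, then 32
  } while (VecBits % EltWidth != 0);          // legality stand-in
  std::printf("use %u x i%u\n", VecBits / EltWidth, EltWidth);
}
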
// Find a width of the element type we can store with - MVT WidenVT = ValOp.getValueType(); - MVT NewEltVT, NewVecVT; + EVT WidenVT = ValOp.getValueType(); + EVT NewEltVT, NewVecVT; - FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); unsigned NewEltVTWidth = NewEltVT.getSizeInBits(); SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp); @@ -2088,7 +2121,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector& StChain, // Our current type we are using is too large, use a smaller size by // using a smaller power of 2 unsigned oNewEltVTWidth = NewEltVTWidth; - FindAssocWidenVecType(TLI, StWidth, WidenVT, NewEltVT, NewVecVT); + FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT); NewEltVTWidth = NewEltVT.getSizeInBits(); // Readjust position and vector position based on new load type Idx = Idx * (oNewEltVTWidth/NewEltVTWidth); @@ -2106,10 +2139,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector& StChain, /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. - MVT InVT = InOp.getValueType(); + EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); DebugLoc dl = InOp.getDebugLoc(); @@ -2137,7 +2170,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, MVT NVT) { // Fall back to extract and build. SmallVector Ops(WidenNumElts); - MVT EltVT = NVT.getVectorElementType(); + EVT EltVT = NVT.getVectorElementType(); unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index af73b28fae934..e0f93d85c751d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -24,6 +24,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumUnfolds, "Number of nodes unfolded"); @@ -108,14 +110,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGFast::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -132,17 +134,17 @@ void ScheduleDAGFast::Schedule() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); - --PredSU->NumSuccsLeft; - + #ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (PredSU->NumSuccsLeft == 0) { + errs() << "*** Scheduling failed! 
***\n"; PredSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --PredSU->NumSuccsLeft; + // If all the node's successors are scheduled, this node is ready // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { @@ -174,7 +176,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -214,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag) return NULL; else if (VT == MVT::Other) @@ -222,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - MVT VT = Op.getNode()->getValueType(Op.getResNo()); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Flag) return NULL; } @@ -232,7 +234,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -342,7 +344,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = Clone(SU); // New SUnit has the exact same predecessors. @@ -419,7 +421,7 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); @@ -533,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -549,16 +551,16 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. 
SmallVector Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DOUT << "Adding an edge from SU # " << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); NewDef = Copies.back(); } - DOUT << "Adding an edge from SU # " << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -568,8 +570,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { } if (!CurSU) { - assert(false && "Unable to resolve live physical register dependencies!"); - abort(); + llvm_unreachable("Unable to resolve live physical register dependencies!"); } } @@ -587,41 +588,11 @@ void ScheduleDAGFast::ListScheduleBottomUp() { ++CurCycle; } - // Reverse the order if it is bottom up. + // Reverse the order since it is bottom up. std::reverse(Sequence.begin(), Sequence.end()); - - + #ifndef NDEBUG - // Verify that all SUnits were scheduled. - bool AnyNotSched = false; - unsigned DeadNodes = 0; - unsigned Noops = 0; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - if (!SUnits[i].isScheduled) { - if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { - ++DeadNodes; - continue; - } - if (!AnyNotSched) - cerr << "*** List scheduling failed! ***\n"; - SUnits[i].dump(this); - cerr << "has not been scheduled!\n"; - AnyNotSched = true; - } - if (SUnits[i].NumSuccsLeft != 0) { - if (!AnyNotSched) - cerr << "*** List scheduling failed! ***\n"; - SUnits[i].dump(this); - cerr << "has successors left!\n"; - AnyNotSched = true; - } - } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; - assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + VerifySchedule(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index c4325349990d3..c8d21584616a7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -29,6 +29,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/Statistic.h" #include @@ -86,10 +88,10 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); AvailableQueue->initNodes(SUnits); @@ -106,17 +108,17 @@ void ScheduleDAGList::Schedule() { /// the PendingQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { SUnit *SuccSU = D.getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! 
***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); // If all the node's predecessors are scheduled, this node is ready @@ -140,7 +142,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); @@ -232,7 +234,7 @@ void ScheduleDAGList::ListScheduleTopDown() { } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. - DOUT << "*** Advancing cycle, no work to do\n"; + DEBUG(errs() << "*** Advancing cycle, no work to do\n"); HazardRec->AdvanceCycle(); ++NumStalls; ++CurCycle; @@ -240,7 +242,7 @@ void ScheduleDAGList::ListScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. - DOUT << "*** Emitting noop\n"; + DEBUG(errs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); Sequence.push_back(0); // NULL here means noop ++NumNoops; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c97e2a8c86bf7..cec24e606f99d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -25,10 +25,12 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -163,14 +165,14 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DOUT << "********** List Scheduling **********\n"; + DEBUG(errs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(); + BuildSchedGraph(NULL); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -195,17 +197,17 @@ void ScheduleDAGRRList::Schedule() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); - --PredSU->NumSuccsLeft; - + #ifndef NDEBUG - if (PredSU->NumSuccsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (PredSU->NumSuccsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --PredSU->NumSuccsLeft; + // If all the node's successors are scheduled, this node is ready // to be scheduled. Ignore the special EntrySU node. 
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { @@ -237,7 +239,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); @@ -276,13 +278,14 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { AvailableQueue->remove(PredSU); } + assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); ++PredSU->NumSuccsLeft; } /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DOUT << "*** Unscheduling [" << SU->getHeight() << "]: "; + DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); AvailableQueue->UnscheduledNode(SU); @@ -351,7 +354,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag) return NULL; else if (VT == MVT::Other) @@ -359,7 +362,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - MVT VT = Op.getNode()->getValueType(Op.getResNo()); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Flag) return NULL; } @@ -369,7 +372,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; - DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -488,7 +491,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -570,7 +573,7 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); @@ -753,7 +756,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -769,8 +772,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. 
SmallVector Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DOUT << "Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, @@ -778,8 +781,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { NewDef = Copies.back(); } - DOUT << "Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"; + DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, @@ -822,17 +825,17 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { /// the AvailableQueue if the count reaches zero. Also update its cycle bound. void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); - --SuccSU->NumPredsLeft; - + #ifndef NDEBUG - if (SuccSU->NumPredsLeft < 0) { - cerr << "*** Scheduling failed! ***\n"; + if (SuccSU->NumPredsLeft == 0) { + errs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); - cerr << " has been released too many times!\n"; - assert(0); + errs() << " has been released too many times!\n"; + llvm_unreachable(0); } #endif - + --SuccSU->NumPredsLeft; + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { @@ -856,7 +859,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DOUT << "*** Scheduling [" << CurCycle << "]: "; + DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -1215,7 +1218,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!SUImpDefs) return false; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT == MVT::Flag || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) @@ -1328,9 +1331,9 @@ void RegReductionPriorityQueue::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. 
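The pervasive MVT-to-EVT renaming in this patch reflects a type split: MVT stays a plain enum of simple machine value types, while EVT can additionally carry extended types that have no enumerator, which is why switches now go through getSimpleVT().SimpleTy. A toy sketch of that shape (these types are illustrative stand-ins, not LLVM's):

#include <cassert>

struct MVT {
  enum SimpleValueType { f32, f64, i32, i64, Other } SimpleTy;
};

struct EVT {
  MVT V;
  bool Extended; // true for types with no MVT enumerator
  bool isSimple() const { return !Extended; }
  MVT getSimpleVT() const { assert(isSimple()); return V; }
};

const char *fpSemanticsName(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) { // was: switch (VT.getSimpleVT())
  case MVT::f32: return "IEEEsingle";
  case MVT::f64: return "IEEEdouble";
  default:       return "unknown FP format";
  }
}

int main() {
  EVT F32 = {{MVT::f32}, false};
  return fpSemanticsName(F32)[0] == 'I' ? 0 : 1;
}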
- DOUT << "Prescheduling SU # " << SU->NodeNum - << " next to PredSU # " << PredSU->NodeNum - << " to guide scheduling in the presence of multiple uses\n"; + DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum + << " next to PredSU # " << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; assert(!Edge.isAssignedRegDep()); @@ -1418,8 +1421,8 @@ void RegReductionPriorityQueue::AddPseudoTwoAddrDeps() { (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || (!SU->isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, SU)) { - DOUT << "Adding a pseudo-two-addr edge from SU # " << SU->NodeNum - << " to SU #" << SuccSU->NodeNum << "\n"; + DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # " + << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7aa15bcc6862e..d53de347a5566 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -14,10 +14,12 @@ #define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -152,6 +154,11 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { + const TargetSubtarget &ST = TM.getSubtarget(); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + // Pass 2: add the preds, succs, etc. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { SUnit *SU = &SUnits[su]; @@ -175,7 +182,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).getImplicitDefs()) { SU->hasPhysRegClobbers = true; - unsigned NumUsed = CountResults(N); + unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) --NumUsed; // Skip over unused values at the end. if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) @@ -189,7 +196,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { assert(OpSU && "Node has no SUnit!"); if (OpSU == SU) continue; // In the same group. - MVT OpVT = N->getOperand(i).getValueType(); + EVT OpVT = N->getOperand(i).getValueType(); assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!"); bool isChain = OpVT == MVT::Other; @@ -206,8 +213,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // dependency. This may change in the future though. if (Cost >= 0) PhysReg = 0; - SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpSU->Latency, PhysReg)); + + const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpSU->Latency, PhysReg); + if (!isChain && !UnitLatencies) { + ComputeOperandLatency(OpSU, SU, (SDep &)dep); + ST.adjustSchedDependency(OpSU, SU, (SDep &)dep); + } + + SU->addPred(dep); } } } @@ -217,7 +231,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. 
This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph() { +void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Populate the SUnits array. BuildSchedUnits(); // Compute all the scheduling dependencies between nodes. @@ -230,65 +244,68 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // Compute the latency for the node. We use the sum of the latencies for // all nodes flagged together into this SUnit. SU->Latency = 0; - bool SawMachineOpcode = false; for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) if (N->isMachineOpcode()) { - SawMachineOpcode = true; - SU->Latency += - InstrItins.getLatency(TII->get(N->getMachineOpcode()).getSchedClass()); + SU->Latency += InstrItins. + getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); } } -/// CountResults - The results of target nodes have register or immediate -/// operands first, then an optional chain, and optional flag operands (which do -/// not go into the resulting MachineInstr). -unsigned ScheduleDAGSDNodes::CountResults(SDNode *Node) { - unsigned N = Node->getNumValues(); - while (N && Node->getValueType(N - 1) == MVT::Flag) - --N; - if (N && Node->getValueType(N - 1) == MVT::Other) - --N; // Skip over chain result. - return N; -} - -/// CountOperands - The inputs to target nodes have any actual inputs first, -/// followed by special operands that describe memory references, then an -/// optional chain operand, then an optional flag operand. Compute the number -/// of actual operands that will go into the resulting MachineInstr. -unsigned ScheduleDAGSDNodes::CountOperands(SDNode *Node) { - unsigned N = ComputeMemOperandsEnd(Node); - while (N && isa(Node->getOperand(N - 1).getNode())) - --N; // Ignore MEMOPERAND nodes - return N; -} - -/// ComputeMemOperandsEnd - Find the index one past the last MemOperandSDNode -/// operand -unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { - unsigned N = Node->getNumOperands(); - while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag) - --N; - if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) - --N; // Ignore chain if it exists. - return N; -} - - void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { if (!SU->getNode()) { - cerr << "PHYS REG COPY\n"; + errs() << "PHYS REG COPY\n"; return; } SU->getNode()->dump(DAG); - cerr << "\n"; + errs() << "\n"; SmallVector FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { - cerr << " "; + errs() << " "; FlaggedNodes.back()->dump(DAG); - cerr << "\n"; + errs() << "\n"; FlaggedNodes.pop_back(); } } + +/// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(DenseMap *EM) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap VRBaseMap; + DenseMap CopyVRBaseMap; + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + EmitNoop(); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. 
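The ComputeLatency hunk above now sums the itinerary's stage latency over every machine node flagged into the SUnit. A toy rendering of that loop, under hypothetical stand-in types:

#include <vector>

struct Node {
  bool IsMachineOp;
  unsigned SchedClass;
  Node *Flagged; // next node flagged into the same SUnit, or null
};

struct Itineraries {
  std::vector<unsigned> StageLatency; // indexed by scheduling class
  unsigned getStageLatency(unsigned SC) const { return StageLatency[SC]; }
};

// An SUnit may cover several SDNodes flagged together; its latency is
// the sum over the machine opcodes in the chain.
unsigned computeLatency(const Node *First, const Itineraries &Itins) {
  unsigned Latency = 0;
  for (const Node *N = First; N; N = N->Flagged)
    if (N->IsMachineOp)
      Latency += Itins.getStageLatency(N->SchedClass);
  return Latency;
}

int main() {
  Itineraries It{{1, 3, 2}};
  Node Load{true, 1, nullptr};
  Node Add{true, 2, &Load};
  return computeLatency(&Add, It) == 5 ? 0 : 1;
}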
+ EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + + SmallVector FlaggedNodes; + for (SDNode *N = SU->getNode()->getFlaggedNode(); N; + N = N->getFlaggedNode()) + FlaggedNodes.push_back(N); + while (!FlaggedNodes.empty()) { + Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap, EM); + FlaggedNodes.pop_back(); + } + Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap, EM); + } + + BB = Emitter.getBlock(); + InsertPos = Emitter.getInsertPos(); + return BB; +} diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 2a278b749a8c4..c9c36f7e42e7a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -58,7 +58,6 @@ namespace llvm { if (isa(Node)) return true; if (isa(Node)) return true; if (isa(Node)) return true; - if (isa(Node)) return true; if (Node->getOpcode() == ISD::EntryToken) return true; return false; } @@ -87,35 +86,14 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(); + virtual void BuildSchedGraph(AliasAnalysis *AA); /// ComputeLatency - Compute node latency. /// virtual void ComputeLatency(SUnit *SU); - /// CountResults - The results of target nodes have register or immediate - /// operands first, then an optional chain, and optional flag operands - /// (which do not go into the machine instrs.) - static unsigned CountResults(SDNode *Node); - - /// CountOperands - The inputs to target nodes have any actual inputs first, - /// followed by special operands that describe memory references, then an - /// optional chain operand, then flag operands. Compute the number of - /// actual operands that will go into the resulting MachineInstr. - static unsigned CountOperands(SDNode *Node); - - /// ComputeMemOperandsEnd - Find the index one past the last - /// MemOperandSDNode operand - static unsigned ComputeMemOperandsEnd(SDNode *Node); - - /// EmitNode - Generate machine code for an node and needed dependencies. - /// VRBaseMap contains, for each already emitted node, the first virtual - /// register number for the results of the node. - /// - void EmitNode(SDNode *Node, bool IsClone, bool HasClone, - DenseMap &VRBaseMap); - - virtual MachineBasicBlock *EmitSchedule(); + virtual MachineBasicBlock * + EmitSchedule(DenseMap *EM); /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. @@ -129,47 +107,6 @@ namespace llvm { virtual void getCustomGraphFeatures(GraphWriter &GW) const; private: - /// EmitSubregNode - Generate machine code for subreg nodes. - /// - void EmitSubregNode(SDNode *Node, - DenseMap &VRBaseMap); - - /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS - /// nodes. - /// - void EmitCopyToRegClassNode(SDNode *Node, - DenseMap &VRBaseMap); - - /// getVR - Return the virtual register corresponding to the specified result - /// of the specified node. - unsigned getVR(SDValue Op, DenseMap &VRBaseMap); - - /// getDstOfCopyToRegUse - If the only use of the specified result number of - /// node is a CopyToReg, return its destination register. Return 0 otherwise. 
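The new EmitSchedule above delegates the per-node work to InstrEmitter but keeps a simple driving loop: a null entry in Sequence is a noop, an SUnit with no SDNode is a physical-register copy, and otherwise the flagged chain is emitted before the node itself. A condensed sketch with stand-in types (EmitterT only logs what it would emit):

#include <vector>

struct SDNodeT { SDNodeT *Flagged = nullptr; };
struct SUnitT { SDNodeT *Node = nullptr; };

struct EmitterT {
  std::vector<const char *> Log;
  void EmitNoop()          { Log.push_back("noop"); }
  void EmitPhysRegCopy()   { Log.push_back("copy"); }
  void EmitNode(SDNodeT *) { Log.push_back("node"); }
};

void emitSchedule(const std::vector<SUnitT *> &Sequence, EmitterT &Em) {
  for (SUnitT *SU : Sequence) {
    if (!SU) { Em.EmitNoop(); continue; }         // null SUnit* is a noop
    if (!SU->Node) { Em.EmitPhysRegCopy(); continue; }
    std::vector<SDNodeT *> Flagged;
    for (SDNodeT *N = SU->Node->Flagged; N; N = N->Flagged)
      Flagged.push_back(N);
    while (!Flagged.empty()) {                    // flagged nodes first
      Em.EmitNode(Flagged.back());
      Flagged.pop_back();
    }
    Em.EmitNode(SU->Node);                        // then the node itself
  }
}

int main() {
  SDNodeT B, A{&B};
  SUnitT SU{&A};
  EmitterT Em;
  std::vector<SUnitT *> Seq{&SU, nullptr};
  emitSchedule(Seq, Em); // emits: node, node, noop
  return Em.Log.size() == 3 ? 0 : 1;
}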
- unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; - - void AddOperand(MachineInstr *MI, SDValue Op, unsigned IIOpNum, - const TargetInstrDesc *II, - DenseMap &VRBaseMap); - - /// AddRegisterOperand - Add the specified register as an operand to the - /// specified machine instr. Insert register copies if the register is - /// not in the required register class. - void AddRegisterOperand(MachineInstr *MI, SDValue Op, - unsigned IIOpNum, const TargetInstrDesc *II, - DenseMap &VRBaseMap); - - /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an - /// implicit physical register output. - void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, - bool IsCloned, unsigned SrcReg, - DenseMap &VRBaseMap); - - void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, - const TargetInstrDesc &II, bool IsClone, - bool IsCloned, - DenseMap &VRBaseMap); - /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c8f4b520ff18e..542bf647eb0fe 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Constants.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" @@ -31,6 +32,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -46,14 +48,14 @@ using namespace llvm; /// makeVTList - Return an instance of the SDVTList struct initialized with the /// specified members. -static SDVTList makeVTList(const MVT *VTs, unsigned NumVTs) { +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { SDVTList Res = {VTs, NumVTs}; return Res; } -static const fltSemantics *MVTToAPFloatSemantics(MVT VT) { - switch (VT.getSimpleVT()) { - default: assert(0 && "Unknown FP format"); +static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -76,7 +78,7 @@ bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { return getValueAPF().bitwiseIsEqual(V); } -bool ConstantFPSDNode::isValueValidForType(MVT VT, +bool ConstantFPSDNode::isValueValidForType(EVT VT, const APFloat& Val) { assert(VT.isFloatingPoint() && "Can only convert between FP types"); @@ -88,7 +90,7 @@ bool ConstantFPSDNode::isValueValidForType(MVT VT, // convert modifies in place, so make a copy. APFloat Val2 = APFloat(Val); bool losesInfo; - (void) Val2.convert(*MVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } @@ -243,7 +245,7 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { /// if the operation does not depend on the sign of the input (setne and seteq). 
static int isSignedOp(ISD::CondCode Opcode) { switch (Opcode) { - default: assert(0 && "Illegal integer setcc operation!"); + default: llvm_unreachable("Illegal integer setcc operation!"); case ISD::SETEQ: case ISD::SETNE: return 0; case ISD::SETLT: @@ -363,11 +365,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { switch (N->getOpcode()) { case ISD::TargetExternalSymbol: case ISD::ExternalSymbol: - assert(0 && "Should only be used on nodes with operands"); + llvm_unreachable("Should only be used on nodes with operands"); default: break; // Normal nodes don't need extra info. - case ISD::ARG_FLAGS: - ID.AddInteger(cast(N)->getArgFlags().getRawBits()); - break; case ISD::TargetConstant: case ISD::Constant: ID.AddPointer(cast(N)->getConstantIntValue()); @@ -403,11 +402,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::SRCVALUE: ID.AddPointer(cast(N)->getValue()); break; - case ISD::MEMOPERAND: { - const MachineMemOperand &MO = cast(N)->MO; - MO.Profile(ID); - break; - } case ISD::FrameIndex: case ISD::TargetFrameIndex: ID.AddInteger(cast(N)->getIndex()); @@ -429,12 +423,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } - case ISD::CALL: { - const CallSDNode *Call = cast(N); - ID.AddInteger(Call->getCallingConv()); - ID.AddInteger(Call->isVarArg()); - break; - } case ISD::LOAD: { const LoadSDNode *LD = cast(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); @@ -466,7 +454,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::VECTOR_SHUFFLE: { const ShuffleVectorSDNode *SVN = cast(N); - for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); i != e; ++i) ID.AddInteger(SVN->getMaskElt(i)); break; @@ -488,20 +476,18 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { } /// encodeMemSDNodeFlags - Generic routine for computing a value for use in -/// the CSE map that carries alignment, volatility, indexing mode, and +/// the CSE map that carries volatility, indexing mode, and /// extension/truncation information. /// static inline unsigned -encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, - bool isVolatile, unsigned Alignment) { +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && "AM may not require more than 3 bits!"); return ConvType | (AM << 2) | - (isVolatile << 5) | - ((Log2_32(Alignment) + 1) << 6); + (isVolatile << 5); } //===----------------------------------------------------------------------===// @@ -519,7 +505,6 @@ static bool doNotCSE(SDNode *N) { case ISD::DBG_LABEL: case ISD::DBG_STOPPOINT: case ISD::EH_LABEL: - case ISD::DECLARE: return true; // Never CSE these nodes. } @@ -626,7 +611,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { bool Erased = false; switch (N->getOpcode()) { case ISD::EntryToken: - assert(0 && "EntryToken should not be in CSEMaps!"); + llvm_unreachable("EntryToken should not be in CSEMaps!"); return false; case ISD::HANDLENODE: return false; // noop. 
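The assert(0 ...) to llvm_unreachable(...) migration visible here matters in release builds: assert disappears under NDEBUG, while llvm_unreachable still traps, so an "impossible" switch case aborts instead of falling into undefined behavior. A sketch with a hypothetical local macro, mirroring the isSignedOp switch above:

#include <cstdio>
#include <cstdlib>

// Unlike assert, this does not vanish when NDEBUG is defined.
#define MY_UNREACHABLE(MSG)                                        \
  do {                                                             \
    std::fprintf(stderr, "UNREACHABLE: %s\n", (MSG) ? (MSG) : ""); \
    std::abort();                                                  \
  } while (false)

enum CondCode { SETEQ, SETNE, SETLT, BAD };

int isSignedOp(CondCode CC) {
  switch (CC) {
  default: MY_UNREACHABLE("Illegal integer setcc operation!");
  case SETEQ:
  case SETNE: return 0;
  case SETLT: return 1;
  }
}

int main() {
  return isSignedOp(SETNE); // isSignedOp(BAD) would abort, even with NDEBUG
}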
case ISD::CONDCODE: @@ -646,12 +631,12 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; } case ISD::VALUETYPE: { - MVT VT = cast(N)->getVT(); + EVT VT = cast(N)->getVT(); if (VT.isExtended()) { Erased = ExtendedValueTypeNodes.erase(VT); } else { - Erased = ValueTypeNodes[VT.getSimpleVT()] != 0; - ValueTypeNodes[VT.getSimpleVT()] = 0; + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0; } break; } @@ -667,8 +652,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag && !N->isMachineOpcode() && !doNotCSE(N)) { N->dump(this); - cerr << "\n"; - assert(0 && "Node is not in map!"); + errs() << "\n"; + llvm_unreachable("Node is not in map!"); } #endif return Erased; @@ -762,7 +747,7 @@ void SelectionDAG::VerifyNode(SDNode *N) { default: break; case ISD::BUILD_PAIR: { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); assert(N->getNumValues() == 1 && "Too many results!"); assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && "Wrong return type!"); @@ -780,7 +765,7 @@ void SelectionDAG::VerifyNode(SDNode *N) { assert(N->getValueType(0).isVector() && "Wrong return type!"); assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && "Wrong number of operands!"); - MVT EltVT = N->getValueType(0).getVectorElementType(); + EVT EltVT = N->getValueType(0).getVectorElementType(); for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) assert((I->getValueType() == EltVT || (EltVT.isInteger() && I->getValueType().isInteger() && @@ -791,13 +776,13 @@ void SelectionDAG::VerifyNode(SDNode *N) { } } -/// getMVTAlignment - Compute the default alignment value for the +/// getEVTAlignment - Compute the default alignment value for the /// given type. /// -unsigned SelectionDAG::getMVTAlignment(MVT VT) const { +unsigned SelectionDAG::getEVTAlignment(EVT VT) const { const Type *Ty = VT == MVT::iPTR ? - PointerType::get(Type::Int8Ty, 0) : - VT.getTypeForMVT(); + PointerType::get(Type::getInt8Ty(*getContext()), 0) : + VT.getTypeForEVT(*getContext()); return TLI.getTargetData()->getABITypeAlignment(Ty); } @@ -815,6 +800,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi, MF = &mf; MMI = mmi; DW = dw; + Context = &mf.getFunction()->getContext(); } SelectionDAG::~SelectionDAG() { @@ -846,7 +832,19 @@ void SelectionDAG::clear() { Root = getEntryNode(); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::SIGN_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { if (Op.getValueType() == VT) return Op; APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(), VT.getSizeInBits()); @@ -856,29 +854,29 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, MVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// -SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, MVT VT) { - MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } -SDValue SelectionDAG::getConstant(uint64_t Val, MVT VT, bool isT) { - MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); } -SDValue SelectionDAG::getConstant(const APInt &Val, MVT VT, bool isT) { - return getConstant(*ConstantInt::get(Val), VT, isT); +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { + return getConstant(*ConstantInt::get(*Context, Val), VT, isT); } -SDValue SelectionDAG::getConstant(const ConstantInt &Val, MVT VT, bool isT) { +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { assert(VT.isInteger() && "Cannot create FP integer constant!"); - MVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; assert(Val.getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -913,14 +911,14 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { } -SDValue SelectionDAG::getConstantFP(const APFloat& V, MVT VT, bool isTarget) { - return getConstantFP(*ConstantFP::get(V), VT, isTarget); +SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); } -SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); - MVT EltVT = + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; // Do the map lookup using the actual bit pattern for the floating point @@ -953,8 +951,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, MVT VT, bool isTarget){ return Result; } -SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { - MVT EltVT = +SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { + EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; if (EltVT==MVT::f32) return getConstantFP(APFloat((float)Val), VT, isTarget); @@ -963,14 +961,15 @@ SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) { } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, - MVT VT, int64_t Offset, + EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); - + // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned BitWidth = PTy.getSizeInBits(); if (BitWidth < 64) Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); @@ -1002,7 +1001,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, return SDValue(N, 0); } -SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? 
ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1017,7 +1016,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) { return SDValue(N, 0); } -SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget, +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); @@ -1036,9 +1035,9 @@ SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget, return SDValue(N, 0); } -SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, +SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, unsigned Alignment, int Offset, - bool isTarget, + bool isTarget, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); @@ -1062,7 +1061,7 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT, } -SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT, +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, unsigned char TargetFlags) { @@ -1101,26 +1100,13 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { return SDValue(N, 0); } -SDValue SelectionDAG::getArgFlags(ISD::ArgFlagsTy Flags) { - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::ARG_FLAGS, getVTList(MVT::Other), 0, 0); - ID.AddInteger(Flags.getRawBits()); - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate(); - new (N) ARG_FLAGSSDNode(Flags); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); -} - -SDValue SelectionDAG::getValueType(MVT VT) { - if (VT.isSimple() && (unsigned)VT.getSimpleVT() >= ValueTypeNodes.size()) - ValueTypeNodes.resize(VT.getSimpleVT()+1); +SDValue SelectionDAG::getValueType(EVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); SDNode *&N = VT.isExtended() ? 
- ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT()]; + ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; if (N) return SDValue(N, 0); N = NodeAllocator.Allocate(); @@ -1129,7 +1115,7 @@ SDValue SelectionDAG::getValueType(MVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { +SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); N = NodeAllocator.Allocate(); @@ -1138,7 +1124,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT, +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, unsigned char TargetFlags) { SDNode *&N = TargetExternalSymbols[std::pair(Sym, @@ -1177,19 +1163,19 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl &M) { } } -SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, +SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, const int *Mask) { assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && + assert(VT.isVector() && N1.getValueType().isVector() && "Vector Shuffle VTs must be a vectors"); assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && "Vector Shuffle VTs must have same element type"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) - return N1; + return getUNDEF(VT); - // Validate that all indices in Mask are within the range of the elements + // Validate that all indices in Mask are within the range of the elements // input to the shuffle. unsigned NElts = VT.getVectorNumElements(); SmallVector MaskVec; @@ -1197,18 +1183,18 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); MaskVec.push_back(Mask[i]); } - + // Canonicalize shuffle v, v -> v, undef if (N1 == N2) { N2 = getUNDEF(VT); for (unsigned i = 0; i != NElts; ++i) if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; } - + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. if (N1.getOpcode() == ISD::UNDEF) commuteShuffle(N1, N2, MaskVec); - + // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef bool AllLHS = true, AllRHS = true; @@ -1231,7 +1217,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, N1 = getUNDEF(VT); commuteShuffle(N1, N2, MaskVec); } - + // If Identity shuffle, or all shuffle in to undef, return that node. 
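getVectorShuffle above gains several mask canonicalizations: shuffle undef,undef now folds to undef, shuffle v,v remaps all RHS indices into the LHS, and an identity mask folds to the input only when the element counts match. A toy version over bare mask vectors (no SDNodes; -1 marks an undef lane):

#include <vector>

// Indices < NElts select from the LHS vector, indices >= NElts from
// the RHS. When LHS == RHS, every RHS index is remapped into the LHS
// so the RHS operand can become undef.
void canonicalizeSameOperands(std::vector<int> &Mask, unsigned NElts) {
  for (int &Idx : Mask)
    if (Idx >= (int)NElts)
      Idx -= (int)NElts; // shuffle v, v -> shuffle v, undef
}

bool isIdentity(const std::vector<int> &Mask) {
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)i)
      return false;
  return true; // undef lanes (-1) do not break identity
}

int main() {
  std::vector<int> Mask = {0, 5, -1, 3}; // NElts = 4, RHS == LHS
  canonicalizeSameOperands(Mask, 4);
  // Mask is now {0, 1, -1, 3}: an identity, so the shuffle folds to LHS.
  return isIdentity(Mask) ? 0 : 1;
}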
bool AllUndef = true; bool Identity = true; @@ -1239,7 +1225,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity) + if (Identity && NElts == N1.getValueType().getVectorNumElements()) return N1; if (AllUndef) return getUNDEF(VT); @@ -1249,17 +1235,17 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); for (unsigned i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); - + void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - + // Allocate the mask array for the node out of the BumpPtrAllocator, since // SDNode doesn't have access to it. This memory will be "leaked" when // the node is deallocated, but recovered when the NodeAllocator is released. int *MaskAlloc = OperandAllocator.Allocate(NElts); memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); - + ShuffleVectorSDNode *N = NodeAllocator.Allocate(); new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); @@ -1267,7 +1253,7 @@ SDValue SelectionDAG::getVectorShuffle(MVT VT, DebugLoc dl, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code) { @@ -1289,7 +1275,7 @@ SDValue SelectionDAG::getConvertRndSat(MVT VT, DebugLoc dl, return SDValue(N, 0); } -SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); ID.AddInteger(RegNo); @@ -1305,7 +1291,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, MVT VT) { SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root, unsigned Line, unsigned Col, - Value *CU) { + MDNode *CU) { SDNode *N = NodeAllocator.Allocate(); new (N) DbgStopPointSDNode(Root, Line, Col, CU); N->setDebugLoc(DL); @@ -1349,32 +1335,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { return SDValue(N, 0); } -SDValue SelectionDAG::getMemOperand(const MachineMemOperand &MO) { -#ifndef NDEBUG - const Value *v = MO.getValue(); - assert((!v || isa(v->getType())) && - "SrcValue is not a pointer?"); -#endif - - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MEMOPERAND, getVTList(MVT::Other), 0, 0); - MO.Profile(ID); - - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - - SDNode *N = NodeAllocator.Allocate(); - new (N) MemOperandSDNode(MO); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); -} - /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { - MVT OpTy = Op.getValueType(); + EVT OpTy = Op.getValueType(); MVT ShTy = TLI.getShiftAmountTy(); if (OpTy == ShTy || OpTy.isVector()) return Op; @@ -1384,10 +1348,10 @@ SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { /// CreateStackTemporary - Create a stack temporary, suitable for holding the /// specified value type. 
-SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); - unsigned ByteSize = VT.getStoreSizeInBits()/8; - const Type *Ty = VT.getTypeForMVT(); + unsigned ByteSize = VT.getStoreSize(); + const Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); @@ -1397,11 +1361,11 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT, unsigned minAlign) { /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. -SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { +SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { unsigned Bytes = std::max(VT1.getStoreSizeInBits(), VT2.getStoreSizeInBits())/8; - const Type *Ty1 = VT1.getTypeForMVT(); - const Type *Ty2 = VT2.getTypeForMVT(); + const Type *Ty1 = VT1.getTypeForEVT(*getContext()); + const Type *Ty2 = VT2.getTypeForEVT(*getContext()); const TargetData *TD = TLI.getTargetData(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1411,7 +1375,7 @@ SDValue SelectionDAG::CreateStackTemporary(MVT VT1, MVT VT2) { return getFrameIndex(FrameIdx, TLI.getPointerTy()); } -SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, DebugLoc dl) { // These setcc operations always fold. switch (Cond) { @@ -1441,7 +1405,7 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, const APInt &C1 = N1C->getAPIntValue(); switch (Cond) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: return getConstant(C1 == C2, VT); case ISD::SETNE: return getConstant(C1 != C2, VT); case ISD::SETULT: return getConstant(C1.ult(C2), VT); @@ -1516,6 +1480,10 @@ SDValue SelectionDAG::FoldSetCC(MVT VT, SDValue N1, /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + // This predicate is not safe for vector operations. + if (Op.getValueType().isVector()) + return false; + unsigned BitWidth = Op.getValueSizeInBits(); return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); } @@ -1743,7 +1711,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, } return; case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast(Op.getOperand(1))->getVT(); + EVT EVT = cast(Op.getOperand(1))->getVT(); unsigned EBits = EVT.getSizeInBits(); // Sign extension. 
Compute the demanded bits in the result that are not @@ -1788,14 +1756,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::LOAD: { if (ISD::isZEXTLoad(Op.getNode())) { LoadSDNode *LD = cast(Op); - MVT VT = LD->getMemoryVT(); + EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; } return; } case ISD::ZERO_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; APInt InMask = Mask; @@ -1809,7 +1777,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::SIGN_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; @@ -1850,7 +1818,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::ANY_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = Mask; InMask.trunc(InBits); @@ -1862,7 +1830,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::TRUNCATE: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = Mask; InMask.zext(InBits); @@ -1875,7 +1843,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, break; } case ISD::AssertZext: { - MVT VT = cast(Op.getOperand(1))->getVT(); + EVT VT = cast(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, KnownOne, Depth+1); @@ -1981,7 +1949,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: - TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this); + TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, + Depth); } return; } @@ -1993,7 +1962,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getSizeInBits(); unsigned Tmp, Tmp2; @@ -2212,6 +2181,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); } +bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { + // If we're told that NaNs won't happen, assume they won't. + if (FiniteOnlyFPMath()) + return true; + + // If the value is a constant, we can obviously see if it is a NaN or not. + if (const ConstantFPSDNode *C = dyn_cast(Op)) + return !C->getValueAPF().isNaN(); + + // TODO: Recognize more cases here. 
+ + return false; +} bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { GlobalAddressSDNode *GA = dyn_cast(Op); @@ -2228,7 +2210,7 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { /// element of the result of the vector shuffle. SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned i) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); if (N->getMaskElt(i) < 0) return getUNDEF(VT.getVectorElementType()); @@ -2239,7 +2221,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, if (V.getOpcode() == ISD::BIT_CONVERT) { V = V.getOperand(0); - MVT VVT = V.getValueType(); + EVT VVT = V.getValueType(); if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems) return SDValue(); } @@ -2256,7 +2238,7 @@ SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, /// getNode - Gets or creates the specified node. /// -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; @@ -2274,7 +2256,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT) { } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - MVT VT, SDValue Operand) { + EVT VT, SDValue Operand) { // Constant fold unary operations with an integer constant operand. if (ConstantSDNode *C = dyn_cast(Operand.getNode())) { const APInt &Val = C->getAPIntValue(); @@ -2332,7 +2314,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*MVTToAPFloatSemantics(VT), + (void)V.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -2366,7 +2348,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::MERGE_VALUES: case ISD::CONCAT_VECTORS: return Operand; // Factor, merge or concat of one node? No need. 
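The new isKnownNeverNaN above is deliberately conservative: it proves the no-NaN property only under finite-only FP math or for inspectable constants, and otherwise answers false. A sketch over a hypothetical value representation:

#include <cmath>

struct Val {
  bool IsConstantFP;
  double FPValue; // meaningful only when IsConstantFP
};

bool isKnownNeverNaN(const Val &V, bool FiniteOnlyFPMath) {
  // If we're told NaNs won't happen (e.g. finite-only math), assume so.
  if (FiniteOnlyFPMath)
    return true;
  // A constant can be inspected directly.
  if (V.IsConstantFP)
    return !std::isnan(V.FPValue);
  // The original leaves a TODO to recognize more cases; default: unknown.
  return false;
}

int main() {
  Val C{true, 1.5}, X{false, 0.0};
  return (isKnownNeverNaN(C, false) && !isKnownNeverNaN(X, false)) ? 0 : 1;
}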
- case ISD::FP_ROUND: assert(0 && "Invalid method to make FP_ROUND node"); + case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); case ISD::FP_EXTEND: assert(VT.isFloatingPoint() && Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); @@ -2487,7 +2469,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - MVT VT, + EVT VT, ConstantSDNode *Cst1, ConstantSDNode *Cst2) { const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); @@ -2522,7 +2504,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, return SDValue(); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2) { ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); @@ -2624,7 +2606,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return N1; break; case ISD::FP_ROUND_INREG: { - MVT EVT = cast(N2)->getVT(); + EVT EVT = cast(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg round!"); assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && "Cannot FP_ROUND_INREG integer types"); @@ -2641,7 +2623,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, break; case ISD::AssertSext: case ISD::AssertZext: { - MVT EVT = cast(N2)->getVT(); + EVT EVT = cast(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); @@ -2650,7 +2632,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, break; } case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast(N2)->getVT(); + EVT EVT = cast(N2)->getVT(); assert(VT == N1.getValueType() && "Not an inreg extend!"); assert(VT.isInteger() && EVT.isInteger() && "Cannot *_EXTEND_INREG FP types"); @@ -2688,13 +2670,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, // expanding large vector constants. if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt = N1.getOperand(N2C->getZExtValue()); - if (Elt.getValueType() != VT) { + EVT VEltTy = N1.getValueType().getVectorElementType(); + if (Elt.getValueType() != VEltTy) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. - assert(VT.isInteger() && Elt.getValueType().isInteger() && - VT.bitsLE(Elt.getValueType()) && - "Bad type for BUILD_VECTOR operand"); - Elt = getNode(ISD::TRUNCATE, DL, VT, Elt); + Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt); + } + if (VT != VEltTy) { + // If the vector element type is not legal, the EXTRACT_VECTOR_ELT + // result is implicitly extended. + Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt); } return Elt; } @@ -2895,7 +2880,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. 
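getStackArgumentTokenFactor, added above, collects a chain value for every load of an incoming stack argument (a load whose base is a frame index with a negative index) and keeps the original chain first so target LowerCall hooks can still find CALLSEQ_BEGIN. A toy rendering with integer stand-ins for SDValues:

#include <vector>

struct LoadT {
  int FrameIndex; // < 0 for incoming (fixed) stack argument slots
  int ChainValue; // stand-in for SDValue(L, 1)
};

std::vector<int> stackArgumentTokenFactor(int Chain,
                                          const std::vector<LoadT> &Loads) {
  std::vector<int> ArgChains;
  ArgChains.push_back(Chain); // original chain goes first
  for (const LoadT &L : Loads)
    if (L.FrameIndex < 0)
      ArgChains.push_back(L.ChainValue);
  return ArgChains; // the real code wraps these in a TokenFactor node
}

int main() {
  std::vector<LoadT> Loads = {{-1, 10}, {2, 11}, {-3, 12}};
  std::vector<int> TF = stackArgumentTokenFactor(7, Loads);
  return TF.size() == 3 ? 0 : 1; // chain + the two incoming-arg loads
}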
ConstantSDNode *N1C = dyn_cast(N1.getNode()); @@ -2938,7 +2923,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, } break; case ISD::VECTOR_SHUFFLE: - assert(0 && "should use getVectorShuffle constructor!"); + llvm_unreachable("should use getVectorShuffle constructor!"); break; case ISD::BIT_CONVERT: // Fold bit_convert nodes from a type to themselves. @@ -2971,23 +2956,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; return getNode(Opcode, DL, VT, Ops, 5); } +/// getStackArgumentTokenFactor - Compute a TokenFactor to force all +/// the incoming stack arguments to be loaded from the stack. +SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast(*U)) + if (FrameIndexSDNode *FI = dyn_cast(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. + return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + /// getMemsetValue - Vectorized representation of the memset value /// operand. -static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, +static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, DebugLoc dl) { unsigned NumBits = VT.isVector() ? VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits(); @@ -3021,9 +3029,9 @@ static SDValue getMemsetValue(SDValue Value, MVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { +static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, + std::string &Str, unsigned Offset) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3031,7 +3039,8 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, unsigned NumElts = VT.getVectorNumElements(); MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? 
MVT::i32 : MVT::i64; return DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getConstant(0, MVT::getVectorVT(EltVT, NumElts))); + DAG.getConstant(0, + EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts))); } assert(!VT.isVector() && "Can't handle vector type here!"); @@ -3051,7 +3060,7 @@ static SDValue getMemsetStringVal(MVT VT, DebugLoc dl, SelectionDAG &DAG, /// static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SelectionDAG &DAG) { - MVT VT = Base.getValueType(); + EVT VT = Base.getValueType(); return DAG.getNode(ISD::ADD, Base.getDebugLoc(), VT, Base, DAG.getConstant(Offset, VT)); } @@ -3083,7 +3092,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { /// to replace the memset / memcpy is below the threshold. It also returns the /// types of the sequence of memory ops to perform memset / memcpy. static -bool MeetsMaxMemopRequirement(std::vector &MemOps, +bool MeetsMaxMemopRequirement(std::vector &MemOps, SDValue Dst, SDValue Src, unsigned Limit, uint64_t Size, unsigned &Align, std::string &Str, bool &isSrcStr, @@ -3091,11 +3100,11 @@ bool MeetsMaxMemopRequirement(std::vector &MemOps, const TargetLowering &TLI) { isSrcStr = isMemSrcFromString(Src, Str); bool isSrcConst = isa(Src); - bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(); - MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); + EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); + bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT); if (VT != MVT::iAny) { - unsigned NewAlign = (unsigned) - TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT()); + const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); // If source is a string constant, this will require an unaligned load. if (NewAlign > Align && (isSrcConst || AllowUnalign)) { if (Dst.getOpcode() != ISD::FrameIndex) { @@ -3120,7 +3129,7 @@ bool MeetsMaxMemopRequirement(std::vector &MemOps, } if (VT == MVT::iAny) { - if (AllowUnalign) { + if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) { VT = MVT::i64; } else { switch (Align & 7) { @@ -3133,7 +3142,7 @@ bool MeetsMaxMemopRequirement(std::vector &MemOps, MVT LVT = MVT::i64; while (!TLI.isTypeLegal(LVT)) - LVT = (MVT::SimpleValueType)(LVT.getSimpleVT() - 1); + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); if (VT.bitsGT(LVT)) @@ -3148,12 +3157,12 @@ bool MeetsMaxMemopRequirement(std::vector &MemOps, if (VT.isVector()) { VT = MVT::i64; while (!TLI.isTypeLegal(VT)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); VTSize = VT.getSizeInBits() / 8; } else { // This can result in a type that is not legal on the target, e.g. // 1 or 2 bytes on PPC. - VT = (MVT::SimpleValueType)(VT.getSimpleVT() - 1); + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); VTSize >>= 1; } } @@ -3177,7 +3186,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // Expand memcpy to a series of load and store ops if the size operand falls // below a certain threshold. 
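MeetsMaxMemopRequirement above decides whether a memcpy/memset expands inline and, if so, which sequence of value types covers the byte count. The greedy halving at its core looks roughly like the sketch below (plain bit widths stand in for EVTs, and Limit plays the role of getMaxStoresPerMemcpy):

#include <vector>

bool meetsMaxMemopRequirement(std::vector<unsigned> &MemOps,
                              unsigned long Size, unsigned Limit,
                              unsigned MaxBytes /* widest legal type */) {
  unsigned VTSize = MaxBytes;
  unsigned NumMemOps = 0;
  while (Size != 0) {
    while (VTSize > Size)
      VTSize >>= 1; // e.g. i64 -> i32 -> i16 -> i8
    if (++NumMemOps > Limit)
      return false; // too many ops: fall back to the library call
    MemOps.push_back(VTSize);
    Size -= VTSize;
  }
  return true;
}

int main() {
  std::vector<unsigned> Ops;
  // 15 bytes with 8-byte ops allowed expands as 8 + 4 + 2 + 1.
  bool OK = meetsMaxMemopRequirement(Ops, 15, 8, 8);
  return (OK && Ops.size() == 4) ? 0 : 1;
}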
- std::vector MemOps; + std::vector MemOps; uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemcpy(); @@ -3193,8 +3202,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SmallVector OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; - for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + for (unsigned i = 0; i != NumMemOps; ++i) { + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3214,7 +3223,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // thing to do is generate a LoadExt/StoreTrunc pair. These simplify // to Load/Store if NVT==VT. // FIXME does the case above also need this? - MVT NVT = TLI.getTypeToTransformTo(VT); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), @@ -3242,7 +3251,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // Expand memmove to a series of load and store ops if the size operand falls // below a certain threshold. - std::vector MemOps; + std::vector MemOps; uint64_t Limit = -1ULL; if (!AlwaysInline) Limit = TLI.getMaxStoresPerMemmove(); @@ -3260,7 +3269,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SmallVector OutChains; unsigned NumMemOps = MemOps.size(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3275,7 +3284,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &LoadChains[0], LoadChains.size()); OutChains.clear(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; @@ -3299,7 +3308,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, // Expand memset to a series of load/store ops if the size operand // falls below a certain threshold. - std::vector MemOps; + std::vector MemOps; std::string Str; bool CopyFromStr; if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), @@ -3311,7 +3320,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, unsigned NumMemOps = MemOps.size(); for (unsigned i = 0; i < NumMemOps; i++) { - MVT VT = MemOps[i]; + EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, @@ -3368,15 +3377,18 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. 
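The libcall fallback above stops hard-coding the symbol "memcpy" and CallingConv::C; it now consults a runtime-library table via getLibcallName/getLibcallCallingConv, so a target can rename the routine or give it a different convention. A sketch of that indirection with hypothetical table contents:

#include <string>

namespace RTLIB { enum Libcall { MEMCPY, MEMMOVE, MEMSET, NUM }; }

struct LibcallInfo {
  const char *Names[RTLIB::NUM] = {"memcpy", "memmove", "memset"};
  int CallingConvs[RTLIB::NUM]  = {0, 0, 0}; // 0 = C convention here

  const char *getLibcallName(RTLIB::Libcall LC) const { return Names[LC]; }
  int getLibcallCallingConv(RTLIB::Libcall LC) const {
    return CallingConvs[LC];
  }
};

int main() {
  LibcallInfo TLI;
  // A target could override, e.g. TLI.Names[RTLIB::MEMCPY] = "__memcpy".
  std::string Sym = TLI.getLibcallName(RTLIB::MEMCPY);
  return (Sym == "memcpy" && TLI.getLibcallCallingConv(RTLIB::MEMCPY) == 0)
             ? 0 : 1;
}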
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memcpy", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } @@ -3414,15 +3426,18 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memmove", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } @@ -3456,7 +3471,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. 
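Related to the store-based memset expansion used nearby: getMemsetValue replicates the fill byte across a wider integer so one i32 or i64 store covers several bytes. A self-contained sketch of the splat (plain bit widths instead of EVTs):

#include <cstdint>

uint64_t memsetSplat(uint8_t Byte, unsigned NumBits) {
  uint64_t Val = Byte;
  unsigned Shift = 8;
  while (Shift < NumBits) {
    Val = (Val << Shift) | Val; // double the replicated width each step
    Shift <<= 1;
  }
  return NumBits < 64 ? Val & ((1ULL << NumBits) - 1) : Val;
}

int main() {
  // Filling with 0xAB at i32 width yields the value 0xABABABAB.
  return memsetSplat(0xAB, 32) == 0xABABABABULL ? 0 : 1;
}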
- const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(); + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -3466,31 +3481,61 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); else Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); - Entry.Node = Src; Entry.Ty = Type::Int32Ty; Entry.isSExt = true; + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*getContext()); + Entry.isSExt = true; Args.push_back(Entry); - Entry.Node = Size; Entry.Ty = IntPtrTy; Entry.isSExt = false; + Entry.Node = Size; + Entry.Ty = IntPtrTy; + Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in DebugLoc std::pair<SDValue,SDValue> CallResult = - TLI.LowerCallTo(Chain, Type::VoidTy, - false, false, false, false, 0, CallingConv::C, false, - getExternalSymbol("memset", TLI.getPointerTy()), + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMSET), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), Args, *this, dl); return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, const Value* PtrVal, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(Ptr.getNode())) + PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always.
+ Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, MachineMemOperand *MMO) { assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - MVT VT = Cmp.getValueType(); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(MemVT); + EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); FoldingSetNodeID ID; @@ -3498,21 +3543,48 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, - Chain, Ptr, Cmp, Swp, PtrVal, Alignment); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + // Check if the memory reference references a frame index + if (!PtrVal) + if (const FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(Ptr.getNode())) + PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always.
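Both getAtomic entry points above now build a MachineMemOperand up front and forward to new MMO-taking overloads, so the memory-reference information lives on the node from the start. The construction they share, pulled into a helper for illustration (a sketch mirroring the patch; makeAtomicMMO itself is not an LLVM API):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    MachineMemOperand *makeAtomicMMO(MachineFunction &MF, const Value *PtrVal,
                                     EVT MemVT, unsigned Align) {
      // Atomics both read and write, and are treated as volatile for now.
      unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                       MachineMemOperand::MOVolatile;
      return MF.getMachineMemOperand(PtrVal, Flags, /*Offset=*/0,
                                     MemVT.getStoreSize(), Align);
    }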
+ Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrVal, Flags, 0, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO) { assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || @@ -3526,10 +3598,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, Opcode == ISD::ATOMIC_SWAP) && "Invalid Atomic Op"); - MVT VT = Val.getValueType(); - - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(MemVT); + EVT VT = Val.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); FoldingSetNodeID ID; @@ -3537,11 +3606,12 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, MVT MemVT, SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, - Chain, Ptr, Val, PtrVal, Alignment); + new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3554,7 +3624,7 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, if (NumOps == 1) return Ops[0]; - SmallVector<MVT, 4> VTs; + SmallVector<EVT, 4> VTs; VTs.reserve(NumOps); for (unsigned i = 0; i < NumOps; ++i) VTs.push_back(Ops[i].getValueType()); @@ -3564,9 +3634,9 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, - const MVT *VTs, unsigned NumVTs, + const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, @@ -3577,81 +3647,104 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, - MVT MemVT, const Value *srcValue, int SVOff, + EVT MemVT, const Value *srcValue, int SVOff, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = 0; + if (WriteMem) + Flags |= MachineMemOperand::MOStore; + if (ReadMem) + Flags |= MachineMemOperand::MOLoad; + if (Vol) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(srcValue, Flags, SVOff, + MemVT.getStoreSize(), Align); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + (Opcode <= INT_MAX && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + // Memoize the node unless it returns a flag.
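Every getter in this section now ends its CSE lookup the same way: a hit calls refineAlignment(MMO) on the existing node before returning it, so repeated queries can only tighten what is known about the access. The underlying find-or-insert idiom, shown standalone with llvm/ADT/FoldingSet (ToyNode and getOrCreate are illustrative names only):

    #include "llvm/ADT/FoldingSet.h"
    using namespace llvm;

    struct ToyNode : FoldingSetNode {
      int Key;
      explicit ToyNode(int K) : Key(K) {}
      void Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Key); }
    };

    ToyNode *getOrCreate(FoldingSet<ToyNode> &Set, int Key) {
      FoldingSetNodeID ID;
      ID.AddInteger(Key);
      void *IP = 0;
      if (ToyNode *E = Set.FindNodeOrInsertPos(ID, IP))
        return E;                      // hit: reuse (SelectionDAG also refines here)
      ToyNode *N = new ToyNode(Key);   // miss: create and insert at the hint
      Set.InsertNode(N, IP);
      return N;
    }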
MemIntrinsicSDNode *N; if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); + } N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, - srcValue, SVOff, Align, Vol, ReadMem, WriteMem); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, - srcValue, SVOff, Align, Vol, ReadMem, WriteMem); - } - AllNodes.push_back(N); - return SDValue(N, 0); -} - -SDValue -SelectionDAG::getCall(unsigned CallingConv, DebugLoc dl, bool IsVarArgs, - bool IsTailCall, bool IsInreg, SDVTList VTs, - const SDValue *Operands, unsigned NumOperands, - unsigned NumFixedArgs) { - // Do not include isTailCall in the folding set profile. - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::CALL, VTs, Operands, NumOperands); - ID.AddInteger(CallingConv); - ID.AddInteger(IsVarArgs); - void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - // Instead of including isTailCall in the folding set, we just - // set the flag of the existing node. - if (!IsTailCall) - cast<CallSDNode>(E)->setNotTailCall(); - return SDValue(E, 0); + new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); } - SDNode *N = NodeAllocator.Allocate<CallSDNode>(); - new (N) CallSDNode(CallingConv, dl, IsVarArgs, IsTailCall, IsInreg, - VTs, Operands, NumOperands, NumFixedArgs); - CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, MVT VT, SDValue Chain, + ISD::LoadExtType ExtType, EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, - const Value *SV, int SVOffset, MVT EVT, + const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); + Alignment = getEVTAlignment(VT); + + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, + MemVT.getStoreSize(), Alignment); + return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO); +} - if (VT == EVT) { +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, + ISD::LoadExtType ExtType, EVT VT, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { + if (VT == MemVT) { ExtType = ISD::NON_EXTLOAD; } else if (ExtType == ISD::NON_EXTLOAD) { - assert(VT == EVT && "Non-extending load from different memory type!"); + assert(VT == MemVT && "Non-extending load from different memory type!"); } else { // Extending load.
if (VT.isVector()) - assert(EVT.getVectorNumElements() == VT.getVectorNumElements() && + assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() && "Invalid vector extload!"); else - assert(EVT.bitsLT(VT) && + assert(MemVT.bitsLT(VT) && "Should only be an extending load, not truncating!"); assert((ExtType == ISD::EXTLOAD || VT.isInteger()) && "Cannot sign/zero extend a FP/Vector load!"); - assert(VT.isInteger() == EVT.isInteger() && + assert(VT.isInteger() == MemVT.isInteger() && "Cannot convert from FP to Int or Int -> FP!"); } @@ -3664,20 +3757,21 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, SDValue Ops[] = { Chain, Ptr, Offset }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); - ID.AddInteger(EVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, isVolatile, Alignment)); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); - new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, EVT, SV, SVOffset, - Alignment, isVolatile); + new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile, unsigned Alignment) { @@ -3686,14 +3780,14 @@ SDValue SelectionDAG::getLoad(MVT VT, DebugLoc dl, SV, SVOffset, VT, isVolatile, Alignment); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, MVT VT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, - int SVOffset, MVT EVT, + int SVOffset, EVT MemVT, bool isVolatile, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, - SV, SVOffset, EVT, isVolatile, Alignment); + SV, SVOffset, MemVT, isVolatile, Alignment); } SDValue @@ -3711,25 +3805,43 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile, unsigned Alignment) { - MVT VT = Val.getValueType(); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); + Alignment = getEVTAlignment(Val.getValueType()); + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, + Val.getValueType().getStoreSize(), Alignment); + + return getStore(Chain, dl, Val, Ptr, MMO); +} + +SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, MachineMemOperand *MMO) { + EVT VT = Val.getValueType(); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); SDValue Ops[] = { Chain, Val, Ptr, Undef }; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); 
ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, - isVolatile, Alignment)); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, - VT, SV, SVOffset, Alignment, isVolatile); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3737,19 +3849,39 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, - int SVOffset, MVT SVT, + int SVOffset, EVT SVT, bool isVolatile, unsigned Alignment) { - MVT VT = Val.getValueType(); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(SVT); + + // Check if the memory reference references a frame index + if (!SV) + if (const FrameIndexSDNode *FI = + dyn_cast(Ptr.getNode())) + SV = PseudoSourceValue::getFixedStack(FI->getIndex()); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); + + return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, + SDValue Ptr, EVT SVT, + MachineMemOperand *MMO) { + EVT VT = Val.getValueType(); if (VT == SVT) - return getStore(Chain, dl, Val, Ptr, SV, SVOffset, isVolatile, Alignment); + return getStore(Chain, dl, Val, Ptr, MMO); assert(VT.bitsGT(SVT) && "Not a truncation?"); assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getMVTAlignment(VT); SDVTList VTs = getVTList(MVT::Other); SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -3757,14 +3889,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, - isVolatile, Alignment)); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile())); void *IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); return SDValue(E, 0); + } SDNode *N = NodeAllocator.Allocate(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, - SVT, SV, SVOffset, Alignment, isVolatile); + new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3788,21 +3920,20 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, SDNode *N = NodeAllocator.Allocate(); new (N) StoreSDNode(Ops, dl, VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), - ST->getSrcValue(), ST->getSrcValueOffset(), - ST->getAlignment(), ST->isVolatile()); + ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(MVT VT, DebugLoc dl, +SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, 
SDValue Chain, SDValue Ptr, SDValue SV) { SDValue Ops[] = { Chain, Ptr, SV }; return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDUse *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -3818,7 +3949,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, return getNode(Opcode, DL, VT, &NewOps[0], NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -3876,14 +4007,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const std::vector &ResultTys, + const std::vector &ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const MVT *VTs, unsigned NumVTs, + const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps) { if (NumVTs == 1) return getNode(Opcode, DL, VTs[0], Ops, NumOps); @@ -3895,11 +4026,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); +#if 0 switch (Opcode) { // FIXME: figure out how to safely handle things like // int foo(int x) { return 1 << (x & 255); } // int bar() { return foo(256); } -#if 0 case ISD::SRA_PARTS: case ISD::SRL_PARTS: case ISD::SHL_PARTS: @@ -3915,8 +4046,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); } break; -#endif } +#endif // Memoize the node unless it returns a flag. 
SDNode *N; @@ -3998,17 +4129,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return getNode(Opcode, DL, VTList, Ops, 5); } -SDVTList SelectionDAG::getVTList(MVT VT) { +SDVTList SelectionDAG::getVTList(EVT VT) { return makeVTList(SDNode::getValueTypeList(VT), 1); } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) return *I; - MVT *Array = Allocator.Allocate<MVT>(2); + EVT *Array = Allocator.Allocate<EVT>(2); Array[0] = VT1; Array[1] = VT2; SDVTList Result = makeVTList(Array, 2); @@ -4016,14 +4147,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2) { return Result; } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && I->VTs[2] == VT3) return *I; - MVT *Array = Allocator.Allocate<MVT>(3); + EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4032,14 +4163,14 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3) { return Result; } -SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { for (std::vector<SDVTList>::reverse_iterator I = VTList.rbegin(), E = VTList.rend(); I != E; ++I) if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && I->VTs[2] == VT3 && I->VTs[3] == VT4) return *I; - MVT *Array = Allocator.Allocate<MVT>(3); + EVT *Array = Allocator.Allocate<EVT>(3); Array[0] = VT1; Array[1] = VT2; Array[2] = VT3; @@ -4049,9 +4180,9 @@ SDVTList SelectionDAG::getVTList(MVT VT1, MVT VT2, MVT VT3, MVT VT4) { return Result; } -SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) { +SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { switch (NumVTs) { - case 0: assert(0 && "Cannot have nodes without results!"); + case 0: llvm_unreachable("Cannot have nodes without results!"); case 1: return getVTList(VTs[0]); case 2: return getVTList(VTs[0], VTs[1]); case 3: return getVTList(VTs[0], VTs[1], VTs[2]); @@ -4073,7 +4204,7 @@ SDVTList SelectionDAG::getVTList(const MVT *VTs, unsigned NumVTs) { return *I; } - MVT *Array = Allocator.Allocate<MVT>(NumVTs); + EVT *Array = Allocator.Allocate<EVT>(NumVTs); std::copy(VTs, VTs+NumVTs, Array); SDVTList Result = makeVTList(Array, NumVTs); VTList.push_back(Result); @@ -4215,20 +4346,20 @@ void SDNode::DropOperands() { /// machine opcode.
/// SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT) { + EVT VT) { SDVTList VTs = getVTList(VT); return SelectNodeTo(N, MachineOpc, VTs, 0, 0); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1) { + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -4236,7 +4367,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -4244,41 +4375,41 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT, const SDValue *Ops, + EVT VT, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, const SDValue *Ops, + EVT VT1, EVT VT2, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2) { + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, MVT VT4, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -4286,7 +4417,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -4294,7 +4425,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -4303,7 +4434,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -4318,20 +4449,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT) { + EVT VT) { SDVTList VTs = getVTList(VT); return MorphNodeTo(N, Opc, VTs, 0, 0); } SDNode *SelectionDAG::MorphNodeTo(SDNode 
*N, unsigned Opc, - MVT VT, SDValue Op1) { + EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return MorphNodeTo(N, Opc, VTs, Ops, 1); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -4339,7 +4470,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, SDValue Op1, + EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -4347,34 +4478,34 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT, const SDValue *Ops, + EVT VT, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, const SDValue *Ops, + EVT VT1, EVT VT2, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2) { + EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, MVT VT3, + EVT VT1, EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); return MorphNodeTo(N, Opc, VTs, Ops, NumOps); } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -4382,7 +4513,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -4390,7 +4521,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - MVT VT1, MVT VT2, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -4441,29 +4572,35 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, DeadNodeSet.insert(Used); } - // If NumOps is larger than the # of operands we currently have, reallocate - // the operand list. - if (NumOps > N->NumOperands) { - if (N->OperandsNeedDelete) - delete[] N->OperandList; - - if (N->isMachineOpcode()) { - // We're creating a final node that will live unmorphed for the - // remainder of the current SelectionDAG iteration, so we can allocate - // the operands directly out of a pool with no recycling metadata. - N->OperandList = OperandAllocator.Allocate(NumOps); - N->OperandsNeedDelete = false; - } else { - N->OperandList = new SDUse[NumOps]; + if (MachineSDNode *MN = dyn_cast(N)) { + // Initialize the memory references information. + MN->setMemRefs(0, 0); + // If NumOps is larger than the # of operands we can have in a + // MachineSDNode, reallocate the operand list. 
+ if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { + if (MN->OperandsNeedDelete) + delete[] MN->OperandList; + if (NumOps > array_lengthof(MN->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + MN->InitOperands(OperandAllocator.Allocate(NumOps), + Ops, NumOps); + else + MN->InitOperands(MN->LocalOperands, Ops, NumOps); + MN->OperandsNeedDelete = false; + } else + MN->InitOperands(MN->OperandList, Ops, NumOps); + } else { + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. + if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + N->InitOperands(new SDUse[NumOps], Ops, NumOps); N->OperandsNeedDelete = true; - } - } - - // Assign the new operands. - N->NumOperands = NumOps; - for (unsigned i = 0, e = NumOps; i != e; ++i) { - N->OperandList[i].setUser(N); - N->OperandList[i].setInitial(Ops[i]); + } else + MN->InitOperands(MN->OperandList, Ops, NumOps); } // Delete any nodes that are still dead after adding the uses for the @@ -4481,108 +4618,189 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, } -/// getTargetNode - These are used for target selectors to create a new node -/// with specified return type(s), target opcode, and operands. +/// getMachineNode - These are used for target selectors to create a new node +/// with specified return type(s), MachineInstr opcode, and operands. /// -/// Note that getTargetNode returns the resultant node. If there is already a +/// Note that getMachineNode returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of /// the current one. 
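As the comment above says, the overloads that follow replace getTargetNode. For a caller the visible difference is the typed result: a MachineSDNode* that can carry memory operands, rather than a plain SDNode* created through the negated-opcode convention. A hypothetical selector fragment (TargetOpc is a placeholder, not a real instruction opcode):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    MachineSDNode *selectSomething(SelectionDAG &DAG, DebugLoc dl,
                                   unsigned TargetOpc, SDValue Op) {
      // Old style: SDNode *N = DAG.getTargetNode(TargetOpc, dl, MVT::i32, Op);
      MachineSDNode *N = DAG.getMachineNode(TargetOpc, dl, MVT::i32, Op);
      // N->setMemRefs(...) can now attach MachineMemOperands to the node.
      return N;
    }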
-SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT) { - return getNode(~Opcode, dl, VT).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, 0, 0); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1) { - return getNode(~Opcode, dl, VT, Op1).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1, SDValue Op2) { - return getNode(~Opcode, dl, VT, Op1, Op2).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - SDValue Op1, SDValue Op2, - SDValue Op3) { - return getNode(~Opcode, dl, VT, Op1, Op2, Op3).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT, - const SDValue *Ops, unsigned NumOps) { - return getNode(~Opcode, dl, VT, Ops, NumOps).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - SDValue Op; - return getNode(~Opcode, dl, VTs, &Op, 0).getNode(); + return getMachineNode(Opcode, dl, VTs, 0, 0); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); - return getNode(~Opcode, dl, VTs, &Op1, 1).getNode(); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, - SDValue Op2) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, SDValue Op1, - SDValue Op2, SDValue Op3) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2, Op3 }; - return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode 
*SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2 }; - return getNode(~Opcode, dl, VTs, Ops, 2).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - SDValue Op1, SDValue Op2, - SDValue Op3) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return getNode(~Opcode, dl, VTs, Ops, 3).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - MVT VT1, MVT VT2, MVT VT3, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, MVT VT1, - MVT VT2, MVT VT3, MVT VT4, - const SDValue *Ops, unsigned NumOps) { +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return getNode(~Opcode, dl, VTs, Ops, NumOps).getNode(); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); } -SDNode *SelectionDAG::getTargetNode(unsigned Opcode, DebugLoc dl, - const std::vector &ResultTys, - const SDValue *Ops, unsigned NumOps) { - return getNode(~Opcode, dl, ResultTys, Ops, NumOps).getNode(); +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + const std::vector &ResultTys, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, + const SDValue *Ops, unsigned NumOps) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag; + MachineSDNode *N; + void *IP; + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); + IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return cast(E); + } + + // Allocate a new MachineSDNode. + N = NodeAllocator.Allocate(); + new (N) MachineSDNode(~Opcode, DL, VTs); + + // Initialize the operands list. + if (NumOps > array_lengthof(N->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. 
+ N->InitOperands(OperandAllocator.Allocate(NumOps), + Ops, NumOps); + else + N->InitOperands(N->LocalOperands, Ops, NumOps); + N->OperandsNeedDelete = false; + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyNode(N); +#endif + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetInstrInfo::EXTRACT_SUBREG nodes. +SDValue +SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Subreg = getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetInstrInfo::INSERT_SUBREG nodes. +SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Result = getMachineNode(TargetInstrInfo::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); } /// getNodeIfExists - Get the specified node if it's already available, or @@ -4937,64 +5155,28 @@ HandleSDNode::~HandleSDNode() { } GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, - MVT VT, int64_t o, unsigned char TF) + EVT VT, int64_t o, unsigned char TF) : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = const_cast(GA); } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MVT memvt, - const Value *srcValue, int SVO, - unsigned alignment, bool vol) - : SDNode(Opc, dl, VTs), MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); - assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); - assert(getAlignment() == alignment && "Alignment representation error!"); - assert(isVolatile() == vol && "Volatile representation error!"); +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, + MachineMemOperand *mmo) + : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, - unsigned NumOps, MVT memvt, const Value *srcValue, - int SVO, unsigned alignment, bool vol) + const SDValue *Ops, unsigned NumOps, EVT memvt, + MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), - MemoryVT(memvt), SrcValue(srcValue), SVOffset(SVO) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, vol, alignment); - assert(isPowerOf2_32(alignment) && "Alignment is not a power of 2!"); - assert(getAlignment() == alignment && "Alignment representation error!"); - assert(isVolatile() == vol && "Volatile representation error!"); -} - -/// getMemOperand - Return a MachineMemOperand object describing the memory -/// reference performed by this memory reference. 
-MachineMemOperand MemSDNode::getMemOperand() const { - int Flags = 0; - if (isa<LoadSDNode>(this)) - Flags = MachineMemOperand::MOLoad; - else if (isa<StoreSDNode>(this)) - Flags = MachineMemOperand::MOStore; - else if (isa<AtomicSDNode>(this)) { - Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - } - else { - const MemIntrinsicSDNode* MemIntrinNode = dyn_cast<MemIntrinsicSDNode>(this); - assert(MemIntrinNode && "Unknown MemSDNode opcode!"); - if (MemIntrinNode->readMem()) Flags |= MachineMemOperand::MOLoad; - if (MemIntrinNode->writeMem()) Flags |= MachineMemOperand::MOStore; - } - - int Size = (getMemoryVT().getSizeInBits() + 7) >> 3; - if (isVolatile()) Flags |= MachineMemOperand::MOVolatile; - - // Check if the memory reference references a frame index - const FrameIndexSDNode *FI = - dyn_cast<FrameIndexSDNode>(getBasePtr().getNode()); - if (!getSrcValue() && FI) - return MachineMemOperand(PseudoSourceValue::getFixedStack(FI->getIndex()), - Flags, 0, Size, getAlignment()); - else - return MachineMemOperand(getSrcValue(), Flags, getSrcValueOffset(), - Size, getAlignment()); + MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } /// Profile - Gather unique data for the node. @@ -5003,19 +5185,30 @@ void SDNode::Profile(FoldingSetNodeID &ID) const { AddNodeIDNode(ID, this); } -static ManagedStatic<std::set<MVT, MVT::compareRawBits> > EVTs; -static MVT VTs[MVT::LAST_VALUETYPE]; +namespace { + struct EVTArray { + std::vector<EVT> VTs; + + EVTArray() { + VTs.reserve(MVT::LAST_VALUETYPE); + for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.push_back(MVT((MVT::SimpleValueType)i)); + } + }; +} + +static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; +static ManagedStatic<EVTArray> SimpleVTArray; static ManagedStatic<sys::SmartMutex<true> > VTMutex; /// getValueTypeList - Return a pointer to the specified value type.
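The rewritten getValueTypeList below is the payoff of the EVTArray above: simple types index a table built once under ManagedStatic, so only extended types ever take VTMutex. The fast-path/slow-path split, sketched standalone (std::mutex stands in for sys::SmartMutex, and the containers are passed explicitly rather than held in statics):

    #include "llvm/CodeGen/ValueTypes.h"
    #include <mutex>
    #include <set>
    #include <vector>
    using namespace llvm;

    const EVT *lookupVT(std::set<EVT, EVT::compareRawBits> &Extended,
                        std::mutex &M, std::vector<EVT> &SimpleTable, EVT VT) {
      if (VT.isExtended()) {
        std::lock_guard<std::mutex> Lock(M); // rare path pays for the lock
        return &*Extended.insert(VT).first;
      }
      // Common path: simple types index the prebuilt table, lock-free.
      return &SimpleTable[VT.getSimpleVT().SimpleTy];
    }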
/// -const MVT *SDNode::getValueTypeList(MVT VT) { - sys::SmartScopedLock Lock(&*VTMutex); +const EVT *SDNode::getValueTypeList(EVT VT) { if (VT.isExtended()) { + sys::SmartScopedLock Lock(*VTMutex); return &(*EVTs->insert(VT).first); } else { - VTs[VT.getSimpleVT()] = VT; - return &VTs[VT.getSimpleVT()]; + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; } } @@ -5186,14 +5379,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; - case ISD::MEMOPERAND: return "MemOperand"; case ISD::EntryToken: return "EntryToken"; case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; case ISD::AssertZext: return "AssertZext"; case ISD::BasicBlock: return "BasicBlock"; - case ISD::ARG_FLAGS: return "ArgFlags"; case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; @@ -5208,6 +5399,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: return "LSDAADDR"; case ISD::EHSELECTION: return "EHSELECTION"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::ConstantPool: return "ConstantPool"; @@ -5239,10 +5431,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::INLINEASM: return "inlineasm"; case ISD::DBG_LABEL: return "dbg_label"; case ISD::EH_LABEL: return "eh_label"; - case ISD::DECLARE: return "declare"; case ISD::HANDLENODE: return "handlenode"; - case ISD::FORMAL_ARGUMENTS: return "formal_arguments"; - case ISD::CALL: return "call"; // Unary operators case ISD::FABS: return "fabs"; @@ -5332,7 +5521,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONVERT_RNDSAT: { switch (cast(this)->getCvtCode()) { - default: assert(0 && "Unknown cvt code!"); + default: llvm_unreachable("Unknown cvt code!"); case ISD::CVT_FF: return "cvt_ff"; case ISD::CVT_FS: return "cvt_fs"; case ISD::CVT_FU: return "cvt_fu"; @@ -5351,7 +5540,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::BR_JT: return "br_jt"; case ISD::BRCOND: return "brcond"; case ISD::BR_CC: return "br_cc"; - case ISD::RET: return "ret"; case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; @@ -5384,7 +5572,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONDCODE: switch (cast(this)->get()) { - default: assert(0 && "Unknown setcc condition!"); + default: llvm_unreachable("Unknown setcc condition!"); case ISD::SETOEQ: return "setoeq"; case ISD::SETOGT: return "setogt"; case ISD::SETOGE: return "setoge"; @@ -5463,14 +5651,26 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { if (getValueType(i) == MVT::Other) OS << "ch"; else - OS << getValueType(i).getMVTString(); + OS << getValueType(i).getEVTString(); } OS << " = " << getOperationName(G); } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (!isTargetOpcode() && getOpcode() == ISD::VECTOR_SHUFFLE) { - const ShuffleVectorSDNode *SVN = cast(this); + if (const MachineSDNode *MN = dyn_cast(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (next(i) 
!= e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast(this)) { OS << "<"; for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { int Idx = SVN->getMaskElt(i); @@ -5481,9 +5681,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << Idx; } OS << ">"; - } - - if (const ConstantSDNode *CSDN = dyn_cast(this)) { + } else if (const ConstantSDNode *CSDN = dyn_cast(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast(this)) { if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) @@ -5505,13 +5703,13 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " + " << offset; else OS << " " << offset; - if (unsigned char TF = GADN->getTargetFlags()) + if (unsigned int TF = GADN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const FrameIndexSDNode *FIDN = dyn_cast(this)) { OS << "<" << FIDN->getIndex() << ">"; } else if (const JumpTableSDNode *JTDN = dyn_cast(this)) { OS << "<" << JTDN->getIndex() << ">"; - if (unsigned char TF = JTDN->getTargetFlags()) + if (unsigned int TF = JTDN->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const ConstantPoolSDNode *CP = dyn_cast(this)){ int offset = CP->getOffset(); @@ -5523,7 +5721,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " + " << offset; else OS << " " << offset; - if (unsigned char TF = CP->getTargetFlags()) + if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast(this)) { OS << "<"; @@ -5541,80 +5739,47 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ExternalSymbolSDNode *ES = dyn_cast(this)) { OS << "'" << ES->getSymbol() << "'"; - if (unsigned char TF = ES->getTargetFlags()) + if (unsigned int TF = ES->getTargetFlags()) OS << " [TF=" << TF << ']'; } else if (const SrcValueSDNode *M = dyn_cast(this)) { if (M->getValue()) OS << "<" << M->getValue() << ">"; else OS << ""; - } else if (const MemOperandSDNode *M = dyn_cast(this)) { - if (M->MO.getValue()) - OS << "<" << M->MO.getValue() << ":" << M->MO.getOffset() << ">"; - else - OS << "MO.getOffset() << ">"; - } else if (const ARG_FLAGSSDNode *N = dyn_cast(this)) { - OS << N->getArgFlags().getArgFlagsString(); } else if (const VTSDNode *N = dyn_cast(this)) { - OS << ":" << N->getVT().getMVTString(); + OS << ":" << N->getVT().getEVTString(); } else if (const LoadSDNode *LD = dyn_cast(this)) { - const Value *SrcValue = LD->getSrcValue(); - int SrcOffset = LD->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; + OS << " <" << *LD->getMemOperand(); bool doExt = true; switch (LD->getExtensionType()) { default: doExt = false; break; - case ISD::EXTLOAD: OS << " getMemoryVT().getMVTString() << ">"; + OS << " from " << LD->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(LD->getAddressingMode()); if (*AM) - OS << " " << AM; - if (LD->isVolatile()) - OS << " "; - OS << " alignment=" << LD->getAlignment(); + OS << ", " << AM; + + OS << ">"; } else if (const StoreSDNode *ST = dyn_cast(this)) { - const Value *SrcValue = ST->getSrcValue(); - int SrcOffset = ST->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; + OS << " <" << *ST->getMemOperand(); if (ST->isTruncatingStore()) - OS << " 
getMemoryVT().getMVTString() << ">"; + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); const char *AM = getIndexedModeName(ST->getAddressingMode()); if (*AM) - OS << " " << AM; - if (ST->isVolatile()) - OS << " "; - OS << " alignment=" << ST->getAlignment(); - } else if (const AtomicSDNode* AT = dyn_cast(this)) { - const Value *SrcValue = AT->getSrcValue(); - int SrcOffset = AT->getSrcValueOffset(); - OS << " <"; - if (SrcValue) - OS << SrcValue; - else - OS << "null"; - OS << ":" << SrcOffset << ">"; - if (AT->isVolatile()) - OS << " "; - OS << " alignment=" << AT->getAlignment(); + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast(this)) { + OS << " <" << *M->getMemOperand() << ">"; } } @@ -5635,16 +5800,17 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { if (N->getOperand(i).getNode()->hasOneUse()) DumpNodes(N->getOperand(i).getNode(), indent+2, G); else - cerr << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": "; + errs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": "; - cerr << "\n" << std::string(indent, ' '); + errs() << "\n"; + errs().indent(indent); N->dump(G); } void SelectionDAG::dump() const { - cerr << "SelectionDAG has " << AllNodes.size() << " nodes:"; + errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { @@ -5655,7 +5821,7 @@ void SelectionDAG::dump() const { if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - cerr << "\n\n"; + errs() << "\n\n"; } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { @@ -5699,6 +5865,11 @@ void SDNode::dumpr() const { DumpNodesr(errs(), this, 0, 0, once); } +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(errs(), this, 0, G, once); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { @@ -5717,7 +5888,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits) { - MVT VT = getValueType(0); + EVT VT = getValueType(0); assert(VT.isVector() && "Expected a vector type"); unsigned sz = VT.getSizeInBits(); if (MinSplatBits > sz) @@ -5767,7 +5938,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, SplatValue = HighValue | LowValue; SplatUndef = HighUndef & LowUndef; - + sz = HalfSize; } @@ -5775,14 +5946,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } -bool ShuffleVectorSDNode::isSplatMask(const int *Mask, MVT VT) { +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { // Find the first non-undef value in the shuffle mask. unsigned i, e; for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) /* search */; assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); - + // Make sure all remaining elements are either undef or the same as the first // non-undef value. 
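The loop that follows completes the splat test described in the comment above: skip leading undef (negative) entries, then require every remaining defined entry to equal the first one. The same logic, self-contained:

    #include <cassert>

    static bool isSplatMask(const int *Mask, unsigned NumElts) {
      unsigned i = 0;
      while (i != NumElts && Mask[i] < 0)
        ++i;                                  // skip leading undefs
      assert(i != NumElts && "VECTOR_SHUFFLE node with all undef indices!");
      int Splat = Mask[i];
      for (; i != NumElts; ++i)
        if (Mask[i] >= 0 && Mask[i] != Splat) // undefs may appear anywhere
          return false;
      return true;
    }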
for (int Idx = Mask[i]; i != e; ++i) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 260911e3b9940..9017e435962b5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Constants.h" +#include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -49,6 +50,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -104,14 +106,14 @@ static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, } /// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// MVTs that represent all the individual underlying +/// EVTs that represent all the individual underlying /// non-aggregate types that comprise it. /// /// If Offsets is non-null, it points to a vector to be filled in /// with the in-memory offsets of each of the individual values. /// static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl &ValueVTs, + SmallVectorImpl &ValueVTs, SmallVectorImpl *Offsets = 0, uint64_t StartingOffset = 0) { // Given a struct type, recursively traverse the elements. @@ -135,9 +137,9 @@ static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, return; } // Interpret void as zero return values. - if (Ty == Type::VoidTy) + if (Ty == Type::getVoidTy(Ty->getContext())) return; - // Base case: we can get an MVT for this LLVM IR type. + // Base case: we can get an EVT for this LLVM IR type. ValueVTs.push_back(TLI.getValueType(Ty)); if (Offsets) Offsets->push_back(StartingOffset); @@ -161,7 +163,7 @@ namespace llvm { /// ValueVTs - The value types of the values, which may not be legal, and /// may need be promoted or synthesized from one or more registers. /// - SmallVector ValueVTs; + SmallVector ValueVTs; /// RegVTs - The value types of the registers. This is the same size as /// ValueVTs and it records, for each value, what the type of the assigned @@ -172,7 +174,7 @@ namespace llvm { /// getRegisterType member function, however when with physical registers /// it is necessary to have a separate record of the types. /// - SmallVector RegVTs; + SmallVector RegVTs; /// Regs - This list holds the registers assigned to the values. 
/// Each legal or promoted value requires one register, and each @@ -184,21 +186,21 @@ namespace llvm { RegsForValue(const TargetLowering &tli, const SmallVector<unsigned, 4> &regs, - MVT regvt, MVT valuevt) + EVT regvt, EVT valuevt) : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(const TargetLowering &tli, const SmallVector<unsigned, 4> &regs, - const SmallVector<MVT, 4> &regvts, - const SmallVector<MVT, 4> &valuevts) + const SmallVector<EVT, 4> &regvts, + const SmallVector<EVT, 4> &valuevts) : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(const TargetLowering &tli, + RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, const Type *Ty) : TLI(&tli) { ComputeValueVTs(tli, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = TLI->getRegisterType(ValueVT); + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT); + EVT RegisterVT = TLI->getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); @@ -352,11 +354,11 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, unsigned PHIReg = ValueMap[PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - MVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(VT); + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); @@ -366,7 +368,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, } } -unsigned FunctionLoweringInfo::MakeReg(MVT VT) { +unsigned FunctionLoweringInfo::MakeReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } @@ -378,15 +380,15 @@ unsigned FunctionLoweringInfo::MakeReg(MVT VT) { /// will assign registers for each member or element. /// unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { - SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, V->getType(), ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - MVT RegisterVT = TLI.getRegisterType(ValueVT); + EVT ValueVT = ValueVTs[Value]; + EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(ValueVT); + unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { unsigned R = MakeReg(RegisterVT); if (!FirstReg) FirstReg = R; @@ -402,7 +404,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { /// (ISD::AssertSext). static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, const SDValue *Parts, - unsigned NumParts, MVT PartVT, MVT ValueVT, + unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -418,11 +420,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned RoundParts = NumParts & (NumParts - 1) ?
1 << Log2_32(NumParts) : NumParts; unsigned RoundBits = PartBits * RoundParts; - MVT RoundVT = RoundBits == ValueBits ? - ValueVT : MVT::getIntegerVT(RoundBits); + EVT RoundVT = RoundBits == ValueBits ? + ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); SDValue Lo, Hi; - MVT HalfVT = MVT::getIntegerVT(RoundBits/2); + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT); @@ -439,7 +441,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (RoundParts < NumParts) { // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; - MVT OddVT = MVT::getIntegerVT(OddParts * PartBits); + EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); Hi = getCopyFromParts(DAG, dl, Parts+RoundParts, OddParts, PartVT, OddVT); @@ -447,7 +449,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, Lo = Val; if (TLI.isBigEndian()) std::swap(Lo, Hi); - MVT TotalVT = MVT::getIntegerVT(NumParts * PartBits); + EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi); Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi, DAG.getConstant(Lo.getValueType().getSizeInBits(), @@ -457,11 +459,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, } } else if (ValueVT.isVector()) { // Handle a multi-element vector. - MVT IntermediateVT, RegisterVT; + EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; unsigned NumRegs = - TLI.getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. 
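[Editor's note: the getCopyFromParts hunk above rounds a non-power-of-two part count down to a power of two and assembles the odd remainder separately. A standalone sketch of that arithmetic, in plain C++ with no LLVM dependencies; this Log2_32 is a hand-rolled stand-in for the MathExtras helper, not the real implementation:]

    #include <cassert>
    #include <cstdio>

    // Floor of log2 for nonzero inputs, mirroring llvm::Log2_32.
    static unsigned Log2_32(unsigned V) {
      unsigned L = 0;
      while (V >>= 1) ++L;
      return L;
    }

    int main() {
      for (unsigned NumParts = 1; NumParts <= 8; ++NumParts) {
        // Same expression as in getCopyFromParts: if NumParts is not a
        // power of two, round down to one; the rest become the "odd" parts.
        unsigned RoundParts = NumParts & (NumParts - 1)
                                  ? 1u << Log2_32(NumParts) : NumParts;
        unsigned OddParts = NumParts - RoundParts;
        assert((RoundParts & (RoundParts - 1)) == 0 && "must be a power of two");
        std::printf("NumParts=%u -> RoundParts=%u OddParts=%u\n",
                    NumParts, RoundParts, OddParts);
      }
      return 0;
    }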
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); @@ -494,11 +496,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, ValueVT, &Ops[0], NumIntermediates); } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) - assert(ValueVT == MVT(MVT::ppcf128) && PartVT == MVT(MVT::f64) && + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && "Unexpected split"); SDValue Lo, Hi; - Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[0]); - Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT(MVT::f64), Parts[1]); + Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]); if (TLI.isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi); @@ -506,7 +508,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, // FP split into integer parts (soft fp) assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); - MVT IntVT = MVT::getIntegerVT(ValueVT.getSizeInBits()); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); } } @@ -555,7 +557,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val); - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); return SDValue(); } @@ -563,11 +565,11 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, + SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT PtrVT = TLI.getPointerTy(); - MVT ValueVT = Val.getValueType(); + EVT PtrVT = TLI.getPointerTy(); + EVT ValueVT = Val.getValueType(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); @@ -588,10 +590,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val); } else if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, dl, ValueVT, Val); } else { - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -600,10 +602,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. 
if (PartVT.isInteger() && ValueVT.isInteger()) { - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } else { - assert(0 && "Unknown mismatch!"); + llvm_unreachable("Unknown mismatch!"); } } @@ -634,19 +636,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); NumParts = RoundParts; - ValueVT = MVT::getIntegerVT(NumParts * PartBits); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val); } // The number of parts is a power of 2. Repeatedly bisect the value using // EXTRACT_ELEMENT. Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl, - MVT::getIntegerVT(ValueVT.getSizeInBits()), + EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()), Val); for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { for (unsigned i = 0; i < NumParts; i += StepSize) { unsigned ThisBits = StepSize * PartBits / 2; - MVT ThisVT = MVT::getIntegerVT (ThisBits); + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); SDValue &Part0 = Parts[i]; SDValue &Part1 = Parts[i+StepSize/2]; @@ -692,11 +694,10 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, } // Handle a multi-element vector. - MVT IntermediateVT, RegisterVT; + EVT IntermediateVT, RegisterVT; unsigned NumIntermediates; - unsigned NumRegs = TLI - .getVectorTypeBreakdown(ValueVT, IntermediateVT, NumIntermediates, - RegisterVT); + unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, + IntermediateVT, NumIntermediates, RegisterVT); unsigned NumElements = ValueVT.getVectorNumElements(); assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); @@ -750,8 +751,10 @@ void SelectionDAGLowering::clear() { NodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + EdgeMapping.clear(); DAG.clear(); CurDebugLoc = DebugLoc::getUnknownLoc(); + HasTailCall = false; } /// getRoot - Return the current virtual root of the Selection DAG, @@ -817,8 +820,7 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { - default: assert(0 && "Unknown instruction type encountered!"); - abort(); + default: llvm_unreachable("Unknown instruction type encountered!"); // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE:return visit##OPCODE((CLASS&)I); @@ -831,7 +833,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { if (N.getNode()) return N; if (Constant *C = const_cast(dyn_cast(V))) { - MVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI.getValueType(V->getType(), true); if (ConstantInt *CI = dyn_cast(C)) return N = DAG.getConstant(*CI, VT); @@ -860,6 +862,10 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); OI != OE; ++OI) { SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. + if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. 
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) Constants.push_back(SDValue(Val, i)); } @@ -871,14 +877,14 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { assert((isa(C) || isa(C)) && "Unknown struct or array constant!"); - SmallVector ValueVTs; + SmallVector ValueVTs; ComputeValueVTs(TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct SmallVector Constants(NumElts); for (unsigned i = 0; i != NumElts; ++i) { - MVT EltVT = ValueVTs[i]; + EVT EltVT = ValueVTs[i]; if (isa(C)) Constants[i] = DAG.getUNDEF(EltVT); else if (EltVT.isFloatingPoint()) @@ -900,7 +906,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { Ops.push_back(getValue(CP->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); - MVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -927,30 +933,24 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { unsigned InReg = FuncInfo.ValueMap[V]; assert(InReg && "Value not in map!"); - RegsForValue RFV(TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } void SelectionDAGLowering::visitRet(ReturnInst &I) { - if (I.getNumOperands() == 0) { - DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), - MVT::Other, getControlRoot())); - return; - } - - SmallVector NewValues; - NewValues.push_back(getControlRoot()); + SDValue Chain = getControlRoot(); + SmallVector Outs; for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - SmallVector ValueVTs; + SmallVector ValueVTs; ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) continue; SDValue RetOp = getValue(I.getOperand(i)); for (unsigned j = 0, f = NumValues; j != f; ++j) { - MVT VT = ValueVTs[j]; + EVT VT = ValueVTs[j]; ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -965,13 +965,13 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { // conventions. The frontend should mark functions whose return values // require promoting with signext or zeroext attributes. 
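[Editor's note: the visitRet hunks around this point choose ISD::SIGN_EXTEND or ISD::ZERO_EXTEND from the function's sext/zext return attributes before widening a small return value to a legal register type (see the flag propagation in the hunk that follows). A plain-C++ illustration of why that choice is observable for negative values; the variable names here are illustrative only:]

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t Ret = -5; // a sub-register-width return value
      uint32_t ZExt = static_cast<uint8_t>(Ret); // what ISD::ZERO_EXTEND yields
      int32_t SExt = Ret;                        // what ISD::SIGN_EXTEND yields
      std::printf("zext: 0x%08x  sext: 0x%08x\n",
                  ZExt, static_cast<uint32_t>(SExt));
      // Prints: zext: 0x000000fb  sext: 0xfffffffb
      return 0;
    }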
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - MVT MinVT = TLI.getRegisterType(MVT::i32); + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); if (VT.bitsLT(MinVT)) VT = MinVT; } - unsigned NumParts = TLI.getNumRegisters(VT); - MVT PartVT = TLI.getRegisterType(VT); + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -981,14 +981,30 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); if (F->paramHasAttr(0, Attribute::InReg)) Flags.setInReg(); - for (unsigned i = 0; i < NumParts; ++i) { - NewValues.push_back(Parts[i]); - NewValues.push_back(DAG.getArgFlags(Flags)); - } + + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); } } - DAG.setRoot(DAG.getNode(ISD::RET, getCurDebugLoc(), MVT::Other, - &NewValues[0], NewValues.size())); + + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, + Outs, getCurDebugLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); } /// CopyToExportRegsIfNeeded - If the given value has virtual registers @@ -1073,7 +1089,7 @@ static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) { case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break; case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break; default: - assert(0 && "Invalid FCmp predicate opcode!"); + llvm_unreachable("Invalid FCmp predicate opcode!"); FOC = FPC = ISD::SETFALSE; break; } @@ -1099,7 +1115,7 @@ static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { case ICmpInst::ICMP_SGT: return ISD::SETGT; case ICmpInst::ICMP_UGT: return ISD::SETUGT; default: - assert(0 && "Invalid ICmp predicate opcode!"); + llvm_unreachable("Invalid ICmp predicate opcode!"); return ISD::SETNE; } } @@ -1131,7 +1147,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, Condition = getFCmpCondCode(FC->getPredicate()); } else { Condition = ISD::SETEQ; // silence warning. - assert(0 && "Unknown compare instruction"); + llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), @@ -1142,7 +1158,7 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(), + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), NULL, TBB, FBB, CurBB); SwitchCases.push_back(CB); } @@ -1229,7 +1245,7 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { // Figure out which block is immediately after the current one. 
MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (I.isUnconditional()) { @@ -1290,14 +1306,14 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { // Okay, we decided not to do this, remove any inserted MBB's and clear // SwitchCases. for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) - CurMBB->getParent()->erase(SwitchCases[i].ThisBB); + FuncInfo.MF->erase(SwitchCases[i].ThisBB); SwitchCases.clear(); } } // Create a CaseBlock record representing this branch. - CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(), + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), NULL, Succ0MBB, Succ1MBB, CurMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1315,9 +1331,11 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { if (CB.CmpMHS == NULL) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. - if (CB.CmpRHS == ConstantInt::getTrue() && CB.CC == ISD::SETEQ) + if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) Cond = CondLHS; - else if (CB.CmpRHS == ConstantInt::getFalse() && CB.CC == ISD::SETEQ) { + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { SDValue True = DAG.getConstant(1, CondLHS.getValueType()); Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); } else @@ -1329,7 +1347,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { const APInt& High = cast(CB.CmpRHS)->getValue(); SDValue CmpOp = getValue(CB.CmpMHS); - MVT VT = CmpOp.getValueType(); + EVT VT = CmpOp.getValueType(); if (cast(CB.CmpLHS)->isMinValue(true)) { Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), @@ -1350,7 +1368,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // If the lhs block is the next block, invert the condition so that we can @@ -1385,7 +1403,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - MVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1402,7 +1420,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. SDValue SwitchOp = getValue(JTH.SValue); - MVT VT = SwitchOp.getValueType(); + EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(JTH.First, VT)); @@ -1411,12 +1429,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. 
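[Editor's note: several hunks below collapse the explicit bitsGT/bitsLT dance of TRUNCATE vs. ZERO_EXTEND into the DAG.getZExtOrTrunc helper. A scalar model of its semantics, assuming widths of at most 64 bits; zextOrTrunc here is a hypothetical stand-in, not an LLVM API:]

    #include <cstdint>
    #include <cstdio>

    // Zero-extend when widening, drop high bits when narrowing.
    static uint64_t zextOrTrunc(uint64_t V, unsigned FromBits, unsigned ToBits) {
      if (FromBits < 64) V &= (1ull << FromBits) - 1; // normalize the input
      if (ToBits < 64)   V &= (1ull << ToBits) - 1;   // truncate if narrower
      return V;                                       // widening adds zero bits
    }

    int main() {
      std::printf("%llx\n", (unsigned long long)zextOrTrunc(0x1234, 16, 8)); // 34
      std::printf("%llx\n", (unsigned long long)zextOrTrunc(0xff, 8, 32));   // ff
      return 0;
    }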
- if (VT.bitsGT(TLI.getPointerTy())) - SwitchOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), SUB); - else - SwitchOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), SUB); + SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), @@ -1435,7 +1448,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), @@ -1454,7 +1467,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); - MVT VT = SwitchOp.getValueType(); + EVT VT = SwitchOp.getValueType(); SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, DAG.getConstant(B.First, VT)); @@ -1464,13 +1477,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { SUB, DAG.getConstant(B.Range, VT), ISD::SETUGT); - SDValue ShiftOp; - if (VT.bitsGT(TLI.getPointerTy())) - ShiftOp = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), SUB); - else - ShiftOp = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), SUB); + SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy()); B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), @@ -1480,7 +1487,7 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; MachineBasicBlock* MBB = B.Cases[0].ThisBB; @@ -1531,7 +1538,7 @@ void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, // This is used to avoid emitting unnecessary branches to the next block. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CurMBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; if (NextMBB == NextBlock) @@ -1584,13 +1591,13 @@ bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - if (++BBI != CurMBB->getParent()->end()) + if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; // TODO: If any two of the cases has the same destination, and if one value @@ -1698,14 +1705,11 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. 
- MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != CurMBB->getParent()->end()) - NextBlock = BBI; + ++BBI; const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); @@ -1771,14 +1775,11 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, MachineBasicBlock* Default) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; MachineFunction::iterator BBI = CR.CaseBB; - - if (++BBI != CurMBB->getParent()->end()) - NextBlock = BBI; + ++BBI; Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1898,14 +1899,15 @@ bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, Value* SV, MachineBasicBlock* Default){ - unsigned IntPtrBits = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. - MachineFunction *CurMF = CurMBB->getParent(); + MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) @@ -2069,7 +2071,6 @@ size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; - MachineFunction::iterator BBI = CurMBB; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -2174,24 +2175,26 @@ void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { if (!isa(I.getType()) && Op2.getValueType() != TLI.getShiftAmountTy()) { // If the operand is smaller than the shift count type, promote it. - if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType())) + EVT PTy = TLI.getPointerTy(); + EVT STy = TLI.getShiftAmountTy(); + if (STy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (TLI.getShiftAmountTy().getSizeInBits() >= + else if (STy.getSizeInBits() >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getShiftAmountTy(), Op2); // Otherwise we'll need to temporarily settle for some other // convenient type; type legalization will make adjustments as // needed. 
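[Editor's note: the visitShift logic above truncates an over-wide shift amount only when the shift-amount type can still represent any shift amount that fits the operand, i.e. when it has at least Log2_32_Ceil(operand width) bits. A standalone check of that condition; Log2_32_Ceil is re-implemented here for illustration:]

    #include <cstdio>

    // Ceiling of log2 for nonzero inputs, mirroring llvm::Log2_32_Ceil.
    static unsigned Log2_32_Ceil(unsigned V) {
      unsigned L = 0;
      while ((1u << L) < V) ++L;
      return L;
    }

    int main() {
      unsigned ValueBits = 64; // shifting an i64: legal amounts are 0..63
      unsigned ShAmtBits = 8;  // e.g. a target whose shift-amount type is i8
      std::printf("need %u bits, have %u -> %s\n",
                  Log2_32_Ceil(ValueBits), ShAmtBits,
                  ShAmtBits >= Log2_32_Ceil(ValueBits)
                      ? "truncation is safe" : "must keep wider type");
      return 0;
    }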
- else if (TLI.getPointerTy().bitsLT(Op2.getValueType())) + else if (PTy.bitsLT(Op2.getValueType())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), Op2); - else if (TLI.getPointerTy().bitsGT(Op2.getValueType())) + else if (PTy.bitsGT(Op2.getValueType())) Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), Op2); } @@ -2209,7 +2212,9 @@ void SelectionDAGLowering::visitICmp(User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - setValue(&I, DAG.getSetCC(getCurDebugLoc(),MVT::i1, Op1, Op2, Opcode)); + + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGLowering::visitFCmp(User &I) { @@ -2221,38 +2226,12 @@ void SelectionDAGLowering::visitFCmp(User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), MVT::i1, Op1, Op2, Condition)); -} - -void SelectionDAGLowering::visitVICmp(User &I) { - ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; - if (VICmpInst *IC = dyn_cast(&I)) - predicate = IC->getPredicate(); - else if (ConstantExpr *IC = dyn_cast(&I)) - predicate = ICmpInst::Predicate(IC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Opcode = getICmpCondCode(predicate); - setValue(&I, DAG.getVSetCC(getCurDebugLoc(), Op1.getValueType(), - Op1, Op2, Opcode)); -} - -void SelectionDAGLowering::visitVFCmp(User &I) { - FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; - if (VFCmpInst *FC = dyn_cast(&I)) - predicate = FC->getPredicate(); - else if (ConstantExpr *FC = dyn_cast(&I)) - predicate = FCmpInst::Predicate(FC->getPredicate()); - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Condition = getFCmpCondCode(predicate); - MVT DestVT = TLI.getValueType(I.getType()); - - setValue(&I, DAG.getVSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGLowering::visitSelect(User &I) { - SmallVector ValueVTs; + SmallVector ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues != 0) { @@ -2277,7 +2256,7 @@ void SelectionDAGLowering::visitSelect(User &I) { void SelectionDAGLowering::visitTrunc(User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } @@ -2285,7 +2264,7 @@ void SelectionDAGLowering::visitZExt(User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } @@ -2293,14 +2272,14 @@ void SelectionDAGLowering::visitSExt(User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPTrunc(User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), DestVT, N, DAG.getIntPtrConstant(0))); } @@ -2308,35 +2287,35 @@ void SelectionDAGLowering::visitFPTrunc(User &I) { void SelectionDAGLowering::visitFPExt(User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToUI(User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitFPToSI(User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitUIToFP(User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } void SelectionDAGLowering::visitSIToFP(User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } @@ -2344,14 +2323,9 @@ void SelectionDAGLowering::visitPtrToInt(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - MVT SrcVT = N.getValueType(); - MVT DestVT = TLI.getValueType(I.getType()); - SDValue Result; - if (DestVT.bitsLT(SrcVT)) - Result = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N); - else - // Note: ZERO_EXTEND can handle cases where the sizes are equal too - Result = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + SDValue Result = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT); setValue(&I, Result); } @@ -2359,19 +2333,14 @@ void SelectionDAGLowering::visitIntToPtr(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - MVT SrcVT = N.getValueType(); - MVT DestVT = TLI.getValueType(I.getType()); - if (DestVT.bitsLT(SrcVT)) - setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); - else - // Note: ZERO_EXTEND can handle cases where the sizes are equal too - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - DestVT, N)); + EVT SrcVT = N.getValueType(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } void SelectionDAGLowering::visitBitCast(User &I) { SDValue N = getValue(I.getOperand(0)); - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this // is either a BIT_CONVERT or a no-op. @@ -2422,7 +2391,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { // Convert the ConstantVector mask operand into an array of ints, with -1 // representing undef values. SmallVector MaskElts; - cast(I.getOperand(2))->getVectorElements(MaskElts); + cast(I.getOperand(2))->getVectorElements(*DAG.getContext(), + MaskElts); unsigned MaskNumElts = MaskElts.size(); for (unsigned i = 0; i != MaskNumElts; ++i) { if (isa(MaskElts[i])) @@ -2431,8 +2401,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { Mask.push_back(cast(MaskElts[i])->getSExtValue()); } - MVT VT = TLI.getValueType(I.getType()); - MVT SrcVT = Src1.getValueType(); + EVT VT = TLI.getValueType(I.getType()); + EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { @@ -2531,7 +2501,7 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { } } - if (RangeUse[0] == 0 && RangeUse[0] == 0) { + if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } @@ -2566,8 +2536,8 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { // We can't use either concat vectors or extract subvectors so fall back to // replacing the shuffle with extract and build vector. // to insert and build vector. 
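[Editor's note: when neither vector-concat nor extract-subvector applies, the shuffle is scalarized in the hunk that follows; the hunk above also fixes a real bug (RangeUse[0] was tested twice instead of RangeUse[1]). A toy model of mask application, where indices below the first source's width select from Src1, higher ones from Src2, and -1 means undef:]

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> Src1 = {10, 11, 12, 13};
      std::vector<int> Src2 = {20, 21, 22, 23};
      int Mask[] = {0, 5, -1, 2}; // 0..3 -> Src1, 4..7 -> Src2, -1 -> undef
      for (int Idx : Mask) {
        if (Idx < 0)
          std::printf("undef ");
        else if (Idx < (int)Src1.size())
          std::printf("%d ", Src1[Idx]);
        else
          std::printf("%d ", Src2[Idx - (int)Src1.size()]);
      }
      std::printf("\n"); // prints: 10 21 undef 12
      return 0;
    }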
- MVT EltVT = VT.getVectorElementType(); - MVT PtrVT = TLI.getPointerTy(); + EVT EltVT = VT.getVectorElementType(); + EVT PtrVT = TLI.getPointerTy(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { if (Mask[i] < 0) { @@ -2598,9 +2568,9 @@ void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); - SmallVector AggValueVTs; + SmallVector AggValueVTs; ComputeValueVTs(TLI, AggTy, AggValueVTs); - SmallVector ValValueVTs; + SmallVector ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); @@ -2637,7 +2607,7 @@ void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy, I.idx_begin(), I.idx_end()); - SmallVector ValValueVTs; + SmallVector ValValueVTs; ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -2682,7 +2652,8 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { uint64_t Offs = TD->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); SDValue OffsVal; - unsigned PtrBits = TLI.getPointerTy().getSizeInBits(); + EVT PTy = TLI.getPointerTy(); + unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) { OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), @@ -2700,12 +2671,7 @@ void SelectionDAGLowering::visitGetElementPtr(User &I) { // If the index is smaller or larger than intptr_t, truncate or extend // it. - if (IdxN.getValueType().bitsLT(N.getValueType())) - IdxN = DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), - N.getValueType(), IdxN); - else if (IdxN.getValueType().bitsGT(N.getValueType())) - IdxN = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - N.getValueType(), IdxN); + IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. @@ -2749,13 +2715,8 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) { - MVT IntPtr = TLI.getPointerTy(); - if (IntPtr.bitsLT(AllocSize.getValueType())) - AllocSize = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - IntPtr, AllocSize); - else if (IntPtr.bitsGT(AllocSize.getValueType())) - AllocSize = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - IntPtr, AllocSize); + EVT IntPtr = TLI.getPointerTy(); + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2784,7 +2745,7 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) { // Inform the Frame Information that we have just allocated a variable-sized // object. 
- CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject(); + FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } void SelectionDAGLowering::visitLoad(LoadInst &I) { @@ -2795,7 +2756,7 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) { bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); - SmallVector ValueVTs; + SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -2818,14 +2779,13 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) { SmallVector Values(NumValues); SmallVector Chains(NumValues); - MVT PtrVT = Ptr.getValueType(); + EVT PtrVT = Ptr.getValueType(); for (unsigned i = 0; i != NumValues; ++i) { SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - DAG.getNode(ISD::ADD, getCurDebugLoc(), - PtrVT, Ptr, - DAG.getConstant(Offsets[i], PtrVT)), - SV, Offsets[i], - isVolatile, Alignment); + DAG.getNode(ISD::ADD, getCurDebugLoc(), + PtrVT, Ptr, + DAG.getConstant(Offsets[i], PtrVT)), + SV, Offsets[i], isVolatile, Alignment); Values[i] = L; Chains[i] = L.getValue(1); } @@ -2850,7 +2810,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { Value *SrcV = I.getOperand(0); Value *PtrV = I.getOperand(1); - SmallVector ValueVTs; + SmallVector ValueVTs; SmallVector Offsets; ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); @@ -2865,7 +2825,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { SDValue Root = getRoot(); SmallVector Chains(NumValues); - MVT PtrVT = Ptr.getValueType(); + EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) @@ -2874,8 +2834,7 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)), - PtrV, Offsets[i], - isVolatile, Alignment); + PtrV, Offsets[i], isVolatile, Alignment); DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues)); @@ -2915,24 +2874,18 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, Ops.push_back(Op); } - std::vector VTArray; - if (I.getType() != Type::VoidTy) { - MVT VT = TLI.getValueType(I.getType()); - if (VT.isVector()) { - const VectorType *DestTy = cast(I.getType()); - MVT EltVT = TLI.getValueType(DestTy->getElementType()); - - VT = MVT::getVectorVT(EltVT, DestTy->getNumElements()); - assert(VT != MVT::Other && "Intrinsic uses a non-legal type?"); - } - - assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?"); - VTArray.push_back(VT); + SmallVector ValueVTs; + ComputeValueVTs(TLI, I.getType(), ValueVTs); +#ifndef NDEBUG + for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { + assert(TLI.isTypeLegal(ValueVTs[Val]) && + "Intrinsic uses a non-legal type?"); } +#endif // NDEBUG if (HasChain) - VTArray.push_back(MVT::Other); + ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(&VTArray[0], VTArray.size()); + SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); // Create the node. 
SDValue Result; @@ -2947,7 +2900,7 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, else if (!HasChain) Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); - else if (I.getType() != Type::VoidTy) + else if (I.getType() != Type::getVoidTy(*DAG.getContext())) Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), VTs, &Ops[0], Ops.size()); else @@ -2961,9 +2914,9 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, else DAG.setRoot(Chain); } - if (I.getType() != Type::VoidTy) { + if (I.getType() != Type::getVoidTy(*DAG.getContext())) { if (const VectorType *PTy = dyn_cast(I.getType())) { - MVT VT = TLI.getValueType(PTy); + EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result); } setValue(&I, Result); @@ -3890,7 +3843,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW && DW->ShouldEmitDwarfDebug()) { unsigned LabelID = - DW->RecordRegionStart(cast(RSI.getContext())); + DW->RecordRegionStart(RSI.getContext()); DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), getRoot(), LabelID)); } @@ -3905,7 +3858,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; MachineFunction &MF = DAG.getMachineFunction(); - DISubprogram Subprogram(cast(REI.getContext())); + DISubprogram Subprogram(REI.getContext()); if (isInlinedFnEnd(REI, MF.getFunction())) { // This is end of inlined function. Debugging information for inlined @@ -3924,7 +3877,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } unsigned LabelID = - DW->RecordRegionEnd(cast(REI.getContext())); + DW->RecordRegionEnd(REI.getContext()); DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(), getRoot(), LabelID)); return 0; @@ -3932,8 +3885,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::dbg_func_start: { DwarfWriter *DW = DAG.getDwarfWriter(); DbgFuncStartInst &FSI = cast(I); - if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None) || !DW - || !DW->ShouldEmitDwarfDebug()) + if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None)) return 0; MachineFunction &MF = DAG.getMachineFunction(); @@ -3954,9 +3906,11 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Record the source line. setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); + if (!DW || !DW->ShouldEmitDwarfDebug()) + return 0; DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc); - DISubprogram SP(cast(FSI.getSubprogram())); - DICompileUnit CU(PrevLocTpl.CompileUnit); + DISubprogram SP(FSI.getSubprogram()); + DICompileUnit CU(PrevLocTpl.Scope); unsigned LabelID = DW->RecordInlinedFnStart(SP, CU, PrevLocTpl.Line, PrevLocTpl.Col); @@ -3967,23 +3921,44 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // This is a beginning of a new function. MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo())); - + + if (!DW || !DW->ShouldEmitDwarfDebug()) + return 0; // llvm.dbg.func_start also defines beginning of function scope. - DW->RecordRegionStart(cast(FSI.getSubprogram())); + DW->RecordRegionStart(FSI.getSubprogram()); return 0; } case Intrinsic::dbg_declare: { if (OptLevel != CodeGenOpt::None) // FIXME: Variable debug info is not supported here. 
return 0; - + DwarfWriter *DW = DAG.getDwarfWriter(); + if (!DW) + return 0; DbgDeclareInst &DI = cast(I); if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None)) return 0; - Value *Variable = DI.getVariable(); - DAG.setRoot(DAG.getNode(ISD::DECLARE, dl, MVT::Other, getRoot(), - getValue(DI.getAddress()), getValue(Variable))); + MDNode *Variable = DI.getVariable(); + Value *Address = DI.getAddress(); + if (BitCastInst *BCI = dyn_cast(Address)) + Address = BCI->getOperand(0); + AllocaInst *AI = dyn_cast(Address); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) + return 0; + DenseMap::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return 0; // VLAs. + int FI = SI->second; +#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN + MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); + if (MMI) + MMI->setVariableDbgInfo(Variable, FI); +#else + DW->RecordVariable(Variable, FI); +#endif return 0; } case Intrinsic::eh_exception: { @@ -3998,54 +3973,45 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; } - case Intrinsic::eh_selector_i32: - case Intrinsic::eh_selector_i64: { + case Intrinsic::eh_selector: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - MVT VT = (Intrinsic == Intrinsic::eh_selector_i32 ? - MVT::i32 : MVT::i64); - if (MMI) { - if (CurMBB->isLandingPad()) - AddCatchInfo(I, MMI, CurMBB); - else { + if (CurMBB->isLandingPad()) + AddCatchInfo(I, MMI, CurMBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); + FuncInfo.CatchInfoLost.insert(&I); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) CurMBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. - SDVTList VTs = DAG.getVTList(VT, MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getOperand(1)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - } else { - setValue(&I, DAG.getConstant(0, VT)); + // FIXME: Mark exception selector register as live in. Hack for PR1508. + unsigned Reg = TLI.getExceptionSelectorRegister(); + if (Reg) CurMBB->addLiveIn(Reg); } + // Insert the EHSELECTION instruction. + SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); + SDValue Ops[2]; + Ops[0] = getValue(I.getOperand(1)); + Ops[1] = getRoot(); + SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); + + DAG.setRoot(Op.getValue(1)); + + setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); return 0; } - case Intrinsic::eh_typeid_for_i32: - case Intrinsic::eh_typeid_for_i64: { + case Intrinsic::eh_typeid_for: { MachineModuleInfo *MMI = DAG.getMachineModuleInfo(); - MVT VT = (Intrinsic == Intrinsic::eh_typeid_for_i32 ? - MVT::i32 : MVT::i64); if (MMI) { // Find the type id for the given typeinfo. GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); unsigned TypeID = MMI->getTypeIDFor(GV); - setValue(&I, DAG.getConstant(TypeID, VT)); + setValue(&I, DAG.getConstant(TypeID, MVT::i32)); } else { // Return something different to eh_selector. 
- setValue(&I, DAG.getConstant(1, VT)); + setValue(&I, DAG.getConstant(1, MVT::i32)); } return 0; @@ -4073,14 +4039,9 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::eh_dwarf_cfa: { - MVT VT = getValue(I.getOperand(1)).getValueType(); - SDValue CfaArg; - if (VT.bitsGT(TLI.getPointerTy())) - CfaArg = DAG.getNode(ISD::TRUNCATE, dl, - TLI.getPointerTy(), getValue(I.getOperand(1))); - else - CfaArg = DAG.getNode(ISD::SIGN_EXTEND, dl, - TLI.getPointerTy(), getValue(I.getOperand(1))); + EVT VT = getValue(I.getOperand(1)).getValueType(); + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, + TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -4096,7 +4057,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Offset)); return 0; } - case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -4118,7 +4078,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - MVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); Value* Op1 = I.getOperand(1); setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), @@ -4182,16 +4142,6 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.setRoot(Tmp.getValue(1)); return 0; } - case Intrinsic::part_select: { - // Currently not implemented: just abort - assert(0 && "part_select intrinsic not implemented"); - abort(); - } - case Intrinsic::part_set: { - // Currently not implemented: just abort - assert(0 && "part_set intrinsic not implemented"); - abort(); - } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, getValue(I.getOperand(1)).getValueType(), @@ -4199,21 +4149,21 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::cttz: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg); setValue(&I, result); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getOperand(1)); - MVT Ty = Arg.getValueType(); + EVT Ty = Arg.getValueType(); SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg); setValue(&I, result); return 0; @@ -4235,7 +4185,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(); SDValue Src = getValue(I.getOperand(1)); // The guard's value. 
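[Editor's note: the cttz/ctlz/ctpop cases above now carry EVT operand types but lower, as before, straight to ISD::CTTZ/CTLZ/CTPOP nodes. Scalar 32-bit models of the three bit-count operations; the zero-count functions assume a nonzero input:]

    #include <cstdint>
    #include <cstdio>

    static unsigned ctpop(uint32_t V) { // population count
      unsigned N = 0;
      for (; V; V &= V - 1) ++N;        // clears the lowest set bit each step
      return N;
    }
    static unsigned cttz(uint32_t V) {  // count trailing zeros, V != 0
      unsigned N = 0;
      for (; !(V & 1); V >>= 1) ++N;
      return N;
    }
    static unsigned ctlz(uint32_t V) {  // count leading zeros, V != 0
      unsigned N = 0;
      for (; !(V & 0x80000000u); V <<= 1) ++N;
      return N;
    }

    int main() {
      std::printf("%u %u %u\n", cttz(0xf0u), ctlz(0xf0u), ctpop(0xf0u));
      // Prints: 4 24 4
      return 0;
    }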
AllocaInst *Slot = cast(I.getOperand(2)); @@ -4289,7 +4239,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: - assert(0 && "GC failed to lower gcread/gcwrite intrinsics!"); + llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); return 0; case Intrinsic::flt_rounds: { @@ -4373,9 +4323,76 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } } +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. +/// For target-dependent requirements, a target should override +/// TargetLowering::IsEligibleForTailCallOptimization. +/// +static bool +isInTailCallPosition(const Instruction *I, Attributes RetAttr, + const TargetLowering &TLI) { + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast(Term); + const Function *F = ExitBB->getParent(); + + // The block must end in a return statement or an unreachable. + if (!Ret && !isa(Term)) return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. + if (I->mayHaveSideEffects() || I->mayReadFromMemory() || + !I->isSafeToSpeculativelyExecute()) + for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ; + --BBI) { + if (&*BBI == I) + break; + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !BBI->isSafeToSpeculativelyExecute()) + return false; + } + + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // Conservatively require the attributes of the call to match those of + // the return. + if (F->getAttributes().getRetAttributes() != RetAttr) + return false; + + // Otherwise, make sure the unmodified return value of I is the return value. + for (const Instruction *U = dyn_cast(Ret->getOperand(0)); ; + U = dyn_cast(U->getOperand(0))) { + if (!U) + return false; + if (!U->hasOneUse()) + return false; + if (U == I) + break; + // Check for a truly no-op truncate. + if (isa(U) && + TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType())) + continue; + // Check for a truly no-op bitcast. + if (isa(U) && + (U->getOperand(0)->getType() == U->getType() || + (isa(U->getOperand(0)->getType()) && + isa(U->getType())))) + continue; + // Otherwise it's not a true no-op. + return false; + } + + return true; +} void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, - bool IsTailCall, + bool isTailCall, MachineBasicBlock *LandingPad) { const PointerType *PT = cast(CS.getCalledValue()->getType()); const FunctionType *FTy = cast(PT->getElementType()); @@ -4385,8 +4402,9 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); + unsigned j = 1; for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { + i != e; ++i, ++j) { SDValue ArgNode = getValue(*i); Entry.Node = ArgNode; Entry.Ty = (*i)->getType(); @@ -4405,6 +4423,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, // Insert a label before the invoke call to mark the try range. 
This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI->NextLabelID(); + // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); @@ -4412,17 +4431,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, getControlRoot(), BeginLabel)); } + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI.LowerCallTo. + if (isTailCall && + !isInTailCallPosition(CS.getInstruction(), + CS.getAttributes().getRetAttributes(), + TLI)) + isTailCall = false; + std::pair Result = TLI.LowerCallTo(getRoot(), CS.getType(), CS.paramHasAttr(0, Attribute::SExt), CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(), CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), - IsTailCall && PerformTailCallOpt, + isTailCall, + !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); - if (CS.getType() != Type::VoidTy) + assert((isTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - DAG.setRoot(Result.second); + // As a special case, a null chain means that a tail call has + // been emitted and the DAG root is already updated. + if (Result.second.getNode()) + DAG.setRoot(Result.second); + else + HasTailCall = true; if (LandingPad && MMI) { // Insert a label at the end of the invoke call to mark the try range. This @@ -4458,12 +4495,9 @@ void SelectionDAGLowering::visitCall(CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - unsigned NameLen = F->getNameLen(); - if (!F->hasLocalLinkage() && NameLen) { - const char *NameStr = F->getNameStart(); - if (NameStr[0] == 'c' && - ((NameLen == 8 && !strcmp(NameStr, "copysign")) || - (NameLen == 9 && !strcmp(NameStr, "copysignf")))) { + if (!F->hasLocalLinkage() && F->hasName()) { + StringRef Name = F->getName(); + if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType() && @@ -4474,10 +4508,7 @@ void SelectionDAGLowering::visitCall(CallInst &I) { LHS.getValueType(), LHS, RHS)); return; } - } else if (NameStr[0] == 'f' && - ((NameLen == 4 && !strcmp(NameStr, "fabs")) || - (NameLen == 5 && !strcmp(NameStr, "fabsf")) || - (NameLen == 5 && !strcmp(NameStr, "fabsl")))) { + } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && I.getType() == I.getOperand(1)->getType()) { @@ -4486,30 +4517,36 @@ void SelectionDAGLowering::visitCall(CallInst &I) { Tmp.getValueType(), Tmp)); return; } - } else if (NameStr[0] == 's' && - ((NameLen == 3 && !strcmp(NameStr, "sin")) || - (NameLen == 4 && !strcmp(NameStr, "sinf")) || - (NameLen == 4 && !strcmp(NameStr, "sinl")))) { + } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. 
I.getOperand(1)->getType()->isFloatingPoint() && - I.getType() == I.getOperand(1)->getType()) { + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } - } else if (NameStr[0] == 'c' && - ((NameLen == 3 && !strcmp(NameStr, "cos")) || - (NameLen == 4 && !strcmp(NameStr, "cosf")) || - (NameLen == 4 && !strcmp(NameStr, "cosl")))) { + } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. I.getOperand(1)->getType()->isFloatingPoint() && - I.getType() == I.getOperand(1)->getType()) { + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } + } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { + if (I.getNumOperands() == 2 && // Basic sanity checks. + I.getOperand(1)->getType()->isFloatingPoint() && + I.getType() == I.getOperand(1)->getType() && + I.onlyReadsMemory()) { + SDValue Tmp = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), + Tmp.getValueType(), Tmp)); + return; + } } } } else if (isa(I.getOperand(0))) { @@ -4523,7 +4560,12 @@ void SelectionDAGLowering::visitCall(CallInst &I) { else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); - LowerCallTo(&I, Callee, I.isTailCall()); + // Check if we can potentially perform a tail call. More detailed + // checking is be done within LowerCallTo, after more information + // about the call is known. + bool isTailCall = PerformTailCallOpt && I.isTailCall(); + + LowerCallTo(&I, Callee, isTailCall); } @@ -4539,9 +4581,9 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, SmallVector Parts; for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { // Copy the legal parts from the registers. - MVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = RegVTs[Value]; + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -4570,7 +4612,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. 
bool isSExt = true; - MVT FromVT(MVT::Other); + EVT FromVT(MVT::Other); if (NumSignBits == RegSize) isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 else if (NumZeroBits >= RegSize-1) @@ -4620,9 +4662,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumRegs = Regs.size(); SmallVector<SDValue, 8> Parts(NumRegs); for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - MVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI->getNumRegisters(ValueVT); - MVT RegisterVT = RegVTs[Value]; + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); @@ -4665,15 +4707,15 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,unsigned MatchingIdx, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { - MVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); + EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy(); assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); unsigned Flag = Code | (Regs.size() << 3); if (HasMatching) Flag |= 0x80000000 | (MatchingIdx << 16); Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy)); for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI->getNumRegisters(ValueVTs[Value]); - MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); @@ -4688,11 +4730,11 @@ static const TargetRegisterClass * isAllocatableRegister(unsigned Reg, MachineFunction &MF, const TargetLowering &TLI, const TargetRegisterInfo *TRI) { - MVT FoundVT = MVT::Other; + EVT FoundVT = MVT::Other; const TargetRegisterClass *FoundRC = 0; for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), E = TRI->regclass_end(); RCI != E; ++RCI) { - MVT ThisVT = MVT::Other; + EVT ThisVT = MVT::Other; const TargetRegisterClass *RC = *RCI; // If none of the value types for this register class are valid, we @@ -4765,10 +4807,11 @@ public: } } - /// getCallOperandValMVT - Return the MVT of the Value* that this operand + /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. - MVT getCallOperandValMVT(const TargetLowering &TLI, + EVT getCallOperandValEVT(LLVMContext &Context, + const TargetLowering &TLI, const TargetData *TD) const { if (CallOperandVal == 0) return MVT::Other; @@ -4794,7 +4837,7 @@ public: case 32: case 64: case 128: - OpTy = IntegerType::get(BitSize); + OpTy = IntegerType::get(Context, BitSize); break; } } @@ -4830,6 +4873,8 @@ void SelectionDAGLowering:: GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set<unsigned> &OutputRegs, std::set<unsigned> &InputRegs) { + LLVMContext &Context = FuncInfo.Fn->getContext(); + // Compute whether this value requires an input register, an output register, // or both. bool isOutReg = false; @@ -4869,10 +4914,10 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // value disagrees with the register class we plan to stick this in. if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { - // Try to convert to the first MVT that the reg class contains.
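
AddInlineAsmOperands above packs each operand descriptor into one flag word: bits 0-2 hold the operand kind, bits 3-15 the register count (hence the Regs.size() < (1 << 13) assert), and for matched operands bit 31 is set with the matched index in bits 16-30. A sketch of an encode plus one plausible decode, derived only from the arithmetic visible in these hunks:

    #include <cassert>
    #include <cstdint>

    // Field layout as used in AddInlineAsmOperands above.
    static uint32_t encodeAsmOperand(uint32_t Kind, uint32_t NumRegs,
                                     bool HasMatching, uint32_t MatchingIdx) {
      assert(Kind < 8 && NumRegs < (1u << 13));
      uint32_t Flag = Kind | (NumRegs << 3);
      if (HasMatching)
        Flag |= 0x80000000u | (MatchingIdx << 16);
      return Flag;
    }

    int main() {
      uint32_t F = encodeAsmOperand(/*REGDEF*/2, 3, true, 5);
      assert((F & 7) == 2);              // kind, as in "(OpFlag & 7) == 2"
      assert(((F & 0xffff) >> 3) == 3);  // register count
      assert((F >> 31) == 1 && ((F >> 16) & 0x7fff) == 5); // matched index
    }
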
If the + // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). - MVT RegVT = *PhysReg.second->vt_begin(); + EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); @@ -4882,18 +4927,19 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. - RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + RegVT = EVT::getIntegerVT(Context, + OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } - NumRegs = TLI.getNumRegisters(OpInfo.ConstraintVT); + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } - MVT RegVT; - MVT ValueVT = OpInfo.ConstraintVT; + EVT RegVT; + EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, // assign it now. @@ -5047,7 +5093,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - MVT OpVT = MVT::Other; + EVT OpVT = MVT::Other; // Compute the value type for each operand. switch (OpInfo.Type) { @@ -5060,7 +5106,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // The return value of the call is this value. As such, there is no // corresponding argument. - assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + "Bad inline asm!"); if (const StructType *STy = dyn_cast(CS.getType())) { OpVT = TLI.getValueType(STy->getElementType(ResNo)); } else { @@ -5080,13 +5127,16 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // If this is an input or an indirect output, process the call argument. // BasicBlocks are labels, currently appearing only in asm's. if (OpInfo.CallOperandVal) { + // Strip bitcasts, if any. This mostly comes up for functions. + OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts(); + if (BasicBlock *BB = dyn_cast(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValMVT(TLI, TD); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD); } OpInfo.ConstraintVT = OpVT; @@ -5108,9 +5158,9 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { Input.ConstraintVT.isInteger()) || (OpInfo.ConstraintVT.getSizeInBits() != Input.ConstraintVT.getSizeInBits())) { - cerr << "llvm: error: Unsupported asm: input constraint with a " - << "matching output constraint of incompatible type!\n"; - exit(1); + llvm_report_error("Unsupported asm: input constraint" + " with a matching output constraint of incompatible" + " type!"); } Input.ConstraintVT = OpInfo.ConstraintVT; } @@ -5213,9 +5263,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. 
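
GetRegistersForValue above prefers a BIT_CONVERT when the operand's type and the register class's first type have the same bit width, and otherwise routes an FP value through an integer type of the same width so it can be split across integer registers (the f64-as-i64-in-two-i32s case). A toy model of that decision, not LLVM's API:

    #include <cassert>

    enum class Conversion { None, Bitcast, ViaSameWidthInteger };

    // Toy model of the choice in GetRegistersForValue: a size-preserving
    // bitcast works between any two types of equal width; an f64 that must
    // live in 32-bit registers is first viewed as i64, then split into
    // register-sized parts.
    static Conversion chooseConversion(unsigned ValueBits, unsigned RegBits) {
      if (ValueBits == RegBits)
        return Conversion::Bitcast;
      return Conversion::ViaSameWidthInteger;
    }

    int main() {
      assert(chooseConversion(64, 64) == Conversion::Bitcast);
      assert(chooseConversion(64, 32) == Conversion::ViaSameWidthInteger);
    }
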
if (OpInfo.AssignedRegs.Regs.empty()) { - cerr << "llvm: error: Couldn't allocate output reg for constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Couldn't allocate output reg for" + " constraint '" + OpInfo.ConstraintCode + "'!"); } // If this is an indirect operand, store through the pointer after the @@ -5225,7 +5274,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { OpInfo.CallOperandVal)); } else { // This is the result value of the call. - assert(CS.getType() != Type::VoidTy && "Bad inline asm!"); + assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) && + "Bad inline asm!"); // Concatenate this output onto the outputs list. RetValRegs.append(OpInfo.AssignedRegs); } @@ -5268,15 +5318,13 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. if (OpInfo.isIndirect) { - cerr << "llvm: error: " - "Don't know how to handle tied indirect " - "register inputs yet!\n"; - exit(1); + llvm_report_error("Don't know how to handle tied indirect " + "register inputs yet!"); } RegsForValue MatchedRegs; MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); - MVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); @@ -5313,9 +5361,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], hasMemory, Ops, DAG); if (Ops.empty()) { - cerr << "llvm: error: Invalid operand for inline asm constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Invalid operand for inline asm" + " constraint '" + OpInfo.ConstraintCode + "'!"); } // Add information to the INLINEASM node to know about this input. @@ -5345,9 +5392,8 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { - cerr << "llvm: error: Couldn't allocate output reg for constraint '" - << OpInfo.ConstraintCode << "'!\n"; - exit(1); + llvm_report_error("Couldn't allocate input reg for" + " constraint '"+ OpInfo.ConstraintCode +"'!"); } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), @@ -5385,7 +5431,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - MVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -5449,45 +5495,56 @@ void SelectionDAGLowering::visitMalloc(MallocInst &I) { // multiply on 64-bit targets. // FIXME: Malloc inst should go away: PR715. uint64_t ElementSize = TD->getTypeAllocSize(I.getType()->getElementType()); - if (ElementSize != 1) + if (ElementSize != 1) { + // Src is always 32-bits, make sure the constant fits. 
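
These hunks replace the "cerr << ...; exit(1)" pattern with llvm_report_error(...), so a library embedder can intercept fatal errors instead of having the whole process die. A minimal sketch of that pattern with a settable handler; the names here are illustrative, not LLVM's actual error-handling API:

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Illustrative fatal-error reporter in the style of llvm_report_error:
    // route through an optional handler so embedders can intercept the
    // failure; fall back to stderr plus exit.
    using ErrorHandler = void (*)(const std::string &Msg);
    static ErrorHandler CurrentHandler = nullptr;

    [[noreturn]] static void reportFatalError(const std::string &Msg) {
      if (CurrentHandler)
        CurrentHandler(Msg);
      std::fprintf(stderr, "error: %s\n", Msg.c_str());
      std::exit(1);
    }

    int main(int argc, char **) {
      if (argc > 1)
        reportFatalError("Couldn't allocate output reg for constraint 'r'!");
      return 0;
    }
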
+ assert(Src.getValueType() == MVT::i32); + ElementSize = (uint32_t)ElementSize; Src = DAG.getNode(ISD::MUL, getCurDebugLoc(), Src.getValueType(), Src, DAG.getConstant(ElementSize, Src.getValueType())); + } - MVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); - if (IntPtr.bitsLT(Src.getValueType())) - Src = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), IntPtr, Src); - else if (IntPtr.bitsGT(Src.getValueType())) - Src = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), IntPtr, Src); + Src = DAG.getZExtOrTrunc(Src, getCurDebugLoc(), IntPtr); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Src; - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Args.push_back(Entry); + bool isTailCall = PerformTailCallOpt && + isInTailCallPosition(&I, Attribute::None, TLI); std::pair Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, - 0, CallingConv::C, PerformTailCallOpt, + 0, CallingConv::C, isTailCall, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("malloc", IntPtr), Args, DAG, getCurDebugLoc()); - setValue(&I, Result.first); // Pointers always fit in registers - DAG.setRoot(Result.second); + if (Result.first.getNode()) + setValue(&I, Result.first); // Pointers always fit in registers + if (Result.second.getNode()) + DAG.setRoot(Result.second); } void SelectionDAGLowering::visitFree(FreeInst &I) { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = getValue(I.getOperand(0)); - Entry.Ty = TLI.getTargetData()->getIntPtrType(); + Entry.Ty = TLI.getTargetData()->getIntPtrType(*DAG.getContext()); Args.push_back(Entry); - MVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); + bool isTailCall = PerformTailCallOpt && + isInTailCallPosition(&I, Attribute::None, TLI); std::pair Result = - TLI.LowerCallTo(getRoot(), Type::VoidTy, false, false, false, false, - 0, CallingConv::C, PerformTailCallOpt, + TLI.LowerCallTo(getRoot(), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, isTailCall, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("free", IntPtr), Args, DAG, getCurDebugLoc()); - DAG.setRoot(Result.second); + if (Result.second.getNode()) + DAG.setRoot(Result.second); } void SelectionDAGLowering::visitVAStart(CallInst &I) { @@ -5521,161 +5578,31 @@ void SelectionDAGLowering::visitVACopy(CallInst &I) { DAG.getSrcValue(I.getOperand(2)))); } -/// TargetLowering::LowerArguments - This is the default LowerArguments -/// implementation, which just inserts a FORMAL_ARGUMENTS node. FIXME: When all -/// targets are migrated to using FORMAL_ARGUMENTS, this hook should be -/// integrated into SDISel. -void TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, - SmallVectorImpl &ArgValues, - DebugLoc dl) { - // Add CC# and isVararg as operands to the FORMAL_ARGUMENTS node. - SmallVector Ops; - Ops.push_back(DAG.getRoot()); - Ops.push_back(DAG.getConstant(F.getCallingConv(), getPointerTy())); - Ops.push_back(DAG.getConstant(F.isVarArg(), getPointerTy())); - - // Add one result value for each formal argument. 
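
DAG.getZExtOrTrunc(Src, dl, IntPtr) above folds the old three-way branch into one helper: zero-extend when the value is narrower than the pointer type, truncate when wider, pass through when equal. An integer-level sketch of that contract (64-bit container and explicit bit widths are assumptions of the sketch):

    #include <cassert>
    #include <cstdint>

    // Integer model of getZExtOrTrunc: adjust a value of FromBits width
    // to ToBits width, zero-extending or truncating as needed.
    static uint64_t zextOrTrunc(uint64_t V, unsigned FromBits, unsigned ToBits) {
      V &= (FromBits < 64) ? ((1ull << FromBits) - 1) : ~0ull; // normalize input
      if (ToBits < 64)
        V &= (1ull << ToBits) - 1; // truncate; zext is a no-op on the bits
      return V;
    }

    int main() {
      assert(zextOrTrunc(0xFFFFFFFFull, 32, 64) == 0xFFFFFFFFull);  // zext
      assert(zextOrTrunc(0x1FFFFFFFFull, 64, 32) == 0xFFFFFFFFull); // trunc
    }
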
- SmallVector RetVals; - unsigned j = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I, ++j) { - SmallVector ValueVTs; - ComputeValueVTs(*this, I->getType(), ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForMVT(); - ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - getTargetData()->getABITypeAlignment(ArgTy); - - if (F.paramHasAttr(j, Attribute::ZExt)) - Flags.setZExt(); - if (F.paramHasAttr(j, Attribute::SExt)) - Flags.setSExt(); - if (F.paramHasAttr(j, Attribute::InReg)) - Flags.setInReg(); - if (F.paramHasAttr(j, Attribute::StructRet)) - Flags.setSRet(); - if (F.paramHasAttr(j, Attribute::ByVal)) { - Flags.setByVal(); - const PointerType *Ty = cast(I->getType()); - const Type *ElementTy = Ty->getElementType(); - unsigned FrameAlign = getByValTypeAlignment(ElementTy); - unsigned FrameSize = getTargetData()->getTypeAllocSize(ElementTy); - // For ByVal, alignment should be passed from FE. BE will guess if - // this info is not there but there are cases it cannot get right. - if (F.getParamAlignment(j)) - FrameAlign = F.getParamAlignment(j); - Flags.setByValAlign(FrameAlign); - Flags.setByValSize(FrameSize); - } - if (F.paramHasAttr(j, Attribute::Nest)) - Flags.setNest(); - Flags.setOrigAlign(OriginalAlignment); - - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - for (unsigned i = 0; i != NumRegs; ++i) { - RetVals.push_back(RegisterVT); - ISD::ArgFlagsTy MyFlags = Flags; - if (NumRegs > 1 && i == 0) - MyFlags.setSplit(); - // if it isn't first piece, alignment must be 1 - else if (i > 0) - MyFlags.setOrigAlign(1); - Ops.push_back(DAG.getArgFlags(MyFlags)); - } - } - } - - RetVals.push_back(MVT::Other); - - // Create the node. - SDNode *Result = DAG.getNode(ISD::FORMAL_ARGUMENTS, dl, - DAG.getVTList(&RetVals[0], RetVals.size()), - &Ops[0], Ops.size()).getNode(); - - // Prelower FORMAL_ARGUMENTS. This isn't required for functionality, but - // allows exposing the loads that may be part of the argument access to the - // first DAGCombiner pass. - SDValue TmpRes = LowerOperation(SDValue(Result, 0), DAG); - - // The number of results should match up, except that the lowered one may have - // an extra flag result. - assert((Result->getNumValues() == TmpRes.getNode()->getNumValues() || - (Result->getNumValues()+1 == TmpRes.getNode()->getNumValues() && - TmpRes.getValue(Result->getNumValues()).getValueType() == MVT::Flag)) - && "Lowering produced unexpected number of results!"); - - // The FORMAL_ARGUMENTS node itself is likely no longer needed. - if (Result != TmpRes.getNode() && Result->use_empty()) { - HandleSDNode Dummy(DAG.getRoot()); - DAG.RemoveDeadNode(Result); - } - - Result = TmpRes.getNode(); - - unsigned NumArgRegs = Result->getNumValues() - 1; - DAG.setRoot(SDValue(Result, NumArgRegs)); - - // Set up the return result vector. 
- unsigned i = 0; - unsigned Idx = 1; - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; - ++I, ++Idx) { - SmallVector ValueVTs; - ComputeValueVTs(*this, I->getType(), ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - MVT PartVT = getRegisterType(VT); - - unsigned NumParts = getNumRegisters(VT); - SmallVector Parts(NumParts); - for (unsigned j = 0; j != NumParts; ++j) - Parts[j] = SDValue(Result, i++); - - ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.paramHasAttr(Idx, Attribute::SExt)) - AssertOp = ISD::AssertSext; - else if (F.paramHasAttr(Idx, Attribute::ZExt)) - AssertOp = ISD::AssertZext; - - ArgValues.push_back(getCopyFromParts(DAG, dl, &Parts[0], NumParts, - PartVT, VT, AssertOp)); - } - } - assert(i == NumArgRegs && "Argument register count mismatch!"); -} - - /// TargetLowering::LowerCallTo - This is the default LowerCallTo -/// implementation, which just inserts an ISD::CALL node, which is later custom -/// lowered by the target to something concrete. FIXME: When all targets are -/// migrated to using ISD::CALL, this hook should be integrated into SDISel. +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. std::pair TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, - unsigned CallingConv, bool isTailCall, + CallingConv::ID CallConv, bool isTailCall, + bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { + assert((!isTailCall || PerformTailCallOpt) && "isTailCall set when tail-call optimizations are disabled!"); - SmallVector Ops; - Ops.push_back(Chain); // Op#0 - Chain - Ops.push_back(Callee); - // Handle all of the outgoing arguments. 
+ SmallVector Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - SmallVector ValueVTs; + SmallVector ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { - MVT VT = ValueVTs[Value]; - const Type *ArgTy = VT.getTypeForMVT(); + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; @@ -5707,8 +5634,8 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - MVT PartVT = getRegisterType(VT); - unsigned NumParts = getNumRegisters(VT); + EVT PartVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -5719,75 +5646,105 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); - for (unsigned i = 0; i != NumParts; ++i) { + for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::ArgFlagsTy MyFlags = Flags; - if (NumParts > 1 && i == 0) - MyFlags.setSplit(); - else if (i != 0) - MyFlags.setOrigAlign(1); - - Ops.push_back(Parts[i]); - Ops.push_back(DAG.getArgFlags(MyFlags)); + ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); + if (NumParts > 1 && j == 0) + MyFlags.Flags.setSplit(); + else if (j != 0) + MyFlags.Flags.setOrigAlign(1); + + Outs.push_back(MyFlags); } } } - // Figure out the result value types. We start by making a list of - // the potentially illegal return value types. - SmallVector LoweredRetTys; - SmallVector RetTys; + // Handle the incoming return values from the call. + SmallVector Ins; + SmallVector RetTys; ComputeValueVTs(*this, RetTy, RetTys); - - // Then we translate that to a list of legal types. for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - MVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - for (unsigned i = 0; i != NumRegs; ++i) - LoweredRetTys.push_back(RegisterVT); - } - - LoweredRetTys.push_back(MVT::Other); // Always has a chain. - - // Create the CALL node. - SDValue Res = DAG.getCall(CallingConv, dl, - isVarArg, isTailCall, isInreg, - DAG.getVTList(&LoweredRetTys[0], - LoweredRetTys.size()), - &Ops[0], Ops.size(), NumFixedArgs - ); - Chain = Res.getValue(LoweredRetTys.size() - 1); - - // Gather up the call result into a single value. 
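
The Outs loop above expands each argument value into NumParts register-sized pieces: the first piece of a split value is tagged Split, later pieces have their original-alignment flag forced to 1, and "i < NumFixedArgs" records whether the piece belongs to a fixed (non-vararg) parameter. A standalone sketch of that bookkeeping with toy flag and part types:

    #include <cassert>
    #include <vector>

    // Toy flag set mirroring the ISD::ArgFlagsTy fields touched above.
    struct ArgFlags { bool Split = false; unsigned OrigAlign = 0; };
    struct OutPart { ArgFlags Flags; bool IsFixed; };

    // Expand one value into NumParts pieces the way the Outs loop does.
    static void appendParts(std::vector<OutPart> &Outs, ArgFlags Flags,
                            unsigned NumParts, bool IsFixed) {
      for (unsigned j = 0; j != NumParts; ++j) {
        OutPart P{Flags, IsFixed};
        if (NumParts > 1 && j == 0)
          P.Flags.Split = true;        // marks the start of a split value
        else if (j != 0)
          P.Flags.OrigAlign = 1;       // later pieces: alignment must be 1
        Outs.push_back(P);
      }
    }

    int main() {
      std::vector<OutPart> Outs;
      appendParts(Outs, ArgFlags{false, 8}, 2, true); // e.g. i64 as 2 x i32
      assert(Outs[0].Flags.Split && Outs[0].Flags.OrigAlign == 8);
      assert(!Outs[1].Flags.Split && Outs[1].Flags.OrigAlign == 1);
    }
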
- if (RetTy != Type::VoidTy && !RetTys.empty()) { - ISD::NodeType AssertOp = ISD::DELETED_NODE; - - if (RetSExt) - AssertOp = ISD::AssertSext; - else if (RetZExt) - AssertOp = ISD::AssertZext; - - SmallVector<SDValue, 4> ReturnValues; - unsigned RegNo = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - MVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(VT); - unsigned NumRegs = getNumRegisters(VT); - unsigned RegNoEnd = NumRegs + RegNo; - SmallVector<SDValue, 4> Results; - for (; RegNo != RegNoEnd; ++RegNo) - Results.push_back(Res.getValue(RegNo)); - SDValue ReturnValue = - getCopyFromParts(DAG, dl, &Results[0], NumRegs, RegisterVT, VT, - AssertOp); - ReturnValues.push_back(ReturnValue); + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.Used = isReturnValueUsed; + if (RetSExt) + MyFlags.Flags.setSExt(); + if (RetZExt) + MyFlags.Flags.setZExt(); + if (isInreg) + MyFlags.Flags.setInReg(); + Ins.push_back(MyFlags); } - Res = DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size()); } + // Check if target-dependent constraints permit a tail call here. + // Target-independent constraints should be checked by the caller. + if (isTailCall && + !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG)) + isTailCall = false; + + SmallVector<SDValue, 4> InVals; + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Outs, Ins, dl, DAG, InVals); + + // Verify that the target's LowerCall behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerCall didn't return a valid chain!"); + assert((!isTailCall || InVals.empty()) && + "LowerCall emitted a return value for a tail call!"); + assert((isTailCall || InVals.size() == Ins.size()) && + "LowerCall didn't emit the correct number of values!"); + DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerCall emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + }); + + // For a tail call, the return value is merely live-out and there aren't + // any nodes in the DAG representing it. Return a special value to + // indicate that a tail call has been emitted and no more Instructions + // should be processed in the current block. + if (isTailCall) { + DAG.setRoot(Chain); + return std::make_pair(SDValue(), SDValue()); + } + + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (RetSExt) + AssertOp = ISD::AssertSext; + else if (RetZExt) + AssertOp = ISD::AssertZext; + SmallVector<SDValue, 4> ReturnValues; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); + + SDValue ReturnValue = + getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT, + AssertOp); + ReturnValues.push_back(ReturnValue); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value.
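
getCopyFromParts, used just above to rebuild the call's return values from the InVals the target produced, is the inverse of the splitting into register-sized pieces. A little-endian integer sketch for the common two-i32-to-i64 case (part ordering is an assumption of the sketch; the real code also handles endianness, FP, and vector cases):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Rebuild a 64-bit value from 32-bit parts, least significant part first.
    static uint64_t copyFromParts(const std::vector<uint32_t> &Parts) {
      uint64_t V = 0;
      for (std::size_t i = 0; i != Parts.size(); ++i)
        V |= static_cast<uint64_t>(Parts[i]) << (32 * i);
      return V;
    }

    int main() {
      assert(copyFromParts({0xDDCCBBAAu, 0x11223344u}) == 0x11223344DDCCBBAAull);
    }
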
+ if (ReturnValues.empty()) + return std::make_pair(SDValue(), Chain); + + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&RetTys[0], RetTys.size()), + &ReturnValues[0], ReturnValues.size()); + return std::make_pair(Res, Chain); } @@ -5800,8 +5757,7 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { - assert(0 && "LowerOperation not implemented for this target!"); - abort(); + llvm_unreachable("LowerOperation not implemented for this target!"); return SDValue(); } @@ -5813,7 +5769,7 @@ void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(TLI, Reg, V->getType()); + RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); PendingExports.push_back(Chain); @@ -5825,25 +5781,122 @@ void SelectionDAGISel:: LowerArguments(BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. Function &F = *LLVMBB->getParent(); - SDValue OldRoot = SDL->DAG.getRoot(); - SmallVector Args; - TLI.LowerArguments(F, SDL->DAG, Args, SDL->getCurDebugLoc()); - - unsigned a = 0; - for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); - AI != E; ++AI) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, AI->getType(), ValueVTs); + SelectionDAG &DAG = SDL->DAG; + SDValue OldRoot = DAG.getRoot(); + DebugLoc dl = SDL->getCurDebugLoc(); + const TargetData *TD = TLI.getTargetData(); + + // Set up the incoming argument description vector. + SmallVector Ins; + unsigned Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++Idx) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); + bool isArgValueUsed = !I->use_empty(); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = + TD->getABITypeAlignment(ArgTy); + + if (F.paramHasAttr(Idx, Attribute::ZExt)) + Flags.setZExt(); + if (F.paramHasAttr(Idx, Attribute::SExt)) + Flags.setSExt(); + if (F.paramHasAttr(Idx, Attribute::InReg)) + Flags.setInReg(); + if (F.paramHasAttr(Idx, Attribute::StructRet)) + Flags.setSRet(); + if (F.paramHasAttr(Idx, Attribute::ByVal)) { + Flags.setByVal(); + const PointerType *Ty = cast(I->getType()); + const Type *ElementTy = Ty->getElementType(); + unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy); + unsigned FrameSize = TD->getTypeAllocSize(ElementTy); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. 
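
Both the LowerCall verification above and the LowerFormalArguments verification below hold the target hook to the same kind of contract: a valid chain, and either no result values (tail call) or exactly one value per expected part. A compact restatement as a standalone checker over a mock result; the types here are placeholders, not the SDValue/InputArg API:

    #include <cstddef>

    // Placeholder for what a target's LowerCall hook hands back.
    struct MockLowerCallResult {
      bool ChainValid;        // returned chain is non-null and of chain type
      std::size_t NumInVals;  // values produced for the call's results
    };

    // Restates the assertions that follow the LowerCall call above.
    static bool obeysLowerCallContract(const MockLowerCallResult &R,
                                       bool IsTailCall, std::size_t NumIns) {
      if (!R.ChainValid) return false;
      if (IsTailCall) return R.NumInVals == 0; // results are merely live-out
      return R.NumInVals == NumIns;            // one value per expected part
    }

    int main() {
      return obeysLowerCallContract({true, 0}, /*IsTailCall=*/true, 2) ? 0 : 1;
    }
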
+ if (F.getParamAlignment(Idx)) + FrameAlign = F.getParamAlignment(Idx); + Flags.setByValAlign(FrameAlign); + Flags.setByValSize(FrameSize); + } + if (F.paramHasAttr(Idx, Attribute::Nest)) + Flags.setNest(); + Flags.setOrigAlign(OriginalAlignment); + + EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed); + if (NumRegs > 1 && i == 0) + MyFlags.Flags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) + MyFlags.Flags.setOrigAlign(1); + Ins.push_back(MyFlags); + } + } + } + + // Call the target to set up the argument values. + SmallVector<SDValue, 8> InVals; + SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); + + // Verify that the target's LowerFormalArguments behaved as expected. + assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && + "LowerFormalArguments didn't return a valid chain!"); + assert(InVals.size() == Ins.size() && + "LowerFormalArguments didn't emit the correct number of values!"); + DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(Ins[i].VT == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + }); + + // Update the DAG with the new chain value resulting from argument lowering. + DAG.setRoot(NewRoot); + + // Set up the argument values. + unsigned i = 0; + Idx = 1; + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<SDValue, 4> ArgValues; + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); - if (!AI->use_empty()) { - SDL->setValue(AI, SDL->DAG.getMergeValues(&Args[a], NumValues, - SDL->getCurDebugLoc())); + for (unsigned Value = 0; Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); + + if (!I->use_empty()) { + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.paramHasAttr(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.paramHasAttr(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, + PartVT, VT, AssertOp)); + } + i += NumParts; + } + if (!I->use_empty()) { + SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues, + SDL->getCurDebugLoc())); // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. - SDL->CopyToExportRegsIfNeeded(AI); + SDL->CopyToExportRegsIfNeeded(I); } - a += NumValues; } + assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! @@ -5908,11 +5961,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { // Remember that this register needs to be added to the machine PHI node as // the input for this MBB.
- SmallVector<MVT, 4> ValueVTs; + SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - MVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(VT); + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; @@ -5962,11 +6015,11 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegForValue to create registers, so it always creates // exactly one register for each non-void instruction. - MVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Promote MVT::i1. if (VT == MVT::i1) - VT = TLI.getTypeToTransformTo(VT); + VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); else { SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index 057c8410da0ef..06acc8a6bface 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -15,6 +15,7 @@ #define SELECTIONDAGBUILD_H #include "llvm/Constants.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #ifndef NDEBUG @@ -23,6 +24,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include <vector> #include <set> @@ -75,8 +77,6 @@ class TruncInst; class UIToFPInst; class UnreachableInst; class UnwindInst; -class VICmpInst; -class VFCmpInst; class VAArgInst; class ZExtInst; @@ -117,7 +117,7 @@ public: SmallSet<Instruction*, 8> CatchInfoFound; #endif - unsigned MakeReg(MVT VT); + unsigned MakeReg(EVT VT); /// isExportedInst - Return true if the specified value is an instruction /// exported from its block. @@ -345,9 +345,15 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector<BitTestBlock> BitTestCases; - + + /// PHINodesToUpdate - A list of phi instructions whose operand list will + /// be updated after processing the current basic block. std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + /// EdgeMapping - If an edge from CurMBB to any MBB is changed (e.g. due to + /// scheduler custom lowering), track the change here. + DenseMap<MachineBasicBlock*, MachineBasicBlock*> EdgeMapping; + // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. DenseMap<const Constant *, unsigned> ConstantsOut; @@ -363,11 +369,21 @@ public: /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; + /// HasTailCall - This is set to true if a call in the current + /// block has been translated as a tail call. In this case, + /// no subsequent DAG nodes should be created.
+ /// + bool HasTailCall; + + LLVMContext *Context; + SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurDebugLoc(DebugLoc::getUnknownLoc()), - TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol) { + TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + HasTailCall(false), + Context(dag.getContext()) { } void init(GCFunctionInfo *gfi, AliasAnalysis &aa); @@ -489,8 +505,6 @@ private: void visitAShr(User &I) { visitShift(I, ISD::SRA); } void visitICmp(User &I); void visitFCmp(User &I); - void visitVICmp(User &I); - void visitVFCmp(User &I); // Visit the conversion instructions void visitTrunc(User &I); void visitZExt(User &I); @@ -539,12 +553,10 @@ private: void visitVACopy(CallInst &I); void visitUserOp1(Instruction &I) { - assert(0 && "UserOp1 should not exist at instruction selection time!"); - abort(); + llvm_unreachable("UserOp1 should not exist at instruction selection time!"); } void visitUserOp2(Instruction &I) { - assert(0 && "UserOp2 should not exist at instruction selection time!"); - abort(); + llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9d72a128d18bc..ae98da5ef8b81 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -16,6 +16,7 @@ #include "SelectionDAGBuild.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" @@ -29,6 +30,7 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -47,8 +49,10 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -150,12 +154,15 @@ namespace llvm { // insert. The specified MachineInstr is created but not inserted into any // basic blocks, and the scheduler passes ownership of it to this method. MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const { - cerr << "If a target marks an instruction with " - << "'usesCustomDAGSchedInserter', it must implement " - << "TargetLowering::EmitInstrWithCustomInserter!\n"; - abort(); - return 0; + MachineBasicBlock *MBB, + DenseMap *EM) const { +#ifndef NDEBUG + errs() << "If a target marks an instruction with " + "'usesCustomDAGSchedInserter', it must implement " + "TargetLowering::EmitInstrWithCustomInserter!"; +#endif + llvm_unreachable(0); + return 0; } /// EmitLiveInCopy - Emit a copy for a live in physical register. 
If the @@ -215,8 +222,11 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB, --Pos; } - TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); + bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; + +CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); if (Coalesced) { if (&*InsertPos == UseMI) ++InsertPos; MBB->erase(UseMI); @@ -247,8 +257,10 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, E = MRI.livein_end(); LI != E; ++LI) if (LI->second) { const TargetRegisterClass *RC = MRI.getRegClass(LI->second); - TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC); + bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), + LI->second, LI->first, RC, RC); + assert(Emitted && "Unable to issue a live-in copy instruction!\n"); + (void) Emitted; } } } @@ -258,7 +270,7 @@ static void EmitLiveInCopies(MachineBasicBlock *EntryMBB, //===----------------------------------------------------------------------===// SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - FunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), + MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(TLI, *FuncInfo)), SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), @@ -273,44 +285,42 @@ SelectionDAGISel::~SelectionDAGISel() { delete FuncInfo; } -unsigned SelectionDAGISel::MakeReg(MVT VT) { +unsigned SelectionDAGISel::MakeReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); AU.addRequired(); - AU.setPreservesAll(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); } -bool SelectionDAGISel::runOnFunction(Function &Fn) { +bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + Function &Fn = *mf.getFunction(); + // Do some sanity-checking on the command-line options. assert((!EnableFastISelVerbose || EnableFastISel) && "-fast-isel-verbose requires -fast-isel"); assert((!EnableFastISelAbort || EnableFastISel) && "-fast-isel-abort requires -fast-isel"); - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. - if (Fn.hasAvailableExternallyLinkage()) - return false; - - // Get alias analysis for load/store combining. AA = &getAnalysis(); - TargetMachine &TM = TLI.getTargetMachine(); - MF = &MachineFunction::construct(&Fn, TM); + MF = &mf; const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - if (MF->getFunction()->hasGC()) - GFI = &getAnalysis().getFunctionInfo(*MF->getFunction()); + if (Fn.hasGC()) + GFI = &getAnalysis().getFunctionInfo(Fn); else GFI = 0; RegInfo = &MF->getRegInfo(); - DOUT << "\n\n\n=== " << Fn.getName() << "\n"; + DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n"); MachineModuleInfo *MMI = getAnalysisIfAvailable(); DwarfWriter *DW = getAnalysisIfAvailable(); @@ -358,140 +368,50 @@ static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, } } -/// IsFixedFrameObjectWithPosOffset - Check if object is a fixed frame object and -/// whether object offset >= 0. 
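
The copyRegToReg hunks above check the result with "assert(Emitted && ...)" followed by "(void) Emitted;": in release builds (NDEBUG) the assert disappears, and the cast silences the unused-variable warning without changing behavior. A minimal illustration of the idiom:

    #include <cassert>

    static bool copyReg() { return true; } // stand-in for TII.copyRegToReg

    int main() {
      bool Emitted = copyReg();
      assert(Emitted && "Unable to issue a live-in copy instruction!");
      (void)Emitted; // keeps -Wunused-variable quiet when NDEBUG strips the assert
    }
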
-static bool -IsFixedFrameObjectWithPosOffset(MachineFrameInfo *MFI, SDValue Op) { - if (!isa(Op)) return false; - - FrameIndexSDNode * FrameIdxNode = dyn_cast(Op); - int FrameIdx = FrameIdxNode->getIndex(); - return MFI->isFixedObjectIndex(FrameIdx) && - MFI->getObjectOffset(FrameIdx) >= 0; -} - -/// IsPossiblyOverwrittenArgumentOfTailCall - Check if the operand could -/// possibly be overwritten when lowering the outgoing arguments in a tail -/// call. Currently the implementation of this call is very conservative and -/// assumes all arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with -/// virtual registers would be overwritten by direct lowering. -static bool IsPossiblyOverwrittenArgumentOfTailCall(SDValue Op, - MachineFrameInfo *MFI) { - RegisterSDNode * OpReg = NULL; - if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS || - (Op.getOpcode()== ISD::CopyFromReg && - (OpReg = dyn_cast(Op.getOperand(1))) && - (OpReg->getReg() >= TargetRegisterInfo::FirstVirtualRegister)) || - (Op.getOpcode() == ISD::LOAD && - IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(1))) || - (Op.getOpcode() == ISD::MERGE_VALUES && - Op.getOperand(Op.getResNo()).getOpcode() == ISD::LOAD && - IsFixedFrameObjectWithPosOffset(MFI, Op.getOperand(Op.getResNo()). - getOperand(1)))) - return true; - return false; -} - -/// CheckDAGForTailCallsAndFixThem - This Function looks for CALL nodes in the -/// DAG and fixes their tailcall attribute operand. -static void CheckDAGForTailCallsAndFixThem(SelectionDAG &DAG, - const TargetLowering& TLI) { - SDNode * Ret = NULL; - SDValue Terminator = DAG.getRoot(); - - // Find RET node. - if (Terminator.getOpcode() == ISD::RET) { - Ret = Terminator.getNode(); - } - - // Fix tail call attribute of CALL nodes. - for (SelectionDAG::allnodes_iterator BE = DAG.allnodes_begin(), - BI = DAG.allnodes_end(); BI != BE; ) { - --BI; - if (CallSDNode *TheCall = dyn_cast(BI)) { - SDValue OpRet(Ret, 0); - SDValue OpCall(BI, 0); - bool isMarkedTailCall = TheCall->isTailCall(); - // If CALL node has tail call attribute set to true and the call is not - // eligible (no RET or the target rejects) the attribute is fixed to - // false. The TargetLowering::IsEligibleForTailCallOptimization function - // must correctly identify tail call optimizable calls. - if (!isMarkedTailCall) continue; - if (Ret==NULL || - !TLI.IsEligibleForTailCallOptimization(TheCall, OpRet, DAG)) { - // Not eligible. Mark CALL node as non tail call. Note that we - // can modify the call node in place since calls are not CSE'd. - TheCall->setNotTailCall(); - } else { - // Look for tail call clobbered arguments. Emit a series of - // copyto/copyfrom virtual register nodes to protect them. - SmallVector Ops; - SDValue Chain = TheCall->getChain(), InFlag; - Ops.push_back(Chain); - Ops.push_back(TheCall->getCallee()); - for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { - SDValue Arg = TheCall->getArg(i); - bool isByVal = TheCall->getArgFlags(i).isByVal(); - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - if (!isByVal && - IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) { - MVT VT = Arg.getValueType(); - unsigned VReg = MF.getRegInfo(). 
- createVirtualRegister(TLI.getRegClassFor(VT)); - Chain = DAG.getCopyToReg(Chain, Arg.getDebugLoc(), - VReg, Arg, InFlag); - InFlag = Chain.getValue(1); - Arg = DAG.getCopyFromReg(Chain, Arg.getDebugLoc(), - VReg, VT, InFlag); - Chain = Arg.getValue(1); - InFlag = Arg.getValue(2); - } - Ops.push_back(Arg); - Ops.push_back(TheCall->getArgFlagsVal(i)); - } - // Link in chain of CopyTo/CopyFromReg. - Ops[0] = Chain; - DAG.UpdateNodeOperands(OpCall, Ops.begin(), Ops.size()); - } - } - } -} - void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator Begin, BasicBlock::iterator End) { SDL->setCurrentBasicBlock(BB); - - // Lower all of the non-terminator instructions. - for (BasicBlock::iterator I = Begin; I != End; ++I) + MetadataContext &TheMetadata = LLVMBB->getParent()->getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + + // Lower all of the non-terminator instructions. If a call is emitted + // as a tail call, cease emitting nodes for this block. + for (BasicBlock::iterator I = Begin; I != End && !SDL->HasTailCall; ++I) { + if (MDDbgKind) { + // Update DebugLoc if debug information is attached with this + // instruction. + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); + SDL->setCurDebugLoc(Loc); + if (MF->getDefaultDebugLoc().isUnknown()) + MF->setDefaultDebugLoc(Loc); + } + } if (!isa(I)) SDL->visit(*I); + } - // Ensure that all instructions which are used outside of their defining - // blocks are available as virtual registers. Invoke is handled elsewhere. - for (BasicBlock::iterator I = Begin; I != End; ++I) - if (!isa(I) && !isa(I)) - SDL->CopyToExportRegsIfNeeded(I); + if (!SDL->HasTailCall) { + // Ensure that all instructions which are used outside of their defining + // blocks are available as virtual registers. Invoke is handled elsewhere. + for (BasicBlock::iterator I = Begin; I != End; ++I) + if (!isa(I) && !isa(I)) + SDL->CopyToExportRegsIfNeeded(I); - // Handle PHI nodes in successor blocks. - if (End == LLVMBB->end()) { - HandlePHINodesInSuccessorBlocks(LLVMBB); + // Handle PHI nodes in successor blocks. + if (End == LLVMBB->end()) { + HandlePHINodesInSuccessorBlocks(LLVMBB); - // Lower the terminator after the copies are emitted. - SDL->visit(*LLVMBB->getTerminator()); + // Lower the terminator after the copies are emitted. + SDL->visit(*LLVMBB->getTerminator()); + } } - + // Make sure the root of the DAG is up-to-date. CurDAG->setRoot(SDL->getControlRoot()); - // Check whether calls in this block are real tail calls. Fix up CALL nodes - // with correct tailcall attribute so that the target can rely on the tailcall - // attribute indicating whether the call is really eligible for tail call - // optimization. - if (PerformTailCallOpt) - CheckDAGForTailCallsAndFixThem(*CurDAG, TLI); - // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); SDL->clear(); @@ -500,51 +420,51 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet VisitedNodes; SmallVector Worklist; - + Worklist.push_back(CurDAG->getRoot().getNode()); - + APInt Mask; APInt KnownZero; APInt KnownOne; - + while (!Worklist.empty()) { SDNode *N = Worklist.back(); Worklist.pop_back(); - + // If we've already seen this node, ignore it. if (!VisitedNodes.insert(N)) continue; - + // Otherwise, add all chain operands to the worklist. 
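
SelectBasicBlock above now stops lowering at the first emitted tail call: the "I != End && !SDL->HasTailCall" loop condition means no DAG nodes are created for instructions after it, and the export/PHI/terminator work is skipped as well. A sketch of that control flow with a toy lowering callback (integers stand in for instructions):

    #include <cstdio>
    #include <functional>
    #include <vector>

    // Lower instructions until one is emitted as a tail call, as in the
    // "I != End && !SDL->HasTailCall" loop above. The lowering callback
    // returns true when it emitted a tail call.
    static std::size_t lowerBlock(const std::vector<int> &Insts,
                                  const std::function<bool(int)> &Lower) {
      bool HasTailCall = false;
      std::size_t N = 0;
      for (std::size_t i = 0; i != Insts.size() && !HasTailCall; ++i, ++N)
        HasTailCall = Lower(Insts[i]);
      return N; // number of instructions actually lowered
    }

    int main() {
      // Instruction 2 is a tail call; 3 must not be lowered. Prints 2.
      std::printf("%zu\n", lowerBlock({1, 2, 3}, [](int I) { return I == 2; }));
    }
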
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) if (N->getOperand(i).getValueType() == MVT::Other) Worklist.push_back(N->getOperand(i).getNode()); - + // If this is a CopyToReg with a vreg dest, process it. if (N->getOpcode() != ISD::CopyToReg) continue; - + unsigned DestReg = cast(N->getOperand(1))->getReg(); if (!TargetRegisterInfo::isVirtualRegister(DestReg)) continue; - + // Ignore non-scalar or non-integer values. SDValue Src = N->getOperand(2); - MVT SrcVT = Src.getValueType(); + EVT SrcVT = Src.getValueType(); if (!SrcVT.isInteger() || SrcVT.isVector()) continue; - + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); - + // Only install this information if it tells us something. if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { DestReg -= TargetRegisterInfo::FirstVirtualRegister; - FunctionLoweringInfo &FLI = CurDAG->getFunctionLoweringInfo(); - if (DestReg >= FLI.LiveOutRegInfo.size()) - FLI.LiveOutRegInfo.resize(DestReg+1); - FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[DestReg]; + if (DestReg >= FuncInfo->LiveOutRegInfo.size()) + FuncInfo->LiveOutRegInfo.resize(DestReg+1); + FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo->LiveOutRegInfo[DestReg]; LOI.NumSignBits = NumSignBits; LOI.KnownOne = KnownOne; LOI.KnownZero = KnownZero; @@ -560,10 +480,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) - BlockName = CurDAG->getMachineFunction().getFunction()->getName() + ':' + - BB->getBasicBlock()->getName(); + BlockName = MF->getFunction()->getNameStr() + ":" + + BB->getBasicBlock()->getNameStr(); - DOUT << "Initial selection DAG:\n"; + DEBUG(errs() << "Initial selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -575,10 +495,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - - DOUT << "Optimized lowered selection DAG:\n"; + + DEBUG(errs() << "Optimized lowered selection DAG:\n"); DEBUG(CurDAG->dump()); - + // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
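
ComputeLiveOutVRegInfo above records, per virtual register, how many redundant sign bits and which known-zero/known-one bits the copied value has; the "NumSignBits != 1 || KnownZero != 0 || KnownOne != 0" test skips entries that say nothing. A bit-twiddling sketch of the sign-bit count for 32-bit constants (an assumption of the sketch; the real code queries the DAG's value tracking):

    #include <cassert>
    #include <cstdint>

    // Number of leading bits equal to the sign bit, including the sign bit
    // itself; 1 means "nothing known beyond the sign bit".
    static unsigned numSignBits(int32_t V) {
      uint32_t U = static_cast<uint32_t>(V < 0 ? ~V : V);
      unsigned N = 1;
      for (unsigned Bit = 31; Bit-- > 0 && !(U >> Bit);) ++N;
      return N;
    }

    int main() {
      assert(numSignBits(-1) == 32); // every bit is a copy of the sign
      assert(numSignBits(1) == 31);
      assert(numSignBits(INT32_MIN) == 1);
    }
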
if (!DisableLegalizeTypes) { @@ -593,7 +513,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DOUT << "Type-legalized selection DAG:\n"; + DEBUG(errs() << "Type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (Changed) { @@ -608,7 +528,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DOUT << "Optimized type-legalized selection DAG:\n"; + DEBUG(errs() << "Optimized type-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } @@ -638,11 +558,11 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DOUT << "Optimized vector-legalized selection DAG:\n"; + DEBUG(errs() << "Optimized vector-legalized selection DAG:\n"); DEBUG(CurDAG->dump()); } } - + if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); if (TimePassesIsEnabled) { @@ -651,10 +571,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Legalize(DisableLegalizeTypes, OptLevel); } - - DOUT << "Legalized selection DAG:\n"; + + DEBUG(errs() << "Legalized selection DAG:\n"); DEBUG(CurDAG->dump()); - + if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. @@ -664,12 +584,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } else { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - - DOUT << "Optimized legalized selection DAG:\n"; + + DEBUG(errs() << "Optimized legalized selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); - + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -682,7 +602,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { InstructionSelect(); } - DOUT << "Selected selection DAG:\n"; + DEBUG(errs() << "Selected selection DAG:\n"); DEBUG(CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -698,13 +618,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (ViewSUnitDAGs) Scheduler->viewGraph(); - // Emit machine code to BB. This can change 'BB' to the last block being + // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(); + BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); } else { - BB = Scheduler->EmitSchedule(); + BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); } // Free the scheduler state. @@ -715,9 +635,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { delete Scheduler; } - DOUT << "Selected machine code:\n"; + DEBUG(errs() << "Selected machine code:\n"); DEBUG(BB->dump()); -} +} void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, @@ -736,6 +656,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, #endif ); + MetadataContext &TheMetadata = Fn.getContext().getMetadata(); + unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); + // Iterate over all basic blocks in the function. 
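
The DOUT-to-DEBUG(errs() << ...) rewrites running through this function make debug output vanish entirely from release builds. A minimal version of such a macro (gating on NDEBUG only is an assumption of the sketch; LLVM's real DEBUG also honors the runtime -debug flag and per-type filters):

    #include <iostream>

    // Minimal stand-in for LLVM's DEBUG(...) macro: the statement is not
    // even compiled into NDEBUG builds, so it costs nothing in release.
    #ifndef NDEBUG
    #define DEBUG_STMT(X) do { X; } while (false)
    #else
    #define DEBUG_STMT(X) do { } while (false)
    #endif

    int main() {
      DEBUG_STMT(std::cerr << "Initial selection DAG:\n");
    }
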
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { BasicBlock *LLVMBB = &*I; @@ -758,7 +681,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, I != E; ++I, ++j) if (Fn.paramHasAttr(j, Attribute::ByVal)) { if (EnableFastISelVerbose || EnableFastISelAbort) - cerr << "FastISel skips entry block due to byval argument\n"; + errs() << "FastISel skips entry block due to byval argument\n"; SuppressFastISel = true; break; } @@ -818,16 +741,29 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. for (; BI != End; ++BI) { + if (MDDbgKind) { + // Update DebugLoc if debug information is attached with this + // instruction. + if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) { + DILocation DILoc(Dbg); + DebugLoc Loc = ExtractDebugLocation(DILoc, + MF.getDebugLocInfo()); + FastIS->setCurDebugLoc(Loc); + if (MF.getDefaultDebugLoc().isUnknown()) + MF.setDefaultDebugLoc(Loc); + } + } + // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel miss: "; + errs() << "FastISel miss: "; BI->dump(); } - if (EnableFastISelAbort) - assert(0 && "FastISel didn't handle a PHI in a successor"); + assert(!EnableFastISelAbort && + "FastISel didn't handle a PHI in a successor"); break; } @@ -842,11 +778,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel missed call: "; + errs() << "FastISel missed call: "; BI->dump(); } - if (BI->getType() != Type::VoidTy) { + if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) { unsigned &R = FuncInfo->ValueMap[BI]; if (!R) R = FuncInfo->CreateRegForValue(BI); @@ -864,13 +800,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // For now, be a little lenient about non-branch terminators. if (!isa(BI) || isa(BI)) { if (EnableFastISelVerbose || EnableFastISelAbort) { - cerr << "FastISel miss: "; + errs() << "FastISel miss: "; BI->dump(); } if (EnableFastISelAbort) // The "fast" selector couldn't handle something and bailed. // For the purpose of debugging, just abort. - assert(0 && "FastISel didn't select the entire block"); + llvm_unreachable("FastISel didn't select the entire block"); } break; } @@ -895,15 +831,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, void SelectionDAGISel::FinishBasicBlock() { - DOUT << "Target-post-processed machine code:\n"; + DEBUG(errs() << "Target-post-processed machine code:\n"); DEBUG(BB->dump()); - DOUT << "Total amount of phi nodes to update: " - << SDL->PHINodesToUpdate.size() << "\n"; + DEBUG(errs() << "Total amount of phi nodes to update: " + << SDL->PHINodesToUpdate.size() << "\n"); DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) - DOUT << "Node " << i << " : (" << SDL->PHINodesToUpdate[i].first - << ", " << SDL->PHINodesToUpdate[i].second << ")\n";); - + errs() << "Node " << i << " : (" + << SDL->PHINodesToUpdate[i].first + << ", " << SDL->PHINodesToUpdate[i].second << ")\n"); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. 
if (SDL->SwitchCases.empty() && @@ -932,7 +869,7 @@ SelectionDAGISel::FinishBasicBlock() { CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); - } + } for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into @@ -947,8 +884,8 @@ SelectionDAGISel::FinishBasicBlock() { SDL->visitBitTestCase(SDL->BitTestCases[i].Default, SDL->BitTestCases[i].Reg, SDL->BitTestCases[i].Cases[j]); - - + + CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); @@ -1001,7 +938,7 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); SDL->clear(); } - + // Set the current basic block to the mbb we wish to insert the code into BB = SDL->JTCases[i].second.MBB; SDL->setCurrentBasicBlock(BB); @@ -1010,7 +947,7 @@ SelectionDAGISel::FinishBasicBlock() { CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); SDL->clear(); - + // Update PHI Nodes for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; @@ -1019,20 +956,21 @@ SelectionDAGISel::FinishBasicBlock() { "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. if (PHIBB == SDL->JTCases[i].second.Default) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + PHI->addOperand + (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); + PHI->addOperand + (MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, - false)); + PHI->addOperand + (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } } SDL->JTCases.clear(); - + // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { @@ -1045,25 +983,31 @@ SelectionDAGISel::FinishBasicBlock() { PHI->addOperand(MachineOperand::CreateMBB(BB)); } } - + // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDL->SwitchCases[i].ThisBB; + MachineBasicBlock *ThisBB = BB = SDL->SwitchCases[i].ThisBB; SDL->setCurrentBasicBlock(BB); - + // Emit the code SDL->visitSwitchCase(SDL->SwitchCases[i]); CurDAG->setRoot(SDL->getRoot()); CodeGenAndEmitDAG(); - SDL->clear(); - + // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + // If new BB's are created during scheduling, the edges may have been + // updated. That is, the edge from ThisBB to BB may have been split and + // BB's predecessor is now another block. 
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*>::iterator EI = + SDL->EdgeMapping.find(BB); + if (EI != SDL->EdgeMapping.end()) + ThisBB = EI->second; for (MachineBasicBlock::iterator Phi = BB->begin(); Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ // This value for this PHI node is recorded in PHINodesToUpdate, get it. @@ -1073,21 +1017,22 @@ SelectionDAGISel::FinishBasicBlock() { if (SDL->PHINodesToUpdate[pn].first == Phi) { Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. second, false)); - Phi->addOperand(MachineOperand::CreateMBB(SDL->SwitchCases[i].ThisBB)); + Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); break; } } } - + // Don't process RHS if same block as LHS. if (BB == SDL->SwitchCases[i].FalseBB) SDL->SwitchCases[i].FalseBB = 0; - + // If we haven't handled the RHS, do so now. Otherwise, we're done. SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; SDL->SwitchCases[i].FalseBB = 0; } assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); + SDL->clear(); } SDL->SwitchCases.clear(); @@ -1101,12 +1046,12 @@ SelectionDAGISel::FinishBasicBlock() { /// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); - + if (!Ctor) { Ctor = ISHeuristic; RegisterScheduler::setDefault(Ctor); } - + return Ctor(this, OptLevel); } @@ -1123,25 +1068,25 @@ ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). -bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); - + // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; - + // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; - + // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits. APInt NeededMask = DesiredMask & ~ActualMask; if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) return true; - + // TODO: check to see if missing bits are just not demanded. // Otherwise, this pattern doesn't match. @@ -1152,32 +1097,32 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, /// the dag combiner simplified the 255, we still want to match. RHS is the /// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value /// specified in the .td file (e.g. 255). -bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, +bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, int64_t DesiredMaskS) const { const APInt &ActualMask = RHS->getAPIntValue(); const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); - + // If the actual mask exactly matches, success! if (ActualMask == DesiredMask) return true; - + // If the actual AND mask is allowing unallowed bits, this doesn't match. if (ActualMask.intersects(~DesiredMask)) return false; - + // Otherwise, the DAG Combiner may have proven that the value coming in is // either already zero or is not demanded. Check for known zero input bits.
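// Aside: a minimal standalone sketch of the CheckAndMask logic above, with
// plain uint64_t standing in for APInt and a precomputed known-zero mask
// standing in for MaskedValueIsZero (checkAndMask/knownZero are illustrative
// names, not LLVM APIs). The AND mask in the DAG may be narrower than the
// mask in the .td pattern as long as every bit it drops is already known to
// be zero on the input.
#include <cassert>
#include <cstdint>

static bool checkAndMask(uint64_t actual, uint64_t desired,
                         uint64_t knownZero) {
  if (actual == desired) return true;     // Exact match: success.
  if (actual & ~desired) return false;    // AND allows disallowed bits.
  uint64_t needed = desired & ~actual;    // Bits the pattern clears that the
  return (needed & knownZero) == needed;  // DAG mask doesn't: must be zero.
}

int main() {
  // Pattern wants (x & 255); the combiner shrank it to (x & 0x0F) because
  // the high nibble of x is known zero. Still a match.
  assert(checkAndMask(0x0F, 0xFF, ~0x0FULL));
  // Same shrunken mask with nothing known about the input: no match.
  assert(!checkAndMask(0x0F, 0xFF, 0));
  return 0;
}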
APInt NeededMask = DesiredMask & ~ActualMask; - + APInt KnownZero, KnownOne; CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); - + // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) return true; - + // TODO: check to see if missing bits are just not demanded. - + // Otherwise, this pattern doesn't match. return false; } @@ -1196,7 +1141,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { unsigned i = 2, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) --e; // Don't process a flag operand if it is here. - + while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); if ((Flags & 7) != 4 /*MEM*/) { @@ -1210,25 +1155,25 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) { // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps)) { - cerr << "Could not match memory address. Inline asm failure!\n"; - exit(1); + llvm_report_error("Could not match memory address. Inline asm" + " failure!"); } - + // Add this to the output node. - MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT IntPtrTy = TLI.getPointerTy(); Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ | (SelOps.size()<< 3), IntPtrTy)); Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); i += 2; } } - + // Add the flag input back if present. if (e != InOps.size()) Ops.push_back(InOps.back()); } -/// findFlagUse - Return use of MVT::Flag value produced by the specified +/// findFlagUse - Return use of EVT::Flag value produced by the specified /// SDNode. /// static SDNode *findFlagUse(SDNode *N) { @@ -1331,7 +1276,7 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, // Fold. But since Fold and FU are flagged together, this will create // a cycle in the scheduling graph. - MVT VT = Root->getValueType(Root->getNumValues()-1); + EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Flag) { SDNode *FU = findFlagUse(Root); if (FU == NULL) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 6fd5df2b937d6..ccc5e3c75c99b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -44,7 +44,7 @@ namespace llvm { } static std::string getEdgeDestLabel(const void *Node, unsigned i) { - return ((const SDNode *) Node)->getValueType(i).getMVTString(); + return ((const SDNode *) Node)->getValueType(i).getEVTString(); } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge @@ -84,7 +84,7 @@ namespace llvm { template<typename EdgeIter> static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); if (VT == MVT::Flag) return "color=red,style=bold"; else if (VT == MVT::Other) @@ -138,11 +138,11 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, - Title); + ViewGraph(this, "dag."
+ getMachineFunction().getFunction()->getNameStr(), + false, Title); #else - cerr << "SelectionDAG::viewGraph is only available in debug builds on " - << "systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; #endif // NDEBUG } @@ -158,8 +158,8 @@ void SelectionDAG::clearGraphAttrs() { #ifndef NDEBUG NodeGraphAttrs.clear(); #else - cerr << "SelectionDAG::clearGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -170,8 +170,8 @@ void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { #ifndef NDEBUG NodeGraphAttrs[N] = Attrs; #else - cerr << "SelectionDAG::setGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -188,8 +188,8 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { else return ""; #else - cerr << "SelectionDAG::getGraphAttrs is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; return std::string(""); #endif } @@ -200,8 +200,8 @@ void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { #ifndef NDEBUG NodeGraphAttrs[N] = std::string("color=") + Color; #else - cerr << "SelectionDAG::setGraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } @@ -216,7 +216,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet if (level >= 20) { if (!printed) { printed = true; - DOUT << "setSubgraphColor hit max level\n"; + DEBUG(errs() << "setSubgraphColor hit max level\n"); } return true; } @@ -232,8 +232,8 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet } } #else - cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif return hit_limit; } @@ -255,8 +255,8 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { } #else - cerr << "SelectionDAG::setSubgraphColor is only available in debug builds" - << " on systems with Graphviz or gv!\n"; + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; #endif } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 83357e066009b..a2baee42310a2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,18 +11,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include 
"llvm/Target/TargetSubtarget.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" using namespace llvm; @@ -239,12 +241,23 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::UO_F64] = "__unorddf2"; Names[RTLIB::O_F32] = "__unordsf2"; Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; } +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + /// getFPEXT - Return the FPEXT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::f64) return FPEXT_F32_F64; @@ -254,7 +267,7 @@ RTLIB::Libcall RTLIB::getFPEXT(MVT OpVT, MVT RetVT) { /// getFPROUND - Return the FPROUND_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { if (RetVT == MVT::f32) { if (OpVT == MVT::f64) return FPROUND_F64_F32; @@ -273,7 +286,7 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) { /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::i8) return FPTOSINT_F32_I8; @@ -312,7 +325,7 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) { /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { if (RetVT == MVT::i8) return FPTOUINT_F32_I8; @@ -351,7 +364,7 @@ RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) { /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return SINTTOFP_I32_F32; @@ -385,7 +398,7 @@ RTLIB::Libcall RTLIB::getSINTTOFP(MVT OpVT, MVT RetVT) { /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or /// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getUINTTOFP(MVT OpVT, MVT RetVT) { +RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { if (OpVT == MVT::i32) { if (RetVT == MVT::f32) return UINTTOFP_I32_F32; @@ -439,8 +452,9 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::O_F64] = ISD::SETEQ; } -TargetLowering::TargetLowering(TargetMachine &tm) - : TM(tm), TD(TM.getTargetData()) { +/// NOTE: The constructor takes ownership of TLOF. +TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { // All operations default to being supported. 
memset(OpActions, 0, sizeof(OpActions)); memset(LoadExtActions, 0, sizeof(LoadExtActions)); @@ -490,12 +504,10 @@ TargetLowering::TargetLowering(TargetMachine &tm) IsLittleEndian = TD->isLittleEndian(); UsesGlobalOffsetTable = false; - ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); - ShiftAmtHandling = Undefined; + ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; - allowUnalignedMemoryAccesses = false; benefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; @@ -515,14 +527,62 @@ TargetLowering::TargetLowering(TargetMachine &tm) InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); // Tell Legalize whether the assembler supports DEBUG_LOC. - const TargetAsmInfo *TASM = TM.getTargetAsmInfo(); + const MCAsmInfo *TASM = TM.getMCAsmInfo(); if (!TASM || !TASM->hasDotLocAndDotFile()) setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); } -TargetLowering::~TargetLowering() {} +TargetLowering::~TargetLowering() { + delete &TLOF; +} + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering* TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + EVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) { + // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + } else { + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; + } + + return 1; +} /// computeRegisterProperties - Once all of the register classes are added, /// this allows us to compute derived properties we expose. @@ -546,13 +606,13 @@ void TargetLowering::computeRegisterProperties() { // Every integer value type larger than this largest register takes twice as // many registers to represent as the previous ValueType. 
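// Aside: the halving loop in getVectorTypeBreakdownMVT as a standalone
// sketch over element counts; isLegalVector below is a stand-in for the
// target's legality query, not an LLVM API. A v8f32 with only v4f32 legal
// splits into two v4f32 intermediates; a non-power-of-2 vector such as
// v3i32 is scalarized outright.
#include <cassert>

static bool isLegalVector(unsigned numElts) { return numElts == 4; }

// Returns how many intermediate values are produced; *outElts receives the
// element count of each intermediate (1 means scalars).
static unsigned breakdown(unsigned numElts, unsigned *outElts) {
  unsigned numRegs = 1;
  if (numElts & (numElts - 1)) {  // Non-power-of-2: one scalar per element.
    numRegs = numElts;
    numElts = 1;
  }
  while (numElts > 1 && !isLegalVector(numElts)) {  // Halve until supported.
    numElts >>= 1;
    numRegs <<= 1;
  }
  *outElts = numElts;
  return numRegs;
}

int main() {
  unsigned elts;
  assert(breakdown(8, &elts) == 2 && elts == 4);  // v8f32 -> 2 x v4f32
  assert(breakdown(3, &elts) == 3 && elts == 1);  // v3i32 -> 3 scalars
  return 0;
}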
for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { - MVT EVT = (MVT::SimpleValueType)ExpandedReg; - if (!EVT.isInteger()) + EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg; + if (!ExpandedVT.isInteger()) break; NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction(EVT, Expand); + ValueTypeActions.setTypeAction(ExpandedVT, Expand); } // Inspect all of the ValueType's smaller than the largest integer @@ -560,7 +620,7 @@ void TargetLowering::computeRegisterProperties() { unsigned LegalIntReg = LargestIntReg; for (unsigned IntReg = LargestIntReg - 1; IntReg >= (unsigned)MVT::i1; --IntReg) { - MVT IVT = (MVT::SimpleValueType)IntReg; + EVT IVT = (MVT::SimpleValueType)IntReg; if (isTypeLegal(IVT)) { LegalIntReg = IntReg; } else { @@ -608,20 +668,20 @@ void TargetLowering::computeRegisterProperties() { i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; if (!isTypeLegal(VT)) { - MVT IntermediateVT, RegisterVT; + MVT IntermediateVT; + EVT RegisterVT; unsigned NumIntermediates; NumRegistersForVT[i] = - getVectorTypeBreakdown(VT, - IntermediateVT, NumIntermediates, - RegisterVT); + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); RegisterTypeForVT[i] = RegisterVT; // Determine if there is a legal wider type. bool IsLegalWiderType = false; - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + EVT SVT = (MVT::SimpleValueType)nVT; if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && SVT.getVectorNumElements() > NElts) { TransformToType[i] = SVT; @@ -631,7 +691,7 @@ void TargetLowering::computeRegisterProperties() { } } if (!IsLegalWiderType) { - MVT NVT = VT.getPow2VectorType(); + EVT NVT = VT.getPow2VectorType(); if (NVT == VT) { // Type is already a power of 2. The default action is to split. TransformToType[i] = MVT::Other; @@ -650,11 +710,10 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { } -MVT TargetLowering::getSetCCResultType(MVT VT) const { - return getValueType(TD->getIntPtrType()); +MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const { + return PointerTy.SimpleTy; } - /// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. @@ -664,13 +723,13 @@ MVT TargetLowering::getSetCCResultType(MVT VT) const { /// register. It also returns the VT and quantity of the intermediate values /// before they are promoted/expanded. /// -unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, - MVT &IntermediateVT, +unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, unsigned &NumIntermediates, - MVT &RegisterVT) const { + EVT &RegisterVT) const { // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType(); + EVT EltTy = VT.getVectorElementType(); unsigned NumVectorRegs = 1; @@ -683,19 +742,20 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, // Divide the input until we get to a supported size. 
This will always // end with a scalar if the target doesn't support vectors. - while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { NumElts >>= 1; NumVectorRegs <<= 1; } NumIntermediates = NumVectorRegs; - MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); if (!isTypeLegal(NewVT)) NewVT = EltTy; IntermediateVT = NewVT; - MVT DestVT = getRegisterType(NewVT); + EVT DestVT = getRegisterType(Context, NewVT); RegisterVT = DestVT; if (DestVT.bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16. @@ -714,7 +774,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, /// If there is no vector type that we want to widen to, returns MVT::Other /// When and where to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. -MVT TargetLowering::getWidenVectorType(MVT VT) const { +EVT TargetLowering::getWidenVectorType(EVT VT) const { assert(VT.isVector()); if (isTypeLegal(VT)) return VT; @@ -781,7 +841,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, // if we can expand it to have all bits set, do it if (C->getAPIntValue().intersects(~Demanded)) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), DAG.getConstant(Demanded & C->getAPIntValue(), @@ -822,7 +882,7 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { - MVT SmallVT = MVT::getIntegerVT(SmallVTBits); + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && TLI.isZExtFree(SmallVT, Op.getValueType())) { // We found a type with free casts. @@ -1008,7 +1068,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known if ((KnownOne & KnownOne2) == KnownOne) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); @@ -1023,7 +1083,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // if we can expand it to have all bits set, do it if (Expanded.isAllOnesValue()) { if (Expanded != C->getAPIntValue()) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), TLO.DAG.getConstant(Expanded, VT)); return TLO.CombineTo(Op, New); @@ -1099,7 +1159,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); } @@ -1116,7 +1176,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; case ISD::SRL: if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); unsigned VTSize = VT.getSizeInBits(); SDValue InOp = Op.getOperand(0); @@ -1168,7 +1228,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0), Op.getOperand(1))); if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned ShAmt = SA->getZExtValue(); // If the shift count is an invalid immediate, don't do anything. @@ -1205,7 +1265,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); // Sign extension. Compute the demanded bits in the result that are not // present in the input. @@ -1272,7 +1332,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::SIGN_EXTEND: { - MVT InVT = Op.getOperand(0).getValueType(); + EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getSizeInBits(); APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); @@ -1371,7 +1431,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::AssertZext: { - MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, @@ -1385,7 +1445,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, #if 0 // If this is an FP->Int bitcast and if the sign bit is the only thing that // is demanded, turn this into a FGETSIGN. - if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) && + if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) && MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && !MVT::isVector(Op.getOperand(0).getValueType())) { // Only do this xform if FGETSIGN is valid or if before legalize. @@ -1492,7 +1552,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // to handle some common cases. // Fall back to ComputeMaskedBits to catch other known cases.
- MVT OpVT = Val.getValueType(); + EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getSizeInBits(); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; @@ -1504,10 +1564,11 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. SDValue -TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, +TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; + LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1518,316 +1579,321 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } + if (isa<ConstantSDNode>(N0.getNode())) { + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); - if (isa<ConstantSDNode>(N0.getNode())) { - return DAG.FoldSetCC(VT, N0, N1, Cond, dl); - } else { - // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an - // equality comparison, then we're just comparing whether X itself is - // zero. - if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && - N0.getOperand(0).getOpcode() == ISD::CTLZ && - N0.getOperand(1).getOpcode() == ISD::Constant) { - unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { - if ((C1 == 0) == (Cond == ISD::SETEQ)) { - // (srl (ctlz x), 5) == 0 -> X != 0 - // (srl (ctlz x), 5) != 1 -> X != 0 - Cond = ISD::SETNE; - } else { - // (srl (ctlz x), 5) != 0 -> X == 0 - // (srl (ctlz x), 5) == 1 -> X == 0 - Cond = ISD::SETEQ; - } - SDValue Zero = DAG.getConstant(0, N0.getValueType()); - return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), - Zero, Cond); + + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. + if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; } + SDValue Zero = DAG.getConstant(0, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); } + } - // If the LHS is '(and load, const)', the RHS is 0, - // the test is for equality or unsigned, and all 1 bits of the const are - // in the same partial word, see if we can shorten the load.
- if (DCI.isBeforeLegalize() && - N0.getOpcode() == ISD::AND && C1 == 0 && - N0.getNode()->hasOneUse() && - isa<LoadSDNode>(N0.getOperand(0)) && - N0.getOperand(0).getNode()->hasOneUse() && - isa<ConstantSDNode>(N0.getOperand(1))) { - LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); - uint64_t bestMask = 0; - unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed() && - // FIXME: This uses getZExtValue() below so it only works on i64 and - // below. - N0.getValueType().getSizeInBits() <= 64) { - unsigned origWidth = N0.getValueType().getSizeInBits(); - // We can narrow (e.g.) 16-bit extending loads on 32-bit target to - // 8 bits, but have to be careful... - if (Lod->getExtensionType() != ISD::NON_EXTLOAD) - origWidth = Lod->getMemoryVT().getSizeInBits(); - uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); - for (unsigned width = origWidth / 2; width>=8; width /= 2) { - uint64_t newMask = (1ULL << width) - 1; - for (unsigned offset=0; offset<origWidth/width; offset++) { - if ((newMask & Mask) == Mask) { - if (!TD->isLittleEndian()) - bestOffset = (origWidth/width - offset - 1) * (width/8); - else - bestOffset = (uint64_t)offset * (width/8); - bestMask = Mask >> (offset * (width/8) * 8); - bestWidth = width; - break; - } - newMask = newMask << width; + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + uint64_t bestMask = 0; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed() && + // FIXME: This uses getZExtValue() below so it only works on i64 and + // below. + N0.getValueType().getSizeInBits() <= 64) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful...
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + uint64_t newMask = (1ULL << width) - 1; + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!TD->isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask >> (offset * (width/8) * 8); + bestWidth = width; + break; } + newMask = newMask << width; } } - if (bestWidth) { - MVT newVT = MVT::getIntegerVT(bestWidth); - if (newVT.isRound()) { - MVT PtrType = Lod->getOperand(1).getValueType(); - SDValue Ptr = Lod->getBasePtr(); - if (bestOffset != 0) - Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, PtrType)); - unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, - Lod->getSrcValue(), - Lod->getSrcValueOffset() + bestOffset, - false, NewAlign); - return DAG.getSetCC(dl, VT, - DAG.getNode(ISD::AND, dl, newVT, NewLoad, - DAG.getConstant(bestMask, newVT)), - DAG.getConstant(0LL, newVT), Cond); - } + } + if (bestWidth) { + EVT newVT = EVT::getIntegerVT(Context, bestWidth); + if (newVT.isRound()) { + EVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getSrcValue(), + Lod->getSrcValueOffset() + bestOffset, + false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask, newVT)), + DAG.getConstant(0LL, newVT), Cond); } } + } - // If the LHS is a ZERO_EXTEND, perform the comparison on the input. - if (N0.getOpcode() == ISD::ZERO_EXTEND) { - unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); - - // If the comparison constant has bits in the upper part, the - // zero-extended value could never match. - if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), - C1.getBitWidth() - InSize))) { - switch (Cond) { - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETEQ: return DAG.getConstant(0, VT); - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETNE: return DAG.getConstant(1, VT); - case ISD::SETGT: - case ISD::SETGE: - // True if the sign bit of C1 is set. - return DAG.getConstant(C1.isNegative(), VT); - case ISD::SETLT: - case ISD::SETLE: - // True if the sign bit of C1 isn't set. - return DAG.getConstant(C1.isNonNegative(), VT); - default: - break; - } - } + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); - // Otherwise, we can perform the comparison with the low bits. + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match.
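// Aside: the width/offset search above, which shrinks '(and (load x), C) == 0'
// to a narrower load, as a standalone sketch (little-endian case only; the
// names are illustrative). For a 32-bit load masked with 0x00FF0000 it settles
// on an 8-bit load at byte offset 2 with the mask rebased to 0xFF.
#include <cassert>
#include <cstdint>

struct Narrowed { unsigned width, byteOffset; uint64_t mask; };

static bool narrowLoad(unsigned origWidth, uint64_t mask, Narrowed *out) {
  bool found = false;
  // Widths run from wide to narrow and later hits overwrite earlier ones,
  // so the result is the narrowest chunk that still covers every set bit.
  for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
    uint64_t newMask = (1ULL << width) - 1;
    for (unsigned offset = 0; offset < origWidth / width; offset++) {
      if ((newMask & mask) == mask) {        // All set bits fit this chunk.
        out->width = width;
        out->byteOffset = offset * (width / 8);  // Little-endian offset.
        out->mask = mask >> (offset * width);    // Mask rebased to the chunk.
        found = true;
        break;                               // Done at this width.
      }
      newMask <<= width;                     // Slide to the next chunk.
    }
  }
  return found;
}

int main() {
  Narrowed n;
  assert(narrowLoad(32, 0x00FF0000, &n));
  assert(n.width == 8 && n.byteOffset == 2 && n.mask == 0xFF);
  return 0;
}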
+ if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { switch (Cond) { - case ISD::SETEQ: - case ISD::SETNE: case ISD::SETUGT: case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, VT); case ISD::SETULT: case ISD::SETULE: - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(APInt(C1).trunc(InSize), - N0.getOperand(0).getValueType()), - Cond); + case ISD::SETNE: return DAG.getConstant(1, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), VT); default: - break; // todo, be more careful with signed comparisons - } - } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); - unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); - MVT ExtDstTy = N0.getValueType(); - unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); - - // If the extended part has any inconsistent bits, it cannot ever - // compare equal. In other words, they have to be all ones or all - // zeros. - APInt ExtBits = - APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); - if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) - return DAG.getConstant(Cond == ISD::SETNE, VT); - - SDValue ZextOp; - MVT Op0Ty = N0.getOperand(0).getValueType(); - if (Op0Ty == ExtSrcTy) { - ZextOp = N0.getOperand(0); - } else { - APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); - ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, Op0Ty)); - } - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(ZextOp.getNode()); - // Otherwise, make this a use of a zext. - return DAG.getSetCC(dl, VT, ZextOp, - DAG.getConstant(C1 & APInt::getLowBitsSet( - ExtDstTyBits, - ExtSrcTyBits), - ExtDstTy), - Cond); - } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - - // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC - if (N0.getOpcode() == ISD::SETCC) { - bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); - if (TrueWhenTrue) - return N0; - - // Invert the condition. - ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, - N0.getOperand(0).getValueType().isInteger()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); - } - - if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && - N0.getOperand(0).getOpcode() == ISD::XOR && - N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && - isa<ConstantSDNode>(N0.getOperand(1)) && - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { - // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We - // can only do this if the top bits are known zero. - unsigned BitWidth = N0.getValueSizeInBits(); - if (DAG.MaskedValueIsZero(N0, - APInt::getHighBitsSet(BitWidth, - BitWidth-1))) { - // Okay, get the un-inverted input value. - SDValue Val; - if (N0.getOpcode() == ISD::XOR) - Val = N0.getOperand(0); - else { - assert(N0.getOpcode() == ISD::AND && - N0.getOperand(0).getOpcode() == ISD::XOR); - // ((X^1)&1)^1 -> X & 1 - Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), - N0.getOperand(0).getOperand(0), - N0.getOperand(1)); - } - return DAG.getSetCC(dl, VT, Val, N1, - Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); - } - } - }
+ switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: { + EVT newVT = N0.getOperand(0).getValueType(); + if (DCI.isBeforeLegalizeOps() || + (isOperationLegal(ISD::SETCC, newVT) && + getCondCodeAction(Cond, newVT)==Legal)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(APInt(C1).trunc(InSize), newVT), + Cond); + break; + } + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + EVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the extended part has any inconsistent bits, it cannot ever + // compare equal. In other words, they have to be all ones or all + // zeros. + APInt ExtBits = + APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits); + if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits) + return DAG.getConstant(Cond == ISD::SETNE, VT); - APInt MinVal, MaxVal; - unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); - if (ISD::isSignedIntSetCC(Cond)) { - MinVal = APInt::getSignedMinValue(OperandBitSize); - MaxVal = APInt::getSignedMaxValue(OperandBitSize); + SDValue ZextOp; + EVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); } else { - MinVal = APInt::getMinValue(OperandBitSize); - MaxVal = APInt::getMaxValue(OperandBitSize); + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, Op0Ty)); } - - // Canonicalize GE/LE comparisons to use GT/LT comparisons. - if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { - if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true - // X >= C0 --> X > (C0-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1-1, N1.getValueType()), - (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1); + if (TrueWhenTrue) + return N0; + + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } - - if (Cond == ISD::SETLE || Cond == ISD::SETULE) { - if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true - // X <= C0 --> X < (C0+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C1+1, N1.getValueType()), - (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero.
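// Aside: a few of the constant-RHS setcc rewrites above, checked exhaustively
// over 8-bit unsigned values (standalone sketch).
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned i = 0; i < 256; ++i) {
    uint8_t x = (uint8_t)i;
    assert((x >= 5) == (x > 4));      // X >= C  -->  X > (C-1)
    assert((x <= 5) == (x < 6));      // X <= C  -->  X < (C+1)
    assert((x < 1) == (x == 0));      // setult X, 1     --> seteq X, 0
    assert((x > 254) == (x == 255));  // setugt X, MAX-1 --> seteq X, MAX
  }
  return 0;
}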
+ unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. + SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, VT); // X < MIN --> false - if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, VT); // X >= MIN --> true - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, VT); // X > MAX --> false - if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, VT); // X <= MAX --> true - - // Canonicalize setgt X, Min --> setne X, Min - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - // Canonicalize setlt X, Max --> setne X, Max - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - - // If we have setult X, 1, turn it into seteq X, 0 - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, N0.getValueType()), - ISD::SETEQ); - // If we have setugt X, Max-1, turn it into seteq X, Max - else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, N0.getValueType()), - ISD::SETEQ); - - // If we have "setcc X, C0", check to see if we can shrink the immediate - // by changing cc. - - // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, N1.getValueType()), - ISD::SETLT); - - // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); - } + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true + // X >= C0 --> X > (C0-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1-1, N1.getValueType()), + (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT); + } - // Fold bit comparisons when we can. - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - VT == N0.getValueType() && N0.getOpcode() == ISD::AND) - if (ConstantSDNode *AndRHS = - dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - MVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(); - if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 - // Perform the xform if the AND RHS is a single bit.
- if (isPowerOf2_64(AndRHS->getZExtValue())) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(Log2_64(AndRHS->getZExtValue()), - ShiftTy)); - } - } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { - // (X & 8) == 8 --> (X & 8) >> 3 - // Perform the xform if C1 is a single bit. - if (C1.isPowerOf2()) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(C1.logBase2(), ShiftTy)); - } + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true + // X <= C0 --> X < (C0+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C1+1, N1.getValueType()), + (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT); + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = + dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + EVT ShiftTy = DCI.isBeforeLegalize() ? + getPointerTy() : getShiftAmountTy(); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (isPowerOf2_64(AndRHS->getZExtValue())) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(Log2_64(AndRHS->getZExtValue()), + ShiftTy)); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. + if (C1.isPowerOf2()) { + return DAG.getNode(ISD::SRL, dl, VT, N0, + DAG.getConstant(C1.logBase2(), ShiftTy)); } } - } - } else if (isa<ConstantSDNode>(N0.getNode())) { - // Ensure that the constant occurs on the RHS.
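// Aside: the bit-comparison folds above, verified on a few 32-bit samples
// (standalone sketch). When the AND mask is a single bit, the boolean result
// of the compare is just the masked value shifted down to bit 0.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t samples[] = {0u, 1u, 8u, 9u, 0xFFu, 0xFFFFFFF7u, 0xFFFFFFFFu};
  for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i) {
    uint32_t x = samples[i];
    // (X & 8) != 0  -->  (X & 8) >> 3  (shift by log2 of the single-bit mask)
    assert((uint32_t)((x & 8) != 0) == ((x & 8) >> 3));
    // (X & 8) == 8  -->  (X & 8) >> 3
    assert((uint32_t)((x & 8) == 8) == ((x & 8) >> 3));
  }
  return 0;
}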
- return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); + } } if (isa<ConstantFPSDNode>(N0.getNode())) { @@ -1840,7 +1906,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, if (CFP->getValueAPF().isNaN()) { // If an operand is known to be a nan, we can fold it. switch (ISD::getUnorderedFlavor(Cond)) { - default: assert(0 && "Unknown flavor!"); + default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. return DAG.getConstant(0, VT); case 1: // Known true. @@ -1856,6 +1922,43 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, // materialize 0.0. if (Cond == ISD::SETO || Cond == ISD::SETUO) return DAG.getSetCC(dl, VT, N0, N0, Cond); + + // If the condition is not legal, see if we can find an equivalent one + // which is legal. + if (!isCondCodeLegal(Cond, N0.getValueType())) { + // If the comparison was an awkward floating-point == or != and one of + // the comparison operands is infinity or negative infinity, convert the + // condition to a less-awkward <= or >=. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); + } else { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETULT, N0.getValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + } + } + } } if (N0 == N1) { @@ -2000,7 +2103,7 @@ TargetLowering::SimplifySetCC(MVT VT, SDValue N0, SDValue N1, SDValue Temp; if (N0.getValueType() == MVT::i1 && foldBooleans) { switch (Cond) { - default: assert(0 && "Unknown integer setcc!"); + default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: // X == Y -> ~(X^Y) Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); N0 = DAG.getNOT(dl, Temp, MVT::i1); @@ -2090,7 +2193,7 @@ bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const MachineFrameInfo *MFI) const { if (LD->getChain() != Base->getChain()) return false; - MVT VT = LD->getValueType(0); + EVT VT = LD->getValueType(0); if (VT.getSizeInBits() / 8 != Bytes) return false; @@ -2171,7 +2274,7 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand.
-const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{ +const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ if (ConstraintVT.isInteger()) return "r"; if (ConstraintVT.isFloatingPoint()) @@ -2244,14 +2347,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::vector<unsigned> TargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { return std::vector<unsigned>(); } std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint[0] != '{') return std::pair<unsigned, const TargetRegisterClass*>(0, 0); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); @@ -2280,7 +2383,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (StringsEqualNoCase(RegName, RI->get(*I).AsmName)) + if (StringsEqualNoCase(RegName, RI->getName(*I))) return std::make_pair(*I, RC); } } @@ -2310,7 +2413,7 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: assert(0 && "Unknown constraint type!"); + default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -2406,10 +2509,13 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, // 'X' matches anything. if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { // Labels and constants are handled elsewhere ('X' is the only thing - // that matches labels). - if (isa<BasicBlock>(OpInfo.CallOperandVal) || - isa<ConstantInt>(OpInfo.CallOperandVal)) + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *v = OpInfo.CallOperandVal; + if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { + OpInfo.CallOperandVal = v; return; + } // Otherwise, try to resolve it to something we know about by looking at // the actual operand type. @@ -2464,7 +2570,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, /// SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); // Check to see if we can do this. @@ -2521,7 +2627,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, std::vector<SDNode*>* Created) const { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); // Check to see if we can do this. @@ -2569,45 +2675,3 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, DAG.getConstant(magics.s-1, getShiftAmountTy())); } } - -/// IgnoreHarmlessInstructions - Ignore instructions between a CALL and RET -/// node that don't prevent tail call optimization. -static SDValue IgnoreHarmlessInstructions(SDValue node) { - // Found call return. - if (node.getOpcode() == ISD::CALL) return node; - // Ignore MERGE_VALUES. Will have at least one operand. - if (node.getOpcode() == ISD::MERGE_VALUES) - return IgnoreHarmlessInstructions(node.getOperand(0)); - // Ignore ANY_EXTEND node. - if (node.getOpcode() == ISD::ANY_EXTEND) - return IgnoreHarmlessInstructions(node.getOperand(0)); - if (node.getOpcode() == ISD::TRUNCATE) - return IgnoreHarmlessInstructions(node.getOperand(0)); - // Any other node type.
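// Aside: the shape of code BuildSDIV emits, shown concretely for a signed
// 32-bit divide by 3 (standalone sketch; the magic constant and fixup follow
// the multiply-high scheme the routine takes from Hacker's Delight). For 3
// the post-multiply shift amount is zero; larger divisors insert an
// arithmetic shift before the sign-bit fixup.
#include <cassert>
#include <cstdint>

static int32_t div3(int32_t n) {
  // 0x55555556 = ceil(2^32 / 3); keep the high 32 bits of the product.
  int32_t q = (int32_t)(((int64_t)0x55555556LL * n) >> 32);
  // Add the sign bit of the quotient so truncation rounds toward zero.
  return q + (int32_t)((uint32_t)q >> 31);
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n)
    assert(div3(n) == n / 3);
  assert(div3(INT32_MIN) == INT32_MIN / 3);
  return 0;
}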
- return node; -} - -bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall, - SDValue Ret) { - unsigned NumOps = Ret.getNumOperands(); - // ISD::CALL results:(value0, ..., valuen, chain) - // ISD::RET operands:(chain, value0, flag0, ..., valuen, flagn) - // Value return: - // Check that operand of the RET node sources from the CALL node. The RET node - // has at least two operands. Operand 0 holds the chain. Operand 1 holds the - // value. - // Also we need to check that there is no code in between the call and the - // return. Hence we also check that the incomming chain to the return sources - // from the outgoing chain of the call. - if (NumOps > 1 && - IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) && - Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) - return true; - // void return: The RET node has the chain result value of the CALL node as - // input. - if (NumOps == 1 && - Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1)) - return true; - - return false; -} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 2402f81bb04f3..25a499b88968d 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -62,9 +62,11 @@ namespace { Constant *GetFrameMap(Function &F); const Type* GetConcreteStackEntryType(Function &F); void CollectRoots(Function &F); - static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr, + static GetElementPtrInst *CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, int Idx1, const char *Name); - static GetElementPtrInst *CreateGEP(IRBuilder<> &B, Value *BasePtr, + static GetElementPtrInst *CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Value *BasePtr, int Idx1, int Idx2, const char *Name); }; @@ -93,7 +95,7 @@ namespace { public: EscapeEnumerator(Function &F, const char *N = "cleanup") - : F(F), CleanupBBName(N), State(0) {} + : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} IRBuilder<> *Next() { switch (State) { @@ -136,8 +138,9 @@ namespace { return 0; // Create a cleanup block. - BasicBlock *CleanupBB = BasicBlock::Create(CleanupBBName, &F); - UnwindInst *UI = new UnwindInst(CleanupBB); + BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(), + CleanupBBName, &F); + UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB); // Transform the 'call' instructions into 'invoke's branching to the // cleanup block. Go in reverse order to make prettier BB names. @@ -186,8 +189,7 @@ ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { Constant *ShadowStackGC::GetFrameMap(Function &F) { // doInitialization creates the abstract type of this value. - - Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty); + const Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); // Truncate the ShadowStackDescriptor if some metadata is null. 
unsigned NumMeta = 0; @@ -200,17 +202,18 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { } Constant *BaseElts[] = { - ConstantInt::get(Type::Int32Ty, Roots.size(), false), - ConstantInt::get(Type::Int32Ty, NumMeta, false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false), }; Constant *DescriptorElts[] = { - ConstantStruct::get(BaseElts, 2), + ConstantStruct::get(F.getContext(), BaseElts, 2, false), ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata.begin(), NumMeta) }; - Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2); + Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2, + false); std::string TypeName("gc_map."); TypeName += utostr(NumMeta); @@ -229,13 +232,14 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { // to be a ModulePass (which means it cannot be in the 'llc' pipeline // (which uses a FunctionPassManager (which segfaults (not asserts) if // provided a ModulePass))). - Constant *GV = new GlobalVariable(FrameMap->getType(), true, + Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, GlobalVariable::InternalLinkage, - FrameMap, "__gc_" + F.getName(), - F.getParent()); + FrameMap, "__gc_" + F.getName()); - Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, 0) }; + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) + }; return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); } @@ -245,7 +249,7 @@ const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { EltTys.push_back(StackEntryTy); for (size_t I = 0; I != Roots.size(); I++) EltTys.push_back(Roots[I].second->getAllocatedType()); - Type *Ty = StructType::get(EltTys); + Type *Ty = StructType::get(F.getContext(), EltTys); std::string TypeName("gc_stackentry."); TypeName += F.getName(); @@ -263,9 +267,11 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // void *Meta[]; // May be absent for roots without metadata. // }; std::vector EltTys; - EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :) - EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array. - StructType *FrameMapTy = StructType::get(EltTys); + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); M.addTypeName("gc_map", FrameMapTy); PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); @@ -274,12 +280,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { // FrameMap *Map; // Pointer to constant FrameMap. // void *Roots[]; // Stack roots (in-place array, so we pretend). // }; - OpaqueType *RecursiveTy = OpaqueType::get(); + OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); EltTys.clear(); EltTys.push_back(PointerType::getUnqual(RecursiveTy)); EltTys.push_back(FrameMapPtrTy); - PATypeHolder LinkTyH = StructType::get(EltTys); + PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); StackEntryTy = cast(LinkTyH.get()); @@ -292,10 +298,10 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) { if (!Head) { // If the root chain does not exist, insert a new one with linkonce // linkage! 
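
The ShadowStackGC churn above is mostly mechanical fallout from the
LLVMContext work: the process-wide Type::Int32Ty/Type::Int8Ty singletons are
gone, types and constants are minted from a context, and GlobalVariable's
constructor takes the owning Module first instead of as a trailing pointer.
A small sketch of the post-change idiom (2.6-era C++ API; the function
itself is illustrative):

    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Constants.h"
    #include "llvm/GlobalVariable.h"
    using namespace llvm;

    GlobalVariable *makeCounter(Module &M) {
      LLVMContext &Ctx = M.getContext();        // was implicit global state
      const Type *I32 = Type::getInt32Ty(Ctx);  // was Type::Int32Ty
      Constant *Zero = ConstantInt::get(I32, 0);
      // Module comes first now; no trailing Module* argument.
      return new GlobalVariable(M, I32, /*isConstant=*/false,
                                GlobalValue::InternalLinkage, Zero,
                                "counter");
    }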
- Head = new GlobalVariable(StackEntryPtrTy, false, + Head = new GlobalVariable(M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage, Constant::getNullValue(StackEntryPtrTy), - "llvm_gc_root_chain", &M); + "llvm_gc_root_chain"); } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); @@ -338,11 +344,11 @@ void ShadowStackGC::CollectRoots(Function &F) { } GetElementPtrInst * -ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, int Idx, int Idx2, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, Idx), - ConstantInt::get(Type::Int32Ty, Idx2) }; + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); assert(isa(Val) && "Unexpected folded constant"); @@ -351,10 +357,10 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, } GetElementPtrInst * -ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, int Idx, const char *Name) { - Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0), - ConstantInt::get(Type::Int32Ty, Idx) }; + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx) }; Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); assert(isa(Val) && "Unexpected folded constant"); @@ -364,6 +370,8 @@ ShadowStackGC::CreateGEP(IRBuilder<> &B, Value *BasePtr, /// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGC::performCustomLowering(Function &F) { + LLVMContext &Context = F.getContext(); + // Find calls to llvm.gcroot. CollectRoots(F); @@ -388,13 +396,14 @@ bool ShadowStackGC::performCustomLowering(Function &F) { // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); - Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, + 0,1,"gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... - Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root"); + Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; @@ -410,17 +419,19 @@ bool ShadowStackGC::performCustomLowering(Function &F) { AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. - Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next"); - Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead"); - AtEntry.CreateStore(CurrentHead, EntryNextPtr); - AtEntry.CreateStore(NewHeadVal, Head); + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, + StackEntry,0,0,"gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, + StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... 
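
CreateGEP now threads an LLVMContext through explicitly because the i32
index constants can no longer be conjured from a global type. For reference,
the one-index helper reduced to its essentials (return type loosened to
Value* for brevity):

    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // &Base[0].field[Idx]-style access into the stack entry struct; the
    // context supplies the i32 type for the constant indices.
    static Value *CreateGEP(LLVMContext &Ctx, IRBuilder<> &B,
                            Value *BasePtr, int Idx, const char *Name) {
      Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Ctx), 0),
                           ConstantInt::get(Type::getInt32Ty(Ctx), Idx) };
      return B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
    }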
EscapeEnumerator EE(F, "gc_cleanup"); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. - Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0, + Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index e44a138cf9250..8070570cb84b8 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -158,7 +158,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=. #ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getName(); + std::string MFName = MF->getFunction()->getNameStr(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -185,8 +185,8 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { initShrinkWrappingInfo(); DEBUG(if (ShrinkWrapThisFunction) { - DOUT << "Place CSR spills/restores for " - << MF->getFunction()->getName() << "\n"; + errs() << "Place CSR spills/restores for " + << MF->getFunction()->getName() << "\n"; }); if (calculateSets(Fn)) @@ -297,20 +297,26 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { } } - DEBUG(if (ShrinkWrapDebugging >= Details) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << " Antic/Avail Sets:\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "iterations = " << iterations << "\n"; - DOUT << "-----------------------------------------------------------\n"; - DOUT << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"; - DOUT << "-----------------------------------------------------------\n"; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets(MBB); + DEBUG({ + if (ShrinkWrapDebugging >= Details) { + errs() + << "-----------------------------------------------------------\n" + << " Antic/Avail Sets:\n" + << "-----------------------------------------------------------\n" + << "iterations = " << iterations << "\n" + << "-----------------------------------------------------------\n" + << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n" + << "-----------------------------------------------------------\n"; + + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets(MBB); + } + + errs() + << "-----------------------------------------------------------\n"; } - DOUT << "-----------------------------------------------------------\n"; }); } @@ -357,8 +363,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { // If no CSRs used, we are done. if (CSI.empty()) { DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": uses no callee-saved registers\n"); + errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": uses no callee-saved registers\n"); return false; } @@ -377,8 +383,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { // implementation to functions with <= 500 MBBs. 
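
From here the ShrinkWrapping.cpp changes are one long migration off the old
DOUT stream: debug printing now goes through errs() (a raw_ostream) inside
the DEBUG() macro, with the block form used where a loop or conditional
should only run in debug mode. The shape of both forms, under an
illustrative DEBUG_TYPE:

    #define DEBUG_TYPE "shrink-wrap"  // illustrative pass name
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void report(unsigned iterations) {
      // Expression form: compiled out entirely when NDEBUG is defined.
      DEBUG(errs() << "iterations = " << iterations << "\n");

      // Block form: guards whole statements, loops included, at once.
      DEBUG({
        for (unsigned i = 0; i != iterations; ++i)
          errs() << '-';
        errs() << '\n';
      });
    }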
if (Fn.size() > 500) { DEBUG(if (ShrinkWrapThisFunction) - DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": too large (" << Fn.size() << " MBBs)\n"); + errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": too large (" << Fn.size() << " MBBs)\n"); ShrinkWrapThisFunction = false; } @@ -459,8 +465,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { } if (allCSRUsesInEntryBlock) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in EntryBlock\n"); + DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in EntryBlock\n"); ShrinkWrapThisFunction = false; } else { bool allCSRsUsedInEntryFanout = true; @@ -471,8 +477,8 @@ bool PEI::calculateSets(MachineFunction &Fn) { allCSRsUsedInEntryFanout = false; } if (allCSRsUsedInEntryFanout) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in imm successors of EntryBlock\n"); + DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in imm successors of EntryBlock\n"); ShrinkWrapThisFunction = false; } } @@ -498,9 +504,9 @@ bool PEI::calculateSets(MachineFunction &Fn) { if (dominatesExitNodes) { CSRUsedInChokePoints |= CSRUsed[MBB]; if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(DOUT << "DISABLED: " << Fn.getFunction()->getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); + DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); ShrinkWrapThisFunction = false; break; } @@ -514,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) { return false; DEBUG({ - DOUT << "ENABLED: " << Fn.getFunction()->getName(); + errs() << "ENABLED: " << Fn.getFunction()->getName(); if (HasFastExitPath) - DOUT << " (fast exit path)"; - DOUT << "\n"; + errs() << " (fast exit path)"; + errs() << "\n"; if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "------------------------------" + errs() << "------------------------------" << "-----------------------------\n"; - DOUT << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; if (ShrinkWrapDebugging >= Details) { - DOUT << "------------------------------" + errs() << "------------------------------" << "-----------------------------\n"; dumpAllUsed(); } @@ -596,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(SUCC); DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) + errs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "successor " << getBasicBlockName(SUCC) << "\n"); } @@ -612,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, addedUses = true; blks.push_back(PRED); DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << getBasicBlockName(MBB) + errs() << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(prop) << ")->" << "predecessor " << getBasicBlockName(PRED) << "\n"); } @@ -650,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector& blks) { CSRUsed[EXB] |= loopSpills; addedUses = true; DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "LOOP " << getBasicBlockName(MBB) + errs() << "LOOP " << getBasicBlockName(MBB) << "(" << stringifyCSRegSet(loopSpills) << ")->" << getBasicBlockName(EXB) << "\n"); if (EXB->succ_size() > 1 || EXB->pred_size() > 1) @@ -717,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! 
CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " + errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRSave[MBB]) << "\n"); return placedSpills; @@ -778,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, blks.push_back(MBB); DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " + errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); return placedRestores; @@ -802,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { ++iterations; DEBUG(if (ShrinkWrapDebugging >= Iterations) - DOUT << "iter " << iterations + errs() << "iter " << iterations << " --------------------------------------------------\n"); // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, @@ -852,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) { unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); numSRReduced += numSRReducedThisFunc; DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - DOUT << "-----------------------------------------------------------\n"; - DOUT << "total iterations = " << iterations << " ( " + errs() << "-----------------------------------------------------------\n"; + errs() << "total iterations = " << iterations << " ( " << Fn.getFunction()->getName() << " " << numSRReducedThisFunc << " " << Fn.size() << " )\n"; - DOUT << "-----------------------------------------------------------\n"; + errs() << "-----------------------------------------------------------\n"; dumpSRSets(); - DOUT << "-----------------------------------------------------------\n"; + errs() << "-----------------------------------------------------------\n"; if (numSRReducedThisFunc) verifySpillRestorePlacement(); }); @@ -893,7 +899,7 @@ void PEI::findFastExitPath() { // Check the immediate successors. 
if (isReturnBlock(SUCC)) { if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) + errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << getBasicBlockName(SUCC) << "\n"; break; } @@ -911,7 +917,7 @@ void PEI::findFastExitPath() { } if (HasFastExitPath) { if (ShrinkWrapDebugging >= BasicInfo) - DOUT << "Fast exit path: " << getBasicBlockName(EntryBlock) + errs() << "Fast exit path: " << getBasicBlockName(EntryBlock) << "->" << exitPath << "\n"; break; } @@ -945,10 +951,10 @@ void PEI::verifySpillRestorePlacement() { if (spilled.empty()) continue; - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(spilled) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); if (CSRRestore[MBB].intersects(spilled)) { restored |= (CSRRestore[MBB] & spilled); @@ -977,11 +983,11 @@ void PEI::verifySpillRestorePlacement() { if (isReturnBlock(SBB) || SBB->succ_size() == 0) { if (restored != spilled) { CSRegSet notRestored = (spilled - restored); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"; + DEBUG(errs() << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notRestored) + << " spilled at " << getBasicBlockName(MBB) + << " are never restored on path to return " + << getBasicBlockName(SBB) << "\n"); } restored.clear(); } @@ -998,10 +1004,10 @@ void PEI::verifySpillRestorePlacement() { if (restored.empty()) continue; - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"; + DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]) + << " RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(restored) << "\n"); if (CSRSave[MBB].intersects(restored)) { spilled |= (CSRSave[MBB] & restored); @@ -1025,23 +1031,24 @@ void PEI::verifySpillRestorePlacement() { } if (spilled != restored) { CSRegSet notSpilled = (restored - spilled); - DOUT << MF->getFunction()->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"; + DEBUG(errs() << MF->getFunction()->getName() << ": " + << stringifyCSRegSet(notSpilled) + << " restored at " << getBasicBlockName(MBB) + << " are never spilled\n"); } } } // Debugging print methods. 
std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + if (!MBB) + return ""; + + if (MBB->getBasicBlock()) + return MBB->getBasicBlock()->getNameStr(); + std::ostringstream name; - if (MBB) { - if (MBB->getBasicBlock()) - name << MBB->getBasicBlock()->getName(); - else - name << "_MBB_" << MBB->getNumber(); - } + name << "_MBB_" << MBB->getNumber(); return name.str(); } @@ -1071,14 +1078,15 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) { } void PEI::dumpSet(const CSRegSet& s) { - DOUT << stringifyCSRegSet(s) << "\n"; + DEBUG(errs() << stringifyCSRegSet(s) << "\n"); } void PEI::dumpUsed(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + }); } void PEI::dumpAllUsed() { @@ -1090,27 +1098,29 @@ void PEI::dumpAllUsed() { } void PEI::dumpSets(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + }); } void PEI::dumpSets1(MachineBasicBlock* MBB) { - if (MBB) { - DOUT << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } + DEBUG({ + if (MBB) + errs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + }); } void PEI::dumpAllSets() { @@ -1122,20 +1132,21 @@ void PEI::dumpAllSets() { } void PEI::dumpSRSets() { - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (! CSRSave[MBB].empty()) { - DOUT << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - DOUT << "\n"; - } - if (! CSRRestore[MBB].empty()) { - if (! 
CSRSave[MBB].empty()) - DOUT << " "; - DOUT << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } - } + DEBUG({ + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (!CSRSave[MBB].empty()) { + errs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + errs() << '\n'; + } + + if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) + errs() << " " + << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + }); } #endif diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h new file mode 100644 index 0000000000000..f69feaf9e570d --- /dev/null +++ b/lib/CodeGen/SimpleHazardRecognizer.h @@ -0,0 +1,89 @@ +//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SimpleHazardRecognizer class, which +// implements hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses + /// a coarse classification and attempts to avoid that instructions of + /// a given class aren't grouped too densely together. + class SimpleHazardRecognizer : public ScheduleHazardRecognizer { + /// Class - A simple classification for SUnits. + enum Class { + Other, Load, Store + }; + + /// Window - The Class values of the most recently issued + /// instructions. + Class Window[8]; + + /// getClass - Classify the given SUnit. + Class getClass(const SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayLoad()) + return Load; + if (TID.mayStore()) + return Store; + return Other; + } + + /// Step - Rotate the existing entries in Window and insert the + /// given class value in position as the most recent. 
+ void Step(Class C) { + std::copy(Window+1, array_endof(Window), Window); + Window[array_lengthof(Window)-1] = C; + } + + public: + SimpleHazardRecognizer() : Window() { + Reset(); + } + + virtual HazardType getHazardType(SUnit *SU) { + Class C = getClass(SU); + if (C == Other) + return NoHazard; + unsigned Score = 0; + for (unsigned i = 0; i != array_lengthof(Window); ++i) + if (Window[i] == C) + Score += i + 1; + if (Score > array_lengthof(Window) * 2) + return Hazard; + return NoHazard; + } + + virtual void Reset() { + for (unsigned i = 0; i != array_lengthof(Window); ++i) + Window[i] = Other; + } + + virtual void EmitInstruction(SUnit *SU) { + Step(getClass(SU)); + } + + virtual void AdvanceCycle() { + Step(Other); + } + }; +} + +#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 7e7d6b8f68f11..9c283b0f02341 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -17,6 +17,7 @@ #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -28,6 +29,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -51,13 +54,8 @@ EnableJoining("join-liveintervals", cl::init(true)); static cl::opt -NewHeuristic("new-coalescer-heuristic", - cl::desc("Use new coalescer heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt -CrossClassJoin("join-cross-class-copies", - cl::desc("Coalesce cross register class copies"), +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), cl::init(false), cl::Hidden); static cl::opt @@ -65,7 +63,7 @@ PhysJoinTweak("tweak-phys-join-heuristics", cl::desc("Tweak heuristics for joining phys reg with vr"), cl::init(false), cl::Hidden); -static RegisterPass +static RegisterPass X("simple-register-coalescing", "Simple Register Coalescing"); // Declare that we implement the RegisterCoalescer interface @@ -74,6 +72,8 @@ static RegisterAnalysisGroup V(X); const PassInfo *const llvm::SimpleRegisterCoalescingID = &X; void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -105,22 +105,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); assert(BLR != IntB.end() && "Live range not found!"); VNInfo *BValNo = BLR->valno; - + // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we + // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. 
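
SimpleHazardRecognizer, introduced a few hunks up, is compact enough to
restate: it remembers the classes of the last eight issued instructions and
reports a hazard when the candidate's class is over-represented, weighting
recent slots most heavily. A standalone distillation of that logic, using
the same constants as the header above:

    #include <algorithm>

    enum Class { Other, Load, Store };

    struct IssueWindow {
      static const unsigned N = 8;
      Class W[N];

      IssueWindow() { std::fill(W, W + N, Other); }

      // Mirrors getHazardType(): recency-weighted count of the candidate's
      // class; a score above 2*N means too many similar instructions.
      bool wouldHazard(Class C) const {
        if (C == Other) return false;
        unsigned Score = 0;
        for (unsigned i = 0; i != N; ++i)
          if (W[i] == C)
            Score += i + 1;
        return Score > 2 * N;
      }

      // Mirrors Step(): shift the window left, record the newest class.
      void issue(Class C) {
        std::copy(W + 1, W + N, W);
        W[N - 1] = C;
      }
    };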
- if (!BValNo->copy) return false; + if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - + // AValNo is the value number in A that defines the copy, A3 in the example. - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1); + LiveIndex CopyUseIdx = li_->getUseIndex(CopyIdx); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If it's re-defined by an early clobber somewhere in the live range, then @@ -143,26 +144,28 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // The coalescer has no idea there was a def in the middle of [174,230]. if (AValNo->hasRedefByEC()) return false; - - // If AValNo is defined as a copy from IntB, we can potentially process this. + + // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. unsigned SrcReg = li_->getVNInfoSourceReg(AValNo); if (!SrcReg) return false; // Not defined by a copy. - + // If the value number is not defined by a copy instruction, ignore it. // If the source register comes from an interval other than IntB, we can't // handle this. if (SrcReg != IntB.reg) return false; - + // Get the LiveRange in IntB that this value number starts with. - LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def-1); + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(li_->getPrevSlot(AValNo->def)); assert(ValLR != IntB.end() && "Live range not found!"); - + // Make sure that the end of the live range is inside the same block as // CopyMI. - MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end-1); - if (!ValLREndInst || + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(li_->getPrevSlot(ValLR->end)); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) return false; // Okay, we now know that ValLR ends in the same block that the CopyMI @@ -177,28 +180,33 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, *tri_->getSubRegisters(IntB.reg)) { for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } - - DOUT << "\nExtending: "; IntB.print(DOUT, tri_); - - unsigned FillerStart = ValLR->end, FillerEnd = BLR->start; + + DEBUG({ + errs() << "\nExtending: "; + IntB.print(errs(), tri_); + }); + + LiveIndex FillerStart = ValLR->end, FillerEnd = BLR->start; // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the the valnum with the new defining // instruction #. BValNo->def = FillerStart; - BValNo->copy = NULL; - + BValNo->setCopy(0); + // Okay, we can merge them. We need to insert a new liverange: // [ValLR.end, BLR.begin) of either value number, then we merge the // two value numbers. IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. + // physreg has sub-registers, update their live intervals as well. 
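
Note the BValNo->copy reads becoming getCopy()/setCopy() calls: VNInfo's
defining-copy pointer is no longer a public field. The point of the accessor
pair, shown on a toy class rather than LLVM's actual layout:

    class MachineInstr;

    // Toy stand-in for VNInfo: with the pointer private, its representation
    // can later be packed or tagged without touching the coalescer's many
    // call sites.
    class ValNoInfo {
      MachineInstr *Copy;
    public:
      ValNoInfo() : Copy(0) {}
      MachineInstr *getCopy() const { return Copy; }
      void setCopy(MachineInstr *MI) { Copy = MI; }
      bool isDefinedByCopy() const { return Copy != 0; }
    };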
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); @@ -213,17 +221,26 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, IntB.addKills(ValLR->valno, BValNo->kills); IntB.MergeValueNumberInto(BValNo, ValLR->valno); } - DOUT << " result = "; IntB.print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << " result = "; + IntB.print(errs(), tri_); + errs() << "\n"; + }); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { ValLREndInst->getOperand(UIdx).setIsKill(false); - IntB.removeKill(ValLR->valno, FillerStart); + ValLR->valno->removeKill(FillerStart); } + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. + if (CopyMI->killsRegister(IntA.reg)) + TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); + ++numExtends; return true; } @@ -253,6 +270,16 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, return false; } +static void +TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { + for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + } +} + /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a commutable @@ -279,7 +306,8 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA, bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); // FIXME: For now, only eliminate the copy by commuting its def when the // source register is a virtual register. We want to guard against cases @@ -293,15 +321,17 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); assert(BLR != IntB.end() && "Live range not found!"); VNInfo *BValNo = BLR->valno; - + // Get the location that B is defined at. Two options: either this value has - // an unknown definition point or it is defined at CopyIdx. If unknown, we + // an unknown definition point or it is defined at CopyIdx. If unknown, we // can't process it. - if (!BValNo->copy) return false; + if (!BValNo->getCopy()) return false; assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - + // AValNo is the value number in A that defines the copy, A3 in the example. 
- LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyIdx-1); + LiveInterval::iterator ALR = + IntA.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx)); + assert(ALR != IntA.end() && "Live range not found!"); VNInfo *AValNo = ALR->valno; // If other defs can reach uses of this def, then it's not safe to perform @@ -312,9 +342,23 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); const TargetInstrDesc &TID = DefMI->getDesc(); - unsigned NewDstIdx; - if (!TID.isCommutable() || - !tii_->CommuteChangesDestination(DefMI, NewDstIdx)) + if (!TID.isCommutable()) + return false; + // If DefMI is a two-address instruction then commuting it will change the + // destination register. + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + unsigned Op1, Op2, NewDstIdx; + if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + return false; + if (Op1 == UseOpIdx) + NewDstIdx = Op2; + else if (Op2 == UseOpIdx) + NewDstIdx = Op1; + else return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -332,7 +376,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), UE = mri_->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveIndex UseIdx = li_->getInstructionIndex(UseMI); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end()) continue; @@ -356,8 +400,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector BDeadValNos; - SmallVector BKills; - std::map BExtend; + VNInfo::KillSet BKills; + std::map BExtend; // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. // A = or A, B @@ -384,7 +428,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, ++UI; if (JoinedCopies.count(UseMI)) continue; - unsigned UseIdx = li_->getInstructionIndex(UseMI); + LiveIndex UseIdx= li_->getUseIndex(li_->getInstructionIndex(UseMI)); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -395,7 +439,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (Extended) UseMO.setIsKill(false); else - BKills.push_back(li_->getUseIndex(UseIdx)+1); + BKills.push_back(li_->getNextSlot(UseIdx)); } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -404,7 +448,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // This copy will become a noop. If it's defining a new val#, // remove that val# as well. However this live range is being // extended to the end of the existing live range defined by the copy. - unsigned DefIdx = li_->getDefIndex(UseIdx); + LiveIndex DefIdx = li_->getDefIndex(UseIdx); const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); BHasPHIKill |= DLR->valno->hasPHIKill(); assert(DLR->valno->def == DefIdx); @@ -420,7 +464,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // We need to insert a new liverange: [ALR.start, LastUse). It may be we can // simply extend BLR if CopyMI doesn't end the range. 
- DOUT << "\nExtending: "; IntB.print(DOUT, tri_); + DEBUG({ + errs() << "\nExtending: "; + IntB.print(errs(), tri_); + }); // Remove val#'s defined by copies that will be coalesced away. for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) { @@ -439,24 +486,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // is updated. Kills are also updated. VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; - ValNo->copy = NULL; + ValNo->setCopy(0); for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) { - unsigned Kill = ValNo->kills[j]; - if (Kill != BLR->end) - BKills.push_back(Kill); + if (ValNo->kills[j] != BLR->end) + BKills.push_back(ValNo->kills[j]); } ValNo->kills.clear(); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - unsigned End = AI->end; - std::map::iterator EI = BExtend.find(End); + LiveIndex End = AI->end; + std::map::iterator + EI = BExtend.find(End); if (EI != BExtend.end()) End = EI->second; IntB.addRange(LiveRange(AI->start, End, ValNo)); // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. + // physreg has sub-registers, update their live intervals as well. if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { LiveInterval &SRLI = li_->getInterval(*SR); @@ -467,13 +514,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, IntB.addKills(ValNo, BKills); ValNo->setHasPHIKill(BHasPHIKill); - DOUT << " result = "; IntB.print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << " result = "; + IntB.print(errs(), tri_); + errs() << '\n'; + errs() << "\nShortening: "; + IntA.print(errs(), tri_); + }); - DOUT << "\nShortening: "; IntA.print(DOUT, tri_); IntA.removeValNo(AValNo); - DOUT << " result = "; IntA.print(DOUT, tri_); - DOUT << "\n"; + + DEBUG({ + errs() << " result = "; + IntA.print(errs(), tri_); + errs() << '\n'; + }); ++numCommutes; return true; @@ -495,7 +550,8 @@ static bool isSameOrFallThroughBB(MachineBasicBlock *MBB, /// removeRange - Wrapper for LiveInterval::removeRange. This removes a range /// from a physical register live interval as well as from the live intervals /// of its sub-registers. -static void removeRange(LiveInterval &li, unsigned Start, unsigned End, +static void removeRange(LiveInterval &li, + LiveIndex Start, LiveIndex End, LiveIntervals *li_, const TargetRegisterInfo *tri_) { li.removeRange(Start, End, true); if (TargetRegisterInfo::isPhysicalRegister(li.reg)) { @@ -503,14 +559,15 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End, if (!li_->hasInterval(*SR)) continue; LiveInterval &sli = li_->getInterval(*SR); - unsigned RemoveEnd = Start; + LiveIndex RemoveStart = Start; + LiveIndex RemoveEnd = Start; while (RemoveEnd != End) { - LiveInterval::iterator LR = sli.FindLiveRangeContaining(Start); + LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart); if (LR == sli.end()) break; RemoveEnd = (LR->end < End) ? LR->end : End; - sli.removeRange(Start, RemoveEnd, true); - Start = RemoveEnd; + sli.removeRange(RemoveStart, RemoveEnd, true); + RemoveStart = RemoveEnd; } } } @@ -520,14 +577,14 @@ static void removeRange(LiveInterval &li, unsigned Start, unsigned End, /// as the copy instruction, trim the live interval to the last use and return /// true. 
bool -SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, +SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(LiveIndex CopyIdx, MachineBasicBlock *CopyMBB, LiveInterval &li, const LiveRange *LR) { - unsigned MBBStart = li_->getMBBStartIdx(CopyMBB); - unsigned LastUseIdx; - MachineOperand *LastUse = lastRegisterUse(LR->start, CopyIdx-1, li.reg, - LastUseIdx); + LiveIndex MBBStart = li_->getMBBStartIdx(CopyMBB); + LiveIndex LastUseIdx; + MachineOperand *LastUse = + lastRegisterUse(LR->start, li_->getPrevSlot(CopyIdx), li.reg, LastUseIdx); if (LastUse) { MachineInstr *LastUseMI = LastUse->getParent(); if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) { @@ -547,7 +604,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, // of last use. LastUse->setIsKill(); removeRange(li, li_->getDefIndex(LastUseIdx), LR->end, li_, tri_); - li.addKill(LR->valno, LastUseIdx+1); + LR->valno->addKill(li_->getNextSlot(LastUseIdx)); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && DstReg == li.reg) { @@ -560,7 +617,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, // Is it livein? if (LR->start <= MBBStart && LR->end > MBBStart) { - if (LR->start == 0) { + if (LR->start == LiveIndex()) { assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); // Live-in to the function but dead. Remove it from entry live-in set. mf_->begin()->removeLiveIn(li.reg); @@ -575,8 +632,9 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(unsigned CopyIdx, /// computation, replace the copy by rematerialize the definition. bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, + unsigned DstSubIdx, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = li_->getUseIndex(li_->getInstructionIndex(CopyMI)); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -590,24 +648,52 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) return false; - if (!DefMI->getDesc().isRematerializable() || - !tii_->isTriviallyReMaterializable(DefMI)) + if (!tii_->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, SawStore)) + if (!DefMI->isSafeToMove(tii_, SawStore, AA)) + return false; + if (TID.getNumDefs() != 1) return false; + if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + // Make sure the copy destination register class fits the instruction + // definition register class. The mismatch can happen as a result of earlier + // extract_subreg, insert_subreg, subreg_to_reg coalescing. + const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (mri_->getRegClass(DstReg) != RC) + return false; + } else if (!RC->contains(DstReg)) + return false; + } - unsigned DefIdx = li_->getDefIndex(CopyIdx); + // If destination register has a sub-register index on it, make sure it mtches + // the instruction register class. 
+ if (DstSubIdx) { + const TargetInstrDesc &TID = DefMI->getDesc(); + if (TID.getNumDefs() != 1) + return false; + const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); + const TargetRegisterClass *DstSubRC = + DstRC->getSubRegisterRegClass(DstSubIdx); + const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_); + if (DefRC == DstRC) + DstSubIdx = 0; + else if (DefRC != DstSubRC) + return false; + } + + LiveIndex DefIdx = li_->getDefIndex(CopyIdx); const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx); - DLR->valno->copy = NULL; + DLR->valno->setCopy(0); // Don't forget to update sub-register intervals. if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { if (!li_->hasInterval(*SR)) continue; DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->copy == CopyMI) - DLR->valno->copy = NULL; + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); } } @@ -621,7 +707,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, } MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DefMI); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { @@ -630,7 +716,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, // should mark it dead: if (DefMI->getParent() == MBB) { DefMI->addRegisterDead(SrcInt.reg, tri_); - SrcLR->end = SrcLR->start + 1; + SrcLR->end = li_->getNextSlot(SrcLR->start); } } @@ -644,11 +730,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (MO.isDef() && li_->hasInterval(MO.getReg())) { unsigned Reg = MO.getReg(); DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->copy == CopyMI) - DLR->valno->copy = NULL; + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); } } + TransferImplicitOps(CopyMI, NewMI); li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); @@ -657,30 +744,6 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return true; } -/// isBackEdgeCopy - Returns true if CopyMI is a back edge copy. -/// -bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI, - unsigned DstReg) const { - MachineBasicBlock *MBB = CopyMI->getParent(); - const MachineLoop *L = loopInfo->getLoopFor(MBB); - if (!L) - return false; - if (MBB != L->getLoopLatch()) - return false; - - LiveInterval &LI = li_->getInterval(DstReg); - unsigned DefIdx = li_->getInstructionIndex(CopyMI); - LiveInterval::const_iterator DstLR = - LI.FindLiveRangeContaining(li_->getDefIndex(DefIdx)); - if (DstLR == LI.end()) - return false; - unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1; - if (DstLR->valno->kills.size() == 1 && - DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill()) - return true; - return false; -} - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. 
If DstReg is a /// physical register and the existing subregister number of the def / use @@ -714,7 +777,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source // is defined by a trivial computation, try to rematerialize it instead. - if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,UseMI)) + if (ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, + CopyDstSubIdx, UseMI)) continue; } @@ -751,44 +815,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || allocatableRegs_[CopyDstReg])) { LiveInterval &LI = li_->getInterval(CopyDstReg); - unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(UseMI)); + LiveIndex DefIdx = + li_->getDefIndex(li_->getInstructionIndex(UseMI)); if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) { if (DLR->valno->def == DefIdx) - DLR->valno->copy = UseMI; + DLR->valno->setCopy(UseMI); } } } } -/// RemoveDeadImpDef - Remove implicit_def instructions which are "re-defining" -/// registers due to insert_subreg coalescing. e.g. -/// r1024 = op -/// r1025 = implicit_def -/// r1025 = insert_subreg r1025, r1024 -/// = op r1025 -/// => -/// r1025 = op -/// r1025 = implicit_def -/// r1025 = insert_subreg r1025, r1025 -/// = op r1025 -void -SimpleRegisterCoalescing::RemoveDeadImpDef(unsigned Reg, LiveInterval &LI) { - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); - MachineInstr *DefMI = &*I; - ++I; - if (!O.isDef()) - continue; - if (DefMI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) - continue; - if (!LI.liveBeforeAndAt(li_->getInstructionIndex(DefMI))) - continue; - li_->RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); - } -} - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, @@ -796,12 +832,27 @@ void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ++UI) { MachineOperand &UseMO = UI.getOperand(); - if (UseMO.isKill()) { - MachineInstr *UseMI = UseMO.getParent(); - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); - const LiveRange *UI = LI.getLiveRangeContaining(UseIdx); - if (!UI || !LI.isKill(UI->valno, UseIdx+1)) - UseMO.setIsKill(false); + if (!UseMO.isKill()) + continue; + MachineInstr *UseMI = UseMO.getParent(); + LiveIndex UseIdx = + li_->getUseIndex(li_->getInstructionIndex(UseMI)); + const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); + if (!LR || + (!LR->valno->isKill(li_->getNextSlot(UseIdx)) && + LR->valno->def != li_->getNextSlot(UseIdx))) { + // Interesting problem. After coalescing reg1027's def and kill are both + // at the same point: %reg1027,0.000000e+00 = [56,814:0) 0@70-(814) + // + // bb5: + // 60 %reg1027 = t2MOVr %reg1027, 14, %reg0, %reg0 + // 68 %reg1027 = t2LDRi12 %reg1027, 8, 14, %reg0 + // 76 t2CMPzri %reg1038, 0, 14, %reg0, %CPSR + // 84 %reg1027 = t2MOVr %reg1027, 14, %reg0, %reg0 + // 96 t2Bcc mbb, 1, %CPSR + // + // Do not remove the kill marker on t2LDRi12. 
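
ReMaterializeTrivialDef above also grew a guard comparing the def's register
class against the copy's destination before rematerializing. Reduced to a
predicate (a sketch; the real code additionally handles the
sub-register-index case shown in the hunk):

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Can a def constrained to DefRC be rematerialized straight into DstReg?
    static bool regClassFitsDest(const TargetRegisterClass *DefRC,
                                 unsigned DstReg,
                                 const MachineRegisterInfo &MRI) {
      if (TargetRegisterInfo::isVirtualRegister(DstReg))
        return MRI.getRegClass(DstReg) == DefRC;  // virtual: exact match
      return DefRC->contains(DstReg);             // physical: member of class
    }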
+ UseMO.setIsKill(false); } } } @@ -830,15 +881,16 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, /// Return true if live interval is removed. bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getInstructionIndex(CopyMI); + LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI); LiveInterval::iterator MLR = li.FindLiveRangeContaining(li_->getDefIndex(CopyIdx)); if (MLR == li.end()) return false; // Already removed by ShortenDeadCopySrcLiveRange. - unsigned RemoveStart = MLR->start; - unsigned RemoveEnd = MLR->end; + LiveIndex RemoveStart = MLR->start; + LiveIndex RemoveEnd = MLR->end; + LiveIndex DefIdx = li_->getDefIndex(CopyIdx); // Remove the liverange that's defined by this. - if (RemoveEnd == li_->getDefIndex(CopyIdx)+1) { + if (RemoveStart == DefIdx && RemoveEnd == li_->getNextSlot(DefIdx)) { removeRange(li, RemoveStart, RemoveEnd, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } @@ -849,7 +901,7 @@ bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, /// the val# it defines. If the live interval becomes empty, remove it as well. bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - unsigned DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI)); + LiveIndex DefIdx = li_->getDefIndex(li_->getInstructionIndex(DefMI)); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -860,17 +912,18 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, /// PropagateDeadness - Propagate the dead marker to the instruction which /// defines the val#. static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, - unsigned &LRStart, LiveIntervals *li_, + LiveIndex &LRStart, LiveIntervals *li_, const TargetRegisterInfo* tri_) { MachineInstr *DefMI = li_->getInstructionFromIndex(li_->getDefIndex(LRStart)); if (DefMI && DefMI != CopyMI) { - int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false, tri_); - if (DeadIdx != -1) { + int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false); + if (DeadIdx != -1) DefMI->getOperand(DeadIdx).setIsDead(); - // A dead def should have a single cycle interval. - ++LRStart; - } + else + DefMI->addOperand(MachineOperand::CreateReg(li.reg, + true, true, false, true)); + LRStart = li_->getNextSlot(LRStart); } } @@ -881,8 +934,8 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, bool SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI) { - unsigned CopyIdx = li_->getInstructionIndex(CopyMI); - if (CopyIdx == 0) { + LiveIndex CopyIdx = li_->getInstructionIndex(CopyMI); + if (CopyIdx == LiveIndex()) { // FIXME: special case: function live in. It can be a general case if the // first instruction index starts at > 0 value. assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); @@ -894,13 +947,14 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } - LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx-1); + LiveInterval::iterator LR = + li.FindLiveRangeContaining(li_->getPrevSlot(CopyIdx)); if (LR == li.end()) // Livein but defined by a phi. 
return false; - unsigned RemoveStart = LR->start; - unsigned RemoveEnd = li_->getDefIndex(CopyIdx)+1; + LiveIndex RemoveStart = LR->start; + LiveIndex RemoveEnd = li_->getNextSlot(li_->getDefIndex(CopyIdx)); if (LR->end > RemoveEnd) // More uses past this copy? Nothing to do. return false; @@ -911,22 +965,25 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR)) return false; + // There are other kills of the val#. Nothing to do. + if (!li.isOnlyLROfValNo(LR)) + return false; + MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart); if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_)) // If the live range starts in another mbb and the copy mbb is not a fall // through mbb, then we can only cut the range from the beginning of the // copy mbb. - RemoveStart = li_->getMBBStartIdx(CopyMBB) + 1; + RemoveStart = li_->getNextSlot(li_->getMBBStartIdx(CopyMBB)); if (LR->valno->def == RemoveStart) { // If the def MI defines the val# and this copy is the only kill of the // val#, then propagate the dead marker. - if (li.isOnlyLROfValNo(LR)) { - PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); - ++numDeadValNo; - } - if (li.isKill(LR->valno, RemoveEnd)) - li.removeKill(LR->valno, RemoveEnd); + PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); + ++numDeadValNo; + + if (LR->valno->isKill(RemoveEnd)) + LR->valno->removeKill(RemoveEnd); } removeRange(li, RemoveStart, RemoveEnd, li_, tri_); @@ -940,97 +997,19 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, LiveInterval &ImpLi) const{ if (!CopyMI->killsRegister(ImpLi.reg)) return false; - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx); - if (LR == li.end()) - return false; - if (LR->valno->hasPHIKill()) - return false; - if (LR->valno->def != CopyIdx) - return false; - // Make sure all of val# uses are copies. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(li.reg), + // Make sure this is the only use. + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg), UE = mri_->use_end(); UI != UE;) { MachineInstr *UseMI = &*UI; ++UI; - if (JoinedCopies.count(UseMI)) - continue; - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); - LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); - if (ULR == li.end() || ULR->valno != LR->valno) + if (CopyMI == UseMI || JoinedCopies.count(UseMI)) continue; - // If the use is not a use, then it's not safe to coalesce the move. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (UseMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG && - UseMI->getOperand(1).getReg() == li.reg) - continue; - return false; - } + return false; } return true; } -/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an -/// implicit_def and it is being removed. Turn all copies from this value# -/// into implicit_defs. 
-void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li, - VNInfo *VNI) { - SmallVector ImpDefs; - MachineOperand *LastUse = NULL; - unsigned LastUseIdx = li_->getUseIndex(VNI->def); - for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg), - RE = mri_->reg_end(); RI != RE;) { - MachineOperand *MO = &RI.getOperand(); - MachineInstr *MI = &*RI; - ++RI; - if (MO->isDef()) { - if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) - ImpDefs.push_back(MI); - continue; - } - if (JoinedCopies.count(MI)) - continue; - unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(MI)); - LiveInterval::iterator ULR = li.FindLiveRangeContaining(UseIdx); - if (ULR == li.end() || ULR->valno != VNI) - continue; - // If the use is a copy, turn it into an identity copy. - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == li.reg) { - // Change it to an implicit_def. - MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); - for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i) - MI->RemoveOperand(i); - // It's no longer a copy, update the valno it defines. - unsigned DefIdx = li_->getDefIndex(UseIdx); - LiveInterval &DstInt = li_->getInterval(DstReg); - LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx); - assert(DLR != DstInt.end() && "Live range not found!"); - assert(DLR->valno->copy == MI); - DLR->valno->copy = NULL; - ReMatCopies.insert(MI); - } else if (UseIdx > LastUseIdx) { - LastUseIdx = UseIdx; - LastUse = MO; - } - } - if (LastUse) { - LastUse->setIsKill(); - li.addKill(VNI, LastUseIdx+1); - } else { - // Remove dead implicit_def's. - while (!ImpDefs.empty()) { - MachineInstr *ImpDef = ImpDefs.back(); - ImpDefs.pop_back(); - li_->RemoveMachineInstrFromMaps(ImpDef); - ImpDef->eraseFromParent(); - } - } -} - /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a /// a virtual destination register with physical source register. bool @@ -1051,13 +1030,14 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, // If the virtual register live interval extends into a loop, turn down // aggressiveness. - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); const MachineLoop *L = loopInfo->getLoopFor(CopyMBB); if (!L) { // Let's see if the virtual register live interval extends into the loop. 
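isWinToJoinVRWithSrcPhysReg, continued below, turns down coalescing aggressiveness when the virtual register's live interval reaches into a loop, since joining would pin a physical register across that whole span. A toy sketch of the underlying overlap test, assuming half-open [start, end) ranges:

#include <utility>
#include <vector>

// Toy profitability test in the spirit of isWinToJoinVRWithSrcPhysReg:
// be conservative when the interval to be merged overlaps a loop body.
using Range = std::pair<unsigned, unsigned>; // [start, end)

bool extendsIntoLoop(const std::vector<Range> &Interval,
                     unsigned LoopStart, unsigned LoopEnd) {
  for (const Range &R : Interval)
    if (R.first < LoopEnd && LoopStart < R.second)
      return true; // some live range overlaps the loop body
  return false;
}

int main() {
  std::vector<Range> VReg = {{0, 10}, {40, 55}};
  bool Win = !extendsIntoLoop(VReg, 50, 80); // overlaps -> not a win
  return Win ? 1 : 0;
}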
LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx); assert(DLR != DstInt.end() && "Live range not found!"); - DLR = DstInt.FindLiveRangeContaining(DLR->end+1); + DLR = DstInt.FindLiveRangeContaining(li_->getNextSlot(DLR->end)); if (DLR != DstInt.end()) { CopyMBB = li_->getMBBFromIndex(DLR->start); L = loopInfo->getLoopFor(CopyMBB); @@ -1067,7 +1047,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, if (!L || Length <= Threshold) return true; - unsigned UseIdx = li_->getUseIndex(CopyIdx); + LiveIndex UseIdx = li_->getUseIndex(CopyIdx); LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); if (loopInfo->getLoopFor(SMBB) != L) { @@ -1080,7 +1060,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, if (SuccMBB == CopyMBB) continue; if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB), - li_->getMBBEndIdx(SuccMBB)+1)) + li_->getNextSlot(li_->getMBBEndIdx(SuccMBB)))) return false; } } @@ -1111,11 +1091,12 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, // If the virtual register live interval is defined or cross a loop, turn // down aggressiveness. - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - unsigned UseIdx = li_->getUseIndex(CopyIdx); + LiveIndex CopyIdx = + li_->getDefIndex(li_->getInstructionIndex(CopyMI)); + LiveIndex UseIdx = li_->getUseIndex(CopyIdx); LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); assert(SLR != SrcInt.end() && "Live range not found!"); - SLR = SrcInt.FindLiveRangeContaining(SLR->start-1); + SLR = SrcInt.FindLiveRangeContaining(li_->getPrevSlot(SLR->start)); if (SLR == SrcInt.end()) return true; MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); @@ -1135,7 +1116,7 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, if (PredMBB == SMBB) continue; if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB), - li_->getMBBEndIdx(PredMBB)+1)) + li_->getNextSlot(li_->getMBBEndIdx(PredMBB)))) return false; } } @@ -1236,14 +1217,18 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, LiveInterval &RHS = li_->getInterval(SrcReg); if (li_->hasInterval(RealDstReg) && RHS.overlaps(li_->getInterval(RealDstReg))) { - DOUT << "Interfere with register "; - DEBUG(li_->getInterval(RealDstReg).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with register "; + li_->getInterval(RealDstReg).print(errs(), tri_); + }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; // Not coalescable } return true; @@ -1263,14 +1248,18 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, LiveInterval &RHS = li_->getInterval(DstReg); if (li_->hasInterval(RealSrcReg) && RHS.overlaps(li_->getInterval(RealSrcReg))) { - DOUT << "Interfere with register "; - DEBUG(li_->getInterval(RealSrcReg).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with register "; + li_->getInterval(RealSrcReg).print(errs(), tri_); + }); return false; // Not coalescable } for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) 
{ - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; // Not coalescable } return true; @@ -1299,7 +1288,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) return false; // Already done. - DOUT << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI; + DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG; @@ -1312,41 +1301,43 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcReg = CopyMI->getOperand(1).getReg(); SrcSubIdx = CopyMI->getOperand(2).getImm(); } else if (isInsSubReg || isSubRegToReg) { - if (CopyMI->getOperand(2).getSubReg()) { - DOUT << "\tSource of insert_subreg is already coalesced " - << "to another register.\n"; - return false; // Not coalescable. - } DstReg = CopyMI->getOperand(0).getReg(); DstSubIdx = CopyMI->getOperand(3).getImm(); SrcReg = CopyMI->getOperand(2).getReg(); + SrcSubIdx = CopyMI->getOperand(2).getSubReg(); + if (SrcSubIdx && SrcSubIdx != DstSubIdx) { + // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been + // coalesced to a larger register so the subreg indices cancel out. + DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already " + "coalesced to another register.\n"); + return false; // Not coalescable. + } } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ - assert(0 && "Unrecognized copy instruction!"); - return false; + llvm_unreachable("Unrecognized copy instruction!"); } // If they are already joined we continue. if (SrcReg == DstReg) { - DOUT << "\tCopy already coalesced.\n"; + DEBUG(errs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } - + bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); // If they are both physical registers, we cannot join them. if (SrcIsPhys && DstIsPhys) { - DOUT << "\tCan not coalesce physregs.\n"; + DEBUG(errs() << "\tCan not coalesce physregs.\n"); return false; // Not coalescable. } - + // We only join virtual registers with allocatable physical registers. if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DOUT << "\tSrc reg is unallocatable physreg.\n"; + DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n"); return false; // Not coalescable. } if (DstIsPhys && !allocatableRegs_[DstReg]) { - DOUT << "\tDst reg is unallocatable physreg.\n"; + DEBUG(errs() << "\tDst reg is unallocatable physreg.\n"); return false; // Not coalescable. } @@ -1360,9 +1351,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); assert(DstSubRC && "Illegal subregister index"); if (!DstSubRC->contains(SrcSubReg)) { - DOUT << "\tIncompatible destination regclass: " - << tri_->getName(SrcSubReg) << " not in " << DstSubRC->getName() - << ".\n"; + DEBUG(errs() << "\tIncompatible destination regclass: " + << tri_->getName(SrcSubReg) << " not in " + << DstSubRC->getName() << ".\n"); return false; // Not coalescable. 
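These hunks are part of a patch-wide conversion from the old DOUT stream to DEBUG(errs() << ...), with multi-statement output wrapped in a braced block so it shares one guard. A standalone analogue of the pattern; DBG and DebugFlag are stand-ins for LLVM's actual Debug.h machinery, not its real definitions:

#include <iostream>

#ifndef NDEBUG
static bool DebugFlag = true; // LLVM gates this on -debug; hardwired here
#define DBG(X) do { if (DebugFlag) { X; } } while (false)
#else
#define DBG(X) do { } while (false)
#endif

int main() {
  // Single expression, like DEBUG(errs() << ...):
  DBG(std::cerr << "\tCopy already coalesced.\n");
  // Several statements under one guard, like DEBUG({ ... }):
  DBG({
    std::cerr << "Interfere with register ";
    std::cerr << "R42" << '\n';
  });
}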
} } @@ -1377,15 +1368,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); assert(SrcSubRC && "Illegal subregister index"); if (!SrcSubRC->contains(DstReg)) { - DOUT << "\tIncompatible source regclass: " - << tri_->getName(DstSubReg) << " not in " << SrcSubRC->getName() - << ".\n"; + DEBUG(errs() << "\tIncompatible source regclass: " + << tri_->getName(DstSubReg) << " not in " + << SrcSubRC->getName() << ".\n"); + (void)DstSubReg; return false; // Not coalescable. } } // Should be non-null only when coalescing to a sub-register class. bool CrossRC = false; + const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); + const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); const TargetRegisterClass *NewRC = NULL; MachineBasicBlock *CopyMBB = CopyMI->getParent(); unsigned RealDstReg = 0; @@ -1400,7 +1394,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (DstSubIdx != SubIdx) { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1413,7 +1407,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been // coalesced to a larger register so the subreg indices cancel out. if (SrcSubIdx != SubIdx) { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } else @@ -1422,8 +1416,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } else if ((DstIsPhys && isExtSubReg) || (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DOUT << "\tSrc of extract_subreg already coalesced with reg" - << " of a super-class.\n"; + DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg" + << " of a super-class.\n"); return false; // Not coalescable. } @@ -1446,11 +1440,22 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // class as the would be resulting register. SubIdx = 0; else { - DOUT << "\t Sub-register indices mismatch.\n"; + DEBUG(errs() << "\t Sub-register indices mismatch.\n"); return false; // Not coalescable. } } if (SubIdx) { + if (!DstIsPhys && !SrcIsPhys) { + if (isInsSubReg || isSubRegToReg) { + NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx); + } else // extract_subreg { + NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx); + } + if (!NewRC) { + DEBUG(errs() << "\t Conflicting sub-register indices.\n"); + return false; // Not coalescable + } + unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); @@ -1461,7 +1466,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } } else if (differingRegisterClasses(SrcReg, DstReg)) { - if (!CrossClassJoin) + if (DisableCrossClassJoin) return false; CrossRC = true; @@ -1502,11 +1507,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } - const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); - const TargetRegisterClass *DstRC= DstIsPhys ? 
0 : mri_->getRegClass(DstReg); unsigned LargeReg = SrcReg; unsigned SmallReg = DstReg; - unsigned Limit = 0; // Now determine the register class of the joined register. if (isExtSubReg) { @@ -1517,13 +1519,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { Again = true; return false; } - Limit = allocatableRCRegs_[DstRC].count(); + if (!DstIsPhys && !SrcIsPhys) + NewRC = SrcRC; } else if (!SrcIsPhys && !DstIsPhys) { NewRC = getCommonSubClass(SrcRC, DstRC); if (!NewRC) { - DOUT << "\tDisjoint regclasses: " - << SrcRC->getName() << ", " - << DstRC->getName() << ".\n"; + DEBUG(errs() << "\tDisjoint regclasses: " + << SrcRC->getName() << ", " + << DstRC->getName() << ".\n"); return false; // Not coalescable. } if (DstRC->getSize() > SrcRC->getSize()) @@ -1537,7 +1540,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DOUT << "\tSrc/Dest are different register classes.\n"; + DEBUG(errs() << "\tSrc/Dest are different register classes.\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1552,15 +1555,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg)) return false; - + LiveInterval &SrcInt = li_->getInterval(SrcReg); LiveInterval &DstInt = li_->getInterval(DstReg); assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg && "Register mapping is horribly broken!"); - DOUT << "\t\tInspecting "; SrcInt.print(DOUT, tri_); - DOUT << " and "; DstInt.print(DOUT, tri_); - DOUT << ": "; + DEBUG({ + errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_); + errs() << " and "; DstInt.print(errs(), tri_); + errs() << ": "; + }); // Save a copy of the virtual register live interval. We'll manually // merge this into the "real" physical register live interval this is @@ -1590,7 +1595,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1598,7 +1603,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1612,9 +1617,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - if (TheCopy.isBackEdge) - Threshold *= 2; // Favors back edge copies. 
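With the back-edge bonus deleted above, the remaining guard (continued in the next hunk) aborts a virtual-to-physical join when the interval is long but sparsely used. Roughly that decision as a toy function, with the threshold and ratio shapes taken from the surrounding code:

// Toy version of JoinCopy's physreg tie-down guard: joining a long
// virtual-register interval into a physical register is only worthwhile
// if the register is used densely enough along the interval.
bool worthTyingDownPhysReg(unsigned IntervalLength, unsigned NumUses,
                           unsigned AllocatableRegsInClass) {
  unsigned Threshold = AllocatableRegsInClass * 2;
  if (IntervalLength <= Threshold)
    return true;                          // short interval: always fine
  float Ratio = 1.0f / Threshold;         // required use density
  return (float(NumUses) / IntervalLength) >= Ratio;
}

int main() {
  // 8 allocatable regs -> threshold 16; a 200-slot interval needs about
  // 200/16 ~ 13 uses to justify pinning the physreg. 5 uses fails.
  return worthTyingDownPhysReg(200, 5, 8) ? 1 : 0;
}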
- unsigned Length = li_->getApproximateInstructionCount(JoinVInt); float Ratio = 1.0 / Threshold; if (Length > Threshold && @@ -1622,7 +1624,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->use_end()) / Length) < Ratio)) { mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; - DOUT << "\tMay tie down a physical register, abort!\n"; + DEBUG(errs() << "\tMay tie down a physical register, abort!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1641,7 +1643,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Only coalesce an empty interval (defined by implicit_def) with // another interval which has a valno defined by the CopyMI and the CopyMI // is a kill of the implicit def. - DOUT << "Not profitable!\n"; + DEBUG(errs() << "Not profitable!\n"); return false; } @@ -1651,9 +1653,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - ReMaterializeTrivialDef(SrcInt, DstInt.reg, CopyMI)) + ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) return true; - + // If we can eliminate the copy without merging the live ranges, do so now. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || @@ -1661,9 +1663,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { JoinedCopies.insert(CopyMI); return true; } - + // Otherwise, we are unable to join the intervals. - DOUT << "Interference!\n"; + DEBUG(errs() << "Interference!\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1676,7 +1678,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && "LiveInterval::join didn't work right!"); - + // If we're about to merge live ranges into a physical register live interval, // we have to update any aliased register's live ranges to indicate that they // have clobbered values for this range. @@ -1690,14 +1692,14 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(), E = SavedLI->vni_end(); I != E; ++I) { const VNInfo *ValNo = *I; - VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy, + VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(), false, // updated at * li_->getVNInfoAllocator()); NewValNo->setFlags(ValNo->getFlags()); // * updated here. RealInt.addKills(NewValNo, ValNo->kills); RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo); } - RealInt.weight += SavedLI->weight; + RealInt.weight += SavedLI->weight; DstReg = RealDstReg ? RealDstReg : RealSrcReg; } @@ -1721,32 +1723,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Coalescing to a virtual register that is of a sub-register class of the // other. Make sure the resulting register is set to the right register class. - if (CrossRC) { - ++numCrossRCs; - if (NewRC) - mri_->setRegClass(DstReg, NewRC); - } - - if (NewHeuristic) { - // Add all copies that define val# in the source interval into the queue. - for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(), - e = ResSrcInt->vni_end(); i != e; ++i) { - const VNInfo *vni = *i; - // FIXME: Do isPHIDef and isDefAccurate both need to be tested? 
- if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate()) - continue; - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx; - if (CopyMI && - JoinedCopies.count(CopyMI) == 0 && - tii_->isMoveInstr(*CopyMI, NewSrcReg, NewDstReg, - NewSrcSubIdx, NewDstSubIdx)) { - unsigned LoopDepth = loopInfo->getLoopDepth(CopyMBB); - JoinQueue->push(CopyRec(CopyMI, LoopDepth, - isBackEdgeCopy(CopyMI, DstReg))); - } - } - } + if (CrossRC) + ++numCrossRCs; + + // This may happen even if it's cross-rc coalescing. e.g. + // %reg1026 = SUBREG_TO_REG 0, %reg1037, 4 + // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to + // be allocate a register from GR64_ABCD. + if (NewRC) + mri_->setRegClass(DstReg, NewRC); // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); @@ -1757,13 +1742,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (TargetRegisterInfo::isVirtualRegister(DstReg)) RemoveUnnecessaryKills(DstReg, *ResDstInt); - if (isInsSubReg) - // Avoid: - // r1024 = op - // r1024 = implicit_def - // ... - // = r1024 - RemoveDeadImpDef(DstReg, *ResDstInt); UpdateRegDefsUses(SrcReg, DstReg, SubIdx); // SrcReg is guarateed to be the register whose live interval that is @@ -1779,29 +1757,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { delete SavedLI; } - if (isEmpty) { - // Now the copy is being coalesced away, the val# previously defined - // by the copy is being defined by an IMPLICIT_DEF which defines a zero - // length interval. Remove the val#. - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - const LiveRange *LR = ResDstInt->getLiveRangeContaining(CopyIdx); - VNInfo *ImpVal = LR->valno; - assert(ImpVal->def == CopyIdx); - unsigned NextDef = LR->end; - TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal); - ResDstInt->removeValNo(ImpVal); - LR = ResDstInt->FindLiveRangeContaining(NextDef); - if (LR != ResDstInt->end() && LR->valno->def == NextDef) { - // Special case: vr1024 = implicit_def - // vr1024 = insert_subreg vr1024, vr1025, c - // The insert_subreg becomes a "copy" that defines a val# which can itself - // be coalesced away. - MachineInstr *DefMI = li_->getInstructionFromIndex(NextDef); - if (DefMI->getOpcode() == TargetInstrInfo::INSERT_SUBREG) - LR->valno->copy = DefMI; - } - } - // If resulting interval has a preference that no longer fits because of subreg // coalescing, just clear the preference. unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_); @@ -1812,8 +1767,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); } - DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_); - DOUT << "\n"; + DEBUG({ + errs() << "\n\t\tJoined. Result = "; + ResDstInt->print(errs(), tri_); + errs() << "\n"; + }); ++numJoins; return true; @@ -1860,7 +1818,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, // been computed, return it. if (OtherValNoAssignments[OtherValNo->id] >= 0) return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; - + // Mark this value number as currently being computed, then ask what the // ultimate value # of the other value is. ThisValNoAssignments[VN] = -2; @@ -1896,7 +1854,7 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, DstReg == li.reg && SrcReg == Reg) { // Cache computed info. 
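ComputeUltimateVN, visible in context above, resolves chains of copy-defined value numbers with memoization, using -1 for unassigned and -2 as an in-progress marker to cut cycles. A self-contained toy of that scheme:

#include <cassert>
#include <vector>

// Toy of ComputeUltimateVN: each value number may be "defined as a copy of"
// another value number; follow the chain to a single ultimate id, memoizing
// along the way. -1 = unassigned, -2 = in progress (breaks cycles).
int ultimateVN(int VN, const std::vector<int> &CopyOf,
               std::vector<int> &Assign, int &NextUltimate) {
  if (Assign[VN] >= 0)
    return Assign[VN];                    // memoized
  Assign[VN] = -2;                        // mark: being computed
  int Src = CopyOf[VN];
  int Result;
  if (Src < 0 || Assign[Src] == -2)
    Result = NextUltimate++;              // chain head (or cycle)
  else
    Result = ultimateVN(Src, CopyOf, Assign, NextUltimate);
  return Assign[VN] = Result;
}

int main() {
  std::vector<int> CopyOf = {-1, 0, 1};   // v2 copies v1, v1 copies v0
  std::vector<int> Assign(3, -1);
  int Next = 0;
  int U2 = ultimateVN(2, CopyOf, Assign, Next);
  assert(U2 == ultimateVN(0, CopyOf, Assign, Next)); // all collapse to one
}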
LR->valno->def = LR->start; - LR->valno->copy = DefMI; + LR->valno->setCopy(DefMI); return true; } } @@ -1910,16 +1868,16 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, /// joins them and returns true. bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ assert(RHS.containsOneValue()); - + // Some number (potentially more than one) value numbers in the current // interval may be defined as copies from the RHS. Scan the overlapping // portions of the LHS and RHS, keeping track of this and looking for // overlapping live ranges that are NOT defined as copies. If these exist, we // cannot coalesce. - + LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end(); LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end(); - + if (LHSIt->start < RHSIt->start) { LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start); if (LHSIt != LHS.begin()) --LHSIt; @@ -1927,9 +1885,9 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start); if (RHSIt != RHS.begin()) --RHSIt; } - + SmallVector EliminatedLHSVals; - + while (1) { // Determine if these live intervals overlap. bool Overlaps = false; @@ -1937,7 +1895,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ Overlaps = LHSIt->end > RHSIt->start; else Overlaps = RHSIt->end > LHSIt->start; - + // If the live intervals overlap, there are two interesting cases: if the // LHS interval is defined by a copy from the RHS, it's ok and we record // that the LHS value # is the same as the RHS. If it's not, then we cannot @@ -1955,7 +1913,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // vr1025 = copy vr1024 // .. // BB2: - // vr1024 = op + // vr1024 = op // = vr1025 // Even though vr1025 is copied from vr1024, it's not safe to // coalesce them since the live range of vr1025 intersects the @@ -1964,12 +1922,12 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ return false; EliminatedLHSVals.push_back(LHSIt->valno); } - + // We know this entire LHS live range is okay, so skip it now. if (++LHSIt == LHSEnd) break; continue; } - + if (LHSIt->end < RHSIt->end) { if (++LHSIt == LHSEnd) break; } else { @@ -1993,7 +1951,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // vr1025 = copy vr1024 // .. // BB2: - // vr1024 = op + // vr1024 = op // = vr1025 // Even though vr1025 is copied from vr1024, it's not safe to // coalesced them since live range of vr1025 intersects the @@ -2007,11 +1965,11 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ } } } - + if (++RHSIt == RHSEnd) break; } } - + // If we got here, we know that the coalescing will be successful and that // the value numbers in EliminatedLHSVals will all be merged together. Since // the most common case is that EliminatedLHSVals has a single number, we @@ -2039,28 +1997,29 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ *tri_->getSuperRegisters(LHS.reg)) // Imprecise sub-register information. Can't handle it. return false; - assert(0 && "No copies from the RHS?"); + llvm_unreachable("No copies from the RHS?"); } else { LHSValNo = EliminatedLHSVals[0]; } - + // Okay, now that there is a single LHS value number that we're merging the // RHS into, update the value number info for the LHS to indicate that the // value number is defined where the RHS value number was. 
const VNInfo *VNI = RHS.getValNumInfo(0); LHSValNo->def = VNI->def; - LHSValNo->copy = VNI->copy; - + LHSValNo->setCopy(VNI->getCopy()); + // Okay, the final step is to loop over the RHS live intervals, adding them to // the LHS. if (VNI->hasPHIKill()) LHSValNo->setHasPHIKill(true); LHS.addKills(LHSValNo, VNI->kills); LHS.MergeRangesInAsValue(RHS, LHSValNo); - LHS.weight += RHS.weight; + + LHS.ComputeJoinedWeight(RHS); // Update regalloc hint if both are virtual registers. - if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && + if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && TargetRegisterInfo::isVirtualRegister(RHS.reg)) { std::pair RHSPref = mri_->getRegAllocationHint(RHS.reg); std::pair LHSPref = mri_->getRegAllocationHint(LHS.reg); @@ -2122,8 +2081,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } else { for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } @@ -2137,19 +2098,21 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } else { for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DOUT << "Interfere with sub-register "; - DEBUG(li_->getInterval(*SR).print(DOUT, tri_)); + DEBUG({ + errs() << "Interfere with sub-register "; + li_->getInterval(*SR).print(errs(), tri_); + }); return false; } } } - + // Compute ultimate value numbers for the LHS and RHS values. if (RHS.containsOneValue()) { // Copies from a liveinterval with a single value are simple to handle and // very common, handle the special case here. This is important, because // often RHS is small and LHS is large (e.g. a physreg). - + // Find out if the RHS is defined as a copy from some value in the LHS. int RHSVal0DefinedFromLHS = -1; int RHSValID = -1; @@ -2167,15 +2130,16 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, } } else { // It was defined as a copy from the LHS, find out what value # it is. - RHSValNoInfo = LHS.getLiveRangeContaining(RHSValNoInfo0->def-1)->valno; + RHSValNoInfo = + LHS.getLiveRangeContaining(li_->getPrevSlot(RHSValNoInfo0->def))->valno; RHSValID = RHSValNoInfo->id; RHSVal0DefinedFromLHS = RHSValID; } - + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); RHSValNoAssignments.resize(RHS.getNumValNums(), -1); NewVNInfo.resize(LHS.getNumValNums(), NULL); - + // Okay, *all* of the values in LHS that are defined as a copy from RHS // should now get updated. for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); @@ -2207,7 +2171,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, LHSValNoAssignments[VN] = VN; } } - + assert(RHSValID != -1 && "Didn't find value #?"); RHSValNoAssignments[0] = RHSValID; if (RHSVal0DefinedFromLHS != -1) { @@ -2221,44 +2185,46 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? continue; - + // DstReg is known to be a register in the LHS interval. 
If the src is // from the RHS interval, we can use its value #. if (li_->getVNInfoSourceReg(VNI) != RHS.reg) continue; - + // Figure out the value # from the RHS. - LHSValsDefinedFromRHS[VNI]=RHS.getLiveRangeContaining(VNI->def-1)->valno; + LHSValsDefinedFromRHS[VNI]= + RHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno; } - + // Loop over the value numbers of the RHS, seeing if any are defined from // the LHS. for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy? + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? continue; - + // DstReg is known to be a register in the RHS interval. If the src is // from the LHS interval, we can use its value #. if (li_->getVNInfoSourceReg(VNI) != LHS.reg) continue; - + // Figure out the value # from the LHS. - RHSValsDefinedFromLHS[VNI]=LHS.getLiveRangeContaining(VNI->def-1)->valno; + RHSValsDefinedFromLHS[VNI]= + LHS.getLiveRangeContaining(li_->getPrevSlot(VNI->def))->valno; } - + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); RHSValNoAssignments.resize(RHS.getNumValNums(), -1); NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); i != e; ++i) { VNInfo *VNI = *i; unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) continue; ComputeUltimateVN(VNI, NewVNInfo, LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, @@ -2276,20 +2242,20 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, RHSValNoAssignments[VN] = NewVNInfo.size()-1; continue; } - + ComputeUltimateVN(VNI, NewVNInfo, RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, RHSValNoAssignments, LHSValNoAssignments); } } - + // Armed with the mappings of LHS/RHS values to ultimate values, walk the // interval lists to see if these intervals are coalescable. LiveInterval::const_iterator I = LHS.begin(); LiveInterval::const_iterator IE = LHS.end(); LiveInterval::const_iterator J = RHS.begin(); LiveInterval::const_iterator JE = RHS.end(); - + // Skip ahead until the first place of potential sharing. if (I->start < J->start) { I = std::upper_bound(I, IE, J->start); @@ -2298,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, J = std::upper_bound(J, JE, I->start); if (J != RHS.begin()) --J; } - + while (1) { // Determine if these two live ranges overlap. 
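The walk that follows is a classic two-pointer sweep over two sorted range lists: an overlap is only tolerated when both ranges were mapped to the same ultimate value number. The core of that check in isolation, with toy Range and assignment containers:

#include <vector>

struct Range { unsigned start, end; int valno; }; // [start, end)

// Toy of JoinIntervals' final walk: any overlap between ranges whose value
// numbers were NOT merged to the same ultimate value blocks coalescing.
bool compatible(const std::vector<Range> &L, const std::vector<Range> &R,
                const std::vector<int> &LAssign,
                const std::vector<int> &RAssign) {
  size_t i = 0, j = 0;
  while (i < L.size() && j < R.size()) {
    if (L[i].start < R[j].end && R[j].start < L[i].end)   // overlap
      if (LAssign[L[i].valno] != RAssign[R[j].valno])
        return false;
    // advance whichever range ends first
    if (L[i].end < R[j].end) ++i; else ++j;
  }
  return true;
}

int main() {
  std::vector<Range> L = {{0, 10, 0}}, R = {{5, 8, 0}};
  return compatible(L, R, {0}, {0}) ? 0 : 1; // same ultimate value: ok
}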
bool Overlaps; @@ -2316,7 +2282,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, RHSValNoAssignments[J->valno->id]) return false; } - + if (I->end < J->end) { ++I; if (I == IE) break; @@ -2331,7 +2297,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = LHSValsDefinedFromRHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned LHSValID = LHSValNoAssignments[VNI->id]; - LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def); + NewVNInfo[LHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[LHSValID]->setHasPHIKill(true); RHS.addKills(NewVNInfo[LHSValID], VNI->kills); @@ -2342,7 +2308,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = RHSValsDefinedFromLHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned RHSValID = RHSValNoAssignments[VNI->id]; - LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def); + NewVNInfo[RHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[RHSValID]->setHasPHIKill(true); LHS.addKills(NewVNInfo[RHSValID], VNI->kills); @@ -2377,37 +2343,17 @@ namespace { }; } -/// getRepIntervalSize - Returns the size of the interval that represents the -/// specified register. -template -unsigned JoinPriorityQueue::getRepIntervalSize(unsigned Reg) { - return Rc->getRepIntervalSize(Reg); -} - -/// CopyRecSort::operator - Join priority queue sorting function. -/// -bool CopyRecSort::operator()(CopyRec left, CopyRec right) const { - // Inner loops first. - if (left.LoopDepth > right.LoopDepth) - return false; - else if (left.LoopDepth == right.LoopDepth) - if (left.isBackEdge && !right.isBackEdge) - return false; - return true; -} - void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector &TryAgain) { - DOUT << ((Value*)MBB->getBasicBlock())->getName() << ":\n"; + DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n"); std::vector VirtCopies; std::vector PhysCopies; std::vector ImpDefCopies; - unsigned LoopDepth = loopInfo->getLoopDepth(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E;) { MachineInstr *Inst = MII++; - + // If this isn't a copy nor a extract_subreg, we can't join intervals. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) { @@ -2422,21 +2368,14 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (NewHeuristic) { - JoinQueue->push(CopyRec(Inst, LoopDepth, isBackEdgeCopy(Inst, DstReg))); - } else { - if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) - ImpDefCopies.push_back(CopyRec(Inst, 0, false)); - else if (SrcIsPhys || DstIsPhys) - PhysCopies.push_back(CopyRec(Inst, 0, false)); - else - VirtCopies.push_back(CopyRec(Inst, 0, false)); - } + if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()) + ImpDefCopies.push_back(CopyRec(Inst, 0)); + else if (SrcIsPhys || DstIsPhys) + PhysCopies.push_back(CopyRec(Inst, 0)); + else + VirtCopies.push_back(CopyRec(Inst, 0)); } - if (NewHeuristic) - return; - // Try coalescing implicit copies first, followed by copies to / from // physical registers, then finally copies from virtual registers to // virtual registers. 
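The comment above describes the phase ordering that replaced the deleted priority-queue heuristic: implicit_def copies first, then copies touching physical registers, then virtual-virtual copies. As a toy driver, assuming a stand-in Copy record:

#include <vector>

struct Copy { bool srcIsImpDef, involvesPhysReg; };

// Toy of CopyCoalesceInMBB's bucketing: classify each copy, then process
// the buckets in a fixed priority order instead of a priority queue.
void coalesceInOrder(const std::vector<Copy> &Copies,
                     void (*tryJoin)(const Copy &)) {
  std::vector<Copy> ImpDef, Phys, Virt;
  for (const Copy &C : Copies) {
    if (C.srcIsImpDef)          ImpDef.push_back(C);
    else if (C.involvesPhysReg) Phys.push_back(C);
    else                        Virt.push_back(C);
  }
  for (const Copy &C : ImpDef) tryJoin(C);
  for (const Copy &C : Phys)   tryJoin(C);
  for (const Copy &C : Virt)   tryJoin(C);
}

static void noop(const Copy &) {}
int main() { coalesceInOrder({{true, false}, {false, true}}, noop); }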
@@ -2464,10 +2403,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, } void SimpleRegisterCoalescing::joinIntervals() { - DOUT << "********** JOINING INTERVALS ***********\n"; - - if (NewHeuristic) - JoinQueue = new JoinPriorityQueue(this); + DEBUG(errs() << "********** JOINING INTERVALS ***********\n"); std::vector TryAgainList; if (loopInfo->empty()) { @@ -2495,52 +2431,26 @@ void SimpleRegisterCoalescing::joinIntervals() { for (unsigned i = 0, e = MBBs.size(); i != e; ++i) CopyCoalesceInMBB(MBBs[i].second, TryAgainList); } - + // Joining intervals can allow other intervals to be joined. Iteratively join // until we make no progress. - if (NewHeuristic) { - SmallVector TryAgain; - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - while (!JoinQueue->empty()) { - CopyRec R = JoinQueue->pop(); - bool Again = false; - bool Success = JoinCopy(R, Again); - if (Success) - ProgressMade = true; - else if (Again) - TryAgain.push_back(R); - } + bool ProgressMade = true; + while (ProgressMade) { + ProgressMade = false; - if (ProgressMade) { - while (!TryAgain.empty()) { - JoinQueue->push(TryAgain.back()); - TryAgain.pop_back(); - } - } - } - } else { - bool ProgressMade = true; - while (ProgressMade) { - ProgressMade = false; - - for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { - CopyRec &TheCopy = TryAgainList[i]; - if (TheCopy.MI) { - bool Again = false; - bool Success = JoinCopy(TheCopy, Again); - if (Success || !Again) { - TheCopy.MI = 0; // Mark this one as done. - ProgressMade = true; - } - } + for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { + CopyRec &TheCopy = TryAgainList[i]; + if (!TheCopy.MI) + continue; + + bool Again = false; + bool Success = JoinCopy(TheCopy, Again); + if (Success || !Again) { + TheCopy.MI = 0; // Mark this one as done. + ProgressMade = true; } } } - - if (NewHeuristic) - delete JoinQueue; } /// Return true if the two specified registers belong to different register @@ -2567,9 +2477,11 @@ SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA, /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. MachineOperand * -SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, - unsigned Reg, unsigned &UseIdx) const{ - UseIdx = 0; +SimpleRegisterCoalescing::lastRegisterUse(LiveIndex Start, + LiveIndex End, + unsigned Reg, + LiveIndex &UseIdx) const{ + UseIdx = LiveIndex(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { MachineOperand *LastUse = NULL; for (MachineRegisterInfo::use_iterator I = mri_->use_begin(Reg), @@ -2581,7 +2493,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, SrcReg == DstReg) // Ignore identity copies. 
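The simplified joinIntervals driver above re-scans the try-again list until a full pass makes no progress, because each successful join can unlock joins that previously failed. That control flow in miniature:

#include <vector>

struct Item { bool done = false; };

// Toy of joinIntervals' fixed-point loop: retry every unfinished item each
// pass; stop when a whole pass completes nothing new.
void runToFixedPoint(std::vector<Item> &TryAgain,
                     bool (*tryOnce)(Item &, bool &AgainLater)) {
  bool Progress = true;
  while (Progress) {
    Progress = false;
    for (Item &I : TryAgain) {
      if (I.done) continue;
      bool Again = false;
      bool Success = tryOnce(I, Again);
      if (Success || !Again) {
        I.done = true;     // either joined, or provably hopeless
        Progress = true;
      }
    }
  }
}

static bool alwaysJoin(Item &, bool &Again) { Again = false; return true; }
int main() { std::vector<Item> W(3); runToFixedPoint(W, alwaysJoin); }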
continue; - unsigned Idx = li_->getInstructionIndex(UseMI); + LiveIndex Idx = li_->getInstructionIndex(UseMI); if (Idx >= Start && Idx < End && Idx >= UseIdx) { LastUse = &Use; UseIdx = li_->getUseIndex(Idx); @@ -2590,13 +2502,13 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, return LastUse; } - int e = (End-1) / InstrSlots::NUM * InstrSlots::NUM; - int s = Start; + LiveIndex s = Start; + LiveIndex e = li_->getBaseIndex(li_->getPrevSlot(End)); while (e >= s) { // Skip deleted instructions MachineInstr *MI = li_->getInstructionFromIndex(e); - while ((e - InstrSlots::NUM) >= s && !MI) { - e -= InstrSlots::NUM; + while (e != LiveIndex() && li_->getPrevIndex(e) >= s && !MI) { + e = li_->getPrevIndex(e); MI = li_->getInstructionFromIndex(e); } if (e < s || MI == NULL) @@ -2615,7 +2527,7 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, } } - e -= InstrSlots::NUM; + e = li_->getPrevIndex(e); } return NULL; @@ -2624,9 +2536,9 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, void SimpleRegisterCoalescing::printRegName(unsigned reg) const { if (TargetRegisterInfo::isPhysicalRegister(reg)) - cerr << tri_->getName(reg); + errs() << tri_->getName(reg); else - cerr << "%reg" << reg; + errs() << "%reg" << reg; } void SimpleRegisterCoalescing::releaseMemory() { @@ -2635,64 +2547,106 @@ void SimpleRegisterCoalescing::releaseMemory() { ReMatDefs.clear(); } -static bool isZeroLengthInterval(LiveInterval *li) { +/// Returns true if the given live interval is zero length. +static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) { for (LiveInterval::Ranges::const_iterator i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i) - if (i->end - i->start > LiveInterval::InstrSlots::NUM) + if (li_->getPrevIndex(i->end) > i->start) return false; return true; } -/// TurnCopyIntoImpDef - If source of the specified copy is an implicit def, -/// turn the copy into an implicit def. -bool -SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, - MachineBasicBlock *MBB, - unsigned DstReg, unsigned SrcReg) { - MachineInstr *CopyMI = &*I; - unsigned CopyIdx = li_->getDefIndex(li_->getInstructionIndex(CopyMI)); - if (!li_->hasInterval(SrcReg)) - return false; - LiveInterval &SrcInt = li_->getInterval(SrcReg); - if (!SrcInt.empty()) - return false; - if (!li_->hasInterval(DstReg)) - return false; - LiveInterval &DstInt = li_->getInterval(DstReg); - const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx); - // If the valno extends beyond this basic block, then it's not safe to delete - // the val# or else livein information won't be correct. - MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end); - if (EndMBB != MBB) - return false; - DstInt.removeValNo(DstLR->valno); - CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); - for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i) - CopyMI->RemoveOperand(i); - CopyMI->getOperand(0).setIsUndef(); - bool NoUse = mri_->use_empty(SrcReg); - if (NoUse) { - for (MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(SrcReg), - RE = mri_->reg_end(); RI != RE; ) { - assert(RI.getOperand().isDef()); - MachineInstr *DefMI = &*RI; - ++RI; - // The implicit_def source has no other uses, delete it. 
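TurnCopyIntoImpDef (removed here) and several surviving loops share one iterator discipline: step the register/use iterator before the current instruction can be erased from under it. A toy demonstration with a std::list standing in for the use list:

#include <list>

// Toy of the "advance before erase" pattern used throughout this file,
// e.g. "MachineInstr *MI = &*RI; ++RI;" before a possible eraseFromParent.
int main() {
  std::list<int> Uses = {1, 2, 3, 4};
  for (auto I = Uses.begin(), E = Uses.end(); I != E;) {
    auto Cur = I++;        // step first, so I never dangles
    if (*Cur % 2 == 0)
      Uses.erase(Cur);     // safe: I already points past the victim
  }
  return int(Uses.size()); // the two odd entries remain
}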
- assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); - li_->RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); +void SimpleRegisterCoalescing::CalculateSpillWeights() { + SmallSet Processed; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* MBB = mbbi; + LiveIndex MBBEnd = li_->getMBBEndIdx(MBB); + MachineLoop* loop = loopInfo->getLoopFor(MBB); + unsigned loopDepth = loop ? loop->getLoopDepth() : 0; + bool isExit = loop ? loop->isLoopExit(MBB) : false; + + for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end(); + mii != mie; ++mii) { + MachineInstr *MI = mii; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &mopi = MI->getOperand(i); + if (!mopi.isReg() || mopi.getReg() == 0) + continue; + unsigned Reg = mopi.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) + continue; + // Multiple uses of reg by the same instruction. It should not + // contribute to spill weight again. + if (!Processed.insert(Reg)) + continue; + + bool HasDef = mopi.isDef(); + bool HasUse = !HasDef; + for (unsigned j = i+1; j != e; ++j) { + const MachineOperand &mopj = MI->getOperand(j); + if (!mopj.isReg() || mopj.getReg() != Reg) + continue; + HasDef |= mopj.isDef(); + HasUse |= mopj.isUse(); + if (HasDef && HasUse) + break; + } + + LiveInterval &RegInt = li_->getInterval(Reg); + float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth); + if (HasDef && isExit) { + // Looks like this is a loop count variable update. + LiveIndex DefIdx = + li_->getDefIndex(li_->getInstructionIndex(MI)); + const LiveRange *DLR = + li_->getInterval(Reg).getLiveRangeContaining(DefIdx); + if (DLR->end > MBBEnd) + Weight *= 3.0F; + } + RegInt.weight += Weight; + } + Processed.clear(); } } - // Mark uses of implicit_def isUndef. - for (MachineRegisterInfo::use_iterator RI = mri_->use_begin(DstReg), - RE = mri_->use_end(); RI != RE; ++RI) { - assert((*RI).getParent() == MBB); - RI.getOperand().setIsUndef(); - } + for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { + LiveInterval &LI = *I->second; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // If the live interval length is essentially zero, i.e. in every live + // range the use follows def immediately, it doesn't make sense to spill + // it and hope it will be easier to allocate for this li. + if (isZeroLengthInterval(&LI, li_)) { + LI.weight = HUGE_VALF; + continue; + } - ++I; - return true; + bool isLoad = false; + SmallVector SpillIs; + if (li_->isReMaterializable(LI, SpillIs, isLoad)) { + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. If non of the defs are + // loads, then it's potentially very cheap to re-materialize. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. + if (isLoad) + LI.weight *= 0.9F; + else + LI.weight *= 0.5F; + } + + // Slightly prefer live interval that has been assigned a preferred reg. + std::pair Hint = mri_->getRegAllocationHint(LI.reg); + if (Hint.first || Hint.second) + LI.weight *= 1.01F; + + // Divide the weight of the interval by its size. This encourages + // spilling of intervals that are large and have few uses, and + // discourages spilling of small intervals with many uses. 
+ LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; + } + } } @@ -2703,11 +2657,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { tri_ = tm_->getRegisterInfo(); tii_ = tm_->getInstrInfo(); li_ = &getAnalysis(); + AA = &getAnalysis(); loopInfo = &getAnalysis(); - DOUT << "********** SIMPLE REGISTER COALESCING **********\n" - << "********** Function: " - << ((Value*)mf_->getFunction())->getName() << '\n'; + DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " + << ((Value*)mf_->getFunction())->getName() << '\n'); allocatableRegs_ = tri_->getAllocatableSet(fn); for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(), @@ -2719,10 +2674,10 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { if (EnableJoining) { joinIntervals(); DEBUG({ - DOUT << "********** INTERVALS POST JOINING **********\n"; + errs() << "********** INTERVALS POST JOINING **********\n"; for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){ - I->second->print(DOUT, tri_); - DOUT << "\n"; + I->second->print(errs(), tri_); + errs() << "\n"; } }); } @@ -2733,29 +2688,40 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); mbbi != mbbe; ++mbbi) { MachineBasicBlock* mbb = mbbi; - unsigned loopDepth = loopInfo->getLoopDepth(mbb); - for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); mii != mie; ) { MachineInstr *MI = mii; unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (JoinedCopies.count(MI)) { // Delete all coalesced copies. + bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { assert((MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || MI->getOpcode() == TargetInstrInfo::INSERT_SUBREG || MI->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) && "Unrecognized copy instruction"); DstReg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO = INSERT_SUBREG %D0, %S0, 1 + // or else the scavenger may complain. LowerSubregs will + // change this to an IMPLICIT_DEF later. + DoDelete = false; } if (MI->registerDefIsDead(DstReg)) { LiveInterval &li = li_->getInterval(DstReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); + DoDelete = true; + } + if (!DoDelete) + mii = next(mii); + else { + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; } - li_->RemoveMachineInstrFromMaps(MI); - mii = mbbi->erase(mii); - ++numPeep; continue; } @@ -2807,70 +2773,20 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { li_->RemoveMachineInstrFromMaps(MI); mii = mbbi->erase(mii); ++numPeep; - } else if (!isMove || !TurnCopyIntoImpDef(mii, mbb, DstReg, SrcReg)) { - SmallSet UniqueUses; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &mop = MI->getOperand(i); - if (mop.isReg() && mop.getReg() && - TargetRegisterInfo::isVirtualRegister(mop.getReg())) { - unsigned reg = mop.getReg(); - // Multiple uses of reg by the same instruction. It should not - // contribute to spill weight again. 
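CalculateSpillWeights above consolidates the weighting previously scattered through runOnMachineFunction. A rough standalone model: the 10^loopDepth scaling matches LLVM's classic getSpillWeight and is an assumption here, while the 0.5/0.9 rematerialization factors, the 1.01 hint bias, HUGE_VALF for zero-length intervals and the divide-by-size come from the hunks above (the 3.0F loop-exit bump is omitted for brevity):

#include <cmath>
#include <limits>

// Schematic spill-weight computation in the spirit of CalculateSpillWeights.
float spillWeight(unsigned NumDefsUses, unsigned LoopDepth,
                  bool ZeroLength, bool Remat, bool RematIsLoad,
                  bool HasHint, unsigned SizeInInstrs) {
  if (ZeroLength)
    return std::numeric_limits<float>::infinity(); // HUGE_VALF: never spill
  float W = NumDefsUses * std::pow(10.0f, float(LoopDepth));
  if (Remat)
    W *= RematIsLoad ? 0.9f : 0.5f; // cheap to recompute -> prefer spilling
  if (HasHint)
    W *= 1.01f;                     // slight bias toward hinted intervals
  return W / SizeInInstrs;          // large, rarely-used intervals spill first
}

int main() {
  float W = spillWeight(4, 2, false, false, false, true, 20);
  return W > 0.0f ? 0 : 1;
}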
- if (UniqueUses.count(reg) != 0) - continue; - LiveInterval &RegInt = li_->getInterval(reg); - RegInt.weight += - li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth); - UniqueUses.insert(reg); - } - } + } else { ++mii; } } } - for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { - LiveInterval &LI = *I->second; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // If the live interval length is essentially zero, i.e. in every live - // range the use follows def immediately, it doesn't make sense to spill - // it and hope it will be easier to allocate for this li. - if (isZeroLengthInterval(&LI)) - LI.weight = HUGE_VALF; - else { - bool isLoad = false; - SmallVector SpillIs; - if (li_->isReMaterializable(LI, SpillIs, isLoad)) { - // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are - // loads, then it's potentially very cheap to re-materialize. - // FIXME: this gets much more complicated once we support non-trivial - // re-materialization. - if (isLoad) - LI.weight *= 0.9F; - else - LI.weight *= 0.5F; - } - } - - // Slightly prefer live interval that has been assigned a preferred reg. - std::pair Hint = mri_->getRegAllocationHint(LI.reg); - if (Hint.first || Hint.second) - LI.weight *= 1.01F; - - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. - LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; - } - } + CalculateSpillWeights(); DEBUG(dump()); return true; } /// print - Implement the dump method. -void SimpleRegisterCoalescing::print(std::ostream &O, const Module* m) const { +void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const { li_->print(O, m); } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index d2c55810f60cb..3ebe3a1f7de4e 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -18,7 +18,6 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/ADT/BitVector.h" -#include namespace llvm { class SimpleRegisterCoalescing; @@ -33,44 +32,8 @@ namespace llvm { struct CopyRec { MachineInstr *MI; unsigned LoopDepth; - bool isBackEdge; - CopyRec(MachineInstr *mi, unsigned depth, bool be) - : MI(mi), LoopDepth(depth), isBackEdge(be) {}; - }; - - template class JoinPriorityQueue; - - /// CopyRecSort - Sorting function for coalescer queue. - /// - struct CopyRecSort : public std::binary_function { - JoinPriorityQueue *JPQ; - explicit CopyRecSort(JoinPriorityQueue *jpq) : JPQ(jpq) {} - CopyRecSort(const CopyRecSort &RHS) : JPQ(RHS.JPQ) {} - bool operator()(CopyRec left, CopyRec right) const; - }; - - /// JoinQueue - A priority queue of copy instructions the coalescer is - /// going to process. - template - class JoinPriorityQueue { - SimpleRegisterCoalescing *Rc; - std::priority_queue, SF> Queue; - - public: - explicit JoinPriorityQueue(SimpleRegisterCoalescing *rc) - : Rc(rc), Queue(SF(this)) {} - - bool empty() const { return Queue.empty(); } - void push(CopyRec R) { Queue.push(R); } - CopyRec pop() { - if (empty()) return CopyRec(0, 0, false); - CopyRec R = Queue.top(); - Queue.pop(); - return R; - } - - // Callbacks to SimpleRegisterCoalescing. 
- unsigned getRepIntervalSize(unsigned Reg); + CopyRec(MachineInstr *mi, unsigned depth) + : MI(mi), LoopDepth(depth) {}; }; class SimpleRegisterCoalescing : public MachineFunctionPass, @@ -82,14 +45,11 @@ namespace llvm { const TargetInstrInfo* tii_; LiveIntervals *li_; const MachineLoopInfo* loopInfo; + AliasAnalysis *AA; BitVector allocatableRegs_; DenseMap allocatableRCRegs_; - /// JoinQueue - A priority queue of copy instructions the coalescer is - /// going to process. - JoinPriorityQueue *JoinQueue; - /// JoinedCopies - Keep track of copies eliminated due to coalescing. /// SmallPtrSet JoinedCopies; @@ -127,20 +87,8 @@ namespace llvm { return false; }; - /// getRepIntervalSize - Called from join priority queue sorting function. - /// It returns the size of the interval that represent the given register. - unsigned getRepIntervalSize(unsigned Reg) { - if (!li_->hasInterval(Reg)) - return 0; - return li_->getApproximateInstructionCount(li_->getInterval(Reg)) * - LiveInterval::InstrSlots::NUM; - } - /// print - Implement the dump method. - virtual void print(std::ostream &O, const Module* = 0) const; - void print(std::ostream *O, const Module* M = 0) const { - if (O) print(*O, M); - } + virtual void print(raw_ostream &O, const Module* = 0) const; private: /// joinIntervals - join compatible live intervals @@ -176,7 +124,6 @@ namespace llvm { /// classes. The registers may be either phys or virt regs. bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; - /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single @@ -199,20 +146,14 @@ namespace llvm { /// TrimLiveIntervalToLastUse - If there is a last use in the same basic /// block as the copy instruction, trim the ive interval to the last use /// and return true. - bool TrimLiveIntervalToLastUse(unsigned CopyIdx, + bool TrimLiveIntervalToLastUse(LiveIndex CopyIdx, MachineBasicBlock *CopyMBB, LiveInterval &li, const LiveRange *LR); /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - MachineInstr *CopyMI); - - /// TurnCopyIntoImpDef - If source of the specified copy is an implicit def, - /// turn the copy into an implicit def. - bool TurnCopyIntoImpDef(MachineBasicBlock::iterator &I, - MachineBasicBlock *MBB, - unsigned DstReg, unsigned SrcReg); + unsigned DstSubIdx, MachineInstr *CopyMI); /// CanCoalesceWithImpDef - Returns true if the specified copy instruction /// from an implicit def to another register can be coalesced away. @@ -266,10 +207,6 @@ namespace llvm { bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, unsigned Reg); - /// isBackEdgeCopy - Return true if CopyMI is a back edge copy. - /// - bool isBackEdgeCopy(MachineInstr *CopyMI, unsigned DstReg) const; - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a /// physical register and the existing subregister number of the def / use @@ -277,10 +214,6 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); - /// RemoveDeadImpDef - Remove implicit_def instructions which are - /// "re-defining" registers due to insert_subreg coalescing. e.g. 
- void RemoveDeadImpDef(unsigned Reg, LiveInterval &LI); - /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate /// due to live range lengthening as the result of coalescing. void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); @@ -302,8 +235,13 @@ namespace llvm { /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. - MachineOperand *lastRegisterUse(unsigned Start, unsigned End, unsigned Reg, - unsigned &LastUseIdx) const; + MachineOperand *lastRegisterUse(LiveIndex Start, + LiveIndex End, unsigned Reg, + LiveIndex &LastUseIdx) const; + + /// CalculateSpillWeights - Compute spill weights for all virtual register + /// live intervals. + void CalculateSpillWeights(); void printRegName(unsigned reg) const; }; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp new file mode 100644 index 0000000000000..e987fa2fbc8e4 --- /dev/null +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -0,0 +1,520 @@ +//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use SjLj +// based exception handling. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sjljehprepare" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +STATISTIC(NumInvokes, "Number of invokes replaced"); +STATISTIC(NumUnwinds, "Number of unwinds replaced"); +STATISTIC(NumSpilled, "Number of registers live across unwind edges"); + +namespace { + class VISIBILITY_HIDDEN SjLjEHPass : public FunctionPass { + + const TargetLowering *TLI; + + const Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *ResumeFn; + Constant *BuiltinSetjmpFn; + Constant *FrameAddrFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *SelectorFn; + Constant *ExceptionFn; + + Value *CallSite; + public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPass(const TargetLowering *tli = NULL) + : FunctionPass(&ID), TLI(tli) { } + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + const char *getPassName() const { + return "SJLJ Exception Handling preparation"; + } + + private: + void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, + Value *CallSite, + SwitchInst *CatchSwitch); + void splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes); + bool insertSjLjEHSupport(Function &F); + }; +} // end anonymous namespace + +char SjLjEHPass::ID = 0; + +// Public Interface To the SjLjEHPass pass. 
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { + return new SjLjEHPass(TLI); +} +// doInitialization - Set up declarations and types needed to process +// exceptions. +bool SjLjEHPass::doInitialization(Module &M) { + // Build the function context structure. + // builtin_setjmp uses a five word jbuf + const Type *VoidPtrTy = + Type::getInt8PtrTy(M.getContext()); + const Type *Int32Ty = Type::getInt32Ty(M.getContext()); + FunctionContextTy = + StructType::get(M.getContext(), + VoidPtrTy, // __prev + Int32Ty, // call_site + ArrayType::get(Int32Ty, 4), // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + ArrayType::get(VoidPtrTy, 5), // __jbuf + NULL); + RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register", + Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), + (Type *)0); + UnregisterFn = + M.getOrInsertFunction("_Unwind_SjLj_Unregister", + Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), + (Type *)0); + ResumeFn = + M.getOrInsertFunction("_Unwind_SjLj_Resume", + Type::getVoidTy(M.getContext()), + VoidPtrTy, + (Type *)0); + FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); + LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); + SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); + ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); + PersonalityFn = 0; + + return true; +} + +/// markInvokeCallSite - Insert code to mark the call_site for this invoke +void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, + Value *CallSite, + SwitchInst *CatchSwitch) { + ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo); + // The runtime comes back to the dispatcher with the call_site - 1 in + // the context. Odd, but there it is. + ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo - 1); + + // If the unwind edge has phi nodes, split the edge. + if (isa(II->getUnwindDest()->begin())) { + SplitCriticalEdge(II, 1, this); + + // If there are any phi nodes left, they must have a single predecessor. + while (PHINode *PN = dyn_cast(II->getUnwindDest()->begin())) { + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + PN->eraseFromParent(); + } + } + + // Insert a store of the invoke num before the invoke and store zero into the + // location afterward. + new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + + // Add a switch case to our unwind block. + CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); + // We still want this to look like an invoke so we emit the LSDA properly + // FIXME: ??? Or will this cause strangeness with mis-matched IDs like + // when it was in the front end? +} + +/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until +/// we reach blocks we've already seen. +static void MarkBlocksLiveIn(BasicBlock *BB, std::set &LiveBBs) { + if (!LiveBBs.insert(BB).second) return; // already been here. + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + MarkBlocksLiveIn(*PI, LiveBBs); +} + +/// splitLiveRangesLiveAcrossInvokes - Each value that is live across an unwind edge +/// we spill into a stack location, guaranteeing that there is nothing live +/// across the unwind edge. This process also splits all critical edges +/// coming out of invokes.
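(Editorial aside: the FunctionContextTy built in doInitialization above mirrors the record that the SjLj unwind runtime walks at unwind time. A minimal C++ sketch of that layout, using the field names from the comments above; the authoritative definition lives in the unwinder, so treat this as illustrative only.)

    struct SjLjFunctionContext {
      SjLjFunctionContext *Prev; // __prev: chain maintained by _Unwind_SjLj_Register
      int CallSite;              // call_site: which invoke this frame is executing
      int Data[4];               // __data: exception pointer/selector come back here
      void *Personality;         // __personality: EH personality routine
      void *LSDA;                // __lsda: language-specific data for this function
      void *JBuf[5];             // __jbuf: five-word buffer filled by builtin setjmp
    };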
+void SjLjEHPass:: +splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes) { + // First step, split all critical edges from invoke instructions. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + InvokeInst *II = Invokes[i]; + SplitCriticalEdge(II, 0, this); + SplitCriticalEdge(II, 1, this); + assert(!isa(II->getNormalDest()) && + !isa(II->getUnwindDest()) && + "critical edge splitting left single entry phi nodes?"); + } + + Function *F = Invokes.back()->getParent()->getParent(); + + // To avoid having to handle incoming arguments specially, we lower each arg + // to a copy instruction in the entry block. This ensures that the argument + // value itself cannot be live across the entry block. + BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); + while (isa(AfterAllocaInsertPt) && + isa(cast(AfterAllocaInsertPt)->getArraySize())) + ++AfterAllocaInsertPt; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI) { + // This is always a no-op cast because we're casting AI to AI->getType() so + // src and destination types are identical. BitCast is the only possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Normally it is forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, we're + // replacing it here with the same value it was constructed with to simply + // make NC its user. + NC->setOperand(0, AI); + } + + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = II; + if (Inst->use_empty()) continue; + if (Inst->hasOneUse() && + cast(Inst->use_back())->getParent() == BB && + !isa(Inst->use_back())) continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast(Inst)) + if (isa(AI->getArraySize()) && BB == F->begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + if (User->getParent() != BB || isa(User)) + Users.push_back(User); + } + + // Find all of the blocks that this value is live in. + std::set LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + NeedsSpill = true; + } + } + + // If we decided we need a spill, do it.
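(Editorial aside: MarkBlocksLiveIn above is a plain backward flood fill. A self-contained sketch of the same idea over a toy CFG; the Block type is invented for this example, not an LLVM API. Note that the caller attributes a PHI use to the incoming predecessor block before starting the walk, which is why the fill itself never needs to reason about PHIs.)

    #include <set>
    #include <vector>

    struct Block {
      std::vector<Block*> Preds; // predecessor blocks
    };

    // Insert BB and all of its predecessors into LiveBBs, stopping at
    // blocks that are already marked live.
    static void markLiveIn(Block *BB, std::set<Block*> &LiveBBs) {
      if (!LiveBBs.insert(BB).second)
        return; // already been here
      for (unsigned i = 0, e = BB->Preds.size(); i != e; ++i)
        markLiveIn(BB->Preds[i], LiveBBs);
    }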
+ if (NeedsSpill) { + ++NumSpilled; + DemoteRegToStack(*Inst, true); + } + } +} + +bool SjLjEHPass::insertSjLjEHSupport(Function &F) { + SmallVector Returns; + SmallVector Unwinds; + SmallVector Invokes; + + // Look through the terminators of the basic blocks to find invokes, returns + // and unwinds + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { + // Remember all return instructions in case we insert an invoke into this + // function. + Returns.push_back(RI); + } else if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + Invokes.push_back(II); + } else if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { + Unwinds.push_back(UI); + } + // If we don't have any invokes or unwinds, there's nothing to do. + if (Unwinds.empty() && Invokes.empty()) return false; + + // Find the eh.selector.* and eh.exception calls. We'll use the first + // eh.selector to determine the right personality function to use. For + // SJLJ, we always use the same personality for the whole function, + // not on a per-selector basis. + // FIXME: That's a bit ugly. Better way? + SmallVector EH_Selectors; + SmallVector EH_Exceptions; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast(I)) { + if (CI->getCalledFunction() == SelectorFn) { + if (!PersonalityFn) PersonalityFn = CI->getOperand(2); + EH_Selectors.push_back(CI); + } else if (CI->getCalledFunction() == ExceptionFn) { + EH_Exceptions.push_back(CI); + } + } + } + } + // If we don't have any eh.selector calls, we can't determine the personality + // function. Without a personality function, we can't process exceptions. + if (!PersonalityFn) return false; + + NumInvokes += Invokes.size(); + NumUnwinds += Unwinds.size(); + + if (!Invokes.empty()) { + // We have invokes, so we need to add register/unregister calls to get + // this function onto the global unwind stack. + // + // First thing we need to do is scan the whole function for values that are + // live across unwind edges. Each value that is live across an unwind edge + // we spill into a stack location, guaranteeing that there is nothing live + // across the unwind edge. This process also splits all critical edges + // coming out of invoke's. + splitLiveRangesLiveAcrossInvokes(Invokes); + + BasicBlock *EntryBB = F.begin(); + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an + // alloca because the value needs to be added to the global context list. + unsigned Align = 4; // FIXME: Should be a TLI check? 
+ AllocaInst *FunctionContext = + new AllocaInst(FunctionContextTy, 0, Align, + "fcn_context", F.begin()->begin()); + + Value *Idxs[2]; + const Type *Int32Ty = Type::getInt32Ty(F.getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + // We need to also keep around a reference to the call_site field + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 1); + CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "call_site", + EntryBB->getTerminator()); + + // The exception selector comes back in context->data[1] + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "fc_data", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 1); + Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exc_selector_gep", + EntryBB->getTerminator()); + // The exception value comes back in context->data[0] + Idxs[1] = Zero; + Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2, + "exception_gep", + EntryBB->getTerminator()); + + // The result of the eh.selector call will be replaced with a + // a reference to the selector value returned in the function + // context. We leave the selector itself so the EH analysis later + // can use it. + for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { + CallInst *I = EH_Selectors[i]; + Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); + I->replaceAllUsesWith(SelectorVal); + } + // eh.exception calls are replaced with references to the proper + // location in the context. Unlike eh.selector, the eh.exception + // calls are removed entirely. + for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { + CallInst *I = EH_Exceptions[i]; + // Possible for there to be duplicates, so check to make sure + // the instruction hasn't already been removed. + if (!I->getParent()) continue; + Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); + const Type *Ty = Type::getInt8PtrTy(F.getContext()); + Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); + + I->replaceAllUsesWith(Val); + I->eraseFromParent(); + } + + + + + // The entry block changes to have the eh.sjlj.setjmp, with a conditional + // branch to a dispatch block for non-zero returns. If we return normally, + // we're not handling an exception and just register the function context + // and continue. + + // Create the dispatch block. The dispatch block is basically a big switch + // statement that goes to all of the invoke landing pads. + BasicBlock *DispatchBlock = + BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); + + // Insert a load in the Catch block, and a switch on its value. By default, + // we go to a block that just does an unwind (which is the correct action + // for a standard call). + BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F); + Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + + Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, + DispatchBlock); + SwitchInst *DispatchSwitch = + SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock); + // Split the entry block to insert the conditional branch for the setjmp. + BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), + "eh.sjlj.setjmp.cont"); + + // Populate the Function Context + // 1. LSDA address + // 2. Personality function address + // 3. 
jmpbuf (save FP and call eh.sjlj.setjmp) + + // LSDA address + Idxs[0] = Zero; + Idxs[1] = ConstantInt::get(Int32Ty, 4); + Value *LSDAFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", + EntryBB->getTerminator()); + new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); + + Idxs[1] = ConstantInt::get(Int32Ty, 3); + Value *PersonalityFieldPtr = + GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "lsda_gep", + EntryBB->getTerminator()); + new StoreInst(PersonalityFn, PersonalityFieldPtr, true, + EntryBB->getTerminator()); + + // Save the frame pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 5); + Value *FieldPtr + = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, + "jbuf_gep", + EntryBB->getTerminator()); + Idxs[1] = ConstantInt::get(Int32Ty, 0); + Value *ElemPtr = + GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep", + EntryBB->getTerminator()); + + Value *Val = CallInst::Create(FrameAddrFn, + ConstantInt::get(Int32Ty, 0), + "fp", + EntryBB->getTerminator()); + new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator()); + // Call the setjmp instrinsic. It fills in the rest of the jmpbuf + Value *SetjmpArg = + CastInst::Create(Instruction::BitCast, FieldPtr, + Type::getInt8PtrTy(F.getContext()), "", + EntryBB->getTerminator()); + Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, + "dispatch", + EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, DispatchVal, Zero, + "notunwind"); + // Nuke the uncond branch. + EntryBB->getTerminator()->eraseFromParent(); + + // Put in a new condbranch in its place. + BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); + + // Register the function context and make sure it's known to not throw + CallInst *Register = + CallInst::Create(RegisterFn, FunctionContext, "", + ContBlock->getTerminator()); + Register->setDoesNotThrow(); + + // At this point, we are all set up, update the invoke instructions + // to mark their call_site values, and fill in the dispatch switch + // accordingly. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) + markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); + + // The front end has likely added calls to _Unwind_Resume. We need + // to find those calls and mark the call_site as -1 immediately prior. + // resume is a noreturn function, so any block that has a call to it + // should end in an 'unreachable' instruction with the call immediately + // prior. That's how we'll search. + // ??? There's got to be a better way. this is fugly. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if ((dyn_cast(BB->getTerminator()))) { + BasicBlock::iterator I = BB->getTerminator(); + // Check the previous instruction and see if it's a resume call + if (I == BB->begin()) continue; + if (CallInst *CI = dyn_cast(--I)) { + if (CI->getCalledFunction() == ResumeFn) { + Value *NegativeOne = Constant::getAllOnesValue(Int32Ty); + new StoreInst(NegativeOne, CallSite, true, I); // volatile + } + } + } + + // Replace all unwinds with a branch to the unwind handler. + // ??? Should this ever happen with sjlj exceptions? 
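(Editorial aside: stripped of the IR plumbing, the control flow assembled above follows the classic setjmp dispatch pattern. Below is a deliberately simplified, runnable analogue, with plain setjmp/longjmp standing in for the eh.sjlj.setjmp intrinsic and the _Unwind_SjLj_* runtime, and the handler numbering invented for the example.)

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf JBuf;         // stands in for the five-word __jbuf
    static volatile int CallSite = 0; // stands in for the call_site slot

    static void mayThrow() {
      // The unwinder re-enters the frame by longjmp'ing back with a
      // dispatch value derived from the recorded call site.
      std::longjmp(JBuf, CallSite);
    }

    int main() {
      int Dispatch = setjmp(JBuf); // analogue of eh.sjlj.setjmp in the entry block
      if (Dispatch == 0) {
        // Normal path: "register" the context, then mark each call site
        // before the corresponding invoke, as markInvokeCallSite does.
        CallSite = 1;
        mayThrow();
      } else {
        switch (Dispatch) { // analogue of the eh.sjlj.setjmp.catch switch
        case 1: std::printf("landing pad for call site 1\n"); break;
        default: std::printf("no handler: keep unwinding\n"); break;
        }
      }
      return 0;
    }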
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { + BranchInst::Create(UnwindBlock, Unwinds[i]); + Unwinds[i]->eraseFromParent(); + } + + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned i = 0, e = Returns.size(); i != e; ++i) + CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); + } + + return true; +} + +bool SjLjEHPass::runOnFunction(Function &F) { + bool Res = insertSjLjEHSupport(F); + return Res; +} diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 405cd8087ac58..0277d64cdd960 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -13,12 +13,13 @@ #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -50,13 +51,13 @@ protected: /// Ensures there is space before the given machine instruction, returns the /// instruction's new number. - unsigned makeSpaceBefore(MachineInstr *mi) { + LiveIndex makeSpaceBefore(MachineInstr *mi) { if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); } - unsigned miIdx = lis->getInstructionIndex(mi); + LiveIndex miIdx = lis->getInstructionIndex(mi); assert(lis->hasGapBeforeInstr(miIdx)); @@ -65,13 +66,13 @@ protected: /// Ensure there is space after the given machine instruction, returns the /// instruction's new number. - unsigned makeSpaceAfter(MachineInstr *mi) { + LiveIndex makeSpaceAfter(MachineInstr *mi) { if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { lis->scaleNumbering(2); ls->scaleNumbering(2); } - unsigned miIdx = lis->getInstructionIndex(mi); + LiveIndex miIdx = lis->getInstructionIndex(mi); assert(lis->hasGapAfterInstr(miIdx)); @@ -82,19 +83,19 @@ protected: /// after the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis. - unsigned insertStoreAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { + LiveIndex insertStoreAfter(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { MachineBasicBlock::iterator nextInstItr(next(mi)); - unsigned miIdx = makeSpaceAfter(mi); + LiveIndex miIdx = makeSpaceAfter(mi); tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(next(mi)); MachineInstr *storeInst = &*storeInstItr; - unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM; + LiveIndex storeInstIdx = lis->getNextIndex(miIdx); assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && "Store inst index already in use."); @@ -107,15 +108,15 @@ protected: /// Insert a store of the given vreg to the given stack slot immediately /// before the given instructnion. Returns the base index of the inserted /// Instruction. 
- unsigned insertStoreBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - unsigned miIdx = makeSpaceBefore(mi); + LiveIndex insertStoreBefore(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + LiveIndex miIdx = makeSpaceBefore(mi); tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc); MachineBasicBlock::iterator storeInstItr(prior(mi)); MachineInstr *storeInst = &*storeInstItr; - unsigned storeInstIdx = miIdx - LiveInterval::InstrSlots::NUM; + LiveIndex storeInstIdx = lis->getPrevIndex(miIdx); assert(lis->getInstructionFromIndex(storeInstIdx) == 0 && "Store inst index already in use."); @@ -130,14 +131,15 @@ protected: unsigned vreg, const TargetRegisterClass *trc) { - unsigned storeInstIdx = insertStoreAfter(mi, ss, vreg, trc); - unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)), - end = lis->getUseIndex(storeInstIdx); + LiveIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc); + LiveIndex start = lis->getDefIndex(lis->getInstructionIndex(mi)), + end = lis->getUseIndex(storeInstIdx); VNInfo *vni = li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->kills.push_back(storeInstIdx); - DOUT << " Inserting store range: [" << start << ", " << end << ")\n"; + vni->addKill(storeInstIdx); + DEBUG(errs() << " Inserting store range: [" << start + << ", " << end << ")\n"); LiveRange lr(start, end, vni); li->addRange(lr); @@ -147,18 +149,18 @@ protected: /// after the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding/removing an appropriate /// range to vreg's LiveInterval. - unsigned insertLoadAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { + LiveIndex insertLoadAfter(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { MachineBasicBlock::iterator nextInstItr(next(mi)); - unsigned miIdx = makeSpaceAfter(mi); + LiveIndex miIdx = makeSpaceAfter(mi); tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(next(mi)); MachineInstr *loadInst = &*loadInstItr; - unsigned loadInstIdx = miIdx + LiveInterval::InstrSlots::NUM; + LiveIndex loadInstIdx = lis->getNextIndex(miIdx); assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && "Store inst index already in use."); @@ -172,15 +174,15 @@ protected: /// before the given instruction. Returns the base index of the inserted /// instruction. The caller is responsible for adding an appropriate /// LiveInterval to the LiveIntervals analysis.
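(Editorial aside: the mechanical unsigned-to-LiveIndex substitutions running through this spiller are the point of the refactoring: clients stop doing raw arithmetic such as miIdx + LiveInterval::InstrSlots::NUM and instead ask the analysis for adjacent indices. A toy sketch of the pattern; the real LiveIndex in LiveIntervalAnalysis is richer than this.)

    // Opaque instruction index: comparisons stay cheap, but the slot
    // encoding is private, so only the owning analysis can step indices.
    class ToyLiveIndex {
      unsigned Raw;
      friend class ToyIntervals;
      explicit ToyLiveIndex(unsigned R) : Raw(R) {}
    public:
      ToyLiveIndex() : Raw(0) {}
      bool operator<(ToyLiveIndex RHS) const { return Raw < RHS.Raw; }
      bool operator==(ToyLiveIndex RHS) const { return Raw == RHS.Raw; }
    };

    class ToyIntervals {
      static const unsigned SlotsPerInstr = 4; // encoding detail, now hidden
    public:
      ToyLiveIndex getNextIndex(ToyLiveIndex I) const {
        return ToyLiveIndex(I.Raw + SlotsPerInstr);
      }
      ToyLiveIndex getPrevIndex(ToyLiveIndex I) const {
        return ToyLiveIndex(I.Raw - SlotsPerInstr);
      }
    };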
- unsigned insertLoadBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - unsigned miIdx = makeSpaceBefore(mi); + LiveIndex insertLoadBefore(MachineInstr *mi, unsigned ss, + unsigned vreg, + const TargetRegisterClass *trc) { + LiveIndex miIdx = makeSpaceBefore(mi); tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc); MachineBasicBlock::iterator loadInstItr(prior(mi)); MachineInstr *loadInst = &*loadInstItr; - unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM; + LiveIndex loadInstIdx = lis->getPrevIndex(miIdx); assert(lis->getInstructionFromIndex(loadInstIdx) == 0 && "Load inst index already in use."); @@ -195,14 +197,15 @@ protected: unsigned vreg, const TargetRegisterClass *trc) { - unsigned loadInstIdx = insertLoadBefore(mi, ss, vreg, trc); - unsigned start = lis->getDefIndex(loadInstIdx), - end = lis->getUseIndex(lis->getInstructionIndex(mi)); + LiveIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc); + LiveIndex start = lis->getDefIndex(loadInstIdx), + end = lis->getUseIndex(lis->getInstructionIndex(mi)); VNInfo *vni = li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->kills.push_back(lis->getInstructionIndex(mi)); - DOUT << " Intserting load range: [" << start << ", " << end << ")\n"; + vni->addKill(lis->getInstructionIndex(mi)); + DEBUG(errs() << " Intserting load range: [" << start + << ", " << end << ")\n"); LiveRange lr(start, end, vni); li->addRange(lr); @@ -214,7 +217,7 @@ protected: /// immediately before each use, and stores after each def. No folding is /// attempted. std::vector trivialSpillEverywhere(LiveInterval *li) { - DOUT << "Spilling everywhere " << *li << "\n"; + DEBUG(errs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && "Attempting to spill already spilled value."); @@ -222,7 +225,7 @@ protected: assert(!li->isStackSlot() && "Trying to spill a stack slot."); - DOUT << "Trivial spill everywhere of reg" << li->reg << "\n"; + DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n"); std::vector added; @@ -234,7 +237,7 @@ protected: MachineInstr *mi = &*regItr; - DOUT << " Processing " << *mi; + DEBUG(errs() << " Processing " << *mi); do { ++regItr; @@ -318,23 +321,21 @@ public: vrm->assignVirt2StackSlot(li->reg, ss); MachineInstr *mi = 0; - unsigned storeIdx = 0; + LiveIndex storeIdx = LiveIndex(); if (valno->isDefAccurate()) { // If we have an accurate def we can just grab an iterator to the instr // after the def. mi = lis->getInstructionFromIndex(valno->def); - storeIdx = insertStoreAfter(mi, ss, li->reg, trc) + - LiveInterval::InstrSlots::DEF; + storeIdx = lis->getDefIndex(insertStoreAfter(mi, ss, li->reg, trc)); } else { // if we get here we have a PHI def. mi = &lis->getMBBFromIndex(valno->def)->front(); - storeIdx = insertStoreBefore(mi, ss, li->reg, trc) + - LiveInterval::InstrSlots::DEF; + storeIdx = lis->getDefIndex(insertStoreBefore(mi, ss, li->reg, trc)); } MachineBasicBlock *defBlock = mi->getParent(); - unsigned loadIdx = 0; + LiveIndex loadIdx = LiveIndex(); // Now we need to find the load... 
MachineBasicBlock::iterator useItr(mi); @@ -342,13 +343,11 @@ public: if (useItr != defBlock->end()) { MachineInstr *loadInst = useItr; - loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc) + - LiveInterval::InstrSlots::USE; + loadIdx = lis->getUseIndex(insertLoadBefore(loadInst, ss, li->reg, trc)); } else { MachineInstr *loadInst = &defBlock->back(); - loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc) + - LiveInterval::InstrSlots::USE; + loadIdx = lis->getUseIndex(insertLoadAfter(loadInst, ss, li->reg, trc)); } li->removeRange(storeIdx, loadIdx, true); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index c179f1e3df97c..350bc6e1ade7d 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -148,7 +148,8 @@ bool StackProtector::InsertStackProtectors() { // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) // - PointerType *PtrTy = PointerType::getUnqual(Type::Int8Ty); + PointerType *PtrTy = PointerType::getUnqual( + Type::getInt8Ty(RI->getContext())); StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); BasicBlock &Entry = F->getEntryBlock(); @@ -201,7 +202,7 @@ bool StackProtector::InsertStackProtectors() { // Generate the stack protector instructions in the old basic block. LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); LoadInst *LI2 = new LoadInst(AI, "", true, BB); - ICmpInst *Cmp = new ICmpInst(CmpInst::ICMP_EQ, LI1, LI2, "", BB); + ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); BranchInst::Create(NewBB, FailBB, Cmp, BB); } @@ -215,10 +216,12 @@ bool StackProtector::InsertStackProtectors() { /// CreateFailBB - Create a basic block to jump to when the stack protector /// check fails. BasicBlock *StackProtector::CreateFailBB() { - BasicBlock *FailBB = BasicBlock::Create("CallStackCheckFailBlk", F); + BasicBlock *FailBB = BasicBlock::Create(F->getContext(), + "CallStackCheckFailBlk", F); Constant *StackChkFail = - M->getOrInsertFunction("__stack_chk_fail", Type::VoidTy, NULL); + M->getOrInsertFunction("__stack_chk_fail", + Type::getVoidTy(F->getContext()), NULL); CallInst::Create(StackChkFail, "", FailBB); - new UnreachableInst(FailBB); + new UnreachableInst(F->getContext(), FailBB); return FailBB; } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 582464478cfc1..fad0808c8931c 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" @@ -97,6 +98,7 @@ namespace { MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -197,7 +199,7 @@ void StackSlotColoring::InitializeSlots() { Assignments.resize(LastFI); // Gather all spill slots into a list. 
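(Editorial aside: the StackProtector hunks above are pure API migration, explicit LLVMContext arguments and the new ICmpInst/BasicBlock::Create signatures, but the shape of the emitted check is easy to lose in the noise. In source terms the pass produces roughly the following; __stack_chk_guard and __stack_chk_fail are the real runtime symbols, the rest is a sketch.)

    extern void *__stack_chk_guard;     // provided by the C runtime
    extern "C" void __stack_chk_fail(); // runtime failure handler, does not return

    void protectedFunction() {
      // Prologue: stash the guard in a slot near the return address (the
      // pass uses an alloca plus the intrinsic named in the comment above).
      void *volatile StackGuardSlot = __stack_chk_guard;
      // ... function body that might smash the stack ...
      // Epilogue: reload both values and compare before returning.
      if (StackGuardSlot != __stack_chk_guard)
        __stack_chk_fail(); // mismatch means the frame was corrupted
    }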
- DOUT << "Spill slot intervals:\n"; + DEBUG(errs() << "Spill slot intervals:\n"); for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) { LiveInterval &li = i->second; DEBUG(li.dump()); @@ -209,7 +211,7 @@ void StackSlotColoring::InitializeSlots() { OrigSizes[FI] = MFI->getObjectSize(FI); AllColors.set(FI); } - DOUT << '\n'; + DEBUG(errs() << '\n'); // Sort them by weight. std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); @@ -241,7 +243,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, return false; bool Changed = false; - DOUT << "Assigning unused registers to spill slots:\n"; + DEBUG(errs() << "Assigning unused registers to spill slots:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -271,7 +273,8 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, AllColored = false; continue; } else { - DOUT << "Assigning fi#" << RSS << " to " << TRI->getName(Reg) << '\n'; + DEBUG(errs() << "Assigning fi#" << RSS << " to " + << TRI->getName(Reg) << '\n'); ColoredRegs.push_back(Reg); SlotMapping[RSS] = Reg; SlotIsReg.set(RSS); @@ -298,7 +301,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, ++NumEliminated; } } - DOUT << '\n'; + DEBUG(errs() << '\n'); return Changed; } @@ -333,7 +336,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Record the assignment. Assignments[Color].push_back(li); int FI = li->getStackSlotIndex(); - DOUT << "Assigning fi#" << FI << " to fi#" << Color << "\n"; + DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); // Change size and alignment of the allocated slot. If there are multiple // objects sharing the same slot, then make sure the size and alignment @@ -357,7 +360,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); - DOUT << "Color spill slot intervals:\n"; + DEBUG(errs() << "Color spill slot intervals:\n"); bool Changed = false; for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; @@ -371,7 +374,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { Changed |= (SS != NewSS); } - DOUT << "\nSpill slots after coloring:\n"; + DEBUG(errs() << "\nSpill slots after coloring:\n"); for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { LiveInterval *li = SSIntervals[i]; int SS = li->getStackSlotIndex(); @@ -383,7 +386,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { #ifndef NDEBUG for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) DEBUG(SSIntervals[i]->dump()); - DOUT << '\n'; + DEBUG(errs() << '\n'); #endif // Can we "color" a stack slot with a unused register? @@ -415,7 +418,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { // Delete unused stack slots. while (NextColor != -1) { - DOUT << "Removing unused stack object fi#" << NextColor << "\n"; + DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n"); MFI->RemoveStackObject(NextColor); NextColor = AllColors.find_next(NextColor); } @@ -449,6 +452,7 @@ bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { /// to old frame index with new one. void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF) { + // Update the operands. 
for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) @@ -459,22 +463,15 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, MO.setIndex(NewFI); } - // Update the MachineMemOperand for the new memory location. - // FIXME: We need a better method of managing these too. - SmallVector MMOs(MI->memoperands_begin(), - MI->memoperands_end()); - MI->clearMemOperands(MF); + // Update the memory references. This changes the MachineMemOperands + // directly. They may be in use by multiple instructions, however all + // instructions using OldFI are being rewritten to use NewFI. const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); - for (unsigned i = 0, ee = MMOs.size(); i != ee; ++i) { - if (MMOs[i].getValue() != OldSV) - MI->addMemOperand(MF, MMOs[i]); - else { - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(NewFI), - MMOs[i].getFlags(), MMOs[i].getOffset(), - MMOs[i].getSize(), MMOs[i].getAlignment()); - MI->addMemOperand(MF, MMO); - } - } + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) + if ((*I)->getValue() == OldSV) + (*I)->setValue(NewSV); } /// PropagateBackward - Traverse backward and look for the definition of @@ -503,7 +500,16 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, if (Reg == OldReg) { if (MO.isImplicit()) return false; - const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + + // Abort if the use is actually a sub-register def. We don't have enough + // information to figure out if it is really legal. + if (MO.getSubReg() || + TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG || + TID.getOpcode() == TargetInstrInfo::INSERT_SUBREG || + TID.getOpcode() == TargetInstrInfo::SUBREG_TO_REG) + return false; + + const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); if (RC && !RC->contains(NewReg)) return false; @@ -547,7 +553,6 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, SmallVector Uses; while (++MII != MBB->end()) { - bool FoundUse = false; bool FoundKill = false; const TargetInstrDesc &TID = MII->getDesc(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { @@ -561,12 +566,18 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, if (MO.isDef() || MO.isImplicit()) return false; - const TargetRegisterClass *RC = getInstrOperandRegClass(TRI, TID, i); + // Abort if the use is actually a sub-register use. We don't have enough + // information to figure out if it is really legal.
+ if (MO.getSubReg() || + TID.getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) + return false; + + const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); if (RC && !RC->contains(NewReg)) return false; - FoundUse = true; if (MO.isKill()) FoundKill = true; + Uses.push_back(&MO); } else if (TRI->regsOverlap(Reg, NewReg) || TRI->regsOverlap(Reg, OldReg)) @@ -593,7 +604,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, MachineBasicBlock *MBB = MI->getParent(); if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { if (PropagateForward(MI, MBB, DstReg, Reg)) { - DOUT << "Eliminated load: "; + DEBUG(errs() << "Eliminated load: "); DEBUG(MI->dump()); ++NumLoadElim; } else { @@ -609,7 +620,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, } } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DOUT << "Eliminated store: "; + DEBUG(errs() << "Eliminated store: "); DEBUG(MI->dump()); ++NumStoreElim; } else { @@ -687,7 +698,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { - DOUT << "********** Stack Slot Coloring **********\n"; + DEBUG(errs() << "********** Stack Slot Coloring **********\n"); MFI = MF.getFrameInfo(); MRI = &MF.getRegInfo(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index ca9952863b7c5..48d6dc1db4c63 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -71,6 +71,7 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); @@ -294,7 +295,7 @@ StrongPHIElimination::computeDomForest( static bool isLiveIn(unsigned r, MachineBasicBlock* MBB, LiveIntervals& LI) { LiveInterval& I = LI.getOrCreateInterval(r); - unsigned idx = LI.getMBBStartIdx(MBB); + LiveIndex idx = LI.getMBBStartIdx(MBB); return I.liveAt(idx); } @@ -427,7 +428,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { } LiveInterval& PI = LI.getOrCreateInterval(DestReg); - unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); + LiveIndex pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno; PhiValueNumber.insert(std::make_pair(DestReg, PVN->id)); @@ -553,8 +554,8 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { // Add the renaming set for this PHI node to our overall renaming information for (std::map::iterator QI = PHIUnion.begin(), QE = PHIUnion.end(); QI != QE; ++QI) { - DOUT << "Adding Renaming: " << QI->first << " -> " - << P->getOperand(0).getReg() << "\n"; + DEBUG(errs() << "Adding Renaming: " << QI->first << " -> " + << P->getOperand(0).getReg() << "\n"); } RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); @@ -696,7 +697,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*PI->getParent(), PI, t, curr.second, RC, RC); - DOUT << "Inserted copy from " << curr.second << " to " << t << "\n"; + DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t + << "\n"); // Push temporary on Stacks Stacks[curr.second].push_back(t); @@ -712,8 +714,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, map[curr.first], RC, RC); 
map[curr.first] = curr.second; - DOUT << "Inserted copy from " << curr.first << " to " - << curr.second << "\n"; + DEBUG(errs() << "Inserted copy from " << curr.first << " to " + << curr.second << "\n"); // Push this copy onto InsertedPHICopies so we can // update LiveIntervals with it. @@ -746,7 +748,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, LiveInterval& I = LI.getInterval(curr.second); MachineBasicBlock::iterator term = MBB->getFirstTerminator(); - unsigned endIdx = 0; + LiveIndex endIdx = LiveIndex(); if (term != MBB->end()) endIdx = LI.getInstructionIndex(term); else @@ -782,16 +784,15 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) { if (RegHandled.insert(I->first).second) { LiveInterval& Int = LI.getOrCreateInterval(I->first); - unsigned instrIdx = LI.getInstructionIndex(I->second); - if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) - Int.removeRange(LiveIntervals::getDefIndex(instrIdx), - LI.getMBBEndIdx(I->second->getParent())+1, + LiveIndex instrIdx = LI.getInstructionIndex(I->second); + if (Int.liveAt(LI.getDefIndex(instrIdx))) + Int.removeRange(LI.getDefIndex(instrIdx), + LI.getNextSlot(LI.getMBBEndIdx(I->second->getParent())), true); LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second); - R.valno->copy = I->second; - R.valno->def = - LiveIntervals::getDefIndex(LI.getInstructionIndex(I->second)); + R.valno->setCopy(I->second); + R.valno->def = LI.getDefIndex(LI.getInstructionIndex(I->second)); } } } @@ -817,7 +818,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, // Remove the live range for the old vreg. LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg()); LiveInterval::iterator OldLR = OldInt.FindLiveRangeContaining( - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + LI.getUseIndex(LI.getInstructionIndex(I))); if (OldLR != OldInt.end()) OldInt.removeRange(*OldLR, true); @@ -829,11 +830,11 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, VNInfo* FirstVN = *Int.vni_begin(); FirstVN->setHasPHIKill(false); if (I->getOperand(i).isKill()) - FirstVN->kills.push_back( - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))); + FirstVN->addKill( + LI.getUseIndex(LI.getInstructionIndex(I))); LiveRange LR (LI.getMBBStartIdx(I->getParent()), - LiveIntervals::getUseIndex(LI.getInstructionIndex(I))+1, + LI.getNextSlot(LI.getUseIndex(LI.getInstructionIndex(I))), FirstVN); Int.addRange(LR); @@ -868,8 +869,8 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary, for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { LiveRange R = *I; - unsigned Start = R.start; - unsigned End = R.end; + LiveIndex Start = R.start; + LiveIndex End = R.end; if (LHS.getLiveRangeContaining(Start)) return false; @@ -927,7 +928,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { unsigned reg = OI->first; ++OI; I->second.erase(reg); - DOUT << "Removing Renaming: " << reg << " -> " << I->first << "\n"; + DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first + << "\n"); } } } @@ -944,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { while (I->second.size()) { std::map::iterator SI = I->second.begin(); - DOUT << "Renaming: " << SI->first << " -> " << I->first << "\n"; + DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n"); if (SI->first != I->first) { if (mergeLiveIntervals(I->first, SI->first)) { @@ -965,19 
+967,19 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LI.computeNumbering(); LiveInterval& Int = LI.getOrCreateInterval(I->first); - unsigned instrIdx = + LiveIndex instrIdx = LI.getInstructionIndex(--SI->second->getFirstTerminator()); - if (Int.liveAt(LiveIntervals::getDefIndex(instrIdx))) - Int.removeRange(LiveIntervals::getDefIndex(instrIdx), - LI.getMBBEndIdx(SI->second)+1, true); + if (Int.liveAt(LI.getDefIndex(instrIdx))) + Int.removeRange(LI.getDefIndex(instrIdx), + LI.getNextSlot(LI.getMBBEndIdx(SI->second)), true); LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, --SI->second->getFirstTerminator()); - R.valno->copy = --SI->second->getFirstTerminator(); - R.valno->def = LiveIntervals::getDefIndex(instrIdx); + R.valno->setCopy(--SI->second->getFirstTerminator()); + R.valno->def = LI.getDefIndex(instrIdx); - DOUT << "Renaming failed: " << SI->first << " -> " - << I->first << "\n"; + DEBUG(errs() << "Renaming failed: " << SI->first << " -> " + << I->first << "\n"); } } @@ -1009,7 +1011,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { if (PI.containsOneValue()) { LI.removeInterval(DestReg); } else { - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); PI.removeRange(*PI.getLiveRangeContaining(idx), true); } } else { @@ -1023,8 +1025,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { LiveInterval& InputI = LI.getInterval(reg); if (MBB != PInstr->getParent() && InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) && - InputI.expiredAt(LI.getInstructionIndex(PInstr) + - LiveInterval::InstrSlots::NUM)) + InputI.expiredAt(LI.getNextIndex(LI.getInstructionIndex(PInstr)))) InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), LI.getInstructionIndex(PInstr), true); @@ -1032,7 +1033,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { // If the PHI is not dead, then the valno defined by the PHI // now has an unknown def. - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); + LiveIndex idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); const LiveRange* PLR = PI.getLiveRangeContaining(idx); PLR->valno->setIsPHIDef(true); LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index b7595990de74c..c646869e8a735 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -13,21 +13,35 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; // commuteInstruction - The default implementation of this method just exchanges -// operand 1 and 2. +// the two operands returned by findCommutedOpIndices. MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, bool NewMI) const { const TargetInstrDesc &TID = MI->getDesc(); bool HasDef = TID.getNumDefs(); - unsigned Idx1 = HasDef ? 1 : 0; - unsigned Idx2 = HasDef ? 
2 : 1; + if (HasDef && !MI->getOperand(0).isReg()) + // No idea how to commute this instruction. Target should implement its own. + return 0; + unsigned Idx1, Idx2; + if (!findCommutedOpIndices(MI, Idx1, Idx2)) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Don't know how to commute: " << *MI; + llvm_report_error(Msg.str()); + } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); @@ -70,26 +84,24 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, return MI; } -/// CommuteChangesDestination - Return true if commuting the specified -/// instruction will also changes the destination operand. Also return the -/// current operand index of the would be new destination register by -/// reference. This can happen when the commutable instruction is also a -/// two-address instruction. -bool TargetInstrInfoImpl::CommuteChangesDestination(MachineInstr *MI, - unsigned &OpIdx) const{ +/// findCommutedOpIndices - If specified MI is commutable, return the two +/// operand indices that would swap value. Return true if the instruction +/// is not in a form which this routine understands. +bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getNumDefs()) + if (!TID.isCommutable()) return false; - assert(MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && - "This only knows how to commute register operands so far"); - if (MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { - // Must be two address instruction! - assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) && - "Expecting a two-address instruction!"); - OpIdx = 2; - return true; - } - return false; + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this + // is not true, then the target must implement this. + SrcOpIdx1 = TID.getNumDefs(); + SrcOpIdx2 = SrcOpIdx1 + 1; + if (!MI->getOperand(SrcOpIdx1).isReg() || + !MI->getOperand(SrcOpIdx2).isReg()) + // No idea. + return false; + return true; } @@ -122,9 +134,12 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, + unsigned SubIdx, const MachineInstr *Orig) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); + MachineOperand &MO = MI->getOperand(0); + MO.setReg(DestReg); + MO.setSubReg(SubIdx); MBB.insert(I, MI); } @@ -171,11 +186,11 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FrameIndex) != -1); - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FrameIndex), - Flags, - MFI.getObjectOffset(FrameIndex), - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex), + Flags, /*Offset=*/0, + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); NewMI->addMemOperand(MF, MMO); return NewMI; @@ -200,9 +215,93 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, if (!NewMI) return 0; // Copy the memoperands from the load to the folded instruction. 
- for (std::list::iterator I = LoadMI->memoperands_begin(), - E = LoadMI->memoperands_end(); I != E; ++I) - NewMI->addMemOperand(MF, *I); + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); return NewMI; } + +bool +TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * + MI, + AliasAnalysis * + AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + // A load from a fixed stack slot can be rematerialized. This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. + int FrameIdx = 0; + if (TII.isLoadFromStackSlot(MI, FrameIdx) && + MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + return true; + + const TargetInstrDesc &TID = MI->getDesc(); + + // Avoid instructions obviously unsafe for remat. + if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() || + TID.mayStore()) + return false; + + // Avoid instructions which load from potentially varying memory. + if (TID.mayLoad() && !MI->isInvariantLoad(AA)) + return false; + + // If any of the registers accessed are non-constant, conservatively assume + // the instruction is not rematerializable. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Check for a well-behaved physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI.def_empty(Reg)) + return false; + BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0); + if (AllocatableRegs.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!MRI.def_empty(AliasReg)) + return false; + if (AllocatableRegs.test(AliasReg)) + return false; + } + } else { + // A physreg def. We can't remat it. + return false; + } + continue; + } + + // Only allow one virtual-register def, and that in the first operand. + if (MO.isDef() != (i == 0)) + return false; + + // For the def, it should be the only def of that register. + if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() || + MRI.isLiveIn(Reg))) + return false; + + // Don't allow any virtual-register uses. Rematting an instruction with + // virtual register uses would lengthen the live ranges of the uses, which + // is not necessarily a good idea, certainly not "trivial". + if (MO.isUse()) + return false; + } + + // Everything checked out.
+ return true; +} diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3c404046f15e4..a5a0f5bdcc22b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -62,6 +63,7 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; LiveVariables *LV; + AliasAnalysis *AA; // DistanceMap - Keep track the distance of a MI from the start of the // current basic block. @@ -106,13 +108,31 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegB, unsigned Dist); + typedef std::pair, MachineInstr*> NewKill; + bool canUpdateDeletedKills(SmallVector &Kills, + SmallVector &NewKills, + MachineBasicBlock *MBB, unsigned Dist); + bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned regB, unsigned regBIdx, unsigned Dist); + + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist); + void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &Processed); + public: static char ID; // Pass identification, replacement for typeid TwoAddressInstructionPass() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); AU.addPreserved(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); @@ -143,7 +163,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MachineBasicBlock::iterator OldPos) { // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(TII, SeenStore)) + if (!MI->isSafeToMove(TII, SeenStore, AA)) return false; unsigned DefReg = 0; @@ -556,15 +576,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist) { MachineInstr *MI = mi; - DOUT << "2addr: COMMUTING : " << *MI; + DEBUG(errs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); if (NewMI == 0) { - DOUT << "2addr: COMMUTING FAILED!\n"; + DEBUG(errs() << "2addr: COMMUTING FAILED!\n"); return false; } - DOUT << "2addr: COMMUTED TO: " << *NewMI; + DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI); // If the instruction changed to commute it, update livevar. 
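(Editorial aside: the new isReallyTriviallyReMaterializableGeneric above is a conservative checklist. Condensed into a single predicate over booleans for readability; this is a restatement, not a replacement for the per-operand register scan the real code also performs.)

    // All of these must hold before an instruction may be re-executed at
    // its use points instead of being spilled and reloaded:
    bool looksTriviallyRematerializable(bool HasSideEffects, bool IsNotDuplicable,
                                        bool MayStore, bool MayLoad,
                                        bool IsInvariantLoad) {
      if (HasSideEffects || IsNotDuplicable || MayStore)
        return false; // duplicating it would change program behavior
      if (MayLoad && !IsInvariantLoad)
        return false; // the loaded value could differ at the new location
      return true;    // still subject to the physreg/virtreg operand checks
    }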
if (NewMI != MI) { if (LV) @@ -611,8 +631,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, unsigned RegB, unsigned Dist) { MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV); if (NewMI) { - DOUT << "2addr: CONVERTING 2-ADDR: " << *mi; - DOUT << "2addr: TO 3-ADDR: " << *NewMI; + DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(errs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; if (NewMI->findRegisterUseOperand(RegB, false, TRI)) @@ -734,25 +754,174 @@ static bool isSafeToDelete(MachineInstr *MI, unsigned Reg, return true; } +/// canUpdateDeletedKills - Check if all the registers listed in Kills are +/// killed by instructions in MBB preceding the current instruction at +/// position Dist. If so, return true and record information about the +/// preceding kills in NewKills. +bool TwoAddressInstructionPass:: +canUpdateDeletedKills(SmallVector &Kills, + SmallVector &NewKills, + MachineBasicBlock *MBB, unsigned Dist) { + while (!Kills.empty()) { + unsigned Kill = Kills.back(); + Kills.pop_back(); + if (TargetRegisterInfo::isPhysicalRegister(Kill)) + return false; + + MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); + if (!LastKill) + return false; + + bool isModRef = LastKill->modifiesRegister(Kill); + NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), + LastKill)); + } + return true; +} + +/// DeleteUnusedInstr - If an instruction with a tied register operand can +/// be safely deleted, just delete it. +bool +TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned regB, unsigned regBIdx, + unsigned Dist) { + // Check if the instruction has no side effects and if all its defs are dead. + SmallVector Kills; + if (!isSafeToDelete(mi, regB, TII, Kills)) + return false; + + // If this instruction kills some virtual registers, we need to + // update the kill information. If it's not possible to do so, + // then bail out. + SmallVector NewKills; + if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) + return false; + + if (LV) { + while (!NewKills.empty()) { + MachineInstr *NewKill = NewKills.back().second; + unsigned Kill = NewKills.back().first.first; + bool isDead = NewKills.back().first.second; + NewKills.pop_back(); + if (LV->removeVirtualRegisterKilled(Kill, mi)) { + if (isDead) + LV->addVirtualRegisterDead(Kill, NewKill); + else + LV->addVirtualRegisterKilled(Kill, NewKill); + } + } + + // If regB was marked as a kill, update its Kills list. + if (mi->getOperand(regBIdx).isKill()) + LV->removeVirtualRegisterKilled(regB, mi); + } + + mbbi->erase(mi); // Nuke the old inst. + mi = nmi; + return true; +} + +/// TryInstructionTransform - For the case where an instruction has a single +/// pair of tied register operands, attempt some transformations that may +/// either eliminate the tied operands or improve the opportunities for +/// coalescing away the register copy. Returns true if the tied operands +/// are eliminated altogether. 
+bool TwoAddressInstructionPass:: +TryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + MachineFunction::iterator &mbbi, + unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + const TargetInstrDesc &TID = mi->getDesc(); + unsigned regA = mi->getOperand(DstIdx).getReg(); + unsigned regB = mi->getOperand(SrcIdx).getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + + // If regA is dead and the instruction can be deleted, just delete + // it so it doesn't clobber regB. + bool regBKilled = isKilled(*mi, regB, MRI, TII); + if (!regBKilled && mi->getOperand(DstIdx).isDead() && + DeleteUnusedInstr(mi, nmi, mbbi, regB, SrcIdx, Dist)) { + ++NumDeletes; + return true; // Done with this instruction. + } + + // Check if it is profitable to commute the operands. + unsigned SrcOp1, SrcOp2; + unsigned regC = 0; + unsigned regCIdx = ~0U; + bool TryCommute = false; + bool AggressiveCommute = false; + if (TID.isCommutable() && mi->getNumOperands() >= 3 && + TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (SrcIdx == SrcOp1) + regCIdx = SrcOp2; + else if (SrcIdx == SrcOp2) + regCIdx = SrcOp1; + + if (regCIdx != ~0U) { + regC = mi->getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + // If C dies but B does not, swap the B and C operands. + // This makes the live ranges of A and C joinable. + TryCommute = true; + else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + TryCommute = true; + AggressiveCommute = true; + } + } + } + + // If it's profitable to commute, try to do so. + if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return false; + } + + if (TID.isConvertibleTo3Addr()) { + // This instruction is potentially convertible to a true + // three-address instruction. Check if it is profitable. + if (!regBKilled || isProfitableToConv3Addr(regA)) { + // Try to convert it. + if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { + ++NumConvertedTo3Addr; + return true; // Done with this instruction. + } + } + } + return false; +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DOUT << "Machine Function\n"; + DEBUG(errs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); LV = getAnalysisIfAvailable(); + AA = &getAnalysis(); bool MadeChange = false; - DOUT << "********** REWRITING TWO-ADDR INSTRS **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(errs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); // ReMatRegs - Keep track of the registers whose def's are remat'ed. BitVector ReMatRegs; ReMatRegs.resize(MRI->getLastVirtReg()+1); + typedef DenseMap, 4> > + TiedOperandMap; + TiedOperandMap TiedOperands(4); + SmallPtrSet Processed; for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); mbbi != mbbe; ++mbbi) { @@ -771,175 +940,102 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { ProcessCopy(&*mi, &*mbbi, Processed); + // First scan through all the tied register uses in this instruction + // and record a list of pairs of tied operands for each register. 
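TryInstructionTransform applies its rewrites in a fixed order: delete the instruction if the tied def is dead, otherwise try to commute so the tied source becomes the killing use, otherwise try conversion to a true three-address form. A toy sketch of that cascade with placeholder predicates; the real decisions come from TargetInstrInfo queries and the profitability heuristics shown above.

    #include <cstdio>

    // Placeholder predicates; hypothetical stand-ins for isCommutable,
    // isConvertibleTo3Addr, and the isProfitableTo* heuristics.
    static bool dstDeadAndDeletable() { return false; }
    static bool commuteHelps()        { return true;  }
    static bool threeAddrHelps()      { return false; }

    // Returns true when the tied operands were eliminated outright, so the
    // caller can skip inserting the pre-copy "a = b".
    static bool tryInstructionTransform() {
      if (dstDeadAndDeletable())
        return true;                  // instruction deleted entirely
      if (commuteHelps())
        std::puts("commute so the tied source becomes the killing use");
      if (threeAddrHelps())
        return true;                  // rewritten as a true a = b op c
      return false;                   // caller emits: a = b; a = a op c
    }

    int main() { return tryInstructionTransform() ? 0 : 1; }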
unsigned NumOps = (mi->getOpcode() == TargetInstrInfo::INLINEASM) ? mi->getNumOperands() : TID.getNumOperands(); - for (unsigned si = 0; si < NumOps; ++si) { - unsigned ti = 0; - if (!mi->isRegTiedToDefOperand(si, &ti)) + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) continue; if (FirstTied) { + FirstTied = false; ++NumTwoAddressInstrs; - DOUT << '\t'; DEBUG(mi->print(*cerr.stream(), &TM)); + DEBUG(errs() << '\t' << *mi); } - FirstTied = false; + assert(mi->getOperand(SrcIdx).isReg() && + mi->getOperand(SrcIdx).getReg() && + mi->getOperand(SrcIdx).isUse() && + "two address instruction invalid"); - assert(mi->getOperand(si).isReg() && mi->getOperand(si).getReg() && - mi->getOperand(si).isUse() && "two address instruction invalid"); + unsigned regB = mi->getOperand(SrcIdx).getReg(); + TiedOperandMap::iterator OI = TiedOperands.find(regB); + if (OI == TiedOperands.end()) { + SmallVector, 4> TiedPair; + OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; + } + OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + } - // If the two operands are the same we just remove the use - // and mark the def as def&use, otherwise we have to insert a copy. - if (mi->getOperand(ti).getReg() != mi->getOperand(si).getReg()) { - // Rewrite: - // a = b op c - // to: - // a = b - // a = a op c - unsigned regA = mi->getOperand(ti).getReg(); - unsigned regB = mi->getOperand(si).getReg(); + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + SmallVector, 4> &TiedPairs = OI->second; + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + unsigned SrcIdx = TiedPairs[0].first; + unsigned DstIdx = TiedPairs[0].second; + + // If the registers are already equal, nothing needs to be done. + if (mi->getOperand(SrcIdx).getReg() == + mi->getOperand(DstIdx).getReg()) + break; // Done with this instruction. + + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + break; // The tied operands have been eliminated. + } + + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned regB = OI->first; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + unsigned regA = mi->getOperand(DstIdx).getReg(); + // Grab regB from the instruction because it may have changed if the + // instruction was commuted. + regB = mi->getOperand(SrcIdx).getReg(); + + if (regA == regB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = regA; assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot update physical register live information"); + "cannot make instruction into two-address form"); #ifndef NDEBUG - // First, verify that we don't have a use of a in the instruction (a = - // b + a for example) because our transformation will not work. This - // should never occur because we are in SSA form. 
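The rewritten loop first collects every tied (use index, def index) pair per register before transforming anything, then takes a fast path only when exactly one register has exactly one tied pair. A self-contained model of that TiedOperandMap, using std::map in place of LLVM's DenseMap and hypothetical register numbers and operand indices:

    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      typedef std::map<unsigned, std::vector<std::pair<unsigned, unsigned> > >
          TiedOperandMap;
      TiedOperandMap TiedOperands;

      // Hypothetical instruction: operand 1 (use of vreg 1024) tied to def 0,
      // operand 3 (another use of vreg 1024) tied to def 2.
      TiedOperands[1024].push_back(std::make_pair(1u, 0u));
      TiedOperands[1024].push_back(std::make_pair(3u, 2u));

      // The single-pair transform path fires only for one register with one
      // tied pair; this example has two pairs, so it would be skipped.
      bool SinglePair = TiedOperands.size() == 1 &&
                        TiedOperands.begin()->second.size() == 1;
      return SinglePair ? 1 : 0;
    }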
+ // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == ti || + assert(i == DstIdx || !mi->getOperand(i).isReg() || mi->getOperand(i).getReg() != regA); #endif - // If this instruction is not the killing user of B, see if we can - // rearrange the code to make it so. Making it the killing user will - // allow us to coalesce A and B together, eliminating the copy we are - // about to insert. - if (!isKilled(*mi, regB, MRI, TII)) { - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. - SmallVector Kills; - if (mi->getOperand(ti).isDead() && - isSafeToDelete(mi, regB, TII, Kills)) { - SmallVector - ,MachineInstr*>, 4> NewKills; - bool ReallySafe = true; - // If this instruction kills some virtual registers, we need - // update the kill information. If it's not possible to do so, - // then bail out. - while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) { - ReallySafe = false; - break; - } - MachineInstr *LastKill = FindLastUseInMBB(Kill, &*mbbi, Dist); - if (LastKill) { - bool isModRef = LastKill->modifiesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill,isModRef), - LastKill)); - } else { - ReallySafe = false; - break; - } - } - - if (ReallySafe) { - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - // We're really going to nuke the old inst. If regB was marked - // as a kill we need to update its Kills list. - if (mi->getOperand(si).isKill()) - LV->removeVirtualRegisterKilled(regB, mi); - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - ++NumDeletes; - break; // Done with this instruction. - } - } - - // If this instruction is commutative, check to see if C dies. If - // so, swap the B and C operands. This makes the live ranges of A - // and C joinable. - // FIXME: This code also works for A := B op C instructions. - if (TID.isCommutable() && mi->getNumOperands() >= 3) { - assert(mi->getOperand(3-si).isReg() && - "Not a proper commutative instruction!"); - unsigned regC = mi->getOperand(3-si).getReg(); - if (isKilled(*mi, regC, MRI, TII)) { - if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) { - ++NumCommuted; - regB = regC; - goto InstructionRearranged; - } - } - } - - // If this instruction is potentially convertible to a true - // three-address instruction, - if (TID.isConvertibleTo3Addr()) { - // FIXME: This assumes there are no more operands which are tied - // to another register. -#ifndef NDEBUG - for (unsigned i = si + 1, e = TID.getNumOperands(); i < e; ++i) - assert(TID.getOperandConstraint(i, TOI::TIED_TO) == -1); -#endif - - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { - ++NumConvertedTo3Addr; - break; // Done with this instruction. - } - } - } - - // If it's profitable to commute the instruction, do so. 
- if (TID.isCommutable() && mi->getNumOperands() >= 3) { - unsigned regC = mi->getOperand(3-si).getReg(); - if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) - if (CommuteInstruction(mi, mbbi, regB, regC, Dist)) { - ++NumAggrCommuted; - ++NumCommuted; - regB = regC; - goto InstructionRearranged; - } - } - - // If it's profitable to convert the 2-address instruction to a - // 3-address one, do so. - if (TID.isConvertibleTo3Addr() && isProfitableToConv3Addr(regA)) { - if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) { - ++NumConvertedTo3Addr; - break; // Done with this instruction. - } - } - - InstructionRearranged: - const TargetRegisterClass* rc = MRI->getRegClass(regB); + // Emit a copy or rematerialize the definition. + const TargetRegisterClass *rc = MRI->getRegClass(regB); MachineInstr *DefMI = MRI->getVRegDef(regB); // If it's safe and profitable, remat the definition instead of // copying it. if (DefMI && DefMI->getDesc().isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, regB) && + DefMI->isSafeToReMat(TII, regB, AA) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(cerr << "2addr: REMATTING : " << *DefMI << "\n"); - TII->reMaterialize(*mbbi, mi, regA, DefMI); + DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n"); + unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI); ReMatRegs.set(regB); ++NumReMats; } else { @@ -953,32 +1049,57 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DistanceMap.insert(std::make_pair(prevMI, Dist)); DistanceMap[mi] = ++Dist; - // Update live variables for regB. - if (LV) { - if (LV->removeVirtualRegisterKilled(regB, mi)) - LV->addVirtualRegisterKilled(regB, prevMI); + DEBUG(errs() << "\t\tprepend:\t" << *prevMI); - if (LV->removeVirtualRegisterDead(regB, mi)) - LV->addVirtualRegisterDead(regB, prevMI); + MachineOperand &MO = mi->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; } + MO.setReg(regA); + } - DOUT << "\t\tprepend:\t"; DEBUG(prevMI->print(*cerr.stream(), &TM)); - - // Replace all occurences of regB with regA. + if (AllUsesCopied) { + // Replace other (un-tied) uses of regB with LastCopiedReg. for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - if (mi->getOperand(i).isReg() && - mi->getOperand(i).getReg() == regB) - mi->getOperand(i).setReg(regA); + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } } - } - assert(mi->getOperand(ti).isDef() && mi->getOperand(si).isUse()); - mi->getOperand(ti).setReg(mi->getOperand(si).getReg()); + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) + LV->addVirtualRegisterKilled(regB, prior(mi)); + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. 
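When a copy must be inserted after all, the pass first asks whether rematerializing the defining instruction is cheaper than copying. A sketch of that decision with placeholder predicates; in the pass these correspond to isAsCheapAsAMove, isSafeToReMat (now alias-analysis aware), and isProfitableToReMat.

    #include <cstdio>

    static bool asCheapAsAMove()    { return true; }
    static bool safeToReMat()       { return true; }
    static bool profitableToReMat() { return true; }

    int main() {
      if (asCheapAsAMove() && safeToReMat() && profitableToReMat())
        std::puts("remat: re-issue the def of regB directly into regA");
      else
        std::puts("copy:  insert 'regA = regB' before the instruction");
      return 0;
    }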
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } + MadeChange = true; - DOUT << "\t\trewrite to:\t"; DEBUG(mi->print(*cerr.stream(), &TM)); + DEBUG(errs() << "\t\trewrite to:\t" << *mi); } + // Clear TiedOperands here instead of at the top of the loop + // since most instructions do not have tied operands. + TiedOperands.clear(); mi = nmi; } } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index c3b213cebe954..e7c34129268e5 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -26,8 +26,11 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Type.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" @@ -42,6 +45,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid UnreachableBlockElim() : FunctionPass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + } }; } char UnreachableBlockElim::ID = 0; @@ -77,8 +84,11 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { } // Actually remove the blocks now. - for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + ProfileInfo *PI = getAnalysisIfAvailable(); + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + if (PI) PI->removeBlock(DeadBlocks[i]); DeadBlocks[i]->eraseFromParent(); + } return DeadBlocks.size(); } @@ -88,6 +98,7 @@ namespace { class VISIBILITY_HIDDEN UnreachableMachineBlockElim : public MachineFunctionPass { virtual bool runOnMachineFunction(MachineFunction &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; MachineModuleInfo *MMI; public: static char ID; // Pass identification, replacement for typeid @@ -102,10 +113,18 @@ Y("unreachable-mbb-elimination", const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y; +void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { SmallPtrSet Reachable; MMI = getAnalysisIfAvailable(); + MachineDominatorTree *MDT = getAnalysisIfAvailable(); + MachineLoopInfo *MLI = getAnalysisIfAvailable(); // Mark all reachable blocks. for (df_ext_iterator > @@ -123,6 +142,10 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { if (!Reachable.count(BB)) { DeadBlocks.push_back(BB); + // Update dominator and loop info. 
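The UnreachableBlockElim hunks that follow enforce one discipline in both the IR and machine-level passes: any analysis caching per-block data (ProfileInfo, MachineLoopInfo, MachineDominatorTree) is told about a block before the block is destroyed. A sketch of that notify-then-erase ordering with hypothetical stand-in types:

    #include <cstddef>
    #include <vector>

    struct Block { };
    struct Profile { void removeBlock(Block *) { /* drop cached counts */ } };

    void eraseDeadBlocks(std::vector<Block *> &Dead, Profile *PI) {
      for (std::size_t i = 0; i != Dead.size(); ++i) {
        if (PI) PI->removeBlock(Dead[i]);  // update the analysis first
        delete Dead[i];                    // only then destroy the block
      }
      Dead.clear();
    }

    int main() {
      Profile P;
      std::vector<Block *> Dead;
      Dead.push_back(new Block());
      eraseDeadBlocks(Dead, &P);
      return Dead.empty() ? 0 : 1;
    }

Reversing the two steps would leave the analysis holding a dangling block pointer, which is exactly what the added getAnalysisUsage preservation promises guard against.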
+ if (MLI) MLI->removeBlock(BB); + if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB); + while (BB->succ_begin() != BB->succ_end()) { MachineBasicBlock* succ = *BB->succ_begin(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 4d3417fdff514..c78f35bdb136a 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -258,7 +259,7 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { return AnyUnused; } -void VirtRegMap::print(std::ostream &OS, const Module* M) const { +void VirtRegMap::print(raw_ostream &OS, const Module* M) const { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); OS << "********** REGISTER MAP **********\n"; @@ -277,5 +278,5 @@ void VirtRegMap::print(std::ostream &OS, const Module* M) const { } void VirtRegMap::dump() const { - print(cerr); + print(errs()); } diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index fe767b7671e11..bdc2d1f712761 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -18,13 +18,13 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Streams.h" #include namespace llvm { @@ -34,6 +34,7 @@ namespace llvm { class MachineRegisterInfo; class TargetInstrInfo; class TargetRegisterInfo; + class raw_ostream; class VirtRegMap : public MachineFunctionPass { public: @@ -79,7 +80,7 @@ namespace llvm { /// Virt2SplitKillMap - This is splitted virtual register to its last use /// (kill) index mapping. - IndexedMap Virt2SplitKillMap; + IndexedMap Virt2SplitKillMap; /// ReMatMap - This is virtual register to re-materialized instruction /// mapping. Each virtual register whose definition is going to be @@ -141,7 +142,7 @@ namespace llvm { VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG), Virt2StackSlotMap(NO_STACK_SLOT), Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(0), ReMatMap(NULL), + Virt2SplitKillMap(LiveIndex()), ReMatMap(NULL), ReMatId(MAX_STACK_SLOT+1), LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -265,17 +266,17 @@ namespace llvm { } /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, unsigned index) { + void addKillPoint(unsigned virtReg, LiveIndex index) { Virt2SplitKillMap[virtReg] = index; } - unsigned getKillPoint(unsigned virtReg) const { + LiveIndex getKillPoint(unsigned virtReg) const { return Virt2SplitKillMap[virtReg]; } /// @brief remove the last use (kill) of a split virtual register. void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = 0; + Virt2SplitKillMap[virtReg] = LiveIndex(); } /// @brief returns true if the specified MachineInstr is a spill point. 
@@ -481,16 +482,11 @@ namespace llvm { return 0; } - void print(std::ostream &OS, const Module* M = 0) const; - void print(std::ostream *OS) const { if (OS) print(*OS); } + void print(raw_ostream &OS, const Module* M = 0) const; void dump() const; }; - inline std::ostream *operator<<(std::ostream *OS, const VirtRegMap &VRM) { - VRM.print(OS); - return OS; - } - inline std::ostream &operator<<(std::ostream &OS, const VirtRegMap &VRM) { + inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) { VRM.print(OS); return OS; } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index be0b016b669c0..401bcb618e429 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -9,10 +9,19 @@ #define DEBUG_TYPE "virtregrewriter" #include "VirtRegRewriter.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include using namespace llvm; @@ -45,10 +54,15 @@ RewriterOpt("rewriter", clEnumValEnd), cl::init(local)); +static cl::opt +ScheduleSpills("schedule-spills", + cl::desc("Schedule spill code"), + cl::init(false)); + VirtRegRewriter::~VirtRegRewriter() {} +namespace { - /// This class is intended for use with the new spilling framework only. It /// rewrites vreg def/uses to use the assigned preg, but does not insert any /// spill code. @@ -56,8 +70,13 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter { bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, LiveIntervals* LIs) { - DOUT << "********** REWRITE MACHINE CODE **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; + DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n"); + DEBUG(errs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); + DEBUG(errs() << "**** Machine Instrs" + << "(NOTE! Does not include spills and reloads!) ****\n"); + DEBUG(MF.dump()); + MachineRegisterInfo *mri = &MF.getRegInfo(); bool changed = false; @@ -79,14 +98,22 @@ struct VISIBILITY_HIDDEN TrivialRewriter : public VirtRegRewriter { } } } + + + DEBUG(errs() << "**** Post Machine Instrs ****\n"); + DEBUG(MF.dump()); return changed; } }; +} + // ************************************************************************ // +namespace { + /// AvailableSpills - As the local rewriter is scanning and rewriting an MBB /// from top down, keep track of which spill slots or remat are available in /// each register. 
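The VirtRegRewriter.cpp hunks also wrap the file-local classes (TrivialRewriter, and further down AvailableSpills and LocalRewriter) in anonymous namespaces, giving them internal linkage rather than relying on the VISIBILITY_HIDDEN attribute alone. A one-file illustration of the idiom:

    namespace {
    struct TrivialThing {        // visible only inside this translation unit
      int run() { return 0; }
    };
    } // end anonymous namespace

    int main() { return TrivialThing().run(); }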
@@ -154,10 +181,11 @@ public: (unsigned)CanClobber; if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Remembering RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Remembering RM#" + << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Remembering SS#" << SlotOrReMat; - DOUT << " in physreg " << TRI->getName(Reg) << "\n"; + DEBUG(errs() << "Remembering SS#" << SlotOrReMat); + DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n"); } /// canClobberPhysRegForSS - Return true if the spiller is allowed to change @@ -209,8 +237,82 @@ public: std::vector &KillOps); }; +} + // ************************************************************************ // +// Given a location where a reload of a spilled register or a remat of +// a constant is to be inserted, attempt to find a safe location to +// insert the load at an earlier point in the basic-block, to hide +// latency of the load and to avoid address-generation interlock +// issues. +static MachineBasicBlock::iterator +ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, + MachineBasicBlock::iterator const Begin, + unsigned PhysReg, + const TargetRegisterInfo *TRI, + bool DoReMat, + int SSorRMId, + const TargetInstrInfo *TII, + const MachineFunction &MF) +{ + if (!ScheduleSpills) + return InsertLoc; + + // Spill backscheduling is of primary interest to addresses, so + // don't do anything if the register isn't in the register class + // used for pointers. + + const TargetLowering *TL = MF.getTarget().getTargetLowering(); + + if (!TL->isTypeLegal(TL->getPointerTy())) + // Believe it or not, this is true on PIC16. + return InsertLoc; + + const TargetRegisterClass *ptrRegClass = + TL->getRegClassFor(TL->getPointerTy()); + if (!ptrRegClass->contains(PhysReg)) + return InsertLoc; + + // Scan upwards through the preceding instructions. If an instruction doesn't + // reference the stack slot or the register we're loading, we can + // backschedule the reload up past it. + MachineBasicBlock::iterator NewInsertLoc = InsertLoc; + while (NewInsertLoc != Begin) { + MachineBasicBlock::iterator Prev = prior(NewInsertLoc); + for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { + MachineOperand &Op = Prev->getOperand(i); + if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) + goto stop; + } + if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || + Prev->findRegisterDefOperand(PhysReg)) + goto stop; + for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) + if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || + Prev->findRegisterDefOperand(*Alias)) + goto stop; + NewInsertLoc = Prev; + } +stop:; + + // If we made it to the beginning of the block, turn around and move back + // down just past any existing reloads. They're likely to be reloads/remats + // for instructions earlier than what our current reload/remat is for, so + // they should be scheduled earlier. + if (NewInsertLoc == Begin) { + int FrameIdx; + while (InsertLoc != NewInsertLoc && + (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || + TII->isTriviallyReMaterializable(NewInsertLoc))) + ++NewInsertLoc; + } + + return NewInsertLoc; +} + +namespace { + // ReusedOp - For each reused operand, we keep track of a bit of information, // in case we need to rollback upon processing a new operand. See comments // below. @@ -276,7 +378,8 @@ public: /// GetRegForReload - We are about to emit a reload into PhysReg. 
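ComputeReloadLoc, added below, walks upward from the requested insertion point and stops at the first instruction that touches either the stack slot being reloaded or the destination register, so the load is scheduled as early as is safe to hide its latency. A self-contained model of that scan over a hypothetical instruction record:

    #include <cstddef>
    #include <vector>

    struct Inst { bool TouchesSlot, TouchesReg; };

    std::size_t computeReloadLoc(const std::vector<Inst> &MBB,
                                 std::size_t InsertLoc) {
      std::size_t New = InsertLoc;
      while (New != 0) {
        const Inst &Prev = MBB[New - 1];
        if (Prev.TouchesSlot || Prev.TouchesReg)
          break;       // cannot move the reload above this instruction
        --New;         // safe: back-schedule one step earlier
      }
      return New;
    }

    int main() {
      std::vector<Inst> B(4);
      B[1].TouchesReg = true;          // a def or use of the reload register
      return computeReloadLoc(B, 3) == 2 ? 0 : 1;
    }

The real function additionally checks register aliases and, on reaching the block top, slides back down past existing reloads; this sketch keeps only the core upward scan.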
If there /// is some other operand that is using the specified register, either pick /// a new register to use, or evict the previous reload and use this reg. - unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI, + unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, + MachineFunction &MF, MachineInstr *MI, AvailableSpills &Spills, std::vector &MaybeDeadStores, SmallSet &Rejected, @@ -295,18 +398,21 @@ public: /// sees r1 is taken by t2, tries t2's reload register r0 /// sees r0 is taken by t3, tries t3's reload register r1 /// sees r1 is taken by t2, tries t2's reload register r0 ... - unsigned GetRegForReload(unsigned PhysReg, MachineInstr *MI, + unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, AvailableSpills &Spills, std::vector &MaybeDeadStores, BitVector &RegKills, std::vector &KillOps, VirtRegMap &VRM) { SmallSet Rejected; - return GetRegForReload(PhysReg, MI, Spills, MaybeDeadStores, Rejected, - RegKills, KillOps, VRM); + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); + return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); } }; +} // ****************** // // Utility Functions // @@ -489,7 +595,14 @@ static void ReMaterialize(MachineBasicBlock &MBB, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, VirtRegMap &VRM) { - TII->reMaterialize(MBB, MII, DestReg, VRM.getReMaterializedMI(Reg)); + MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); +#ifndef NDEBUG + const TargetInstrDesc &TID = ReMatDefMI->getDesc(); + assert(TID.getNumDefs() == 1 && + "Don't know how to remat instructions that define > 1 values!"); +#endif + TII->reMaterialize(MBB, MII, DestReg, + ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -538,8 +651,8 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DOUT << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"; + DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + << " copied, it is available for use but can no longer be modified\n"); } } @@ -563,12 +676,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && "Bidirectional map mismatch!"); SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DOUT << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "; + DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg) + << " clobbered, invalidating "); if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DOUT << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 << "\n"; + DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); else - DOUT << "SS#" << SlotOrReMat << "\n"; + DEBUG(errs() << "SS#" << SlotOrReMat << "\n"); } } @@ -650,15 +763,17 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { /// GetRegForReload - We are about to emit a reload into PhysReg. If there /// is some other operand that is using the specified register, either pick /// a new register to use, or evict the previous reload and use this reg. 
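The signature change below threads a register class into GetRegForReload, so a previously assigned reload register is only reused when it actually belongs to the class required by the value being reloaded. A simplified, compilable model of that check, with a std::set standing in for TargetRegisterClass::contains:

    #include <set>

    typedef std::set<unsigned> RegClass;   // stand-in for TargetRegisterClass

    bool canReuse(const RegClass &RC, unsigned AssignedPhysReg,
                  const std::set<unsigned> &Rejected) {
      return Rejected.count(AssignedPhysReg) == 0 &&
             RC.count(AssignedPhysReg) != 0;  // RC->contains(AssignedPhysReg)
    }

    int main() {
      RegClass GR32; GR32.insert(1); GR32.insert(2);
      std::set<unsigned> Rejected; Rejected.insert(2);
      return (canReuse(GR32, 1, Rejected) && !canReuse(GR32, 2, Rejected) &&
              !canReuse(GR32, 7, Rejected)) ? 0 : 1;
    }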
-unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, +unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, + unsigned PhysReg, + MachineFunction &MF, + MachineInstr *MI, AvailableSpills &Spills, std::vector &MaybeDeadStores, SmallSet &Rejected, BitVector &RegKills, std::vector &KillOps, VirtRegMap &VRM) { - const TargetInstrInfo* TII = MI->getParent()->getParent()->getTarget() - .getInstrInfo(); + const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); + const TargetRegisterInfo *TRI = Spills.getRegInfo(); if (Reuses.empty()) return PhysReg; // This is most often empty. @@ -670,19 +785,19 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, // considered and subsequently rejected because it has also been reused // by another operand. if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0) { + Rejected.count(Op.AssignedPhysReg) == 0 && + RC->contains(Op.AssignedPhysReg)) { // Yup, use the reload register that we didn't use before. unsigned NewReg = Op.AssignedPhysReg; Rejected.insert(PhysReg); - return GetRegForReload(NewReg, MI, Spills, MaybeDeadStores, Rejected, + return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected, RegKills, KillOps, VRM); } else { // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload + // value aliases the new register. If so, codegen the previous reload // and use this one. unsigned PRRU = Op.PhysRegReused; - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - if (TRI->areAliases(PRRU, PhysReg)) { + if (TRI->regsOverlap(PRRU, PhysReg)) { // Okay, we found out that an alias of a reused register // was used. This isn't good because it means we have // to undo a previous reuse. @@ -695,21 +810,45 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, ReusedOp NewOp = Op; Reuses.erase(Reuses.begin()+ro); + // MI may be using only a sub-register of PhysRegUsed. + unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); + unsigned SubIdx = 0; + assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && + "A reuse cannot be a virtual register"); + if (PRRU != RealPhysRegUsed) { + // What was the sub-register index? + unsigned SubReg; + for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++) + if (SubReg == RealPhysRegUsed) + break; + assert(SubReg == RealPhysRegUsed && + "Operand physreg is not a sub-register of PhysRegUsed"); + } + // Ok, we're going to try to reload the assigned physreg into the // slot that we were supposed to in the first place. However, that // register could hold a reuse. Check to see if it conflicts or // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(NewOp.AssignedPhysReg, - MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - MachineBasicBlock::iterator MII = MI; - if (NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT) { - ReMaterialize(*MBB, MII, NewPhysReg, NewOp.VirtReg, TII, TRI,VRM); - } else { - TII->loadRegFromStackSlot(*MBB, MII, NewPhysReg, + unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, + MF, MI, Spills, MaybeDeadStores, + Rejected, RegKills, KillOps, VRM); + + bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; + int SSorRMId = DoReMat + ? VRM.getReMatId(NewOp.VirtReg) : NewOp.StackSlotOrReMat; + + // Back-schedule reloads and remats. 
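The sub-register handling added above has to recover which sub-register index maps the reused register PRRU onto the register the operand actually names. A model of that index search, with a hypothetical two-entry sub-register table in place of TargetRegisterInfo::getSubReg:

    #include <cassert>

    static unsigned getSubReg(unsigned Reg, unsigned Idx) {
      // Hypothetical table: register 10 has sub-regs 11 (idx 1) and 12 (idx 2).
      if (Reg == 10 && Idx == 1) return 11;
      if (Reg == 10 && Idx == 2) return 12;
      return 0;   // zero terminates the walk: no such sub-register
    }

    int main() {
      unsigned PRRU = 10, RealPhysRegUsed = 12, SubIdx, SubReg;
      for (SubIdx = 1; (SubReg = getSubReg(PRRU, SubIdx)); ++SubIdx)
        if (SubReg == RealPhysRegUsed)
          break;
      assert(SubReg == RealPhysRegUsed && SubIdx == 2);
      return 0;
    }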
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, + DoReMat, SSorRMId, TII, MF); + + if (DoReMat) { + ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, + TRI, VRM); + } else { + TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, NewOp.StackSlotOrReMat, AliasRC); - MachineInstr *LoadMI = prior(MII); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); // Any stores to this stack slot are not dead anymore. MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; @@ -718,17 +857,15 @@ unsigned ReuseInfo::GetRegForReload(unsigned PhysReg, MachineInstr *MI, Spills.ClobberPhysReg(NewPhysReg); Spills.ClobberPhysReg(NewOp.PhysRegReused); - unsigned SubIdx = MI->getOperand(NewOp.Operand).getSubReg(); unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg; MI->getOperand(NewOp.Operand).setReg(RReg); MI->getOperand(NewOp.Operand).setSubReg(0); Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - --MII; - UpdateKills(*MII, TRI, RegKills, KillOps); - DOUT << '\t' << *MII; + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(InsertLoc)); - DOUT << "Reuse undone!\n"; + DEBUG(errs() << "Reuse undone!\n"); --NumReused; // Finally, PhysReg is now available, go ahead and use it. @@ -856,6 +993,8 @@ namespace { // Local Spiller Implementation // // ***************************** // +namespace { + class VISIBILITY_HIDDEN LocalRewriter : public VirtRegRewriter { MachineRegisterInfo *RegInfo; const TargetRegisterInfo *TRI; @@ -870,10 +1009,10 @@ public: TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); AllocatableRegs = TRI->getAllocatableSet(MF); - DOUT << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"; - DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)" - " ****\n"; + DEBUG(errs() << "\n**** Local spiller rewriting function '" + << MF.getFunction()->getName() << "':\n"); + DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and" + " reloads!) ****\n"); DEBUG(MF.dump()); // Spills - Keep track of which spilled values are available in physregs @@ -924,7 +1063,7 @@ public: Spills.clear(); } - DOUT << "**** Post Machine Instrs ****\n"; + DEBUG(errs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); // Mark unused spill slots. @@ -988,6 +1127,9 @@ private: if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM)) return false; + // Back-schedule reloads and remats. + ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, false, SS, TII, MF); + // Load from SS to the spare physical register. TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC); // This invalidates Phys. @@ -999,7 +1141,7 @@ private: // Unfold current MI. 
SmallVector NewMIs; if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - assert(0 && "Unable unfold the load / store folding instruction!"); + llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); VRM.transferRestorePts(&MI, NewMIs[0]); @@ -1015,7 +1157,7 @@ private: NextMII = next(NextMII); NewMIs.clear(); if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - assert(0 && "Unable unfold the load / store folding instruction!"); + llvm_unreachable("Unable unfold the load / store folding instruction!"); assert(NewMIs.size() == 1); AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg); VRM.transferRestorePts(&NextMI, NewMIs[0]); @@ -1157,6 +1299,32 @@ private: return false; } + /// CommuteChangesDestination - We are looking for r0 = op r1, r2 and + /// where SrcReg is r1 and it is tied to r0. Return true if after + /// commuting this instruction it will be r0 = op r2, r1. + static bool CommuteChangesDestination(MachineInstr *DefMI, + const TargetInstrDesc &TID, + unsigned SrcReg, + const TargetInstrInfo *TII, + unsigned &DstIdx) { + if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3) + return false; + if (!DefMI->getOperand(1).isReg() || + DefMI->getOperand(1).getReg() != SrcReg) + return false; + unsigned DefIdx; + if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) + return false; + unsigned SrcIdx1, SrcIdx2; + if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) + return false; + if (SrcIdx1 == 1 && SrcIdx2 == 2) { + DstIdx = 2; + return true; + } + return false; + } + /// CommuteToFoldReload - /// Look for /// r1 = load fi#1 @@ -1185,7 +1353,7 @@ private: unsigned NewDstIdx; if (DefMII != MBB.begin() && TID.isCommutable() && - TII->CommuteChangesDestination(DefMI, NewDstIdx)) { + CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) { MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) @@ -1266,11 +1434,11 @@ private: TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC); MachineInstr *StoreMI = next(MII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DOUT << "Store:\t" << *StoreMI; + DEBUG(errs() << "Store:\t" << *StoreMI); // If there is a dead store to this stack slot, nuke it now. if (LastStore) { - DOUT << "Removed dead store:\t" << *LastStore; + DEBUG(errs() << "Removed dead store:\t" << *LastStore); ++NumDSE; SmallVector KillRegs; InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); @@ -1310,6 +1478,29 @@ private: ++NumStores; } + /// isSafeToDelete - Return true if this instruction doesn't produce any side + /// effect and all of its defs are dead. + static bool isSafeToDelete(MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.isCall() || TID.isBarrier() || TID.isReturn() || + TID.hasUnmodeledSideEffects()) + return false; + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && !MO.isDead()) + return false; + if (MO.isUse() && MO.isKill()) + // FIXME: We can't remove kill markers or else the scavenger will assert. + // An alternative is to add a ADD pseudo instruction to replace kill + // markers. 
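The new static isSafeToDelete above deems an instruction removable only when it has no side effects and every register it defines is dead; kill-flagged uses are kept conservatively because the scavenger relies on them. A compilable model of that test over hypothetical operand records:

    #include <cstddef>
    #include <vector>

    struct Op { bool IsReg, IsDef, IsDead, IsKill; };
    struct MI {
      bool MayLoadStore, IsCallOrTerminator, HasSideEffects;
      std::vector<Op> Ops;
    };

    bool isSafeToDelete(const MI &I) {
      if (I.MayLoadStore || I.IsCallOrTerminator || I.HasSideEffects)
        return false;
      for (std::size_t i = 0; i != I.Ops.size(); ++i) {
        const Op &O = I.Ops[i];
        if (!O.IsReg) continue;
        if (O.IsDef && !O.IsDead) return false;  // live def: must keep
        if (!O.IsDef && O.IsKill) return false;  // kill marker: must keep
      }
      return true;
    }

    int main() {
      MI I = MI();                    // no side effects, no operands
      return isSafeToDelete(I) ? 0 : 1;
    }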
+ return false; + } + return true; + } + /// TransferDeadness - A identity copy definition is dead and it's being /// removed. Find the last def or use and mark it as dead / kill. void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, @@ -1351,9 +1542,7 @@ private: if (LastUD->isDef()) { // If the instruction has no side effect, delete it and propagate // backward further. Otherwise, mark is dead and we are done. - const TargetInstrDesc &TID = LastUDMI->getDesc(); - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) { + if (!isSafeToDelete(*LastUDMI)) { LastUD->setIsDead(); break; } @@ -1375,8 +1564,8 @@ private: AvailableSpills &Spills, BitVector &RegKills, std::vector &KillOps) { - DOUT << "\n**** Local spiller rewriting MBB '" - << MBB.getBasicBlock()->getName() << "':\n"; + DEBUG(errs() << "\n**** Local spiller rewriting MBB '" + << MBB.getBasicBlock()->getName() << "':\n"); MachineFunction &MF = *MBB.getParent(); @@ -1425,15 +1614,23 @@ private: assert(RC && "Unable to determine register class!"); int SS = VRM.getEmergencySpillSlot(RC); if (UsedSS.count(SS)) - assert(0 && "Need to spill more than one physical registers!"); + llvm_unreachable("Need to spill more than one physical registers!"); UsedSS.insert(SS); TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC); MachineInstr *StoreMI = prior(MII); VRM.addSpillSlotUse(SS, StoreMI); - TII->loadRegFromStackSlot(MBB, next(MII), PhysReg, SS, RC); - MachineInstr *LoadMI = next(MII); + + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false, + SS, TII, MF); + + TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC); + + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SS, LoadMI); ++NumPSpills; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } NextMII = next(MII); } @@ -1467,28 +1664,36 @@ private: // If the value is already available in the expected register, save // a reload / remat. if (SSorRMId) - DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << SSorRMId; - DOUT << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(Phys) << "\n"; + DEBUG(errs() << "Reusing SS#" << SSorRMId); + DEBUG(errs() << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(Phys) << '\n'); ++NumOmitted; continue; } else if (InReg && InReg != Phys) { if (SSorRMId) - DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << SSorRMId; - DOUT << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" by copying it into physreg " - << TRI->getName(Phys) << "\n"; + DEBUG(errs() << "Reusing SS#" << SSorRMId); + DEBUG(errs() << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" by copying it into physreg " + << TRI->getName(Phys) << '\n'); // If the reloaded / remat value is available in another register, // copy it to the desired register. - TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC); + + // Back-schedule reloads and remats. 
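TransferDeadness, simplified by the hunk above to reuse isSafeToDelete, walks backward from a deleted identity copy to the last def or use of the register: a use gets the kill flag, a def with side effects is marked dead, and a side-effect-free def is itself deleted so the walk continues. A compilable model with hypothetical instruction records:

    #include <cstddef>
    #include <vector>

    struct Inst { bool DefsReg, UsesReg, SafeToDelete, Kill, Dead, Erased; };

    void transferDeadness(std::vector<Inst> &MBB, std::size_t From) {
      for (std::size_t i = From; i-- != 0; ) {
        Inst &I = MBB[i];
        if (I.UsesReg) { I.Kill = true; return; }        // last use: new kill
        if (I.DefsReg) {
          if (!I.SafeToDelete) { I.Dead = true; return; } // mark def dead
          I.Erased = true;                  // delete, keep propagating back
        }
      }
    }

    int main() {
      std::vector<Inst> B(3);
      B[0].UsesReg = true;                              // becomes the new kill
      B[1].DefsReg = true; B[1].SafeToDelete = true;    // gets erased
      transferDeadness(B, 2);
      return (B[1].Erased && B[0].Kill) ? 0 : 1;
    }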
+ MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat, + SSorRMId, TII, MF); + + TII->copyRegToReg(MBB, InsertLoc, Phys, InReg, RC, RC); // This invalidates Phys. Spills.ClobberPhysReg(Phys); @@ -1496,24 +1701,30 @@ private: Spills.addAvailable(SSorRMId, Phys); // Mark is killed. - MachineInstr *CopyMI = prior(MII); + MachineInstr *CopyMI = prior(InsertLoc); MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); - DOUT << '\t' << *CopyMI; + DEBUG(errs() << '\t' << *CopyMI); ++NumCopified; continue; } + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat, + SSorRMId, TII, MF); + if (VRM.isReMaterialized(VirtReg)) { - ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM); + ReMaterialize(MBB, InsertLoc, Phys, VirtReg, TII, TRI, VRM); } else { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); - TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC); - MachineInstr *LoadMI = prior(MII); + TII->loadRegFromStackSlot(MBB, InsertLoc, Phys, SSorRMId, RC); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } // This invalidates Phys. @@ -1521,8 +1732,8 @@ private: // Remember it's available. Spills.addAvailable(SSorRMId, Phys); - UpdateKills(*prior(MII), TRI, RegKills, KillOps); - DOUT << '\t' << *prior(MII); + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(MII)); } } @@ -1541,7 +1752,7 @@ private: TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC); MachineInstr *StoreMI = next(MII); VRM.addSpillSlotUse(StackSlot, StoreMI); - DOUT << "Store:\t" << *StoreMI; + DEBUG(errs() << "Store:\t" << *StoreMI); VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); } NextMII = next(MII); @@ -1660,13 +1871,14 @@ private: if (CanReuse) { // If this stack slot value is already available, reuse it! if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << ReuseSlot; - DOUT << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM.getPhys(VirtReg)) << "\n"; + DEBUG(errs() << "Reusing SS#" << ReuseSlot); + DEBUG(errs() << " from physreg " + << TRI->getName(PhysReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(VRM.getPhys(VirtReg)) << '\n'); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -1733,20 +1945,22 @@ private: // available. If this occurs, use the register indicated by the // reuser. if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands.GetRegForReload(DesignatedReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + DesignatedReg = ReusedOperands.GetRegForReload(VirtReg, + DesignatedReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); // If the mapped designated register is actually the physreg we have // incoming, we don't need to inserted a dead copy. if (DesignatedReg == PhysReg) { // If this stack slot value is already available, reuse it! 
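The reload path that follows makes a three-way choice before emitting any code: if the slot's value already sits in the assigned register the reload is omitted, if it sits in a different register a copy is cheaper than a load, and only otherwise is a load or remat emitted at the back-scheduled point. A toy sketch with placeholder register numbers:

    #include <cstdio>

    int main() {
      unsigned Phys  = 1;   // register assigned to the vreg
      unsigned InReg = 1;   // register already holding the value (0 = none)
      if (InReg && InReg == Phys)
        std::puts("omit reload: value already in the right register");
      else if (InReg)
        std::puts("copy InReg -> Phys instead of reloading from the slot");
      else
        std::puts("load/remat at the back-scheduled insertion point");
      return 0;
    }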
if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DOUT << "Reusing RM#" << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1; + DEBUG(errs() << "Reusing RM#" + << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); else - DOUT << "Reusing SS#" << ReuseSlot; - DOUT << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg - << " instead of reloading into same physreg.\n"; + DEBUG(errs() << "Reusing SS#" << ReuseSlot); + DEBUG(errs() << " from physreg " << TRI->getName(PhysReg) + << " for vreg" << VirtReg + << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -1758,9 +1972,15 @@ private: const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); RegInfo->setPhysRegUsed(DesignatedReg); ReusedOperands.markClobbered(DesignatedReg); - TII->copyRegToReg(MBB, &MI, DesignatedReg, PhysReg, RC, RC); - MachineInstr *CopyMI = prior(MII); + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(&MI, MBB.begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + + TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC); + + MachineInstr *CopyMI = prior(InsertLoc); UpdateKills(*CopyMI, TRI, RegKills, KillOps); // This invalidates DesignatedReg. @@ -1771,7 +1991,7 @@ private: SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); - DOUT << '\t' << *prior(MII); + DEBUG(errs() << '\t' << *prior(MII)); ++NumReused; continue; } // if (PhysReg) @@ -1785,22 +2005,28 @@ private: // available. If this occurs, use the register indicated by the // reuser. if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); RegInfo->setPhysRegUsed(PhysReg); ReusedOperands.markClobbered(PhysReg); if (AvoidReload) ++NumAvoided; else { + // Back-schedule reloads and remats. + MachineBasicBlock::iterator InsertLoc = + ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, DoReMat, + SSorRMId, TII, MF); + if (DoReMat) { - ReMaterialize(MBB, MII, PhysReg, VirtReg, TII, TRI, VRM); + ReMaterialize(MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, VRM); } else { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); - TII->loadRegFromStackSlot(MBB, &MI, PhysReg, SSorRMId, RC); - MachineInstr *LoadMI = prior(MII); + TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SSorRMId, RC); + MachineInstr *LoadMI = prior(InsertLoc); VRM.addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; + DistanceMap.insert(std::make_pair(LoadMI, Dist++)); } // This invalidates PhysReg. Spills.ClobberPhysReg(PhysReg); @@ -1817,8 +2043,8 @@ private: KilledMIRegs.insert(VirtReg); } - UpdateKills(*prior(MII), TRI, RegKills, KillOps); - DOUT << '\t' << *prior(MII); + UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); + DEBUG(errs() << '\t' << *prior(InsertLoc)); } unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -1832,7 +2058,7 @@ private: int PDSSlot = PotentialDeadStoreSlots[j]; MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; if (DeadStore) { - DOUT << "Removed dead store:\t" << *DeadStore; + DEBUG(errs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -1842,7 +2068,7 @@ private: } - DOUT << '\t' << MI; + DEBUG(errs() << '\t' << MI); // If we have folded references to memory operands, make sure we clear all @@ -1852,7 +2078,7 @@ private: for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) { unsigned VirtReg = I->second.first; VirtRegMap::ModRef MR = I->second.second; - DOUT << "Folded vreg: " << VirtReg << " MR: " << MR; + DEBUG(errs() << "Folded vreg: " << VirtReg << " MR: " << MR); // MI2VirtMap be can updated which invalidate the iterator. // Increment the iterator first. @@ -1861,7 +2087,7 @@ private: if (SS == VirtRegMap::NO_STACK_SLOT) continue; FoldedSS.insert(SS); - DOUT << " - StackSlot: " << SS << "\n"; + DEBUG(errs() << " - StackSlot: " << SS << "\n"); // If this folded instruction is just a use, check to see if it's a // straight load from the virt reg slot. @@ -1872,7 +2098,7 @@ private: // If this spill slot is available, turn it into a copy (or nothing) // instead of leaving it as a load! if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DOUT << "Promoted Load To Copy: " << MI; + DEBUG(errs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg); TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC); @@ -1895,7 +2121,7 @@ private: BackTracked = true; } else { - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); // Unset last kill since it's being reused. InvalidateKill(InReg, TRI, RegKills, KillOps); Spills.disallowClobberPhysReg(InReg); @@ -1965,7 +2191,7 @@ private: if (isDead) { // Previous store is dead. // If we get here, the store is dead, nuke it now. - DOUT << "Removed dead store:\t" << *DeadStore; + DEBUG(errs() << "Removed dead store:\t" << *DeadStore); InvalidateKills(*DeadStore, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(DeadStore); MBB.erase(DeadStore); @@ -2036,7 +2262,7 @@ private: if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && !MI.findRegisterUseOperand(Src)->isUndef()) { ++NumDCE; - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); SmallVector KillRegs; InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); if (MO.isDead() && !KillRegs.empty()) { @@ -2100,8 +2326,8 @@ private: if (ReusedOperands.isClobbered(PhysReg)) { // Another def has taken the assigned physreg. It must have been a // use&def which got it due to reuse. Undo the reuse! 
- PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); + PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); } } @@ -2124,7 +2350,7 @@ private: unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { ++NumDCE; - DOUT << "Removing now-noop copy: " << MI; + DEBUG(errs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); VRM.RemoveMachineInstrFromMaps(&MI); MBB.erase(&MI); @@ -2136,7 +2362,15 @@ private: } } ProcessNextInst: - DistanceMap.insert(std::make_pair(&MI, Dist++)); + // Delete dead instructions without side effects. + if (!Erased && !BackTracked && isSafeToDelete(MI)) { + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM.RemoveMachineInstrFromMaps(&MI); + MBB.erase(&MI); + Erased = true; + } + if (!Erased) + DistanceMap.insert(std::make_pair(&MI, Dist++)); if (!Erased && !BackTracked) { for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) UpdateKills(*II, TRI, RegKills, KillOps); @@ -2148,9 +2382,11 @@ private: }; +} + llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { switch (RewriterOpt) { - default: assert(0 && "Unreachable!"); + default: llvm_unreachable("Unreachable!"); case local: return new LocalRewriter(); case trivial: diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h index f9d7fbbfa1d3c..44f9df659c818 100644 --- a/lib/CodeGen/VirtRegRewriter.h +++ b/lib/CodeGen/VirtRegRewriter.h @@ -10,27 +10,9 @@ #ifndef LLVM_CODEGEN_VIRTREGREWRITER_H #define LLVM_CODEGEN_VIRTREGREWRITER_H -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Streams.h" -#include "llvm/Function.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" #include "VirtRegMap.h" -#include namespace llvm { diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp index a3364e8a72f0b..d90c50d67d92e 100644 --- a/lib/CompilerDriver/BuiltinOptions.cpp +++ b/lib/CompilerDriver/BuiltinOptions.cpp @@ -25,6 +25,8 @@ cl::list InputFilenames(cl::Positional, cl::desc(""), cl::ZeroOrMore); cl::opt OutputFilename("o", cl::desc("Output file name"), cl::value_desc("file"), cl::Prefix); +cl::opt TempDirname("temp-dir", cl::desc("Temp dir name"), + cl::value_desc(""), cl::Prefix); cl::list Languages("x", cl::desc("Specify the language of the following input files"), cl::ZeroOrMore); diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp index f3039433b0314..bb0eb7bcf197f 100644 --- a/lib/CompilerDriver/CompilationGraph.cpp +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -514,13 +514,13 @@ namespace llvm { } void CompilationGraph::writeGraph(const std::string& OutputFilename) { - std::ofstream O(OutputFilename.c_str()); + std::string ErrorInfo; + raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo); - if (O.good()) { + if (ErrorInfo.empty()) { errs() << "Writing 
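The CompilationGraph::writeGraph hunk swaps std::ofstream for raw_fd_ostream, whose constructor reports failure by filling an error string instead of requiring a good() check and explicit close(). A model of that constructor contract using C stdio; OutStream is a hypothetical stand-in, not the raw_fd_ostream API:

    #include <cstdio>
    #include <string>

    struct OutStream {
      std::FILE *F;
      OutStream(const char *Path, std::string &ErrorInfo)
          : F(std::fopen(Path, "w")) {
        if (!F) ErrorInfo = "cannot open file";   // error via out-string
      }
      ~OutStream() { if (F) std::fclose(F); }     // closed by the destructor
    };

    int main() {
      std::string ErrorInfo;
      OutStream O("graph.dot", ErrorInfo);
      if (ErrorInfo.empty())
        std::fputs("digraph G {}\n", O.F);  // WriteGraph(O, this) in the driver
      return ErrorInfo.empty() ? 0 : 1;
    }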
'"<< OutputFilename << "' file..."; llvm::WriteGraph(O, this); errs() << "done.\n"; - O.close(); } else { throw std::runtime_error("Error opening file '" + OutputFilename diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp index c9c0413028d8f..3e1fc9f124e6f 100644 --- a/lib/CompilerDriver/Main.cpp +++ b/lib/CompilerDriver/Main.cpp @@ -31,20 +31,29 @@ namespace { sys::Path getTempDir() { sys::Path tempDir; + // The --temp-dir option. + if (!TempDirname.empty()) { + tempDir = TempDirname; + } // GCC 4.5-style -save-temps handling. - if (SaveTemps == SaveTempsEnum::Unset) { + else if (SaveTemps == SaveTempsEnum::Unset) { tempDir = sys::Path::GetTemporaryDirectory(); + return tempDir; } else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) { tempDir = OutputFilename; + tempDir = tempDir.getDirname(); + } + else { + // SaveTemps == Cwd --> use current dir (leave tempDir empty). + return tempDir; + } - if (!tempDir.exists()) { - std::string ErrMsg; - if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) - throw std::runtime_error(ErrMsg); - } + if (!tempDir.exists()) { + std::string ErrMsg; + if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) + throw std::runtime_error(ErrMsg); } - // else if (SaveTemps == Cwd) -> use current dir (leave tempDir empty) return tempDir; } @@ -53,17 +62,18 @@ namespace { int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) { int ret; const sys::Path& tempDir = getTempDir(); + bool toDelete = (SaveTemps == SaveTempsEnum::Unset); try { ret = graph.Build(tempDir, langMap); } catch(...) { - if (SaveTemps == SaveTempsEnum::Unset) + if (toDelete) tempDir.eraseFromDisk(true); throw; } - if (SaveTemps == SaveTempsEnum::Unset) + if (toDelete) tempDir.eraseFromDisk(true); return ret; } diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp index cb3c7be39dd37..7310d120bff58 100644 --- a/lib/CompilerDriver/Plugin.cpp +++ b/lib/CompilerDriver/Plugin.cpp @@ -42,7 +42,7 @@ namespace { namespace llvmc { PluginLoader::PluginLoader() { - llvm::sys::SmartScopedLock Lock(&*PluginMutex); + llvm::sys::SmartScopedLock Lock(*PluginMutex); if (!pluginListInitialized) { for (PluginRegistry::iterator B = PluginRegistry::begin(), E = PluginRegistry::end(); B != E; ++B) @@ -53,7 +53,7 @@ namespace llvmc { } PluginLoader::~PluginLoader() { - llvm::sys::SmartScopedLock Lock(&*PluginMutex); + llvm::sys::SmartScopedLock Lock(*PluginMutex); if (pluginListInitialized) { for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); B != E; ++B) @@ -63,14 +63,14 @@ namespace llvmc { } void PluginLoader::PopulateLanguageMap(LanguageMap& langMap) { - llvm::sys::SmartScopedLock Lock(&*PluginMutex); + llvm::sys::SmartScopedLock Lock(*PluginMutex); for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); B != E; ++B) (*B)->PopulateLanguageMap(langMap); } void PluginLoader::PopulateCompilationGraph(CompilationGraph& graph) { - llvm::sys::SmartScopedLock Lock(&*PluginMutex); + llvm::sys::SmartScopedLock Lock(*PluginMutex); for (PluginList::iterator B = Plugins.begin(), E = Plugins.end(); B != E; ++B) (*B)->PopulateCompilationGraph(graph); diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp index 7953dd24934b7..5a32fd33c3eec 100644 --- a/lib/CompilerDriver/Tool.cpp +++ b/lib/CompilerDriver/Tool.cpp @@ -56,7 +56,7 @@ sys::Path Tool::OutFilename(const sys::Path& In, sys::Path Out; if (StopCompilation) { - if (!OutputFilename.empty() && SaveTemps != SaveTempsEnum::Obj ) { + if (!OutputFilename.empty()) { 
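The restructured getTempDir() establishes a clear precedence among the temp-directory sources. A sketch of that policy with a hypothetical enum mirroring SaveTempsEnum; the directory-creation step and the dirname extraction are elided, and the returned paths are illustrative only:

    #include <string>

    enum SaveTemps { Unset, Obj, Cwd };

    std::string pickTempDir(const std::string &TempDirname, SaveTemps ST,
                            const std::string &OutputDir) {
      if (!TempDirname.empty()) return TempDirname;  // --temp-dir wins
      if (ST == Unset) return "/tmp/llvmc-XXXXXX";   // fresh system temp dir
      if (ST == Obj && !OutputDir.empty())
        return OutputDir;                // alongside the output file
      return "";                         // Cwd: leave the path empty
    }

    int main() {
      return pickTempDir("", Obj, "build/") == "build/" ? 0 : 1;
    }

Returning early for the Unset case matches the diff: a fresh system temp directory never needs the exists/createDirectoryOnDisk step that the other branches share.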
Out.set(OutputFilename); } else if (IsJoin()) { diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index a80513f3df9e2..053d96020d373 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -13,16 +13,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "jit" +#include "llvm/ExecutionEngine/ExecutionEngine.h" + #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/ModuleProvider.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Config/alloca.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MutexGuard.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/System/Host.h" #include "llvm/Target/TargetData.h" @@ -33,12 +36,19 @@ using namespace llvm; STATISTIC(NumInitBytes, "Number of bytes of global vars initialized"); STATISTIC(NumGlobals , "Number of global vars initialized"); -ExecutionEngine::EECtorFn ExecutionEngine::JITCtor = 0; -ExecutionEngine::EECtorFn ExecutionEngine::InterpCtor = 0; +ExecutionEngine *(*ExecutionEngine::JITCtor)(ModuleProvider *MP, + std::string *ErrorStr, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel, + bool GVsWithCode) = 0; +ExecutionEngine *(*ExecutionEngine::InterpCtor)(ModuleProvider *MP, + std::string *ErrorStr) = 0; ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0; -ExecutionEngine::ExecutionEngine(ModuleProvider *P) : LazyFunctionCreator(0) { +ExecutionEngine::ExecutionEngine(ModuleProvider *P) + : EEState(*this), + LazyFunctionCreator(0) { LazyCompilationDisabled = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; @@ -105,6 +115,22 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { } +void *ExecutionEngineState::RemoveMapping( + const MutexGuard &, const GlobalValue *ToUnmap) { + std::map::iterator I = + GlobalAddressMap.find(getVH(ToUnmap)); + void *OldVal; + if (I == GlobalAddressMap.end()) + OldVal = 0; + else { + OldVal = I->second; + GlobalAddressMap.erase(I); + } + + GlobalAddressReverseMap.erase(OldVal); + return OldVal; +} + /// addGlobalMapping - Tell the execution engine that the specified global is /// at the specified location. This is used internally as functions are JIT'd /// and as global variables are laid out in memory. 
It can and should also be @@ -113,14 +139,16 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { MutexGuard locked(lock); - DOUT << "JIT: Map \'" << GV->getNameStart() << "\' to [" << Addr << "]\n"; - void *&CurVal = state.getGlobalAddressMap(locked)[GV]; + DEBUG(errs() << "JIT: Map \'" << GV->getName() + << "\' to [" << Addr << "]\n";); + void *&CurVal = EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)]; assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!"); CurVal = Addr; // If we are using the reverse mapping, add it too - if (!state.getGlobalAddressReverseMap(locked).empty()) { - const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr]; + if (!EEState.getGlobalAddressReverseMap(locked).empty()) { + AssertingVH &V = + EEState.getGlobalAddressReverseMap(locked)[Addr]; assert((V == 0 || GV == 0) && "GlobalMapping already established!"); V = GV; } @@ -131,8 +159,8 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { void ExecutionEngine::clearAllGlobalMappings() { MutexGuard locked(lock); - state.getGlobalAddressMap(locked).clear(); - state.getGlobalAddressReverseMap(locked).clear(); + EEState.getGlobalAddressMap(locked).clear(); + EEState.getGlobalAddressReverseMap(locked).clear(); } /// clearGlobalMappingsFromModule - Clear all global mappings that came from a @@ -141,13 +169,11 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { MutexGuard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) { - state.getGlobalAddressMap(locked).erase(FI); - state.getGlobalAddressReverseMap(locked).erase(FI); + EEState.RemoveMapping(locked, FI); } for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) { - state.getGlobalAddressMap(locked).erase(GI); - state.getGlobalAddressReverseMap(locked).erase(GI); + EEState.RemoveMapping(locked, GI); } } @@ -157,34 +183,25 @@ void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { MutexGuard locked(lock); - std::map &Map = state.getGlobalAddressMap(locked); + std::map &Map = + EEState.getGlobalAddressMap(locked); // Deleting from the mapping? 
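
The mapping rework above replaces raw GlobalValue* keys with value handles (getVH, AssertingVH) so that a global deleted while still registered cannot leave a dangling map entry. A condensed sketch of that pattern (not the ExecutionEngineState code itself), assuming the LLVM 2.6-era llvm/Support/ValueHandle.h; MappedVH and AddrMap are illustrative names only:

    #include "llvm/Value.h"
    #include "llvm/Support/ValueHandle.h"
    #include <cassert>
    #include <map>
    using namespace llvm;

    static std::map<Value*, void*> AddrMap; // toy stand-in for the address map

    namespace {
    class MappedVH : public CallbackVH {
    public:
      explicit MappedVH(Value *V) : CallbackVH(V) {}
      // Invoked from ~Value: drop the stale entry before the pointer dangles.
      virtual void deleted() {
        AddrMap.erase(getValPtr());
        CallbackVH::deleted();        // default behaviour: clear the handle
      }
      virtual void allUsesReplacedWith(Value *) {
        assert(false && "RAUW on a mapped value is not handled in this sketch");
      }
    };
    } // end anonymous namespace

The real code goes one step further: the handle is the map key itself, so erasing the entry destroys the handle in the same motion (hence the "Destroys *this" comment above).
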
if (Addr == 0) { - std::map::iterator I = Map.find(GV); - void *OldVal; - if (I == Map.end()) - OldVal = 0; - else { - OldVal = I->second; - Map.erase(I); - } - - if (!state.getGlobalAddressReverseMap(locked).empty()) - state.getGlobalAddressReverseMap(locked).erase(Addr); - return OldVal; + return EEState.RemoveMapping(locked, GV); } - void *&CurVal = Map[GV]; + void *&CurVal = Map[EEState.getVH(GV)]; void *OldVal = CurVal; - if (CurVal && !state.getGlobalAddressReverseMap(locked).empty()) - state.getGlobalAddressReverseMap(locked).erase(CurVal); + if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty()) + EEState.getGlobalAddressReverseMap(locked).erase(CurVal); CurVal = Addr; // If we are using the reverse mapping, add it too - if (!state.getGlobalAddressReverseMap(locked).empty()) { - const GlobalValue *&V = state.getGlobalAddressReverseMap(locked)[Addr]; + if (!EEState.getGlobalAddressReverseMap(locked).empty()) { + AssertingVH &V = + EEState.getGlobalAddressReverseMap(locked)[Addr]; assert((V == 0 || GV == 0) && "GlobalMapping already established!"); V = GV; } @@ -197,9 +214,9 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { MutexGuard locked(lock); - std::map::iterator I = - state.getGlobalAddressMap(locked).find(GV); - return I != state.getGlobalAddressMap(locked).end() ? I->second : 0; + std::map::iterator I = + EEState.getGlobalAddressMap(locked).find(EEState.getVH(GV)); + return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0; } /// getGlobalValueAtAddress - Return the LLVM global value object that starts @@ -209,34 +226,34 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { MutexGuard locked(lock); // If we haven't computed the reverse mapping yet, do so first. - if (state.getGlobalAddressReverseMap(locked).empty()) { - for (std::map::iterator - I = state.getGlobalAddressMap(locked).begin(), - E = state.getGlobalAddressMap(locked).end(); I != E; ++I) - state.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second, + if (EEState.getGlobalAddressReverseMap(locked).empty()) { + for (std::map::iterator + I = EEState.getGlobalAddressMap(locked).begin(), + E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I) + EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(I->second, I->first)); } - std::map::iterator I = - state.getGlobalAddressReverseMap(locked).find(Addr); - return I != state.getGlobalAddressReverseMap(locked).end() ? I->second : 0; + std::map >::iterator I = + EEState.getGlobalAddressReverseMap(locked).find(Addr); + return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0; } // CreateArgv - Turn a vector of strings into a nice argv style array of // pointers to null terminated strings. 
// -static void *CreateArgv(ExecutionEngine *EE, +static void *CreateArgv(LLVMContext &C, ExecutionEngine *EE, const std::vector &InputArgv) { unsigned PtrSize = EE->getTargetData()->getPointerSize(); char *Result = new char[(InputArgv.size()+1)*PtrSize]; - DOUT << "JIT: ARGV = " << (void*)Result << "\n"; - const Type *SBytePtr = PointerType::getUnqual(Type::Int8Ty); + DEBUG(errs() << "JIT: ARGV = " << (void*)Result << "\n"); + const Type *SBytePtr = Type::getInt8PtrTy(C); for (unsigned i = 0; i != InputArgv.size(); ++i) { unsigned Size = InputArgv[i].size()+1; char *Dest = new char[Size]; - DOUT << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"; + DEBUG(errs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest); Dest[Size-1] = 0; @@ -257,7 +274,8 @@ static void *CreateArgv(ExecutionEngine *EE, /// runStaticConstructorsDestructors - This method is used to execute all of /// the static constructors or destructors for a module, depending on the /// value of isDtors. -void ExecutionEngine::runStaticConstructorsDestructors(Module *module, bool isDtors) { +void ExecutionEngine::runStaticConstructorsDestructors(Module *module, + bool isDtors) { const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors"; // Execute global ctors/dtors for each module in the program. @@ -327,49 +345,47 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, unsigned NumArgs = Fn->getFunctionType()->getNumParams(); const FunctionType *FTy = Fn->getFunctionType(); const Type* PPInt8Ty = - PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); + PointerType::getUnqual(PointerType::getUnqual( + Type::getInt8Ty(Fn->getContext()))); switch (NumArgs) { case 3: if (FTy->getParamType(2) != PPInt8Ty) { - cerr << "Invalid type for third argument of main() supplied\n"; - abort(); + llvm_report_error("Invalid type for third argument of main() supplied"); } // FALLS THROUGH case 2: if (FTy->getParamType(1) != PPInt8Ty) { - cerr << "Invalid type for second argument of main() supplied\n"; - abort(); + llvm_report_error("Invalid type for second argument of main() supplied"); } // FALLS THROUGH case 1: - if (FTy->getParamType(0) != Type::Int32Ty) { - cerr << "Invalid type for first argument of main() supplied\n"; - abort(); + if (FTy->getParamType(0) != Type::getInt32Ty(Fn->getContext())) { + llvm_report_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH case 0: if (!isa(FTy->getReturnType()) && - FTy->getReturnType() != Type::VoidTy) { - cerr << "Invalid return type of main() supplied\n"; - abort(); + FTy->getReturnType() != Type::getVoidTy(FTy->getContext())) { + llvm_report_error("Invalid return type of main() supplied"); } break; default: - cerr << "Invalid number of arguments of main() supplied\n"; - abort(); + llvm_report_error("Invalid number of arguments of main() supplied"); } if (NumArgs) { GVArgs.push_back(GVArgc); // Arg #0 = argc. if (NumArgs > 1) { - GVArgs.push_back(PTOGV(CreateArgv(this, argv))); // Arg #1 = argv. + // Arg #1 = argv. + GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, argv))); assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) && "argv[0] was null after CreateArgv"); if (NumArgs > 2) { std::vector EnvVars; for (unsigned i = 0; envp[i]; ++i) EnvVars.push_back(envp[i]); - GVArgs.push_back(PTOGV(CreateArgv(this, EnvVars))); // Arg #2 = envp. + // Arg #2 = envp. 
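
Both CreateArgv and the main() signature checks below now pull primitive types through an explicit LLVMContext rather than the old Type::Int8Ty-style globals. A small sketch of the 2.6 accessors involved; typeSketch is an illustrative helper, not part of the patch:

    #include "llvm/LLVMContext.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    void typeSketch(LLVMContext &C) {
      const Type *I32 = Type::getInt32Ty(C);          // was Type::Int32Ty
      const PointerType *I8P = Type::getInt8PtrTy(C); // i8* in context C
      const Type *PPI8 = PointerType::getUnqual(I8P); // i8**, as for argv/envp
      (void)I32; (void)PPI8;                          // silence unused warnings
    }
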
+ GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, EnvVars))); } } } @@ -383,27 +399,73 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, ExecutionEngine *ExecutionEngine::create(ModuleProvider *MP, bool ForceInterpreter, std::string *ErrorStr, - CodeGenOpt::Level OptLevel) { - ExecutionEngine *EE = 0; + CodeGenOpt::Level OptLevel, + bool GVsWithCode) { + return EngineBuilder(MP) + .setEngineKind(ForceInterpreter + ? EngineKind::Interpreter + : EngineKind::JIT) + .setErrorStr(ErrorStr) + .setOptLevel(OptLevel) + .setAllocateGVsWithCode(GVsWithCode) + .create(); +} +ExecutionEngine *ExecutionEngine::create(Module *M) { + return EngineBuilder(M).create(); +} + +/// EngineBuilder - Overloaded constructor that automatically creates an +/// ExistingModuleProvider for an existing module. +EngineBuilder::EngineBuilder(Module *m) : MP(new ExistingModuleProvider(m)) { + InitEngine(); +} + +ExecutionEngine *EngineBuilder::create() { // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) return 0; - // Unless the interpreter was explicitly selected, try making a JIT. - if (!ForceInterpreter && JITCtor) - EE = JITCtor(MP, ErrorStr, OptLevel); + // If the user specified a memory manager but didn't specify which engine to + // create, we assume they only want the JIT, and we fail if they only want + // the interpreter. + if (JMM) { + if (WhichEngine & EngineKind::JIT) + WhichEngine = EngineKind::JIT; + else { + if (ErrorStr) + *ErrorStr = "Cannot create an interpreter with a memory manager."; + return 0; + } + } - // If we can't make a JIT, make an interpreter instead. - if (EE == 0 && InterpCtor) - EE = InterpCtor(MP, ErrorStr, OptLevel); + // Unless the interpreter was explicitly selected or the JIT is not linked, + // try making a JIT. + if (WhichEngine & EngineKind::JIT) { + if (ExecutionEngine::JITCtor) { + ExecutionEngine *EE = + ExecutionEngine::JITCtor(MP, ErrorStr, JMM, OptLevel, + AllocateGVsWithCode); + if (EE) return EE; + } + } - return EE; -} + // If we can't make a JIT and we didn't request one specifically, try making + // an interpreter instead. + if (WhichEngine & EngineKind::Interpreter) { + if (ExecutionEngine::InterpCtor) + return ExecutionEngine::InterpCtor(MP, ErrorStr); + if (ErrorStr) + *ErrorStr = "Interpreter has not been linked in."; + return 0; + } -ExecutionEngine *ExecutionEngine::create(Module *M) { - return create(new ExistingModuleProvider(M)); + if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) { + if (ErrorStr) + *ErrorStr = "JIT has not been linked in."; + } + return 0; } /// getPointerToGlobal - This returns the address of the specified global @@ -414,7 +476,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { return getPointerToFunction(F); MutexGuard locked(lock); - void *p = state.getGlobalAddressMap(locked)[GV]; + void *p = EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)]; if (p) return p; @@ -423,8 +485,8 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { const_cast(dyn_cast(GV))) EmitGlobalVariable(GVar); else - assert(0 && "Global hasn't had an address allocated yet!"); - return state.getGlobalAddressMap(locked)[GV]; + llvm_unreachable("Global hasn't had an address allocated yet!"); + return EEState.getGlobalAddressMap(locked)[EEState.getVH(GV)]; } /// This function converts a Constant* into a GenericValue. 
The interesting @@ -482,11 +544,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::UIToFP: { GenericValue GV = getConstantValue(Op0); - if (CE->getType() == Type::FloatTy) + if (CE->getType()->isFloatTy()) GV.FloatVal = float(GV.IntVal.roundToDouble()); - else if (CE->getType() == Type::DoubleTy) + else if (CE->getType()->isDoubleTy()) GV.DoubleVal = GV.IntVal.roundToDouble(); - else if (CE->getType() == Type::X86_FP80Ty) { + else if (CE->getType()->isX86_FP80Ty()) { const uint64_t zero[] = {0, 0}; APFloat apf = APFloat(APInt(80, 2, zero)); (void)apf.convertFromAPInt(GV.IntVal, @@ -498,11 +560,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::SIToFP: { GenericValue GV = getConstantValue(Op0); - if (CE->getType() == Type::FloatTy) + if (CE->getType()->isFloatTy()) GV.FloatVal = float(GV.IntVal.signedRoundToDouble()); - else if (CE->getType() == Type::DoubleTy) + else if (CE->getType()->isDoubleTy()) GV.DoubleVal = GV.IntVal.signedRoundToDouble(); - else if (CE->getType() == Type::X86_FP80Ty) { + else if (CE->getType()->isX86_FP80Ty()) { const uint64_t zero[] = { 0, 0}; APFloat apf = APFloat(APInt(80, 2, zero)); (void)apf.convertFromAPInt(GV.IntVal, @@ -516,11 +578,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Instruction::FPToSI: { GenericValue GV = getConstantValue(Op0); uint32_t BitWidth = cast(CE->getType())->getBitWidth(); - if (Op0->getType() == Type::FloatTy) + if (Op0->getType()->isFloatTy()) GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth); - else if (Op0->getType() == Type::DoubleTy) + else if (Op0->getType()->isDoubleTy()) GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth); - else if (Op0->getType() == Type::X86_FP80Ty) { + else if (Op0->getType()->isX86_FP80Ty()) { APFloat apf = APFloat(GV.IntVal); uint64_t v; bool ignored; @@ -550,20 +612,22 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { GenericValue GV = getConstantValue(Op0); const Type* DestTy = CE->getType(); switch (Op0->getType()->getTypeID()) { - default: assert(0 && "Invalid bitcast operand"); + default: llvm_unreachable("Invalid bitcast operand"); case Type::IntegerTyID: assert(DestTy->isFloatingPoint() && "invalid bitcast"); - if (DestTy == Type::FloatTy) + if (DestTy->isFloatTy()) GV.FloatVal = GV.IntVal.bitsToFloat(); - else if (DestTy == Type::DoubleTy) + else if (DestTy->isDoubleTy()) GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: - assert(DestTy == Type::Int32Ty && "Invalid bitcast"); + assert(DestTy == Type::getInt32Ty(DestTy->getContext()) && + "Invalid bitcast"); GV.IntVal.floatToBits(GV.FloatVal); break; case Type::DoubleTyID: - assert(DestTy == Type::Int64Ty && "Invalid bitcast"); + assert(DestTy == Type::getInt64Ty(DestTy->getContext()) && + "Invalid bitcast"); GV.IntVal.doubleToBits(GV.DoubleVal); break; case Type::PointerTyID: @@ -589,10 +653,10 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { GenericValue RHS = getConstantValue(CE->getOperand(1)); GenericValue GV; switch (CE->getOperand(0)->getType()->getTypeID()) { - default: assert(0 && "Bad add type!"); abort(); + default: llvm_unreachable("Bad add type!"); case Type::IntegerTyID: switch (CE->getOpcode()) { - default: assert(0 && "Invalid integer opcode"); + default: llvm_unreachable("Invalid integer opcode"); case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break; case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; 
break; case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break; @@ -607,7 +671,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { break; case Type::FloatTyID: switch (CE->getOpcode()) { - default: assert(0 && "Invalid float opcode"); abort(); + default: llvm_unreachable("Invalid float opcode"); case Instruction::FAdd: GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break; case Instruction::FSub: @@ -622,7 +686,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { break; case Type::DoubleTyID: switch (CE->getOpcode()) { - default: assert(0 && "Invalid double opcode"); abort(); + default: llvm_unreachable("Invalid double opcode"); case Instruction::FAdd: GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break; case Instruction::FSub: @@ -640,7 +704,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Type::FP128TyID: { APFloat apfLHS = APFloat(LHS.IntVal); switch (CE->getOpcode()) { - default: assert(0 && "Invalid long double opcode"); abort(); + default: llvm_unreachable("Invalid long double opcode"); case Instruction::FAdd: apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); @@ -670,8 +734,10 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { default: break; } - cerr << "ConstantExpr not handled: " << *CE << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "ConstantExpr not handled: " << *CE; + llvm_report_error(Msg.str()); } GenericValue Result; @@ -698,11 +764,13 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { else if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C)) Result = PTOGV(getOrEmitGlobalVariable(const_cast<GlobalVariable*>(GV))); else - assert(0 && "Unknown constant pointer type!"); + llvm_unreachable("Unknown constant pointer type!"); break; default: - cerr << "ERROR: Constant unimplemented for type: " << *C->getType() << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "ERROR: Constant unimplemented for type: " << *C->getType(); + llvm_report_error(Msg.str()); } return Result; } @@ -762,7 +830,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, *((PointerTy*)Ptr) = Val.PointerVal; break; default: - cerr << "Cannot store value of type " << *Ty << "!\n"; + errs() << "Cannot store value of type " << *Ty << "!\n"; } if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) @@ -803,15 +871,6 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, const Type *Ty) { const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty); - if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) { - // Host and target are different endian - reverse copy the stored - // bytes into a buffer, and load from that. - uint8_t *Src = (uint8_t*)Ptr; - uint8_t *Buf = (uint8_t*)alloca(LoadBytes); - std::reverse_copy(Src, Src + LoadBytes, Buf); - Ptr = (GenericValue*)Buf; - } - switch (Ty->getTypeID()) { case Type::IntegerTyID: // An APInt with all words initially zero. @@ -836,8 +895,10 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, break; } default: - cerr << "Cannot load value of type " << *Ty << "!\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Cannot load value of type " << *Ty << "!"; + llvm_report_error(Msg.str()); } } @@ -845,7 +906,7 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, // specified memory location...
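
The EngineBuilder introduced above replaces the old multi-argument ExecutionEngine::create overloads with a fluent configuration object. A hedged sketch of the caller's side, assuming the 2.6-era headers; makeJIT is an illustrative helper:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Module.h"
    #include <string>
    using namespace llvm;

    ExecutionEngine *makeJIT(Module *M, std::string &Err) {
      return EngineBuilder(M)              // wraps M in an ExistingModuleProvider
          .setEngineKind(EngineKind::JIT)  // JIT only: no interpreter fallback
          .setErrorStr(&Err)               // filled in when create() returns 0
          .create();
    }

Leaving the kind at EngineKind::Either keeps the old behaviour of trying the JIT first and falling back to the interpreter; the JIT library must actually be linked into the tool (typically by including llvm/ExecutionEngine/JIT.h, which references LLVMLinkInJIT()), otherwise create() returns 0 with the "JIT has not been linked in." message shown above.
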
// void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { - DOUT << "JIT: Initializing " << Addr << " "; + DEBUG(errs() << "JIT: Initializing " << Addr << " "); DEBUG(Init->dump()); if (isa(Init)) { return; @@ -876,8 +937,8 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { return; } - cerr << "Bad Type: " << *Init->getType() << "\n"; - assert(0 && "Unknown constant type to initialize memory with!"); + errs() << "Bad Type: " << *Init->getType() << "\n"; + llvm_unreachable("Unknown constant type to initialize memory with!"); } /// EmitGlobals - Emit all of the global variables to memory, storing their @@ -950,12 +1011,11 @@ void ExecutionEngine::emitGlobals() { // External variable reference. Try to use the dynamic loader to // get a pointer to it. if (void *SymAddr = - sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName().c_str())) + sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName())) addGlobalMapping(I, SymAddr); else { - cerr << "Could not resolve external global address: " - << I->getName() << "\n"; - abort(); + llvm_report_error("Could not resolve external global address: " + +I->getName()); } } } @@ -1011,3 +1071,18 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { NumInitBytes += (unsigned)GVSize; ++NumGlobals; } + +ExecutionEngineState::MapUpdatingCVH::MapUpdatingCVH( + ExecutionEngineState &EES, const GlobalValue *GV) + : CallbackVH(const_cast(GV)), EES(EES) {} + +void ExecutionEngineState::MapUpdatingCVH::deleted() { + MutexGuard locked(EES.EE.lock); + EES.RemoveMapping(locked, *this); // Destroys *this. +} + +void ExecutionEngineState::MapUpdatingCVH::allUsesReplacedWith( + Value *new_value) { + assert(false && "The ExecutionEngine doesn't know how to handle a" + " RAUW on a value it has a global mapping for."); +} diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 401a22647e1d8..5901cd757dc1f 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -15,6 +15,7 @@ #include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/Support/ErrorHandling.h" #include using namespace llvm; @@ -45,8 +46,7 @@ LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) { GenVal->DoubleVal = N; break; default: - assert(0 && "LLVMGenericValueToFloat supports only float and double."); - break; + llvm_unreachable("LLVMGenericValueToFloat supports only float and double."); } return wrap(GenVal); } @@ -75,7 +75,7 @@ double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) { case Type::DoubleTyID: return unwrap(GenVal)->DoubleVal; default: - assert(0 && "LLVMGenericValueToFloat supports only float and double."); + llvm_unreachable("LLVMGenericValueToFloat supports only float and double."); break; } return 0; // Not reached @@ -91,7 +91,10 @@ int LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, LLVMModuleProviderRef MP, char **OutError) { std::string Error; - if (ExecutionEngine *EE = ExecutionEngine::create(unwrap(MP), false, &Error)){ + EngineBuilder builder(unwrap(MP)); + builder.setEngineKind(EngineKind::Either) + .setErrorStr(&Error); + if (ExecutionEngine *EE = builder.create()){ *OutEE = wrap(EE); return 0; } @@ -103,8 +106,10 @@ int LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, LLVMModuleProviderRef MP, char **OutError) { std::string Error; - 
if (ExecutionEngine *Interp = - ExecutionEngine::create(unwrap(MP), true, &Error)) { + EngineBuilder builder(unwrap(MP)); + builder.setEngineKind(EngineKind::Interpreter) + .setErrorStr(&Error); + if (ExecutionEngine *Interp = builder.create()) { *OutInterp = wrap(Interp); return 0; } @@ -117,9 +122,11 @@ int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, unsigned OptLevel, char **OutError) { std::string Error; - if (ExecutionEngine *JIT = - ExecutionEngine::create(unwrap(MP), false, &Error, - (CodeGenOpt::Level)OptLevel)) { + EngineBuilder builder(unwrap(MP)); + builder.setEngineKind(EngineKind::JIT) + .setErrorStr(&Error) + .setOptLevel((CodeGenOpt::Level)OptLevel); + if (ExecutionEngine *JIT = builder.create()) { *OutJIT = wrap(JIT); return 0; } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index bb3f64e626f05..f8c775ee7c187 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -22,10 +22,10 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include #include -#include using namespace llvm; STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed"); @@ -37,15 +37,6 @@ static cl::opt PrintVolatile("interpreter-print-volatile", cl::Hidden, // Various Helper Functions //===----------------------------------------------------------------------===// -static inline uint64_t doSignExtension(uint64_t Val, const IntegerType* ITy) { - // Determine if the value is signed or not - bool isSigned = (Val & (1 << (ITy->getBitWidth()-1))) != 0; - // If its signed, extend the sign bits - if (isSigned) - Val |= ~ITy->getBitMask(); - return Val; -} - static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) { SF.Values[V] = Val; } @@ -65,8 +56,8 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(+, Float); IMPLEMENT_BINARY_OPERATOR(+, Double); default: - cerr << "Unhandled type for FAdd instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FAdd instruction: " << *Ty << "\n"; + llvm_unreachable(0); } } @@ -76,8 +67,8 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(-, Float); IMPLEMENT_BINARY_OPERATOR(-, Double); default: - cerr << "Unhandled type for FSub instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FSub instruction: " << *Ty << "\n"; + llvm_unreachable(0); } } @@ -87,8 +78,8 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(*, Float); IMPLEMENT_BINARY_OPERATOR(*, Double); default: - cerr << "Unhandled type for FMul instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FMul instruction: " << *Ty << "\n"; + llvm_unreachable(0); } } @@ -98,8 +89,8 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(/, Float); IMPLEMENT_BINARY_OPERATOR(/, Double); default: - cerr << "Unhandled type for FDiv instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FDiv instruction: " << *Ty << "\n"; + llvm_unreachable(0); } } @@ -113,8 +104,8 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal); break; default: - cerr << "Unhandled type for Rem instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type 
for Rem instruction: " << *Ty << "\n"; + llvm_unreachable(0); } } @@ -140,8 +131,8 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(eq,Ty); IMPLEMENT_POINTER_ICMP(==); default: - cerr << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -153,8 +144,8 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ne,Ty); IMPLEMENT_POINTER_ICMP(!=); default: - cerr << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -166,8 +157,8 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ult,Ty); IMPLEMENT_POINTER_ICMP(<); default: - cerr << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -179,8 +170,8 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(slt,Ty); IMPLEMENT_POINTER_ICMP(<); default: - cerr << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -192,8 +183,8 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ugt,Ty); IMPLEMENT_POINTER_ICMP(>); default: - cerr << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -205,8 +196,8 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sgt,Ty); IMPLEMENT_POINTER_ICMP(>); default: - cerr << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -218,8 +209,8 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(ule,Ty); IMPLEMENT_POINTER_ICMP(<=); default: - cerr << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -231,8 +222,8 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sle,Ty); IMPLEMENT_POINTER_ICMP(<=); default: - cerr << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -244,8 +235,8 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(uge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: - cerr << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -257,8 +248,8 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_INTEGER_ICMP(sge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: - cerr << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; + 
llvm_unreachable(0); } return Dest; } @@ -282,8 +273,8 @@ void Interpreter::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break; case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break; default: - cerr << "Don't know how to handle this ICmp predicate!\n-->" << I; - abort(); + errs() << "Don't know how to handle this ICmp predicate!\n-->" << I; + llvm_unreachable(0); } SetValue(&I, R, SF); @@ -301,8 +292,8 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(==, Float); IMPLEMENT_FCMP(==, Double); default: - cerr << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -315,8 +306,8 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(!=, Double); default: - cerr << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -328,8 +319,8 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(<=, Float); IMPLEMENT_FCMP(<=, Double); default: - cerr << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -341,8 +332,8 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(>=, Float); IMPLEMENT_FCMP(>=, Double); default: - cerr << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -354,8 +345,8 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(<, Float); IMPLEMENT_FCMP(<, Double); default: - cerr << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } @@ -367,14 +358,14 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_FCMP(>, Float); IMPLEMENT_FCMP(>, Double); default: - cerr << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; + llvm_unreachable(0); } return Dest; } #define IMPLEMENT_UNORDERED(TY, X,Y) \ - if (TY == Type::FloatTy) { \ + if (TY->isFloatTy()) { \ if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \ Dest.IntVal = APInt(1,true); \ return Dest; \ @@ -430,7 +421,7 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, const Type *Ty) { GenericValue Dest; - if (Ty == Type::FloatTy) + if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && Src2.FloatVal == Src2.FloatVal)); else @@ -442,7 +433,7 @@ static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, const Type *Ty) { GenericValue Dest; - if (Ty == Type::FloatTy) + if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || Src2.FloatVal != Src2.FloatVal)); else @@ -476,8 +467,8 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); 
break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; default: - cerr << "Don't know how to handle this FCmp predicate!\n-->" << I; - abort(); + errs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + llvm_unreachable(0); } SetValue(&I, R, SF); @@ -522,8 +513,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, return Result; } default: - cerr << "Unhandled Cmp predicate\n"; - abort(); + errs() << "Unhandled Cmp predicate\n"; + llvm_unreachable(0); } } @@ -551,8 +542,8 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; default: - cerr << "Don't know how to handle this binary operator!\n-->" << I; - abort(); + errs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); } SetValue(&I, R, SF); @@ -610,7 +601,8 @@ void Interpreter::popStackAndReturnValueToCaller (const Type *RetTy, // fill in the return value... ExecutionContext &CallingSF = ECStack.back(); if (Instruction *I = CallingSF.Caller.getInstruction()) { - if (CallingSF.Caller.getType() != Type::VoidTy) // Save result... + // Save result... + if (CallingSF.Caller.getType() != Type::getVoidTy(RetTy->getContext())) SetValue(I, Result, CallingSF); if (InvokeInst *II = dyn_cast (I)) SwitchToNewBasicBlock (II->getNormalDest (), CallingSF); @@ -621,7 +613,7 @@ void Interpreter::popStackAndReturnValueToCaller (const Type *RetTy, void Interpreter::visitReturnInst(ReturnInst &I) { ExecutionContext &SF = ECStack.back(); - const Type *RetTy = Type::VoidTy; + const Type *RetTy = Type::getVoidTy(I.getContext()); GenericValue Result; // Save away the return value... (if we are not 'ret void') @@ -639,7 +631,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) { do { ECStack.pop_back (); if (ECStack.empty ()) - abort (); + llvm_report_error("Empty stack during unwind!"); Inst = ECStack.back ().Caller.getInstruction (); } while (!(Inst && isa (Inst))); @@ -652,8 +644,7 @@ void Interpreter::visitUnwindInst(UnwindInst &I) { } void Interpreter::visitUnreachableInst(UnreachableInst &I) { - cerr << "ERROR: Program executed an 'unreachable' instruction!\n"; - abort(); + llvm_report_error("Program executed an 'unreachable' instruction!"); } void Interpreter::visitBranchInst(BranchInst &I) { @@ -746,9 +737,9 @@ void Interpreter::visitAllocationInst(AllocationInst &I) { // Allocate enough memory to hold the type... 
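
Every DOUT use in this patch becomes a DEBUG(...) block writing to errs(), as in the allocation trace just below. A minimal sketch of the replacement pattern, assuming the 2.6 Support headers; traceAlloc is an illustrative function name:

    #define DEBUG_TYPE "jit"   // picked up by -debug-only=jit
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void traceAlloc(void *Mem, unsigned Bytes) {
      // Compiled out of NDEBUG builds; enabled at runtime with -debug.
      DEBUG(errs() << "Allocated " << Bytes << " bytes at " << Mem << '\n');
    }
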
void *Memory = malloc(MemToAlloc); - DOUT << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " - << NumElements << " (Total: " << MemToAlloc << ") at " - << uintptr_t(Memory) << '\n'; + DEBUG(errs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " + << NumElements << " (Total: " << MemToAlloc << ") at " + << uintptr_t(Memory) << '\n'); GenericValue Result = PTOGV(Memory); assert(Result.PointerVal != 0 && "Null pointer returned by malloc!"); @@ -804,7 +795,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, GenericValue Result; Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total; - DOUT << "GEP Index " << Total << " bytes.\n"; + DEBUG(errs() << "GEP Index " << Total << " bytes.\n"); return Result; } @@ -822,7 +813,7 @@ void Interpreter::visitLoadInst(LoadInst &I) { LoadValueFromMemory(Result, Ptr, I.getType()); SetValue(&I, Result, SF); if (I.isVolatile() && PrintVolatile) - cerr << "Volatile load " << I; + errs() << "Volatile load " << I; } void Interpreter::visitStoreInst(StoreInst &I) { @@ -832,7 +823,7 @@ void Interpreter::visitStoreInst(StoreInst &I) { StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC), I.getOperand(0)->getType()); if (I.isVolatile() && PrintVolatile) - cerr << "Volatile store: " << I; + errs() << "Volatile store: " << I; } //===----------------------------------------------------------------------===// @@ -979,7 +970,7 @@ GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType() == Type::DoubleTy && DstTy == Type::FloatTy && + assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && "Invalid FPTrunc instruction"); Dest.FloatVal = (float) Src.DoubleVal; return Dest; @@ -988,7 +979,7 @@ GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType() == Type::FloatTy && DstTy == Type::DoubleTy && + assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && "Invalid FPTrunc instruction"); Dest.DoubleVal = (double) Src.FloatVal; return Dest; @@ -1079,28 +1070,28 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, assert(isa(SrcTy) && "Invalid BitCast"); Dest.PointerVal = Src.PointerVal; } else if (DstTy->isInteger()) { - if (SrcTy == Type::FloatTy) { + if (SrcTy->isFloatTy()) { Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT); Dest.IntVal.floatToBits(Src.FloatVal); - } else if (SrcTy == Type::DoubleTy) { + } else if (SrcTy->isDoubleTy()) { Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT); Dest.IntVal.doubleToBits(Src.DoubleVal); } else if (SrcTy->isInteger()) { Dest.IntVal = Src.IntVal; } else - assert(0 && "Invalid BitCast"); - } else if (DstTy == Type::FloatTy) { + llvm_unreachable("Invalid BitCast"); + } else if (DstTy->isFloatTy()) { if (SrcTy->isInteger()) Dest.FloatVal = Src.IntVal.bitsToFloat(); else Dest.FloatVal = Src.FloatVal; - } else if (DstTy == Type::DoubleTy) { + } else if (DstTy->isDoubleTy()) { if (SrcTy->isInteger()) Dest.DoubleVal = Src.IntVal.bitsToDouble(); else Dest.DoubleVal = Src.DoubleVal; } else - assert(0 && "Invalid Bitcast"); + llvm_unreachable("Invalid Bitcast"); return Dest; } @@ -1184,8 +1175,8 @@ void 
Interpreter::visitVAArgInst(VAArgInst &I) { IMPLEMENT_VAARG(Float); IMPLEMENT_VAARG(Double); default: - cerr << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; - abort(); + errs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; + llvm_unreachable(0); } // Set the Value of this Instruction. @@ -1271,8 +1262,8 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue()); break; default: - cerr << "Unhandled ConstantExpr: " << *CE << "\n"; - abort(); + errs() << "Unhandled ConstantExpr: " << *CE << "\n"; + llvm_unreachable(0); return GenericValue(); } return Dest; @@ -1344,30 +1335,29 @@ void Interpreter::run() { // Track the number of dynamic instructions executed. ++NumDynamicInsts; - DOUT << "About to interpret: " << I; + DEBUG(errs() << "About to interpret: " << I); visit(I); // Dispatch to one of the visit* methods... #if 0 // This is not safe, as visiting the instruction could lower it and free I. -#ifndef NDEBUG +DEBUG( if (!isa<CallInst>(I) && !isa<InvokeInst>(I) && I.getType() != Type::VoidTy) { - DOUT << " --> "; + errs() << " --> "; const GenericValue &Val = SF.Values[&I]; switch (I.getType()->getTypeID()) { - default: assert(0 && "Invalid GenericValue Type"); - case Type::VoidTyID: DOUT << "void"; break; - case Type::FloatTyID: DOUT << "float " << Val.FloatVal; break; - case Type::DoubleTyID: DOUT << "double " << Val.DoubleVal; break; - case Type::PointerTyID: DOUT << "void* " << intptr_t(Val.PointerVal); + default: llvm_unreachable("Invalid GenericValue Type"); + case Type::VoidTyID: errs() << "void"; break; + case Type::FloatTyID: errs() << "float " << Val.FloatVal; break; + case Type::DoubleTyID: errs() << "double " << Val.DoubleVal; break; + case Type::PointerTyID: errs() << "void* " << intptr_t(Val.PointerVal); break; case Type::IntegerTyID: - DOUT << "i" << Val.IntVal.getBitWidth() << " " - << Val.IntVal.toStringUnsigned(10) - << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; + errs() << "i" << Val.IntVal.getBitWidth() << " " - << Val.IntVal.toStringUnsigned(10) - << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; break; } - } -#endif + }); #endif } } diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index b8525a30ecad6..8c45a36b56a11 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -23,7 +23,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Config/config.h" // Detect libffi -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ManagedStatic.h" @@ -54,7 +54,7 @@ static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions; static std::map<std::string, ExFunc> FuncNames; #ifdef USE_LIBFFI -typedef void (*RawFunc)(void); +typedef void (*RawFunc)(); static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions; #endif @@ -95,15 +95,15 @@ static ExFunc lookupFunction(const Function *F) { const FunctionType *FT = F->getFunctionType(); for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) ExtName += getTypeID(FT->getContainedType(i)); - ExtName += "_" + F->getName(); + ExtName += "_" + F->getNameStr(); - sys::ScopedLock Writer(&*FunctionsLock); + sys::ScopedLock Writer(*FunctionsLock); ExFunc FnPtr = FuncNames[ExtName]; if (FnPtr == 0) - FnPtr = FuncNames["lle_X_"+F->getName()]; + FnPtr = FuncNames["lle_X_" + F->getNameStr()]; if (FnPtr == 0) // Try calling a
generic function... if it exists... - FnPtr = (ExFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol( - ("lle_X_"+F->getName()).c_str()); + FnPtr = (ExFunc)(intptr_t) + sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr()); if (FnPtr != 0) ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later return FnPtr; @@ -126,8 +126,7 @@ static ffi_type *ffiTypeFor(const Type *Ty) { default: break; } // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. - cerr << "Type could not be mapped for use with libffi.\n"; - abort(); + llvm_report_error("Type could not be mapped for use with libffi."); return NULL; } @@ -175,8 +174,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV, default: break; } // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. - cerr << "Type value could not be mapped for use with libffi.\n"; - abort(); + llvm_report_error("Type value could not be mapped for use with libffi."); return NULL; } @@ -190,9 +188,8 @@ static bool ffiInvoke(RawFunc Fn, Function *F, // TODO: We don't have type information about the remaining arguments, because // this information is never passed into ExecutionEngine::runFunction(). if (ArgVals.size() > NumArgs && F->isVarArg()) { - cerr << "Calling external var arg function '" << F->getName() - << "' is not supported by the Interpreter.\n"; - abort(); + llvm_report_error("Calling external var arg function '" + F->getName() + + "' is not supported by the Interpreter."); } unsigned ArgBytes = 0; @@ -206,9 +203,10 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArgBytes += TD->getTypeStoreSize(ArgTy); } - uint8_t *ArgData = (uint8_t*) alloca(ArgBytes); - uint8_t *ArgDataPtr = ArgData; - std::vector values(NumArgs); + SmallVector ArgData; + ArgData.resize(ArgBytes); + uint8_t *ArgDataPtr = ArgData.data(); + SmallVector values(NumArgs); for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; ++A) { const unsigned ArgNo = A->getArgNo(); @@ -221,22 +219,22 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ffi_type *rtype = ffiTypeFor(RetTy); if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) { - void *ret = NULL; + SmallVector ret; if (RetTy->getTypeID() != Type::VoidTyID) - ret = alloca(TD->getTypeStoreSize(RetTy)); - ffi_call(&cif, Fn, ret, &values[0]); + ret.resize(TD->getTypeStoreSize(RetTy)); + ffi_call(&cif, Fn, ret.data(), values.data()); switch (RetTy->getTypeID()) { case Type::IntegerTyID: switch (cast(RetTy)->getBitWidth()) { - case 8: Result.IntVal = APInt(8 , *(int8_t *) ret); break; - case 16: Result.IntVal = APInt(16, *(int16_t*) ret); break; - case 32: Result.IntVal = APInt(32, *(int32_t*) ret); break; - case 64: Result.IntVal = APInt(64, *(int64_t*) ret); break; + case 8: Result.IntVal = APInt(8 , *(int8_t *) ret.data()); break; + case 16: Result.IntVal = APInt(16, *(int16_t*) ret.data()); break; + case 32: Result.IntVal = APInt(32, *(int32_t*) ret.data()); break; + case 64: Result.IntVal = APInt(64, *(int64_t*) ret.data()); break; } break; - case Type::FloatTyID: Result.FloatVal = *(float *) ret; break; - case Type::DoubleTyID: Result.DoubleVal = *(double*) ret; break; - case Type::PointerTyID: Result.PointerVal = *(void **) ret; break; + case Type::FloatTyID: Result.FloatVal = *(float *) ret.data(); break; + case Type::DoubleTyID: Result.DoubleVal = *(double*) ret.data(); break; + case Type::PointerTyID: Result.PointerVal = *(void **) ret.data(); break; default: break; } return true; @@ 
-272,7 +270,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, } else { RawFn = RF->second; } - + FunctionsLock->release(); GenericValue Result; @@ -280,10 +278,12 @@ GenericValue Interpreter::callExternalFunction(Function *F, return Result; #endif // USE_LIBFFI - cerr << "Tried to execute an unknown external function: " - << F->getType()->getDescription() << " " << F->getName() << "\n"; - if (F->getName() != "__main") - abort(); + if (F->getName() == "__main") + errs() << "Tried to execute an unknown external function: " + << F->getType()->getDescription() << " __main\n"; + else + llvm_report_error("Tried to execute an unknown external function: " + + F->getType()->getDescription() + " " +F->getName()); return GenericValue(); } @@ -291,6 +291,12 @@ GenericValue Interpreter::callExternalFunction(Function *F, //===----------------------------------------------------------------------===// // Functions "exported" to the running application... // + +// Visual Studio warns about returning GenericValue in extern "C" linkage +#ifdef _MSC_VER + #pragma warning(disable : 4190) +#endif + extern "C" { // Don't add C++ manglings to llvm mangling :) // void atexit(Function*) @@ -313,6 +319,8 @@ GenericValue lle_X_exit(const FunctionType *FT, // void abort(void) GenericValue lle_X_abort(const FunctionType *FT, const std::vector &Args) { + //FIXME: should we report or raise here? + //llvm_report_error("Interpreted program raised SIGABRT"); raise (SIGABRT); return GenericValue(); } @@ -327,7 +335,7 @@ GenericValue lle_X_sprintf(const FunctionType *FT, // printf should return # chars printed. This is completely incorrect, but // close enough for now. - GenericValue GV; + GenericValue GV; GV.IntVal = APInt(32, strlen(FmtStr)); while (1) { switch (*FmtStr) { @@ -385,7 +393,8 @@ GenericValue lle_X_sprintf(const FunctionType *FT, sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break; case 's': sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break; - default: cerr << ""; + default: + errs() << ""; ArgNo++; break; } strcpy(OutputBuffer, Buffer); @@ -406,11 +415,12 @@ GenericValue lle_X_printf(const FunctionType *FT, NewArgs.push_back(PTOGV((void*)&Buffer[0])); NewArgs.insert(NewArgs.end(), Args.begin(), Args.end()); GenericValue GV = lle_X_sprintf(FT, NewArgs); - cout << Buffer; + outs() << Buffer; return GV; } -static void ByteswapSCANFResults(const char *Fmt, void *Arg0, void *Arg1, +static void ByteswapSCANFResults(LLVMContext &C, + const char *Fmt, void *Arg0, void *Arg1, void *Arg2, void *Arg3, void *Arg4, void *Arg5, void *Arg6, void *Arg7, void *Arg8) { void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 }; @@ -450,26 +460,26 @@ static void ByteswapSCANFResults(const char *Fmt, void *Arg0, void *Arg1, case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p': case 'd': if (Long || LongLong) { - Size = 8; Ty = Type::Int64Ty; + Size = 8; Ty = Type::getInt64Ty(C); } else if (Half) { - Size = 4; Ty = Type::Int16Ty; + Size = 4; Ty = Type::getInt16Ty(C); } else { - Size = 4; Ty = Type::Int32Ty; + Size = 4; Ty = Type::getInt32Ty(C); } break; case 'e': case 'g': case 'E': case 'f': if (Long || LongLong) { - Size = 8; Ty = Type::DoubleTy; + Size = 8; Ty = Type::getDoubleTy(C); } else { - Size = 4; Ty = Type::FloatTy; + Size = 4; Ty = Type::getFloatTy(C); } break; case 's': case 'c': case '[': // No byteswap needed Size = 1; - Ty = Type::Int8Ty; + Ty = Type::getInt8Ty(C); break; default: break; @@ -498,7 +508,8 @@ GenericValue lle_X_sscanf(const 
FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4], Args[5], Args[6], Args[7], Args[8], Args[9])); - ByteswapSCANFResults(Args[1], Args[2], Args[3], Args[4], + ByteswapSCANFResults(FT->getContext(), + Args[1], Args[2], Args[3], Args[4], Args[5], Args[6], Args[7], Args[8], Args[9], 0); return GV; } @@ -515,7 +526,8 @@ GenericValue lle_X_scanf(const FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4], Args[5], Args[6], Args[7], Args[8], Args[9])); - ByteswapSCANFResults(Args[0], Args[1], Args[2], Args[3], Args[4], + ByteswapSCANFResults(FT->getContext(), + Args[0], Args[1], Args[2], Args[3], Args[4], Args[5], Args[6], Args[7], Args[8], Args[9]); return GV; } @@ -537,9 +549,14 @@ GenericValue lle_X_fprintf(const FunctionType *FT, } // End extern "C" +// Done with externals; turn the warning back on +#ifdef _MSC_VER + #pragma warning(default: 4190) +#endif + void Interpreter::initializeExternalFunctions() { - sys::ScopedLock Writer(&*FunctionsLock); + sys::ScopedLock Writer(*FunctionsLock); FuncNames["lle_X_atexit"] = lle_X_atexit; FuncNames["lle_X_exit"] = lle_X_exit; FuncNames["lle_X_abort"] = lle_X_abort; @@ -550,4 +567,3 @@ void Interpreter::initializeExternalFunctions() { FuncNames["lle_X_scanf"] = lle_X_scanf; FuncNames["lle_X_fprintf"] = lle_X_fprintf; } - diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index d7f38ef548f2b..9be6a9265d613 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -33,8 +33,7 @@ extern "C" void LLVMLinkInInterpreter() { } /// create - Create a new interpreter object. This can never fail. /// -ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr, - CodeGenOpt::Level OptLevel /*unused*/) { +ExecutionEngine *Interpreter::create(ModuleProvider *MP, std::string* ErrStr) { // Tell this ModuleProvide to materialize and release the module if (!MP->materializeModule(ErrStr)) // We got an error, just return 0 @@ -98,4 +97,3 @@ Interpreter::runFunction(Function *F, return ExitValue; } - diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 6b13c90f6671a..e026287bb5590 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -17,11 +17,12 @@ #include "llvm/Function.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/DataTypes.h" - +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { class IntrinsicLowering; @@ -107,8 +108,7 @@ public: /// create - Create an interpreter ExecutionEngine. This can never fail. /// - static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0, - CodeGenOpt::Level = CodeGenOpt::Default); + static ExecutionEngine *create(ModuleProvider *M, std::string *ErrorStr = 0); /// run - Start execution with the specified function and arguments. 
/// @@ -144,7 +144,9 @@ public: void visitLoadInst(LoadInst &I); void visitStoreInst(StoreInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); - void visitPHINode(PHINode &PN) { assert(0 && "PHI nodes already handled!"); } + void visitPHINode(PHINode &PN) { + llvm_unreachable("PHI nodes already handled!"); + } void visitTruncInst(TruncInst &I); void visitZExtInst(ZExtInst &I); void visitSExtInst(SExtInst &I); @@ -172,8 +174,8 @@ public: void visitVAArgInst(VAArgInst &I); void visitInstruction(Instruction &I) { - cerr << I; - assert(0 && "Instruction not interpretable yet!"); + errs() << I; + llvm_unreachable("Instruction not interpretable yet!"); } GenericValue callExternalFunction(Function *F, diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt index bf915f7c4ca19..41b3b4e99cb13 100644 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -4,9 +4,11 @@ add_definitions(-DENABLE_X86_JIT) add_llvm_library(LLVMJIT Intercept.cpp JIT.cpp + JITDebugRegisterer.cpp JITDwarfEmitter.cpp JITEmitter.cpp JITMemoryManager.cpp MacOSJITEventListener.cpp + OProfileJITEventListener.cpp TargetSelect.cpp ) diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp index 3dcc4626a1fa4..c00b60a276c2f 100644 --- a/lib/ExecutionEngine/JIT/Intercept.cpp +++ b/lib/ExecutionEngine/JIT/Intercept.cpp @@ -16,7 +16,7 @@ //===----------------------------------------------------------------------===// #include "JIT.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/Config/config.h" using namespace llvm; @@ -56,6 +56,7 @@ static void runAtExitHandlers() { * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' * available as an exported symbol, so we have to add it explicitly. */ +namespace { class StatSymbols { public: StatSymbols() { @@ -72,6 +73,7 @@ public: sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); } }; +} static StatSymbols initStatSymbols; #endif // __linux__ @@ -82,7 +84,7 @@ static void jit_exit(int Status) { } // jit_atexit - Used to intercept the "atexit" library call. -static int jit_atexit(void (*Fn)(void)) { +static int jit_atexit(void (*Fn)()) { AtExitHandlers.push_back(Fn); // Take note of atexit handler... 
   return 0;  // Always successful
 }

@@ -140,9 +142,8 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
       return RP;

   if (AbortOnFailure) {
-    cerr << "ERROR: Program used external function '" << Name
-         << "' which could not be resolved!\n";
-    abort();
+    llvm_report_error("Program used external function '"+Name+
+                      "' which could not be resolved!");
   }
   return 0;
 }
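The JIT.cpp changes below fold target selection and the JIT-support check into JIT::createJIT itself and add a GVsWithCode flag. From the caller's side the flow is unchanged; a sketch, reusing MP and Err from the example above:

    // Sketch: create the JIT through the new five-argument factory.
    // GVsWithCode=true keeps globals in the code buffer, which, per the
    // JIT.h comment later in this patch, breaks freeMachineCodeForFunction.
    ExecutionEngine *EE =
        ExecutionEngine::createJIT(MP, &Err, /*JMM=*/0,
                                   CodeGenOpt::Default, /*GVsWithCode=*/true);
    if (!EE)
      errs() << "could not create JIT: " << Err << "\n";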
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 1d8312f76298a..b2a268bce8b74 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetJITInfo.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/System/DynamicLibrary.h"
 #include "llvm/Config/config.h"
@@ -196,25 +197,44 @@ void DarwinRegisterFrame(void* FrameBegin) {
 ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP,
                                             std::string *ErrorStr,
                                             JITMemoryManager *JMM,
-                                            CodeGenOpt::Level OptLevel) {
-  ExecutionEngine *EE = JIT::createJIT(MP, ErrorStr, JMM, OptLevel);
-  if (!EE) return 0;
-
+                                            CodeGenOpt::Level OptLevel,
+                                            bool GVsWithCode) {
+  return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode);
+}
+
+ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
+                                std::string *ErrorStr,
+                                JITMemoryManager *JMM,
+                                CodeGenOpt::Level OptLevel,
+                                bool GVsWithCode) {
   // Make sure we can resolve symbols in the program as well.  The zero arg
   // to the function tells DynamicLibrary to load the program, not a library.
-  sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr);
-  return EE;
+  if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
+    return 0;
+
+  // Pick a target either via -march or by guessing the native arch.
+  TargetMachine *TM = JIT::selectTarget(MP, ErrorStr);
+  if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+
+  // If the target supports JIT code generation, create the JIT.
+  if (TargetJITInfo *TJ = TM->getJITInfo()) {
+    return new JIT(MP, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+  } else {
+    if (ErrorStr)
+      *ErrorStr = "target does not support JIT code generation";
+    return 0;
+  }
 }

 JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
-         JITMemoryManager *JMM, CodeGenOpt::Level OptLevel)
-  : ExecutionEngine(MP), TM(tm), TJI(tji) {
+         JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
+  : ExecutionEngine(MP), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode) {
   setTargetData(TM.getTargetData());

   jitstate = new JITState(MP);

   // Initialize JCE
-  JCE = createEmitter(*this, JMM);
+  JCE = createEmitter(*this, JMM, TM);

   // Add target data
   MutexGuard locked(lock);
@@ -224,8 +244,7 @@ JIT::JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
   // Turn the machine code intermediate representation into bytes in memory that
   // may be executed.
   if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
-    cerr << "Target does not support machine code emission!\n";
-    abort();
+    llvm_report_error("Target does not support machine code emission!");
   }

   // Register routine for informing unwinding runtime about new EH frames
@@ -273,8 +292,7 @@ void JIT::addModuleProvider(ModuleProvider *MP) {
   // Turn the machine code intermediate representation into bytes in memory
   // that may be executed.
   if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
-    cerr << "Target does not support machine code emission!\n";
-    abort();
+    llvm_report_error("Target does not support machine code emission!");
   }

   // Initialize passes.
@@ -306,8 +324,7 @@ Module *JIT::removeModuleProvider(ModuleProvider *MP, std::string *E) {
   // Turn the machine code intermediate representation into bytes in memory
   // that may be executed.
   if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
-    cerr << "Target does not support machine code emission!\n";
-    abort();
+    llvm_report_error("Target does not support machine code emission!");
   }

   // Initialize passes.
@@ -338,8 +355,7 @@ void JIT::deleteModuleProvider(ModuleProvider *MP, std::string *E) {
   // Turn the machine code intermediate representation into bytes in memory
   // that may be executed.
   if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
-    cerr << "Target does not support machine code emission!\n";
-    abort();
+    llvm_report_error("Target does not support machine code emission!");
   }

   // Initialize passes.
@@ -366,10 +382,11 @@ GenericValue JIT::runFunction(Function *F,

   // Handle some common cases first.  These cases correspond to common `main'
   // prototypes.
-  if (RetTy == Type::Int32Ty || RetTy == Type::VoidTy) {
+  if (RetTy == Type::getInt32Ty(F->getContext()) ||
+      RetTy == Type::getVoidTy(F->getContext())) {
     switch (ArgValues.size()) {
     case 3:
-      if (FTy->getParamType(0) == Type::Int32Ty &&
+      if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) &&
          isa<PointerType>(FTy->getParamType(1)) &&
          isa<PointerType>(FTy->getParamType(2))) {
        int (*PF)(int, char **, const char **) =
@@ -384,7 +401,7 @@ GenericValue JIT::runFunction(Function *F,
       }
       break;
     case 2:
-      if (FTy->getParamType(0) == Type::Int32Ty &&
+      if (FTy->getParamType(0) == Type::getInt32Ty(F->getContext()) &&
          isa<PointerType>(FTy->getParamType(1))) {
        int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
@@ -397,7 +414,7 @@ GenericValue JIT::runFunction(Function *F,
       break;
     case 1:
       if (FTy->getNumParams() == 1 &&
-          FTy->getParamType(0) == Type::Int32Ty) {
+          FTy->getParamType(0) == Type::getInt32Ty(F->getContext())) {
         GenericValue rv;
         int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
         rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
@@ -411,7 +428,7 @@ GenericValue JIT::runFunction(Function *F,
   if (ArgValues.empty()) {
     GenericValue rv;
     switch (RetTy->getTypeID()) {
-    default: assert(0 && "Unknown return type for function call!");
+    default: llvm_unreachable("Unknown return type for function call!");
     case Type::IntegerTyID: {
       unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
       if (BitWidth == 1)
@@ -425,7 +442,7 @@ GenericValue JIT::runFunction(Function *F,
       else if (BitWidth <= 64)
         rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
       else
-        assert(0 && "Integer types > 64 bits not supported");
+        llvm_unreachable("Integer types > 64 bits not supported");
       return rv;
     }
     case Type::VoidTyID:
@@ -440,7 +457,7 @@ GenericValue JIT::runFunction(Function *F,
     case Type::X86_FP80TyID:
     case Type::FP128TyID:
     case Type::PPC_FP128TyID:
-      assert(0 && "long double not supported yet");
+      llvm_unreachable("long double not supported yet");
       return rv;
     case Type::PointerTyID:
       return PTOGV(((void*(*)())(intptr_t)FPtr)());
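These fast paths dispatch on the classic main() prototypes before falling back to the stub-building code in the next hunk. Driving them looks like this; a sketch assuming EE and a main-like Function *F:

    // Sketch: invoke int main(int argc, char **argv) through runFunction.
    std::vector<GenericValue> Args(2);
    Args[0].IntVal = APInt(32, 1);          // argc
    static const char *Argv[] = { "prog", 0 };
    Args[1] = PTOGV((void*)Argv);           // argv, passed as a raw pointer
    GenericValue Ret = EE->runFunction(F, Args);
    int ExitCode = Ret.IntVal.getZExtValue();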
@@ -458,7 +475,7 @@ GenericValue JIT::runFunction(Function *F,
                                     F->getParent());

   // Insert a basic block.
-  BasicBlock *StubBB = BasicBlock::Create("", Stub);
+  BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);

   // Convert all of the GenericValue arguments over to constants.  Note that we
   // currently don't support varargs.
@@ -468,28 +485,31 @@ GenericValue JIT::runFunction(Function *F,
     const Type *ArgTy = FTy->getParamType(i);
     const GenericValue &AV = ArgValues[i];
     switch (ArgTy->getTypeID()) {
-    default: assert(0 && "Unknown argument type for function call!");
+    default: llvm_unreachable("Unknown argument type for function call!");
     case Type::IntegerTyID:
-      C = ConstantInt::get(AV.IntVal);
+      C = ConstantInt::get(F->getContext(), AV.IntVal);
       break;
     case Type::FloatTyID:
-      C = ConstantFP::get(APFloat(AV.FloatVal));
+      C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
       break;
     case Type::DoubleTyID:
-      C = ConstantFP::get(APFloat(AV.DoubleVal));
+      C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
       break;
     case Type::PPC_FP128TyID:
     case Type::X86_FP80TyID:
     case Type::FP128TyID:
-      C = ConstantFP::get(APFloat(AV.IntVal));
+      C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal));
       break;
     case Type::PointerTyID:
       void *ArgPtr = GVTOP(AV);
       if (sizeof(void*) == 4)
-        C = ConstantInt::get(Type::Int32Ty, (int)(intptr_t)ArgPtr);
+        C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
+                             (int)(intptr_t)ArgPtr);
       else
-        C = ConstantInt::get(Type::Int64Ty, (intptr_t)ArgPtr);
-      C = ConstantExpr::getIntToPtr(C, ArgTy);  // Cast the integer to pointer
+        C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
+                             (intptr_t)ArgPtr);
+      // Cast the integer to pointer
+      C = ConstantExpr::getIntToPtr(C, ArgTy);
       break;
     }
     Args.push_back(C);
@@ -499,10 +519,11 @@ GenericValue JIT::runFunction(Function *F,
                                      "", StubBB);
   TheCall->setCallingConv(F->getCallingConv());
   TheCall->setTailCall();
-  if (TheCall->getType() != Type::VoidTy)
-    ReturnInst::Create(TheCall, StubBB);    // Return result of the call.
+  if (TheCall->getType() != Type::getVoidTy(F->getContext()))
+    // Return result of the call.
+    ReturnInst::Create(F->getContext(), TheCall, StubBB);
   else
-    ReturnInst::Create(StubBB);             // Just return void.
+    ReturnInst::Create(F->getContext(), StubBB); // Just return void.

   // Finally, return the value returned by our nullary stub function.
   return runFunction(Stub, std::vector<GenericValue>());
 }
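The stub builder above shows the LLVMContext migration pattern used throughout this patch: the global Type::Int32Ty and ConstantInt::get(APInt) entry points are replaced by context-qualified lookups. The pattern in isolation:

    // Sketch: pre- vs. post-patch constant creation.
    LLVMContext &Ctx = F->getContext();
    const Type *I32 = Type::getInt32Ty(Ctx);             // was Type::Int32Ty
    Constant *CI = ConstantInt::get(Ctx, APInt(32, 7));  // was ConstantInt::get(APInt(32, 7))
    Constant *CF = ConstantFP::get(Ctx, APFloat(1.5));   // was ConstantFP::get(APFloat(1.5))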
@@ -629,9 +650,8 @@ void *JIT::getPointerToFunction(Function *F) {

   std::string ErrorMsg;
   if (MP->materializeFunction(F, &ErrorMsg)) {
-    cerr << "Error reading function '" << F->getName()
-         << "' from bitcode file: " << ErrorMsg << "\n";
-    abort();
+    llvm_report_error("Error reading function '" + F->getName()+
+                      "' from bitcode file: " + ErrorMsg);
   }

   // Now retry to get the address.
@@ -669,45 +689,18 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
     if (GV->getName() == "__dso_handle")
       return (void*)&__dso_handle;
 #endif
-    Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName().c_str());
+    Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
     if (Ptr == 0 && !areDlsymStubsEnabled()) {
-      cerr << "Could not resolve external global address: "
-           << GV->getName() << "\n";
-      abort();
+      llvm_report_error("Could not resolve external global address: "
+                        +GV->getName());
     }
     addGlobalMapping(GV, Ptr);
   } else {
-    // GlobalVariable's which are not "constant" will cause trouble in a server
-    // situation. It's returned in the same block of memory as code which may
-    // not be writable.
-    if (isGVCompilationDisabled() && !GV->isConstant()) {
-      cerr << "Compilation of non-internal GlobalValue is disabled!\n";
-      abort();
-    }
     // If the global hasn't been emitted to memory yet, allocate space and
-    // emit it into memory.  It goes in the same array as the generated
-    // code, jump tables, etc.
-    const Type *GlobalType = GV->getType()->getElementType();
-    size_t S = getTargetData()->getTypeAllocSize(GlobalType);
-    size_t A = getTargetData()->getPreferredAlignment(GV);
-    if (GV->isThreadLocal()) {
-      MutexGuard locked(lock);
-      Ptr = TJI.allocateThreadLocalMemory(S);
-    } else if (TJI.allocateSeparateGVMemory()) {
-      if (A <= 8) {
-        Ptr = malloc(S);
-      } else {
-        // Allocate S+A bytes of memory, then use an aligned pointer within that
-        // space.
-        Ptr = malloc(S+A);
-        unsigned MisAligned = ((intptr_t)Ptr & (A-1));
-        Ptr = (char*)Ptr + (MisAligned ? (A-MisAligned) : 0);
-      }
-    } else {
-      Ptr = JCE->allocateSpace(S, A);
-    }
+    // emit it into memory.
+    Ptr = getMemoryForGV(GV);
     addGlobalMapping(GV, Ptr);
-    EmitGlobalVariable(GV);
+    EmitGlobalVariable(GV);  // Initialize the variable.
   }
   return Ptr;
 }
@@ -742,14 +735,41 @@ void *JIT::recompileAndRelinkFunction(Function *F) {
 /// on the target.
 ///
 char* JIT::getMemoryForGV(const GlobalVariable* GV) {
-  const Type *ElTy = GV->getType()->getElementType();
-  size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
+  char *Ptr;
+
+  // GlobalVariables which are not "constant" will cause trouble in a server
+  // situation. It's returned in the same block of memory as code which may
+  // not be writable.
+  if (isGVCompilationDisabled() && !GV->isConstant()) {
+    llvm_report_error("Compilation of non-internal GlobalValue is disabled!");
+  }
+
+  // Some applications require globals and code to live together, so they may
+  // be allocated into the same buffer, but in general globals are allocated
+  // through the memory manager which puts them near the code but not in the
+  // same buffer.
+  const Type *GlobalType = GV->getType()->getElementType();
+  size_t S = getTargetData()->getTypeAllocSize(GlobalType);
+  size_t A = getTargetData()->getPreferredAlignment(GV);
   if (GV->isThreadLocal()) {
     MutexGuard locked(lock);
-    return TJI.allocateThreadLocalMemory(GVSize);
+    Ptr = TJI.allocateThreadLocalMemory(S);
+  } else if (TJI.allocateSeparateGVMemory()) {
+    if (A <= 8) {
+      Ptr = (char*)malloc(S);
+    } else {
+      // Allocate S+A bytes of memory, then use an aligned pointer within that
+      // space.
+      Ptr = (char*)malloc(S+A);
+      unsigned MisAligned = ((intptr_t)Ptr & (A-1));
+      Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0);
+    }
+  } else if (AllocateGVsWithCode) {
+    Ptr = (char*)JCE->allocateSpace(S, A);
   } else {
-    return new char[GVSize];
+    Ptr = (char*)JCE->allocateGlobal(S, A);
   }
+  return Ptr;
 }

 void JIT::addPendingFunction(Function *F) {
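getMemoryForGV's malloc path over-allocates by the preferred alignment and rounds the pointer up. The arithmetic in isolation (an illustrative helper, not code from the patch):

    // Round P up to the next multiple of A, where A is a power of two.
    static char *roundUp(char *P, uintptr_t A) {
      uintptr_t Mis = (uintptr_t)P & (A - 1); // bytes past the last A-boundary
      return P + (Mis ? (A - Mis) : 0);       // advance to the next boundary
    }
    // Inside an S-byte request padded to S+A bytes:
    //   char *Raw = (char*)malloc(S + A);
    //   char *Ptr = roundUp(Raw, A);

Note that only the aligned pointer is kept, so this memory can never be handed back to free(); the code accepts that leak, as the pre-patch version did.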
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 66417a71b2c8e..525cc84f945c7 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -16,11 +16,12 @@

 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/PassManager.h"
+#include "llvm/Support/ValueHandle.h"

 namespace llvm {

 class Function;
-class JITEvent_EmittedFunctionDetails;
+struct JITEvent_EmittedFunctionDetails;
 class MachineCodeEmitter;
 class MachineCodeInfo;
 class TargetJITInfo;
@@ -33,7 +34,7 @@ private:
   /// PendingFunctions - Functions which have not been code generated yet, but
   /// were called from a function being code generated.
-  std::vector<Function*> PendingFunctions;
+  std::vector<AssertingVH<Function> > PendingFunctions;

 public:
   explicit JITState(ModuleProvider *MP) : PM(MP), MP(MP) {}
@@ -43,7 +44,7 @@ public:
   }

   ModuleProvider *getMP() const { return MP; }
-  std::vector<Function*> &getPendingFunctions(const MutexGuard &L) {
+  std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
     return PendingFunctions;
   }
 };
@@ -55,10 +56,16 @@ class JIT : public ExecutionEngine {
   JITCodeEmitter *JCE;     // JCE object
   std::vector<JITEventListener*> EventListeners;

+  /// AllocateGVsWithCode - Some applications require that global variables and
+  /// code be allocated into the same region of memory, in which case this flag
+  /// should be set to true.  Doing so breaks freeMachineCodeForFunction.
+  bool AllocateGVsWithCode;
+
   JITState *jitstate;

-  JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
-      JITMemoryManager *JMM, CodeGenOpt::Level OptLevel);
+  JIT(ModuleProvider *MP, TargetMachine &tm, TargetJITInfo &tji,
+      JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+      bool AllocateGVsWithCode);
 public:
   ~JIT();

@@ -73,10 +80,13 @@ public:
   /// create - Create and return a new JIT compiler if there is one available
   /// for the current target.  Otherwise, return null.
   ///
-  static ExecutionEngine *create(ModuleProvider *MP, std::string *Err,
+  static ExecutionEngine *create(ModuleProvider *MP,
+                                 std::string *Err,
+                                 JITMemoryManager *JMM,
                                  CodeGenOpt::Level OptLevel =
-                                   CodeGenOpt::Default) {
-    return createJIT(MP, Err, 0, OptLevel);
+                                   CodeGenOpt::Default,
+                                 bool GVsWithCode = true) {
+    return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode);
   }

   virtual void addModuleProvider(ModuleProvider *MP);
@@ -145,16 +155,22 @@ public:
   /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
   /// function was encountered.  Add it to a pending list to be processed after
   /// the current function.
-  ///
+  ///
   void addPendingFunction(Function *F);
-
+
   /// getCodeEmitter - Return the code emitter this JIT is emitting into.
+  ///
   JITCodeEmitter *getCodeEmitter() const { return JCE; }
-
-  static ExecutionEngine *createJIT(ModuleProvider *MP, std::string *Err,
-                                    JITMemoryManager *JMM,
-                                    CodeGenOpt::Level OptLevel);

+  /// selectTarget - Pick a target either via -march or by guessing the native
+  /// arch.  Add any CPU features specified via -mcpu or -mattr.
+  static TargetMachine *selectTarget(ModuleProvider *MP, std::string *Err);
+
+  static ExecutionEngine *createJIT(ModuleProvider *MP,
+                                    std::string *ErrorStr,
+                                    JITMemoryManager *JMM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool GVsWithCode);

   // Run the JIT on F and return information about the generated code
   void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
@@ -170,7 +186,8 @@ public:
   void NotifyFreeingMachineCode(const Function &F, void *OldPtr);

 private:
-  static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM);
+  static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
+                                       TargetMachine &tm);
   void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
   void updateFunctionStub(Function *F);
   void updateDlsymStubTable();
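JITState's pending-function list now holds AssertingVH<Function> rather than raw Function pointers, so deleting a Function that is still queued fires an assertion instead of leaving a dangling pointer. The handle is otherwise pointer-like; a sketch:

    #include "llvm/Support/ValueHandle.h"

    std::vector<AssertingVH<Function> > Pending;
    Pending.push_back(F);               // implicit conversion from Function*
    Function *Last = Pending.back();    // converts back transparently
    // Erasing F from its Module while it is still in Pending would now
    // assert (in +Asserts builds) rather than corrupt the work list.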
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
new file mode 100644
index 0000000000000..fa640103c28b0
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -0,0 +1,208 @@
+//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITDebugRegisterer.h"
+#include "../../CodeGen/ELF.h"
+#include "../../CodeGen/ELFWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+  // Debuggers put a breakpoint in this function.
+  void DISABLE_INLINE __jit_debug_register_code() { }
+
+  // We put information about the JITed function in this global, which the
+  // debugger reads.  Make sure to specify the version statically, because the
+  // debugger checks the version before we can set it during runtime.
+  struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+}
+
+namespace {
+
+  /// JITDebugLock - Used to serialize all code registration events, since they
+  /// modify global variables.
+  sys::Mutex JITDebugLock;
+
+}
+
+JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { }
+
+JITDebugRegisterer::~JITDebugRegisterer() {
+  // Free all ELF memory.
+  for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end();
+       I != E; ++I) {
+    // Call the private method that doesn't update the map so our iterator
+    // doesn't break.
+    UnregisterFunctionInternal(I);
+  }
+  FnMap.clear();
+}
+
+std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
+  // Stack allocate an empty module with an empty LLVMContext for the ELFWriter
+  // API.  We don't use the real module because then the ELFWriter would write
+  // out unnecessary GlobalValues during finalization.
+  LLVMContext Context;
+  Module M("", Context);
+
+  // Make a buffer for the ELF in memory.
+  std::string Buffer;
+  raw_string_ostream O(Buffer);
+  ELFWriter EW(O, TM);
+  EW.doInitialization(M);
+
+  // Copy the binary into the .text section.  This isn't necessary, but it's
+  // useful to be able to disassemble the ELF by hand.
+  ELFSection &Text = EW.getTextSection((Function *)F);
+  Text.Addr = (uint64_t)I.FnStart;
+  // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+  // instead of a vector.
+  Text.getData().assign(I.FnStart, I.FnEnd);
+
+  // Copy the exception handling call frame information into the .eh_frame
+  // section.  This allows GDB to get a good stack trace, particularly on
+  // linux x86_64.  Mark this as a PROGBITS section that needs to be loaded
+  // into memory at runtime.
+  ELFSection &EH = EW.getSection(".eh_frame", ELFSection::SHT_PROGBITS,
+                                 ELFSection::SHF_ALLOC);
+  // Pointers in the DWARF EH info are all relative to the EH frame start,
+  // which is stored here.
+  EH.Addr = (uint64_t)I.EhStart;
+  // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+  // instead of a vector.
+  EH.getData().assign(I.EhStart, I.EhEnd);
+
+  // Add this single function to the symbol table, so the debugger prints the
+  // name instead of '???'.  We give the symbol default global visibility.
+  ELFSym *FnSym = ELFSym::getGV(F,
+                                ELFSym::STB_GLOBAL,
+                                ELFSym::STT_FUNC,
+                                ELFSym::STV_DEFAULT);
+  FnSym->SectionIdx = Text.SectionIdx;
+  FnSym->Size = I.FnEnd - I.FnStart;
+  FnSym->Value = 0;  // Offset from start of section.
+  EW.SymbolList.push_back(FnSym);
+
+  EW.doFinalization(M);
+  O.flush();
+
+  // When trying to debug why GDB isn't getting the debug info right, it's
+  // awfully helpful to write the object file to disk so that it can be
+  // inspected with readelf and objdump.
+  if (JITEmitDebugInfoToDisk) {
+    std::string Filename;
+    raw_string_ostream O2(Filename);
+    O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o";
+    O2.flush();
+    std::string Errors;
+    raw_fd_ostream O3(Filename.c_str(), Errors);
+    O3 << Buffer;
+    O3.close();
+  }
+
+  return Buffer;
+}
+
+void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
+  // TODO: Support non-ELF platforms.
+  if (!TM.getELFWriterInfo())
+    return;
+
+  std::string Buffer = MakeELF(F, I);
+
+  jit_code_entry *JITCodeEntry = new jit_code_entry();
+  JITCodeEntry->symfile_addr = Buffer.c_str();
+  JITCodeEntry->symfile_size = Buffer.size();
+
+  // Add a mapping from F to the entry and buffer, so we can delete this
+  // info later.
+  FnMap[F] = std::make_pair(Buffer, JITCodeEntry);
+
+  // Acquire the lock and do the registration.
+  {
+    MutexGuard locked(JITDebugLock);
+    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+
+    // Insert this entry at the head of the list.
+    JITCodeEntry->prev_entry = NULL;
+    jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
+    JITCodeEntry->next_entry = NextEntry;
+    if (NextEntry != NULL) {
+      NextEntry->prev_entry = JITCodeEntry;
+    }
+    __jit_debug_descriptor.first_entry = JITCodeEntry;
+    __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+    __jit_debug_register_code();
+  }
+}
+
+void JITDebugRegisterer::UnregisterFunctionInternal(
+    RegisteredFunctionsMap::iterator I) {
+  jit_code_entry *JITCodeEntry = I->second.second;
+
+  // Acquire the lock and do the unregistration.
+  {
+    MutexGuard locked(JITDebugLock);
+    __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
+
+    // Remove the jit_code_entry from the linked list.
+    jit_code_entry *PrevEntry = JITCodeEntry->prev_entry;
+    jit_code_entry *NextEntry = JITCodeEntry->next_entry;
+    if (NextEntry) {
+      NextEntry->prev_entry = PrevEntry;
+    }
+    if (PrevEntry) {
+      PrevEntry->next_entry = NextEntry;
+    } else {
+      assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
+      __jit_debug_descriptor.first_entry = NextEntry;
+    }
+
+    // Tell GDB which entry we removed, and unregister the code.
+    __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+    __jit_debug_register_code();
+  }
+
+  // Free the ELF file in memory.
+  std::string &Buffer = I->second.first;
+  Buffer.clear();
+}
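For orientation only: the other side of this protocol lives in the debugger, which walks the same list whenever the __jit_debug_register_code breakpoint fires. A schematic consumer (debugger-side illustration in C++; load_elf_from_memory is hypothetical):

    // Illustrative sketch of a GDB-style reader of the JIT interface.
    for (jit_code_entry *E = __jit_debug_descriptor.first_entry;
         E != 0; E = E->next_entry) {
      // Each entry points at one complete in-memory ELF image.
      load_elf_from_memory(E->symfile_addr, E->symfile_size);
    }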
+
+void JITDebugRegisterer::UnregisterFunction(const Function *F) {
+  // TODO: Support non-ELF platforms.
+  if (!TM.getELFWriterInfo())
+    return;
+
+  RegisteredFunctionsMap::iterator I = FnMap.find(F);
+  if (I == FnMap.end()) return;
+  UnregisterFunctionInternal(I);
+  FnMap.erase(I);
+}
+
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
new file mode 100644
index 0000000000000..dce506bbfefd9
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -0,0 +1,116 @@
+//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+  typedef enum {
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+  } jit_actions_t;
+
+  struct jit_code_entry {
+    struct jit_code_entry *next_entry;
+    struct jit_code_entry *prev_entry;
+    const char *symfile_addr;
+    uint64_t symfile_size;
+  };
+
+  struct jit_descriptor {
+    uint32_t version;
+    // This should be jit_actions_t, but we want to be specific about the
+    // bit-width.
+    uint32_t action_flag;
+    struct jit_code_entry *relevant_entry;
+    struct jit_code_entry *first_entry;
+  };
+
+}
+
+namespace llvm {
+
+class ELFSection;
+class Function;
+class TargetMachine;
+
+
+/// This class encapsulates information we want to send to the debugger.
+///
+struct DebugInfo {
+  uint8_t *FnStart;
+  uint8_t *FnEnd;
+  uint8_t *EhStart;
+  uint8_t *EhEnd;
+
+  DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {}
+};
+
+typedef DenseMap< const Function*, std::pair<std::string, jit_code_entry*> >
+  RegisteredFunctionsMap;
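Putting the pieces above together, the expected calling convention is: fill a DebugInfo with the code and EH-frame bounds the emitter just produced, then hand it to RegisterFunction (declared in the class below). A sketch, where the bound variables come from the JIT emitter:

    JITDebugRegisterer DR(TM);

    DebugInfo I;
    I.FnStart = FnStart;  I.FnEnd = FnEnd;   // machine-code bounds
    I.EhStart = EhStart;  I.EhEnd = EhEnd;   // .eh_frame bounds
    DR.RegisterFunction(F, I);               // builds the ELF, notifies GDB
    // ... later, before freeing the function's machine code:
    DR.UnregisterFunction(F);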
+
+/// This class registers debug info for JITed code with an attached debugger.
+/// Without proper debug info, GDB can't do things like source level debugging
+/// or even produce a proper stack trace on linux-x86_64.  To use this class,
+/// whenever a function is JITed, create a DebugInfo struct and pass it to the
+/// RegisterFunction method.  The method will then do whatever is necessary to
+/// inform the debugger about the JITed function.
+class JITDebugRegisterer {
+
+  TargetMachine &TM;
+
+  /// FnMap - A map of functions that have been registered to the associated
+  /// temporary files.  Used for cleanup.
+  RegisteredFunctionsMap FnMap;
+
+  /// MakeELF - Builds the ELF file in memory and returns a std::string that
+  /// contains the ELF.
+  std::string MakeELF(const Function *F, DebugInfo &I);
+
+public:
+  JITDebugRegisterer(TargetMachine &tm);
+
+  /// ~JITDebugRegisterer - Unregisters all code and frees symbol files.
+  ///
+  ~JITDebugRegisterer();
+
+  /// RegisterFunction - Register debug info for the given function with an
+  /// attached debugger.  Clients must call UnregisterFunction on all
+  /// registered functions before deleting them to free the associated symbol
+  /// file and unregister it from the debugger.
+  void RegisterFunction(const Function *F, DebugInfo &I);
+
+  /// UnregisterFunction - Unregister the debug info for the given function
+  /// from the debugger and free associated memory.
+  void UnregisterFunction(const Function *F);
+
+private:
+  /// UnregisterFunctionInternal - Unregister the debug info for the given
+  /// function from the debugger and delete any temporary files.  The private
+  /// version of this method does not remove the function from FnMap so that it
+  /// can be called while iterating over FnMap.
+  void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I);
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index e101ef371ed04..f2b28ad326e53 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -21,25 +21,27 @@
 #include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-
 using namespace llvm;

-JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : Jit(theJit) {}
+JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {}

 unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
                                                JITCodeEmitter& jce,
                                                unsigned char* StartFunction,
-                                               unsigned char* EndFunction) {
+                                               unsigned char* EndFunction,
+                                               unsigned char* &EHFramePtr) {
+  assert(MMI && "MachineModuleInfo not registered!");
+
   const TargetMachine& TM = F.getTarget();
   TD = TM.getTargetData();
-  needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding();
   stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
   RI = TM.getRegisterInfo();
   JCE = &jce;
@@ -48,14 +50,13 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
                                        EndFunction);

   unsigned char* Result = 0;
-  unsigned char* EHFramePtr = 0;

   const std::vector<Function*> Personalities = MMI->getPersonalities();
   EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);

   Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
                        StartFunction, EndFunction, ExceptionTable);
-
+
   return Result;
 }
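The size pre-computation in the hunks below leans on MCAsmInfo::getULEB128Size and getSLEB128Size, moved here from TargetAsmInfo by this patch. A ULEB128 value takes one byte per 7 payload bits; an equivalent standalone computation, for reference only:

    // Matches the semantics of MCAsmInfo::getULEB128Size.
    static unsigned ulebSize(uint64_t Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;   // each byte carries 7 bits of payload
        ++Size;
      } while (Value); // continue until no bits remain
      return Size;
    }
    // ulebSize(127) == 1, ulebSize(128) == 2, ulebSize(16384) == 3.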
registered!"); + // Map all labels and get rid of any dead landing pads. MMI->TidyLandingPads(); @@ -241,7 +242,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, for(std::vector::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { FilterOffsets.push_back(Offset); - Offset -= TargetAsmInfo::getULEB128Size(*I); + Offset -= MCAsmInfo::getULEB128Size(*I); } // Compute the actions table and gather the first action index for each @@ -266,10 +267,10 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); assert(Actions.size()); PrevAction = &Actions.back(); - SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) + - TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + + MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { - SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); SizeAction += -PrevAction->NextAction; PrevAction = PrevAction->Previous; } @@ -280,10 +281,10 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, int TypeID = TypeIds[I]; assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID); + unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction); + SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); SizeSiteActions += SizeAction; ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; @@ -386,29 +387,19 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, sizeof(int32_t) + // Site length. sizeof(int32_t)); // Landing pad. for (unsigned i = 0, e = CallSites.size(); i < e; ++i) - SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action); + SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); unsigned TypeOffset = sizeof(int8_t) + // Call site format // Call-site table length - TargetAsmInfo::getULEB128Size(SizeSites) + + MCAsmInfo::getULEB128Size(SizeSites) + SizeSites + SizeActions + SizeTypes; - unsigned TotalSize = sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset - TypeOffset; - - unsigned SizeAlign = (4 - TotalSize) & 3; - // Begin the exception table. - JCE->emitAlignment(4); - for (unsigned i = 0; i != SizeAlign; ++i) { - JCE->emitByte(0); - // Asm->EOL("Padding"); - } - + JCE->emitAlignmentWithFill(4, 0); + // Asm->EOL("Padding"); + unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue(); // Emit the header. 
@@ -475,11 +466,10 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
     GlobalVariable *GV = TypeInfos[M - 1];

     if (GV) {
-      if (TD->getPointerSize() == sizeof(int32_t)) {
+      if (TD->getPointerSize() == sizeof(int32_t))
         JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
-      } else {
+      else
         JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
-      }
     } else {
       if (TD->getPointerSize() == sizeof(int32_t))
         JCE->emitInt32(0);
@@ -495,8 +485,8 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
     JCE->emitULEB128Bytes(TypeID);
     //Asm->EOL("Filter TypeInfo index");
   }
-
-  JCE->emitAlignment(4);
+
+  JCE->emitAlignmentWithFill(4, 0);

   return DwarfExceptionTable;
 }
@@ -517,7 +507,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
   JCE->emitULEB128Bytes(1);
   JCE->emitSLEB128Bytes(stackGrowth);
   JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true));
-
+
   if (Personality) {
     // Augmentation Size: 3 small ULEBs of one byte each, and the personality
     // function which size is PointerSize.
@@ -533,10 +523,9 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
       JCE->emitByte(dwarf::DW_EH_PE_sdata8);
       JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
     }
-
+
     JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
     JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
-
   } else {
     JCE->emitULEB128Bytes(1);
     JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
@@ -545,11 +534,12 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
   std::vector<MachineMove> Moves;
   RI->getInitialFrameState(Moves);
   EmitFrameMoves(0, Moves);
-  JCE->emitAlignment(PointerSize);
-
-  JCE->emitInt32At((uintptr_t*)StartCommonPtr,
-                   (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
-                               FrameCommonBeginPtr));
+
+  JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+  JCE->emitInt32At((uintptr_t*)StartCommonPtr,
+                   (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+                               FrameCommonBeginPtr));

   return StartCommonPtr;
 }
@@ -574,13 +564,19 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,

   // If there is a personality and landing pads then point to the language
   // specific data area in the exception table.
-  if (MMI->getPersonalityIndex()) {
-    JCE->emitULEB128Bytes(4);
+  if (Personality) {
+    JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8);

-    if (!MMI->getLandingPads().empty()) {
-      JCE->emitInt32(ExceptionTable - (unsigned char*)JCE->getCurrentPCValue());
+    if (PointerSize == 4) {
+      if (!MMI->getLandingPads().empty())
+        JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+      else
+        JCE->emitInt32((int)0);
     } else {
-      JCE->emitInt32((int)0);
+      if (!MMI->getLandingPads().empty())
+        JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+      else
+        JCE->emitInt64((int)0);
     }
   } else {
     JCE->emitULEB128Bytes(0);
@@ -589,14 +585,14 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,

   // Indicate locations of function specific callee saved registers in
   // frame.
   EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
-
-  JCE->emitAlignment(PointerSize);
-
+
+  JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
   // Indicate the size of the table
-  JCE->emitInt32At((uintptr_t*)StartEHPtr,
-                   (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
-                               StartEHPtr));
-
+  JCE->emitInt32At((uintptr_t*)StartEHPtr,
+                   (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+                               StartEHPtr));
+
   // Double zeroes for the unwind runtime
   if (PointerSize == 8) {
     JCE->emitInt64(0);
@@ -605,7 +601,6 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
     JCE->emitInt32(0);
     JCE->emitInt32(0);
   }
-
   return StartEHPtr;
 }

@@ -616,7 +611,6 @@ unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
                                          unsigned char* EndFunction) {
   const TargetMachine& TM = F.getTarget();
   TD = TM.getTargetData();
-  needsIndirectEncoding = TM.getTargetAsmInfo()->getNeedsIndirectEncoding();
   stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
   RI = TM.getRegisterInfo();
   JCE = &jce;
@@ -630,7 +624,7 @@ unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
   FinalSize += GetEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()],
                                      StartFunction);
-
+
   return FinalSize;
 }

@@ -653,11 +647,11 @@ JITDwarfEmitter::GetEHFrameSizeInBytes(const Function* Personality,
   FinalSize += 3 * PointerSize;
   // If there is a personality and landing pads then point to the language
   // specific data area in the exception table.
-  if (MMI->getPersonalityIndex()) {
-    FinalSize += TargetAsmInfo::getULEB128Size(4);
+  if (Personality) {
+    FinalSize += MCAsmInfo::getULEB128Size(4);
     FinalSize += PointerSize;
   } else {
-    FinalSize += TargetAsmInfo::getULEB128Size(0);
+    FinalSize += MCAsmInfo::getULEB128Size(0);
   }

   // Indicate locations of function specific callee saved registers in
@@ -685,24 +679,24 @@ unsigned JITDwarfEmitter::GetCommonEHFrameSizeInBytes(const Function* Personalit
   FinalSize += 4;
   FinalSize += 1;
   FinalSize += Personality ? 5 : 3; // "zPLR" or "zR"
-  FinalSize += TargetAsmInfo::getULEB128Size(1);
-  FinalSize += TargetAsmInfo::getSLEB128Size(stackGrowth);
+  FinalSize += MCAsmInfo::getULEB128Size(1);
+  FinalSize += MCAsmInfo::getSLEB128Size(stackGrowth);
   FinalSize += 1;

   if (Personality) {
-    FinalSize += TargetAsmInfo::getULEB128Size(7);
+    FinalSize += MCAsmInfo::getULEB128Size(7);

     // Encoding
     FinalSize+= 1;
     //Personality
     FinalSize += PointerSize;

-    FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
-    FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+    FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+    FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
   } else {
-    FinalSize += TargetAsmInfo::getULEB128Size(1);
-    FinalSize += TargetAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
+    FinalSize += MCAsmInfo::getULEB128Size(1);
+    FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
   }

   std::vector<MachineMove> Moves;
@@ -754,23 +748,23 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
       } else {
         ++FinalSize;
         unsigned RegNum = RI->getDwarfRegNum(Src.getReg(), true);
-        FinalSize += TargetAsmInfo::getULEB128Size(RegNum);
+        FinalSize += MCAsmInfo::getULEB128Size(RegNum);
       }

       int Offset = -Src.getOffset();

-      FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+      FinalSize += MCAsmInfo::getULEB128Size(Offset);
     } else {
-      assert(0 && "Machine move no supported yet.");
+      llvm_unreachable("Machine move not supported yet.");
     }
   } else if (Src.isReg() &&
              Src.getReg() == MachineLocation::VirtualFP) {
     if (Dst.isReg()) {
       ++FinalSize;
       unsigned RegNum = RI->getDwarfRegNum(Dst.getReg(), true);
-      FinalSize += TargetAsmInfo::getULEB128Size(RegNum);
+      FinalSize += MCAsmInfo::getULEB128Size(RegNum);
     } else {
-      assert(0 && "Machine move no supported yet.");
+      llvm_unreachable("Machine move not supported yet.");
     }
   } else {
     unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
@@ -778,15 +772,15 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,

     if (Offset < 0) {
       ++FinalSize;
-      FinalSize += TargetAsmInfo::getULEB128Size(Reg);
-      FinalSize += TargetAsmInfo::getSLEB128Size(Offset);
+      FinalSize += MCAsmInfo::getULEB128Size(Reg);
+      FinalSize += MCAsmInfo::getSLEB128Size(Offset);
     } else if (Reg < 64) {
       ++FinalSize;
-      FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+      FinalSize += MCAsmInfo::getULEB128Size(Offset);
     } else {
       ++FinalSize;
-      FinalSize += TargetAsmInfo::getULEB128Size(Reg);
-      FinalSize += TargetAsmInfo::getULEB128Size(Offset);
+      FinalSize += MCAsmInfo::getULEB128Size(Reg);
+      FinalSize += MCAsmInfo::getULEB128Size(Offset);
     }
   }
 }
@@ -829,7 +823,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
   for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
       E = FilterIds.end(); I != E; ++I) {
     FilterOffsets.push_back(Offset);
-    Offset -= TargetAsmInfo::getULEB128Size(*I);
+    Offset -= MCAsmInfo::getULEB128Size(*I);
   }

   // Compute the actions table and gather the first action index for each
@@ -854,10 +848,10 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
       const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
       assert(Actions.size());
       PrevAction = &Actions.back();
-      SizeAction = TargetAsmInfo::getSLEB128Size(PrevAction->NextAction) +
-        TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+      SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+        MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
       for (unsigned j = NumShared; j != SizePrevIds; ++j) {
-        SizeAction -= TargetAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+        SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
         SizeAction += -PrevAction->NextAction;
         PrevAction = PrevAction->Previous;
       }
@@ -868,10 +862,10 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
       int TypeID = TypeIds[I];
       assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
       int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
-      unsigned SizeTypeID = TargetAsmInfo::getSLEB128Size(ValueForTypeID);
+      unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);

       int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
-      SizeAction = SizeTypeID + TargetAsmInfo::getSLEB128Size(NextAction);
+      SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
       SizeSiteActions += SizeAction;

       ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
@@ -974,18 +968,18 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
                                             sizeof(int32_t) + // Site length.
                                             sizeof(int32_t)); // Landing pad.
   for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
-    SizeSites += TargetAsmInfo::getULEB128Size(CallSites[i].Action);
+    SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);

   unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();

   unsigned TypeOffset = sizeof(int8_t) + // Call site format
                         // Call-site table length
-                        TargetAsmInfo::getULEB128Size(SizeSites) +
+                        MCAsmInfo::getULEB128Size(SizeSites) +
                         SizeSites + SizeActions + SizeTypes;

   unsigned TotalSize = sizeof(int8_t) + // LPStart format
                        sizeof(int8_t) + // TType format
-                       TargetAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
+                       MCAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
                        TypeOffset;

   unsigned SizeAlign = (4 - TotalSize) & 3;
@@ -1023,7 +1017,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
     // Asm->EOL("Landing pad");
     FinalSize += PointerSize;

-    FinalSize += TargetAsmInfo::getULEB128Size(S.Action);
+    FinalSize += MCAsmInfo::getULEB128Size(S.Action);
     // Asm->EOL("Action");
   }

@@ -1032,9 +1026,9 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
     ActionEntry &Action = Actions[I];

     //Asm->EOL("TypeInfo index");
-    FinalSize += TargetAsmInfo::getSLEB128Size(Action.ValueForTypeID);
+    FinalSize += MCAsmInfo::getSLEB128Size(Action.ValueForTypeID);
     //Asm->EOL("Next action");
-    FinalSize += TargetAsmInfo::getSLEB128Size(Action.NextAction);
+    FinalSize += MCAsmInfo::getSLEB128Size(Action.NextAction);
   }

   // Emit the type ids.
@@ -1046,7 +1040,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {

   // Emit the filter typeids.
   for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
     unsigned TypeID = FilterIds[j];
-    FinalSize += TargetAsmInfo::getULEB128Size(TypeID);
+    FinalSize += MCAsmInfo::getULEB128Size(TypeID);
     //Asm->EOL("Filter TypeInfo index");
   }

diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index 9120ed44e6a60..e627550d6d0e6 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -32,7 +32,6 @@ class JITDwarfEmitter {
   const TargetRegisterInfo* RI;
   MachineModuleInfo* MMI;
   JIT& Jit;
-  bool needsIndirectEncoding;
   bool stackGrowthDirection;

   unsigned char* EmitExceptionTable(MachineFunction* MF,
@@ -68,7 +67,8 @@ public:
   unsigned char* EmitDwarfTable(MachineFunction& F,
                                 JITCodeEmitter& JCE,
                                 unsigned char* StartFunction,
-                                unsigned char* EndFunction);
+                                unsigned char* EndFunction,
+                                unsigned char* &EHFramePtr);


   unsigned GetDwarfTableSizeInBytes(MachineFunction& F,
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 8fe7ab848b736..eacd9f9720586 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -14,7 +14,9 @@

 #define DEBUG_TYPE "jit"
 #include "JIT.h"
+#include "JITDebugRegisterer.h"
 #include "JITDwarfEmitter.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/DerivedTypes.h"
@@ -33,8 +35,10 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Disassembler.h"
 #include "llvm/System/Memory.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -49,6 +53,7 @@ using namespace llvm;

 STATISTIC(NumBytes, "Number of bytes of machine code compiled");
 STATISTIC(NumRelos, "Number of relocations applied");
+STATISTIC(NumRetries, "Number of retries with more memory");
 static JIT *TheJIT = 0;

@@ -59,7 +64,7 @@ namespace {
   class JITResolverState {
   public:
     typedef std::map<AssertingVH<Function>, void*> FunctionToStubMapTy;
-    typedef std::map<void*, Function*> StubToFunctionMapTy;
+    typedef std::map<void*, AssertingVH<Function> > StubToFunctionMapTy;
     typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
   private:
     /// FunctionToStubMap - Keep track of the stub created for a particular
@@ -193,9 +198,9 @@ void *JITResolver::getFunctionStub(Function *F) {

   // Call the lazy resolver function unless we are JIT'ing non-lazily, in which
   // case we must resolve the symbol now.
-  void *Actual = TheJIT->isLazyCompilationDisabled()
+  void *Actual = TheJIT->isLazyCompilationDisabled()
     ? (void *)0 : (void *)(intptr_t)LazyResolverFn;
-
+
   // If this is an external declaration, attempt to resolve the address now
   // to place in the stub.
   if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) {
@@ -220,20 +225,20 @@ void *JITResolver::getFunctionStub(Function *F) {
     TheJIT->updateGlobalMapping(F, Stub);
   }

-  DOUT << "JIT: Stub emitted at [" << Stub << "] for function '"
-       << F->getName() << "'\n";
+  DEBUG(errs() << "JIT: Stub emitted at [" << Stub << "] for function '"
+               << F->getName() << "'\n");

   // Finally, keep track of the stub-to-Function mapping so that the
   // JITCompilerFn knows which function to compile!
   state.getStubToFunctionMap(locked)[Stub] = F;
-
+
   // If we are JIT'ing non-lazily but need to call a function that does not
   // exist yet, add it to the JIT's work list so that we can fill in the stub
   // address later.
   if (!Actual && TheJIT->isLazyCompilationDisabled())
     if (!F->isDeclaration() || F->hasNotBeenReadFromBitcode())
       TheJIT->addPendingFunction(F);
-
+
   return Stub;
 }

@@ -250,8 +255,8 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
   IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
                                                      *TheJIT->getCodeEmitter());

-  DOUT << "JIT: Indirect symbol emitted at [" << IndirectSym << "] for GV '"
-       << GV->getName() << "'\n";
+  DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+               << "] for GV '" << GV->getName() << "'\n");

   return IndirectSym;
 }
@@ -266,8 +271,8 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) {
   Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr,
                                                *TheJIT->getCodeEmitter());

-  DOUT << "JIT: Stub emitted at [" << Stub
-       << "] for external function at '" << FnAddr << "'\n";
+  DEBUG(errs() << "JIT: Stub emitted at [" << Stub
+               << "] for external function at '" << FnAddr << "'\n");
   return Stub;
 }

@@ -276,7 +281,8 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) {
   if (!idx) {
     idx = ++nextGOTIndex;
     revGOTMap[addr] = idx;
-    DOUT << "JIT: Adding GOT entry " << idx << " for addr [" << addr << "]\n";
+    DEBUG(errs() << "JIT: Adding GOT entry " << idx << " for addr ["
+                 << addr << "]\n");
   }
   return idx;
 }
@@ -373,9 +379,8 @@ void *JITResolver::JITCompilerFn(void *Stub) {

     // If lazy compilation is disabled, emit a useful error message and abort.
     if (TheJIT->isLazyCompilationDisabled()) {
-      cerr << "LLVM JIT requested to do lazy compilation of function '"
-           << F->getName() << "' when lazy compiles are disabled!\n";
-      abort();
+      llvm_report_error("LLVM JIT requested to do lazy compilation of function '"
+                        + F->getName() + "' when lazy compiles are disabled!");
     }

     // We might like to remove the stub from the StubToFunction map.
@@ -385,9 +390,9 @@ void *JITResolver::JITCompilerFn(void *Stub) {
     // it needs to call.
     //JR.state.getStubToFunctionMap(locked).erase(I);

-    DOUT << "JIT: Lazily resolving function '" << F->getName()
-         << "' In stub ptr = " << Stub << " actual ptr = "
-         << ActualPtr << "\n";
+    DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName()
+                 << "' In stub ptr = " << Stub << " actual ptr = "
+                 << ActualPtr << "\n");

     Result = TheJIT->getPointerToFunction(F);
   }
@@ -424,6 +429,12 @@ namespace {
     // save BufferBegin/BufferEnd/CurBufferPtr here.
     uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;

+    // When reattempting to JIT a function after running out of space, we store
+    // the estimated size of the function we're trying to JIT here, so we can
+    // ask the memory manager for at least this much space.  When we
+    // successfully emit the function, we reset this back to zero.
+    uintptr_t SizeEstimate;
+
     /// Relocations - These are the relocations that the function needs, as
     /// emitted.
     std::vector<MachineRelocation> Relocations;
@@ -455,9 +466,12 @@ namespace {

     /// Resolver - This contains info about the currently resolved functions.
     JITResolver Resolver;
-
+
     /// DE - The dwarf emitter for the jit.
-    JITDwarfEmitter *DE;
+    OwningPtr<JITDwarfEmitter> DE;
+
+    /// DR - The debug registerer for the jit.
+    OwningPtr<JITDebugRegisterer> DR;

     /// LabelLocations - This vector is a mapping from Label ID's to their
     /// address.
@@ -472,7 +486,12 @@ namespace {

     // CurFn - The llvm function being emitted.  Only valid during
     // finishFunction().
     const Function *CurFn;
-
+
+    /// Information about emitted code, which is passed to the
+    /// JITEventListeners.  This is reset in startFunction and used in
+    /// finishFunction.
+    JITEvent_EmittedFunctionDetails EmissionDetails;
+
     // CurFnStubUses - For a given Function, a vector of stubs that it
     // references.  This facilitates the JIT detecting that a stub is no
     // longer used, so that it may be deallocated.
@@ -487,19 +506,26 @@ namespace {
     // in the JITResolver's ExternalFnToStubMap.
     StringMap<void*> ExtFnStubs;

+    DebugLocTuple PrevDLT;
+
   public:
-    JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0) {
+    JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+      : SizeEstimate(0), Resolver(jit), MMI(0), CurFn(0) {
       MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
       if (jit.getJITInfo().needsGOT()) {
         MemMgr->AllocateGOT();
-        DOUT << "JIT is managing a GOT\n";
+        DEBUG(errs() << "JIT is managing a GOT\n");
       }

-      if (ExceptionHandling) DE = new JITDwarfEmitter(jit);
+      if (DwarfExceptionHandling || JITEmitDebugInfo) {
+        DE.reset(new JITDwarfEmitter(jit));
+      }
+      if (JITEmitDebugInfo) {
+        DR.reset(new JITDebugRegisterer(TM));
+      }
     }
     ~JITEmitter() {
       delete MemMgr;
-      if (ExceptionHandling) delete DE;
     }

     /// classof - Methods for support type inquiry through isa, cast, and
@@ -527,6 +553,11 @@ namespace {
     /// allocate a new one of the given size.
     virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);

+    /// allocateGlobal - Allocate memory for a global.  Unlike allocateSpace,
+    /// this method does not allocate memory in the current output buffer,
+    /// because a global may live longer than the current function.
+    virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
     virtual void addRelocation(const MachineRelocation &MR) {
       Relocations.push_back(MR);
     }
@@ -535,8 +566,8 @@ namespace {
       if (MBBLocations.size() <= (unsigned)MBB->getNumber())
         MBBLocations.resize((MBB->getNumber()+1)*2);
       MBBLocations[MBB->getNumber()] = getCurrentPCValue();
-      DOUT << "JIT: Emitting BB" << MBB->getNumber() << " at ["
-           << (void*) getCurrentPCValue() << "]\n";
+      DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+                   << (void*) getCurrentPCValue() << "]\n");
     }

     virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
@@ -548,9 +579,14 @@ namespace {
       return MBBLocations[MBB->getNumber()];
     }

+    /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+    /// given function.  Increase the minimum allocation size so that we get
+    /// more memory next time.
+    void retryWithMoreMemory(MachineFunction &F);
+
     /// deallocateMemForFunction - Deallocate all memory for the specified
     /// function body.
-    void deallocateMemForFunction(Function *F);
+    void deallocateMemForFunction(const Function *F);

     /// AddStubToCurrentFunction - Mark the current function being JIT'd as
     /// using the stub at the specified address. Allows
@@ -561,6 +597,8 @@ namespace {
     /// MachineRelocations that reference external functions by name.
     const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; }

+    virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
     virtual void emitLabel(uint64_t LabelID) {
       if (LabelLocations.size() <= LabelID)
         LabelLocations.resize((LabelID+1)*2);
@@ -575,14 +613,14 @@ namespace {

     virtual void setModuleInfo(MachineModuleInfo* Info) {
       MMI = Info;
-      if (ExceptionHandling) DE->setModuleInfo(Info);
+      if (DE.get()) DE->setModuleInfo(Info);
     }

-    void setMemoryExecutable(void) {
+    void setMemoryExecutable() {
       MemMgr->setMemoryExecutable();
     }

-    JITMemoryManager *getMemMgr(void) const { return MemMgr; }
+    JITMemoryManager *getMemMgr() const { return MemMgr; }

   private:
     void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub);
@@ -606,7 +644,7 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
   // If we have already compiled the function, return a pointer to its body.
   Function *F = cast<Function>(V);
   void *ResultPtr;
-  if (!DoesntNeedStub && !TheJIT->isLazyCompilationDisabled()) {
+  if (!DoesntNeedStub) {
     // Return the function stub if it's already created.
     ResultPtr = Resolver.getFunctionStubIfAvailable(F);
     if (ResultPtr)
@@ -658,11 +696,8 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
 }

 void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
-  if (!TheJIT->areDlsymStubsEnabled())
-    return;
-
   assert(CurFn && "Stub added to current function, but current function is 0!");
-
+
   SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn];
   StubsUsed.push_back(StubAddr);

@@ -670,6 +705,23 @@ void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
   FnRefs.insert(CurFn);
 }

+void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
+  if (!DL.isUnknown()) {
+    DebugLocTuple CurDLT = EmissionDetails.MF->getDebugLocTuple(DL);
+
+    if (BeforePrintingInsn) {
+      if (CurDLT.Scope != 0 && PrevDLT != CurDLT) {
+        JITEvent_EmittedFunctionDetails::LineStart NextLine;
+        NextLine.Address = getCurrentPCValue();
+        NextLine.Loc = DL;
+        EmissionDetails.LineStarts.push_back(NextLine);
+      }
+
+      PrevDLT = CurDLT;
+    }
+  }
+}
+
 static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
                                            const TargetData *TD) {
   const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
@@ -713,7 +765,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
   size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
   size_t GVAlign = (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
-  DOUT << "JIT: Adding in size " << GVSize << " alignment " << GVAlign;
+  DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
   DEBUG(GV->dump());
   // Assume code section ends with worst possible alignment, so first
   // variable needs maximal padding.
@@ -772,8 +824,10 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
       break;
     }
     default: {
-      cerr << "ConstantExpr not handled: " << *CE << "\n";
-      abort();
+      std::string msg;
+      raw_string_ostream Msg(msg);
+      Msg << "ConstantExpr not handled: " << *CE;
+      llvm_report_error(Msg.str());
     }
     }
   }
@@ -839,7 +893,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
       }
     }
   }
-  DOUT << "JIT: About to look through initializers\n";
+  DEBUG(errs() << "JIT: About to look through initializers\n");

   // Look for more globals that are referenced only from initializers.
   // GVSet.end is computed each time because the set can grow as we go.
for (SmallPtrSet::iterator I = GVSet.begin(); @@ -853,14 +907,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } void JITEmitter::startFunction(MachineFunction &F) { - DOUT << "JIT: Starting CodeGen of Function " - << F.getFunction()->getName() << "\n"; + DEBUG(errs() << "JIT: Starting CodeGen of Function " + << F.getFunction()->getName() << "\n"); uintptr_t ActualSize = 0; // Set the memory writable, if it's not already MemMgr->setMemoryWritable(); if (MemMgr->NeedsExactSize()) { - DOUT << "JIT: ExactSize\n"; + DEBUG(errs() << "JIT: ExactSize\n"); const TargetInstrInfo* TII = F.getTarget().getInstrInfo(); MachineJumpTableInfo *MJTI = F.getJumpTableInfo(); MachineConstantPool *MCP = F.getConstantPool(); @@ -887,12 +941,15 @@ void JITEmitter::startFunction(MachineFunction &F) { // Add the function size ActualSize += TII->GetFunctionSizeInBytes(F); - DOUT << "JIT: ActualSize before globals " << ActualSize << "\n"; + DEBUG(errs() << "JIT: ActualSize before globals " << ActualSize << "\n"); // Add the size of the globals that will be allocated after this function. // These are all the ones referenced from this function that were not // previously allocated. ActualSize += GetSizeOfGlobalsInBytes(F); - DOUT << "JIT: ActualSize after globals " << ActualSize << "\n"; + DEBUG(errs() << "JIT: ActualSize after globals " << ActualSize << "\n"); + } else if (SizeEstimate > 0) { + // SizeEstimate will be non-zero on reallocation attempts. + ActualSize = SizeEstimate; } BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(), @@ -910,17 +967,22 @@ void JITEmitter::startFunction(MachineFunction &F) { TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr); MBBLocations.clear(); + + EmissionDetails.MF = &F; + EmissionDetails.LineStarts.clear(); } bool JITEmitter::finishFunction(MachineFunction &F) { if (CurBufferPtr == BufferEnd) { - // FIXME: Allocate more space, then try again. - cerr << "JIT: Ran out of space for generated machine code!\n"; - abort(); + // We must call endFunctionBody before retrying, because + // deallocateMemForFunction requires it. + MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + retryWithMoreMemory(F); + return true; } - + emitJumpTableInfo(F.getJumpTableInfo()); - + // FnStart is the start of the text, not the start of the constant pool and // other per-function data. uint8_t *FnStart = @@ -941,8 +1003,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (MR.isExternalSymbol()) { ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), false); - DOUT << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" - << ResultPtr << "]\n"; + DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" + << ResultPtr << "]\n"); // If the target REALLY wants a stub for this function, emit it now. 
if (!MR.doesntNeedStub()) { @@ -983,9 +1045,9 @@ bool JITEmitter::finishFunction(MachineFunction &F) { unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); MR.setGOTIndex(idx); if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { - DOUT << "JIT: GOT was out of date for " << ResultPtr - << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] - << "\n"; + DEBUG(errs() << "JIT: GOT was out of date for " << ResultPtr + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; } } @@ -1000,8 +1062,9 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (MemMgr->isManagingGOT()) { unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { - DOUT << "JIT: GOT was out of date for " << (void*)BufferBegin - << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] << "\n"; + DEBUG(errs() << "JIT: GOT was out of date for " << (void*)BufferBegin + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; } } @@ -1011,9 +1074,12 @@ bool JITEmitter::finishFunction(MachineFunction &F) { MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); if (CurBufferPtr == BufferEnd) { - // FIXME: Allocate more space, then try again. - cerr << "JIT: Ran out of space for generated machine code!\n"; - abort(); + retryWithMoreMemory(F); + return true; + } else { + // Now that we've succeeded in emitting the function, reset the + // SizeEstimate back down to zero. + SizeEstimate = 0; } BufferBegin = CurBufferPtr = 0; @@ -1022,14 +1088,13 @@ bool JITEmitter::finishFunction(MachineFunction &F) { // Invalidate the icache if necessary. sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart); - JITEvent_EmittedFunctionDetails Details; TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, - Details); + EmissionDetails); - DOUT << "JIT: Finished CodeGen of [" << (void*)FnStart - << "] Function: " << F.getFunction()->getName() - << ": " << (FnEnd-FnStart) << " bytes of text, " - << Relocations.size() << " relocations\n"; + DEBUG(errs() << "JIT: Finished CodeGen of [" << (void*)FnStart + << "] Function: " << F.getFunction()->getName() + << ": " << (FnEnd-FnStart) << " bytes of text, " + << Relocations.size() << " relocations\n"); Relocations.clear(); ConstPoolAddresses.clear(); @@ -1037,45 +1102,42 @@ bool JITEmitter::finishFunction(MachineFunction &F) { // Mark code region readable and executable if it's not so already. 
MemMgr->setMemoryExecutable(); -#ifndef NDEBUG - { + DEBUG( if (sys::hasDisassembler()) { - DOUT << "JIT: Disassembled code:\n"; - DOUT << sys::disassembleBuffer(FnStart, FnEnd-FnStart, (uintptr_t)FnStart); + errs() << "JIT: Disassembled code:\n"; + errs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, + (uintptr_t)FnStart); } else { - DOUT << "JIT: Binary code:\n"; - DOUT << std::hex; + errs() << "JIT: Binary code:\n"; uint8_t* q = FnStart; for (int i = 0; q < FnEnd; q += 4, ++i) { if (i == 4) i = 0; if (i == 0) - DOUT << "JIT: " << std::setw(8) << std::setfill('0') - << (long)(q - FnStart) << ": "; + errs() << "JIT: " << (long)(q - FnStart) << ": "; bool Done = false; for (int j = 3; j >= 0; --j) { if (q + j >= FnEnd) Done = true; else - DOUT << std::setw(2) << std::setfill('0') << (unsigned short)q[j]; + errs() << (unsigned short)q[j]; } if (Done) break; - DOUT << ' '; + errs() << ' '; if (i == 3) - DOUT << '\n'; + errs() << '\n'; } - DOUT << std::dec; - DOUT<< '\n'; + errs()<< '\n'; } - } -#endif - if (ExceptionHandling) { + ); + + if (DwarfExceptionHandling || JITEmitDebugInfo) { uintptr_t ActualSize = 0; SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - + if (MemMgr->NeedsExactSize()) { ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd); } @@ -1083,14 +1145,28 @@ bool JITEmitter::finishFunction(MachineFunction &F) { BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), ActualSize); BufferEnd = BufferBegin+ActualSize; - uint8_t* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd); + uint8_t *EhStart; + uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, + EhStart); MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, FrameRegister); + uint8_t *EhEnd = CurBufferPtr; BufferBegin = SavedBufferBegin; BufferEnd = SavedBufferEnd; CurBufferPtr = SavedCurBufferPtr; - TheJIT->RegisterTable(FrameRegister); + if (DwarfExceptionHandling) { + TheJIT->RegisterTable(FrameRegister); + } + + if (JITEmitDebugInfo) { + DebugInfo I; + I.FnStart = FnStart; + I.FnEnd = FnEnd; + I.EhStart = EhStart; + I.EhEnd = EhEnd; + DR->RegisterFunction(F.getFunction(), I); + } } if (MMI) @@ -1099,11 +1175,28 @@ bool JITEmitter::finishFunction(MachineFunction &F) { return false; } +void JITEmitter::retryWithMoreMemory(MachineFunction &F) { + DEBUG(errs() << "JIT: Ran out of space for native code. Reattempting.\n"); + Relocations.clear(); // Clear the old relocations or we'll reapply them. + ConstPoolAddresses.clear(); + ++NumRetries; + deallocateMemForFunction(F.getFunction()); + // Try again with at least twice as much free space. + SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin)); +} + /// deallocateMemForFunction - Deallocate all memory for the specified /// function body. Also drop any references the function has to stubs. -void JITEmitter::deallocateMemForFunction(Function *F) { +void JITEmitter::deallocateMemForFunction(const Function *F) { MemMgr->deallocateMemForFunction(F); + // TODO: Do we need to unregister exception handling information from libgcc + // here? + + if (JITEmitDebugInfo) { + DR->UnregisterFunction(F); + } + // If the function did not reference any stubs, return. if (CurFnStubUses.find(F) == CurFnStubUses.end()) return; @@ -1125,7 +1218,7 @@ void JITEmitter::deallocateMemForFunction(Function *F) { // in the JITResolver. Were there a memory manager deallocateStub routine, // we could call that at this point too. 
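With retryWithMoreMemory in place, a true return from finishFunction now means "re-emit with a bigger buffer" rather than abort. A hedged sketch of the caller-side contract this assumes, with the actual instruction emission reduced to a hypothetical stand-in:

// Illustrative only: emitAllInstrs stands in for the target's real
// machine-code emission pass body.
static void emitAllInstrs(MachineFunction &MF, JITEmitter &JE);

static void emitUntilItFits(MachineFunction &MF, JITEmitter &JE) {
  do {
    JE.startFunction(MF);   // a retry sees SizeEstimate > 0 and asks the
                            // memory manager for a larger buffer
    emitAllInstrs(MF, JE);
  } while (JE.finishFunction(MF)); // true => overflow, memory was released

}
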
if (FnRefs.empty()) { - DOUT << "\nJIT: Invalidated Stub at [" << Stub << "]\n"; + DEBUG(errs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); StubFnRefs.erase(Stub); // Invalidate the stub. If it is a GV stub, update the JIT's global @@ -1161,6 +1254,11 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { return CurBufferPtr; } +void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { + // Delegate this call through the memory manager. + return MemMgr->allocateGlobal(Size, Alignment); +} + void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { if (TheJIT->getJITInfo().hasCustomConstantPool()) return; @@ -1175,8 +1273,8 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { if (ConstantPoolBase == 0) return; // Buffer overflow. - DOUT << "JIT: Emitted constant pool at [" << ConstantPoolBase - << "] (size: " << Size << ", alignment: " << Align << ")\n"; + DEBUG(errs() << "JIT: Emitted constant pool at [" << ConstantPoolBase + << "] (size: " << Size << ", alignment: " << Align << ")\n"); // Initialize the memory for all of the constant pool entries. unsigned Offset = 0; @@ -1189,13 +1287,12 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { ConstPoolAddresses.push_back(CAddr); if (CPE.isMachineConstantPoolEntry()) { // FIXME: add support to lower machine constant pool values into bytes! - cerr << "Initialize memory with machine specific constant pool entry" - << " has not been implemented!\n"; - abort(); + llvm_report_error("Initialize memory with machine specific constant pool " "entry has not been implemented!"); } TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); - DOUT << "JIT: CP" << i << " at [0x" - << std::hex << CAddr << std::dec << "]\n"; + DEBUG(errs() << "JIT: CP" << i << " at [0x"; + errs().write_hex(CAddr) << "]\n"); const Type *Ty = CPE.Val.ConstVal->getType(); Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty); @@ -1322,8 +1419,9 @@ uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { // Public interface to this file //===----------------------------------------------------------------------===// -JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM) { - return new JITEmitter(jit, JMM); +JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, + TargetMachine &tm) { + return new JITEmitter(jit, JMM, tm); } // getPointerToNamedFunction - This function is used as a global wrapper to @@ -1396,7 +1494,7 @@ void JIT::updateDlsymStubTable() { SmallVector Offsets; for (unsigned i = 0; i != GVs.size(); ++i) { Offsets.push_back(offset); - offset += GVs[i]->getName().length() + 1; + offset += GVs[i]->getName().size() + 1; } for (StringMapConstIterator i = ExtFns.begin(), e = ExtFns.end(); i != e; ++i) { diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 70ccdccb8049c..474843f066247 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -11,9 +11,16 @@ // //===----------------------------------------------------------------------===// -#include "llvm/GlobalValue.h" +#define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/GlobalValue.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include
"llvm/System/Memory.h" #include #include @@ -24,6 +31,7 @@ #include using namespace llvm; +STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT"); JITMemoryManager::~JITMemoryManager() {} @@ -140,7 +148,7 @@ FreeRangeHeader *FreeRangeHeader::AllocateBlock() { /// FreeRangeHeader to allocate from. FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) { MemoryRangeHeader *FollowingBlock = &getBlockAfter(); - assert(ThisAllocated && "This block is already allocated!"); + assert(ThisAllocated && "This block is already free!"); assert(FollowingBlock->PrevAllocated && "Flags out of sync!"); FreeRangeHeader *FreeListToReturn = FreeList; @@ -243,67 +251,160 @@ TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) { // Memory Block Implementation. //===----------------------------------------------------------------------===// -namespace { +namespace { + + class DefaultJITMemoryManager; + + class JITSlabAllocator : public SlabAllocator { + DefaultJITMemoryManager &JMM; + public: + JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { } + virtual ~JITSlabAllocator() { } + virtual MemSlab *Allocate(size_t Size); + virtual void Deallocate(MemSlab *Slab); + }; + /// DefaultJITMemoryManager - Manage memory for the JIT code generation. /// This splits a large block of MAP_NORESERVE'd memory into two /// sections, one for function stubs, one for the functions themselves. We /// have to do this because we may need to emit a function stub while in the /// middle of emitting a function, and we don't know how large the function we /// are emitting is. - class VISIBILITY_HIDDEN DefaultJITMemoryManager : public JITMemoryManager { - std::vector Blocks; // Memory blocks allocated by the JIT - FreeRangeHeader *FreeMemoryList; // Circular list of free blocks. - + class DefaultJITMemoryManager : public JITMemoryManager { + + // Whether to poison freed memory. + bool PoisonMemory; + + /// LastSlab - This points to the last slab allocated and is used as the + /// NearBlock parameter to AllocateRWX so that we can attempt to lay out all + /// stubs, data, and code contiguously in memory. In general, however, this + /// is not possible because the NearBlock parameter is ignored on Windows + /// platforms and even on Unix it works on a best-effort pasis. + sys::MemoryBlock LastSlab; + + // Memory slabs allocated by the JIT. We refer to them as slabs so we don't + // confuse them with the blocks of memory descibed above. + std::vector CodeSlabs; + JITSlabAllocator BumpSlabAllocator; + BumpPtrAllocator StubAllocator; + BumpPtrAllocator DataAllocator; + + // Circular list of free blocks. + FreeRangeHeader *FreeMemoryList; + // When emitting code into a memory block, this is the block. MemoryRangeHeader *CurBlock; - - uint8_t *CurStubPtr, *StubBase; + uint8_t *GOTBase; // Target Specific reserved memory void *DlsymTable; // Stub external symbol information - // Centralize memory block allocation. - sys::MemoryBlock getNewMemoryBlock(unsigned size); - std::map FunctionBlocks; std::map TableBlocks; public: DefaultJITMemoryManager(); ~DefaultJITMemoryManager(); + /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the + /// last slab it allocated, so that subsequent allocations follow it. + sys::MemoryBlock allocateNewSlab(size_t size); + + /// DefaultCodeSlabSize - When we have to go map more memory, we allocate at + /// least this much unless more is requested. 
+ static const size_t DefaultCodeSlabSize; + + /// DefaultSlabSize - Allocate data into slabs of this size unless we get + /// an allocation above SizeThreshold. + static const size_t DefaultSlabSize; + + /// DefaultSizeThreshold - For any allocation larger than this threshold, we + /// should allocate a separate slab. + static const size_t DefaultSizeThreshold; + void AllocateGOT(); void SetDlsymTable(void *); - - uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment); - + + // Testing methods. + virtual bool CheckInvariants(std::string &ErrorStr); + size_t GetDefaultCodeSlabSize() { return DefaultCodeSlabSize; } + size_t GetDefaultDataSlabSize() { return DefaultSlabSize; } + size_t GetDefaultStubSlabSize() { return DefaultSlabSize; } + unsigned GetNumCodeSlabs() { return CodeSlabs.size(); } + unsigned GetNumDataSlabs() { return DataAllocator.GetNumSlabs(); } + unsigned GetNumStubSlabs() { return StubAllocator.GetNumSlabs(); } + /// startFunctionBody - When a function starts, allocate a block of free /// executable memory, returning a pointer to it and its actual size. uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) { - + FreeRangeHeader* candidateBlock = FreeMemoryList; FreeRangeHeader* head = FreeMemoryList; FreeRangeHeader* iter = head->Next; uintptr_t largest = candidateBlock->BlockSize; - + // Search for the largest free block while (iter != head) { - if (iter->BlockSize > largest) { - largest = iter->BlockSize; - candidateBlock = iter; - } - iter = iter->Next; + if (iter->BlockSize > largest) { + largest = iter->BlockSize; + candidateBlock = iter; + } + iter = iter->Next; } + + largest = largest - sizeof(MemoryRangeHeader); + // If this block isn't big enough for the allocation desired, allocate + // another block of memory and add it to the free list. + if (largest < ActualSize || + largest <= FreeRangeHeader::getMinBlockSize()) { + DEBUG(errs() << "JIT: Allocating another slab of memory for function."); + candidateBlock = allocateNewCodeSlab((size_t)ActualSize); + } + // Select this candidate block for allocation CurBlock = candidateBlock; // Allocate the entire memory block. FreeMemoryList = candidateBlock->AllocateBlock(); - ActualSize = CurBlock->BlockSize-sizeof(MemoryRangeHeader); - return (uint8_t *)(CurBlock+1); + ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader); + return (uint8_t *)(CurBlock + 1); } - + + /// allocateNewCodeSlab - Helper method to allocate a new slab of code + /// memory from the OS and add it to the free list. Returns the new + /// FreeRangeHeader at the base of the slab. + FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) { + // If the user needs at least MinSize free memory, then we account for + // two MemoryRangeHeaders: the one in the user's block, and the one at the + // end of the slab. + size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader); + size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin); + sys::MemoryBlock B = allocateNewSlab(SlabSize); + CodeSlabs.push_back(B); + char *MemBase = (char*)(B.base()); + + // Put a tiny allocated block at the end of the memory chunk, so when + // FreeBlock calls getBlockAfter it doesn't fall off the end. + MemoryRangeHeader *EndBlock = + (MemoryRangeHeader*)(MemBase + B.size()) - 1; + EndBlock->ThisAllocated = 1; + EndBlock->PrevAllocated = 0; + EndBlock->BlockSize = sizeof(MemoryRangeHeader); + + // Start out with a vast new block of free memory. 
+ FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase; + NewBlock->ThisAllocated = 0; + // Make sure getFreeBlockBefore doesn't look into unmapped memory. + NewBlock->PrevAllocated = 1; + NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock; + NewBlock->SetEndOfBlockSizeMarker(); + NewBlock->AddToFreeList(FreeMemoryList); + + assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize && + "The block was too small!"); + return NewBlock; + } + /// endFunctionBody - The function F is now allocated, and takes the memory /// in the range [FunctionStart,FunctionEnd). void endFunctionBody(const Function *F, uint8_t *FunctionStart, @@ -319,12 +420,13 @@ namespace { FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); } - /// allocateSpace - Allocate a memory block of the given size. + /// allocateSpace - Allocate a memory block of the given size. This method + /// cannot be called between calls to startFunctionBody and endFunctionBody. uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) { CurBlock = FreeMemoryList; FreeMemoryList = FreeMemoryList->AllocateBlock(); - uint8_t *result = (uint8_t *)CurBlock+1; + uint8_t *result = (uint8_t *)(CurBlock + 1); if (Alignment == 0) Alignment = 1; result = (uint8_t*)(((intptr_t)result+Alignment-1) & @@ -336,6 +438,17 @@ namespace { return result; } + /// allocateStub - Allocate memory for a function stub. + uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment) { + return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment); + } + + /// allocateGlobal - Allocate memory for a global. + uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) { + return (uint8_t*)DataAllocator.Allocate(Size, Alignment); + } + /// startExceptionTable - Use startFunctionBody to allocate memory for the /// function's exception table. uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) { @@ -375,12 +488,12 @@ namespace { // Find the block that is allocated for this function. MemoryRangeHeader *MemRange = I->second; assert(MemRange->ThisAllocated && "Block isn't allocated!"); - + // Fill the buffer with garbage! -#ifndef NDEBUG - memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange)); -#endif - + if (PoisonMemory) { + memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange)); + } + // Free the memory. FreeMemoryList = MemRange->FreeBlock(FreeMemoryList); @@ -393,12 +506,12 @@ namespace { // Find the block that is allocated for this function. MemRange = I->second; assert(MemRange->ThisAllocated && "Block isn't allocated!"); - + // Fill the buffer with garbage! -#ifndef NDEBUG - memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange)); -#endif - + if (PoisonMemory) { + memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange)); + } + // Free the memory. FreeMemoryList = MemRange->FreeBlock(FreeMemoryList); @@ -408,36 +521,57 @@ namespace { /// setMemoryWritable - When code generation is in progress, /// the code pages may need permissions changed. - void setMemoryWritable(void) + void setMemoryWritable() { - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) - sys::Memory::setWritable(Blocks[i]); + for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) + sys::Memory::setWritable(CodeSlabs[i]); } /// setMemoryExecutable - When code generation is done and we're ready to /// start execution, the code pages may need permissions changed. 
- void setMemoryExecutable(void) + void setMemoryExecutable() { - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) - sys::Memory::setExecutable(Blocks[i]); + for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) + sys::Memory::setExecutable(CodeSlabs[i]); + } + + /// setPoisonMemory - Controls whether we write garbage over freed memory. + /// + void setPoisonMemory(bool poison) { + PoisonMemory = poison; } }; } -DefaultJITMemoryManager::DefaultJITMemoryManager() { - // Allocate a 16M block of memory for functions. -#if defined(__APPLE__) && defined(__arm__) - sys::MemoryBlock MemBlock = getNewMemoryBlock(4 << 20); +MemSlab *JITSlabAllocator::Allocate(size_t Size) { + sys::MemoryBlock B = JMM.allocateNewSlab(Size); + MemSlab *Slab = (MemSlab*)B.base(); + Slab->Size = B.size(); + Slab->NextPtr = 0; + return Slab; +} + +void JITSlabAllocator::Deallocate(MemSlab *Slab) { + sys::MemoryBlock B(Slab, Slab->Size); + sys::Memory::ReleaseRWX(B); +} + +DefaultJITMemoryManager::DefaultJITMemoryManager() + : +#ifdef NDEBUG + PoisonMemory(false), #else - sys::MemoryBlock MemBlock = getNewMemoryBlock(16 << 20); + PoisonMemory(true), #endif + LastSlab(0, 0), + BumpSlabAllocator(*this), + StubAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator), + DataAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator) { - uint8_t *MemBase = static_cast(MemBlock.base()); + // Allocate space for code. + sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize); + CodeSlabs.push_back(MemBlock); + uint8_t *MemBase = (uint8_t*)MemBlock.base(); - // Allocate stubs backwards from the base, allocate functions forward - // from the base. - StubBase = MemBase; - CurStubPtr = MemBase + 512*1024; // Use 512k for stubs, working backwards. - // We set up the memory chunk with 4 mem regions, like this: // [ START // [ Free #0 ] -> Large space to allocate functions from. @@ -453,7 +587,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() { MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1; Mem3->ThisAllocated = 1; Mem3->PrevAllocated = 0; - Mem3->BlockSize = 0; + Mem3->BlockSize = sizeof(MemoryRangeHeader); /// Add a tiny free region so that the free list always has one entry. FreeRangeHeader *Mem2 = @@ -469,12 +603,12 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() { MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1; Mem1->ThisAllocated = 1; Mem1->PrevAllocated = 0; - Mem1->BlockSize = (char*)Mem2 - (char*)Mem1; + Mem1->BlockSize = sizeof(MemoryRangeHeader); // Add a FreeRangeHeader to the start of the function body region, indicating // that the space is free. Mark the previous block allocated so we never look // at it. 
- FreeRangeHeader *Mem0 = (FreeRangeHeader*)CurStubPtr; + FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase; Mem0->ThisAllocated = 0; Mem0->PrevAllocated = 1; Mem0->BlockSize = (char*)Mem1-(char*)Mem0; @@ -499,43 +633,128 @@ void DefaultJITMemoryManager::SetDlsymTable(void *ptr) { } DefaultJITMemoryManager::~DefaultJITMemoryManager() { - for (unsigned i = 0, e = Blocks.size(); i != e; ++i) - sys::Memory::ReleaseRWX(Blocks[i]); - - delete[] GOTBase; - Blocks.clear(); -} + for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) + sys::Memory::ReleaseRWX(CodeSlabs[i]); -uint8_t *DefaultJITMemoryManager::allocateStub(const GlobalValue* F, - unsigned StubSize, - unsigned Alignment) { - CurStubPtr -= StubSize; - CurStubPtr = (uint8_t*)(((intptr_t)CurStubPtr) & - ~(intptr_t)(Alignment-1)); - if (CurStubPtr < StubBase) { - // FIXME: allocate a new block - fprintf(stderr, "JIT ran out of memory for function stubs!\n"); - abort(); - } - return CurStubPtr; + delete[] GOTBase; } -sys::MemoryBlock DefaultJITMemoryManager::getNewMemoryBlock(unsigned size) { +sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) { // Allocate a new block close to the last one. - const sys::MemoryBlock *BOld = Blocks.empty() ? 0 : &Blocks.front(); std::string ErrMsg; - sys::MemoryBlock B = sys::Memory::AllocateRWX(size, BOld, &ErrMsg); + sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0; + sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg); if (B.base() == 0) { - fprintf(stderr, - "Allocation failed when allocating new memory in the JIT\n%s\n", - ErrMsg.c_str()); - abort(); + llvm_report_error("Allocation failed when allocating new memory in the" + " JIT\n" + ErrMsg); + } + LastSlab = B; + ++NumSlabs; + // Initialize the slab to garbage when debugging. + if (PoisonMemory) { + memset(B.base(), 0xCD, B.size()); } - Blocks.push_back(B); return B; } +/// CheckInvariants - For testing only. Return "" if all internal invariants +/// are preserved, and a helpful error message otherwise. For free and +/// allocated blocks, make sure that adding BlockSize gives a valid block. +/// For free blocks, make sure they're in the free list and that their end of +/// block size marker is correct. This function should return an error before +/// accessing bad memory. This function is defined here instead of in +/// JITMemoryManagerTest.cpp so that we don't have to expose all of the +/// implementation details of DefaultJITMemoryManager. +bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) { + raw_string_ostream Err(ErrorStr); + + // Construct the set of FreeRangeHeader pointers so we can query it + // efficiently. + llvm::SmallPtrSet FreeHdrSet; + FreeRangeHeader* FreeHead = FreeMemoryList; + FreeRangeHeader* FreeRange = FreeHead; + + do { + // Check that the free range pointer is in the blocks we've allocated. + bool Found = false; + for (std::vector::iterator I = CodeSlabs.begin(), + E = CodeSlabs.end(); I != E && !Found; ++I) { + char *Start = (char*)I->base(); + char *End = Start + I->size(); + Found = (Start <= (char*)FreeRange && (char*)FreeRange < End); + } + if (!Found) { + Err << "Corrupt free list; points to " << FreeRange; + return false; + } + + if (FreeRange->Next->Prev != FreeRange) { + Err << "Next and Prev pointers do not match."; + return false; + } + + // Otherwise, add it to the set. + FreeHdrSet.insert(FreeRange); + FreeRange = FreeRange->Next; + } while (FreeRange != FreeHead); + + // Go over each block, and look at each MemoryRangeHeader.
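A sketch of the kind of unit-test driver the comment above alludes to, assuming the base JITMemoryManager re-exports CheckInvariants as the virtual "Testing methods" block earlier suggests (the default manager ignores the Function* arguments, so null is passed for them):

#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;

bool exerciseManager() {
  JITMemoryManager *MM = JITMemoryManager::CreateDefaultMemManager();
  uintptr_t Size = 0;
  uint8_t *Begin = MM->startFunctionBody(/*F=*/0, Size);
  MM->endFunctionBody(/*F=*/0, Begin, Begin + 64); // pretend: 64 bytes of code
  std::string Err;
  bool OK = MM->CheckInvariants(Err);
  if (!OK)
    errs() << Err << "\n";
  delete MM;
  return OK;
}
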
+ for (std::vector::iterator I = CodeSlabs.begin(), + E = CodeSlabs.end(); I != E; ++I) { + char *Start = (char*)I->base(); + char *End = Start + I->size(); + + // Check each memory range. + for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL; + Start <= (char*)Hdr && (char*)Hdr < End; + Hdr = &Hdr->getBlockAfter()) { + if (Hdr->ThisAllocated == 0) { + // Check that this range is in the free list. + if (!FreeHdrSet.count(Hdr)) { + Err << "Found free header at " << Hdr << " that is not in free list."; + return false; + } + + // Now make sure the size marker at the end of the block is correct. + uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1; + if (!(Start <= (char*)Marker && (char*)Marker < End)) { + Err << "Block size in header points out of current MemoryBlock."; + return false; + } + if (Hdr->BlockSize != *Marker) { + Err << "End of block size marker (" << *Marker << ") " + << "and BlockSize (" << Hdr->BlockSize << ") don't match."; + return false; + } + } + + if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) { + Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != " + << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")"; + return false; + } else if (!LastHdr && !Hdr->PrevAllocated) { + Err << "The first header should have PrevAllocated true."; + return false; + } + + // Remember the last header. + LastHdr = Hdr; + } + } + + // All invariants are preserved. + return true; +} JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() { return new DefaultJITMemoryManager(); } + +// Allocate memory for code in 512K slabs. +const size_t DefaultJITMemoryManager::DefaultCodeSlabSize = 512 * 1024; + +// Allocate globals and stubs in slabs of 64K. (probably 16 pages) +const size_t DefaultJITMemoryManager::DefaultSlabSize = 64 * 1024; + +// Waste at most 16K at the end of each bump slab. (probably 4 pages) +const size_t DefaultJITMemoryManager::DefaultSizeThreshold = 16 * 1024; diff --git a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp index 3b8b84ce5bcb2..53585b877b19a 100644 --- a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp @@ -84,8 +84,7 @@ JITEventListener *createMacOSJITEventListener() { void MacOSJITEventListener::NotifyFunctionEmitted( const Function &F, void *FnStart, size_t FnSize, const EmittedFunctionDetails &) { - const char *const FnName = F.getNameStart(); - assert(FnName != 0 && FnStart != 0 && "Bad symbol to add"); + assert(F.hasName() && FnStart != 0 && "Bad symbol to add"); JITSymbolTable **SymTabPtrPtr = 0; SymTabPtrPtr = &__jitSymbolTable; @@ -120,7 +119,7 @@ void MacOSJITEventListener::NotifyFunctionEmitted( // Otherwise, we have enough space, just tack it onto the end of the array. JITSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols]; - Entry.FnName = strdup(FnName); + Entry.FnName = strdup(F.getName().data()); Entry.FnStart = FnStart; Entry.FnSize = FnSize; ++SymTabPtr->NumSymbols; diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp new file mode 100644 index 0000000000000..69398be5080cf --- /dev/null +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -0,0 +1,178 @@ +//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a JITEventListener object that calls into OProfile to tell +// it about JITted functions. For now, we only record function names and sizes, +// but eventually we'll also record line number information. +// +// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the +// definition of the interface we're using. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "oprofile-jit-event-listener" +#include "llvm/Function.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Errno.h" +#include "llvm/Config/config.h" +#include +using namespace llvm; + +#if USE_OPROFILE + +#include + +namespace { + +class OProfileJITEventListener : public JITEventListener { + op_agent_t Agent; +public: + OProfileJITEventListener(); + ~OProfileJITEventListener(); + + virtual void NotifyFunctionEmitted(const Function &F, + void *FnStart, size_t FnSize, + const EmittedFunctionDetails &Details); + virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr); +}; + +OProfileJITEventListener::OProfileJITEventListener() + : Agent(op_open_agent()) { + if (Agent == NULL) { + const std::string err_str = sys::StrError(); + DEBUG(errs() << "Failed to connect to OProfile agent: " << err_str << "\n"); + } else { + DEBUG(errs() << "Connected to OProfile agent.\n"); + } +} + +OProfileJITEventListener::~OProfileJITEventListener() { + if (Agent != NULL) { + if (op_close_agent(Agent) == -1) { + const std::string err_str = sys::StrError(); + DEBUG(errs() << "Failed to disconnect from OProfile agent: " + << err_str << "\n"); + } else { + DEBUG(errs() << "Disconnected from OProfile agent.\n"); + } + } +} + +class FilenameCache { + // Holds the filename of each CompileUnit, so that we can pass the + // pointer into oprofile. These char*s are freed in the destructor. + DenseMap Filenames; + + public: + const char *getFilename(MDNode *CompileUnit) { + char *&Filename = Filenames[CompileUnit]; + if (Filename == NULL) { + DICompileUnit CU(CompileUnit); + Filename = strdup(CU.getFilename()); + } + return Filename; + } + ~FilenameCache() { + for (DenseMap::iterator + I = Filenames.begin(), E = Filenames.end(); I != E; ++I) { + free(I->second); + } + } +}; + +static debug_line_info LineStartToOProfileFormat( + const MachineFunction &MF, FilenameCache &Filenames, + uintptr_t Address, DebugLoc Loc) { + debug_line_info Result; + Result.vma = Address; + const DebugLocTuple &tuple = MF.getDebugLocTuple(Loc); + Result.lineno = tuple.Line; + Result.filename = Filenames.getFilename(tuple.CompileUnit); + DEBUG(errs() << "Mapping " << reinterpret_cast(Result.vma) << " to " + << Result.filename << ":" << Result.lineno << "\n"); + return Result; +} + +// Adds the just-emitted function to the symbol table. 
+void OProfileJITEventListener::NotifyFunctionEmitted( + const Function &F, void *FnStart, size_t FnSize, + const EmittedFunctionDetails &Details) { + assert(F.hasName() && FnStart != 0 && "Bad symbol to add"); + if (op_write_native_code(Agent, F.getName().data(), + reinterpret_cast(FnStart), + FnStart, FnSize) == -1) { + DEBUG(errs() << "Failed to tell OProfile about native function " + << F.getName() << " at [" + << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); + return; + } + + // Now we convert the line number information from the address/DebugLoc format + // in Details to the address/filename/lineno format that OProfile expects. + // OProfile 0.9.4 (and maybe later versions) has a bug that causes it to + // ignore line numbers for addresses above 4G. + FilenameCache Filenames; + std::vector LineInfo; + LineInfo.reserve(1 + Details.LineStarts.size()); + if (!Details.MF->getDefaultDebugLoc().isUnknown()) { + LineInfo.push_back(LineStartToOProfileFormat( + *Details.MF, Filenames, + reinterpret_cast(FnStart), + Details.MF->getDefaultDebugLoc())); + } + for (std::vector::const_iterator + I = Details.LineStarts.begin(), E = Details.LineStarts.end(); + I != E; ++I) { + LineInfo.push_back(LineStartToOProfileFormat( + *Details.MF, Filenames, I->Address, I->Loc)); + } + if (!LineInfo.empty()) { + if (op_write_debug_line_info(Agent, FnStart, + LineInfo.size(), &*LineInfo.begin()) == -1) { + DEBUG(errs() + << "Failed to tell OProfile about line numbers for native function " + << F.getName() << " at [" + << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); + } + } +} + +// Removes the to-be-deleted function from the symbol table. +void OProfileJITEventListener::NotifyFreeingMachineCode( + const Function &F, void *FnStart) { + assert(FnStart && "Invalid function pointer"); + if (op_unload_native_code(Agent, reinterpret_cast(FnStart)) == -1) { + DEBUG(errs() << "Failed to tell OProfile about unload of native function " + << F.getName() << " at " << FnStart << "\n"); + } +} + +} // anonymous namespace. + +namespace llvm { +JITEventListener *createOProfileJITEventListener() { + return new OProfileJITEventListener; +} +} + +#else // USE_OPROFILE + +namespace llvm { +// By defining this to return NULL, we can let clients call it unconditionally, +// even if they haven't configured with the OProfile libraries. +JITEventListener *createOProfileJITEventListener() { + return NULL; +} +} // namespace llvm + +#endif // USE_OPROFILE diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp index 0f208193075b8..8bed33bb7d42c 100644 --- a/lib/ExecutionEngine/JIT/TargetSelect.cpp +++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp @@ -7,24 +7,27 @@ // //===----------------------------------------------------------------------===// // -// This just asks the TargetMachineRegistry for the appropriate JIT to use, and -// allows the user to specify a specific one on the commandline with -march=x. +// This just asks the TargetRegistry for the appropriate JIT to use, and allows +// the user to specify a specific one on the commandline with -march=x. Clients +// should initialize targets prior to calling createJIT. 
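A hedged sketch of the client-side sequence that last sentence implies; the header names are as of this revision, ExistingModuleProvider wraps a caller-owned Module, and including llvm/ExecutionEngine/JIT.h is the usual trick to force the JIT library to be linked in:

#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ModuleProvider.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"   // forces the JIT to be linked in
#include "llvm/Target/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

ExecutionEngine *makeJIT(Module *M) {
  InitializeNativeTarget();  // register the host target before createJIT
  std::string Err;
  ExecutionEngine *EE =
      ExecutionEngine::createJIT(new ExistingModuleProvider(M), &Err);
  if (!EE)
    errs() << "createJIT failed: " << Err << "\n";
  return EE;
}
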
// //===----------------------------------------------------------------------===// #include "JIT.h" #include "llvm/Module.h" #include "llvm/ModuleProvider.h" -#include "llvm/Support/RegistryParser.h" -#include "llvm/Support/Streams.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Host.h" #include "llvm/Target/SubtargetFeature.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::opt > -MArch("march", cl::desc("Architecture to generate assembly for:")); +static cl::opt +MArch("march", + cl::desc("Architecture to generate assembly for (see --version)")); static cl::opt MCPU("mcpu", @@ -38,25 +41,51 @@ MAttrs("mattr", cl::desc("Target specific attributes (-mattr=help for details)"), cl::value_desc("a1,+a2,-a3,...")); -/// createInternal - Create an return a new JIT compiler if there is one -/// available for the current target. Otherwise, return null. -/// -ExecutionEngine *JIT::createJIT(ModuleProvider *MP, std::string *ErrorStr, - JITMemoryManager *JMM, - CodeGenOpt::Level OptLevel) { - const TargetMachineRegistry::entry *TheArch = MArch; - if (TheArch == 0) { +/// selectTarget - Pick a target either via -march or by guessing the native +/// arch. Add any CPU features specified via -mcpu or -mattr. +TargetMachine *JIT::selectTarget(ModuleProvider *MP, std::string *ErrorStr) { + Module &Mod = *MP->getModule(); + + Triple TheTriple(Mod.getTargetTriple()); + if (TheTriple.getTriple().empty()) + TheTriple.setTriple(sys::getHostTriple()); + + // Adjust the triple to match what the user requested. + const Target *TheTarget = 0; + if (!MArch.empty()) { + for (TargetRegistry::iterator it = TargetRegistry::begin(), + ie = TargetRegistry::end(); it != ie; ++it) { + if (MArch == it->getName()) { + TheTarget = &*it; + break; + } + } + + if (!TheTarget) { + *ErrorStr = "No available targets are compatible with this -march, " + "see -version for the available targets.\n"; + return 0; + } + + // Adjust the triple to match (if known), otherwise stick with the + // module/host triple. + Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch); + if (Type != Triple::UnknownArch) + TheTriple.setArch(Type); + } else { std::string Error; - TheArch = TargetMachineRegistry::getClosestTargetForJIT(Error); - if (TheArch == 0) { + TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error); + if (TheTarget == 0) { if (ErrorStr) *ErrorStr = Error; return 0; } - } else if (TheArch->JITMatchQualityFn() == 0) { - cerr << "WARNING: This target JIT is not designed for the host you are" - << " running. If bad things happen, please choose a different " - << "-march switch.\n"; + } + + if (!TheTarget->hasJIT()) { + errs() << "WARNING: This target JIT is not designed for the host you are" + << " running. If bad things happen, please choose a different " + << "-march switch.\n"; } // Package up features to be passed to target/subtarget @@ -70,14 +99,8 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP, std::string *ErrorStr, } // Allocate a target... - TargetMachine *Target = TheArch->CtorFn(*MP->getModule(), FeaturesStr); + TargetMachine *Target = + TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr); assert(Target && "Could not allocate target machine!"); - - // If the target supports JIT code generation, return a new JIT now. 
- if (TargetJITInfo *TJ = Target->getJITInfo()) - return new JIT(MP, *Target, *TJ, JMM, OptLevel); - - if (ErrorStr) - *ErrorStr = "target does not support JIT code generation"; - return 0; + return Target; } diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp index faf01af127e94..76d81c219426b 100644 --- a/lib/Linker/LinkArchives.cpp +++ b/lib/Linker/LinkArchives.cpp @@ -96,10 +96,10 @@ bool Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { // Make sure this is an archive file we're dealing with if (!Filename.isArchive()) - return error("File '" + Filename.toString() + "' is not an archive."); + return error("File '" + Filename.str() + "' is not an archive."); // Open the archive file - verbose("Linking archive file '" + Filename.toString() + "'"); + verbose("Linking archive file '" + Filename.str() + "'"); // Find all of the symbols currently undefined in the bitcode program. // If all the symbols are defined, the program is complete, and there is @@ -108,8 +108,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { GetAllUndefinedSymbols(Composite, UndefinedSymbols); if (UndefinedSymbols.empty()) { - verbose("No symbols undefined, skipping library '" + - Filename.toString() + "'"); + verbose("No symbols undefined, skipping library '" + Filename.str() + "'"); return false; // No need to link anything in! } @@ -120,7 +119,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { Archive* arch = AutoArch.get(); if (!arch) - return error("Cannot read archive '" + Filename.toString() + + return error("Cannot read archive '" + Filename.str() + "': " + ErrMsg); if (!arch->isBitcodeArchive()) { is_native = true; @@ -143,7 +142,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) { // Find the modules we need to link into the target module std::set Modules; if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg)) - return error("Cannot find symbols in '" + Filename.toString() + + return error("Cannot find symbols in '" + Filename.str() + "': " + ErrMsg); // If we didn't find any more modules to link this time, we are done diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp index dc0f7c17bf42d..61f3c26c6a1ca 100644 --- a/lib/Linker/LinkItems.cpp +++ b/lib/Linker/LinkItems.cpp @@ -14,9 +14,10 @@ #include "llvm/Linker.h" #include "llvm/Module.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Bitcode/ReaderWriter.h" - +#include "llvm/System/Path.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" using namespace llvm; // LinkItems - This function is the main entry point into linking. It takes a @@ -69,20 +70,20 @@ Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) { /// LinkInLibrary - links one library into the HeadModule. /// -bool Linker::LinkInLibrary(const std::string& Lib, bool& is_native) { +bool Linker::LinkInLibrary(const StringRef &Lib, bool& is_native) { is_native = false; // Determine where this library lives. 
sys::Path Pathname = FindLib(Lib); if (Pathname.isEmpty()) - return error("Cannot find library '" + Lib + "'"); + return error("Cannot find library '" + Lib.str() + "'"); // If its an archive, try to link it in std::string Magic; Pathname.getMagicNumber(Magic, 64); switch (sys::IdentifyFileType(Magic.c_str(), 64)) { - default: assert(0 && "Bad file type identification"); + default: llvm_unreachable("Bad file type identification"); case sys::Unknown_FileType: - return warning("Supposed library '" + Lib + "' isn't a library."); + return warning("Supposed library '" + Lib.str() + "' isn't a library."); case sys::Bitcode_FileType: // LLVM ".so" file. @@ -92,7 +93,7 @@ bool Linker::LinkInLibrary(const std::string& Lib, bool& is_native) { case sys::Archive_FileType: if (LinkInArchive(Pathname, is_native)) - return error("Cannot link archive '" + Pathname.toString() + "'"); + return error("Cannot link archive '" + Pathname.str() + "'"); break; case sys::ELF_Relocatable_FileType: @@ -157,7 +158,7 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { is_native = false; // Check for a file of name "-", which means "read standard input" - if (File.toString() == "-") { + if (File.str() == "-") { std::auto_ptr M; if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) { M.reset(ParseBitcodeFile(Buffer, Context, &Error)); @@ -172,34 +173,34 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { // Make sure we can at least read the file if (!File.canRead()) - return error("Cannot find linker input '" + File.toString() + "'"); + return error("Cannot find linker input '" + File.str() + "'"); // If its an archive, try to link it in std::string Magic; File.getMagicNumber(Magic, 64); switch (sys::IdentifyFileType(Magic.c_str(), 64)) { - default: assert(0 && "Bad file type identification"); + default: llvm_unreachable("Bad file type identification"); case sys::Unknown_FileType: - return warning("Ignoring file '" + File.toString() + + return warning("Ignoring file '" + File.str() + "' because does not contain bitcode."); case sys::Archive_FileType: // A user may specify an ar archive without -l, perhaps because it // is not installed as a library. Detect that and link the archive. 
- verbose("Linking archive file '" + File.toString() + "'"); + verbose("Linking archive file '" + File.str() + "'"); if (LinkInArchive(File, is_native)) return true; break; case sys::Bitcode_FileType: { - verbose("Linking bitcode file '" + File.toString() + "'"); + verbose("Linking bitcode file '" + File.str() + "'"); std::auto_ptr M(LoadObject(File)); if (M.get() == 0) - return error("Cannot load file '" + File.toString() + "': " + Error); + return error("Cannot load file '" + File.str() + "': " + Error); if (LinkInModule(M.get(), &Error)) - return error("Cannot link file '" + File.toString() + "': " + Error); + return error("Cannot link file '" + File.str() + "': " + Error); - verbose("Linked in file '" + File.toString() + "'"); + verbose("Linked in file '" + File.str() + "'"); break; } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 4a15d88d8f369..e64c200cf6321 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -19,21 +19,22 @@ #include "llvm/Linker.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/TypeSymbolTable.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Instructions.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" #include "llvm/ADT/DenseMap.h" -#include using namespace llvm; // Error - Simple wrapper function to conditionally assign to E and return true. // This just makes error return conditions a little bit simpler... -static inline bool Error(std::string *E, const std::string &Message) { - if (E) *E = Message; +static inline bool Error(std::string *E, const Twine &Message) { + if (E) *E = Message.str(); return true; } @@ -143,7 +144,7 @@ protected: // for debugging... virtual void dump() const { - cerr << "AbstractTypeSet!\n"; + errs() << "AbstractTypeSet!\n"; } }; } @@ -336,11 +337,11 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) { static void PrintMap(const std::map &M) { for (std::map::const_iterator I = M.begin(), E =M.end(); I != E; ++I) { - cerr << " Fr: " << (void*)I->first << " "; + errs() << " Fr: " << (void*)I->first << " "; I->first->dump(); - cerr << " To: " << (void*)I->second << " "; + errs() << " To: " << (void*)I->second << " "; I->second->dump(); - cerr << "\n"; + errs() << "\n"; } } #endif @@ -348,7 +349,8 @@ static void PrintMap(const std::map &M) { // RemapOperand - Use ValueMap to convert constants from one module to another. 
static Value *RemapOperand(const Value *In, - std::map &ValueMap) { + std::map &ValueMap, + LLVMContext &Context) { std::map::const_iterator I = ValueMap.find(In); if (I != ValueMap.end()) return I->second; @@ -363,29 +365,37 @@ static Value *RemapOperand(const Value *In, if (const ConstantArray *CPA = dyn_cast(CPV)) { std::vector Operands(CPA->getNumOperands()); for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) - Operands[i] =cast(RemapOperand(CPA->getOperand(i), ValueMap)); - Result = ConstantArray::get(cast(CPA->getType()), Operands); + Operands[i] =cast(RemapOperand(CPA->getOperand(i), ValueMap, + Context)); + Result = + ConstantArray::get(cast(CPA->getType()), Operands); } else if (const ConstantStruct *CPS = dyn_cast(CPV)) { std::vector Operands(CPS->getNumOperands()); for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) - Operands[i] =cast(RemapOperand(CPS->getOperand(i), ValueMap)); - Result = ConstantStruct::get(cast(CPS->getType()), Operands); + Operands[i] =cast(RemapOperand(CPS->getOperand(i), ValueMap, + Context)); + Result = + ConstantStruct::get(cast(CPS->getType()), Operands); } else if (isa(CPV) || isa(CPV)) { Result = const_cast(CPV); } else if (const ConstantVector *CP = dyn_cast(CPV)) { std::vector Operands(CP->getNumOperands()); for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) - Operands[i] = cast(RemapOperand(CP->getOperand(i), ValueMap)); + Operands[i] = cast(RemapOperand(CP->getOperand(i), ValueMap, + Context)); Result = ConstantVector::get(Operands); } else if (const ConstantExpr *CE = dyn_cast(CPV)) { std::vector Ops; for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) - Ops.push_back(cast(RemapOperand(CE->getOperand(i),ValueMap))); + Ops.push_back(cast(RemapOperand(CE->getOperand(i),ValueMap, + Context))); Result = CE->getWithOperands(Ops); } else { assert(!isa(CPV) && "Unmapped global?"); - assert(0 && "Unknown type of derived type constant value!"); + llvm_unreachable("Unknown type of derived type constant value!"); } + } else if (isa(In)) { + Result = const_cast(In); } else if (isa(In)) { Result = const_cast(In); } @@ -397,11 +407,11 @@ static Value *RemapOperand(const Value *In, } #ifndef NDEBUG - cerr << "LinkModules ValueMap: \n"; + errs() << "LinkModules ValueMap: \n"; PrintMap(ValueMap); - cerr << "Couldn't remap value: " << (void*)In << " " << *In << "\n"; - assert(0 && "Couldn't remap value!"); + errs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n"; + llvm_unreachable("Couldn't remap value!"); #endif return 0; } @@ -521,6 +531,22 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src, return false; } +// Insert all of the named mdnodes in Src into the Dest module. +static void LinkNamedMDNodes(Module *Dest, Module *Src) { + for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(), + E = Src->named_metadata_end(); I != E; ++I) { + const NamedMDNode *SrcNMD = I; + NamedMDNode *DestNMD = Dest->getNamedMetadata(SrcNMD->getName()); + if (!DestNMD) + NamedMDNode::Create(SrcNMD, Dest); + else { + // Add Src elements into Dest node. + for (unsigned i = 0, e = SrcNMD->getNumElements(); i != e; ++i) + DestNMD->addElement(SrcNMD->getElement(i)); + } + } +} + // LinkGlobals - Loop through the global variables in the src module and merge // them into the dest module.
static bool LinkGlobals(Module *Dest, const Module *Src, @@ -538,8 +564,7 @@ static bool LinkGlobals(Module *Dest, const Module *Src, // Check to see if may have to link the global with the global, alias or // function. if (SGV->hasName() && !SGV->hasLocalLinkage()) - DGV = cast_or_null(DestSymTab.lookup(SGV->getNameStart(), - SGV->getNameEnd())); + DGV = cast_or_null(DestSymTab.lookup(SGV->getName())); // If we found a global with the same name in the dest module, but it has // internal linkage, we are really not doing any linkage here. @@ -564,9 +589,9 @@ static bool LinkGlobals(Module *Dest, const Module *Src, // symbol over in the dest module... the initializer will be filled in // later by LinkGlobalInits. GlobalVariable *NewDGV = - new GlobalVariable(SGV->getType()->getElementType(), + new GlobalVariable(*Dest, SGV->getType()->getElementType(), SGV->isConstant(), SGV->getLinkage(), /*init*/0, - SGV->getName(), Dest, false, + SGV->getName(), 0, false, SGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. CopyGVAttributes(NewDGV, SGV); @@ -597,9 +622,9 @@ static bool LinkGlobals(Module *Dest, const Module *Src, // AppendingVars map. The name is cleared out so that no linkage is // performed. GlobalVariable *NewDGV = - new GlobalVariable(SGV->getType()->getElementType(), + new GlobalVariable(*Dest, SGV->getType()->getElementType(), SGV->isConstant(), SGV->getLinkage(), /*init*/0, - "", Dest, false, + "", 0, false, SGV->getType()->getAddressSpace()); // Set alignment allowing CopyGVAttributes merge it with alignment of SGV. @@ -625,13 +650,15 @@ static bool LinkGlobals(Module *Dest, const Module *Src, // we are replacing may be a function (if a prototype, weak, etc) or a // global variable. GlobalVariable *NewDGV = - new GlobalVariable(SGV->getType()->getElementType(), SGV->isConstant(), - NewLinkage, /*init*/0, DGV->getName(), Dest, false, + new GlobalVariable(*Dest, SGV->getType()->getElementType(), + SGV->isConstant(), NewLinkage, /*init*/0, + DGV->getName(), 0, false, SGV->getType()->getAddressSpace()); // Propagate alignment, section, and visibility info. CopyGVAttributes(NewDGV, SGV); - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType())); + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, + DGV->getType())); // DGV will conflict with NewDGV because they both had the same // name. We must erase this now so ForceRenaming doesn't assert @@ -697,6 +724,9 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) { else if (SL == GlobalValue::InternalLinkage && DL == GlobalValue::InternalLinkage) return GlobalValue::InternalLinkage; + else if (SL == GlobalValue::LinkerPrivateLinkage && + DL == GlobalValue::LinkerPrivateLinkage) + return GlobalValue::LinkerPrivateLinkage; else { assert (SL == GlobalValue::PrivateLinkage && DL == GlobalValue::PrivateLinkage && "Unexpected linkage type"); @@ -866,7 +896,8 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src, if (SGV->hasInitializer()) { // Only process initialized GV's // Figure out what the initializer looks like in the dest module... Constant *SInit = - cast(RemapOperand(SGV->getInitializer(), ValueMap)); + cast(RemapOperand(SGV->getInitializer(), ValueMap, + Dest->getContext())); // Grab destination global variable or alias. GlobalValue *DGV = cast(ValueMap[SGV]->stripPointerCasts()); @@ -885,9 +916,9 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src, // Nothing is required, mapped values will take the new global // automatically. 
} else if (DGVar->hasAppendingLinkage()) { - assert(0 && "Appending linkage unimplemented!"); + llvm_unreachable("Appending linkage unimplemented!"); } else { - assert(0 && "Unknown linkage!"); + llvm_unreachable("Unknown linkage!"); } } else { // Copy the initializer over now... @@ -923,8 +954,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src, // Check to see if may have to link the function with the global, alias or // function. if (SF->hasName() && !SF->hasLocalLinkage()) - DGV = cast_or_null(DestSymTab.lookup(SF->getNameStart(), - SF->getNameEnd())); + DGV = cast_or_null(DestSymTab.lookup(SF->getName())); // If we found a global with the same name in the dest module, but it has // internal linkage, we are really not doing any linkage here. @@ -979,7 +1009,8 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src, CopyGVAttributes(NewDF, SF); // Any uses of DF need to change to NewDF, with cast - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType())); + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, + DGV->getType())); // DF will conflict with NewDF because they both had the same. We must // erase this now so ForceRenaming doesn't assert because DF might @@ -1053,7 +1084,7 @@ static bool LinkFunctionBody(Function *Dest, Function *Src, for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) if (!isa(*OI) && !isa(*OI)) - *OI = RemapOperand(*OI, ValueMap); + *OI = RemapOperand(*OI, ValueMap, Dest->getContext()); // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); @@ -1132,14 +1163,15 @@ static bool LinkAppendingVars(Module *M, "Appending variables with different section name need to be linked!"); unsigned NewSize = T1->getNumElements() + T2->getNumElements(); - ArrayType *NewType = ArrayType::get(T1->getElementType(), NewSize); + ArrayType *NewType = ArrayType::get(T1->getElementType(), + NewSize); G1->setName(""); // Clear G1's name in case of a conflict! // Create the new global variable... GlobalVariable *NG = - new GlobalVariable(NewType, G1->isConstant(), G1->getLinkage(), - /*init*/0, First->first, M, G1->isThreadLocal(), + new GlobalVariable(*M, NewType, G1->isConstant(), G1->getLinkage(), + /*init*/0, First->first, 0, G1->isThreadLocal(), G1->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. @@ -1173,8 +1205,10 @@ static bool LinkAppendingVars(Module *M, // FIXME: This should rewrite simple/straight-forward uses such as // getelementptr instructions to not use the Cast! - G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G1->getType())); - G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG, G2->getType())); + G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG, + G1->getType())); + G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG, + G2->getType())); // Remove the two globals from the module now... 
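[Editor's sketch] LinkAppendingVars, shown above, implements appending linkage by allocating a fresh array whose element count is the sum of the two inputs and then redirecting every use of the old globals at the new one. The core size-and-copy step, reduced to plain vectors (hypothetical helper, not the LLVM API):

    #include <vector>

    // Concatenate two appending 'globals' into one new array, as
    // LinkAppendingVars does for [N x T] + [M x T] -> [N+M x T].
    template <typename T>
    std::vector<T> appendGlobals(const std::vector<T> &G1,
                                 const std::vector<T> &G2) {
      std::vector<T> NG;
      NG.reserve(G1.size() + G2.size()); // NewSize = T1 elements + T2 elements
      NG.insert(NG.end(), G1.begin(), G1.end());
      NG.insert(NG.end(), G2.begin(), G2.end());
      return NG;
    }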
M->getGlobalList().erase(G1); @@ -1239,10 +1273,10 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) { if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() && Src->getDataLayout() != Dest->getDataLayout()) - cerr << "WARNING: Linking two modules of different data layouts!\n"; + errs() << "WARNING: Linking two modules of different data layouts!\n"; if (!Src->getTargetTriple().empty() && Dest->getTargetTriple() != Src->getTargetTriple()) - cerr << "WARNING: Linking two modules of different target triples!\n"; + errs() << "WARNING: Linking two modules of different target triples!\n"; // Append the module inline asm string. if (!Src->getModuleInlineAsm().empty()) { @@ -1282,6 +1316,9 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) { AppendingVars.insert(std::make_pair(I->getName(), I)); } + // Insert all of the named mdnoes in Src into the Dest module. + LinkNamedMDNodes(Dest, Src); + // Insert all of the globals in src into the Dest module... without linking // initializers (which could refer to functions not yet mapped over). if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg)) diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp index 6e0b760b85def..aef79d08f423e 100644 --- a/lib/Linker/Linker.cpp +++ b/lib/Linker/Linker.cpp @@ -14,12 +14,13 @@ #include "llvm/Linker.h" #include "llvm/Module.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/Config/config.h" +#include "llvm/System/Path.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Config/config.h" using namespace llvm; -Linker::Linker(const std::string& progname, const std::string& modname, +Linker::Linker(const StringRef &progname, const StringRef &modname, LLVMContext& C, unsigned flags): Context(C), Composite(new Module(modname, C)), @@ -28,7 +29,7 @@ Linker::Linker(const std::string& progname, const std::string& modname, Error(), ProgramName(progname) { } -Linker::Linker(const std::string& progname, Module* aModule, unsigned flags) : +Linker::Linker(const StringRef &progname, Module* aModule, unsigned flags) : Context(aModule->getContext()), Composite(aModule), LibPaths(), @@ -41,25 +42,25 @@ Linker::~Linker() { } bool -Linker::error(const std::string& message) { +Linker::error(const StringRef &message) { Error = message; if (!(Flags&QuietErrors)) - cerr << ProgramName << ": error: " << message << "\n"; + errs() << ProgramName << ": error: " << message << "\n"; return true; } bool -Linker::warning(const std::string& message) { +Linker::warning(const StringRef &message) { Error = message; if (!(Flags&QuietWarnings)) - cerr << ProgramName << ": warning: " << message << "\n"; + errs() << ProgramName << ": warning: " << message << "\n"; return false; } void -Linker::verbose(const std::string& message) { +Linker::verbose(const StringRef &message) { if (Flags&Verbose) - cerr << " " << message << "\n"; + errs() << " " << message << "\n"; } void @@ -69,11 +70,8 @@ Linker::addPath(const sys::Path& path) { void Linker::addPaths(const std::vector& paths) { - for (unsigned i = 0; i != paths.size(); ++i) { - sys::Path aPath; - aPath.set(paths[i]); - LibPaths.push_back(aPath); - } + for (unsigned i = 0, e = paths.size(); i != e; ++i) + LibPaths.push_back(sys::Path(paths[i])); } void @@ -100,16 +98,15 @@ Linker::LoadObject(const sys::Path &FN) { std::string ParseErrorMessage; Module *Result = 0; - const std::string &FNS = FN.toString(); - std::auto_ptr 
Buffer(MemoryBuffer::getFileOrSTDIN(FNS.c_str())); + std::auto_ptr Buffer(MemoryBuffer::getFileOrSTDIN(FN.c_str())); if (Buffer.get()) Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage); else - ParseErrorMessage = "Error reading file '" + FNS + "'"; + ParseErrorMessage = "Error reading file '" + FN.str() + "'"; if (Result) return std::auto_ptr(Result); - Error = "Bitcode file '" + FN.toString() + "' could not be loaded"; + Error = "Bitcode file '" + FN.str() + "' could not be loaded"; if (ParseErrorMessage.size()) Error += ": " + ParseErrorMessage; return std::auto_ptr(); @@ -117,13 +114,13 @@ Linker::LoadObject(const sys::Path &FN) { // IsLibrary - Determine if "Name" is a library in "Directory". Return // a non-empty sys::Path if its found, an empty one otherwise. -static inline sys::Path IsLibrary(const std::string& Name, - const sys::Path& Directory) { +static inline sys::Path IsLibrary(const StringRef &Name, + const sys::Path &Directory) { sys::Path FullPath(Directory); // Try the libX.a form - FullPath.appendComponent("lib" + Name); + FullPath.appendComponent(("lib" + Name).str()); FullPath.appendSuffix("a"); if (FullPath.isArchive()) return FullPath; @@ -156,7 +153,7 @@ static inline sys::Path IsLibrary(const std::string& Name, /// Path if no matching file can be found. /// sys::Path -Linker::FindLib(const std::string &Filename) { +Linker::FindLib(const StringRef &Filename) { // Determine if the pathname can be found as it stands. sys::Path FilePath(Filename); if (FilePath.canRead() && @@ -167,7 +164,7 @@ Linker::FindLib(const std::string &Filename) { // there. for (unsigned Index = 0; Index != LibPaths.size(); ++Index) { sys::Path Directory(LibPaths[Index]); - sys::Path FullPath = IsLibrary(Filename,Directory); + sys::Path FullPath = IsLibrary(Filename, Directory); if (!FullPath.isEmpty()) return FullPath; } diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 6307ffe32c8a7..8a1a05863746a 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -1,5 +1,24 @@ add_llvm_library(LLVMMC + MCAsmInfo.cpp + MCAsmInfoCOFF.cpp + MCAsmInfoDarwin.cpp + MCAsmLexer.cpp + MCAsmParser.cpp MCAsmStreamer.cpp + MCAssembler.cpp + MCCodeEmitter.cpp MCContext.cpp + MCDisassembler.cpp + MCExpr.cpp + MCInst.cpp + MCInstPrinter.cpp + MCMachOStreamer.cpp + MCNullStreamer.cpp + MCSection.cpp + MCSectionELF.cpp + MCSectionMachO.cpp MCStreamer.cpp + MCSymbol.cpp + MCValue.cpp + TargetAsmParser.cpp ) diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp new file mode 100644 index 0000000000000..74fb930fbc147 --- /dev/null +++ b/lib/MC/MCAsmInfo.cpp @@ -0,0 +1,107 @@ +//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take. 
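[Editor's sketch] FindLib's probing order above — accept the filename as-is if it can be read, otherwise try lib<Name>.a in each search directory — can be sketched with std::filesystem. This is a simplified illustration, not the sys::Path API, and it only probes the archive form shown in IsLibrary's first branch:

    #include <filesystem>
    #include <string>
    #include <vector>
    namespace fs = std::filesystem;

    // Return the first entry matching Name or lib<Name>.a, or an empty path.
    fs::path findLib(const std::string &Name,
                     const std::vector<fs::path> &LibPaths) {
      if (fs::exists(Name))               // the pathname works as it stands
        return Name;
      for (const fs::path &Dir : LibPaths) {
        fs::path Candidate = Dir / ("lib" + Name + ".a");
        if (fs::exists(Candidate))
          return Candidate;
      }
      return {};                          // empty path: not found
    }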
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/DataTypes.h" +#include +#include +using namespace llvm; + +MCAsmInfo::MCAsmInfo() { + ZeroFillDirective = 0; + NonexecutableStackDirective = 0; + NeedsSet = false; + MaxInstLength = 4; + PCSymbol = "$"; + SeparatorChar = ';'; + CommentColumn = 60; + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = "."; + LinkerPrivateGlobalPrefix = ""; + InlineAsmStart = "APP"; + InlineAsmEnd = "NO_APP"; + AssemblerDialect = 0; + AllowQuotesInName = false; + AllowNameToStartWithDigit = false; + ZeroDirective = "\t.zero\t"; + ZeroDirectiveSuffix = 0; + AsciiDirective = "\t.ascii\t"; + AscizDirective = "\t.asciz\t"; + Data8bitsDirective = "\t.byte\t"; + Data16bitsDirective = "\t.short\t"; + Data32bitsDirective = "\t.long\t"; + Data64bitsDirective = "\t.quad\t"; + SunStyleELFSectionSwitchSyntax = false; + UsesELFSectionDirectiveForBSS = false; + AlignDirective = "\t.align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + JumpTableDirective = 0; + PICJumpTableDirective = 0; + GlobalDirective = "\t.globl\t"; + SetDirective = 0; + LCOMMDirective = 0; + COMMDirective = "\t.comm\t"; + COMMDirectiveTakesAlignment = true; + HasDotTypeDotSizeDirective = true; + HasSingleParameterDotFile = true; + UsedDirective = 0; + WeakRefDirective = 0; + WeakDefDirective = 0; + // FIXME: These are ELFish - move to ELFMAI. + HiddenDirective = "\t.hidden\t"; + ProtectedDirective = "\t.protected\t"; + AbsoluteDebugSectionOffsets = false; + AbsoluteEHSectionOffsets = false; + HasLEB128 = false; + HasDotLocAndDotFile = false; + SupportsDebugInformation = false; + ExceptionsType = ExceptionHandling::None; + DwarfRequiresFrameSection = true; + DwarfUsesInlineInfoSection = false; + Is_EHSymbolPrivate = true; + GlobalEHDirective = 0; + SupportsWeakOmittedEHFrame = true; + DwarfSectionOffsetDirective = 0; + + AsmTransCBE = 0; +} + +MCAsmInfo::~MCAsmInfo() { +} + + +unsigned MCAsmInfo::getULEB128Size(unsigned Value) { + unsigned Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); + } while (Value); + return Size; +} + +unsigned MCAsmInfo::getSLEB128Size(int Value) { + unsigned Size = 0; + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + Size += sizeof(int8_t); + } while (IsMore); + return Size; +} diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp new file mode 100644 index 0000000000000..23b0dd77916b9 --- /dev/null +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -0,0 +1,37 @@ +//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
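[Editor's sketch] getULEB128Size above counts one byte per started 7-bit group, and getSLEB128Size keeps counting while the remaining value still differs from its sign-extension or the sign bit of the last byte would be misread. For reference, a standalone ULEB128 encoder whose output length these size functions must agree with (illustrative, not the LLVM emitter):

    #include <cstdint>
    #include <vector>

    // Encode an unsigned value as ULEB128: low 7 bits per byte,
    // high bit set on every byte except the last.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value)
          Byte |= 0x80;                 // more-bytes-follow flag
        Out.push_back(Byte);
      } while (Value);
      return Out;
    }
    // encodeULEB128(624485) == {0xE5, 0x8E, 0x26} (the classic DWARF example),
    // matching getULEB128Size(624485) == 3.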
+// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on COFF-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoCOFF.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +MCAsmInfoCOFF::MCAsmInfoCOFF() { + GlobalPrefix = "_"; + LCOMMDirective = "\t.lcomm\t"; + COMMDirectiveTakesAlignment = false; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + HiddenDirective = NULL; + PrivateGlobalPrefix = "L"; // Prefix for private global symbols + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + AbsoluteDebugSectionOffsets = true; + AbsoluteEHSectionOffsets = false; + SupportsDebugInformation = true; + DwarfSectionOffsetDirective = "\t.secrel32\t"; +} + diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp new file mode 100644 index 0000000000000..d99120d4d7888 --- /dev/null +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -0,0 +1,52 @@ +//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on Darwin-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoDarwin.h" +using namespace llvm; + +MCAsmInfoDarwin::MCAsmInfoDarwin() { + // Common settings for all Darwin targets. + // Syntax: + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LinkerPrivateGlobalPrefix = "l"; + NeedsSet = true; + AllowQuotesInName = true; + HasSingleParameterDotFile = false; + + AlignmentIsInBytes = false; + InlineAsmStart = " InlineAsm Start"; + InlineAsmEnd = " InlineAsm End"; + + // Directives: + WeakDefDirective = "\t.weak_definition "; + WeakRefDirective = "\t.weak_reference "; + HiddenDirective = "\t.private_extern "; + LCOMMDirective = "\t.lcomm\t"; + ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. + ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill + SetDirective = "\t.set"; + ProtectedDirective = "\t.globl\t"; + HasDotTypeDotSizeDirective = false; + UsedDirective = "\t.no_dead_strip\t"; + + // _foo.eh symbols are currently always exported so that the linker knows + // about them. This is not necessary on 10.6 and later, but it + // doesn't hurt anything. + // FIXME: I need to get this from Triple. + Is_EHSymbolPrivate = false; + GlobalEHDirective = "\t.globl\t"; + SupportsWeakOmittedEHFrame = false; +} + diff --git a/lib/MC/MCAsmLexer.cpp b/lib/MC/MCAsmLexer.cpp new file mode 100644 index 0000000000000..1e34ed6f7900b --- /dev/null +++ b/lib/MC/MCAsmLexer.cpp @@ -0,0 +1,23 @@ +//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
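[Editor's sketch] MCAsmInfoCOFF and MCAsmInfoDarwin above show the configuration style used throughout the new MC library: the base constructor fills in generic gas-flavoured defaults, and each target subclass constructor overwrites only the directives that differ. The shape of that pattern, reduced to a few fields (hypothetical struct, not the real MCAsmInfo):

    struct AsmProps {                         // base: generic defaults
      const char *GlobalPrefix = "";
      const char *WeakRefDirective = nullptr; // null = directive unsupported
      bool AlignmentIsInBytes = true;
    };

    struct AsmPropsDarwinish : AsmProps {     // subclass: override deltas only
      AsmPropsDarwinish() {
        GlobalPrefix = "_";
        WeakRefDirective = "\t.weak_reference ";
        AlignmentIsInBytes = false;           // Darwin .align takes log2 values
      }
    };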
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmLexer.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; + +MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()) { +} + +MCAsmLexer::~MCAsmLexer() { +} + +SMLoc AsmToken::getLoc() const { + return SMLoc::getFromPointer(Str.data()); +} diff --git a/lib/MC/MCAsmParser.cpp b/lib/MC/MCAsmParser.cpp new file mode 100644 index 0000000000000..2287e8965d7be --- /dev/null +++ b/lib/MC/MCAsmParser.cpp @@ -0,0 +1,18 @@ +//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmParser.h" + +using namespace llvm; + +MCAsmParser::MCAsmParser() { +} + +MCAsmParser::~MCAsmParser() { +} diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7d94464448895..e56e968380f40 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -8,118 +8,121 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCStreamer.h" - +#include "llvm/ADT/SmallString.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { - class MCAsmStreamer : public MCStreamer { - raw_ostream &OS; +class MCAsmStreamer : public MCStreamer { + raw_ostream &OS; + const MCAsmInfo &MAI; + MCInstPrinter *InstPrinter; + MCCodeEmitter *Emitter; +public: + MCAsmStreamer(MCContext &Context, raw_ostream &_OS, const MCAsmInfo &tai, + MCInstPrinter *_Printer, MCCodeEmitter *_Emitter) + : MCStreamer(Context), OS(_OS), MAI(tai), InstPrinter(_Printer), + Emitter(_Emitter) {} + ~MCAsmStreamer() {} - MCSection *CurSection; + /// @name MCStreamer Interface + /// @{ - public: - MCAsmStreamer(MCContext &Context, raw_ostream &_OS) - : MCStreamer(Context), OS(_OS), CurSection(0) {} - ~MCAsmStreamer() {} + virtual void SwitchSection(const MCSection *Section); - /// @name MCStreamer Interface - /// @{ + virtual void EmitLabel(MCSymbol *Symbol); - virtual void SwitchSection(MCSection *Section); + virtual void EmitAssemblerFlag(AssemblerFlag Flag); - virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); - virtual void EmitAssignment(MCSymbol *Symbol, const MCValue &Value, - bool MakeAbsolute = false); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute); - virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); - virtual void EmitBytes(const char *Data, unsigned Length); + virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size, + unsigned ByteAlignment); - virtual void EmitValue(const MCValue &Value, unsigned Size); + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); - virtual void 
EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, - unsigned ValueSize = 1, - unsigned MaxBytesToEmit = 0); + virtual void EmitBytes(const StringRef &Data); - virtual void EmitValueToOffset(const MCValue &Offset, - unsigned char Value = 0); - - virtual void EmitInstruction(const MCInst &Inst); + virtual void EmitValue(const MCExpr *Value, unsigned Size); - virtual void Finish(); - - /// @} - }; - -} - -/// Allow printing values directly to a raw_ostream. -static inline raw_ostream &operator<<(raw_ostream &os, const MCValue &Value) { - if (Value.getSymA()) { - os << Value.getSymA()->getName(); - if (Value.getSymB()) - os << " - " << Value.getSymB()->getName(); - if (Value.getConstant()) - os << " + " << Value.getConstant(); - } else { - assert(!Value.getSymB() && "Invalid machine code value!"); - os << Value.getConstant(); - } + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); - return os; -} + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitInstruction(const MCInst &Inst); + + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { assert(Bytes && "Invalid size!"); return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); } -static inline MCValue truncateToSize(const MCValue &Value, unsigned Bytes) { - return MCValue::get(Value.getSymA(), Value.getSymB(), - truncateToSize(Value.getConstant(), Bytes)); +static inline const MCExpr *truncateToSize(const MCExpr *Value, + unsigned Bytes) { + // FIXME: Do we really need this routine? + return Value; } -void MCAsmStreamer::SwitchSection(MCSection *Section) { +void MCAsmStreamer::SwitchSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); if (Section != CurSection) { CurSection = Section; - - // FIXME: Really we would like the segment, flags, etc. to be separate - // values instead of embedded in the name. Not all assemblers understand all - // this stuff though. - OS << ".section " << Section->getName() << "\n"; + Section->PrintSwitchToSection(MAI, OS); } } void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { - assert(Symbol->getSection() == 0 && "Cannot emit a symbol twice!"); + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(CurSection && "Cannot emit before setting section!"); - assert(!getContext().GetSymbolValue(Symbol) && - "Cannot emit symbol which was directly assigned to!"); - OS << Symbol->getName() << ":\n"; - Symbol->setSection(CurSection); - Symbol->setExternal(false); + Symbol->print(OS, &MAI); + OS << ":\n"; + Symbol->setSection(*CurSection); } -void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCValue &Value, - bool MakeAbsolute) { - assert(!Symbol->getSection() && "Cannot assign to a label!"); - - if (MakeAbsolute) { - OS << ".set " << Symbol->getName() << ", " << Value << '\n'; - } else { - OS << Symbol->getName() << " = " << Value << '\n'; +void MCAsmStreamer::EmitAssemblerFlag(AssemblerFlag Flag) { + switch (Flag) { + default: assert(0 && "Invalid flag!"); + case SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break; } + OS << '\n'; +} - getContext().SetSymbolValue(Symbol, Value); +void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // Only absolute symbols can be redefined. 
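[Editor's sketch] The truncateToSize helper above masks a 64-bit value down to its low Bytes*8 bits so that, for example, a .byte directive never prints more than 8 bits of its operand. The same mask trick restated self-contained (the range assert is a slight strengthening of the original, which only checks for zero):

    #include <cassert>
    #include <cstdint>

    // Keep only the low Bytes*8 bits: (uint64_t)(int64_t)-1 is all-ones,
    // and shifting it right by (64 - Bytes*8) leaves exactly Bytes*8 set bits.
    int64_t truncateToSize(int64_t Value, unsigned Bytes) {
      assert(Bytes >= 1 && Bytes <= 8 && "Invalid size!");
      return Value & ((uint64_t)(int64_t)-1 >> (64 - Bytes * 8));
    }
    // truncateToSize(0x1234, 1) == 0x34; truncateToSize(-1, 2) == 0xFFFF.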
+ assert((Symbol->isUndefined() || Symbol->isAbsolute()) && + "Cannot define a symbol twice!"); + + Symbol->print(OS, &MAI); + OS << " = "; + Value->print(OS, &MAI); + OS << '\n'; } void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, @@ -139,93 +142,165 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case WeakReference: OS << ".weak_reference"; break; } - OS << ' ' << Symbol->getName() << '\n'; + OS << ' '; + Symbol->print(OS, &MAI); + OS << '\n'; +} + +void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + OS << ".desc" << ' '; + Symbol->print(OS, &MAI); + OS << ',' << DescValue << '\n'; +} + +void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, unsigned Size, + unsigned ByteAlignment) { + OS << ".comm "; + Symbol->print(OS, &MAI); + OS << ',' << Size; + if (ByteAlignment != 0) + OS << ',' << Log2_32(ByteAlignment); + OS << '\n'; } -void MCAsmStreamer::EmitBytes(const char *Data, unsigned Length) { +void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + // Note: a .zerofill directive does not switch sections. + OS << ".zerofill "; + + // This is a mach-o specific directive. + const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section); + OS << MOSection->getSegmentName() << "," << MOSection->getSectionName(); + + if (Symbol != NULL) { + OS << ','; + Symbol->print(OS, &MAI); + OS << ',' << Size; + if (ByteAlignment != 0) + OS << ',' << Log2_32(ByteAlignment); + } + OS << '\n'; +} + +void MCAsmStreamer::EmitBytes(const StringRef &Data) { assert(CurSection && "Cannot emit contents before setting section!"); - for (unsigned i = 0; i != Length; ++i) - OS << ".byte " << (unsigned) Data[i] << '\n'; + for (unsigned i = 0, e = Data.size(); i != e; ++i) + OS << ".byte " << (unsigned) (unsigned char) Data[i] << '\n'; } -void MCAsmStreamer::EmitValue(const MCValue &Value, unsigned Size) { +void MCAsmStreamer::EmitValue(const MCExpr *Value, unsigned Size) { assert(CurSection && "Cannot emit contents before setting section!"); // Need target hooks to know how to print this. switch (Size) { default: - assert(0 && "Invalid size for machine code value!"); + llvm_unreachable("Invalid size for machine code value!"); case 1: OS << ".byte"; break; case 2: OS << ".short"; break; case 4: OS << ".long"; break; case 8: OS << ".quad"; break; } - OS << ' ' << truncateToSize(Value, Size) << '\n'; + OS << ' '; + truncateToSize(Value, Size)->print(OS, &MAI); + OS << '\n'; } void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { - // Some assemblers don't support .balign, so we always emit as .p2align if - // this is a power of two. Otherwise we assume the client knows the target - // supports .balign and use that. - unsigned Pow2 = Log2_32(ByteAlignment); - bool IsPow2 = (1U << Pow2) == ByteAlignment; - + // Some assemblers don't support non-power of two alignments, so we always + // emit alignments as a power of two if possible. + if (isPowerOf2_32(ByteAlignment)) { + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << MAI.getAlignDirective(); break; + // FIXME: use MAI for this! 
+ case 2: OS << ".p2alignw "; break; + case 4: OS << ".p2alignl "; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + if (MAI.getAlignmentIsInBytes()) + OS << ByteAlignment; + else + OS << Log2_32(ByteAlignment); + + if (Value || MaxBytesToEmit) { + OS << ", 0x"; + OS.write_hex(truncateToSize(Value, ValueSize)); + + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + } + OS << '\n'; + return; + } + + // Non-power of two alignment. This is not widely supported by assemblers. + // FIXME: Parameterize this based on MAI. switch (ValueSize) { - default: - assert(0 && "Invalid size for machine code value!"); - case 8: - assert(0 && "Unsupported alignment size!"); - case 1: OS << (IsPow2 ? ".p2align" : ".balign"); break; - case 2: OS << (IsPow2 ? ".p2alignw" : ".balignw"); break; - case 4: OS << (IsPow2 ? ".p2alignl" : ".balignl"); break; + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << ".balign"; break; + case 2: OS << ".balignw"; break; + case 4: OS << ".balignl"; break; + case 8: llvm_unreachable("Unsupported alignment size!"); } - OS << ' ' << (IsPow2 ? Pow2 : ByteAlignment); - + OS << ' ' << ByteAlignment; OS << ", " << truncateToSize(Value, ValueSize); if (MaxBytesToEmit) OS << ", " << MaxBytesToEmit; OS << '\n'; } -void MCAsmStreamer::EmitValueToOffset(const MCValue &Offset, +void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { // FIXME: Verify that Offset is associated with the current section. - OS << ".org " << Offset << ", " << (unsigned) Value << '\n'; -} - -static raw_ostream &operator<<(raw_ostream &OS, const MCOperand &Op) { - if (Op.isReg()) - return OS << "reg:" << Op.getReg(); - if (Op.isImm()) - return OS << "imm:" << Op.getImm(); - if (Op.isMBBLabel()) - return OS << "mbblabel:(" - << Op.getMBBLabelFunction() << ", " << Op.getMBBLabelBlock(); - assert(Op.isMCValue() && "Invalid operand!"); - return OS << "val:" << Op.getMCValue(); + OS << ".org "; + Offset->print(OS, &MAI); + OS << ", " << (unsigned) Value << '\n'; } void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { assert(CurSection && "Cannot emit contents before setting section!"); - // FIXME: Implement proper printing. - OS << "MCInst(" - << "opcode=" << Inst.getOpcode() << ", " - << "operands=["; - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { - if (i) - OS << ", "; - OS << Inst.getOperand(i); + + // If we have an AsmPrinter, use that to print. + if (InstPrinter) { + InstPrinter->printInst(&Inst); + OS << '\n'; + + // Show the encoding if we have a code emitter. + if (Emitter) { + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + Emitter->EncodeInstruction(Inst, VecOS); + VecOS.flush(); + + OS.indent(20); + OS << " # encoding: ["; + for (unsigned i = 0, e = Code.size(); i != e; ++i) { + if (i) + OS << ','; + OS << format("%#04x", uint8_t(Code[i])); + } + OS << "]\n"; + } + + return; } - OS << "])\n"; + + // Otherwise fall back to a structural printing for now. Eventually we should + // always have access to the target specific printer. 
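[Editor's sketch] When a code emitter is available, EmitInstruction above appends the raw instruction bytes as an assembler comment of the form "# encoding: [0x55,0x89,0xe5]". A standalone sketch of that formatting, using printf-style output in place of raw_ostream's indent() and format() (function name hypothetical):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Print an indented "# encoding: [...]" comment for one instruction.
    void printEncodingComment(const std::vector<uint8_t> &Code) {
      std::printf("%*s# encoding: [", 20, "");   // pad to a fixed column
      for (size_t i = 0; i != Code.size(); ++i)
        std::printf("%s%#04x", i ? "," : "", Code[i]);
      std::printf("]\n");
    }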
+ Inst.print(OS, &MAI); + OS << '\n'; } void MCAsmStreamer::Finish() { OS.flush(); } -MCStreamer *llvm::createAsmStreamer(MCContext &Context, raw_ostream &OS) { - return new MCAsmStreamer(Context, OS); +MCStreamer *llvm::createAsmStreamer(MCContext &Context, raw_ostream &OS, + const MCAsmInfo &MAI, MCInstPrinter *IP, + MCCodeEmitter *CE) { + return new MCAsmStreamer(Context, OS, MAI, IP, CE); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp new file mode 100644 index 0000000000000..0afdf98cbe797 --- /dev/null +++ b/lib/MC/MCAssembler.cpp @@ -0,0 +1,1190 @@ +//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "assembler" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/Target/TargetMachOWriterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +class MachObjectWriter; + +STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); + +// FIXME FIXME FIXME: There are number of places in this file where we convert +// what is a 64-bit assembler value used for computation into a value in the +// object file, which may truncate it. We should detect that truncation where +// invalid and report errors back. + +static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, + MachObjectWriter &MOW); + +/// isVirtualSection - Check if this is a section which does not actually exist +/// in the object file. +static bool isVirtualSection(const MCSection &Section) { + // FIXME: Lame. + const MCSectionMachO &SMO = static_cast(Section); + unsigned Type = SMO.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE; + return (Type == MCSectionMachO::S_ZEROFILL); +} + +class MachObjectWriter { + // See . + enum { + Header_Magic32 = 0xFEEDFACE, + Header_Magic64 = 0xFEEDFACF + }; + + static const unsigned Header32Size = 28; + static const unsigned Header64Size = 32; + static const unsigned SegmentLoadCommand32Size = 56; + static const unsigned Section32Size = 68; + static const unsigned SymtabLoadCommandSize = 24; + static const unsigned DysymtabLoadCommandSize = 80; + static const unsigned Nlist32Size = 12; + static const unsigned RelocationInfoSize = 8; + + enum HeaderFileType { + HFT_Object = 0x1 + }; + + enum HeaderFlags { + HF_SubsectionsViaSymbols = 0x2000 + }; + + enum LoadCommandType { + LCT_Segment = 0x1, + LCT_Symtab = 0x2, + LCT_Dysymtab = 0xb + }; + + // See . + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // . Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// IndirectSymbolFlags - Flags for encoding special values in the indirect + /// symbol entry. + enum IndirectSymbolFlags { + ISF_Local = 0x80000000, + ISF_Absolute = 0x40000000 + }; + + /// RelocationFlags - Special flags for addresses. 
+ enum RelocationFlags { + RF_Scattered = 0x80000000 + }; + + enum RelocationInfoType { + RIT_Vanilla = 0, + RIT_Pair = 1, + RIT_Difference = 2, + RIT_PreboundLazyPointer = 3, + RIT_LocalDifference = 4 + }; + + /// MachSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct MachSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint8_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const MachSymbolData &RHS) const { + const std::string &Name = SymbolData->getSymbol().getName(); + return Name < RHS.SymbolData->getSymbol().getName(); + } + }; + + raw_ostream &OS; + bool IsLSB; + +public: + MachObjectWriter(raw_ostream &_OS, bool _IsLSB = true) + : OS(_OS), IsLSB(_IsLSB) { + } + + /// @name Helper Methods + /// @{ + + void Write8(uint8_t Value) { + OS << char(Value); + } + + void Write16(uint16_t Value) { + if (IsLSB) { + Write8(uint8_t(Value >> 0)); + Write8(uint8_t(Value >> 8)); + } else { + Write8(uint8_t(Value >> 8)); + Write8(uint8_t(Value >> 0)); + } + } + + void Write32(uint32_t Value) { + if (IsLSB) { + Write16(uint16_t(Value >> 0)); + Write16(uint16_t(Value >> 16)); + } else { + Write16(uint16_t(Value >> 16)); + Write16(uint16_t(Value >> 0)); + } + } + + void Write64(uint64_t Value) { + if (IsLSB) { + Write32(uint32_t(Value >> 0)); + Write32(uint32_t(Value >> 32)); + } else { + Write32(uint32_t(Value >> 32)); + Write32(uint32_t(Value >> 0)); + } + } + + void WriteZeros(unsigned N) { + const char Zeros[16] = { 0 }; + + for (unsigned i = 0, e = N / 16; i != e; ++i) + OS << StringRef(Zeros, 16); + + OS << StringRef(Zeros, N % 16); + } + + void WriteString(const StringRef &Str, unsigned ZeroFillSize = 0) { + OS << Str; + if (ZeroFillSize) + WriteZeros(ZeroFillSize - Str.size()); + } + + /// @} + + void WriteHeader32(unsigned NumLoadCommands, unsigned LoadCommandsSize, + bool SubsectionsViaSymbols) { + uint32_t Flags = 0; + + if (SubsectionsViaSymbols) + Flags |= HF_SubsectionsViaSymbols; + + // struct mach_header (28 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(Header_Magic32); + + // FIXME: Support cputype. + Write32(TargetMachOWriterInfo::HDR_CPU_TYPE_I386); + // FIXME: Support cpusubtype. + Write32(TargetMachOWriterInfo::HDR_CPU_SUBTYPE_I386_ALL); + Write32(HFT_Object); + Write32(NumLoadCommands); // Object files have a single load command, the + // segment. + Write32(LoadCommandsSize); + Write32(Flags); + + assert(OS.tell() - Start == Header32Size); + } + + /// WriteSegmentLoadCommand32 - Write a 32-bit segment load command. + /// + /// \arg NumSections - The number of sections in this segment. + /// \arg SectionDataSize - The total size of the sections. + void WriteSegmentLoadCommand32(unsigned NumSections, + uint64_t VMSize, + uint64_t SectionDataStartOffset, + uint64_t SectionDataSize) { + // struct segment_command (56 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Segment); + Write32(SegmentLoadCommand32Size + NumSections * Section32Size); + + WriteString("", 16); + Write32(0); // vmaddr + Write32(VMSize); // vmsize + Write32(SectionDataStartOffset); // file offset + Write32(SectionDataSize); // file size + Write32(0x7); // maxprot + Write32(0x7); // initprot + Write32(NumSections); + Write32(0); // flags + + assert(OS.tell() - Start == SegmentLoadCommand32Size); + } + + void WriteSection32(const MCSectionData &SD, uint64_t FileOffset, + uint64_t RelocationsStart, unsigned NumRelocations) { + // The offset is unused for virtual sections. 
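[Editor's sketch] MachObjectWriter's Write16/Write32/Write64 above are built entirely out of Write8, flipping the composition order on IsLSB so one code path can emit either byte order. A minimal standalone version of that layering, with std::ostream standing in for raw_ostream:

    #include <cstdint>
    #include <ostream>

    struct EndianWriter {
      std::ostream &OS;
      bool IsLSB;                    // true = little-endian output

      void Write8(uint8_t V) { OS.put(char(V)); }
      void Write16(uint16_t V) {
        if (IsLSB) { Write8(uint8_t(V));      Write8(uint8_t(V >> 8)); }
        else       { Write8(uint8_t(V >> 8)); Write8(uint8_t(V)); }
      }
      void Write32(uint32_t V) {     // two 16-bit halves, same trick
        if (IsLSB) { Write16(uint16_t(V));       Write16(uint16_t(V >> 16)); }
        else       { Write16(uint16_t(V >> 16)); Write16(uint16_t(V)); }
      }
    };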
+ if (isVirtualSection(SD.getSection())) { + assert(SD.getFileSize() == 0 && "Invalid file size!"); + FileOffset = 0; + } + + // struct section (68 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast(SD.getSection()); + WriteString(Section.getSectionName(), 16); + WriteString(Section.getSegmentName(), 16); + Write32(SD.getAddress()); // address + Write32(SD.getSize()); // size + Write32(FileOffset); + + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); + Write32(Log2_32(SD.getAlignment())); + Write32(NumRelocations ? RelocationsStart : 0); + Write32(NumRelocations); + Write32(Section.getTypeAndAttributes()); + Write32(0); // reserved1 + Write32(Section.getStubSize()); // reserved2 + + assert(OS.tell() - Start == Section32Size); + } + + void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Symtab); + Write32(SymtabLoadCommandSize); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); + + assert(OS.tell() - Start == SymtabLoadCommandSize); + } + + void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Dysymtab); + Write32(DysymtabLoadCommandSize); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == DysymtabLoadCommandSize); + } + + void WriteNlist32(MachSymbolData &MSD) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol &Symbol = Data.getSymbol(); + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint32_t Address = 0; + + // Set the N_TYPE bits. See . + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = STT_Undefined; + else if (Symbol.isAbsolute()) + Type = STT_Absolute; + else + Type = STT_Section; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= STF_PrivateExtern; + + // Set external bit. + if (Data.isExternal() || Symbol.isUndefined()) + Type |= STF_External; + + // Compute the symbol address. + if (Symbol.isDefined()) { + if (Symbol.isAbsolute()) { + llvm_unreachable("FIXME: Not yet implemented!"); + } else { + Address = Data.getFragment()->getAddress() + Data.getOffset(); + } + } else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. 
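[Editor's sketch] Each load-command writer above brackets its output with "Start = OS.tell()" and a trailing assert, so any drift between the bytes actually emitted and the fixed record size from the Mach-O headers is caught immediately in debug builds. The pattern in isolation, against a hypothetical 8-byte record:

    #include <cassert>
    #include <cstdint>
    #include <sstream>

    // Emit a fixed-size record and verify the byte count against the spec.
    void writeRecord(std::ostringstream &OS) {
      static const unsigned RecordSize = 8;   // size from the format spec
      uint64_t Start = OS.tellp();
      (void)Start;                            // only used by the assert

      OS.write("\x01\x00\x00\x00", 4);        // e.g. a command tag
      OS.write("\x08\x00\x00\x00", 4);        // e.g. the command size

      assert(uint64_t(OS.tellp()) - Start == RecordSize &&
             "Record size drifted from the spec");
    }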
+ if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + llvm_report_error("invalid 'common' alignment '" + + Twine(Align) + "'"); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); + } + } + + // struct nlist (12 bytes) + + Write32(MSD.StringIndex); + Write8(Type); + Write8(MSD.SectionIndex); + + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + Write32(Address); + } + + struct MachRelocationEntry { + uint32_t Word0; + uint32_t Word1; + }; + void ComputeScatteredRelocationInfo(MCAssembler &Asm, + MCSectionData::Fixup &Fixup, + DenseMap &SymbolMap, + std::vector &Relocs) { + uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + unsigned IsPCRel = 0; + unsigned Type = RIT_Vanilla; + + // See . + + const MCSymbol *A = Fixup.Value.getSymA(); + MCSymbolData *SD = SymbolMap.lookup(A); + uint32_t Value = SD->getFragment()->getAddress() + SD->getOffset(); + uint32_t Value2 = 0; + + if (const MCSymbol *B = Fixup.Value.getSymB()) { + Type = RIT_LocalDifference; + + MCSymbolData *SD = SymbolMap.lookup(B); + Value2 = SD->getFragment()->getAddress() + SD->getOffset(); + } + + unsigned Log2Size = Log2_32(Fixup.Size); + assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + + // The value which goes in the fixup is current value of the expression. + Fixup.FixedValue = Value - Value2 + Fixup.Value.getConstant(); + + MachRelocationEntry MRE; + MRE.Word0 = ((Address << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value; + Relocs.push_back(MRE); + + if (Type == RIT_LocalDifference) { + Type = RIT_Pair; + + MachRelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (Type << 24) | + (Log2Size << 28) | + (0 << 30) | + RF_Scattered); + MRE.Word1 = Value2; + Relocs.push_back(MRE); + } + } + + void ComputeRelocationInfo(MCAssembler &Asm, + MCSectionData::Fixup &Fixup, + DenseMap &SymbolMap, + std::vector &Relocs) { + // If this is a local symbol plus an offset or a difference, then we need a + // scattered relocation entry. + if (Fixup.Value.getSymB()) // a - b + return ComputeScatteredRelocationInfo(Asm, Fixup, SymbolMap, Relocs); + if (Fixup.Value.getSymA() && Fixup.Value.getConstant()) + if (!Fixup.Value.getSymA()->isUndefined()) + return ComputeScatteredRelocationInfo(Asm, Fixup, SymbolMap, Relocs); + + // See . + uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Value = 0; + unsigned Index = 0; + unsigned IsPCRel = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Fixup.Value.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = RIT_Vanilla; + Value = 0; + llvm_unreachable("FIXME: Not yet implemented!"); + } else { + const MCSymbol *Symbol = Fixup.Value.getSymA(); + MCSymbolData *SD = SymbolMap.lookup(Symbol); + + if (Symbol->isUndefined()) { + IsExtern = 1; + Index = SD->getIndex(); + Value = 0; + } else { + // The index is the section ordinal. + // + // FIXME: O(N) + Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + if (&*it == SD->getFragment()->getParent()) + break; + Value = SD->getFragment()->getAddress() + SD->getOffset(); + } + + Type = RIT_Vanilla; + } + + // The value which goes in the fixup is current value of the expression. 
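[Editor's sketch] The scattered-relocation path above packs several small fields into one 32-bit word. The Word0 packing, lifted out verbatim as a standalone helper (field layout as in the shifts shown: 24-bit address, 4-bit type at bit 24, 2-bit log2(size) at bit 28, pcrel flag at bit 30, high bit marking the entry scattered):

    #include <cstdint>

    const uint32_t RF_Scattered = 0x80000000;

    // Pack a scattered relocation's first word from its component fields.
    uint32_t packScatteredWord0(uint32_t Address, unsigned Type,
                                unsigned Log2Size, unsigned IsPCRel) {
      return (Address << 0) | (Type << 24) | (Log2Size << 28) |
             (IsPCRel << 30) | RF_Scattered;
    }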
+ Fixup.FixedValue = Value + Fixup.Value.getConstant(); + + unsigned Log2Size = Log2_32(Fixup.Size); + assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Address; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocs.push_back(MRE); + } + + void BindIndirectSymbols(MCAssembler &Asm, + DenseMap &SymbolMap) { + // This is the point where 'as' creates actual symbols for indirect symbols + // (in the following two passes). It would be easier for us to do this + // sooner when we see the attribute, but that makes getting the order in the + // symbol table much more complicated than it is worth. + // + // FIXME: Revisit this when the dust settles. + + // Bind non lazy symbol pointers first. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast(it->SectionData->getSection()); + + unsigned Type = + Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE; + if (Type != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) + continue; + + MCSymbolData *&Entry = SymbolMap[it->Symbol]; + if (!Entry) + Entry = new MCSymbolData(*it->Symbol, 0, 0, &Asm); + } + + // Then lazy symbol pointers and symbol stubs. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast(it->SectionData->getSection()); + + unsigned Type = + Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE; + if (Type != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + Type != MCSectionMachO::S_SYMBOL_STUBS) + continue; + + MCSymbolData *&Entry = SymbolMap[it->Symbol]; + if (!Entry) { + Entry = new MCSymbolData(*it->Symbol, 0, 0, &Asm); + + // Set the symbol type to undefined lazy, but only on construction. + // + // FIXME: Do not hardcode. + Entry->setFlags(Entry->getFlags() | 0x0001); + } + } + } + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. + void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, + std::vector &LocalSymbolData, + std::vector &ExternalSymbolData, + std::vector &UndefinedSymbolData) { + // Build section lookup table. + DenseMap SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); + + // Index 0 is always the empty string. + StringMap StringIndexMap; + StringTable += '\x00'; + + // Build the symbol arrays and the string table, but only for non-local + // symbols. + // + // The particular order that we collect the symbols and create the string + // table, then sort the symbols is chosen to match 'as'. Even though it + // doesn't matter for correctness, this is important for letting us diff .o + // files. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore assembler temporaries. 
+ if (it->getSymbol().isTemporary()) + continue; + + if (!it->isExternal() && !Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); + } + } + + // Now add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore assembler temporaries. + if (it->getSymbol().isTemporary()) + continue; + + if (it->isExternal() || Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + LocalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); + } + } + + // External and undefined symbols are required to be in lexicographic order. + std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. + Index = 0; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); + + // The string table is padded to a multiple of 4. + // + // FIXME: Check to see if this varies per arch. + while (StringTable.size() % 4) + StringTable += '\x00'; + } + + void WriteObject(MCAssembler &Asm) { + unsigned NumSections = Asm.size(); + + // Compute the symbol -> symbol data map. + // + // FIXME: This should not be here. + DenseMap SymbolMap; + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) + SymbolMap[&it->getSymbol()] = it; + + // Create symbol data for any indirect symbols. + BindIndirectSymbols(Asm, SymbolMap); + + // Compute symbol table information. + SmallString<256> StringTable; + std::vector LocalSymbolData; + std::vector ExternalSymbolData; + std::vector UndefinedSymbolData; + unsigned NumSymbols = Asm.symbol_size(); + + // No symbol table command is written if there are no symbols. + if (NumSymbols) + ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); + + // The section data starts after the header, the segment load command (and + // section headers) and the symbol table. + unsigned NumLoadCommands = 1; + uint64_t LoadCommandsSize = + SegmentLoadCommand32Size + NumSections * Section32Size; + + // Add the symbol table load command sizes, if used. 
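[Editor's sketch] ComputeSymbolTable above interns each symbol name exactly once: a map from name to string-table offset, with offset 0 reserved for the empty string, and the finished table padded to a multiple of four. The interning step as a standalone sketch (std::map in place of StringMap; the builder type is hypothetical):

    #include <cstdint>
    #include <map>
    #include <string>

    struct StringTableBuilder {
      std::string Table = std::string(1, '\0');  // index 0: the empty string
      std::map<std::string, uint64_t> Index;

      // Return the table offset for Name, appending it on first use.
      uint64_t intern(const std::string &Name) {
        uint64_t &Entry = Index[Name];
        if (!Entry && !Name.empty()) {
          Entry = Table.size();
          Table += Name;
          Table += '\0';
        }
        return Entry;
      }

      void pad() {                               // Mach-O pads to 4 bytes
        while (Table.size() % 4)
          Table += '\0';
      }
    };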
+ if (NumSymbols) { + NumLoadCommands += 2; + LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; + } + + // Compute the total size of the section data, as well as its file size and + // vm size. + uint64_t SectionDataStart = Header32Size + LoadCommandsSize; + uint64_t SectionDataSize = 0; + uint64_t SectionDataFileSize = 0; + uint64_t VMSize = 0; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + MCSectionData &SD = *it; + + VMSize = std::max(VMSize, SD.getAddress() + SD.getSize()); + + if (isVirtualSection(SD.getSection())) + continue; + + SectionDataSize = std::max(SectionDataSize, + SD.getAddress() + SD.getSize()); + SectionDataFileSize = std::max(SectionDataFileSize, + SD.getAddress() + SD.getFileSize()); + } + + // The section data is passed to 4 bytes. + // + // FIXME: Is this machine dependent? + unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + SectionDataFileSize += SectionDataPadding; + + // Write the prolog, starting with the header and load command... + WriteHeader32(NumLoadCommands, LoadCommandsSize, + Asm.getSubsectionsViaSymbols()); + WriteSegmentLoadCommand32(NumSections, VMSize, + SectionDataStart, SectionDataSize); + + // ... and then the section headers. + // + // We also compute the section relocations while we do this. Note that + // compute relocation info will also update the fixup to have the correct + // value; this will be overwrite the appropriate data in the fragment when + // it is written. + std::vector RelocInfos; + uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; + ++it) { + MCSectionData &SD = *it; + + // The assembler writes relocations in the reverse order they were seen. + // + // FIXME: It is probably more complicated than this. + unsigned NumRelocsStart = RelocInfos.size(); + for (unsigned i = 0, e = SD.fixup_size(); i != e; ++i) + ComputeRelocationInfo(Asm, SD.getFixups()[e - i - 1], SymbolMap, + RelocInfos); + + unsigned NumRelocs = RelocInfos.size() - NumRelocsStart; + uint64_t SectionStart = SectionDataStart + SD.getAddress(); + WriteSection32(SD, SectionStart, RelocTableEnd, NumRelocs); + RelocTableEnd += NumRelocs * RelocationInfoSize; + } + + // Write the symbol table load command, if used. + if (NumSymbols) { + unsigned FirstLocalSymbol = 0; + unsigned NumLocalSymbols = LocalSymbolData.size(); + unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; + unsigned NumExternalSymbols = ExternalSymbolData.size(); + unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; + unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); + unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); + unsigned NumSymTabSymbols = + NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; + uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; + uint64_t IndirectSymbolOffset = 0; + + // If used, the indirect symbols are written after the section data. + if (NumIndirectSymbols) + IndirectSymbolOffset = RelocTableEnd; + + // The symbol table is written after the indirect symbol data. + uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; + + // The string table is written after symbol table. 
+ uint64_t StringTableOffset = + SymbolTableOffset + NumSymTabSymbols * Nlist32Size; + WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + StringTableOffset, StringTable.size()); + + WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + FirstExternalSymbol, NumExternalSymbols, + FirstUndefinedSymbol, NumUndefinedSymbols, + IndirectSymbolOffset, NumIndirectSymbols); + } + + // Write the actual section data. + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + WriteFileData(OS, *it, *this); + + // Write the extra padding. + WriteZeros(SectionDataPadding); + + // Write the relocation entries. + for (unsigned i = 0, e = RelocInfos.size(); i != e; ++i) { + Write32(RelocInfos[i].Word0); + Write32(RelocInfos[i].Word1); + } + + // Write the symbol table data, if used. + if (NumSymbols) { + // Write the indirect symbol entries. + for (MCAssembler::indirect_symbol_iterator + it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // Indirect symbols in the non lazy symbol pointer section have some + // special handling. + const MCSectionMachO &Section = + static_cast(it->SectionData->getSection()); + unsigned Type = + Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE; + if (Type == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { + // If this symbol is defined and internal, mark it as such. + if (it->Symbol->isDefined() && + !SymbolMap.lookup(it->Symbol)->isExternal()) { + uint32_t Flags = ISF_Local; + if (it->Symbol->isAbsolute()) + Flags |= ISF_Absolute; + Write32(Flags); + continue; + } + } + + Write32(SymbolMap[it->Symbol]->getIndex()); + } + + // FIXME: Check that offsets match computed ones. + + // Write the symbol table entries. + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + WriteNlist32(LocalSymbolData[i]); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + WriteNlist32(ExternalSymbolData[i]); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + WriteNlist32(UndefinedSymbolData[i]); + + // Write the string table. + OS << StringTable.str(); + } + } +}; + +/* *** */ + +MCFragment::MCFragment() : Kind(FragmentType(~0)) { +} + +MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) + : Kind(_Kind), + Parent(_Parent), + FileSize(~UINT64_C(0)) +{ + if (Parent) + Parent->getFragmentList().push_back(this); +} + +MCFragment::~MCFragment() { +} + +uint64_t MCFragment::getAddress() const { + assert(getParent() && "Missing Section!"); + return getParent()->getAddress() + Offset; +} + +/* *** */ + +MCSectionData::MCSectionData() : Section(0) {} + +MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) + : Section(&_Section), + Alignment(1), + Address(~UINT64_C(0)), + Size(~UINT64_C(0)), + FileSize(~UINT64_C(0)), + LastFixupLookup(~0) +{ + if (A) + A->getSectionList().push_back(this); +} + +const MCSectionData::Fixup * +MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const { + // Use a one level cache to turn the common case of accessing the fixups in + // order into O(1) instead of O(N). 
+ unsigned i = LastFixupLookup, Count = Fixups.size(), End = Fixups.size(); + if (i >= End) + i = 0; + while (Count--) { + const Fixup &F = Fixups[i]; + if (F.Fragment == Fragment && F.Offset == Offset) { + LastFixupLookup = i; + return &F; + } + + ++i; + if (i == End) + i = 0; + } + + return 0; +} + +/* *** */ + +MCSymbolData::MCSymbolData() : Symbol(0) {} + +MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, + uint64_t _Offset, MCAssembler *A) + : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset), + IsExternal(false), IsPrivateExtern(false), + CommonSize(0), CommonAlign(0), Flags(0), Index(0) +{ + if (A) + A->getSymbolList().push_back(this); +} + +/* *** */ + +MCAssembler::MCAssembler(MCContext &_Context, raw_ostream &_OS) + : Context(_Context), OS(_OS), SubsectionsViaSymbols(false) +{ +} + +MCAssembler::~MCAssembler() { +} + +void MCAssembler::LayoutSection(MCSectionData &SD) { + uint64_t Address = SD.getAddress(); + + for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) { + MCFragment &F = *it; + + F.setOffset(Address - SD.getAddress()); + + // Evaluate fragment size. + switch (F.getKind()) { + case MCFragment::FT_Align: { + MCAlignFragment &AF = cast(F); + + uint64_t Size = OffsetToAlignment(Address, AF.getAlignment()); + if (Size > AF.getMaxBytesToEmit()) + AF.setFileSize(0); + else + AF.setFileSize(Size); + break; + } + + case MCFragment::FT_Data: + F.setFileSize(F.getMaxFileSize()); + break; + + case MCFragment::FT_Fill: { + MCFillFragment &FF = cast(F); + + F.setFileSize(F.getMaxFileSize()); + + // If the fill value is constant, thats it. + if (FF.getValue().isAbsolute()) + break; + + // Otherwise, add fixups for the values. + for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { + MCSectionData::Fixup Fix(F, i * FF.getValueSize(), + FF.getValue(),FF.getValueSize()); + SD.getFixups().push_back(Fix); + } + break; + } + + case MCFragment::FT_Org: { + MCOrgFragment &OF = cast(F); + + if (!OF.getOffset().isAbsolute()) + llvm_unreachable("FIXME: Not yet implemented!"); + uint64_t OrgOffset = OF.getOffset().getConstant(); + uint64_t Offset = Address - SD.getAddress(); + + // FIXME: We need a way to communicate this error. + if (OrgOffset < Offset) + llvm_report_error("invalid .org offset '" + Twine(OrgOffset) + + "' (at offset '" + Twine(Offset) + "'"); + + F.setFileSize(OrgOffset - Offset); + break; + } + + case MCFragment::FT_ZeroFill: { + MCZeroFillFragment &ZFF = cast(F); + + // Align the fragment offset; it is safe to adjust the offset freely since + // this is only in virtual sections. + uint64_t Aligned = RoundUpToAlignment(Address, ZFF.getAlignment()); + F.setOffset(Aligned - SD.getAddress()); + + // FIXME: This is misnamed. + F.setFileSize(ZFF.getSize()); + break; + } + } + + Address += F.getFileSize(); + } + + // Set the section sizes. + SD.setSize(Address - SD.getAddress()); + if (isVirtualSection(SD.getSection())) + SD.setFileSize(0); + else + SD.setFileSize(Address - SD.getAddress()); +} + +/// WriteFileData - Write the \arg F data to the output file. +static void WriteFileData(raw_ostream &OS, const MCFragment &F, + MachObjectWriter &MOW) { + uint64_t Start = OS.tell(); + (void) Start; + + ++EmittedFragments; + + // FIXME: Embed in fragments instead? 
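[Editor's sketch] LayoutSection above leans on two alignment helpers: OffsetToAlignment (how many pad bytes reach the next boundary) and RoundUpToAlignment (the next aligned address itself). Their arithmetic, restated standalone; both assume a nonzero alignment, and the modulo form also works for non-power-of-two values:

    #include <cstdint>

    // Bytes of padding needed to bring Value up to the next Align boundary.
    uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
      return (Align - Value % Align) % Align;   // assumes Align != 0
    }

    // Smallest multiple of Align that is >= Value.
    uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align) {
      return Value + offsetToAlignment(Value, Align);
    }
    // e.g. offsetToAlignment(13, 8) == 3, roundUpToAlignment(13, 8) == 16.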
+  switch (F.getKind()) {
+  case MCFragment::FT_Align: {
+    MCAlignFragment &AF = cast<MCAlignFragment>(F);
+    uint64_t Count = AF.getFileSize() / AF.getValueSize();
+
+    // FIXME: This error shouldn't actually occur (the front end should emit
+    // multiple .align directives to enforce the semantics it wants), but is
+    // severe enough that we want to report it. How to handle this?
+    if (Count * AF.getValueSize() != AF.getFileSize())
+      llvm_report_error("undefined .align directive, value size '" +
+                        Twine(AF.getValueSize()) +
+                        "' is not a divisor of padding size '" +
+                        Twine(AF.getFileSize()) + "'");
+
+    for (uint64_t i = 0; i != Count; ++i) {
+      switch (AF.getValueSize()) {
+      default:
+        assert(0 && "Invalid size!");
+      case 1: MOW.Write8 (uint8_t (AF.getValue())); break;
+      case 2: MOW.Write16(uint16_t(AF.getValue())); break;
+      case 4: MOW.Write32(uint32_t(AF.getValue())); break;
+      case 8: MOW.Write64(uint64_t(AF.getValue())); break;
+      }
+    }
+    break;
+  }
+
+  case MCFragment::FT_Data:
+    OS << cast<MCDataFragment>(F).getContents().str();
+    break;
+
+  case MCFragment::FT_Fill: {
+    MCFillFragment &FF = cast<MCFillFragment>(F);
+
+    int64_t Value = 0;
+    if (FF.getValue().isAbsolute())
+      Value = FF.getValue().getConstant();
+    for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) {
+      if (!FF.getValue().isAbsolute()) {
+        // Find the fixup.
+        //
+        // FIXME: Find a better way to write in the fixes.
+        const MCSectionData::Fixup *Fixup =
+          F.getParent()->LookupFixup(&F, i * FF.getValueSize());
+        assert(Fixup && "Missing fixup for fill value!");
+        Value = Fixup->FixedValue;
+      }
+
+      switch (FF.getValueSize()) {
+      default:
+        assert(0 && "Invalid size!");
+      case 1: MOW.Write8 (uint8_t (Value)); break;
+      case 2: MOW.Write16(uint16_t(Value)); break;
+      case 4: MOW.Write32(uint32_t(Value)); break;
+      case 8: MOW.Write64(uint64_t(Value)); break;
+      }
+    }
+    break;
+  }
+
+  case MCFragment::FT_Org: {
+    MCOrgFragment &OF = cast<MCOrgFragment>(F);
+
+    for (uint64_t i = 0, e = OF.getFileSize(); i != e; ++i)
+      MOW.Write8(uint8_t(OF.getValue()));
+
+    break;
+  }
+
+  case MCFragment::FT_ZeroFill: {
+    assert(0 && "Invalid zero fill fragment in concrete section!");
+    break;
+  }
+  }
+
+  assert(OS.tell() - Start == F.getFileSize());
+}
+
+/// WriteFileData - Write the \arg SD data to the output file.
+static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
+                          MachObjectWriter &MOW) {
+  // Ignore virtual sections.
+  if (isVirtualSection(SD.getSection())) {
+    assert(SD.getFileSize() == 0);
+    return;
+  }
+
+  uint64_t Start = OS.tell();
+  (void) Start;
+
+  for (MCSectionData::const_iterator it = SD.begin(),
+         ie = SD.end(); it != ie; ++it)
+    WriteFileData(OS, *it, MOW);
+
+  // Add section padding.
+  assert(SD.getFileSize() >= SD.getSize() && "Invalid section sizes!");
+  MOW.WriteZeros(SD.getFileSize() - SD.getSize());
+
+  assert(OS.tell() - Start == SD.getFileSize());
+}
+
+void MCAssembler::Finish() {
+  // Layout the concrete sections and fragments.
+  uint64_t Address = 0;
+  MCSectionData *Prev = 0;
+  for (iterator it = begin(), ie = end(); it != ie; ++it) {
+    MCSectionData &SD = *it;
+
+    // Skip virtual sections.
+    if (isVirtualSection(SD.getSection()))
+      continue;
+
+    // Align this section if necessary by adding padding bytes to the previous
+    // section.
+    if (uint64_t Pad = OffsetToAlignment(Address, it->getAlignment())) {
+      assert(Prev && "Missing prev section!");
+      Prev->setFileSize(Prev->getFileSize() + Pad);
+      Address += Pad;
+    }
+
+    // Layout the section fragments and its size.
+ SD.setAddress(Address); + LayoutSection(SD); + Address += SD.getFileSize(); + + Prev = &SD; + } + + // Layout the virtual sections. + for (iterator it = begin(), ie = end(); it != ie; ++it) { + MCSectionData &SD = *it; + + if (!isVirtualSection(SD.getSection())) + continue; + + SD.setAddress(Address); + LayoutSection(SD); + Address += SD.getSize(); + } + + // Write the object file. + MachObjectWriter MOW(OS); + MOW.WriteObject(*this); + + OS.flush(); +} diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp new file mode 100644 index 0000000000000..c122763b2fe59 --- /dev/null +++ b/lib/MC/MCCodeEmitter.cpp @@ -0,0 +1,18 @@ +//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCCodeEmitter.h" + +using namespace llvm; + +MCCodeEmitter::MCCodeEmitter() { +} + +MCCodeEmitter::~MCCodeEmitter() { +} diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 6c6019c76ffdf..f36564a6afaeb 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -14,23 +14,15 @@ #include "llvm/MC/MCValue.h" using namespace llvm; -MCContext::MCContext() -{ +MCContext::MCContext() { } MCContext::~MCContext() { + // NOTE: The sections are all allocated out of a bump pointer allocator, + // we don't need to free them here. } -MCSection *MCContext::GetSection(const char *Name) { - MCSection *&Entry = Sections[Name]; - - if (!Entry) - Entry = new (*this) MCSection(Name); - - return Entry; -} - -MCSymbol *MCContext::CreateSymbol(const char *Name) { +MCSymbol *MCContext::CreateSymbol(const StringRef &Name) { assert(Name[0] != '\0' && "Normal symbols cannot be unnamed!"); // Create and bind the symbol, and ensure that names are unique. @@ -39,17 +31,16 @@ MCSymbol *MCContext::CreateSymbol(const char *Name) { return Entry = new (*this) MCSymbol(Name, false); } -MCSymbol *MCContext::GetOrCreateSymbol(const char *Name) { +MCSymbol *MCContext::GetOrCreateSymbol(const StringRef &Name) { MCSymbol *&Entry = Symbols[Name]; if (Entry) return Entry; return Entry = new (*this) MCSymbol(Name, false); } - -MCSymbol *MCContext::CreateTemporarySymbol(const char *Name) { +MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) { // If unnamed, just create a symbol. - if (Name[0] == '\0') + if (Name.empty()) new (*this) MCSymbol("", true); // Otherwise create as usual. 
@@ -58,20 +49,20 @@ MCSymbol *MCContext::CreateTemporarySymbol(const char *Name) {
   return Entry = new (*this) MCSymbol(Name, true);
 }
 
-MCSymbol *MCContext::LookupSymbol(const char *Name) const {
+MCSymbol *MCContext::LookupSymbol(const StringRef &Name) const {
   return Symbols.lookup(Name);
 }
 
-void MCContext::ClearSymbolValue(MCSymbol *Sym) {
+void MCContext::ClearSymbolValue(const MCSymbol *Sym) {
   SymbolValues.erase(Sym);
 }
 
-void MCContext::SetSymbolValue(MCSymbol *Sym, const MCValue &Value) {
+void MCContext::SetSymbolValue(const MCSymbol *Sym, const MCValue &Value) {
   SymbolValues[Sym] = Value;
 }
 
-const MCValue *MCContext::GetSymbolValue(MCSymbol *Sym) const {
-  DenseMap<MCSymbol*, MCValue>::iterator it = SymbolValues.find(Sym);
+const MCValue *MCContext::GetSymbolValue(const MCSymbol *Sym) const {
+  DenseMap<const MCSymbol*, MCValue>::iterator it = SymbolValues.find(Sym);
 
   if (it == SymbolValues.end())
     return 0;
diff --git a/lib/MC/MCDisassembler.cpp b/lib/MC/MCDisassembler.cpp
new file mode 100644
index 0000000000000..08096906462f1
--- /dev/null
+++ b/lib/MC/MCDisassembler.cpp
@@ -0,0 +1,14 @@
+//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+using namespace llvm;
+
+MCDisassembler::~MCDisassembler() {
+}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
new file mode 100644
index 0000000000000..0f3e053de8ec8
--- /dev/null
+++ b/lib/MC/MCExpr.cpp
@@ -0,0 +1,286 @@
+//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  switch (getKind()) {
+  case MCExpr::Constant:
+    OS << cast<MCConstantExpr>(*this).getValue();
+    return;
+
+  case MCExpr::SymbolRef: {
+    const MCSymbol &Sym = cast<MCSymbolRefExpr>(*this).getSymbol();
+
+    // Parenthesize names that start with $ so that they don't look like
+    // absolute names.
+    if (Sym.getName()[0] == '$') {
+      OS << '(';
+      Sym.print(OS, MAI);
+      OS << ')';
+    } else {
+      Sym.print(OS, MAI);
+    }
+    return;
+  }
+
+  case MCExpr::Unary: {
+    const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
+    switch (UE.getOpcode()) {
+    default: assert(0 && "Invalid opcode!");
+    case MCUnaryExpr::LNot:  OS << '!'; break;
+    case MCUnaryExpr::Minus: OS << '-'; break;
+    case MCUnaryExpr::Not:   OS << '~'; break;
+    case MCUnaryExpr::Plus:  OS << '+'; break;
+    }
+    UE.getSubExpr()->print(OS, MAI);
+    return;
+  }
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this);
+
+    // Only print parens around the LHS if it is non-trivial.
+    if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) {
+      BE.getLHS()->print(OS, MAI);
+    } else {
+      OS << '(';
+      BE.getLHS()->print(OS, MAI);
+      OS << ')';
+    }
+
+    switch (BE.getOpcode()) {
+    default: assert(0 && "Invalid opcode!");
+    case MCBinaryExpr::Add:
+      // Print "X-42" instead of "X+-42".
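+      // A negative RHS constant already carries its own sign, so emitting it
+      // directly (e.g. "_foo-42") avoids the redundant '+' separator.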
+      if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
+        if (RHSC->getValue() < 0) {
+          OS << RHSC->getValue();
+          return;
+        }
+      }
+
+      OS << '+';
+      break;
+    case MCBinaryExpr::And:  OS << '&'; break;
+    case MCBinaryExpr::Div:  OS << '/'; break;
+    case MCBinaryExpr::EQ:   OS << "=="; break;
+    case MCBinaryExpr::GT:   OS << '>'; break;
+    case MCBinaryExpr::GTE:  OS << ">="; break;
+    case MCBinaryExpr::LAnd: OS << "&&"; break;
+    case MCBinaryExpr::LOr:  OS << "||"; break;
+    case MCBinaryExpr::LT:   OS << '<'; break;
+    case MCBinaryExpr::LTE:  OS << "<="; break;
+    case MCBinaryExpr::Mod:  OS << '%'; break;
+    case MCBinaryExpr::Mul:  OS << '*'; break;
+    case MCBinaryExpr::NE:   OS << "!="; break;
+    case MCBinaryExpr::Or:   OS << '|'; break;
+    case MCBinaryExpr::Shl:  OS << "<<"; break;
+    case MCBinaryExpr::Shr:  OS << ">>"; break;
+    case MCBinaryExpr::Sub:  OS << '-'; break;
+    case MCBinaryExpr::Xor:  OS << '^'; break;
+    }
+
+    // Only print parens around the RHS if it is non-trivial.
+    if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+      BE.getRHS()->print(OS, MAI);
+    } else {
+      OS << '(';
+      BE.getRHS()->print(OS, MAI);
+      OS << ')';
+    }
+    return;
+  }
+  }
+
+  assert(0 && "Invalid expression kind!");
+}
+
+void MCExpr::dump() const {
+  print(errs(), 0);
+  errs() << '\n';
+}
+
+/* *** */
+
+const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+                                         const MCExpr *RHS, MCContext &Ctx) {
+  return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
+}
+
+const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+                                       MCContext &Ctx) {
+  return new (Ctx) MCUnaryExpr(Opc, Expr);
+}
+
+const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+  return new (Ctx) MCConstantExpr(Value);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+                                               MCContext &Ctx) {
+  return new (Ctx) MCSymbolRefExpr(Sym);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const StringRef &Name,
+                                               MCContext &Ctx) {
+  return Create(Ctx.GetOrCreateSymbol(Name), Ctx);
+}
+
+
+/* *** */
+
+bool MCExpr::EvaluateAsAbsolute(MCContext &Ctx, int64_t &Res) const {
+  MCValue Value;
+
+  if (!EvaluateAsRelocatable(Ctx, Value) || !Value.isAbsolute())
+    return false;
+
+  Res = Value.getConstant();
+  return true;
+}
+
+static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A,
+                                const MCSymbol *RHS_B, int64_t RHS_Cst,
+                                MCValue &Res) {
+  // We can't add or subtract two symbols.
+  if ((LHS.getSymA() && RHS_A) ||
+      (LHS.getSymB() && RHS_B))
+    return false;
+
+  const MCSymbol *A = LHS.getSymA() ? LHS.getSymA() : RHS_A;
+  const MCSymbol *B = LHS.getSymB() ? LHS.getSymB() : RHS_B;
+  if (B) {
+    // If we have a negated symbol, then we must also have a non-negated
+    // symbol in order to encode the expression. We can do this check later to
+    // permit expressions which eventually fold to a representable form -- such
+    // as (a + (0 - b)) -- if necessary.
+    if (!A)
+      return false;
+  }
+  Res = MCValue::get(A, B, LHS.getConstant() + RHS_Cst);
+  return true;
+}
+
+bool MCExpr::EvaluateAsRelocatable(MCContext &Ctx, MCValue &Res) const {
+  switch (getKind()) {
+  case Constant:
+    Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
+    return true;
+
+  case SymbolRef: {
+    const MCSymbol &Sym = cast<MCSymbolRefExpr>(this)->getSymbol();
+    if (const MCValue *Value = Ctx.GetSymbolValue(&Sym))
+      Res = *Value;
+    else
+      Res = MCValue::get(&Sym, 0, 0);
+    return true;
+  }
+
+  case Unary: {
+    const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
+    MCValue Value;
+
+    if (!AUE->getSubExpr()->EvaluateAsRelocatable(Ctx, Value))
+      return false;
+
+    switch (AUE->getOpcode()) {
+    case MCUnaryExpr::LNot:
+      if (!Value.isAbsolute())
+        return false;
+      Res = MCValue::get(!Value.getConstant());
+      break;
+    case MCUnaryExpr::Minus:
+      /// -(a - b + const) ==> (b - a - const)
+      if (Value.getSymA() && !Value.getSymB())
+        return false;
+      Res = MCValue::get(Value.getSymB(), Value.getSymA(),
+                         -Value.getConstant());
+      break;
+    case MCUnaryExpr::Not:
+      if (!Value.isAbsolute())
+        return false;
+      Res = MCValue::get(~Value.getConstant());
+      break;
+    case MCUnaryExpr::Plus:
+      Res = Value;
+      break;
+    }
+
+    return true;
+  }
+
+  case Binary: {
+    const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
+    MCValue LHSValue, RHSValue;
+
+    if (!ABE->getLHS()->EvaluateAsRelocatable(Ctx, LHSValue) ||
+        !ABE->getRHS()->EvaluateAsRelocatable(Ctx, RHSValue))
+      return false;
+
+    // We only support a few operations on non-constant expressions, handle
+    // those first.
+    if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) {
+      switch (ABE->getOpcode()) {
+      default:
+        return false;
+      case MCBinaryExpr::Sub:
+        // Negate RHS and add.
+        return EvaluateSymbolicAdd(LHSValue,
+                                   RHSValue.getSymB(), RHSValue.getSymA(),
+                                   -RHSValue.getConstant(),
+                                   Res);
+
+      case MCBinaryExpr::Add:
+        return EvaluateSymbolicAdd(LHSValue,
+                                   RHSValue.getSymA(), RHSValue.getSymB(),
+                                   RHSValue.getConstant(),
+                                   Res);
+      }
+    }
+
+    // FIXME: We need target hooks for the evaluation. It may be limited in
+    // width, and gas defines the result of comparisons differently from
+    // Apple 'as' (the result is sign extended).
+    int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant();
+    int64_t Result = 0;
+    switch (ABE->getOpcode()) {
+    case MCBinaryExpr::Add:  Result = LHS + RHS; break;
+    case MCBinaryExpr::And:  Result = LHS & RHS; break;
+    case MCBinaryExpr::Div:  Result = LHS / RHS; break;
+    case MCBinaryExpr::EQ:   Result = LHS == RHS; break;
+    case MCBinaryExpr::GT:   Result = LHS > RHS; break;
+    case MCBinaryExpr::GTE:  Result = LHS >= RHS; break;
+    case MCBinaryExpr::LAnd: Result = LHS && RHS; break;
+    case MCBinaryExpr::LOr:  Result = LHS || RHS; break;
+    case MCBinaryExpr::LT:   Result = LHS < RHS; break;
+    case MCBinaryExpr::LTE:  Result = LHS <= RHS; break;
+    case MCBinaryExpr::Mod:  Result = LHS % RHS; break;
+    case MCBinaryExpr::Mul:  Result = LHS * RHS; break;
+    case MCBinaryExpr::NE:   Result = LHS != RHS; break;
+    case MCBinaryExpr::Or:   Result = LHS | RHS; break;
+    case MCBinaryExpr::Shl:  Result = LHS << RHS; break;
+    case MCBinaryExpr::Shr:  Result = LHS >> RHS; break;
+    case MCBinaryExpr::Sub:  Result = LHS - RHS; break;
+    case MCBinaryExpr::Xor:  Result = LHS ^ RHS; break;
+    }
+
+    Res = MCValue::get(Result);
+    return true;
+  }
+  }
+
+  assert(0 && "Invalid assembly expression kind!");
+  return false;
+}
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
new file mode 100644
index 0000000000000..d05031870add8
--- /dev/null
+++ b/lib/MC/MCInst.cpp
@@ -0,0 +1,50 @@
+//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  OS << "<MCOperand ";
+  if (!isValid())
+    OS << "INVALID";
+  else if (isReg())
+    OS << "Reg:" << getReg();
+  else if (isImm())
+    OS << "Imm:" << getImm();
+  else if (isExpr()) {
+    OS << "Expr:(";
+    getExpr()->print(OS, MAI);
+    OS << ")";
+  } else
+    OS << "UNDEFINED";
+  OS << ">";
+}
+
+void MCOperand::dump() const {
+  print(errs(), 0);
+  errs() << "\n";
+}
+
+void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  OS << "<MCInst " << getOpcode();
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    OS << " ";
+    getOperand(i).print(OS, MAI);
+  }
+  OS << ">";
+}
+
+void MCInst::dump() const {
+  print(errs(), 0);
+  errs() << "\n";
+}
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
new file mode 100644
index 0000000000000..e90c03c0cf426
--- /dev/null
+++ b/lib/MC/MCInstPrinter.cpp
@@ -0,0 +1,14 @@
+//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstPrinter.h"
+using namespace llvm;
+
+MCInstPrinter::~MCInstPrinter() {
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
new file mode 100644
index 0000000000000..e04bd1fd1cb89
--- /dev/null
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -0,0 +1,379 @@
+//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+class MCMachOStreamer : public MCStreamer {
+  /// SymbolFlags - We store the value for the 'desc' symbol field in the
+  /// lowest 16 bits of the implementation defined flags.
+  enum SymbolFlags { // See <mach-o/nlist.h>.
+    SF_DescFlagsMask                        = 0xFFFF,
+
+    // Reference type flags.
+    SF_ReferenceTypeMask                    = 0x0007,
+    SF_ReferenceTypeUndefinedNonLazy        = 0x0000,
+    SF_ReferenceTypeUndefinedLazy           = 0x0001,
+    SF_ReferenceTypeDefined                 = 0x0002,
+    SF_ReferenceTypePrivateDefined          = 0x0003,
+    SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
+    SF_ReferenceTypePrivateUndefinedLazy    = 0x0005,
+
+    // Other 'desc' flags.
+    SF_NoDeadStrip    = 0x0020,
+    SF_WeakReference  = 0x0040,
+    SF_WeakDefinition = 0x0080
+  };
+
+private:
+  MCAssembler Assembler;
+
+  MCCodeEmitter *Emitter;
+
+  MCSectionData *CurSectionData;
+
+  DenseMap<const MCSection*, MCSectionData*> SectionMap;
+
+  DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;
+
+private:
+  MCFragment *getCurrentFragment() const {
+    assert(CurSectionData && "No current section!");
+
+    if (!CurSectionData->empty())
+      return &CurSectionData->getFragmentList().back();
+
+    return 0;
+  }
+
+  MCSectionData &getSectionData(const MCSection &Section) {
+    MCSectionData *&Entry = SectionMap[&Section];
+
+    if (!Entry)
+      Entry = new MCSectionData(Section, &Assembler);
+
+    return *Entry;
+  }
+
+  MCSymbolData &getSymbolData(const MCSymbol &Symbol) {
+    MCSymbolData *&Entry = SymbolMap[&Symbol];
+
+    if (!Entry)
+      Entry = new MCSymbolData(Symbol, 0, 0, &Assembler);
+
+    return *Entry;
+  }
+
+public:
+  MCMachOStreamer(MCContext &Context, raw_ostream &_OS, MCCodeEmitter *_Emitter)
+    : MCStreamer(Context), Assembler(Context, _OS), Emitter(_Emitter),
+      CurSectionData(0) {}
+  ~MCMachOStreamer() {}
+
+  const MCExpr *AddValueSymbols(const MCExpr *Value) {
+    switch (Value->getKind()) {
+    case MCExpr::Constant:
+      break;
+
+    case MCExpr::Binary: {
+      const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+      AddValueSymbols(BE->getLHS());
+      AddValueSymbols(BE->getRHS());
+      break;
+    }
+
+    case MCExpr::SymbolRef:
+      getSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+      break;
+
+    case MCExpr::Unary:
+      AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+      break;
+    }
+
+    return Value;
+  }
+
+  /// @name MCStreamer Interface
+  /// @{
+
+  virtual void SwitchSection(const MCSection *Section);
+
+  virtual void EmitLabel(MCSymbol *Symbol);
+
+  virtual void EmitAssemblerFlag(AssemblerFlag Flag);
+
+  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute);
+
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+                                unsigned ByteAlignment);
+
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                            unsigned Size = 0, unsigned ByteAlignment = 0);
+
+  virtual void EmitBytes(const StringRef &Data);
+
+  virtual void EmitValue(const MCExpr *Value, unsigned Size);
+
+  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                    unsigned ValueSize = 1,
+                                    unsigned MaxBytesToEmit = 0);
+
+  virtual void EmitValueToOffset(const MCExpr *Offset,
+                                 unsigned char Value = 0);
+
+  virtual void EmitInstruction(const MCInst &Inst);
+
+  virtual void Finish();
+
+  /// @}
+};
+
+} // end anonymous namespace.
+
+void MCMachOStreamer::SwitchSection(const MCSection *Section) {
+  assert(Section && "Cannot switch to a null section!");
+
+  // If already in this section, then this is a noop.
+  if (Section == CurSection) return;
+
+  CurSection = Section;
+  CurSectionData = &getSectionData(*Section);
+}
+
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+  // FIXME: We should also use offsets into Fill fragments.
+  MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+  if (!F)
+    F = new MCDataFragment(CurSectionData);
+
+  MCSymbolData &SD = getSymbolData(*Symbol);
+  assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+  SD.setFragment(F);
+  SD.setOffset(F->getContents().size());
+
+  // This causes the reference type and weak reference flags to be cleared.
+  SD.setFlags(SD.getFlags() & ~(SF_WeakReference | SF_ReferenceTypeMask));
+
+  Symbol->setSection(*CurSection);
+}
+
+void MCMachOStreamer::EmitAssemblerFlag(AssemblerFlag Flag) {
+  switch (Flag) {
+  case SubsectionsViaSymbols:
+    Assembler.setSubsectionsViaSymbols(true);
+    return;
+  }
+
+  assert(0 && "invalid assembler flag!");
+}
+
+void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+  // Only absolute symbols can be redefined.
+  assert((Symbol->isUndefined() || Symbol->isAbsolute()) &&
+         "Cannot define a symbol twice!");
+
+  llvm_unreachable("FIXME: Not yet implemented!");
+}
+
+void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+                                          SymbolAttr Attribute) {
+  // Indirect symbols are handled differently, to match how 'as' handles
+  // them. This makes writing matching .o files easier.
+  if (Attribute == MCStreamer::IndirectSymbol) {
+    // Note that we intentionally cannot use the symbol data here; this is
+    // important for matching the string table that 'as' generates.
+    IndirectSymbolData ISD;
+    ISD.Symbol = Symbol;
+    ISD.SectionData = CurSectionData;
+    Assembler.getIndirectSymbols().push_back(ISD);
+    return;
+  }
+
+  // Adding a symbol attribute always introduces the symbol; note that an
+  // important side effect of calling getSymbolData here is to register the
+  // symbol with the assembler.
+  MCSymbolData &SD = getSymbolData(*Symbol);
+
+  // The implementation of symbol attributes is designed to match 'as', but it
+  // leaves much to be desired. It doesn't really make sense to arbitrarily
+  // add and remove flags, but 'as' allows this (in particular, see .desc).
+  //
+  // In the future it might be worth trying to make these operations more well
+  // defined.
+  switch (Attribute) {
+  case MCStreamer::IndirectSymbol:
+  case MCStreamer::Hidden:
+  case MCStreamer::Internal:
+  case MCStreamer::Protected:
+  case MCStreamer::Weak:
+    assert(0 && "Invalid symbol attribute for Mach-O!");
+    break;
+
+  case MCStreamer::Global:
+    SD.setExternal(true);
+    break;
+
+  case MCStreamer::LazyReference:
+    // FIXME: This requires -dynamic.
+    SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+    if (Symbol->isUndefined())
+      SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy);
+    break;
+
+    // Since .reference sets the no dead strip bit, it is equivalent to
+    // .no_dead_strip in practice.
+  case MCStreamer::Reference:
+  case MCStreamer::NoDeadStrip:
+    SD.setFlags(SD.getFlags() | SF_NoDeadStrip);
+    break;
+
+  case MCStreamer::PrivateExtern:
+    SD.setExternal(true);
+    SD.setPrivateExtern(true);
+    break;
+
+  case MCStreamer::WeakReference:
+    // FIXME: This requires -dynamic.
+    if (Symbol->isUndefined())
+      SD.setFlags(SD.getFlags() | SF_WeakReference);
+    break;
+
+  case MCStreamer::WeakDefinition:
+    // FIXME: 'as' enforces that this is defined and global. The manual claims
+    // it has to be in a coalesced section, but this isn't enforced.
+    SD.setFlags(SD.getFlags() | SF_WeakDefinition);
+    break;
+  }
+}
+
+void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+  // Encode the 'desc' value into the lowest implementation defined bits.
+  assert(DescValue == (DescValue & SF_DescFlagsMask) &&
+         "Invalid .desc value!");
+  getSymbolData(*Symbol).setFlags(DescValue & SF_DescFlagsMask);
+}
+
+void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, unsigned Size,
+                                       unsigned ByteAlignment) {
+  // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+  MCSymbolData &SD = getSymbolData(*Symbol);
+  SD.setExternal(true);
+  SD.setCommon(Size, ByteAlignment);
+}
+
+void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+                                   unsigned Size, unsigned ByteAlignment) {
+  MCSectionData &SectData = getSectionData(*Section);
+
+  // The symbol may not be present, which only creates the section.
+  if (!Symbol)
+    return;
+
+  // FIXME: Assert that this section has the zerofill type.
+
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+  MCSymbolData &SD = getSymbolData(*Symbol);
+
+  MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData);
+  SD.setFragment(F);
+
+  Symbol->setSection(*Section);
+
+  // Update the maximum alignment on the zero fill section if necessary.
+  if (ByteAlignment > SectData.getAlignment())
+    SectData.setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitBytes(const StringRef &Data) {
+  MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+  if (!DF)
+    DF = new MCDataFragment(CurSectionData);
+  DF->getContents().append(Data.begin(), Data.end());
+}
+
+void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size) {
+  MCValue RelocValue;
+
+  if (!AddValueSymbols(Value)->EvaluateAsRelocatable(getContext(), RelocValue))
+    return llvm_report_error("expected relocatable expression");
+
+  new MCFillFragment(RelocValue, Size, 1, CurSectionData);
+}
+
+void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+                                           int64_t Value, unsigned ValueSize,
+                                           unsigned MaxBytesToEmit) {
+  if (MaxBytesToEmit == 0)
+    MaxBytesToEmit = ByteAlignment;
+  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+                      CurSectionData);
+
+  // Update the maximum alignment on the current section if necessary.
+  if (ByteAlignment > CurSectionData->getAlignment())
+    CurSectionData->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset,
+                                        unsigned char Value) {
+  MCValue RelocOffset;
+
+  if (!AddValueSymbols(Offset)->EvaluateAsRelocatable(getContext(),
+                                                      RelocOffset))
+    return llvm_report_error("expected relocatable expression");
+
+  new MCOrgFragment(RelocOffset, Value, CurSectionData);
+}
+
+void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
+  // Scan for values.
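+  // Symbols referenced by operand expressions must be registered with the
+  // assembler before encoding; e.g. a branch to _foo introduces _foo here
+  // even though no fixup is recorded for it yet (see the FIXME below).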
+ for (unsigned i = 0; i != Inst.getNumOperands(); ++i) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + + if (!Emitter) + llvm_unreachable("no code emitter available!"); + + // FIXME: Relocations! + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + Emitter->EncodeInstruction(Inst, VecOS); + EmitBytes(VecOS.str()); +} + +void MCMachOStreamer::Finish() { + Assembler.Finish(); +} + +MCStreamer *llvm::createMachOStreamer(MCContext &Context, raw_ostream &OS, + MCCodeEmitter *CE) { + return new MCMachOStreamer(Context, OS, CE); +} diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp new file mode 100644 index 0000000000000..3cd22ca6f009c --- /dev/null +++ b/lib/MC/MCNullStreamer.cpp @@ -0,0 +1,70 @@ +//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" + +using namespace llvm; + +namespace { + + class MCNullStreamer : public MCStreamer { + public: + MCNullStreamer(MCContext &Context) : MCStreamer(Context) {} + + /// @name MCStreamer Interface + /// @{ + + virtual void SwitchSection(const MCSection *Section) { + CurSection = Section; + } + + virtual void EmitLabel(MCSymbol *Symbol) {} + + virtual void EmitAssemblerFlag(AssemblerFlag Flag) {} + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {} + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute) {} + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + + virtual void EmitCommonSymbol(MCSymbol *Symbol, unsigned Size, + unsigned ByteAlignment) {} + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) {} + + virtual void EmitBytes(const StringRef &Data) {} + + virtual void EmitValue(const MCExpr *Value, unsigned Size) {} + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) {} + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) {} + + virtual void EmitInstruction(const MCInst &Inst) {} + + virtual void Finish() {} + + /// @} + }; + +} + +MCStreamer *llvm::createNullStreamer(MCContext &Context) { + return new MCNullStreamer(Context); +} diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp new file mode 100644 index 0000000000000..333a4710f962d --- /dev/null +++ b/lib/MC/MCSection.cpp @@ -0,0 +1,45 @@ +//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MCSection +//===----------------------------------------------------------------------===// + +MCSection::~MCSection() { +} + +//===----------------------------------------------------------------------===// +// MCSectionCOFF +//===----------------------------------------------------------------------===// + +MCSectionCOFF *MCSectionCOFF:: +Create(const StringRef &Name, bool IsDirective, SectionKind K, MCContext &Ctx) { + return new (Ctx) MCSectionCOFF(Name, IsDirective, K); +} + +void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + if (isDirective()) { + OS << getName() << '\n'; + return; + } + OS << "\t.section\t" << getName() << ",\""; + if (getKind().isText()) + OS << 'x'; + if (getKind().isWriteable()) + OS << 'w'; + OS << "\"\n"; +} diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp new file mode 100644 index 0000000000000..660a8c9489f0b --- /dev/null +++ b/lib/MC/MCSectionELF.cpp @@ -0,0 +1,138 @@ +//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCAsmInfo.h" + +using namespace llvm; + +MCSectionELF *MCSectionELF:: +Create(const StringRef &Section, unsigned Type, unsigned Flags, + SectionKind K, bool isExplicit, MCContext &Ctx) { + return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit); +} + +// ShouldOmitSectionDirective - Decides whether a '.section' directive +// should be printed before the section name +bool MCSectionELF::ShouldOmitSectionDirective(const char *Name, + const MCAsmInfo &MAI) const { + + // FIXME: Does .section .bss/.data/.text work everywhere?? + if (strcmp(Name, ".text") == 0 || + strcmp(Name, ".data") == 0 || + (strcmp(Name, ".bss") == 0 && + !MAI.usesELFSectionDirectiveForBSS())) + return true; + + return false; +} + +// ShouldPrintSectionType - Only prints the section type if supported +bool MCSectionELF::ShouldPrintSectionType(unsigned Ty) const { + + if (IsExplicit && !(Ty == SHT_NOBITS || Ty == SHT_PROGBITS)) + return false; + + return true; +} + +void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + if (ShouldOmitSectionDirective(SectionName.c_str(), MAI)) { + OS << '\t' << getSectionName() << '\n'; + return; + } + + OS << "\t.section\t" << getSectionName(); + + // Handle the weird solaris syntax if desired. 
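+  // Sun-style assemblers spell section flags as named specifiers, e.g.
+  //   .section .foo,#alloc,#execinstr
+  // whereas the GNU style printed below reads: .section .foo,"ax",@progbits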
+ if (MAI.usesSunStyleELFSectionSwitchSyntax() && + !(Flags & MCSectionELF::SHF_MERGE)) { + if (Flags & MCSectionELF::SHF_ALLOC) + OS << ",#alloc"; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << ",#execinstr"; + if (Flags & MCSectionELF::SHF_WRITE) + OS << ",#write"; + if (Flags & MCSectionELF::SHF_TLS) + OS << ",#tls"; + } else { + OS << ",\""; + if (Flags & MCSectionELF::SHF_ALLOC) + OS << 'a'; + if (Flags & MCSectionELF::SHF_EXECINSTR) + OS << 'x'; + if (Flags & MCSectionELF::SHF_WRITE) + OS << 'w'; + if (Flags & MCSectionELF::SHF_MERGE) + OS << 'M'; + if (Flags & MCSectionELF::SHF_STRINGS) + OS << 'S'; + if (Flags & MCSectionELF::SHF_TLS) + OS << 'T'; + + // If there are target-specific flags, print them. + if (Flags & ~MCSectionELF::TARGET_INDEP_SHF) + PrintTargetSpecificSectionFlags(MAI, OS); + + OS << '"'; + + if (ShouldPrintSectionType(Type)) { + OS << ','; + + // If comment string is '@', e.g. as on ARM - use '%' instead + if (MAI.getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + if (Type == MCSectionELF::SHT_INIT_ARRAY) + OS << "init_array"; + else if (Type == MCSectionELF::SHT_FINI_ARRAY) + OS << "fini_array"; + else if (Type == MCSectionELF::SHT_PREINIT_ARRAY) + OS << "preinit_array"; + else if (Type == MCSectionELF::SHT_NOBITS) + OS << "nobits"; + else if (Type == MCSectionELF::SHT_PROGBITS) + OS << "progbits"; + + if (getKind().isMergeable1ByteCString()) { + OS << ",1"; + } else if (getKind().isMergeable2ByteCString()) { + OS << ",2"; + } else if (getKind().isMergeable4ByteCString() || + getKind().isMergeableConst4()) { + OS << ",4"; + } else if (getKind().isMergeableConst8()) { + OS << ",8"; + } else if (getKind().isMergeableConst16()) { + OS << ",16"; + } + } + } + + OS << '\n'; +} + +// HasCommonSymbols - True if this section holds common symbols, this is +// indicated on the ELF object file by a symbol with SHN_COMMON section +// header index. +bool MCSectionELF::HasCommonSymbols() const { + + if (strncmp(SectionName.c_str(), ".gnu.linkonce.", 14) == 0) + return true; + + return false; +} + + diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp new file mode 100644 index 0000000000000..b3aeb9c1789f8 --- /dev/null +++ b/lib/MC/MCSectionMachO.cpp @@ -0,0 +1,271 @@ +//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// SectionTypeDescriptors - These are strings that describe the various section +/// types. This *must* be kept in order with and stay synchronized with the +/// section type list. 
+static const struct { + const char *AssemblerName, *EnumName; +} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = { + { "regular", "S_REGULAR" }, // 0x00 + { 0, "S_ZEROFILL" }, // 0x01 + { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02 + { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03 + { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04 + { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05 + { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06 + { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07 + { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08 + { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09 + { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A + { "coalesced", "S_COALESCED" }, // 0x0B + { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C + { "interposing", "S_INTERPOSING" }, // 0x0D + { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E + { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F + { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" } // 0x10 +}; + + +/// SectionAttrDescriptors - This is an array of descriptors for section +/// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed +/// by attribute, instead it is searched. The last entry has an AttrFlagEnd +/// AttrFlag value. +static const struct { + unsigned AttrFlag; + const char *AssemblerName, *EnumName; +} SectionAttrDescriptors[] = { +#define ENTRY(ASMNAME, ENUM) \ + { MCSectionMachO::ENUM, ASMNAME, #ENUM }, +ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS) +ENTRY("no_toc", S_ATTR_NO_TOC) +ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS) +ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP) +ENTRY("live_support", S_ATTR_LIVE_SUPPORT) +ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE) +ENTRY("debug", S_ATTR_DEBUG) +ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS) +ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC) +ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) +#undef ENTRY + { 0, "none", 0 }, // used if section has no attributes but has a stub size +#define AttrFlagEnd 0xffffffff // non legal value, multiple attribute bits set + { AttrFlagEnd, 0, 0 } +}; + + +MCSectionMachO *MCSectionMachO:: +Create(const StringRef &Segment, const StringRef &Section, + unsigned TypeAndAttributes, unsigned Reserved2, + SectionKind K, MCContext &Ctx) { + // S_SYMBOL_STUBS must be set for Reserved2 to be non-zero. + return new (Ctx) MCSectionMachO(Segment, Section, TypeAndAttributes, + Reserved2, K); +} + +void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + OS << "\t.section\t" << getSegmentName() << ',' << getSectionName(); + + // Get the section type and attributes. + unsigned TAA = getTypeAndAttributes(); + if (TAA == 0) { + OS << '\n'; + return; + } + + OS << ','; + + unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE; + assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE && + "Invalid SectionType specified!"); + + if (SectionTypeDescriptors[SectionType].AssemblerName) + OS << SectionTypeDescriptors[SectionType].AssemblerName; + else + OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>"; + + // If we don't have any attributes, we're done. + unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES; + if (SectionAttrs == 0) { + // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as + // the attribute specifier. + if (Reserved2 != 0) + OS << ",none," << Reserved2; + OS << '\n'; + return; + } + + // Check each attribute to see if we have it. 
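+  // Attributes print as a '+' separated list after the section type, e.g.
+  //   .section __TEXT,__text,regular,pure_instructions
+  // for the canonical text section.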
+  char Separator = ',';
+  for (unsigned i = 0; SectionAttrDescriptors[i].AttrFlag; ++i) {
+    // Check to see if we have this attribute.
+    if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
+      continue;
+
+    // Yep, clear it and print it.
+    SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
+
+    OS << Separator;
+    if (SectionAttrDescriptors[i].AssemblerName)
+      OS << SectionAttrDescriptors[i].AssemblerName;
+    else
+      OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
+    Separator = '+';
+  }
+
+  assert(SectionAttrs == 0 && "Unknown section attributes!");
+
+  // If we have a S_SYMBOL_STUBS size specified, print it.
+  if (Reserved2 != 0)
+    OS << ',' << Reserved2;
+  OS << '\n';
+}
+
+/// StripSpaces - This removes leading and trailing spaces from the StringRef.
+static void StripSpaces(StringRef &Str) {
+  while (!Str.empty() && isspace(Str[0]))
+    Str = Str.substr(1);
+  while (!Str.empty() && isspace(Str.back()))
+    Str = Str.substr(0, Str.size()-1);
+}
+
+/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+/// This is a string that can appear after a .section directive in a mach-o
+/// flavored .s file. If successful, this fills in the specified Out
+/// parameters and returns an empty string. When an invalid section
+/// specifier is present, this returns a string indicating the problem.
+std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,       // In.
+                                                  StringRef &Segment,   // Out.
+                                                  StringRef &Section,   // Out.
+                                                  unsigned &TAA,        // Out.
+                                                  unsigned &StubSize) { // Out.
+  // Find the first comma.
+  std::pair<StringRef, StringRef> Comma = Spec.split(',');
+
+  // If there is no comma, we fail.
+  if (Comma.second.empty())
+    return "mach-o section specifier requires a segment and section "
+           "separated by a comma";
+
+  // Capture segment, remove leading and trailing whitespace.
+  Segment = Comma.first;
+  StripSpaces(Segment);
+
+  // Verify that the segment is present and not too long.
+  if (Segment.empty() || Segment.size() > 16)
+    return "mach-o section specifier requires a segment whose length is "
+           "between 1 and 16 characters";
+
+  // Split the section name off from any attributes if present.
+  Comma = Comma.second.split(',');
+
+  // Capture section, remove leading and trailing whitespace.
+  Section = Comma.first;
+  StripSpaces(Section);
+
+  // Verify that the section is present and not too long.
+  if (Section.empty() || Section.size() > 16)
+    return "mach-o section specifier requires a section whose length is "
+           "between 1 and 16 characters";
+
+  // If there is no comma after the section, we're done.
+  TAA = 0;
+  StubSize = 0;
+  if (Comma.second.empty())
+    return "";
+
+  // Otherwise, we need to parse the section type and attributes.
+  Comma = Comma.second.split(',');
+
+  // Get the section type.
+  StringRef SectionType = Comma.first;
+  StripSpaces(SectionType);
+
+  // Figure out which section type it is.
+  unsigned TypeID;
+  for (TypeID = 0; TypeID != MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
+    if (SectionTypeDescriptors[TypeID].AssemblerName &&
+        SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
+      break;
+
+  // If we didn't find the section type, reject it.
+  if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
+    return "mach-o section specifier uses an unknown section type";
+
+  // Remember the TypeID.
+  TAA = TypeID;
+
+  // If we have no comma after the section type, there are no attributes.
+  if (Comma.second.empty()) {
+    // S_SYMBOL_STUBS always require a symbol stub size specifier.
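+    // e.g. "__TEXT,__picsymbolstub1,symbol_stubs" is rejected here because
+    // the required stub size (a trailing ",N" after any attributes) is
+    // missing from the specifier.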
+    if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+      return "mach-o section specifier of type 'symbol_stubs' requires a size "
+             "specifier";
+    return "";
+  }
+
+  // Otherwise, we do have some attributes. Split off the size specifier if
+  // present.
+  Comma = Comma.second.split(',');
+  StringRef Attrs = Comma.first;
+
+  // The attribute list is a '+' separated list of attributes.
+  std::pair<StringRef, StringRef> Plus = Attrs.split('+');
+
+  while (1) {
+    StringRef Attr = Plus.first;
+    StripSpaces(Attr);
+
+    // Look up the attribute.
+    for (unsigned i = 0; ; ++i) {
+      if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
+        return "mach-o section specifier has invalid attribute";
+
+      if (SectionAttrDescriptors[i].AssemblerName &&
+          Attr == SectionAttrDescriptors[i].AssemblerName) {
+        TAA |= SectionAttrDescriptors[i].AttrFlag;
+        break;
+      }
+    }
+
+    if (Plus.second.empty()) break;
+    Plus = Plus.second.split('+');
+  }
+
+  // Okay, we've parsed the section attributes, see if we have a stub size spec.
+  if (Comma.second.empty()) {
+    // S_SYMBOL_STUBS always require a symbol stub size specifier.
+    if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+      return "mach-o section specifier of type 'symbol_stubs' requires a size "
+             "specifier";
+    return "";
+  }
+
+  // If we have a stub size spec, we must have a sectiontype of S_SYMBOL_STUBS.
+  if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
+    return "mach-o section specifier cannot have a stub size specified because "
+           "it does not have type 'symbol_stubs'";
+
+  // Okay, if we do, it must be a number.
+  StringRef StubSizeStr = Comma.second;
+  StripSpaces(StubSizeStr);
+
+  // Convert the stub size from a string to an integer.
+  if (StubSizeStr.getAsInteger(0, StubSize))
+    return "mach-o section specifier has a malformed stub size";
+
+  return "";
+}
+
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index a634f33ad34aa..8a6dcdae7a407 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -11,7 +11,7 @@
 
 using namespace llvm;
 
-MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context) {
+MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context), CurSection(0) {
 }
 
 MCStreamer::~MCStreamer() {
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
new file mode 100644
index 0000000000000..86ff3f3bddb19
--- /dev/null
+++ b/lib/MC/MCSymbol.cpp
@@ -0,0 +1,110 @@
+//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Sentinel value for the absolute pseudo section.
+const MCSection *MCSymbol::AbsolutePseudoSection =
+  reinterpret_cast<const MCSection *>(1);
+
+static bool isAcceptableChar(char C) {
+  if ((C < 'a' || C > 'z') &&
+      (C < 'A' || C > 'Z') &&
+      (C < '0' || C > '9') &&
+      C != '_' && C != '$' && C != '.' && C != '@')
+    return false;
+  return true;
+}
+
+static char HexDigit(int V) {
+  return V < 10 ? V+'0' : V+'A'-10;
+}
+
+static void MangleLetter(raw_ostream &OS, unsigned char C) {
+  OS << '_' << HexDigit(C >> 4) << HexDigit(C & 15) << '_';
+}
+
+/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes
+/// for this assembler.
+static bool NameNeedsEscaping(const StringRef &Str, const MCAsmInfo &MAI) { + assert(!Str.empty() && "Cannot create an empty MCSymbol"); + + // If the first character is a number and the target does not allow this, we + // need quotes. + if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') + return true; + + // If any of the characters in the string is an unacceptable character, force + // quotes. + for (unsigned i = 0, e = Str.size(); i != e; ++i) + if (!isAcceptableChar(Str[i])) + return true; + return false; +} + +static void PrintMangledName(raw_ostream &OS, StringRef Str, + const MCAsmInfo &MAI) { + // The first character is not allowed to be a number unless the target + // explicitly allows it. + if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') { + MangleLetter(OS, Str[0]); + Str = Str.substr(1); + } + + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (!isAcceptableChar(Str[i])) + MangleLetter(OS, Str[i]); + else + OS << Str[i]; + } +} + +/// PrintMangledQuotedName - On systems that support quoted symbols, we still +/// have to escape some (obscure) characters like " and \n which would break the +/// assembler's lexing. +static void PrintMangledQuotedName(raw_ostream &OS, StringRef Str) { + OS << '"'; + + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (Str[i] == '"') + OS << "_QQ_"; + else if (Str[i] == '\n') + OS << "_NL_"; + else + OS << Str[i]; + } + OS << '"'; +} + + +void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + if (MAI == 0 || !NameNeedsEscaping(getName(), *MAI)) { + OS << getName(); + return; + } + + // On systems that do not allow quoted names, print with mangling. + if (!MAI->doesAllowQuotesInName()) + return PrintMangledName(OS, getName(), *MAI); + + // If the string contains a double quote or newline, we still have to mangle + // it. + if (getName().find('"') != std::string::npos || + getName().find('\n') != std::string::npos) + return PrintMangledQuotedName(OS, getName()); + + OS << '"' << getName() << '"'; +} + +void MCSymbol::dump() const { + print(errs(), 0); +} diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp new file mode 100644 index 0000000000000..69bd10c8e699d --- /dev/null +++ b/lib/MC/MCValue.cpp @@ -0,0 +1,34 @@ +//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + if (isAbsolute()) { + OS << getConstant(); + return; + } + + getSymA()->print(OS, MAI); + + if (getSymB()) { + OS << " - "; + getSymB()->print(OS, MAI); + } + + if (getConstant()) + OS << " + " << getConstant(); +} + +void MCValue::dump() const { + print(errs(), 0); +} diff --git a/lib/MC/TargetAsmParser.cpp b/lib/MC/TargetAsmParser.cpp new file mode 100644 index 0000000000000..05760c96cc658 --- /dev/null +++ b/lib/MC/TargetAsmParser.cpp @@ -0,0 +1,19 @@ +//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmParser.h"
+using namespace llvm;
+
+TargetAsmParser::TargetAsmParser(const Target &T)
+  : TheTarget(T)
+{
+}
+
+TargetAsmParser::~TargetAsmParser() {
+}
diff --git a/lib/Makefile b/lib/Makefile
index 1e87d9ebfd10c..3807f31c70376 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ LEVEL = ..
 include $(LEVEL)/Makefile.config
 
 PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
-                 Target ExecutionEngine Debugger Linker MC CompilerDriver
+                 Target ExecutionEngine Linker MC CompilerDriver
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 3b03c54e97641..e431d27902397 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -13,7 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include <cstring>
 
@@ -122,27 +124,30 @@ assertArithmeticOK(const llvm::fltSemantics &semantics) {
    If the exponent overflows, returns a large exponent with the
    appropriate sign.  */
 static int
-readExponent(const char *p)
+readExponent(StringRef::iterator begin, StringRef::iterator end)
 {
   bool isNegative;
   unsigned int absExponent;
   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
+  StringRef::iterator p = begin;
+
+  assert(p != end && "Exponent has no digits");
 
   isNegative = (*p == '-');
-  if (*p == '-' || *p == '+')
+  if (*p == '-' || *p == '+') {
     p++;
+    assert(p != end && "Exponent has no digits");
+  }
 
   absExponent = decDigitValue(*p++);
-  assert (absExponent < 10U);
+  assert(absExponent < 10U && "Invalid character in exponent");
 
-  for (;;) {
+  for (; p != end; ++p) {
     unsigned int value;
 
     value = decDigitValue(*p);
-    if (value >= 10U)
-      break;
+    assert(value < 10U && "Invalid character in exponent");
 
-    p++;
     value += absExponent * 10;
     if (absExponent >= overlargeExponent) {
       absExponent = overlargeExponent;
@@ -151,6 +156,8 @@ readExponent(const char *p)
     absExponent = value;
   }
 
+  assert(p == end && "Invalid exponent in exponent");
+
   if (isNegative)
     return -(int) absExponent;
   else
@@ -160,28 +167,29 @@ readExponent(const char *p)
 /* This is ugly and needs cleaning up, but I don't immediately see
    how whilst remaining safe.  */
 static int
-totalExponent(const char *p, int exponentAdjustment)
+totalExponent(StringRef::iterator p, StringRef::iterator end,
+              int exponentAdjustment)
 {
   int unsignedExponent;
   bool negative, overflow;
   int exponent;
 
-  /* Move past the exponent letter and sign to the digits.  */
-  p++;
+  assert(p != end && "Exponent has no digits");
+
   negative = *p == '-';
-  if(*p == '-' || *p == '+')
+  if(*p == '-' || *p == '+') {
     p++;
+    assert(p != end && "Exponent has no digits");
+  }
 
   unsignedExponent = 0;
   overflow = false;
-  for(;;) {
+  for(; p != end; ++p) {
     unsigned int value;
 
     value = decDigitValue(*p);
-    if(value >= 10U)
-      break;
+    assert(value < 10U && "Invalid character in exponent");
 
-    p++;
     unsignedExponent = unsignedExponent * 10 + value;
     if(unsignedExponent > 65535)
       overflow = true;
@@ -205,16 +213,21 @@ totalExponent(const char *p, int exponentAdjustment)
   return exponent;
 }
 
-static const char *
-skipLeadingZeroesAndAnyDot(const char *p, const char **dot)
+static StringRef::iterator
+skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
+                           StringRef::iterator *dot)
 {
-  *dot = 0;
-  while(*p == '0')
+  StringRef::iterator p = begin;
+  *dot = end;
+  while(*p == '0' && p != end)
     p++;
 
   if(*p == '.') {
     *dot = p++;
-    while(*p == '0')
+
+    assert(end - begin != 1 && "Significand has no digits");
+
+    while(*p == '0' && p != end)
      p++;
   }
 
@@ -242,41 +255,50 @@ struct decimalInfo {
 };
 
 static void
-interpretDecimal(const char *p, decimalInfo *D)
+interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
+                 decimalInfo *D)
 {
-  const char *dot;
-
-  p = skipLeadingZeroesAndAnyDot (p, &dot);
+  StringRef::iterator dot = end;
+  StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
 
   D->firstSigDigit = p;
   D->exponent = 0;
   D->normalizedExponent = 0;
 
-  for (;;) {
+  for (; p != end; ++p) {
     if (*p == '.') {
-      assert(dot == 0);
+      assert(dot == end && "String contains multiple dots");
       dot = p++;
+      if (p == end)
+        break;
     }
     if (decDigitValue(*p) >= 10U)
      break;
-    p++;
  }
 
-  /* If number is all zerooes accept any exponent.  */
-  if (p != D->firstSigDigit) {
-    if (*p == 'e' || *p == 'E')
-      D->exponent = readExponent(p + 1);
+  if (p != end) {
+    assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
+    assert(p != begin && "Significand has no digits");
+    assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+    /* p points to the first non-digit in the string */
+    D->exponent = readExponent(p + 1, end);
 
     /* Implied decimal point?  */
-    if (!dot)
+    if (dot == end)
      dot = p;
+  }
 
+  /* If number is all zeroes accept any exponent.  */
+  if (p != D->firstSigDigit) {
    /* Drop insignificant trailing zeroes.  */
-    do
-      do
-        p--;
-      while (*p == '0');
-    while (*p == '.');
+    if (p != begin) {
+      do
+        do
+          p--;
+        while (p != begin && *p == '0');
+      while (p != begin && *p == '.');
+    }
 
     /* Adjust the exponents for any decimal point.  */
     D->exponent += static_cast<exponent_t>((dot - p) - (dot > p));
@@ -292,7 +314,8 @@ interpretDecimal(const char *p, decimalInfo *D)
    DIGITVALUE is the first hex digit of the fraction, P points to
    the next digit.
*/ static lostFraction -trailingHexadecimalFraction(const char *p, unsigned int digitValue) +trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, + unsigned int digitValue) { unsigned int hexDigit; @@ -307,6 +330,8 @@ trailingHexadecimalFraction(const char *p, unsigned int digitValue) while(*p == '0') p++; + assert(p != end && "Invalid trailing hexadecimal fraction!"); + hexDigit = hexDigitValue(*p); /* If we ran off the end it is exactly zero or one-half, otherwise @@ -667,6 +692,14 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) normalize(rmNearestTiesToEven, lfExactlyZero); } +APFloat::APFloat(const fltSemantics &ourSemantics) { + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + category = fcZero; + sign = false; +} + + APFloat::APFloat(const fltSemantics &ourSemantics, fltCategory ourCategory, bool negative, unsigned type) { @@ -680,7 +713,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, makeNaN(type); } -APFloat::APFloat(const fltSemantics &ourSemantics, const char *text) +APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); @@ -1068,7 +1101,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, switch (rounding_mode) { default: - assert(0); + llvm_unreachable(0); case rmNearestTiesToAway: return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf; @@ -1207,7 +1240,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) { switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1331,7 +1364,7 @@ APFloat::multiplySpecials(const APFloat &rhs) { switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1373,7 +1406,7 @@ APFloat::divideSpecials(const APFloat &rhs) { switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1415,7 +1448,7 @@ APFloat::modSpecials(const APFloat &rhs) { switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -1692,7 +1725,7 @@ APFloat::compare(const APFloat &rhs) const switch (convolve(category, rhs.category)) { default: - assert(0); + llvm_unreachable(0); case convolve(fcNaN, fcZero): case convolve(fcNaN, fcNormal): @@ -2106,13 +2139,13 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts, } APFloat::opStatus -APFloat::convertFromHexadecimalString(const char *p, +APFloat::convertFromHexadecimalString(const StringRef &s, roundingMode rounding_mode) { - lostFraction lost_fraction; + lostFraction lost_fraction = lfExactlyZero; integerPart *significand; unsigned int bitPos, partsCount; - const char *dot, *firstSignificantDigit; + StringRef::iterator dot, firstSignificantDigit; zeroSignificand(); exponent = 0; @@ -2123,47 +2156,58 @@ APFloat::convertFromHexadecimalString(const char *p, bitPos = partsCount * integerPartWidth; /* Skip leading zeroes and any (hexa)decimal point. 
*/ - p = skipLeadingZeroesAndAnyDot(p, &dot); + StringRef::iterator begin = s.begin(); + StringRef::iterator end = s.end(); + StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); firstSignificantDigit = p; - for(;;) { + for(; p != end;) { integerPart hex_value; if(*p == '.') { - assert(dot == 0); + assert(dot == end && "String contains multiple dots"); dot = p++; + if (p == end) { + break; + } } hex_value = hexDigitValue(*p); if(hex_value == -1U) { - lost_fraction = lfExactlyZero; break; } p++; - /* Store the number whilst 4-bit nibbles remain. */ - if(bitPos) { - bitPos -= 4; - hex_value <<= bitPos % integerPartWidth; - significand[bitPos / integerPartWidth] |= hex_value; - } else { - lost_fraction = trailingHexadecimalFraction(p, hex_value); - while(hexDigitValue(*p) != -1U) - p++; + if (p == end) { break; + } else { + /* Store the number whilst 4-bit nibbles remain. */ + if(bitPos) { + bitPos -= 4; + hex_value <<= bitPos % integerPartWidth; + significand[bitPos / integerPartWidth] |= hex_value; + } else { + lost_fraction = trailingHexadecimalFraction(p, end, hex_value); + while(p != end && hexDigitValue(*p) != -1U) + p++; + break; + } } } /* Hex floats require an exponent but not a hexadecimal point. */ - assert(*p == 'p' || *p == 'P'); + assert(p != end && "Hex strings require an exponent"); + assert((*p == 'p' || *p == 'P') && "Invalid character in significand"); + assert(p != begin && "Significand has no digits"); + assert((dot == end || p - begin != 1) && "Significand has no digits"); /* Ignore the exponent if we are zero. */ if(p != firstSignificantDigit) { int expAdjustment; /* Implicit hexadecimal point? */ - if(!dot) + if (dot == end) dot = p; /* Calculate the exponent adjustment implicit in the number of @@ -2179,7 +2223,7 @@ APFloat::convertFromHexadecimalString(const char *p, expAdjustment -= partsCount * integerPartWidth; /* Adjust for the given exponent. */ - exponent = totalExponent(p, expAdjustment); + exponent = totalExponent(p + 1, end, expAdjustment); } return normalize(rounding_mode, lost_fraction); @@ -2271,13 +2315,14 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, } APFloat::opStatus -APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) +APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode) { decimalInfo D; opStatus fs; /* Scan the text. */ - interpretDecimal(p, &D); + StringRef::iterator p = str.begin(); + interpretDecimal(p, str.end(), &D); /* Handle the quick cases. First the case of no significant digits, i.e. zero, and then exponents that are obviously too large or too @@ -2332,10 +2377,14 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) multiplier = 1; do { - if (*p == '.') + if (*p == '.') { p++; - + if (p == str.end()) { + break; + } + } decValue = decDigitValue(*p++); + assert(decValue < 10U && "Invalid character in significand"); multiplier *= 10; val = val * 10 + decValue; /* The maximum number that can be multiplied by ten with any @@ -2363,20 +2412,28 @@ APFloat::convertFromDecimalString(const char *p, roundingMode rounding_mode) } APFloat::opStatus -APFloat::convertFromString(const char *p, roundingMode rounding_mode) +APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode) { assertArithmeticOK(*semantics); + assert(!str.empty() && "Invalid string length"); /* Handle a leading minus sign. 
*/ - if(*p == '-') - sign = 1, p++; - else - sign = 0; + StringRef::iterator p = str.begin(); + size_t slen = str.size(); + sign = *p == '-' ? 1 : 0; + if(*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String has no digits"); + } - if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) - return convertFromHexadecimalString(p + 2, rounding_mode); + if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { + assert(slen - 2 && "Invalid string"); + return convertFromHexadecimalString(StringRef(p + 2, slen - 2), + rounding_mode); + } - return convertFromDecimalString(p, rounding_mode); + return convertFromDecimalString(StringRef(p, slen), rounding_mode); } /* Write out a hexadecimal representation of the floating point value @@ -2660,6 +2717,42 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const return APInt(128, 2, words); } +APInt +APFloat::convertQuadrupleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEquad); + assert (partCount()==2); + + uint64_t myexponent, mysignificand, mysignificand2; + + if (category==fcNormal) { + myexponent = exponent+16383; //bias + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = mysignificand2 = 0; + } else if (category==fcInfinity) { + myexponent = 0x7fff; + mysignificand = mysignificand2 = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7fff; + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + } + + uint64_t words[2]; + words[0] = mysignificand; + words[1] = ((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7fff) << 48) | + (mysignificand2 & 0xffffffffffffLL); + + return APInt(128, 2, words); +} + APInt APFloat::convertDoubleAPFloatToAPInt() const { @@ -2728,10 +2821,13 @@ APFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics*)&IEEEsingle) return convertFloatAPFloatToAPInt(); - + if (semantics == (const llvm::fltSemantics*)&IEEEdouble) return convertDoubleAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&IEEEquad) + return convertQuadrupleAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble) return convertPPCDoubleDoubleAPFloatToAPInt(); @@ -2743,7 +2839,8 @@ APFloat::bitcastToAPInt() const float APFloat::convertToFloat() const { - assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); + assert(semantics == (const llvm::fltSemantics*)&IEEEsingle && + "Float semantics are not IEEEsingle"); APInt api = bitcastToAPInt(); return api.bitsToFloat(); } @@ -2751,7 +2848,8 @@ APFloat::convertToFloat() const double APFloat::convertToDouble() const { - assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); + assert(semantics == (const llvm::fltSemantics*)&IEEEdouble && + "Float semantics are not IEEEdouble"); APInt api = bitcastToAPInt(); return api.bitsToDouble(); } @@ -2847,6 +2945,46 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) } } +void +APFloat::initFromQuadrupleAPInt(const APInt &api) +{ + assert(api.getBitWidth()==128); + uint64_t i1 = api.getRawData()[0]; + uint64_t i2 = api.getRawData()[1]; + uint64_t myexponent = (i2 >> 48) & 0x7fff; + uint64_t mysignificand = i1; + uint64_t mysignificand2 = i2 & 0xffffffffffffLL; + + initialize(&APFloat::IEEEquad); + assert(partCount()==2); + + sign = static_cast(i2>>63); + if (myexponent==0 && + (mysignificand==0 && 
mysignificand2==0)) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x7fff && + (mysignificand==0 && mysignificand2==0)) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x7fff && + (mysignificand!=0 || mysignificand2 !=0)) { + // exponent meaningless + category = fcNaN; + significandParts()[0] = mysignificand; + significandParts()[1] = mysignificand2; + } else { + category = fcNormal; + exponent = myexponent - 16383; + significandParts()[0] = mysignificand; + significandParts()[1] = mysignificand2; + if (myexponent==0) // denormal + exponent = -16382; + else + significandParts()[1] |= 0x1000000000000LL; // integer bit + } +} + void APFloat::initFromDoubleAPInt(const APInt &api) { @@ -2926,10 +3064,11 @@ APFloat::initFromAPInt(const APInt& api, bool isIEEE) return initFromDoubleAPInt(api); else if (api.getBitWidth()==80) return initFromF80LongDoubleAPInt(api); - else if (api.getBitWidth()==128 && !isIEEE) - return initFromPPCDoubleDoubleAPInt(api); + else if (api.getBitWidth()==128) + return (isIEEE ? + initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api)); else - assert(0); + llvm_unreachable(0); } APFloat::APFloat(const APInt& api, bool isIEEE) diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 30dc3526abd43..56d47736eabae 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -14,9 +14,11 @@ #define DEBUG_TYPE "apint" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include @@ -34,7 +36,7 @@ inline static uint64_t* getClearedMemory(unsigned numWords) { return result; } -/// A utility function for allocating memory and checking for allocation +/// A utility function for allocating memory and checking for allocation /// failure. The content is not zeroed. inline static uint64_t* getMemory(unsigned numWords) { uint64_t * result = new uint64_t[numWords]; @@ -42,10 +44,36 @@ inline static uint64_t* getMemory(unsigned numWords) { return result; } +/// A utility function that converts a character to a digit. 
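// A minimal, standalone sketch of the two-word IEEE binary128 layout that
// convertQuadrupleAPFloatToAPInt and initFromQuadrupleAPInt above pack and
// unpack: word 0 holds the low 64 significand bits, word 1 holds
// sign(1) | biased exponent(15) | high 48 significand bits. Normal numbers
// only; the zero/infinity/NaN category handling above is omitted.
#include <cassert>
#include <cstdint>

struct Quad {
  bool sign;
  uint64_t biasedExp;   // 15 bits, bias 16383
  uint64_t sigHi48;     // high 48 bits of the 112-bit significand
  uint64_t sigLo64;     // low 64 bits
};

static void encode(const Quad &q, uint64_t w[2]) {
  w[0] = q.sigLo64;
  w[1] = ((uint64_t)(q.sign ? 1 : 0) << 63) |
         ((q.biasedExp & 0x7fff) << 48) |
         (q.sigHi48 & 0xffffffffffffULL);
}

static Quad decode(const uint64_t w[2]) {
  Quad q;
  q.sign = (w[1] >> 63) != 0;
  q.biasedExp = (w[1] >> 48) & 0x7fff;
  q.sigHi48 = w[1] & 0xffffffffffffULL;
  q.sigLo64 = w[0];
  return q;
}

int main() {
  Quad q = { true, 16383 + 3, 0x123456789abcULL, 0xdeadbeefdeadbeefULL };
  uint64_t w[2];
  encode(q, w);
  Quad r = decode(w);
  assert(r.sign == q.sign && r.biasedExp == q.biasedExp &&
         r.sigHi48 == q.sigHi48 && r.sigLo64 == q.sigLo64);
  return 0;
}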
+inline static unsigned getDigit(char cdigit, uint8_t radix) { + unsigned r; + + if (radix == 16) { + r = cdigit - '0'; + if (r <= 9) + return r; + + r = cdigit - 'A'; + if (r <= 5) + return r + 10; + + r = cdigit - 'a'; + if (r <= 5) + return r + 10; + } + + r = cdigit - '0'; + if (r < radix) + return r; + + return -1U; +} + + void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) { pVal = getClearedMemory(getNumWords()); pVal[0] = val; - if (isSigned && int64_t(val) < 0) + if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) pVal[i] = -1ULL; } @@ -58,7 +86,7 @@ void APInt::initSlowCase(const APInt& that) { APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) : BitWidth(numBits), VAL(0) { - assert(BitWidth && "bitwidth too small"); + assert(BitWidth && "Bitwidth too small"); assert(bigVal && "Null pointer detected!"); if (isSingleWord()) VAL = bigVal[0]; @@ -74,11 +102,10 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) clearUnusedBits(); } -APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen, - uint8_t radix) +APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix) : BitWidth(numbits), VAL(0) { - assert(BitWidth && "bitwidth too small"); - fromString(numbits, StrStart, slen, radix); + assert(BitWidth && "Bitwidth too small"); + fromString(numbits, Str, radix); } APInt& APInt::AssignSlowCase(const APInt& RHS) { @@ -99,7 +126,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) { VAL = 0; pVal = getMemory(RHS.getNumWords()); memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); - } else if (getNumWords() == RHS.getNumWords()) + } else if (getNumWords() == RHS.getNumWords()) memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); else if (RHS.isSingleWord()) { delete [] pVal; @@ -114,7 +141,7 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) { } APInt& APInt::operator=(uint64_t RHS) { - if (isSingleWord()) + if (isSingleWord()) VAL = RHS; else { pVal[0] = RHS; @@ -126,7 +153,7 @@ APInt& APInt::operator=(uint64_t RHS) { /// Profile - This method 'profiles' an APInt for use with FoldingSet. void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(BitWidth); - + if (isSingleWord()) { ID.AddInteger(VAL); return; @@ -137,7 +164,7 @@ void APInt::Profile(FoldingSetNodeID& ID) const { ID.AddInteger(pVal[i]); } -/// add_1 - This function adds a single "digit" integer, y, to the multiple +/// add_1 - This function adds a single "digit" integer, y, to the multiple /// "digit" integer array, x[]. x[] is modified to reflect the addition and /// 1 is returned if there is a carry out, otherwise 0 is returned. /// @returns the carry of the addition. @@ -156,15 +183,15 @@ static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { /// @brief Prefix increment operator. Increments the APInt by one. APInt& APInt::operator++() { - if (isSingleWord()) + if (isSingleWord()) ++VAL; else add_1(pVal, pVal, getNumWords(), 1); return clearUnusedBits(); } -/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from -/// the multi-digit integer array, x[], propagating the borrowed 1 value until +/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from +/// the multi-digit integer array, x[], propagating the borrowed 1 value until /// no further borrowing is neeeded or it runs out of "digits" in x. The result /// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted. 
/// In other words, if y > x then this function returns 1, otherwise 0. @@ -173,7 +200,7 @@ static bool sub_1(uint64_t x[], unsigned len, uint64_t y) { for (unsigned i = 0; i < len; ++i) { uint64_t X = x[i]; x[i] -= y; - if (y > X) + if (y > X) y = 1; // We have to "borrow 1" from next "digit" else { y = 0; // No need to borrow @@ -185,7 +212,7 @@ static bool sub_1(uint64_t x[], unsigned len, uint64_t y) { /// @brief Prefix decrement operator. Decrements the APInt by one. APInt& APInt::operator--() { - if (isSingleWord()) + if (isSingleWord()) --VAL; else sub_1(pVal, getNumWords(), 1); @@ -193,10 +220,10 @@ APInt& APInt::operator--() { } /// add - This function adds the integer array x to the integer array Y and -/// places the result in dest. +/// places the result in dest. /// @returns the carry out from the addition /// @brief General addition of 64-bit integer arrays -static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, +static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len) { bool carry = false; for (unsigned i = 0; i< len; ++i) { @@ -209,10 +236,10 @@ static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y, /// Adds the RHS APint to this APInt. /// @returns this, after addition of RHS. -/// @brief Addition assignment operator. +/// @brief Addition assignment operator. APInt& APInt::operator+=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) + if (isSingleWord()) VAL += RHS.VAL; else { add(pVal, pVal, RHS.pVal, getNumWords()); @@ -220,10 +247,10 @@ APInt& APInt::operator+=(const APInt& RHS) { return clearUnusedBits(); } -/// Subtracts the integer array y from the integer array x +/// Subtracts the integer array y from the integer array x /// @returns returns the borrow out. /// @brief Generalized subtraction of 64-bit integer arrays. -static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, +static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, unsigned len) { bool borrow = false; for (unsigned i = 0; i < len; ++i) { @@ -236,10 +263,10 @@ static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y, /// Subtracts the RHS APInt from this APInt /// @returns this, after subtraction -/// @brief Subtraction assignment operator. +/// @brief Subtraction assignment operator. APInt& APInt::operator-=(const APInt& RHS) { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); - if (isSingleWord()) + if (isSingleWord()) VAL -= RHS.VAL; else sub(pVal, pVal, RHS.pVal, getNumWords()); @@ -247,7 +274,7 @@ APInt& APInt::operator-=(const APInt& RHS) { } /// Multiplies an integer array, x by a a uint64_t integer and places the result -/// into dest. +/// into dest. /// @returns the carry out of the multiplication. /// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer. static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { @@ -269,19 +296,19 @@ static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) { // Determine if the add above introduces carry. hasCarry = (dest[i] < carry) ? 1 : 0; carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0); - // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + + // The upper limit of carry can be (2^32 - 1)(2^32 - 1) + // (2^32 - 1) + 2^32 = 2^64. hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 
2 : 0); carry += (lx * hy) & 0xffffffffULL; dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL); - carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + + carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) + (carry >> 32) + ((lx * hy) >> 32) + hx * hy; } return carry; } -/// Multiplies integer array x by integer array y and stores the result into +/// Multiplies integer array x by integer array y and stores the result into /// the integer array dest. Note that dest's size must be >= xlen + ylen. /// @brief Generalized multiplicate of integer arrays. static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], @@ -307,7 +334,7 @@ static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[], resul = (carry << 32) | (resul & 0xffffffffULL); dest[i+j] += resul; carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+ - (carry >> 32) + (dest[i+j] < resul ? 1 : 0) + + (carry >> 32) + (dest[i+j] < resul ? 1 : 0) + ((lx * hy) >> 32) + hx * hy; } dest[i+xlen] = carry; @@ -325,7 +352,7 @@ APInt& APInt::operator*=(const APInt& RHS) { // Get some bit facts about LHS and check for zero unsigned lhsBits = getActiveBits(); unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1; - if (!lhsWords) + if (!lhsWords) // 0 * X ===> 0 return *this; @@ -385,7 +412,7 @@ APInt& APInt::operator^=(const APInt& RHS) { VAL ^= RHS.VAL; this->clearUnusedBits(); return *this; - } + } unsigned numWords = getNumWords(); for (unsigned i = 0; i < numWords; ++i) pVal[i] ^= RHS.pVal[i]; @@ -423,7 +450,7 @@ bool APInt::operator !() const { return !VAL; for (unsigned i = 0; i < getNumWords(); ++i) - if (pVal[i]) + if (pVal[i]) return false; return true; } @@ -456,7 +483,7 @@ APInt APInt::operator-(const APInt& RHS) const { } bool APInt::operator[](unsigned bitPosition) const { - return (maskBit(bitPosition) & + return (maskBit(bitPosition) & (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0; } @@ -466,7 +493,7 @@ bool APInt::EqualSlowCase(const APInt& RHS) const { unsigned n2 = RHS.getActiveBits(); // If the number of bits isn't the same, they aren't equal - if (n1 != n2) + if (n1 != n2) return false; // If the number of bits fits in a word, we only need to compare the low word. @@ -475,7 +502,7 @@ bool APInt::EqualSlowCase(const APInt& RHS) const { // Otherwise, compare everything for (int i = whichWord(n1 - 1); i >= 0; --i) - if (pVal[i] != RHS.pVal[i]) + if (pVal[i] != RHS.pVal[i]) return false; return true; } @@ -512,9 +539,9 @@ bool APInt::ult(const APInt& RHS) const { // Otherwise, compare all words unsigned topWord = whichWord(std::max(n1,n2)-1); for (int i = topWord; i >= 0; --i) { - if (pVal[i] > RHS.pVal[i]) + if (pVal[i] > RHS.pVal[i]) return false; - if (pVal[i] < RHS.pVal[i]) + if (pVal[i] < RHS.pVal[i]) return true; } return false; @@ -552,14 +579,14 @@ bool APInt::slt(const APInt& RHS) const { return true; else if (rhsNeg) return false; - else + else return lhs.ult(rhs); } APInt& APInt::set(unsigned bitPosition) { - if (isSingleWord()) + if (isSingleWord()) VAL |= maskBit(bitPosition); - else + else pVal[whichWord(bitPosition)] |= maskBit(bitPosition); return *this; } @@ -567,16 +594,16 @@ APInt& APInt::set(unsigned bitPosition) { /// Set the given bit to 0 whose position is given as "bitPosition". /// @brief Set a given bit to 0. 
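// The set() operation above and clear()/flip() below all reduce a bit
// position to a word index plus an in-word mask; a minimal standalone
// sketch of that indexing, assuming 64-bit words as in APInt:
#include <cassert>
#include <cstdint>

static const unsigned BITS_PER_WORD = 64;

static unsigned whichWord(unsigned pos) { return pos / BITS_PER_WORD; }
static uint64_t maskBit(unsigned pos) { return 1ULL << (pos % BITS_PER_WORD); }

int main() {
  uint64_t val[2] = { 0, 0 };
  val[whichWord(70)] |= maskBit(70);   // set bit 70: bit 6 of word 1
  assert(val[1] == (1ULL << 6) && val[0] == 0);
  val[whichWord(70)] &= ~maskBit(70);  // clear it again
  assert(val[1] == 0);
  return 0;
}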
APInt& APInt::clear(unsigned bitPosition) { - if (isSingleWord()) + if (isSingleWord()) VAL &= ~maskBit(bitPosition); - else + else pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition); return *this; } /// @brief Toggle every bit to its opposite value. -/// Toggle a given bit to its opposite value whose position is given +/// Toggle a given bit to its opposite value whose position is given /// as "bitPosition". /// @brief Toggles a given bit to its opposite value. APInt& APInt::flip(unsigned bitPosition) { @@ -586,16 +613,22 @@ APInt& APInt::flip(unsigned bitPosition) { return *this; } -unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) { - assert(str != 0 && "Invalid value string"); - assert(slen > 0 && "Invalid string length"); +unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) { + assert(!str.empty() && "Invalid string length"); + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + + size_t slen = str.size(); - // Each computation below needs to know if its negative - unsigned isNegative = str[0] == '-'; - if (isNegative) { + // Each computation below needs to know if it's negative. + StringRef::iterator p = str.begin(); + unsigned isNegative = *p == '-'; + if (*p == '-' || *p == '+') { + p++; slen--; - str++; + assert(slen && "String is only a sign, needs a value."); } + // For radixes of power-of-two values, the bits required is accurately and // easily computed if (radix == 2) @@ -605,22 +638,27 @@ unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) { if (radix == 16) return slen * 4 + isNegative; - // Otherwise it must be radix == 10, the hard case - assert(radix == 10 && "Invalid radix"); - // This is grossly inefficient but accurate. We could probably do something // with a computation of roughly slen*64/20 and then adjust by the value of // the first few digits. But, I'm not sure how accurate that could be. // Compute a sufficient number of bits that is always large enough but might - // be too large. This avoids the assertion in the constructor. - unsigned sufficient = slen*64/18; + // be too large. This avoids the assertion in the constructor. This + // calculation doesn't work appropriately for the numbers 0-9, so just use 4 + // bits in that case. + unsigned sufficient = slen == 1 ? 4 : slen * 64/18; // Convert to the actual binary value. - APInt tmp(sufficient, str, slen, radix); + APInt tmp(sufficient, StringRef(p, slen), radix); - // Compute how many bits are required. - return isNegative + tmp.logBase2() + 1; + // Compute how many bits are required. If the log is infinite, assume we need + // just bit. + unsigned log = tmp.logBase2(); + if (log == (unsigned)-1) { + return isNegative + 1; + } else { + return isNegative + log + 1; + } } // From http://www.burtleburtle.net, byBob Jenkins. @@ -720,7 +758,7 @@ APInt APInt::getHiBits(unsigned numBits) const { /// LoBits - This function returns the low "numBits" bits of this APInt. 
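// The radix-10 path of getBitsNeeded above deliberately over-estimates:
// slen decimal digits need at most slen * log2(10) ~= slen * 3.33 bits,
// and slen * 64/18 ~= slen * 3.56 always covers that. A standalone check
// of the bound (the slen == 1 special case mirrors the code above):
#include <cassert>
#include <cmath>
#include <cstddef>

static unsigned sufficientBits(size_t slen) {
  return slen == 1 ? 4 : (unsigned)(slen * 64 / 18);
}

int main() {
  double log2_10 = std::log(10.0) / std::log(2.0);
  for (size_t slen = 1; slen <= 1000; ++slen) {
    unsigned exact = (unsigned)std::ceil(slen * log2_10);
    assert(sufficientBits(slen) >= exact); // never too small
  }
  return 0;
}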
APInt APInt::getLoBits(unsigned numBits) const { - return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), + return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits), BitWidth - numBits); } @@ -837,7 +875,7 @@ APInt APInt::byteSwap() const { } } -APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, +APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, const APInt& API2) { APInt A = API1, B = API2; while (!!B) { @@ -870,7 +908,7 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { // If the exponent doesn't shift all bits out of the mantissa if (exp < 52) - return isNeg ? -APInt(width, mantissa >> (52 - exp)) : + return isNeg ? -APInt(width, mantissa >> (52 - exp)) : APInt(width, mantissa >> (52 - exp)); // If the client didn't provide enough bits for us to shift the mantissa into @@ -884,22 +922,23 @@ APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { return isNeg ? -Tmp : Tmp; } -/// RoundToDouble - This function convert this APInt to a double. +/// RoundToDouble - This function converts this APInt to a double. /// The layout for double is as following (IEEE Standard 754): /// -------------------------------------- /// | Sign Exponent Fraction Bias | /// |-------------------------------------- | /// | 1[63] 11[62-52] 52[51-00] 1023 | -/// -------------------------------------- +/// -------------------------------------- double APInt::roundToDouble(bool isSigned) const { // Handle the simple case where the value is contained in one uint64_t. + // It is wrong to optimize getWord(0) to VAL; there might be more than one word. if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { if (isSigned) { - int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth); + int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth); return double(sext); } else - return double(VAL); + return double(getWord(0)); } // Determine if the value is negative. @@ -920,7 +959,7 @@ double APInt::roundToDouble(bool isSigned) const { if (exp > 1023) { if (!isSigned || !isNeg) return std::numeric_limits::infinity(); - else + else return -std::numeric_limits::infinity(); } exp += 1023; // Increment for 1023 bias @@ -1030,7 +1069,7 @@ APInt &APInt::zext(unsigned width) { uint64_t *newVal = getClearedMemory(wordsAfter); if (wordsBefore == 1) newVal[0] = VAL; - else + else for (unsigned i = 0; i < wordsBefore; ++i) newVal[i] = pVal[i]; if (wordsBefore != 1) @@ -1076,7 +1115,7 @@ APInt APInt::ashr(unsigned shiftAmt) const { return APInt(BitWidth, 0); // undefined else { unsigned SignBit = APINT_BITS_PER_WORD - BitWidth; - return APInt(BitWidth, + return APInt(BitWidth, (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt)); } } @@ -1113,11 +1152,11 @@ APInt APInt::ashr(unsigned shiftAmt) const { if (bitsInWord < APINT_BITS_PER_WORD) val[breakWord] |= ~0ULL << bitsInWord; // set high bits } else { - // Shift the low order words + // Shift the low order words for (unsigned i = 0; i < breakWord; ++i) { // This combines the shifted corresponding word with the low bits from // the next word (shifted into this word's high bits). 
- val[i] = (pVal[i+offset] >> wordShift) | + val[i] = (pVal[i+offset] >> wordShift) | (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); } @@ -1130,10 +1169,10 @@ APInt APInt::ashr(unsigned shiftAmt) const { if (isNegative()) { if (wordShift > bitsInWord) { if (breakWord > 0) - val[breakWord-1] |= + val[breakWord-1] |= ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); val[breakWord] |= ~0ULL; - } else + } else val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); } } @@ -1157,7 +1196,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { if (isSingleWord()) { if (shiftAmt == BitWidth) return APInt(BitWidth, 0); - else + else return APInt(BitWidth, this->VAL >> shiftAmt); } @@ -1168,7 +1207,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { return APInt(BitWidth, 0); // If none of the bits are shifted out, the result is *this. This avoids - // issues with shifting by the size of the integer type, which produces + // issues with shifting by the size of the integer type, which produces // undefined results in the code below. This is also an optimization. if (shiftAmt == 0) return *this; @@ -1199,7 +1238,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { return APInt(val,BitWidth).clearUnusedBits(); } - // Shift the low order words + // Shift the low order words unsigned breakWord = getNumWords() - offset -1; for (unsigned i = 0; i < breakWord; ++i) val[i] = (pVal[i+offset] >> wordShift) | @@ -1306,7 +1345,7 @@ APInt APInt::rotr(unsigned rotateAmt) const { // values using less than 52 bits, the value is converted to double and then // the libc sqrt function is called. The result is rounded and then converted // back to a uint64_t which is then used to construct the result. Finally, -// the Babylonian method for computing square roots is used. +// the Babylonian method for computing square roots is used. APInt APInt::sqrt() const { // Determine the magnitude of the value. @@ -1318,7 +1357,7 @@ APInt APInt::sqrt() const { static const uint8_t results[32] = { /* 0 */ 0, /* 1- 2 */ 1, 1, - /* 3- 6 */ 2, 2, 2, 2, + /* 3- 6 */ 2, 2, 2, 2, /* 7-12 */ 3, 3, 3, 3, 3, 3, /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -1334,10 +1373,10 @@ APInt APInt::sqrt() const { if (magnitude < 52) { #ifdef _MSC_VER // Amazingly, VC++ doesn't have round(). - return APInt(BitWidth, + return APInt(BitWidth, uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); #else - return APInt(BitWidth, + return APInt(BitWidth, uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); #endif } @@ -1346,7 +1385,7 @@ APInt APInt::sqrt() const { // is a classical Babylonian method for computing the square root. This code // was adapted to APINt from a wikipedia article on such computations. // See http://www.wikipedia.org/ and go to the page named - // Calculate_an_integer_square_root. + // Calculate_an_integer_square_root. unsigned nbits = BitWidth, i = 4; APInt testy(BitWidth, 16); APInt x_old(BitWidth, 1); @@ -1354,13 +1393,13 @@ APInt APInt::sqrt() const { APInt two(BitWidth, 2); // Select a good starting value using binary logarithms. 
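// The sqrt code below first picks a starting value from a binary logarithm
// and then runs the classical Babylonian iteration x' = (n/x + x) / 2 until
// it stops decreasing. A minimal uint64_t version of that iteration:
#include <cassert>
#include <cstdint>

static uint64_t isqrt(uint64_t n) {
  if (n < 2)
    return n; // 0 and 1 are their own square roots
  uint64_t x = n;
  uint64_t next = (n / x + x) / 2;
  while (next < x) {          // strictly decreasing until converged
    x = next;
    next = (n / x + x) / 2;
  }
  return x;                   // x == floor(sqrt(n))
}

int main() {
  assert(isqrt(0) == 0 && isqrt(1) == 1 && isqrt(2) == 1);
  assert(isqrt(15) == 3 && isqrt(16) == 4 && isqrt(17) == 4);
  assert(isqrt(1000000) == 1000);
  return 0;
}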
- for (;; i += 2, testy = testy.shl(2)) + for (;; i += 2, testy = testy.shl(2)) if (i >= nbits || this->ule(testy)) { x_old = x_old.shl(i / 2); break; } - // Use the Babylonian method to arrive at the integer square root: + // Use the Babylonian method to arrive at the integer square root: for (;;) { x_new = (this->udiv(x_old) + x_old).udiv(two); if (x_old.ule(x_new)) @@ -1369,9 +1408,9 @@ APInt APInt::sqrt() const { } // Make sure we return the closest approximation - // NOTE: The rounding calculation below is correct. It will produce an + // NOTE: The rounding calculation below is correct. It will produce an // off-by-one discrepancy with results from pari/gp. That discrepancy has been - // determined to be a rounding issue with pari/gp as it begins to use a + // determined to be a rounding issue with pari/gp as it begins to use a // floating point representation after 192 bits. There are no discrepancies // between this algorithm and pari/gp for bit widths < 192 bits. APInt square(x_old * x_old); @@ -1386,7 +1425,7 @@ APInt APInt::sqrt() const { else return x_old + 1; } else - assert(0 && "Error in APInt::sqrt computation"); + llvm_unreachable("Error in APInt::sqrt computation"); return x_old + 1; } @@ -1409,7 +1448,7 @@ APInt APInt::multiplicativeInverse(const APInt& modulo) const { APInt r[2] = { modulo, *this }; APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; APInt q(BitWidth, 0); - + unsigned i; for (i = 0; r[i^1] != 0; i ^= 1) { // An overview of the math without the confusing bit-flipping: @@ -1442,11 +1481,9 @@ APInt::ms APInt::magic() const { const APInt& d = *this; unsigned p; APInt ad, anc, delta, q1, r1, q2, r2, t; - APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); - APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); struct ms mag; - + ad = d.abs(); t = signedMin + (d.lshr(d.getBitWidth() - 1)); anc = t - 1 - t.urem(ad); // absolute value of nc @@ -1471,7 +1508,7 @@ APInt::ms APInt::magic() const { } delta = ad - r2; } while (q1.ule(delta) || (q1 == delta && r1 == 0)); - + mag.m = q2 + 1; if (d.isNegative()) mag.m = -mag.m; // resulting magic number mag.s = p - d.getBitWidth(); // resulting shift @@ -1543,17 +1580,17 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, uint64_t b = uint64_t(1) << 32; #if 0 - DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n'); - DEBUG(cerr << "KnuthDiv: original:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); - DEBUG(cerr << " by"); - DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); - DEBUG(cerr << '\n'); + DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); + DEBUG(errs() << "KnuthDiv: original:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << " by"); + DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); + DEBUG(errs() << '\n'); #endif - // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of - // u and v by d. Note that we have taken Knuth's advice here to use a power - // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of - // 2 allows us to shift instead of multiply and it is easy to determine the + // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of + // u and v by d. Note that we have taken Knuth's advice here to use a power + // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). 
A power of + // 2 allows us to shift instead of multiply and it is easy to determine the // shift amount from the leading zeros. We are basically normalizing the u // and v so that its high bits are shifted to the top of v's range without // overflow. Note that this can require an extra word in u so that u must @@ -1575,27 +1612,27 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[m+n] = u_carry; #if 0 - DEBUG(cerr << "KnuthDiv: normal:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]); - DEBUG(cerr << " by"); - DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]); - DEBUG(cerr << '\n'); + DEBUG(errs() << "KnuthDiv: normal:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << " by"); + DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]); + DEBUG(errs() << '\n'); #endif // D2. [Initialize j.] Set j to m. This is the loop counter over the places. int j = m; do { - DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n'); - // D3. [Calculate q'.]. + DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n'); + // D3. [Calculate q'.]. // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test // on v[n-2] determines at high speed most of the cases in which the trial - // value qp is one too large, and it eliminates all cases where qp is two - // too large. + // value qp is one too large, and it eliminates all cases where qp is two + // too large. uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); - DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n'); + DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n'); uint64_t qp = dividend / v[n-1]; uint64_t rp = dividend % v[n-1]; if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { @@ -1604,20 +1641,20 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) qp--; } - DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation // consists of a simple multiplication by a one-place number, combined with - // a subtraction. + // a subtraction. 
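// A standalone sketch of how the (m, s) pair computed by magic() above is
// consumed: signed division by a constant becomes a high multiply, an
// arithmetic shift, and a sign fix-up. The constants below are the
// well-known Hacker's Delight pair for d = 7 at 32 bits; arithmetic right
// shift of negative values is assumed, as on all mainstream compilers.
#include <cassert>
#include <cstdint>

static int32_t divideBy7(int32_t n) {
  const int32_t M = (int32_t)0x92492493; // magic multiplier for 7
  const int s = 2;                       // magic shift for 7
  int32_t q = (int32_t)(((int64_t)M * n) >> 32); // high 32 bits of M*n
  q += n;                              // correction needed because M < 0
  q >>= s;
  q += (int32_t)((uint32_t)n >> 31);   // round toward zero for negative n
  return q;
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n)
    assert(divideBy7(n) == n / 7);
  return 0;
}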
bool isNeg = false; for (unsigned i = 0; i < n; ++i) { uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); bool borrow = subtrahend > u_tmp; - DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp - << ", subtrahend == " << subtrahend - << ", borrow = " << borrow << '\n'); + DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp + << ", subtrahend == " << subtrahend + << ", borrow = " << borrow << '\n'); uint64_t result = u_tmp - subtrahend; unsigned k = j + i; @@ -1629,14 +1666,14 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, k++; } isNeg |= borrow; - DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << - u[j+i+1] << '\n'); + DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + u[j+i+1] << '\n'); } - DEBUG(cerr << "KnuthDiv: after subtraction:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); - DEBUG(cerr << '\n'); - // The digits (u[j+n]...u[j]) should be kept positive; if the result of - // this step is actually negative, (u[j+n]...u[j]) should be left as the + DEBUG(errs() << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << '\n'); + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the // true value plus b**(n+1), namely as the b's complement of // the true value, and a "borrow" to the left should be remembered. // @@ -1647,20 +1684,20 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, carry = carry && u[i] == 0; } } - DEBUG(cerr << "KnuthDiv: after complement:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]); - DEBUG(cerr << '\n'); + DEBUG(errs() << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]); + DEBUG(errs() << '\n'); - // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was // negative, go to step D6; otherwise go on to step D7. q[j] = (unsigned)qp; if (isNeg) { - // D6. [Add back]. The probability that this step is necessary is very + // D6. [Add back]. The probability that this step is necessary is very // small, on the order of only 2/b. Make sure that test data accounts for - // this possibility. Decrease q[j] by 1 + // this possibility. Decrease q[j] by 1 q[j]--; - // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). - // A carry will occur to the left of u[j+n], and it should be ignored + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). + // A carry will occur to the left of u[j+n], and it should be ignored // since it cancels with the borrow that occurred in D4. bool carry = false; for (unsigned i = 0; i < n; i++) { @@ -1670,16 +1707,16 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, } u[j+n] += carry; } - DEBUG(cerr << "KnuthDiv: after correction:"); - DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]); - DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n'); + DEBUG(errs() << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]); + DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 
} while (--j >= 0); - DEBUG(cerr << "KnuthDiv: quotient:"); - DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]); - DEBUG(cerr << '\n'); + DEBUG(errs() << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]); + DEBUG(errs() << '\n'); // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired // remainder may be obtained by dividing u[...] by d. If r is non-null we @@ -1690,22 +1727,22 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // shift right here. In order to mak if (shift) { unsigned carry = 0; - DEBUG(cerr << "KnuthDiv: remainder:"); + DEBUG(errs() << "KnuthDiv: remainder:"); for (int i = n-1; i >= 0; i--) { r[i] = (u[i] >> shift) | carry; carry = u[i] << (32 - shift); - DEBUG(cerr << " " << r[i]); + DEBUG(errs() << " " << r[i]); } } else { for (int i = n-1; i >= 0; i--) { r[i] = u[i]; - DEBUG(cerr << " " << r[i]); + DEBUG(errs() << " " << r[i]); } } - DEBUG(cerr << '\n'); + DEBUG(errs() << '\n'); } #if 0 - DEBUG(cerr << std::setbase(10) << '\n'); + DEBUG(errs() << '\n'); #endif } @@ -1715,12 +1752,12 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, { assert(lhsWords >= rhsWords && "Fractional result"); - // First, compose the values into an array of 32-bit words instead of + // First, compose the values into an array of 32-bit words instead of // 64-bit words. This is a necessity of both the "short division" algorithm - // and the the Knuth "classical algorithm" which requires there to be native - // operations for +, -, and * on an m bit value with an m*2 bit result. We - // can't use 64-bit operands here because we don't have native results of - // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't + // and the the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. We + // can't use 64-bit operands here because we don't have native results of + // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't // work on large-endian machines. uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT); unsigned n = rhsWords * 2; @@ -1769,9 +1806,9 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, if (Remainder) memset(R, 0, n * sizeof(unsigned)); - // Now, adjust m and n for the Knuth division. n is the number of words in + // Now, adjust m and n for the Knuth division. n is the number of words in // the divisor. m is the number of words by which the dividend exceeds the - // divisor (i.e. m+n is the length of the dividend). These sizes must not + // divisor (i.e. m+n is the length of the dividend). These sizes must not // contain any zero words or the Knuth algorithm fails. for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { n--; @@ -1828,10 +1865,10 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, } else Quotient->clear(); - // The quotient is in Q. Reconstitute the quotient into Quotient's low + // The quotient is in Q. Reconstitute the quotient into Quotient's low // order words. 
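// divide() above splits 64-bit words into 32-bit "digits" because Knuth's
// algorithm needs a native m-bit by m-bit multiply with a 2m-bit result,
// and reassembles them afterwards, as the code just below does. A minimal
// standalone round trip of that split:
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint32_t> split(const std::vector<uint64_t> &words) {
  std::vector<uint32_t> digits;
  for (size_t i = 0; i < words.size(); ++i) {
    digits.push_back((uint32_t)(words[i] & 0xffffffffULL)); // low half first
    digits.push_back((uint32_t)(words[i] >> 32));
  }
  return digits;
}

static std::vector<uint64_t> join(const std::vector<uint32_t> &digits) {
  std::vector<uint64_t> words;
  for (size_t i = 0; i < digits.size(); i += 2)
    words.push_back((uint64_t)digits[i] | ((uint64_t)digits[i + 1] << 32));
  return words;
}

int main() {
  std::vector<uint64_t> w;
  w.push_back(0x0123456789abcdefULL);
  w.push_back(0xfedcba9876543210ULL);
  assert(join(split(w)) == w); // lossless round trip
  return 0;
}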
if (lhsWords == 1) { - uint64_t tmp = + uint64_t tmp = uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); if (Quotient->isSingleWord()) Quotient->VAL = tmp; @@ -1840,7 +1877,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, } else { assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); for (unsigned i = 0; i < lhsWords; ++i) - Quotient->pVal[i] = + Quotient->pVal[i] = uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1862,7 +1899,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, // The remainder is in R. Reconstitute the remainder into Remainder's low // order words. if (rhsWords == 1) { - uint64_t tmp = + uint64_t tmp = uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); if (Remainder->isSingleWord()) Remainder->VAL = tmp; @@ -1871,7 +1908,7 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, } else { assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); for (unsigned i = 0; i < rhsWords; ++i) - Remainder->pVal[i] = + Remainder->pVal[i] = uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); } } @@ -1902,9 +1939,9 @@ APInt APInt::udiv(const APInt& RHS) const { unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); // Deal with some degenerate cases - if (!lhsWords) + if (!lhsWords) // 0 / X ===> 0 - return APInt(BitWidth, 0); + return APInt(BitWidth, 0); else if (lhsWords < rhsWords || this->ult(RHS)) { // X / Y ===> 0, iff X < Y return APInt(BitWidth, 0); @@ -1959,7 +1996,7 @@ APInt APInt::urem(const APInt& RHS) const { return Remainder; } -void APInt::udivrem(const APInt &LHS, const APInt &RHS, +void APInt::udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder) { // Get some size facts about the dividend and divisor unsigned lhsBits = LHS.getActiveBits(); @@ -1968,24 +2005,24 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); // Check the degenerate cases - if (lhsWords == 0) { + if (lhsWords == 0) { Quotient = 0; // 0 / Y ===> 0 Remainder = 0; // 0 % Y ===> 0 return; - } - - if (lhsWords < rhsWords || LHS.ult(RHS)) { + } + + if (lhsWords < rhsWords || LHS.ult(RHS)) { Quotient = 0; // X / Y ===> 0, iff X < Y Remainder = LHS; // X % Y ===> X, iff X < Y return; - } - + } + if (LHS == RHS) { Quotient = 1; // X / X ===> 1 Remainder = 0; // X % X ===> 0; return; - } - + } + if (lhsWords == 1 && rhsWords == 1) { // There is only one word to consider so use the native versions. uint64_t lhsValue = LHS.isSingleWord() ? 
LHS.VAL : LHS.pVal[0]; @@ -1999,19 +2036,25 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS, divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); } -void APInt::fromString(unsigned numbits, const char *str, unsigned slen, - uint8_t radix) { +void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) { // Check our assumptions here + assert(!str.empty() && "Invalid string length"); assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && "Radix should be 2, 8, 10, or 16!"); - assert(str && "String is null?"); - bool isNeg = str[0] == '-'; - if (isNeg) - str++, slen--; + + StringRef::iterator p = str.begin(); + size_t slen = str.size(); + bool isNeg = *p == '-'; + if (*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String is only a sign, needs a value."); + } assert((slen <= numbits || radix != 2) && "Insufficient bit width"); assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); - assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width"); + assert((((slen-1)*64)/22 <= numbits || radix != 10) + && "Insufficient bit width"); // Allocate memory if (!isSingleWord()) @@ -2026,30 +2069,9 @@ void APInt::fromString(unsigned numbits, const char *str, unsigned slen, APInt apradix(getBitWidth(), radix); // Enter digit traversal loop - for (unsigned i = 0; i < slen; i++) { - // Get a digit - unsigned digit = 0; - char cdigit = str[i]; - if (radix == 16) { - if (!isxdigit(cdigit)) - assert(0 && "Invalid hex digit in string"); - if (isdigit(cdigit)) - digit = cdigit - '0'; - else if (cdigit >= 'a') - digit = cdigit - 'a' + 10; - else if (cdigit >= 'A') - digit = cdigit - 'A' + 10; - else - assert(0 && "huh? we shouldn't get here"); - } else if (isdigit(cdigit)) { - digit = cdigit - '0'; - assert((radix == 10 || - (radix == 8 && digit != 8 && digit != 9) || - (radix == 2 && (digit == 0 || digit == 1))) && - "Invalid digit in string for given radix"); - } else { - assert(0 && "Invalid character in digit string"); - } + for (StringRef::iterator e = str.end(); p != e; ++p) { + unsigned digit = getDigit(*p, radix); + assert(digit < radix && "Invalid character in digit string"); // Shift or multiply the value by the radix if (slen > 1) { @@ -2077,19 +2099,19 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, bool Signed) const { assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) && "Radix should be 2, 8, 10, or 16!"); - + // First, check for a zero value and just short circuit the logic below. if (*this == 0) { Str.push_back('0'); return; } - + static const char Digits[] = "0123456789ABCDEF"; - + if (isSingleWord()) { char Buffer[65]; char *BufPtr = Buffer+65; - + uint64_t N; if (Signed) { int64_t I = getSExtValue(); @@ -2101,7 +2123,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, } else { N = getZExtValue(); } - + while (N) { *--BufPtr = Digits[N % Radix]; N /= Radix; @@ -2111,7 +2133,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, } APInt Tmp(*this); - + if (Signed && isNegative()) { // They want to print the signed version and it is a negative value // Flip the bits and add one to turn it into the equivalent positive @@ -2120,18 +2142,18 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, Tmp++; Str.push_back('-'); } - + // We insert the digits backward, then reverse them to get the right order. 
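// A standalone sketch of the emit-backward-then-reverse scheme that the
// StartDig bookkeeping below implements: digits fall out least-significant
// first, so they are appended in that order and the new suffix is reversed
// once at the end.
#include <algorithm>
#include <cassert>
#include <string>

static void appendDecimal(std::string &out, unsigned long long n) {
  if (n == 0) { out += '0'; return; }
  size_t startDig = out.size();    // only reverse what we append
  while (n) {
    out += (char)('0' + n % 10);   // least significant digit first
    n /= 10;
  }
  std::reverse(out.begin() + startDig, out.end());
}

int main() {
  std::string s = "value=";
  appendDecimal(s, 40960);
  assert(s == "value=40960");
  return 0;
}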
unsigned StartDig = Str.size(); - - // For the 2, 8 and 16 bit cases, we can just shift instead of divide - // because the number of bits per digit (1, 3 and 4 respectively) divides + + // For the 2, 8 and 16 bit cases, we can just shift instead of divide + // because the number of bits per digit (1, 3 and 4 respectively) divides // equaly. We just shift until the value is zero. if (Radix != 10) { // Just shift tmp right for each digit width until it becomes zero unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); unsigned MaskAmt = Radix - 1; - + while (Tmp != 0) { unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; Str.push_back(Digits[Digit]); @@ -2142,7 +2164,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, while (Tmp != 0) { APInt APdigit(1, 0); APInt tmp2(Tmp.getBitWidth(), 0); - divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, + divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, &APdigit); unsigned Digit = (unsigned)APdigit.getZExtValue(); assert(Digit < Radix && "divide failed"); @@ -2150,7 +2172,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, Tmp = tmp2; } } - + // Reverse the digits before returning. std::reverse(Str.begin()+StartDig, Str.end()); } @@ -2161,7 +2183,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { SmallString<40> S; toString(S, Radix, Signed); - return S.c_str(); + return S.str(); } @@ -2169,26 +2191,21 @@ void APInt::dump() const { SmallString<40> S, U; this->toStringUnsigned(U); this->toStringSigned(S); - fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str()); + errs() << "APInt(" << BitWidth << "b, " + << U.str() << "u " << S.str() << "s)"; } void APInt::print(raw_ostream &OS, bool isSigned) const { SmallString<40> S; this->toString(S, 10, isSigned); - OS << S.c_str(); -} - -std::ostream &llvm::operator<<(std::ostream &o, const APInt &I) { - raw_os_ostream OS(o); - OS << I; - return o; + OS << S.str(); } // This implements a variety of operations on a representation of // arbitrary precision, two's-complement, bignum integer values. -/* Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe - and unrestricting assumption. */ +// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe +// and unrestricting assumption. #define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0); diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index db0d8f31e55dc..7a3fd87c17eef 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -12,130 +12,160 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Allocator.h" -#include "llvm/Support/Recycler.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/Streams.h" -#include -using namespace llvm; +#include "llvm/Support/Recycler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Memory.h" +#include -//===----------------------------------------------------------------------===// -// MemRegion class implementation -//===----------------------------------------------------------------------===// +namespace llvm { -namespace { -/// MemRegion - This is one chunk of the BumpPtrAllocator. 
-class MemRegion { - unsigned RegionSize; - MemRegion *Next; - char *NextPtr; -public: - void Init(unsigned size, unsigned Alignment, MemRegion *next) { - RegionSize = size; - Next = next; - NextPtr = (char*)(this+1); - - // Align NextPtr. - NextPtr = (char*)((intptr_t)(NextPtr+Alignment-1) & - ~(intptr_t)(Alignment-1)); - } - - const MemRegion *getNext() const { return Next; } - unsigned getNumBytesAllocated() const { - return NextPtr-(const char*)this; - } - - /// Allocate - Allocate and return at least the specified number of bytes. - /// - void *Allocate(size_t AllocSize, size_t Alignment, MemRegion **RegPtr) { - - char* Result = (char*) (((uintptr_t) (NextPtr+Alignment-1)) - & ~((uintptr_t) Alignment-1)); - - // Speculate the new value of NextPtr. - char* NextPtrTmp = Result + AllocSize; - - // If we are still within the current region, return Result. - if (unsigned (NextPtrTmp - (char*) this) <= RegionSize) { - NextPtr = NextPtrTmp; - return Result; - } - - // Otherwise, we have to allocate a new chunk. Create one twice as big as - // this one. - MemRegion *NewRegion = (MemRegion *)malloc(RegionSize*2); - NewRegion->Init(RegionSize*2, Alignment, this); - - // Update the current "first region" pointer to point to the new region. - *RegPtr = NewRegion; - - // Try allocating from it now. - return NewRegion->Allocate(AllocSize, Alignment, RegPtr); - } - - /// Deallocate - Recursively release all memory for this and its next regions - /// to the system. - void Deallocate() { - MemRegion *next = Next; - free(this); - if (next) - next->Deallocate(); - } +BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold, + SlabAllocator &allocator) + : SlabSize(size), SizeThreshold(threshold), Allocator(allocator), + CurSlab(0), BytesAllocated(0) { + StartNewSlab(); +} - /// DeallocateAllButLast - Recursively release all memory for this and its - /// next regions to the system stopping at the last region in the list. - /// Returns the pointer to the last region. - MemRegion *DeallocateAllButLast() { - MemRegion *next = Next; - if (!next) - return this; - free(this); - return next->DeallocateAllButLast(); - } -}; +BumpPtrAllocator::~BumpPtrAllocator() { + DeallocateSlabs(CurSlab); } -//===----------------------------------------------------------------------===// -// BumpPtrAllocator class implementation -//===----------------------------------------------------------------------===// +/// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should +/// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and +/// AlignPtr(8, 4) == 8. +char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) { + assert(Alignment && (Alignment & (Alignment - 1)) == 0 && + "Alignment is not a power of two!"); -BumpPtrAllocator::BumpPtrAllocator() { - TheMemory = malloc(4096); - ((MemRegion*)TheMemory)->Init(4096, 1, 0); + // Do the alignment. + return (char*)(((uintptr_t)Ptr + Alignment - 1) & + ~(uintptr_t)(Alignment - 1)); } -BumpPtrAllocator::~BumpPtrAllocator() { - ((MemRegion*)TheMemory)->Deallocate(); +/// StartNewSlab - Allocate a new slab and move the bump pointers over into +/// the new slab. Modifies CurPtr and End. +void BumpPtrAllocator::StartNewSlab() { + MemSlab *NewSlab = Allocator.Allocate(SlabSize); + NewSlab->NextPtr = CurSlab; + CurSlab = NewSlab; + CurPtr = (char*)(CurSlab + 1); + End = ((char*)CurSlab) + CurSlab->Size; +} + +/// DeallocateSlabs - Deallocate all memory slabs after and including this +/// one. 
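// A standalone sketch of the power-of-two rounding behind AlignPtr above:
// (x + a - 1) & ~(a - 1) rounds x up to the next multiple of a whenever a
// is a power of two.
#include <cassert>
#include <cstdint>

static uintptr_t alignUp(uintptr_t x, uintptr_t a) {
  assert(a && (a & (a - 1)) == 0 && "alignment must be a power of two");
  return (x + a - 1) & ~(a - 1);
}

int main() {
  assert(alignUp(7, 4) == 8);    // rounds up, matching AlignPtr(7, 4) == 8
  assert(alignUp(8, 4) == 8);    // aligned values are left alone
  assert(alignUp(0, 16) == 0);
  assert(alignUp(17, 16) == 32);
  return 0;
}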
+void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) { + while (Slab) { + MemSlab *NextSlab = Slab->NextPtr; +#ifndef NDEBUG + // Poison the memory so stale pointers crash sooner. Note we must + // preserve the Size and NextPtr fields at the beginning. + sys::Memory::setRangeWritable(Slab + 1, Slab->Size - sizeof(MemSlab)); + memset(Slab + 1, 0xCD, Slab->Size - sizeof(MemSlab)); +#endif + Allocator.Deallocate(Slab); + Slab = NextSlab; + } } +/// Reset - Deallocate all but the current slab and reset the current pointer +/// to the beginning of it, freeing all memory allocated so far. void BumpPtrAllocator::Reset() { - MemRegion *MRP = (MemRegion*)TheMemory; - MRP = MRP->DeallocateAllButLast(); - MRP->Init(4096, 1, 0); - TheMemory = MRP; + DeallocateSlabs(CurSlab->NextPtr); + CurSlab->NextPtr = 0; + CurPtr = (char*)(CurSlab + 1); + End = ((char*)CurSlab) + CurSlab->Size; } -void *BumpPtrAllocator::Allocate(size_t Size, size_t Align) { - MemRegion *MRP = (MemRegion*)TheMemory; - void *Ptr = MRP->Allocate(Size, Align, &MRP); - TheMemory = MRP; +/// Allocate - Allocate space at the specified alignment. +/// +void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { + // Keep track of how many bytes we've allocated. + BytesAllocated += Size; + + // 0-byte alignment means 1-byte alignment. + if (Alignment == 0) Alignment = 1; + + // Allocate the aligned space, going forwards from CurPtr. + char *Ptr = AlignPtr(CurPtr, Alignment); + + // Check if we can hold it. + if (Ptr + Size <= End) { + CurPtr = Ptr + Size; + return Ptr; + } + + // If Size is really big, allocate a separate slab for it. + size_t PaddedSize = Size + sizeof(MemSlab) + Alignment - 1; + if (PaddedSize > SizeThreshold) { + MemSlab *NewSlab = Allocator.Allocate(PaddedSize); + + // Put the new slab after the current slab, since we are not allocating + // into it. + NewSlab->NextPtr = CurSlab->NextPtr; + CurSlab->NextPtr = NewSlab; + + Ptr = AlignPtr((char*)(NewSlab + 1), Alignment); + assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size); + return Ptr; + } + + // Otherwise, start a new slab and try again. 
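// A minimal sketch of the bump-pointer fast path in Allocate() above: one
// alignment, one bounds check, one pointer bump, and no per-allocation
// bookkeeping. The slab-refill and oversized-allocation paths are omitted.
#include <cstddef>
#include <cstdint>

struct Bump {
  char *cur, *end;
  void *allocate(size_t size, size_t align) {
    char *p = (char *)(((uintptr_t)cur + align - 1) &
                       ~(uintptr_t)(align - 1));
    if (p + size > end)
      return 0;                 // the real allocator starts a new slab here
    cur = p + size;
    return p;
  }
};

int main() {
  char buffer[256];
  Bump b = { buffer, buffer + sizeof(buffer) };
  void *a = b.allocate(10, 8);
  void *c = b.allocate(10, 8);
  return (a && c && a != c) ? 0 : 1;
}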
+ StartNewSlab(); + Ptr = AlignPtr(CurPtr, Alignment); + CurPtr = Ptr + Size; + assert(CurPtr <= End && "Unable to allocate memory!"); return Ptr; } +unsigned BumpPtrAllocator::GetNumSlabs() const { + unsigned NumSlabs = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + ++NumSlabs; + } + return NumSlabs; +} + void BumpPtrAllocator::PrintStats() const { - unsigned BytesUsed = 0; - unsigned NumRegions = 0; - const MemRegion *R = (MemRegion*)TheMemory; - for (; R; R = R->getNext(), ++NumRegions) - BytesUsed += R->getNumBytesAllocated(); - - cerr << "\nNumber of memory regions: " << NumRegions << "\n"; - cerr << "Bytes allocated: " << BytesUsed << "\n"; + unsigned NumSlabs = 0; + size_t TotalMemory = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + TotalMemory += Slab->Size; + ++NumSlabs; + } + + errs() << "\nNumber of memory regions: " << NumSlabs << '\n' + << "Bytes used: " << BytesAllocated << '\n' + << "Bytes allocated: " << TotalMemory << '\n' + << "Bytes wasted: " << (TotalMemory - BytesAllocated) + << " (includes alignment, etc)\n"; +} + +MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator = + MallocSlabAllocator(); + +SlabAllocator::~SlabAllocator() { } + +MallocSlabAllocator::~MallocSlabAllocator() { } + +MemSlab *MallocSlabAllocator::Allocate(size_t Size) { + MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0); + Slab->Size = Size; + Slab->NextPtr = 0; + return Slab; +} + +void MallocSlabAllocator::Deallocate(MemSlab *Slab) { + Allocator.Deallocate(Slab); +} + +void PrintRecyclerStats(size_t Size, + size_t Align, + size_t FreeListSize) { + errs() << "Recycler element size: " << Size << '\n' + << "Recycler element alignment: " << Align << '\n' + << "Number of elements free for recycling: " << FreeListSize << '\n'; } -void llvm::PrintRecyclerStats(size_t Size, - size_t Align, - size_t FreeListSize) { - cerr << "Recycler element size: " << Size << '\n'; - cerr << "Recycler element alignment: " << Align << '\n'; - cerr << "Number of elements free for recycling: " << FreeListSize << '\n'; } diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index f26c2c0f4ecac..cd355ffe3604d 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -3,32 +3,43 @@ add_llvm_library(LLVMSupport APInt.cpp APSInt.cpp Allocator.cpp - Annotation.cpp CommandLine.cpp ConstantRange.cpp Debug.cpp Dwarf.cpp + ErrorHandling.cpp FileUtilities.cpp FoldingSet.cpp + FormattedStream.cpp GraphWriter.cpp IsInf.cpp IsNAN.cpp ManagedStatic.cpp MemoryBuffer.cpp + MemoryObject.cpp PluginLoader.cpp PrettyStackTrace.cpp + Regex.cpp SlowOperationInformer.cpp SmallPtrSet.cpp SourceMgr.cpp Statistic.cpp - Streams.cpp StringExtras.cpp StringMap.cpp StringPool.cpp + StringRef.cpp SystemUtils.cpp + TargetRegistry.cpp Timer.cpp Triple.cpp + Twine.cpp + raw_os_ostream.cpp raw_ostream.cpp + regcomp.c + regerror.c + regexec.c + regfree.c + regstrlcpy.c ) target_link_libraries (LLVMSupport LLVMSystem) diff --git a/lib/Support/COPYRIGHT.regex b/lib/Support/COPYRIGHT.regex new file mode 100644 index 0000000000000..a6392fd37c3df --- /dev/null +++ b/lib/Support/COPYRIGHT.regex @@ -0,0 +1,54 @@ +$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ + +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. +This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. 
+ +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 + */ diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 4922560200a02..626daa254dd7b 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -16,22 +16,22 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Config/config.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/System/Host.h" #include "llvm/System/Path.h" -#include -#include -#include -#include -#include -#include +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" #include -#include -#include +#include using namespace llvm; using namespace cl; @@ -105,10 +105,10 @@ void Option::addArgument() { /// GetOptionInfo - Scan the list of registered options, turning them into data /// structures that are easier to handle. -static void GetOptionInfo(std::vector &PositionalOpts, - std::vector &SinkOpts, - std::map &OptionsMap) { - std::vector OptionNames; +static void GetOptionInfo(SmallVectorImpl &PositionalOpts, + SmallVectorImpl &SinkOpts, + StringMap &OptionsMap) { + SmallVector OptionNames; Option *CAOpt = 0; // The ConsumeAfter option if it exists. for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { // If this option wants to handle multiple option names, get the full set. @@ -120,9 +120,8 @@ static void GetOptionInfo(std::vector &PositionalOpts, // Handle named options. for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { // Add argument to the argument map! - if (!OptionsMap.insert(std::pair(OptionNames[i], - O)).second) { - cerr << ProgramName << ": CommandLine Error: Argument '" + if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) { + errs() << ProgramName << ": CommandLine Error: Argument '" << OptionNames[i] << "' defined more than once!\n"; } } @@ -151,29 +150,39 @@ static void GetOptionInfo(std::vector &PositionalOpts, /// LookupOption - Lookup the option specified by the specified option on the /// command line. If there is a value specified (after an equal sign) return -/// that as well. -static Option *LookupOption(const char *&Arg, const char *&Value, - std::map &OptionsMap) { - while (*Arg == '-') ++Arg; // Eat leading dashes - - const char *ArgEnd = Arg; - while (*ArgEnd && *ArgEnd != '=') - ++ArgEnd; // Scan till end of argument name. +/// that as well. This assumes that leading dashes have already been stripped. +static Option *LookupOption(StringRef &Arg, StringRef &Value, + const StringMap &OptionsMap) { + // Reject all dashes. + if (Arg.empty()) return 0; + + size_t EqualPos = Arg.find('='); + + // If we have an equals sign, remember the value. + if (EqualPos == StringRef::npos) { + // Look up the option. + StringMap::const_iterator I = OptionsMap.find(Arg); + return I != OptionsMap.end() ? I->second : 0; + } - if (*ArgEnd == '=') // If we have an equals sign... - Value = ArgEnd+1; // Get the value, not the equals + // If the argument before the = is a valid option name, we match. If not, + // return Arg unmolested. 
+ StringMap<Option*>::const_iterator I = + OptionsMap.find(Arg.substr(0, EqualPos)); + if (I == OptionsMap.end()) return 0; + + Value = Arg.substr(EqualPos+1); + Arg = Arg.substr(0, EqualPos); + return I->second; +} - if (*Arg == 0) return 0; - // Look up the option. - std::map<std::string, Option*>::iterator I = - OptionsMap.find(std::string(Arg, ArgEnd)); - return I != OptionsMap.end() ? I->second : 0; -} - -static inline bool ProvideOption(Option *Handler, const char *ArgName, - const char *Value, int argc, char **argv, +/// ProvideOption - For Value, this differentiates between an empty value ("") +/// and a null value (StringRef()). The latter is accepted for arguments that +/// don't allow a value (-foo); the former is rejected (-foo=). +static inline bool ProvideOption(Option *Handler, StringRef ArgName, + StringRef Value, int argc, char **argv, int &i) { // Is this a multi-argument option? unsigned NumAdditionalVals = Handler->getNumAdditionalVals(); @@ -181,68 +190,62 @@ static inline bool ProvideOption(Option *Handler, const char *ArgName, // Enforce value requirements switch (Handler->getValueExpectedFlag()) { case ValueRequired: - if (Value == 0) { // No value specified? - if (i+1 < argc) { // Steal the next argument, like for '-o filename' - Value = argv[++i]; - } else { - return Handler->error(" requires a value!"); - } + if (Value.data() == 0) { // No value specified? - if (i+1 >= argc) + return Handler->error("requires a value!"); + // Steal the next argument, like for '-o filename' + Value = argv[++i]; } break; case ValueDisallowed: if (NumAdditionalVals > 0) - return Handler->error(": multi-valued option specified" - " with ValueDisallowed modifier!"); + return Handler->error("multi-valued option specified" + " with ValueDisallowed modifier!"); - if (Value) - return Handler->error(" does not allow a value! '" + - std::string(Value) + "' specified."); + if (Value.data()) + return Handler->error("does not allow a value! '" + + Twine(Value) + "' specified."); break; case ValueOptional: break; + default: - cerr << ProgramName + errs() << ProgramName << ": Bad ValueMask flag! CommandLine usage error:" << Handler->getValueExpectedFlag() << "\n"; - abort(); - break; + llvm_unreachable(0); } // If this isn't a multi-arg option, just run the handler. - if (NumAdditionalVals == 0) { - return Handler->addOccurrence(i, ArgName, Value ? Value : ""); - } + if (NumAdditionalVals == 0) + return Handler->addOccurrence(i, ArgName, Value); + // If it is, run the handler several times.
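
For context, getNumAdditionalVals() is non-zero for options declared with the cl::multi_val modifier, which makes one occurrence consume a fixed number of following words; the loop below then feeds each word to the handler in turn. A declaration sketch (the option name Coords is hypothetical, and cl::multi_val is assumed to be available as declared in CommandLine.h):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // "prog -coords 1 2 3": one command-line occurrence, and ProvideOption()
    // steals the two extra words from argv and runs the handler per value.
    static cl::list<int> Coords("coords", cl::multi_val(3),
                                cl::desc("x, y and z coordinates"));
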
- else { - bool MultiArg = false; - - if (Value) { - if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) - return true; - --NumAdditionalVals; - MultiArg = true; - } + bool MultiArg = false; - while (NumAdditionalVals > 0) { + if (Value.data()) { + if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + return true; + --NumAdditionalVals; + MultiArg = true; + } - if (i+1 < argc) { - Value = argv[++i]; - } else { - return Handler->error(": not enough values!"); - } - if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) - return true; - MultiArg = true; - --NumAdditionalVals; - } - return false; + while (NumAdditionalVals > 0) { + if (i+1 >= argc) + return Handler->error("not enough values!"); + Value = argv[++i]; + + if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + return true; + MultiArg = true; + --NumAdditionalVals; } + return false; } -static bool ProvidePositionalOption(Option *Handler, const std::string &Arg, - int i) { +static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { int Dummy = i; - return ProvideOption(Handler, Handler->ArgStr, Arg.c_str(), 0, 0, Dummy); + return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy); } @@ -260,33 +263,78 @@ static inline bool isPrefixedOrGrouping(const Option *O) { // see if there are options that satisfy the predicate. If we find one, return it, // otherwise return null. // -static Option *getOptionPred(std::string Name, size_t &Length, +static Option *getOptionPred(StringRef Name, size_t &Length, bool (*Pred)(const Option*), - std::map<std::string, Option*> &OptionsMap) { + const StringMap<Option*> &OptionsMap) { - std::map<std::string, Option*>::iterator OMI = OptionsMap.find(Name); - if (OMI != OptionsMap.end() && Pred(OMI->second)) { - Length = Name.length(); - return OMI->second; - } + StringMap<Option*>::const_iterator OMI = OptionsMap.find(Name); - if (Name.size() == 1) return 0; - do { - Name.erase(Name.end()-1, Name.end()); // Chop off the last character... + // Loop while we haven't found an option and Name still has at least two + // characters in it (so that the next iteration will not be the empty + // string). + while (OMI == OptionsMap.end() && Name.size() > 1) { + Name = Name.substr(0, Name.size()-1); // Chop off the last character. OMI = OptionsMap.find(Name); - - // Loop while we haven't found an option and Name still has at least two - // characters in it (so that the next iteration will not be the empty - // string... - } while ((OMI == OptionsMap.end() || !Pred(OMI->second)) && Name.size() > 1); + } if (OMI != OptionsMap.end() && Pred(OMI->second)) { - Length = Name.length(); + Length = Name.size(); return OMI->second; // Found one! } return 0; // No option found! } +/// HandlePrefixedOrGroupedOption - The specified argument string (which started +/// with at least one '-') does not fully match an available option. Check to +/// see if this is a prefix or grouped option. If so, split Arg into an output +/// Arg/Value pair and return the Option to parse it with. +static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, + bool &ErrorParsing, + const StringMap<Option*> &OptionsMap) { + if (Arg.size() == 1) return 0; + + // Do the lookup! + size_t Length = 0; + Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap); + if (PGOpt == 0) return 0; + + // If the option is a prefixed option, then the value is simply the + // rest of the name... so fall through to later processing, by + // setting up the argument name flags and value fields.
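
Concretely, the two formatting modes split differently: cl::Prefix glues the value to the option name, while cl::Grouping lets several single-letter flags share one dash. A declaration sketch (option names hypothetical):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Prefix: "-lm" parses as option "l" with value "m".
    static cl::list<std::string> Libs("l", cl::Prefix,
                                      cl::desc("Library to link"));

    // Grouping: "-abc" parses as the three flags "-a -b -c".
    static cl::opt<bool> FlagA("a", cl::Grouping);
    static cl::opt<bool> FlagB("b", cl::Grouping);
    static cl::opt<bool> FlagC("c", cl::Grouping);
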
+ if (PGOpt->getFormattingFlag() == cl::Prefix) { + Value = Arg.substr(Length); + Arg = Arg.substr(0, Length); + assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt); + return PGOpt; + } + + // This must be a grouped option... handle them now. Grouping options can't + // have values. + assert(isGrouping(PGOpt) && "Broken getOptionPred!"); + + do { + // Move current arg name out of Arg into OneArgName. + StringRef OneArgName = Arg.substr(0, Length); + Arg = Arg.substr(Length); + + // Because ValueRequired is an invalid flag for grouped arguments, + // we don't need to pass argc/argv in. + assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && + "Option can not be cl::Grouping AND cl::ValueRequired!"); + int Dummy; + ErrorParsing |= ProvideOption(PGOpt, OneArgName, + StringRef(), 0, 0, Dummy); + + // Get the next grouping option. + PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap); + } while (PGOpt && Length != Arg.size()); + + // Return the last option with Arg cut down to just the last one. + return PGOpt; +} + + + static bool RequiresValue(const Option *O) { return O->getNumOccurrencesFlag() == cl::Required || O->getNumOccurrencesFlag() == cl::OneOrMore; @@ -300,45 +348,35 @@ static bool EatsUnboundedNumberOfValues(const Option *O) { /// ParseCStringVector - Break INPUT up wherever one or more /// whitespace characters are found, and store the resulting tokens in /// OUTPUT. The tokens stored in OUTPUT are dynamically allocated -/// using strdup (), so it is the caller's responsibility to free () +/// using strdup(), so it is the caller's responsibility to free() /// them later. /// -static void ParseCStringVector(std::vector<char*> &output, - const char *input) { +static void ParseCStringVector(std::vector<char*> &OutputVector, + const char *Input) { // Characters which will be treated as token separators: - static const char *const delims = " \v\f\t\r\n"; - - std::string work (input); - // Skip past any delims at head of input string. - size_t pos = work.find_first_not_of (delims); - // If the string consists entirely of delims, then exit early. - if (pos == std::string::npos) return; - // Otherwise, jump forward to beginning of first word. - work = work.substr (pos); - // Find position of first delimiter. - pos = work.find_first_of (delims); - - while (!work.empty() && pos != std::string::npos) { - // Everything from 0 to POS is the next word to copy. - output.push_back (strdup (work.substr (0,pos).c_str ())); - // Is there another word in the string? - size_t nextpos = work.find_first_not_of (delims, pos + 1); - if (nextpos != std::string::npos) { - // Yes? Then remove delims from beginning ... - work = work.substr (work.find_first_not_of (delims, pos + 1)); - // and find the end of the word. - pos = work.find_first_of (delims); - } else { - // No? (Remainder of string is delims.) End the loop. - work = ""; - pos = std::string::npos; + StringRef Delims = " \v\f\t\r\n"; + + StringRef WorkStr(Input); + while (!WorkStr.empty()) { + // If the first character is a delimiter, strip them off. + if (Delims.find(WorkStr[0]) != StringRef::npos) { + size_t Pos = WorkStr.find_first_not_of(Delims); + if (Pos == StringRef::npos) Pos = WorkStr.size(); + WorkStr = WorkStr.substr(Pos); + continue; } - } - - // If `input' ended with non-delim char, then we'll get here with - // the last word of `input' in `work'; copy it now. - if (!work.empty ()) { - output.push_back (strdup (work.c_str ())); + + // Find position of first delimiter.
+ size_t Pos = WorkStr.find_first_of(Delims); + if (Pos == StringRef::npos) Pos = WorkStr.size(); + + // Everything from 0 to Pos is the next word to copy. + char *NewStr = (char*)malloc(Pos+1); + memcpy(NewStr, WorkStr.data(), Pos); + NewStr[Pos] = 0; + OutputVector.push_back(NewStr); + + WorkStr = WorkStr.substr(Pos); } } @@ -372,20 +410,19 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, // Free all the strdup()ed strings. for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end(); i != e; ++i) - free (*i); + free(*i); } /// ExpandResponseFiles - Copy the contents of argv into newArgv, /// substituting the contents of the response files for the arguments /// of type @file. -static void ExpandResponseFiles(int argc, char** argv, +static void ExpandResponseFiles(unsigned argc, char** argv, std::vector<char*>& newArgv) { - for (int i = 1; i != argc; ++i) { - char* arg = argv[i]; + for (unsigned i = 1; i != argc; ++i) { + char *arg = argv[i]; if (arg[0] == '@') { - sys::PathWithStatus respFile(++arg); // Check that the response file is not empty (mmap'ing empty @@ -418,9 +455,9 @@ static void ExpandResponseFiles(int argc, char** argv, void cl::ParseCommandLineOptions(int argc, char **argv, const char *Overview, bool ReadResponseFiles) { // Process all registered options. - std::vector<Option*> PositionalOpts; - std::vector<Option*> SinkOpts; - std::map<std::string, Option*> Opts; + SmallVector<Option*, 4> PositionalOpts; + SmallVector<Option*, 4> SinkOpts; + StringMap<Option*> Opts; GetOptionInfo(PositionalOpts, SinkOpts, Opts); assert((!Opts.empty() || !PositionalOpts.empty()) && @@ -469,7 +506,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // unless there is only one positional argument... if (PositionalOpts.size() > 2) ErrorParsing |= - Opt->error(" error - this positional option will never be matched, " + Opt->error("error - this positional option will never be matched, " "because it does not Require a value, and a " "cl::ConsumeAfter option is active!"); } else if (UnboundedFound && !Opt->ArgStr[0]) { @@ -477,7 +514,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // not specified after an option that eats all extra arguments, or this // one will never get any! // - ErrorParsing |= Opt->error(" error - option can never match, because " + ErrorParsing |= Opt->error("error - option can never match, because " "another positional argument will match an " "unbounded number of values, and this option" " does not require a value!"); @@ -488,9 +525,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv, } // PositionalVals - A vector of "positional" arguments we accumulate into - // the process at the end... + // the process at the end. // - std::vector<std::pair<std::string,unsigned> > PositionalVals; + SmallVector<std::pair<StringRef,unsigned>, 4> PositionalVals; // If the program has named positional arguments, and the name has been run // across, keep track of which positional argument was named. Otherwise put @@ -501,8 +538,8 @@ bool DashDashFound = false; // Have we read '--'? for (int i = 1; i < argc; ++i) { Option *Handler = 0; - const char *Value = 0; - const char *ArgName = ""; + StringRef Value; + StringRef ArgName = ""; // If the option list changed, this means that some command line // option has just been registered or deregistered. This can occur in @@ -524,7 +561,9 @@ void cl::ParseCommandLineOptions(int argc, char **argv, if (ActivePositionalArg) { ProvidePositionalOption(ActivePositionalArg, argv[i], i); continue; // We are done!
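
As a usage note, the environment-variable and response-file paths both funnel through this machinery: ParseEnvironmentOptions() splits a variable's contents with ParseCStringVector(), and ExpandResponseFiles() splices @file contents into argv. A minimal caller sketch (the PROG_OPTIONS variable name is an assumption):

    #include "llvm/Support/CommandLine.h"

    int main(int argc, char **argv) {
      (void)argc;
      // Tokenizes the hypothetical PROG_OPTIONS environment variable with
      // ParseCStringVector() and parses the result; the strdup()ed tokens
      // are freed afterwards, as the loop above shows.
      llvm::cl::ParseEnvironmentOptions(argv[0], "PROG_OPTIONS");
      return 0;
    }
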
- } else if (!PositionalOpts.empty()) { + } + + if (!PositionalOpts.empty()) { PositionalVals.push_back(std::make_pair(argv[i],i)); // All of the positional arguments have been fulfilled, give the rest to @@ -550,69 +589,37 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // option is another positional argument. If so, treat it as an argument, // otherwise feed it to the eating positional. ArgName = argv[i]+1; + // Eat leading dashes. + while (!ArgName.empty() && ArgName[0] == '-') + ArgName = ArgName.substr(1); + Handler = LookupOption(ArgName, Value, Opts); if (!Handler || Handler->getFormattingFlag() != cl::Positional) { ProvidePositionalOption(ActivePositionalArg, argv[i], i); continue; // We are done! } - } else { // We start with a '-', must be an argument... + } else { // We start with a '-', must be an argument. ArgName = argv[i]+1; + // Eat leading dashes. + while (!ArgName.empty() && ArgName[0] == '-') + ArgName = ArgName.substr(1); + Handler = LookupOption(ArgName, Value, Opts); // Check to see if this "option" is really a prefixed or grouped argument. - if (Handler == 0) { - std::string RealName(ArgName); - if (RealName.size() > 1) { - size_t Length = 0; - Option *PGOpt = getOptionPred(RealName, Length, isPrefixedOrGrouping, - Opts); - - // If the option is a prefixed option, then the value is simply the - // rest of the name... so fall through to later processing, by - // setting up the argument name flags and value fields. - // - if (PGOpt && PGOpt->getFormattingFlag() == cl::Prefix) { - Value = ArgName+Length; - assert(Opts.find(std::string(ArgName, Value)) != Opts.end() && - Opts.find(std::string(ArgName, Value))->second == PGOpt); - Handler = PGOpt; - } else if (PGOpt) { - // This must be a grouped option... handle them now. - assert(isGrouping(PGOpt) && "Broken getOptionPred!"); - - do { - // Move current arg name out of RealName into RealArgName... - std::string RealArgName(RealName.begin(), - RealName.begin() + Length); - RealName.erase(RealName.begin(), RealName.begin() + Length); - - // Because ValueRequired is an invalid flag for grouped arguments, - // we don't need to pass argc/argv in... - // - assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && - "Option can not be cl::Grouping AND cl::ValueRequired!"); - int Dummy; - ErrorParsing |= ProvideOption(PGOpt, RealArgName.c_str(), - 0, 0, 0, Dummy); - - // Get the next grouping option... - PGOpt = getOptionPred(RealName, Length, isGrouping, Opts); - } while (PGOpt && Length != RealName.size()); - - Handler = PGOpt; // Ate all of the options. - } - } - } + if (Handler == 0) + Handler = HandlePrefixedOrGroupedOption(ArgName, Value, + ErrorParsing, Opts); } if (Handler == 0) { if (SinkOpts.empty()) { - cerr << ProgramName << ": Unknown command line argument '" + errs() << ProgramName << ": Unknown command line argument '" << argv[i] << "'. Try: '" << argv[0] << " --help'\n"; ErrorParsing = true; } else { - for (std::vector<Option*>::iterator I = SinkOpts.begin(), + for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(), E = SinkOpts.end(); I != E ; ++I) (*I)->addOccurrence(i, "", argv[i]); } @@ -620,24 +627,23 @@ void cl::ParseCommandLineOptions(int argc, char **argv, } // Check to see if this option accepts a comma separated list of values. If - // it does, we have to split up the value into multiple values...
- if (Value && Handler->getMiscFlags() & CommaSeparated) { - std::string Val(Value); - std::string::size_type Pos = Val.find(','); - - while (Pos != std::string::npos) { - // Process the portion before the comma... - ErrorParsing |= ProvideOption(Handler, ArgName, - std::string(Val.begin(), - Val.begin()+Pos).c_str(), + // it does, we have to split up the value into multiple values. + if (Handler->getMiscFlags() & CommaSeparated) { + StringRef Val(Value); + StringRef::size_type Pos = Val.find(','); + + while (Pos != StringRef::npos) { + // Process the portion before the comma. + ErrorParsing |= ProvideOption(Handler, ArgName, Val.substr(0, Pos), argc, argv, i); - // Erase the portion before the comma, AND the comma... - Val.erase(Val.begin(), Val.begin()+Pos+1); - Value += Pos+1; // Increment the original value pointer as well... + // Erase the portion before the comma, AND the comma. + Val = Val.substr(Pos+1); + Value = Value.substr(Pos+1); // Increment the original value as well. - // Check for another comma... + // Check for another comma. Pos = Val.find(','); } + Value = Val; } // If this is a named positional argument, just remember that it is the @@ -650,7 +656,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Check and handle positional arguments now... if (NumPositionalRequired > PositionalVals.size()) { - cerr << ProgramName + errs() << ProgramName << ": Not enough positional command line arguments specified!\n" << "Must specify at least " << NumPositionalRequired << " positional arguments: See: " << argv[0] << " --help\n"; @@ -658,14 +664,14 @@ void cl::ParseCommandLineOptions(int argc, char **argv, ErrorParsing = true; } else if (!HasUnlimitedPositionals && PositionalVals.size() > PositionalOpts.size()) { - cerr << ProgramName + errs() << ProgramName << ": Too many positional arguments specified!\n" << "Can specify at most " << PositionalOpts.size() << " positional arguments: See: " << argv[0] << " --help\n"; ErrorParsing = true; } else if (ConsumeAfterOpt == 0) { - // Positional args have already been handled if ConsumeAfter is specified... + // Positional args have already been handled if ConsumeAfter is specified. unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size()); for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) { if (RequiresValue(PositionalOpts[i])) { @@ -693,7 +699,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, ValNo++; break; default: - assert(0 && "Internal error, unexpected NumOccurrences flag in " + llvm_unreachable("Internal error, unexpected NumOccurrences flag in " "positional argument processing!"); } } @@ -730,13 +736,13 @@ void cl::ParseCommandLineOptions(int argc, char **argv, } // Loop over args and make sure all required args are specified! - for (std::map<std::string, Option*>::iterator I = Opts.begin(), + for (StringMap<Option*>::iterator I = Opts.begin(), E = Opts.end(); I != E; ++I) { switch (I->second->getNumOccurrencesFlag()) { case Required: case OneOrMore: if (I->second->getNumOccurrences() == 0) { - I->second->error(" must be specified at least once!"); + I->second->error("must be specified at least once!"); ErrorParsing = true; } // Fall through @@ -756,7 +762,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Free all the strdup()ed strings.
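
The cl::CommaSeparated path just exercised turns a single name=a,b,c occurrence into one handler call per element. A declaration sketch (option name hypothetical):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // "-langs=c,cpp,ada" is recorded as three occurrences: "c", "cpp", "ada".
    static cl::list<std::string> Langs("langs", cl::CommaSeparated,
                                       cl::desc("Languages to enable"));
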
for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end(); i != e; ++i) - free (*i); + free(*i); } // If we had an error processing our arguments, don't let the program execute @@ -767,36 +773,35 @@ // Option Base class implementation // -bool Option::error(std::string Message, const char *ArgName) { - if (ArgName == 0) ArgName = ArgStr; - if (ArgName[0] == 0) - cerr << HelpStr; // Be nice for positional arguments +bool Option::error(const Twine &Message, StringRef ArgName) { + if (ArgName.data() == 0) ArgName = ArgStr; + if (ArgName.empty()) + errs() << HelpStr; // Be nice for positional arguments else - cerr << ProgramName << ": for the -" << ArgName; + errs() << ProgramName << ": for the -" << ArgName; - cerr << " option: " << Message << "\n"; + errs() << " option: " << Message << "\n"; return true; } -bool Option::addOccurrence(unsigned pos, const char *ArgName, - const std::string &Value, - bool MultiArg) { +bool Option::addOccurrence(unsigned pos, StringRef ArgName, + StringRef Value, bool MultiArg) { if (!MultiArg) NumOccurrences++; // Increment the number of times we have been seen switch (getNumOccurrencesFlag()) { case Optional: if (NumOccurrences > 1) - return error(": may only occur zero or one times!", ArgName); + return error("may only occur zero or one times!", ArgName); break; case Required: if (NumOccurrences > 1) - return error(": must occur exactly one time!", ArgName); + return error("must occur exactly one time!", ArgName); // Fall through case OneOrMore: case ZeroOrMore: case ConsumeAfter: break; - default: return error(": bad num occurrences flag value!"); + default: return error("bad num occurrences flag value!"); } return handleOccurrence(pos, ArgName, Value); @@ -823,8 +828,8 @@ size_t alias::getOptionWidth() const { // Print out the option for the alias. void alias::printOptionInfo(size_t GlobalWidth) const { size_t L = std::strlen(ArgStr); - cout << " -" << ArgStr << std::string(GlobalWidth-L-6, ' ') << " - " - << HelpStr << "\n"; + errs() << " -" << ArgStr; + errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; } @@ -850,13 +855,12 @@ size_t basic_parser_impl::getOptionWidth(const Option &O) const { // void basic_parser_impl::printOptionInfo(const Option &O, size_t GlobalWidth) const { - cout << " -" << O.ArgStr; + outs() << " -" << O.ArgStr; if (const char *ValName = getValueName()) - cout << "=<" << getValueStr(O, ValName) << ">"; + outs() << "=<" << getValueStr(O, ValName) << '>'; - cout << std::string(GlobalWidth-getOptionWidth(O), ' ') << " - " - << O.HelpStr << "\n"; + outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n'; } @@ -864,81 +868,78 @@ void basic_parser_impl::printOptionInfo(const Option &O, // parser<bool> implementation // -bool parser<bool>::parse(Option &O, const char *ArgName, - const std::string &Arg, bool &Value) { +bool parser<bool>::parse(Option &O, StringRef ArgName, + StringRef Arg, bool &Value) { if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || Arg == "1") { Value = true; - } else if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { + return false; + } + + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { Value = false; - } else { - return O.error(": '" + Arg + - "' is invalid value for boolean argument! Try 0 or 1"); + return false; } - return false; + return O.error("'" + Arg + + "' is invalid value for boolean argument!
Try 0 or 1"); } // parser implementation // -bool parser::parse(Option &O, const char *ArgName, - const std::string &Arg, boolOrDefault &Value) { +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, boolOrDefault &Value) { if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || Arg == "1") { Value = BOU_TRUE; - } else if (Arg == "false" || Arg == "FALSE" - || Arg == "False" || Arg == "0") { + return false; + } + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { Value = BOU_FALSE; - } else { - return O.error(": '" + Arg + - "' is invalid value for boolean argument! Try 0 or 1"); + return false; } - return false; + + return O.error("'" + Arg + + "' is invalid value for boolean argument! Try 0 or 1"); } // parser implementation // -bool parser::parse(Option &O, const char *ArgName, - const std::string &Arg, int &Value) { - char *End; - Value = (int)strtol(Arg.c_str(), &End, 0); - if (*End != 0) - return O.error(": '" + Arg + "' value invalid for integer argument!"); +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, int &Value) { + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for integer argument!"); return false; } // parser implementation // -bool parser::parse(Option &O, const char *ArgName, - const std::string &Arg, unsigned &Value) { - char *End; - errno = 0; - unsigned long V = strtoul(Arg.c_str(), &End, 0); - Value = (unsigned)V; - if (((V == ULONG_MAX) && (errno == ERANGE)) - || (*End != 0) - || (Value != V)) - return O.error(": '" + Arg + "' value invalid for uint argument!"); +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, unsigned &Value) { + + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for uint argument!"); return false; } // parser/parser implementation // -static bool parseDouble(Option &O, const std::string &Arg, double &Value) { - const char *ArgStart = Arg.c_str(); +static bool parseDouble(Option &O, StringRef Arg, double &Value) { + SmallString<32> TmpStr(Arg.begin(), Arg.end()); + const char *ArgStart = TmpStr.c_str(); char *End; Value = strtod(ArgStart, &End); if (*End != 0) - return O.error(": '" +Arg+ "' value invalid for floating point argument!"); + return O.error("'" + Arg + "' value invalid for floating point argument!"); return false; } -bool parser::parse(Option &O, const char *AN, - const std::string &Arg, double &Val) { +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, double &Val) { return parseDouble(O, Arg, Val); } -bool parser::parse(Option &O, const char *AN, - const std::string &Arg, float &Val) { +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, float &Val) { double dVal; if (parseDouble(O, Arg, dVal)) return true; @@ -955,14 +956,12 @@ bool parser::parse(Option &O, const char *AN, // argument string. If the option is not found, getNumOptions() is returned. 
// unsigned generic_parser_base::findOption(const char *Name) { - unsigned i = 0, e = getNumOptions(); - std::string N(Name); + unsigned e = getNumOptions(); - while (i != e) - if (getOption(i) == N) + for (unsigned i = 0; i != e; ++i) { + if (strcmp(getOption(i), Name) == 0) return i; - else - ++i; + } return e; } @@ -989,21 +988,21 @@ void generic_parser_base::printOptionInfo(const Option &O, size_t GlobalWidth) const { if (O.hasArgStr()) { size_t L = std::strlen(O.ArgStr); - cout << " -" << O.ArgStr << std::string(GlobalWidth-L-6, ' ') - << " - " << O.HelpStr << "\n"; + outs() << " -" << O.ArgStr; + outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n'; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8; - cout << " =" << getOption(i) << std::string(NumSpaces, ' ') - << " - " << getDescription(i) << "\n"; + outs() << " =" << getOption(i); + outs().indent(NumSpaces) << " - " << getDescription(i) << '\n'; } } else { if (O.HelpStr[0]) - cout << " " << O.HelpStr << "\n"; + outs() << " " << O.HelpStr << '\n'; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { size_t L = std::strlen(getOption(i)); - cout << " -" << getOption(i) << std::string(GlobalWidth-L-8, ' ') - << " - " << getDescription(i) << "\n"; + outs() << " -" << getOption(i); + outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n'; } } } @@ -1013,6 +1012,12 @@ void generic_parser_base::printOptionInfo(const Option &O, // --help and --help-hidden option implementation // +static int OptNameCompare(const void *LHS, const void *RHS) { + typedef std::pair<const char*, Option*> pair_ty; + + return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first); +} + namespace { class HelpPrinter { @@ -1020,14 +1025,6 @@ class HelpPrinter { const Option *EmptyArg; const bool ShowHidden; - // isHidden/isReallyHidden - Predicates to be used to filter down arg lists. - inline static bool isHidden(std::pair<std::string, Option*> &OptPair) { - return OptPair.second->getOptionHiddenFlag() >= Hidden; - } - inline static bool isReallyHidden(std::pair<std::string, Option*> &OptPair) { - return OptPair.second->getOptionHiddenFlag() == ReallyHidden; - } - public: explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) { EmptyArg = 0; @@ -1037,34 +1034,40 @@ public: if (Value == false) return; // Get all the options. - std::vector<Option*> PositionalOpts; - std::vector<Option*> SinkOpts; - std::map<std::string, Option*> OptMap; + SmallVector<Option*, 4> PositionalOpts; + SmallVector<Option*, 4> SinkOpts; + StringMap<Option*> OptMap; GetOptionInfo(PositionalOpts, SinkOpts, OptMap); - // Copy Options into a vector so we can sort them as we like... - std::vector<std::pair<std::string, Option*> > Opts; - copy(OptMap.begin(), OptMap.end(), std::back_inserter(Opts)); - - // Eliminate Hidden or ReallyHidden arguments, depending on ShowHidden - Opts.erase(std::remove_if(Opts.begin(), Opts.end(), - std::ptr_fun(ShowHidden ? isReallyHidden : isHidden)), - Opts.end()); - - // Eliminate duplicate entries in table (from enum flags options, f.e.) - { // Give OptionSet a scope - std::set<Option*> OptionSet; - for (unsigned i = 0; i != Opts.size(); ++i) - if (OptionSet.count(Opts[i].second) == 0) - OptionSet.insert(Opts[i].second); // Add new entry to set - else - Opts.erase(Opts.begin()+i--); // Erase duplicate + // Copy Options into a vector so we can sort them as we like. + SmallVector<std::pair<const char*, Option*>, 128> Opts; + SmallPtrSet<Option*, 128> OptionSet; // Duplicate option detection. + + for (StringMap<Option*>::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) { + // Ignore really-hidden options.
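
For reference, the two visibility tiers filtered here come from the cl::Hidden and cl::ReallyHidden modifiers; --help skips both and --help-hidden skips only the latter. A declaration sketch (option names hypothetical):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<bool> Normal("normal-flag", cl::desc("Listed by --help"));
    static cl::opt<bool> Dev("dev-flag", cl::Hidden,
                             cl::desc("Listed only by --help-hidden"));
    static cl::opt<bool> Internal("internal-flag", cl::ReallyHidden,
                                  cl::desc("Never listed"));
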
+ if (I->second->getOptionHiddenFlag() == ReallyHidden) + continue; + + // Unless showhidden is set, ignore hidden flags. + if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) + continue; + + // If we've already seen this option, don't add it to the list again. + if (!OptionSet.insert(I->second)) + continue; + + Opts.push_back(std::pair<const char*, Option*>(I->getKey().data(), + I->second)); } + + // Sort the options list alphabetically. + qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); if (ProgramOverview) - cout << "OVERVIEW: " << ProgramOverview << "\n"; + outs() << "OVERVIEW: " << ProgramOverview << "\n"; - cout << "USAGE: " << ProgramName << " [options]"; + outs() << "USAGE: " << ProgramName << " [options]"; // Print out the positional options. Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists... @@ -1074,28 +1077,28 @@ public: for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) { if (PositionalOpts[i]->ArgStr[0]) - cout << " --" << PositionalOpts[i]->ArgStr; - cout << " " << PositionalOpts[i]->HelpStr; + outs() << " --" << PositionalOpts[i]->ArgStr; + outs() << " " << PositionalOpts[i]->HelpStr; } // Print the consume after option info if it exists... - if (CAOpt) cout << " " << CAOpt->HelpStr; + if (CAOpt) outs() << " " << CAOpt->HelpStr; - cout << "\n\n"; + outs() << "\n\n"; // Compute the maximum argument length... MaxArgLen = 0; for (size_t i = 0, e = Opts.size(); i != e; ++i) MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); - cout << "OPTIONS:\n"; + outs() << "OPTIONS:\n"; for (size_t i = 0, e = Opts.size(); i != e; ++i) Opts[i].second->printOptionInfo(MaxArgLen); // Print any extra help the user has declared. for (std::vector<const char*>::iterator I = MoreHelp->begin(), E = MoreHelp->end(); I != E; ++I) - cout << *I; + outs() << *I; MoreHelp->clear(); // Halt the program since help information was printed @@ -1120,37 +1123,64 @@ HHOp("help-hidden", cl::desc("Display all available options"), static void (*OverrideVersionPrinter)() = 0; +static int TargetArraySortFn(const void *LHS, const void *RHS) { + typedef std::pair<const char *, const Target*> pair_ty; + return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first); +} + namespace { class VersionPrinter { public: void print() { - cout << "Low Level Virtual Machine (http://llvm.org/):\n"; - cout << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; + raw_ostream &OS = outs(); + OS << "Low Level Virtual Machine (http://llvm.org/):\n" + << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; #ifdef LLVM_VERSION_INFO - cout << LLVM_VERSION_INFO; + OS << LLVM_VERSION_INFO; #endif - cout << "\n "; + OS << "\n "; #ifndef __OPTIMIZE__ - cout << "DEBUG build"; + OS << "DEBUG build"; #else - cout << "Optimized build"; + OS << "Optimized build"; #endif #ifndef NDEBUG - cout << " with assertions"; + OS << " with assertions"; #endif - cout << ".\n"; - cout << " Built " << __DATE__ << "(" << __TIME__ << ").\n"; + OS << ".\n" + << " Built " << __DATE__ << " (" << __TIME__ << ").\n" + << " Host: " << sys::getHostTriple() << '\n' + << '\n' + << " Registered Targets:\n"; + + std::vector<std::pair<const char *, const Target*> > Targets; + size_t Width = 0; + for (TargetRegistry::iterator it = TargetRegistry::begin(), + ie = TargetRegistry::end(); it != ie; ++it) { + Targets.push_back(std::make_pair(it->getName(), &*it)); + Width = std::max(Width, strlen(Targets.back().first)); + } + if (!Targets.empty()) + qsort(&Targets[0], Targets.size(), sizeof(Targets[0]), + TargetArraySortFn); + + for (unsigned i = 0, e = Targets.size(); i != e; ++i) { + OS <<
" " << Targets[i].first; + OS.indent(Width - strlen(Targets[i].first)) << " - " + << Targets[i].second->getShortDescription() << '\n'; + } + if (Targets.empty()) + OS << " (none)\n"; } void operator=(bool OptionWasSpecified) { - if (OptionWasSpecified) { - if (OverrideVersionPrinter == 0) { - print(); - exit(1); - } else { - (*OverrideVersionPrinter)(); - exit(1); - } + if (!OptionWasSpecified) return; + + if (OverrideVersionPrinter == 0) { + print(); + exit(1); } + (*OverrideVersionPrinter)(); + exit(1); } }; } // End anonymous namespace diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index cb8c4b013c32b..423e90d993527 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -23,12 +23,12 @@ #include "llvm/Support/ConstantRange.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Instructions.h" using namespace llvm; /// Initialize a full (the default) or empty set for the specified type. /// -ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) : - Lower(BitWidth, 0), Upper(BitWidth, 0) { +ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { if (Full) Lower = Upper = APInt::getMaxValue(BitWidth); else @@ -37,16 +37,63 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) : /// Initialize a range to hold the single specified value. /// -ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) { } +ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) {} ConstantRange::ConstantRange(const APInt &L, const APInt &U) : Lower(L), Upper(U) { - assert(L.getBitWidth() == U.getBitWidth() && + assert(L.getBitWidth() == U.getBitWidth() && "ConstantRange with unequal bit widths"); assert((L != U || (L.isMaxValue() || L.isMinValue())) && "Lower == Upper, but they aren't min or max value!"); } +ConstantRange ConstantRange::makeICmpRegion(unsigned Pred, + const ConstantRange &CR) { + uint32_t W = CR.getBitWidth(); + switch (Pred) { + default: assert(!"Invalid ICmp predicate to makeICmpRegion()"); + case ICmpInst::ICMP_EQ: + return CR; + case ICmpInst::ICMP_NE: + if (CR.isSingleElement()) + return ConstantRange(CR.getUpper(), CR.getLower()); + return ConstantRange(W); + case ICmpInst::ICMP_ULT: + return ConstantRange(APInt::getMinValue(W), CR.getUnsignedMax()); + case ICmpInst::ICMP_SLT: + return ConstantRange(APInt::getSignedMinValue(W), CR.getSignedMax()); + case ICmpInst::ICMP_ULE: { + APInt UMax(CR.getUnsignedMax()); + if (UMax.isMaxValue()) + return ConstantRange(W); + return ConstantRange(APInt::getMinValue(W), UMax + 1); + } + case ICmpInst::ICMP_SLE: { + APInt SMax(CR.getSignedMax()); + if (SMax.isMaxSignedValue() || (SMax+1).isMaxSignedValue()) + return ConstantRange(W); + return ConstantRange(APInt::getSignedMinValue(W), SMax + 1); + } + case ICmpInst::ICMP_UGT: + return ConstantRange(CR.getUnsignedMin() + 1, APInt::getNullValue(W)); + case ICmpInst::ICMP_SGT: + return ConstantRange(CR.getSignedMin() + 1, + APInt::getSignedMinValue(W)); + case ICmpInst::ICMP_UGE: { + APInt UMin(CR.getUnsignedMin()); + if (UMin.isMinValue()) + return ConstantRange(W); + return ConstantRange(UMin, APInt::getNullValue(W)); + } + case ICmpInst::ICMP_SGE: { + APInt SMin(CR.getSignedMin()); + if (SMin.isMinSignedValue()) + return ConstantRange(W); + return ConstantRange(SMin, APInt::getSignedMinValue(W)); + } + } +} + /// isFullSet - Return true if this set contains all of the elements possible /// for this data-type bool ConstantRange::isFullSet() const { @@ -112,14 +159,10 @@ APInt 
ConstantRange::getSignedMax() const { else return SignedMax; } else { - if ((getUpper() - 1).slt(getLower())) { - if (getLower() != SignedMax) - return SignedMax; - else - return getUpper() - 1; - } else { + if (getLower().isNegative() == getUpper().isNegative()) + return SignedMax; + else return getUpper() - 1; - } } } @@ -157,6 +200,30 @@ bool ConstantRange::contains(const APInt &V) const { return Lower.ule(V) || V.ult(Upper); } +/// contains - Return true if the argument is a subset of this range. +/// Two equal sets contain each other. The empty set is considered to be +/// contained by all other sets. +/// +bool ConstantRange::contains(const ConstantRange &Other) const { + if (isFullSet()) return true; + if (Other.isFullSet()) return false; + if (Other.isEmptySet()) return true; + if (isEmptySet()) return false; + + if (!isWrappedSet()) { + if (Other.isWrappedSet()) + return false; + + return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper); + } + + if (!Other.isWrappedSet()) + return Other.getUpper().ule(Upper) || + Lower.ule(Other.getLower()); + + return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower()); +} + /// subtract - Subtract the specified constant from the endpoints of this /// constant range. ConstantRange ConstantRange::subtract(const APInt &Val) const { @@ -208,50 +275,11 @@ ConstantRange::intersect1Wrapped(const ConstantRange &LHS, } /// intersectWith - Return the range that results from the intersection of this -/// range with another range. -/// +/// range with another range. The resultant range is guaranteed to include all +/// elements contained in both input ranges, and to have the smallest possible +/// set size that does so. Because there may be two intersections with the +/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A). ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { - assert(getBitWidth() == CR.getBitWidth() && - "ConstantRange types don't agree!"); - // Handle common special cases - if (isEmptySet() || CR.isFullSet()) - return *this; - if (isFullSet() || CR.isEmptySet()) - return CR; - - if (!isWrappedSet()) { - if (!CR.isWrappedSet()) { - using namespace APIntOps; - APInt L = umax(Lower, CR.Lower); - APInt U = umin(Upper, CR.Upper); - - if (L.ult(U)) // If range isn't empty... - return ConstantRange(L, U); - else - return ConstantRange(getBitWidth(), false);// Otherwise, empty set - } else - return intersect1Wrapped(CR, *this); - } else { // We know "this" is wrapped... - if (!CR.isWrappedSet()) - return intersect1Wrapped(*this, CR); - else { - // Both ranges are wrapped... - using namespace APIntOps; - APInt L = umax(Lower, CR.Lower); - APInt U = umin(Upper, CR.Upper); - return ConstantRange(L, U); - } - } - return *this; -} - -/// maximalIntersectWith - Return the range that results from the intersection -/// of this range with another range. The resultant range is guaranteed to -/// include all elements contained in both input ranges, and to have the -/// smallest possible set size that does so. Because there may be two -/// intersections with the same set size, A.maximalIntersectWith(B) might not -/// be equal to B.maximalIntersect(A).
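
A worked instance of the half-open [Lower, Upper) semantics used by intersectWith and unionWith (a sketch with 8-bit ranges; the asymmetry noted above only arises once wrapped sets are involved):

    #include "llvm/Support/ConstantRange.h"
    using namespace llvm;

    static void rangeSketch() {
      ConstantRange A(APInt(8, 0), APInt(8, 10));  // {0..9}
      ConstantRange B(APInt(8, 5), APInt(8, 20));  // {5..19}
      ConstantRange I = A.intersectWith(B);        // [5, 10) == {5..9}
      ConstantRange U = A.unionWith(B);            // [0, 20) == {0..19}
      (void)I; (void)U;
    }
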
-ConstantRange ConstantRange::maximalIntersectWith(const ConstantRange &CR) const { assert(getBitWidth() == CR.getBitWidth() && "ConstantRange types don't agree!"); @@ -260,7 +288,7 @@ ConstantRange ConstantRange::maximalIntersectWith(const ConstantRange &CR) const if (CR.isEmptySet() || isFullSet()) return CR; if (!isWrappedSet() && CR.isWrappedSet()) - return CR.maximalIntersectWith(*this); + return CR.intersectWith(*this); if (!isWrappedSet() && !CR.isWrappedSet()) { if (Lower.ult(CR.Lower)) { @@ -343,69 +371,74 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this); - APInt L = Lower, U = Upper; - if (!isWrappedSet() && !CR.isWrappedSet()) { + if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) { + // If the two ranges are disjoint, find the smaller gap and bridge it. + APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper; + if (d1.ult(d2)) + return ConstantRange(Lower, CR.Upper); + else + return ConstantRange(CR.Lower, Upper); + } + + APInt L = Lower, U = Upper; if (CR.Lower.ult(L)) L = CR.Lower; - - if (CR.Upper.ugt(U)) + if ((CR.Upper - 1).ugt(U - 1)) U = CR.Upper; + + if (L == 0 && U == 0) + return ConstantRange(getBitWidth()); + + return ConstantRange(L, U); } - if (isWrappedSet() && !CR.isWrappedSet()) { - if ((CR.Lower.ult(Upper) && CR.Upper.ult(Upper)) || - (CR.Lower.ugt(Lower) && CR.Upper.ugt(Lower))) { + if (!CR.isWrappedSet()) { + // ------U L----- and ------U L----- : this + // L--U L--U : CR + if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower)) return *this; - } - if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) { + // ------U L----- : this + // L---------U : CR + if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) return ConstantRange(getBitWidth()); - } - - if (CR.Lower.ule(Upper) && CR.Upper.ule(Lower)) { - APInt d1 = CR.Upper - Upper, d2 = Lower - CR.Upper; - if (d1.ult(d2)) { - U = CR.Upper; - } else { - L = CR.Upper; - } - } - if (Upper.ult(CR.Lower) && CR.Upper.ult(Lower)) { + // ----U L---- : this + // L---U : CR + // + if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) { APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper; - if (d1.ult(d2)) { - U = CR.Lower + 1; - } else { - L = CR.Upper - 1; - } + if (d1.ult(d2)) + return ConstantRange(Lower, CR.Upper); + else + return ConstantRange(CR.Lower, Upper); } - if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) { - APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Lower; + // ----U L----- : this + // L----U : CR + if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) + return ConstantRange(CR.Lower, Upper); - if (d1.ult(d2)) { - U = CR.Lower + 1; - } else { - L = CR.Lower; - } - } + // ------U L---- : this + // L-----U : CR + if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower)) + return ConstantRange(Lower, CR.Upper); } - if (isWrappedSet() && CR.isWrappedSet()) { - if (Lower.ult(CR.Upper) || CR.Lower.ult(Upper)) - return ConstantRange(getBitWidth()); + assert(isWrappedSet() && CR.isWrappedSet() && + "ConstantRange::unionWith missed wrapped union unwrapped case"); - if (CR.Upper.ugt(U)) { - U = CR.Upper; - } - - if (CR.Lower.ult(L)) { - L = CR.Lower; - } + // ------U L---- and ------U L---- : this + // -U L----------- and ------------U L : CR + if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper)) + return ConstantRange(getBitWidth()); - if (L == U) return ConstantRange(getBitWidth()); - } + APInt L = Lower, U = Upper; + if (CR.Upper.ugt(U)) + U = CR.Upper; + if (CR.Lower.ult(L)) + L = CR.Lower; return ConstantRange(L, U); } @@ -435,7 +468,7 @@ ConstantRange 
ConstantRange::signExtend(uint32_t DstTySize) const { assert(SrcTySize < DstTySize && "Not a value extension"); if (isFullSet()) { return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1), - APInt::getLowBitsSet(DstTySize, SrcTySize-1)); + APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1); } APInt L = Lower; L.sext(DstTySize); @@ -459,6 +492,99 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { return ConstantRange(L, U); } +ConstantRange +ConstantRange::add(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize(); + APInt NewLower = getLower() + Other.getLower(); + APInt NewUpper = getUpper() + Other.getUpper() - 1; + if (NewLower == NewUpper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + ConstantRange X = ConstantRange(NewLower, NewUpper); + if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y)) + // We've wrapped, therefore, full set. + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return X; +} + +ConstantRange +ConstantRange::multiply(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt this_min = getUnsignedMin().zext(getBitWidth() * 2); + APInt this_max = getUnsignedMax().zext(getBitWidth() * 2); + APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2); + APInt Other_max = Other.getUnsignedMax().zext(getBitWidth() * 2); + + ConstantRange Result_zext = ConstantRange(this_min * Other_min, + this_max * Other_max + 1); + return Result_zext.truncate(getBitWidth()); +} + +ConstantRange +ConstantRange::smax(const ConstantRange &Other) const { + // X smax Y is: range(smax(X_smin, Y_smin), + // smax(X_smax, Y_smax)) + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin()); + APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1; + if (NewU == NewL) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(NewL, NewU); +} + +ConstantRange +ConstantRange::umax(const ConstantRange &Other) const { + // X umax Y is: range(umax(X_umin, Y_umin), + // umax(X_umax, Y_umax)) + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); + APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1; + if (NewU == NewL) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(NewL, NewU); +} + +ConstantRange +ConstantRange::udiv(const ConstantRange &RHS) const { + if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (RHS.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax()); + + APInt RHS_umin = RHS.getUnsignedMin(); + if (RHS_umin == 0) { + // We want the lowest value in RHS excluding zero. Usually that would be 1 + // except for a range in the form of [X, 1) in which case it would be X. 
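
To make the udiv bounds concrete, a worked 8-bit example (a sketch):

    #include "llvm/Support/ConstantRange.h"
    using namespace llvm;

    static void udivSketch() {
      ConstantRange LHS(APInt(8, 4), APInt(8, 8));  // {4..7}
      ConstantRange RHS(APInt(8, 2), APInt(8, 4));  // {2, 3}
      // Lower = 4 udiv 3 = 1; Upper = 7 udiv 2 + 1 = 4.
      ConstantRange Q = LHS.udiv(RHS);              // [1, 4) == {1, 2, 3}
      (void)Q;
    }
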
+ if (RHS.getUpper() == 1) + RHS_umin = RHS.getLower(); + else + RHS_umin = APInt(getBitWidth(), 1); + } + + APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1; + + // If the LHS is Full and the RHS is a wrapped interval containing 1 then + // this could occur. + if (Lower == Upper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return ConstantRange(Lower, Upper); +} + /// print - Print out the bounds to a stream... /// void ConstantRange::print(raw_ostream &OS) const { @@ -470,3 +596,5 @@ void ConstantRange::dump() const { print(errs()); } + + diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index a09cddf9022a5..71ff411def2b0 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -27,51 +27,37 @@ #include "llvm/Support/Debug.h" using namespace llvm; +// All Debug.h functionality is a no-op in NDEBUG mode. +#ifndef NDEBUG bool llvm::DebugFlag; // DebugFlag - Exported boolean set by the -debug option -namespace { -#ifndef NDEBUG - // -debug - Command line option to enable the DEBUG statements in the passes. - // This flag may only be enabled in debug builds. - static cl::opt<bool, true> - Debug("debug", cl::desc("Enable debug output"), cl::Hidden, - cl::location(DebugFlag)); +// -debug - Command line option to enable the DEBUG statements in the passes. +// This flag may only be enabled in debug builds. +static cl::opt<bool, true> +Debug("debug", cl::desc("Enable debug output"), cl::Hidden, + cl::location(DebugFlag)); - static std::string CurrentDebugType; - static struct DebugOnlyOpt { - void operator=(const std::string &Val) const { - DebugFlag |= !Val.empty(); - CurrentDebugType = Val; - } - } DebugOnlyOptLoc; +static std::string CurrentDebugType; +static struct DebugOnlyOpt { + void operator=(const std::string &Val) const { + DebugFlag |= !Val.empty(); + CurrentDebugType = Val; + } +} DebugOnlyOptLoc; - static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > - DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), - cl::Hidden, cl::value_desc("debug string"), - cl::location(DebugOnlyOptLoc), cl::ValueRequired); -#endif -} +static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> > +DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"), + cl::Hidden, cl::value_desc("debug string"), + cl::location(DebugOnlyOptLoc), cl::ValueRequired); // isCurrentDebugType - Return true if the specified string is the debug type // specified on the command line, or if none was specified on the command line // with the -debug-only=X option. // bool llvm::isCurrentDebugType(const char *DebugType) { -#ifndef NDEBUG return CurrentDebugType.empty() || DebugType == CurrentDebugType; +} #else - return false; +// Avoid "has no symbols" warning. +int Debug_dummy = 0; #endif -} - -// getErrorOutputStream - Returns the error output stream (std::cerr). This -// places the std::c* I/O streams into one .cpp file and relieves the whole -// program from having to have hundreds of static c'tor/d'tors for them.
-// -OStream &llvm::getErrorOutputStream(const char *DebugType) { - static OStream cnoout(0); - if (DebugFlag && isCurrentDebugType(DebugType)) - return cerr; - else - return cnoout; -} diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index fa99035b679f2..8b688cae2a639 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include @@ -83,7 +84,7 @@ const char *TagString(unsigned Tag) { case DW_TAG_lo_user: return "DW_TAG_lo_user"; case DW_TAG_hi_user: return "DW_TAG_hi_user"; } - assert(0 && "Unknown Dwarf Tag"); + llvm_unreachable("Unknown Dwarf Tag"); return ""; } @@ -94,7 +95,7 @@ const char *ChildrenString(unsigned Children) { case DW_CHILDREN_no: return "CHILDREN_no"; case DW_CHILDREN_yes: return "CHILDREN_yes"; } - assert(0 && "Unknown Dwarf ChildrenFlag"); + llvm_unreachable("Unknown Dwarf ChildrenFlag"); return ""; } @@ -205,7 +206,7 @@ const char *AttributeString(unsigned Attribute) { case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; } - assert(0 && "Unknown Dwarf Attribute"); + llvm_unreachable("Unknown Dwarf Attribute"); return ""; } @@ -235,7 +236,7 @@ const char *FormEncodingString(unsigned Encoding) { case DW_FORM_ref_udata: return "FORM_ref_udata"; case DW_FORM_indirect: return "FORM_indirect"; } - assert(0 && "Unknown Dwarf Form Encoding"); + llvm_unreachable("Unknown Dwarf Form Encoding"); return ""; } @@ -310,7 +311,7 @@ const char *OperationEncodingString(unsigned Encoding) { case DW_OP_lo_user: return "OP_lo_user"; case DW_OP_hi_user: return "OP_hi_user"; } - assert(0 && "Unknown Dwarf Operation Encoding"); + llvm_unreachable("Unknown Dwarf Operation Encoding"); return ""; } @@ -336,7 +337,7 @@ const char *AttributeEncodingString(unsigned Encoding) { case DW_ATE_lo_user: return "ATE_lo_user"; case DW_ATE_hi_user: return "ATE_hi_user"; } - assert(0 && "Unknown Dwarf Attribute Encoding"); + llvm_unreachable("Unknown Dwarf Attribute Encoding"); return ""; } @@ -350,7 +351,7 @@ const char *DecimalSignString(unsigned Sign) { case DW_DS_leading_separate: return "DS_leading_separate"; case DW_DS_trailing_separate: return "DS_trailing_separate"; } - assert(0 && "Unknown Dwarf Decimal Sign Attribute"); + llvm_unreachable("Unknown Dwarf Decimal Sign Attribute"); return ""; } @@ -364,7 +365,7 @@ const char *EndianityString(unsigned Endian) { case DW_END_lo_user: return "END_lo_user"; case DW_END_hi_user: return "END_hi_user"; } - assert(0 && "Unknown Dwarf Endianity"); + llvm_unreachable("Unknown Dwarf Endianity"); return ""; } @@ -377,7 +378,7 @@ const char *AccessibilityString(unsigned Access) { case DW_ACCESS_protected: return "ACCESS_protected"; case DW_ACCESS_private: return "ACCESS_private"; } - assert(0 && "Unknown Dwarf Accessibility"); + llvm_unreachable("Unknown Dwarf Accessibility"); return ""; } @@ -389,7 +390,7 @@ const char *VisibilityString(unsigned Visibility) { case DW_VIS_exported: return "VIS_exported"; case DW_VIS_qualified: return "VIS_qualified"; } - assert(0 && "Unknown Dwarf Visibility"); + llvm_unreachable("Unknown Dwarf Visibility"); return ""; } @@ -401,7 +402,7 @@ const char *VirtualityString(unsigned Virtuality) { case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual"; case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual"; } - assert(0 && "Unknown Dwarf 
Virtuality"); + llvm_unreachable("Unknown Dwarf Virtuality"); return ""; } @@ -431,7 +432,7 @@ const char *LanguageString(unsigned Language) { case DW_LANG_lo_user: return "LANG_lo_user"; case DW_LANG_hi_user: return "LANG_hi_user"; } - assert(0 && "Unknown Dwarf Language"); + llvm_unreachable("Unknown Dwarf Language"); return ""; } @@ -444,7 +445,7 @@ const char *CaseString(unsigned Case) { case DW_ID_down_case: return "ID_down_case"; case DW_ID_case_insensitive: return "ID_case_insensitive"; } - assert(0 && "Unknown Dwarf Identifier Case"); + llvm_unreachable("Unknown Dwarf Identifier Case"); return ""; } @@ -458,7 +459,7 @@ const char *ConventionString(unsigned Convention) { case DW_CC_lo_user: return "CC_lo_user"; case DW_CC_hi_user: return "CC_hi_user"; } - assert(0 && "Unknown Dwarf Calling Convention"); + llvm_unreachable("Unknown Dwarf Calling Convention"); return ""; } @@ -471,7 +472,7 @@ const char *InlineCodeString(unsigned Code) { case DW_INL_declared_not_inlined: return "INL_declared_not_inlined"; case DW_INL_declared_inlined: return "INL_declared_inlined"; } - assert(0 && "Unknown Dwarf Inline Code"); + llvm_unreachable("Unknown Dwarf Inline Code"); return ""; } @@ -482,7 +483,7 @@ const char *ArrayOrderString(unsigned Order) { case DW_ORD_row_major: return "ORD_row_major"; case DW_ORD_col_major: return "ORD_col_major"; } - assert(0 && "Unknown Dwarf Array Order"); + llvm_unreachable("Unknown Dwarf Array Order"); return ""; } @@ -493,7 +494,7 @@ const char *DiscriminantString(unsigned Discriminant) { case DW_DSC_label: return "DSC_label"; case DW_DSC_range: return "DSC_range"; } - assert(0 && "Unknown Dwarf Discriminant Descriptor"); + llvm_unreachable("Unknown Dwarf Discriminant Descriptor"); return ""; } @@ -514,7 +515,7 @@ const char *LNStandardString(unsigned Standard) { case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin"; case DW_LNS_set_isa: return "LNS_set_isa"; } - assert(0 && "Unknown Dwarf Line Number Standard"); + llvm_unreachable("Unknown Dwarf Line Number Standard"); return ""; } @@ -529,7 +530,7 @@ const char *LNExtendedString(unsigned Encoding) { case DW_LNE_lo_user: return "LNE_lo_user"; case DW_LNE_hi_user: return "LNE_hi_user"; } - assert(0 && "Unknown Dwarf Line Number Extended Opcode Encoding"); + llvm_unreachable("Unknown Dwarf Line Number Extended Opcode Encoding"); return ""; } @@ -544,7 +545,7 @@ const char *MacinfoString(unsigned Encoding) { case DW_MACINFO_end_file: return "MACINFO_end_file"; case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext"; } - assert(0 && "Unknown Dwarf Macinfo Type Encodings"); + llvm_unreachable("Unknown Dwarf Macinfo Type Encodings"); return ""; } @@ -580,7 +581,7 @@ const char *CallFrameString(unsigned Encoding) { case DW_CFA_lo_user: return "CFA_lo_user"; case DW_CFA_hi_user: return "CFA_hi_user"; } - assert(0 && "Unknown Dwarf Call Frame Instruction Encodings"); + llvm_unreachable("Unknown Dwarf Call Frame Instruction Encodings"); return ""; } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp new file mode 100644 index 0000000000000..dff4f030fefe2 --- /dev/null +++ b/lib/Support/ErrorHandling.cpp @@ -0,0 +1,73 @@ +//===- lib/Support/ErrorHandling.cpp - Callbacks for errors -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines an API for error handling; it supersedes cerr+abort() and +// cerr+exit() style error handling. +// Callbacks can be registered for these errors through this API. +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Threading.h" +#include <cassert> +#include <cstdlib> + +using namespace llvm; +using namespace std; + +static llvm_error_handler_t ErrorHandler = 0; +static void *ErrorHandlerUserData = 0; + +namespace llvm { +void llvm_install_error_handler(llvm_error_handler_t handler, + void *user_data) { + assert(!llvm_is_multithreaded() && + "Cannot register error handlers after starting multithreaded mode!\n"); + assert(!ErrorHandler && "Error handler already registered!\n"); + ErrorHandler = handler; + ErrorHandlerUserData = user_data; +} + +void llvm_remove_error_handler() { + ErrorHandler = 0; +} + +void llvm_report_error(const char *reason) { + llvm_report_error(Twine(reason)); +} + +void llvm_report_error(const std::string &reason) { + llvm_report_error(Twine(reason)); +} + +void llvm_report_error(const Twine &reason) { + if (!ErrorHandler) { + errs() << "LLVM ERROR: " << reason << "\n"; + } else { + ErrorHandler(ErrorHandlerUserData, reason.str()); + } + exit(1); +} + +void llvm_unreachable_internal(const char *msg, const char *file, + unsigned line) { + // This code intentionally doesn't call the ErrorHandler callback, because + // llvm_unreachable is intended to be used to indicate "impossible" + // situations, and not legitimate runtime errors. + if (msg) + errs() << msg << "\n"; + errs() << "UNREACHABLE executed"; + if (file) + errs() << " at " << file << ":" << line; + errs() << "!\n"; + abort(); +} +} + diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 41c730e3e1e62..954dc77dff1e9 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <cstring> @@ -50,7 +51,7 @@ void FoldingSetNodeID::AddInteger(unsigned long I) { else if (sizeof(long) == sizeof(long long)) { AddInteger((unsigned long long)I); } else { - assert(0 && "unexpected sizeof(long)"); + llvm_unreachable("unexpected sizeof(long)"); } } void FoldingSetNodeID::AddInteger(long long I) { @@ -62,14 +63,14 @@ void FoldingSetNodeID::AddInteger(unsigned long long I) { Bits.push_back(unsigned(I >> 32)); } -void FoldingSetNodeID::AddString(const char *String, const char *End) { - unsigned Size = static_cast<unsigned>(End - String); +void FoldingSetNodeID::AddString(StringRef String) { + unsigned Size = String.size(); Bits.push_back(Size); if (!Size) return; unsigned Units = Size / 4; unsigned Pos = 0; - const unsigned *Base = (const unsigned *)String; + const unsigned *Base = (const unsigned*) String.data(); // If the string is aligned do a bulk transfer.
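A hedged sketch of client code for the error-handling API implemented above (illustrative only, not part of this patch; the handler body is hypothetical):

#include "llvm/Support/ErrorHandling.h"
#include <string>

static void HandleLLVMError(void *UserData, const std::string &Reason) {
  // Route the message into the host application's own diagnostics.
}

void installHandlerExample() {
  // Per the asserts above: install before enabling multithreaded mode, and
  // at most one handler; llvm_report_error() still calls exit(1) after the
  // callback returns.
  llvm::llvm_install_error_handler(HandleLLVMError, /*user_data=*/0);
}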
if (!((intptr_t)Base & 3)) { @@ -99,14 +100,6 @@ void FoldingSetNodeID::AddString(const char *String, const char *End) { Bits.push_back(V); } -void FoldingSetNodeID::AddString(const char *String) { - AddString(String, String + strlen(String)); -} - -void FoldingSetNodeID::AddString(const std::string &String) { - AddString(&*String.begin(), &*String.end()); -} - /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to /// lookup the node in the FoldingSetImpl. unsigned FoldingSetNodeID::ComputeHash() const { diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp new file mode 100644 index 0000000000000..70f2cfa6ae882 --- /dev/null +++ b/lib/Support/FormattedStream.cpp @@ -0,0 +1,93 @@ +//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of formatted_raw_ostream. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +/// CountColumns - Examine the given char sequence and figure out which +/// column we end up in after output. +/// +static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) { + // Keep track of the current column by scanning the string for + // special characters + + for (const char *End = Ptr + Size; Ptr != End; ++Ptr) { + ++Column; + if (*Ptr == '\n' || *Ptr == '\r') + Column = 0; + else if (*Ptr == '\t') + // Assumes tab stop = 8 characters. + Column += (8 - (Column & 0x7)) & 0x7; + } + + return Column; +} + +/// ComputeColumn - Examine the current output and figure out which +/// column we end up in after output. +void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { + // If our previous scan pointer is inside the buffer, assume we already + // scanned those bytes. This depends on raw_ostream to not change our buffer + // in unexpected ways. + if (Ptr <= Scanned && Scanned <= Ptr + Size) { + // Scan all characters added since our last scan to determine the new + // column. + ColumnScanned = CountColumns(ColumnScanned, Scanned, + Size - (Scanned - Ptr)); + } else + ColumnScanned = CountColumns(ColumnScanned, Ptr, Size); + + // Update the scanning pointer. + Scanned = Ptr + Size; +} + +/// PadToColumn - Align the output to some column number. +/// +/// \param NewCol - The column to move to. +/// \param MinPad - The minimum space to give after the most recent +/// I/O, even if the current column + minpad > newcol. +/// +void formatted_raw_ostream::PadToColumn(unsigned NewCol) { + // Figure out what's in the buffer and add it to the column count. + ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); + + // Output spaces until we reach the desired column. + indent(std::max(int(NewCol - ColumnScanned), 1)); +} + +void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { + // Figure out what's in the buffer and add it to the column count. + ComputeColumn(Ptr, Size); + + // Write the data to the underlying stream (which is unbuffered, so + // the data will be immediately written out). + TheStream->write(Ptr, Size); + + // Reset the scanning pointer. + Scanned = 0; +} + +/// fouts() - This returns a reference to a formatted_raw_ostream for +/// standard output. 
Use it like: fouts() << "foo" << "bar"; +formatted_raw_ostream &llvm::fouts() { + static formatted_raw_ostream S(outs()); + return S; +} + +/// ferrs() - This returns a reference to a formatted_raw_ostream for +/// standard error. Use it like: ferrs() << "foo" << "bar"; +formatted_raw_ostream &llvm::ferrs() { + static formatted_raw_ostream S(errs()); + return S; +} diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index c359dfb82ea7b..c8bca6ef887cc 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -12,13 +12,47 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/GraphWriter.h" -#include "llvm/Support/Streams.h" #include "llvm/System/Path.h" #include "llvm/System/Program.h" #include "llvm/Config/config.h" using namespace llvm; -void llvm::DisplayGraph(const sys::Path &Filename) { +std::string llvm::DOT::EscapeString(const std::string &Label) { + std::string Str(Label); + for (unsigned i = 0; i != Str.length(); ++i) + switch (Str[i]) { + case '\n': + Str.insert(Str.begin()+i, '\\'); // Escape character... + ++i; + Str[i] = 'n'; + break; + case '\t': + Str.insert(Str.begin()+i, ' '); // Convert to two spaces + ++i; + Str[i] = ' '; + break; + case '\\': + if (i+1 != Str.length()) + switch (Str[i+1]) { + case 'l': continue; // don't disturb \l + case '|': case '{': case '}': + Str.erase(Str.begin()+i); continue; + default: break; + } + case '{': case '}': + case '<': case '>': + case '|': case '"': + Str.insert(Str.begin()+i, '\\'); // Escape character... + ++i; // don't infinite loop + break; + } + return Str; +} + + + +void llvm::DisplayGraph(const sys::Path &Filename, bool wait, + GraphProgram::Name program) { std::string ErrMsg; #if HAVE_GRAPHVIZ sys::Path Graphviz(LLVM_PATH_GRAPHVIZ); @@ -28,18 +62,61 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(Filename.c_str()); args.push_back(0); - cerr << "Running 'Graphviz' program... " << std::flush; - if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: " << ErrMsg << "\n"; - } -#elif (HAVE_GV && HAVE_DOT) + errs() << "Running 'Graphviz' program... 
"; + if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) + errs() << "Error viewing graph " << Filename.str() << ": " << ErrMsg + << "\n"; + else + Filename.eraseFromDisk(); + +#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \ + HAVE_TWOPI || HAVE_CIRCO)) sys::Path PSFilename = Filename; PSFilename.appendSuffix("ps"); - - sys::Path dot(LLVM_PATH_DOT); + + sys::Path prog; + + // Set default grapher +#if HAVE_CIRCO + prog = sys::Path(LLVM_PATH_CIRCO); +#endif +#if HAVE_TWOPI + prog = sys::Path(LLVM_PATH_TWOPI); +#endif +#if HAVE_NEATO + prog = sys::Path(LLVM_PATH_NEATO); +#endif +#if HAVE_FDP + prog = sys::Path(LLVM_PATH_FDP); +#endif +#if HAVE_DOT + prog = sys::Path(LLVM_PATH_DOT); +#endif + + // Find which program the user wants +#if HAVE_DOT + if (program == GraphProgram::DOT) + prog = sys::Path(LLVM_PATH_DOT); +#endif +#if (HAVE_FDP) + if (program == GraphProgram::FDP) + prog = sys::Path(LLVM_PATH_FDP); +#endif +#if (HAVE_NEATO) + if (program == GraphProgram::NEATO) + prog = sys::Path(LLVM_PATH_NEATO); +#endif +#if (HAVE_TWOPI) + if (program == GraphProgram::TWOPI) + prog = sys::Path(LLVM_PATH_TWOPI); +#endif +#if (HAVE_CIRCO) + if (program == GraphProgram::CIRCO) + prog = sys::Path(LLVM_PATH_CIRCO); +#endif std::vector args; - args.push_back(dot.c_str()); + args.push_back(prog.c_str()); args.push_back("-Tps"); args.push_back("-Nfontname=Courier"); args.push_back("-Gsize=7.5,10"); @@ -48,11 +125,13 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(PSFilename.c_str()); args.push_back(0); - cerr << "Running 'dot' program... " << std::flush; - if (sys::Program::ExecuteAndWait(dot, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: '" << ErrMsg << "\n"; + errs() << "Running '" << prog.str() << "' program... "; + + if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) { + errs() << "Error viewing graph " << Filename.str() << ": '" + << ErrMsg << "\n"; } else { - cerr << " done. \n"; + errs() << " done. \n"; sys::Path gv(LLVM_PATH_GV); args.clear(); @@ -62,11 +141,18 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(0); ErrMsg.clear(); - if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: " << ErrMsg << "\n"; + if (wait) { + if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) + errs() << "Error viewing graph: " << ErrMsg << "\n"; + Filename.eraseFromDisk(); + PSFilename.eraseFromDisk(); + } + else { + sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); + errs() << "Remember to erase graph files: " << Filename.str() << " " + << PSFilename.str() << "\n"; } } - PSFilename.eraseFromDisk(); #elif HAVE_DOTTY sys::Path dotty(LLVM_PATH_DOTTY); @@ -75,15 +161,15 @@ void llvm::DisplayGraph(const sys::Path &Filename) { args.push_back(Filename.c_str()); args.push_back(0); - cerr << "Running 'dotty' program... " << std::flush; + errs() << "Running 'dotty' program... 
"; if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) { - cerr << "Error viewing graph: " << ErrMsg << "\n"; + errs() << "Error viewing graph " << Filename.str() << ": " + << ErrMsg << "\n"; } else { #ifdef __MINGW32__ // Dotty spawns another app and doesn't wait until it returns return; #endif + Filename.eraseFromDisk(); } #endif - - Filename.eraseFromDisk(); } diff --git a/lib/Support/MemoryObject.cpp b/lib/Support/MemoryObject.cpp new file mode 100644 index 0000000000000..91e3ecd23a2e7 --- /dev/null +++ b/lib/Support/MemoryObject.cpp @@ -0,0 +1,34 @@ +//===- MemoryObject.cpp - Abstract memory interface -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/MemoryObject.h" +using namespace llvm; + +MemoryObject::~MemoryObject() { +} + +int MemoryObject::readBytes(uint64_t address, + uint64_t size, + uint8_t* buf, + uint64_t* copied) const { + uint64_t current = address; + uint64_t limit = getBase() + getExtent(); + + while (current - address < size && current < limit) { + if (readByte(current, &buf[(current - address)])) + return -1; + + current++; + } + + if (copied) + *copied = current - address; + + return 0; +} diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp index ef32af4b3f386..36caecffeede7 100644 --- a/lib/Support/PluginLoader.cpp +++ b/lib/Support/PluginLoader.cpp @@ -14,10 +14,9 @@ #define DONT_GET_PLUGIN_LOADER_OPTION #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PluginLoader.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/System/Mutex.h" -#include #include using namespace llvm; @@ -25,23 +24,23 @@ static ManagedStatic > Plugins; static ManagedStatic > PluginsLock; void PluginLoader::operator=(const std::string &Filename) { - sys::SmartScopedLock Lock(&*PluginsLock); + sys::SmartScopedLock Lock(*PluginsLock); std::string Error; if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) { - cerr << "Error opening '" << Filename << "': " << Error - << "\n -load request ignored.\n"; + errs() << "Error opening '" << Filename << "': " << Error + << "\n -load request ignored.\n"; } else { Plugins->push_back(Filename); } } unsigned PluginLoader::getNumPlugins() { - sys::SmartScopedLock Lock(&*PluginsLock); + sys::SmartScopedLock Lock(*PluginsLock); return Plugins.isConstructed() ? Plugins->size() : 0; } std::string &PluginLoader::getPlugin(unsigned num) { - sys::SmartScopedLock Lock(&*PluginsLock); + sys::SmartScopedLock Lock(*PluginsLock); assert(Plugins.isConstructed() && num < Plugins->size() && "Asking for an out of bounds plugin"); return (*Plugins)[num]; diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index 14290a1284fee..68b41a7f09427 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -19,6 +19,10 @@ #include "llvm/ADT/SmallString.h" using namespace llvm; +namespace llvm { + bool DisablePrettyStackTrace = false; +} + // FIXME: This should be thread local when llvm supports threads. 
static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead; @@ -67,15 +71,16 @@ static void CrashHandler(void *Cookie) { } if (!TmpStr.empty()) { - __crashreporter_info__ = strdup(TmpStr.c_str()); - errs() << __crashreporter_info__; + __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str()); + errs() << TmpStr.str(); } #endif } static bool RegisterCrashPrinter() { - sys::AddSignalHandler(CrashHandler, 0); + if (!DisablePrettyStackTrace) + sys::AddSignalHandler(CrashHandler, 0); return false; } diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp new file mode 100644 index 0000000000000..618ca0524a04f --- /dev/null +++ b/lib/Support/Regex.cpp @@ -0,0 +1,92 @@ +//===-- Regex.cpp - Regular Expression matcher implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a POSIX regular expression matcher. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Regex.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" +#include "regex_impl.h" +#include <string> +using namespace llvm; + +Regex::Regex(const StringRef &regex, unsigned Flags) { + unsigned flags = 0; + preg = new llvm_regex(); + preg->re_endp = regex.end(); + if (Flags & IgnoreCase) + flags |= REG_ICASE; + if (Flags & Newline) + flags |= REG_NEWLINE; + error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND); +} + +Regex::~Regex() { + llvm_regfree(preg); + delete preg; +} + +bool Regex::isValid(std::string &Error) { + if (!error) + return true; + + size_t len = llvm_regerror(error, preg, NULL, 0); + + Error.resize(len); + llvm_regerror(error, preg, &Error[0], len); + return false; +} + +/// getNumMatches - In a valid regex, return the number of parenthesized +/// matches it contains. +unsigned Regex::getNumMatches() const { + return preg->re_nsub; +} + +bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){ + unsigned nmatch = Matches ? preg->re_nsub+1 : 0; + + // pmatch needs to have at least one element. + SmallVector<llvm_regmatch_t, 8> pm; + pm.resize(nmatch > 0 ? nmatch : 1); + pm[0].rm_so = 0; + pm[0].rm_eo = String.size(); + + int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND); + + if (rc == REG_NOMATCH) + return false; + if (rc != 0) { + // regexec can fail due to invalid pattern or running out of memory. + error = rc; + return false; + } + + // There was a match.
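// An aside (not part of this patch): typical client use of the Regex class
// implemented here, with a made-up pattern.
//
//   llvm::Regex R("^([a-z]+)=([0-9]+)$");
//   llvm::SmallVector<llvm::StringRef, 3> Groups;
//   if (R.match("width=42", &Groups)) {
//     // Groups[0] == "width=42", Groups[1] == "width", Groups[2] == "42".
//   }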
+ + if (Matches) { // match position requested + Matches->clear(); + + for (unsigned i = 0; i != nmatch; ++i) { + if (pm[i].rm_so == -1) { + // this group didn't match + Matches->push_back(StringRef()); + continue; + } + assert(pm[i].rm_eo > pm[i].rm_so); + Matches->push_back(StringRef(String.data()+pm[i].rm_so, + pm[i].rm_eo-pm[i].rm_so)); + } + } + + return true; +} diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp index d5ffff9d937fc..b4e9430e5fdfe 100644 --- a/lib/Support/SlowOperationInformer.cpp +++ b/lib/Support/SlowOperationInformer.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/SlowOperationInformer.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Alarm.h" #include <sstream> #include <cassert> @@ -28,8 +28,8 @@ SlowOperationInformer::~SlowOperationInformer() { if (LastPrintAmount) { // If we have printed something, make _sure_ we print the 100% amount, and // also print a newline. - cout << std::string(LastPrintAmount, '\b') << "Progress " - << OperationName << ": 100% \n"; + outs() << std::string(LastPrintAmount, '\b') << "Progress " + << OperationName << ": 100% \n"; } } @@ -40,7 +40,7 @@ SlowOperationInformer::~SlowOperationInformer() { bool SlowOperationInformer::progress(unsigned Amount) { int status = sys::AlarmStatus(); if (status == -1) { - cout << "\n"; + outs() << "\n"; LastPrintAmount = 0; return true; } @@ -61,6 +61,7 @@ bool SlowOperationInformer::progress(unsigned Amount) { OS << "% "; LastPrintAmount = OS.str().size(); - cout << ToPrint+OS.str() << std::flush; + outs() << ToPrint+OS.str(); + outs().flush(); return false; } diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 6b0d55c19f22a..4b93f7f99a24b 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -18,7 +18,24 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +namespace { + struct LineNoCacheTy { + int LastQueryBufferID; + const char *LastQuery; + unsigned LineNoOfQuery; + }; +} + +static LineNoCacheTy *getCache(void *Ptr) { + return (LineNoCacheTy*)Ptr; +} + + SourceMgr::~SourceMgr() { + // Delete the line # cache if allocated. + if (LineNoCacheTy *Cache = getCache(LineNoCache)) + delete Cache; + while (!Buffers.empty()) { delete Buffers.back().Buffer; Buffers.pop_back(); @@ -71,8 +88,31 @@ unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { const char *Ptr = Buff->getBufferStart(); + // If we have a line number cache, and if the query is to a later point in the + // same file, start searching from the last query location. This optimizes + // for the case when multiple diagnostics come out of one file in order. + if (LineNoCacheTy *Cache = getCache(LineNoCache)) + if (Cache->LastQueryBufferID == BufferID && + Cache->LastQuery <= Loc.getPointer()) { + Ptr = Cache->LastQuery; + LineNo = Cache->LineNoOfQuery; + } + + // Scan for the location being queried, keeping track of the number of lines + // we see. for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) if (*Ptr == '\n') ++LineNo; + + + // Allocate the line number cache if it doesn't exist. + if (LineNoCache == 0) + LineNoCache = new LineNoCacheTy(); + + // Update the line # cache.
+ LineNoCacheTy &Cache = *getCache(LineNoCache); + Cache.LastQueryBufferID = BufferID; + Cache.LastQuery = Ptr; + Cache.LineNoOfQuery = LineNo; return LineNo; } diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 33570b0ee5343..14f94bc284473 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -24,16 +24,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" #include "llvm/ADT/StringExtras.h" #include -#include #include using namespace llvm; // GetLibSupportInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); } +namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); } /// -stats - Command line option to cause transformations to emit stats about /// what they did. @@ -58,14 +57,14 @@ public: } static ManagedStatic<StatisticInfo> StatInfo; -static ManagedStatic<sys::Mutex> StatLock; +static ManagedStatic<sys::SmartMutex<true> > StatLock; /// RegisterStatistic - The first time a statistic is bumped, this method is /// called. void Statistic::RegisterStatistic() { // If stats are enabled, inform StatInfo that this statistic should be // printed. - sys::ScopedLock Writer(&*StatLock); + sys::SmartScopedLock<true> Writer(*StatLock); if (!Initialized) { if (Enabled) StatInfo->addStatistic(this); @@ -96,7 +95,7 @@ StatisticInfo::~StatisticInfo() { if (Stats.empty()) return; // Get the stream to write to. - std::ostream &OutStream = *GetLibSupportInfoOutputFile(); + raw_ostream &OutStream = *GetLibSupportInfoOutputFile(); // Figure out how long the biggest Value and Name fields are. unsigned MaxNameLen = 0, MaxValLen = 0; @@ -125,8 +124,9 @@ StatisticInfo::~StatisticInfo() { } - OutStream << std::endl; // Flush the output stream... + OutStream << '\n'; // Flush the output stream... + OutStream.flush(); - if (&OutStream != cerr.stream() && &OutStream != cout.stream()) + if (&OutStream != &outs() && &OutStream != &errs()) delete &OutStream; // Close the file. } diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 0c61732a61b30..040308bbfd489 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -65,14 +65,13 @@ static unsigned HashString(const char *Start, const char *End) { /// specified bucket will be non-null. Otherwise, it will be null. In either /// case, the FullHashValue field of the bucket will be set to the hash value /// of the string. -unsigned StringMapImpl::LookupBucketFor(const char *NameStart, - const char *NameEnd) { +unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) { unsigned HTSize = NumBuckets; if (HTSize == 0) { // Hash table unallocated so far? init(16); HTSize = NumBuckets; } - unsigned FullHashValue = HashString(NameStart, NameEnd); + unsigned FullHashValue = HashString(Name.begin(), Name.end()); unsigned BucketNo = FullHashValue & (HTSize-1); unsigned ProbeAmt = 1; @@ -102,12 +101,10 @@ unsigned StringMapImpl::LookupBucketFor(const char *NameStart, // being non-null and for the full hash value) not at the items. This // is important for cache locality. - // Do the comparison like this because NameStart isn't necessarily + // Do the comparison like this because Name isn't necessarily // null-terminated!
char *ItemStr = (char*)BucketItem+ItemSize; - unsigned ItemStrLen = BucketItem->getKeyLength(); - if (unsigned(NameEnd-NameStart) == ItemStrLen && - memcmp(ItemStr, NameStart, ItemStrLen) == 0) { + if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) { // We found a match! return BucketNo; } @@ -126,10 +123,10 @@ unsigned StringMapImpl::LookupBucketFor(const char *NameStart, /// FindKey - Look up the bucket that contains the specified key. If it exists /// in the map, return the bucket number of the key. Otherwise return -1. /// This does not modify the map. -int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { +int StringMapImpl::FindKey(const StringRef &Key) const { unsigned HTSize = NumBuckets; if (HTSize == 0) return -1; // Really empty table? - unsigned FullHashValue = HashString(KeyStart, KeyEnd); + unsigned FullHashValue = HashString(Key.begin(), Key.end()); unsigned BucketNo = FullHashValue & (HTSize-1); unsigned ProbeAmt = 1; @@ -151,9 +148,7 @@ int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { // Do the comparison like this because NameStart isn't necessarily // null-terminated! char *ItemStr = (char*)BucketItem+ItemSize; - unsigned ItemStrLen = BucketItem->getKeyLength(); - if (unsigned(KeyEnd-KeyStart) == ItemStrLen && - memcmp(ItemStr, KeyStart, ItemStrLen) == 0) { + if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) { // We found a match! return BucketNo; } @@ -172,16 +167,15 @@ int StringMapImpl::FindKey(const char *KeyStart, const char *KeyEnd) const { /// delete it. This aborts if the value isn't in the table. void StringMapImpl::RemoveKey(StringMapEntryBase *V) { const char *VStr = (char*)V + ItemSize; - StringMapEntryBase *V2 = RemoveKey(VStr, VStr+V->getKeyLength()); + StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength())); V2 = V2; assert(V == V2 && "Didn't find key?"); } /// RemoveKey - Remove the StringMapEntry for the specified key from the /// table, returning it. If the key is not in the table, this returns null. 
-StringMapEntryBase *StringMapImpl::RemoveKey(const char *KeyStart, - const char *KeyEnd) { - int Bucket = FindKey(KeyStart, KeyEnd); +StringMapEntryBase *StringMapImpl::RemoveKey(const StringRef &Key) { + int Bucket = FindKey(Key); if (Bucket == -1) return 0; StringMapEntryBase *Result = TheTable[Bucket].Item; diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp index b9c1fd0465bd6..1ee917f119f7e 100644 --- a/lib/Support/StringPool.cpp +++ b/lib/Support/StringPool.cpp @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/StringPool.h" -#include "llvm/Support/Streams.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; @@ -22,12 +22,12 @@ StringPool::~StringPool() { assert(InternTable.empty() && "PooledStringPtr leaked!"); } -PooledStringPtr StringPool::intern(const char *Begin, const char *End) { - table_t::iterator I = InternTable.find(Begin, End); +PooledStringPtr StringPool::intern(const StringRef &Key) { + table_t::iterator I = InternTable.find(Key); if (I != InternTable.end()) return PooledStringPtr(&*I); - entry_t *S = entry_t::Create(Begin, End); + entry_t *S = entry_t::Create(Key.begin(), Key.end()); S->getValue().Pool = this; InternTable.insert(S); diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp new file mode 100644 index 0000000000000..deaa19efe9986 --- /dev/null +++ b/lib/Support/StringRef.cpp @@ -0,0 +1,188 @@ +//===-- StringRef.cpp - Lightweight String References ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringRef.h" +using namespace llvm; + +// MSVC emits references to this into the translation units which reference it. +#ifndef _MSC_VER +const size_t StringRef::npos; +#endif + +//===----------------------------------------------------------------------===// +// String Searching +//===----------------------------------------------------------------------===// + + +/// find - Search for the first string \arg Str in the string. +/// +/// \return - The index of the first occurence of \arg Str, or npos if not +/// found. +size_t StringRef::find(const StringRef &Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = 0, e = Length - N + 1; i != e; ++i) + if (substr(i, N).equals(Str)) + return i; + return npos; +} + +/// rfind - Search for the last string \arg Str in the string. +/// +/// \return - The index of the last occurence of \arg Str, or npos if not +/// found. +size_t StringRef::rfind(const StringRef &Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = Length - N + 1, e = 0; i != e;) { + --i; + if (substr(i, N).equals(Str)) + return i; + } + return npos; +} + +/// find_first_of - Find the first character from the string 'Chars' in the +/// current string or return npos if not in string. +StringRef::size_type StringRef::find_first_of(StringRef Chars) const { + for (size_type i = 0, e = Length; i != e; ++i) + if (Chars.find(Data[i]) != npos) + return i; + return npos; +} + +/// find_first_not_of - Find the first character in the string that is not +/// in the string 'Chars' or return npos if all are in string. Same as find. 
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const { + for (size_type i = 0, e = Length; i != e; ++i) + if (Chars.find(Data[i]) == npos) + return i; + return npos; +} + + +//===----------------------------------------------------------------------===// +// Helpful Algorithms +//===----------------------------------------------------------------------===// + +/// count - Return the number of non-overlapped occurrences of \arg Str in +/// the string. +size_t StringRef::count(const StringRef &Str) const { + size_t Count = 0; + size_t N = Str.size(); + if (N > Length) + return 0; + for (size_t i = 0, e = Length - N + 1; i != e; ++i) + if (substr(i, N).equals(Str)) + ++Count; + return Count; +} + +/// GetAsUnsignedInteger - Workhorse method that converts a integer character +/// sequence of radix up to 36 to an unsigned long long value. +static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { + // Autosense radix if not specified. + if (Radix == 0) { + if (Str.startswith("0x")) { + Str = Str.substr(2); + Radix = 16; + } else if (Str.startswith("0b")) { + Str = Str.substr(2); + Radix = 2; + } else if (Str.startswith("0")) + Radix = 8; + else + Radix = 10; + } + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Parse all the bytes of the string given this radix. Watch for overflow. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. + unsigned long long PrevResult = Result; + Result = Result*Radix+CharVal; + + // Check for overflow. + if (Result < PrevResult) + return true; + + Str = Str.substr(1); + } + + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { + return GetAsUnsignedInteger(*this, Radix, Result); +} + + +bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { + unsigned long long ULLVal; + + // Handle positive strings first. + if (empty() || front() != '-') { + if (GetAsUnsignedInteger(*this, Radix, ULLVal) || + // Check for value so large it overflows a signed value. + (long long)ULLVal < 0) + return true; + Result = ULLVal; + return false; + } + + // Get the positive part of the value. + if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || + // Reject values so large they'd overflow as negative signed, but allow + // "-0". This negates the unsigned so that the negative isn't undefined + // on signed overflow. 
+ (long long)-ULLVal > 0) + return true; + + Result = -ULLVal; + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, int &Result) const { + long long Val; + if (getAsInteger(Radix, Val) || + (int)Val != Val) + return true; + Result = Val; + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { + unsigned long long Val; + if (getAsInteger(Radix, Val) || + (unsigned)Val != Val) + return true; + Result = Val; + return false; +} diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp index c8c323876bfb2..299032f187156 100644 --- a/lib/Support/SystemUtils.cpp +++ b/lib/Support/SystemUtils.cpp @@ -12,22 +12,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Streams.h" #include "llvm/Support/SystemUtils.h" #include "llvm/System/Process.h" #include "llvm/System/Program.h" -#include <iostream> +#include "llvm/Support/raw_ostream.h" using namespace llvm; -bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check, +bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check, bool print_warning) { - if (stream_to_check == cout.stream() && - sys::Process::StandardOutIsDisplayed()) { + if (stream_to_check.is_displayed()) { if (print_warning) { - cerr << "WARNING: You're attempting to print out a bitcode file.\n" - << "This is inadvisable as it may cause display problems. If\n" - << "you REALLY want to taste LLVM bitcode first-hand, you\n" - << "can force output with the `-f' option.\n\n"; + errs() << "WARNING: You're attempting to print out a bitcode file.\n" + << "This is inadvisable as it may cause display problems. If\n" + << "you REALLY want to taste LLVM bitcode first-hand, you\n" + << "can force output with the `-f' option.\n\n"; } return true; } @@ -35,24 +33,17 @@ bool llvm::CheckBitcodeOutputToConsole(std::ostream* stream_to_check, /// FindExecutable - Find a named executable, giving the argv[0] of program -/// being executed. This allows us to find another LLVM tool if it is built -/// into the same directory, but that directory is neither the current -/// directory, nor in the PATH. If the executable cannot be found, return an -/// empty string. Return the input string if given a full path to an executable. -/// +/// being executed. This allows us to find another LLVM tool if it is built in +/// the same directory. If the executable cannot be found, return an +/// empty string. +/// @brief Find a named executable. #undef FindExecutable // needed on windows :( sys::Path llvm::FindExecutable(const std::string &ExeName, - const std::string &ProgramPath) { - // First check if the given name is already a valid path to an executable. - sys::Path Result(ExeName); - Result.makeAbsolute(); - if (Result.canExecute()) - return Result; - - // Otherwise check the directory that the calling program is in. We can do + const char *Argv0, void *MainAddr) { + // Check the directory that the calling program is in. We can do // this if ProgramPath contains at least one / character, indicating that it - // is a relative path to bugpoint itself. - Result = ProgramPath; + // is a relative path to the executable itself.
+ sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr); Result.eraseComponent(); if (!Result.isEmpty()) { Result.appendComponent(ExeName); @@ -60,5 +51,5 @@ sys::Path llvm::FindExecutable(const std::string &ExeName, return Result; } - return sys::Program::FindProgramByName(ExeName); + return sys::Path(); } diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp new file mode 100644 index 0000000000000..5896447f5ea5f --- /dev/null +++ b/lib/Support/TargetRegistry.cpp @@ -0,0 +1,92 @@ +//===--- TargetRegistry.cpp - Target registration -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetRegistry.h" +#include "llvm/System/Host.h" +#include <cassert> +using namespace llvm; + +// Clients are responsible for avoiding race conditions in registration. +static Target *FirstTarget = 0; + +TargetRegistry::iterator TargetRegistry::begin() { + return iterator(FirstTarget); +} + +const Target *TargetRegistry::lookupTarget(const std::string &TT, + std::string &Error) { + // Provide special warning when no targets are initialized. + if (begin() == end()) { + Error = "Unable to find target for this triple (no targets are registered)"; + return 0; + } + const Target *Best = 0, *EquallyBest = 0; + unsigned BestQuality = 0; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (unsigned Qual = it->TripleMatchQualityFn(TT)) { + if (!Best || Qual > BestQuality) { + Best = &*it; + EquallyBest = 0; + BestQuality = Qual; + } else if (Qual == BestQuality) + EquallyBest = &*it; + } + } + + if (!Best) { + Error = "No available targets are compatible with this triple, " + "see -version for the available targets."; + return 0; + } + + // Otherwise, take the best target, but make sure we don't have two equally + // good best targets. + if (EquallyBest) { + Error = std::string("Cannot choose between targets \"") + + Best->Name + "\" and \"" + EquallyBest->Name + "\""; + return 0; + } + + return Best; +} + +void TargetRegistry::RegisterTarget(Target &T, + const char *Name, + const char *ShortDesc, + Target::TripleMatchQualityFnTy TQualityFn, + bool HasJIT) { + assert(Name && ShortDesc && TQualityFn && + "Missing required target information!"); + + // Check if this target has already been initialized, we allow this as a + // convenience to some clients. + if (T.Name) + return; + + // Add to the list of targets.
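// An aside (not part of this patch): how clients typically consult the
// registry populated by RegisterTarget(); the triple is made up, and some
// targets must have been registered first (e.g. via InitializeAllTargets()).
//
//   std::string Err;
//   const llvm::Target *T =
//       llvm::TargetRegistry::lookupTarget("x86_64-pc-linux", Err);
//   if (!T)
//     ; // Err explains: none registered, none compatible, or ambiguous.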
+ T.Next = FirstTarget; + FirstTarget = &T; + + T.Name = Name; + T.ShortDesc = ShortDesc; + T.TripleMatchQualityFn = TQualityFn; + T.HasJIT = HasJIT; +} + +const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { + const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error); + + if (TheTarget && !TheTarget->hasJIT()) { + Error = "No JIT compatible target available for this host"; + return 0; + } + + return TheTarget; +} + diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index ede1dc96e8273..dd58d1f68b4de 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -14,16 +14,16 @@ #include "llvm/Support/Timer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" #include "llvm/System/Process.h" #include <algorithm> -#include <fstream> #include <functional> #include <map> using namespace llvm; // GetLibSupportInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern std::ostream *GetLibSupportInfoOutputFile(); } +namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); } // getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy // of constructor/destructor ordering being unspecified by C++. Basically the @@ -145,7 +145,7 @@ static TimeRecord getTimeRecord(bool Start) { static ManagedStatic<std::vector<Timer*> > ActiveTimers; void Timer::startTimer() { - sys::SmartScopedLock<true> L(&Lock); + sys::SmartScopedLock<true> L(Lock); Started = true; ActiveTimers->push_back(this); TimeRecord TR = getTimeRecord(true); @@ -157,7 +157,7 @@ void Timer::startTimer() { } void Timer::stopTimer() { - sys::SmartScopedLock<true> L(&Lock); + sys::SmartScopedLock<true> L(Lock); TimeRecord TR = getTimeRecord(false); Elapsed += TR.Elapsed; UserTime += TR.UserTime; @@ -229,7 +229,7 @@ static ManagedStatic<Name2Timer> NamedTimers; static ManagedStatic<Name2Pair> NamedGroupedTimers; static Timer &getNamedRegionTimer(const std::string &Name) { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); Name2Timer::iterator I = NamedTimers->find(Name); if (I != NamedTimers->end()) return I->second; @@ -239,7 +239,7 @@ static Timer &getNamedRegionTimer(const std::string &Name, const std::string &GroupName) { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); Name2Pair::iterator I = NamedGroupedTimers->find(GroupName); if (I == NamedGroupedTimers->end()) { @@ -269,38 +269,17 @@ NamedRegionTimer::NamedRegionTimer(const std::string &Name, // TimerGroup Implementation //===----------------------------------------------------------------------===// -// printAlignedFP - Simulate the printf "%A.Bf" format, where A is the -// TotalWidth size, and B is the AfterDec size. -// -static void printAlignedFP(double Val, unsigned AfterDec, unsigned TotalWidth, - std::ostream &OS) { - assert(TotalWidth >= AfterDec+1 && "Bad FP Format!"); - OS.width(TotalWidth-AfterDec-1); - char OldFill = OS.fill(); - OS.fill(' '); - OS << (int)Val; // Integer part; - OS << "."; - OS.width(AfterDec); - OS.fill('0'); - unsigned ResultFieldSize = 1; - while (AfterDec--) ResultFieldSize *= 10; - OS << (int)(Val*ResultFieldSize) % ResultFieldSize; - OS.fill(OldFill); -} -static void printVal(double Val, double Total, std::ostream &OS) { +static void printVal(double Val, double Total, raw_ostream &OS) { if (Total < 1e-7) // Avoid dividing by zero...
OS << " ----- "; else { - OS << " "; - printAlignedFP(Val, 4, 7, OS); - OS << " ("; - printAlignedFP(Val*100/Total, 1, 5, OS); - OS << "%)"; + OS << " " << format("%7.4f", Val) << " ("; + OS << format("%5.1f", Val*100/Total) << "%)"; } } -void Timer::print(const Timer &Total, std::ostream &OS) { +void Timer::print(const Timer &Total, raw_ostream &OS) { if (&Total < this) { Total.Lock.acquire(); Lock.acquire(); @@ -320,13 +299,11 @@ void Timer::print(const Timer &Total, std::ostream &OS) { OS << " "; if (Total.MemUsed) { - OS.width(9); - OS << MemUsed << " "; + OS << format("%9lld", (long long)MemUsed) << " "; } if (Total.PeakMem) { if (PeakMem) { - OS.width(9); - OS << PeakMem << " "; + OS << format("%9lld", (long long)PeakMem) << " "; } else OS << " "; } @@ -344,28 +321,30 @@ void Timer::print(const Timer &Total, std::ostream &OS) { } // GetLibSupportInfoOutputFile - Return a file stream to print our output on... -std::ostream * +raw_ostream * llvm::GetLibSupportInfoOutputFile() { std::string &LibSupportInfoOutputFilename = getLibSupportInfoOutputFilename(); if (LibSupportInfoOutputFilename.empty()) - return cerr.stream(); + return &errs(); if (LibSupportInfoOutputFilename == "-") - return cout.stream(); + return &outs(); - std::ostream *Result = new std::ofstream(LibSupportInfoOutputFilename.c_str(), - std::ios::app); - if (!Result->good()) { - cerr << "Error opening info-output-file '" + + std::string Error; + raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(), + Error, raw_fd_ostream::F_Append); + if (Error.empty()) + return Result; + + errs() << "Error opening info-output-file '" << LibSupportInfoOutputFilename << " for appending!\n"; - delete Result; - return cerr.stream(); - } - return Result; + delete Result; + return &errs(); } void TimerGroup::removeTimer() { - sys::SmartScopedLock L(&*TimerLock); + sys::SmartScopedLock L(*TimerLock); if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report... // Sort the timers in descending order by amount of time taken... std::sort(TimersToPrint.begin(), TimersToPrint.end(), @@ -375,7 +354,7 @@ void TimerGroup::removeTimer() { unsigned Padding = (80-Name.length())/2; if (Padding > 80) Padding = 0; // Don't allow "negative" numbers - std::ostream *OutStream = GetLibSupportInfoOutputFile(); + raw_ostream *OutStream = GetLibSupportInfoOutputFile(); ++NumTimers; { // Scope to contain Total timer... don't allow total timer to drop us to @@ -397,10 +376,8 @@ void TimerGroup::removeTimer() { if (this != DefaultTimerGroup) { *OutStream << " Total Execution Time: "; - printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream); - *OutStream << " seconds ("; - printAlignedFP(Total.getWallTime(), 4, 5, *OutStream); - *OutStream << " wall clock)\n"; + *OutStream << format("%5.4f", Total.getProcessTime()) << " seconds ("; + *OutStream << format("%5.4f", Total.getWallTime()) << " wall clock)\n"; } *OutStream << "\n"; @@ -422,24 +399,25 @@ void TimerGroup::removeTimer() { TimersToPrint[i].print(Total, *OutStream); Total.print(Total, *OutStream); - *OutStream << std::endl; // Flush output + *OutStream << '\n'; + OutStream->flush(); } --NumTimers; TimersToPrint.clear(); - if (OutStream != cerr.stream() && OutStream != cout.stream()) + if (OutStream != &errs() && OutStream != &outs()) delete OutStream; // Close the file... 
} } void TimerGroup::addTimer() { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); ++NumTimers; } void TimerGroup::addTimerToPrint(const Timer &T) { - sys::SmartScopedLock<true> L(&*TimerLock); + sys::SmartScopedLock<true> L(*TimerLock); TimersToPrint.push_back(Timer(true, T)); } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 279bd43ac5a82..6f805da332997 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -8,6 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Triple.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include <cassert> #include <cstring> using namespace llvm; @@ -18,22 +21,60 @@ const char *Triple::getArchTypeName(ArchType Kind) { switch (Kind) { case InvalidArch: return ""; case UnknownArch: return "unknown"; - - case x86: return "i386"; - case x86_64: return "x86_64"; - case ppc: return "powerpc"; - case ppc64: return "powerpc64"; + + case alpha: return "alpha"; + case arm: return "arm"; + case bfin: return "bfin"; + case cellspu: return "cellspu"; + case mips: return "mips"; + case mipsel: return "mipsel"; + case msp430: return "msp430"; + case pic16: return "pic16"; + case ppc64: return "powerpc64"; + case ppc: return "powerpc"; + case sparc: return "sparc"; + case systemz: return "s390x"; + case tce: return "tce"; + case thumb: return "thumb"; + case x86: return "i386"; + case x86_64: return "x86_64"; + case xcore: return "xcore"; } return ""; } +const char *Triple::getArchTypePrefix(ArchType Kind) { + switch (Kind) { + default: + return 0; + + case alpha: return "alpha"; + + case arm: + case thumb: return "arm"; + + case bfin: return "bfin"; + + case cellspu: return "spu"; + + case ppc64: + case ppc: return "ppc"; + + case sparc: return "sparc"; + + case x86: + case x86_64: return "x86"; + case xcore: return "xcore"; + } +} + const char *Triple::getVendorTypeName(VendorType Kind) { switch (Kind) { case UnknownVendor: return "unknown"; case Apple: return "apple"; - case PC: return "PC"; + case PC: return "pc"; } return ""; @@ -44,35 +85,166 @@ const char *Triple::getOSTypeName(OSType Kind) { case UnknownOS: return "unknown"; case AuroraUX: return "auroraux"; + case Cygwin: return "cygwin"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; case Linux: return "linux"; + case MinGW32: return "mingw32"; + case MinGW64: return "mingw64"; + case NetBSD: return "netbsd"; case OpenBSD: return "openbsd"; + case Solaris: return "solaris"; + case Win32: return "win32"; } return ""; } +Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) { + if (Name == "alpha") + return alpha; + if (Name == "arm") + return arm; + if (Name == "bfin") + return bfin; + if (Name == "cellspu") + return cellspu; + if (Name == "mips") + return mips; + if (Name == "mipsel") + return mipsel; + if (Name == "msp430") + return msp430; + if (Name == "pic16") + return pic16; + if (Name == "ppc64") + return ppc64; + if (Name == "ppc") + return ppc; + if (Name == "sparc") + return sparc; + if (Name == "systemz") + return systemz; + if (Name == "tce") + return tce; + if (Name == "thumb") + return thumb; + if (Name == "x86") + return x86; + if (Name == "x86-64") + return x86_64; + if (Name == "xcore") + return xcore; + + return UnknownArch; +} + +Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) { + // See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for + // archs which Darwin doesn't use.
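// An aside (not part of this patch): the StringRef-based Triple accessors in
// this file, exercised on a made-up triple.
//
//   llvm::Triple T("thumbv6-apple-darwin10");
//   T.getArch();     // Triple::thumb, via the "thumbv" prefix rule below
//   T.getArchName(); // "thumbv6" -- the raw first component is preserved
//   T.getOSName();   // "darwin10"; getDarwinNumber() then yields 10, 0, 0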
+ + // The matching this routine does is fairly pointless, since it is neither the + // complete architecture list, nor a reasonable subset. The problem is that + // historically the driver driver accepts this and also ties its -march= + // handling to the architecture name, so we need to be careful before removing + // support for it. + + // This code must be kept in sync with Clang's Darwin specific argument + // translation. + + if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" || + Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" || + Str == "ppc7450" || Str == "ppc970") + return Triple::ppc; + + if (Str == "ppc64") + return Triple::ppc64; + + if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" || + Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" || + Str == "pentIIm5" || Str == "pentium4") + return Triple::x86; + + if (Str == "x86_64") + return Triple::x86_64; + + // This is derived from the driver driver. + if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" || + Str == "armv6" || Str == "armv7") + return Triple::arm; + + return Triple::UnknownArch; +} + // void Triple::Parse() const { assert(!isInitialized() && "Invalid parse call."); - std::string ArchName = getArchName(); + StringRef ArchName = getArchName(); + StringRef VendorName = getVendorName(); + StringRef OSName = getOSName(); + if (ArchName.size() == 4 && ArchName[0] == 'i' && - ArchName[2] == '8' && ArchName[3] == '6') + ArchName[2] == '8' && ArchName[3] == '6' && + ArchName[1] - '3' < 6) // i[3-9]86 Arch = x86; else if (ArchName == "amd64" || ArchName == "x86_64") Arch = x86_64; + else if (ArchName == "bfin") + Arch = bfin; + else if (ArchName == "pic16") + Arch = pic16; else if (ArchName == "powerpc") Arch = ppc; else if (ArchName == "powerpc64") Arch = ppc64; + else if (ArchName == "arm" || + ArchName.startswith("armv") || + ArchName == "xscale") + Arch = arm; + else if (ArchName == "thumb" || + ArchName.startswith("thumbv")) + Arch = thumb; + else if (ArchName.startswith("alpha")) + Arch = alpha; + else if (ArchName == "spu" || ArchName == "cellspu") + Arch = cellspu; + else if (ArchName == "msp430") + Arch = msp430; + else if (ArchName == "mips" || ArchName == "mipsallegrex") + Arch = mips; + else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" || + ArchName == "psp") + Arch = mipsel; + else if (ArchName == "sparc") + Arch = sparc; + else if (ArchName == "s390x") + Arch = systemz; + else if (ArchName == "tce") + Arch = tce; + else if (ArchName == "xcore") + Arch = xcore; else Arch = UnknownArch; - std::string VendorName = getVendorName(); + + // Handle some exceptional cases where the OS / environment components are + // stuck into the vendor field. + if (StringRef(getTriple()).count('-') == 1) { + StringRef VendorName = getVendorName(); + + if (VendorName.startswith("mingw32")) { // 'i386-mingw32', etc. + Vendor = PC; + OS = MinGW32; + return; + } + + // arm-elf is another example, but we don't currently parse anything about + // the environment. 
+ } + if (VendorName == "apple") Vendor = Apple; else if (VendorName == "pc") @@ -80,78 +252,129 @@ void Triple::Parse() const { else Vendor = UnknownVendor; - std::string OSName = getOSName(); - if (memcmp(&OSName[0], "auroraux", 8) == 0) + if (OSName.startswith("auroraux")) OS = AuroraUX; - else if (memcmp(&OSName[0], "darwin", 6) == 0) + else if (OSName.startswith("cygwin")) + OS = Cygwin; + else if (OSName.startswith("darwin")) OS = Darwin; - else if (memcmp(&OSName[0], "dragonfly", 9) == 0) + else if (OSName.startswith("dragonfly")) OS = DragonFly; - else if (memcmp(&OSName[0], "freebsd", 7) == 0) + else if (OSName.startswith("freebsd")) OS = FreeBSD; - else if (memcmp(&OSName[0], "linux", 5) == 0) + else if (OSName.startswith("linux")) OS = Linux; - else if (memcmp(&OSName[0], "openbsd", 7) == 0) + else if (OSName.startswith("mingw32")) + OS = MinGW32; + else if (OSName.startswith("mingw64")) + OS = MinGW64; + else if (OSName.startswith("netbsd")) + OS = NetBSD; + else if (OSName.startswith("openbsd")) OS = OpenBSD; + else if (OSName.startswith("solaris")) + OS = Solaris; + else if (OSName.startswith("win32")) + OS = Win32; else OS = UnknownOS; assert(isInitialized() && "Failed to initialize!"); } -static std::string extract(const std::string &A, - std::string::size_type begin, - std::string::size_type end) { - if (begin == std::string::npos) - return ""; - if (end == std::string::npos) - return A.substr(begin); - return A.substr(begin, end - begin); +StringRef Triple::getArchName() const { + return StringRef(Data).split('-').first; // Isolate first component } -static std::string extract1(const std::string &A, - std::string::size_type begin, - std::string::size_type end) { - if (begin == std::string::npos || begin == end) - return ""; - return extract(A, begin + 1, end); +StringRef Triple::getVendorName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').first; // Isolate second component } -std::string Triple::getArchName() const { - std::string Tmp = Data; - return extract(Tmp, 0, Tmp.find('-')); +StringRef Triple::getOSName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').first; // Isolate third component } -std::string Triple::getVendorName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, Tmp.find('-')); +StringRef Triple::getEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').second; // Strip third component } -std::string Triple::getOSName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, Tmp.find('-')); +StringRef Triple::getOSAndEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').second; // Strip second component } -std::string Triple::getEnvironmentName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, std::string::npos); +static unsigned EatNumber(StringRef &Str) { + assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && 
"Not a number"); + unsigned Result = Str[0]-'0'; + + // Eat the digit. + Str = Str.substr(1); + + // Handle "darwin11". + if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') { + Result = Result*10 + (Str[0] - '0'); + // Eat the digit. + Str = Str.substr(1); + } + + return Result; } -std::string Triple::getOSAndEnvironmentName() const { - std::string Tmp = Data; - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - Tmp = extract1(Tmp, Tmp.find('-'), std::string::npos); - return extract(Tmp, 0, std::string::npos); +/// getDarwinNumber - Parse the 'darwin number' out of the specific target +/// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is +/// not defined, return 0's. This requires that the triple have an OSType of +/// darwin before it is called. +void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min, + unsigned &Revision) const { + assert(getOS() == Darwin && "Not a darwin target triple!"); + StringRef OSName = getOSName(); + assert(OSName.startswith("darwin") && "Unknown darwin target triple!"); + + // Strip off "darwin". + OSName = OSName.substr(6); + + Maj = Min = Revision = 0; + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + // The major version is the first digit. + Maj = EatNumber(OSName); + if (OSName.empty()) return; + + // Handle minor version: 10.4.9 -> darwin8.9. + if (OSName[0] != '.') + return; + + // Eat the '.'. + OSName = OSName.substr(1); + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + Min = EatNumber(OSName); + if (OSName.empty()) return; + + // Handle revision darwin8.9.1 + if (OSName[0] != '.') + return; + + // Eat the '.'. + OSName = OSName.substr(1); + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + Revision = EatNumber(OSName); } -void Triple::setTriple(const std::string &Str) { - Data = Str; +void Triple::setTriple(const Twine &Str) { + Data = Str.str(); Arch = InvalidArch; } @@ -167,15 +390,22 @@ void Triple::setOS(OSType Kind) { setOSName(getOSTypeName(Kind)); } -void Triple::setArchName(const std::string &Str) { - setTriple(Str + "-" + getVendorName() + "-" + getOSAndEnvironmentName()); +void Triple::setArchName(const StringRef &Str) { + // Work around a miscompilation bug for Twines in gcc 4.0.3. 
+  SmallString<64> Triple;
+  Triple += Str;
+  Triple += "-";
+  Triple += getVendorName();
+  Triple += "-";
+  Triple += getOSAndEnvironmentName();
+  setTriple(Triple.str());
 }
 
-void Triple::setVendorName(const std::string &Str) {
+void Triple::setVendorName(const StringRef &Str) {
   setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
 }
 
-void Triple::setOSName(const std::string &Str) {
+void Triple::setOSName(const StringRef &Str) {
   if (hasEnvironment())
     setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
               "-" + getEnvironmentName());
@@ -183,11 +413,11 @@ void Triple::setOSName(const std::string &Str) {
     setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
 }
 
-void Triple::setEnvironmentName(const std::string &Str) {
+void Triple::setEnvironmentName(const StringRef &Str) {
   setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
             "-" + Str);
 }
 
-void Triple::setOSAndEnvironmentName(const std::string &Str) {
+void Triple::setOSAndEnvironmentName(const StringRef &Str) {
   setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
 }
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
new file mode 100644
index 0000000000000..292c0c2b9e5e5
--- /dev/null
+++ b/lib/Support/Twine.cpp
@@ -0,0 +1,133 @@
+//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+std::string Twine::str() const {
+  SmallString<256> Vec;
+  toVector(Vec);
+  return std::string(Vec.begin(), Vec.end());
+}
+
+void Twine::toVector(SmallVectorImpl<char> &Out) const {
+  raw_svector_ostream OS(Out);
+  print(OS);
+}
+
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+                          NodeKind Kind) const {
+  switch (Kind) {
+  case Twine::NullKind: break;
+  case Twine::EmptyKind: break;
+  case Twine::TwineKind:
+    static_cast<const Twine*>(Ptr)->print(OS);
+    break;
+  case Twine::CStringKind:
+    OS << static_cast<const char*>(Ptr);
+    break;
+  case Twine::StdStringKind:
+    OS << *static_cast<const std::string*>(Ptr);
+    break;
+  case Twine::StringRefKind:
+    OS << *static_cast<const StringRef*>(Ptr);
+    break;
+  case Twine::DecUIKind:
+    OS << *static_cast<const unsigned int*>(Ptr);
+    break;
+  case Twine::DecIKind:
+    OS << *static_cast<const int*>(Ptr);
+    break;
+  case Twine::DecULKind:
+    OS << *static_cast<const unsigned long*>(Ptr);
+    break;
+  case Twine::DecLKind:
+    OS << *static_cast<const long*>(Ptr);
+    break;
+  case Twine::DecULLKind:
+    OS << *static_cast<const unsigned long long*>(Ptr);
+    break;
+  case Twine::DecLLKind:
+    OS << *static_cast<const long long*>(Ptr);
+    break;
+  case Twine::UHexKind:
+    OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+    break;
+  }
+}
+
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+                              NodeKind Kind) const {
+  switch (Kind) {
+  case Twine::NullKind:
+    OS << "null"; break;
+  case Twine::EmptyKind:
+    OS << "empty"; break;
+  case Twine::TwineKind:
+    OS << "rope:";
+    static_cast<const Twine*>(Ptr)->printRepr(OS);
+    break;
+  case Twine::CStringKind:
+    OS << "cstring:\""
+       << static_cast<const char*>(Ptr) << "\"";
+    break;
+  case Twine::StdStringKind:
+    OS << "std::string:\""
+       << static_cast<const std::string*>(Ptr) << "\"";
+    break;
+  case Twine::StringRefKind:
+    OS << "stringref:\""
+       << static_cast<const StringRef*>(Ptr) << "\"";
+    break;
+  case Twine::DecUIKind:
+    OS << "decUI:\"" << *static_cast<const unsigned int*>(Ptr) << "\"";
+    break;
+  case Twine::DecIKind:
+    OS << "decI:\"" << *static_cast<const int*>(Ptr) << "\"";
+    break;
+  case
Twine::DecULKind:
+    OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
+    break;
+  case Twine::DecLKind:
+    OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\"";
+    break;
+  case Twine::DecULLKind:
+    OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\"";
+    break;
+  case Twine::DecLLKind:
+    OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\"";
+    break;
+  case Twine::UHexKind:
+    OS << "uhex:\"" << static_cast<const uint64_t*>(Ptr) << "\"";
+    break;
+  }
+}
+
+void Twine::print(raw_ostream &OS) const {
+  printOneChild(OS, LHS, getLHSKind());
+  printOneChild(OS, RHS, getRHSKind());
+}
+
+void Twine::printRepr(raw_ostream &OS) const {
+  OS << "(Twine ";
+  printOneChildRepr(OS, LHS, getLHSKind());
+  OS << " ";
+  printOneChildRepr(OS, RHS, getRHSKind());
+  OS << ")";
+}
+
+void Twine::dump() const {
+  print(llvm::errs());
+}
+
+void Twine::dumpRepr() const {
+  printRepr(llvm::errs());
+}
diff --git a/lib/Support/raw_os_ostream.cpp b/lib/Support/raw_os_ostream.cpp
new file mode 100644
index 0000000000000..3374dd7a66a0b
--- /dev/null
+++ b/lib/Support/raw_os_ostream.cpp
@@ -0,0 +1,30 @@
+//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements support adapting raw_ostream to std::ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_os_ostream.h"
+#include <ostream>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  raw_os_ostream
+//===----------------------------------------------------------------------===//
+
+raw_os_ostream::~raw_os_ostream() {
+  flush();
+}
+
+void raw_os_ostream::write_impl(const char *Ptr, size_t Size) {
+  OS.write(Ptr, Size);
+}
+
+uint64_t raw_os_ostream::current_pos() { return OS.tellp(); }
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 42e6fda97baf0..0a82cc1d10c39 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -18,7 +18,11 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
-#include <ostream>
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cstdio>
+#include <sys/stat.h>
 
 #if defined(HAVE_UNISTD_H)
 # include <unistd.h>
@@ -43,10 +47,59 @@
 using namespace llvm;
 
+raw_ostream::~raw_ostream() {
+  // raw_ostream's subclasses should take care to flush the buffer
+  // in their destructors.
+  assert(OutBufCur == OutBufStart &&
+         "raw_ostream destructor called with non-empty buffer!");
+
+  if (BufferMode == InternalBuffer)
+    delete [] OutBufStart;
+
+  // If there are any pending errors, report them now. Clients wishing
+  // to avoid llvm_report_error calls should check for errors with
+  // has_error() and clear the error flag with clear_error() before
+  // destructing raw_ostream objects which may have errors.
+  if (Error)
+    llvm_report_error("IO failure on output stream.");
+}
 
 // An out of line virtual method to provide a home for the class vtable.
 void raw_ostream::handle() {}
 
+size_t raw_ostream::preferred_buffer_size() {
+  // BUFSIZ is intended to be a reasonable default.
+  return BUFSIZ;
+}
+
+void raw_ostream::SetBuffered() {
+  // Ask the subclass to determine an appropriate buffer size.
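+  // (Editorial sketch, not part of the original change: the Twine code added
+  // earlier in this patch is typically driven as
+  //   std::string S = (Twine("bb") + "." + Twine(42u)).str();
+  // which renders the whole concatenation tree once through toVector() and
+  // print(), instead of building intermediate std::strings; the names and
+  // values here are illustrative only.)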
+  if (size_t Size = preferred_buffer_size())
+    SetBufferSize(Size);
+  else
+    // It may return 0, meaning this stream should be unbuffered.
+    SetUnbuffered();
+}
+
+void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
+                                   BufferKind Mode) {
+  assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
+          (Mode != Unbuffered && BufferStart && Size)) &&
+         "stream must be unbuffered or have at least one byte");
+  // Make sure the current buffer is free of content (we can't flush here; the
+  // child buffer management logic will be in write_impl).
+  assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
+
+  if (BufferMode == InternalBuffer)
+    delete [] OutBufStart;
+  OutBufStart = BufferStart;
+  OutBufEnd = OutBufStart+Size;
+  OutBufCur = OutBufStart;
+  BufferMode = Mode;
+
+  assert(OutBufStart <= OutBufEnd && "Invalid size!");
+}
+
 raw_ostream &raw_ostream::operator<<(unsigned long N) {
   // Zero is a special case.
   if (N == 0)
@@ -73,10 +126,10 @@ raw_ostream &raw_ostream::operator<<(long N) {
 }
 
 raw_ostream &raw_ostream::operator<<(unsigned long long N) {
-  // Zero is a special case.
-  if (N == 0)
-    return *this << '0';
-
+  // Output using 32-bit div/mod when possible.
+  if (N == static_cast<unsigned long>(N))
+    return this->operator<<(static_cast<unsigned long>(N));
+
   char NumberBuffer[20];
   char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
   char *CurPtr = EndPtr;
@@ -97,10 +150,7 @@ raw_ostream &raw_ostream::operator<<(long long N) {
   return this->operator<<(static_cast<unsigned long long>(N));
 }
 
-raw_ostream &raw_ostream::operator<<(const void *P) {
-  uintptr_t N = (uintptr_t) P;
-  *this << '0' << 'x';
-
+raw_ostream &raw_ostream::write_hex(unsigned long long N) {
   // Zero is a special case.
   if (N == 0)
   return *this << '0';
@@ -110,7 +160,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) {
   char *CurPtr = EndPtr;
 
   while (N) {
-    unsigned x = N % 16;
+    uintptr_t x = N % 16;
     *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10);
     N /= 16;
   }
@@ -118,44 +168,78 @@
   return write(CurPtr, EndPtr-CurPtr);
 }
 
+raw_ostream &raw_ostream::operator<<(const void *P) {
+  *this << '0' << 'x';
+
+  return write_hex((uintptr_t) P);
+}
+
+raw_ostream &raw_ostream::operator<<(double N) {
+  this->operator<<(ftostr(N));
+  return *this;
+}
+
+
 void raw_ostream::flush_nonempty() {
   assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
-  write_impl(OutBufStart, OutBufCur - OutBufStart);
-  OutBufCur = OutBufStart;
+  size_t Length = OutBufCur - OutBufStart;
+  OutBufCur = OutBufStart;
+  write_impl(OutBufStart, Length);
 }
 
 raw_ostream &raw_ostream::write(unsigned char C) {
   // Group exceptional cases into a single branch.
-  if (OutBufCur >= OutBufEnd) {
-    if (Unbuffered) {
-      write_impl(reinterpret_cast<char*>(&C), 1);
-      return *this;
+  if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) {
+    if (BUILTIN_EXPECT(!OutBufStart, false)) {
+      if (BufferMode == Unbuffered) {
+        write_impl(reinterpret_cast<char*>(&C), 1);
+        return *this;
+      }
+      // Set up a buffer and start over.
+      SetBuffered();
+      return write(C);
     }
-
-    if (!OutBufStart)
-      SetBufferSize();
-    else
-      flush_nonempty();
+
+    flush_nonempty();
   }
 
   *OutBufCur++ = C;
   return *this;
 }
 
-raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) {
+raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
   // Group exceptional cases into a single branch.
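+  // (Editorial note: a hedged sketch of the slow path implemented below,
+  // assuming a 4-byte buffer purely for the sake of the example:
+  //   OS.write("abcdef", 6);  // copies "abcd", flushes, then buffers "ef"
+  // An unbuffered stream instead forwards straight to write_impl(), and a
+  // stream with no buffer yet allocated triggers SetBuffered() first.)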
if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) { - if (Unbuffered) { - write_impl(Ptr, Size); - return *this; + if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (BufferMode == Unbuffered) { + write_impl(Ptr, Size); + return *this; + } + // Set up a buffer and start over. + SetBuffered(); + return write(Ptr, Size); } - - if (!OutBufStart) - SetBufferSize(); - else + + // Write out the data in buffer-sized blocks until the remainder + // fits within the buffer. + do { + size_t NumBytes = OutBufEnd - OutBufCur; + copy_to_buffer(Ptr, NumBytes); flush_nonempty(); + Ptr += NumBytes; + Size -= NumBytes; + } while (OutBufCur+Size > OutBufEnd); } - + + copy_to_buffer(Ptr, Size); + + return *this; +} + +void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) { + assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!"); + // Handle short strings specially, memcpy isn't very good at very short // strings. switch (Size) { @@ -165,40 +249,24 @@ raw_ostream &raw_ostream::write(const char *Ptr, unsigned Size) { case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH case 0: break; default: - // Normally the string to emit is shorter than the buffer. - if (Size <= unsigned(OutBufEnd-OutBufStart)) { - memcpy(OutBufCur, Ptr, Size); - break; - } - - // Otherwise we are emitting a string larger than our buffer. We - // know we already flushed, so just write it out directly. - write_impl(Ptr, Size); - Size = 0; + memcpy(OutBufCur, Ptr, Size); break; } - OutBufCur += Size; - return *this; + OutBufCur += Size; } // Formatted output. raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { // If we have more than a few bytes left in our output buffer, try // formatting directly onto its end. - // - // FIXME: This test is a bit silly, since if we don't have enough - // space in the buffer we will have to flush the formatted output - // anyway. We should just flush upfront in such cases, and use the - // whole buffer as our scratch pad. Note, however, that this case is - // also necessary for correctness on unbuffered streams. - unsigned NextBufferSize = 127; - if (OutBufEnd-OutBufCur > 3) { - unsigned BufferBytesLeft = OutBufEnd-OutBufCur; - unsigned BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft); + size_t NextBufferSize = 127; + size_t BufferBytesLeft = OutBufEnd - OutBufCur; + if (BufferBytesLeft > 3) { + size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft); // Common case is that we have plenty of space. - if (BytesUsed < BufferBytesLeft) { + if (BytesUsed <= BufferBytesLeft) { OutBufCur += BytesUsed; return *this; } @@ -217,11 +285,11 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { V.resize(NextBufferSize); // Try formatting into the SmallVector. - unsigned BytesUsed = Fmt.print(&V[0], NextBufferSize); + size_t BytesUsed = Fmt.print(V.data(), NextBufferSize); // If BytesUsed fit into the vector, we win. if (BytesUsed <= NextBufferSize) - return write(&V[0], BytesUsed); + return write(V.data(), BytesUsed); // Otherwise, try again with a new size. assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?"); @@ -229,6 +297,26 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { } } +/// indent - Insert 'NumSpaces' spaces. +raw_ostream &raw_ostream::indent(unsigned NumSpaces) { + static const char Spaces[] = " " + " " + " "; + + // Usually the indentation is small, handle it with a fastpath. 
+ if (NumSpaces < array_lengthof(Spaces)) + return write(Spaces, NumSpaces); + + while (NumSpaces) { + unsigned NumToWrite = std::min(NumSpaces, + (unsigned)array_lengthof(Spaces)-1); + write(Spaces, NumToWrite); + NumSpaces -= NumToWrite; + } + return *this; +} + + //===----------------------------------------------------------------------===// // Formatted Output //===----------------------------------------------------------------------===// @@ -245,8 +333,12 @@ void format_object_base::home() { /// occurs, information about the error is put into ErrorInfo, and the /// stream should be immediately destroyed; the string will be empty /// if no error occurred. -raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary, - std::string &ErrorInfo) : pos(0) { +raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, + unsigned Flags) : pos(0) { + // Verify that we don't have both "append" and "excl". + assert((!(Flags & F_Excl) || !(Flags & F_Append)) && + "Cannot specify both 'excl' and 'append' file creation flags!"); + ErrorInfo.clear(); // Handle "-" as stdout. @@ -254,18 +346,26 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary, FD = STDOUT_FILENO; // If user requested binary then put stdout into binary mode if // possible. - if (Binary) + if (Flags & F_Binary) sys::Program::ChangeStdoutToBinary(); ShouldClose = false; return; } - int Flags = O_WRONLY|O_CREAT|O_TRUNC; + int OpenFlags = O_WRONLY|O_CREAT; #ifdef O_BINARY - if (Binary) - Flags |= O_BINARY; + if (Flags & F_Binary) + OpenFlags |= O_BINARY; #endif - FD = open(Filename, Flags, 0644); + + if (Flags & F_Append) + OpenFlags |= O_APPEND; + else + OpenFlags |= O_TRUNC; + if (Flags & F_Excl) + OpenFlags |= O_EXCL; + + FD = open(Filename, OpenFlags, 0664); if (FD < 0) { ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; ShouldClose = false; @@ -275,33 +375,56 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, bool Binary, } raw_fd_ostream::~raw_fd_ostream() { - if (FD >= 0) { - flush(); - if (ShouldClose) - ::close(FD); - } + if (FD < 0) return; + flush(); + if (ShouldClose) + if (::close(FD) != 0) + error_detected(); } -void raw_fd_ostream::write_impl(const char *Ptr, unsigned Size) { + +void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { assert (FD >= 0 && "File already closed."); pos += Size; - ::write(FD, Ptr, Size); + if (::write(FD, Ptr, Size) != (ssize_t) Size) + error_detected(); } void raw_fd_ostream::close() { assert (ShouldClose); ShouldClose = false; flush(); - ::close(FD); + if (::close(FD) != 0) + error_detected(); FD = -1; } uint64_t raw_fd_ostream::seek(uint64_t off) { flush(); - pos = lseek(FD, off, SEEK_SET); + pos = ::lseek(FD, off, SEEK_SET); + if (pos != off) + error_detected(); return pos; } +size_t raw_fd_ostream::preferred_buffer_size() { +#if !defined(_MSC_VER) && !defined(__MINGW32__) // Windows has no st_blksize. + assert(FD >= 0 && "File not yet open!"); + struct stat statbuf; + if (fstat(FD, &statbuf) == 0) { + // If this is a terminal, don't use buffering. Line buffering + // would be a more traditional thing to do, but it's not worth + // the complexity. + if (S_ISCHR(statbuf.st_mode) && isatty(FD)) + return 0; + // Return the preferred block size. 
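+  // (Editorial sketch, not part of the original change: typical use of the
+  // flag-based raw_fd_ostream constructor above; the file name is an example
+  // and the qualified F_Append spelling assumes the flags are members of
+  // raw_fd_ostream:
+  //   std::string Err;
+  //   raw_fd_ostream Log("out.log", Err, raw_fd_ostream::F_Append);
+  //   if (Err.empty())
+  //     Log << "appended line\n";
+  // F_Excl and F_Append are mutually exclusive, as the assert above enforces.)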
+    return statbuf.st_blksize;
+  }
+  error_detected();
+#endif
+  return raw_ostream::preferred_buffer_size();
+}
+
 raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
                                          bool bg) {
   if (sys::Process::ColorNeedsFlush())
@@ -310,7 +433,7 @@ raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
     (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
     : sys::Process::OutputColor(colors, bold, bg);
   if (colorcode) {
-    unsigned len = strlen(colorcode);
+    size_t len = strlen(colorcode);
     write(colorcode, len);
     // don't account colors towards output characters
     pos -= len;
@@ -323,7 +446,7 @@ raw_ostream &raw_fd_ostream::resetColor() {
   flush();
   const char *colorcode = sys::Process::ResetColor();
   if (colorcode) {
-    unsigned len = strlen(colorcode);
+    size_t len = strlen(colorcode);
     write(colorcode, len);
     // don't account colors towards output characters
     pos -= len;
@@ -331,12 +454,18 @@
   return *this;
 }
 
+bool raw_fd_ostream::is_displayed() const {
+  return sys::Process::FileDescriptorIsDisplayed(FD);
+}
+
 //===----------------------------------------------------------------------===//
 //  raw_stdout/err_ostream
 //===----------------------------------------------------------------------===//
 
+// Set buffer settings to model stdout and stderr behavior.
+// Set standard error to be unbuffered by default.
 raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {}
-raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
+raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
+                                                        true) {}
 
 // An out of line virtual method to provide a home for the class vtable.
@@ -357,23 +486,12 @@ raw_ostream &llvm::errs() {
   return S;
 }
 
-//===----------------------------------------------------------------------===//
-// raw_os_ostream
-//===----------------------------------------------------------------------===//
-
-raw_os_ostream::~raw_os_ostream() {
-  flush();
-}
-
-void raw_os_ostream::write_impl(const char *Ptr, unsigned Size) {
-  OS.write(Ptr, Size);
+/// nulls() - This returns a reference to a raw_ostream which discards output.
+raw_ostream &llvm::nulls() {
+  static raw_null_ostream S;
+  return S;
 }
 
-uint64_t raw_os_ostream::current_pos() { return OS.tellp(); }
-
-uint64_t raw_os_ostream::tell() {
-  return (uint64_t)OS.tellp() + GetNumBytesInBuffer();
-}
 
 //===----------------------------------------------------------------------===//
 // raw_string_ostream
@@ -383,7 +501,7 @@ raw_string_ostream::~raw_string_ostream() {
   flush();
 }
 
-void raw_string_ostream::write_impl(const char *Ptr, unsigned Size) {
+void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
   OS.append(Ptr, Size);
 }
 
@@ -391,16 +509,65 @@
 // raw_svector_ostream
 //===----------------------------------------------------------------------===//
 
+// The raw_svector_ostream implementation uses the SmallVector itself as the
+// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is
+// always pointing past the end of the vector, but within the vector
+// capacity. This allows raw_ostream to write directly into the correct place,
+// and we only need to set the vector size when the data is flushed.
+
+raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
+  // Set up the initial external buffer.
We make sure that the buffer has at + // least 128 bytes free; raw_ostream itself only requires 64, but we want to + // make sure that we don't grow the buffer unnecessarily on destruction (when + // the data is flushed). See the FIXME below. + OS.reserve(OS.size() + 128); + SetBuffer(OS.end(), OS.capacity() - OS.size()); +} + raw_svector_ostream::~raw_svector_ostream() { + // FIXME: Prevent resizing during this flush(). flush(); } -void raw_svector_ostream::write_impl(const char *Ptr, unsigned Size) { - OS.append(Ptr, Ptr + Size); +void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { + assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() && + "Invalid write_impl() call!"); + + // We don't need to copy the bytes, just commit the bytes to the + // SmallVector. + OS.set_size(OS.size() + Size); + + // Grow the vector if necessary. + if (OS.capacity() - OS.size() < 64) + OS.reserve(OS.capacity() * 2); + + // Update the buffer position. + SetBuffer(OS.end(), OS.capacity() - OS.size()); } uint64_t raw_svector_ostream::current_pos() { return OS.size(); } -uint64_t raw_svector_ostream::tell() { - return OS.size() + GetNumBytesInBuffer(); +StringRef raw_svector_ostream::str() { + flush(); + return StringRef(OS.begin(), OS.size()); +} + +//===----------------------------------------------------------------------===// +// raw_null_ostream +//===----------------------------------------------------------------------===// + +raw_null_ostream::~raw_null_ostream() { +#ifndef NDEBUG + // ~raw_ostream asserts that the buffer is empty. This isn't necessary + // with raw_null_ostream, but it's better to have raw_null_ostream follow + // the rules than to change the rules just for raw_null_ostream. + flush(); +#endif +} + +void raw_null_ostream::write_impl(const char *Ptr, size_t Size) { +} + +uint64_t raw_null_ostream::current_pos() { + return 0; } diff --git a/lib/Support/regcclass.h b/lib/Support/regcclass.h new file mode 100644 index 0000000000000..2cea3e4e54068 --- /dev/null +++ b/lib/Support/regcclass.h @@ -0,0 +1,70 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cclass.h 8.3 (Berkeley) 3/20/94 + */ + +/* character-class table */ +static struct cclass { + const char *name; + const char *chars; + const char *multis; +} cclasses[] = { + { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", ""} , + { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + ""} , + { "blank", " \t", ""} , + { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", ""} , + { "digit", "0123456789", ""} , + { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "lower", "abcdefghijklmnopqrstuvwxyz", + ""} , + { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", + ""} , + { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "space", "\t\n\v\f\r ", ""} , + { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + ""} , + { "xdigit", "0123456789ABCDEFabcdef", + ""} , + { NULL, 0, "" } +}; diff --git a/lib/Support/regcname.h b/lib/Support/regcname.h new file mode 100644 index 0000000000000..3c0bb248ffa78 --- /dev/null +++ b/lib/Support/regcname.h @@ -0,0 +1,139 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cname.h 8.3 (Berkeley) 3/20/94 + */ + +/* character-name table */ +static struct cname { + const char *name; + char code; +} cnames[] = { + { "NUL", '\0' }, + { "SOH", '\001' }, + { "STX", '\002' }, + { "ETX", '\003' }, + { "EOT", '\004' }, + { "ENQ", '\005' }, + { "ACK", '\006' }, + { "BEL", '\007' }, + { "alert", '\007' }, + { "BS", '\010' }, + { "backspace", '\b' }, + { "HT", '\011' }, + { "tab", '\t' }, + { "LF", '\012' }, + { "newline", '\n' }, + { "VT", '\013' }, + { "vertical-tab", '\v' }, + { "FF", '\014' }, + { "form-feed", '\f' }, + { "CR", '\015' }, + { "carriage-return", '\r' }, + { "SO", '\016' }, + { "SI", '\017' }, + { "DLE", '\020' }, + { "DC1", '\021' }, + { "DC2", '\022' }, + { "DC3", '\023' }, + { "DC4", '\024' }, + { "NAK", '\025' }, + { "SYN", '\026' }, + { "ETB", '\027' }, + { "CAN", '\030' }, + { "EM", '\031' }, + { "SUB", '\032' }, + { "ESC", '\033' }, + { "IS4", '\034' }, + { "FS", '\034' }, + { "IS3", '\035' }, + { "GS", '\035' }, + { "IS2", '\036' }, + { "RS", '\036' }, + { "IS1", '\037' }, + { "US", '\037' }, + { "space", ' ' }, + { "exclamation-mark", '!' }, + { "quotation-mark", '"' }, + { "number-sign", '#' }, + { "dollar-sign", '$' }, + { "percent-sign", '%' }, + { "ampersand", '&' }, + { "apostrophe", '\'' }, + { "left-parenthesis", '(' }, + { "right-parenthesis", ')' }, + { "asterisk", '*' }, + { "plus-sign", '+' }, + { "comma", ',' }, + { "hyphen", '-' }, + { "hyphen-minus", '-' }, + { "period", '.' }, + { "full-stop", '.' }, + { "slash", '/' }, + { "solidus", '/' }, + { "zero", '0' }, + { "one", '1' }, + { "two", '2' }, + { "three", '3' }, + { "four", '4' }, + { "five", '5' }, + { "six", '6' }, + { "seven", '7' }, + { "eight", '8' }, + { "nine", '9' }, + { "colon", ':' }, + { "semicolon", ';' }, + { "less-than-sign", '<' }, + { "equals-sign", '=' }, + { "greater-than-sign", '>' }, + { "question-mark", '?' }, + { "commercial-at", '@' }, + { "left-square-bracket", '[' }, + { "backslash", '\\' }, + { "reverse-solidus", '\\' }, + { "right-square-bracket", ']' }, + { "circumflex", '^' }, + { "circumflex-accent", '^' }, + { "underscore", '_' }, + { "low-line", '_' }, + { "grave-accent", '`' }, + { "left-brace", '{' }, + { "left-curly-bracket", '{' }, + { "vertical-line", '|' }, + { "right-brace", '}' }, + { "right-curly-bracket", '}' }, + { "tilde", '~' }, + { "DEL", '\177' }, + { NULL, 0 } +}; diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c new file mode 100644 index 0000000000000..cd018d5dc5bc4 --- /dev/null +++ b/lib/Support/regcomp.c @@ -0,0 +1,1525 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)regcomp.c	8.5 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+#include "regcclass.h"
+#include "regcname.h"
+
+/*
+ * parse structure, passed up and down to avoid global variables and
+ * other clumsinesses
+ */
+struct parse {
+  char *next;       /* next character in RE */
+  char *end;        /* end of string (-> NUL normally) */
+  int error;        /* has an error been seen? */
+  sop *strip;       /* malloced strip */
+  sopno ssize;      /* malloced strip size (allocated) */
+  sopno slen;       /* malloced strip length (used) */
+  int ncsalloc;     /* number of csets allocated */
+  struct re_guts *g;
+# define NPAREN 10  /* we need to remember () 1-9 for back refs */
+  sopno pbegin[NPAREN];  /* -> ( ([0] unused) */
+  sopno pend[NPAREN];    /* -> ) ([0] unused) */
+};
+
+static void p_ere(struct parse *, int);
+static void p_ere_exp(struct parse *);
+static void p_str(struct parse *);
+static void p_bre(struct parse *, int, int);
+static int p_simp_re(struct parse *, int);
+static int p_count(struct parse *);
+static void p_bracket(struct parse *);
+static void p_b_term(struct parse *, cset *);
+static void p_b_cclass(struct parse *, cset *);
+static void p_b_eclass(struct parse *, cset *);
+static char p_b_symbol(struct parse *);
+static char p_b_coll_elem(struct parse *, int);
+static char othercase(int);
+static void bothcases(struct parse *, int);
+static void ordinary(struct parse *, int);
+static void nonnewline(struct parse *);
+static void repeat(struct parse *, sopno, int, int);
+static int seterr(struct parse *, int);
+static cset *allocset(struct parse *);
+static void freeset(struct parse *, cset *);
+static int freezeset(struct parse *, cset *);
+static int firstch(struct parse *, cset *);
+static int nch(struct parse *, cset *);
+static void mcadd(struct parse *, cset *, const char *);
+static void mcinvert(struct parse *, cset *);
+static void mccase(struct parse *, cset *);
+static int isinsets(struct re_guts *, int);
+static int samesets(struct re_guts *, int, int);
+static void categorize(struct parse *, struct re_guts *);
+static sopno dupl(struct parse *, sopno, sopno);
+static void doemit(struct parse *, sop, size_t);
+static void doinsert(struct parse *, sop, size_t, sopno);
+static void dofwd(struct parse *, sopno, sop);
+static void enlarge(struct parse *, sopno);
+static void stripsnug(struct parse *, struct re_guts *);
+static void findmust(struct parse *, struct re_guts *);
+static sopno pluscount(struct parse *, struct re_guts *);
+
+static char nuls[10];  /* place to point scanner in event of error */
+
+/*
+ * macros for use with parse structure
+ * BEWARE:  these know that the parse structure is named `p' !!!
+ */
+#define PEEK()      (*p->next)
+#define PEEK2()     (*(p->next+1))
+#define MORE()      (p->next < p->end)
+#define MORE2()     (p->next+1 < p->end)
+#define SEE(c)      (MORE() && PEEK() == (c))
+#define SEETWO(a, b)  (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+#define EAT(c)      ((SEE(c)) ? (NEXT(), 1) : 0)
+#define EATTWO(a, b)  ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
+#define NEXT()      (p->next++)
+#define NEXT2()     (p->next += 2)
+#define NEXTn(n)    (p->next += (n))
+#define GETNEXT()   (*p->next++)
+#define SETERROR(e) seterr(p, (e))
+#define REQUIRE(co, e)  (void)((co) || SETERROR(e))
+#define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
+#define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
+#define MUSTNOTSEE(c, e)  (REQUIRE(!MORE() || PEEK() != (c), e))
+#define EMIT(op, sopnd)   doemit(p, (sop)(op), (size_t)(sopnd))
+#define INSERT(op, pos)   doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
+#define AHEAD(pos)        dofwd(p, pos, HERE()-(pos))
+#define ASTERN(sop, pos)  EMIT(sop, HERE()-pos)
+#define HERE()      (p->slen)
+#define THERE()     (p->slen - 1)
+#define THERETHERE()  (p->slen - 2)
+#define DROP(n)     (p->slen -= (n))
+
+#ifdef _POSIX2_RE_DUP_MAX
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#else
+#define DUPMAX 255
+#endif
+#define INFINITY (DUPMAX + 1)
+
+#ifndef NDEBUG
+static int never = 0;    /* for use in asserts; shuts lint up */
+#else
+#define never 0          /* some <assert.h>s have bugs too */
+#endif
+
+/*
+ - llvm_regcomp - interface for parser and compilation
+ */
+int        /* 0 success, otherwise REG_something */
+llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
+{
+  struct parse pa;
+  struct re_guts *g;
+  struct parse *p = &pa;
+  int i;
+  size_t len;
+#ifdef REDEBUG
+# define GOODFLAGS(f) (f)
+#else
+# define GOODFLAGS(f) ((f)&~REG_DUMP)
+#endif
+
+  cflags = GOODFLAGS(cflags);
+  if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
+    return(REG_INVARG);
+
+  if (cflags&REG_PEND) {
+    if (preg->re_endp < pattern)
+      return(REG_INVARG);
+    len = preg->re_endp - pattern;
+  } else
+    len = strlen((const char *)pattern);
+
+  /* do the mallocs early so failure handling is easy */
+  g = (struct re_guts *)malloc(sizeof(struct re_guts) +
+                               (NC-1)*sizeof(cat_t));
+  if (g == NULL)
+    return(REG_ESPACE);
+  p->ssize = len/(size_t)2*(size_t)3 + (size_t)1;  /* ugh */
+  p->strip = (sop *)calloc(p->ssize, sizeof(sop));
+  p->slen = 0;
+  if (p->strip == NULL) {
+    free((char *)g);
+    return(REG_ESPACE);
+  }
+
+  /* set things up */
+  p->g = g;
+  p->next = (char *)pattern;  /* convenience; we do not modify it */
+  p->end = p->next + len;
+  p->error = 0;
+  p->ncsalloc = 0;
+  for (i = 0; i < NPAREN; i++) {
+    p->pbegin[i] = 0;
+    p->pend[i] = 0;
+  }
+  g->csetsize = NC;
+  g->sets = NULL;
+  g->setbits = NULL;
+  g->ncsets = 0;
+  g->cflags = cflags;
+  g->iflags = 0;
+  g->nbol = 0;
+  g->neol = 0;
+  g->must = NULL;
+  g->mlen = 0;
+  g->nsub = 0;
+  g->ncategories = 1;  /* category 0 is "everything else" */
+  g->categories = &g->catspace[-(CHAR_MIN)];
+  (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
+  g->backrefs = 0;
+
+  /* do it */
+  EMIT(OEND, 0);
+  g->firststate = THERE();
+  if (cflags&REG_EXTENDED)
+    p_ere(p, OUT);
+  else if (cflags&REG_NOSPEC)
+    p_str(p);
+  else
+    p_bre(p, OUT, OUT);
+  EMIT(OEND, 0);
+  g->laststate = THERE();
+
+  /* tidy up loose ends and fill things in */
+  categorize(p, g);
+  stripsnug(p, g);
+  findmust(p, g);
+  g->nplus = pluscount(p, g);
+  g->magic = MAGIC2;
+  preg->re_nsub = g->nsub;
+  preg->re_g = g;
+  preg->re_magic = MAGIC1;
+#ifndef REDEBUG
+  /* not debugging, so can't rely on the assert() in llvm_regexec() */
+  if (g->iflags&REGEX_BAD)
+    SETERROR(REG_ASSERT);
+#endif
+
+  /* win or lose, we're done */
+  if (p->error != 0)  /* lose */
+    llvm_regfree(preg);
+  return(p->error);
+}
+
+/*
+ - p_ere - ERE parser top level, concatenation and alternation
+ */
+static void
+p_ere(struct parse *p, int stop)  /* character this ERE should end at */
+{
+  char c;
+  sopno prevback = 0;
+  sopno prevfwd = 0;
+  sopno conc;
+  int first = 1;  /* is this the first alternative? */
+
+  for (;;) {
+    /* do a bunch of concatenated expressions */
+    conc = HERE();
+    while (MORE() && (c = PEEK()) != '|' && c != stop)
+      p_ere_exp(p);
+    REQUIRE(HERE() != conc, REG_EMPTY);  /* require nonempty */
+
+    if (!EAT('|'))
+      break;    /* NOTE BREAK OUT */
+
+    if (first) {
+      INSERT(OCH_, conc);  /* offset is wrong */
+      prevfwd = conc;
+      prevback = conc;
+      first = 0;
+    }
+    ASTERN(OOR1, prevback);
+    prevback = THERE();
+    AHEAD(prevfwd);   /* fix previous offset */
+    prevfwd = HERE();
+    EMIT(OOR2, 0);    /* offset is very wrong */
+  }
+
+  if (!first) {    /* tail-end fixups */
+    AHEAD(prevfwd);
+    ASTERN(O_CH, prevback);
+  }
+
+  assert(!MORE() || SEE(stop));
+}
+
+/*
+ - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
+ */
+static void
+p_ere_exp(struct parse *p)
+{
+  char c;
+  sopno pos;
+  int count;
+  int count2;
+  sopno subno;
+  int wascaret = 0;
+
+  assert(MORE());    /* caller should have ensured this */
+  c = GETNEXT();
+
+  pos = HERE();
+  switch (c) {
+  case '(':
+    REQUIRE(MORE(), REG_EPAREN);
+    p->g->nsub++;
+    subno = p->g->nsub;
+    if (subno < NPAREN)
+      p->pbegin[subno] = HERE();
+    EMIT(OLPAREN, subno);
+    if (!SEE(')'))
+      p_ere(p, ')');
+    if (subno < NPAREN) {
+      p->pend[subno] = HERE();
+      assert(p->pend[subno] != 0);
+    }
+    EMIT(ORPAREN, subno);
+    MUSTEAT(')', REG_EPAREN);
+    break;
+#ifndef POSIX_MISTAKE
+  case ')':    /* happens only if no current unmatched ( */
+    /*
+     * You may ask, why the ifndef?  Because I didn't notice
+     * this until slightly too late for 1003.2, and none of the
+     * other 1003.2 regular-expression reviewers noticed it at
+     * all.  So an unmatched ) is legal POSIX, at least until
+     * we can get it fixed.
+     */
+    SETERROR(REG_EPAREN);
+    break;
+#endif
+  case '^':
+    EMIT(OBOL, 0);
+    p->g->iflags |= USEBOL;
+    p->g->nbol++;
+    wascaret = 1;
+    break;
+  case '$':
+    EMIT(OEOL, 0);
+    p->g->iflags |= USEEOL;
+    p->g->neol++;
+    break;
+  case '|':
+    SETERROR(REG_EMPTY);
+    break;
+  case '*':
+  case '+':
+  case '?':
+    SETERROR(REG_BADRPT);
+    break;
+  case '.':
+    if (p->g->cflags&REG_NEWLINE)
+      nonnewline(p);
+    else
+      EMIT(OANY, 0);
+    break;
+  case '[':
+    p_bracket(p);
+    break;
+  case '\\':
+    REQUIRE(MORE(), REG_EESCAPE);
+    c = GETNEXT();
+    ordinary(p, c);
+    break;
+  case '{':    /* okay as ordinary except if digit follows */
+    REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
+    /* FALLTHROUGH */
+  default:
+    ordinary(p, c);
+    break;
+  }
+
+  if (!MORE())
+    return;
+  c = PEEK();
+  /* we call { a repetition if followed by a digit */
+  if (!( c == '*' || c == '+' || c == '?' ||
+        (c == '{' && MORE2() && isdigit((uch)PEEK2())) ))
+    return;    /* no repetition, we're done */
+  NEXT();
+
+  REQUIRE(!wascaret, REG_BADRPT);
+  switch (c) {
+  case '*':  /* implemented as +? */
+    /* this case does not require the (y|) trick, noKLUDGE */
+    INSERT(OPLUS_, pos);
+    ASTERN(O_PLUS, pos);
+    INSERT(OQUEST_, pos);
+    ASTERN(O_QUEST, pos);
+    break;
+  case '+':
+    INSERT(OPLUS_, pos);
+    ASTERN(O_PLUS, pos);
+    break;
+  case '?':
+    /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+    INSERT(OCH_, pos);    /* offset slightly wrong */
+    ASTERN(OOR1, pos);    /* this one's right */
+    AHEAD(pos);           /* fix the OCH_ */
+    EMIT(OOR2, 0);        /* offset very wrong...
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + break; + case '{': + count = p_count(p); + if (EAT(',')) { + if (isdigit((uch)PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EAT('}')) { /* error heuristics */ + while (MORE() && PEEK() != '}') + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + break; + } + + if (!MORE()) + return; + c = PEEK(); + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) + return; + SETERROR(REG_BADRPT); +} + +/* + - p_str - string (no metacharacters) "parser" + */ +static void +p_str(struct parse *p) +{ + REQUIRE(MORE(), REG_EMPTY); + while (MORE()) + ordinary(p, GETNEXT()); +} + +/* + - p_bre - BRE parser top level, anchoring and concatenation + * Giving end1 as OUT essentially eliminates the end1/end2 check. + * + * This implementation is a bit of a kludge, in that a trailing $ is first + * taken as an ordinary character and then revised to be an anchor. The + * only undesirable side effect is that '$' gets included as a character + * category in such cases. This is fairly harmless; not worth fixing. + * The amount of lookahead needed to avoid this kludge is excessive. + */ +static void +p_bre(struct parse *p, + int end1, /* first terminating character */ + int end2) /* second terminating character */ +{ + sopno start = HERE(); + int first = 1; /* first subexpression? */ + int wasdollar = 0; + + if (EAT('^')) { + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + } + while (MORE() && !SEETWO(end1, end2)) { + wasdollar = p_simp_re(p, first); + first = 0; + } + if (wasdollar) { /* oops, that was a trailing anchor */ + DROP(1); + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + } + + REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ +} + +/* + - p_simp_re - parse a simple RE, an atom possibly followed by a repetition + */ +static int /* was the simple RE an unbackslashed $? */ +p_simp_re(struct parse *p, + int starordinary) /* is a leading * an ordinary character? 
 */
+{
+  int c;
+  int count;
+  int count2;
+  sopno pos;
+  int i;
+  sopno subno;
+# define BACKSL (1<<CHAR_BIT)
+
+  pos = HERE();    /* repetition op, if any, covers from here */
+
+  assert(MORE());  /* caller should have ensured this */
+  c = GETNEXT();
+  if (c == '\\') {
+    REQUIRE(MORE(), REG_EESCAPE);
+    c = BACKSL | GETNEXT();
+  }
+  switch (c) {
+  case '.':
+    if (p->g->cflags&REG_NEWLINE)
+      nonnewline(p);
+    else
+      EMIT(OANY, 0);
+    break;
+  case '[':
+    p_bracket(p);
+    break;
+  case BACKSL|'{':
+    SETERROR(REG_BADRPT);
+    break;
+  case BACKSL|'(':
+    p->g->nsub++;
+    subno = p->g->nsub;
+    if (subno < NPAREN)
+      p->pbegin[subno] = HERE();
+    EMIT(OLPAREN, subno);
+    /* the MORE here is an error heuristic */
+    if (MORE() && !SEETWO('\\', ')'))
+      p_bre(p, '\\', ')');
+    if (subno < NPAREN) {
+      p->pend[subno] = HERE();
+      assert(p->pend[subno] != 0);
+    }
+    EMIT(ORPAREN, subno);
+    REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+    break;
+  case BACKSL|')':  /* should not get here -- must be user */
+  case BACKSL|'}':
+    SETERROR(REG_EPAREN);
+    break;
+  case BACKSL|'1':
+  case BACKSL|'2':
+  case BACKSL|'3':
+  case BACKSL|'4':
+  case BACKSL|'5':
+  case BACKSL|'6':
+  case BACKSL|'7':
+  case BACKSL|'8':
+  case BACKSL|'9':
+    i = (c&~BACKSL) - '0';
+    assert(i < NPAREN);
+    if (p->pend[i] != 0) {
+      assert(i <= p->g->nsub);
+      EMIT(OBACK_, i);
+      assert(p->pbegin[i] != 0);
+      assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+      assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+      (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+      EMIT(O_BACK, i);
+    } else
+      SETERROR(REG_ESUBREG);
+    p->g->backrefs = 1;
+    break;
+  case '*':
+    REQUIRE(starordinary, REG_BADRPT);
+    /* FALLTHROUGH */
+  default:
+    ordinary(p, (char)c);
+    break;
+  }
+
+  if (EAT('*')) {    /* implemented as +? */
+    /* this case does not require the (y|) trick, noKLUDGE */
+    INSERT(OPLUS_, pos);
+    ASTERN(O_PLUS, pos);
+    INSERT(OQUEST_, pos);
+    ASTERN(O_QUEST, pos);
+  } else if (EATTWO('\\', '{')) {
+    count = p_count(p);
+    if (EAT(',')) {
+      if (MORE() && isdigit((uch)PEEK())) {
+        count2 = p_count(p);
+        REQUIRE(count <= count2, REG_BADBR);
+      } else    /* single number with comma */
+        count2 = INFINITY;
+    } else      /* just a single number */
+      count2 = count;
+    repeat(p, pos, count, count2);
+    if (!EATTWO('\\', '}')) {  /* error heuristics */
+      while (MORE() && !SEETWO('\\', '}'))
+        NEXT();
+      REQUIRE(MORE(), REG_EBRACE);
+      SETERROR(REG_BADBR);
+    }
+  } else if (c == '$')  /* $ (but not \$) ends it */
+    return(1);
+
+  return(0);
+}
+
+/*
+ - p_count - parse a repetition count
+ */
+static int      /* the value */
+p_count(struct parse *p)
+{
+  int count = 0;
+  int ndigits = 0;
+
+  while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+    count = count*10 + (GETNEXT() - '0');
+    ndigits++;
+  }
+
+  REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+  return(count);
+}
+
+/*
+ - p_bracket - parse a bracketed character list
+ *
+ * Note a significant property of this code:  if the allocset() did SETERROR,
+ * no set operations are done.
+ */
+static void
+p_bracket(struct parse *p)
+{
+  cset *cs;
+  int invert = 0;
+
+  /* Dept of Truly Sickening Special-Case Kludges */
+  if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+    EMIT(OBOW, 0);
+    NEXTn(6);
+    return;
+  }
+  if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+    EMIT(OEOW, 0);
+    NEXTn(6);
+    return;
+  }
+
+  if ((cs = allocset(p)) == NULL) {
+    /* allocset did set error status in p */
+    return;
+  }
+
+  if (EAT('^'))
+    invert++;  /* make note to invert set at end */
+  if (EAT(']'))
+    CHadd(cs, ']');
+  else if (EAT('-'))
+    CHadd(cs, '-');
+  while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+    p_b_term(p, cs);
+  if (EAT('-'))
+    CHadd(cs, '-');
+  MUSTEAT(']', REG_EBRACK);
+
+  if (p->error != 0) {  /* don't mess things up further */
+    freeset(p, cs);
+    return;
+  }
+
+  if (p->g->cflags&REG_ICASE) {
+    int i;
+    int ci;
+
+    for (i = p->g->csetsize - 1; i >= 0; i--)
+      if (CHIN(cs, i) && isalpha(i)) {
+        ci = othercase(i);
+        if (ci != i)
+          CHadd(cs, ci);
+      }
+    if (cs->multis != NULL)
+      mccase(p, cs);
+  }
+  if (invert) {
+    int i;
+
+    for (i = p->g->csetsize - 1; i >= 0; i--)
+      if (CHIN(cs, i))
+        CHsub(cs, i);
+      else
+        CHadd(cs, i);
+    if (p->g->cflags&REG_NEWLINE)
+      CHsub(cs, '\n');
+    if (cs->multis != NULL)
+      mcinvert(p, cs);
+  }
+
+  assert(cs->multis == NULL);  /* xxx */
+
+  if (nch(p, cs) == 1) {    /* optimize singleton sets */
+    ordinary(p, firstch(p, cs));
+    freeset(p, cs);
+  } else
+    EMIT(OANYOF, freezeset(p, cs));
+}
+
+/*
+ - p_b_term - parse one term of a bracketed character list
+ */
+static void
+p_b_term(struct parse *p, cset *cs)
+{
+  char c;
+  char start, finish;
+  int i;
+
+  /* classify what we've got */
+  switch ((MORE()) ? PEEK() : '\0') {
+  case '[':
+    c = (MORE2()) ? PEEK2() : '\0';
+    break;
+  case '-':
+    SETERROR(REG_ERANGE);
+    return;      /* NOTE RETURN */
+    break;
+  default:
+    c = '\0';
+    break;
+  }
+
+  switch (c) {
+  case ':':    /* character class */
+    NEXT2();
+    REQUIRE(MORE(), REG_EBRACK);
+    c = PEEK();
+    REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+    p_b_cclass(p, cs);
+    REQUIRE(MORE(), REG_EBRACK);
+    REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+    break;
+  case '=':    /* equivalence class */
+    NEXT2();
+    REQUIRE(MORE(), REG_EBRACK);
+    c = PEEK();
+    REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+    p_b_eclass(p, cs);
+    REQUIRE(MORE(), REG_EBRACK);
+    REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+    break;
+  default:    /* symbol, ordinary character, or range */
+/* xxx revision needed for multichar stuff */
+    start = p_b_symbol(p);
+    if (SEE('-') && MORE2() && PEEK2() != ']') {
+      /* range */
+      NEXT();
+      if (EAT('-'))
+        finish = '-';
+      else
+        finish = p_b_symbol(p);
+    } else
+      finish = start;
+/* xxx what about signed chars here... */
+    REQUIRE(start <= finish, REG_ERANGE);
+    for (i = start; i <= finish; i++)
+      CHadd(cs, i);
+    break;
+  }
+}
+
+/*
+ - p_b_cclass - parse a character-class name and deal with it
+ */
+static void
+p_b_cclass(struct parse *p, cset *cs)
+{
+  char *sp = p->next;
+  struct cclass *cp;
+  size_t len;
+  const char *u;
+  char c;
+
+  while (MORE() && isalpha(PEEK()))
+    NEXT();
+  len = p->next - sp;
+  for (cp = cclasses; cp->name != NULL; cp++)
+    if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+      break;
+  if (cp->name == NULL) {
+    /* oops, didn't find it */
+    SETERROR(REG_ECTYPE);
+    return;
+  }
+
+  u = cp->chars;
+  while ((c = *u++) != '\0')
+    CHadd(cs, c);
+  for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
+    MCadd(p, cs, u);
+}
+
+/*
+ - p_b_eclass - parse an equivalence-class name and deal with it
+ *
+ * This implementation is incomplete. xxx
+ */
+static void
+p_b_eclass(struct parse *p, cset *cs)
+{
+  char c;
+
+  c = p_b_coll_elem(p, '=');
+  CHadd(cs, c);
+}
+
+/*
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ */
+static char    /* value of symbol */
+p_b_symbol(struct parse *p)
+{
+  char value;
+
+  REQUIRE(MORE(), REG_EBRACK);
+  if (!EATTWO('[', '.'))
+    return(GETNEXT());
+
+  /* collating symbol */
+  value = p_b_coll_elem(p, '.');
+  REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+  return(value);
+}
+
+/*
+ - p_b_coll_elem - parse a collating-element name and look it up
+ */
+static char    /* value of collating element */
+p_b_coll_elem(struct parse *p,
+    int endc)  /* name ended by endc,']' */
+{
+  char *sp = p->next;
+  struct cname *cp;
+  int len;
+
+  while (MORE() && !SEETWO(endc, ']'))
+    NEXT();
+  if (!MORE()) {
+    SETERROR(REG_EBRACK);
+    return(0);
+  }
+  len = p->next - sp;
+  for (cp = cnames; cp->name != NULL; cp++)
+    if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+      return(cp->code);  /* known name */
+  if (len == 1)
+    return(*sp);         /* single character */
+  SETERROR(REG_ECOLLATE);  /* neither */
+  return(0);
+}
+
+/*
+ - othercase - return the case counterpart of an alphabetic
+ */
+static char  /* if no counterpart, return ch */
+othercase(int ch)
+{
+  ch = (uch)ch;
+  assert(isalpha(ch));
+  if (isupper(ch))
+    return ((uch)tolower(ch));
+  else if (islower(ch))
+    return ((uch)toupper(ch));
+  else  /* peculiar, but could happen */
+    return(ch);
+}
+
+/*
+ - bothcases - emit a dualcase version of a two-case character
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+bothcases(struct parse *p, int ch)
+{
+  char *oldnext = p->next;
+  char *oldend = p->end;
+  char bracket[3];
+
+  ch = (uch)ch;
+  assert(othercase(ch) != ch);  /* p_bracket() would recurse */
+  p->next = bracket;
+  p->end = bracket+2;
+  bracket[0] = ch;
+  bracket[1] = ']';
+  bracket[2] = '\0';
+  p_bracket(p);
+  assert(p->next == bracket+2);
+  p->next = oldnext;
+  p->end = oldend;
+}
+
+/*
+ - ordinary - emit an ordinary character
+ */
+static void
+ordinary(struct parse *p, int ch)
+{
+  cat_t *cap = p->g->categories;
+
+  if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
+    bothcases(p, ch);
+  else {
+    EMIT(OCHAR, (uch)ch);
+    if (cap[ch] == 0)
+      cap[ch] = p->g->ncategories++;
+  }
+}
+
+/*
+ - nonnewline - emit REG_NEWLINE version of OANY
+ *
+ * Boy, is this implementation ever a kludge...
+ */ +static void +nonnewline(struct parse *p) +{ + char *oldnext = p->next; + char *oldend = p->end; + char bracket[4]; + + p->next = bracket; + p->end = bracket+3; + bracket[0] = '^'; + bracket[1] = '\n'; + bracket[2] = ']'; + bracket[3] = '\0'; + p_bracket(p); + assert(p->next == bracket+3); + p->next = oldnext; + p->end = oldend; +} + +/* + - repeat - generate code for a bounded repetition, recursively if needed + */ +static void +repeat(struct parse *p, + sopno start, /* operand from here to end of strip */ + int from, /* repeated from this number */ + int to) /* to this number of times (maybe INFINITY) */ +{ + sopno finish = HERE(); +# define N 2 +# define INF 3 +# define REP(f, t) ((f)*8 + (t)) +# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) + sopno copy; + + if (p->error != 0) /* head off possible runaway recursion */ + return; + + assert(from <= to); + + switch (REP(MAP(from), MAP(to))) { + case REP(0, 0): /* must be user doing this */ + DROP(finish-start); /* drop the operand */ + break; + case REP(0, 1): /* as x{1,1}? */ + case REP(0, N): /* as x{1,n}? */ + case REP(0, INF): /* as x{1,}? */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); /* offset is wrong... */ + repeat(p, start+1, 1, to); + ASTERN(OOR1, start); + AHEAD(start); /* ... fix it */ + EMIT(OOR2, 0); + AHEAD(THERE()); + ASTERN(O_CH, THERETHERE()); + break; + case REP(1, 1): /* trivial case */ + /* done */ + break; + case REP(1, N): /* as x?x{1,n-1} */ + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, start); + ASTERN(OOR1, start); + AHEAD(start); + EMIT(OOR2, 0); /* offset very wrong... */ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + copy = dupl(p, start+1, finish+1); + assert(copy == finish+4); + repeat(p, copy, 1, to-1); + break; + case REP(1, INF): /* as x+ */ + INSERT(OPLUS_, start); + ASTERN(O_PLUS, start); + break; + case REP(N, N): /* as xx{m-1,n-1} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to-1); + break; + case REP(N, INF): /* as xx{n-1,INF} */ + copy = dupl(p, start, finish); + repeat(p, copy, from-1, to); + break; + default: /* "can't happen" */ + SETERROR(REG_ASSERT); /* just in case */ + break; + } +} + +/* + - seterr - set an error condition + */ +static int /* useless but makes type checking happy */ +seterr(struct parse *p, int e) +{ + if (p->error == 0) /* keep earliest error condition */ + p->error = e; + p->next = nuls; /* try to bring things to a halt */ + p->end = nuls; + return(0); /* make the return value well-defined */ +} + +/* + - allocset - allocate a set of characters for [] + */ +static cset * +allocset(struct parse *p) +{ + int no = p->g->ncsets++; + size_t nc; + size_t nbytes; + cset *cs; + size_t css = (size_t)p->g->csetsize; + int i; + + if (no >= p->ncsalloc) { /* need another column of space */ + void *ptr; + + p->ncsalloc += CHAR_BIT; + nc = p->ncsalloc; + assert(nc % CHAR_BIT == 0); + nbytes = nc / CHAR_BIT * css; + + ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); + if (ptr == NULL) + goto nomem; + p->g->sets = ptr; + + ptr = (uch *)realloc((char *)p->g->setbits, nbytes); + if (ptr == NULL) + goto nomem; + p->g->setbits = ptr; + + for (i = 0; i < no; i++) + p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); + + (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); + } + /* XXX should not happen */ + if (p->g->sets == NULL || p->g->setbits == NULL) + goto nomem; + + cs = &p->g->sets[no]; + cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 
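+/*
+ * (Editorial note: a worked example of the column layout computed around
+ * this point, assuming csetsize == 256 and CHAR_BIT == 8: sets 0..7 share
+ * the first 256-byte column of setbits, so set number 9 gets
+ * ptr = setbits + 256*1 and mask = 1 << (9 % 8) == 2; CHIN/CHadd for a
+ * character c then test or set that bit within ptr[c].)
+ */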
+ cs->mask = 1 << ((no) % CHAR_BIT); + cs->hash = 0; + cs->smultis = 0; + cs->multis = NULL; + + return(cs); +nomem: + free(p->g->sets); + p->g->sets = NULL; + free(p->g->setbits); + p->g->setbits = NULL; + + SETERROR(REG_ESPACE); + /* caller's responsibility not to do set ops */ + return(NULL); +} + +/* + - freeset - free a now-unused set + */ +static void +freeset(struct parse *p, cset *cs) +{ + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + CHsub(cs, i); + if (cs == top-1) /* recover only the easy case */ + p->g->ncsets--; +} + +/* + - freezeset - final processing on a set of characters + * + * The main task here is merging identical sets. This is usually a waste + * of time (although the hash code minimizes the overhead), but can win + * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash + * is done using addition rather than xor -- all ASCII [aA] sets xor to + * the same value! + */ +static int /* set number */ +freezeset(struct parse *p, cset *cs) +{ + uch h = cs->hash; + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + cset *cs2; + size_t css = (size_t)p->g->csetsize; + + /* look for an earlier one which is the same */ + for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) + if (cs2->hash == h && cs2 != cs) { + /* maybe */ + for (i = 0; i < css; i++) + if (!!CHIN(cs2, i) != !!CHIN(cs, i)) + break; /* no */ + if (i == css) + break; /* yes */ + } + + if (cs2 < top) { /* found one */ + freeset(p, cs); + cs = cs2; + } + + return((int)(cs - p->g->sets)); +} + +/* + - firstch - return first character in a set (which must have at least one) + */ +static int /* character; there is no "none" value */ +firstch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + return((char)i); + assert(never); + return(0); /* arbitrary */ +} + +/* + - nch - number of characters in a set + */ +static int +nch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + int n = 0; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + n++; + return(n); +} + +/* + - mcadd - add a collating element to a cset + */ +static void +mcadd( struct parse *p, cset *cs, const char *cp) +{ + size_t oldend = cs->smultis; + void *np; + + cs->smultis += strlen(cp) + 1; + np = realloc(cs->multis, cs->smultis); + if (np == NULL) { + if (cs->multis) + free(cs->multis); + cs->multis = NULL; + SETERROR(REG_ESPACE); + return; + } + cs->multis = np; + + llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); +} + +/* + - mcinvert - invert the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. + */ +/* ARGSUSED */ +static void +mcinvert(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - mccase - add case counterparts of the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. + */ +/* ARGSUSED */ +static void +mccase(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - isinsets - is this character in any sets? 
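+ *   (true if any cset's bit column has a bit on for this character;
+ *   categorize() below relies on this and on samesets())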
+ */ +static int /* predicate */ +isinsets(struct re_guts *g, int c) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc = (uch)c; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc] != 0) + return(1); + return(0); +} + +/* + - samesets - are these two characters in exactly the same sets? + */ +static int /* predicate */ +samesets(struct re_guts *g, int c1, int c2) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc1 = (uch)c1; + unsigned uc2 = (uch)c2; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc1] != col[uc2]) + return(0); + return(1); +} + +/* + - categorize - sort out character categories + */ +static void +categorize(struct parse *p, struct re_guts *g) +{ + cat_t *cats = g->categories; + int c; + int c2; + cat_t cat; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (cats[c] == 0 && isinsets(g, c)) { + cat = g->ncategories++; + cats[c] = cat; + for (c2 = c+1; c2 <= CHAR_MAX; c2++) + if (cats[c2] == 0 && samesets(g, c, c2)) + cats[c2] = cat; + } +} + +/* + - dupl - emit a duplicate of a bunch of sops + */ +static sopno /* start of duplicate */ +dupl(struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ +{ + sopno ret = HERE(); + sopno len = finish - start; + + assert(finish >= start); + if (len == 0) + return(ret); + enlarge(p, p->ssize + len); /* this many unexpected additions */ + assert(p->ssize >= p->slen + len); + (void) memmove((char *)(p->strip + p->slen), + (char *)(p->strip + start), (size_t)len*sizeof(sop)); + p->slen += len; + return(ret); +} + +/* + - doemit - emit a strip operator + * + * It might seem better to implement this as a macro with a function as + * hard-case backup, but it's just too big and messy unless there are + * some changes to the data structures. Maybe later. 
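+ * (All strip output funnels through here, via the EMIT and INSERT macros
+ * used throughout the parser.)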
+ */
+static void
+doemit(struct parse *p, sop op, size_t opnd)
+{
+  /* avoid making error situations worse */
+  if (p->error != 0)
+    return;
+
+  /* deal with oversize operands ("can't happen", more or less) */
+  assert(opnd < 1<<OPSHIFT);
+
+  /* deal with undersized strip */
+  if (p->slen >= p->ssize)
+    enlarge(p, (p->ssize+1) / 2 * 3);  /* +50% */
+  assert(p->slen < p->ssize);
+
+  /* finally, it's all reduced to the easy case */
+  p->strip[p->slen++] = SOP(op, opnd);
+}
+
+/*
+ - doinsert - insert a sop into the strip
+ */
+static void
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
+{
+  sopno sn;
+  sop s;
+  int i;
+
+  /* avoid making error situations worse */
+  if (p->error != 0)
+    return;
+
+  sn = HERE();
+  EMIT(op, opnd);    /* do checks, ensure space */
+  assert(HERE() == sn+1);
+  s = p->strip[sn];
+
+  /* adjust paren pointers */
+  assert(pos > 0);
+  for (i = 1; i < NPAREN; i++) {
+    if (p->pbegin[i] >= pos) {
+      p->pbegin[i]++;
+    }
+    if (p->pend[i] >= pos) {
+      p->pend[i]++;
+    }
+  }
+
+  memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
+      (HERE()-pos-1)*sizeof(sop));
+  p->strip[pos] = s;
+}
+
+/*
+ - dofwd - complete a forward reference
+ */
+static void
+dofwd(struct parse *p, sopno pos, sop value)
+{
+  /* avoid making error situations worse */
+  if (p->error != 0)
+    return;
+
+  assert(value < 1<<OPSHIFT);
+  p->strip[pos] = OP(p->strip[pos]) | value;
+}
+
+/*
+ - enlarge - enlarge the strip
+ */
+static void
+enlarge(struct parse *p, sopno size)
+{
+  sop *sp;
+
+  if (p->ssize >= size)
+    return;
+
+  sp = (sop *)realloc(p->strip, size*sizeof(sop));
+  if (sp == NULL) {
+    SETERROR(REG_ESPACE);
+    return;
+  }
+  p->strip = sp;
+  p->ssize = size;
+}
+
+/*
+ - stripsnug - compact the strip
+ */
+static void
+stripsnug(struct parse *p, struct re_guts *g)
+{
+  g->nstates = p->slen;
+  g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+  if (g->strip == NULL) {
+    SETERROR(REG_ESPACE);
+    g->strip = p->strip;
+  }
+}
+
+/*
+ - findmust - fill in must and mlen with longest mandatory literal string
+ *
+ * This algorithm could do fancy things like analyzing the operands of |
+ * for common subsequences.  Someday.  This code is simple and finds most
+ * of the interesting cases.
+ *
+ * Note that must and mlen got initialized during setup.
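+ * For example, in "abc.*de" the longest OCHAR run is "abc" (length 3), so
+ * must/mlen let matcher() memcmp() for "abc" before running the engine.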
+ */ +static void +findmust(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop *start = 0; /* start initialized in the default case, after that */ + sop *newstart = 0; /* newstart was initialized in the OCHAR case */ + sopno newlen; + sop s; + char *cp; + sopno i; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + /* find the longest OCHAR sequence in strip */ + newlen = 0; + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OCHAR: /* sequence member */ + if (newlen == 0) /* new sequence */ + newstart = scan - 1; + newlen++; + break; + case OPLUS_: /* things that don't break one */ + case OLPAREN: + case ORPAREN: + break; + case OQUEST_: /* things that must be skipped */ + case OCH_: + scan--; + do { + scan += OPND(s); + s = *scan; + /* assert() interferes w debug printouts */ + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) { + g->iflags |= REGEX_BAD; + return; + } + } while (OP(s) != O_QUEST && OP(s) != O_CH); + /* fallthrough */ + default: /* things that break a sequence */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + } + newlen = 0; + break; + } + } while (OP(s) != OEND); + + if (g->mlen == 0) /* there isn't one */ + return; + + /* turn it into a character string */ + g->must = malloc((size_t)g->mlen + 1); + if (g->must == NULL) { /* argh; just forget it */ + g->mlen = 0; + return; + } + cp = g->must; + scan = start; + for (i = g->mlen; i > 0; i--) { + while (OP(s = *scan++) != OCHAR) + continue; + assert(cp < g->must + g->mlen); + *cp++ = (char)OPND(s); + } + assert(cp == g->must + g->mlen); + *cp++ = '\0'; /* just on general principles */ +} + +/* + - pluscount - count + nesting + */ +static sopno /* nesting depth */ +pluscount(struct parse *p, struct re_guts *g) +{ + sop *scan; + sop s; + sopno plusnest = 0; + sopno maxnest = 0; + + if (p->error != 0) + return(0); /* there may not be an OEND */ + + scan = g->strip + 1; + do { + s = *scan++; + switch (OP(s)) { + case OPLUS_: + plusnest++; + break; + case O_PLUS: + if (plusnest > maxnest) + maxnest = plusnest; + plusnest--; + break; + } + } while (OP(s) != OEND); + if (plusnest != 0) + g->iflags |= REGEX_BAD; + return(maxnest); +} diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc new file mode 100644 index 0000000000000..0f27cfd4b5b8f --- /dev/null +++ b/lib/Support/regengine.inc @@ -0,0 +1,1027 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c	8.5 (Berkeley) 3/20/94
+ */
+
+/*
+ * The matching engine and friends. This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#define nope snope
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#define nope lnope
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+  struct re_guts *g;
+  int eflags;
+  llvm_regmatch_t *pmatch;  /* [nsub+1] (0 element unused) */
+  char *offp;      /* offsets work from here */
+  char *beginp;    /* start of string -- virtual NUL precedes */
+  char *endp;      /* end of string -- virtual NUL here */
+  char *coldp;     /* can be no match starting before here */
+  char **lastpos;  /* [nplus+1] */
+  STATEVARS;
+  states st;       /* current states */
+  states fresh;    /* states for a fresh start */
+  states tmp;      /* temporary */
+  states empty;    /* empty set of states */
+};
+
+static int matcher(struct re_guts *, char *, size_t, llvm_regmatch_t[], int);
+static char *dissect(struct match *, char *, char *, sopno, sopno);
+static char *backref(struct match *, char *, char *, sopno, sopno, sopno, int);
+static char *fast(struct match *, char *, char *, sopno, sopno);
+static char *slow(struct match *, char *, char *, sopno, sopno);
+static states step(struct re_guts *, sopno, sopno, states, int, states);
+#define MAX_RECURSION 100
+#define BOL (OUT+1)
+#define EOL (BOL+1)
+#define BOLEOL (BOL+2)
+#define NOTHING (BOL+3)
+#define BOW (BOL+4)
+#define EOW (BOL+5)
+#define CODEMAX (BOL+5)  /* highest code used */
+#define NONCHAR(c) ((c) > CHAR_MAX)
+#define NNONCHAR (CODEMAX-CHAR_MAX)
+#ifdef REDEBUG
+static void print(struct match *, char *, states, int, FILE *);
+#endif
+#ifdef REDEBUG
+static void at(struct match *, char *, char *, char *, sopno, sopno);
+#endif
+#ifdef REDEBUG
+static char *pchar(int);
+#endif
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+static int nope = 0;
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
+
+/*
+ - matcher - the actual matching engine
+ */
+static int      /* 0 success, REG_NOMATCH failure */
+matcher(struct re_guts *g, char *string, size_t nmatch, llvm_regmatch_t pmatch[],
+    int eflags)
+{
+  char *endp;
+  size_t i;
+  struct match mv;
+  struct match *m = &mv;
+  char *dp;
+  const sopno gf = g->firststate+1;  /* +1 for OEND */
+  const sopno gl = g->laststate;
+  char *start;
+  char *stop;
+
+  /* simplify the situation where possible */
+  if (g->cflags&REG_NOSUB)
+    nmatch = 0;
+  if (eflags&REG_STARTEND) {
+    start = string + pmatch[0].rm_so;
+    stop = string + pmatch[0].rm_eo;
+  } else {
+    start = string;
+    stop = start + strlen(start);
+  }
+  if (stop < start)
+    return(REG_INVARG);
+
+  /* prescreening; this does wonders for this rather slow code */
+  if (g->must != NULL) {
+    for (dp = start; dp < stop; dp++)
+      if (*dp == g->must[0] && stop - dp >= g->mlen &&
+          memcmp(dp, g->must, (size_t)g->mlen) == 0)
+        break;
+    if (dp == stop)    /* we didn't find g->must */
+      return(REG_NOMATCH);
+  }
+
+  /* match struct setup */
+  m->g = g;
+  m->eflags = eflags;
+  m->pmatch = NULL;
+  m->lastpos = NULL;
+  m->offp = string;
+  m->beginp = start;
+  m->endp = stop;
+  STATESETUP(m, 4);
+  SETUP(m->st);
+  SETUP(m->fresh);
+  SETUP(m->tmp);
+  SETUP(m->empty);
+  CLEAR(m->empty);
+
+  /* this loop does only one repetition except for backrefs */
+  for (;;) {
+    endp = fast(m, start, stop, gf, gl);
+    if (endp == NULL) {    /* a miss */
+      free(m->pmatch);
+      free(m->lastpos);
+      STATETEARDOWN(m);
+      return(REG_NOMATCH);
+    }
+    if (nmatch == 0 && !g->backrefs)
+      break;    /* no further info needed */
+
+    /* where? */
+    assert(m->coldp != NULL);
+    for (;;) {
+      NOTE("finding start");
+      endp = slow(m, m->coldp, stop, gf, gl);
+      if (endp != NULL)
+        break;
+      assert(m->coldp < m->endp);
+      m->coldp++;
+    }
+    if (nmatch == 1 && !g->backrefs)
+      break;    /* no further info needed */
+
+    /* oh my, he wants the subexpressions... */
+    if (m->pmatch == NULL)
+      m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
+          sizeof(llvm_regmatch_t));
+    if (m->pmatch == NULL) {
+      STATETEARDOWN(m);
+      return(REG_ESPACE);
+    }
+    for (i = 1; i <= m->g->nsub; i++)
+      m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+    if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+      NOTE("dissecting");
+      dp = dissect(m, m->coldp, endp, gf, gl);
+    } else {
+      if (g->nplus > 0 && m->lastpos == NULL)
+        m->lastpos = (char **)malloc((g->nplus+1) *
+            sizeof(char *));
+      if (g->nplus > 0 && m->lastpos == NULL) {
+        free(m->pmatch);
+        STATETEARDOWN(m);
+        return(REG_ESPACE);
+      }
+      NOTE("backref dissect");
+      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+    }
+    if (dp != NULL)
+      break;
+
+    /* uh-oh...
we couldn't find a subexpression-level match */ + assert(g->backrefs); /* must be back references doing it */ + assert(g->nplus == 0 || m->lastpos != NULL); + for (;;) { + if (dp != NULL || endp <= m->coldp) + break; /* defeat */ + NOTE("backoff"); + endp = slow(m, m->coldp, endp-1, gf, gl); + if (endp == NULL) + break; /* defeat */ + /* try it on a shorter possibility */ +#ifndef NDEBUG + for (i = 1; i <= m->g->nsub; i++) { + assert(m->pmatch[i].rm_so == -1); + assert(m->pmatch[i].rm_eo == -1); + } +#endif + NOTE("backoff dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + assert(dp == NULL || dp == endp); + if (dp != NULL) /* found a shorter one */ + break; + + /* despite initial appearances, there is no match here */ + NOTE("false alarm"); + if (m->coldp == stop) + break; + start = m->coldp + 1; /* recycle starting later */ + } + + /* fill in the details if requested */ + if (nmatch > 0) { + pmatch[0].rm_so = m->coldp - m->offp; + pmatch[0].rm_eo = endp - m->offp; + } + if (nmatch > 1) { + assert(m->pmatch != NULL); + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) + pmatch[i] = m->pmatch[i]; + else { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + } + + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + free((char *)m->lastpos); + STATETEARDOWN(m); + return(0); +} + +/* + - dissect - figure out what matched what, no back references + */ +static char * /* == stop (success) always */ +dissect(struct match *m, char *start, char *stop, sopno startst, sopno stopst) +{ + int i; + sopno ss; /* start sop of current subRE */ + sopno es; /* end sop of current subRE */ + char *sp; /* start of string matched by it */ + char *stp; /* string matched by it cannot pass here */ + char *rest; /* start of rest of string */ + char *tail; /* string unmatched by rest of RE */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + char *ssp; /* start of string matched by subsubRE */ + char *sep; /* end of string matched by subsubRE */ + char *oldssp; /* previous ssp */ + + AT("diss", start, stop, startst, stopst); + sp = start; + for (ss = startst; ss < stopst; ss = es) { + /* identify end of subRE */ + es = ss; + switch (OP(m->g->strip[es])) { + case OPLUS_: + case OQUEST_: + es += OPND(m->g->strip[es]); + break; + case OCH_: + while (OP(m->g->strip[es]) != O_CH) + es += OPND(m->g->strip[es]); + break; + } + es++; + + /* figure out what it matched */ + switch (OP(m->g->strip[ss])) { + case OEND: + assert(nope); + break; + case OCHAR: + sp++; + break; + case OBOL: + case OEOL: + case OBOW: + case OEOW: + break; + case OANY: + case OANYOF: + sp++; + break; + case OBACK_: + case O_BACK: + assert(nope); + break; + /* cases where length of match is hard to find */ + case OQUEST_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + /* did innards match? */ + if (slow(m, sp, rest, ssub, esub) != NULL) { + char *dp = dissect(m, sp, rest, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == rest); + } else /* no */ + assert(sp == rest); + sp = rest; + break; + case OPLUS_: + stp = stop; + for (;;) { + /* how long could this one be? 
 */
+        rest = slow(m, sp, stp, ss, es);
+        assert(rest != NULL);  /* it did match */
+        /* could the rest match the rest? */
+        tail = slow(m, rest, stop, es, stopst);
+        if (tail == stop)
+          break;    /* yes! */
+        /* no -- try a shorter match for this one */
+        stp = rest - 1;
+        assert(stp >= sp);  /* it did work */
+      }
+      ssub = ss + 1;
+      esub = es - 1;
+      ssp = sp;
+      oldssp = ssp;
+      for (;;) {  /* find last match of innards */
+        sep = slow(m, ssp, rest, ssub, esub);
+        if (sep == NULL || sep == ssp)
+          break;  /* failed or matched null */
+        oldssp = ssp;  /* on to next try */
+        ssp = sep;
+      }
+      if (sep == NULL) {
+        /* last successful match */
+        sep = ssp;
+        ssp = oldssp;
+      }
+      assert(sep == rest);  /* must exhaust substring */
+      assert(slow(m, ssp, sep, ssub, esub) == rest);
+      {
+        char *dp = dissect(m, ssp, sep, ssub, esub);
+        (void)dp;  /* avoid warning if assertions off */
+        assert(dp == sep);
+      }
+      sp = rest;
+      break;
+    case OCH_:
+      stp = stop;
+      for (;;) {
+        /* how long could this one be? */
+        rest = slow(m, sp, stp, ss, es);
+        assert(rest != NULL);  /* it did match */
+        /* could the rest match the rest? */
+        tail = slow(m, rest, stop, es, stopst);
+        if (tail == stop)
+          break;    /* yes! */
+        /* no -- try a shorter match for this one */
+        stp = rest - 1;
+        assert(stp >= sp);  /* it did work */
+      }
+      ssub = ss + 1;
+      esub = ss + OPND(m->g->strip[ss]) - 1;
+      assert(OP(m->g->strip[esub]) == OOR1);
+      for (;;) {  /* find first matching branch */
+        if (slow(m, sp, rest, ssub, esub) == rest)
+          break;  /* it matched all of it */
+        /* that one missed, try next one */
+        assert(OP(m->g->strip[esub]) == OOR1);
+        esub++;
+        assert(OP(m->g->strip[esub]) == OOR2);
+        ssub = esub + 1;
+        esub += OPND(m->g->strip[esub]);
+        if (OP(m->g->strip[esub]) == OOR2)
+          esub--;
+        else
+          assert(OP(m->g->strip[esub]) == O_CH);
+      }
+      {
+        char *dp = dissect(m, sp, rest, ssub, esub);
+        (void)dp;  /* avoid warning if assertions off */
+        assert(dp == rest);
+      }
+      sp = rest;
+      break;
+    case O_PLUS:
+    case O_QUEST:
+    case OOR1:
+    case OOR2:
+    case O_CH:
+      assert(nope);
+      break;
+    case OLPAREN:
+      i = OPND(m->g->strip[ss]);
+      assert(0 < i && i <= m->g->nsub);
+      m->pmatch[i].rm_so = sp - m->offp;
+      break;
+    case ORPAREN:
+      i = OPND(m->g->strip[ss]);
+      assert(0 < i && i <= m->g->nsub);
+      m->pmatch[i].rm_eo = sp - m->offp;
+      break;
+    default:    /* uh oh */
+      assert(nope);
+      break;
+    }
+  }
+
+  assert(sp == stop);
+  return(sp);
+}
+
+/*
+ - backref - figure out what matched what, figuring in back references
+ */
+static char *      /* == stop (success) or NULL (failure) */
+backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst,
+    sopno lev, int rec)      /* PLUS nesting level */
+{
+  int i;
+  sopno ss;    /* start sop of current subRE */
+  char *sp;    /* start of string matched by it */
+  sopno ssub;  /* start sop of subsubRE */
+  sopno esub;  /* end sop of subsubRE */
+  char *ssp;   /* start of string matched by subsubRE */
+  char *dp;
+  size_t len;
+  int hard;
+  sop s;
+  llvm_regoff_t offsave;
+  cset *cs;
+
+  AT("back", start, stop, startst, stopst);
+  sp = start;
+
+  /* get as far as we can with easy stuff */
+  hard = 0;
+  for (ss = startst; !hard && ss < stopst; ss++)
+    switch (OP(s = m->g->strip[ss])) {
+    case OCHAR:
+      if (sp == stop || *sp++ != (char)OPND(s))
+        return(NULL);
+      break;
+    case OANY:
+      if (sp == stop)
+        return(NULL);
+      sp++;
+      break;
+    case OANYOF:
+      cs = &m->g->sets[OPND(s)];
+      if (sp == stop || !CHIN(cs, *sp++))
+        return(NULL);
+      break;
+    case OBOL:
+      if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+          (sp < m->endp && *(sp-1) == '\n' &&
+          (m->g->cflags&REG_NEWLINE)) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case OEOL:
+      if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+          (sp < m->endp && *sp == '\n' &&
+          (m->g->cflags&REG_NEWLINE)) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case OBOW:
+      if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+          (sp < m->endp && *(sp-1) == '\n' &&
+          (m->g->cflags&REG_NEWLINE)) ||
+          (sp > m->beginp &&
+          !ISWORD(*(sp-1))) ) &&
+          (sp < m->endp && ISWORD(*sp)) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case OEOW:
+      if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+          (sp < m->endp && *sp == '\n' &&
+          (m->g->cflags&REG_NEWLINE)) ||
+          (sp < m->endp && !ISWORD(*sp)) ) &&
+          (sp > m->beginp && ISWORD(*(sp-1))) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case O_QUEST:
+      break;
+    case OOR1:  /* matches null but needs to skip */
+      ss++;
+      s = m->g->strip[ss];
+      do {
+        assert(OP(s) == OOR2);
+        ss += OPND(s);
+      } while (OP(s = m->g->strip[ss]) != O_CH);
+      /* note that the ss++ gets us past the O_CH */
+      break;
+    default:  /* have to make a choice */
+      hard = 1;
+      break;
+    }
+  if (!hard) {    /* that was it! */
+    if (sp != stop)
+      return(NULL);
+    return(sp);
+  }
+  ss--;      /* adjust for the for's final increment */
+
+  /* the hard stuff */
+  AT("hard", sp, stop, ss, stopst);
+  s = m->g->strip[ss];
+  switch (OP(s)) {
+  case OBACK_:    /* the vilest depths */
+    i = OPND(s);
+    assert(0 < i && i <= m->g->nsub);
+    if (m->pmatch[i].rm_eo == -1)
+      return(NULL);
+    assert(m->pmatch[i].rm_so != -1);
+    len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+    if (len == 0 && rec++ > MAX_RECURSION)
+      return(NULL);
+    assert(stop - m->beginp >= len);
+    if (sp > stop - len)
+      return(NULL);  /* not enough left to match */
+    ssp = m->offp + m->pmatch[i].rm_so;
+    if (memcmp(sp, ssp, len) != 0)
+      return(NULL);
+    while (m->g->strip[ss] != SOP(O_BACK, i))
+      ss++;
+    return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+    break;
+  case OQUEST_:    /* to null or not */
+    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+    if (dp != NULL)
+      return(dp);    /* not */
+    return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+    break;
+  case OPLUS_:
+    assert(m->lastpos != NULL);
+    assert(lev+1 <= m->g->nplus);
+    m->lastpos[lev+1] = sp;
+    return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+    break;
+  case O_PLUS:
+    if (sp == m->lastpos[lev])  /* last pass matched null */
+      return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+    /* try another pass */
+    m->lastpos[lev] = sp;
+    dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+    if (dp == NULL)
+      return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+    else
+      return(dp);
+    break;
+  case OCH_:    /* find the right one, if any */
+    ssub = ss + 1;
+    esub = ss + OPND(s) - 1;
+    assert(OP(m->g->strip[esub]) == OOR1);
+    for (;;) {  /* find first matching branch */
+      dp = backref(m, sp, stop, ssub, esub, lev, rec);
+      if (dp != NULL)
+        return(dp);
+      /* that one missed, try next one */
+      if (OP(m->g->strip[esub]) == O_CH)
+        return(NULL);  /* there is none */
+      esub++;
+      assert(OP(m->g->strip[esub]) == OOR2);
+      ssub = esub + 1;
+      esub += OPND(m->g->strip[esub]);
+      if (OP(m->g->strip[esub]) == OOR2)
+        esub--;
+      else
+        assert(OP(m->g->strip[esub]) == O_CH);
+    }
+    break;
+  case OLPAREN:    /* must undo assignment if rest fails */
+    i = OPND(s);
+    assert(0 < i && i <= m->g->nsub);
+    offsave = m->pmatch[i].rm_so;
+    m->pmatch[i].rm_so = sp - m->offp;
+    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+    if (dp != NULL)
+      return(dp);
+    m->pmatch[i].rm_so = offsave;
+    return(NULL);
+    break;
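+  /* ORPAREN below is the mirror image: provisionally set rm_eo, try the rest, restore on failure */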
+  case ORPAREN:    /* must undo assignment if rest fails */
+    i = OPND(s);
+    assert(0 < i && i <= m->g->nsub);
+    offsave = m->pmatch[i].rm_eo;
+    m->pmatch[i].rm_eo = sp - m->offp;
+    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+    if (dp != NULL)
+      return(dp);
+    m->pmatch[i].rm_eo = offsave;
+    return(NULL);
+    break;
+  default:    /* uh oh */
+    assert(nope);
+    break;
+  }
+
+  /* "can't happen" */
+  assert(nope);
+  /* NOTREACHED */
+  return NULL;
+}
+
+/*
+ - fast - step through the string at top speed
+ */
+static char *      /* where tentative match ended, or NULL */
+fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+  states st = m->st;
+  states fresh = m->fresh;
+  states tmp = m->tmp;
+  char *p = start;
+  int c = (start == m->beginp) ? OUT : *(start-1);
+  int lastc;   /* previous c */
+  int flagch;
+  int i;
+  char *coldp; /* last p after which no match was underway */
+
+  CLEAR(st);
+  SET1(st, startst);
+  st = step(m->g, startst, stopst, st, NOTHING, st);
+  ASSIGN(fresh, st);
+  SP("start", st, *p);
+  coldp = NULL;
+  for (;;) {
+    /* next character */
+    lastc = c;
+    c = (p == m->endp) ? OUT : *p;
+    if (EQ(st, fresh))
+      coldp = p;
+
+    /* is there an EOL and/or BOL between lastc and c? */
+    flagch = '\0';
+    i = 0;
+    if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+        (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+      flagch = BOL;
+      i = m->g->nbol;
+    }
+    if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+        (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+      flagch = (flagch == BOL) ? BOLEOL : EOL;
+      i += m->g->neol;
+    }
+    if (i != 0) {
+      for (; i > 0; i--)
+        st = step(m->g, startst, stopst, st, flagch, st);
+      SP("boleol", st, c);
+    }
+
+    /* how about a word boundary? */
+    if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+        (c != OUT && ISWORD(c)) ) {
+      flagch = BOW;
+    }
+    if ( (lastc != OUT && ISWORD(lastc)) &&
+        (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+      flagch = EOW;
+    }
+    if (flagch == BOW || flagch == EOW) {
+      st = step(m->g, startst, stopst, st, flagch, st);
+      SP("boweow", st, c);
+    }
+
+    /* are we done? */
+    if (ISSET(st, stopst) || p == stop)
+      break;    /* NOTE BREAK OUT */
+
+    /* no, we must deal with this character */
+    ASSIGN(tmp, st);
+    ASSIGN(st, fresh);
+    assert(c != OUT);
+    st = step(m->g, startst, stopst, tmp, c, st);
+    SP("aft", st, c);
+    assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+    p++;
+  }
+
+  assert(coldp != NULL);
+  m->coldp = coldp;
+  if (ISSET(st, stopst))
+    return(p+1);
+  else
+    return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ */
+static char *      /* where it ended */
+slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst)
+{
+  states st = m->st;
+  states empty = m->empty;
+  states tmp = m->tmp;
+  char *p = start;
+  int c = (start == m->beginp) ? OUT : *(start-1);
+  int lastc;    /* previous c */
+  int flagch;
+  int i;
+  char *matchp; /* last p at which a match ended */
+
+  AT("slow", start, stop, startst, stopst);
+  CLEAR(st);
+  SET1(st, startst);
+  SP("sstart", st, *p);
+  st = step(m->g, startst, stopst, st, NOTHING, st);
+  matchp = NULL;
+  for (;;) {
+    /* next character */
+    lastc = c;
+    c = (p == m->endp) ? OUT : *p;
+
+    /* is there an EOL and/or BOL between lastc and c? */
+    flagch = '\0';
+    i = 0;
+    if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+        (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+      flagch = BOL;
+      i = m->g->nbol;
+    }
+    if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+        (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+      flagch = (flagch == BOL) ?
BOLEOL : EOL; + i += m->g->neol; + } + if (i != 0) { + for (; i > 0; i--) + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboleol", st, c); + } + + /* how about a word boundary? */ + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && + (c != OUT && ISWORD(c)) ) { + flagch = BOW; + } + if ( (lastc != OUT && ISWORD(lastc)) && + (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + flagch = EOW; + } + if (flagch == BOW || flagch == EOW) { + st = step(m->g, startst, stopst, st, flagch, st); + SP("sboweow", st, c); + } + + /* are we done? */ + if (ISSET(st, stopst)) + matchp = p; + if (EQ(st, empty) || p == stop) + break; /* NOTE BREAK OUT */ + + /* no, we must deal with this character */ + ASSIGN(tmp, st); + ASSIGN(st, empty); + assert(c != OUT); + st = step(m->g, startst, stopst, tmp, c, st); + SP("saft", st, c); + assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); + p++; + } + + return(matchp); +} + + +/* + - step - map set of states reachable before char to set reachable after + */ +static states +step(struct re_guts *g, + sopno start, /* start state within strip */ + sopno stop, /* state after stop state within strip */ + states bef, /* states reachable before */ + int ch, /* character or NONCHAR code */ + states aft) /* states already known reachable after */ +{ + cset *cs; + sop s; + sopno pc; + onestate here; /* note, macros know this name */ + sopno look; + int i; + + for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { + s = g->strip[pc]; + switch (OP(s)) { + case OEND: + assert(pc == stop-1); + break; + case OCHAR: + /* only characters can match */ + assert(!NONCHAR(ch) || ch != (char)OPND(s)); + if (ch == (char)OPND(s)) + FWD(aft, bef, 1); + break; + case OBOL: + if (ch == BOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OEOL: + if (ch == EOL || ch == BOLEOL) + FWD(aft, bef, 1); + break; + case OBOW: + if (ch == BOW) + FWD(aft, bef, 1); + break; + case OEOW: + if (ch == EOW) + FWD(aft, bef, 1); + break; + case OANY: + if (!NONCHAR(ch)) + FWD(aft, bef, 1); + break; + case OANYOF: + cs = &g->sets[OPND(s)]; + if (!NONCHAR(ch) && CHIN(cs, ch)) + FWD(aft, bef, 1); + break; + case OBACK_: /* ignored here */ + case O_BACK: + FWD(aft, aft, 1); + break; + case OPLUS_: /* forward, this is just an empty */ + FWD(aft, aft, 1); + break; + case O_PLUS: /* both forward and back */ + FWD(aft, aft, 1); + i = ISSETBACK(aft, OPND(s)); + BACK(aft, aft, OPND(s)); + if (!i && ISSETBACK(aft, OPND(s))) { + /* oho, must reconsider loop body */ + pc -= OPND(s) + 1; + INIT(here, pc); + } + break; + case OQUEST_: /* two branches, both forward */ + FWD(aft, aft, 1); + FWD(aft, aft, OPND(s)); + break; + case O_QUEST: /* just an empty */ + FWD(aft, aft, 1); + break; + case OLPAREN: /* not significant here */ + case ORPAREN: + FWD(aft, aft, 1); + break; + case OCH_: /* mark the first two branches */ + FWD(aft, aft, 1); + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + break; + case OOR1: /* done a branch, find the O_CH */ + if (ISSTATEIN(aft, here)) { + for (look = 1; + OP(s = g->strip[pc+look]) != O_CH; + look += OPND(s)) + assert(OP(s) == OOR2); + FWD(aft, aft, look); + } + break; + case OOR2: /* propagate OCH_'s marking */ + FWD(aft, aft, 1); + if (OP(g->strip[pc+OPND(s)]) != O_CH) { + assert(OP(g->strip[pc+OPND(s)]) == OOR2); + FWD(aft, aft, OPND(s)); + } + break; + case O_CH: /* just empty */ + FWD(aft, aft, 1); + break; + default: /* ooooops... 
 */
+      assert(nope);
+      break;
+    }
+  }
+
+  return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ */
+static void
+print(struct match *m, char *caption, states st, int ch, FILE *d)
+{
+  struct re_guts *g = m->g;
+  int i;
+  int first = 1;
+
+  if (!(m->eflags&REG_TRACE))
+    return;
+
+  (void)fprintf(d, "%s", caption);
+  if (ch != '\0')
+    (void)fprintf(d, " %s", pchar(ch));
+  for (i = 0; i < g->nstates; i++)
+    if (ISSET(st, i)) {
+      (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+      first = 0;
+    }
+  (void)fprintf(d, "\n");
+}
+
+/*
+ - at - print current situation
+ */
+static void
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
+    sopno stopst)
+{
+  if (!(m->eflags&REG_TRACE))
+    return;
+
+  (void)printf("%s %s-", title, pchar(*start));
+  (void)printf("%s ", pchar(*stop));
+  (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
+}
+
+#ifndef PCHARDONE
+#define PCHARDONE  /* never again */
+/*
+ - pchar - make a character printable
+ *
+ * Is this identical to regchar() over in debug.c?  Well, yes.  But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient.  It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ */
+static char *      /* -> representation */
+pchar(int ch)
+{
+  static char pbuf[10];
+
+  if (isprint(ch) || ch == ' ')
+    (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
+  else
+    (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
+  return(pbuf);
+}
+#endif
+#endif
+
+#undef matcher
+#undef fast
+#undef slow
+#undef dissect
+#undef backref
+#undef step
+#undef print
+#undef at
+#undef match
+#undef nope
diff --git a/lib/Support/regerror.c b/lib/Support/regerror.c
new file mode 100644
index 0000000000000..1d67c9a2b03b1
--- /dev/null
+++ b/lib/Support/regerror.c
@@ -0,0 +1,135 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ *	The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c	8.4 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif
+
+static const char *regatoi(const llvm_regex_t *, char *, int);
+
+static struct rerr {
+  int code;
+  const char *name;
+  const char *explain;
+} rerrs[] = {
+  { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" },
+  { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+  { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+  { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+  { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+  { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+  { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+  { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+  { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+  { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+  { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+  { REG_ESPACE, "REG_ESPACE", "out of memory" },
+  { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+  { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+  { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+  { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+  { 0, "", "*** unknown regexp error code ***" }
+};
+
+/*
+ - llvm_regerror - the interface to error numbers
+ = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+ */
+/* ARGSUSED */
+size_t
+llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+  struct rerr *r;
+  size_t len;
+  int target = errcode &~ REG_ITOA;
+  const char *s;
+  char convbuf[50];
+
+  if (errcode == REG_ATOI)
+    s = regatoi(preg, convbuf, sizeof convbuf);
+  else {
+    for (r = rerrs; r->code != 0; r++)
+      if (r->code == target)
+        break;
+
+    if (errcode&REG_ITOA) {
+      if (r->code != 0) {
+        assert(strlen(r->name) < sizeof(convbuf));
+        (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf);
+      } else
+        (void)snprintf(convbuf, sizeof convbuf,
+            "REG_0x%x", target);
+      s = convbuf;
+    } else
+      s = r->explain;
+  }
+
+  len = strlen(s) + 1;
+  if (errbuf_size > 0) {
+    llvm_strlcpy(errbuf, s, errbuf_size);
+  }
+
+  return(len);
+}
+
+/*
+ - regatoi - internal routine to implement REG_ATOI
+ */
+static const char *
+regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
+{
+  struct rerr *r;
+
+  for (r = rerrs; r->code != 0; r++)
+    if (strcmp(r->name, preg->re_endp) == 0)
+      break;
+  if (r->code == 0)
+    return("0");
+
+  (void)snprintf(localbuf, localbufsize, "%d", r->code);
+  return(localbuf);
+}
diff --git a/lib/Support/regex2.h b/lib/Support/regex2.h
new file mode 100644
index 0000000000000..21659c34449a2
--- /dev/null
+++ b/lib/Support/regex2.h
@@ -0,0 +1,157 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regex2.h 8.4 (Berkeley) 3/20/94 + */ + +/* + * internals of regex_t + */ +#define MAGIC1 ((('r'^0200)<<8) | 'e') + +/* + * The internal representation is a *strip*, a sequence of + * operators ending with an endmarker. (Some terminology etc. is a + * historical relic of earlier versions which used multiple strips.) + * Certain oddities in the representation are there to permit running + * the machinery backwards; in particular, any deviation from sequential + * flow must be marked at both its source and its destination. Some + * fine points: + * + * - OPLUS_ and O_PLUS are *inside* the loop they create. + * - OQUEST_ and O_QUEST are *outside* the bypass they create. + * - OCH_ and O_CH are *outside* the multi-way branch they create, while + * OOR1 and OOR2 are respectively the end and the beginning of one of + * the branches. Note that there is an implicit OOR2 following OCH_ + * and an implicit OOR1 preceding O_CH. + * + * In state representations, an operator's bit is on to signify a state + * immediately *preceding* "execution" of that operator. 
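+ *
+ * As a rough illustration (offsets omitted), "a|b" compiles to a strip of
+ * the shape OEND OCH_ OCHAR('a') OOR1 OOR2 OCHAR('b') O_CH OEND, where the
+ * first OEND is the initial state (firststate) and the last the final one.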
+ */
+typedef unsigned long sop;  /* strip operator */
+typedef long sopno;
+#define OPRMASK 0xf8000000LU
+#define OPDMASK 0x07ffffffLU
+#define OPSHIFT ((unsigned)27)
+#define OP(n) ((n)&OPRMASK)
+#define OPND(n) ((n)&OPDMASK)
+#define SOP(op, opnd) ((op)|(opnd))
+/* operators       meaning       operand */
+/*                 (back, fwd are offsets) */
+#define OEND    (1LU<<OPSHIFT)   /* endmarker     - */
+#define OCHAR   (2LU<<OPSHIFT)   /* character     unsigned char */
+#define OBOL    (3LU<<OPSHIFT)   /* left anchor   - */
+#define OEOL    (4LU<<OPSHIFT)   /* right anchor  - */
+#define OANY    (5LU<<OPSHIFT)   /* .             - */
+#define OANYOF  (6LU<<OPSHIFT)   /* [...]         set number */
+#define OBACK_  (7LU<<OPSHIFT)   /* begin \d      paren number */
+#define O_BACK  (8LU<<OPSHIFT)   /* end \d        paren number */
+#define OPLUS_  (9LU<<OPSHIFT)   /* + prefix      fwd to suffix */
+#define O_PLUS  (10LU<<OPSHIFT)  /* + suffix      back to prefix */
+#define OQUEST_ (11LU<<OPSHIFT)  /* ? prefix      fwd to suffix */
+#define O_QUEST (12LU<<OPSHIFT)  /* ? suffix      back to prefix */
+#define OLPAREN (13LU<<OPSHIFT)  /* (             fwd to ) */
+#define ORPAREN (14LU<<OPSHIFT)  /* )             back to ( */
+#define OCH_    (15LU<<OPSHIFT)  /* begin choice  fwd to OOR2 */
+#define OOR1    (16LU<<OPSHIFT)  /* | pt. 1       back to OOR1 or OCH_ */
+#define OOR2    (17LU<<OPSHIFT)  /* | pt. 2       fwd to OOR2 or O_CH */
+#define O_CH    (18LU<<OPSHIFT)  /* end choice    back to OOR1 */
+#define OBOW    (19LU<<OPSHIFT)  /* begin word    - */
+#define OEOW    (20LU<<OPSHIFT)  /* end word      - */
+
+/*
+ * Structure for [] character-set representation.  Character sets are
+ * done as bit vectors, grouped 8 to a byte vector for compactness.
+ * The individual set therefore has both a pointer to the byte vector
+ * and a mask to pick out the relevant bit of each byte.  A hash code
+ * simplifies the comparison needed to check whether two sets could be
+ * identical.
+ */
+typedef struct {
+  uch *ptr;      /* -> uch [csetsize] */
+  uch mask;      /* bit within array */
+  uch hash;      /* hash code */
+  size_t smultis;
+  char *multis;  /* -> char[smulti] ab\0cd\0ef\0\0 */
+} cset;
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
+#define MCadd(p, cs, cp) mcadd(p, cs, cp)  /* llvm_regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
+
+/* stuff for character categories */
+typedef unsigned char cat_t;
+
+/*
+ * main compiled-expression structure
+ */
+struct re_guts {
+  int magic;
+#  define MAGIC2 ((('R'^0200)<<8)|'E')
+  sop *strip;        /* malloced area for strip */
+  int csetsize;      /* number of bits in a cset vector */
+  int ncsets;        /* number of csets in use */
+  cset *sets;        /* -> cset [ncsets] */
+  uch *setbits;      /* -> uch[csetsize][ncsets/CHAR_BIT] */
+  int cflags;        /* copy of llvm_regcomp() cflags argument */
+  sopno nstates;     /* = number of sops */
+  sopno firststate;  /* the initial OEND (normally 0) */
+  sopno laststate;   /* the final OEND */
+  int iflags;        /* internal flags */
+#  define USEBOL 01      /* used ^ */
+#  define USEEOL 02      /* used $ */
+#  define REGEX_BAD 04   /* something wrong */
+  int nbol;          /* number of ^ used */
+  int neol;          /* number of $ used */
+  int ncategories;   /* how many character categories */
+  cat_t *categories; /* ->catspace[-CHAR_MIN] */
+  char *must;        /* match must contain this string */
+  int mlen;          /* length of must */
+  size_t nsub;       /* copy of re_nsub */
+  int backrefs;      /* does it use back references? */
+  sopno nplus;       /* how deep does it nest +s? */
+  /* catspace must be last */
+  cat_t catspace[1]; /* actually [NC] */
+};
+
+/* misc utilities */
+#define OUT (CHAR_MAX+1)  /* a non-character value */
+#define ISWORD(c) (isalnum(c&0xff) || (c) == '_')
diff --git a/lib/Support/regex_impl.h b/lib/Support/regex_impl.h
new file mode 100644
index 0000000000000..f8296c9ff75e4
--- /dev/null
+++ b/lib/Support/regex_impl.h
@@ -0,0 +1,108 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992 Henry Spencer.
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer of the University of Toronto.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex.h	8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _REGEX_H_
+#define _REGEX_H_
+
+#include <sys/types.h>
+typedef off_t llvm_regoff_t;
+typedef struct {
+  llvm_regoff_t rm_so;  /* start of match */
+  llvm_regoff_t rm_eo;  /* end of match */
+} llvm_regmatch_t;
+
+typedef struct llvm_regex {
+  int re_magic;
+  size_t re_nsub;       /* number of parenthesized subexpressions */
+  const char *re_endp;  /* end pointer for REG_PEND */
+  struct re_guts *re_g; /* none of your business :-) */
+} llvm_regex_t;
+
+/* llvm_regcomp() flags */
+#define REG_BASIC 0000
+#define REG_EXTENDED 0001
+#define REG_ICASE 0002
+#define REG_NOSUB 0004
+#define REG_NEWLINE 0010
+#define REG_NOSPEC 0020
+#define REG_PEND 0040
+#define REG_DUMP 0200
+
+/* llvm_regerror() flags */
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+#define REG_EMPTY 14
+#define REG_ASSERT 15
+#define REG_INVARG 16
+#define REG_ATOI 255   /* convert name to number (!) */
+#define REG_ITOA 0400  /* convert number to name (!) */
+
+/* llvm_regexec() flags */
+#define REG_NOTBOL 00001
+#define REG_NOTEOL 00002
+#define REG_STARTEND 00004
+#define REG_TRACE 00400  /* tracing of execution */
+#define REG_LARGE 01000  /* force large representation */
+#define REG_BACKR 02000  /* force use of backref code */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int llvm_regcomp(llvm_regex_t *, const char *, int);
+size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+                 llvm_regmatch_t [], int);
+void llvm_regfree(llvm_regex_t *);
+size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_REGEX_H_ */
diff --git a/lib/Support/regexec.c b/lib/Support/regexec.c
new file mode 100644
index 0000000000000..7d70f6e16c78e
--- /dev/null
+++ b/lib/Support/regexec.c
@@ -0,0 +1,161 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ *	The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regexec.c	8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * the outer shell of llvm_regexec()
+ *
+ * This file includes engine.inc *twice*, after muchos fiddling with the
+ * macros that code uses.  This lets the same code operate on two different
+ * representations for state sets.
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/* macros for manipulating states, small version */
+#define states long
+#define states1 states  /* for later use in llvm_regexec() decision */
+#define CLEAR(v) ((v) = 0)
+#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
+#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
+#define ASSIGN(d, s) ((d) = (s))
+#define EQ(a, b) ((a) == (b))
+#define STATEVARS long dummy  /* dummy version */
+#define STATESETUP(m, n) /* nothing */
+#define STATETEARDOWN(m) /* nothing */
+#define SETUP(v) ((v) = 0)
+#define onestate long
+#define INIT(o, n) ((o) = (unsigned long)1 << (n))
+#define INC(o) ((o) <<= 1)
+#define ISSTATEIN(v, o) (((v) & (o)) != 0)
+/* some abbreviations; note that some of these know variable names! */
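+/* (e.g. FWD shifts the current state's bit forward by n strip positions) */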
*/ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +/* function names */ +#define SNAMES /* engine.inc looks after details */ + +#include "regengine.inc" + +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES + +/* macros for manipulating states, large version */ +#define states char * +#define CLEAR(v) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) memmove(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define STATEVARS long vn; char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space == NULL) return(REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate long +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* function names */ +#define LNAMES /* flag */ + +#include "regengine.inc" + +/* + - llvm_regexec - interface for matching + * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. + */ +int /* 0 success, REG_NOMATCH failure */ +llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch, + llvm_regmatch_t pmatch[], int eflags) +{ + struct re_guts *g = preg->re_g; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#endif + + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return(REG_BADPAT); + assert(!(g->iflags®EX_BAD)); + if (g->iflags®EX_BAD) /* backstop for no-debug case */ + return(REG_BADPAT); + eflags = GOODFLAGS(eflags); + + if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE)) + return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); + else + return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); +} diff --git a/lib/Support/regfree.c b/lib/Support/regfree.c new file mode 100644 index 0000000000000..dc2b4af90fa74 --- /dev/null +++ b/lib/Support/regfree.c @@ -0,0 +1,72 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regfree.c 8.3 (Berkeley) 3/20/94 + */ + +#include +#include +#include +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +/* + - llvm_regfree - free everything + */ +void +llvm_regfree(llvm_regex_t *preg) +{ + struct re_guts *g; + + if (preg->re_magic != MAGIC1) /* oops */ + return; /* nice to complain, but hard */ + + g = preg->re_g; + if (g == NULL || g->magic != MAGIC2) /* oops again */ + return; + preg->re_magic = 0; /* mark it invalid */ + g->magic = 0; /* mark it invalid */ + + if (g->strip != NULL) + free((char *)g->strip); + if (g->sets != NULL) + free((char *)g->sets); + if (g->setbits != NULL) + free((char *)g->setbits); + if (g->must != NULL) + free(g->must); + free((char *)g); +} diff --git a/lib/Support/regstrlcpy.c b/lib/Support/regstrlcpy.c new file mode 100644 index 0000000000000..8b68afdf75f16 --- /dev/null +++ b/lib/Support/regstrlcpy.c @@ -0,0 +1,52 @@ +/* + * This code is derived from OpenBSD's libc, original license follows: + * + * Copyright (c) 1998 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#include "regex_impl.h" +/* + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. 
+ */ +size_t +llvm_strlcpy(char *dst, const char *src, size_t siz) +{ + char *d = dst; + const char *s = src; + size_t n = siz; + + /* Copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + + /* Not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + + return(s - src - 1); /* count does not include NUL */ +} diff --git a/lib/Support/regutils.h b/lib/Support/regutils.h new file mode 100644 index 0000000000000..d0ee100a382b3 --- /dev/null +++ b/lib/Support/regutils.h @@ -0,0 +1,53 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)utils.h 8.3 (Berkeley) 3/20/94 + */ + +/* utility definitions */ +#define NC (CHAR_MAX - CHAR_MIN + 1) +typedef unsigned char uch; + +/* switch off assertions (if not already off) if no REDEBUG */ +#ifndef REDEBUG +#ifndef NDEBUG +#define NDEBUG /* no assertions please */ +#endif +#endif +#include + +/* for old systems with bcopy() but no memmove() */ +#ifdef USEBCOPY +#define memmove(d, s, c) bcopy(s, d, c) +#endif diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt index bf7a0c601e859..2945e33d5b1cc 100644 --- a/lib/System/CMakeLists.txt +++ b/lib/System/CMakeLists.txt @@ -13,9 +13,32 @@ add_llvm_library(LLVMSystem Program.cpp RWMutex.cpp Signals.cpp + ThreadLocal.cpp Threading.cpp TimeValue.cpp - ThreadLocal.cpp + Unix/Alarm.inc + Unix/Host.inc + Unix/Memory.inc + Unix/Mutex.inc + Unix/Path.inc + Unix/Process.inc + Unix/Program.inc + Unix/RWMutex.inc + Unix/Signals.inc + Unix/ThreadLocal.inc + Unix/TimeValue.inc + Win32/Alarm.inc + Win32/DynamicLibrary.inc + Win32/Host.inc + Win32/Memory.inc + Win32/Mutex.inc + Win32/Path.inc + Win32/Process.inc + Win32/Program.inc + Win32/RWMutex.inc + Win32/Signals.inc + Win32/ThreadLocal.inc + Win32/TimeValue.inc ) if( BUILD_SHARED_LIBS AND NOT WIN32 ) diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp index 378fe262bc107..bad427a58d8cb 100644 --- a/lib/System/Disassembler.cpp +++ b/lib/System/Disassembler.cpp @@ -26,7 +26,7 @@ using namespace llvm; -bool llvm::sys::hasDisassembler(void) +bool llvm::sys::hasDisassembler() { #if defined (__i386__) || defined (__amd64__) || defined (__x86_64__) // We have option to enable udis86 library. diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp index ef5c9e6329910..6efab948fa76f 100644 --- a/lib/System/DynamicLibrary.cpp +++ b/lib/System/DynamicLibrary.cpp @@ -9,42 +9,43 @@ // // This header file implements the operating system DynamicLibrary concept. // +// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is +// not thread safe! +// //===----------------------------------------------------------------------===// #include "llvm/System/DynamicLibrary.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/System/RWMutex.h" #include "llvm/Config/config.h" #include #include #include +#include // Collection of symbol name/value pairs to be searched prior to any libraries. -static std::map symbols; -static llvm::sys::SmartRWMutex SymbolsLock; +static std::map *ExplicitSymbols = 0; +static struct ExplicitSymbolsDeleter { + ~ExplicitSymbolsDeleter() { + if (ExplicitSymbols) + delete ExplicitSymbols; + } +} Dummy; void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName, void *symbolValue) { - llvm::sys::SmartScopedWriter Writer(&SymbolsLock); - symbols[symbolName] = symbolValue; + if (ExplicitSymbols == 0) + ExplicitSymbols = new std::map(); + (*ExplicitSymbols)[symbolName] = symbolValue; } -// It is not possible to use ltdl.c on VC++ builds as the terms of its LGPL -// license and special exception would cause all of LLVM to be placed under -// the LGPL. This is because the exception applies only when libtool is -// used, and obviously libtool is not used with Visual Studio. An entirely -// separate implementation is provided in win32/DynamicLibrary.cpp. - #ifdef LLVM_ON_WIN32 #include "Win32/DynamicLibrary.inc" #else -//#include "ltdl.h" #include -#include using namespace llvm; using namespace llvm::sys; @@ -53,56 +54,44 @@ using namespace llvm::sys; //=== independent code. 
//===----------------------------------------------------------------------===// -//static std::vector OpenedHandles; -static std::vector OpenedHandles; - -DynamicLibrary::DynamicLibrary() {} +static std::vector *OpenedHandles = 0; -DynamicLibrary::~DynamicLibrary() { - SmartScopedWriter Writer(&SymbolsLock); - while(!OpenedHandles.empty()) { - void *H = OpenedHandles.back(); OpenedHandles.pop_back(); - dlclose(H); - } -} bool DynamicLibrary::LoadLibraryPermanently(const char *Filename, std::string *ErrMsg) { - SmartScopedWriter Writer(&SymbolsLock); void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL); if (H == 0) { - if (ErrMsg) - *ErrMsg = dlerror(); + if (ErrMsg) *ErrMsg = dlerror(); return true; } - OpenedHandles.push_back(H); + if (OpenedHandles == 0) + OpenedHandles = new std::vector(); + OpenedHandles->push_back(H); return false; } void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { - // check_ltdl_initialization(); - // First check symbols added via AddSymbol(). - SymbolsLock.reader_acquire(); - std::map::iterator I = symbols.find(symbolName); - std::map::iterator E = symbols.end(); - SymbolsLock.reader_release(); + if (ExplicitSymbols) { + std::map::iterator I = + ExplicitSymbols->find(symbolName); + std::map::iterator E = ExplicitSymbols->end(); - if (I != E) - return I->second; + if (I != E) + return I->second; + } - SymbolsLock.writer_acquire(); // Now search the libraries. - for (std::vector::iterator I = OpenedHandles.begin(), - E = OpenedHandles.end(); I != E; ++I) { - //lt_ptr ptr = lt_dlsym(*I, symbolName); - void *ptr = dlsym(*I, symbolName); - if (ptr) { - SymbolsLock.writer_release(); - return ptr; + if (OpenedHandles) { + for (std::vector::iterator I = OpenedHandles->begin(), + E = OpenedHandles->end(); I != E; ++I) { + //lt_ptr ptr = lt_dlsym(*I, symbolName); + void *ptr = dlsym(*I, symbolName); + if (ptr) { + return ptr; + } } } - SymbolsLock.writer_release(); #define EXPLICIT_SYMBOL(SYM) \ extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM diff --git a/lib/System/Errno.cpp b/lib/System/Errno.cpp index d046aba04dd5c..68f66f6e439bd 100644 --- a/lib/System/Errno.cpp +++ b/lib/System/Errno.cpp @@ -17,6 +17,10 @@ #if HAVE_STRING_H #include +#if HAVE_ERRNO_H +#include +#endif + //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system //=== independent code. @@ -26,7 +30,6 @@ namespace llvm { namespace sys { #if HAVE_ERRNO_H -#include std::string StrError() { return StrError(errno); } diff --git a/lib/System/Makefile b/lib/System/Makefile index 49704c3c625a6..d4fd60eee5f62 100644 --- a/lib/System/Makefile +++ b/lib/System/Makefile @@ -11,6 +11,12 @@ LEVEL = ../.. LIBRARYNAME = LLVMSystem BUILD_ARCHIVE = 1 +include $(LEVEL)/Makefile.config + +ifeq ($(HOST_OS),MingW) + REQUIRES_EH := 1 +endif + EXTRA_DIST = Unix Win32 README.txt include $(LEVEL)/Makefile.common diff --git a/lib/System/Memory.cpp b/lib/System/Memory.cpp index 375c73cf02049..e2d838dce0268 100644 --- a/lib/System/Memory.cpp +++ b/lib/System/Memory.cpp @@ -37,13 +37,16 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr, // icache invalidation for PPC and ARM. 
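/* [Editor's sketch, not part of the patch.] Self-modifying code such as a
 * JIT must flush freshly written instructions out of the data cache and
 * invalidate the instruction cache before executing them; x86 keeps the two
 * coherent, but PPC and ARM do not, which is what the platform branches
 * below handle. With a GCC-compatible compiler (modern GCC or Clang) the
 * whole dance can be written portably with a builtin:
 */
static void flush_code(void *start, unsigned long len) {
  char *begin = (char *)start;
  /* No-op where caches are coherent; expands to icbi/isync loops or a
     cache-maintenance syscall on architectures that need it. */
  __builtin___clear_cache(begin, begin + len);
}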
#if defined(__APPLE__) -#if (defined(__POWERPC__) || defined (__ppc__) || \ + +# if (defined(__POWERPC__) || defined (__ppc__) || \ defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__) sys_icache_invalidate(Addr, Len); -#endif +# endif + #else -#if (defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__) + +# if (defined(__POWERPC__) || defined (__ppc__) || \ + defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__) const size_t LineSize = 32; const intptr_t Mask = ~(LineSize - 1); @@ -57,6 +60,12 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr, for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) asm volatile("icbi 0, %0" : : "r"(Line)); asm volatile("isync"); -#endif +# elif defined(__arm__) && defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? + char *Start = (char*) Addr; + char *End = Start + Len; + __clear_cache(Start, End); +# endif + #endif // end apple } diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp index a5e9920ae3f03..8ccd6e52c4d5b 100644 --- a/lib/System/Mutex.cpp +++ b/lib/System/Mutex.cpp @@ -115,8 +115,7 @@ MutexImpl::acquire() int errorcode = pthread_mutex_lock(mutex); return errorcode == 0; - } - return false; + } else return false; } bool @@ -129,8 +128,7 @@ MutexImpl::release() int errorcode = pthread_mutex_unlock(mutex); return errorcode == 0; - } - return false; + } else return false; } bool @@ -143,8 +141,7 @@ MutexImpl::tryacquire() int errorcode = pthread_mutex_trylock(mutex); return errorcode == 0; - } - return false; + } else return false; } } diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp index 72bd7ad6f0468..df33574809375 100644 --- a/lib/System/Path.cpp +++ b/lib/System/Path.cpp @@ -13,6 +13,7 @@ #include "llvm/System/Path.h" #include "llvm/Config/config.h" +#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -28,19 +29,10 @@ bool Path::operator==(const Path &that) const { return path == that.path; } -bool Path::operator!=(const Path &that) const { - return path != that.path; -} - bool Path::operator<(const Path& that) const { return path < that.path; } -std::ostream& llvm::operator<<(std::ostream &strm, const sys::Path &aPath) { - strm << aPath.toString(); - return strm; -} - Path Path::GetLLVMConfigDir() { Path result; @@ -207,18 +199,6 @@ bool Path::hasMagicNumber(const std::string &Magic) const { return false; } -void Path::makeAbsolute() { - if (isAbsolute()) - return; - - Path CWD = Path::GetCurrentDirectory(); - assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!"); - - CWD.appendComponent(path); - - path = CWD.toString(); -} - static void getPathList(const char*path, std::vector& Paths) { const char* at = path; const char* delim = strchr(at, PathSeparator); diff --git a/lib/System/Program.cpp b/lib/System/Program.cpp index eb289d81b2e17..a3049d46fd654 100644 --- a/lib/System/Program.cpp +++ b/lib/System/Program.cpp @@ -22,6 +22,33 @@ using namespace sys; //=== independent code. 
//===----------------------------------------------------------------------===// +int +Program::ExecuteAndWait(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned secondsToWait, + unsigned memoryLimit, + std::string* ErrMsg) { + Program prg; + if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) + return prg.Wait(secondsToWait, ErrMsg); + else + return -1; +} + +void +Program::ExecuteNoWait(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned memoryLimit, + std::string* ErrMsg) { + Program prg; + prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg); +} + + } // Include the platform-specific parts of this class. diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp index 15d98cb8f418b..5faf220eb9168 100644 --- a/lib/System/RWMutex.cpp +++ b/lib/System/RWMutex.cpp @@ -117,8 +117,7 @@ RWMutexImpl::reader_acquire() int errorcode = pthread_rwlock_rdlock(rwlock); return errorcode == 0; - } - return false; + } else return false; } bool @@ -131,8 +130,7 @@ RWMutexImpl::reader_release() int errorcode = pthread_rwlock_unlock(rwlock); return errorcode == 0; - } - return false; + } else return false; } bool @@ -145,8 +143,7 @@ RWMutexImpl::writer_acquire() int errorcode = pthread_rwlock_wrlock(rwlock); return errorcode == 0; - } - return false; + } else return false; } bool @@ -159,8 +156,7 @@ RWMutexImpl::writer_release() int errorcode = pthread_rwlock_unlock(rwlock); return errorcode == 0; - } - return false; + } else return false; } } diff --git a/lib/System/Threading.cpp b/lib/System/Threading.cpp index a2d7f82715d94..466c46802647c 100644 --- a/lib/System/Threading.cpp +++ b/lib/System/Threading.cpp @@ -14,6 +14,7 @@ #include "llvm/System/Threading.h" #include "llvm/System/Atomic.h" #include "llvm/System/Mutex.h" +#include "llvm/Config/config.h" #include using namespace llvm; diff --git a/lib/System/Unix/Alarm.inc b/lib/System/Unix/Alarm.inc index 28ff1b8a63686..fb42b6c65da10 100644 --- a/lib/System/Unix/Alarm.inc +++ b/lib/System/Unix/Alarm.inc @@ -67,6 +67,6 @@ int sys::AlarmStatus() { return 0; } -void Sleep(unsigned n) { +void sys::Sleep(unsigned n) { ::sleep(n); } diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc index fb319fd09e1ca..c76d6a4e18f10 100644 --- a/lib/System/Unix/Host.inc +++ b/lib/System/Unix/Host.inc @@ -16,7 +16,8 @@ //=== is guaranteed to work on *all* UNIX variants. //===----------------------------------------------------------------------===// -#include +#include "llvm/Config/config.h" +#include "llvm/ADT/StringRef.h" #include "Unix.h" #include #include @@ -33,10 +34,47 @@ static std::string getOSVersion() { } std::string sys::getHostTriple() { - // FIXME: Derive more directly instead of relying on the autoconf - // generated variable. + // FIXME: Derive directly instead of relying on the autoconf generated + // variable. - std::string Triple = LLVM_HOSTTRIPLE; + StringRef HostTripleString(LLVM_HOSTTRIPLE); + std::pair ArchSplit = HostTripleString.split('-'); + + // Normalize the arch, since the host triple may not actually match the host. + std::string Arch = ArchSplit.first; + + // It would be nice to do this in terms of llvm::Triple, but that is in + // Support which is layered above us. 
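/* [Editor's sketch, not part of the patch.] The split('-') above divides a
 * GNU configuration triple at its first dash: "i686-pc-linux-gnu" yields
 * the arch component "i686" and the remainder "pc-linux-gnu"; the #ifdef
 * ladder below then overrides the arch with what the compiler actually
 * targets. The same split in plain C (string.h assumed):
 */
static const char *split_arch(const char *triple, char *arch,
                              unsigned long cap) {
  const char *dash = strchr(triple, '-');
  unsigned long len = dash ? (unsigned long)(dash - triple) : strlen(triple);
  if (len >= cap)
    len = cap - 1;                /* truncate to fit the caller's buffer */
  memcpy(arch, triple, len);
  arch[len] = '\0';               /* e.g. "i686" */
  return dash ? dash + 1 : "";    /* e.g. "pc-linux-gnu" */
}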
+#if defined(__x86_64__) + Arch = "x86_64"; +#elif defined(__i386__) + Arch = "i386"; +#elif defined(__ppc64__) + Arch = "powerpc64"; +#elif defined(__ppc__) + Arch = "powerpc"; +#elif defined(__arm__) + + // FIXME: We need to pick the right ARM triple (which involves querying the + // chip). However, for now this is most important for LLVM arch selection, so + // we only need to make sure to distinguish ARM and Thumb. +# if defined(__thumb__) + Arch = "thumb"; +# else + Arch = "arm"; +# endif + +#else + + // FIXME: When enough auto-detection is in place, this should just + // #error. Then at least the arch selection is done, and we only need the OS + // etc selection to kill off the use of LLVM_HOSTTRIPLE. + +#endif + + std::string Triple(Arch); + Triple += '-'; + Triple += ArchSplit.second; // Force i86 to i386. if (Triple[0] == 'i' && isdigit(Triple[1]) && diff --git a/lib/System/Unix/Memory.inc b/lib/System/Unix/Memory.inc index b7a70135bcb76..a80f56fbc144f 100644 --- a/lib/System/Unix/Memory.inc +++ b/lib/System/Unix/Memory.inc @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "Unix.h" +#include "llvm/Support/DataTypes.h" #include "llvm/System/Process.h" #ifdef HAVE_SYS_MMAN_H @@ -28,12 +29,12 @@ /// is very OS specific. /// llvm::sys::MemoryBlock -llvm::sys::Memory::AllocateRWX(unsigned NumBytes, const MemoryBlock* NearBlock, +llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, std::string *ErrMsg) { if (NumBytes == 0) return MemoryBlock(); - unsigned pageSize = Process::GetPageSize(); - unsigned NumPages = (NumBytes+pageSize-1)/pageSize; + size_t pageSize = Process::GetPageSize(); + size_t NumPages = (NumBytes+pageSize-1)/pageSize; int fd = -1; #ifdef NEED_DEV_ZERO_FOR_MMAP diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index 1f73571cf140b..89285b48132f9 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -16,7 +16,7 @@ //=== is guaranteed to work on *all* UNIX variants. //===----------------------------------------------------------------------===// -#include "llvm/Config/alloca.h" +#include "llvm/ADT/SmallVector.h" #include "Unix.h" #if HAVE_SYS_STAT_H #include @@ -57,6 +57,10 @@ #include #endif +#ifdef __APPLE__ +#include +#endif + // Put in a hack for Cygwin which falsely reports that the mkdtemp function // is available when it is not. #ifdef __CYGWIN__ @@ -92,15 +96,7 @@ Path::isValid() const { // Check some obvious things if (path.empty()) return false; - else if (path.length() >= MAXPATHLEN) - return false; - - // Check that the characters are ascii chars - size_t len = path.length(); - unsigned i = 0; - while (i < len && isascii(path[i])) - ++i; - return i >= len; + return path.length() < MAXPATHLEN; } bool @@ -117,6 +113,19 @@ Path::isAbsolute() const { return false; return path[0] == '/'; } + +void Path::makeAbsolute() { + if (isAbsolute()) + return; + + Path CWD = Path::GetCurrentDirectory(); + assert(CWD.isAbsolute() && "GetCurrentDirectory returned relative path!"); + + CWD.appendComponent(path); + + path = CWD.str(); +} + Path Path::GetRootDirectory() { Path result; @@ -331,7 +340,17 @@ getprogpath(char ret[PATH_MAX], const char *bin) /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { -#if defined(__FreeBSD__) +#if defined(__APPLE__) + // On OS X the executable path is saved to the stack by dyld. 
Reading it + // from there is much faster than calling dladdr, especially for large + // binaries with symbols. + char exe_path[MAXPATHLEN]; + uint32_t size = sizeof(exe_path); + if (_NSGetExecutablePath(exe_path, &size) == 0) { + char link_path[MAXPATHLEN]; + return Path(std::string(realpath(exe_path, link_path))); + } +#elif defined(__FreeBSD__) char exe_path[PATH_MAX]; if (getprogpath(exe_path, argv0) != NULL) @@ -339,10 +358,8 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { #elif defined(__linux__) || defined(__CYGWIN__) char exe_path[MAXPATHLEN]; ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path)); - if (len > 0 && len < MAXPATHLEN - 1) { - exe_path[len] = '\0'; - return Path(std::string(exe_path)); - } + if (len >= 0) + return Path(std::string(exe_path, len)); #elif defined(HAVE_DLFCN_H) // Use dladdr to get executable path if available. Dl_info DLInfo; @@ -397,7 +414,9 @@ Path::getSuffix() const { bool Path::getMagicNumber(std::string& Magic, unsigned len) const { assert(len < 1024 && "Request for magic string too long"); - char* buf = (char*) alloca(1 + len); + SmallVector Buf; + Buf.resize(1 + len); + char* buf = Buf.data(); int fd = ::open(path.c_str(), O_RDONLY); if (fd < 0) return false; @@ -426,12 +445,12 @@ Path::isDirectory() const { bool Path::canRead() const { - return 0 == access(path.c_str(), F_OK | R_OK ); + return 0 == access(path.c_str(), R_OK); } bool Path::canWrite() const { - return 0 == access(path.c_str(), F_OK | W_OK ); + return 0 == access(path.c_str(), W_OK); } bool @@ -499,7 +518,7 @@ static bool AddPermissionBits(const Path &File, int bits) { // Get the file's current mode. struct stat buf; - if (0 != stat(File.toString().c_str(), &buf)) + if (0 != stat(File.c_str(), &buf)) return false; // Change the file to have whichever permissions bits from 'bits' // that the umask would not disable. 
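[Editor's note] The AddPermissionBits hunk above only grants mode bits that the process umask would allow. A self-contained sketch of that POSIX pattern follows; the back-to-back umask() calls are the portable way to read the current mask without leaving it changed, and add_permission_bits is a hypothetical stand-in for the static helper in the patch:

#include <sys/stat.h>

static int add_permission_bits(const char *path, mode_t bits) {
  struct stat st;
  mode_t mask;

  if (stat(path, &st) != 0)
    return -1;                 /* as above: give up if we cannot stat */
  mask = umask(0);             /* read the current umask... */
  umask(mask);                 /* ...and immediately restore it */
  return chmod(path, (st.st_mode & 07777) | (bits & ~mask));
}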
@@ -631,7 +650,7 @@ Path::eraseSuffix() { static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { - if (access(beg, F_OK | R_OK | W_OK) == 0) + if (access(beg, R_OK | W_OK) == 0) return false; if (create_parents) { @@ -756,7 +775,7 @@ bool Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { if (0 != ::rename(path.c_str(), newName.c_str())) return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" + - newName.toString() + "'"); + newName.str() + "'"); return false; } @@ -778,13 +797,13 @@ sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){ int outFile = -1; inFile = ::open(Src.c_str(), O_RDONLY); if (inFile == -1) - return MakeErrMsg(ErrMsg, Src.toString() + + return MakeErrMsg(ErrMsg, Src.str() + ": can't open source file to copy"); outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666); if (outFile == -1) { ::close(inFile); - return MakeErrMsg(ErrMsg, Dest.toString() + + return MakeErrMsg(ErrMsg, Dest.str() + ": can't create destination file for copy"); } @@ -794,7 +813,7 @@ sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){ if (errno != EINTR && errno != EAGAIN) { ::close(inFile); ::close(outFile); - return MakeErrMsg(ErrMsg, Src.toString()+": can't read source file"); + return MakeErrMsg(ErrMsg, Src.str()+": can't read source file"); } } else { char *BufPtr = Buffer; @@ -804,7 +823,7 @@ sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){ if (errno != EINTR && errno != EAGAIN) { ::close(inFile); ::close(outFile); - return MakeErrMsg(ErrMsg, Dest.toString() + + return MakeErrMsg(ErrMsg, Dest.str() + ": can't write destination file"); } } else { @@ -826,7 +845,9 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { // Append an XXXXXX pattern to the end of the file for use with mkstemp, // mktemp or our own implementation. - char *FNBuffer = (char*) alloca(path.size()+8); + SmallVector Buf; + Buf.resize(path.size()+8); + char *FNBuffer = Buf.data(); path.copy(FNBuffer,path.size()); if (isDirectory()) strcpy(FNBuffer+path.size(), "/XXXXXX"); diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc index 2da31c9f215b5..94e4c1bde25c2 100644 --- a/lib/System/Unix/Process.inc +++ b/lib/System/Unix/Process.inc @@ -46,11 +46,11 @@ Process::GetPageSize() // On Cygwin, getpagesize() returns 64k but the page size for the purposes of // memory protection and mmap() is 4k. // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492 - static const int page_size = 0x1000; + const int page_size = 0x1000; #elif defined(HAVE_GETPAGESIZE) - static const int page_size = ::getpagesize(); + const int page_size = ::getpagesize(); #elif defined(HAVE_SYSCONF) - static long page_size = ::sysconf(_SC_PAGE_SIZE); + long page_size = ::sysconf(_SC_PAGE_SIZE); #else #warning Cannot get the page size on this machine #endif @@ -91,7 +91,7 @@ Process::GetTotalMemoryUsage() malloc_statistics_t Stats; malloc_zone_statistics(malloc_default_zone(), &Stats); return Stats.size_allocated; // darwin -#elif defined(HAVE_GETRUSAGE) +#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__) struct rusage usage; ::getrusage(RUSAGE_SELF, &usage); return usage.ru_maxrss; @@ -179,27 +179,24 @@ void Process::PreventCoreFiles() { } bool Process::StandardInIsUserInput() { -#if HAVE_ISATTY - return isatty(0); -#endif - // If we don't have isatty, just return false. 
- return false; + return FileDescriptorIsDisplayed(STDIN_FILENO); } bool Process::StandardOutIsDisplayed() { -#if HAVE_ISATTY - return isatty(1); -#endif - // If we don't have isatty, just return false. - return false; + return FileDescriptorIsDisplayed(STDOUT_FILENO); } bool Process::StandardErrIsDisplayed() { + return FileDescriptorIsDisplayed(STDERR_FILENO); +} + +bool Process::FileDescriptorIsDisplayed(int fd) { #if HAVE_ISATTY - return isatty(2); -#endif + return isatty(fd); +#else // If we don't have isatty, just return false. return false; +#endif } static unsigned getColumns(int FileID) { diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index cdc6fee609491..56dea250a7793 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -1,10 +1,10 @@ //===- llvm/System/Unix/Program.cpp -----------------------------*- C++ -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file implements the Unix specific portion of the Program class. @@ -18,7 +18,6 @@ #include #include "Unix.h" -#include #if HAVE_SYS_STAT_H #include #endif @@ -35,6 +34,15 @@ namespace llvm { using namespace sys; +Program::Program() : Data_(0) {} + +Program::~Program() {} + +unsigned Program::GetPid() const { + uint64_t pid = reinterpret_cast(Data_); + return static_cast(pid); +} + // This function just uses the PATH environment variable to find the program. Path Program::FindProgramByName(const std::string& progName) { @@ -45,16 +53,17 @@ Program::FindProgramByName(const std::string& progName) { Path temp; if (!temp.set(progName)) // invalid name return Path(); - // FIXME: have to check for absolute filename - we cannot assume anything - // about "." being in $PATH - if (temp.canExecute()) // already executable as is + // Use the given path verbatim if it contains any slashes; this matches + // the behavior of sh(1) and friends. + if (progName.find('/') != std::string::npos) return temp; - // At this point, the file name is valid and its not executable - + // At this point, the file name does not contain slashes. Search for it + // through the directories specified in the PATH environment variable. + // Get the path. If its empty, we can't do anything to find it. const char *PathStr = getenv("PATH"); - if (PathStr == 0) + if (PathStr == 0) return Path(); // Now we have a colon separated list of directories to search; try them. @@ -93,7 +102,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { // Redirect empty paths to /dev/null File = "/dev/null"; else - File = Path->toString(); + File = Path->str(); // Open the file int InFD = open(File.c_str(), FD == 0 ? 
O_RDONLY : O_WRONLY|O_CREAT, 0666); @@ -112,11 +121,6 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { return false; } -static bool Timeout = false; -static void TimeOutHandler(int Sig) { - Timeout = true; -} - static void SetMemoryLimits (unsigned size) { #if HAVE_SYS_RESOURCE_H @@ -142,49 +146,47 @@ static void SetMemoryLimits (unsigned size) #endif } -int -Program::ExecuteAndWait(const Path& path, - const char** args, - const char** envp, - const Path** redirects, - unsigned secondsToWait, - unsigned memoryLimit, - std::string* ErrMsg) +bool +Program::Execute(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned memoryLimit, + std::string* ErrMsg) { if (!path.canExecute()) { if (ErrMsg) - *ErrMsg = path.toString() + " is not executable"; - return -1; + *ErrMsg = path.str() + " is not executable"; + return false; } -#ifdef HAVE_SYS_WAIT_H // Create a child process. int child = fork(); switch (child) { // An error occured: Return to the caller. case -1: MakeErrMsg(ErrMsg, "Couldn't fork"); - return -1; + return false; // Child process: Execute the program. case 0: { // Redirect file descriptors... if (redirects) { // Redirect stdin - if (RedirectIO(redirects[0], 0, ErrMsg)) { return -1; } + if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; } // Redirect stdout - if (RedirectIO(redirects[1], 1, ErrMsg)) { return -1; } - if (redirects[1] && redirects[2] && + if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; } + if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) { // If stdout and stderr should go to the same place, redirect stderr // to the FD already open for stdout. if (-1 == dup2(1,2)) { MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout"); - return -1; + return false; } } else { // Just redirect stderr - if (RedirectIO(redirects[2], 2, ErrMsg)) { return -1; } + if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; } } } @@ -192,15 +194,19 @@ Program::ExecuteAndWait(const Path& path, if (memoryLimit!=0) { SetMemoryLimits(memoryLimit); } - + // Execute! if (envp != 0) - execve (path.c_str(), (char**)args, (char**)envp); + execve(path.c_str(), (char**)args, (char**)envp); else - execv (path.c_str(), (char**)args); - // If the execve() failed, we should exit and let the parent pick up - // our non-zero exit status. - exit (errno); + execv(path.c_str(), (char**)args); + // If the execve() failed, we should exit. Follow Unix protocol and + // return 127 if the executable was not found, and 126 otherwise. + // Use _exit rather than exit so that atexit functions and static + // object destructors cloned from the parent process aren't + // redundantly run, and so that any data buffered in stdio buffers + // cloned from the parent aren't redundantly written out. + _exit(errno == ENOENT ? 127 : 126); } // Parent process: Break out of the switch to do our processing. @@ -208,32 +214,41 @@ Program::ExecuteAndWait(const Path& path, break; } - // Make sure stderr and stdout have been flushed - std::cerr << std::flush; - std::cout << std::flush; - fsync(1); - fsync(2); + Data_ = reinterpret_cast(child); + + return true; +} +int +Program::Wait(unsigned secondsToWait, + std::string* ErrMsg) +{ +#ifdef HAVE_SYS_WAIT_H struct sigaction Act, Old; + if (Data_ == 0) { + MakeErrMsg(ErrMsg, "Process not started!"); + return -1; + } + // Install a timeout handler. 
if (secondsToWait) { - Timeout = false; - Act.sa_sigaction = 0; - Act.sa_handler = TimeOutHandler; + memset(&Act, 0, sizeof(Act)); + Act.sa_handler = SIG_IGN; sigemptyset(&Act.sa_mask); - Act.sa_flags = 0; sigaction(SIGALRM, &Act, &Old); alarm(secondsToWait); } // Parent process: Wait for the child process to terminate. int status; + uint64_t pid = reinterpret_cast(Data_); + pid_t child = static_cast(pid); while (wait(&status) != child) if (secondsToWait && errno == EINTR) { // Kill the child. kill(child, SIGKILL); - + // Turn off the alarm and restore the signal handler alarm(0); sigaction(SIGALRM, &Old, 0); @@ -271,7 +286,25 @@ Program::ExecuteAndWait(const Path& path, #else return -99; #endif - + +} + +bool +Program::Kill(std::string* ErrMsg) { + if (Data_ == 0) { + MakeErrMsg(ErrMsg, "Process not started!"); + return true; + } + + uint64_t pid64 = reinterpret_cast(Data_); + pid_t pid = static_cast(pid64); + + if (kill(pid, SIGKILL) != 0) { + MakeErrMsg(ErrMsg, "The process couldn't be killed!"); + return true; + } + + return false; } bool Program::ChangeStdinToBinary(){ diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index e385e0c556625..d39e1e99a0c5d 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -14,6 +14,7 @@ #include "Unix.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/System/Mutex.h" #include #include #if HAVE_EXECINFO_H @@ -33,6 +34,8 @@ using namespace llvm; static RETSIGTYPE SignalHandler(int Sig); // defined below. +static SmartMutex SignalsMutex; + /// InterruptFunction - The function to call if ctrl-c is pressed. static void (*InterruptFunction)() = 0; @@ -113,6 +116,7 @@ static RETSIGTYPE SignalHandler(int Sig) { sigfillset(&SigMask); sigprocmask(SIG_UNBLOCK, &SigMask, 0); + SignalsMutex.acquire(); if (FilesToRemove != 0) while (!FilesToRemove->empty()) { FilesToRemove->back().eraseFromDisk(true); @@ -122,14 +126,19 @@ static RETSIGTYPE SignalHandler(int Sig) { if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) { if (InterruptFunction) { void (*IF)() = InterruptFunction; + SignalsMutex.release(); InterruptFunction = 0; IF(); // run the interrupt function. return; } + + SignalsMutex.release(); raise(Sig); // Execute the default handler. return; } + SignalsMutex.release(); + // Otherwise if it is a fault (like SEGV) run any handler. 
if (CallBacksToRun) for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i) @@ -139,18 +148,23 @@ static RETSIGTYPE SignalHandler(int Sig) { void llvm::sys::SetInterruptFunction(void (*IF)()) { + SignalsMutex.acquire(); InterruptFunction = IF; + SignalsMutex.release(); RegisterHandlers(); } // RemoveFileOnSignal - The public API bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { + SignalsMutex.acquire(); if (FilesToRemove == 0) FilesToRemove = new std::vector(); FilesToRemove->push_back(Filename); + SignalsMutex.release(); + RegisterHandlers(); return false; } diff --git a/lib/System/Unix/TimeValue.inc b/lib/System/Unix/TimeValue.inc index 8dd30b9322f92..1ae8c7184d559 100644 --- a/lib/System/Unix/TimeValue.inc +++ b/lib/System/Unix/TimeValue.inc @@ -21,7 +21,7 @@ namespace llvm { using namespace sys; -std::string TimeValue::toString() const { +std::string TimeValue::str() const { char buffer[32]; time_t ourTime = time_t(this->toEpochTime()); diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc index aa04268406d45..10e64aa990c95 100644 --- a/lib/System/Win32/DynamicLibrary.inc +++ b/lib/System/Win32/DynamicLibrary.inc @@ -67,7 +67,6 @@ extern "C" { PVOID UserContext) #endif { - llvm::sys::SmartScopedWriter Writer(&SymbolsLock); // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded // into the process. if (stricmp(ModuleName, "msvci70") != 0 && @@ -89,36 +88,9 @@ extern "C" { } } -DynamicLibrary::DynamicLibrary() : handle(0) { - SmartScopedWriter Writer(&SymbolsLock); - handle = GetModuleHandle(NULL); - OpenedHandles.push_back((HMODULE)handle); -} - -DynamicLibrary::~DynamicLibrary() { - llvm::sys::SmartScopedWriter Writer(&SymbolsLock); - if (handle == 0) - return; - - // GetModuleHandle() does not increment the ref count, so we must not free - // the handle to the executable. - if (handle != GetModuleHandle(NULL)) - FreeLibrary((HMODULE)handle); - handle = 0; - - for (std::vector::iterator I = OpenedHandles.begin(), - E = OpenedHandles.end(); I != E; ++I) { - if (*I == handle) { - // Note: don't use the swap/pop_back trick here. Order is important. - OpenedHandles.erase(I); - } - } -} - bool DynamicLibrary::LoadLibraryPermanently(const char *filename, std::string *ErrMsg) { if (filename) { - llvm::sys::SmartScopedWriter Writer(&SymbolsLock); HMODULE a_handle = LoadLibrary(filename); if (a_handle == 0) @@ -170,24 +142,22 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename, void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { // First check symbols added via AddSymbol(). - SymbolsLock.reader_acquire(); - std::map::iterator I = symbols.find(symbolName); - std::map::iterator E = symbols.end(); - SymbolsLock.reader_release(); - if (I != E) - return I->second; + if (ExplicitSymbols) { + std::map::iterator I = + ExplicitSymbols->find(symbolName); + std::map::iterator E = ExplicitSymbols->end(); + if (I != E) + return I->second; + } // Now search the libraries. 
- SymbolsLock.writer_acquire(); for (std::vector::iterator I = OpenedHandles.begin(), E = OpenedHandles.end(); I != E; ++I) { FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName); if (ptr) { - SymbolsLock.writer_release(); return (void *) ptr; } } - SymbolsLock.writer_release(); #if defined(__MINGW32__) { diff --git a/lib/System/Win32/Memory.inc b/lib/System/Win32/Memory.inc index 5e5cf7a6762de..7611ecdb929a1 100644 --- a/lib/System/Win32/Memory.inc +++ b/lib/System/Win32/Memory.inc @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "Win32.h" +#include "llvm/Support/DataTypes.h" #include "llvm/System/Process.h" namespace llvm { @@ -23,13 +24,13 @@ using namespace sys; //=== and must not be UNIX code //===----------------------------------------------------------------------===// -MemoryBlock Memory::AllocateRWX(unsigned NumBytes, +MemoryBlock Memory::AllocateRWX(size_t NumBytes, const MemoryBlock *NearBlock, std::string *ErrMsg) { if (NumBytes == 0) return MemoryBlock(); - static const long pageSize = Process::GetPageSize(); - unsigned NumPages = (NumBytes+pageSize-1)/pageSize; + static const size_t pageSize = Process::GetPageSize(); + size_t NumPages = (NumBytes+pageSize-1)/pageSize; //FIXME: support NearBlock if ever needed on Win64. diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc index 683c94bba44ee..46b965f4b0520 100644 --- a/lib/System/Win32/Path.inc +++ b/lib/System/Win32/Path.inc @@ -125,9 +125,30 @@ Path::isValid() const { return true; } +void Path::makeAbsolute() { + TCHAR FullPath[MAX_PATH + 1] = {0}; + LPTSTR FilePart = NULL; + + DWORD RetLength = ::GetFullPathNameA(path.c_str(), + sizeof(FullPath)/sizeof(FullPath[0]), + FullPath, &FilePart); + + if (0 == RetLength) { + // FIXME: Report the error GetLastError() + assert(0 && "Unable to make absolute path!"); + } else if (RetLength > MAX_PATH) { + // FIXME: Report too small buffer (needed RetLength bytes). + assert(0 && "Unable to make absolute path!"); + } else { + path = FullPath; + } +} + bool Path::isAbsolute(const char *NameStart, unsigned NameLen) { assert(NameStart); + // FIXME: This does not handle correctly an absolute path starting from + // a drive letter or in UNC format. switch (NameLen) { case 0: return false; @@ -135,12 +156,15 @@ Path::isAbsolute(const char *NameStart, unsigned NameLen) { case 2: return NameStart[0] == '/'; default: - return NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/'); + return (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) || + (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\')); } } bool Path::isAbsolute() const { + // FIXME: This does not handle correctly an absolute path starting from + // a drive letter or in UNC format. switch (path.length()) { case 0: return false; @@ -784,8 +808,8 @@ CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) { // Can't use CopyFile macro defined in Windows.h because it would mess up the // above line. We use the expansion it would have in a non-UNICODE build. 
if (!::CopyFileA(Src.c_str(), Dest.c_str(), false)) - return MakeErrMsg(ErrMsg, "Can't copy '" + Src.toString() + - "' to '" + Dest.toString() + "': "); + return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() + + "' to '" + Dest.str() + "': "); return false; } diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc index cfbe33c85a2f3..feb0806116e46 100644 --- a/lib/System/Win32/Process.inc +++ b/lib/System/Win32/Process.inc @@ -120,15 +120,19 @@ void Process::PreventCoreFiles() { } bool Process::StandardInIsUserInput() { - return GetFileType((HANDLE)_get_osfhandle(0)) == FILE_TYPE_CHAR; + return FileDescriptorIsDisplayed(0); } bool Process::StandardOutIsDisplayed() { - return GetFileType((HANDLE)_get_osfhandle(1)) == FILE_TYPE_CHAR; + return FileDescriptorIsDisplayed(1); } bool Process::StandardErrIsDisplayed() { - return GetFileType((HANDLE)_get_osfhandle(2)) == FILE_TYPE_CHAR; + return FileDescriptorIsDisplayed(2); +} + +bool Process::FileDescriptorIsDisplayed(int fd) { + return GetFileType((HANDLE)_get_osfhandle(fd)) == FILE_TYPE_CHAR; } unsigned Process::StandardOutColumns() { diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc index 49086b8348e6a..a69826fdcef4c 100644 --- a/lib/System/Win32/Program.inc +++ b/lib/System/Win32/Program.inc @@ -22,9 +22,32 @@ //=== and must not be UNIX code //===----------------------------------------------------------------------===// +namespace { + struct Win32ProcessInfo { + HANDLE hProcess; + DWORD dwProcessId; + }; +} + namespace llvm { using namespace sys; +Program::Program() : Data_(0) {} + +Program::~Program() { + if (Data_) { + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + CloseHandle(wpi->hProcess); + delete wpi; + Data_ = 0; + } +} + +unsigned Program::GetPid() const { + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + return wpi->dwProcessId; +} + // This function just uses the PATH environment variable to find the program. Path Program::FindProgramByName(const std::string& progName) { @@ -82,7 +105,7 @@ static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) { if (path->isEmpty()) fname = "NUL"; else - fname = path->toString().c_str(); + fname = path->c_str(); SECURITY_ATTRIBUTES sa; sa.nLength = sizeof(sa); @@ -109,29 +132,41 @@ static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) { DWORD cbJobObjectInfoLength); #endif -int -Program::ExecuteAndWait(const Path& path, - const char** args, - const char** envp, - const Path** redirects, - unsigned secondsToWait, - unsigned memoryLimit, - std::string* ErrMsg) { +/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling +/// CreateProcess. +static bool ArgNeedsQuotes(const char *Str) { + return Str[0] == '\0' || strchr(Str, ' ') != 0; +} + +bool +Program::Execute(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned memoryLimit, + std::string* ErrMsg) { + if (Data_) { + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + CloseHandle(wpi->hProcess); + delete wpi; + Data_ = 0; + } + if (!path.canExecute()) { if (ErrMsg) *ErrMsg = "program not executable"; - return -1; + return false; } // Windows wants a command line, not an array of args, to pass to the new // process. We have to concatenate them all, while quoting the args that - // have embedded spaces. + // have embedded spaces (or are empty). // First, determine the length of the command line. 
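/* [Editor's note, not part of the patch.] CreateProcess takes one flat
 * command-line string rather than an argv array, so arguments that are
 * empty or contain spaces must be wrapped in double quotes -- the
 * ArgNeedsQuotes test above. For example the argv
 *     {"clang", "-o", "my file.o"}
 * must be flattened to
 *     clang -o "my file.o"
 * Like the loop below, this simple scheme does not escape embedded
 * double-quote characters; the full MSVCRT argument-parsing rules would be
 * needed for that.
 */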
unsigned len = 0; for (unsigned i = 0; args[i]; i++) { len += strlen(args[i]) + 1; - if (strchr(args[i], ' ')) + if (ArgNeedsQuotes(args[i])) len += 2; } @@ -142,7 +177,7 @@ Program::ExecuteAndWait(const Path& path, for (unsigned i = 0; args[i]; i++) { const char *arg = args[i]; size_t len = strlen(arg); - bool needsQuoting = strchr(arg, ' ') != 0; + bool needsQuoting = ArgNeedsQuotes(arg); if (needsQuoting) *p++ = '"'; memcpy(p, arg, len); @@ -195,13 +230,13 @@ Program::ExecuteAndWait(const Path& path, si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg); if (si.hStdInput == INVALID_HANDLE_VALUE) { MakeErrMsg(ErrMsg, "can't redirect stdin"); - return -1; + return false; } si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg); if (si.hStdOutput == INVALID_HANDLE_VALUE) { CloseHandle(si.hStdInput); MakeErrMsg(ErrMsg, "can't redirect stdout"); - return -1; + return false; } if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) { // If stdout and stderr should go to the same place, redirect stderr @@ -216,7 +251,7 @@ Program::ExecuteAndWait(const Path& path, CloseHandle(si.hStdInput); CloseHandle(si.hStdOutput); MakeErrMsg(ErrMsg, "can't redirect stderr"); - return -1; + return false; } } } @@ -237,16 +272,18 @@ Program::ExecuteAndWait(const Path& path, CloseHandle(si.hStdError); // Now return an error if the process didn't get created. - if (!rc) - { + if (!rc) { SetLastError(err); MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") + - path.toString() + "'"); - return -1; + path.str() + "'"); + return false; } + Win32ProcessInfo* wpi = new Win32ProcessInfo; + wpi->hProcess = pi.hProcess; + wpi->dwProcessId = pi.dwProcessId; + Data_ = wpi; // Make sure these get closed no matter what. - AutoHandle hProcess(pi.hProcess); AutoHandle hThread(pi.hThread); // Assign the process to a job if a memory limit is defined. @@ -270,39 +307,68 @@ Program::ExecuteAndWait(const Path& path, MakeErrMsg(ErrMsg, std::string("Unable to set memory limit")); TerminateProcess(pi.hProcess, 1); WaitForSingleObject(pi.hProcess, INFINITE); - return -1; + return false; } } - // Wait for it to terminate. + return true; +} + +int +Program::Wait(unsigned secondsToWait, + std::string* ErrMsg) { + if (Data_ == 0) { + MakeErrMsg(ErrMsg, "Process not started!"); + return -1; + } + + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + HANDLE hProcess = wpi->hProcess; + + // Wait for the process to terminate. DWORD millisecondsToWait = INFINITE; if (secondsToWait > 0) millisecondsToWait = secondsToWait * 1000; - if (WaitForSingleObject(pi.hProcess, millisecondsToWait) == WAIT_TIMEOUT) { - if (!TerminateProcess(pi.hProcess, 1)) { - MakeErrMsg(ErrMsg, std::string("Failed to terminate timed-out program '") - + path.toString() + "'"); + if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) { + if (!TerminateProcess(hProcess, 1)) { + MakeErrMsg(ErrMsg, "Failed to terminate timed-out program."); return -1; } - WaitForSingleObject(pi.hProcess, INFINITE); + WaitForSingleObject(hProcess, INFINITE); } // Get its exit status. 
DWORD status; - rc = GetExitCodeProcess(pi.hProcess, &status); - err = GetLastError(); + BOOL rc = GetExitCodeProcess(hProcess, &status); + DWORD err = GetLastError(); if (!rc) { SetLastError(err); - MakeErrMsg(ErrMsg, std::string("Failed getting status for program '") + - path.toString() + "'"); + MakeErrMsg(ErrMsg, "Failed getting status for program."); return -1; } return status; } +bool +Program::Kill(std::string* ErrMsg) { + if (Data_ == 0) { + MakeErrMsg(ErrMsg, "Process not started!"); + return true; + } + + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + HANDLE hProcess = wpi->hProcess; + if (TerminateProcess(hProcess, 1) == 0) { + MakeErrMsg(ErrMsg, "The process couldn't be killed!"); + return true; + } + + return false; +} + bool Program::ChangeStdinToBinary(){ int result = _setmode( _fileno(stdin), _O_BINARY ); return result == -1; diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc index 3a8f77e3cdb9d..dba22185ac7f0 100644 --- a/lib/System/Win32/Signals.inc +++ b/lib/System/Win32/Signals.inc @@ -43,6 +43,9 @@ static std::vector *FilesToRemove = NULL; static std::vector > *CallBacksToRun = 0; static bool RegisteredUnhandledExceptionFilter = false; static bool CleanupExecuted = false; +#ifdef _MSC_VER +static bool ExitOnUnhandledExceptions = false; +#endif static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; // Windows creates a new thread to execute the console handler when an event @@ -57,8 +60,38 @@ namespace llvm { //=== and must not be UNIX code //===----------------------------------------------------------------------===// +#ifdef _MSC_VER +/// CRTReportHook - Function called on a CRT debugging event. +static int CRTReportHook(int ReportType, char *Message, int *Return) { + // Don't cause a DebugBreak() on return. + if (Return) + *Return = 0; + + switch (ReportType) { + default: + case _CRT_ASSERT: + fprintf(stderr, "CRT assert: %s\n", Message); + // FIXME: Is there a way to just crash? Perhaps throw to the unhandled + // exception code? Perhaps SetErrorMode() handles this. + _exit(3); + break; + case _CRT_ERROR: + fprintf(stderr, "CRT error: %s\n", Message); + // FIXME: Is there a way to just crash? Perhaps throw to the unhandled + // exception code? Perhaps SetErrorMode() handles this. + _exit(3); + break; + case _CRT_WARN: + fprintf(stderr, "CRT warn: %s\n", Message); + break; + } + + // Don't call _CrtDbgReport. + return TRUE; +} +#endif -static void RegisterHandler() { +static void RegisterHandler() { if (RegisteredUnhandledExceptionFilter) { EnterCriticalSection(&CriticalSection); return; @@ -76,6 +109,14 @@ static void RegisterHandler() { OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter); SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE); + // Environment variable to disable any kind of crash dialog. +#ifdef _MSC_VER + if (getenv("LLVM_DISABLE_CRT_DEBUG")) { + _CrtSetReportHook(CRTReportHook); + ExitOnUnhandledExceptions = true; + } +#endif + // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or // else multi-threading problems will ensue. } @@ -136,10 +177,7 @@ static void Cleanup() { if (FilesToRemove != NULL) while (!FilesToRemove->empty()) { - try { - FilesToRemove->back().eraseFromDisk(); - } catch (...) 
{ - } + FilesToRemove->back().eraseFromDisk(); FilesToRemove->pop_back(); } @@ -238,6 +276,11 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { assert(0 && "Crashed in LLVMUnhandledExceptionFilter"); } +#ifdef _MSC_VER + if (ExitOnUnhandledExceptions) + _exit(-3); +#endif + // Allow dialog box to pop up allowing choice to start debugger. if (OldFilter) return (*OldFilter)(ep); diff --git a/lib/System/Win32/TimeValue.inc b/lib/System/Win32/TimeValue.inc index 0ca87d423325d..e37f111fc77ca 100644 --- a/lib/System/Win32/TimeValue.inc +++ b/lib/System/Win32/TimeValue.inc @@ -30,7 +30,7 @@ TimeValue TimeValue::now() { return t; } -std::string TimeValue::toString() const { +std::string TimeValue::str() const { #ifdef __MINGW32__ // This ban may be lifted by either: // (i) a future MinGW version other than 1.0 inherents the __time64_t type, or diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 08dc07c641528..487ce1dd434b6 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -15,6 +15,7 @@ #ifndef TARGET_ARM_H #define TARGET_ARM_H +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include @@ -24,7 +25,8 @@ class ARMBaseTargetMachine; class FunctionPass; class MachineCodeEmitter; class JITCodeEmitter; -class raw_ostream; +class ObjectCodeEmitter; +class formatted_raw_ostream; // Enums corresponding to ARM condition codes namespace ARMCC { @@ -50,7 +52,7 @@ namespace ARMCC { inline static CondCodes getOppositeCondition(CondCodes CC){ switch (CC) { - default: assert(0 && "Unknown condition code"); + default: llvm_unreachable("Unknown condition code"); case EQ: return NE; case NE: return EQ; case HS: return LO; @@ -71,7 +73,7 @@ namespace ARMCC { inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { switch (CC) { - default: assert(0 && "Unknown condition code"); + default: llvm_unreachable("Unknown condition code"); case ARMCC::EQ: return "eq"; case ARMCC::NE: return "ne"; case ARMCC::HS: return "hs"; @@ -90,20 +92,23 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } -FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM); -FunctionPass *createARMCodePrinterPass(raw_ostream &O, - ARMBaseTargetMachine &TM, - bool Verbose); -FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM, - MachineCodeEmitter &MCE); +FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, + CodeGenOpt::Level OptLevel); FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM, MachineCodeEmitter &MCE); FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, + ObjectCodeEmitter &OCE); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMConstantIslandPass(); +FunctionPass *createNEONPreAllocPass(); +FunctionPass *createThumb2ITBlockPass(); +FunctionPass *createThumb2SizeReductionPass(); + +extern Target TheARMTarget, TheThumbTarget; } // end namespace llvm; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 9001e5033c7db..8851fbbf24815 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -89,27 +89,20 @@ def : ProcNoItin<"xscale", [ArchV5TE]>; def : ProcNoItin<"iwmmxt", [ArchV5TE]>; // V6 Processors. 
-def : Processor<"arm1136j-s", V6Itineraries, - [ArchV6]>; -def : Processor<"arm1136jf-s", V6Itineraries, - [ArchV6, FeatureVFP2]>; -def : Processor<"arm1176jz-s", V6Itineraries, - [ArchV6]>; -def : Processor<"arm1176jzf-s", V6Itineraries, - [ArchV6, FeatureVFP2]>; -def : Processor<"mpcorenovfp", V6Itineraries, - [ArchV6]>; -def : Processor<"mpcore", V6Itineraries, - [ArchV6, FeatureVFP2]>; +def : ProcNoItin<"arm1136j-s", [ArchV6]>; +def : ProcNoItin<"arm1136jf-s", [ArchV6, FeatureVFP2]>; +def : ProcNoItin<"arm1176jz-s", [ArchV6]>; +def : ProcNoItin<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; +def : ProcNoItin<"mpcorenovfp", [ArchV6]>; +def : ProcNoItin<"mpcore", [ArchV6, FeatureVFP2]>; // V6T2 Processors. -def : Processor<"arm1156t2-s", V6Itineraries, - [ArchV6T2, FeatureThumb2]>; -def : Processor<"arm1156t2f-s", V6Itineraries, - [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : ProcNoItin<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; +def : ProcNoItin<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; // V7 Processors. -def : ProcNoItin<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : Processor<"cortex-a8", CortexA8Itineraries, + [ArchV7A, FeatureThumb2, FeatureNEON]>; def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// @@ -131,13 +124,13 @@ def ARMInstrInfo : InstrInfo { let TSFlagsFields = ["AddrModeBits", "SizeFlag", "IndexModeBits", - "isUnaryDataProc", - "Form"]; + "Form", + "isUnaryDataProc"]; let TSFlagsShifts = [0, 4, 7, 9, - 10]; + 15]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 15c9ec1fc23cf..1839153351921 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -15,11 +15,12 @@ #define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include namespace llvm { - + /// ARM_AM - ARM Addressing Mode Stuff namespace ARM_AM { enum ShiftOpc { @@ -30,14 +31,14 @@ namespace ARM_AM { ror, rrx }; - + enum AddrOpc { add = '+', sub = '-' }; - + static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return "asr"; case ARM_AM::lsl: return "lsl"; case ARM_AM::lsr: return "lsr"; @@ -45,7 +46,7 @@ namespace ARM_AM { case ARM_AM::rrx: return "rrx"; } } - + static inline ShiftOpc getShiftOpcForNode(SDValue N) { switch (N.getOpcode()) { default: return ARM_AM::no_shift; @@ -70,7 +71,7 @@ namespace ARM_AM { static inline const char *getAMSubModeStr(AMSubMode Mode) { switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::ia: return "ia"; case ARM_AM::ib: return "ib"; case ARM_AM::da: return "da"; @@ -80,7 +81,7 @@ namespace ARM_AM { static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); + default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::ia: return isLD ? "fd" : "ea"; case ARM_AM::ib: return isLD ? "ed" : "fa"; case ARM_AM::da: return isLD ? 
"fa" : "ed"; @@ -94,14 +95,14 @@ namespace ARM_AM { assert(Amt < 32 && "Invalid rotate amount"); return (Val >> Amt) | (Val << ((32-Amt)&31)); } - + /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits. /// static inline unsigned rotl32(unsigned Val, unsigned Amt) { assert(Amt < 32 && "Invalid rotate amount"); return (Val << Amt) | (Val >> ((32-Amt)&31)); } - + //===--------------------------------------------------------------------===// // Addressing Mode #1: shift_operand with registers //===--------------------------------------------------------------------===// @@ -136,7 +137,7 @@ namespace ARM_AM { static inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; } - + /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand, /// computing the rotate amount to use. If this immediate value cannot be /// handled with a single shifter-op, determine a good rotate amount that will @@ -145,14 +146,14 @@ namespace ARM_AM { // 8-bit (or less) immediates are trivially shifter_operands with a rotate // of zero. if ((Imm & ~255U) == 0) return 0; - + // Use CTZ to compute the rotate amount. unsigned TZ = CountTrailingZeros_32(Imm); - + // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, // not 9. unsigned RotAmt = TZ & ~1; - + // If we can handle this spread, return it. if ((rotr32(Imm, RotAmt) & ~255U) == 0) return (32-RotAmt)&31; // HW rotates right, not left. @@ -165,16 +166,16 @@ namespace ARM_AM { // Restart the search for a high-order bit after the initial seconds of // ones. unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1)); - + // Rotate amount must be even. unsigned RotAmt2 = TZ2 & ~1; - + // If this fits, use it. if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0) return (32-RotAmt2)&31; // HW rotates right, not left. } } - + // Otherwise, we have no way to cover this span of bits with a single // shifter_op immediate. Return a chunk of bits that will be useful to // handle. @@ -188,17 +189,17 @@ namespace ARM_AM { // 8-bit (or less) immediates are trivially shifter_operands with a rotate // of zero. if ((Arg & ~255U) == 0) return Arg; - + unsigned RotAmt = getSOImmValRotate(Arg); // If this cannot be handled with a single shifter_op, bail out. if (rotr32(~255U, RotAmt) & Arg) return -1; - + // Encode this correctly. return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8); } - + /// isSOImmTwoPartVal - Return true if the specified value can be obtained by /// or'ing together two SOImmVal's. static inline bool isSOImmTwoPartVal(unsigned V) { @@ -206,12 +207,12 @@ namespace ARM_AM { V = rotr32(~255U, getSOImmValRotate(V)) & V; if (V == 0) return false; - + // If this can be handled with two shifter_op's, accept. V = rotr32(~255U, getSOImmValRotate(V)) & V; return V == 0; } - + /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, /// return the first chunk of it. static inline unsigned getSOImmTwoPartFirst(unsigned V) { @@ -221,14 +222,14 @@ namespace ARM_AM { /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, /// return the second chunk of it. static inline unsigned getSOImmTwoPartSecond(unsigned V) { - // Mask out the first hunk. + // Mask out the first hunk. V = rotr32(~255U, getSOImmValRotate(V)) & V; - + // Take what's left. assert(V == (rotr32(255U, getSOImmValRotate(V)) & V)); return V; } - + /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed /// by a left shift. Returns the shift amount to use. 
static inline unsigned getThumbImmValShift(unsigned Imm) { @@ -243,7 +244,7 @@ namespace ARM_AM { /// isThumbImmShiftedVal - Return true if the specified value can be obtained /// by left shifting a 8-bit immediate. static inline bool isThumbImmShiftedVal(unsigned V) { - // If this can be handled with + // If this can be handled with V = (~255U << getThumbImmValShift(V)) & V; return V == 0; } @@ -259,10 +260,10 @@ namespace ARM_AM { return CountTrailingZeros_32(Imm); } - /// isThumbImm16ShiftedVal - Return true if the specified value can be + /// isThumbImm16ShiftedVal - Return true if the specified value can be /// obtained by left shifting a 16-bit immediate. static inline bool isThumbImm16ShiftedVal(unsigned V) { - // If this can be handled with + // If this can be handled with V = (~65535U << getThumbImm16ValShift(V)) & V; return V == 0; } @@ -273,28 +274,6 @@ namespace ARM_AM { return V >> getThumbImmValShift(V); } - /// getT2SOImmValDecode - Given a 12-bit encoded Thumb-2 modified immediate, - /// return the corresponding 32-bit immediate value. - /// See ARM Reference Manual A6.3.2. - static inline unsigned getT2SOImmValDecode(unsigned Imm) { - unsigned Base = Imm & 0xff; - switch ((Imm >> 8) & 0xf) { - case 0: - return Base; - case 1: - return Base | (Base << 16); - case 2: - return (Base << 8) | (Base << 24); - case 3: - return Base | (Base << 8) | (Base << 16) | (Base << 24); - default: - break; - } - - // shifted immediate - unsigned RotAmount = ((Imm >> 7) & 0x1f) - 8; - return (Base | 0x80) << (24 - RotAmount); - } /// getT2SOImmValSplat - Return the 12-bit encoded representation /// if the specified value can be obtained by splatting the low 8 bits @@ -305,12 +284,12 @@ namespace ARM_AM { /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3 /// Return -1 if none of the above apply. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValSplat(unsigned V) { + static inline int getT2SOImmValSplatVal(unsigned V) { unsigned u, Vs, Imm; // control = 0 - if ((V & 0xffffff00) == 0) + if ((V & 0xffffff00) == 0) return V; - + // If the value is zeroes in the first byte, just shift those off Vs = ((V & 0xff) == 0) ? V >> 8 : V; // Any passing value only has 8 bits of payload, splatted across the word @@ -329,11 +308,11 @@ namespace ARM_AM { return -1; } - /// getT2SOImmValRotate - Return the 12-bit encoded representation if the + /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the /// specified value is a rotated 8-bit value. Return -1 if no rotation /// encoding is possible. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValRotate (unsigned V) { + static inline int getT2SOImmValRotateVal(unsigned V) { unsigned RotAmt = CountLeadingZeros_32(V); if (RotAmt >= 24) return -1; @@ -346,23 +325,23 @@ namespace ARM_AM { } /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit - /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit + /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit /// encoding for it. If not, return -1. /// See ARM Reference Manual A6.3.2. static inline int getT2SOImmVal(unsigned Arg) { // If 'Arg' is an 8-bit splat, then get the encoded value. - int Splat = getT2SOImmValSplat(Arg); + int Splat = getT2SOImmValSplatVal(Arg); if (Splat != -1) return Splat; - + // If 'Arg' can be handled with a single shifter_op return the value. 
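    // Editor's worked example, not in the original source: for the byte 0xAB
    // the splat forms accepted by getT2SOImmValSplatVal above encode as
    //   0x000000AB -> 0x0AB   (control = 0)
    //   0x00AB00AB -> 0x1AB   (control = 1)
    //   0xAB00AB00 -> 0x2AB   (control = 2)
    //   0xABABABAB -> 0x3AB   (control = 3)
    // and anything else falls through to the rotate form tried next.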
- int Rot = getT2SOImmValRotate(Arg); + int Rot = getT2SOImmValRotateVal(Arg); if (Rot != -1) return Rot; return -1; } - + //===--------------------------------------------------------------------===// // Addressing Mode #2 @@ -380,7 +359,7 @@ namespace ARM_AM { // If this addressing mode is a frame index (before prolog/epilog insertion // and code rewriting), this operand will have the form: FI#, reg0, // with no shift amount for the frame offset. - // + // static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) { assert(Imm12 < (1 << 12) && "Imm too large!"); bool isSub = Opc == sub; @@ -395,8 +374,8 @@ namespace ARM_AM { static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { return (ShiftOpc)(AM2Opc >> 13); } - - + + //===--------------------------------------------------------------------===// // Addressing Mode #3 //===--------------------------------------------------------------------===// @@ -409,7 +388,7 @@ namespace ARM_AM { // The first operand is always a Reg. The second operand is a reg if in // reg/reg form, otherwise it's reg#0. The third field encodes the operation // in bit 8, the immediate in bits 0-7. - + /// getAM3Opc - This function encodes the addrmode3 opc field. static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) { bool isSub = Opc == sub; @@ -421,7 +400,7 @@ namespace ARM_AM { static inline AddrOpc getAM3Op(unsigned AM3Opc) { return ((AM3Opc >> 8) & 1) ? sub : add; } - + //===--------------------------------------------------------------------===// // Addressing Mode #4 //===--------------------------------------------------------------------===// @@ -469,7 +448,7 @@ namespace ARM_AM { // // IA - Increment after // DB - Decrement before - + /// getAM5Opc - This function encodes the addrmode5 opc field. static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { bool isSub = Opc == sub; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp new file mode 100644 index 0000000000000..ecdf5a0be6436 --- /dev/null +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -0,0 +1,1060 @@ +//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Base ARM implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMBaseInstrInfo.h" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +static cl::opt +EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, + cl::desc("Enable ARM 2-addr to 3-addr conv")); + +ARMBaseInstrInfo::ARMBaseInstrInfo() + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) { +} + +MachineInstr * +ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + // FIXME: Thumb2 support. + + if (!EnableARM3Addr) + return NULL; + + MachineInstr *MI = MBBI; + MachineFunction &MF = *MI->getParent()->getParent(); + unsigned TSFlags = MI->getDesc().TSFlags; + bool isPre = false; + switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { + default: return NULL; + case ARMII::IndexModePre: + isPre = true; + break; + case ARMII::IndexModePost: + break; + } + + // Try splitting an indexed load/store to an un-indexed one plus an add/sub + // operation. + unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); + if (MemOpc == 0) + return NULL; + + MachineInstr *UpdateMI = NULL; + MachineInstr *MemMI = NULL; + unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + bool isLoad = !TID.mayStore(); + const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); + const MachineOperand &Base = MI->getOperand(2); + const MachineOperand &Offset = MI->getOperand(NumOps-3); + unsigned WBReg = WB.getReg(); + unsigned BaseReg = Base.getReg(); + unsigned OffReg = Offset.getReg(); + unsigned OffImm = MI->getOperand(NumOps-2).getImm(); + ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); + switch (AddrMode) { + default: + assert(false && "Unknown indexed op!"); + return NULL; + case ARMII::AddrMode2: { + bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + if (OffReg == 0) { + if (ARM_AM::getSOImmVal(Amt) == -1) + // Can't encode it in a so_imm operand. This transformation will + // add more than 1 instruction. Abandon! + return NULL; + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(Amt) + .addImm(Pred).addReg(0).addReg(0); + } else if (Amt != 0) { + ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); + unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg) + .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) + .addImm(Pred).addReg(0).addReg(0); + } else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? 
ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + case ARMII::AddrMode3 : { + bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM3Offset(OffImm); + if (OffReg == 0) + // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(Amt) + .addImm(Pred).addReg(0).addReg(0); + else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + } + + std::vector NewMIs; + if (isPre) { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + NewMIs.push_back(MemMI); + NewMIs.push_back(UpdateMI); + } else { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + if (WB.isDead()) + UpdateMI->getOperand(0).setIsDead(); + NewMIs.push_back(UpdateMI); + NewMIs.push_back(MemMI); + } + + // Transfer LiveVariables states, kill / dead info. + if (LV) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + + LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); + if (MO.isDef()) { + MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; + if (MO.isDead()) + LV->addVirtualRegisterDead(Reg, NewMI); + } + if (MO.isUse() && MO.isKill()) { + for (unsigned j = 0; j < 2; ++j) { + // Look at the two new MI's in reverse order. + MachineInstr *NewMI = NewMIs[j]; + if (!NewMI->readsRegister(Reg)) + continue; + LV->addVirtualRegisterKilled(Reg, NewMI); + if (VI.removeKill(MI)) + VI.Kills.push_back(NewMI); + break; + } + } + } + } + } + + MFI->insert(MBBI, NewMIs[1]); + MFI->insert(MBBI, NewMIs[0]); + return NewMIs[0]; +} + +// Branch analysis. +bool +ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (isUncondBranchOpcode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. 
+ MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with a B and a Bcc, handle it. + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(1)); + Cond.push_back(SecondLastInst->getOperand(2)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // ...likewise if it ends with a branch table followed by an unconditional + // branch. The branch folder can create these, and we must get rid of them for + // correctness of Thumb constant islands. + if (isJumpTableBranchOpcode(SecondLastOpc) && + isUncondBranchOpcode(LastOpc)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + + // Otherwise, can't handle this. + return true; +} + + +unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (!isUncondBranchOpcode(I->getOpcode()) && + !isCondBranchOpcode(I->getOpcode())) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (!isCondBranchOpcode(I->getOpcode())) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +unsigned +ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + + ARMFunctionInfo *AFI = MBB.getParent()->getInfo(); + int BOpc = !AFI->isThumbFunction() + ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); + int BccOpc = !AFI->isThumbFunction() + ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc); + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "ARM branch conditions have two components!"); + + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch? + BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB); + else + BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + return 1; + } + + // Two-way conditional branch. 
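+  // Editor's sketch, not from the patch: the Cond vector built by
+  // AnalyzeBranch and consumed here is a two-operand pair, the ARMCC
+  // condition as an immediate plus the flag register (CPSR, or reg0 when
+  // the flags are not live). A hypothetical caller could invert a branch:
+  //
+  //   SmallVector<MachineOperand, 2> Cond;
+  //   Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
+  //   Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, /*isDef=*/false));
+  //   TII.ReverseBranchCondition(Cond);  // Cond[0] now holds ARMCC::NE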
+ BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); + return 2; +} + +bool ARMBaseInstrInfo:: +ReverseBranchCondition(SmallVectorImpl &Cond) const { + ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); + Cond[0].setImm(ARMCC::getOppositeCondition(CC)); + return false; +} + +bool ARMBaseInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Pred) const { + unsigned Opc = MI->getOpcode(); + if (isUncondBranchOpcode(Opc)) { + MI->setDesc(get(getMatchingCondBranchOpcode(Opc))); + MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); + MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + return true; + } + + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx != -1) { + MachineOperand &PMO = MI->getOperand(PIdx); + PMO.setImm(Pred[0].getImm()); + MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); + return true; + } + return false; +} + +bool ARMBaseInstrInfo:: +SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) const { + if (Pred1.size() > 2 || Pred2.size() > 2) + return false; + + ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); + ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); + if (CC1 == CC2) + return true; + + switch (CC1) { + default: + return false; + case ARMCC::AL: + return true; + case ARMCC::HS: + return CC2 == ARMCC::HI; + case ARMCC::LS: + return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; + case ARMCC::GE: + return CC2 == ARMCC::GT; + case ARMCC::LE: + return CC2 == ARMCC::LT; + } +} + +bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const { + // FIXME: This confuses implicit_def with optional CPSR def. + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) + return false; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == ARM::CPSR) { + Pred.push_back(MO); + Found = true; + } + } + + return Found; +} + + +/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing +static unsigned getNumJTEntries(const std::vector &JT, + unsigned JTI) DISABLE_INLINE; +static unsigned getNumJTEntries(const std::vector &JT, + unsigned JTI) { + return JT[JTI].MBBs.size(); +} + +/// GetInstSize - Return the size of the specified MachineInstr. +/// +unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + // Basic size info comes from the TSFlags field. + const TargetInstrDesc &TID = MI->getDesc(); + unsigned TSFlags = TID.TSFlags; + + unsigned Opc = MI->getOpcode(); + switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { + default: { + // If this machine instr is an inline asm, measure it. + if (MI->getOpcode() == ARM::INLINEASM) + return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); + if (MI->isLabel()) + return 0; + switch (Opc) { + default: + llvm_unreachable("Unknown or unset size field for instr!"); + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::KILL: + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + return 0; + } + break; + } + case ARMII::Size8Bytes: return 8; // ARM instruction x 2. + case ARMII::Size4Bytes: return 4; // ARM / Thumb2 instruction. 
+  case ARMII::Size2Bytes: return 2;          // Thumb1 instruction.
+  case ARMII::SizeSpecial: {
+    switch (Opc) {
+    case ARM::CONSTPOOL_ENTRY:
+      // If this machine instr is a constant pool entry, its size is recorded as
+      // operand #2.
+      return MI->getOperand(2).getImm();
+    case ARM::Int_eh_sjlj_setjmp:
+      return 24;
+    case ARM::t2Int_eh_sjlj_setjmp:
+      return 20;
+    case ARM::BR_JTr:
+    case ARM::BR_JTm:
+    case ARM::BR_JTadd:
+    case ARM::tBR_JTr:
+    case ARM::t2BR_JT:
+    case ARM::t2TBB:
+    case ARM::t2TBH: {
+      // These are jumptable branches, i.e. a branch followed by an inlined
+      // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+      // entry is one byte; TBH two bytes each.
+      unsigned EntrySize = (Opc == ARM::t2TBB)
+        ? 1 : ((Opc == ARM::t2TBH) ? 2 : 4);
+      unsigned NumOps = TID.getNumOperands();
+      MachineOperand JTOP =
+        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+      unsigned JTI = JTOP.getIndex();
+      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+      assert(JTI < JT.size());
+      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+      // aligned. The assembler / linker may add 2 byte padding just before
+      // the JT entries. The size does not include this padding; the
+      // constant islands pass does separate bookkeeping for it.
+      // FIXME: If we know the size of the function is less than (1 << 16) *2
+      // bytes, we can use 16-bit entries instead. Then there won't be an
+      // alignment issue.
+      unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+      unsigned NumEntries = getNumJTEntries(JT, JTI);
+      if (Opc == ARM::t2TBB && (NumEntries & 1))
+        // Make sure the instruction that follows TBB is 2-byte aligned.
+        // FIXME: Constant island pass should insert an "ALIGN" instruction
+        // instead.
+        ++NumEntries;
+      return NumEntries * EntrySize + InstSize;
+    }
+    default:
+      // Otherwise, pseudo-instruction sizes are zero.
+      return 0;
+    }
+  }
+  }
+  return 0; // Not reached
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool
+ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
+                              unsigned &SrcReg, unsigned &DstReg,
+                              unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+  switch (MI.getOpcode()) {
+  default: break;
+  case ARM::FCPYS:
+  case ARM::FCPYD:
+  case ARM::VMOVD:
+  case ARM::VMOVQ: {
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  }
+  case ARM::MOVr:
+  case ARM::tMOVr:
+  case ARM::tMOVgpr2tgpr:
+  case ARM::tMOVtgpr2gpr:
+  case ARM::tMOVgpr2gpr:
+  case ARM::t2MOVr: {
+    assert(MI.getDesc().getNumOperands() >= 2 &&
+           MI.getOperand(0).isReg() &&
+           MI.getOperand(1).isReg() &&
+           "Invalid ARM MOV instruction");
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  }
+  }
+
+  return false;
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::LDR:
+  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::t2LDRi12: + case ARM::tRestore: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FLDD: + case ARM::FLDS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + + return 0; +} + +unsigned +ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STR: + case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::t2STRi12: + case ARM::tSpill: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FSTD: + case ARM::FSTS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + + return 0; +} + +bool +ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (DestRC != SrcRC) { + // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies + // Allow QPR / QPR_VFP2 cross-class copies + if (DestRC == ARM::DPRRegisterClass) { + if (SrcRC == ARM::DPR_VFP2RegisterClass || + SrcRC == ARM::DPR_8RegisterClass) { + } else + return false; + } else if (DestRC == ARM::DPR_VFP2RegisterClass) { + if (SrcRC == ARM::DPRRegisterClass || + SrcRC == ARM::DPR_8RegisterClass) { + } else + return false; + } else if (DestRC == ARM::DPR_8RegisterClass) { + if (SrcRC == ARM::DPRRegisterClass || + SrcRC == ARM::DPR_VFP2RegisterClass) { + } else + return false; + } else if ((DestRC == ARM::QPRRegisterClass && + SrcRC == ARM::QPR_VFP2RegisterClass) || + (DestRC == ARM::QPR_VFP2RegisterClass && + SrcRC == ARM::QPRRegisterClass)) { + } else + return false; + } + + if (DestRC == ARM::GPRRegisterClass) { + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), + DestReg).addReg(SrcReg))); + } else if (DestRC == ARM::SPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) + .addReg(SrcReg)); + } else if ((DestRC == ARM::DPRRegisterClass) || + (DestRC == ARM::DPR_VFP2RegisterClass) || + (DestRC == ARM::DPR_8RegisterClass)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + .addReg(SrcReg)); + } else if (DestRC == ARM::QPRRegisterClass || + DestRC == ARM::QPR_VFP2RegisterClass) { + BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); + } else { + return false; + } + + return true; +} + +void ARMBaseInstrInfo:: 
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOStore, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + + if (RC == ARM::GPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + } else if (RC == ARM::DPRRegisterClass || + RC == ARM::DPR_VFP2RegisterClass || + RC == ARM::DPR_8RegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (RC == ARM::SPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else { + assert((RC == ARM::QPRRegisterClass || + RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); + // FIXME: Neon instructions should support predicates + BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } +} + +void ARMBaseInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOLoad, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + + if (RC == ARM::GPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) + .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); + } else if (RC == ARM::DPRRegisterClass || + RC == ARM::DPR_VFP2RegisterClass || + RC == ARM::DPR_8RegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else if (RC == ARM::SPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + } else { + assert((RC == ARM::QPRRegisterClass || + RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); + // FIXME: Neon instructions should support predicates + BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } +} + +MachineInstr *ARMBaseInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = NULL; + if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { + // If it is updating CPSR, then it cannot be folded. 
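+  // Editor's note with a hypothetical helper, not in the patch: the test
+  // applied below, and repeated in canFoldMemoryOperand, is that a
+  // MOVr/t2MOVr may only be folded into a spill or reload when its optional
+  // CPSR definition (operand 4) is absent or dead:
+  //
+  //   static bool movLeavesLiveCPSRDef(const MachineInstr *MI) {
+  //     const MachineOperand &MO = MI->getOperand(4);
+  //     return MO.getReg() == ARM::CPSR && !MO.isDead();
+  //   }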
+ if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead()) + return NULL; + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + if (Opc == ARM::MOVr) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + else // ARM::t2MOVr + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + if (Opc == ARM::MOVr) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + else // ARM::t2MOVr + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } + } else if (Opc == ARM::tMOVgpr2gpr || + Opc == ARM::tMOVtgpr2gpr || + Opc == ARM::tMOVgpr2tgpr) { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); + } + } else if (Opc == ARM::FCPYS) { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addFrameIndex(FI) + .addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } + } + else if (Opc == ARM::FCPYD) { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) + 
.addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } + } + + return NewMI; +} + +MachineInstr* +ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + // FIXME + return 0; +} + +bool +ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + if (Ops.size() != 1) return false; + + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { + // If it is updating CPSR, then it cannot be folded. + return MI->getOperand(4).getReg() != ARM::CPSR || + MI->getOperand(4).isDead(); + } else if (Opc == ARM::tMOVgpr2gpr || + Opc == ARM::tMOVtgpr2gpr || + Opc == ARM::tMOVgpr2tgpr) { + return true; + } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) { + return true; + } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) { + return false; // FIXME + } + + return false; +} + +/// getInstrPredicate - If instruction is predicated, returns its predicate +/// condition, otherwise returns AL. It also returns the condition code +/// register by reference. +ARMCC::CondCodes +llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx == -1) { + PredReg = 0; + return ARMCC::AL; + } + + PredReg = MI->getOperand(PIdx+1).getReg(); + return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); +} + + +int llvm::getMatchingCondBranchOpcode(int Opc) { + if (Opc == ARM::B) + return ARM::Bcc; + else if (Opc == ARM::tB) + return ARM::tBcc; + else if (Opc == ARM::t2B) + return ARM::t2Bcc; + + llvm_unreachable("Unknown unconditional branch opcode!"); + return 0; +} + + +void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + while (NumBytes) { + unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); + unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); + assert(ThisVal && "Didn't extract field correctly"); + + // We will handle these bits from offset, clear them. + NumBytes &= ~ThisVal; + + assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); + + // Build the new ADD / SUB. + unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + BaseReg = DestReg; + } +} + +bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrMode2. 
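+  // Editor's worked example, not from the patch: the loop in
+  // emitARMRegPlusImmediate above peels off one SO-immediate-encodable chunk
+  // per iteration. For NumBytes = 0x12345 it emits three ADDri instructions:
+  //   pass 1: ThisVal = 0x00045, leaving 0x12300
+  //   pass 2: ThisVal = 0x02300, leaving 0x10000
+  //   pass 3: ThisVal = 0x10000, leaving 0
+  // so an arbitrary 32-bit offset is materialized by a short run of
+  // add / sub instructions.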
+  if (Opcode == ARM::INLINEASM)
+    AddrMode = ARMII::AddrMode2;
+
+  if (Opcode == ARM::ADDri) {
+    Offset += MI.getOperand(FrameRegIdx+1).getImm();
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setDesc(TII.get(ARM::MOVr));
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(FrameRegIdx+1);
+      Offset = 0;
+      return true;
+    } else if (Offset < 0) {
+      Offset = -Offset;
+      isSub = true;
+      MI.setDesc(TII.get(ARM::SUBri));
+    }
+
+    // Common case: small offset, fits into instruction.
+    if (ARM_AM::getSOImmVal(Offset) != -1) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
+      Offset = 0;
+      return true;
+    }
+
+    // Otherwise, pull as much of the immediate into this ADDri/SUBri
+    // as possible.
+    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
+    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+
+    // We will handle these bits from offset, clear them.
+    Offset &= ~ThisImmVal;
+
+    // Get the properly encoded SOImmVal field.
+    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
+           "Bit extraction didn't work?");
+    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
+  } else {
+    unsigned ImmIdx = 0;
+    int InstrOffs = 0;
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    switch (AddrMode) {
+    case ARMII::AddrMode2: {
+      ImmIdx = FrameRegIdx+2;
+      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 12;
+      break;
+    }
+    case ARMII::AddrMode3: {
+      ImmIdx = FrameRegIdx+2;
+      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 8;
+      break;
+    }
+    case ARMII::AddrMode4:
+      // Can't fold any offset even if it's zero.
+      return false;
+    case ARMII::AddrMode5: {
+      ImmIdx = FrameRegIdx+1;
+      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 8;
+      Scale = 4;
+      break;
+    }
+    default:
+      llvm_unreachable("Unsupported addressing mode!");
+      break;
+    }
+
+    Offset += InstrOffs * Scale;
+    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+    if (Offset < 0) {
+      Offset = -Offset;
+      isSub = true;
+    }
+
+    // Attempt to fold address comp. if opcode has offset bits
+    if (NumBits > 0) {
+      // Common case: small offset, fits into instruction.
+      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+      int ImmedOffset = Offset / Scale;
+      unsigned Mask = (1 << NumBits) - 1;
+      if ((unsigned)Offset <= Mask * Scale) {
+        // Replace the FrameIndex with sp
+        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+        if (isSub)
+          ImmedOffset |= 1 << NumBits;
+        ImmOp.ChangeToImmediate(ImmedOffset);
+        Offset = 0;
+        return true;
+      }
+
+      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+      ImmedOffset = ImmedOffset & Mask;
+      if (isSub)
+        ImmedOffset |= 1 << NumBits;
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      Offset &= ~(Mask*Scale);
+    }
+  }
+
+  Offset = (isSub) ?
-Offset : Offset; + return Offset == 0; +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h new file mode 100644 index 0000000000000..a13155b9fd0d1 --- /dev/null +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -0,0 +1,333 @@ +//===- ARMBaseInstrInfo.h - ARM Base Instruction Information -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Base ARM implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMBASEINSTRUCTIONINFO_H +#define ARMBASEINSTRUCTIONINFO_H + +#include "ARM.h" +#include "ARMRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + +/// ARMII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace ARMII { + enum { + //===------------------------------------------------------------------===// + // Instruction Flags. + + //===------------------------------------------------------------------===// + // This four-bit field describes the addressing mode used. + + AddrModeMask = 0xf, + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrMode6 = 6, + AddrModeT1_1 = 7, + AddrModeT1_2 = 8, + AddrModeT1_4 = 9, + AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data + AddrModeT2_i12 = 11, + AddrModeT2_i8 = 12, + AddrModeT2_so = 13, + AddrModeT2_pc = 14, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 15, // i8 * 4 + + // Size* - Flags to keep track of the size of an instruction. + SizeShift = 4, + SizeMask = 7 << SizeShift, + SizeSpecial = 1, // 0 byte pseudo or special case. + Size8Bytes = 2, + Size4Bytes = 3, + Size2Bytes = 4, + + // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load + // and store ops + IndexModeShift = 7, + IndexModeMask = 3 << IndexModeShift, + IndexModePre = 1, + IndexModePost = 2, + + //===------------------------------------------------------------------===// + // Instruction encoding formats. 
+ // + FormShift = 9, + FormMask = 0x3f << FormShift, + + // Pseudo instructions + Pseudo = 0 << FormShift, + + // Multiply instructions + MulFrm = 1 << FormShift, + + // Branch instructions + BrFrm = 2 << FormShift, + BrMiscFrm = 3 << FormShift, + + // Data Processing instructions + DPFrm = 4 << FormShift, + DPSoRegFrm = 5 << FormShift, + + // Load and Store + LdFrm = 6 << FormShift, + StFrm = 7 << FormShift, + LdMiscFrm = 8 << FormShift, + StMiscFrm = 9 << FormShift, + LdStMulFrm = 10 << FormShift, + + // Miscellaneous arithmetic instructions + ArithMiscFrm = 11 << FormShift, + + // Extend instructions + ExtFrm = 12 << FormShift, + + // VFP formats + VFPUnaryFrm = 13 << FormShift, + VFPBinaryFrm = 14 << FormShift, + VFPConv1Frm = 15 << FormShift, + VFPConv2Frm = 16 << FormShift, + VFPConv3Frm = 17 << FormShift, + VFPConv4Frm = 18 << FormShift, + VFPConv5Frm = 19 << FormShift, + VFPLdStFrm = 20 << FormShift, + VFPLdStMulFrm = 21 << FormShift, + VFPMiscFrm = 22 << FormShift, + + // Thumb format + ThumbFrm = 23 << FormShift, + + // NEON format + NEONFrm = 24 << FormShift, + NEONGetLnFrm = 25 << FormShift, + NEONSetLnFrm = 26 << FormShift, + NEONDupFrm = 27 << FormShift, + + //===------------------------------------------------------------------===// + // Misc flags. + + // UnaryDP - Indicates this is a unary data processing instruction, i.e. + // it doesn't have a Rn operand. + UnaryDP = 1 << 15, + + // Xform16Bit - Indicates this Thumb2 instruction may be transformed into + // a 16-bit Thumb instruction if certain conditions are met. + Xform16Bit = 1 << 16, + + //===------------------------------------------------------------------===// + // Field shifts - such shifts are used to set field while generating + // machine instructions. + M_BitShift = 5, + ShiftImmShift = 5, + ShiftShift = 7, + N_BitShift = 7, + ImmHiShift = 8, + SoRotImmShift = 8, + RegRsShift = 8, + ExtRotImmShift = 10, + RegRdLoShift = 12, + RegRdShift = 12, + RegRdHiShift = 16, + RegRnShift = 16, + S_BitShift = 20, + W_BitShift = 21, + AM3_I_BitShift = 22, + D_BitShift = 22, + U_BitShift = 23, + P_BitShift = 24, + I_BitShift = 25, + CondShift = 28 + }; +} + +class ARMBaseInstrInfo : public TargetInstrInfoImpl { +protected: + // Can be only subclassed. + explicit ARMBaseInstrInfo(); +public: + // Return the non-pre/post incrementing version of 'Opc'. Return 0 + // if there is not such an opcode. + virtual unsigned getUnindexedOpcode(unsigned Opc) const =0; + + // Return true if the block does not fall through. + virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0; + + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; + + // Branch analysis. + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond) const; + + virtual + bool ReverseBranchCondition(SmallVectorImpl &Cond) const; + + // Predication support. 
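+  // Editor's sketch, not from the patch: how the ARMII fields above unpack
+  // from a TargetInstrDesc::TSFlags word, matching the TSFlagsShifts list in
+  // ARM.td (AddrMode bits 0-3, Size 4-6, IndexMode 7-8, Form 9-14, UnaryDP 15):
+  //
+  //   unsigned AddrMode  = TSFlags & ARMII::AddrModeMask;
+  //   unsigned Size      = (TSFlags & ARMII::SizeMask) >> ARMII::SizeShift;
+  //   unsigned IndexMode = (TSFlags & ARMII::IndexModeMask)
+  //                          >> ARMII::IndexModeShift;
+  //   unsigned Form      = TSFlags & ARMII::FormMask; // *Frm values are
+  //                                                   // pre-shifted; compare
+  //                                                   // without shifting.
+  //   bool     IsUnaryDP = (TSFlags & ARMII::UnaryDP) != 0;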
+ bool isPredicated(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; + } + + ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm() + : ARMCC::AL; + } + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const; + + /// GetInstSize - Returns the size of the specified MachineInstr. + /// + virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const; + +}; + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB, + bool isDead = false) { + return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead)); +} + +static inline +const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +static inline +bool isUncondBranchOpcode(int Opc) { + return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B; +} + +static inline +bool isCondBranchOpcode(int Opc) { + return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc; +} + +static inline +bool isJumpTableBranchOpcode(int Opc) { + return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd || + Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT; +} + +/// getInstrPredicate - If instruction is predicated, returns its predicate +/// condition, otherwise returns AL. It also returns the condition code +/// register by reference. 
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+int getMatchingCondBranchOpcode(int Opc);
+
+/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of
+/// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2
+/// code.
+void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+                             unsigned DestReg, unsigned BaseReg, int NumBytes,
+                             ARMCC::CondCodes Pred, unsigned PredReg,
+                             const ARMBaseInstrInfo &TII);
+
+void emitT2RegPlusImmediate(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+                            unsigned DestReg, unsigned BaseReg, int NumBytes,
+                            ARMCC::CondCodes Pred, unsigned PredReg,
+                            const ARMBaseInstrInfo &TII);
+
+
+/// rewriteARMFrameIndex / rewriteT2FrameIndex -
+/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
+/// offset could not be handled directly in MI, and return the left-over
+/// portion by reference.
+bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                          unsigned FrameReg, int &Offset,
+                          const ARMBaseInstrInfo &TII);
+
+bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                         unsigned FrameReg, int &Offset,
+                         const ARMBaseInstrInfo &TII);
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
new file mode 100644
index 0000000000000..42ef183e5261b
--- /dev/null
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -0,0 +1,1360 @@
+//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the base ARM implementation of TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
+                                                   bool *isSPVFP) {
+  if (isSPVFP)
+    *isSPVFP = false;
+
+  using namespace ARM;
+  switch (RegEnum) {
+  default:
+    llvm_unreachable("Unknown ARM register!");
+  case R0:  case D0:  case Q0:  return 0;
+  case R1:  case D1:  case Q1:  return 1;
+  case R2:  case D2:  case Q2:  return 2;
+  case R3:  case D3:  case Q3:  return 3;
+  case R4:  case D4:  case Q4:  return 4;
+  case R5:  case D5:  case Q5:  return 5;
+  case R6:  case D6:  case Q6:  return 6;
+  case R7:  case D7:  case Q7:  return 7;
+  case R8:  case D8:  case Q8:  return 8;
+  case R9:  case D9:  case Q9:  return 9;
+  case R10: case D10: case Q10: return 10;
+  case R11: case D11: case Q11: return 11;
+  case R12: case D12: case Q12: return 12;
+  case SP:  case D13: case Q13: return 13;
+  case LR:  case D14: case Q14: return 14;
+  case PC:  case D15: case Q15: return 15;
+
+  case D16: return 16;
+  case D17: return 17;
+  case D18: return 18;
+  case D19: return 19;
+  case D20: return 20;
+  case D21: return 21;
+  case D22: return 22;
+  case D23: return 23;
+  case D24: return 24;
+  case D25: return 25;
+  case D26: return 26;
+  case D27: return 27;
+  case D28: return 28;
+  case D29: return 29;
+  case D30: return 30;
+  case D31: return 31;
+
+  case S0:  case S1:  case S2:  case S3:
+  case S4:  case S5:  case S6:  case S7:
+  case S8:  case S9:  case S10: case S11:
+  case S12: case S13: case S14: case S15:
+  case S16: case S17: case S18: case S19:
+  case S20: case S21: case S22: case S23:
+  case S24: case S25: case S26: case S27:
+  case S28: case S29: case S30: case S31: {
+    if (isSPVFP)
+      *isSPVFP = true;
+    switch (RegEnum) {
+    default: return 0; // Avoid compile time warning.
+ case S0: return 0; + case S1: return 1; + case S2: return 2; + case S3: return 3; + case S4: return 4; + case S5: return 5; + case S6: return 6; + case S7: return 7; + case S8: return 8; + case S9: return 9; + case S10: return 10; + case S11: return 11; + case S12: return 12; + case S13: return 13; + case S14: return 14; + case S15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + } + } + } +} + +ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, + const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), + TII(tii), STI(sti), + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) { +} + +const unsigned* +ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const unsigned CalleeSavedRegs[] = { + ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, + ARM::R7, ARM::R6, ARM::R5, ARM::R4, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + + static const unsigned DarwinCalleeSavedRegs[] = { + // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved + // register. + ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, + ARM::R11, ARM::R10, ARM::R8, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; +} + +const TargetRegisterClass* const * +ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + + static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, + &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + + static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + + static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={ + &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass, + &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + + if (STI.isThumb1Only()) { + return STI.isTargetDarwin() + ? 
DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses;
+  }
+  return STI.isTargetDarwin()
+    ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
+}
+
+BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  // FIXME: avoid re-calculating this every time.
+  BitVector Reserved(getNumRegs());
+  Reserved.set(ARM::SP);
+  Reserved.set(ARM::PC);
+  if (STI.isTargetDarwin() || hasFP(MF))
+    Reserved.set(FramePtr);
+  // Some targets reserve R9.
+  if (STI.isR9Reserved())
+    Reserved.set(ARM::R9);
+  return Reserved;
+}
+
+bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
+                                        unsigned Reg) const {
+  switch (Reg) {
+  default: break;
+  case ARM::SP:
+  case ARM::PC:
+    return true;
+  case ARM::R7:
+  case ARM::R11:
+    if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+      return true;
+    break;
+  case ARM::R9:
+    return STI.isR9Reserved();
+  }
+
+  return false;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
+  return ARM::GPRRegisterClass;
+}
+
+/// getAllocationOrder - Returns the register allocation order for a specified
+/// register class in the form of a pair of TargetRegisterClass iterators.
+std::pair<TargetRegisterClass::iterator, TargetRegisterClass::iterator>
+ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+                                        unsigned HintType, unsigned HintReg,
+                                        const MachineFunction &MF) const {
+  // Alternative register allocation orders when favoring even / odd registers
+  // of register pairs.
+
+  // No FP, R9 is available.
+  static const unsigned GPREven1[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+    ARM::R9, ARM::R11
+  };
+  static const unsigned GPROdd1[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+    ARM::R8, ARM::R10
+  };
+
+  // FP is R7, R9 is available.
+  static const unsigned GPREven2[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+    ARM::R9, ARM::R11
+  };
+  static const unsigned GPROdd2[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+    ARM::R8, ARM::R10
+  };
+
+  // FP is R11, R9 is available.
+  static const unsigned GPREven3[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+    ARM::R9
+  };
+  static const unsigned GPROdd3[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+    ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+    ARM::R8
+  };
+
+  // No FP, R9 is not available.
+  static const unsigned GPREven4[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+    ARM::R11
+  };
+  static const unsigned GPROdd4[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R10
+  };
+
+  // FP is R7, R9 is not available.
+  static const unsigned GPREven5[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+    ARM::R11
+  };
+  static const unsigned GPROdd5[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R10
+  };
+
+  // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 + }; + static const unsigned GPROdd6[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 + }; + + + if (HintType == ARMRI::RegPairEven) { + if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven1, + GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); + else + return std::make_pair(GPREven4, + GPREven4 + (sizeof(GPREven4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven2, + GPREven2 + (sizeof(GPREven2)/sizeof(unsigned))); + else + return std::make_pair(GPREven5, + GPREven5 + (sizeof(GPREven5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPREven3, + GPREven3 + (sizeof(GPREven3)/sizeof(unsigned))); + else + return std::make_pair(GPREven6, + GPREven6 + (sizeof(GPREven6)/sizeof(unsigned))); + } + } else if (HintType == ARMRI::RegPairOdd) { + if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) + // It's no longer possible to fulfill this hint. Return the default + // allocation order. + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + + if (!STI.isTargetDarwin() && !hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd1, + GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); + else + return std::make_pair(GPROdd4, + GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd2, + GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned))); + else + return std::make_pair(GPROdd5, + GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd3, + GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned))); + else + return std::make_pair(GPROdd6, + GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned))); + } + } + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); +} + +/// ResolveRegAllocHint - Resolves the specified register allocation hint +/// to a physical register. Returns the physical register if it is successful. +unsigned +ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { + if (Reg == 0 || !isPhysicalRegister(Reg)) + return 0; + if (Type == 0) + return Reg; + else if (Type == (unsigned)ARMRI::RegPairOdd) + // Odd register. + return getRegisterPairOdd(Reg, MF); + else if (Type == (unsigned)ARMRI::RegPairEven) + // Even register. + return getRegisterPairEven(Reg, MF); + return 0; +} + +void +ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, + MachineFunction &MF) const { + MachineRegisterInfo *MRI = &MF.getRegInfo(); + std::pair Hint = MRI->getRegAllocationHint(Reg); + if ((Hint.first == (unsigned)ARMRI::RegPairOdd || + Hint.first == (unsigned)ARMRI::RegPairEven) && + Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) { + // If 'Reg' is one of the even / odd register pair and it's now changed + // (e.g. coalesced) into a different register. 
The other register of the pair's allocation hint must be updated to reflect the change.
+    unsigned OtherReg = Hint.second;
+    Hint = MRI->getRegAllocationHint(OtherReg);
+    if (Hint.second == Reg)
+      // Make sure the pair has not already divorced.
+      MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+  }
+}
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register. This is true if the function has variable sized allocas
+/// or if frame pointer elimination is disabled.
+///
+bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return (NoFramePointerElim ||
+          MFI->hasVarSizedObjects() ||
+          MFI->isFrameAddressTaken());
+}
+
+bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  if (NoFramePointerElim && MFI->hasCalls())
+    return true;
+  return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+}
+
+/// estimateStackSize - Estimate and return the size of the frame.
+static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  int Offset = 0;
+  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+    int FixedOff = -FFI->getObjectOffset(i);
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+    if (FFI->isDeadObjectIndex(i))
+      continue;
+    Offset += FFI->getObjectSize(i);
+    unsigned Align = FFI->getObjectAlignment(i);
+    // Adjust to alignment boundary.
+    Offset = (Offset+Align-1)/Align*Align;
+  }
+  return (unsigned)Offset;
+}
+
+/// estimateRSStackSizeLimit - Look at each instruction that references stack
+/// frames and return the stack size limit beyond which some of these
+/// instructions will require a scratch register during their expansion later.
+unsigned
+ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+  unsigned Limit = (1 << 12) - 1;
+  for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+        if (!I->getOperand(i).isFI()) continue;
+
+        const TargetInstrDesc &Desc = TII.get(I->getOpcode());
+        unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+        if (AddrMode == ARMII::AddrMode3 ||
+            AddrMode == ARMII::AddrModeT2_i8)
+          return (1 << 8) - 1;
+
+        if (AddrMode == ARMII::AddrMode5 ||
+            AddrMode == ARMII::AddrModeT2_i8s4)
+          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
+
+        if (AddrMode == ARMII::AddrModeT2_i12 && hasFP(MF))
+          // When the stack offset is negative, we will end up using
+          // the i8 instructions instead.
+          return (1 << 8) - 1;
+        break; // At most one FI per instruction.
+      }
+    }
+  }
+
+  return Limit;
+}
+
+void
+ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                          RegScavenger *RS) const {
+  // This tells PEI to spill the FP as if it is any other callee-save register
+  // to take advantage of the eliminateFrameIndex machinery. This also ensures
+  // it is spilled in the order specified by getCalleeSavedRegs() to make it
+  // easier to combine multiple loads / stores.
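+  // As a rough sketch of where things land on Darwin (assuming every
+  // callee-saved register ends up spilled):
+  //   GPR area 1: R4, R5, R6, R7, LR
+  //   GPR area 2: R8, R10, R11
+  //   DPR area:   D8 - D15
+  // On non-Darwin targets all of the integer registers land in area 1.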
+  bool CanEliminateFrame = true;
+  bool CS1Spilled = false;
+  bool LRSpilled = false;
+  unsigned NumGPRSpills = 0;
+  SmallVector<unsigned, 4> UnspilledCS1GPRs;
+  SmallVector<unsigned, 4> UnspilledCS2GPRs;
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  // Don't spill FP if the frame can be eliminated. This is determined
+  // by scanning the callee-save registers to see if any is used.
+  const unsigned *CSRegs = getCalleeSavedRegs();
+  const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    bool Spilled = false;
+    if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+      AFI->setCSRegisterIsSpilled(Reg);
+      Spilled = true;
+      CanEliminateFrame = false;
+    } else {
+      // Check alias registers too.
+      for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) {
+        if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
+          Spilled = true;
+          CanEliminateFrame = false;
+        }
+      }
+    }
+
+    if (CSRegClasses[i] == ARM::GPRRegisterClass ||
+        CSRegClasses[i] == ARM::tGPRRegisterClass) {
+      if (Spilled) {
+        NumGPRSpills++;
+
+        if (!STI.isTargetDarwin()) {
+          if (Reg == ARM::LR)
+            LRSpilled = true;
+          CS1Spilled = true;
+          continue;
+        }
+
+        // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled.
+        switch (Reg) {
+        case ARM::LR:
+          LRSpilled = true;
+          // Fallthrough
+        case ARM::R4:
+        case ARM::R5:
+        case ARM::R6:
+        case ARM::R7:
+          CS1Spilled = true;
+          break;
+        default:
+          break;
+        }
+      } else {
+        if (!STI.isTargetDarwin()) {
+          UnspilledCS1GPRs.push_back(Reg);
+          continue;
+        }
+
+        switch (Reg) {
+        case ARM::R4:
+        case ARM::R5:
+        case ARM::R6:
+        case ARM::R7:
+        case ARM::LR:
+          UnspilledCS1GPRs.push_back(Reg);
+          break;
+        default:
+          UnspilledCS2GPRs.push_back(Reg);
+          break;
+        }
+      }
+    }
+  }
+
+  bool ForceLRSpill = false;
+  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
+    unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+    // Force LR to be spilled if the Thumb function size is 2048 bytes or more.
+    // This enables use of BL to implement far jump. If it turns out that it's
+    // not needed then the branch fix up path will undo it.
+    if (FnSize >= (1 << 11)) {
+      CanEliminateFrame = false;
+      ForceLRSpill = true;
+    }
+  }
+
+  bool ExtraCSSpill = false;
+  if (!CanEliminateFrame || cannotEliminateFrame(MF)) {
+    AFI->setHasStackFrame(true);
+
+    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
+    // spill LR as well so we can fold BX_RET into the register restore (LDM).
+    if (!LRSpilled && CS1Spilled) {
+      MF.getRegInfo().setPhysRegUsed(ARM::LR);
+      AFI->setCSRegisterIsSpilled(ARM::LR);
+      NumGPRSpills++;
+      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+                                       UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+      ForceLRSpill = false;
+      ExtraCSSpill = true;
+    }
+
+    // Darwin ABI requires FP to point to the stack slot that contains the
+    // previous FP.
+    if (STI.isTargetDarwin() || hasFP(MF)) {
+      MF.getRegInfo().setPhysRegUsed(FramePtr);
+      NumGPRSpills++;
+    }
+
+    // If the stack and doubles are 8-byte aligned and we are spilling an odd
+    // number of GPRs, spill one extra callee-save GPR so we won't have to pad
+    // between the integer and double callee-save areas.
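+    // Worked example: with TargetAlign == 8 and three GPRs spilled (12 bytes),
+    // the DPR spills that follow would start on a 4-byte boundary; spilling a
+    // fourth GPR (16 bytes) keeps the double area 8-byte aligned with no
+    // padding word.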
+ unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (CS1Spilled && !UnspilledCS1GPRs.empty()) { + for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { + unsigned Reg = UnspilledCS1GPRs[i]; + // Don't spill high register if the function is thumb1 + if (!AFI->isThumb1OnlyFunction() || + isARMLowRegister(Reg) || Reg == ARM::LR) { + MF.getRegInfo().setPhysRegUsed(Reg); + AFI->setCSRegisterIsSpilled(Reg); + if (!isReservedReg(MF, Reg)) + ExtraCSSpill = true; + break; + } + } + } else if (!UnspilledCS2GPRs.empty() && + !AFI->isThumb1OnlyFunction()) { + unsigned Reg = UnspilledCS2GPRs.front(); + MF.getRegInfo().setPhysRegUsed(Reg); + AFI->setCSRegisterIsSpilled(Reg); + if (!isReservedReg(MF, Reg)) + ExtraCSSpill = true; + } + } + + // Estimate if we might need to scavenge a register at some point in order + // to materialize a stack offset. If so, either spill one additional + // callee-saved register or reserve a special spill slot to facilitate + // register scavenging. Thumb1 needs a spill slot for stack pointer + // adjustments also, even when the frame itself is small. + if (RS && !ExtraCSSpill) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + // If any of the stack slot references may be out of range of an + // immediate offset, make sure a register (or a spill slot) is + // available for the register scavenger. Note that if we're indexing + // off the frame pointer, the effective stack size is 4 bytes larger + // since the FP points to the stack slot of the previous FP. + if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0) + >= estimateRSStackSizeLimit(MF)) { + // If any non-reserved CS register isn't spilled, just spill one or two + // extra. That should take care of it! + unsigned NumExtras = TargetAlign / 4; + SmallVector Extras; + while (NumExtras && !UnspilledCS1GPRs.empty()) { + unsigned Reg = UnspilledCS1GPRs.back(); + UnspilledCS1GPRs.pop_back(); + if (!isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + // For non-Thumb1 functions, also check for hi-reg CS registers + if (!AFI->isThumb1OnlyFunction()) { + while (NumExtras && !UnspilledCS2GPRs.empty()) { + unsigned Reg = UnspilledCS2GPRs.back(); + UnspilledCS2GPRs.pop_back(); + if (!isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + } + if (Extras.size() && NumExtras == 0) { + for (unsigned i = 0, e = Extras.size(); i != e; ++i) { + MF.getRegInfo().setPhysRegUsed(Extras[i]); + AFI->setCSRegisterIsSpilled(Extras[i]); + } + } else if (!AFI->isThumb1OnlyFunction()) { + // note: Thumb1 functions spill to R12, not the stack. + // Reserve a slot closest to SP or frame pointer. 
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment())); + } + } + } + } + + if (ForceLRSpill) { + MF.getRegInfo().setPhysRegUsed(ARM::LR); + AFI->setCSRegisterIsSpilled(ARM::LR); + AFI->setLRIsSpilledForFarJump(true); + } +} + +unsigned ARMBaseRegisterInfo::getRARegister() const { + return ARM::LR; +} + +unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const { + if (STI.isTargetDarwin() || hasFP(MF)) + return FramePtr; + return ARM::SP; +} + +unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { + llvm_unreachable("What is the exception register"); + return 0; +} + +unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { + llvm_unreachable("What is the exception handler register"); + return 0; +} + +int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); +} + +unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R1: + return ARM::R0; + case ARM::R3: + // FIXME! + return STI.isThumb1Only() ? 0 : ARM::R2; + case ARM::R5: + return ARM::R4; + case ARM::R7: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + case ARM::R9: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R11: + return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + + case ARM::S1: + return ARM::S0; + case ARM::S3: + return ARM::S2; + case ARM::S5: + return ARM::S4; + case ARM::S7: + return ARM::S6; + case ARM::S9: + return ARM::S8; + case ARM::S11: + return ARM::S10; + case ARM::S13: + return ARM::S12; + case ARM::S15: + return ARM::S14; + case ARM::S17: + return ARM::S16; + case ARM::S19: + return ARM::S18; + case ARM::S21: + return ARM::S20; + case ARM::S23: + return ARM::S22; + case ARM::S25: + return ARM::S24; + case ARM::S27: + return ARM::S26; + case ARM::S29: + return ARM::S28; + case ARM::S31: + return ARM::S30; + + case ARM::D1: + return ARM::D0; + case ARM::D3: + return ARM::D2; + case ARM::D5: + return ARM::D4; + case ARM::D7: + return ARM::D6; + case ARM::D9: + return ARM::D8; + case ARM::D11: + return ARM::D10; + case ARM::D13: + return ARM::D12; + case ARM::D15: + return ARM::D14; + case ARM::D17: + return ARM::D16; + case ARM::D19: + return ARM::D18; + case ARM::D21: + return ARM::D20; + case ARM::D23: + return ARM::D22; + case ARM::D25: + return ARM::D24; + case ARM::D27: + return ARM::D26; + case ARM::D29: + return ARM::D28; + case ARM::D31: + return ARM::D30; + } + + return 0; +} + +unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R0: + return ARM::R1; + case ARM::R2: + // FIXME! + return STI.isThumb1Only() ? 0 : ARM::R3; + case ARM::R4: + return ARM::R5; + case ARM::R6: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + case ARM::R8: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: + return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R11;
+
+  case ARM::S0:
+    return ARM::S1;
+  case ARM::S2:
+    return ARM::S3;
+  case ARM::S4:
+    return ARM::S5;
+  case ARM::S6:
+    return ARM::S7;
+  case ARM::S8:
+    return ARM::S9;
+  case ARM::S10:
+    return ARM::S11;
+  case ARM::S12:
+    return ARM::S13;
+  case ARM::S14:
+    return ARM::S15;
+  case ARM::S16:
+    return ARM::S17;
+  case ARM::S18:
+    return ARM::S19;
+  case ARM::S20:
+    return ARM::S21;
+  case ARM::S22:
+    return ARM::S23;
+  case ARM::S24:
+    return ARM::S25;
+  case ARM::S26:
+    return ARM::S27;
+  case ARM::S28:
+    return ARM::S29;
+  case ARM::S30:
+    return ARM::S31;
+
+  case ARM::D0:
+    return ARM::D1;
+  case ARM::D2:
+    return ARM::D3;
+  case ARM::D4:
+    return ARM::D5;
+  case ARM::D6:
+    return ARM::D7;
+  case ARM::D8:
+    return ARM::D9;
+  case ARM::D10:
+    return ARM::D11;
+  case ARM::D12:
+    return ARM::D13;
+  case ARM::D14:
+    return ARM::D15;
+  case ARM::D16:
+    return ARM::D17;
+  case ARM::D18:
+    return ARM::D19;
+  case ARM::D20:
+    return ARM::D21;
+  case ARM::D22:
+    return ARM::D23;
+  case ARM::D24:
+    return ARM::D25;
+  case ARM::D26:
+    return ARM::D27;
+  case ARM::D28:
+    return ARM::D29;
+  case ARM::D30:
+    return ARM::D31;
+  }
+
+  return 0;
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator &MBBI,
+                  DebugLoc dl,
+                  unsigned DestReg, unsigned SubIdx, int Val,
+                  ARMCC::CondCodes Pred,
+                  unsigned PredReg) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineConstantPool *ConstantPool = MF.getConstantPool();
+  Constant *C =
+    ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+    .addReg(DestReg, getDefRegState(true), SubIdx)
+    .addConstantPoolIndex(Idx)
+    .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+  return true;
+}
+
+// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+// not required, we reserve argument space for call sites in the function
+// immediately on entry to the current function. This eliminates the need for
+// add/sub sp brackets around call sites. Returns true if the call frame is
+// included as part of the stack frame.
+bool ARMBaseRegisterInfo::
+hasReservedCallFrame(MachineFunction &MF) const {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  unsigned CFSize = FFI->getMaxCallFrameSize();
+  // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has small immediate offsets for
+  // addressing the stack frame. So a large call frame can cause poor codegen
+  // and may even make it impossible to scavenge a register.
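+  // ((1 << 12) - 1) / 2 evaluates to 2047, half the 4095-byte reach of an
+  // ARM imm12 offset; call frames at least that large keep their own
+  // sub/add sp pairs rather than being folded into the fixed frame.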
+  if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
+    return false;
+
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+static void
+emitSPUpdate(bool isARM,
+             MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+             DebugLoc dl, const ARMBaseInstrInfo &TII,
+             int NumBytes,
+             ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+  if (isARM)
+    emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+                            Pred, PredReg, TII);
+  else
+    emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+                           Pred, PredReg, TII);
+}
+
+
+void ARMBaseRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (!hasReservedCallFrame(MF)) {
+    // If we have alloca, convert as follows:
+    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+    // ADJCALLSTACKUP   -> add, sp, sp, amount
+    MachineInstr *Old = I;
+    DebugLoc dl = Old->getDebugLoc();
+    unsigned Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+      assert(!AFI->isThumb1OnlyFunction() &&
+             "This eliminateCallFramePseudoInstr does not support Thumb1!");
+      bool isARM = !AFI->isThumbFunction();
+
+      // Replace the pseudo instruction with a new instruction...
+      unsigned Opc = Old->getOpcode();
+      ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm();
+      // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN?
+      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+        unsigned PredReg = Old->getOperand(2).getReg();
+        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
+      } else {
+        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
+        unsigned PredReg = Old->getOperand(3).getReg();
+        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
+      }
+    }
+  }
+  MBB.erase(I);
+}
+
+/// findScratchRegister - Find a 'free' ARM register. If register scavenger
+/// is not being used, R12 is available. Otherwise, try for a call-clobbered
+/// register first and then a spilled callee-saved register if that fails.
+static
+unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC,
+                             ARMFunctionInfo *AFI) {
+  unsigned Reg = RS ? RS->FindUnusedReg(RC) : (unsigned) ARM::R12;
+  assert(!AFI->isThumb1OnlyFunction());
+  return Reg;
+}
+
+unsigned
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                         int SPAdj, int *Value,
+                                         RegScavenger *RS) const {
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  assert(!AFI->isThumb1OnlyFunction() &&
+         "This eliminateFrameIndex does not support Thumb1!");
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned FrameReg = ARM::SP;
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj;
+
+  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea1Offset();
+  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea2Offset();
+  else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
+    Offset -= AFI->getDPRCalleeSavedAreaOffset();
+  else if (hasFP(MF) && AFI->hasStackFrame()) {
+    assert(SPAdj == 0 && "Unexpected stack offset!");
+    // Use the frame pointer to reference fixed objects unless this is a
+    // frameless function.
+    FrameReg = getFrameRegister(MF);
+    Offset -= AFI->getFramePtrSpillOffset();
+  }
+
+  // Modify MI as necessary to handle as much of 'Offset' as possible.
+  bool Done = false;
+  if (!AFI->isThumbFunction())
+    Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+  else {
+    assert(AFI->isThumb2Function());
+    Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+  }
+  if (Done)
+    return 0;
+
+  // If we get here, the immediate doesn't fit into the instruction. We folded
+  // as much as possible above, handle the rest, providing a register that is
+  // SP+LargeImm.
+  assert((Offset ||
+          (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) &&
+         "This code isn't needed if offset already handled!");
+
+  // Insert a set of r12 with the full address: r12 = sp + offset
+  // If the offset we have is too large to fit into the instruction, we need
+  // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+  // out of 'Offset'.
+  unsigned ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI);
+  if (ScratchReg == 0)
+    // No register is "free". Scavenge a register.
+    ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj);
+  int PIdx = MI.findFirstPredOperandIdx();
+  ARMCC::CondCodes Pred = (PIdx == -1)
+    ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+  unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+  if (Offset == 0)
+    // Must be addrmode4.
+    MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+  else {
+    if (!AFI->isThumbFunction())
+      emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+                              Offset, Pred, PredReg, TII);
+    else {
+      assert(AFI->isThumb2Function());
+      emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+                             Offset, Pred, PredReg, TII);
+    }
+    MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+  }
+  return 0;
+}
+
+/// Move the iterator past the next bunch of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
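+///
+/// For example, emitPrologue below uses
+///   movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 1, STI);
+/// to step MBBI over the area-1 (R4-R7 / LR) stores so that the SP adjustment
+/// for the next spill area is inserted after them.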
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator &MBBI,
+                                   int Opc1, int Opc2, unsigned Area,
+                                   const ARMSubtarget &STI) {
+  while (MBBI != MBB.end() &&
+         ((MBBI->getOpcode() == Opc1) || (MBBI->getOpcode() == Opc2)) &&
+         MBBI->getOperand(1).isFI()) {
+    if (Area != 0) {
+      bool Done = false;
+      unsigned Category = 0;
+      switch (MBBI->getOperand(0).getReg()) {
+      case ARM::R4:  case ARM::R5:  case ARM::R6:  case ARM::R7:
+      case ARM::LR:
+        Category = 1;
+        break;
+      case ARM::R8:  case ARM::R9:  case ARM::R10: case ARM::R11:
+        Category = STI.isTargetDarwin() ? 2 : 1;
+        break;
+      case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
+      case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+        Category = 3;
+        break;
+      default:
+        Done = true;
+        break;
+      }
+      if (Done || Category != Area)
+        break;
+    }
+
+    ++MBBI;
+  }
+}
+
+void ARMBaseRegisterInfo::
+emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  assert(!AFI->isThumb1OnlyFunction() &&
+         "This emitPrologue does not support Thumb1!");
+  bool isARM = !AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  // Determine the sizes of the callee-save spill areas and record which frame
+  // index belongs to which callee-save spill area.
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+
+  // Allocate the vararg register save area. This is not counted in NumBytes.
+  if (VARegSaveSize)
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+    return;
+  }
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    int FI = CSI[i].getFrameIdx();
+    switch (Reg) {
+    case ARM::R4:
+    case ARM::R5:
+    case ARM::R6:
+    case ARM::R7:
+    case ARM::LR:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      AFI->addGPRCalleeSavedArea1Frame(FI);
+      GPRCS1Size += 4;
+      break;
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      if (STI.isTargetDarwin()) {
+        AFI->addGPRCalleeSavedArea2Frame(FI);
+        GPRCS2Size += 4;
+      } else {
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+      }
+      break;
+    default:
+      AFI->addDPRCalleeSavedAreaFrame(FI);
+      DPRCSSize += 8;
+    }
+  }
+
+  // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS1Size);
+  movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 1, STI);
+
+  // Set FP to point to the stack slot that contains the previous FP.
+  // For Darwin, FP is R7, which has now been stored in spill area 1.
+  // Otherwise, if this is not Darwin, all the callee-saved registers go
+  // into spill area 1, including the FP in R11. In either case, it is
+  // now safe to emit this assignment.
+  if (STI.isTargetDarwin() || hasFP(MF)) {
+    unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+    AddDefaultCC(AddDefaultPred(MIB));
+  }
+
+  // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -GPRCS2Size);
+
+  // Build the new SUBri to adjust SP for FP callee-save spill area.
+  movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, ARM::t2STRi12, 2, STI);
+  emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRCSSize);
+
+  // Determine starting offsets of spill areas.
+  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+  NumBytes = DPRCSOffset;
+  if (NumBytes) {
+    // Insert it after all the callee-save spills.
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI);
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+  }
+
+  if (STI.isTargetELF() && hasFP(MF)) {
+    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+                             AFI->getFramePtrSpillOffset());
+  }
+
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI,
+                        const ARMBaseInstrInfo &TII,
+                        const unsigned *CSRegs) {
+  return ((MI->getOpcode() == (int)ARM::FLDD ||
+           MI->getOpcode() == (int)ARM::LDR ||
+           MI->getOpcode() == (int)ARM::t2LDRi12) &&
+          MI->getOperand(1).isFI() &&
+          isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
+
+void ARMBaseRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  assert(MBBI->getDesc().isReturn() &&
+         "Can only insert epilog into returning blocks");
+  DebugLoc dl = MBBI->getDebugLoc();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  assert(!AFI->isThumb1OnlyFunction() &&
+         "This emitEpilogue does not support Thumb1!");
+  bool isARM = !AFI->isThumbFunction();
+
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  int NumBytes = (int)MFI->getStackSize();
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+  } else {
+    // Unwind MBBI to point to the first LDR / FLDD.
+    const unsigned *CSRegs = getCalleeSavedRegs();
+    if (MBBI != MBB.begin()) {
+      do
+        --MBBI;
+      while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+      if (!isCSRestore(MBBI, TII, CSRegs))
+        ++MBBI;
+    }
+
+    // Move SP to the start of the FP callee-save spill area.
+    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+                 AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedAreaSize());
+
+    // Darwin ABI requires FP to point to the stack slot that contains the
+    // previous FP.
+    bool HasFP = hasFP(MF);
+    if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+      // Reset SP based on the frame pointer only if the stack frame extends
+      // beyond the frame pointer stack slot, or the target is ELF and the
+      // function has FP.
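+      // In effect SP is recomputed from the frame pointer rather than by
+      // unwinding the allocation: with fp still pointing at the spilled FP
+      // slot, "sp = fp - NumBytes" lands SP at the start of the DPR
+      // callee-save area even if the body moved SP dynamically (e.g. alloca).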
+ if (HasFP || + AFI->getGPRCalleeSavedArea2Size() || + AFI->getDPRCalleeSavedAreaSize() || + AFI->getDPRCalleeSavedAreaOffset()) { + if (NumBytes) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + } else { + // Thumb2 or ARM. + if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) + .addReg(FramePtr) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(FramePtr); + } + } + } else if (NumBytes) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + + // Move SP to start of integer callee save spill area 2. + movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize()); + + // Move SP to start of integer callee save spill area 1. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 2, STI); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea2Size()); + + // Move SP to SP upon entry to the function. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, ARM::t2LDRi12, 1, STI); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size()); + } + + if (VARegSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); +} + +#include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h new file mode 100644 index 0000000000000..da703fbc8c193 --- /dev/null +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -0,0 +1,148 @@ +//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the base ARM implementation of TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMBASEREGISTERINFO_H +#define ARMBASEREGISTERINFO_H + +#include "ARM.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "ARMGenRegisterInfo.h.inc" + +namespace llvm { + class ARMSubtarget; + class ARMBaseInstrInfo; + class Type; + +/// Register allocation hints. +namespace ARMRI { + enum { + RegPairOdd = 1, + RegPairEven = 2 + }; +} + +/// isARMLowRegister - Returns true if the register is low register r0-r7. +/// +static inline bool isARMLowRegister(unsigned Reg) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +struct ARMBaseRegisterInfo : public ARMGenRegisterInfo { +protected: + const ARMBaseInstrInfo &TII; + const ARMSubtarget &STI; + + /// FramePtr - ARM physical register used as frame ptr. + unsigned FramePtr; + + // Can be only subclassed. + explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, + const ARMSubtarget &STI); + + // Return the opcode that implements 'Op', or 0 if no opcode + unsigned getOpcode(int Op) const; + +public: + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// ARM::LR, return the number that it corresponds to (e.g. 14). It + /// also returns true in isSPVFP if the register is a single precision + /// VFP register. 
+  static unsigned getRegisterNumbering(unsigned RegEnum, bool *isSPVFP = 0);
+
+  /// Code Generation virtual methods...
+  const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+  const TargetRegisterClass* const*
+  getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
+
+  BitVector getReservedRegs(const MachineFunction &MF) const;
+
+  const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+  std::pair<TargetRegisterClass::iterator, TargetRegisterClass::iterator>
+  getAllocationOrder(const TargetRegisterClass *RC,
+                     unsigned HintType, unsigned HintReg,
+                     const MachineFunction &MF) const;
+
+  unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+                               const MachineFunction &MF) const;
+
+  void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+                          MachineFunction &MF) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+
+  bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS = NULL) const;
+
+  // Debug information queries.
+  unsigned getRARegister() const;
+  unsigned getFrameRegister(MachineFunction &MF) const;
+
+  // Exception handling queries.
+  unsigned getEHExceptionRegister() const;
+  unsigned getEHHandlerRegister() const;
+
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+  bool isLowRegister(unsigned Reg) const;
+
+
+  /// emitLoadConstPool - Emits a load from constpool to materialize the
+  /// specified immediate.
+  virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator &MBBI,
+                                 DebugLoc dl,
+                                 unsigned DestReg, unsigned SubIdx,
+                                 int Val,
+                                 ARMCC::CondCodes Pred = ARMCC::AL,
+                                 unsigned PredReg = 0) const;
+
+  /// Code Generation virtual methods...
+  virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+  virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  virtual bool hasReservedCallFrame(MachineFunction &MF) const;
+
+  virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                             MachineBasicBlock &MBB,
+                                             MachineBasicBlock::iterator I) const;
+
+  virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                       int SPAdj, int *Value = NULL,
+                                       RegScavenger *RS = NULL) const;
+
+  virtual void emitPrologue(MachineFunction &MF) const;
+  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+private:
+  unsigned estimateRSStackSizeLimit(MachineFunction &MF) const;
+
+  unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+  unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 8a4c741faf95e..716163958d9c5 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -111,6 +111,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>,
@@ -122,6 +123,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
   CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32],
CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15]>>, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index f29576148b328..6f1c624cbf524 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -26,14 +26,18 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #ifndef NDEBUG #include #endif @@ -57,12 +61,18 @@ namespace { ARMJITInfo *JTI; const ARMInstrInfo *II; const TargetData *TD; + const ARMSubtarget *Subtarget; TargetMachine &TM; CodeEmitter &MCE; const std::vector *MCPEs; const std::vector *MJTEs; bool IsPIC; + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + public: static char ID; explicit Emitter(TargetMachine &tm, CodeEmitter &mce) @@ -160,7 +170,7 @@ namespace { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool NeedStub, intptr_t ACPV = 0); + bool NeedStub, bool Indirect, intptr_t ACPV = 0); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc); void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc); @@ -174,36 +184,39 @@ namespace { /// createARMCodeEmitterPass - Return a pass that emits the collected ARM code /// to the specified MCE object. 
-namespace llvm { - -FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM, - MachineCodeEmitter &MCE) { +FunctionPass *llvm::createARMCodeEmitterPass(ARMBaseTargetMachine &TM, + MachineCodeEmitter &MCE) { return new Emitter(TM, MCE); } -FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, - JITCodeEmitter &JCE) { +FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, + JITCodeEmitter &JCE) { return new Emitter(TM, JCE); } - -} // end namespace llvm +FunctionPass *llvm::createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, + ObjectCodeEmitter &OCE) { + return new Emitter(TM, OCE); +} template bool Emitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); + JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); - JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); + Subtarget = &TM.getSubtarget(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; JTI->Initialize(MF, IsPIC); + MCE.setModuleInfo(&getAnalysis()); do { - DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n"; + DEBUG(errs() << "JITTing function '" + << MF.getFunction()->getName() << "'\n"); MCE.startFunction(MF); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { MCE.StartMachineBasicBlock(MBB); for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); @@ -220,7 +233,7 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { template unsigned Emitter::getShiftOp(unsigned Imm) const { switch (ARM_AM::getAM2ShiftOpc(Imm)) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return 2; case ARM_AM::lsl: return 0; case ARM_AM::lsr: return 1; @@ -240,7 +253,7 @@ unsigned Emitter::getMachineOpValue(const MachineInstr &MI, else if (MO.isImm()) return static_cast(MO.getImm()); else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true); + emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false); else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); else if (MO.isCPI()) { @@ -254,8 +267,10 @@ unsigned Emitter::getMachineOpValue(const MachineInstr &MI, else if (MO.isMBB()) emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); else { - cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; - abort(); +#ifndef NDEBUG + errs() << MO; +#endif + llvm_unreachable(0); } return 0; } @@ -264,9 +279,14 @@ unsigned Emitter::getMachineOpValue(const MachineInstr &MI, /// template void Emitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool NeedStub, intptr_t ACPV) { - MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, NeedStub)); + bool NeedStub, bool Indirect, + intptr_t ACPV) { + MachineRelocation MR = Indirect + ? 
MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, + GV, ACPV, NeedStub) + : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + GV, ACPV, NeedStub); + MCE.addRelocation(MR); } /// emitExternalSymbolAddress - Arrange for the address of an external symbol to @@ -294,7 +314,7 @@ void Emitter::emitConstPoolAddress(unsigned CPI, /// be emitted to the current location in the function, and allow it to be PC /// relative. template -void Emitter::emitJumpTableAddress(unsigned JTIndex, +void Emitter::emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) { MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), Reloc, JTIndex, 0, true)); @@ -310,32 +330,28 @@ void Emitter::emitMachineBasicBlock(MachineBasicBlock *BB, template void Emitter::emitWordLE(unsigned Binary) { -#ifndef NDEBUG - DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') - << Binary << std::dec << "\n"; -#endif + DEBUG(errs() << " 0x"; + errs().write_hex(Binary) << "\n"); MCE.emitWordLE(Binary); } template void Emitter::emitDWordLE(uint64_t Binary) { -#ifndef NDEBUG - DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') - << (unsigned)Binary << std::dec << "\n"; - DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') - << (unsigned)(Binary >> 32) << std::dec << "\n"; -#endif + DEBUG(errs() << " 0x"; + errs().write_hex(Binary) << "\n"); MCE.emitDWordLE(Binary); } template void Emitter::emitInstruction(const MachineInstr &MI) { - DOUT << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI; + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); + + MCE.processDebugLoc(MI.getDebugLoc(), true); NumEmitted++; // Keep track of the # of mi's emitted switch (MI.getDesc().TSFlags & ARMII::FormMask) { default: { - assert(0 && "Unhandled instruction encoding format!"); + llvm_unreachable("Unhandled instruction encoding format!"); break; } case ARMII::Pseudo: @@ -393,6 +409,7 @@ void Emitter::emitInstruction(const MachineInstr &MI) { emitMiscInstruction(MI); break; } + MCE.processDebugLoc(MI.getDebugLoc(), false); } template @@ -400,7 +417,7 @@ void Emitter::emitConstPoolInstruction(const MachineInstr &MI) { unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; - + // Remember the CONSTPOOL_ENTRY address for later relocation. 
JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); @@ -410,55 +427,49 @@ void Emitter::emitConstPoolInstruction(const MachineInstr &MI) { ARMConstantPoolValue *ACPV = static_cast(MCPE.Val.MachineCPVal); - DOUT << " ** ARM constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'; + DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); GlobalValue *GV = ACPV->getGV(); if (GV) { - assert(!ACPV->isStub() && "Don't know how to deal this yet!"); - if (ACPV->isNonLazyPointer()) - MCE.addRelocation(MachineRelocation::getIndirectSymbol( - MCE.getCurrentPCOffset(), ARM::reloc_arm_machine_cp_entry, GV, - (intptr_t)ACPV, false)); - else - emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, - ACPV->isStub() || isa(GV), (intptr_t)ACPV); + Reloc::Model RelocM = TM.getRelocationModel(); + emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, + isa(GV), + Subtarget->GVIsIndirectSymbol(GV, RelocM), + (intptr_t)ACPV); } else { - assert(!ACPV->isNonLazyPointer() && "Don't know how to deal this yet!"); emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute); } emitWordLE(0); } else { Constant *CV = MCPE.Val.ConstVal; -#ifndef NDEBUG - DOUT << " ** Constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " "; - if (const Function *F = dyn_cast(CV)) - DOUT << F->getName(); - else - DOUT << *CV; - DOUT << '\n'; -#endif + DEBUG({ + errs() << " ** Constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " "; + if (const Function *F = dyn_cast(CV)) + errs() << F->getName(); + else + errs() << *CV; + errs() << '\n'; + }); if (GlobalValue *GV = dyn_cast(CV)) { - emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa(GV)); + emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa(GV), false); emitWordLE(0); } else if (const ConstantInt *CI = dyn_cast(CV)) { uint32_t Val = *(uint32_t*)CI->getValue().getRawData(); emitWordLE(Val); } else if (const ConstantFP *CFP = dyn_cast(CV)) { - if (CFP->getType() == Type::FloatTy) + if (CFP->getType()->isFloatTy()) emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else if (CFP->getType() == Type::DoubleTy) + else if (CFP->getType()->isDoubleTy()) emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); else { - assert(0 && "Unable to handle this constantpool entry!"); - abort(); + llvm_unreachable("Unable to handle this constantpool entry!"); } } else { - assert(0 && "Unable to handle this constantpool entry!"); - abort(); + llvm_unreachable("Unable to handle this constantpool entry!"); } } } @@ -467,7 +478,8 @@ template void Emitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { const MachineOperand &MO0 = MI.getOperand(0); const MachineOperand &MO1 = MI.getOperand(1); - assert(MO1.isImm() && "Not a valid so_imm value!"); + assert(MO1.isImm() && ARM_AM::getSOImmVal(MO1.isImm()) != -1 && + "Not a valid so_imm value!"); unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); @@ -483,7 +495,7 @@ void Emitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { // Encode so_imm. // Set bit I(25) to identify this is the immediate form of Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V1)); + Binary |= getMachineSoImmOpValue(V1); emitWordLE(Binary); // Now the 'orr' instruction. @@ -501,14 +513,14 @@ void Emitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { // Encode so_imm. 
// Set bit I(25) to identify this is the immediate form of Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V2)); + Binary |= getMachineSoImmOpValue(V2); emitWordLE(Binary); } template void Emitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { // It's basically add r, pc, (LJTI - $+8) - + const TargetInstrDesc &TID = MI.getDesc(); // Emit the 'add' instruction. @@ -527,7 +539,6 @@ void Emitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; // Encode the displacement. - // Set bit I(25) to identify this is the immediate form of . Binary |= 1 << ARMII::I_BitShift; emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); @@ -576,8 +587,8 @@ void Emitter::emitPseudoMoveInstruction(const MachineInstr &MI) { template void Emitter::addPCLabel(unsigned LabelID) { - DOUT << " ** LPC" << LabelID << " @ " - << (void*)MCE.getCurrentPCValue() << '\n'; + DEBUG(errs() << " ** LPC" << LabelID << " @ " + << (void*)MCE.getCurrentPCValue() << '\n'); JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); } @@ -586,13 +597,13 @@ void Emitter::emitPseudoInstruction(const MachineInstr &MI) { unsigned Opcode = MI.getDesc().Opcode; switch (Opcode) { default: - abort(); // FIXME: + llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); + // FIXME: Add support for MOVimm32. case TargetInstrInfo::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) { - assert(0 && "JIT does not support inline asm!\n"); - abort(); + llvm_report_error("JIT does not support inline asm!"); } break; } @@ -601,7 +612,7 @@ void Emitter::emitPseudoInstruction(const MachineInstr &MI) { MCE.emitLabel(MI.getOperand(0).getImm()); break; case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: + case TargetInstrInfo::KILL: case ARM::DWARF_LOC: // Do nothing. break; @@ -674,7 +685,7 @@ unsigned Emitter::getMachineSoRegOpValue( // ROR - 0111 // RRX - 0110 and bit[11:8] clear. switch (SOpc) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::lsl: SBits = 0x1; break; case ARM_AM::lsr: SBits = 0x3; break; case ARM_AM::asr: SBits = 0x5; break; @@ -688,7 +699,7 @@ unsigned Emitter::getMachineSoRegOpValue( // ASR - 100 // ROR - 110 switch (SOpc) { - default: assert(0 && "Unknown shift opc!"); + default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::lsl: SBits = 0x0; break; case ARM_AM::lsr: SBits = 0x2; break; case ARM_AM::asr: SBits = 0x4; break; @@ -713,12 +724,15 @@ unsigned Emitter::getMachineSoRegOpValue( template unsigned Emitter::getMachineSoImmOpValue(unsigned SoImm) { + int SoImmVal = ARM_AM::getSOImmVal(SoImm); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + // Encode rotate_imm. - unsigned Binary = (ARM_AM::getSOImmValRot(SoImm) >> 1) + unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1) << ARMII::SoRotImmShift; // Encode immed_8. - Binary |= ARM_AM::getSOImmValImm(SoImm); + Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal); return Binary; } @@ -740,6 +754,10 @@ void Emitter::emitDataProcessingInstruction( unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); + if (TID.Opcode == ARM::BFC) { + llvm_report_error("ARMv6t2 JIT is not yet supported."); + } + // Part of binary is determined by TableGn. 
unsigned Binary = getBinaryCodeForInstr(MI); @@ -791,9 +809,7 @@ void Emitter::emitDataProcessingInstruction( } // Encode so_imm. - // Set bit I(25) to identify this is the immediate form of . - Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(MO.getImm()); + Binary |= getMachineSoImmOpValue((unsigned)MO.getImm()); emitWordLE(Binary); } @@ -952,8 +968,8 @@ static unsigned getAddrModeUPBits(unsigned Mode) { // DA - Decrement after - bit U = 0 and bit P = 0 // DB - Decrement before - bit U = 0 and bit P = 1 switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); - case ARM_AM::da: break; + default: llvm_unreachable("Unknown addressing sub-mode!"); + case ARM_AM::da: break; case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; @@ -983,7 +999,7 @@ void Emitter::emitLoadStoreMultipleInstruction( Binary |= 0x1 << ARMII::W_BitShift; // Set registers - for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i) { + for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) break; @@ -1107,7 +1123,7 @@ void Emitter::emitMiscArithInstruction(const MachineInstr &MI) { unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); Binary |= ShiftAmt << ARMII::ShiftShift; - + emitWordLE(Binary); } @@ -1115,8 +1131,9 @@ template void Emitter::emitBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); - if (TID.Opcode == ARM::TPsoft) - abort(); // FIXME + if (TID.Opcode == ARM::TPsoft) { + llvm_unreachable("ARM::TPsoft FIXME"); // FIXME + } // Part of binary is determined by TableGn. unsigned Binary = getBinaryCodeForInstr(MI); @@ -1135,7 +1152,8 @@ void Emitter::emitInlineJumpTable(unsigned JTIndex) { // Remember the base address of the inline jump table. uintptr_t JTBase = MCE.getCurrentPCValue(); JTI->addJumpTableBaseAddr(JTIndex, JTBase); - DOUT << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase << '\n'; + DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase + << '\n'); // Now emit the jump table entries. const std::vector &MBBs = (*MJTEs)[JTIndex].MBBs; @@ -1155,17 +1173,17 @@ void Emitter::emitMiscBranchInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); // Handle jump tables. - if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd || - TID.Opcode == ARM::t2BR_JTr || TID.Opcode == ARM::t2BR_JTadd) { + if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) { // First emit a ldr pc, [] instruction. emitDataProcessingInstruction(MI, ARM::PC); // Then emit the inline jump table. - unsigned JTIndex = (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::t2BR_JTr) + unsigned JTIndex = + (TID.Opcode == ARM::BR_JTr) ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); emitInlineJumpTable(JTIndex); return; - } else if (TID.Opcode == ARM::BR_JTm || TID.Opcode == ARM::t2BR_JTm) { + } else if (TID.Opcode == ARM::BR_JTm) { // First emit a ldr pc, [] instruction. emitLoadStoreInstruction(MI, ARM::PC); @@ -1183,7 +1201,7 @@ void Emitter::emitMiscBranchInstruction(const MachineInstr &MI) { if (TID.Opcode == ARM::BX_RET) // The return register is LR. 
Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR); - else + else // otherwise, set the return register Binary |= getMachineOpValue(MI, 0); @@ -1194,7 +1212,7 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = false; - RegD = ARMRegisterInfo::getRegisterNumbering(RegD, isSPVFP); + RegD = ARMRegisterInfo::getRegisterNumbering(RegD, &isSPVFP); if (!isSPVFP) Binary |= RegD << ARMII::RegRdShift; else { @@ -1208,7 +1226,7 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = false; - RegN = ARMRegisterInfo::getRegisterNumbering(RegN, isSPVFP); + RegN = ARMRegisterInfo::getRegisterNumbering(RegN, &isSPVFP); if (!isSPVFP) Binary |= RegN << ARMII::RegRnShift; else { @@ -1222,7 +1240,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = false; - RegM = ARMRegisterInfo::getRegisterNumbering(RegM, isSPVFP); + RegM = ARMRegisterInfo::getRegisterNumbering(RegM, &isSPVFP); if (!isSPVFP) Binary |= RegM; else { @@ -1268,7 +1286,7 @@ void Emitter::emitVFPArithInstruction(const MachineInstr &MI) { // Encode Dm / Sm. Binary |= encodeVFPRm(MI, OpIdx); - + emitWordLE(Binary); } @@ -1386,11 +1404,11 @@ void Emitter::emitVFPLoadStoreMultipleInstruction( Binary |= 0x1 << ARMII::W_BitShift; // First register is encoded in Dd. - Binary |= encodeVFPRd(MI, 4); + Binary |= encodeVFPRd(MI, 5); // Number of registers are encoded in offset field. unsigned NumRegs = 1; - for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) { + for (unsigned i = 6, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) break; @@ -1413,4 +1431,3 @@ void Emitter::emitMiscInstruction(const MachineInstr &MI) { } #include "ARMGenCodeEmitter.inc" - diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 9fedaa465434a..309e3ba2ac251 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -15,24 +15,31 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" +#include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumCPEs, "Number of constpool entries"); -STATISTIC(NumSplit, "Number of uncond branches inserted"); -STATISTIC(NumCBrFixed, "Number of cond branches fixed"); -STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); +STATISTIC(NumCPEs, "Number of constpool entries"); +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); +STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); +STATISTIC(NumTBs, "Number of table branches generated"); +STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); 
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -63,6 +70,8 @@ namespace { /// to a return, unreachable, or unconditional branch). std::vector WaterList; + typedef std::vector::iterator water_iterator; + /// CPUser - One user of a constant pool, keeping the machine instruction /// pointer, the constant pool being referenced, and the max displacement /// allowed from the instruction to the CP. @@ -70,8 +79,11 @@ namespace { MachineInstr *MI; MachineInstr *CPEMI; unsigned MaxDisp; - CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp) - : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {} + bool NegOk; + bool IsSoImm; + CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, + bool neg, bool soimm) + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {} }; /// CPUsers - Keep track of all of the machine instructions that use various @@ -117,29 +129,34 @@ namespace { /// SmallVector PushPopMIs; + /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions. + SmallVector T2JumpTables; + /// HasFarJump - True if any far jump instruction has been emitted during /// the branch fix up pass. bool HasFarJump; const TargetInstrInfo *TII; + const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; + bool isThumb1; bool isThumb2; public: static char ID; ARMConstantIslands() : MachineFunctionPass(&ID) {} - virtual bool runOnMachineFunction(MachineFunction &Fn); + virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { return "ARM constant island placement and branch shortening pass"; } private: - void DoInitialPlacement(MachineFunction &Fn, + void DoInitialPlacement(MachineFunction &MF, std::vector &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); - void InitialFunctionScan(MachineFunction &Fn, + void InitialFunctionScan(MachineFunction &MF, const std::vector &CPEMIs); MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); @@ -147,58 +164,62 @@ namespace { bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); bool LookForWater(CPUser&U, unsigned UserOffset, - MachineBasicBlock** NewMBB); - MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB, - std::vector::iterator IP); + MachineBasicBlock *&NewMBB); + MachineBasicBlock *AcceptWater(water_iterator IP); void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, - MachineBasicBlock** NewMBB); - bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex); + MachineBasicBlock *&NewMBB); + bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); void RemoveDeadCPEMI(MachineInstr *CPEMI); bool RemoveUnusedCPEntries(); bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, - MachineInstr *CPEMI, unsigned Disp, - bool DoDump); + MachineInstr *CPEMI, unsigned Disp, bool NegOk, + bool DoDump = false); bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, CPUser &U); bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, - unsigned Disp, bool NegativeOK); + unsigned Disp, bool NegativeOK, bool IsSoImm = false); bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br); - bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br); - bool FixUpUnconditionalBr(MachineFunction &Fn, 
ImmBranch &Br); + bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br); + bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br); + bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br); bool UndoLRSpillRestore(); + bool OptimizeThumb2Instructions(MachineFunction &MF); + bool OptimizeThumb2Branches(MachineFunction &MF); + bool OptimizeThumb2JumpTables(MachineFunction &MF); unsigned GetOffsetOf(MachineInstr *MI) const; void dumpBBs(); - void verify(MachineFunction &Fn); + void verify(MachineFunction &MF); }; char ARMConstantIslands::ID = 0; } /// verify - check BBOffsets, BBSizes, alignment of islands -void ARMConstantIslands::verify(MachineFunction &Fn) { +void ARMConstantIslands::verify(MachineFunction &MF) { assert(BBOffsets.size() == BBSizes.size()); for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); - if (isThumb) { - for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - if (!MBB->empty() && - MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) - assert((BBOffsets[MBB->getNumber()]%4 == 0 && - BBSizes[MBB->getNumber()]%4 == 0) || - (BBOffsets[MBB->getNumber()]%4 != 0 && - BBSizes[MBB->getNumber()]%4 != 0)); + if (!isThumb) + return; +#ifndef NDEBUG + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + if (!MBB->empty() && + MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { + unsigned MBBId = MBB->getNumber(); + assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || + (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0)); } } +#endif } /// print block size and offset information - debugging void ARMConstantIslands::dumpBBs() { for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) { - DOUT << "block " << J << " offset " << BBOffsets[J] << - " size " << BBSizes[J] << "\n"; + DEBUG(errs() << "block " << J << " offset " << BBOffsets[J] + << " size " << BBSizes[J] << "\n"); } } @@ -208,31 +229,36 @@ FunctionPass *llvm::createARMConstantIslandPass() { return new ARMConstantIslands(); } -bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { - MachineConstantPool &MCP = *Fn.getConstantPool(); +bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { + MachineConstantPool &MCP = *MF.getConstantPool(); + + TII = MF.getTarget().getInstrInfo(); + AFI = MF.getInfo(); + STI = &MF.getTarget().getSubtarget(); - TII = Fn.getTarget().getInstrInfo(); - AFI = Fn.getInfo(); isThumb = AFI->isThumbFunction(); + isThumb1 = AFI->isThumb1OnlyFunction(); isThumb2 = AFI->isThumb2Function(); HasFarJump = false; // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. - Fn.RenumberBlocks(); + MF.RenumberBlocks(); + + // Thumb1 functions containing constant pools get 4-byte alignment. + // This is so we can keep exact track of where the alignment padding goes. - /// Thumb functions containing constant pools get 2-byte alignment. - /// This is so we can keep exact track of where the alignment padding goes. - /// Set default. - AFI->setAlign(isThumb ? 1U : 2U); + // Set default. Thumb1 function is 2-byte aligned, ARM and Thumb2 are 4-byte + // aligned. + AFI->setAlign(isThumb1 ? 1U : 2U); // Perform the initial placement of the constant pool entries. To start with, // we put them all at the end of the function. 
std::vector CPEMIs; if (!MCP.isEmpty()) { - DoInitialPlacement(Fn, CPEMIs); - if (isThumb) + DoInitialPlacement(MF, CPEMIs); + if (isThumb1) AFI->setAlign(2U); } @@ -242,7 +268,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(Fn, CPEMIs); + InitialFunctionScan(MF, CPEMIs); CPEMIs.clear(); /// Remove dead constant pool entries. @@ -251,25 +277,37 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { // Iteratively place constant pool entries and fix up branches until there // is no change. bool MadeChange = false; + unsigned NoCPIters = 0, NoBRIters = 0; while (true) { - bool Change = false; + bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - Change |= HandleConstantPoolUser(Fn, i); + CPChange |= HandleConstantPoolUser(MF, i); + if (CPChange && ++NoCPIters > 30) + llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); + + bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - Change |= FixUpImmediateBr(Fn, ImmBranches[i]); + BRChange |= FixUpImmediateBr(MF, ImmBranches[i]); + if (BRChange && ++NoBRIters > 30) + llvm_unreachable("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); - if (!Change) + + if (!CPChange && !BRChange) break; MadeChange = true; } + // Shrink 32-bit Thumb2 branch, load, and store instructions. + if (isThumb2) + MadeChange |= OptimizeThumb2Instructions(MF); + // After a while, this might be made debug-only, but it is not expensive. - verify(Fn); + verify(MF); // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. // Undo the spill / restore of LR if possible. - if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb) + if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); BBSizes.clear(); @@ -279,24 +317,25 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { CPEntries.clear(); ImmBranches.clear(); PushPopMIs.clear(); + T2JumpTables.clear(); return MadeChange; } /// DoInitialPlacement - Perform the initial placement of the constant pool /// entries. To start with, we put them all at the end of the function. -void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn, +void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, std::vector &CPEMIs) { // Create the basic block to hold the CPE's. - MachineBasicBlock *BB = Fn.CreateMachineBasicBlock(); - Fn.push_back(BB); + MachineBasicBlock *BB = MF.CreateMachineBasicBlock(); + MF.push_back(BB); // Add all of the constants from the constant pool to the end block, use an // identity mapping of CPI's to CPE's. const std::vector &CPs = - Fn.getConstantPool()->getConstants(); + MF.getConstantPool()->getConstants(); - const TargetData &TD = *Fn.getTarget().getTargetData(); + const TargetData &TD = *MF.getTarget().getTargetData(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); // Verify that all constant pool entries are a multiple of 4 bytes. 
If not, @@ -313,7 +352,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn, CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); NumCPEs++; - DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n"; + DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i + << "\n"); } } @@ -352,10 +392,10 @@ ARMConstantIslands::CPEntry /// InitialFunctionScan - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. -void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, +void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, const std::vector &CPEMIs) { unsigned Offset = 0; - for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -377,18 +417,19 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, unsigned Scale = 1; int UOpc = Opc; switch (Opc) { + default: + continue; // Ignore other JT branches case ARM::tBR_JTr: - case ARM::t2BR_JTr: - case ARM::t2BR_JTm: - case ARM::t2BR_JTadd: - // A Thumb table jump may involve padding; for the offsets to + // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. AFI->setAlign(2U); if ((Offset+MBBSize)%4 != 0) + // FIXME: Add a pseudo ALIGN instruction instead. MBBSize += 2; // padding continue; // Does not get an entry in ImmBranches - default: - continue; // Ignore other JT branches + case ARM::t2BR_JT: + T2JumpTables.push_back(I); + continue; // Does not get an entry in ImmBranches case ARM::Bcc: isCond = true; UOpc = ARM::B; @@ -427,6 +468,9 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) PushPopMIs.push_back(I); + if (Opc == ARM::CONSTPOOL_ENTRY) + continue; + // Scan the instructions for constant pool operands. for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) if (I->getOperand(op).isCPI()) { @@ -436,50 +480,52 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, // Basic size info comes from the TSFlags field. unsigned Bits = 0; unsigned Scale = 1; - unsigned TSFlags = I->getDesc().TSFlags; - switch (TSFlags & ARMII::AddrModeMask) { + bool NegOk = false; + bool IsSoImm = false; + + switch (Opc) { default: - // Constant pool entries can reach anything. - if (I->getOpcode() == ARM::CONSTPOOL_ENTRY) - continue; - if (I->getOpcode() == ARM::tLEApcrel) { - Bits = 8; // Taking the address of a CP entry. - break; - } - assert(0 && "Unknown addressing mode for CP reference!"); - case ARMII::AddrMode1: // AM1: 8 bits << 2 - Bits = 8; - Scale = 4; // Taking the address of a CP entry. - break; - case ARMII::AddrMode2: - Bits = 12; // +-offset_12 - break; - case ARMII::AddrMode3: - Bits = 8; // +-offset_8 + llvm_unreachable("Unknown addressing mode for CP reference!"); break; - // addrmode4 has no immediate offset. - case ARMII::AddrMode5: + + // Taking the address of a CP entry. + case ARM::LEApcrel: + // This takes a SoImm, which is an 8-bit immediate rotated. We'll + // pretend the maximum offset is 255 * 4. Since each instruction + // is 4 bytes wide, this is always correct. We'll check for other + // displacements that fit in a SoImm as well.
Bits = 8; - Scale = 4; // +-(offset_8*4) + Scale = 4; + NegOk = true; + IsSoImm = true; break; - // addrmode6 has no immediate offset. - case ARMII::AddrModeT1_1: - Bits = 5; // +offset_5 + case ARM::t2LEApcrel: + Bits = 12; + NegOk = true; break; - case ARMII::AddrModeT1_2: - Bits = 5; - Scale = 2; // +(offset_5*2) + case ARM::tLEApcrel: + Bits = 8; + Scale = 4; break; - case ARMII::AddrModeT1_4: - Bits = 5; - Scale = 4; // +(offset_5*4) + + case ARM::LDR: + case ARM::LDRcp: + case ARM::t2LDRpci: + Bits = 12; // +-offset_12 + NegOk = true; break; - case ARMII::AddrModeT1_s: + + case ARM::tLDRpci: + case ARM::tLDRcp: Bits = 8; Scale = 4; // +(offset_8*4) break; - case ARMII::AddrModeT2_pc: - Bits = 12; // +-offset_12 + + case ARM::FLDD: + case ARM::FLDS: + Bits = 8; + Scale = 4; // +-(offset_8*4) + NegOk = true; break; } @@ -487,7 +533,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, unsigned CPI = I->getOperand(op).getIndex(); MachineInstr *CPEMI = CPEMIs[CPI]; unsigned MaxOffs = ((1 << Bits)-1) * Scale; - CPUsers.push_back(CPUser(I, CPEMI, MaxOffs)); + CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm)); // Increment corresponding CPEntry reference count. CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); @@ -563,7 +609,7 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { // Next, update WaterList. Specifically, we need to add NewMBB as having // available water after it. - std::vector::iterator IP = + water_iterator IP = std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, CompareMBBNumbers); WaterList.insert(IP, NewBB); @@ -590,8 +636,8 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // Note the new unconditional branch is not being recorded. // There doesn't seem to be meaningful DebugInfo available; this doesn't // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc::getUnknownLoc(), - TII->get(isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B)).addMBB(NewBB); + unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; + BuildMI(OrigBB, DebugLoc::getUnknownLoc(), TII->get(Opc)).addMBB(NewBB); NumSplit++; // Update the CFG. All succs of OrigBB are now succs of NewBB. @@ -625,7 +671,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // available water after it (but not if it's already there, which happens // when splitting before a conditional branch that is followed by an // unconditional branch - in that case we want to insert NewBB). - std::vector::iterator IP = + water_iterator IP = std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, CompareMBBNumbers); MachineBasicBlock* WaterBB = *IP; @@ -648,7 +694,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // We removed instructions from UserMBB, subtract that off from its size. // Add 2 or 4 to the block to count the unconditional branch we added to it. - unsigned delta = isThumb ? 2 : 4; + int delta = isThumb1 ? 2 : 4; BBSizes[OrigBBI] -= NewBBSize - delta; // ...and adjust BBOffsets for NewBB accordingly. @@ -664,24 +710,39 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { /// reference) is within MaxDisp of TrialOffset (a proposed location of a /// constant pool entry). 
bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, - unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) { + unsigned TrialOffset, unsigned MaxDisp, + bool NegativeOK, bool IsSoImm) { // On Thumb offsets==2 mod 4 are rounded down by the hardware for // purposes of the displacement computation; compensate for that here. // Effectively, the valid range of displacements is 2 bytes smaller for such // references. - if (isThumb && UserOffset%4 !=0) + unsigned TotalAdj = 0; + if (isThumb && UserOffset%4 !=0) { UserOffset -= 2; + TotalAdj = 2; + } // CPEs will be rounded up to a multiple of 4. - if (isThumb && TrialOffset%4 != 0) + if (isThumb && TrialOffset%4 != 0) { TrialOffset += 2; + TotalAdj += 2; + } + + // In Thumb2 mode, later branch adjustments can shift instructions up and + // cause alignment change. In the worst case scenario this can cause the + // user's effective address to be subtracted by 2 and the CPE's address to + // be plus 2. + if (isThumb2 && TotalAdj != 4) + MaxDisp -= (4 - TotalAdj); if (UserOffset <= TrialOffset) { // User before the Trial. - if (TrialOffset-UserOffset <= MaxDisp) + if (TrialOffset - UserOffset <= MaxDisp) return true; + // FIXME: Make use full range of soimm values. } else if (NegativeOK) { - if (UserOffset-TrialOffset <= MaxDisp) + if (UserOffset - TrialOffset <= MaxDisp) return true; + // FIXME: Make use full range of soimm values. } return false; } @@ -690,39 +751,36 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, /// Water (a basic block) will be in range for the specific MI. bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, - MachineBasicBlock* Water, CPUser &U) -{ + MachineBasicBlock* Water, CPUser &U) { unsigned MaxDisp = U.MaxDisp; - MachineFunction::iterator I = next(MachineFunction::iterator(Water)); unsigned CPEOffset = BBOffsets[Water->getNumber()] + BBSizes[Water->getNumber()]; // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. (Currently applies only to ARM, so - // no alignment compensation attempted here.) + // the offset of the instruction. if (CPEOffset < UserOffset) UserOffset += U.CPEMI->getOperand(2).getImm(); - return OffsetIsInRange (UserOffset, CPEOffset, MaxDisp, !isThumb); + return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm); } /// CPEIsInRange - Returns true if the distance between specific MI and /// specific ConstPool entry instruction can fit in MI's displacement field. 
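Stripped of the Thumb alignment adjustments, the range test in OffsetIsInRange above reduces to a simple distance comparison. A minimal sketch of just that core (not the patch's exact code):

    // A CPE at TrialOffset is reachable from a user at UserOffset when the
    // forward distance fits in MaxDisp, or the backward distance does and
    // the instruction accepts negative displacements (NegOk).
    static bool offsetInRange(unsigned UserOffset, unsigned TrialOffset,
                              unsigned MaxDisp, bool NegOk) {
      if (UserOffset <= TrialOffset)
        return TrialOffset - UserOffset <= MaxDisp;
      return NegOk && UserOffset - TrialOffset <= MaxDisp;
    }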
bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, - MachineInstr *CPEMI, - unsigned MaxDisp, bool DoDump) { + MachineInstr *CPEMI, unsigned MaxDisp, + bool NegOk, bool DoDump) { unsigned CPEOffset = GetOffsetOf(CPEMI); assert(CPEOffset%4 == 0 && "Misaligned CPE"); if (DoDump) { - DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm() - << " max delta=" << MaxDisp - << " insn address=" << UserOffset - << " CPE address=" << CPEOffset - << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI; + DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm() + << " max delta=" << MaxDisp + << " insn address=" << UserOffset + << " CPE address=" << CPEOffset + << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI); } - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb); + return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); } #ifndef NDEBUG @@ -745,52 +803,48 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta) { MachineFunction::iterator MBBI = BB; MBBI = next(MBBI); - for(unsigned i=BB->getNumber()+1; igetParent()->getNumBlockIDs(); i++) { + for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs(); + i < e; ++i) { BBOffsets[i] += delta; // If some existing blocks have padding, adjust the padding as needed, a // bit tricky. delta can be negative so don't use % on that. - if (isThumb) { - MachineBasicBlock *MBB = MBBI; - if (!MBB->empty()) { - // Constant pool entries require padding. - if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { - unsigned oldOffset = BBOffsets[i] - delta; - if (oldOffset%4==0 && BBOffsets[i]%4!=0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } else if (oldOffset%4!=0 && BBOffsets[i]%4==0) { - // remove existing padding - BBSizes[i] -=2; - delta -= 2; - } + if (!isThumb) + continue; + MachineBasicBlock *MBB = MBBI; + if (!MBB->empty()) { + // Constant pool entries require padding. + if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { + unsigned OldOffset = BBOffsets[i] - delta; + if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) { + // add new padding + BBSizes[i] += 2; + delta += 2; + } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) { + // remove existing padding + BBSizes[i] -= 2; + delta -= 2; } - // Thumb jump tables require padding. They should be at the end; - // following unconditional branches are removed by AnalyzeBranch. - MachineInstr *ThumbJTMI = NULL; - if ((prior(MBB->end())->getOpcode() == ARM::tBR_JTr) - || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTr) - || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTm) - || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTadd)) - ThumbJTMI = prior(MBB->end()); - if (ThumbJTMI) { - unsigned newMIOffset = GetOffsetOf(ThumbJTMI); - unsigned oldMIOffset = newMIOffset - delta; - if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) { - // remove existing padding - BBSizes[i] -= 2; - delta -= 2; - } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) { - // add new padding - BBSizes[i] += 2; - delta += 2; - } + } + // Thumb1 jump tables require padding. They should be at the end; + // following unconditional branches are removed by AnalyzeBranch. 
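The padding bookkeeping in AdjustBBOffsetsAfter above follows one rule: a constant-pool island must sit on a 4-byte boundary, so whenever a size change flips the parity of its offset, two bytes of pad appear or disappear and delta has to move with them. As a sketch (hypothetical helper, not from the patch):

    // Returns the change in block size caused by re-padding an island
    // whose offset moved from OldOffset to NewOffset.
    static int paddingDelta(unsigned OldOffset, unsigned NewOffset) {
      if (OldOffset % 4 == 0 && NewOffset % 4 != 0) return +2; // pad added
      if (OldOffset % 4 != 0 && NewOffset % 4 == 0) return -2; // pad removed
      return 0;
    }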
+ MachineInstr *ThumbJTMI = prior(MBB->end()); + if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) { + unsigned NewMIOffset = GetOffsetOf(ThumbJTMI); + unsigned OldMIOffset = NewMIOffset - delta; + if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) { + // remove existing padding + BBSizes[i] -= 2; + delta -= 2; + } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) { + // add new padding + BBSizes[i] += 2; + delta += 2; } - if (delta==0) - return; } - MBBI = next(MBBI); + if (delta==0) + return; } + MBBI = next(MBBI); } } @@ -824,8 +878,8 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. - if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) { - DOUT << "In range\n"; + if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) { + DEBUG(errs() << "In range\n"); return 1; } @@ -839,8 +893,9 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) { - DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"; + if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) { + DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#" + << CPEs[i].CPI << "\n"); // Point the CPUser node to the replacement U.CPEMI = CPEs[i].CPEMI; // Change the CPI in the instruction operand to refer to the clone. @@ -870,15 +925,15 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { default: break; } - + return ((1<<23)-1)*4; } /// AcceptWater - Small amount of common code factored out of the following. - -MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB, - std::vector::iterator IP) { - DOUT << "found water in range\n"; +/// +MachineBasicBlock *ARMConstantIslands::AcceptWater(water_iterator IP) { + DEBUG(errs() << "found water in range\n"); + MachineBasicBlock *WaterBB = *IP; // Remove the original WaterList entry; we want subsequent // insertions in this vicinity to go after the one we're // about to insert. This considerably reduces the number @@ -890,41 +945,44 @@ MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB, /// LookForWater - look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. -/// Returns true if found, false if not. If it returns true, *NewMBB -/// is set to the WaterList entry. -/// For ARM, we prefer the water that's farthest away. For Thumb, prefer -/// water that will not introduce padding to water that will; within each -/// group, prefer the water that's farthest away. - +/// Returns true if found, false if not. If it returns true, NewMBB +/// is set to the WaterList entry. For Thumb, prefer water that will not +/// introduce padding to water that will. To ensure that this pass +/// terminates, the CPE location for a particular CPUser is only allowed to +/// move to a lower address, so search backward from the end of the list and +/// prefer the first water that is in range. 
bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, - MachineBasicBlock** NewMBB) { - std::vector::iterator IPThatWouldPad; - MachineBasicBlock* WaterBBThatWouldPad = NULL; - if (!WaterList.empty()) { - for (std::vector::iterator IP = prior(WaterList.end()), - B = WaterList.begin();; --IP) { - MachineBasicBlock* WaterBB = *IP; - if (WaterIsInRange(UserOffset, WaterBB, U)) { - if (isThumb && - (BBOffsets[WaterBB->getNumber()] + - BBSizes[WaterBB->getNumber()])%4 != 0) { - // This is valid Water, but would introduce padding. Remember - // it in case we don't find any Water that doesn't do this. - if (!WaterBBThatWouldPad) { - WaterBBThatWouldPad = WaterBB; - IPThatWouldPad = IP; - } - } else { - *NewMBB = AcceptWater(WaterBB, IP); - return true; + MachineBasicBlock *&NewMBB) { + if (WaterList.empty()) + return false; + + bool FoundWaterThatWouldPad = false; + water_iterator IPThatWouldPad; + for (water_iterator IP = prior(WaterList.end()), + B = WaterList.begin();; --IP) { + MachineBasicBlock* WaterBB = *IP; + // Check if water is in range and at a lower address than the current one. + if (WaterIsInRange(UserOffset, WaterBB, U) && + WaterBB->getNumber() < U.CPEMI->getParent()->getNumber()) { + unsigned WBBId = WaterBB->getNumber(); + if (isThumb && + (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { + // This is valid Water, but would introduce padding. Remember + // it in case we don't find any Water that doesn't do this. + if (!FoundWaterThatWouldPad) { + FoundWaterThatWouldPad = true; + IPThatWouldPad = IP; } + } else { + NewMBB = AcceptWater(IP); + return true; + } } - if (IP == B) - break; - } + if (IP == B) + break; } - if (isThumb && WaterBBThatWouldPad) { - *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad); + if (FoundWaterThatWouldPad) { + NewMBB = AcceptWater(IPThatWouldPad); return true; } return false; @@ -934,12 +992,12 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, /// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the /// block is used if in range, and the conditional branch munged so control /// flow is correct. Otherwise the block is split to create a hole with an -/// unconditional branch around it. In either case *NewMBB is set to a +/// unconditional branch around it. In either case NewMBB is set to a /// block following which the new island can be inserted (the WaterList /// is not adjusted). - void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, - unsigned UserOffset, MachineBasicBlock** NewMBB) { + unsigned UserOffset, + MachineBasicBlock *&NewMBB) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; @@ -950,18 +1008,18 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // If the use is at the end of the block, or the end of the block // is within range, make new water there. (The addition below is - // for the unconditional branch we will be adding: 4 bytes on ARM, - // 2 on Thumb. Possible Thumb alignment padding is allowed for + // for the unconditional branch we will be adding: 4 bytes on ARM + Thumb2, + // 2 on Thumb1. Possible Thumb1 alignment padding is allowed for // inside OffsetIsInRange. // If the block ends in an unconditional branch already, it is water, // and is known to be out of range, so we'll always be adding a branch.) if (&UserMBB->back() == UserMI || - OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 
2: 4), - U.MaxDisp, !isThumb)) { - DOUT << "Split at end of block\n"; + OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4), + U.MaxDisp, U.NegOk, U.IsSoImm)) { + DEBUG(errs() << "Split at end of block\n"); if (&UserMBB->back() == UserMI) assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); - *NewMBB = next(MachineFunction::iterator(UserMBB)); + NewMBB = next(MachineFunction::iterator(UserMBB)); // Add an unconditional branch from UserMBB to fallthrough block. // Record it for branch lengthening; this new branch will not get out of // range, but if the preceding conditional branch is out of range, the @@ -969,16 +1027,16 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // range, so the machinery has to know about it. int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B; BuildMI(UserMBB, DebugLoc::getUnknownLoc(), - TII->get(UncondBr)).addMBB(*NewMBB); + TII->get(UncondBr)).addMBB(NewMBB); unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); - int delta = isThumb ? 2 : 4; + int delta = isThumb1 ? 2 : 4; BBSizes[UserMBB->getNumber()] += delta; AdjustBBOffsetsAfter(UserMBB, delta); } else { // What a big block. Find a place within the block to split it. - // This is a little tricky on Thumb since instructions are 2 bytes + // This is a little tricky on Thumb1 since instructions are 2 bytes // and constant pool entries are 4 bytes: if instruction I references // island CPE, and instruction I+1 references CPE', it will // not work well to put CPE as far forward as possible, since then @@ -991,7 +1049,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // if not, we back up the insertion point. // The 4 in the following is for the unconditional branch we'll be - // inserting (allows for long branch on Thumb). Alignment of the + // inserting (allows for long branch on Thumb1). Alignment of the // island is handled inside OffsetIsInRange. unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4; // This could point off the end of the block if we've already got @@ -1000,7 +1058,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // conditional and a maximally long unconditional). if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1]) BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] - - (isThumb ? 6 : 8); + (isThumb1 ? 6 : 8); unsigned EndInsertOffset = BaseInsertOffset + CPEMI->getOperand(2).getImm(); MachineBasicBlock::iterator MI = UserMI; @@ -1011,10 +1069,11 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, Offset += TII->GetInstSizeInBytes(MI), MI = next(MI)) { if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { + CPUser &U = CPUsers[CPUIndex]; if (!OffsetIsInRange(Offset, EndInsertOffset, - CPUsers[CPUIndex].MaxDisp, !isThumb)) { - BaseInsertOffset -= (isThumb ? 2 : 4); - EndInsertOffset -= (isThumb ? 2 : 4); + U.MaxDisp, U.NegOk, U.IsSoImm)) { + BaseInsertOffset -= (isThumb1 ? 2 : 4); + EndInsertOffset -= (isThumb1 ? 2 : 4); } // This is overly conservative, as we don't account for CPEMIs // being reused within the block, but it doesn't matter much. 
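The U.MaxDisp budget that CreateNewWater spends above is fixed earlier, in InitialFunctionScan, as ((1 << Bits) - 1) * Scale. To make the arithmetic concrete (the Bits/Scale values are taken from the switch in this patch; the helper itself is illustrative):

    // Max byte displacement for a constant-pool user:
    //   tLDRpci        Bits=8,  Scale=4 -> 1020 bytes, forward only
    //   LDR / t2LDRpci Bits=12, Scale=1 -> 4095 bytes, negative OK
    //   FLDD / FLDS    Bits=8,  Scale=4 -> 1020 bytes, negative OK
    static unsigned maxDisp(unsigned Bits, unsigned Scale) {
      return ((1u << Bits) - 1) * Scale;
    }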
@@ -1022,8 +1081,8 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, CPUIndex++; } } - DOUT << "Split in middle of big block\n"; - *NewMBB = SplitBlockBeforeInstr(prior(MI)); + DEBUG(errs() << "Split in middle of big block\n"); + NewMBB = SplitBlockBeforeInstr(prior(MI)); } } @@ -1031,7 +1090,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. -bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, +bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; @@ -1040,14 +1099,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, unsigned Size = CPEMI->getOperand(2).getImm(); MachineBasicBlock *NewMBB; // Compute this only once, it's expensive. The 4 or 8 is the value the - // hardware keeps in the PC (2 insns ahead of the reference). + // hardware keeps in the PC. unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); - // Special case: tLEApcrel are two instructions MI's. The actual user is the - // second instruction. - if (UserMI->getOpcode() == ARM::tLEApcrel) - UserOffset += 2; - // See if the current entry is within range, or there is a clone of it // in range. int result = LookForExistingCPEntry(U, UserOffset); @@ -1058,19 +1112,16 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, // We will be generating a new clone. Get a UID for it. unsigned ID = AFI->createConstPoolEntryUId(); - // Look for water where we can place this CPE. We look for the farthest one - // away that will work. Forward references only for now (although later - // we might find some that are backwards). - - if (!LookForWater(U, UserOffset, &NewMBB)) { + // Look for water where we can place this CPE. + if (!LookForWater(U, UserOffset, NewMBB)) { // No water found. - DOUT << "No water found\n"; - CreateNewWater(CPUserIndex, UserOffset, &NewMBB); + DEBUG(errs() << "No water found\n"); + CreateNewWater(CPUserIndex, UserOffset, NewMBB); } // Okay, we know we can put an island before NewMBB now, do it! - MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock(); - Fn.insert(NewMBB, NewIsland); + MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MF.insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. UpdateForInsertedWaterBlock(NewIsland); @@ -1101,7 +1152,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, break; } - DOUT << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI; + DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI + << '\t' << *UserMI); return true; } @@ -1115,7 +1167,7 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { BBSizes[CPEBB->getNumber()] -= Size; // All succeeding offsets have the current size value added in, fix this. if (CPEBB->empty()) { - // In thumb mode, the size of island may be padded by two to compensate for + // In thumb1 mode, the size of island may be padded by two to compensate for // the alignment requirement. Then it will now be 2 when the block is // empty, so fix this. // All succeeding offsets have the current size value added in, fix this. 
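On the "+ (isThumb ? 4 : 8)" term in HandleConstantPoolUser above: when an ARM instruction reads the PC it observes its own address plus 8 (two 4-byte instructions ahead of the fetch), while Thumb observes plus 4, and every pc-relative displacement is measured from that biased value. A one-line sketch of the rule:

    // Address an instruction at InstrAddr actually sees when it reads PC.
    static unsigned pcSeenByInstr(unsigned InstrAddr, bool IsThumb) {
      return InstrAddr + (IsThumb ? 4 : 8);
    }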
@@ -1157,11 +1209,11 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned BrOffset = GetOffsetOf(MI) + PCAdj; unsigned DestOffset = BBOffsets[DestBB->getNumber()]; - DOUT << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " max delta=" << MaxDisp - << " from " << GetOffsetOf(MI) << " to " << DestOffset - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI; + DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber() + << " from BB#" << MI->getParent()->getNumber() + << " max delta=" << MaxDisp + << " from " << GetOffsetOf(MI) << " to " << DestOffset + << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { // Branch before the Dest. @@ -1176,7 +1228,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, /// FixUpImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { +bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1185,8 +1237,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { return false; if (!Br.isCond) - return FixUpUnconditionalBr(Fn, Br); - return FixUpConditionalBr(Fn, Br); + return FixUpUnconditionalBr(MF, Br); + return FixUpConditionalBr(MF, Br); } /// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is @@ -1194,10 +1246,11 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { +ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); - assert(isThumb && !isThumb2 && "Expected a Thumb-1 function!"); + if (!isThumb1) + llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!"); // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; @@ -1207,7 +1260,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { HasFarJump = true; NumUBrFixed++; - DOUT << " Changed B to long jump " << *MI; + DEBUG(errs() << " Changed B to long jump " << *MI); return true; } @@ -1216,7 +1269,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { /// far away to fit in its displacement field. It is converted to an inverse /// conditional branch + an unconditional branch to the destination. 
bool -ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) { +ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1251,7 +1304,8 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) { // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { - DOUT << " Invert Bcc condition and swap its destination with " << *BMI; + DEBUG(errs() << " Invert Bcc condition and swap its destination with " + << *BMI); BMI->getOperand(0).setMBB(DestBB); MI->getOperand(0).setMBB(NewDest); MI->getOperand(1).setImm(CC); @@ -1273,9 +1327,9 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) { } MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); - DOUT << " Insert B to BB#" << DestBB->getNumber() - << " also invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"; + DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber() + << " also invert condition and change dest. to BB#" + << NextBB->getNumber() << "\n"); // Insert a new conditional branch and a new unconditional branch. // Also update the ImmBranch as well as adding a new entry for the new branch. @@ -1300,14 +1354,17 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) { } /// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills -/// LR / restores LR to pc. +/// LR / restores LR to pc. FIXME: This is done here because it's only possible +/// to do this if tBfar is not used. bool ARMConstantIslands::UndoLRSpillRestore() { bool MadeChange = false; for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { MachineInstr *MI = PushPopMIs[i]; + // First two operands are predicates, the third is a zero since there + // is no writeback. if (MI->getOpcode() == ARM::tPOP_RET && - MI->getOperand(0).getReg() == ARM::PC && - MI->getNumExplicitOperands() == 1) { + MI->getOperand(3).getReg() == ARM::PC && + MI->getNumExplicitOperands() == 4) { BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)); MI->eraseFromParent(); MadeChange = true; @@ -1315,3 +1372,201 @@ bool ARMConstantIslands::UndoLRSpillRestore() { } return MadeChange; } + +bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) { + bool MadeChange = false; + + // Shrink ADR and LDR from constantpool. + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { + CPUser &U = CPUsers[i]; + unsigned Opcode = U.MI->getOpcode(); + unsigned NewOpc = 0; + unsigned Scale = 1; + unsigned Bits = 0; + switch (Opcode) { + default: break; + case ARM::t2LEApcrel: + if (isARMLowRegister(U.MI->getOperand(0).getReg())) { + NewOpc = ARM::tLEApcrel; + Bits = 8; + Scale = 4; + } + break; + case ARM::t2LDRpci: + if (isARMLowRegister(U.MI->getOperand(0).getReg())) { + NewOpc = ARM::tLDRpci; + Bits = 8; + Scale = 4; + } + break; + } + + if (!NewOpc) + continue; + + unsigned UserOffset = GetOffsetOf(U.MI) + 4; + unsigned MaxOffs = ((1 << Bits) - 1) * Scale; + // FIXME: Check if offset is multiple of scale if scale is not 4. 
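Before the shrinking logic continues below: the isARMLowRegister guard in OptimizeThumb2Instructions above is what makes the 32-bit-to-16-bit conversion safe, since most 16-bit Thumb encodings carry 3-bit register fields and can therefore only name r0-r7. Conceptually (assuming architectural register numbering; not the patch's helper):

    // True for the registers reachable from a 3-bit Thumb register field.
    static bool isLowReg(unsigned RegNum) { // RegNum is 0-15
      return RegNum < 8;
    }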
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + U.MI->setDesc(TII->get(NewOpc)); + MachineBasicBlock *MBB = U.MI->getParent(); + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumT2CPShrunk; + MadeChange = true; + } + } + + MadeChange |= OptimizeThumb2Branches(MF); + MadeChange |= OptimizeThumb2JumpTables(MF); + return MadeChange; +} + +bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { + bool MadeChange = false; + + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { + ImmBranch &Br = ImmBranches[i]; + unsigned Opcode = Br.MI->getOpcode(); + unsigned NewOpc = 0; + unsigned Scale = 1; + unsigned Bits = 0; + switch (Opcode) { + default: break; + case ARM::t2B: + NewOpc = ARM::tB; + Bits = 11; + Scale = 2; + break; + case ARM::t2Bcc: + NewOpc = ARM::tBcc; + Bits = 8; + Scale = 2; + break; + } + if (!NewOpc) + continue; + + unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + Br.MI->setDesc(TII->get(NewOpc)); + MachineBasicBlock *MBB = Br.MI->getParent(); + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumT2BrShrunk; + MadeChange = true; + } + } + + return MadeChange; +} + + +/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller +/// jumptables when possible. +bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { + bool MadeChange = false; + + // FIXME: After the tables are shrunk, can we get rid of some of the + // constantpool tables? + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + const std::vector &JT = MJTI->getJumpTables(); + for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { + MachineInstr *MI = T2JumpTables[i]; + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); + MachineOperand JTOP = MI->getOperand(JTOpIdx); + unsigned JTI = JTOP.getIndex(); + assert(JTI < JT.size()); + + bool ByteOk = true; + bool HalfWordOk = true; + unsigned JTOffset = GetOffsetOf(MI) + 4; + const std::vector &JTBBs = JT[JTI].MBBs; + for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { + MachineBasicBlock *MBB = JTBBs[j]; + unsigned DstOffset = BBOffsets[MBB->getNumber()]; + // Negative offset is not ok. FIXME: We should change BB layout to make + // sure all the branches are forward. + if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) + ByteOk = false; + unsigned TBHLimit = ((1<<16)-1)*2; + if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit) + HalfWordOk = false; + if (!ByteOk && !HalfWordOk) + break; + } + + if (ByteOk || HalfWordOk) { + MachineBasicBlock *MBB = MI->getParent(); + unsigned BaseReg = MI->getOperand(0).getReg(); + bool BaseRegKill = MI->getOperand(0).isKill(); + if (!BaseRegKill) + continue; + unsigned IdxReg = MI->getOperand(1).getReg(); + bool IdxRegKill = MI->getOperand(1).isKill(); + MachineBasicBlock::iterator PrevI = MI; + if (PrevI == MBB->begin()) + continue; + + MachineInstr *AddrMI = --PrevI; + bool OptOk = true; + // Examine the instruction that calculates the jumptable entry address. + // If it's not the one just before the t2BR_JT, we won't delete it, and + // then it's not worth doing the optimization.
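Before the operand scan continues below: the ByteOk/HalfWordOk tests above encode tbb/tbh semantics, in which table entries are unsigned byte (tbb) or halfword (tbh) halfword-counts, so a destination is reachable only forward of the table and within 255*2 or 65535*2 bytes. A sketch of the feasibility check under those assumptions (helper name illustrative):

    // Can a jump-table destination at DstOff be reached from a table at
    // JTOff using tbb (Byte == true) or tbh entries?
    static bool fitsInTableBranch(unsigned DstOff, unsigned JTOff, bool Byte) {
      unsigned Limit = Byte ? ((1u << 8) - 1) * 2 : ((1u << 16) - 1) * 2;
      return DstOff >= JTOff && DstOff - JTOff <= Limit;
    }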
+ for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { + const MachineOperand &MO = AddrMI->getOperand(k); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() != BaseReg) { + OptOk = false; + break; + } + if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) { + OptOk = false; + break; + } + } + if (!OptOk) + continue; + + // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want + // to delete it as well. + MachineInstr *LeaMI = --PrevI; + if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && + LeaMI->getOpcode() != ARM::t2LEApcrelJT) || + LeaMI->getOperand(0).getReg() != BaseReg) + OptOk = false; + + if (!OptOk) + continue; + + unsigned Opc = ByteOk ? ARM::t2TBB : ARM::t2TBH; + MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) + .addReg(IdxReg, getKillRegState(IdxRegKill)) + .addJumpTableIndex(JTI, JTOP.getTargetFlags()) + .addImm(MI->getOperand(JTOpIdx+1).getImm()); + // FIXME: Insert an "ALIGN" instruction to ensure the next instruction + // is 2-byte aligned. For now, asm printer will fix it up. + unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); + unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI); + OrigSize += TII->GetInstSizeInBytes(LeaMI); + OrigSize += TII->GetInstSizeInBytes(MI); + + AddrMI->eraseFromParent(); + LeaMI->eraseFromParent(); + MI->eraseFromParent(); + + int delta = OrigSize - NewSize; + BBSizes[MBB->getNumber()] -= delta; + AdjustBBOffsetsAfter(MBB, -delta); + + ++NumTBs; + MadeChange = true; + } + } + + return MadeChange; +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index a75ed3bd53396..71700893a3e8e 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -15,33 +15,31 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/GlobalValue.h" #include "llvm/Type.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id, - ARMCP::ARMCPKind k, + ARMCP::ARMCPKind K, unsigned char PCAdj, const char *Modif, bool AddCA) : MachineConstantPoolValue((const Type*)gv->getType()), - GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj), + GV(gv), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} -ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id, - ARMCP::ARMCPKind k, +ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, + const char *s, unsigned id, unsigned char PCAdj, const char *Modif, bool AddCA) - : MachineConstantPoolValue((const Type*)Type::Int32Ty), - GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj), + : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)), + GV(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPValue), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} -ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, - ARMCP::ARMCPKind k, - const char *Modif) - : MachineConstantPoolValue((const Type*)Type::Int32Ty), - GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0), +ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif) + : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), + GV(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), Modifier(Modif) {} int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, @@ -56,7 +54,6 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, if 
(CPV->GV == GV && CPV->S == S && CPV->LabelId == LabelId && - CPV->Kind == Kind && CPV->PCAdjust == PCAdjust) return i; } @@ -65,31 +62,28 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, return -1; } +ARMConstantPoolValue::~ARMConstantPoolValue() { + free((void*)S); +} + void ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { ID.AddPointer(GV); ID.AddPointer(S); ID.AddInteger(LabelId); - ID.AddInteger((unsigned)Kind); ID.AddInteger(PCAdjust); } void ARMConstantPoolValue::dump() const { - cerr << " " << *this; + errs() << " " << *this; } -void ARMConstantPoolValue::print(std::ostream &O) const { - raw_os_ostream RawOS(O); - print(RawOS); -} void ARMConstantPoolValue::print(raw_ostream &O) const { if (GV) O << GV->getName(); else O << S; - if (isNonLazyPointer()) O << "$non_lazy_ptr"; - else if (isStub()) O << "$stub"; if (Modifier) O << "(" << Modifier << ")"; if (PCAdjust != 0) { O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust; diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index d2b9066dcc97c..00c48086aef66 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -15,17 +15,16 @@ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" -#include namespace llvm { class GlobalValue; +class LLVMContext; namespace ARMCP { enum ARMCPKind { CPValue, - CPNonLazyPtr, - CPStub + CPLSDA }; } @@ -36,7 +35,7 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { GlobalValue *GV; // GlobalValue being loaded. const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. - ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub? + ARMCP::ARMCPKind Kind; // Value or LSDA? unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative. // 8 for ARM, 4 for Thumb. const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) @@ -47,12 +46,12 @@ public: ARMCP::ARMCPKind Kind = ARMCP::CPValue, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); - ARMConstantPoolValue(const char *s, unsigned id, - ARMCP::ARMCPKind Kind = ARMCP::CPValue, + ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); - ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind, - const char *Modifier); + ARMConstantPoolValue(GlobalValue *GV, const char *Modifier); + ARMConstantPoolValue(); + ~ARMConstantPoolValue(); GlobalValue *getGV() const { return GV; } @@ -61,27 +60,27 @@ public: bool hasModifier() const { return Modifier != NULL; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } unsigned getLabelId() const { return LabelId; } - bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; } - bool isStub() const { return Kind == ARMCP::CPStub; } unsigned char getPCAdjustment() const { return PCAdjust; } + bool isLSDA() { return Kind == ARMCP::CPLSDA; } + + virtual unsigned getRelocationInfo() const { + // FIXME: This is conservatively claiming that these entries require a + // relocation, we may be able to do better than this. 
+ return 2; + } + virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); - void print(std::ostream *O) const { if (O) print(*O); } - void print(std::ostream &O) const; void print(raw_ostream *O) const { if (O) print(*O); } void print(raw_ostream &O) const; void dump() const; }; - inline std::ostream &operator<<(std::ostream &O, const ARMConstantPoolValue &V) { - V.print(O); - return O; -} - + inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { V.print(O); return O; diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h index 405b8f2b9f280..d5dae24424998 100644 --- a/lib/Target/ARM/ARMFrameInfo.h +++ b/lib/Target/ARM/ARMFrameInfo.h @@ -15,15 +15,15 @@ #define ARM_FRAMEINFO_H #include "ARM.h" -#include "llvm/Target/TargetFrameInfo.h" #include "ARMSubtarget.h" +#include "llvm/Target/TargetFrameInfo.h" namespace llvm { class ARMFrameInfo : public TargetFrameInfo { public: explicit ARMFrameInfo(const ARMSubtarget &ST) - : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) { + : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0, 4) { } }; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6485fc1d36006..bebf4e839994c 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -21,6 +21,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -30,10 +31,10 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -using namespace llvm; +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" -static const unsigned arm_dsubreg_0 = 5; -static const unsigned arm_dsubreg_1 = 6; +using namespace llvm; //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine @@ -48,8 +49,9 @@ class ARMDAGToDAGISel : public SelectionDAGISel { const ARMSubtarget *Subtarget; public: - explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm) - : SelectionDAGISel(tm), TM(tm), + explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(&TM.getSubtarget()) { } @@ -57,7 +59,8 @@ public: return "ARM Instruction Selection"; } - /// getI32Imm - Return a target constant with the specified value, of type i32. + /// getI32Imm - Return a target constant of type i32 with the specified + /// value. inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } @@ -74,6 +77,8 @@ public: SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDValue Op, SDValue N, SDValue &Offset, SDValue &Opc); + bool SelectAddrMode4(SDValue Op, SDValue N, SDValue &Addr, + SDValue &Mode); bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, @@ -118,15 +123,63 @@ private: SDNode *SelectARMIndexedLoad(SDValue Op); SDNode *SelectT2IndexedLoad(SDValue Op); + /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. + SDNode *SelectDYN_ALLOC(SDValue Op); + + /// SelectVLD - Select NEON load intrinsics. NumVecs should + /// be 2, 3 or 4. 
The opcode arrays specify the instructions used for
+  /// loads of D registers and even subregs and odd subregs of Q registers.
+  /// For NumVecs == 2, QOpcodes1 is not used.
+  SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes,
+                    unsigned *QOpcodes0, unsigned *QOpcodes1);
+
+  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
+  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
+  /// load/store of D registers and even subregs and odd subregs of Q registers.
+  SDNode *SelectVLDSTLane(SDValue Op, bool IsLoad, unsigned NumVecs,
+                          unsigned *DOpcodes, unsigned *QOpcodes0,
+                          unsigned *QOpcodes1);
+
+  /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
+  SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc);
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                             char ConstraintCode,
                                             std::vector<SDValue> &OutOps);
+
+  /// PairDRegs - Insert a pair of double registers into an implicit def to
+  /// form a quad register.
+  SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
 };
 }
+/// isInt32Immediate - This method tests to see if the node is a 32-bit
+/// constant operand. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+    Imm = cast<ConstantSDNode>(N)->getZExtValue();
+    return true;
+  }
+  return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant. If so, Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+  return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+  return N->getOpcode() == Opc &&
+         isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+
 void ARMDAGToDAGISel::InstructionSelect() {
   DEBUG(BB->dump());
@@ -144,7 +197,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
   // Don't match base register only case. That is matched to a separate
   // lower complexity pattern with explicit register operand.
   if (ShOpcVal == ARM_AM::no_shift) return false;
- 
+
   BaseReg = N.getOperand(0);
   unsigned ShImmVal = 0;
   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -198,7 +251,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
                                       MVT::i32);
     return true;
   }
- 
+
   // Match simple R +/- imm12 operands.
   if (N.getOpcode() == ISD::ADD)
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -223,15 +276,15 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
       return true;
     }
   }
- 
+
   // Otherwise this is R +/- [possibly shifted] R
   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
   unsigned ShAmt = 0;
- 
+
   Base = N.getOperand(0);
   Offset = N.getOperand(1);
- 
+
   if (ShOpcVal != ARM_AM::no_shift) {
     // Check to see if the RHS of the shift is a constant, if not, we can't fold
     // it.
@@ -243,7 +296,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
       ShOpcVal = ARM_AM::no_shift;
     }
   }
- 
+
   // Try matching (R shl C) + (R).
if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) { ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); @@ -260,7 +313,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, } } } - + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), MVT::i32); return true; @@ -315,7 +368,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); return true; } - + if (N.getOpcode() != ISD::ADD) { Base = N; if (N.getOpcode() == ISD::FrameIndex) { @@ -326,7 +379,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); return true; } - + // If the RHS is +/- imm8, fold into addr mode. if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); @@ -348,7 +401,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, return true; } } - + Base = N.getOperand(0); Offset = N.getOperand(1); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); @@ -377,6 +430,12 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, return true; } +bool ARMDAGToDAGISel::SelectAddrMode4(SDValue Op, SDValue N, + SDValue &Addr, SDValue &Mode) { + Addr = N; + Mode = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset) { @@ -392,7 +451,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, MVT::i32); return true; } - + // If the RHS is +/- imm8, fold into addr mode. if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); @@ -417,7 +476,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, } } } - + Base = N; Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), MVT::i32); @@ -428,14 +487,14 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, SDValue &Opc) { Addr = N; - // The optional writeback is handled in ARMLoadStoreOpt. + // Default to no writeback. Update = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); return true; } bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, - SDValue &Offset, SDValue &Label) { + SDValue &Offset, SDValue &Label) { if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { Offset = N.getOperand(0); SDValue N1 = N.getOperand(1); @@ -451,13 +510,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, // FIXME dl should come from the parent load or store, not the address DebugLoc dl = Op.getDebugLoc(); if (N.getOpcode() != ISD::ADD) { - Base = N; - // We must materialize a zero in a reg! Returning a constant here - // wouldn't work without additional code to position the node within - // ISel's topological ordering in a place where ISel will process it - // normally. Instead, just explicitly issue a tMOVri8 node! 
- Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32, - CurDAG->getTargetConstant(0, MVT::i32)), 0); + ConstantSDNode *NC = dyn_cast(N); + if (!NC || NC->getZExtValue() != 0) + return false; + + Base = Offset = N; return true; } @@ -567,7 +624,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, } } } - + return false; } @@ -594,41 +651,70 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N, bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, SDValue &Base, SDValue &OffImm) { // Match simple R + imm12 operands. - if (N.getOpcode() != ISD::ADD) - return false; + + // Base only. + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + if (N.getOpcode() == ISD::FrameIndex) { + // Match frame index... + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::TargetConstantPool) + return false; // We want to select t2LDRpci instead. + } else + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + if (SelectT2AddrModeImm8(Op, N, Base, OffImm)) + // Let t2LDRi8 handle (R - imm8). + return false; + int RHSC = (int)RHS->getZExtValue(); - if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits. + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + + if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; } } - return false; + // Base only. + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; } bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N, SDValue &Base, SDValue &OffImm) { - if (N.getOpcode() == ISD::ADD) { + // Match simple R - imm8 operands. + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - if (RHSC < 0 && RHSC > -0x100) { // 8 bits. - Base = N.getOperand(0); + int RHSC = (int)RHS->getSExtValue(); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + + if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; } } - } else if (N.getOpcode() == ISD::SUB) { - if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); - if (RHSC >= 0 && RHSC < 0x100) { // 8 bits. - Base = N.getOperand(0); - OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); - return true; - } - } } return false; @@ -643,7 +729,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N, if (ConstantSDNode *RHS = dyn_cast(N)) { int RHSC = (int)RHS->getZExtValue(); if (RHSC >= 0 && RHSC < 0x100) { // 8 bits. - OffImm = (AM == ISD::PRE_INC) + OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) ? 
CurDAG->getTargetConstant(RHSC, MVT::i32) : CurDAG->getTargetConstant(-RHSC, MVT::i32); return true; @@ -658,7 +744,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, if (N.getOpcode() == ISD::ADD) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); - if (((RHSC & 0x3) == 0) && (RHSC < 0 && RHSC > -0x400)) { // 8 bits. + if (((RHSC & 0x3) == 0) && + ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits. Base = N.getOperand(0); OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -681,20 +768,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N, bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm) { - // Base only. - if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { - Base = N; - if (N.getOpcode() == ISD::FrameIndex) { - int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } else if (N.getOpcode() == ARMISD::Wrapper) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::TargetConstantPool) - return false; // We want to select t2LDRpci instead. - } - OffReg = CurDAG->getRegister(0, MVT::i32); - ShImm = CurDAG->getTargetConstant(0, MVT::i32); - return true; + // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. + if (N.getOpcode() != ISD::ADD) + return false; + + // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) + return false; + else if (RHSC < 0 && RHSC >= -255) // 8 bits + return false; } // Look for (R + R) or (R + (R << [1,2,3])). @@ -708,8 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N, ShOpcVal = ARM_AM::getShiftOpcForNode(Base); if (ShOpcVal == ARM_AM::lsl) std::swap(Base, OffReg); - } - + } + if (ShOpcVal == ARM_AM::lsl) { // Check to see if the RHS of the shift is a constant, if not, we can't fold // it. @@ -723,11 +807,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N, } else { ShOpcVal = ARM_AM::no_shift; } - } else if (SelectT2AddrModeImm12(Op, N, Base, ShImm) || - SelectT2AddrModeImm8 (Op, N, Base, ShImm)) - // Don't match if it's possible to match to one of the r +/- imm cases. 
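// A standalone restatement of the division of labor among the Thumb-2
// addressing routines above (classifyT2Offset is an illustrative name, not
// part of the patch): positive offsets up to 4095 select the imm12 form
// (t2LDRi12), small negative offsets the imm8 form (t2LDRi8), and everything
// else falls through to the register(+shift) form handled by
// SelectT2AddrModeSoReg, or to materializing the offset.
#include <cstdio>

enum T2AddrForm { Imm12, Imm8, SoReg };

static T2AddrForm classifyT2Offset(int Off) {
  if (Off >= 0 && Off < 0x1000) return Imm12; // 0..4095 -> t2LDRi12
  if (Off < 0 && Off >= -255)   return Imm8;  // -255..-1 -> t2LDRi8
  return SoReg;                               // register form, or materialize
}

int main() {
  // Prints "0 1 2": 4095 is imm12, -17 is imm8, -4096 needs the general form.
  printf("%d %d %d\n", classifyT2Offset(4095), classifyT2Offset(-17),
         classifyT2Offset(-4096));
  return 0;
}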
- return false; - + } + ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32); return true; @@ -746,7 +827,7 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { if (AM == ISD::UNINDEXED) return NULL; - MVT LoadedVT = LD->getMemoryVT(); + EVT LoadedVT = LD->getMemoryVT(); SDValue Offset, AMOpc; bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); unsigned Opcode = 0; @@ -780,8 +861,8 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, - MVT::Other, Ops, 6); + return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 6); } return NULL; @@ -793,14 +874,14 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { if (AM == ISD::UNINDEXED) return NULL; - MVT LoadedVT = LD->getMemoryVT(); + EVT LoadedVT = LD->getMemoryVT(); bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; SDValue Offset; bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); unsigned Opcode = 0; bool Match = false; if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) { - switch (LoadedVT.getSimpleVT()) { + switch (LoadedVT.getSimpleVT().SimpleTy) { case MVT::i32: Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; break; @@ -828,13 +909,300 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, - MVT::Other, Ops, 5); + return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 5); + } + + return NULL; +} + +SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); + int32_t AlignVal = cast(Align)->getSExtValue(); + if (AlignVal < 0) + // We need to align the stack. Use Thumb1 tAND which is the only thumb + // instruction that can read and write SP. This matches to a pseudo + // instruction that has a chain to ensure the result is written back to + // the stack pointer. + SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0); + + bool isC = isa(Size); + uint32_t C = isC ? cast(Size)->getZExtValue() : ~0UL; + // Handle the most common case for both Thumb1 and Thumb2: + // tSUBspi - immediate is between 0 ... 508 inclusive. + if (C <= 508 && ((C & 3) == 0)) + // FIXME: tSUBspi encode scale 4 implicitly. + return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP, + CurDAG->getTargetConstant(C/4, MVT::i32), + Chain); + + if (Subtarget->isThumb1Only()) { + // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering + // should have negated the size operand already. FIXME: We can't insert + // new target independent node at this stage so we are forced to negate + // it earlier. Is there a better solution? 
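// A runnable restatement of the tSUBspi fast path above (encodeTSUBspi is an
// illustrative name; the real test is inline in the selector): the Thumb
// "sub sp, #imm" form carries a 7-bit immediate implicitly scaled by 4, so it
// covers 0..508 in word-sized steps -- hence the C <= 508 && (C & 3) == 0
// guard and the C/4 operand.
#include <cstdint>
#include <cassert>

static bool encodeTSUBspi(uint32_t Bytes, uint32_t &Imm7) {
  if (Bytes > 508 || (Bytes & 3) != 0)
    return false;     // out of range, or not a multiple of 4
  Imm7 = Bytes / 4;   // the scaled value the instruction encodes
  return true;
}

int main() {
  uint32_t Imm;
  assert(encodeTSUBspi(508, Imm) && Imm == 127); // largest encodable amount
  assert(!encodeTSUBspi(512, Imm));              // exceeds the scaled field
  assert(!encodeTSUBspi(6, Imm));                // not word-aligned
  return 0;
}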
+ return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size, + Chain); + } else if (Subtarget->isThumb2()) { + if (isC && Predicate_t2_so_imm(Size.getNode())) { + // t2SUBrSPi + SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3); + } else if (isC && Predicate_imm0_4095(Size.getNode())) { + // t2SUBrSPi12 + SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3); + } else { + // t2SUBrSPs + SDValue Ops[] = { SP, Size, + getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4); + } + } + + // FIXME: Add ADD / SUB sp instructions for ARM. + return 0; +} + +/// PairDRegs - Insert a pair of double registers into an implicit def to +/// form a quad register. +SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue Undef = + SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); + SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + VT, Undef, V0, SubReg0); + return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + VT, SDValue(Pair, 0), V1, SubReg1); +} + +/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type +/// for a 64-bit subregister of the vector. +static EVT GetNEONSubregVT(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled NEON type"); + case MVT::v16i8: return MVT::v8i8; + case MVT::v8i16: return MVT::v4i16; + case MVT::v4f32: return MVT::v2f32; + case MVT::v4i32: return MVT::v2i32; + case MVT::v2i64: return MVT::v1i64; + } +} + +SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, + unsigned *DOpcodes, unsigned *QOpcodes0, + unsigned *QOpcodes1) { + assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + + SDValue MemAddr, MemUpdate, MemOpc; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + return NULL; + + SDValue Chain = N->getOperand(0); + EVT VT = N->getValueType(0); + bool is64BitVector = VT.is64BitVector(); + + unsigned OpcodeIndex; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld type"); + // Double-register operations: + case MVT::v8i8: OpcodeIndex = 0; break; + case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v2f32: + case MVT::v2i32: OpcodeIndex = 2; break; + case MVT::v1i64: OpcodeIndex = 3; break; + // Quad-register operations: + case MVT::v16i8: OpcodeIndex = 0; break; + case MVT::v8i16: OpcodeIndex = 1; break; + case MVT::v4f32: + case MVT::v4i32: OpcodeIndex = 2; break; + } + + if (is64BitVector) { + unsigned Opc = DOpcodes[OpcodeIndex]; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + std::vector ResTys(NumVecs, VT); + ResTys.push_back(MVT::Other); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + } + + EVT RegVT = GetNEONSubregVT(VT); + if (NumVecs == 2) { + // Quad registers are directly supported for VLD2, + // loading 2 pairs of D regs. 
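// (Result wiring, restated: with NumVecs == 2 the machine node built below
// produces four D-register results ordered [Q0 low, Q0 high, Q1 low, Q1 high],
// with the chain as result 2*NumVecs == 4; the PairDRegs loop that follows
// glues results 2*Vec and 2*Vec+1 back into Q register Vec.)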
+ unsigned Opc = QOpcodes0[OpcodeIndex]; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + std::vector ResTys(4, VT); + ResTys.push_back(MVT::Other); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + Chain = SDValue(VLd, 4); + + // Combine the even and odd subregs to produce the result. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } + } else { + // Otherwise, quad registers are loaded with two separate instructions, + // where one loads the even registers and the other loads the odd registers. + + // Enable writeback to the address register. + MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + std::vector ResTys(NumVecs, RegVT); + ResTys.push_back(MemAddr.getValueType()); + ResTys.push_back(MVT::Other); + + // Load the even subreg. + unsigned Opc = QOpcodes0[OpcodeIndex]; + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); + Chain = SDValue(VLdA, NumVecs+1); + + // Load the odd subreg. + Opc = QOpcodes1[OpcodeIndex]; + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4); + Chain = SDValue(VLdB, NumVecs+1); + + // Combine the even and odd subregs to produce the result. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } + } + ReplaceUses(SDValue(N, NumVecs), Chain); + return NULL; +} + +SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, + unsigned NumVecs, unsigned *DOpcodes, + unsigned *QOpcodes0, + unsigned *QOpcodes1) { + assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + + SDValue MemAddr, MemUpdate, MemOpc; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + return NULL; + + SDValue Chain = N->getOperand(0); + unsigned Lane = + cast(N->getOperand(NumVecs+3))->getZExtValue(); + EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType(); + bool is64BitVector = VT.is64BitVector(); + + // Quad registers are handled by load/store of subregs. Find the subreg info. + unsigned NumElts = 0; + int SubregIdx = 0; + EVT RegVT = VT; + if (!is64BitVector) { + RegVT = GetNEONSubregVT(VT); + NumElts = RegVT.getVectorNumElements(); + SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1; + } + + unsigned OpcodeIndex; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld/vst lane type"); + // Double-register operations: + case MVT::v8i8: OpcodeIndex = 0; break; + case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v2f32: + case MVT::v2i32: OpcodeIndex = 2; break; + // Quad-register operations: + case MVT::v8i16: OpcodeIndex = 0; break; + case MVT::v4f32: + case MVT::v4i32: OpcodeIndex = 1; break; + } + + SmallVector Ops; + Ops.push_back(MemAddr); + Ops.push_back(MemUpdate); + Ops.push_back(MemOpc); + + unsigned Opc = 0; + if (is64BitVector) { + Opc = DOpcodes[OpcodeIndex]; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } else { + // Check if this is loading the even or odd subreg of a Q register. 
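// Concretely (mapQLane is an illustrative name, not part of the patch): lanes
// below the D register's element count live in the even (low) D subreg;
// higher lanes live in the odd (high) subreg with the lane index rebased to
// zero, exactly as the Lane -= NumElts adjustment below does.
#include <cstdio>

static void mapQLane(unsigned Lane, unsigned NumDElts,
                     bool &Odd, unsigned &SubLane) {
  Odd = (Lane >= NumDElts);
  SubLane = Odd ? Lane - NumDElts : Lane;
}

int main() {
  bool Odd; unsigned SubLane;
  mapQLane(6, 4, Odd, SubLane); // a v8i16 Q reg: each D subreg holds 4 x i16
  printf("lane 6 -> %s D subreg, lane %u\n", Odd ? "odd" : "even", SubLane);
  return 0;
}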
+ if (Lane < NumElts) { + Opc = QOpcodes0[OpcodeIndex]; + } else { + Lane -= NumElts; + Opc = QOpcodes1[OpcodeIndex]; + } + // Extract the subregs of the input vector. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(Vec+3))); + } + Ops.push_back(getI32Imm(Lane)); + Ops.push_back(Chain); + + if (!IsLoad) + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + + std::vector ResTys(NumVecs, RegVT); + ResTys.push_back(MVT::Other); + SDNode *VLdLn = + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+5); + // For a 64-bit vector load to D registers, nothing more needs to be done. + if (is64BitVector) + return VLdLn; + + // For 128-bit vectors, take the 64-bit results of the load and insert them + // as subregs into the result. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, + N->getOperand(Vec+3), + SDValue(VLdLn, Vec)); + ReplaceUses(SDValue(N, Vec), QuadVec); } + Chain = SDValue(VLdLn, NumVecs); + ReplaceUses(SDValue(N, NumVecs), Chain); return NULL; } +SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, + unsigned Opc) { + if (!Subtarget->hasV6T2Ops()) + return NULL; + + unsigned Shl_imm = 0; + if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){ + assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); + unsigned Srl_imm = 0; + if (isInt32Immediate(Op.getOperand(1), Srl_imm)) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + unsigned Width = 32 - Srl_imm; + int LSB = Srl_imm - Shl_imm; + if ((LSB + Width) > 32) + return NULL; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { Op.getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32, Ops, 5); + } + } + return NULL; +} SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); @@ -848,44 +1216,50 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case ISD::Constant: { unsigned Val = cast(N)->getZExtValue(); bool UseCP = true; - if (Subtarget->isThumb()) { - if (Subtarget->hasThumb2()) - // Thumb2 has the MOVT instruction, so all immediates can - // be done with MOV + MOVT, at worst. - UseCP = 0; - else + if (Subtarget->hasThumb2()) + // Thumb2-aware targets have the MOVT instruction, so all immediates can + // be done with MOV + MOVT, at worst. + UseCP = 0; + else { + if (Subtarget->isThumb()) { UseCP = (Val > 255 && // MOV ~Val > 255 && // MOV + MVN !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL - } else - UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV - ARM_AM::getSOImmVal(~Val) == -1 && // MVN - !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. + } else + UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV + ARM_AM::getSOImmVal(~Val) == -1 && // MVN + !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. 
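// A sketch of the ARM-mode decision above, under the assumption that
// ARM_AM::getSOImmVal succeeds exactly for the classic "so_imm" encoding --
// an 8-bit value rotated right by an even amount.  (The two-instruction
// isSOImmTwoPartVal case is left out for brevity.)
#include <cstdint>
#include <cstdio>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

// Encodable iff some even left-rotation brings the value into 8 bits.
static bool isSOImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2)
    if ((rotl32(V, Rot) & ~0xFFu) == 0)
      return true;
  return false;
}

int main() {
  const uint32_t Vals[] = { 0xFFu, 0xFF000000u, 0xFFFFFFFFu, 0x12345678u };
  for (uint32_t V : Vals) // MOV, MOV, MVN, constant pool
    printf("0x%08X -> %s\n", V,
           isSOImm(V) ? "MOV" : isSOImm(~V) ? "MVN" : "constant pool");
  return 0;
}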
+ } + if (UseCP) { SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val), + CurDAG->getTargetConstantPool(ConstantInt::get( + Type::getInt32Ty(*CurDAG->getContext()), Val), TLI.getPointerTy()); SDNode *ResNode; - if (Subtarget->isThumb()) - ResNode = CurDAG->getTargetNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, - CPIdx, CurDAG->getEntryNode()); - else { + if (Subtarget->isThumb1Only()) { + SDValue Pred = CurDAG->getTargetConstant(0xEULL, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; + ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, + Ops, 4); + } else { SDValue Ops[] = { - CPIdx, + CPIdx, CurDAG->getRegister(0, MVT::i32), CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getEntryNode() }; - ResNode=CurDAG->getTargetNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, - Ops, 6); + ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, + Ops, 6); } ReplaceUses(Op, SDValue(ResNode, 0)); return NULL; } - + // Other cases are autogenerated. break; } @@ -893,80 +1267,106 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. int FI = cast(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - if (Subtarget->isThumb()) { + if (Subtarget->isThumb1Only()) { return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI, CurDAG->getTargetConstant(0, MVT::i32)); } else { + unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? + ARM::t2ADDri : ARM::ADDri); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5); + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } - case ISD::ADD: { - if (!Subtarget->isThumb()) - break; - // Select add sp, c to tADDhirr. - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - RegisterSDNode *LHSR = dyn_cast(Op.getOperand(0)); - RegisterSDNode *RHSR = dyn_cast(Op.getOperand(1)); - if (LHSR && LHSR->getReg() == ARM::SP) { - std::swap(N0, N1); - std::swap(LHSR, RHSR); - } - if (RHSR && RHSR->getReg() == ARM::SP) { - SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVlor2hir, dl, - Op.getValueType(), N0, N0), 0); - return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1); - } + case ARMISD::DYN_ALLOC: + return SelectDYN_ALLOC(Op); + case ISD::SRL: + if (SDNode *I = SelectV6T2BitfieldExtractOp(Op, + Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)) + return I; + break; + case ISD::SRA: + if (SDNode *I = SelectV6T2BitfieldExtractOp(Op, + Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)) + return I; break; - } case ISD::MUL: - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) break; if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { unsigned RHSV = C->getZExtValue(); if (!RHSV) break; if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
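// A worked check of the strength reduction being selected in this block: a
// multiplier of the form 2^n+1 becomes an add with a shifted operand, and
// 2^n-1 becomes a reverse subtract of a shifted operand (ARM's RSB computes
// operand2 - operand1).
#include <cstdint>
#include <cassert>

int main() {
  uint32_t X = 123;
  assert(X * 9u == X + (X << 3)); // 9 = 2^3 + 1  ->  ADDrs: x + (x << 3)
  assert(X * 7u == (X << 3) - X); // 7 = 2^3 - 1  ->  RSBrs: (x << 3) - x
  return 0;
}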
+ unsigned ShImm = Log2_32(RHSV-1); + if (ShImm >= 32) + break; SDValue V = Op.getOperand(0); - unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1)); - SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), - CurDAG->getTargetConstant(ShImm, MVT::i32), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); + SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + if (Subtarget->isThumb()) { + SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); + } else { + SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + } } if (isPowerOf2_32(RHSV+1)) { // 2^n-1? + unsigned ShImm = Log2_32(RHSV+1); + if (ShImm >= 32) + break; SDValue V = Op.getOperand(0); - unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1)); - SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), - CurDAG->getTargetConstant(ShImm, MVT::i32), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), - CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); + SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + if (Subtarget->isThumb()) { + SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5); + } else { + SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + } } } break; case ARMISD::FMRRD: - return CurDAG->getTargetNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, - Op.getOperand(0), getAL(CurDAG), - CurDAG->getRegister(0, MVT::i32)); + return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, + Op.getOperand(0), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)); case ISD::UMUL_LOHI: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + if (Subtarget->isThumb1Only()) + break; + if (Subtarget->isThumb()) { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4); + } else { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } } case ISD::SMUL_LOHI: { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + if (Subtarget->isThumb1Only()) + break; + if (Subtarget->isThumb()) { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4); + } else { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } } case ISD::LOAD: { SDNode *ResNode = 0; - if (Subtarget->isThumb2()) + if 
(Subtarget->isThumb() && Subtarget->hasThumb2()) ResNode = SelectT2IndexedLoad(Op); else ResNode = SelectARMIndexedLoad(Op); @@ -988,7 +1388,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) // Pattern complexity = 6 cost = 1 size = 0 - unsigned Opc = Subtarget->isThumb() ? + unsigned Opc = Subtarget->isThumb() ? ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; SDValue Chain = Op.getOperand(0); SDValue N1 = Op.getOperand(1); @@ -1003,8 +1403,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { cast(N2)->getZExtValue()), MVT::i32); SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; - SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, MVT::Other, - MVT::Flag, Ops, 5); + SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, + MVT::Flag, Ops, 5); Chain = SDValue(ResNode, 0); if (Op.getNode()->getNumValues() == 2) { InFlag = SDValue(ResNode, 1); @@ -1014,8 +1414,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { return NULL; } case ARMISD::CMOV: { - bool isThumb = Subtarget->isThumb(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); @@ -1024,39 +1423,79 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { assert(N2.getOpcode() == ISD::Constant); assert(N3.getOpcode() == ISD::Register); - // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Pattern complexity = 18 cost = 1 size = 0 - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (!isThumb && VT == MVT::i32 && - SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) { - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7); - } + if (!Subtarget->isThumb1Only() && VT == MVT::i32) { + // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Pattern complexity = 18 cost = 1 size = 0 + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (Subtarget->isThumb()) { + if (SelectT2ShifterOperandReg(Op, N1, CPTmp0, CPTmp1)) { + unsigned SOVal = cast(CPTmp1)->getZExtValue(); + unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); + unsigned Opc = 0; + switch (SOShOp) { + case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; + case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; + case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; + case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; + default: + llvm_unreachable("Unknown so_reg opcode!"); + break; + } + SDValue SOShImm = + CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, CPTmp0, SOShImm, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6); + } + } else { + if (SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) { + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), + ARM::MOVCCs, MVT::i32, Ops, 7); + } + } - // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<><>:$true, - // (imm:i32):$cc) - // Emits: (MOVCCi:i32 GPR:i32:$false, - // 
(so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc) - // Pattern complexity = 10 cost = 1 size = 0 - if (VT == MVT::i32 && - N3.getOpcode() == ISD::Constant && - Predicate_so_imm(N3.getNode())) { - SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) - cast(N1)->getZExtValue()), - MVT::i32); - Tmp1 = Transform_so_imm_XFORM(Tmp1.getNode()); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCi, MVT::i32, Ops, 5); + // Pattern: (ARMcmov:i32 GPR:i32:$false, + // (imm:i32)<>:$true, + // (imm:i32):$cc) + // Emits: (MOVCCi:i32 GPR:i32:$false, + // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) + // Pattern complexity = 10 cost = 1 size = 0 + if (N3.getOpcode() == ISD::Constant) { + if (Subtarget->isThumb()) { + if (Predicate_t2_so_imm(N3.getNode())) { + SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) + cast(N1)->getZExtValue()), + MVT::i32); + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), + ARM::t2MOVCCi, MVT::i32, Ops, 5); + } + } else { + if (Predicate_so_imm(N3.getNode())) { + SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) + cast(N1)->getZExtValue()), + MVT::i32); + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), + ARM::MOVCCi, MVT::i32, Ops, 5); + } + } + } } // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) @@ -1073,23 +1512,25 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { MVT::i32); SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; unsigned Opc = 0; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Illegal conditional move type!"); break; case MVT::i32: - Opc = isThumb ? ARM::tMOVCCr : ARM::MOVCCr; + Opc = Subtarget->isThumb() + ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) + : ARM::MOVCCr; break; case MVT::f32: Opc = ARM::FCPYScc; break; case MVT::f64: Opc = ARM::FCPYDcc; - break; + break; } return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); } case ARMISD::CNEG: { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); @@ -1103,7 +1544,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { MVT::i32); SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; unsigned Opc = 0; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Illegal conditional move type!"); break; case MVT::f32: @@ -1116,104 +1557,308 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); } - case ISD::DECLARE: { - SDValue Chain = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - FrameIndexSDNode *FINode = dyn_cast(N1); - // FIXME: handle VLAs. 
- if (!FINode) { - ReplaceUses(Op.getValue(0), Chain); - return NULL; + case ARMISD::VZIP: { + unsigned Opc = 0; + EVT VT = N->getValueType(0); + switch (VT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::v8i8: Opc = ARM::VZIPd8; break; + case MVT::v4i16: Opc = ARM::VZIPd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VZIPd32; break; + case MVT::v16i8: Opc = ARM::VZIPq8; break; + case MVT::v8i16: Opc = ARM::VZIPq16; break; + case MVT::v4f32: + case MVT::v4i32: Opc = ARM::VZIPq32; break; } - if (N2.getOpcode() == ARMISD::PIC_ADD && isa(N2.getOperand(0))) - N2 = N2.getOperand(0); - LoadSDNode *Ld = dyn_cast(N2); - if (!Ld) { - ReplaceUses(Op.getValue(0), Chain); - return NULL; + return CurDAG->getMachineNode(Opc, dl, VT, VT, + N->getOperand(0), N->getOperand(1)); + } + case ARMISD::VUZP: { + unsigned Opc = 0; + EVT VT = N->getValueType(0); + switch (VT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::v8i8: Opc = ARM::VUZPd8; break; + case MVT::v4i16: Opc = ARM::VUZPd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VUZPd32; break; + case MVT::v16i8: Opc = ARM::VUZPq8; break; + case MVT::v8i16: Opc = ARM::VUZPq16; break; + case MVT::v4f32: + case MVT::v4i32: Opc = ARM::VUZPq32; break; } - SDValue BasePtr = Ld->getBasePtr(); - assert(BasePtr.getOpcode() == ARMISD::Wrapper && - isa(BasePtr.getOperand(0)) && - "llvm.dbg.variable should be a constantpool node"); - ConstantPoolSDNode *CP = cast(BasePtr.getOperand(0)); - GlobalValue *GV = 0; - if (CP->isMachineConstantPoolEntry()) { - ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)CP->getMachineCPVal(); - GV = ACPV->getGV(); - } else - GV = dyn_cast(CP->getConstVal()); - if (!GV) { - ReplaceUses(Op.getValue(0), Chain); - return NULL; + return CurDAG->getMachineNode(Opc, dl, VT, VT, + N->getOperand(0), N->getOperand(1)); + } + case ARMISD::VTRN: { + unsigned Opc = 0; + EVT VT = N->getValueType(0); + switch (VT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::v8i8: Opc = ARM::VTRNd8; break; + case MVT::v4i16: Opc = ARM::VTRNd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VTRNd32; break; + case MVT::v16i8: Opc = ARM::VTRNq8; break; + case MVT::v8i16: Opc = ARM::VTRNq16; break; + case MVT::v4f32: + case MVT::v4i32: Opc = ARM::VTRNq32; break; } - - SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(), - TLI.getPointerTy()); - SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy()); - SDValue Ops[] = { Tmp1, Tmp2, Chain }; - return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl, - MVT::Other, Ops, 3); + return CurDAG->getMachineNode(Opc, dl, VT, VT, + N->getOperand(0), N->getOperand(1)); } - case ISD::CONCAT_VECTORS: { - MVT VT = Op.getValueType(); - assert(VT.is128BitVector() && Op.getNumOperands() == 2 && - "unexpected CONCAT_VECTORS"); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDNode *Result = - CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT); - if (N0.getOpcode() != ISD::UNDEF) - Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT, - SDValue(Result, 0), N0, - CurDAG->getTargetConstant(arm_dsubreg_0, - MVT::i32)); - if (N1.getOpcode() != ISD::UNDEF) - Result = CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, VT, - SDValue(Result, 0), N1, - CurDAG->getTargetConstant(arm_dsubreg_1, - MVT::i32)); - return Result; - } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + EVT VT = N->getValueType(0); + unsigned Opc = 0; 
+ + switch (IntNo) { + default: + break; + + case Intrinsic::arm_neon_vld2: { + unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, + ARM::VLD2d32, ARM::VLD2d64 }; + unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 }; + return SelectVLD(Op, 2, DOpcodes, QOpcodes, 0); + } - case ISD::VECTOR_SHUFFLE: { - MVT VT = Op.getValueType(); - - // Match 128-bit splat to VDUPLANEQ. (This could be done with a Pat in - // ARMInstrNEON.td but it is awkward because the shuffle mask needs to be - // transformed first into a lane number and then to both a subregister - // index and an adjusted lane number.) If the source operand is a - // SCALAR_TO_VECTOR, leave it so it will be matched later as a VDUP. - ShuffleVectorSDNode *SVOp = cast(N); - if (VT.is128BitVector() && SVOp->isSplat() && - Op.getOperand(0).getOpcode() != ISD::SCALAR_TO_VECTOR && - Op.getOperand(1).getOpcode() == ISD::UNDEF) { - unsigned LaneVal = SVOp->getSplatIndex(); - - MVT HalfVT; - unsigned Opc = 0; - switch (VT.getVectorElementType().getSimpleVT()) { - default: assert(false && "unhandled VDUP splat type"); - case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break; - case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break; - case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break; - case MVT::f32: Opc = ARM::VDUPLNfq; HalfVT = MVT::v2f32; break; + case Intrinsic::arm_neon_vld3: { + unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, + ARM::VLD3d32, ARM::VLD3d64 }; + unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a }; + unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b }; + return SelectVLD(Op, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vld4: { + unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, + ARM::VLD4d32, ARM::VLD4d64 }; + unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a }; + unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b }; + return SelectVLD(Op, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vld2lane: { + unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; + unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a }; + unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b }; + return SelectVLDSTLane(Op, true, 2, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vld3lane: { + unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; + unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a }; + unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b }; + return SelectVLDSTLane(Op, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vld4lane: { + unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; + unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a }; + unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b }; + return SelectVLDSTLane(Op, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vst2: { + SDValue MemAddr, MemUpdate, MemOpc; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + return NULL; + SDValue Chain = N->getOperand(0); + VT = N->getOperand(3).getValueType(); + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst2 type"); + case MVT::v8i8: Opc = ARM::VST2d8; break; + case MVT::v4i16: Opc = ARM::VST2d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VST2d32; break; + case MVT::v1i64: Opc = ARM::VST2d64; break; + } + const SDValue Ops[] 
= { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), Chain }; + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6); + } + // Quad registers are stored as pairs of double registers. + EVT RegVT; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst2 type"); + case MVT::v16i8: Opc = ARM::VST2q8; RegVT = MVT::v8i8; break; + case MVT::v8i16: Opc = ARM::VST2q16; RegVT = MVT::v4i16; break; + case MVT::v4f32: Opc = ARM::VST2q32; RegVT = MVT::v2f32; break; + case MVT::v4i32: Opc = ARM::VST2q32; RegVT = MVT::v2i32; break; + } + SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(3)); + SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(3)); + SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(4)); + SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(4)); + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + D0, D1, D2, D3, Chain }; + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8); + } + + case Intrinsic::arm_neon_vst3: { + SDValue MemAddr, MemUpdate, MemOpc; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + return NULL; + SDValue Chain = N->getOperand(0); + VT = N->getOperand(3).getValueType(); + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst3 type"); + case MVT::v8i8: Opc = ARM::VST3d8; break; + case MVT::v4i16: Opc = ARM::VST3d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VST3d32; break; + case MVT::v1i64: Opc = ARM::VST3d64; break; + } + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), + N->getOperand(5), Chain }; + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7); } + // Quad registers are stored with two separate instructions, where one + // stores the even registers and the other stores the odd registers. + EVT RegVT; + unsigned Opc2 = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst3 type"); + case MVT::v16i8: + Opc = ARM::VST3q8a; Opc2 = ARM::VST3q8b; RegVT = MVT::v8i8; break; + case MVT::v8i16: + Opc = ARM::VST3q16a; Opc2 = ARM::VST3q16b; RegVT = MVT::v4i16; break; + case MVT::v4f32: + Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2f32; break; + case MVT::v4i32: + Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2i32; break; + } + // Enable writeback to the address register. 
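// (Flow of the two-instruction store built below: the "a" opcode stores the
// even D halves -- the low subregs D0, D2, D4 -- and, with writeback enabled,
// also yields the incremented address; the "b" opcode then stores the odd
// halves there.  In illustrative pseudo-assembly, operand syntax not taken
// from the patch:
//
//   vst3.16 {d0, d2, d4}, [r0]!   @ even halves, address written back
//   vst3.16 {d1, d3, d5}, [r0]    @ odd halves at the updated address
//
// The dependence is threaded explicitly: the second node's address operand is
// result 0 of the first, and its chain input is result 1, so the scheduler
// cannot reorder the pair.)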
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(3)); + SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(4)); + SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(5)); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, D0, D2, D4, Chain }; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, OpsA, 7); + Chain = SDValue(VStA, 1); + + SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(3)); + SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(4)); + SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(5)); + MemAddr = SDValue(VStA, 0); + const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, D1, D3, D5, Chain }; + SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(), + MVT::Other, OpsB, 7); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } - // The source operand needs to be changed to a subreg of the original - // 128-bit operand, and the lane number needs to be adjusted accordingly. - unsigned NumElts = VT.getVectorNumElements() / 2; - unsigned SRVal = (LaneVal < NumElts ? arm_dsubreg_0 : arm_dsubreg_1); - SDValue SR = CurDAG->getTargetConstant(SRVal, MVT::i32); - SDValue NewLane = CurDAG->getTargetConstant(LaneVal % NumElts, MVT::i32); - SDNode *SubReg = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG, - dl, HalfVT, N->getOperand(0), SR); - return CurDAG->SelectNodeTo(N, Opc, VT, SDValue(SubReg, 0), NewLane); + case Intrinsic::arm_neon_vst4: { + SDValue MemAddr, MemUpdate, MemOpc; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + return NULL; + SDValue Chain = N->getOperand(0); + VT = N->getOperand(3).getValueType(); + if (VT.is64BitVector()) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst4 type"); + case MVT::v8i8: Opc = ARM::VST4d8; break; + case MVT::v4i16: Opc = ARM::VST4d16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VST4d32; break; + case MVT::v1i64: Opc = ARM::VST4d64; break; + } + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, + N->getOperand(3), N->getOperand(4), + N->getOperand(5), N->getOperand(6), Chain }; + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8); + } + // Quad registers are stored with two separate instructions, where one + // stores the even registers and the other stores the odd registers. + EVT RegVT; + unsigned Opc2 = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst4 type"); + case MVT::v16i8: + Opc = ARM::VST4q8a; Opc2 = ARM::VST4q8b; RegVT = MVT::v8i8; break; + case MVT::v8i16: + Opc = ARM::VST4q16a; Opc2 = ARM::VST4q16b; RegVT = MVT::v4i16; break; + case MVT::v4f32: + Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2f32; break; + case MVT::v4i32: + Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2i32; break; + } + // Enable writeback to the address register. 
+ MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(3)); + SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(4)); + SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(5)); + SDValue D6 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(6)); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, + D0, D2, D4, D6, Chain }; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, OpsA, 8); + Chain = SDValue(VStA, 1); + + SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(3)); + SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(4)); + SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(5)); + SDValue D7 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(6)); + MemAddr = SDValue(VStA, 0); + const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, + D1, D3, D5, D7, Chain }; + SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(), + MVT::Other, OpsB, 8); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; } - break; + case Intrinsic::arm_neon_vst2lane: { + unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 }; + unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a }; + unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b }; + return SelectVLDSTLane(Op, false, 2, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vst3lane: { + unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; + unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a }; + unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b }; + return SelectVLDSTLane(Op, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vst4lane: { + unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; + unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a }; + unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b }; + return SelectVLDSTLane(Op, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + } } } @@ -1224,20 +1869,17 @@ bool ARMDAGToDAGISel:: SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector &OutOps) { assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); - - SDValue Base, Offset, Opc; - if (!SelectAddrMode2(Op, Op, Base, Offset, Opc)) - return true; - - OutOps.push_back(Base); - OutOps.push_back(Offset); - OutOps.push_back(Opc); + // Require the address to be in a register. That is safe for all ARM + // variants and it is hard to do anything much smarter without knowing + // how the operand is used. + OutOps.push_back(Op); return false; } /// createARMISelDag - This pass converts a legalized DAG into a /// ARM-specific DAG, ready for instruction scheduling. 
/// -FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM) { - return new ARMDAGToDAGISel(TM); +FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new ARMDAGToDAGISel(TM, OptLevel); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 41c9ecc43a9f0..426cecb28eb71 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -17,9 +17,11 @@ #include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" +#include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "ARMTargetObjectFile.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -36,74 +38,101 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include using namespace llvm; -static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, - MVT PromotedBitwiseVT) { +void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, + EVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT, Promote); - AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); - setOperationAction(ISD::STORE, VT, Promote); - AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::STORE, VT.getSimpleVT(), + PromotedLdStVT.getSimpleVT()); } - MVT ElemTy = VT.getVectorElementType(); + EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::VSETCC, VT, Custom); + setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); if (ElemTy == MVT::i8 || ElemTy == MVT::i16) - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + if (ElemTy != MVT::i32) { + setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); + 
setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); + } + setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); if (VT.isInteger()) { - setOperationAction(ISD::SHL, VT, Custom); - setOperationAction(ISD::SRA, VT, Custom); - setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { - setOperationAction(ISD::AND, VT, Promote); - AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); - setOperationAction(ISD::OR, VT, Promote); - AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); - setOperationAction(ISD::XOR, VT, Promote); - AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); - } -} - -void ARMTargetLowering::addDRTypeForNEON(MVT VT) { + setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::AND, VT.getSimpleVT(), + PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::OR, VT.getSimpleVT(), + PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); + AddPromotedToType (ISD::XOR, VT.getSimpleVT(), + PromotedBitwiseVT.getSimpleVT()); + } + + // Neon does not support vector divide/remainder operations. + setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); +} + +void ARMTargetLowering::addDRTypeForNEON(EVT VT) { addRegisterClass(VT, ARM::DPRRegisterClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } -void ARMTargetLowering::addQRTypeForNEON(MVT VT) { +void ARMTargetLowering::addQRTypeForNEON(EVT VT) { addRegisterClass(VT, ARM::QPRRegisterClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } +static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { + if (TM.getSubtarget().isTargetDarwin()) + return new TargetLoweringObjectFileMachO(); + return new ARMElfTargetObjectFile(); +} + ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM), ARMPCLabelIndex(0) { + : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) { Subtarget = &TM.getSubtarget(); if (Subtarget->isTargetDarwin()) { @@ -188,11 +217,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - if (Subtarget->isThumb()) + // Libcalls should use the AAPCS base standard ABI, even if hard float + // is in effect, as per the ARM RTABI specification, section 4.1.2. 
+  if (Subtarget->isAAPCS_ABI()) {
+    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
+      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
+                            CallingConv::ARM_AAPCS);
+    }
+  }
+
+  if (Subtarget->isThumb1Only())
     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
   else
     addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) {
+  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
     addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
     addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -213,6 +251,39 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     addQRTypeForNEON(MVT::v4i32);
     addQRTypeForNEON(MVT::v2i64);
 
+    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
+    // neither Neon nor VFP support any arithmetic operations on it.
+    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
+    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
+    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
+    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
+    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
+    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
+    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
+    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
+    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
+    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
+    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
+    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
+    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
+    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
+    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
+    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+    // Neon does not support some operations on v1i64 and v2i64 types.
+    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
+    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
+    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
+
     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
     setTargetDAGCombine(ISD::SHL);
     setTargetDAGCombine(ISD::SRL);
@@ -246,7 +317,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   }
 
   // i64 operation support.
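[Editorial sketch, not part of the patch.] The libcall hunk above walks every RTLIB entry and pins it to the AAPCS calling convention. A minimal standalone sketch of the same enum-walking pattern, with hypothetical enum names standing in for RTLIB::Libcall and CallingConv:

  #include <cstdio>

  enum Libcall { ADD_F64, SUB_F64, MUL_F64, UNKNOWN_LIBCALL };
  enum CallConv { CC_C, CC_ARM_AAPCS };

  static CallConv LibcallCC[UNKNOWN_LIBCALL];

  int main() {
    // Pin every libcall to one convention, as the RTABI rule requires.
    for (int i = 0; i < UNKNOWN_LIBCALL; ++i)
      LibcallCC[static_cast<Libcall>(i)] = CC_ARM_AAPCS;
    std::printf("%d libcalls pinned\n", static_cast<int>(UNKNOWN_LIBCALL));
    return 0;
  }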
- if (Subtarget->isThumb()) { + if (Subtarget->isThumb1Only()) { setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::MULHS, MVT::i32, Expand); @@ -287,7 +358,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::RET, MVT::Other, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); @@ -300,7 +370,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // FIXME: Shouldn't need this, since no register is used, but the legalizer + // doesn't yet know how to not do that for SjLj. + setExceptionSelectorRegister(ARM::R0); + if (Subtarget->isThumb()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { @@ -309,7 +386,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); @@ -339,7 +416,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); } @@ -347,7 +424,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW, MVT::f32, Expand); // int <-> fp are custom expanded into bit_convert + ARMISD ops. - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); @@ -361,26 +438,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setStackPointerRegisterToSaveRestore(ARM::SP); setSchedulingPreference(SchedulingForRegPressure); - setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); - setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); - - if (!Subtarget->isThumb()) { - // Use branch latency information to determine if-conversion limits. - // FIXME: If-converter should use instruction latency of the branch being - // eliminated to compute the threshold. For ARMv6, the branch "latency" - // varies depending on whether it's dynamically or statically predicted - // and on whether the destination is in the prefetch buffer. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData(); - unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass()); - if (Latency > 1) { - setIfCvtBlockSizeLimit(Latency-1); - if (Latency > 2) - setIfCvtDupBlockSizeLimit(Latency-2); - } else { - setIfCvtBlockSizeLimit(10); - setIfCvtDupBlockSizeLimit(2); - } + + // FIXME: If-converter should use instruction latency to determine + // profitability rather than relying on fixed limits. + if (Subtarget->getCPUString() == "generic") { + // Generic (and overly aggressive) if-conversion limits. + setIfCvtBlockSizeLimit(10); + setIfCvtDupBlockSizeLimit(2); + } else if (Subtarget->hasV6Ops()) { + setIfCvtBlockSizeLimit(2); + setIfCvtDupBlockSizeLimit(1); + } else { + setIfCvtBlockSizeLimit(3); + setIfCvtDupBlockSizeLimit(2); } maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type @@ -401,6 +471,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::tCALL: return "ARMISD::tCALL"; case ARMISD::BRCOND: return "ARMISD::BRCOND"; case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; @@ -425,6 +496,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; @@ -453,13 +526,21 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; - case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ"; + case ARMISD::VDUP: return "ARMISD::VDUP"; + case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; + case ARMISD::VEXT: return "ARMISD::VEXT"; + case ARMISD::VREV64: return "ARMISD::VREV64"; + case ARMISD::VREV32: return "ARMISD::VREV32"; + case ARMISD::VREV16: return "ARMISD::VREV16"; + case ARMISD::VZIP: return "ARMISD::VZIP"; + case ARMISD::VUZP: return "ARMISD::VUZP"; + case ARMISD::VTRN: return "ARMISD::VTRN"; } } /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { - return getTargetMachine().getSubtarget().isThumb() ? 1 : 2; + return getTargetMachine().getSubtarget().isThumb() ? 0 : 1; } //===----------------------------------------------------------------------===// @@ -469,7 +550,7 @@ unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { switch (CC) { - default: assert(0 && "Unknown condition code!"); + default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: return ARMCC::NE; case ISD::SETEQ: return ARMCC::EQ; case ISD::SETGT: return ARMCC::GT; @@ -483,15 +564,12 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { } } -/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It -/// returns true if the operands should be inverted to form the proper -/// comparison. 
-static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. +static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, ARMCC::CondCodes &CondCode2) { - bool Invert = false; CondCode2 = ARMCC::AL; switch (CC) { - default: assert(0 && "Unknown FP condition!"); + default: llvm_unreachable("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: CondCode = ARMCC::EQ; break; case ISD::SETGT: @@ -499,7 +577,7 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETGE: case ISD::SETOGE: CondCode = ARMCC::GE; break; case ISD::SETOLT: CondCode = ARMCC::MI; break; - case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break; + case ISD::SETOLE: CondCode = ARMCC::LS; break; case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; case ISD::SETO: CondCode = ARMCC::VC; break; case ISD::SETUO: CondCode = ARMCC::VS; break; @@ -513,24 +591,16 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, case ISD::SETNE: case ISD::SETUNE: CondCode = ARMCC::NE; break; } - return Invert; } //===----------------------------------------------------------------------===// // Calling Convention Implementation -// -// The lower operations present on calling convention works on this order: -// LowerCALL (virt regs --> phys regs, virt regs --> stack) -// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) -// LowerRET (virt regs --> phys regs) -// LowerCALL (phys regs --> virt regs) -// //===----------------------------------------------------------------------===// #include "ARMGenCallingConv.inc" // APCS f64 is in register pairs, possibly split to stack -static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -560,7 +630,7 @@ static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; } -static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -573,7 +643,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, } // AAPCS f64 is in aligned register pairs -static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; @@ -603,7 +673,7 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; } -static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -615,7 +685,7 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; // we handled it } -static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; @@ -635,7 +705,7 @@ static bool f64RetAssign(unsigned &ValNo, MVT 
&ValVT, MVT &LocVT,
   return true;
 }
 
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
@@ -646,7 +716,7 @@ static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   return true;  // we handled it
 }
 
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                        CCValAssign::LocInfo &LocInfo,
                                        ISD::ArgFlagsTy &ArgFlags,
                                        CCState &State) {
@@ -656,49 +726,48 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 
 /// CCAssignFnForNode - Selects the correct CCAssignFn for a the
 /// given CallingConvention value.
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
-                                                 bool Return) const {
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+                                                 bool Return,
+                                                 bool isVarArg) const {
   switch (CC) {
   default:
-    assert(0 && "Unsupported calling convention");
+    llvm_unreachable("Unsupported calling convention");
   case CallingConv::C:
   case CallingConv::Fast:
-   // Use target triple & subtarget features to do actual dispatch.
-   if (Subtarget->isAAPCS_ABI()) {
-     if (Subtarget->hasVFP2() &&
-         FloatABIType == FloatABI::Hard)
-       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
-     else
-       return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
-   } else
-     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+    // Use target triple & subtarget features to do actual dispatch.
+    if (Subtarget->isAAPCS_ABI()) {
+      if (Subtarget->hasVFP2() &&
+          FloatABIType == FloatABI::Hard && !isVarArg)
+        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+      else
+        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+    } else
+      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   case CallingConv::ARM_AAPCS_VFP:
-   return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
   case CallingConv::ARM_AAPCS:
-   return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
   case CallingConv::ARM_APCS:
-   return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   }
 }
 
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. The returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode *ARMTargetLowering::
-LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
-                unsigned CallingConv, SelectionDAG &DAG) {
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue
+ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+                                   CallingConv::ID CallConv, bool isVarArg,
+                                   const SmallVectorImpl<ISD::InputArg> &Ins,
+                                   DebugLoc dl, SelectionDAG &DAG,
+                                   SmallVectorImpl<SDValue> &InVals) {
 
-  DebugLoc dl = TheCall->getDebugLoc();
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
-  bool isVarArg = TheCall->isVarArg();
-  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
-  CCInfo.AnalyzeCallResult(TheCall,
-                           CCAssignFnForNode(CallingConv, /* Return*/ true));
-
-  SmallVector<SDValue, 8> ResultVals;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallResult(Ins,
+                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -743,20 +812,17 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
     }
 
     switch (VA.getLocInfo()) {
-    default: assert(0 && "Unknown loc info!");
+    default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::BCvt:
       Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
       break;
     }
 
-    ResultVals.push_back(Val);
+    InVals.push_back(Val);
   }
 
-  // Merge everything together with a MERGE_VALUES node.
-  ResultVals.push_back(Chain);
-  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
-                     &ResultVals[0], ResultVals.size()).getNode();
+  return Chain;
 }
 
 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
@@ -776,11 +842,11 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
 
 /// LowerMemOpCallTo - Store the argument to the stack.
 SDValue
-ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
-                                    const SDValue &StackPtr,
-                                    const CCValAssign &VA, SDValue Chain,
-                                    SDValue Arg, ISD::ArgFlagsTy Flags) {
-  DebugLoc dl = TheCall->getDebugLoc();
+ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
+                                    SDValue StackPtr, SDValue Arg,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    const CCValAssign &VA,
+                                    ISD::ArgFlagsTy Flags) {
   unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
@@ -791,14 +857,13 @@ ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
                       PseudoSourceValue::getStack(), LocMemOffset);
 }
 
-void ARMTargetLowering::PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG,
+void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                          SDValue Chain, SDValue &Arg,
                                          RegsToPassVector &RegsToPass,
                                          CCValAssign &VA, CCValAssign &NextVA,
                                          SDValue &StackPtr,
                                          SmallVector<SDValue, 8> &MemOpChains,
                                          ISD::ArgFlagsTy Flags) {
-  DebugLoc dl = TheCall->getDebugLoc();
 
   SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
@@ -811,27 +876,31 @@ void ARMTargetLowering::PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG,
     if (StackPtr.getNode() == 0)
       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 
-    MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, NextVA,
-                                           Chain, fmrrd.getValue(1), Flags));
+    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+                                           dl, DAG, NextVA,
+                                           Flags));
   }
 }
 
-/// LowerCALL - Lowering a ISD::CALL node into a callseq_start <-
+/// LowerCall - Lowering a call into a callseq_start <-
 /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
 /// nodes.
-SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
-  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
-  MVT RetVT = TheCall->getRetValType(0);
-  SDValue Chain = TheCall->getChain();
-  unsigned CC = TheCall->getCallingConv();
-  bool isVarArg = TheCall->isVarArg();
-  SDValue Callee = TheCall->getCallee();
-  DebugLoc dl = TheCall->getDebugLoc();
+SDValue
+ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+                             CallingConv::ID CallConv, bool isVarArg,
+                             bool isTailCall,
+                             const SmallVectorImpl<ISD::OutputArg> &Outs,
+                             const SmallVectorImpl<ISD::InputArg> &Ins,
+                             DebugLoc dl, SelectionDAG &DAG,
+                             SmallVectorImpl<SDValue> &InVals) {
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
-  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false));
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(Outs,
+                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -851,12 +920,12 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
        i != e;
        ++i, ++realArgIdx) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = TheCall->getArg(realArgIdx);
-    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx);
+    SDValue Arg = Outs[realArgIdx].Val;
+    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
-    default: assert(0 && "Unknown loc info!");
+    default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::SExt:
       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
@@ -872,7 +941,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
       break;
     }
 
-    // f64 and v2f64 are passed in i32 pairs and must be split into pieces
+    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
     if (VA.needsCustom()) {
       if (VA.getLocVT() == MVT::v2f64) {
         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
@@ -880,23 +949,23 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(1, MVT::i32));
 
-        PassF64ArgInRegs(TheCall, DAG, Chain, Op0, RegsToPass,
+        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
 
         VA = ArgLocs[++i]; // skip ahead to next loc
         if (VA.isRegLoc()) {
-          PassF64ArgInRegs(TheCall, DAG, Chain, Op1, RegsToPass,
+          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
         } else {
           assert(VA.isMemLoc());
           if (StackPtr.getNode() == 0)
             StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 
-          MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
-                                                 Chain, Op1, Flags));
+          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
+                                                 dl, DAG, VA, Flags));
        }
      } else {
-        PassF64ArgInRegs(TheCall, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
+        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
@@ -906,8 +975,8 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
      if (StackPtr.getNode() == 0)
        StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
 
-      MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
-                                             Chain, Arg, Flags));
+
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, + dl, DAG, VA, Flags)); } } @@ -933,17 +1002,17 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) { GlobalValue *GV = G->getGlobal(); isDirect = true; - bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage()); + bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetDarwin()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && !isExt; // tBX takes a register source operand. - if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, - ARMCP::CPStub, 4); + if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -960,9 +1029,9 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { isARMFunc = !Subtarget->isThumb() || isStub; // tBX takes a register source operand. const char *Sym = S->getSymbol(); - if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex, - ARMCP::CPStub, 4); + if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, @@ -977,7 +1046,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // FIXME: handle tail calls differently. unsigned CallOpc; if (Subtarget->isThumb()) { - if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc)) + if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; @@ -986,7 +1055,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) : ARMISD::CALL_NOLINK; } - if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) { + if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) { // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag); InFlag = Chain.getValue(1); @@ -1011,30 +1080,31 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); - if (RetVT != MVT::Other) + if (!Ins.empty()) InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we // return. 
- return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), - Op.getResNo()); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, + dl, DAG, InVals); } -SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { - // The chain is always operand #0 - SDValue Chain = Op.getOperand(0); - DebugLoc dl = Op.getDebugLoc(); +SDValue +ARMTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { // CCValAssign - represent the assignment of the return value to a location. SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); // CCState - Info about the registers and stack slots. - CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, + *DAG.getContext()); - // Analyze return values of ISD::RET. - CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true)); + // Analyze outgoing return values. + CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, + isVarArg)); // If this is the first return lowered for this function, add // the regs to the liveout set for the function. @@ -1053,12 +1123,10 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - // ISD::RET => ret chain, (regnum1,val1), ... - // So i*2+1 index only the regnums - SDValue Arg = Op.getOperand(realRVLocIdx*2+1); + SDValue Arg = Outs[realRVLocIdx].Val; switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); @@ -1112,13 +1180,13 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { } // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as -// their target countpart wrapped in the ARMISD::Wrapper node. Suppose N is +// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here DebugLoc dl = Op.getDebugLoc(); ConstantPoolSDNode *CP = cast(Op); @@ -1137,11 +1205,11 @@ SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) { DebugLoc dl = GA->getDebugLoc(); - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, - PCAdj, "tlsgd", true); + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, "tlsgd", true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); @@ -1154,12 +1222,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; - Entry.Ty = (const Type *) Type::Int32Ty; + Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); // FIXME: is there useful debug info available here? std::pair CallResult = - LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, - 0, CallingConv::C, false, + LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -1173,16 +1242,16 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, DebugLoc dl = GA->getDebugLoc(); SDValue Offset; SDValue Chain = DAG.getEntryNode(); - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); - if (GV->isDeclaration()){ + if (GV->isDeclaration()) { // initial exec model unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, - PCAdj, "gottpoff", true); + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, "gottpoff", true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); @@ -1194,8 +1263,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); } else { // local exec model - ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff"); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); @@ -1222,59 +1290,47 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = - new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT"); + new ARMConstantPoolValue(GV, UseGOTOFF ? 
"GOTOFF" : "GOT"); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - CPAddr, NULL, 0); + CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + Result = DAG.getLoad(PtrVT, dl, Chain, Result, + PseudoSourceValue::getGOT(), 0); return Result; } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); } } -/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol -/// even in non-static mode. -static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) { - // If symbol visibility is hidden, the extra load is not needed if - // the symbol is definitely defined in the current translation unit. - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); - if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage())) - return false; - return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker()); -} - SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - bool IsIndirect = GVIsIndirectSymbol(GV, RelocM); SDValue CPAddr; if (RelocM == Reloc::Static) CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); else { - unsigned PCAdj = (RelocM != Reloc::PIC_) - ? 0 : (Subtarget->isThumb() ? 4 : 8); - ARMCP::ARMCPKind Kind = IsIndirect ? ARMCP::CPNonLazyPtr - : ARMCP::CPValue; - ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, - Kind, PCAdj); + unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1286,7 +1342,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } - if (IsIndirect) + + if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); return Result; @@ -1296,32 +1353,55 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG){ assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); - MVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8;
-  ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_",
-                                                       ARMPCLabelIndex,
-                                                       ARMCP::CPValue, PCAdj);
+  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+                                                       "_GLOBAL_OFFSET_TABLE_",
+                                                       ARMPCLabelIndex, PCAdj);
   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
-  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+                               PseudoSourceValue::getConstantPool(), 0);
   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
 }
 
 SDValue
 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   DebugLoc dl = Op.getDebugLoc();
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
-  case Intrinsic::arm_thread_pointer:
-      return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+  case Intrinsic::arm_thread_pointer: {
+    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+  }
+  case Intrinsic::eh_sjlj_lsda: {
+    MachineFunction &MF = DAG.getMachineFunction();
+    EVT PtrVT = getPointerTy();
+    DebugLoc dl = Op.getDebugLoc();
+    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+    SDValue CPAddr;
+    unsigned PCAdj = (RelocM != Reloc::PIC_)
+      ? 0 : (Subtarget->isThumb() ? 4 : 8);
+    ARMConstantPoolValue *CPV =
+      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
+                               ARMCP::CPLSDA, PCAdj);
+    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+    SDValue Result =
+      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
+    SDValue Chain = Result.getValue(1);
+
+    if (RelocM == Reloc::PIC_) {
+      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+    }
+    return Result;
+  }
   case Intrinsic::eh_sjlj_setjmp:
-    SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32,
-                              Op.getOperand(1));
-    return Res;
+    return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1));
   }
 }
 
@@ -1330,12 +1410,59 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
   DebugLoc dl = Op.getDebugLoc();
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
 }
 
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+  SDNode *Node = Op.getNode();
+  DebugLoc dl = Node->getDebugLoc();
+  EVT VT = Node->getValueType(0);
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size  = Op.getOperand(1);
+  SDValue Align = Op.getOperand(2);
+
+  // Chain the dynamic stack allocation so that it doesn't modify the stack
+  // pointer when other instructions are using the stack.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+  unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
+  unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
+  if (AlignVal > StackAlign)
+    // Do this now since selection pass cannot introduce new target
+    // independent node.
+    Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
+
+  // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
+  // using a "add r, sp, r" instead. Negate the size now so we don't have to
+  // do even more horrible hack later.
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  if (AFI->isThumb1OnlyFunction()) {
+    bool Negate = true;
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
+    if (C) {
+      uint32_t Val = C->getZExtValue();
+      if (Val <= 508 && ((Val & 3) == 0))
+        Negate = false;
+    }
+    if (Negate)
+      Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
+  }
+
+  SDVTList VTList = DAG.getVTList(VT, MVT::Other);
+  SDValue Ops1[] = { Chain, Size, Align };
+  SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
+  Chain = Res.getValue(1);
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+                             DAG.getIntPtrConstant(0, true), SDValue());
+  SDValue Ops2[] = { Res, Chain };
+  return DAG.getMergeValues(Ops2, 2, dl);
+}
+
 SDValue
 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &Root, SelectionDAG &DAG,
@@ -1344,7 +1471,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 
   TargetRegisterClass *RC;
-  if (AFI->isThumbFunction())
+  if (AFI->isThumb1OnlyFunction())
     RC = ARM::tGPRRegisterClass;
   else
     RC = ARM::GPRRegisterClass;
@@ -1371,21 +1498,25 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
 }
 
 SDValue
-ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+                                        CallingConv::ID CallConv, bool isVarArg,
+                                        const SmallVectorImpl<ISD::InputArg>
+                                          &Ins,
+                                        DebugLoc dl, SelectionDAG &DAG,
+                                        SmallVectorImpl<SDValue> &InVals) {
+
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
 
-  SDValue Root = Op.getOperand(0);
-  DebugLoc dl = Op.getDebugLoc();
-  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
-  unsigned CC = MF.getFunction()->getCallingConv();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                  *DAG.getContext());
   CCInfo.AnalyzeFormalArguments(Ins,
                                 CCAssignFnForNode(CallConv, /* Return*/ false,
                                                   isVarArg));
 
   SmallVector<SDValue, 16> ArgValues;
 
@@ -1394,7 +1525,7 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
 
     // Arguments stored in registers.
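[Editorial sketch, not part of the patch.] Returning to LowerDYNAMIC_STACKALLOC above: Thumb1 has no "sub r, sp, r", so the size is negated and added instead, while 4-byte-aligned sizes up to 508 still fit the immediate form of "sub sp, #imm". A minimal check of the equivalence, with hypothetical values:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t sp = 0x1000, size = 24;
    bool immForm = size <= 508 && (size & 3) == 0;
    uint32_t sp1 = immForm ? sp - size          // sub sp, #imm
                           : sp + (0u - size);  // add sp, r  (r holds -size)
    assert(sp1 == sp - size);  // both forms adjust sp identically (mod 2^32)
    return 0;
  }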
if (VA.isRegLoc()) { - MVT RegVT = VA.getLocVT(); + EVT RegVT = VA.getLocVT(); SDValue ArgValue; if (VA.needsCustom()) { @@ -1404,43 +1535,43 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { if (VA.getLocVT() == MVT::v2f64) { SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], - Root, DAG, dl); + Chain, DAG, dl); VA = ArgLocs[++i]; // skip ahead to next loc SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], - Root, DAG, dl); + Chain, DAG, dl); ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); } else - ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Root, DAG, dl); + ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } else { TargetRegisterClass *RC; - if (FloatABIType == FloatABI::Hard && RegVT == MVT::f32) + + if (RegVT == MVT::f32) RC = ARM::SPRRegisterClass; - else if (FloatABIType == FloatABI::Hard && RegVT == MVT::f64) + else if (RegVT == MVT::f64) RC = ARM::DPRRegisterClass; - else if (AFI->isThumbFunction()) - RC = ARM::tGPRRegisterClass; + else if (RegVT == MVT::v2f64) + RC = ARM::QPRRegisterClass; + else if (RegVT == MVT::i32) + RC = (AFI->isThumb1OnlyFunction() ? + ARM::tGPRRegisterClass : ARM::GPRRegisterClass); else - RC = ARM::GPRRegisterClass; - - assert((RegVT == MVT::i32 || RegVT == MVT::f32 || - (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)) && - "RegVT not supported by FORMAL_ARGUMENTS Lowering"); + llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); // Transform the arguments in physical registers into virtual ones. unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); } // If this is an 8 or 16-bit value, it is really passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then // truncate to the right size. switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); @@ -1457,7 +1588,7 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { break; } - ArgValues.push_back(ArgValue); + InVals.push_back(ArgValue); } else { // VA.isRegLoc() @@ -1470,7 +1601,7 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { // Create load nodes to retrieve arguments from the stack. 
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
     }
   }
 
@@ -1500,31 +1631,27 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
       SmallVector<SDValue, 4> MemOps;
       for (; NumGPRs < 4; ++NumGPRs) {
         TargetRegisterClass *RC;
-        if (AFI->isThumbFunction())
+        if (AFI->isThumb1OnlyFunction())
           RC = ARM::tGPRRegisterClass;
         else
           RC = ARM::GPRRegisterClass;
 
         unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
-        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
+        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
         MemOps.push_back(Store);
         FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                           DAG.getConstant(4, getPointerTy()));
       }
       if (!MemOps.empty())
-        Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                           &MemOps[0], MemOps.size());
+        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &MemOps[0], MemOps.size());
     } else
       // This will point to the next argument passed via stack.
       VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
   }
 
-  ArgValues.push_back(Root);
-
-  // Return the new list of results.
-  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
-                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+  return Chain;
 }
 
 /// isFloatingPointZero - Return true if this is +0.0.
@@ -1543,46 +1670,46 @@ static bool isFloatingPointZero(SDValue Op) {
   return false;
 }
 
-static bool isLegalCmpImmediate(unsigned C, bool isThumb) {
-  return ( isThumb && (C & ~255U) == 0) ||
-         (!isThumb && ARM_AM::getSOImmVal(C) != -1);
+static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) {
+  return ( isThumb1Only && (C & ~255U) == 0) ||
+         (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1);
 }
 
 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
 /// the given operands.
 static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                         SDValue &ARMCC, SelectionDAG &DAG, bool isThumb,
+                         SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only,
                          DebugLoc dl) {
   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
     unsigned C = RHSC->getZExtValue();
-    if (!isLegalCmpImmediate(C, isThumb)) {
+    if (!isLegalCmpImmediate(C, isThumb1Only)) {
       // Constant does not fit, try adjusting it by one?
       switch (CC) {
       default: break;
       case ISD::SETLT:
       case ISD::SETGE:
-        if (isLegalCmpImmediate(C-1, isThumb)) {
+        if (isLegalCmpImmediate(C-1, isThumb1Only)) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
-        if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) {
+        if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          RHS = DAG.getConstant(C-1, MVT::i32);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
-        if (isLegalCmpImmediate(C+1, isThumb)) {
+        if (isLegalCmpImmediate(C+1, isThumb1Only)) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
-        if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) {
+        if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          RHS = DAG.getConstant(C+1, MVT::i32);
        }
@@ -1620,7 +1747,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
 
 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *ST) {
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -1631,13 +1758,12 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
   if (LHS.getValueType() == MVT::i32) {
     SDValue ARMCC;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
-  if (FPCCToARMCC(CC, CondCode, CondCode2))
-    std::swap(TrueVal, FalseVal);
+  FPCCToARMCC(CC, CondCode, CondCode2);
 
   SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
@@ -1666,16 +1792,14 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
   if (LHS.getValueType() == MVT::i32) {
     SDValue ARMCC;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl);
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                        Chain, Dest, ARMCC, CCR,Cmp);
   }
 
   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
   ARMCC::CondCodes CondCode, CondCode2;
-  if (FPCCToARMCC(CC, CondCode, CondCode2))
-    // Swap the LHS/RHS of the comparison if needed.
-    std::swap(LHS, RHS);
+  FPCCToARMCC(CC, CondCode, CondCode2);
 
   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
@@ -1697,21 +1821,32 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
   SDValue Index = Op.getOperand(2);
   DebugLoc dl = Op.getDebugLoc();
 
-  MVT PTy = getPointerTy();
+  EVT PTy = getPointerTy();
   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
-  SDValue UId =  DAG.getConstant(AFI->createJumpTableUId(), PTy);
+  SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
-  bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
-  Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl,
-                     Chain, Addr, NULL, 0);
-  Chain = Addr.getValue(1);
-  if (isPIC)
+  if (Subtarget->isThumb2()) {
+    // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+    // which does another jump to the destination. This also makes it easier
+    // to translate it to TBB / TBH later.
+    // FIXME: This might not work if the function is extremely large.
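[Editorial sketch, not part of the patch.] Stepping back to getARMCmp above: the rewrites rest on simple order identities, so an immediate that cannot be encoded may become encodable after adjusting it by one. A standalone check with hypothetical values:

  #include <cassert>
  #include <cstdint>

  int main() {
    const int32_t C = 50;
    for (int32_t x = -100; x < 100; ++x) {
      assert((x < C) == (x <= C - 1));  // SETLT -> SETLE with C-1
      assert((x > C) == (x >= C + 1));  // SETGT -> SETGE with C+1
    }
    return 0;
  }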
+ return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, + Addr, Op.getOperand(2), JTI, UId); + } + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); - return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + } else { + Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + } } static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { @@ -1723,7 +1858,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned Opc = Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; @@ -1737,8 +1872,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT SrcVT = Tmp1.getValueType(); + EVT VT = Op.getValueType(); + EVT SrcVT = Tmp1.getValueType(); SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); @@ -1749,7 +1884,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) @@ -1784,7 +1919,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, unsigned BytesLeft = SizeVal & 3; unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; - MVT VT = MVT::i32; + EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; const unsigned MAX_LOADS_IN_LDM = 6; @@ -1890,45 +2025,55 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { /// getZeroVector - Returns a vector of specified type with all zero elements. /// -static SDValue getZeroVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); // Zero vectors are used to represent vector negation and in those cases // will be implemented with the NEON VNEG instruction. However, VNEG does // not support i64 elements, so sometimes the zero vectors will need to be // explicitly constructed. For those cases, and potentially other uses in - // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted // to their dest type. This ensures they get CSE'd. 
   SDValue Vec;
-  SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
-  if (VT.getSizeInBits() == 64)
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  else
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
+  SmallVector<SDValue, 16> Ops;
+  MVT TVT;
+
+  if (VT.getSizeInBits() == 64) {
+    Ops.assign(8, Cst); TVT = MVT::v8i8;
+  } else {
+    Ops.assign(16, Cst); TVT = MVT::v16i8;
+  }
+  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());

   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
 }

 /// getOnesVector - Returns a vector of specified type with all bits set.
 ///
-static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) {
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");

-  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
-  // type. This ensures they get CSE'd.
+  // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
+  // dest type. This ensures they get CSE'd.
   SDValue Vec;
-  SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
-  if (VT.getSizeInBits() == 64)
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
-  else
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+  SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
+  SmallVector<SDValue, 16> Ops;
+  MVT TVT;
+
+  if (VT.getSizeInBits() == 64) {
+    Ops.assign(8, Cst); TVT = MVT::v8i8;
+  } else {
+    Ops.assign(16, Cst); TVT = MVT::v16i8;
+  }
+  Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());

   return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
 }

 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                           const ARMSubtarget *ST) {
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();

   // Lower vector shifts on NEON to use VSHL.
@@ -1947,7 +2092,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
     // NEON uses the same intrinsics for both left and right shifts.  For
     // right shifts, the shift amounts are negative, so negate the vector of
     // shift amounts.
-    MVT ShiftVT = N->getOperand(1).getValueType();
+    EVT ShiftVT = N->getOperand(1).getValueType();
     SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
                                        getZeroVector(ShiftVT, DAG, dl),
                                        N->getOperand(1));
@@ -1959,8 +2104,11 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                        N->getOperand(0), NegatedCount);
   }

-  assert(VT == MVT::i64 &&
-         (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+  // We can get here for a node like i32 = ISD::SHL i32, i64
+  if (VT != MVT::i64)
+    return SDValue();
+
+  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
          "Unknown shift to lower!");

   // We only lower SRA, SRL of 1 here, all others use generic lowering.
@@ -1969,7 +2117,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
     return SDValue();

   // If we are in thumb mode, we don't have RRX.
-  if (ST->isThumb()) return SDValue();
+  if (ST->isThumb1Only()) return SDValue();

   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
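// What the RRX lowering below computes, on plain integers (an editorial
// sketch, not part of the patch): shifting a 64-bit value right by one moves
// bit 0 of the high word into bit 31 of the low word, which is exactly
// "lsrs hi" (or "asrs hi" for SRA) followed by "rrx lo" - rotate right with
// extend through the carry flag. Thumb1 bails out above because it has no RRX.
#include <cstdint>

uint64_t srl64By1(uint32_t Hi, uint32_t Lo) {
  uint32_t Carry = Hi & 1;                     // bit shifted out of Hi
  uint32_t NewHi = Hi >> 1;                    // LSRS Hi, Hi, #1
  uint32_t NewLo = (Carry << 31) | (Lo >> 1);  // RRX Lo, Lo
  return ((uint64_t)NewHi << 32) | NewLo;
}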
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), @@ -1998,13 +2146,13 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); DebugLoc dl = Op.getDebugLoc(); if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { - default: assert(0 && "Illegal FP comparison"); break; + default: llvm_unreachable("Illegal FP comparison"); break; case ISD::SETUNE: case ISD::SETNE: Invert = true; // Fallthrough case ISD::SETOEQ: @@ -2043,7 +2191,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { } else { // Integer comparisons. switch (SetCCOpcode) { - default: assert(0 && "Illegal integer comparison"); break; + default: llvm_unreachable("Illegal integer comparison"); break; case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; @@ -2056,7 +2204,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } - // Detect VTST (Vector Test Bits) = vicmp ne (and (op0, op1), zero). + // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). if (Opc == ARMISD::VCEQ) { SDValue AndOp; @@ -2147,7 +2295,7 @@ static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, } default: - assert(0 && "unexpected size for isVMOVSplat"); + llvm_unreachable("unexpected size for isVMOVSplat"); break; } @@ -2174,22 +2322,123 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SplatBitSize, DAG); } -static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) { +static bool isVEXTMask(const SmallVectorImpl &M, EVT VT, + bool &ReverseVEXT, unsigned &Imm) { + unsigned NumElts = VT.getVectorNumElements(); + ReverseVEXT = false; + Imm = M[0]; + + // If this is a VEXT shuffle, the immediate value is the index of the first + // element. The other shuffle indices must be the successive elements after + // the first one. + unsigned ExpectedElt = Imm; + for (unsigned i = 1; i < NumElts; ++i) { + // Increment the expected index. If it wraps around, it may still be + // a VEXT but the source vectors must be swapped. + ExpectedElt += 1; + if (ExpectedElt == NumElts * 2) { + ExpectedElt = 0; + ReverseVEXT = true; + } + + if (ExpectedElt != static_cast(M[i])) + return false; + } + + // Adjust the index value if the source operands will be swapped. + if (ReverseVEXT) + Imm -= NumElts; + + return true; +} + +/// isVREVMask - Check if a vector shuffle corresponds to a VREV +/// instruction with the specified blocksize. (The order of the elements +/// within each block of the vector is reversed.) 
+static bool isVREVMask(const SmallVectorImpl &M, EVT VT, + unsigned BlockSize) { + assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && + "Only possible block sizes for VREV are: 16, 32, 64"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + unsigned BlockElts = M[0] + 1; + + if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) + return false; + + for (unsigned i = 0; i < NumElts; ++i) { + if ((unsigned) M[i] != + (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) + return false; + } + + return true; +} + +static bool isVTRNMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((unsigned) M[i] != i + WhichResult || + (unsigned) M[i+1] != i + NumElts + WhichResult) + return false; + } + return true; +} + +static bool isVUZPMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + if ((unsigned) M[i] != 2 * i + WhichResult) + return false; + } + + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + return false; + + return true; +} + +static bool isVZIPMask(const SmallVectorImpl &M, EVT VT, + unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((unsigned) M[i] != Idx || + (unsigned) M[i+1] != Idx + NumElts) + return false; + Idx += 1; + } + + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. + if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + return false; + + return true; +} + +static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) { // Canonicalize all-zeros and all-ones vectors. - ConstantSDNode *ConstVal = dyn_cast(Val.getNode()); + ConstantSDNode *ConstVal = cast(Val.getNode()); if (ConstVal->isNullValue()) return getZeroVector(VT, DAG, dl); if (ConstVal->isAllOnesValue()) return getOnesVector(VT, DAG, dl); - MVT CanonicalVT; + EVT CanonicalVT; if (VT.is64BitVector()) { switch (Val.getValueType().getSizeInBits()) { case 8: CanonicalVT = MVT::v8i8; break; case 16: CanonicalVT = MVT::v4i16; break; case 32: CanonicalVT = MVT::v2i32; break; case 64: CanonicalVT = MVT::v1i64; break; - default: assert(0 && "unexpected splat element type"); break; + default: llvm_unreachable("unexpected splat element type"); break; } } else { assert(VT.is128BitVector() && "unknown splat vector size"); @@ -2198,7 +2447,7 @@ static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) { case 16: CanonicalVT = MVT::v8i16; break; case 32: CanonicalVT = MVT::v4i32; break; case 64: CanonicalVT = MVT::v2i64; break; - default: assert(0 && "unexpected splat element type"); break; + default: llvm_unreachable("unexpected splat element type"); break; } } @@ -2213,69 +2462,291 @@ static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) { // If this is a case we can't handle, return null and let the default // expansion code take care of it. 
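// Worked mask examples (editorial, not part of the patch) for the shuffle
// predicates above, on an 8-element vector such as v8i8, where indices 0..7
// select from the first operand and 8..15 from the second:
const int VextMask3[8]  = {3, 4, 5, 6, 7, 8, 9, 10};   // VEXT #3: last 5 of V1, first 3 of V2
const int Vrev64Mask[8] = {7, 6, 5, 4, 3, 2, 1, 0};    // VREV64.8: BlockElts = M[0]+1 = 8
const int Vrev32Mask[8] = {3, 2, 1, 0, 7, 6, 5, 4};    // VREV32.8: reverse within each word
const int VtrnMask0[8]  = {0, 8, 2, 10, 4, 12, 6, 14}; // VTRN, first result
const int VuzpMask0[8]  = {0, 2, 4, 6, 8, 10, 12, 14}; // VUZP, first result
const int VzipMask0[8]  = {0, 8, 1, 9, 2, 10, 3, 11};  // VZIP, first result
// The WhichResult == 1 forms (M[0] == 1) select the second output of the
// same in-place VTRN / VUZP / VZIP operation.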
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { - BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); - assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + BuildVectorSDNode *BVN = cast(Op.getNode()); DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, DAG); - if (Val.getNode()) - return BuildSplat(Val, Op.getValueType(), DAG, dl); + if (SplatBitSize <= 64) { + SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, DAG); + if (Val.getNode()) + return BuildSplat(Val, VT, DAG, dl); + } + } + + // If there are only 2 elements in a 128-bit vector, insert them into an + // undef vector. This handles the common case for 128-bit vector argument + // passing, where the insertions should be translated to subreg accesses + // with no real instructions. + if (VT.is128BitVector() && Op.getNumOperands() == 2) { + SDValue Val = DAG.getUNDEF(VT); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0.getOpcode() != ISD::UNDEF) + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0, + DAG.getIntPtrConstant(0)); + if (Op1.getOpcode() != ISD::UNDEF) + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1, + DAG.getIntPtrConstant(1)); + return Val; } return SDValue(); } -static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { - return Op; +/// isShuffleMaskLegal - Targets can use this to indicate that they only +/// support *some* VECTOR_SHUFFLE operations, those with specific masks. +/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values +/// are assumed to be legal. +bool +ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + EVT VT) const { + if (VT.getVectorNumElements() == 4 && + (VT.is128BitVector() || VT.is64BitVector())) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (M[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = M[i]; + } + + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = + PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return true; + } + + bool ReverseVEXT; + unsigned Imm, WhichResult; + + return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + isVREVMask(M, VT, 64) || + isVREVMask(M, VT, 32) || + isVREVMask(M, VT, 16) || + isVEXTMask(M, VT, ReverseVEXT, Imm) || + isVTRNMask(M, VT, WhichResult) || + isVUZPMask(M, VT, WhichResult) || + isVZIPMask(M, VT, WhichResult)); +} + +/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit +/// the specified operations to build the shuffle. 
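// How an entry of the PerfectShuffleTable is packed, per the unpacking in
// GeneratePerfectShuffle below (editorial sketch; the decode helper is
// illustrative only):
#include <cstdio>

void decodePFEntry(unsigned PFEntry) {
  unsigned Cost  = PFEntry >> 30;                     // top 2 bits: # of ops
  unsigned OpNum = (PFEntry >> 26) & 0x0F;            // OP_VREV, OP_VEXT1, ...
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); // left input, recursive
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13) - 1); // right input, recursive
  std::printf("cost=%u op=%u lhs=%u rhs=%u\n", Cost, OpNum, LHSID, RHSID);
}

// The table is indexed by the 4-lane mask written in base 9 (eight lane
// choices plus 8 for an undef lane, as in isShuffleMaskLegal above):
//   PFTableIndex = M0*9*9*9 + M1*9*9 + M2*9 + M3
// so the identity mask <0,1,2,3> encodes to 0*729 + 1*81 + 2*9 + 3 = 102,
// the same value OP_COPY compares LHSID against.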
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, + SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + unsigned OpNum = (PFEntry >> 26) & 0x0F; + unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); + unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); + + enum { + OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> + OP_VREV, + OP_VDUP0, + OP_VDUP1, + OP_VDUP2, + OP_VDUP3, + OP_VEXT1, + OP_VEXT2, + OP_VEXT3, + OP_VUZPL, // VUZP, left result + OP_VUZPR, // VUZP, right result + OP_VZIPL, // VZIP, left result + OP_VZIPR, // VZIP, right result + OP_VTRNL, // VTRN, left result + OP_VTRNR // VTRN, right result + }; + + if (OpNum == OP_COPY) { + if (LHSID == (1*9+2)*9+3) return LHS; + assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); + return RHS; + } + + SDValue OpLHS, OpRHS; + OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); + OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); + EVT VT = OpLHS.getValueType(); + + switch (OpNum) { + default: llvm_unreachable("Unknown shuffle opcode!"); + case OP_VREV: + return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); + case OP_VDUP0: + case OP_VDUP1: + case OP_VDUP2: + case OP_VDUP3: + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, + OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); + case OP_VEXT1: + case OP_VEXT2: + case OP_VEXT3: + return DAG.getNode(ARMISD::VEXT, dl, VT, + OpLHS, OpRHS, + DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); + case OP_VUZPL: + case OP_VUZPR: + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); + case OP_VZIPL: + case OP_VZIPR: + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); + case OP_VTRNL: + case OP_VTRNR: + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); + } } -static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { - return Op; +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + SmallVector ShuffleMask; + + // Convert shuffles that are directly supported on NEON to target-specific + // DAG nodes, instead of keeping them as shuffles and matching them again + // during code selection. This is more efficient and avoids the possibility + // of inconsistencies between legalization and selection. + // FIXME: floating-point vectors should be canonicalized to integer vectors + // of the same time so that they get CSEd properly. 
+ SVN->getMask(ShuffleMask); + + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i32)); + } + + bool ReverseVEXT; + unsigned Imm; + if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { + if (ReverseVEXT) + std::swap(V1, V2); + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, + DAG.getConstant(Imm, MVT::i32)); + } + + if (isVREVMask(ShuffleMask, VT, 64)) + return DAG.getNode(ARMISD::VREV64, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 32)) + return DAG.getNode(ARMISD::VREV32, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 16)) + return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + + // Check for Neon shuffles that modify both input vectors in place. + // If both results are used, i.e., if there are two shuffles with the same + // source operands and with masks corresponding to both results of one of + // these operations, DAG memoization will ensure that a single node is + // used for both shuffles. + unsigned WhichResult; + if (isVTRNMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVUZPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVZIPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. + if (VT.getVectorNumElements() == 4 && + (VT.is128BitVector() || VT.is64BitVector())) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (ShuffleMask[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = ShuffleMask[i]; + } + + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = + PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; + + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); + } + + return SDValue(); } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - assert((VT == MVT::i8 || VT == MVT::i16) && - "unexpected type for custom-lowering vector extract"); SDValue Vec = Op.getOperand(0); SDValue Lane = Op.getOperand(1); + + // FIXME: This is invalid for 8 and 16-bit elements - the information about + // sign / zero extension is lost! Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); + + if (VT.bitsLT(MVT::i32)) + Op = DAG.getNode(ISD::TRUNCATE, dl, VT, Op); + else if (VT.bitsGT(MVT::i32)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op); + + return Op; } -static SDValue LowerCONCAT_VECTORS(SDValue Op) { - if (Op.getValueType().is128BitVector() && Op.getNumOperands() == 2) - return Op; - return SDValue(); +static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { + // The only time a CONCAT_VECTORS operation can have legal types is when + // two 64-bit vectors are concatenated to a 128-bit vector. 
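// The lowering below, restated in plain memory terms (an editorial sketch
// assuming NEON's little-endian lane order): concatenating two 64-bit halves
// into a 128-bit value just places them in lanes 0 and 1 of a two-lane
// container and reinterprets the bits.
#include <cstdint>

struct V128 { uint64_t Lane[2]; };

V128 concat64(uint64_t Lo, uint64_t Hi) {
  V128 R;
  R.Lane[0] = Lo;   // INSERT_VECTOR_ELT ..., 0
  R.Lane[1] = Hi;   // INSERT_VECTOR_ELT ..., 1
  return R;         // the final BIT_CONVERT is free - same bits, new type
}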
+ assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && + "unexpected CONCAT_VECTORS"); + DebugLoc dl = Op.getDebugLoc(); + SDValue Val = DAG.getUNDEF(MVT::v2f64); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0.getOpcode() != ISD::UNDEF) + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), + DAG.getIntPtrConstant(0)); + if (Op1.getOpcode() != ISD::UNDEF) + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, + DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), + DAG.getIntPtrConstant(1)); + return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - default: assert(0 && "Don't know how to custom lower this!"); abort(); + default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::CALL: return LowerCALL(Op, DAG); - case ISD::RET: return LowerRET(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); - case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); case ISD::RETURNADDR: break; case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); @@ -2287,9 +2758,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); } return SDValue(); } @@ -2301,7 +2771,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) { switch (N->getOpcode()) { default: - assert(0 && "Don't know how to custom expand this!"); + llvm_unreachable("Don't know how to custom expand this!"); return; case ISD::BIT_CONVERT: Results.push_back(ExpandBIT_CONVERT(N, DAG)); @@ -2322,12 +2792,14 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { - default: assert(false && "Unexpected instr type to insert"); - case ARM::tMOVCCr: { + default: + llvm_unreachable("Unexpected instr type to insert"); + case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. 
The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the @@ -2352,12 +2824,15 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - sinkMBB->addSuccessor(*i); + // Also inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. - while(!BB->succ_empty()) + while (!BB->succ_empty()) BB->removeSuccessor(BB->succ_begin()); BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -2381,6 +2856,78 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } + + case ARM::tANDsp: + case ARM::tADDspr_: + case ARM::tSUBspi_: + case ARM::t2SUBrSPi_: + case ARM::t2SUBrSPi12_: + case ARM::t2SUBrSPs_: { + MachineFunction *MF = BB->getParent(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + bool DstIsDead = MI->getOperand(0).isDead(); + bool SrcIsKill = MI->getOperand(1).isKill(); + + if (SrcReg != ARM::SP) { + // Copy the source to SP from virtual register. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); + unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) + ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; + BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + } + + unsigned OpOpc = 0; + bool NeedPred = false, NeedCC = false, NeedOp3 = false; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unexpected pseudo instruction!"); + case ARM::tANDsp: + OpOpc = ARM::tAND; + NeedPred = true; + break; + case ARM::tADDspr_: + OpOpc = ARM::tADDspr; + break; + case ARM::tSUBspi_: + OpOpc = ARM::tSUBspi; + break; + case ARM::t2SUBrSPi_: + OpOpc = ARM::t2SUBrSPi; + NeedPred = true; NeedCC = true; + break; + case ARM::t2SUBrSPi12_: + OpOpc = ARM::t2SUBrSPi12; + NeedPred = true; + break; + case ARM::t2SUBrSPs_: + OpOpc = ARM::t2SUBrSPs; + NeedPred = true; NeedCC = true; NeedOp3 = true; + break; + } + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); + if (OpOpc == ARM::tAND) + AddDefaultT1CC(MIB); + MIB.addReg(ARM::SP); + MIB.addOperand(MI->getOperand(2)); + if (NeedOp3) + MIB.addOperand(MI->getOperand(3)); + if (NeedPred) + AddDefaultPred(MIB); + if (NeedCC) + AddDefaultCC(MIB); + + // Copy the result from SP to virtual register. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); + unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) + ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; + BuildMI(BB, dl, TII->get(CopyOpc)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(ARM::SP); + MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 
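// The control-flow diamond built for tMOVCCr_pseudo above, written at the C
// level (editorial sketch): Thumb1 has no conditional move, so a select
// becomes a conditional branch around one assignment plus a PHI at the join
// block.
int thumbSelect(bool Cond, int TVal, int FVal) {
  int Dest;
  if (Cond)        // thisMBB: tBcc to sinkMBB when the condition holds
    Dest = TVal;   //   value reaching the PHI along the thisMBB edge
  else             // copy0MBB: the fallthrough block
    Dest = FVal;   //   value reaching the PHI along the copy0MBB edge
  return Dest;     // sinkMBB: PHI merging the two incoming values
}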
+ return BB; + } } } @@ -2393,7 +2940,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); unsigned Opc = N->getOpcode(); bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); @@ -2421,7 +2968,7 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, cast(RHS)->isNullValue()) { std::swap(LHS, RHS); SDValue Op0 = Slct.getOperand(0); - MVT OpVT = isSlctCC ? Op0.getValueType() : + EVT OpVT = isSlctCC ? Op0.getValueType() : Op0.getOperand(0).getValueType(); bool isInt = OpVT.isInteger(); CC = ISD::getSetCCInverse(CC, isInt); @@ -2516,7 +3063,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { /// operand of a vector shift left operation. That value must be in the range: /// 0 <= Value < ElementBits for a left shift; or /// 0 <= Value <= ElementBits for a long left shift. -static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) { +static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); if (! getVShiftImm(Op, ElementBits, Cnt)) @@ -2530,7 +3077,7 @@ static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) { /// absolute value must be in the range: /// 1 <= |Value| <= ElementBits for a right shift; or /// 1 <= |Value| <= ElementBits/2 for a narrow right shift. -static bool isVShiftRImm(SDValue Op, MVT VT, bool isNarrow, bool isIntrinsic, +static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); @@ -2571,7 +3118,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { case Intrinsic::arm_neon_vqrshiftns: case Intrinsic::arm_neon_vqrshiftnu: case Intrinsic::arm_neon_vqrshiftnsu: { - MVT VT = N->getOperand(1).getValueType(); + EVT VT = N->getOperand(1).getValueType(); int64_t Cnt; unsigned VShiftOpc = 0; @@ -2593,8 +3140,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { case Intrinsic::arm_neon_vshiftlu: if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) break; - assert(0 && "invalid shift count for vshll intrinsic"); - abort(); + llvm_unreachable("invalid shift count for vshll intrinsic"); case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: @@ -2611,8 +3157,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { case Intrinsic::arm_neon_vqshiftsu: if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) break; - assert(0 && "invalid shift count for vqshlu intrinsic"); - abort(); + llvm_unreachable("invalid shift count for vqshlu intrinsic"); case Intrinsic::arm_neon_vshiftn: case Intrinsic::arm_neon_vrshiftn: @@ -2625,11 +3170,10 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // Narrowing shifts require an immediate right shift. 
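// The immediate ranges enforced by isVShiftLImm / isVShiftRImm above,
// spelled out (editorial sketch; ElementBits is the element width):
bool vshlImmOK(unsigned ElementBits, int64_t Cnt, bool isLong) {
  return Cnt >= 0 && Cnt < (int64_t)(ElementBits + (isLong ? 1 : 0));
}
bool vshrImmOK(unsigned ElementBits, int64_t Cnt, bool isNarrow) {
  return Cnt >= 1 && Cnt <= (int64_t)(isNarrow ? ElementBits / 2 : ElementBits);
}
// e.g. for v8i16 (ElementBits = 16): VSHL takes 0..15, the long form VSHLL
// takes 0..16, VSHR takes 1..16, and narrowing right shifts take 1..8.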
     if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
       break;
-    assert(0 && "invalid shift count for narrowing vector shift intrinsic");
-    abort();
+    llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");

   default:
-    assert(0 && "unhandled vector shift");
+    llvm_unreachable("unhandled vector shift");
   }

   switch (IntNo) {
@@ -2678,7 +3222,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
   }

   case Intrinsic::arm_neon_vshiftins: {
-    MVT VT = N->getOperand(1).getValueType();
+    EVT VT = N->getOperand(1).getValueType();
     int64_t Cnt;
     unsigned VShiftOpc = 0;

@@ -2687,8 +3231,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
       VShiftOpc = ARMISD::VSRI;
     else {
-      assert(0 && "invalid shift count for vsli/vsri intrinsic");
-      abort();
+      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
     }

     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
@@ -2712,7 +3255,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
 /// their values after they get legalized to loads from a constant pool.
 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);

   // Nothing to be done for scalar shifts.
   if (! VT.isVector())
@@ -2722,7 +3265,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
   int64_t Cnt;

   switch (N->getOpcode()) {
-  default: assert(0 && "unexpected shift opcode");
+  default: llvm_unreachable("unexpected shift opcode");

   case ISD::SHL:
     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
@@ -2755,8 +3298,8 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
   if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
     SDValue Vec = N0.getOperand(0);
     SDValue Lane = N0.getOperand(1);
-    MVT VT = N->getValueType(0);
-    MVT EltVT = N0.getValueType();
+    EVT VT = N->getValueType(0);
+    EVT EltVT = N0.getValueType();
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();

     if (VT == MVT::i32 &&
@@ -2765,7 +3308,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
     unsigned Opc = 0;
     switch (N->getOpcode()) {
-    default: assert(0 && "unexpected opcode");
+    default: llvm_unreachable("unexpected opcode");
     case ISD::SIGN_EXTEND:
       Opc = ARMISD::VGETLANEs;
       break;
@@ -2802,10 +3345,88 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   return SDValue();
 }

+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+  if (!Subtarget->hasV6Ops())
+    // Pre-v6 does not support unaligned mem access.
+    return false;
+  else if (!Subtarget->hasV7Ops()) {
+    // v6 may or may not support unaligned mem access.
+    if (!Subtarget->isTargetDarwin())
+      return false;
+  }
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    return true;
+  // FIXME: VLD1 etc with standard alignment is legal.
+ } +} + +static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { + if (V < 0) + return false; + + unsigned Scale = 1; + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + // Scale == 1; + break; + case MVT::i16: + // Scale == 2; + Scale = 2; + break; + case MVT::i32: + // Scale == 4; + Scale = 4; + break; + } + + if ((V & (Scale - 1)) != 0) + return false; + V /= Scale; + return V == (V & ((1LL << 5) - 1)); +} + +static bool isLegalT2AddressImmediate(int64_t V, EVT VT, + const ARMSubtarget *Subtarget) { + bool isNeg = false; + if (V < 0) { + isNeg = true; + V = - V; + } + + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + // + imm12 or - imm8 + if (isNeg) + return V == (V & ((1LL << 8) - 1)); + return V == (V & ((1LL << 12) - 1)); + case MVT::f32: + case MVT::f64: + // Same as ARM mode. FIXME: NEON? + if (!Subtarget->hasVFP2()) + return false; + if ((V & 3) != 0) + return false; + V >>= 2; + return V == (V & ((1LL << 8) - 1)); + } +} + /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. -static bool isLegalAddressImmediate(int64_t V, MVT VT, +static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget) { if (V == 0) return true; @@ -2813,36 +3434,15 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT, if (!VT.isSimple()) return false; - if (Subtarget->isThumb()) { - if (V < 0) - return false; - - unsigned Scale = 1; - switch (VT.getSimpleVT()) { - default: return false; - case MVT::i1: - case MVT::i8: - // Scale == 1; - break; - case MVT::i16: - // Scale == 2; - Scale = 2; - break; - case MVT::i32: - // Scale == 4; - Scale = 4; - break; - } - - if ((V & (Scale - 1)) != 0) - return false; - V /= Scale; - return V == (V & ((1LL << 5) - 1)); - } + if (Subtarget->isThumb1Only()) + return isLegalT1AddressImmediate(V, VT); + else if (Subtarget->isThumb2()) + return isLegalT2AddressImmediate(V, VT, Subtarget); + // ARM mode. if (V < 0) V = - V; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: @@ -2854,7 +3454,7 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT, return V == (V & ((1LL << 8) - 1)); case MVT::f32: case MVT::f64: - if (!Subtarget->hasVFP2()) + if (!Subtarget->hasVFP2()) // FIXME: NEON? return false; if ((V & 3) != 0) return false; @@ -2863,11 +3463,44 @@ static bool isLegalAddressImmediate(int64_t V, MVT VT, } } +bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, + EVT VT) const { + int Scale = AM.Scale; + if (Scale < 0) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + if (Scale == 1) + return true; + // r + r << imm + Scale = Scale & ~1; + return Scale == 2 || Scale == 4 || Scale == 8; + case MVT::i64: + // r + r + if (((unsigned)AM.HasBaseReg + Scale) <= 2) + return true; + return false; + case MVT::isVoid: + // Note, we allow "void" uses (basically, uses that aren't loads or + // stores), because arm allows folding a scale into many arithmetic + // operations. This should be made more precise and revisited later. + + // Allow r << imm, but the imm has to be a multiple of two. 
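// Concrete offset ranges implied by isLegalT1AddressImmediate and
// isLegalT2AddressImmediate above (editorial sketch):
#include <cstdint>

// Thumb1 load/store: unsigned 5-bit immediate, scaled by the access size.
bool t1OffsetOK(int64_t V, unsigned Scale) {   // Scale is 1, 2 or 4
  if (V < 0 || (V & (Scale - 1)) != 0)
    return false;                              // negative or misaligned
  V /= Scale;
  return V == (V & 31);  // i8: 0..31, i16: 0..62, i32: 0..124
}

// Thumb2 load/store: +imm12 (0..4095) or -imm8 (-255..0).
bool t2OffsetOK(int64_t V) {
  return V < 0 ? (-V == (-V & 255)) : (V == (V & 4095));
}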
+ if (Scale & 1) return false; + return isPowerOf2_32(Scale); + } +} + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { - MVT VT = getValueType(Ty, true); + EVT VT = getValueType(Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -2879,7 +3512,7 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, case 0: // no scale reg, must be "r+i" or "r", or "i". break; case 1: - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) return false; // FALL THROUGH. default: @@ -2890,22 +3523,22 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, if (!VT.isSimple()) return false; + if (Subtarget->isThumb2()) + return isLegalT2ScaledAddressingMode(AM, VT); + int Scale = AM.Scale; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: - case MVT::i64: - // This assumes i64 is legalized to a pair of i32. If not (i.e. - // ldrd / strd are used, then its address mode is same as i16. - // r + r if (Scale < 0) Scale = -Scale; if (Scale == 1) return true; // r + r << imm return isPowerOf2_32(Scale & ~1); case MVT::i16: + case MVT::i64: // r + r if (((unsigned)AM.HasBaseReg + Scale) <= 2) return true; @@ -2917,15 +3550,15 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, // operations. This should be made more precise and revisited later. // Allow r << imm, but the imm has to be a multiple of two. - if (AM.Scale & 1) return false; - return isPowerOf2_32(AM.Scale); + if (Scale & 1) return false; + return isPowerOf2_32(Scale); } break; } return true; } -static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT, +static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { @@ -2983,7 +3616,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT, return false; } -static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT, +static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG) { @@ -3019,7 +3652,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (Subtarget->isThumb1Only()) return false; - MVT VT; + EVT VT; SDValue Ptr; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { @@ -3037,7 +3670,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); - else + else isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) @@ -3058,7 +3691,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, if (Subtarget->isThumb1Only()) return false; - MVT VT; + EVT VT; SDValue Ptr; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { @@ -3074,7 +3707,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, if (Subtarget->isThumb2()) isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); - else + else isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); if (!isLegal) @@ -3128,12 +3761,12 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair 
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'l': - if (Subtarget->isThumb()) + if (Subtarget->isThumb1Only()) return std::make_pair(0U, ARM::tGPRRegisterClass); else return std::make_pair(0U, ARM::GPRRegisterClass); @@ -3152,7 +3785,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, std::vector ARMTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint.size() != 1) return std::vector(); @@ -3214,10 +3847,16 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, switch (Constraint) { case 'I': - if (Subtarget->isThumb()) { - // This must be a constant between 0 and 255, for ADD immediates. + if (Subtarget->isThumb1Only()) { + // This must be a constant between 0 and 255, for ADD + // immediates. if (CVal >= 0 && CVal <= 255) break; + } else if (Subtarget->isThumb2()) { + // A constant that can be used as an immediate value in a + // data-processing instruction. + if (ARM_AM::getT2SOImmVal(CVal) != -1) + break; } else { // A constant that can be used as an immediate value in a // data-processing instruction. @@ -3227,7 +3866,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'J': - if (Subtarget->isThumb()) { + if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a constant between -255 and -1, for negated ADD // immediates. This can be used in GCC with an "n" modifier that // prints the negated value, for use with SUB instructions. It is @@ -3244,13 +3883,21 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'K': - if (Subtarget->isThumb()) { + if (Subtarget->isThumb1Only()) { // A 32-bit value where only one byte has a nonzero value. Exclude // zero to match GCC. This constraint is used by GCC internally for // constants that can be loaded with a move/shift combination. // It is not useful otherwise but is implemented for compatibility. if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) break; + } else if (Subtarget->isThumb2()) { + // A constant whose bitwise inverse can be used as an immediate + // value in a data-processing instruction. This can be used in GCC + // with a "B" modifier that prints the inverted value, for use with + // BIC and MVN instructions. It is not useful otherwise but is + // implemented for compatibility. + if (ARM_AM::getT2SOImmVal(~CVal) != -1) + break; } else { // A constant whose bitwise inverse can be used as an immediate // value in a data-processing instruction. This can be used in GCC @@ -3263,11 +3910,19 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'L': - if (Subtarget->isThumb()) { + if (Subtarget->isThumb1Only()) { // This must be a constant between -7 and 7, // for 3-operand ADD/SUB immediate instructions. if (CVal >= -7 && CVal < 7) break; + } else if (Subtarget->isThumb2()) { + // A constant whose negation can be used as an immediate value in a + // data-processing instruction. This can be used in GCC with an "n" + // modifier that prints the negated value, for use with SUB + // instructions. It is not useful otherwise but is implemented for + // compatibility. + if (ARM_AM::getT2SOImmVal(-CVal) != -1) + break; } else { // A constant whose negation can be used as an immediate value in a // data-processing instruction. 
This can be used in GCC with an "n" @@ -3280,7 +3935,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'M': - if (Subtarget->isThumb()) { + if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a multiple of 4 between 0 and 1020, for // ADD sp + immediate. if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) @@ -3295,7 +3950,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'N': - if (Subtarget->isThumb()) { + if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a constant between 0 and 31, for shift amounts. if (CVal >= 0 && CVal <= 31) break; @@ -3303,7 +3958,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; case 'O': - if (Subtarget->isThumb()) { + if (Subtarget->isThumb()) { // FIXME thumb2 // This must be a multiple of 4 between -508 and 508, for // ADD/SUB sp = sp + immediate. if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) @@ -3322,3 +3977,9 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, Ops, DAG); } + +bool +ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The ARM target isn't yet aware of offsets. + return false; +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 553a86d077b71..7d85f458d8e9f 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -40,6 +40,7 @@ namespace llvm { tCALL, // Thumb function call. BRCOND, // Conditional branch. BR_JT, // Jumptable branch. + BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). RET_FLAG, // Return with a flag operand. PIC_ADD, // Add with a PC operand and a PIC label. @@ -64,11 +65,13 @@ namespace llvm { FMRRD, // double to two gprs. FMDRR, // Two gprs to double. - EH_SJLJ_SETJMP, // SjLj exception handling setjmp - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. THREAD_POINTER, + DYN_ALLOC, // Dynamic allocation on the stack. + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -112,8 +115,18 @@ namespace llvm { VGETLANEu, // zero-extend vector extract element VGETLANEs, // sign-extend vector extract element - // Vector duplicate lane (128-bit result only; 64-bit is a shuffle) - VDUPLANEQ // splat a lane from a 64-bit vector to a 128-bit vector + // Vector duplicate: + VDUP, + VDUPLANE, + + // Vector shuffles: + VEXT, // extract + VREV64, // reverse elements within 64-bit doublewords + VREV32, // reverse elements within 32-bit words + VREV16, // reverse elements within 16-bit halfwords + VZIP, // zip (interleave) + VUZP, // unzip (deinterleave) + VTRN // transpose }; } @@ -147,11 +160,18 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap*) const; + + /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// unaligned memory accesses. of the specified type. + /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON? 
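// What the allowsUnalignedMemoryAccesses hook above gates, sketched in plain
// C++ (editorial; assumes little-endian byte order): loading an i32 through
// a pointer with no 4-byte alignment guarantee. Where the hook returns
// false, the access must be expanded roughly like the byte-wise version
// instead of a single LDR.
#include <cstdint>
#include <cstring>

uint32_t loadAny(const void *P) {      // single unaligned LDR when allowed
  uint32_t V;
  std::memcpy(&V, P, sizeof V);
  return V;
}

uint32_t loadBytewiseLE(const uint8_t *P) {  // fallback expansion
  return (uint32_t)P[0] | ((uint32_t)P[1] << 8) |
         ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
}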
+ virtual bool allowsUnalignedMemoryAccesses(EVT VT) const; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address @@ -175,13 +195,15 @@ namespace llvm { APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const; + + ConstraintType getConstraintType(const std::string &Constraint) const; std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -200,21 +222,23 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + bool isShuffleMaskLegal(const SmallVectorImpl &M, EVT VT) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. const ARMSubtarget *Subtarget; - /// ARMPCLabelIndex - Keep track the number of ARM PC labels created. + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. /// unsigned ARMPCLabelIndex; - void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); - void addDRTypeForNEON(MVT VT); - void addQRTypeForNEON(MVT VT); + void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT); + void addDRTypeForNEON(EVT VT); + void addQRTypeForNEON(EVT VT); typedef SmallVector, 8> RegsToPassVector; - void PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG, + void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, @@ -224,15 +248,13 @@ namespace llvm { SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, DebugLoc dl); - CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const; - SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, - const SDValue &StackPtr, const CCValAssign &VA, - SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags); - SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); + CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; + SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags); + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerRET(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); @@ -241,9 +263,9 @@ namespace llvm { SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG); SDValue 
LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, @@ -252,6 +274,33 @@ namespace llvm { bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); }; } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 301a6c1a5cca4..3d19f2345d30a 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -54,9 +54,16 @@ def NEONGetLnFrm : Format<25>; def NEONSetLnFrm : Format<26>; def NEONDupFrm : Format<27>; -// Misc flag for data processing instructions that indicates whether +// Misc flags. + // the instruction has a Rn register operand. -class UnaryDP { bit isUnaryDataProc = 1; } +// UnaryDP - Indicates this is a unary data processing instruction, i.e. +// it doesn't have a Rn operand. +class UnaryDP { bit isUnaryDataProc = 1; } + +// Xform16Bit - Indicates this Thumb2 instruction may be transformed into +// a 16-bit Thumb instruction if certain conditions are met. +class Xform16Bit { bit canXformTo16Bit = 1; } //===----------------------------------------------------------------------===// // ARM Instruction flags. These need to match ARMInstrInfo.h. @@ -77,7 +84,7 @@ def AddrModeT1_1 : AddrMode<7>; def AddrModeT1_2 : AddrMode<8>; def AddrModeT1_4 : AddrMode<9>; def AddrModeT1_s : AddrMode<10>; -def AddrModeT2_i12: AddrMode<12>; +def AddrModeT2_i12: AddrMode<11>; def AddrModeT2_i8 : AddrMode<12>; def AddrModeT2_so : AddrMode<13>; def AddrModeT2_pc : AddrMode<14>; @@ -103,11 +110,33 @@ def IndexModePost : IndexMode<2>; //===----------------------------------------------------------------------===// +// ARM special operands. +// + +// ARM Predicate operand. Default to 14 = always (AL). Second part is CC +// register whose default is 0 (no register). +def pred : PredicateOperand { + let PrintMethod = "printPredicateOperand"; +} + +// Conditional code result for instructions whose 's' bit is set, e.g. subs. +def cc_out : OptionalDefOperand { + let PrintMethod = "printSBitModifierOperand"; +} + +// Same as cc_out except it defaults to setting CPSR. +def s_cc_out : OptionalDefOperand { + let PrintMethod = "printSBitModifierOperand"; +} + +//===----------------------------------------------------------------------===// + // ARM Instruction templates. 
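// How the "${p}${s}" AsmString pieces built from the pred / cc_out operands
// above print in practice (editorial sketch; the helper is illustrative,
// not part of the patch): the predicate operand appends a condition suffix
// and the optional-def CPSR operand appends "s".
#include <string>

std::string mnemonic(const std::string &Opc, const std::string &Pred,
                     bool SetsFlags) {
  return Opc + Pred + (SetsFlags ? "s" : "");
}
// e.g. mnemonic("add", "", true) == "adds" and mnemonic("add", "eq", false)
// == "addeq": one TableGen class covers every predicated / flag-setting
// spelling of the instruction.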
// class InstARM + Format f, string cstr, InstrItinClass itin> : Instruction { field bits<32> Inst; @@ -130,12 +159,15 @@ class InstARM pattern> - : InstARM { +class PseudoInst pattern> + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -144,9 +176,10 @@ class PseudoInst pattern> // Almost all ARM instructions are predicable. class I pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -158,9 +191,10 @@ class I pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); @@ -170,8 +204,9 @@ class sI pattern> - : InstARM { + IndexMode im, Format f, InstrItinClass itin, + string asm, string cstr, list pattern> + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -179,90 +214,93 @@ class XI Predicates = [IsARM]; } -class AI pattern> - : I; -class AsI pattern> + : I; +class AsI pattern> + : sI; +class AXI pattern> - : sI; -class AXI pattern> - : XI; // Ctrl flow instructions -class ABI opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : I { +class ABI opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { let Inst{27-24} = opcod; } -class ABXI opcod, dag oops, dag iops, string asm, list pattern> - : XI { +class ABXI opcod, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : XI { let Inst{27-24} = opcod; } -class ABXIx2 pattern> - : XI; +class ABXIx2 pattern> + : XI; // BR_JT instructions -class JTI pattern> - : XI pattern> + : XI; // addrmode1 instructions -class AI1 opcod, dag oops, dag iops, Format f, string opc, - string asm, list pattern> - : I { +class AI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list pattern> + : I { let Inst{24-21} = opcod; let Inst{27-26} = {0,0}; } -class AsI1 opcod, dag oops, dag iops, Format f, string opc, - string asm, list pattern> - : sI { +class AsI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list pattern> + : sI { let Inst{24-21} = opcod; let Inst{27-26} = {0,0}; } -class AXI1 opcod, dag oops, dag iops, Format f, string asm, - list pattern> - : XI { +class AXI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, + string asm, list pattern> + : XI { let Inst{24-21} = opcod; let Inst{27-26} = {0,0}; } -class AI1x2 pattern> - : I; +class AI1x2 pattern> + : I; // addrmode2 loads and stores -class AI2 pattern> - : I { +class AI2 pattern> + : I { let Inst{27-26} = {0,1}; } // loads -class AI2ldw pattern> - : I { +class AI2ldw pattern> + : I { let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit let Inst{27-26} = {0,1}; } -class AXI2ldw pattern> - : XI pattern> + : XI { let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit @@ -270,19 +308,19 @@ class AXI2ldw pattern> - : I { +class AI2ldb pattern> + : I { let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit let Inst{27-26} = {0,1}; } -class AXI2ldb pattern> - : XI pattern> + : XI { let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit @@ -292,19 +330,19 @@ class AXI2ldb pattern> - : I { +class AI2stw pattern> + : I { let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit let 
Inst{27-26} = {0,1}; } -class AXI2stw pattern> - : XI pattern> + : XI { let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit @@ -312,19 +350,19 @@ class AXI2stw pattern> - : I { +class AI2stb pattern> + : I { let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit let Inst{24} = 1; // P bit let Inst{27-26} = {0,1}; } -class AXI2stb pattern> - : XI pattern> + : XI { let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit @@ -334,20 +372,20 @@ class AXI2stb pattern> - : I { +class AI2ldwpr pattern> + : I { let Inst{20} = 1; // L bit let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit let Inst{27-26} = {0,1}; } -class AI2ldbpr pattern> - : I { +class AI2ldbpr pattern> + : I { let Inst{20} = 1; // L bit let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit @@ -356,20 +394,20 @@ class AI2ldbpr pattern> - : I { +class AI2stwpr pattern> + : I { let Inst{20} = 0; // L bit let Inst{21} = 1; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit let Inst{27-26} = {0,1}; } -class AI2stbpr pattern> - : I { +class AI2stbpr pattern> + : I { let Inst{20} = 0; // L bit let Inst{21} = 1; // W bit let Inst{22} = 1; // B bit @@ -378,20 +416,20 @@ class AI2stbpr pattern> - : I { +class AI2ldwpo pattern> + : I { let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit let Inst{27-26} = {0,1}; } -class AI2ldbpo pattern> - : I { +class AI2ldbpo pattern> + : I { let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit @@ -400,20 +438,20 @@ class AI2ldbpo pattern> - : I { +class AI2stwpo pattern> + : I { let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 0; // P bit let Inst{27-26} = {0,1}; } -class AI2stbpo pattern> - : I { +class AI2stbpo pattern> + : I { let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit let Inst{22} = 1; // B bit @@ -422,20 +460,20 @@ class AI2stbpo pattern> - : I; -class AXI3 pattern> - : XI; +class AI3 pattern> + : I; +class AXI3 pattern> + : XI; // loads -class AI3ldh pattern> - : I { +class AI3ldh pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 0; // S bit @@ -443,10 +481,11 @@ class AI3ldh pattern> - : XI pattern> + : XI { let Inst{4} = 1; let Inst{5} = 1; // H bit @@ -456,10 +495,10 @@ class AXI3ldh pattern> - : I { +class AI3ldsh pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 1; // S bit @@ -467,10 +506,11 @@ class AI3ldsh pattern> - : XI pattern> + : XI { let Inst{4} = 1; let Inst{5} = 1; // H bit @@ -480,10 +520,10 @@ class AXI3ldsh pattern> - : I { +class AI3ldsb pattern> + : I { let Inst{4} = 1; let Inst{5} = 0; // H bit let Inst{6} = 1; // S bit @@ -491,10 +531,11 @@ class AI3ldsb pattern> - : XI pattern> + : XI { let Inst{4} = 1; let Inst{5} = 0; // H bit @@ -504,10 +545,10 @@ class AXI3ldsb pattern> - : I { +class AI3ldd pattern> + : I { let Inst{4} = 1; let Inst{5} = 0; // H bit let Inst{6} = 1; // S bit @@ -515,13 +556,14 @@ class AI3ldd pattern> - : I { +class AI3sth pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 0; // S bit @@ -529,10 +571,11 @@ class AI3sth pattern> - : XI pattern> + : XI { let Inst{4} = 1; let Inst{5} = 1; // H bit @@ -542,10 +585,10 @@ class AXI3sth pattern> - : I { +class AI3std pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 1; // S bit @@ -553,13 +596,14 @@ class AI3std pattern> - : I { +class AI3ldhpr pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let 
Inst{6} = 0; // S bit @@ -567,11 +611,12 @@ class AI3ldhpr pattern> - : I { +class AI3ldshpr pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 1; // S bit @@ -579,11 +624,12 @@ class AI3ldshpr pattern> - : I { +class AI3ldsbpr pattern> + : I { let Inst{4} = 1; let Inst{5} = 0; // H bit let Inst{6} = 1; // S bit @@ -591,13 +637,14 @@ class AI3ldsbpr pattern> - : I { +class AI3sthpr pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 0; // S bit @@ -605,13 +652,14 @@ class AI3sthpr pattern> - : I { +class AI3ldhpo pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 0; // S bit @@ -619,11 +667,12 @@ class AI3ldhpo pattern> - : I { +class AI3ldshpo pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 1; // S bit @@ -631,11 +680,12 @@ class AI3ldshpo pattern> - : I { +class AI3ldsbpo pattern> + : I { let Inst{4} = 1; let Inst{5} = 0; // H bit let Inst{6} = 1; // S bit @@ -643,13 +693,14 @@ class AI3ldsbpo pattern> - : I { +class AI3sthpo pattern> + : I { let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 0; // S bit @@ -657,57 +708,60 @@ class AI3sthpo pattern> - : XI { +class AXI4ld pattern> + : XI { let Inst{20} = 1; // L bit let Inst{22} = 0; // S bit let Inst{27-25} = 0b100; } -class AXI4st pattern> - : XI { +class AXI4st pattern> + : XI { let Inst{20} = 0; // L bit let Inst{22} = 0; // S bit let Inst{27-25} = 0b100; } // Unsigned multiply, multiply-accumulate instructions. -class AMul1I opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : I { +class AMul1I opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { let Inst{7-4} = 0b1001; let Inst{20} = 0; // S bit let Inst{27-21} = opcod; } -class AsMul1I opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : sI { +class AsMul1I opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : sI { let Inst{7-4} = 0b1001; let Inst{27-21} = opcod; } // Most significant word multiply -class AMul2I opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : I { +class AMul2I opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { let Inst{7-4} = 0b1001; let Inst{20} = 1; let Inst{27-21} = opcod; } // SMUL / SMULW / SMLA / SMLAW -class AMulxyI opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : I { +class AMulxyI opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { let Inst{4} = 0; let Inst{7} = 1; let Inst{20} = 0; @@ -715,19 +769,19 @@ class AMulxyI opcod, dag oops, dag iops, string opc, } // Extend instructions. -class AExtI opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : I { +class AExtI opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { let Inst{7-4} = 0b0111; let Inst{27-20} = opcod; } // Misc Arithmetic instructions. -class AMiscA1I opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : I { +class AMiscA1I opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : I { let Inst{27-20} = opcod; } @@ -751,74 +805,120 @@ class ARMV6Pat : Pat { // TI - Thumb instruction. 
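Everything up to this point is mechanical: each ARM-mode format class gains an InstrItinClass itin parameter, threaded down to InstARM so every instruction carries a scheduling itinerary, while the load/store templates keep hard-wiring their control bits (L at bit 20, W at 21, B at 22, P at 24, plus the Inst{27-26} = {0,1} addrmode2 signature). For orientation, here is a minimal standalone C++ sketch, not LLVM code, of pulling those same fields back out of a 32-bit ARM word; the struct and names are invented for illustration:

// Sketch only: decode the addrmode2 control bits that the AI2* classes
// above hard-wire. Bit positions follow the ARM encoding in the templates:
// L = load/store, W = writeback, B = byte/word, P = pre/post-indexed.
#include <cstdint>
#include <cstdio>

struct AM2Bits {
  bool L;  // bit 20: 1 = load, 0 = store
  bool W;  // bit 21: write the updated address back to the base register
  bool B;  // bit 22: 1 = byte access, 0 = word access
  bool P;  // bit 24: 1 = pre-indexed/offset form, 0 = post-indexed
};

static AM2Bits decodeAM2(uint32_t Inst) {
  AM2Bits R;
  R.L = (Inst >> 20) & 1;
  R.W = (Inst >> 21) & 1;
  R.B = (Inst >> 22) & 1;
  R.P = (Inst >> 24) & 1;
  return R;
}

int main() {
  // 0xE5912000 is "ldr r2, [r1]": L=1, W=0, B=0, P=1.
  AM2Bits B = decodeAM2(0xE5912000u);
  std::printf("L=%d W=%d B=%d P=%d\n", B.L, B.W, B.B, B.P);
}

The Thumb templates below repeat the same itinerary plumbing.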
-class ThumbI pattern> - : InstARM { - let OutOperandList = outs; - let InOperandList = ins; +class ThumbI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; let AsmString = asm; let Pattern = pattern; list Predicates = [IsThumb]; } -class TI pattern> - : ThumbI; +class TI pattern> + : ThumbI; -// BL, BLX(1) are translated by assembler into two instructions -class TIx2 pattern> - : ThumbI; - -// BR_JT instructions -class TJTI pattern> - : ThumbI; +// Two-address instructions +class TIt pattern> + : ThumbI; -// TPat - Same as Pat<>, but requires that the compiler be in Thumb mode. -class TPat : Pat { - list Predicates = [IsThumb]; -} +// tBL, tBX instructions +class TIx2 pattern> + : ThumbI; -class Tv5Pat : Pat { - list Predicates = [IsThumb, HasV5T]; -} +// BR_JT instructions +class TJTI pattern> + : ThumbI; // Thumb1 only -class Thumb1I pattern> - : InstARM { - let OutOperandList = outs; - let InOperandList = ins; +class Thumb1I pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; let AsmString = asm; let Pattern = pattern; list Predicates = [IsThumb1Only]; } -class T1I pattern> - : Thumb1I; -class T1I1 pattern> - : Thumb1I; -class T1I2 pattern> - : Thumb1I; -class T1I4 pattern> - : Thumb1I; -class T1Is pattern> - : Thumb1I; -class T1Ix2 pattern> - : Thumb1I; -class T1JTI pattern> - : Thumb1I; +class T1I pattern> + : Thumb1I; +class T1Ix2 pattern> + : Thumb1I; +class T1JTI pattern> + : Thumb1I; // Two-address instructions -class T1It pattern> - : Thumb1I; +class T1It pattern> + : Thumb1I; -class T1Pat : Pat { +// Thumb1 instruction that can either be predicated or set CPSR. +class Thumb1sI pattern> + : InstARM { + let OutOperandList = !con(oops, (ops s_cc_out:$s)); + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); + let Pattern = pattern; + list Predicates = [IsThumb1Only]; +} + +class T1sI pattern> + : Thumb1sI; + +// Two-address instructions +class T1sIt pattern> + : Thumb1sI; + +// Thumb1 instruction that can be predicated. +class Thumb1pI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let Pattern = pattern; list Predicates = [IsThumb1Only]; } +class T1pI pattern> + : Thumb1pI; + +// Two-address instructions +class T1pIt pattern> + : Thumb1pI; + +class T1pI1 pattern> + : Thumb1pI; +class T1pI2 pattern> + : Thumb1pI; +class T1pI4 pattern> + : Thumb1pI; +class T1pIs pattern> + : Thumb1pI; + // Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable. 
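Before the Thumb2 classes, note how the new Thumb1sI and Thumb1pI build their assembly strings: nested !strconcat calls splice the optional "${s}" (sets CPSR, via the s_cc_out:$s operand) and "${p}" (predicate) modifiers between the mnemonic and the operand text, whereas the ARM-mode sI class earlier uses the opposite "${p}${s}" order. A rough standalone C++ analogue of that expansion (illustrative only; the real expansion happens in TableGen and the asm printer):

// Sketch: how an "add${s}${p} ..."-style mnemonic template might expand.
// SetsCPSR -> "s" suffix; Pred -> a condition-code suffix such as "eq".
#include <cstdio>
#include <string>

static std::string expandMnemonic(const std::string &Opc, bool SetsCPSR,
                                  const std::string &Pred /* "" = always */) {
  return Opc + (SetsCPSR ? "s" : "") + Pred;
}

int main() {
  std::printf("%s\n", expandMnemonic("add", true,  "").c_str());   // adds
  std::printf("%s\n", expandMnemonic("add", false, "eq").c_str()); // addeq
}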
class Thumb2I pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -832,8 +932,9 @@ class Thumb2I pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); @@ -843,8 +944,9 @@ class Thumb2sI pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -852,31 +954,46 @@ class Thumb2XI Predicates = [IsThumb2]; } -class T2I pattern> - : Thumb2I; -class T2Ii12 pattern> - : Thumb2I; -class T2Ii8 pattern> - : Thumb2I; -class T2Iso pattern> - : Thumb2I; -class T2Ipc pattern> - : Thumb2I; -class T2Ii8s4 pattern> - : Thumb2I; +class T2I pattern> + : Thumb2I; +class T2Ii12 pattern> + : Thumb2I; +class T2Ii8 pattern> + : Thumb2I; +class T2Iso pattern> + : Thumb2I; +class T2Ipc pattern> + : Thumb2I; +class T2Ii8s4 pattern> + : Thumb2I; + +class T2sI pattern> + : Thumb2sI; + +class T2XI pattern> + : Thumb2XI; +class T2JTI pattern> + : Thumb2XI; -class T2sI pattern> - : Thumb2sI; +class T2Ix2 pattern> + : Thumb2I; -class T2XI pattern> - : Thumb2XI; -class T2JTI pattern> - : Thumb2XI; // T2Iidxldst - Thumb2 indexed load / store instructions. class T2Iidxldst pattern> - : InstARM { + : InstARM { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -884,6 +1001,15 @@ class T2Iidxldst Predicates = [IsThumb2]; } +// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. +class Tv5Pat : Pat { + list Predicates = [IsThumb1Only, HasV5T]; +} + +// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode. +class T1Pat : Pat { + list Predicates = [IsThumb1Only]; +} // T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode. class T2Pat : Pat { @@ -896,11 +1022,41 @@ class T2Pat : Pat { // ARM VFP Instruction templates. // +// Almost all VFP instructions are predicable. +class VFPI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let Pattern = pattern; + list Predicates = [HasVFP2]; +} + +// Special cases +class VFPXI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list Predicates = [HasVFP2]; +} + +class VFPAI pattern> + : VFPI; + // ARM VFP addrmode5 loads and stores class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> - : I { + : VFPI { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; @@ -908,9 +1064,10 @@ class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, } class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> - : I { + : VFPI { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; @@ -918,27 +1075,28 @@ class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, } // Load / store multiple -class AXSI5 pattern> - : XI { +class AXDI5 pattern> + : VFPXI { // TODO: Mark the instructions with the appropriate subtarget info. 
let Inst{27-25} = 0b110; let Inst{11-8} = 0b1011; } -class AXDI5 pattern> - : XI { +class AXSI5 pattern> + : VFPXI { // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; let Inst{11-8} = 0b1010; } - // Double precision, unary class ADuI opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - string opc, string asm, list pattern> - : AI { + InstrItinClass itin, string opc, string asm, list pattern> + : VFPAI { let Inst{27-20} = opcod1; let Inst{19-16} = opcod2; let Inst{11-8} = 0b1011; @@ -946,17 +1104,17 @@ class ADuI opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, } // Double precision, binary -class ADbI opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : AI { +class ADbI opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : VFPAI { let Inst{27-20} = opcod; let Inst{11-8} = 0b1011; } // Single precision, unary class ASuI opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - string opc, string asm, list pattern> - : AI { + InstrItinClass itin, string opc, string asm, list pattern> + : VFPAI { // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding. let Inst{27-20} = opcod1; let Inst{19-16} = opcod2; @@ -964,48 +1122,74 @@ class ASuI opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, let Inst{7-4} = opcod3; } +// Single precision unary, if no NEON +// Same as ASuI except not available if NEON is enabled +class ASuIn opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : ASuI { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + // Single precision, binary -class ASbI opcod, dag oops, dag iops, string opc, - string asm, list pattern> - : AI { +class ASbI opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : VFPAI { // Bit 22 (D bit) can be changed during instruction encoding. 
let Inst{27-20} = opcod; let Inst{11-8} = 0b1010; } +// Single precision binary, if no NEON +// Same as ASbI except not available if NEON is enabled +class ASbIn opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : ASbI { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + // VFP conversion instructions class AVConv1I opcod1, bits<4> opcod2, bits<4> opcod3, - dag oops, dag iops, string opc, string asm, list pattern> - : AI { + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : VFPAI { let Inst{27-20} = opcod1; let Inst{19-16} = opcod2; let Inst{11-8} = opcod3; let Inst{6} = 1; } +// VFP conversion instructions, if no NEON +class AVConv1In opcod1, bits<4> opcod2, bits<4> opcod3, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AVConv1I { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, - string opc, string asm, list pattern> - : AI { + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPAI { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{4} = 1; } -class AVConv2I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, - string asm, list pattern> - : AVConvXI; +class AVConv2I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; -class AVConv3I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, - string asm, list pattern> - : AVConvXI; +class AVConv3I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; -class AVConv4I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, - string asm, list pattern> - : AVConvXI; +class AVConv4I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; -class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, - string asm, list pattern> - : AVConvXI; +class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; //===----------------------------------------------------------------------===// @@ -1013,9 +1197,9 @@ class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, string opc, // ARM NEON Instruction templates. // -class NeonI pattern> - : InstARM { +class NeonI pattern> + : InstARM { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -1023,20 +1207,33 @@ class NeonI Predicates = [HasNEON]; } -class NI pattern> - : NeonI { +class NI pattern> + : NeonI { +} + +class NI4 pattern> + : NeonI { +} + +class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list pattern> + : NeonI { + let Inst{31-24} = 0b11110100; } -class NDataI pattern> - : NeonI { +class NDataI pattern> + : NeonI { let Inst{31-25} = 0b1111001; } // NEON "one register and a modified immediate" format. class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, bit op5, bit op4, - dag oops, dag iops, string asm, string cstr, list pattern> - : NDataI { + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list pattern> + : NDataI { let Inst{23} = op23; let Inst{21-19} = op21_19; let Inst{11-8} = op11_8; @@ -1049,8 +1246,9 @@ class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, // NEON 2 vector register format. 
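The *In variants just introduced (ASuIn, ASbIn, AVConv1In) differ from their parents only in the predicate list [HasVFP2, DontUseNEONForFP]: the pattern is selectable when VFP2 is present and the subtarget has not been told to do single-precision FP on NEON instead. Conceptually such lists compile down to boolean subtarget queries (the query string for the NEON side, per the .td predicates, is Subtarget->useNEONForSinglePrecisionFP()). A hedged sketch with an invented stand-in for the subtarget, not the real ARMSubtarget interface:

// Sketch only: the kind of feature gating the Predicates lists amount to.
// Field names mirror the .td predicates; the struct is illustrative.
#include <cstdio>

struct FakeSubtarget {
  bool Thumb2 = false;
  bool VFP2 = true;
  bool NEON = false;
  bool NEONForSinglePrecisionFP = false;
};

static bool canSelectVFPSinglePrecision(const FakeSubtarget &ST) {
  // Matches the [HasVFP2, DontUseNEONForFP] pair on the *In classes above.
  return ST.VFP2 && !ST.NEONForSinglePrecisionFP;
}

int main() {
  FakeSubtarget ST;
  std::printf("select VFP fp32: %s\n",
              canSelectVFPSinglePrecision(ST) ? "yes" : "no");
}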
class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - dag oops, dag iops, string asm, string cstr, list pattern> - : NDataI { + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list pattern> + : NDataI { let Inst{24-23} = op24_23; let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; @@ -1063,8 +1261,9 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, // NEON 2 vector register with immediate. class N2VImm op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, - dag oops, dag iops, string asm, string cstr, list pattern> - : NDataI { + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list pattern> + : NDataI { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-16} = op21_16; @@ -1076,8 +1275,9 @@ class N2VImm op21_16, bits<4> op11_8, bit op7, // NEON 3 vector register format. class N3V op21_20, bits<4> op11_8, bit op6, bit op4, - dag oops, dag iops, string asm, string cstr, list pattern> - : NDataI { + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list pattern> + : NDataI { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1088,9 +1288,9 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4, // NEON VMOVs between scalar and core registers. class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, Format f, string opc, string asm, - list pattern> - : AI { + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, list pattern> + : AI { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{6-5} = opcod3; @@ -1098,13 +1298,23 @@ class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, list Predicates = [HasNEON]; } class NVGetLane opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, string opc, string asm, list pattern> - : NVLaneOp; + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : NVLaneOp; class NVSetLane opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, string opc, string asm, list pattern> - : NVLaneOp; + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : NVLaneOp; class NVDup opcod1, bits<4> opcod2, bits<2> opcod3, - dag oops, dag iops, string opc, string asm, list pattern> - : NVLaneOp; + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : NVLaneOp; + +// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON +// for single-precision FP. 
+class NEONFPPat : Pat { + list Predicates = [HasNEON,UseNEONForFP]; +} diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 443fdc742eab5..4c92891c82bd6 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -21,52 +21,15 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt -EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, - cl::desc("Enable ARM 2-addr to 3-addr conv")); - -static inline -const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { - return MIB.addImm((int64_t)ARMCC::AL).addReg(0); -} - -static inline -const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { - return MIB.addReg(0); -} - -ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI) - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) { -} - ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { + : RI(*this, STI), Subtarget(STI) { } -void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - const MachineInstr *Orig) const { - DebugLoc dl = Orig->getDebugLoc(); - if (Orig->getOpcode() == ARM::MOVi2pieces) { - RI.emitLoadConstPool(MBB, I, this, dl, - DestReg, - Orig->getOperand(1).getImm(), - (ARMCC::CondCodes)Orig->getOperand(2).getImm(), - Orig->getOperand(3).getReg()); - return; - } - - MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); - MBB.insert(I, MI); -} - -static unsigned getUnindexedOpcode(unsigned Opc) { +unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { switch (Opc) { default: break; case ARM::LDR_PRE: @@ -94,820 +57,45 @@ static unsigned getUnindexedOpcode(unsigned Opc) { case ARM::STRB_POST: return ARM::STRB; } - return 0; -} - -MachineInstr * -ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const { - if (!EnableARM3Addr) - return NULL; - - MachineInstr *MI = MBBI; - MachineFunction &MF = *MI->getParent()->getParent(); - unsigned TSFlags = MI->getDesc().TSFlags; - bool isPre = false; - switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { - default: return NULL; - case ARMII::IndexModePre: - isPre = true; - break; - case ARMII::IndexModePost: - break; - } - - // Try splitting an indexed load/store to an un-indexed one plus an add/sub - // operation. - unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); - if (MemOpc == 0) - return NULL; - - MachineInstr *UpdateMI = NULL; - MachineInstr *MemMI = NULL; - unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); - const TargetInstrDesc &TID = MI->getDesc(); - unsigned NumOps = TID.getNumOperands(); - bool isLoad = !TID.mayStore(); - const MachineOperand &WB = isLoad ? 
MI->getOperand(1) : MI->getOperand(0); - const MachineOperand &Base = MI->getOperand(2); - const MachineOperand &Offset = MI->getOperand(NumOps-3); - unsigned WBReg = WB.getReg(); - unsigned BaseReg = Base.getReg(); - unsigned OffReg = Offset.getReg(); - unsigned OffImm = MI->getOperand(NumOps-2).getImm(); - ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); - switch (AddrMode) { - default: - assert(false && "Unknown indexed op!"); - return NULL; - case ARMII::AddrMode2: { - bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; - unsigned Amt = ARM_AM::getAM2Offset(OffImm); - if (OffReg == 0) { - int SOImmVal = ARM_AM::getSOImmVal(Amt); - if (SOImmVal == -1) - // Can't encode it in a so_imm operand. This transformation will - // add more than 1 instruction. Abandon! - return NULL; - UpdateMI = BuildMI(MF, MI->getDebugLoc(), - get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) - .addReg(BaseReg).addImm(SOImmVal) - .addImm(Pred).addReg(0).addReg(0); - } else if (Amt != 0) { - ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); - unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); - UpdateMI = BuildMI(MF, MI->getDebugLoc(), - get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg) - .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) - .addImm(Pred).addReg(0).addReg(0); - } else - UpdateMI = BuildMI(MF, MI->getDebugLoc(), - get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) - .addReg(BaseReg).addReg(OffReg) - .addImm(Pred).addReg(0).addReg(0); - break; - } - case ARMII::AddrMode3 : { - bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; - unsigned Amt = ARM_AM::getAM3Offset(OffImm); - if (OffReg == 0) - // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. - UpdateMI = BuildMI(MF, MI->getDebugLoc(), - get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) - .addReg(BaseReg).addImm(Amt) - .addImm(Pred).addReg(0).addReg(0); - else - UpdateMI = BuildMI(MF, MI->getDebugLoc(), - get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) - .addReg(BaseReg).addReg(OffReg) - .addImm(Pred).addReg(0).addReg(0); - break; - } - } - - std::vector NewMIs; - if (isPre) { - if (isLoad) - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc), MI->getOperand(0).getReg()) - .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); - else - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc)).addReg(MI->getOperand(1).getReg()) - .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); - NewMIs.push_back(MemMI); - NewMIs.push_back(UpdateMI); - } else { - if (isLoad) - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc), MI->getOperand(0).getReg()) - .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); - else - MemMI = BuildMI(MF, MI->getDebugLoc(), - get(MemOpc)).addReg(MI->getOperand(1).getReg()) - .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); - if (WB.isDead()) - UpdateMI->getOperand(0).setIsDead(); - NewMIs.push_back(UpdateMI); - NewMIs.push_back(MemMI); - } - - // Transfer LiveVariables states, kill / dead info. - if (LV) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned Reg = MO.getReg(); - - LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); - if (MO.isDef()) { - MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; - if (MO.isDead()) - LV->addVirtualRegisterDead(Reg, NewMI); - } - if (MO.isUse() && MO.isKill()) { - for (unsigned j = 0; j < 2; ++j) { - // Look at the two new MI's in reverse order. 
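For context, the body being deleted here (convertToThreeAddress, which moves to the shared ARMBaseInstrInfo) splits a pre- or post-indexed load/store into a plain memory operation plus an explicit ADD/SUB that updates the base register, bailing out with NULL whenever the addrmode2 offset has no single so_imm encoding. A toy C++ rendering of the pre-indexed case, with plain ints standing in for registers and no LLVM types involved:

// Sketch: the pre-indexed split performed by convertToThreeAddress.
//   ldr rD, [rN, #imm]!   ==>   add rWB, rN, #imm
//                               ldr rD, [rWB]
// (The update is emitted first, then the plain load from the updated base.)
#include <cstdio>

struct Emitted { int addDst, addSrc, addImm; int ldrDst, ldrBase; };

static Emitted splitPreIndexedLoad(int Rd, int Rn, int Imm, int Rwb) {
  Emitted E;
  E.addDst = Rwb; E.addSrc = Rn; E.addImm = Imm; // base-update instruction
  E.ldrDst = Rd;  E.ldrBase = Rwb;               // then the unindexed load
  return E;
}

int main() {
  Emitted E = splitPreIndexedLoad(/*Rd=*/0, /*Rn=*/1, /*Imm=*/8, /*Rwb=*/1);
  std::printf("add r%d, r%d, #%d ; ldr r%d, [r%d]\n",
              E.addDst, E.addSrc, E.addImm, E.ldrDst, E.ldrBase);
}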
- MachineInstr *NewMI = NewMIs[j]; - if (!NewMI->readsRegister(Reg)) - continue; - LV->addVirtualRegisterKilled(Reg, NewMI); - if (VI.removeKill(MI)) - VI.Kills.push_back(NewMI); - break; - } - } - } - } - } - - MFI->insert(MBBI, NewMIs[1]); - MFI->insert(MBBI, NewMIs[0]); - return NewMIs[0]; -} - -// Branch analysis. -bool -ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc || LastOpc == ARM::t2Bcc) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; - } - return true; // Can't handle indirect branch. - } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with ARM::B/ARM::tB/ARM::t2B and a - // ARM::Bcc/ARM::tBcc/ARM::t2Bcc, handle it. - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || - (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB) || - (SecondLastOpc == ARM::t2Bcc && LastOpc == ARM::t2B)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB || - SecondLastOpc==ARM::t2B) && - (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // ...likewise if it ends with a branch table followed by an unconditional - // branch. The branch folder can create these, and we must get rid of them for - // correctness of Thumb constant islands. - if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm || - SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr || - SecondLastOpc == ARM::t2BR_JTr || SecondLastOpc==ARM::t2BR_JTm || - SecondLastOpc == ARM::t2BR_JTadd) && - (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; - } - - // Otherwise, can't handle this. - return true; -} - - -unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); - int BOpc = AFI->isThumbFunction() ? - (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B; - int BccOpc = AFI->isThumbFunction() ? 
- (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; - - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) return 1; - --I; - if (I->getOpcode() != BccOpc) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; -} - -unsigned -ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl = DebugLoc::getUnknownLoc(); - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); - int BOpc = AFI->isThumbFunction() ? - (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B; - int BccOpc = AFI->isThumbFunction() ? - (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; - - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 2 || Cond.size() == 0) && - "ARM branch conditions have two components!"); - if (FBB == 0) { - if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB); - else - BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); - return 1; - } - - // Two-way conditional branch. - BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); - BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); - return 2; + return 0; } -bool -ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { +bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { if (MBB.empty()) return false; switch (MBB.back().getOpcode()) { case ARM::BX_RET: // Return. case ARM::LDM_RET: - case ARM::tBX_RET: - case ARM::tBX_RET_vararg: - case ARM::tPOP_RET: case ARM::B: - case ARM::tB: - case ARM::t2B: // Uncond branch. - case ARM::tBR_JTr: - case ARM::t2BR_JTr: case ARM::BR_JTr: // Jumptable branch. - case ARM::t2BR_JTm: case ARM::BR_JTm: // Jumptable branch through mem. - case ARM::t2BR_JTadd: case ARM::BR_JTadd: // Jumptable branch add to pc. return true; - default: return false; - } -} - -bool ARMBaseInstrInfo:: -ReverseBranchCondition(SmallVectorImpl &Cond) const { - ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); - Cond[0].setImm(ARMCC::getOppositeCondition(CC)); - return false; -} - -bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; -} - -bool ARMBaseInstrInfo:: -PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const { - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B) { - MI->setDesc(get((Opc == ARM::B) ? ARM::Bcc : - ((Opc == ARM::tB) ? 
ARM::tBcc : ARM::t2Bcc))); - MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); - MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); - return true; - } - - int PIdx = MI->findFirstPredOperandIdx(); - if (PIdx != -1) { - MachineOperand &PMO = MI->getOperand(PIdx); - PMO.setImm(Pred[0].getImm()); - MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); - return true; - } - return false; -} - -bool ARMBaseInstrInfo:: -SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const { - if (Pred1.size() > 2 || Pred2.size() > 2) - return false; - - ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); - ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); - if (CC1 == CC2) - return true; - - switch (CC1) { - default: - return false; - case ARMCC::AL: - return true; - case ARMCC::HS: - return CC2 == ARMCC::HI; - case ARMCC::LS: - return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; - case ARMCC::GE: - return CC2 == ARMCC::GT; - case ARMCC::LE: - return CC2 == ARMCC::LT; - } -} - -bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const { - const TargetInstrDesc &TID = MI->getDesc(); - if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) - return false; - - bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == ARM::CPSR) { - Pred.push_back(MO); - Found = true; - } - } - - return Found; -} - - -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing -static unsigned getNumJTEntries(const std::vector &JT, - unsigned JTI) DISABLE_INLINE; -static unsigned getNumJTEntries(const std::vector &JT, - unsigned JTI) { - return JT[JTI].MBBs.size(); -} - -/// GetInstSize - Return the size of the specified MachineInstr. -/// -unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MachineBasicBlock &MBB = *MI->getParent(); - const MachineFunction *MF = MBB.getParent(); - const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo(); - - // Basic size info comes from the TSFlags field. - const TargetInstrDesc &TID = MI->getDesc(); - unsigned TSFlags = TID.TSFlags; - - switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { - default: { - // If this machine instr is an inline asm, measure it. - if (MI->getOpcode() == ARM::INLINEASM) - return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName()); - if (MI->isLabel()) - return 0; - switch (MI->getOpcode()) { - default: - assert(0 && "Unknown or unset size field for instr!"); - break; - case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: - case TargetInstrInfo::DBG_LABEL: - case TargetInstrInfo::EH_LABEL: - return 0; - } - break; - } - case ARMII::Size8Bytes: return 8; // Arm instruction x 2. - case ARMII::Size4Bytes: return 4; // Arm instruction. - case ARMII::Size2Bytes: return 2; // Thumb instruction. - case ARMII::SizeSpecial: { - switch (MI->getOpcode()) { - case ARM::CONSTPOOL_ENTRY: - // If this machine instr is a constant pool entry, its size is recorded as - // operand #2. - return MI->getOperand(2).getImm(); - case ARM::Int_eh_sjlj_setjmp: return 12; - case ARM::BR_JTr: - case ARM::BR_JTm: - case ARM::BR_JTadd: - case ARM::t2BR_JTr: - case ARM::t2BR_JTm: - case ARM::t2BR_JTadd: - case ARM::tBR_JTr: { - // These are jumptable branches, i.e. a branch followed by an inlined - // jumptable. The size is 4 + 4 * number of entries. 
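That "4 + 4 * number of entries" comment is the whole sizing model for inlined jumptables: the branch itself is 4 bytes (2 for the Thumb tBR_JTr form) plus 4 bytes per table entry, and, as the surrounding comment notes, the possible 2 bytes of alignment padding are deliberately excluded because the constant-islands pass books them separately. As arithmetic, in a self-contained sketch:

// Sketch: jumptable-branch sizing from GetInstSizeInBytes.
// Size = branch bytes (2 for tBR_JTr, otherwise 4) + 4 bytes per entry.
#include <cstdio>

static unsigned jumpTableSize(unsigned NumEntries, bool IsThumb1JT) {
  return NumEntries * 4 + (IsThumb1JT ? 2 : 4);
}

int main() {
  std::printf("%u %u\n",
              jumpTableSize(10, false), // 44 bytes (ARM/Thumb2 forms)
              jumpTableSize(10, true)); // 42 bytes (tBR_JTr)
}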
- unsigned NumOps = TID.getNumOperands(); - MachineOperand JTOP = - MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); - unsigned JTI = JTOP.getIndex(); - const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - const std::vector &JT = MJTI->getJumpTables(); - assert(JTI < JT.size()); - // Thumb instructions are 2 byte aligned, but JT entries are 4 byte - // 4 aligned. The assembler / linker may add 2 byte padding just before - // the JT entries. The size does not include this padding; the - // constant islands pass does separate bookkeeping for it. - // FIXME: If we know the size of the function is less than (1 << 16) *2 - // bytes, we can use 16-bit entries instead. Then there won't be an - // alignment issue. - return getNumJTEntries(JT, JTI) * 4 + - ((MI->getOpcode()==ARM::tBR_JTr) ? 2 : 4); - } - default: - // Otherwise, pseudo-instruction sizes are zero. - return 0; - } - } - } - return 0; // Not reached -} - -/// Return true if the instruction is a register to register move and -/// leave the source and dest operands in the passed parameters. -/// -bool -ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - unsigned oc = MI.getOpcode(); - switch (oc) { default: - return false; - case ARM::FCPYS: - case ARM::FCPYD: - case ARM::VMOVD: - case ARM::VMOVQ: - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - case ARM::MOVr: - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid ARM MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } -} - -unsigned -ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::LDR: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::FLDD: - case ARM::FLDS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -unsigned -ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::STR: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isReg() && - MI->getOperand(3).isImm() && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - case ARM::FSTD: - case ARM::FSTS: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } break; } - return 0; -} - -bool -ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (I != MBB.end()) DL = I->getDebugLoc(); - - if (DestRC != SrcRC) { - // Not yet supported! 
- return false; - } - - if (DestRC == ARM::GPRRegisterClass) - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) - .addReg(SrcReg))); - else if (DestRC == ARM::SPRRegisterClass) - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) - .addReg(SrcReg)); - else if (DestRC == ARM::DPRRegisterClass) - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) - .addReg(SrcReg)); - else if (DestRC == ARM::QPRRegisterClass) - BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); - else - return false; - - return true; -} - -void ARMBaseInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (I != MBB.end()) DL = I->getDebugLoc(); - - if (RC == ARM::GPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addReg(0).addImm(0)); - } else if (RC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); - } else { - assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); - } -} - -void -ARMBaseInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const{ - DebugLoc DL = DebugLoc::getUnknownLoc(); - unsigned Opc = 0; - if (RC == ARM::GPRRegisterClass) { - Opc = ARM::STR; - } else if (RC == ARM::DPRRegisterClass) { - Opc = ARM::FSTD; - } else { - assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); - Opc = ARM::FSTS; - } - - MachineInstrBuilder MIB = - BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - AddDefaultPred(MIB); - NewMIs.push_back(MIB); - return; -} - -void ARMBaseInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (I != MBB.end()) DL = I->getDebugLoc(); - - if (RC == ARM::GPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) - .addFrameIndex(FI).addReg(0).addImm(0)); - } else if (RC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) - .addFrameIndex(FI).addImm(0)); - } else { - assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) - .addFrameIndex(FI).addImm(0)); - } -} - -void ARMBaseInstrInfo:: -loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - unsigned Opc = 0; - if (RC == ARM::GPRRegisterClass) { - Opc = ARM::LDR; - } else if (RC == ARM::DPRRegisterClass) { - Opc = ARM::FLDD; - } else { - assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); - Opc = ARM::FLDS; - } - - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - AddDefaultPred(MIB); - NewMIs.push_back(MIB); - return; + return false; } -MachineInstr *ARMBaseInstrInfo:: 
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - switch (Opc) { - default: break; - case ARM::MOVr: { - if (MI->getOperand(4).getReg() == ARM::CPSR) - // If it is updating CPSR, then it cannot be folded. - break; - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - } - break; - } - case ARM::FCPYS: { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addFrameIndex(FI) - .addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - break; - } - case ARM::FCPYD: { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - break; - } +void ARMInstrInfo:: +reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig) const { + DebugLoc dl = Orig->getDebugLoc(); + if (Orig->getOpcode() == ARM::MOVi2pieces) { + RI.emitLoadConstPool(MBB, I, dl, + DestReg, SubIdx, + Orig->getOperand(1).getImm(), + (ARMCC::CondCodes)Orig->getOperand(2).getImm(), + Orig->getOperand(3).getReg()); + return; } - return NewMI; -} - -MachineInstr* -ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction 
&MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); } -bool -ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const { - if (Ops.size() != 1) return false; - - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: break; - case ARM::MOVr: - // If it is updating CPSR, then it cannot be folded. - return MI->getOperand(4).getReg() != ARM::CPSR; - case ARM::FCPYS: - case ARM::FCPYD: - return true; - - case ARM::VMOVD: - case ARM::VMOVQ: - return false; // FIXME - } - - return false; -} diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 8c8f7883a06ce..c616949e37903 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -15,247 +15,27 @@ #define ARMINSTRUCTIONINFO_H #include "llvm/Target/TargetInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" #include "ARM.h" namespace llvm { class ARMSubtarget; -/// ARMII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace ARMII { - enum { - //===------------------------------------------------------------------===// - // Instruction Flags. - - //===------------------------------------------------------------------===// - // This four-bit field describes the addressing mode used. - - AddrModeMask = 0xf, - AddrModeNone = 0, - AddrMode1 = 1, - AddrMode2 = 2, - AddrMode3 = 3, - AddrMode4 = 4, - AddrMode5 = 5, - AddrMode6 = 6, - AddrModeT1_1 = 7, - AddrModeT1_2 = 8, - AddrModeT1_4 = 9, - AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data - AddrModeT2_i12 = 11, - AddrModeT2_i8 = 12, - AddrModeT2_so = 13, - AddrModeT2_pc = 14, // +/- i12 for pc relative data - AddrModeT2_i8s4 = 15, // i8 * 4 - - // Size* - Flags to keep track of the size of an instruction. - SizeShift = 4, - SizeMask = 7 << SizeShift, - SizeSpecial = 1, // 0 byte pseudo or special case. - Size8Bytes = 2, - Size4Bytes = 3, - Size2Bytes = 4, - - // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load - // and store ops - IndexModeShift = 7, - IndexModeMask = 3 << IndexModeShift, - IndexModePre = 1, - IndexModePost = 2, - - //===------------------------------------------------------------------===// - // Misc flags. - - // UnaryDP - Indicates this is a unary data processing instruction, i.e. - // it doesn't have a Rn operand. - UnaryDP = 1 << 9, - - //===------------------------------------------------------------------===// - // Instruction encoding formats. 
- // - FormShift = 10, - FormMask = 0x1f << FormShift, - - // Pseudo instructions - Pseudo = 0 << FormShift, - - // Multiply instructions - MulFrm = 1 << FormShift, - - // Branch instructions - BrFrm = 2 << FormShift, - BrMiscFrm = 3 << FormShift, - - // Data Processing instructions - DPFrm = 4 << FormShift, - DPSoRegFrm = 5 << FormShift, - - // Load and Store - LdFrm = 6 << FormShift, - StFrm = 7 << FormShift, - LdMiscFrm = 8 << FormShift, - StMiscFrm = 9 << FormShift, - LdStMulFrm = 10 << FormShift, - - // Miscellaneous arithmetic instructions - ArithMiscFrm = 11 << FormShift, - - // Extend instructions - ExtFrm = 12 << FormShift, - - // VFP formats - VFPUnaryFrm = 13 << FormShift, - VFPBinaryFrm = 14 << FormShift, - VFPConv1Frm = 15 << FormShift, - VFPConv2Frm = 16 << FormShift, - VFPConv3Frm = 17 << FormShift, - VFPConv4Frm = 18 << FormShift, - VFPConv5Frm = 19 << FormShift, - VFPLdStFrm = 20 << FormShift, - VFPLdStMulFrm = 21 << FormShift, - VFPMiscFrm = 22 << FormShift, - - // Thumb format - ThumbFrm = 23 << FormShift, - - // NEON format - NEONFrm = 24 << FormShift, - NEONGetLnFrm = 25 << FormShift, - NEONSetLnFrm = 26 << FormShift, - NEONDupFrm = 27 << FormShift, - - //===------------------------------------------------------------------===// - // Field shifts - such shifts are used to set field while generating - // machine instructions. - M_BitShift = 5, - ShiftImmShift = 5, - ShiftShift = 7, - N_BitShift = 7, - ImmHiShift = 8, - SoRotImmShift = 8, - RegRsShift = 8, - ExtRotImmShift = 10, - RegRdLoShift = 12, - RegRdShift = 12, - RegRdHiShift = 16, - RegRnShift = 16, - S_BitShift = 20, - W_BitShift = 21, - AM3_I_BitShift = 22, - D_BitShift = 22, - U_BitShift = 23, - P_BitShift = 24, - I_BitShift = 25, - CondShift = 28 - }; -} - -class ARMBaseInstrInfo : public TargetInstrInfoImpl { -protected: - // Can be only subclassed. - explicit ARMBaseInstrInfo(const ARMSubtarget &STI); -public: - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const; - - virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; - - // Branch analysis. - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; - - virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; - virtual - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - - // Predication support. - virtual bool isPredicated(const MachineInstr *MI) const; - - ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { - int PIdx = MI->findFirstPredOperandIdx(); - return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm() - : ARMCC::AL; - } - - virtual - bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const; - - virtual - bool SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const; - - virtual bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const; - - /// GetInstSize - Returns the size of the specified MachineInstr. - /// - virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; - - /// Return true if the instruction is a register to register move and return - /// the source and dest operands and their sub-register indices by reference. 
- virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; - - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; - - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const; -}; - class ARMInstrInfo : public ARMBaseInstrInfo { ARMRegisterInfo RI; + const ARMSubtarget &Subtarget; public: explicit ARMInstrInfo(const ARMSubtarget &STI); + // Return the non-pre/post incrementing version of 'Opc'. Return 0 + // if there is not such an opcode. + unsigned getUnindexedOpcode(unsigned Opc) const; + + // Return true if the block does not fall through. + bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
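getUnindexedOpcode, now a public hook here, maps a pre/post-indexed load or store opcode to its plain form and returns 0 when no such twin exists; as the switch earlier in ARMInstrInfo.cpp shows, LDR_PRE and LDR_POST both collapse to LDR, STRB_POST to STRB, and so on. A minimal standalone analogue of that contract (the enumerators are stand-ins, not the real ARM::* opcode values, and start at 1 so 0 stays free as the failure sentinel):

// Sketch: the opcode-mapping shape of getUnindexedOpcode.
#include <cstdio>

enum FakeOpc { LDR = 1, LDR_PRE, LDR_POST, STR, STR_PRE, STR_POST, OTHER };

static unsigned getUnindexed(unsigned Opc) {
  switch (Opc) {
  case LDR_PRE:
  case LDR_POST: return LDR;
  case STR_PRE:
  case STR_POST: return STR;
  default:       return 0; // no unindexed twin: the caller must bail out
  }
}

int main() {
  std::printf("%u %u\n", getUnindexed(LDR_POST), getUnindexed(OTHER)); // 1 0
}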
@@ -263,7 +43,8 @@ public: const ARMRegisterInfo &getRegisterInfo() const { return RI; } void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, const MachineInstr *Orig) const; + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig) const; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 408f47a6e1065..8adfac3fb4c5f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -34,6 +34,10 @@ def SDT_ARMBrJT : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_ARMBr2JT : SDTypeProfile<0, 4, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, @@ -71,6 +75,8 @@ def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, [SDNPHasChain]>; +def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, + [SDNPHasChain]>; def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, [SDNPOutFlag]>; @@ -93,10 +99,14 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; def HasV5T : Predicate<"Subtarget->hasV5TOps()">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; +def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; +def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">; +def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">; def IsThumb : Predicate<"Subtarget->isThumb()">; def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">; def IsThumb2 : Predicate<"Subtarget->isThumb2()">; @@ -117,25 +127,16 @@ class RegConstraint { // ARM specific transformation functions and pattern fragments. // -// so_imm_XFORM - Return a so_imm value packed into the format described for -// so_imm def below. -def so_imm_XFORM : SDNodeXFormgetTargetConstant(ARM_AM::getSOImmVal(N->getZExtValue()), - MVT::i32); -}]>; - // so_imm_neg_XFORM - Return a so_imm value packed into the format described for // so_imm_neg def below. def so_imm_neg_XFORM : SDNodeXFormgetTargetConstant(ARM_AM::getSOImmVal(-(int)N->getZExtValue()), - MVT::i32); + return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); }]>; // so_imm_not_XFORM - Return a so_imm value packed into the format described for // so_imm_not def below. def so_imm_not_XFORM : SDNodeXFormgetTargetConstant(ARM_AM::getSOImmVal(~(int)N->getZExtValue()), - MVT::i32); + return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32); }]>; // rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24. 
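The so_imm machinery being simplified in this hunk rests on one encoding fact: an ARM data-processing immediate is an 8-bit value rotated right by an even amount, and ARM_AM::getSOImmVal returns -1 when a 32-bit constant has no such encoding, which is exactly the predicate the so_imm operand keeps below. A standalone re-derivation of that test (illustrative; the real helper returns the packed rotation+imm8 encoding on success rather than a bool):

// Sketch: is V encodable as an ARM "so_imm", i.e. an 8-bit value rotated
// right by an even amount? Mirrors the getSOImmVal(V) != -1 check.
#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V >> N) | (V << (32 - N)) : V;
}

static bool isSOImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2)
    if ((rotr32(V, Rot) & ~0xffu) == 0) // fits in 8 bits at this rotation
      return true;
  return false;
}

int main() {
  // 0xff and 0xff000000 are encodable; 0x101 spans too wide a window.
  std::printf("%d %d %d\n", isSOImm(0xffu), isSOImm(0xff000000u),
              isSOImm(0x101u)); // 1 1 0
}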
@@ -169,6 +170,48 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; }]>; +/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield +/// e.g., 0xf000ffff +def bf_inv_mask_imm : Operand, + PatLeaf<(imm), [{ + uint32_t v = (uint32_t)N->getZExtValue(); + if (v == 0xffffffff) + return 0; + // there can be 1's on either or both "outsides", all the "inside" + // bits must be 0's + unsigned int lsb = 0, msb = 31; + while (v & (1 << msb)) --msb; + while (v & (1 << lsb)) ++lsb; + for (unsigned int i = lsb; i <= msb; ++i) { + if (v & (1 << i)) + return 0; + } + return 1; +}] > { + let PrintMethod = "printBitfieldInvMaskImmOperand"; +} + +/// Split a 32-bit immediate into two 16 bit parts. +def lo16 : SDNodeXFormgetTargetConstant((uint32_t)N->getZExtValue() & 0xffff, + MVT::i32); +}]>; + +def hi16 : SDNodeXFormgetTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32); +}]>; + +def lo16AllZero : PatLeaf<(i32 imm), [{ + // Returns true if all low 16-bits are 0. + return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; + }], hi16>; + +/// imm0_65535 predicate - True if the 32-bit immediate is in the range +/// [0.65535]. +def imm0_65535 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 65536; +}]>; + class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag : PatFrag<(ops node:$Src), res>; @@ -192,6 +235,9 @@ def cpinst_operand : Operand { def jtblock_operand : Operand { let PrintMethod = "printJTBlockOperand"; } +def jt2block_operand : Operand { + let PrintMethod = "printJT2BlockOperand"; +} // Local PC labels. def pclabel : Operand { @@ -212,9 +258,9 @@ def so_reg : Operand, // reg reg imm // into so_imm instructions: the 8-bit immediate is the least significant bits // [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11]. def so_imm : Operand, - PatLeaf<(imm), - [{ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; }], - so_imm_XFORM> { + PatLeaf<(imm), [{ + return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; + }]> { let PrintMethod = "printSOImmOperand"; } @@ -230,14 +276,18 @@ def so_imm2part : Operand, def so_imm2part_1 : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); + return CurDAG->getTargetConstant(V, MVT::i32); }]>; def so_imm2part_2 : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); + return CurDAG->getTargetConstant(V, MVT::i32); }]>; +/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. +def imm0_31 : Operand, PatLeaf<(imm), [{ + return (int32_t)N->getZExtValue() < 32; +}]>; // Define ARM specific addressing modes. @@ -274,7 +324,7 @@ def am3offset : Operand, // addrmode4 := reg, // def addrmode4 : Operand, - ComplexPattern { + ComplexPattern { let PrintMethod = "printAddrMode4Operand"; let MIOperandInfo = (ops GPR, i32imm); } @@ -303,17 +353,8 @@ def addrmodepc : Operand, let MIOperandInfo = (ops GPR, i32imm); } -// ARM Predicate operand. Default to 14 = always (AL). Second part is CC -// register whose default is 0 (no register). -def pred : PredicateOperand { - let PrintMethod = "printPredicateOperand"; -} - -// Conditional code result for instructions whose 's' bit is set, e.g. subs. 
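Among the new operands above, bf_inv_mask_imm is the subtle one: it accepts an AND mask, such as the 0xf000ffff in its comment, whose cleared bits form a single contiguous run; the .td loop strips the 1's off both ends and insists everything in between is 0 (with 0xffffffff rejected outright). The same test can be written branch-free with a carry trick; a hedged standalone version:

// Sketch: bf_inv_mask_imm's "cleared bits are one contiguous run" test.
// Adding the lowest set bit of the inverted mask must carry past the
// entire run, leaving no bits in common with it.
#include <cstdint>
#include <cstdio>

static bool isBitfieldInvMask(uint32_t V) {
  uint32_t Field = ~V;                     // the bits the AND would clear
  if (Field == 0) return false;            // V == 0xffffffff is rejected
  uint32_t LSB = Field & (~Field + 1u);    // lowest set bit of the field
  return ((Field + LSB) & Field) == 0;     // carry swept the whole run
}

int main() {
  std::printf("%d %d\n", isBitfieldInvMask(0xf000ffffu),   // 1: contiguous
              isBitfieldInvMask(0xff00ff00u));             // 0: two runs
}

The multiclass below then stamps out the ri/rr/rs operand flavors for the basic data-processing instructions, with Inst{25} distinguishing the immediate form from the register forms.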
-// -def cc_out : OptionalDefOperand { - let PrintMethod = "printSBitModifierOperand"; +def nohash_imm : Operand { + let PrintMethod = "printNoHashImmediate"; } //===----------------------------------------------------------------------===// @@ -329,34 +370,44 @@ include "ARMInstrFormats.td" multiclass AsI1_bin_irs opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AsI1; + IIC_iALUi, opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { + let Inst{25} = 1; + } def rr : AsI1 { + let Inst{25} = 0; let isCommutable = Commutable; } def rs : AsI1; + IIC_iALUsr, opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { + let Inst{25} = 0; + } } /// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the -/// instruction modifies the CSPR register. +/// instruction modifies the CPSR register. let Defs = [CPSR] in { multiclass AI1_bin_s_irs opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1; + IIC_iALUi, opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { + let Inst{25} = 1; + } def rr : AI1 { let isCommutable = Commutable; + let Inst{25} = 0; } def rs : AI1; + IIC_iALUsr, opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { + let Inst{25} = 0; + } } } @@ -366,17 +417,25 @@ multiclass AI1_bin_s_irs opcod, string opc, PatFrag opnode, let Defs = [CPSR] in { multiclass AI1_cmp_irs opcod, string opc, PatFrag opnode, bit Commutable = 0> { - def ri : AI1; - def rr : AI1 { + let Inst{20} = 1; + let Inst{25} = 1; + } + def rr : AI1 { + let Inst{20} = 1; + let Inst{25} = 0; let isCommutable = Commutable; } - def rs : AI1; + [(opnode GPR:$a, so_reg:$b)]> { + let Inst{20} = 1; + let Inst{25} = 0; + } } } @@ -384,15 +443,15 @@ multiclass AI1_cmp_irs opcod, string opc, PatFrag opnode, /// register and one whose operand is a register rotated by 8/16/24. /// FIXME: Remove the 'r' variant. Its rot_imm is zero. multiclass AI_unary_rrot opcod, string opc, PatFrag opnode> { - def r : AExtI, + def r : AExtI, Requires<[IsARM, HasV6]> { let Inst{19-16} = 0b1111; } - def r_rot : AExtI, + def r_rot : AExtI, Requires<[IsARM, HasV6]> { let Inst{19-16} = 0b1111; } @@ -402,11 +461,11 @@ multiclass AI_unary_rrot opcod, string opc, PatFrag opnode> { /// register and one whose operand is a register rotated by 8/16/24. 
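For orientation, the Inst{25} and Inst{20} overrides scattered through the multiclasses above land in the standard ARM data-processing word. A sketch of that word's fixed fields (field names follow the architecture manual, not anything defined in this patch):

#include <cstdint>

// Assembles the fixed fields of an ARM data-processing instruction.
// Bit 25 ("I") selects the shifter-operand form: 1 = rotated 8-bit
// immediate (the `ri` variants), 0 = register or shifted register
// (`rr`/`rs`).  Bit 20 ("S") is the flag-setting `s` suffix that the
// AI1_bin_s_irs and AI1_cmp_irs variants force to 1.
uint32_t encodeDataProc(unsigned Cond, unsigned Opcod, bool I, bool S,
                        unsigned Rn, unsigned Rd, unsigned ShOp) {
  uint32_t W = 0;
  W |= (Cond & 0xFu) << 28;    // condition (the `pred` operand)
  W |= (I ? 1u : 0u) << 25;    // 1 = so_imm form, 0 = so_reg form
  W |= (Opcod & 0xFu) << 21;   // 4-bit opcode, the multiclass argument
  W |= (S ? 1u : 0u) << 20;    // S bit: update CPSR
  W |= (Rn & 0xFu) << 16;      // first source register
  W |= (Rd & 0xFu) << 12;      // destination register
  W |= ShOp & 0xFFFu;          // 12-bit shifter-operand encoding
  return W;
}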
multiclass AI_bin_rrot opcod, string opc, PatFrag opnode> { def rr : AExtI, Requires<[IsARM, HasV6]>; def rr_rot : AExtI, Requires<[IsARM, HasV6]>; @@ -417,37 +476,45 @@ let Uses = [CPSR] in { multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AsI1, - Requires<[IsARM, CarryDefIsUnused]>; + Requires<[IsARM, CarryDefIsUnused]> { + let Inst{25} = 1; + } def rr : AsI1, Requires<[IsARM, CarryDefIsUnused]> { let isCommutable = Commutable; + let Inst{25} = 0; } def rs : AsI1, - Requires<[IsARM, CarryDefIsUnused]>; + Requires<[IsARM, CarryDefIsUnused]> { + let Inst{25} = 0; + } // Carry setting variants def Sri : AXI1, Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + let Defs = [CPSR]; + let Inst{25} = 1; } def Srr : AXI1, Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + let Defs = [CPSR]; + let Inst{25} = 0; } def Srs : AXI1, Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + let Defs = [CPSR]; + let Inst{25} = 0; } } } @@ -467,23 +534,23 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, let neverHasSideEffects = 1, isNotDuplicable = 1 in def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, - i32imm:$size), + i32imm:$size), NoItinerary, "${instid:label} ${cpidx:cpentry}", []>; let Defs = [SP], Uses = [SP] in { def ADJCALLSTACKUP : -PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, "@ ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKDOWN : -PseudoInst<(outs), (ins i32imm:$amt, pred:$p), +PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, "@ ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } def DWARF_LOC : -PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), +PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary, ".loc $file, $line, $col", [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; @@ -491,42 +558,42 @@ PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), // Address computation and loads and stores in PIC mode. 
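A sketch of the arithmetic behind the PIC definitions that follow, assuming the usual pc-relative constant-pool scheme: the pool entry holds the distance from the pc-read point (the pclabel plus 8 in ARM state) to the global, so the `add $dst, pc, $a` of PICADD lands exactly on it.

#include <cstdint>

// What PICADD computes at run time.  PoolEntry is assumed to hold
// (GlobalAddr - (LabelAddr + 8)); reading PC at the labeled add yields
// LabelAddr + 8, two instructions ahead.
uint32_t picAddress(uint32_t LabelAddr, int32_t PoolEntry) {
  uint32_t PC = LabelAddr + 8;     // ARM-state PC read
  return PC + (uint32_t)PoolEntry; // == GlobalAddr
}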
let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), - Pseudo, "$cp:\n\tadd$p $dst, pc, $a", + Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p $dst, pc, $a", [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; let AddedComplexity = 10 in { let canFoldAsLoad = 1 in def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tldr$p $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p $dst, $addr", [(set GPR:$dst, (load addrmodepc:$addr))]>; def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tldr${p}h $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h $dst, $addr", [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tldr${p}b $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b $dst, $addr", [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>; def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tldr${p}sh $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh $dst, $addr", [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tldr${p}sb $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb $dst, $addr", [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; } let AddedComplexity = 10 in { def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tstr$p $src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p $src, $addr", [(store GPR:$src, addrmodepc:$addr)]>; def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tstr${p}h $src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h $src, $addr", [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, "${addr:label}:\n\tstr${p}b $src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b $src, $addr", [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; } } // isNotDuplicable = 1 @@ -534,135 +601,152 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), // LEApcrel - Load a pc-relative address into a register without offending the // assembler. 
-def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, - !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #PCRELV${:uid}")), +def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), + Pseudo, IIC_iALUi, + !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(", + "${:private}PCRELL${:uid}+8))\n"), + !strconcat("${:private}PCRELL${:uid}:\n\t", + "add$p $dst, pc, #${:private}PCRELV${:uid}")), []>; def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), - (ins i32imm:$label, i32imm:$id, pred:$p), - Pseudo, - !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #PCRELV${:uid}")), - []>; + (ins i32imm:$label, nohash_imm:$id, pred:$p), + Pseudo, IIC_iALUi, + !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, " + "(${label}_${id}-(", + "${:private}PCRELL${:uid}+8))\n"), + !strconcat("${:private}PCRELL${:uid}:\n\t", + "add$p $dst, pc, #${:private}PCRELV${:uid}")), + []> { + let Inst{25} = 1; +} //===----------------------------------------------------------------------===// // Control Flow Instructions. // -let isReturn = 1, isTerminator = 1 in - def BX_RET : AI<(outs), (ins), BrMiscFrm, "bx", " lr", [(ARMretflag)]> { +let isReturn = 1, isTerminator = 1, isBarrier = 1 in + def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "bx", " lr", [(ARMretflag)]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } // FIXME: remove when we have a way to marking a MI with these properties. -// FIXME: $dst1 should be a def. But the extra ops must be in the end of the -// operand list. // FIXME: Should pc be an implicit operand like PICADD, etc? -let isReturn = 1, isTerminator = 1 in +let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, + hasExtraDefRegAllocReq = 1 in def LDM_RET : AXI4ld<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), - LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + LdStMulFrm, IIC_Br, "ldm${p}${addr:submode} $addr, $wb", []>; // On non-Darwin platforms R9 is callee-saved. 
-let isCall = 1, Itinerary = IIC_Br, - Defs = [R0, R1, R2, R3, R12, LR, - D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { +let isCall = 1, + Defs = [R0, R1, R2, R3, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), - "bl ${func:call}", - [(ARMcall tglobaladdr:$func)]>, Requires<[IsNotDarwin]>; + IIC_Br, "bl ${func:call}", + [(ARMcall tglobaladdr:$func)]>, + Requires<[IsARM, IsNotDarwin]>; def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), - "bl", " ${func:call}", - [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsNotDarwin]>; + IIC_Br, "bl", " ${func:call}", + [(ARMcall_pred tglobaladdr:$func)]>, + Requires<[IsARM, IsNotDarwin]>; // ARMv5T and above def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, - "blx $func", - [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsNotDarwin]> { + IIC_Br, "blx $func", + [(ARMcall GPR:$func)]>, + Requires<[IsARM, HasV5T, IsNotDarwin]> { let Inst{7-4} = 0b0011; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } - let Uses = [LR] in { - // ARMv4T - def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), - "mov lr, pc\n\tbx $func", - [(ARMcall_nolink GPR:$func)]>, Requires<[IsNotDarwin]>; + // ARMv4T + def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), + IIC_Br, "mov lr, pc\n\tbx $func", + [(ARMcall_nolink GPR:$func)]>, + Requires<[IsARM, IsNotDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; } } // On Darwin R9 is call-clobbered. -let isCall = 1, Itinerary = IIC_Br, - Defs = [R0, R1, R2, R3, R9, R12, LR, - D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { +let isCall = 1, + Defs = [R0, R1, R2, R3, R9, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { def BLr9 : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), - "bl ${func:call}", - [(ARMcall tglobaladdr:$func)]>, Requires<[IsDarwin]>; + IIC_Br, "bl ${func:call}", + [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>; def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), - "bl", " ${func:call}", - [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsDarwin]>; + IIC_Br, "bl", " ${func:call}", + [(ARMcall_pred tglobaladdr:$func)]>, + Requires<[IsARM, IsDarwin]>; // ARMv5T and above def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, - "blx $func", + IIC_Br, "blx $func", [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> { let Inst{7-4} = 0b0011; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } - let Uses = [LR] in { - // ARMv4T - def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops), - "mov lr, pc\n\tbx $func", - [(ARMcall_nolink GPR:$func)]>, Requires<[IsDarwin]>; + // ARMv4T + def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops), + IIC_Br, "mov lr, pc\n\tbx $func", + [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; } } -let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { +let isBranch = 1, isTerminator = 1 in { // B is "predicable" since it can be xformed into a Bcc. 
let isBarrier = 1 in { let isPredicable = 1 in - def B : ABXI<0b1010, (outs), (ins brtarget:$target), "b $target", - [(br bb:$target)]>; + def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br, + "b $target", [(br bb:$target)]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), - "mov pc, $target \n$jt", + IIC_Br, "mov pc, $target \n$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { let Inst{20} = 0; // S Bit let Inst{24-21} = 0b1101; - let Inst{27-26} = {0,0}; + let Inst{27-25} = 0b000; } def BR_JTm : JTI<(outs), (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), - "ldr pc, $target \n$jt", - [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, - imm:$id)]> { + IIC_Br, "ldr pc, $target \n$jt", + [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, + imm:$id)]> { let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit let Inst{24} = 1; // P bit - let Inst{27-26} = {0,1}; + let Inst{27-25} = 0b011; } def BR_JTadd : JTI<(outs), (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), - "add pc, $target, $idx \n$jt", + IIC_Br, "add pc, $target, $idx \n$jt", [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]> { let Inst{20} = 0; // S bit let Inst{24-21} = 0b0100; - let Inst{27-26} = {0,0}; + let Inst{27-25} = 0b000; } } // isNotDuplicable = 1, isIndirectBranch = 1 } // isBarrier = 1 @@ -670,7 +754,7 @@ let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), - "b", " $target", + IIC_Br, "b", " $target", [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; } @@ -679,133 +763,141 @@ let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { // // Load -let canFoldAsLoad = 1 in -def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, +let canFoldAsLoad = 1, isReMaterializable = 1 in +def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", " $dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
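The three BR_JT forms above differ only in how the jump-table entry reaches pc (mov, ldr, or add). A C-level model of the memory-indirect BR_JTm dispatch; `Handler` and the parameter names are illustrative, not from this patch:

// Load the target address out of the jump table, then branch by
// writing it to PC (e.g. ldr pc, [table, index, lsl #2]).
typedef void (*Handler)(void);
void dispatch(const Handler *Handlers, unsigned Index) {
  Handler Target = Handlers[Index];  // the table load
  Target();                          // the indirect branch itself
}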
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in -def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, +def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", " $dst, $addr", []>; // Loads with zero extension def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "h $dst, $addr", - [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; + IIC_iLoadr, "ldr", "h $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; -def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, - "ldr", "b $dst, $addr", - [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; +def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + IIC_iLoadr, "ldr", "b $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; // Loads with sign extension def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "sh $dst, $addr", - [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; + IIC_iLoadr, "ldr", "sh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "sb $dst, $addr", - [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; + IIC_iLoadr, "ldr", "sb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1 in { +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>; + IIC_iLoadr, "ldr", "d $dst1, $addr", + []>, Requires<[IsARM, HasV5TE]>; // Indexed loads def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode2:$addr), LdFrm, + (ins addrmode2:$addr), LdFrm, IIC_iLoadru, "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>; def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), LdFrm, + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>; def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode3:$addr), LdMiscFrm, + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>; def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>; def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode2:$addr), LdFrm, + (ins addrmode2:$addr), LdFrm, IIC_iLoadru, "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>; def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am2offset:$offset), LdFrm, + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>; def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode3:$addr), LdMiscFrm, + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>; def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am3offset:$offset), LdMiscFrm, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>; def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), - (ins addrmode3:$addr), LdMiscFrm, + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>; def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), - (ins 
GPR:$base,am3offset:$offset), LdMiscFrm, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; } // Store -def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, +def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, "str", " $src, $addr", [(store GPR:$src, addrmode2:$addr)]>; // Stores with truncate -def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, +def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, "str", "h $src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; -def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, +def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, "str", "b $src, $addr", [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword -let mayStore = 1 in -def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm, - "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>; +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), + StMiscFrm, IIC_iStorer, + "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, + (ins GPR:$src, GPR:$base, am2offset:$offset), + StFrm, IIC_iStoreru, "str", " $src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STR_POST : AI2stwpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, "str", " $src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + (ins GPR:$src, GPR:$base,am3offset:$offset), + StMiscFrm, IIC_iStoreru, "str", "h $src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + (ins GPR:$src, GPR:$base,am3offset:$offset), + StMiscFrm, IIC_iStoreru, "str", "h $src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, "str", "b $src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, "str", "b $src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; @@ -814,17 +906,16 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), // Load / store multiple Instructions. // -// FIXME: $dst1 should be a def. 
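The _PRE/_POST loads and stores above pair the data result with a written-back base (the "$addr.base = $base_wb" and "$base = $base_wb" constraints). Sketches of the two behaviours, using element offsets for brevity where the hardware offset is in bytes:

#include <cstddef>
#include <cstdint>

// Pre-indexed (e.g. LDR_PRE, `ldr Rd, [Rn, #off]!`): the offset is
// applied before the access and written back to the base register.
uint32_t ldrPre(uint32_t *&Base, ptrdiff_t Off) {
  Base += Off;          // writeback happens as part of the access
  return *Base;
}

// Post-indexed (e.g. LDR_POST, `ldr Rd, [Rn], #off`): the access uses
// the old base, then the offset is written back.
uint32_t ldrPost(uint32_t *&Base, ptrdiff_t Off) {
  uint32_t Val = *Base; // load from the old base
  Base += Off;          // writeback afterwards
  return Val;
}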
-let mayLoad = 1 in +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM : AXI4ld<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), - LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode} $addr, $wb", []>; -let mayStore = 1 in +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STM : AXI4st<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops), - LdStMulFrm, "stm${p}${addr:submode} $addr, $src1", + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode} $addr, $wb", []>; //===----------------------------------------------------------------------===// @@ -832,16 +923,42 @@ def STM : AXI4st<(outs), // let neverHasSideEffects = 1 in -def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, - "mov", " $dst, $src", []>, UnaryDP; -def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, - "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP; +def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, + "mov", " $dst, $src", []>, UnaryDP; +def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), + DPSoRegFrm, IIC_iMOVsr, + "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP; let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, - "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP; +def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi, + "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP { + let Inst{25} = 1; +} -def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), + DPFrm, IIC_iMOVi, + "movw", " $dst, $src", + [(set GPR:$dst, imm0_65535:$src)]>, + Requires<[IsARM, HasV6T2]> { + let Inst{20} = 0; + let Inst{25} = 1; +} + +let Constraints = "$src = $dst" in +def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), + DPFrm, IIC_iMOVi, + "movt", " $dst, $imm", + [(set GPR:$dst, + (or (and GPR:$src, 0xffff), + lo16AllZero:$imm))]>, UnaryDP, + Requires<[IsARM, HasV6T2]> { + let Inst{20} = 0; + let Inst{25} = 1; +} + +let Uses = [CPSR] in +def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, "mov", " $dst, $src, rrx", [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP; @@ -849,11 +966,11 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, // due to flag operands. 
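MOVi16/MOVTi16 above are the v6T2 movw/movt pair, matching the lo16/hi16 XForms defined earlier. A sketch of how the pair rebuilds a full 32-bit constant:

#include <cstdint>

uint32_t lo16(uint32_t Imm) { return Imm & 0xFFFFu; }  // movw operand
uint32_t hi16(uint32_t Imm) { return Imm >> 16; }      // movt operand

// movw writes the low half and zeroes the top; movt writes the top
// half and keeps the low half (hence the "$src = $dst" constraint).
uint32_t materialize(uint32_t Imm) {
  uint32_t Rd = lo16(Imm);                  // movw
  Rd = (Rd & 0xFFFFu) | (hi16(Imm) << 16);  // movt
  return Rd;                                // == Imm
}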
let Defs = [CPSR] in { -def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - "mov", "s $dst, $src, lsr #1", +def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + IIC_iMOVsi, "mov", "s $dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - "mov", "s $dst, $src, asr #1", + IIC_iMOVsi, "mov", "s $dst, $src, asr #1", [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; } @@ -901,6 +1018,24 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", // TODO: UXT(A){B|H}16 +def SBFX : I<(outs GPR:$dst), + (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi, + "sbfx", " $dst, $src, $lsb, $width", "", []>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-21} = 0b0111101; + let Inst{6-4} = 0b101; +} + +def UBFX : I<(outs GPR:$dst), + (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi, + "ubfx", " $dst, $src, $lsb, $width", "", []>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-21} = 0b0111111; + let Inst{6-4} = 0b101; +} + //===----------------------------------------------------------------------===// // Arithmetic Instructions. // @@ -923,30 +1058,36 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc", // These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - "rsb", " $dst, $a, $b", - [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>; + IIC_iALUi, "rsb", " $dst, $a, $b", + [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { + let Inst{25} = 1; +} def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - "rsb", " $dst, $a, $b", + IIC_iALUsr, "rsb", " $dst, $a, $b", [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>; // RSB with 's' bit set. let Defs = [CPSR] in { def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - "rsb", "s $dst, $a, $b", - [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>; + IIC_iALUi, "rsb", "s $dst, $a, $b", + [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> { + let Inst{25} = 1; +} def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - "rsb", "s $dst, $a, $b", + IIC_iALUsr, "rsb", "s $dst, $a, $b", [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>; } let Uses = [CPSR] in { def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, "rsc", " $dst, $a, $b", + DPFrm, IIC_iALUi, "rsc", " $dst, $a, $b", [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]>; + Requires<[IsARM, CarryDefIsUnused]> { + let Inst{25} = 1; +} def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, "rsc", " $dst, $a, $b", + DPSoRegFrm, IIC_iALUsr, "rsc", " $dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]>; } @@ -954,11 +1095,13 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), // FIXME: Allow these to be predicated. 
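SBFX/UBFX above extract a $width-bit field starting at bit $lsb, sign- or zero-extended. Models of both, assuming 1 <= Width <= 31 so the shifts stay well defined:

#include <cstdint>

// UBFX: zero-extended bitfield extract.
uint32_t ubfx(uint32_t Src, unsigned Lsb, unsigned Width) {
  return (Src >> Lsb) & ((1u << Width) - 1);
}

// SBFX: the same field, sign-extended from its top bit.
int32_t sbfx(uint32_t Src, unsigned Lsb, unsigned Width) {
  uint32_t Field = (Src >> Lsb) & ((1u << Width) - 1);
  uint32_t Sign = 1u << (Width - 1);
  return (int32_t)((Field ^ Sign) - Sign);
}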
let Defs = [CPSR], Uses = [CPSR] in { def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, "rscs $dst, $a, $b", + DPFrm, IIC_iALUi, "rscs $dst, $a, $b", [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]>; + Requires<[IsARM, CarryDefIsUnused]> { + let Inst{25} = 1; +} def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, "rscs $dst, $a, $b", + DPSoRegFrm, IIC_iALUsr, "rscs $dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]>; } @@ -992,16 +1135,27 @@ defm EOR : AsI1_bin_irs<0b0001, "eor", defm BIC : AsI1_bin_irs<0b1110, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, +def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi, + "bfc", " $dst, $imm", "$src = $dst", + [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-21} = 0b0111110; + let Inst{6-0} = 0b0011111; +} + +def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", " $dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP; def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, - "mvn", " $dst, $src", + IIC_iMOVsr, "mvn", " $dst, $src", [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, - "mvn", " $dst, $imm", - [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP; +def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, + IIC_iMOVi, "mvn", " $dst, $imm", + [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP { + let Inst{25} = 1; +} def : ARMPat<(and GPR:$src, so_imm_not:$imm), (BICri GPR:$src, so_imm_not:$imm)>; @@ -1012,43 +1166,48 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm), let isCommutable = 1 in def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - "mul", " $dst, $a, $b", + IIC_iMUL32, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - "mla", " $dst, $a, $b, $c", + IIC_iMAC32, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; +def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + IIC_iMAC32, "mls", " $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, + Requires<[IsARM, HasV6T2]>; + // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), + (ins GPR:$a, GPR:$b), IIC_iMUL64, "smull", " $ldst, $hdst, $a, $b", []>; def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), + (ins GPR:$a, GPR:$b), IIC_iMUL64, "umull", " $ldst, $hdst, $a, $b", []>; } // Multiply + accumulate def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), + (ins GPR:$a, GPR:$b), IIC_iMAC64, "smlal", " $ldst, $hdst, $a, $b", []>; def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), + (ins GPR:$a, GPR:$b), IIC_iMAC64, "umlal", " $ldst, $hdst, $a, $b", []>; def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), + (ins GPR:$a, GPR:$b), IIC_iMAC64, "umaal", " $ldst, $hdst, $a, $b", []>, Requires<[IsARM, HasV6]>; } // neverHasSideEffects // Most significant word multiply 
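The most-significant-word multiplies defined next return the top 32 bits of the signed 64-bit product, which is exactly the (mulhs ...) in their patterns; smmla just adds an accumulator. As a model:

#include <cstdint>

// smmul: high word of the signed 64-bit product (mulhs).
int32_t smmul(int32_t A, int32_t B) {
  return (int32_t)(((int64_t)A * (int64_t)B) >> 32);
}

// smmla: the same high word plus an accumulator.
int32_t smmla(int32_t A, int32_t B, int32_t C) {
  return C + smmul(A, B);
}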
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - "smmul", " $dst, $a, $b", + IIC_iMUL32, "smmul", " $dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1056,7 +1215,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), } def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - "smmla", " $dst, $a, $b, $c", + IIC_iMAC32, "smmla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1064,7 +1223,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - "smmls", " $dst, $a, $b, $c", + IIC_iMAC32, "smmls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b1101; @@ -1072,7 +1231,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), multiclass AI_smul { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - !strconcat(opc, "bb"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "bb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1081,7 +1240,7 @@ multiclass AI_smul { } def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - !strconcat(opc, "bt"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1090,7 +1249,7 @@ multiclass AI_smul { } def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - !strconcat(opc, "tb"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "tb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1099,7 +1258,7 @@ multiclass AI_smul { } def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - !strconcat(opc, "tt"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "tt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1108,7 +1267,7 @@ multiclass AI_smul { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - !strconcat(opc, "wb"), " $dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1117,7 +1276,7 @@ multiclass AI_smul { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - !strconcat(opc, "wt"), " $dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1129,7 +1288,7 @@ multiclass AI_smul { multiclass AI_smla { def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>, @@ -1139,7 +1298,7 @@ multiclass AI_smla { } def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), 
(sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1148,7 +1307,7 @@ multiclass AI_smla { } def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1157,7 +1316,7 @@ multiclass AI_smla { } def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1166,7 +1325,7 @@ multiclass AI_smla { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1175,7 +1334,7 @@ multiclass AI_smla { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1194,7 +1353,7 @@ defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // Misc. Arithmetic Instructions. // -def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), +def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, "clz", " $dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> { let Inst{7-4} = 0b0001; @@ -1202,7 +1361,7 @@ def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), let Inst{19-16} = 0b1111; } -def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), +def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, "rev", " $dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0011; @@ -1210,7 +1369,7 @@ def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), let Inst{19-16} = 0b1111; } -def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), +def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, "rev16", " $dst, $src", [(set GPR:$dst, (or (and (srl GPR:$src, (i32 8)), 0xFF), @@ -1223,7 +1382,7 @@ def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), let Inst{19-16} = 0b1111; } -def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), +def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, "revsh", " $dst, $src", [(set GPR:$dst, (sext_inreg @@ -1237,7 +1396,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - "pkhbt", " $dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]>, @@ -1254,7 +1413,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - "pkhtb", " $dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt", 
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]>, Requires<[IsARM, HasV6]> { @@ -1300,21 +1459,23 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, - "mov", " $dst, $true", + IIC_iCMOVr, "mov", " $dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP; def MOVCCs : AI1<0b1101, (outs GPR:$dst), - (ins GPR:$false, so_reg:$true), DPSoRegFrm, + (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr, "mov", " $dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP; def MOVCCi : AI1<0b1101, (outs GPR:$dst), - (ins GPR:$false, so_imm:$true), DPFrm, + (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi, "mov", " $dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $dst">, UnaryDP; + RegConstraint<"$false = $dst">, UnaryDP { + let Inst{25} = 1; +} //===----------------------------------------------------------------------===// @@ -1324,14 +1485,14 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), // __aeabi_read_tp preserves the registers r1-r3. let isCall = 1, Defs = [R0, R12, LR, CPSR] in { - def TPsoft : ABXI<0b1011, (outs), (ins), + def TPsoft : ABXI<0b1011, (outs), (ins), IIC_Br, "bl __aeabi_read_tp", [(set R0, ARMthread_pointer)]>; } //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics -// eh_sjlj_setjmp() is a three instruction sequence to store the return +// eh_sjlj_setjmp() is an instruction sequence to store the return // address and save #0 in R0 for the non-longjmp case. // Since by its nature we may be coming from some other function to get // here, and we're using the stack frame for the containing function to @@ -1342,13 +1503,19 @@ let isCall = 1, // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. let Defs = - [ R0, R1, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, - D0, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15 ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, + D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, + D31 ] in { def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src), - AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, - "add r0, pc, #4\n\t" - "str r0, [$src, #+4]\n\t" - "mov r0, #0 @ eh_setjmp", "", + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "str sp, [$src, #+8] @ eh_setjmp begin\n\t" + "add r12, pc, #8\n\t" + "str r12, [$src, #+4]\n\t" + "mov r0, #0\n\t" + "add pc, pc, #0\n\t" + "mov r0, #1 @ eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; } @@ -1366,25 +1533,36 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Two piece so_imms. 
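A rough model of the direct (non-longjmp) path of the eh_sjlj_setjmp expansion above; the +4/+8 buffer offsets follow the asm string, and anything beyond that about the buffer layout is an assumption:

#include <cstdint>

// Direct-path behaviour: stash the stack pointer and a resume address
// in the buffer and yield 0 in R0.  A matching longjmp later restores
// SP, jumps to Buf[1], and the resumed code lands on the
// `mov r0, #1 @ eh_setjmp end` arm of the sequence.
uint32_t ehSetjmpDirectPath(uint32_t *Buf, uint32_t SP, uint32_t Resume) {
  Buf[2] = SP;      // "str sp, [$src, #+8]"
  Buf[1] = Resume;  // "str r12, [$src, #+4]"
  return 0;         // "mov r0, #0" on the non-longjmp path
}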
let isReMaterializable = 1 in -def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, +def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), + Pseudo, IIC_iMOVi, "mov", " $dst, $src", - [(set GPR:$dst, so_imm2part:$src)]>; + [(set GPR:$dst, so_imm2part:$src)]>, + Requires<[IsARM, NoV6T2]>; def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), - (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; + (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), - (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; + (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; + +// 32-bit immediate using movw + movt. +// This is a single pseudo instruction to make it re-materializable. Remove +// when we can do generalized remat. +let isReMaterializable = 1 in +def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, + "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + [(set GPR:$dst, (i32 imm:$src))]>, + Requires<[IsARM, HasV6T2]>; // TODO: add,sub,and, 3-instr forms? // Direct calls def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsNotDarwin]>; + Requires<[IsARM, IsNotDarwin]>; def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsDarwin]>; + Requires<[IsARM, IsDarwin]>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a62597bad8409..cd370aa97adbd 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -65,8 +65,28 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; -def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ", - SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>; +def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; + +// VDUPLANE can produce a quad-register result from a double-register source, +// so the result is not constrained to match the source. +def NEONvduplane : SDNode<"ARMISD::VDUPLANE", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisVT<2, i32>]>>; + +def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; +def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; + +def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; +def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; +def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; +def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; + +def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; +def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; +def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; +def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; //===----------------------------------------------------------------------===// // NEON operand definitions @@ -87,28 +107,409 @@ def addrmode_neonldstm : Operand, //===----------------------------------------------------------------------===// /* TODO: Take advantage of vldm. 
-let mayLoad = 1 in { +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), + IIC_fpLoadm, "vldm${addr:submode} ${addr:base}, $dst1", - []>; + []> { + let Inst{27-25} = 0b110; + let Inst{20} = 1; + let Inst{11-9} = 0b101; +} def VLDMS : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), + IIC_fpLoadm, "vldm${addr:submode} ${addr:base}, $dst1", - []>; + []> { + let Inst{27-25} = 0b110; + let Inst{20} = 1; + let Inst{11-9} = 0b101; +} } */ // Use vldmia to load a Q register as a D register pair. -def VLDRQ : NI<(outs QPR:$dst), (ins GPR:$addr), +def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), + IIC_fpLoadm, "vldmia $addr, ${dst:dregpair}", - [(set QPR:$dst, (v2f64 (load GPR:$addr)))]>; + [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { + let Inst{27-25} = 0b110; + let Inst{24} = 0; // P bit + let Inst{23} = 1; // U bit + let Inst{20} = 1; + let Inst{11-9} = 0b101; +} // Use vstmia to store a Q register as a D register pair. -def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr), +def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), + IIC_fpStorem, "vstmia $addr, ${src:dregpair}", - [(store (v2f64 QPR:$src), GPR:$addr)]>; + [(store (v2f64 QPR:$src), addrmode4:$addr)]> { + let Inst{27-25} = 0b110; + let Inst{24} = 0; // P bit + let Inst{23} = 1; // U bit + let Inst{20} = 0; + let Inst{11-9} = 0b101; +} + +// VLD1 : Vector Load (multiple single elements) +class VLD1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, + !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "", + [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; +class VLD1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, + !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "", + [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; + +def VLD1d8 : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>; +def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>; +def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>; +def VLD1df : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>; +def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>; + +def VLD1q8 : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>; +def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>; +def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>; +def VLD1qf : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>; +def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>; + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { + +// VLD2 : Vector Load (multiple 2-element structures) +class VLD2D op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD2, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>; +class VLD2Q op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0011,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD2, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + "", []>; + +def VLD2d8 : VLD2D<0b0000, "vld2.8">; +def VLD2d16 : VLD2D<0b0100, "vld2.16">; +def VLD2d32 : VLD2D<0b1000, "vld2.32">; +def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD1, + "vld1.64\t\\{$dst1,$dst2\\}, $addr", "", []>; + +def VLD2q8 : VLD2Q<0b0000, "vld2.8">; +def VLD2q16 : VLD2Q<0b0100, 
"vld2.16">; +def VLD2q32 : VLD2Q<0b1000, "vld2.32">; + +// VLD3 : Vector Load (multiple 3-element structures) +class VLD3D op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD3, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>; +class VLD3WB op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr), IIC_VLD3, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), + "$addr.addr = $wb", []>; + +def VLD3d8 : VLD3D<0b0000, "vld3.8">; +def VLD3d16 : VLD3D<0b0100, "vld3.16">; +def VLD3d32 : VLD3D<0b1000, "vld3.32">; +def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, + "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; + +// vld3 to double-spaced even registers. +def VLD3q8a : VLD3WB<0b0000, "vld3.8">; +def VLD3q16a : VLD3WB<0b0100, "vld3.16">; +def VLD3q32a : VLD3WB<0b1000, "vld3.32">; + +// vld3 to double-spaced odd registers. +def VLD3q8b : VLD3WB<0b0000, "vld3.8">; +def VLD3q16b : VLD3WB<0b0100, "vld3.16">; +def VLD3q32b : VLD3WB<0b1000, "vld3.32">; + +// VLD4 : Vector Load (multiple 4-element structures) +class VLD4D op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0000,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD4, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + "", []>; +class VLD4WB op7_4, string OpcodeStr> + : NLdSt<0,0b10,0b0001,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr), IIC_VLD4, + !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + "$addr.addr = $wb", []>; + +def VLD4d8 : VLD4D<0b0000, "vld4.8">; +def VLD4d16 : VLD4D<0b0100, "vld4.16">; +def VLD4d32 : VLD4D<0b1000, "vld4.32">; +def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, + "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; + +// vld4 to double-spaced even registers. +def VLD4q8a : VLD4WB<0b0000, "vld4.8">; +def VLD4q16a : VLD4WB<0b0100, "vld4.16">; +def VLD4q32a : VLD4WB<0b1000, "vld4.32">; + +// vld4 to double-spaced odd registers. +def VLD4q8b : VLD4WB<0b0000, "vld4.8">; +def VLD4q16b : VLD4WB<0b0100, "vld4.16">; +def VLD4q32b : VLD4WB<0b1000, "vld4.32">; + +// VLD1LN : Vector Load (single element to one lane) +// FIXME: Not yet implemented. + +// VLD2LN : Vector Load (single 2-element structure to one lane) +class VLD2LN op11_8, string OpcodeStr> + : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2, + !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"), + "$src1 = $dst1, $src2 = $dst2", []>; + +def VLD2LNd8 : VLD2LN<0b0001, "vld2.8">; +def VLD2LNd16 : VLD2LN<0b0101, "vld2.16">; +def VLD2LNd32 : VLD2LN<0b1001, "vld2.32">; + +// vld2 to double-spaced even registers. +def VLD2LNq16a: VLD2LN<0b0101, "vld2.16">; +def VLD2LNq32a: VLD2LN<0b1001, "vld2.32">; + +// vld2 to double-spaced odd registers. 
+def VLD2LNq16b: VLD2LN<0b0101, "vld2.16">;
+def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">;
+
+// VLD3LN : Vector Load (single 3-element structure to one lane)
+class VLD3LN<bits<4> op11_8, string OpcodeStr>
+  : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+          nohash_imm:$lane), IIC_VLD3,
+          !strconcat(OpcodeStr,
+          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"),
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
+
+def VLD3LNd8  : VLD3LN<0b0010, "vld3.8">;
+def VLD3LNd16 : VLD3LN<0b0110, "vld3.16">;
+def VLD3LNd32 : VLD3LN<0b1010, "vld3.32">;
+
+// vld3 to double-spaced even registers.
+def VLD3LNq16a: VLD3LN<0b0110, "vld3.16">;
+def VLD3LNq32a: VLD3LN<0b1010, "vld3.32">;
+
+// vld3 to double-spaced odd registers.
+def VLD3LNq16b: VLD3LN<0b0110, "vld3.16">;
+def VLD3LNq32b: VLD3LN<0b1010, "vld3.32">;
+
+// VLD4LN : Vector Load (single 4-element structure to one lane)
+class VLD4LN<bits<4> op11_8, string OpcodeStr>
+  : NLdSt<1,0b10,op11_8,0b0000,
+          (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+          nohash_imm:$lane), IIC_VLD4,
+          !strconcat(OpcodeStr,
+          "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"),
+          "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
+
+def VLD4LNd8  : VLD4LN<0b0011, "vld4.8">;
+def VLD4LNd16 : VLD4LN<0b0111, "vld4.16">;
+def VLD4LNd32 : VLD4LN<0b1011, "vld4.32">;
+
+// vld4 to double-spaced even registers.
+def VLD4LNq16a: VLD4LN<0b0111, "vld4.16">;
+def VLD4LNq32a: VLD4LN<0b1011, "vld4.32">;
+
+// vld4 to double-spaced odd registers.
+def VLD4LNq16b: VLD4LN<0b0111, "vld4.16">;
+def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">;
+
+// VLD1DUP : Vector Load (single element to all lanes)
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+// FIXME: Not yet implemented. 
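The VLD2/VLD3/VLD4 families above load interleaved structures and de-interleave them across destination registers. A model of a two-lane vld2.32, with plain arrays standing in for D registers:

#include <cstdint>

// vld2.32 {d0, d1}, [addr]: member j of structure i lands in lane i of
// register j, so the structure members are split across registers.
void vld2_32(const uint32_t *Mem, uint32_t D0[2], uint32_t D1[2]) {
  for (unsigned i = 0; i != 2; ++i) {
    D0[i] = Mem[2 * i + 0];   // first member of pair i
    D1[i] = Mem[2 * i + 1];   // second member of pair i
  }
}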
+} // mayLoad = 1, hasExtraDefRegAllocReq = 1 + +// VST1 : Vector Store (multiple single elements) +class VST1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "", + [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; +class VST1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, + !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "", + [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; + +let hasExtraSrcRegAllocReq = 1 in { +def VST1d8 : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>; +def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>; +def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>; +def VST1df : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>; +def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>; + +def VST1q8 : VST1Q<0b0000, "vst1.8", v16i8, int_arm_neon_vst1>; +def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>; +def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>; +def VST1qf : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>; +def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>; +} // hasExtraSrcRegAllocReq + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + +// VST2 : Vector Store (multiple 2-element structures) +class VST2D op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b1000,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>; +class VST2Q op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0011,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + "", []>; + +def VST2d8 : VST2D<0b0000, "vst2.8">; +def VST2d16 : VST2D<0b0100, "vst2.16">; +def VST2d32 : VST2D<0b1000, "vst2.32">; +def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>; + +def VST2q8 : VST2Q<0b0000, "vst2.8">; +def VST2q16 : VST2Q<0b0100, "vst2.16">; +def VST2q32 : VST2Q<0b1000, "vst2.32">; + +// VST3 : Vector Store (multiple 3-element structures) +class VST3D op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0100,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>; +class VST3WB op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), + "$addr.addr = $wb", []>; + +def VST3d8 : VST3D<0b0000, "vst3.8">; +def VST3d16 : VST3D<0b0100, "vst3.16">; +def VST3d32 : VST3D<0b1000, "vst3.32">; +def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, + "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>; + +// vst3 to double-spaced even registers. +def VST3q8a : VST3WB<0b0000, "vst3.8">; +def VST3q16a : VST3WB<0b0100, "vst3.16">; +def VST3q32a : VST3WB<0b1000, "vst3.32">; + +// vst3 to double-spaced odd registers. 
+def VST3q8b : VST3WB<0b0000, "vst3.8">; +def VST3q16b : VST3WB<0b0100, "vst3.16">; +def VST3q32b : VST3WB<0b1000, "vst3.32">; + +// VST4 : Vector Store (multiple 4-element structures) +class VST4D op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0000,op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + "", []>; +class VST4WB op7_4, string OpcodeStr> + : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + "$addr.addr = $wb", []>; + +def VST4d8 : VST4D<0b0000, "vst4.8">; +def VST4d16 : VST4D<0b0100, "vst4.16">; +def VST4d32 : VST4D<0b1000, "vst4.32">; +def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + DPR:$src4), IIC_VST, + "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; + +// vst4 to double-spaced even registers. +def VST4q8a : VST4WB<0b0000, "vst4.8">; +def VST4q16a : VST4WB<0b0100, "vst4.16">; +def VST4q32a : VST4WB<0b1000, "vst4.32">; + +// vst4 to double-spaced odd registers. +def VST4q8b : VST4WB<0b0000, "vst4.8">; +def VST4q16b : VST4WB<0b0100, "vst4.16">; +def VST4q32b : VST4WB<0b1000, "vst4.32">; + +// VST1LN : Vector Store (single element from one lane) +// FIXME: Not yet implemented. + +// VST2LN : Vector Store (single 2-element structure from one lane) +class VST2LN op11_8, string OpcodeStr> + : NLdSt<1,0b00,op11_8,0b0000, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VST, + !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"), + "", []>; + +def VST2LNd8 : VST2LN<0b0000, "vst2.8">; +def VST2LNd16 : VST2LN<0b0100, "vst2.16">; +def VST2LNd32 : VST2LN<0b1000, "vst2.32">; + +// vst2 to double-spaced even registers. +def VST2LNq16a: VST2LN<0b0100, "vst2.16">; +def VST2LNq32a: VST2LN<0b1000, "vst2.32">; + +// vst2 to double-spaced odd registers. +def VST2LNq16b: VST2LN<0b0100, "vst2.16">; +def VST2LNq32b: VST2LN<0b1000, "vst2.32">; + +// VST3LN : Vector Store (single 3-element structure from one lane) +class VST3LN op11_8, string OpcodeStr> + : NLdSt<1,0b00,op11_8,0b0000, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST, + !strconcat(OpcodeStr, + "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>; + +def VST3LNd8 : VST3LN<0b0010, "vst3.8">; +def VST3LNd16 : VST3LN<0b0110, "vst3.16">; +def VST3LNd32 : VST3LN<0b1010, "vst3.32">; + +// vst3 to double-spaced even registers. +def VST3LNq16a: VST3LN<0b0110, "vst3.16">; +def VST3LNq32a: VST3LN<0b1010, "vst3.32">; + +// vst3 to double-spaced odd registers. +def VST3LNq16b: VST3LN<0b0110, "vst3.16">; +def VST3LNq32b: VST3LN<0b1010, "vst3.32">; + +// VST4LN : Vector Store (single 4-element structure from one lane) +class VST4LN op11_8, string OpcodeStr> + : NLdSt<1,0b00,op11_8,0b0000, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST, + !strconcat(OpcodeStr, + "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"), + "", []>; + +def VST4LNd8 : VST4LN<0b0011, "vst4.8">; +def VST4LNd16 : VST4LN<0b0111, "vst4.16">; +def VST4LNd32 : VST4LN<0b1011, "vst4.32">; + +// vst4 to double-spaced even registers. +def VST4LNq16a: VST4LN<0b0111, "vst4.16">; +def VST4LNq32a: VST4LN<0b1011, "vst4.32">; + +// vst4 to double-spaced odd registers. 
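The VST2 through VST4 stores (and their lane variants) are the mirror image, re-interleaving register contents back into structure order in memory. A model of vst2.32 under the same array-for-register convention:

#include <cstdint>

// vst2.32 {d0, d1}, [addr]: interleave the two registers back into
// adjacent structure members in memory.
void vst2_32(uint32_t *Mem, const uint32_t D0[2], const uint32_t D1[2]) {
  for (unsigned i = 0; i != 2; ++i) {
    Mem[2 * i + 0] = D0[i];
    Mem[2 * i + 1] = D1[i];
  }
}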
+// vst4 to double-spaced even registers.
+def VST4LNq16a: VST4LN<0b0111, "vst4.16">;
+def VST4LNq32a: VST4LN<0b1011, "vst4.32">;
+
+// vst4 to double-spaced odd registers.
+def VST4LNq16b: VST4LN<0b0111, "vst4.16">;
+def VST4LNq32b: VST4LN<0b1011, "vst4.32">;
+
+} // mayStore = 1, hasExtraSrcRegAllocReq = 1
 
 //===----------------------------------------------------------------------===//
@@ -117,18 +518,27 @@ def VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr),
 
 // Extract D sub-registers of Q registers.
 // (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
-def SubReg_i8_reg  : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
-}]>;
-def SubReg_i16_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
-}]>;
-def SubReg_i32_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
-}]>;
-def SubReg_f64_reg : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
-}]>;
+def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
+}]>;
 
 // Translate lane numbers from Q registers to D subregs.
 def SubReg_i8_lane  : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
 }]>;
 def SubReg_i16_lane : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
 }]>;
 def SubReg_i32_lane : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
 }]>;
 
 // Basic 2-register operations, both double- and quad-register.
 class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode>
   : N2V;
 class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
            ValueType ResTy, ValueType OpTy, SDNode OpNode>
   : N2V;
 
+// Basic 2-register operations, scalar single-precision.
+class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+            bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+            ValueType ResTy, ValueType OpTy, SDNode OpNode>
+  : N2V;
+
+class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
+  : NEONFPPat<(ResTy (OpNode SPR:$a)),
+       (EXTRACT_SUBREG
+           (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
+           arm_ssubreg_0)>;
+
 // Basic 2-register intrinsics, both double- and quad-register.
 class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+              bits<2> op17_16, bits<5> op11_7, bit op4,
+              InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V;
 class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
-              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+              bits<2> op17_16, bits<5> op11_7, bit op4,
+              InstrItinClass itin, string OpcodeStr,
               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V;
 
+// Basic 2-register intrinsics, scalar single-precision
+class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+               bits<2> op17_16, bits<5> op11_7, bit op4,
+               InstrItinClass itin, string OpcodeStr,
+               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N2V;
+
+class N2VDIntsPat<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a)),
+       (EXTRACT_SUBREG
+           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
+           arm_ssubreg_0)>;
+
 // Narrow 2-register intrinsics.
 class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
               bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
-              string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+              InstrItinClass itin, string OpcodeStr,
+              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
   : N2V;
 
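N2VNInt is the class behind the narrowing operations (VMOVN, VQMOVN and friends), which halve the element width of a Q register into a D register. A hedged arm_neon.h sketch of what such patterns correspond to at the source level:

    #include <arm_neon.h>

    /* Plain narrowing move: keep the low 8 bits of each 16-bit lane. */
    int8x8_t narrow(int16x8_t q) {
        return vmovn_s16(q);      /* vmovn.i16 dN, qM */
    }

    /* Saturating narrowing move: clamp instead of truncating. */
    int8x8_t narrow_sat(int16x8_t q) {
        return vqmovn_s16(q);     /* vqmovn.s16 dN, qM */
    }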
 // Long 2-register intrinsics.  (This is currently only used for VMOVL and is
 // derived from N2VImm instead of N2V because of the way the size is encoded.)
 class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
-              bit op6, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD,
-              Intrinsic IntOp>
+              bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
+              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
   : N2VImm;
 
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr>
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2),
+        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        "$src1 = $dst1, $src2 = $dst2", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+                  InstrItinClass itin, string OpcodeStr>
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
+        (ins QPR:$src1, QPR:$src2), itin,
+        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        "$src1 = $dst1, $src2 = $dst2", []>;
+
 // Basic 3-register operations, both double- and quad-register.
 class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-           string OpcodeStr, ValueType ResTy, ValueType OpTy,
+           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
   : N3V {
   let isCommutable = Commutable;
 }
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+             InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (Ty DPR:$dst),
+              (Ty (ShOp (Ty DPR:$src1),
+                        (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
+                                          imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+               string OpcodeStr, ValueType Ty, SDNode ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+        IIC_VMULi16D,
+        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (Ty DPR:$dst),
+              (Ty (ShOp (Ty DPR:$src1),
+                        (Ty (NEONvduplane (Ty DPR_8:$src2),
+                                          imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+
 class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-           string OpcodeStr, ValueType ResTy, ValueType OpTy,
+           InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
            SDNode OpNode, bit Commutable>
   : N3V {
   let isCommutable = Commutable;
 }
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+             InstrItinClass itin, string OpcodeStr,
+             ValueType ResTy, ValueType OpTy, SDNode ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (ResTy QPR:$dst),
+              (ResTy (ShOp (ResTy QPR:$src1),
+                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+                                                imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
+               string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+        IIC_VMULi16Q,
+        !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (ResTy QPR:$dst),
+              (ResTy (ShOp (ResTy QPR:$src1),
+                           (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+                                                imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+
+// Basic 3-register operations, scalar single-precision
+class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+            string OpcodeStr, ValueType ResTy, ValueType OpTy,
+            SDNode OpNode, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+class N3VDsPat<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+       (EXTRACT_SUBREG
+           (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
+                 (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
+           arm_ssubreg_0)>;
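The scalar single-precision ("s") classes and the NEONFPPat patterns exist so that plain scalar f32 arithmetic can be run on the NEON unit: the scalar is inserted into a D register (INSERT_SUBREG), operated on there, and lane 0 extracted. Nothing changes at the C level; a sketch of the kind of code this affects, assuming a target where the backend enables NEON for single-precision FP (e.g. Cortex-A8 with -mfpu=neon):

    /* Plain scalar code; under the NEON-for-FP mode the fadd below may be
     * selected as vadd.f32 on a D register instead of VFP's vadds. */
    float scalar_add(float a, float b) {
        return a + b;
    }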
 
 // Basic 3-register intrinsics, both double- and quad-register.
 class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-              string OpcodeStr, ValueType ResTy, ValueType OpTy,
+              InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp, bit Commutable>
   : N3V {
   let isCommutable = Commutable;
 }
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (Ty DPR:$dst),
+              (Ty (IntOp (Ty DPR:$src1),
+                         (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
+                                           imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, ValueType Ty, Intrinsic IntOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (Ty DPR:$dst),
+              (Ty (IntOp (Ty DPR:$src1),
+                         (Ty (NEONvduplane (Ty DPR_8:$src2),
+                                           imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+
 class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-              string OpcodeStr, ValueType ResTy, ValueType OpTy,
+              InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp, bit Commutable>
   : N3V {
   let isCommutable = Commutable;
 }
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (ResTy QPR:$dst),
+              (ResTy (IntOp (ResTy QPR:$src1),
+                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
+                                                 imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+        itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "",
+        [(set (ResTy QPR:$dst),
+              (ResTy (IntOp (ResTy QPR:$src1),
+                            (ResTy (NEONvduplane (OpTy DPR_8:$src2),
+                                                 imm:$lane)))))]> {
+  let isCommutable = 0;
+}
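The SL (scalar/lane) classes operate on a single lane of the second operand, the $src2[$lane] in the assembly strings above; the DPR_8/DPR_VFP2 operand classes encode the architectural restriction that the lane register be d0-d7 for 16-bit elements and d0-d15 for 32-bit ones. At the C level this corresponds to the _lane_ intrinsics (illustrative, invented name):

    #include <arm_neon.h>

    /* Multiply every lane of a by lane 2 of v; the lane index must be a
     * compile-time constant, matching the imm:$lane operand above. */
    int16x4_t mul_by_lane(int16x4_t a, int16x4_t v) {
        return vmul_lane_s16(a, v, 2);   /* vmul.i16 dN, dM, dK[2] */
    }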
 
 // Multiply-Add/Sub operations, both double- and quad-register.
 class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
+                InstrItinClass itin, string OpcodeStr,
+                ValueType Ty, SDNode MulOp, SDNode OpNode>
   : N3V;
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$dst),
+        (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        [(set (Ty DPR:$dst),
+              (Ty (ShOp (Ty DPR:$src1),
+                        (Ty (MulOp DPR:$src2,
+                                   (Ty (NEONvduplane (Ty DPR_VFP2:$src3),
+                                                     imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                    string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$dst),
+        (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        [(set (Ty DPR:$dst),
+              (Ty (ShOp (Ty DPR:$src1),
+                        (Ty (MulOp DPR:$src2,
+                                   (Ty (NEONvduplane (Ty DPR_8:$src3),
+                                                     imm:$lane)))))))]>;
+
 class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
+                InstrItinClass itin, string OpcodeStr, ValueType Ty,
+                SDNode MulOp, SDNode OpNode>
   : N3V;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                  SDNode MulOp, SDNode ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$dst),
+        (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        [(set (ResTy QPR:$dst),
+              (ResTy (ShOp (ResTy QPR:$src1),
+                           (ResTy (MulOp QPR:$src2,
+                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+                                                              imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                    string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                    SDNode MulOp, SDNode ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$dst),
+        (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+        !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst",
+        [(set (ResTy QPR:$dst),
+              (ResTy (ShOp (ResTy QPR:$src1),
+                           (ResTy (MulOp QPR:$src2,
+                                         (ResTy (NEONvduplane (OpTy DPR_8:$src3),
+                                                              imm:$lane)))))))]>;
+
+// Multiply-Add/Sub operations, scalar single-precision
+class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                 InstrItinClass itin, string OpcodeStr,
+                 ValueType Ty, SDNode MulOp, SDNode OpNode>
+  : N3V;
+
+class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+      (EXTRACT_SUBREG
+          (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
+                (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
+                (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
+          arm_ssubreg_0)>;
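The MulOp classes fold a multiply feeding an add or subtract into a single VMLA/VMLS. Illustrative arm_neon.h equivalent (invented name):

    #include <arm_neon.h>

    /* acc + a*b per lane; the fadd-of-fmul DAG shape here is exactly what
     * the MulOp patterns above fold into one vmla.f32. */
    float32x4_t mul_add(float32x4_t acc, float32x4_t a, float32x4_t b) {
        return vmlaq_f32(acc, a, b);     /* vmla.f32 qN, qM, qK */
    }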
 
 // Neon 3-argument intrinsics, both double- and quad-register.
 // The destination register is also used as the first source operand register.
 class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-               string OpcodeStr, ValueType ResTy, ValueType OpTy,
-               Intrinsic IntOp>
+               InstrItinClass itin, string OpcodeStr,
+               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V;
 class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-               string OpcodeStr, ValueType ResTy, ValueType OpTy,
-               Intrinsic IntOp>
+               InstrItinClass itin, string OpcodeStr,
+               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N3V;
 
@@ -268,19 +898,44 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 // Neon Long 3-argument intrinsic.  The destination register is
 // a quad-register and is also used as the first source operand register.
 class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-               string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+               InstrItinClass itin, string OpcodeStr,
+               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
   : N3V;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                 string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                   string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                   Intrinsic IntOp>
+  : N3V;
+
 // Narrowing 3-register intrinsics.
 class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType TyD, ValueType TyQ,
               Intrinsic IntOp, bit Commutable>
   : N3V {
   let isCommutable = Commutable;
@@ -288,21 +943,40 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
 
 // Long 3-register intrinsics.
 class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
-              string OpcodeStr, ValueType TyQ, ValueType TyD,
+              InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD,
               Intrinsic IntOp, bit Commutable>
   : N3V {
   let isCommutable = Commutable;
 }
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, ValueType ResTy, ValueType OpTy,
+                  Intrinsic IntOp>
+  : N3V;
 
 // Wide 3-register intrinsics.
 class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
               string OpcodeStr, ValueType TyQ, ValueType TyD,
               Intrinsic IntOp, bit Commutable>
   : N3V {
   let isCommutable = Commutable;
@@ -313,13 +987,13 @@ class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V;
 class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V;
 
 // Pairwise long 2-register accumulate intrinsics,
@@ -329,29 +1003,31 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                  bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V;
 class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
                  bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
   : N2V;
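The pairwise-long classes back VPADDL, and the *PLInt2 variants its accumulating form VPADAL. A hedged arm_neon.h sketch (invented names):

    #include <arm_neon.h>

    /* Pairwise add long: sum adjacent u8 pairs into wider u16 lanes. */
    uint16x4_t pair_sums(uint8x8_t v) {
        return vpaddl_u8(v);             /* vpaddl.u8 dN, dM */
    }

    /* Accumulating form: acc += pairwise long sums of v. */
    uint16x4_t pair_sums_acc(uint16x4_t acc, uint8x8_t v) {
        return vpadal_u8(acc, v);        /* vpadal.u8 dN, dM */
    }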
 
 // Shift by immediate,
 // both double- and quad-register.
 class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
-             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
+             bit op4, InstrItinClass itin, string OpcodeStr,
+             ValueType Ty, SDNode OpNode>
   : N2VImm;
 class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
-             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
+             bit op4, InstrItinClass itin, string OpcodeStr,
+             ValueType Ty, SDNode OpNode>
   : N2VImm;
 
@@ -360,17 +1036,17 @@ class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
              bit op6, bit op4, string OpcodeStr, ValueType ResTy,
              ValueType OpTy, SDNode OpNode>
   : N2VImm;
 
 // Narrow shift by immediate.
 class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
-             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
-             ValueType OpTy, SDNode OpNode>
+             bit op6, bit op4, InstrItinClass itin, string OpcodeStr,
+             ValueType ResTy, ValueType OpTy, SDNode OpNode>
   : N2VImm;
 
@@ -381,6 +1057,7 @@ class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                 bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
   : N2VImm;
 
@@ -388,6 +1065,7 @@ class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                 bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
   : N2VImm;
 
@@ -398,12 +1076,14 @@ class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                 bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
   : N2VImm;
 class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
                 bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
   : N2VImm;
 
@@ -413,14 +1093,14 @@ class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
               bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
   : N2VImm;
 class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
               bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
               Intrinsic IntOp>
   : N2VImm;
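These immediate-shift classes encode the shift count as an instruction immediate, so the corresponding intrinsics require a compile-time-constant count. Sketch (invented names):

    #include <arm_neon.h>

    /* Plain immediate shift right (VSHR). */
    int32x4_t shift_right(int32x4_t v) {
        return vshrq_n_s32(v, 3);        /* vshr.s32 qN, qM, #3 */
    }

    /* Shift-and-accumulate, the ShAdd (VSRA) form. */
    uint32x4_t shift_accum(uint32x4_t acc, uint32x4_t v) {
        return vsraq_n_u32(acc, v, 8);   /* vsra.u32 qN, qM, #8 */
    }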
 
@@ -428,50 +1108,68 @@ class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
 // Multiclasses
 //===----------------------------------------------------------------------===//
 
+// Abbreviations used in multiclass suffixes:
+//   Q = quarter int (8 bit) elements
+//   H = half int (16 bit) elements
+//   S = single int (32 bit) elements
+//   D = double int (64 bit) elements
+
 // Neon 3-register vector operations.
 
 // First with only element sizes of 8, 16 and 32 bits:
 multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                   InstrItinClass itinD16, InstrItinClass itinD32,
+                   InstrItinClass itinQ16, InstrItinClass itinQ32,
                    string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
   // 64-bit vector types.
-  def v8i8  : N3VD;
-  def v4i16 : N3VD;
-  def v2i32 : N3VD;
+  def v8i8  : N3VD;
+  def v4i16 : N3VD;
+  def v2i32 : N3VD;
 
   // 128-bit vector types.
-  def v16i8 : N3VQ;
-  def v8i16 : N3VQ;
-  def v4i32 : N3VQ;
+  def v16i8 : N3VQ;
+  def v8i16 : N3VQ;
+  def v4i32 : N3VQ;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+  def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>;
+  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), v2i32, ShOp>;
+  def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>;
+  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>;
 }
 
 // ....then also with element size 64 bits:
 multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                    InstrItinClass itinD, InstrItinClass itinQ,
                     string OpcodeStr, SDNode OpNode, bit Commutable = 0>
-  : N3V_QHS {
-  def v1i64 : N3VD;
-  def v2i64 : N3VQ;
+  : N3V_QHS {
+  def v1i64 : N3VD;
+  def v2i64 : N3VQ;
 }
 
 // Neon Narrowing 2-register vector intrinsics,
 //   source operand element sizes of 16, 32 and 64 bits:
 multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
-                       bits<5> op11_7, bit op6, bit op4, string OpcodeStr,
+                       bits<5> op11_7, bit op6, bit op4,
+                       InstrItinClass itin, string OpcodeStr,
                        Intrinsic IntOp> {
   def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
-                      !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
+                      itin, !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
   def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
-                      !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
+                      itin, !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
   def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
-                      !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
+                      itin, !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
 }
 
@@ -480,11 +1178,11 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
 multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
                        bit op4, string OpcodeStr, Intrinsic IntOp> {
   def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
-                      !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
+                      IIC_VQUNAiD, !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
   def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
-                      !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
+                      IIC_VQUNAiD, !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
   def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
-                      !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
+                      IIC_VQUNAiD, !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
 }
 
 // Neon 3-register vector intrinsics.
 
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                     InstrItinClass itinD16, InstrItinClass itinD32,
+                     InstrItinClass itinQ16, InstrItinClass itinQ32,
                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
   // 64-bit vector types.
-  def v4i16 : N3VDInt;
-  def v2i32 : N3VDInt;
 
   // 128-bit vector types.
-  def v8i16 : N3VQInt;
-  def v4i32 : N3VQInt;
 }
 
+multiclass N3VIntSL_HS<bits<4> op11_8,
+                       InstrItinClass itinD16, InstrItinClass itinD32,
+                       InstrItinClass itinQ16, InstrItinClass itinQ32,
+                       string OpcodeStr, Intrinsic IntOp> {
+  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, IntOp>;
+  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, IntOp>;
+  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>;
+  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>;
+}
+
 // ....then also with element size of 8 bits:
 multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                      InstrItinClass itinD16, InstrItinClass itinD32,
+                      InstrItinClass itinQ16, InstrItinClass itinQ32,
                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
-  : N3VInt_HS {
-  def v8i8  : N3VDInt;
-  def v16i8 : N3VQInt;
+  : N3VInt_HS {
+  def v8i8  : N3VDInt;
+  def v16i8 : N3VQInt;
 }
 
 // ....then also with element size of 64 bits:
 multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       InstrItinClass itinD16, InstrItinClass itinD32,
+                       InstrItinClass itinQ16, InstrItinClass itinQ32,
                        string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
-  : N3VInt_QHS {
-  def v1i64 : N3VDInt;
-  def v2i64 : N3VQInt;
+  : N3VInt_QHS {
+  def v1i64 : N3VDInt;
+  def v2i64 : N3VQInt;
 }
 
@@ -544,19 +1260,29 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
-  def v4i32 : N3VLInt;
-  def v2i64 : N3VLInt;
+                      InstrItinClass itin, string OpcodeStr,
+                      Intrinsic IntOp, bit Commutable = 0> {
+  def v4i32 : N3VLInt;
+  def v2i64 : N3VLInt;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+                        InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> {
+  def v4i16 : N3VLIntSL16;
+  def v2i32 : N3VLIntSL;
 }
 
 // ....then also with element size of 8 bits:
 multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
-                       string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
-  : N3VLInt_HS {
-  def v8i16 : N3VLInt;
+                       InstrItinClass itin, string OpcodeStr,
+                       Intrinsic IntOp, bit Commutable = 0>
-  : N3VLInt_HS {
+  : N3VLInt_HS {
+  def v8i16 : N3VLInt;
 }
 
@@ -576,43 +1302,58 @@ multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
 // Neon Multiply-Op vector operations,
 //   element sizes of 8, 16 and 32 bits:
 multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                        InstrItinClass itinD16, InstrItinClass itinD32,
+                        InstrItinClass itinQ16, InstrItinClass itinQ32,
                         string OpcodeStr, SDNode OpNode> {
   // 64-bit vector types.
-  def v8i8  : N3VDMulOp;
-  def v4i16 : N3VDMulOp;
-  def v2i32 : N3VDMulOp;
 
   // 128-bit vector types.
-  def v16i8 : N3VQMulOp;
-  def v8i16 : N3VQMulOp;
-  def v4i32 : N3VQMulOp;
 }
 
+multiclass N3VMulOpSL_HS<bits<4> op11_8,
+                         InstrItinClass itinD16, InstrItinClass itinD32,
+                         InstrItinClass itinQ16, InstrItinClass itinQ32,
+                         string OpcodeStr, SDNode ShOp> {
+  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
+                            !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>;
+  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
+                          !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>;
+  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
+                            !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>;
+  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
+                          !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>;
+}
 
 // Neon 3-argument intrinsics,
 //   element sizes of 8, 16 and 32 bits:
 multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, Intrinsic IntOp> {
   // 64-bit vector types.
-  def v8i8  : N3VDInt3;
-  def v4i16 : N3VDInt3;
-  def v2i32 : N3VDInt3;
 
   // 128-bit vector types.
-  def v16i8 : N3VQInt3;
-  def v8i16 : N3VQInt3;
-  def v4i32 : N3VQInt3;
 }
 
@@ -622,17 +1363,25 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
 // First with only element sizes of 16 and 32 bits:
 multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, Intrinsic IntOp> {
-  def v4i32 : N3VLInt3;
-  def v2i64 : N3VLInt3;
+  def v4i32 : N3VLInt3;
+  def v2i64 : N3VLInt3;
 }
 
+multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
+                         string OpcodeStr, Intrinsic IntOp> {
+  def v4i16 : N3VLInt3SL16;
+  def v2i32 : N3VLInt3SL;
+}
+
 // ....then also with element size of 8 bits:
 multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, Intrinsic IntOp>
   : N3VLInt3_HS {
-  def v8i16 : N3VLInt3;
+  def v8i16 : N3VLInt3;
 }
 
@@ -640,23 +1389,24 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
 // Neon 2-register vector intrinsics,
 //   element sizes of 8, 16 and 32 bits:
 multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
-                      bits<5> op11_7, bit op4, string OpcodeStr,
-                      Intrinsic IntOp> {
+                      bits<5> op11_7, bit op4,
+                      InstrItinClass itinD, InstrItinClass itinQ,
+                      string OpcodeStr, Intrinsic IntOp> {
   // 64-bit vector types.
   def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                      !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
+                      itinD, !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>;
   def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                      !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
+                      itinD, !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>;
   def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                      !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
+                      itinD, !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>;
 
   // 128-bit vector types.
   def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
-                      !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
+                      itinQ, !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>;
   def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
-                      !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
+                      itinQ, !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>;
   def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
-                      !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
+                      itinQ, !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>;
 }
 
@@ -709,25 +1459,25 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
 // Neon 2-register vector shift by immediate,
 //   element sizes of 8, 16, 32 and 64 bits:
 multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
-                      string OpcodeStr, SDNode OpNode> {
+                      InstrItinClass itin, string OpcodeStr, SDNode OpNode> {
   // 64-bit vector types.
-  def v8i8  : N2VDSh;
-  def v4i16 : N2VDSh;
-  def v2i32 : N2VDSh;
-  def v1i64 : N2VDSh;
 
   // 128-bit vector types.
-  def v16i8 : N2VQSh;
-  def v8i16 : N2VQSh;
-  def v4i32 : N2VQSh;
-  def v2i64 : N2VQSh;
 }
 
@@ -790,24 +1540,30 @@ multiclass N2VShIns_QHSD<bit op24, bits<4> op11_8, bit op4,
 // Vector Add Operations.
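As context for the VADD/VQADD patterns that follow (illustrative only, invented names): the difference between the plain and the saturating add is wraparound versus clamping:

    #include <arm_neon.h>

    /* Wrapping add (VADD) vs. saturating add (VQADD): vqadd clamps
     * 0x7f + 1 to 0x7f instead of wrapping to 0x80. */
    int8x8_t add_wrap(int8x8_t a, int8x8_t b) { return vadd_s8(a, b); }
    int8x8_t add_sat(int8x8_t a, int8x8_t b)  { return vqadd_s8(a, b); }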
// VADD : Vector Add (integer and floating-point) -defm VADD : N3V_QHSD<0, 0, 0b1000, 0, "vadd.i", add, 1>; -def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd, 1>; -def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, "vadd.f32", v4f32, v4f32, fadd, 1>; +defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>; +def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>; +def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, "vaddl.s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, "vaddl.u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add -defm VHADDs : N3VInt_QHS<0,0,0b0000,0, "vhadd.s", int_arm_neon_vhadds, 1>; -defm VHADDu : N3VInt_QHS<1,0,0b0000,0, "vhadd.u", int_arm_neon_vhaddu, 1>; +defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add -defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, "vrhadd.s", int_arm_neon_vrhadds, 1>; -defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, "vrhadd.u", int_arm_neon_vrhaddu, 1>; +defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add -defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, "vqadd.s", int_arm_neon_vqadds, 1>; -defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, "vqadd.u", int_arm_neon_vqaddu, 1>; +defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) @@ -816,64 +1572,208 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; // Vector Multiply Operations. 
// VMUL : Vector Multiply (integer, polynomial and floating-point) -defm VMUL : N3V_QHS<0, 0, 0b1001, 1, "vmul.i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v8i8, v8i8, +defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, + IIC_VMULi32Q, "vmul.i", mul, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v16i8, v16i8, +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; -def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul, 1>; -def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, "vmul.f32", v4f32, v4f32, fmul, 1>; +def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>; +def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>; +defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>; +def : Pat<(v8i16 (mul (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), + (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (mul (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), + (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), + (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), + (v4f32 (VMULslfq (v4f32 QPR:$src1), + (v2f32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + // VQDMULH : Vector Saturating Doubling Multiply Returning High Half -defm VQDMULH : N3VInt_HS<0,0,0b1011,0, "vqdmulh.s", int_arm_neon_vqdmulh, 1>; +defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh.s", int_arm_neon_vqdmulh, 1>; +defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh.s", int_arm_neon_vqdmulh>; +def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), + (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), + (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half -defm VQRDMULH : N3VInt_HS<1,0,0b1011,0, "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; +defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; +defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh.s", int_arm_neon_vqrdmulh>; +def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), + (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 
(int_arm_neon_vqrdmulh (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), + (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, "vmull.s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, "vmull.u", int_arm_neon_vmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8, +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>; +def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; +defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>; +defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>; + // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. // VMLA : Vector Multiply Accumulate (integer and floating-point) -defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>; -def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>; -def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>; +defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; +def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; +def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>; +defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; +def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; +def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>; + +def : Pat<(v8i16 (add (v8i16 QPR:$src1), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + +def : Pat<(v4i32 (add (v4i32 QPR:$src1), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLAslfq (v4f32 QPR:$src1), + (v4f32 QPR:$src2), + (v2f32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + // VMLAL : Vector Multiply Accumulate Long (Q += D * D) defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>; defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>; + +defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", int_arm_neon_vmlals>; +defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>; + // VQDMLAL : Vector Saturating Doubling 
Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>; + // VMLS : Vector Multiply Subtract (integer and floating-point) -defm VMLS : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmls.i", sub>; -def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>; -def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>; +defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; +def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>; +def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>; +defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; +def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>; +def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>; + +def : Pat<(v8i16 (sub (v8i16 QPR:$src1), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), + (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + +def : Pat<(v4i32 (sub (v4i32 QPR:$src1), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), + (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLSslfq (v4f32 QPR:$src1), + (v4f32 QPR:$src2), + (v2f32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + // VMLSL : Vector Multiply Subtract Long (Q -= D * D) defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>; defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>; + +defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>; +defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>; + // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl.s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point) -defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, "vsub.i", sub, 0>; -def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>; -def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, "vsub.f32", v4f32, v4f32, fsub, 0>; +defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>; +def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>; +def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract -defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>; -defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>; +defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>; +defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract -defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>; -defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>; +defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>; +defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) @@ -882,85 +1782,101 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>; // Vector Comparisons. 
// VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, "vceq.i", NEONvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, "vceq.f32", v2i32, v2f32, NEONvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, "vceq.f32", v4i32, v4f32, NEONvceq, 1>; +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vceq.i", NEONvceq, 1>; +def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32", v2i32, v2f32, NEONvceq, 1>; +def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32", v4i32, v4f32, NEONvceq, 1>; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, "vcge.s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, "vcge.u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, "vcge.f32", v2i32, v2f32, NEONvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, "vcge.f32", v4i32, v4f32, NEONvcge, 0>; +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vcge.s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vcge.u", NEONvcgeu, 0>; +def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32", v2i32, v2f32, NEONvcge, 0>; +def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32", v4i32, v4f32, NEONvcge, 0>; // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, "vcgt.s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, "vcgt.u", NEONvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>; +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vcgt.s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vcgt.u", NEONvcgtu, 0>; +def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>; +def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v2i32, v2f32, +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32, int_arm_neon_vacged, 0>; -def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v4i32, v4f32, +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v2i32, v2f32, +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; -def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v4i32, v4f32, +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits -defm VTST : N3V_QHS<0, 0, 0b1000, 1, "vtst.i", NEONvtst, 1>; +defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vtst.i", NEONvtst, 1>; // Vector Bitwise Operations. 
// VAND : Vector Bitwise AND -def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, "vand", v2i32, v2i32, and, 1>; -def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, "vand", v4i32, v4i32, and, 1>; +def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>; +def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", v4i32, v4i32, and, 1>; // VEOR : Vector Bitwise Exclusive OR -def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, "veor", v2i32, v2i32, xor, 1>; -def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, "veor", v4i32, v4i32, xor, 1>; +def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", v2i32, v2i32, xor, 1>; +def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", v4i32, v4i32, xor, 1>; // VORR : Vector Bitwise OR -def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, "vorr", v2i32, v2i32, or, 1>; -def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, "vorr", v4i32, v4i32, or, 1>; +def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", v2i32, v2i32, or, 1>; +def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>; // VBIC : Vector Bitwise Bit Clear (AND NOT) def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), "vbic\t$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (and DPR:$src1,(vnot DPR:$src2))))]>; + (ins DPR:$src1, DPR:$src2), IIC_VBINiD, + "vbic\t$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (and DPR:$src1, + (vnot_conv DPR:$src2))))]>; def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), "vbic\t$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (and QPR:$src1,(vnot QPR:$src2))))]>; + (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, + "vbic\t$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (and QPR:$src1, + (vnot_conv QPR:$src2))))]>; // VORN : Vector Bitwise OR NOT def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2), "vorn\t$dst, $src1, $src2", "", - [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot DPR:$src2))))]>; + (ins DPR:$src1, DPR:$src2), IIC_VBINiD, + "vorn\t$dst, $src1, $src2", "", + [(set DPR:$dst, (v2i32 (or DPR:$src1, + (vnot_conv DPR:$src2))))]>; def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), "vorn\t$dst, $src1, $src2", "", - [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot QPR:$src2))))]>; + (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, + "vorn\t$dst, $src1, $src2", "", + [(set QPR:$dst, (v4i32 (or QPR:$src1, + (vnot_conv QPR:$src2))))]>; // VMVN : Vector Bitwise NOT def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), "vmvn\t$dst, $src", "", + (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, + "vmvn\t$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), "vmvn\t$dst, $src", "", + (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, + "vmvn\t$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR:$src3), + (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD, "vbsl\t$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (v2i32 (or (and DPR:$src2, DPR:$src1), - (and DPR:$src3, (vnot DPR:$src1)))))]>; + (and DPR:$src3, (vnot_conv DPR:$src1)))))]>; def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, QPR:$src3), + (ins QPR:$src1, QPR:$src2, QPR:$src3), 
IIC_VCNTiQ, "vbsl\t$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (v4i32 (or (and QPR:$src2, QPR:$src1), - (and QPR:$src3, (vnot QPR:$src1)))))]>; + (and QPR:$src3, (vnot_conv QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst", @@ -973,16 +1889,18 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), // Vector Absolute Differences. // VABD : Vector Absolute Difference -defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>; -defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>; -def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32, - int_arm_neon_vabdf, 0>; -def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32, - int_arm_neon_vabdf, 0>; +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>; +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>; +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32, + int_arm_neon_vabds, 0>; +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32, + int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>; +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate defm VABAs : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>; @@ -995,32 +1913,36 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. 
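Context for the definitions below (illustrative, invented names): VMAX/VMIN operate lane-wise on two vectors, while the pairwise VPMAX/VPMIN in the following section reduce adjacent element pairs, which is useful for horizontal reductions:

    #include <arm_neon.h>

    float32x4_t lane_max(float32x4_t a, float32x4_t b) {
        return vmaxq_f32(a, b);          /* vmax.f32 qN, qM, qK */
    }
    float32x2_t pair_max(float32x2_t a, float32x2_t b) {
        return vpmax_f32(a, b);          /* vpmax.f32 dN, dM, dK */
    }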
// VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>; -defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32, - int_arm_neon_vmaxf, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32, - int_arm_neon_vmaxf, 1>; +defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>; +defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32, + int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32, + int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>; -defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32, - int_arm_neon_vminf, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32, - int_arm_neon_vminf, 1>; +defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>; +defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32, + int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32, + int_arm_neon_vmins, 1>; // Vector Pairwise Operations. // VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8, - int_arm_neon_vpaddi, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16, - int_arm_neon_vpaddi, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32, - int_arm_neon_vpaddi, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32, - int_arm_neon_vpaddf, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8, + int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16, + int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32, + int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32, + int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s", @@ -1035,81 +1957,91 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8, +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16, +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32, +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8, +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, 
v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16, +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32, +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32, - int_arm_neon_vpmaxf, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32, + int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8, +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16, +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32, +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8, +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16, +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32, +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32, - int_arm_neon_vpminf, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32, + int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
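Context for the estimate/step definitions below (illustrative, invented name): VRECPE alone gives only a rough approximation; each VRECPS step, which computes 2 - a*x, refines it by one Newton-Raphson iteration, and two steps are typically enough for near-full f32 precision:

    #include <arm_neon.h>

    /* Division-free reciprocal: estimate, then two refinement steps. */
    float32x4_t reciprocal(float32x4_t a) {
        float32x4_t x = vrecpeq_f32(a);          /* vrecpe.f32 */
        x = vmulq_f32(x, vrecpsq_f32(a, x));     /* x *= (2 - a*x) */
        x = vmulq_f32(x, vrecpsq_f32(a, x));
        return x;
    }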
// VRECPE : Vector Reciprocal Estimate -def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32", +def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, + IIC_VUNAD, "vrecpe.u32", v2i32, v2i32, int_arm_neon_vrecpe>; -def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32", +def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, + IIC_VUNAQ, "vrecpe.u32", v4i32, v4i32, int_arm_neon_vrecpe>; -def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32", - v2f32, v2f32, int_arm_neon_vrecpef>; -def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32", - v4f32, v4f32, int_arm_neon_vrecpef>; +def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, + IIC_VUNAD, "vrecpe.f32", + v2f32, v2f32, int_arm_neon_vrecpe>; +def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, + IIC_VUNAQ, "vrecpe.f32", + v4f32, v4f32, int_arm_neon_vrecpe>; // VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32, +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32, int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32, +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32, int_arm_neon_vrecps, 1>; // VRSQRTE : Vector Reciprocal Square Root Estimate -def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32", - v2i32, v2i32, int_arm_neon_vrsqrte>; -def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32", - v4i32, v4i32, int_arm_neon_vrsqrte>; -def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32", - v2f32, v2f32, int_arm_neon_vrsqrtef>; -def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32", - v4f32, v4f32, int_arm_neon_vrsqrtef>; +def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, + IIC_VUNAD, "vrsqrte.u32", + v2i32, v2i32, int_arm_neon_vrsqrte>; +def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, + IIC_VUNAQ, "vrsqrte.u32", + v4i32, v4i32, int_arm_neon_vrsqrte>; +def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, + IIC_VUNAD, "vrsqrte.f32", + v2f32, v2f32, int_arm_neon_vrsqrte>; +def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, + IIC_VUNAQ, "vrsqrte.f32", + v4f32, v4f32, int_arm_neon_vrsqrte>; // VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32, +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32, int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32, +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32, int_arm_neon_vrsqrts, 1>; // Vector Shifts. 
// VSHL : Vector Shift -defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>; -defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>; +defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, + IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>; +defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, + IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, "vshl.i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; // VSHLL : Vector Shift Left Long def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", @@ -1134,86 +2066,90 @@ def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, "vshrn.i16", - v8i8, v8i16, NEONvshrn>; -def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32", - v4i16, v4i32, NEONvshrn>; -def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64", - v2i32, v2i64, NEONvshrn>; +def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>; +def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>; +def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>; // VRSHL : Vector Rounding Shift -defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>; -defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>; +defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>; +defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, "vrshr.s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16", - v8i8, v8i16, NEONvrshrn>; -def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32", - v4i16, v4i32, NEONvrshrn>; -def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64", - v2i32, v2i64, NEONvrshrn>; +def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>; +def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>; +def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>; // VQSHL : Vector Saturating Shift -defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>; -defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>; +defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + 
IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>; +defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16", - v8i8, v8i16, NEONvqshrns>; -def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32", - v4i16, v4i32, NEONvqshrns>; -def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64", - v2i32, v2i64, NEONvqshrns>; -def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16", - v8i8, v8i16, NEONvqshrnu>; -def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32", - v4i16, v4i32, NEONvqshrnu>; -def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64", - v2i32, v2i64, NEONvqshrnu>; +def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>; +def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>; +def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>; +def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>; +def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>; +def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16", - v8i8, v8i16, NEONvqshrnsu>; -def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32", - v4i16, v4i32, NEONvqshrnsu>; -def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64", - v2i32, v2i64, NEONvqshrnsu>; +def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>; +def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>; +def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s", - int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u", - int_arm_neon_vqrshiftu, 0>; +defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>; +defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16", - v8i8, v8i16, NEONvqrshrns>; -def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32", - v4i16, 
v4i32, NEONvqrshrns>; -def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64", - v2i32, v2i64, NEONvqrshrns>; -def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16", - v8i8, v8i16, NEONvqrshrnu>; -def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32", - v4i16, v4i32, NEONvqrshrnu>; -def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64", - v2i32, v2i64, NEONvqrshrnu>; +def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>; +def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>; +def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>; +def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>; +def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>; +def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16", - v8i8, v8i16, NEONvqrshrnsu>; -def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32", - v4i16, v4i32, NEONvqrshrnsu>; -def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64", - v2i32, v2i64, NEONvqrshrnsu>; +def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>; +def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>; +def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; @@ -1230,15 +2166,19 @@ defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>; // Vector Absolute and Saturating Absolute. // VABS : Vector Absolute Value -defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, "vabs.s", +defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, + IIC_VUNAiD, IIC_VUNAiQ, "vabs.s", int_arm_neon_vabs>; -def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32", - v2f32, v2f32, int_arm_neon_vabsf>; -def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32", - v4f32, v4f32, int_arm_neon_vabsf>; +def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + IIC_VUNAD, "vabs.f32", + v2f32, v2f32, int_arm_neon_vabs>; +def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + IIC_VUNAQ, "vabs.f32", + v4f32, v4f32, int_arm_neon_vabs>; // VQABS : Vector Saturating Absolute Value -defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s", +defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, + IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s", int_arm_neon_vqabs>; // Vector Negate. 
@@ -1248,11 +2188,11 @@ def vneg_conv : PatFrag<(ops node:$in),
   (sub immAllZerosV_bc, node:$in)>;
 class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
   : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
-        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
         [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
 class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
   : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
-        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "",
         [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;

 // VNEG : Vector Negate
@@ -1265,10 +2205,12 @@ def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;

 // VNEG : Vector Negate (floating-point)
 def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
-                   (outs DPR:$dst), (ins DPR:$src), "vneg.f32\t$dst, $src", "",
+                   (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
+                   "vneg.f32\t$dst, $src", "",
                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
 def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
-                   (outs QPR:$dst), (ins QPR:$src), "vneg.f32\t$dst, $src", "",
+                   (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ,
+                   "vneg.f32\t$dst, $src", "",
                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;

 def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
@@ -1279,21 +2221,26 @@ def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
 def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;

 // VQNEG : Vector Saturating Negate
-defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, "vqneg.s",
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s",
                         int_arm_neon_vqneg>;

 // Vector Bit Counting Operations.

 // VCLS : Vector Count Leading Sign Bits
-defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, "vcls.s",
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+                       IIC_VCNTiD, IIC_VCNTiQ, "vcls.s",
                        int_arm_neon_vcls>;
 // VCLZ : Vector Count Leading Zeros
-defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, "vclz.i",
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+                       IIC_VCNTiD, IIC_VCNTiQ, "vclz.i",
                        int_arm_neon_vclz>;
 // VCNT : Vector Count One Bits
-def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+                    IIC_VCNTiD, "vcnt.8",
                     v8i8, v8i8, int_arm_neon_vcnt>;
-def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+                    IIC_VCNTiQ, "vcnt.8",
                     v16i8, v16i8, int_arm_neon_vcnt>;

 // Vector Move Operations.
@@ -1301,9 +2248,9 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",

 // VMOV : Vector Move (Register)
 def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
-                "vmov\t$dst, $src", "", []>;
+                IIC_VMOVD, "vmov\t$dst, $src", "", []>;
 def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
-                "vmov\t$dst, $src", "", []>;
+                IIC_VMOVD, "vmov\t$dst, $src", "", []>;

 // VMOV : Vector Move (Immediate)

@@ -1343,146 +2290,188 @@ def vmovImm64 : PatLeaf<(build_vector), [{
 // be encoded based on the immed values.
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "", + (ins i8imm:$SIMM), IIC_VMOVImm, + "vmov.i8\t$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "", + (ins i8imm:$SIMM), IIC_VMOVImm, + "vmov.i8\t$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "", + (ins i16imm:$SIMM), IIC_VMOVImm, + "vmov.i16\t$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "", + (ins i16imm:$SIMM), IIC_VMOVImm, + "vmov.i16\t$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "", + (ins i32imm:$SIMM), IIC_VMOVImm, + "vmov.i32\t$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "", + (ins i32imm:$SIMM), IIC_VMOVImm, + "vmov.i32\t$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "", + (ins i64imm:$SIMM), IIC_VMOVImm, + "vmov.i64\t$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "", + (ins i64imm:$SIMM), IIC_VMOVImm, + "vmov.i64\t$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; // VMOV : Vector Get Lane (move scalar to ARM core register) def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00, - (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), - "vmov", ".s8\t$dst, $src[$lane]", + (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), + IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>; def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01, - (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), - "vmov", ".s16\t$dst, $src[$lane]", + (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), + IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>; def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00, - (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), - "vmov", ".u8\t$dst, $src[$lane]", + (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), + IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>; def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01, - (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), - "vmov", ".u16\t$dst, $src[$lane]", + (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), + IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>; def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00, - (outs GPR:$dst), (ins DPR:$src, i32imm:$lane), - "vmov", ".32\t$dst, $src[$lane]", + (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), + IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]", [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>; // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane), (VGETLNs8 (v8i8 
(EXTRACT_SUBREG QPR:$src, - (SubReg_i8_reg imm:$lane))), + (DSubReg_i8_reg imm:$lane))), (SubReg_i8_lane imm:$lane))>; def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane), (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, - (SubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane))>; def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane), (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, - (SubReg_i8_reg imm:$lane))), + (DSubReg_i8_reg imm:$lane))), (SubReg_i8_lane imm:$lane))>; def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, - (SubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane))>; def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, - (SubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane))>; +def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), + (EXTRACT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2), + (SSubReg_f32_reg imm:$src2))>; +def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), + (EXTRACT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2), + (SSubReg_f32_reg imm:$src2))>; //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), -// (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>; +// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), - (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>; + (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; // VMOV : Vector Set Lane (move ARM core register to scalar) let Constraints = "$src1 = $dst" in { def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst), - (ins DPR:$src1, GPR:$src2, i32imm:$lane), - "vmov", ".8\t$dst[$lane], $src2", + (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), + IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>; def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst), - (ins DPR:$src1, GPR:$src2, i32imm:$lane), - "vmov", ".16\t$dst[$lane], $src2", + (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), + IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>; def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst), - (ins DPR:$src1, GPR:$src2, i32imm:$lane), - "vmov", ".32\t$dst[$lane], $src2", + (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), + IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2", [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>; } def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), (v16i8 (INSERT_SUBREG QPR:$src1, (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, - (SubReg_i8_reg imm:$lane))), + (DSubReg_i8_reg imm:$lane))), GPR:$src2, (SubReg_i8_lane imm:$lane)), - (SubReg_i8_reg imm:$lane)))>; + (DSubReg_i8_reg imm:$lane)))>; def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), (v8i16 (INSERT_SUBREG QPR:$src1, (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, - (SubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), GPR:$src2, (SubReg_i16_lane imm:$lane)), - (SubReg_i16_reg imm:$lane)))>; + (DSubReg_i16_reg imm:$lane)))>; def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), (v4i32 (INSERT_SUBREG QPR:$src1, (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, - (SubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), GPR:$src2, (SubReg_i32_lane imm:$lane)), - (SubReg_i32_reg imm:$lane)))>; + (DSubReg_i32_reg imm:$lane)))>; + +def : 
Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
+          (INSERT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2),
+                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
+def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
+          (INSERT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2),
+                         SPR:$src2, (SSubReg_f32_reg imm:$src3))>;

 //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
-//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
+//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
 def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
-          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
+          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
+
+def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
+          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+def : Pat<(v2f64 (scalar_to_vector DPR:$src)),
+          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
+def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
+          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+
+def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
+          (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
+          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
+          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
+
+def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
+          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+                         arm_dsubreg_0)>;
+def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
+          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+                         arm_dsubreg_0)>;
+def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
+          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
+                         arm_dsubreg_0)>;

 // VDUP : Vector Duplicate (from ARM core register to all elements)

-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
-                       (vector_shuffle node:$lhs, node:$rhs), [{
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
 class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
   : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
-          "vdup", !strconcat(asmSize, "\t$dst, $src"),
-          [(set DPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+          IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+          [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;
 class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
   : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
-          "vdup", !strconcat(asmSize, "\t$dst, $src"),
-          [(set QPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+          IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"),
+          [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>;

 def VDUP8d  : VDUPD<0b11101100, 0b00, ".8", v8i8>;
 def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>;
@@ -1492,45 +2481,28 @@ def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
 def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>;

 def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
-                   "vdup", ".32\t$dst, $src",
-                   [(set DPR:$dst, (v2f32 (splat_lo
-                                           (scalar_to_vector
-                                            (f32 (bitconvert GPR:$src))),
-                                           undef)))]>;
+                   IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+                   [(set DPR:$dst, (v2f32 (NEONvdup
+                                           (f32 (bitconvert GPR:$src)))))]>;
 def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
-                   "vdup", ".32\t$dst, $src",
-                   [(set QPR:$dst, (v4f32 (splat_lo
-                                           (scalar_to_vector
-                                            (f32 (bitconvert GPR:$src))),
-                                           undef)))]>;
+                   IIC_VMOVIS, "vdup", ".32\t$dst, $src",
+                   [(set QPR:$dst, (v4f32 (NEONvdup
+                                           (f32 (bitconvert GPR:$src)))))]>;

 // VDUP : Vector Duplicate Lane (from scalar to all elements)

-def SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  return CurDAG->getTargetConstant(SVOp->getSplatIndex(), MVT::i32);
-}]>;
-
-def splat_lane : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  return SVOp->isSplat();
-}], SHUFFLE_get_splat_lane>;
-
 class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
-        (outs DPR:$dst), (ins DPR:$src, i32imm:$lane),
+        (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
         !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
-        [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>;
+        [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;

-// vector_shuffle requires that the source and destination types match, so
-// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node.
 class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
               ValueType ResTy, ValueType OpTy>
   : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
-        (outs QPR:$dst), (ins DPR:$src, i32imm:$lane),
+        (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
         !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
-        [(set QPR:$dst, (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>;
+        [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;

 def VDUPLN8d  : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
 def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
@@ -1541,15 +2513,51 @@ def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
 def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
 def VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;

+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+                                                 (DSubReg_i8_reg imm:$lane))),
+                           (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+                                                   (DSubReg_i16_reg imm:$lane))),
+                            (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+                                                   (DSubReg_i32_reg imm:$lane))),
+                            (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+          (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
+                                                  (DSubReg_i32_reg imm:$lane))),
+                           (SubReg_i32_lane imm:$lane)))>;
+
+def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
+                  (outs DPR:$dst), (ins SPR:$src),
+                  IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
+                  [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
+
+def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
+                  (outs QPR:$dst), (ins SPR:$src),
+                  IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "",
+                  [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
+
+def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
+          (INSERT_SUBREG QPR:$src,
+                         (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+                         (DSubReg_f64_other_reg imm:$lane))>;
+def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
+          (INSERT_SUBREG QPR:$src,
+                         (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+                         (DSubReg_f64_other_reg imm:$lane))>;
+
 // VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i",
+defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i",
                          int_arm_neon_vmovn>;
 // VQMOVN : Vector Saturating Narrowing Move
-defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, "vqmovn.s",
+defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s",
                            int_arm_neon_vqmovns>;
-defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, "vqmovn.u",
+defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u",
                            int_arm_neon_vqmovnu>;
-defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, "vqmovun.s",
+defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s",
                             int_arm_neon_vqmovnsu>;
 // VMOVL : Vector Lengthening Move
 defm VMOVLs : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>;
@@ -1597,6 +2605,247 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;

+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst),
+        (ins DPR:$src), IIC_VMOVD,
+        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
+        (ins QPR:$src), IIC_VMOVD,
+        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
+
+def VREV64d8  : VREV64D<0b00, "vrev64.8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>;
+def VREV64df  : VREV64D<0b10, "vrev64.32", v2f32>;
+
+def VREV64q8  : VREV64Q<0b00, "vrev64.8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>;
+def VREV64qf  : VREV64Q<0b10, "vrev64.32", v4f32>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst),
+        (ins DPR:$src), IIC_VMOVD,
+        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
+        (ins QPR:$src), IIC_VMOVD,
+        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
+
+def VREV32d8  : VREV32D<0b00, "vrev32.8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>;
+
+def VREV32q8  : VREV32Q<0b00, "vrev32.8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst),
+        (ins DPR:$src), IIC_VMOVD,
+        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
+        (ins QPR:$src), IIC_VMOVD,
+        !strconcat(OpcodeStr, "\t$dst, $src"), "",
+        [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>;
+
+// Other Vector Shuffles.
+
+// VEXT : Vector Extract
+
+class VEXTd<string OpcodeStr, ValueType Ty>
+  : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst),
+        (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
+        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
+        [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
+                                      (Ty DPR:$rhs), imm:$index)))]>;
+
+class VEXTq<string OpcodeStr, ValueType Ty>
+  : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst),
+        (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
+        !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "",
+        [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
+                                      (Ty QPR:$rhs), imm:$index)))]>;
+
+def VEXTd8  : VEXTd<"vext.8", v8i8>;
+def VEXTd16 : VEXTd<"vext.16", v4i16>;
+def VEXTd32 : VEXTd<"vext.32", v2i32>;
+def VEXTdf  : VEXTd<"vext.32", v2f32>;
+
+def VEXTq8  : VEXTq<"vext.8", v16i8>;
+def VEXTq16 : VEXTq<"vext.16", v8i16>;
+def VEXTq32 : VEXTq<"vext.32", v4i32>;
+def VEXTqf  : VEXTq<"vext.32", v4f32>;
+
+// VTRN : Vector Transpose
+
+def VTRNd8  : N2VDShuffle<0b00, 0b00001, "vtrn.8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">;
+
+def VTRNq8  : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8  : N2VDShuffle<0b00, 0b00010, "vuzp.8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">;
+
+def VUZPq8  : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8  : N2VDShuffle<0b00, 0b00011, "vzip.8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">;
+
+def VZIPq8  : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">;
+
+// Vector Table Lookup and Table Extension.
+ +// VTBL : Vector Table Lookup +def VTBL1 + : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst), + (ins DPR:$tbl1, DPR:$src), IIC_VTB1, + "vtbl.8\t$dst, \\{$tbl1\\}, $src", "", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>; +let hasExtraSrcRegAllocReq = 1 in { +def VTBL2 + : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), + (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2, + "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 + DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; +def VTBL3 + : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3, + "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 + DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; +def VTBL4 + : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4, + "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, + DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; +} // hasExtraSrcRegAllocReq = 1 + +// VTBX : Vector Table Extension +def VTBX1 + : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), + (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1, + "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 + DPR:$orig, DPR:$tbl1, DPR:$src)))]>; +let hasExtraSrcRegAllocReq = 1 in { +def VTBX2 + : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), + (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2, + "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 + DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; +def VTBX3 + : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), + (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3, + "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, + DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; +def VTBX4 + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, + DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4, + "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst", + [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, + DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; +} // hasExtraSrcRegAllocReq = 1 + +//===----------------------------------------------------------------------===// +// NEON instructions for single-precision FP math +//===----------------------------------------------------------------------===// + +// These need separate instructions because they must use DPR_VFP2 register +// class which have SPR sub-registers. 
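The N3VDsPat, N3VDMulOpsPat, N2VDIntsPat, and N2VDsPat helpers used in the next hunk are defined earlier in ARMInstrNEON.td and do not appear in this diff. Each one pairs a *_sfp instruction with a selection pattern that moves the scalar f32 operands into the S sub-registers of a D register, runs the D-register NEON instruction, and extracts the scalar result again. A minimal sketch of that shape, built only from constructs already visible in this patch (the _sketch name is illustrative, not the file's actual definition):

    // Sketch: map a scalar f32 node onto a *_sfp D-register instruction.
    class N3VDsPat_sketch<SDNode OpNode, Instruction Inst>
      : Pat<(f32 (OpNode SPR:$a, SPR:$b)),
            (EXTRACT_SUBREG
               (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
                     (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
               arm_ssubreg_0)>;

The real helpers are presumably also gated on a predicate so these patterns only fire when NEON is being used for scalar FP math.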
+
+// Vector Add Operations used for single-precision FP
+let neverHasSideEffects = 1 in
+def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>;
+def : N3VDsPat<fadd, VADDfd_sfp>;
+
+// Vector Sub Operations used for single-precision FP
+let neverHasSideEffects = 1 in
+def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>;
+def : N3VDsPat<fsub, VSUBfd_sfp>;
+
+// Vector Multiply Operations used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>;
+def : N3VDsPat<fmul, VMULfd_sfp>;
+
+// Vector Multiply-Accumulate/Subtract used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>;
+def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+
+let neverHasSideEffects = 1 in
+def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>;
+def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+
+// Vector Absolute used for single-precision FP
+let neverHasSideEffects = 1 in
+def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
+                          IIC_VUNAD, "vabs.f32",
+                          v2f32, v2f32, int_arm_neon_vabs>;
+def : N2VDIntsPat<fabs, VABSfd_sfp>;
+
+// Vector Negate used for single-precision FP
+let neverHasSideEffects = 1 in
+def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+                       (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+                       "vneg.f32\t$dst, $src", "", []>;
+def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
+
+// Vector Convert between single-precision FP and integer
+let neverHasSideEffects = 1 in
+def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
+                         v2i32, v2f32, fp_to_sint>;
+def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
+
+let neverHasSideEffects = 1 in
+def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
+                         v2i32, v2f32, fp_to_uint>;
+def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
+
+let neverHasSideEffects = 1 in
+def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
+                         v2f32, v2i32, sint_to_fp>;
+def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
+
+let neverHasSideEffects = 1 in
+def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
+                         v2f32, v2i32, uint_to_fp>;
+def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 904d9b1d52737..9816addf7d6ac 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -117,86 +117,150 @@ def t_addrmode_sp : Operand<i32>,
 let Defs = [SP], Uses = [SP] in {
 def tADJCALLSTACKUP :
-PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
            "@ tADJCALLSTACKUP $amt1",
-           [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb]>;
+           [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;

 def tADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt),
+PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
            "@ tADJCALLSTACKDOWN $amt",
-           [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb]>;
+           [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
 }

+// For both thumb1 and thumb2.
 let isNotDuplicable = 1 in
-def tPICADD : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
-                   "$cp:\n\tadd $dst, pc",
-                   [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
+                  "\n$cp:\n\tadd $dst, pc",
+                  [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>;

 // PC relative add.
-def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), +def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi, "add $dst, pc, $rhs * 4", []>; // ADD rd, sp, #imm8 -// FIXME: hard code sp? -def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), - "add $dst, $sp, $rhs * 4 @ addrspi", []>; +def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi, + "add $dst, $sp, $rhs * 4", []>; // ADD sp, sp, #imm7 -// FIXME: hard code sp? -def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, "add $dst, $rhs * 4", []>; -// FIXME: Make use of the following? -// ADD rm, sp, rm +// SUB sp, sp, #imm7 +def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, + "sub $dst, $rhs * 4", []>; + +// ADD rm, sp +def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + "add $dst, $rhs", []>; + // ADD sp, rm +def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + "add $dst, $rhs", []>; + +// Pseudo instruction that will expand into a tSUBspi + a copy. +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. +def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + NoItinerary, "@ sub $dst, $rhs * 4", []>; + +def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + NoItinerary, "@ add $dst, $rhs", []>; + +let Defs = [CPSR] in +def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + NoItinerary, "@ and $dst, $rhs", []>; +} // usesCustomDAGSchedInserter //===----------------------------------------------------------------------===// // Control Flow Instructions. // -let isReturn = 1, isTerminator = 1 in { - def tBX_RET : TI<(outs), (ins), "bx lr", [(ARMretflag)]>; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def tBX_RET : TI<(outs), (ins), IIC_Br, "bx lr", [(ARMretflag)]>; // Alternative return instruction used by vararg functions. - def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), "bx $target", []>; + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx $target", []>; } // FIXME: remove when we have a way to marking a MI with these properties. 
-let isReturn = 1, isTerminator = 1 in -def tPOP_RET : TI<(outs reglist:$dst1, variable_ops), (ins), - "pop $dst1", []>; +let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, + hasExtraDefRegAllocReq = 1 in +def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, + "pop${p} $wb", []>; let isCall = 1, - Defs = [R0, R1, R2, R3, LR, - D0, D1, D2, D3, D4, D5, D6, D7] in { - def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), + Defs = [R0, R1, R2, R3, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { + // Also used for Thumb2 + def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl ${func:call}", - [(ARMtcall tglobaladdr:$func)]>; - // ARMv5T and above - def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), + [(ARMtcall tglobaladdr:$func)]>, + Requires<[IsThumb, IsNotDarwin]>; + + // ARMv5T and above, also used for Thumb2 + def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx ${func:call}", - [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>; - def tBLXr : TI<(outs), (ins tGPR:$func, variable_ops), + [(ARMcall tglobaladdr:$func)]>, + Requires<[IsThumb, HasV5T, IsNotDarwin]>; + + // Also used for Thumb2 + def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, + "blx $func", + [(ARMtcall GPR:$func)]>, + Requires<[IsThumb, HasV5T, IsNotDarwin]>; + + // ARMv4T + def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, + "mov lr, pc\n\tbx $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsThumb1Only, IsNotDarwin]>; +} + +// On Darwin R9 is call-clobbered. +let isCall = 1, + Defs = [R0, R1, R2, R3, R9, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { + // Also used for Thumb2 + def tBLr9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, + "bl ${func:call}", + [(ARMtcall tglobaladdr:$func)]>, + Requires<[IsThumb, IsDarwin]>; + + // ARMv5T and above, also used for Thumb2 + def tBLXi_r9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, + "blx ${func:call}", + [(ARMcall tglobaladdr:$func)]>, + Requires<[IsThumb, HasV5T, IsDarwin]>; + + // Also used for Thumb2 + def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx $func", - [(ARMtcall tGPR:$func)]>, Requires<[HasV5T]>; + [(ARMtcall GPR:$func)]>, + Requires<[IsThumb, HasV5T, IsDarwin]>; + // ARMv4T - def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), - "cpy lr, pc\n\tbx $func", - [(ARMcall_nolink tGPR:$func)]>; + def tBXr9 : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, + "mov lr, pc\n\tbx $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsThumb1Only, IsDarwin]>; } let isBranch = 1, isTerminator = 1 in { let isBarrier = 1 in { let isPredicable = 1 in - def tB : T1I<(outs), (ins brtarget:$target), "b $target", - [(br bb:$target)]>; + def tB : T1I<(outs), (ins brtarget:$target), IIC_Br, + "b $target", [(br bb:$target)]>; // Far jump - def tBfar : T1Ix2<(outs), (ins brtarget:$target), + let Defs = [LR] in + def tBfar : TIx2<(outs), (ins brtarget:$target), IIC_Br, "bl $target\t@ far jump",[]>; def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - "cpy pc, $target \n\t.align\t2\n$jt", + IIC_Br, "mov pc, $target\n\t.align\t2\n$jt", [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; } } @@ -204,7 +268,8 @@ let isBranch = 1, isTerminator = 1 in { // FIXME: should be able to write a pattern for ARMBrcond, but 
can't use // a two-value operand where a dag node expects two operands. :( let isBranch = 1, isTerminator = 1 in - def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", + def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br, + "b$cc $target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>; //===----------------------------------------------------------------------===// @@ -212,384 +277,363 @@ let isBranch = 1, isTerminator = 1 in // let canFoldAsLoad = 1 in -def tLDR : T1I4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), - "ldr $dst, $addr", +def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, + "ldr", " $dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; -def tLDRB : T1I1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), - "ldrb $dst, $addr", +def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, + "ldrb", " $dst, $addr", [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; -def tLDRH : T1I2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), - "ldrh $dst, $addr", +def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, + "ldrh", " $dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; -def tLDRSB : T1I1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), - "ldrsb $dst, $addr", +let AddedComplexity = 10 in +def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, + "ldrsb", " $dst, $addr", [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; -def tLDRSH : T1I2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), - "ldrsh $dst, $addr", +let AddedComplexity = 10 in +def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, + "ldrsh", " $dst, $addr", [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; let canFoldAsLoad = 1 in -def tLDRspi : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), - "ldr $dst, $addr", +def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, + "ldr", " $dst, $addr", [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). let canFoldAsLoad = 1, mayLoad = 1 in -def tRestore : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), - "ldr $dst, $addr", []>; +def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, + "ldr", " $dst, $addr", []>; // Load tconstpool let canFoldAsLoad = 1 in -def tLDRpci : T1Is<(outs tGPR:$dst), (ins i32imm:$addr), - "ldr $dst, $addr", +def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, + "ldr", " $dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. 
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in -def tLDRcp : T1Is<(outs tGPR:$dst), (ins i32imm:$addr), - "ldr $dst, $addr", []>; +def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, + "ldr", " $dst, $addr", []>; -def tSTR : T1I4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), - "str $src, $addr", +def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, + "str", " $src, $addr", [(store tGPR:$src, t_addrmode_s4:$addr)]>; -def tSTRB : T1I1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), - "strb $src, $addr", +def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, + "strb", " $src, $addr", [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; -def tSTRH : T1I2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), - "strh $src, $addr", +def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, + "strh", " $src, $addr", [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; -def tSTRspi : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), - "str $src, $addr", +def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, + "str", " $src, $addr", [(store tGPR:$src, t_addrmode_sp:$addr)]>; let mayStore = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). -def tSpill : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), - "str $src, $addr", []>; +def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, + "str", " $src, $addr", []>; } //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // -// TODO: A7-44: LDMIA - load multiple +// These requires base address to be written back or one of the loaded regs. +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +def tLDM : T1I<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + IIC_iLoadm, + "ldm${addr:submode}${p} $addr, $wb", []>; -let mayLoad = 1 in -def tPOP : TI<(outs reglist:$dst1, variable_ops), (ins), - "pop $dst1", []>; +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +def tSTM : T1I<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + IIC_iStorem, + "stm${addr:submode}${p} $addr, $wb", []>; -let mayStore = 1 in -def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops), - "push $src1", []>; +let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in +def tPOP : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, + "pop${p} $wb", []>; + +let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in +def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, + "push${p} $wb", []>; //===----------------------------------------------------------------------===// // Arithmetic Instructions. 
// // Add with carry register -let isCommutable = 1, Defs = [CPSR], Uses = [CPSR] in -def tADCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "adc $dst, $rhs", - [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; +let isCommutable = 1, Uses = [CPSR] in +def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "adc", " $dst, $rhs", + [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; // Add immediate -let Defs = [CPSR] in { -def tADDi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "add $dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>; -def tADDSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "add $dst, $lhs, $rhs", - [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7:$rhs))]>; -} +def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, + "add", " $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>; -let Defs = [CPSR] in { -def tADDi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "add $dst, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>; -def tADDSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "add $dst, $rhs", - [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255:$rhs))]>; -} +def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, + "add", " $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>; // Add register -let isCommutable = 1, Defs = [CPSR] in { -def tADDrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "add $dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; -def tADDSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "add $dst, $lhs, $rhs", - [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>; -} +let isCommutable = 1 in +def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "add", " $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; let neverHasSideEffects = 1 in -def tADDhirr : T1It<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs), - "add $dst, $rhs @ addhirr", []>; +def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + "add", " $dst, $rhs", []>; // And register -let isCommutable = 1, Defs = [CPSR] in -def tAND : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "and $dst, $rhs", - [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; +let isCommutable = 1 in +def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "and", " $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; // ASR immediate -let Defs = [CPSR] in -def tASRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "asr $dst, $lhs, $rhs", - [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>; +def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, + "asr", " $dst, $lhs, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>; // ASR register -let Defs = [CPSR] in -def tASRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "asr $dst, $rhs", - [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>; +def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, + "asr", " $dst, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>; // BIC register -let Defs = [CPSR] in -def tBIC : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "bic $dst, $rhs", - [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>; +def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "bic", " $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>; // CMN register let Defs = [CPSR] in { -def tCMN : 
T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmn $lhs, $rhs", - [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; -def tCMNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmn $lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>; +def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, + "cmn", " $lhs, $rhs", + [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; +def tCMNZ : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, + "cmn", " $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>; } // CMP immediate let Defs = [CPSR] in { -def tCMPi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), - "cmp $lhs, $rhs", - [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; -def tCMPZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs), - "cmp $lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>; +def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, + "cmp", " $lhs, $rhs", + [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; +def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, + "cmp", " $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>; } // CMP register let Defs = [CPSR] in { -def tCMPr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmp $lhs, $rhs", - [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; -def tCMPZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "cmp $lhs, $rhs", - [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>; +def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, + "cmp", " $lhs, $rhs", + [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; +def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, + "cmp", " $lhs, $rhs", + [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>; + +def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, + "cmp", " $lhs, $rhs", []>; +def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, + "cmp", " $lhs, $rhs", []>; } -// TODO: A7-37: CMP(3) - cmp hi regs // XOR register -let isCommutable = 1, Defs = [CPSR] in -def tEOR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "eor $dst, $rhs", - [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; +let isCommutable = 1 in +def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "eor", " $dst, $rhs", + [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; // LSL immediate -let Defs = [CPSR] in -def tLSLri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "lsl $dst, $lhs, $rhs", - [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>; +def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, + "lsl", " $dst, $lhs, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>; // LSL register -let Defs = [CPSR] in -def tLSLrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "lsl $dst, $rhs", - [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>; +def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, + "lsl", " $dst, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>; // LSR immediate -let Defs = [CPSR] in -def tLSRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "lsr $dst, $lhs, $rhs", - [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>; +def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, + "lsr", " $dst, $lhs, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>; // LSR register -let Defs = [CPSR] in -def tLSRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "lsr $dst, $rhs", - [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>; +def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, + "lsr", " $dst, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>; // move register -let Defs = [CPSR] in -def tMOVi8 : T1I<(outs 
tGPR:$dst), (ins i32imm:$src), - "mov $dst, $src", - [(set tGPR:$dst, imm0_255:$src)]>; +def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi, + "mov", " $dst, $src", + [(set tGPR:$dst, imm0_255:$src)]>; // TODO: A7-73: MOV(2) - mov setting flag. -// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy', -// which is MOV(3). This also supports high registers. let neverHasSideEffects = 1 in { -def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "cpy $dst, $src", []>; -def tMOVhir2lor : T1I<(outs tGPR:$dst), (ins GPR:$src), - "cpy $dst, $src\t@ hir2lor", []>; -def tMOVlor2hir : T1I<(outs GPR:$dst), (ins tGPR:$src), - "cpy $dst, $src\t@ lor2hir", []>; -def tMOVhir2hir : T1I<(outs GPR:$dst), (ins GPR:$src), - "cpy $dst, $src\t@ hir2hir", []>; +// FIXME: Make this predicable. +def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, + "mov $dst, $src", []>; +let Defs = [CPSR] in +def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, + "movs $dst, $src", []>; + +// FIXME: Make these predicable. +def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr, + "mov $dst, $src", []>; +def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr, + "mov $dst, $src", []>; +def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + "mov $dst, $src", []>; } // neverHasSideEffects // multiply register -let isCommutable = 1, Defs = [CPSR] in -def tMUL : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "mul $dst, $rhs", - [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; +let isCommutable = 1 in +def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, + "mul", " $dst, $rhs", + [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; // move inverse register -let Defs = [CPSR] in -def tMVN : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "mvn $dst, $src", - [(set tGPR:$dst, (not tGPR:$src))]>; - -// negate register -let Defs = [CPSR] in -def tNEG : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "neg $dst, $src", - [(set tGPR:$dst, (ineg tGPR:$src))]>; +def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, + "mvn", " $dst, $src", + [(set tGPR:$dst, (not tGPR:$src))]>; // bitwise or register -let isCommutable = 1, Defs = [CPSR] in -def tORR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "orr $dst, $rhs", - [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; +let isCommutable = 1 in +def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "orr", " $dst, $rhs", + [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; // swaps -def tREV : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "rev $dst, $src", - [(set tGPR:$dst, (bswap tGPR:$src))]>, - Requires<[IsThumb, HasV6]>; - -def tREV16 : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "rev16 $dst, $src", - [(set tGPR:$dst, - (or (and (srl tGPR:$src, (i32 8)), 0xFF), - (or (and (shl tGPR:$src, (i32 8)), 0xFF00), - (or (and (srl tGPR:$src, (i32 8)), 0xFF0000), - (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>, - Requires<[IsThumb, HasV6]>; - -def tREVSH : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "revsh $dst, $src", - [(set tGPR:$dst, - (sext_inreg - (or (srl (and tGPR:$src, 0xFFFF), (i32 8)), - (shl tGPR:$src, (i32 8))), i16))]>, - Requires<[IsThumb, HasV6]>; +def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, + "rev", " $dst, $src", + [(set tGPR:$dst, (bswap tGPR:$src))]>, + Requires<[IsThumb1Only, HasV6]>; + +def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, + "rev16", " $dst, $src", + [(set tGPR:$dst, + (or (and (srl tGPR:$src, (i32 8)), 0xFF), + (or (and 
(shl tGPR:$src, (i32 8)), 0xFF00), + (or (and (srl tGPR:$src, (i32 8)), 0xFF0000), + (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>, + Requires<[IsThumb1Only, HasV6]>; + +def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, + "revsh", " $dst, $src", + [(set tGPR:$dst, + (sext_inreg + (or (srl (and tGPR:$src, 0xFF00), (i32 8)), + (shl tGPR:$src, (i32 8))), i16))]>, + Requires<[IsThumb1Only, HasV6]>; // rotate right register -let Defs = [CPSR] in -def tROR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "ror $dst, $rhs", - [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>; +def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, + "ror", " $dst, $rhs", + [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>; + +// negate register +def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi, + "rsb", " $dst, $src, #0", + [(set tGPR:$dst, (ineg tGPR:$src))]>; // Subtract with carry register -let Defs = [CPSR], Uses = [CPSR] in -def tSBCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "sbc $dst, $rhs", - [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>; +let Uses = [CPSR] in +def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "sbc", " $dst, $rhs", + [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>; // Subtract immediate -let Defs = [CPSR] in { -def tSUBi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "sub $dst, $lhs, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>; -def tSUBSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "sub $dst, $lhs, $rhs", - [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7_neg:$rhs))]>; -} +def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, + "sub", " $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>; -let Defs = [CPSR] in { -def tSUBi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "sub $dst, $rhs", - [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>; -def tSUBSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "sub $dst, $rhs", - [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255_neg:$rhs))]>; -} +def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, + "sub", " $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>; // subtract register -let Defs = [CPSR] in { -def tSUBrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "sub $dst, $lhs, $rhs", - [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>; -def tSUBSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - "sub $dst, $lhs, $rhs", - [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>; -} +def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, + "sub", " $dst, $lhs, $rhs", + [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>; // TODO: A7-96: STMIA - store multiple. 
-def tSUBspi : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), - "sub $dst, $rhs * 4", []>; - // sign-extend byte -def tSXTB : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "sxtb $dst, $src", - [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, - Requires<[IsThumb, HasV6]>; +def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, + "sxtb", " $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, + Requires<[IsThumb1Only, HasV6]>; // sign-extend short -def tSXTH : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "sxth $dst, $src", - [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, - Requires<[IsThumb, HasV6]>; +def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, + "sxth", " $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, + Requires<[IsThumb1Only, HasV6]>; // test let isCommutable = 1, Defs = [CPSR] in -def tTST : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs), - "tst $lhs, $rhs", - [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; +def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, + "tst", " $lhs, $rhs", + [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; // zero-extend byte -def tUXTB : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "uxtb $dst, $src", - [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, - Requires<[IsThumb, HasV6]>; +def tUXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, + "uxtb", " $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, + Requires<[IsThumb1Only, HasV6]>; // zero-extend short -def tUXTH : T1I<(outs tGPR:$dst), (ins tGPR:$src), - "uxth $dst, $src", - [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, - Requires<[IsThumb, HasV6]>; +def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, + "uxth", " $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, + Requires<[IsThumb1Only, HasV6]>; // Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. // Expanded by the scheduler into a branch sequence. let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler. - def tMOVCCr : + def tMOVCCr_pseudo : PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - "@ tMOVCCr $cc", - [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; + NoItinerary, "@ tMOVCCr $cc", + [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; + + +// 16-bit movcc in IT blocks for Thumb2. +def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, + "mov", " $dst, $rhs", []>; + +def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi, + "mov", " $dst, $rhs", []>; // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. 
-def tLEApcrel : TIx2<(outs tGPR:$dst), (ins i32imm:$label), - !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+4))\n"), - !strconcat("\tmov $dst, #PCRELV${:uid}\n", - "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), - []>; - -def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id), - !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", - "${:private}PCRELL${:uid}+4))\n"), - !strconcat("\tmov $dst, #PCRELV${:uid}\n", - "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), - []>; +def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, + "adr$p $dst, #$label", []>; + +def tLEApcrelJT : T1I<(outs tGPR:$dst), + (ins i32imm:$label, nohash_imm:$id, pred:$p), + IIC_iALUi, "adr$p $dst, #${label}_${id}", []>; //===----------------------------------------------------------------------===// // TLS Instructions @@ -598,7 +642,7 @@ def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id), // __aeabi_read_tp preserves the registers r1-r3. let isCall = 1, Defs = [R0, LR] in { - def tTPsoft : TIx2<(outs), (ins), + def tTPsoft : TIx2<(outs), (ins), IIC_Br, "bl __aeabi_read_tp", [(set R0, ARMthread_pointer)]>; } @@ -607,20 +651,46 @@ let isCall = 1, // Non-Instruction Patterns // +// Add with carry +def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs), + (tADDi3 tGPR:$lhs, imm0_7:$rhs)>; +def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs), + (tADDi8 tGPR:$lhs, imm8_255:$rhs)>; +def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs), + (tADDrr tGPR:$lhs, tGPR:$rhs)>; + +// Subtract with carry +def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs), + (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>; +def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs), + (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>; +def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs), + (tSUBrr tGPR:$lhs, tGPR:$rhs)>; + // ConstantPool, GlobalAddress -def : TPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>; -def : TPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; +def : T1Pat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>; +def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; // JumpTable -def : TPat<(ARMWrapperJT tjumptable:$dst, imm:$id), - (tLEApcrelJT tjumptable:$dst, imm:$id)>; +def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (tLEApcrelJT tjumptable:$dst, imm:$id)>; // Direct calls -def : TPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>; -def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>; +def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, + Requires<[IsThumb, IsNotDarwin]>; +def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>, + Requires<[IsThumb, IsDarwin]>; + +def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, + Requires<[IsThumb, HasV5T, IsNotDarwin]>; +def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>, + Requires<[IsThumb, HasV5T, IsDarwin]>; // Indirect calls to ARM routines -def : Tv5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>; +def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, + Requires<[IsThumb, HasV5T, IsNotDarwin]>; +def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>, + Requires<[IsThumb, HasV5T, IsDarwin]>; // zextload i1 -> zextload i8 def : T1Pat<(zextloadi1 t_addrmode_s1:$addr), @@ -631,6 +701,20 @@ def : T1Pat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; def : T1Pat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; def : T1Pat<(extloadi16 t_addrmode_s2:$addr), (tLDRH 
t_addrmode_s2:$addr)>; +// If it's impossible to use [r,r] address mode for sextload, select to +// ldr{b|h} + sxt{b|h} instead. +def : T1Pat<(sextloadi8 t_addrmode_s1:$addr), + (tSXTB (tLDRB t_addrmode_s1:$addr))>, + Requires<[IsThumb1Only, HasV6]>; +def : T1Pat<(sextloadi16 t_addrmode_s2:$addr), + (tSXTH (tLDRH t_addrmode_s2:$addr))>, + Requires<[IsThumb1Only, HasV6]>; + +def : T1Pat<(sextloadi8 t_addrmode_s1:$addr), + (tASRri (tLSLri (tLDRB t_addrmode_s1:$addr), 24), 24)>; +def : T1Pat<(sextloadi16 t_addrmode_s1:$addr), + (tASRri (tLSLri (tLDRH t_addrmode_s1:$addr), 16), 16)>; + // Large immediate handling. // Two piece imms. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 50345a68fddda..0750dcc7fdc4f 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -11,6 +11,21 @@ // //===----------------------------------------------------------------------===// +// IT block predicate field +def it_pred : Operand { + let PrintMethod = "printPredicateOperand"; +} + +// IT block condition mask +def it_mask : Operand { + let PrintMethod = "printThumbITMask"; +} + +// Table branch address +def tb_addrmode : Operand { + let PrintMethod = "printTBAddrMode"; +} + // Shifted operands. No register controlled shifts for Thumb2. // Note: We do not support rrx shifted operands yet. def t2_so_reg : Operand, // reg imm @@ -20,23 +35,14 @@ def t2_so_reg : Operand, // reg imm let MIOperandInfo = (ops GPR, i32imm); } -// t2_so_imm_XFORM - Return a t2_so_imm value packed into the format -// described for t2_so_imm def below. -def t2_so_imm_XFORM : SDNodeXFormgetTargetConstant( - ARM_AM::getT2SOImmVal(N->getZExtValue()), MVT::i32); -}]>; - // t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value def t2_so_imm_not_XFORM : SDNodeXFormgetTargetConstant( - ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())), MVT::i32); + return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); }]>; // t2_so_imm_neg_XFORM - Return the negation of a t2_so_imm value def t2_so_imm_neg_XFORM : SDNodeXFormgetTargetConstant( - ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())), MVT::i32); + return CurDAG->getTargetConstant(-((int)N->getZExtValue()), MVT::i32); }]>; // t2_so_imm - Match a 32-bit immediate operand, which is an @@ -47,27 +53,21 @@ def t2_so_imm_neg_XFORM : SDNodeXForm, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; - }], t2_so_imm_XFORM> { - let PrintMethod = "printT2SOImmOperand"; -} + return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; +}]>; // t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. def t2_so_imm_not : Operand, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1; - }], t2_so_imm_not_XFORM> { - let PrintMethod = "printT2SOImmOperand"; -} + return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1; +}], t2_so_imm_not_XFORM>; // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg : Operand, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1; - }], t2_so_imm_neg_XFORM> { - let PrintMethod = "printT2SOImmOperand"; -} + return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1; +}], t2_so_imm_neg_XFORM>; /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. 
def imm1_31 : PatLeaf<(i32 imm), [{ @@ -75,7 +75,8 @@ def imm1_31 : PatLeaf<(i32 imm), [{ }]>; /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. -def imm0_4095 : PatLeaf<(i32 imm), [{ +def imm0_4095 : Operand, + PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 4096; }]>; @@ -83,48 +84,9 @@ def imm0_4095_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 4096; }], imm_neg_XFORM>; -/// imm0_65535 predicate - True if the 32-bit immediate is in the range -/// [0.65535]. -def imm0_65535 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 65536; -}]>; - -/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield -/// e.g., 0xf000ffff -def bf_inv_mask_imm : Operand, - PatLeaf<(imm), [{ - uint32_t v = (uint32_t)N->getZExtValue(); - if (v == 0xffffffff) - return 0; - // naive checker. should do better, but simple is best for now since it's - // more likely to be correct. - while (v & 1) v >>= 1; // shift off the leading 1's - if (v) - { - while (!(v & 1)) v >>=1; // shift off the mask - while (v & 1) v >>= 1; // shift off the trailing 1's - } - // if this is a mask for clearing a bitfield, what's left should be zero. - return (v == 0); -}] > { - let PrintMethod = "printBitfieldInvMaskImmOperand"; -} - -/// Split a 32-bit immediate into two 16 bit parts. -def t2_lo16 : SDNodeXFormgetTargetConstant((uint32_t)N->getZExtValue() & 0xffff, - MVT::i32); -}]>; - -def t2_hi16 : SDNodeXFormgetTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32); -}]>; - -def t2_lo16AllZero : PatLeaf<(i32 imm), [{ - // Returns true if all low 16-bits are 0. - return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; - }], t2_hi16>; - +def imm0_255_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)(-N->getZExtValue()) < 255; +}], imm_neg_XFORM>; // Define Thumb2 specific addressing modes. @@ -147,14 +109,14 @@ def t2am_imm8_offset : Operand, let PrintMethod = "printT2AddrModeImm8OffsetOperand"; } -// t2addrmode_imm8s4 := reg + (imm8 << 2) +// t2addrmode_imm8s4 := reg +/- (imm8 << 2) def t2addrmode_imm8s4 : Operand, ComplexPattern { - let PrintMethod = "printT2AddrModeImm8Operand"; + let PrintMethod = "printT2AddrModeImm8s4Operand"; let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t2addrmode_so_reg := reg + reg << imm2 +// t2addrmode_so_reg := reg + (reg << imm2) def t2addrmode_so_reg : Operand, ComplexPattern { let PrintMethod = "printT2AddrModeSoRegOperand"; @@ -171,52 +133,58 @@ def t2addrmode_so_reg : Operand, /// changed to modify CPSR. multiclass T2I_un_irs{ // shifted imm - def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), + def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, opc, " $dst, $src", [(set GPR:$dst, (opnode t2_so_imm:$src))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; } // register - def r : T2I<(outs GPR:$dst), (ins GPR:$src), - opc, " $dst, $src", + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + opc, ".w $dst, $src", [(set GPR:$dst, (opnode GPR:$src))]>; // shifted register - def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), - opc, " $dst, $src", + def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, + opc, ".w $dst, $src", [(set GPR:$dst, (opnode t2_so_reg:$src))]>; } /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a // binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. 
-multiclass T2I_bin_irs { +multiclass T2I_bin_irs { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), + def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - opc, " $dst, $lhs, $rhs", + def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + opc, !strconcat(wide, " $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), - opc, " $dst, $lhs, $rhs", + def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + opc, !strconcat(wide, " $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } +/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need +// the ".w" prefix to indicate that they are wide. +multiclass T2I_bin_w_irs : + T2I_bin_irs; + /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are /// reversed. It doesn't define the 'rr' form since it's handled by its /// T2I_bin_irs counterpart. multiclass T2I_rbin_is { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), - opc, " $dst, $rhs, $lhs", + def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, + opc, ".w $dst, $rhs, $lhs", [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>; // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), + def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, opc, " $dst, $rhs, $lhs", [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>; } @@ -226,18 +194,18 @@ multiclass T2I_rbin_is { let Defs = [CPSR] in { multiclass T2I_bin_s_irs { // shifted imm - def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), - !strconcat(opc, "s"), " $dst, $lhs, $rhs", + def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + !strconcat(opc, "s"), ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // register - def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - !strconcat(opc, "s"), " $dst, $lhs, $rhs", + def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + !strconcat(opc, "s"), ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; } // shifted register - def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), - !strconcat(opc, "s"), " $dst, $lhs, $rhs", + def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + !strconcat(opc, "s"), ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } } @@ -246,22 +214,22 @@ multiclass T2I_bin_s_irs { /// patterns for a binary operation that produces a value. 
multiclass T2I_bin_ii12rs { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), - opc, " $dst, $lhs, $rhs", + def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // 12-bit imm - def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, !strconcat(opc, "w"), " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>; // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - opc, " $dst, $lhs, $rhs", + def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), - opc, " $dst, $lhs, $rhs", + def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } @@ -271,41 +239,41 @@ multiclass T2I_bin_ii12rs { let Uses = [CPSR] in { multiclass T2I_adde_sube_irs { // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), + def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2, CarryDefIsUnused]>; // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - opc, " $dst, $lhs, $rhs", + def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, Requires<[IsThumb2, CarryDefIsUnused]> { let isCommutable = Commutable; } // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), - opc, " $dst, $lhs, $rhs", + def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2, CarryDefIsUnused]>; // Carry setting variants // shifted imm - def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), + def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, !strconcat(opc, "s $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } // register - def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - !strconcat(opc, "s $dst, $lhs, $rhs"), + def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + !strconcat(opc, "s.w $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; let isCommutable = Commutable; } // shifted register - def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), - !strconcat(opc, "s $dst, $lhs, $rhs"), + def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + !strconcat(opc, "s.w $dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -313,49 +281,17 @@ multiclass T2I_adde_sube_irs { } } -/// T2I_rsc_is - Same as T2I_adde_sube_irs except the order of operands are -/// reversed. It doesn't define the 'rr' form since it's handled by its -/// T2I_adde_sube_irs counterpart. 
-let Defs = [CPSR], Uses = [CPSR] in { -multiclass T2I_rsc_is { - // shifted imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), - opc, " $dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]>; - // shifted register - def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), - opc, " $dst, $rhs, $lhs", - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]>; - // shifted imm - def Sri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), - !strconcat(opc, "s $dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; - } - // shifted register - def Srs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), - !strconcat(opc, "s $dst, $rhs, $lhs"), - [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; - } -} -} - -/// T2I_rbin_s_is - Same as T2I_bin_s_irs except the order of operands are -/// reversed. It doesn't define the 'rr' form since it's handled by its -/// T2I_bin_s_irs counterpart. +/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit. let Defs = [CPSR] in { multiclass T2I_rbin_s_is { // shifted imm def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), - !strconcat(opc, "${s} $dst, $rhs, $lhs"), + IIC_iALUi, + !strconcat(opc, "${s}.w $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>; // shifted register def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), + IIC_iALUsi, !strconcat(opc, "${s} $dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>; } @@ -365,96 +301,96 @@ multiclass T2I_rbin_s_is { // rotate operation that produces a value. multiclass T2I_sh_ir { // 5-bit imm - def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), - opc, " $dst, $lhs, $rhs", + def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi, + opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]>; // register - def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - opc, " $dst, $lhs, $rhs", + def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr, + opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>; } -/// T21_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test +/// T2I_cmp_is - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test /// patterns. Similar to T2I_bin_irs except the instruction does not produce /// a explicit result, only implicitly set CPSR. -let Uses = [CPSR] in { +let Defs = [CPSR] in { multiclass T2I_cmp_is { // shifted imm - def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), - opc, " $lhs, $rhs", + def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi, + opc, ".w $lhs, $rhs", [(opnode GPR:$lhs, t2_so_imm:$rhs)]>; // register - def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), - opc, " $lhs, $rhs", + def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, + opc, ".w $lhs, $rhs", [(opnode GPR:$lhs, GPR:$rhs)]>; // shifted register - def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), - opc, " $lhs, $rhs", + def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi, + opc, ".w $lhs, $rhs", [(opnode GPR:$lhs, t2_so_reg:$rhs)]>; } } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. 
multiclass T2I_ld { - def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), - opc, " $dst, $addr", + def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi, + opc, ".w $dst, $addr", [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>; - def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), + def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, " $dst, $addr", [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>; - def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), - opc, " $dst, $addr", + def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr, + opc, ".w $dst, $addr", [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>; - def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), - opc, " $dst, $addr", + def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi, + opc, ".w $dst, $addr", [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]>; } /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. multiclass T2I_st { - def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), - opc, " $src, $addr", + def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei, + opc, ".w $src, $addr", [(opnode GPR:$src, t2addrmode_imm12:$addr)]>; - def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), + def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei, opc, " $src, $addr", [(opnode GPR:$src, t2addrmode_imm8:$addr)]>; - def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), - opc, " $src, $addr", + def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer, + opc, ".w $src, $addr", [(opnode GPR:$src, t2addrmode_so_reg:$addr)]>; } /// T2I_picld - Defines the PIC load pattern. class T2I_picld : - T2I<(outs GPR:$dst), (ins addrmodepc:$addr), - !strconcat("${addr:label}:\n\t", opc), " $dst, $addr", + T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi, + !strconcat("\n${addr:label}:\n\t", opc), " $dst, $addr", [(set GPR:$dst, (opnode addrmodepc:$addr))]>; /// T2I_picst - Defines the PIC store pattern. class T2I_picst : - T2I<(outs), (ins GPR:$src, addrmodepc:$addr), - !strconcat("${addr:label}:\n\t", opc), " $src, $addr", + T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer, + !strconcat("\n${addr:label}:\n\t", opc), " $src, $addr", [(opnode GPR:$src, addrmodepc:$addr)]>; /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_unary_rrot { - def r : T2I<(outs GPR:$dst), (ins GPR:$Src), - opc, " $dst, $Src", - [(set GPR:$dst, (opnode GPR:$Src))]>; - def r_rot : T2I<(outs GPR:$dst), (ins GPR:$Src, i32imm:$rot), - opc, " $dst, $Src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>; + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, ".w $dst, $src", + [(set GPR:$dst, (opnode GPR:$src))]>; + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, ".w $dst, $src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>; } /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. 
multiclass T2I_bin_rrot { - def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, opc, " $dst, $LHS, $RHS", [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>; def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), - opc, " $dst, $LHS, $RHS, ror $rot", + IIC_iALUsr, opc, " $dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)))]>; } @@ -467,42 +403,46 @@ multiclass T2I_bin_rrot { // Miscellaneous Instructions. // -let isNotDuplicable = 1 in -def t2PICADD : T2XI<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp), - "$cp:\n\tadd $dst, pc", - [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>; - - // LEApcrel - Load a pc-relative address into a register without offending the // assembler. -def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), - !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #PCRELV${:uid}")), - []>; +def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, + "adr$p.w $dst, #$label", []>; def t2LEApcrelJT : T2XI<(outs GPR:$dst), - (ins i32imm:$label, i32imm:$id, pred:$p), - !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #PCRELV${:uid}")), - []>; - -// ADD rd, sp, #so_imm -def t2ADDrSPi : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - "add $dst, $sp, $imm", - []>; - -// ADD rd, sp, #imm12 -def t2ADDrSPi12 : T2XI<(outs GPR:$dst), (ins GPR:$sp, i32imm:$imm), - "addw $dst, $sp, $imm", - []>; - -def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - "addw $dst, $sp, $rhs", - []>; + (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, + "adr$p.w $dst, #${label}_${id}", []>; + +// ADD r, sp, {so_imm|i12} +def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), + IIC_iALUi, "add", ".w $dst, $sp, $imm", []>; +def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), + IIC_iALUi, "addw", " $dst, $sp, $imm", []>; + +// ADD r, sp, so_reg +def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), + IIC_iALUsi, "add", ".w $dst, $sp, $rhs", []>; + +// SUB r, sp, {so_imm|i12} +def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), + IIC_iALUi, "sub", ".w $dst, $sp, $imm", []>; +def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), + IIC_iALUi, "subw", " $dst, $sp, $imm", []>; + +// SUB r, sp, so_reg +def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), + IIC_iALUsi, + "sub", " $dst, $sp, $rhs", []>; + + +// Pseudo instruction that will expand into a t2SUBrSPi + a copy. +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. 
+def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), + NoItinerary, "@ sub.w $dst, $sp, $imm", []>; +def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), + NoItinerary, "@ subw $dst, $sp, $imm", []>; +def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), + NoItinerary, "@ sub $dst, $sp, $rhs", []>; +} // usesCustomDAGSchedInserter //===----------------------------------------------------------------------===// @@ -521,12 +461,14 @@ defm t2LDRB : T2I_ld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; defm t2LDRSH : T2I_ld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; -let mayLoad = 1 in { +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword -def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst), (ins t2addrmode_imm8s4:$addr), - "ldrd", " $dst, $addr", []>; -def t2LDRDpci : T2Ii8s4<(outs GPR:$dst), (ins i32imm:$addr), - "ldrd", " $dst, $addr", []>; +def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2), + (ins t2addrmode_imm8s4:$addr), + IIC_iLoadi, "ldrd", " $dst1, $addr", []>; +def t2LDRDpci : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2), + (ins i32imm:$addr), IIC_iLoadi, + "ldrd", " $dst1, $addr", []>; } // zextload i1 -> zextload i8 @@ -573,57 +515,57 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), let mayLoad = 1 in { def t2LDR_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>; def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrb", " $dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrb", " $dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrh", " $dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrh", " $dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrsb", " $dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrsb", " $dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iLoadiu, "ldrsh", " $dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, 
IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iLoadiu, "ldrsh", " $dst, [$base], $offset", "$base = $base_wb", []>; } @@ -634,108 +576,95 @@ defm t2STRB : T2I_st<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword -let mayLoad = 1 in -def t2STRDi8 : T2Ii8s4<(outs), (ins GPR:$src, t2addrmode_imm8s4:$addr), - "strd", " $src, $addr", []>; +let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in +def t2STRDi8 : T2Ii8s4<(outs), + (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), + IIC_iStorer, "strd", " $src1, $addr", []>; // Indexed stores def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iStoreiu, "str", " $src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "str", " $src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iStoreiu, "strh", " $src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "strh", " $src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRB_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePre, + AddrModeT2_i8, IndexModePre, IIC_iStoreiu, "strb", " $src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), - AddrModeT2_i8, IndexModePost, + AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "strb", " $src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; -// Address computation and loads and stores in PIC mode. -let isNotDuplicable = 1, AddedComplexity = 10 in { -let canFoldAsLoad = 1 in -def t2PICLDR : T2I_picld<"ldr", UnOpFrag<(load node:$Src)>>; - -def t2PICLDRH : T2I_picld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>; -def t2PICLDRB : T2I_picld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>; -def t2PICLDRSH : T2I_picld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>; -def t2PICLDRSB : T2I_picld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>; - -def t2PICSTR : T2I_picst<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; -def t2PICSTRH : T2I_picst<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; -def t2PICSTRB : T2I_picst<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -} // isNotDuplicable = 1, AddedComplexity = 10 - +// FIXME: ldrd / strd pre / post variants //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
// -let mayLoad = 1 in +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM : T2XI<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), - "ldm${p}${addr:submode} $addr, $dst1", []>; + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide} $addr, $wb", []>; -let mayStore = 1 in +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def t2STM : T2XI<(outs), - (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops), - "stm${p}${addr:submode} $addr, $src1", []>; + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + IIC_iStorem, "stm${addr:submode}${p}${addr:wide} $addr, $wb", []>; //===----------------------------------------------------------------------===// // Move Instructions. // let neverHasSideEffects = 1 in -def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), - "mov", " $dst, $src", []>; +def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, + "mov", ".w $dst, $src", []>; -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), - "mov", " $dst, $src", +// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. +let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in +def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, + "mov", ".w $dst, $src", [(set GPR:$dst, t2_so_imm:$src)]>; let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), +def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", " $dst, $src", [(set GPR:$dst, imm0_65535:$src)]>; -// FIXME: Also available in ARM mode. let Constraints = "$src = $dst" in -def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), - "movt", " $dst, $imm", - [(set GPR:$dst, - (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>; +def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, + "movt", " $dst, $imm", + [(set GPR:$dst, + (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>; //===----------------------------------------------------------------------===// // Extend Instructions. @@ -785,12 +714,14 @@ defm t2SUBS : T2I_bin_s_irs <"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm t2ADC : T2I_adde_sube_irs<"adc",BinOpFrag<(adde node:$LHS, node:$RHS)>,1>; defm t2SBC : T2I_adde_sube_irs<"sbc",BinOpFrag<(sube node:$LHS, node:$RHS)>>; -// RSB, RSC +// RSB defm t2RSB : T2I_rbin_is <"rsb", BinOpFrag<(sub node:$LHS, node:$RHS)>>; defm t2RSBS : T2I_rbin_s_is <"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>; -defm t2RSC : T2I_rsc_is <"rsc", BinOpFrag<(sube node:$LHS, node:$RHS)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. 
+let AddedComplexity = 1 in +def : T2Pat<(add GPR:$src, imm0_255_neg:$imm), + (t2SUBri GPR:$src, imm0_255_neg:$imm)>; def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), @@ -806,105 +737,250 @@ defm t2LSR : T2I_sh_ir<"lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<"asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<"ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; -def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), - "mov", " $dst, $src, rrx", +let Uses = [CPSR] in { +def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "rrx", " $dst, $src", [(set GPR:$dst, (ARMrrx GPR:$src))]>; +} + +let Defs = [CPSR] in { +def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "lsrs.w $dst, $src, #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>; +def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + "asrs.w $dst, $src, #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>; +} //===----------------------------------------------------------------------===// // Bitwise Instructions. // -defm t2AND : T2I_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; -defm t2ORR : T2I_bin_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; -defm t2EOR : T2I_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; +defm t2AND : T2I_bin_w_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; +defm t2ORR : T2I_bin_w_irs<"orr", BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; +defm t2EOR : T2I_bin_w_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; -defm t2BIC : T2I_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm t2BIC : T2I_bin_w_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm), - (t2BICri GPR:$src, t2_so_imm_not:$imm)>; +let Constraints = "$src = $dst" in +def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), + IIC_iALUi, "bfc", " $dst, $imm", + [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>; -defm t2ORN : T2I_bin_irs<"orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>; +def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), + IIC_iALUi, "sbfx", " $dst, $src, $lsb, $width", []>; -def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm), - (t2ORNri GPR:$src, t2_so_imm_not:$imm)>; +def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), + IIC_iALUi, "ubfx", " $dst, $src, $lsb, $width", []>; + +// FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1) + +defm t2ORN : T2I_bin_irs<"orn", BinOpFrag<(or node:$LHS, (not node:$RHS))>>; // Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version let AddedComplexity = 1 in defm t2MVN : T2I_un_irs <"mvn", UnOpFrag<(not node:$Src)>, 1, 1>; -def : T2Pat<(t2_so_imm_not:$src), - (t2MVNi t2_so_imm_not:$src)>; -// A8.6.17 BFC - Bitfield clear -// FIXME: Also available in ARM mode. -let Constraints = "$src = $dst" in -def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), - "bfc", " $dst, $imm", - [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>; +def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm), + (t2BICri GPR:$src, t2_so_imm_not:$imm)>; -// FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1) +// FIXME: Disable this pattern on Darwin to workaround an assembler bug. 
+def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm), + (t2ORNri GPR:$src, t2_so_imm_not:$imm)>, + Requires<[IsThumb2]>; + +def : T2Pat<(t2_so_imm_not:$src), + (t2MVNi t2_so_imm_not:$src)>; //===----------------------------------------------------------------------===// // Multiply Instructions. // let isCommutable = 1 in -def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), +def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; -def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), +def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; -def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), +def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; -// FIXME: SMULL, etc. +// Extra precision multiplies with low / high results +let neverHasSideEffects = 1 in { +let isCommutable = 1 in { +def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, + "smull", " $ldst, $hdst, $a, $b", []>; + +def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, + "umull", " $ldst, $hdst, $a, $b", []>; +} + +// Multiply + accumulate +def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, + "smlal", " $ldst, $hdst, $a, $b", []>; + +def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, + "umlal", " $ldst, $hdst, $a, $b", []>; + +def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, + "umaal", " $ldst, $hdst, $a, $b", []>; +} // neverHasSideEffects + +// Most significant word multiply +def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + "smmul", " $dst, $a, $b", + [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; + +def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; + + +def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmls", " $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; + +multiclass T2I_smul { + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + !strconcat(opc, "bb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16)))]>; + + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + !strconcat(opc, "bt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, (i32 16))))]>; + + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + !strconcat(opc, "tb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), + (sext_inreg GPR:$b, i16)))]>; + + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + !strconcat(opc, "tt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), + (sra GPR:$b, (i32 16))))]>; + + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + !strconcat(opc, "wb"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), (i32 16)))]>; + + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, + !strconcat(opc, "wt"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sra GPR:$b, (i32 16))), (i32 16)))]>; +} + + +multiclass T2I_smla { + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, 
GPR:$acc), IIC_iMAC16, + !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, + (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16))))]>; + + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, (i32 16)))))]>; + + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), + (sext_inreg GPR:$b, i16))))]>; + + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), + (sra GPR:$b, (i32 16)))))]>; + + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), (i32 16))))]>; + + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, + !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sra GPR:$b, (i32 16))), (i32 16))))]>; +} + +defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; + +// TODO: Halfword multiple accumulate long: SMLAL +// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD + //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. // -def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src), +def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, "clz", " $dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>; -def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src), - "rev", " $dst, $src", +def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + "rev", ".w $dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>; -def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), - "rev16", " $dst, $src", +def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + "rev16", ".w $dst, $src", [(set GPR:$dst, (or (and (srl GPR:$src, (i32 8)), 0xFF), (or (and (shl GPR:$src, (i32 8)), 0xFF00), (or (and (srl GPR:$src, (i32 8)), 0xFF0000), (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>; -///// -/// A8.6.137 REVSH -///// -def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), - "revsh", " $dst, $src", +def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + "revsh", ".w $dst, $src", [(set GPR:$dst, (sext_inreg - (or (srl (and GPR:$src, 0xFFFF), (i32 8)), + (or (srl (and GPR:$src, 0xFF00), (i32 8)), (shl GPR:$src, (i32 8))), i16))]>; -// FIXME: PKHxx etc. +def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), + (and (shl GPR:$src2, (i32 imm:$shamt)), + 0xFFFF0000)))]>; + +// Alternate cases for PKHBT where identities eliminate some nodes. 
+def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), + (t2PKHBT GPR:$src1, GPR:$src2, 0)>; +def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), + (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + +def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), + (and (sra GPR:$src2, imm16_31:$shamt), + 0xFFFF)))]>; + +// Alternate cases for PKHTB where identities eliminate some nodes. Note that +// a shift amount of 0 is *not legal* here, it is PKHBT instead. +def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))), + (t2PKHTB GPR:$src1, GPR:$src2, 16)>; +def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), + (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), + (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; //===----------------------------------------------------------------------===// // Comparison Instructions... // -defm t2CMP : T2I_cmp_is<"cmp", - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; +defm t2CMP : T2I_cmp_is<"cmp", + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; defm t2CMPz : T2I_cmp_is<"cmp", BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>; -defm t2CMN : T2I_cmp_is<"cmn", - BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; +defm t2CMN : T2I_cmp_is<"cmn", + BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; defm t2CMNz : T2I_cmp_is<"cmn", BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>; @@ -923,45 +999,132 @@ defm t2TEQ : T2I_cmp_is<"teq", // Short range conditional branch. Looks awesome for loops. Need to figure // out how to use this one. -// FIXME: Conditional moves + +// Conditional moves +// FIXME: should be able to write a pattern for ARMcmov, but can't use +// a two-value operand where a dag node expects two operands. :( +def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, + "mov", ".w $dst, $true", + [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $dst">; + +def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true), + IIC_iCMOVi, "mov", ".w $dst, $true", +[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $dst">; + +def t2MOVCClsl : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "lsl", ".w $dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; +def t2MOVCClsr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "lsr", ".w $dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; +def t2MOVCCasr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "asr", ".w $dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; +def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), + IIC_iCMOVsi, "ror", ".w $dst, $true, $rhs", []>, + RegConstraint<"$false = $dst">; + +//===----------------------------------------------------------------------===// +// TLS Instructions +// + +// __aeabi_read_tp preserves the registers r1-r3. +let isCall = 1, + Defs = [R0, R12, LR, CPSR] in { + def t2TPsoft : T2XI<(outs), (ins), IIC_Br, + "bl __aeabi_read_tp", + [(set R0, ARMthread_pointer)]>; +} + +//===----------------------------------------------------------------------===// +// SJLJ Exception handling intrinsics +// eh_sjlj_setjmp() is an instruction sequence to store the return +// address and save #0 in R0 for the non-longjmp case. 
+// Since by its nature we may be coming from some other function to get +// here, and we're using the stack frame for the containing function to +// save/restore registers, we can't keep anything live in regs across +// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon +// when we get here from a longjmp(). We force everything out of registers +// except for our own input by listing the relevant registers in Defs. By +// doing so, we also cause the prologue/epilogue code to actively preserve +// all of the callee-saved registers, which is exactly what we want. +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, + D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, + D31 ] in { + def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src), + AddrModeNone, SizeSpecial, NoItinerary, + "str.w sp, [$src, #+8] @ eh_setjmp begin\n" + "\tadr r12, 0f\n" + "\torr r12, #1\n" + "\tstr.w r12, [$src, #+4]\n" + "\tmovs r0, #0\n" + "\tb 1f\n" + "0:\tmovs r0, #1 @ eh_setjmp end\n" + "1:", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; +} + + //===----------------------------------------------------------------------===// // Control-Flow Instructions // +// FIXME: remove when we have a way of marking an MI with these properties. +// FIXME: $dst1 should be a def. But the extra ops must be in the end of the +// operand list. +// FIXME: Should pc be an implicit operand like PICADD, etc? +let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, + hasExtraDefRegAllocReq = 1 in + def t2LDM_RET : T2XI<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), + IIC_Br, "ldm${addr:submode}${p}${addr:wide} $addr, $wb", + []>; + let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in -def t2B : T2XI<(outs), (ins brtarget:$target), - "b $target", +def t2B : T2XI<(outs), (ins brtarget:$target), IIC_Br, + "b.w $target", [(br bb:$target)]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { -def t2BR_JTr : T2JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), - "mov pc, $target \n$jt", - [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; +def t2BR_JT : + T2JTI<(outs), + (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), + IIC_Br, "mov pc, $target\n$jt", + [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; -def t2BR_JTm : +// FIXME: Add a non-pc based case that can be predicated. +def t2TBB : T2JTI<(outs), - (ins t2addrmode_so_reg:$target, jtblock_operand:$jt, i32imm:$id), - "ldr pc, $target \n$jt", - [(ARMbrjt (i32 (load t2addrmode_so_reg:$target)), tjumptable:$jt, - imm:$id)]>; + (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), + IIC_Br, "tbb $index\n$jt", []>; -def t2BR_JTadd : +def t2TBH : T2JTI<(outs), - (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), - "add pc, $target, $idx \n$jt", - [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>; -} // isNotDuplicate, isIndirectBranch + (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), + IIC_Br, "tbh $index\n$jt", []>; +} // isNotDuplicable, isIndirectBranch + } // isBranch, isTerminator, isBarrier
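A note on the t2TBB/t2TBH table branches just defined: the table holds forward offsets scaled by two, since Thumb instructions are halfword-aligned. A rough C++ model of the dispatch (pc here stands for the usual Thumb read value, the branch address plus 4; that detail comes from the ARM ARM, not from this file):

#include <cstdint>

// tbb [Rn, Rm]: byte-sized entries, target = pc + 2 * table[index]
uint32_t TBBTarget(uint32_t pc, const uint8_t *table, unsigned index) {
  return pc + 2u * table[index];
}

// tbh [Rn, Rm, lsl #1]: halfword entries buy a much larger range
uint32_t TBHTarget(uint32_t pc, const uint16_t *table, unsigned index) {
  return pc + 2u * table[index];
}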
// FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. :( let isBranch = 1, isTerminator = 1 in -def t2Bcc : T2I<(outs), (ins brtarget:$target), - "b", " $target", +def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, + "b", ".w $target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>; + +// IT block +def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), + AddrModeNone, Size2Bytes, IIC_iALUx, + "it$mask $cc", "", []>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -972,7 +1135,10 @@ def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), (t2LEApcrelJT tjumptable:$dst, imm:$id)>; -// Large immediate handling. - -def : T2Pat<(i32 imm:$src), - (t2MOVTi16 (t2MOVi16 (t2_lo16 imm:$src)), (t2_hi16 imm:$src))>; +// 32-bit immediate using movw + movt. +// This is a single pseudo instruction to make it re-materializable. Remove +// when we can do generalized remat. +let isReMaterializable = 1 in +def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, + "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + [(set GPR:$dst, (i32 imm:$src))]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 9104c77115f0d..56336d131abc3 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -36,57 +36,57 @@ def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; let canFoldAsLoad = 1 in { def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), - "fldd", " $dst, $addr", + IIC_fpLoad64, "fldd", " $dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), - "flds", " $dst, $addr", + IIC_fpLoad32, "flds", " $dst, $addr", [(set SPR:$dst, (load addrmode5:$addr))]>; } // canFoldAsLoad def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), - "fstd", " $src, $addr", + IIC_fpStore64, "fstd", " $src, $addr", [(store DPR:$src, addrmode5:$addr)]>; def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), - "fsts", " $src, $addr", + IIC_fpStore32, "fsts", " $src, $addr", [(store SPR:$src, addrmode5:$addr)]>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions.
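addrmode5, shared by the FLDD/FLDS/FSTD/FSTS definitions above and the FLDM/FSTM definitions that follow, is a base register plus an 8-bit word-count offset with an add/subtract flag. A sketch of the effective-address arithmetic only (the exact operand bit packing lives in ARMAddressingModes.h and is not reproduced here):

#include <cstdint>

// VFP load/store addressing: base +/- (imm8 * 4), always word-scaled.
uint32_t AM5Address(uint32_t base, bool add, uint8_t imm8) {
  uint32_t off = uint32_t(imm8) * 4u;
  return add ? base + off : base - off;
}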
// -let mayLoad = 1 in { -def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, - variable_ops), - "fldm${addr:submode}d${p} ${addr:base}, $dst1", +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, + variable_ops), IIC_fpLoadm, + "fldm${addr:submode}d${p} ${addr:base}, $wb", []> { let Inst{20} = 1; } -def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, - variable_ops), - "fldm${addr:submode}s${p} ${addr:base}, $dst1", +def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, + variable_ops), IIC_fpLoadm, + "fldm${addr:submode}s${p} ${addr:base}, $wb", []> { let Inst{20} = 1; } -} +} // mayLoad, hasExtraDefRegAllocReq -let mayStore = 1 in { -def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, - variable_ops), - "fstm${addr:submode}d${p} ${addr:base}, $src1", +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, + variable_ops), IIC_fpStorem, + "fstm${addr:submode}d${p} ${addr:base}, $wb", []> { let Inst{20} = 0; } -def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, - variable_ops), - "fstm${addr:submode}s${p} ${addr:base}, $src1", +def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, + variable_ops), IIC_fpStorem, + "fstm${addr:submode}s${p} ${addr:base}, $wb", []> { let Inst{20} = 0; } -} // mayStore +} // mayStore, hasExtraSrcRegAllocReq // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -95,46 +95,48 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, // def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - "faddd", " $dst, $a, $b", + IIC_fpALU64, "faddd", " $dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; -def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fadds", " $dst, $a, $b", - [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; +def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "fadds", " $dst, $a, $b", + [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. 
+let Defs = [FPSCR] in { def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), - "fcmped", " $a, $b", + IIC_fpCMP64, "fcmped", " $a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), - "fcmpes", " $a, $b", + IIC_fpCMP32, "fcmpes", " $a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; +} def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - "fdivd", " $dst, $a, $b", + IIC_fpDIV64, "fdivd", " $dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fdivs", " $dst, $a, $b", + IIC_fpDIV32, "fdivs", " $dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - "fmuld", " $dst, $a, $b", + IIC_fpMUL64, "fmuld", " $dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; -def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fmuls", " $dst, $a, $b", - [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; +def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpMUL32, "fmuls", " $dst, $a, $b", + [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - "fnmuld", " $dst, $a, $b", + IIC_fpMUL64, "fnmuld", " $dst, $a, $b", [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { let Inst{6} = 1; } def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fnmuls", " $dst, $a, $b", + IIC_fpMUL32, "fnmuls", " $dst, $a, $b", [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { let Inst{6} = 1; } @@ -147,14 +149,14 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - "fsubd", " $dst, $a, $b", + IIC_fpALU64, "fsubd", " $dst, $a, $b", [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { let Inst{6} = 1; } -def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - "fsubs", " $dst, $a, $b", - [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { +def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "fsubs", " $dst, $a, $b", + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { let Inst{6} = 1; } @@ -163,29 +165,31 @@ def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), // def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), - "fabsd", " $dst, $a", + IIC_fpUNA64, "fabsd", " $dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; -def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), - "fabss", " $dst, $a", - [(set SPR:$dst, (fabs SPR:$a))]>; +def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "fabss", " $dst, $a", + [(set SPR:$dst, (fabs SPR:$a))]>; +let Defs = [FPSCR] in { def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), - "fcmpezd", " $a", + IIC_fpCMP64, "fcmpezd", " $a", [(arm_cmpfp0 DPR:$a)]>; def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), - "fcmpezs", " $a", + IIC_fpCMP32, "fcmpezs", " $a", [(arm_cmpfp0 SPR:$a)]>; +} def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), - "fcvtds", " $dst, $a", + IIC_fpCVTDS, "fcvtds", " $dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; // Special case encoding: bits 11-8 is 0b1011. 
-def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, - "fcvtsd", " $dst, $a", - [(set SPR:$dst, (fround DPR:$a))]> { +def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, + IIC_fpCVTSD, "fcvtsd", " $dst, $a", + [(set SPR:$dst, (fround DPR:$a))]> { let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; let Inst{11-8} = 0b1011; @@ -194,26 +198,26 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, let neverHasSideEffects = 1 in { def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), - "fcpyd", " $dst, $a", []>; + IIC_fpUNA64, "fcpyd", " $dst, $a", []>; def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), - "fcpys", " $dst, $a", []>; + IIC_fpUNA32, "fcpys", " $dst, $a", []>; } // neverHasSideEffects def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), - "fnegd", " $dst, $a", + IIC_fpUNA64, "fnegd", " $dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; -def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), - "fnegs", " $dst, $a", - [(set SPR:$dst, (fneg SPR:$a))]>; +def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "fnegs", " $dst, $a", + [(set SPR:$dst, (fneg SPR:$a))]>; def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), - "fsqrtd", " $dst, $a", + IIC_fpSQRT64, "fsqrtd", " $dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), - "fsqrts", " $dst, $a", + IIC_fpSQRT32, "fsqrts", " $dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; //===----------------------------------------------------------------------===// @@ -221,16 +225,16 @@ def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), // def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - "fmrs", " $dst, $src", + IIC_VMOVSI, "fmrs", " $dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - "fmsr", " $dst, $src", + IIC_VMOVIS, "fmsr", " $dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def FMRRD : AVConv3I<0b11000101, 0b1011, - (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src), - "fmrrd", " $dst1, $dst2, $src", + (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), + IIC_VMOVDI, "fmrrd", " $wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]>; // FMDHR: GPR -> SPR @@ -238,7 +242,7 @@ def FMRRD : AVConv3I<0b11000101, 0b1011, def FMDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - "fmdrr", " $dst, $src1, $src2", + IIC_VMOVID, "fmdrr", " $dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; // FMRDH: SPR -> GPR @@ -254,23 +258,23 @@ def FMDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - "fsitod", " $dst, $a", + IIC_fpCVTID, "fsitod", " $dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } -def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), - "fsitos", " $dst, $a", +def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "fsitos", " $dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - "fuitod", " $dst, $a", + IIC_fpCVTID, "fuitod", " $dst, $a", [(set DPR:$dst, (arm_uitof SPR:$a))]>; -def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), 
- "fuitos", " $dst, $a", +def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "fuitos", " $dst, $a", [(set SPR:$dst, (arm_uitof SPR:$a))]>; // FP to Int: @@ -278,28 +282,28 @@ def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), - "ftosizd", " $dst, $a", + IIC_fpCVTDI, "ftosizd", " $dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010, - (outs SPR:$dst), (ins SPR:$a), - "ftosizs", " $dst, $a", +def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + IIC_fpCVTSI, "ftosizs", " $dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), - "ftouizd", " $dst, $a", + IIC_fpCVTDI, "ftouizd", " $dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010, - (outs SPR:$dst), (ins SPR:$a), - "ftouizs", " $dst, $a", +def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + IIC_fpCVTSI, "ftouizs", " $dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } @@ -309,48 +313,53 @@ def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010, // def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - "fmacd", " $dst, $a, $b", + IIC_fpMAC64, "fmacd", " $dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - "fmacs", " $dst, $a, $b", - [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, - RegConstraint<"$dstin = $dst">; +def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "fmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - "fmscd", " $dst, $a, $b", + IIC_fpMAC64, "fmscd", " $dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - "fmscs", " $dst, $a, $b", + IIC_fpMAC32, "fmscs", " $dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - "fnmacd", " $dst, $a, $b", + IIC_fpMAC64, "fnmacd", " $dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } -def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - "fnmacs", " $dst, $a, $b", +def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "fnmacs", " $dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } +def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)), + (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; +def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), + (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; + def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - "fnmscd", " $dst, $a, $b", + 
IIC_fpMAC64, "fnmscd", " $dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - "fnmscs", " $dst, $a, $b", + IIC_fpMAC32, "fnmscs", " $dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -362,25 +371,25 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - "fcpyd", " $dst, $true", + IIC_fpUNA64, "fcpyd", " $dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - "fcpys", " $dst, $true", + IIC_fpUNA32, "fcpys", " $dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - "fnegd", " $dst, $true", + IIC_fpUNA64, "fnegd", " $dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - "fnegs", " $dst, $true", + IIC_fpUNA32, "fnegs", " $dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; @@ -389,8 +398,8 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, // Misc. // -let Defs = [CPSR] in -def FMSTAT : AI<(outs), (ins), VFPMiscFrm, "fmstat", "", [(arm_fmstat)]> { +let Defs = [CPSR], Uses = [FPSCR] in +def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", [(arm_fmstat)]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; let Inst{15-12} = 0b1111; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index e551c41936f8e..24990e67a3819 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -19,15 +19,15 @@ #include "ARMSubtarget.h" #include "llvm/Function.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/Config/alloca.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Memory.h" #include using namespace llvm; void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - abort(); + llvm_report_error("ARMJITInfo::replaceMachineCodeForFunction"); } /// JITCompilerFunction - This contains the address of the JIT function used to @@ -45,11 +45,11 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; // CompilationCallback stub - We can't use a C function with inline assembly in // it, because we the prolog/epilog inserted by GCC won't work for us (we need // to preserve more context and manipulate the stack directly). Instead, -// write our own wrapper, which does things our way, so we have complete +// write our own wrapper, which does things our way, so we have complete // control over register saving and restoring. extern "C" { #if defined(__arm__) - void ARMCompilationCallback(void); + void ARMCompilationCallback(); asm( ".text\n" ".align 2\n" @@ -77,11 +77,11 @@ extern "C" { // order for the registers. 
// +--------+ // 0 | LR | Original return address - // +--------+ + // +--------+ // 1 | LR | Stub address (start of stub) // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) // 6-20 | D0..D7 | Saved VFP registers - // +--------+ + // +--------+ // #ifndef __SOFTFP__ // Restore VFP caller-saved registers. @@ -103,15 +103,14 @@ extern "C" { ); #else // Not an ARM host void ARMCompilationCallback() { - assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n"); - abort(); + llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!"); } #endif } -/// ARMCompilationCallbackC - This is the target-specific function invoked -/// by the function stub when we did not know the real target of a call. -/// This function must locate the start of the stub or call site and pass +/// ARMCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass /// it into the JIT compiler function. extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { // Get the address of the compiled code for this function. @@ -123,14 +122,12 @@ extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { // ldr pc, [pc,#-4] // if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { - cerr << "ERROR: Unable to mark stub writable\n"; - abort(); + llvm_unreachable("ERROR: Unable to mark stub writable"); } *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] *(intptr_t *)(StubAddr+4) = NewVal; if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { - cerr << "ERROR: Unable to mark stub executable\n"; - abort(); + llvm_unreachable("ERROR: Unable to mark stub executable"); } } @@ -143,7 +140,14 @@ ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, JITCodeEmitter &JCE) { JCE.startGVStub(GV, 4, 4); + intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable((void*)Addr, 4)) { + llvm_unreachable("ERROR: Unable to mark indirect symbol writable"); + } JCE.emitWordLE((intptr_t)Ptr); + if (!sys::Memory::setRangeExecutable((void*)Addr, 4)) { + llvm_unreachable("ERROR: Unable to mark indirect symbol executable"); + } void *PtrAddr = JCE.finishGVStub(GV); addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); return PtrAddr; @@ -161,31 +165,43 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, if (!LazyPtr) { // In PIC mode, the function stub is loading a lazy-ptr. 
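The lazy-pointer indirection described in that comment can be modeled in portable C++: calls go through a data slot, and the slot starts out pointing at a resolver that patches it on first use. All names below are made up for illustration; only the shape of the scheme comes from this file:

using Fn = int (*)(int);

static int Compiled(int x) { return x + 1; }   // the eventual target

static int Resolve(int x);
static Fn LazyPtr = &Resolve;                  // the lazy-ptr slot

static int Resolve(int x) {
  LazyPtr = &Compiled;   // patch the slot; later calls skip the resolver
  return Compiled(x);
}

static int Stub(int x) { return LazyPtr(x); }  // what the emitted stub does

int main() { return (Stub(1) + Stub(2) == 5) ? 0 : 1; }

The code that follows emits the machine-level equivalent: a pc-relative load of the slot's offset, an add to form its address, and an indirect load into pc.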
LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE); - if (F) - DOUT << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '" - << F->getName() << "'\n"; - else - DOUT << "JIT: Stub emitted at [" << LazyPtr - << "] for external function at '" << Fn << "'\n"; + DEBUG(if (F) + errs() << "JIT: Indirect symbol emitted at [" << LazyPtr + << "] for GV '" << F->getName() << "'\n"; + else + errs() << "JIT: Stub emitted at [" << LazyPtr + << "] for external function at '" << Fn << "'\n"); } JCE.startGVStub(F, 16, 4); intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable((void*)Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8) sys::Memory::InvalidateInstructionCache((void*)Addr, 16); + if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } } else { // The stub is 8 bytes in size and 4-byte aligned. JCE.startGVStub(F, 8, 4); intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable((void*)Addr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] JCE.emitWordLE((intptr_t)Fn); // addr of function sys::Memory::InvalidateInstructionCache((void*)Addr, 8); + if (!sys::Memory::setRangeExecutable((void*)Addr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } } } else { // The compilation callback will overwrite the first two words of this - // stub with indirect branch instructions targeting the compiled code. + // stub with indirect branch instructions targeting the compiled code. // This stub sets the return address to restart the stub, so that // the new branch will be invoked when we come back. // @@ -193,6 +209,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, // The stub is 16 bytes in size and 4-byte aligned. JCE.startGVStub(F, 16, 4); intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable((void*)Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } // Save LR so the callback can determine which stub called it. // The compilation callback is responsible for popping this prior // to returning. @@ -204,6 +223,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, // The address of the compilation callback.
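(The word emitted next is that callback address.) The setRangeWritable / InvalidateInstructionCache / setRangeExecutable bracketing seen throughout these stubs is the usual self-modifying-code dance: make the page writable, store the new instructions, flush any stale I-cache lines, then re-protect. A free-standing sketch of the flush step, using the GCC/Clang builtin rather than the llvm::sys::Memory API used here (the page-protection toggling is elided):

#include <cstdint>

// Overwrite one instruction word in an already-writable code buffer,
// then flush the instruction cache for that range so the core cannot
// keep executing a stale copy.
void PatchWord(uint32_t *addr, uint32_t insn) {
  *addr = insn;  // e.g. 0xe51ff004: ldr pc, [pc, #-4]
  __builtin___clear_cache(reinterpret_cast<char *>(addr),
                          reinterpret_cast<char *>(addr + 1));
}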
JCE.emitWordLE((intptr_t)ARMCompilationCallback); sys::Memory::InvalidateInstructionCache((void*)Addr, 16); + if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } } return JCE.finishGVStub(F); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 59cf125a9b995..d2ec9ee6cdf97 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -15,9 +15,11 @@ #define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMAddressingModes.h" +#include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -29,6 +31,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -61,6 +64,7 @@ namespace { const TargetRegisterInfo *TRI; ARMFunctionInfo *AFI; RegScavenger *RS; + bool isThumb2; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -93,6 +97,15 @@ namespace { void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); + bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const TargetInstrInfo *TII, + bool &Advance, + MachineBasicBlock::iterator &I); + bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool &Advance, + MachineBasicBlock::iterator &I); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -107,6 +120,14 @@ static int getLoadStoreMultipleOpcode(int Opcode) { case ARM::STR: NumSTMGened++; return ARM::STM; + case ARM::t2LDRi8: + case ARM::t2LDRi12: + NumLDMGened++; + return ARM::t2LDM; + case ARM::t2STRi8: + case ARM::t2STRi12: + NumSTMGened++; + return ARM::t2STM; case ARM::FLDS: NumFLDMGened++; return ARM::FLDMS; @@ -119,14 +140,30 @@ static int getLoadStoreMultipleOpcode(int Opcode) { case ARM::FSTD: NumFSTMGened++; return ARM::FSTMD; - default: abort(); + default: llvm_unreachable("Unhandled opcode!"); } return 0; } +static bool isT2i32Load(unsigned Opc) { + return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; +} + +static bool isi32Load(unsigned Opc) { + return Opc == ARM::LDR || isT2i32Load(Opc); +} + +static bool isT2i32Store(unsigned Opc) { + return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8; +} + +static bool isi32Store(unsigned Opc) { + return Opc == ARM::STR || isT2i32Store(Opc); +} + /// MergeOps - Create and insert a LDM or STM with Base as base register and /// registers in Regs as the register operands that would be loaded / stored. -/// It returns true if the transformation is done. +/// It returns true if the transformation is done. bool ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -140,14 +177,20 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; - if (isAM4 && Offset == 4) + bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); + if (isAM4 && Offset == 4) { + if (isThumb2) + // Thumb2 does not support ldmib / stmib. 
+ return false; Mode = ARM_AM::ib; - else if (isAM4 && Offset == -4 * (int)NumRegs + 4) + } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) { + if (isThumb2) + // Thumb2 does not support ldmda / stmda. + return false; Mode = ARM_AM::da; - else if (isAM4 && Offset == -4 * (int)NumRegs) + } else if (isAM4 && Offset == -4 * (int)NumRegs) { Mode = ARM_AM::db; - else if (Offset != 0) { + } else if (Offset != 0) { // If starting offset isn't zero, insert an MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. @@ -155,7 +198,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; unsigned NewBase; - if (Opcode == ARM::LDR) + if (isi32Load(Opcode)) // If it is a load, then just use one of the destination register to // use as the new base. NewBase = Regs[NumRegs-1].first; @@ -165,24 +208,30 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NewBase == 0) return false; } - int BaseOpc = ARM::ADDri; + int BaseOpc = !isThumb2 + ? ARM::ADDri + : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri); if (Offset < 0) { - BaseOpc = ARM::SUBri; + BaseOpc = !isThumb2 + ? ARM::SUBri + : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); Offset = - Offset; } - int ImmedOffset = ARM_AM::getSOImmVal(Offset); + int ImmedOffset = isThumb2 + ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); if (ImmedOffset == -1) + // FIXME: Try t2ADDri12 or t2SUBri12? return false; // Probably not worth it then. BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); Base = NewBase; BaseKill = true; // New base is always killed right after its use. } bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD; - bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; Opcode = getLoadStoreMultipleOpcode(Opcode); MachineInstrBuilder MIB = (isAM4) ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) @@ -192,6 +241,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, .addReg(Base, getKillRegState(BaseKill)) .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) .addImm(Pred).addReg(PredReg); + MIB.addReg(0); // Add optional writeback (0 for now). for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -207,7 +257,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVector &Merges) { - bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; + bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode); int Offset = MemOps[SIndex].Offset; int SOffset = Offset; unsigned Pos = MemOps[SIndex].Position; @@ -265,41 +315,53 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, return; } -/// getInstrPredicate - If instruction is predicated, returns its predicate -/// condition, otherwise returns AL. It also returns the condition code -/// register by reference.
-static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) { - int PIdx = MI->findFirstPredOperandIdx(); - if (PIdx == -1) { - PredReg = 0; - return ARMCC::AL; - } - - PredReg = MI->getOperand(PIdx+1).getReg(); - return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); -} - static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, ARMCC::CondCodes Pred, - unsigned PredReg) { + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg){ unsigned MyPredReg = 0; - return (MI && MI->getOpcode() == ARM::SUBri && - MI->getOperand(0).getReg() == Base && + if (!MI) + return false; + if (MI->getOpcode() != ARM::t2SUBri && + MI->getOpcode() != ARM::t2SUBrSPi && + MI->getOpcode() != ARM::t2SUBrSPi12 && + MI->getOpcode() != ARM::tSUBspi && + MI->getOpcode() != ARM::SUBri) + return false; + + // Make sure the offset fits in 8 bits. + if (Bytes <= 0 || (Limit && Bytes >= Limit)) + return false; + + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME + return (MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && + (MI->getOperand(2).getImm()*Scale) == Bytes && + llvm::getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg); } static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, ARMCC::CondCodes Pred, - unsigned PredReg) { + unsigned Bytes, unsigned Limit, + ARMCC::CondCodes Pred, unsigned PredReg){ unsigned MyPredReg = 0; - return (MI && MI->getOpcode() == ARM::ADDri && - MI->getOperand(0).getReg() == Base && + if (!MI) + return false; + if (MI->getOpcode() != ARM::t2ADDri && + MI->getOpcode() != ARM::t2ADDrSPi && + MI->getOpcode() != ARM::t2ADDrSPi12 && + MI->getOpcode() != ARM::tADDspi && + MI->getOpcode() != ARM::ADDri) + return false; + + if (Bytes <= 0 || (Limit && Bytes >= Limit)) + // Make sure the offset fits in 8 bits. + return false; + + unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 
4 : 1; // FIXME + return (MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && + (MI->getOperand(2).getImm()*Scale) == Bytes && + llvm::getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg); } @@ -308,6 +370,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { default: return 0; case ARM::LDR: case ARM::STR: + case ARM::t2LDRi8: + case ARM::t2LDRi12: + case ARM::t2STRi8: + case ARM::t2STRi12: case ARM::FLDS: case ARM::FSTS: return 4; @@ -316,7 +382,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { return 8; case ARM::LDM: case ARM::STM: - return (MI->getNumOperands() - 4) * 4; + case ARM::t2LDM: + case ARM::t2STM: + return (MI->getNumOperands() - 5) * 4; case ARM::FLDMS: case ARM::FSTMS: case ARM::FLDMD: @@ -325,7 +393,7 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { } } -/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base +/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base /// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: /// /// stmia rn, @@ -337,17 +405,18 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { /// ldmia rn, /// => /// ldmdb rn!, -static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool &Advance, + MachineBasicBlock::iterator &I) { MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); unsigned Bytes = getLSMultipleTransferSize(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); - bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM; + bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM || + Opcode == ARM::STM || Opcode == ARM::t2STM; if (isAM4) { if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())) @@ -364,13 +433,17 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MBBI != MBB.begin()) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true)); + MI->getOperand(4).setReg(Base); + MI->getOperand(4).setIsDef(); MBB.erase(PrevMBBI); return true; } else if (Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true)); + MI->getOperand(4).setReg(Base); // WB to base + MI->getOperand(4).setIsDef(); MBB.erase(PrevMBBI); return true; } @@ -379,8 +452,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MBBI != MBB.end()) { MachineBasicBlock::iterator NextMBBI = next(MBBI); if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); + MI->getOperand(4).setReg(Base); // WB to base + MI->getOperand(4).setIsDef(); if (NextMBBI == I) { Advance = true; ++I; @@ -388,8 
+463,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MBB.erase(NextMBBI); return true; } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); + MI->getOperand(4).setReg(Base); // WB to base + MI->getOperand(4).setIsDef(); if (NextMBBI == I) { Advance = true; ++I; @@ -408,8 +485,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MBBI != MBB.begin()) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset)); + MI->getOperand(4).setReg(Base); // WB to base + MI->getOperand(4).setIsDef(); MBB.erase(PrevMBBI); return true; } @@ -418,8 +497,10 @@ static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MBBI != MBB.end()) { MachineBasicBlock::iterator NextMBBI = next(MBBI); if (Mode == ARM_AM::ia && - isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); + MI->getOperand(4).setReg(Base); // WB to base + MI->getOperand(4).setIsDef(); if (NextMBBI == I) { Advance = true; ++I; @@ -441,7 +522,13 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { case ARM::FLDD: return ARM::FLDMD; case ARM::FSTS: return ARM::FSTMS; case ARM::FSTD: return ARM::FSTMD; - default: abort(); + case ARM::t2LDRi8: + case ARM::t2LDRi12: + return ARM::t2LDR_PRE; + case ARM::t2STRi8: + case ARM::t2STRi12: + return ARM::t2STR_PRE; + default: llvm_unreachable("Unhandled opcode!"); } return 0; } @@ -454,48 +541,62 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { case ARM::FLDD: return ARM::FLDMD; case ARM::FSTS: return ARM::FSTMS; case ARM::FSTD: return ARM::FSTMD; - default: abort(); + case ARM::t2LDRi8: + case ARM::t2LDRi12: + return ARM::t2LDR_POST; + case ARM::t2STRi8: + case ARM::t2STRi12: + return ARM::t2STR_POST; + default: llvm_unreachable("Unhandled opcode!"); } return 0; } -/// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base +/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: -static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const TargetInstrInfo *TII, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const TargetInstrInfo *TII, + bool &Advance, + MachineBasicBlock::iterator &I) { MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(1).getReg(); bool BaseKill = MI->getOperand(1).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); + bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS || + Opcode == ARM::FSTD || Opcode == ARM::FSTS; bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; - if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) || - (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)) + if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) return false; + else if (isAM5 && 
ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) + return false; + else if (isT2i32Load(Opcode) || isT2i32Store(Opcode)) + if (MI->getOperand(2).getImm() != 0) + return false; - bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; // Can't do the merge if the destination register is the same as the would-be // writeback register. if (isLd && MI->getOperand(0).getReg() == Base) return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); bool DoMerge = false; ARM_AM::AddrOpc AddSub = ARM_AM::add; unsigned NewOpc = 0; + // AM2 - 12 bits, thumb2 - 8 bits. + unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); if (MBBI != MBB.begin()) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); - if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; AddSub = ARM_AM::sub; NewOpc = getPreIndexedLoadStoreOpcode(Opcode); - } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes, - Pred, PredReg)) { + } else if (!isAM5 && + isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { DoMerge = true; NewOpc = getPreIndexedLoadStoreOpcode(Opcode); } @@ -505,11 +606,12 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge && MBBI != MBB.end()) { MachineBasicBlock::iterator NextMBBI = next(MBBI); - if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + if (!isAM5 && + isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; AddSub = ARM_AM::sub; NewOpc = getPostIndexedLoadStoreOpcode(Opcode); - } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { DoMerge = true; NewOpc = getPostIndexedLoadStoreOpcode(Opcode); } @@ -526,33 +628,51 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; - unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift) - : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia, - true, isDPR ? 2 : 1); + unsigned Offset = 0; + if (isAM5) + Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) + ? ARM_AM::db + : ARM_AM::ia, true, (isDPR ? 2 : 1)); + else if (isAM2) + Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + else + Offset = AddSub == ARM_AM::sub ? 
-Bytes : Bytes; if (isLd) { - if (isAM2) - // LDR_PRE, LDR_POST; - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) - .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); - else + if (isAM5) // FLDMS, FLDMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getKillRegState(BaseKill)) .addImm(Offset).addImm(Pred).addReg(PredReg) + .addReg(Base, getDefRegState(true)) // WB base register .addReg(MI->getOperand(0).getReg(), RegState::Define); - } else { - MachineOperand &MO = MI->getOperand(0); - if (isAM2) - // STR_PRE, STR_POST; - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(MO.isKill())) + else if (isAM2) + // LDR_PRE, LDR_POST, + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); else + // t2LDR_PRE, t2LDR_POST + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + } else { + MachineOperand &MO = MI->getOperand(0); + if (isAM5) // FSTMS, FSTMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset) .addImm(Pred).addReg(PredReg) + .addReg(Base, getDefRegState(true)) // WB base register .addReg(MO.getReg(), getKillRegState(MO.isKill())); + else if (isAM2) + // STR_PRE, STR_POST + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + else + // t2STR_PRE, t2STR_POST + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + .addReg(MO.getReg(), getKillRegState(MO.isKill())) + .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } MBB.erase(MBBI); @@ -561,7 +681,7 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, /// isMemoryOp - Returns true if instruction is a memory operations (that this /// pass is capable of operating on). -static bool isMemoryOp(MachineInstr *MI) { +static bool isMemoryOp(const MachineInstr *MI) { int Opcode = MI->getOpcode(); switch (Opcode) { default: break; @@ -574,6 +694,11 @@ static bool isMemoryOp(MachineInstr *MI) { case ARM::FLDD: case ARM::FSTD: return MI->getOperand(1).isReg(); + case ARM::t2LDRi8: + case ARM::t2LDRi12: + case ARM::t2STRi8: + case ARM::t2STRi12: + return MI->getOperand(1).isReg(); } return false; } @@ -600,6 +725,12 @@ static int getMemoryOpOffset(const MachineInstr *MI) { bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; unsigned NumOperands = MI->getDesc().getNumOperands(); unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + + if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || + Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) + return OffField; + int Offset = isAM2 ? ARM_AM::getAM2Offset(OffField) : (isAM3 ? 
ARM_AM::getAM3Offset(OffField) @@ -621,37 +752,43 @@ static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int OffImm, bool isDef, DebugLoc dl, unsigned NewOpc, - unsigned Reg, bool RegDeadKill, - unsigned BaseReg, bool BaseKill, - unsigned OffReg, bool OffKill, + unsigned Reg, bool RegDeadKill, bool RegUndef, + unsigned BaseReg, bool BaseKill, bool BaseUndef, + unsigned OffReg, bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, unsigned PredReg, - const TargetInstrInfo *TII) { - unsigned Offset; - if (OffImm < 0) - Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); - else - Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); - if (isDef) - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + const TargetInstrInfo *TII, bool isT2) { + int Offset = OffImm; + if (!isT2) { + if (OffImm < 0) + Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); + else + Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); + } + if (isDef) { + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), + TII->get(NewOpc)) .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill)) - .addReg(BaseReg, getKillRegState(BaseKill)) - .addReg(OffReg, getKillRegState(OffKill)) - .addImm(Offset) - .addImm(Pred).addReg(PredReg); - else - BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) - .addReg(Reg, getKillRegState(RegDeadKill)) - .addReg(BaseReg, getKillRegState(BaseKill)) - .addReg(OffReg, getKillRegState(OffKill)) - .addImm(Offset) - .addImm(Pred).addReg(PredReg); + .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); + if (!isT2) + MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); + MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + } else { + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), + TII->get(NewOpc)) + .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef)) + .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef)); + if (!isT2) + MIB.addReg(OffReg, getKillRegState(OffKill)|getUndefRegState(OffUndef)); + MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + } } bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = &*MBBI; unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { + if (Opcode == ARM::LDRD || Opcode == ARM::STRD || + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { unsigned EvenReg = MI->getOperand(0).getReg(); unsigned OddReg = MI->getOperand(1).getReg(); unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); @@ -659,45 +796,59 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) return false; - bool isLd = Opcode == ARM::LDRD; + bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; + bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; bool EvenDeadKill = isLd ? MI->getOperand(0).isDead() : MI->getOperand(0).isKill(); + bool EvenUndef = MI->getOperand(0).isUndef(); bool OddDeadKill = isLd ? 
MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); + bool OddUndef = MI->getOperand(1).isUndef(); const MachineOperand &BaseOp = MI->getOperand(2); unsigned BaseReg = BaseOp.getReg(); bool BaseKill = BaseOp.isKill(); - const MachineOperand &OffOp = MI->getOperand(3); - unsigned OffReg = OffOp.getReg(); - bool OffKill = OffOp.isKill(); + bool BaseUndef = BaseOp.isUndef(); + unsigned OffReg = isT2 ? 0 : MI->getOperand(3).getReg(); + bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); + bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a // ldm or stm. - unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM; + unsigned NewOpc = (isLd) + ? (isT2 ? ARM::t2LDM : ARM::LDM) + : (isT2 ? ARM::t2STM : ARM::STM); if (isLd) { BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(BaseReg, getKillRegState(BaseKill)) .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) + .addReg(0) .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill)) - .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); + .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill)); ++NumLDRD2LDM; } else { BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) .addReg(BaseReg, getKillRegState(BaseKill)) .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) .addImm(Pred).addReg(PredReg) - .addReg(EvenReg, getKillRegState(EvenDeadKill)) - .addReg(OddReg, getKillRegState(OddDeadKill)); + .addReg(0) + .addReg(EvenReg, + getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef)) + .addReg(OddReg, + getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } } else { // Split into two instructions. - unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR; + assert((!isT2 || !OffReg) && + "Thumb2 ldrd / strd does not encode offset register!"); + unsigned NewOpc = (isLd) + ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR) + : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. 
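The clobber hazard in that comment is easy to see in scalar form: when ldrd's base register is also its first destination, the even half must be loaded last, or the second address computation reads garbage. A sketch (plain C++, mirroring the ordering logic only):

#include <cstdint>

struct Pair { uint32_t even, odd; };

// Splitting "ldrd r0, r1, [r0]" into two plain loads: loading r0 first
// would destroy the base, so the odd half is loaded while the base is
// still intact and the even (base-clobbering) half goes last.
Pair SplitLdrd(const uint32_t *base_in_r0) {
  Pair out;
  out.odd  = base_in_r0[1];  // safe: base not yet overwritten
  out.even = base_in_r0[0];  // may alias the base register's value
  return out;
}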
@@ -707,17 +858,23 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { assert(!TRI->regsOverlap(OddReg, BaseReg) && (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, - BaseReg, false, OffReg, false, Pred, PredReg, TII); - InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, - BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + OddReg, OddDeadKill, false, + BaseReg, false, BaseUndef, OffReg, false, OffUndef, + Pred, PredReg, TII, isT2); + InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, + EvenReg, EvenDeadKill, false, + BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, + Pred, PredReg, TII, isT2); } else { InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, - EvenReg, EvenDeadKill, BaseReg, false, OffReg, false, - Pred, PredReg, TII); + EvenReg, EvenDeadKill, EvenUndef, + BaseReg, false, BaseUndef, OffReg, false, OffUndef, + Pred, PredReg, TII, isT2); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, - OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill, - Pred, PredReg, TII); + OddReg, OddDeadKill, OddUndef, + BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, + Pred, PredReg, TII, isT2); } if (isLd) ++NumLDRD2LDR; @@ -761,7 +918,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { unsigned Size = getLSMultipleTransferSize(MBBI); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); + ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] @@ -772,7 +929,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // looks like the later ldr(s) use the same base register. Try to // merge the ldr's so far, including this one. But don't try to // combine the following ldr(s). - Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg()); + Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg()); if (CurrBase == 0 && !Clobber) { // Start of a new chain. CurrBase = Base; @@ -825,12 +982,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // Try to find a free register to use as a new base in case it's needed. // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); - // Find a scratch register. Make sure it's a call clobbered register or - // a spilled callee-saved register. - unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true); - if (!Scratch) - Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, - AFI->getSpilledCSRegisters()); + // Find a scratch register. + unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass); // Process the load / store instructions. RS->forward(prior(MBBI)); @@ -842,7 +995,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // Try folding preceding/trailing base inc/dec into the generated // LDM/STM ops. for (unsigned i = 0, e = Merges.size(); i < e; ++i) - if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) + if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) ++NumMerges; NumMerges += Merges.size(); @@ -850,15 +1003,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // that were not merged to form LDM/STM ops.
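The loop below retries the same fold on the loads and stores that stayed single. The transformation itself, for the multiple case just handled, is the usual writeback recovery: "ldmia r5, {..}" followed by "add r5, r5, #4*n" collapses into "ldmia r5!, {..}". A scalar model, illustrative only:

#include <cstdint>

// ldmia base!, {regs}: load n words and advance the base, which is what
// folding the trailing add into the LDM's writeback operand achieves.
void LdmiaWriteback(const uint32_t *&base, uint32_t *regs, unsigned n) {
  for (unsigned i = 0; i != n; ++i)
    regs[i] = base[i];
  base += n;  // the folded increment: 4*n bytes
}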
@@ -850,15 +1003,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         // that were not merged to form LDM/STM ops.
         for (unsigned i = 0; i != NumMemOps; ++i)
           if (!MemOps[i].Merged)
-            if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
+            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
               ++NumMerges;

-        // RS may be pointing to an instruction that's deleted. 
+        // RS may be pointing to an instruction that's deleted.
         RS->skipTo(prior(MBBI));
       } else if (NumMemOps == 1) {
         // Try folding preceding/trailing base inc/dec into the single
         // load/store.
-        if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
           ++NumMerges;
           RS->forward(prior(MBBI));
         }
@@ -907,16 +1060,18 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
   if (MBB.empty()) return false;

   MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) {
+  if (MBBI != MBB.begin() &&
+      (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
     MachineInstr *PrevMI = prior(MBBI);
-    if (PrevMI->getOpcode() == ARM::LDM) {
+    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
-      if (MO.getReg() == ARM::LR) {
-        PrevMI->setDesc(TII->get(ARM::LDM_RET));
-        MO.setReg(ARM::PC);
-        MBB.erase(MBBI);
-        return true;
-      }
+      if (MO.getReg() != ARM::LR)
+        return false;
+      unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
+      PrevMI->setDesc(TII->get(NewOpc));
+      MO.setReg(ARM::PC);
+      MBB.erase(MBBI);
+      return true;
     }
   }
   return false;
@@ -928,6 +1083,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   TII = TM.getInstrInfo();
   TRI = TM.getRegisterInfo();
   RS = new RegScavenger();
+  isThumb2 = AFI->isThumb2Function();

   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
@@ -956,6 +1112,7 @@ namespace {
     const TargetRegisterInfo *TRI;
     const ARMSubtarget *STI;
     MachineRegisterInfo *MRI;
+    MachineFunction *MF;

     virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -967,8 +1124,9 @@ namespace {
     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                           unsigned &NewOpc, unsigned &EvenReg,
                           unsigned &OddReg, unsigned &BaseReg,
-                          unsigned &OffReg, unsigned &Offset,
-                          unsigned &PredReg, ARMCC::CondCodes &Pred);
+                          unsigned &OffReg, int &Offset,
+                          unsigned &PredReg, ARMCC::CondCodes &Pred,
+                          bool &isT2);
     bool RescheduleOps(MachineBasicBlock *MBB,
                        SmallVector<MachineInstr*, 4> &Ops,
                        unsigned Base, bool isLd,
@@ -984,6 +1142,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   TRI = Fn.getTarget().getRegisterInfo();
   STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
   MRI = &Fn.getRegInfo();
+  MF = &Fn;

   bool Modified = false;
   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
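The MergeReturnIntoLDM hunk above generalizes a previously ARM-only pattern to Thumb: a trailing bx lr (or tBX_RET) preceded by an LDM/t2LDM that pops LR becomes a single LDM_RET/t2LDM_RET that pops straight into PC. A minimal sketch of just the ARM-mode match, assuming the same iterators and opcodes as the hunk (hypothetical helper name):

// Matches "ldmia ..., {..., lr}; bx lr" at the end of a block, i.e. the
// shape that can be rewritten to "ldmia ..., {..., pc}".
static bool isPopReturnPair(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator Ret = prior(MBB.end());
  if (Ret == MBB.begin() || Ret->getOpcode() != ARM::BX_RET)
    return false;
  MachineInstr *Pop = prior(Ret);
  if (Pop->getOpcode() != ARM::LDM)
    return false;
  // The popped registers are the trailing operands; LR must be last.
  return Pop->getOperand(Pop->getNumOperands()-1).getReg() == ARM::LR;
}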
@@ -1045,48 +1204,83 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                           DebugLoc &dl,
                                           unsigned &NewOpc, unsigned &EvenReg,
                                           unsigned &OddReg, unsigned &BaseReg,
-                                          unsigned &OffReg, unsigned &Offset,
+                                          unsigned &OffReg, int &Offset,
                                           unsigned &PredReg,
-                                          ARMCC::CondCodes &Pred) {
+                                          ARMCC::CondCodes &Pred,
+                                          bool &isT2) {
+  // Make sure we're allowed to generate LDRD/STRD.
+  if (!STI->hasV5TEOps())
+    return false;
+
+  // FIXME: FLDS / FSTS -> FLDD / FSTD
+  unsigned Scale = 1;
   unsigned Opcode = Op0->getOpcode();
   if (Opcode == ARM::LDR)
     NewOpc = ARM::LDRD;
   else if (Opcode == ARM::STR)
     NewOpc = ARM::STRD;
-  else
-    return 0;
+  else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+    NewOpc = ARM::t2LDRDi8;
+    Scale = 4;
+    isT2 = true;
+  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
+    NewOpc = ARM::t2STRDi8;
+    Scale = 4;
+    isT2 = true;
+  } else
+    return false;
+
+  // Make sure the offset registers match.
+  if (!isT2 &&
+      (Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
+    return false;

   // Make sure the base address satisfies the i64 ld / st alignment requirement.
   if (!Op0->hasOneMemOperand() ||
-      !Op0->memoperands_begin()->getValue() ||
-      Op0->memoperands_begin()->isVolatile())
+      !(*Op0->memoperands_begin())->getValue() ||
+      (*Op0->memoperands_begin())->isVolatile())
     return false;

-  unsigned Align = Op0->memoperands_begin()->getAlignment();
+  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
+  Function *Func = MF->getFunction();
   unsigned ReqAlign = STI->hasV6Ops()
-    ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align
+    ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
+    : 8;  // Pre-v6 need 8-byte align
   if (Align < ReqAlign)
     return false;

   // Then make sure the immediate offset fits.
   int OffImm = getMemoryOpOffset(Op0);
-  ARM_AM::AddrOpc AddSub = ARM_AM::add;
-  if (OffImm < 0) {
-    AddSub = ARM_AM::sub;
-    OffImm = - OffImm;
+  if (isT2) {
+    if (OffImm < 0) {
+      if (OffImm < -255)
+        // Can't fall back to t2LDRi8 / t2STRi8.
+        return false;
+    } else {
+      int Limit = (1 << 8) * Scale;
+      if (OffImm >= Limit || (OffImm & (Scale-1)))
+        return false;
+    }
+    Offset = OffImm;
+  } else {
+    ARM_AM::AddrOpc AddSub = ARM_AM::add;
+    if (OffImm < 0) {
+      AddSub = ARM_AM::sub;
+      OffImm = - OffImm;
+    }
+    int Limit = (1 << 8) * Scale;
+    if (OffImm >= Limit || (OffImm & (Scale-1)))
+      return false;
+    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
   }
-  if (OffImm >= 256) // 8 bits
-    return false;
-  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
-
   EvenReg = Op0->getOperand(0).getReg();
   OddReg  = Op1->getOperand(0).getReg();
   if (EvenReg == OddReg)
     return false;
   BaseReg = Op0->getOperand(1).getReg();
-  OffReg = Op0->getOperand(2).getReg();
-  Pred = getInstrPredicate(Op0, PredReg);
+  if (!isT2)
+    OffReg = Op0->getOperand(2).getReg();
+  Pred = llvm::getInstrPredicate(Op0, PredReg);
   dl = Op0->getDebugLoc();
   return true;
 }
@@ -1138,7 +1332,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
       LastOffset = Offset;
       LastBytes = Bytes;
       LastOpcode = Opcode;
-      if (++NumMove == 8) // FIXME: Tune
+      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
     }
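The new Thumb2 path in CanFormLdStDWord accepts a different offset range than the ARM addrmode3 path: t2LDRDi8/t2STRDi8 take a word-aligned offset below (1 << 8) * Scale (1024 bytes, Scale being 4), while the negative side is capped at -255 because a rejected pair would otherwise have no t2LDRi8/t2STRi8 fallback. The same check extracted as a standalone sketch (hypothetical helper name):

static bool isLegalT2DualOffset(int OffImm, unsigned Scale) {
  if (OffImm < 0)
    return OffImm >= -255;          // else no t2LDRi8 / t2STRi8 fallback
  int Limit = (1 << 8) * Scale;     // 1024 when Scale == 4
  return OffImm < Limit && (OffImm & (Scale - 1)) == 0;
}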
@@ -1174,29 +1368,36 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
       unsigned EvenReg = 0, OddReg = 0;
       unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
       ARMCC::CondCodes Pred = ARMCC::AL;
+      bool isT2 = false;
       unsigned NewOpc = 0;
-      unsigned Offset = 0;
+      int Offset = 0;
       DebugLoc dl;
       if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                            EvenReg, OddReg, BaseReg, OffReg,
-                                           Offset, PredReg, Pred)) {
+                                           Offset, PredReg, Pred, isT2)) {
         Ops.pop_back();
         Ops.pop_back();

         // Form the pair instruction.
         if (isLd) {
-          BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+          MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
+                                            dl, TII->get(NewOpc))
             .addReg(EvenReg, RegState::Define)
             .addReg(OddReg, RegState::Define)
-            .addReg(BaseReg).addReg(0).addImm(Offset)
-            .addImm(Pred).addReg(PredReg);
+            .addReg(BaseReg);
+          if (!isT2)
+            MIB.addReg(OffReg);
+          MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
           ++NumLDRDFormed;
         } else {
-          BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+          MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos,
+                                            dl, TII->get(NewOpc))
             .addReg(EvenReg)
             .addReg(OddReg)
-            .addReg(BaseReg).addReg(0).addImm(Offset)
-            .addImm(Pred).addReg(PredReg);
+            .addReg(BaseReg);
+          if (!isT2)
+            MIB.addReg(OffReg);
+          MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
           ++NumSTRDFormed;
         }
         MBB->erase(Op0);
@@ -1249,12 +1450,11 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
       if (!isMemoryOp(MI))
         continue;
       unsigned PredReg = 0;
-      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+      if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
         continue;
-      int Opcode = MI->getOpcode();
-      bool isLd = Opcode == ARM::LDR ||
-        Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+      int Opc = MI->getOpcode();
+      bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
       unsigned Base = MI->getOperand(1).getReg();
       int Offset = getMemoryOpOffset(MI);
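The switch to an explicit MachineInstrBuilder above is what lets the register-offset operand become conditional: ARM-mode LDRD/STRD carries a register-offset slot (previously always filled with .addReg(0)) plus an addrmode3-encoded immediate, while t2LDRDi8/t2STRDi8 has no register-offset slot and takes a plain byte offset. A hypothetical helper factoring the shared tail of both BuildMI chains:

static void addPairAddress(MachineInstrBuilder &MIB, bool isT2,
                           unsigned OffReg, int Offset) {
  if (!isT2)
    MIB.addReg(OffReg);  // ARM LDRD/STRD register-offset slot
  MIB.addImm(Offset);    // am3-encoded for ARM, plain bytes for Thumb2
}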
+//
+//===----------------------------------------------------------------------===//

+#include "ARMMCAsmInfo.h"
+using namespace llvm;
+
+static const char *const arm_asm_table[] = {
+  "{r0}", "r0",
+  "{r1}", "r1",
+  "{r2}", "r2",
+  "{r3}", "r3",
+  "{r4}", "r4",
+  "{r5}", "r5",
+  "{r6}", "r6",
+  "{r7}", "r7",
+  "{r8}", "r8",
+  "{r9}", "r9",
+  "{r10}", "r10",
+  "{r11}", "r11",
+  "{r12}", "r12",
+  "{r13}", "r13",
+  "{r14}", "r14",
+  "{lr}", "lr",
+  "{sp}", "sp",
+  "{ip}", "ip",
+  "{fp}", "fp",
+  "{sl}", "sl",
+  "{memory}", "memory",
+  "{cc}", "cc",
+  0,0
+};
+
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
+  AsmTransCBE = arm_asm_table;
+  Data64bitsDirective = 0;
+  CommentString = "@";
+  COMMDirectiveTakesAlignment = false;
+  SupportsDebugInformation = true;
+
+  // Exceptions handling
+  ExceptionsType = ExceptionHandling::SjLj;
+  AbsoluteEHSectionOffsets = false;
+}
+
+ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
+  AlignmentIsInBytes = false;
+  Data64bitsDirective = 0;
+  CommentString = "@";
+  COMMDirectiveTakesAlignment = false;
+
+  NeedsSet = false;
+  HasLEB128 = true;
+  AbsoluteDebugSectionOffsets = true;
+  PrivateGlobalPrefix = ".L";
+  WeakRefDirective = "\t.weak\t";
+  SetDirective = "\t.set\t";
+  LCOMMDirective = "\t.lcomm\t";
+
+  DwarfRequiresFrameSection = false;
+
+  SupportsDebugInformation = true;
+}
diff --git a/lib/Target/ARM/ARMMCAsmInfo.h b/lib/Target/ARM/ARMMCAsmInfo.h
new file mode 100644
index 0000000000000..90f7822ea580e
--- /dev/null
+++ b/lib/Target/ARM/ARMMCAsmInfo.h
@@ -0,0 +1,31 @@
+//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the ARMMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARMTARGETASMINFO_H
+#define LLVM_ARMTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+
+namespace llvm {
+
+  struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+    explicit ARMMCAsmInfoDarwin();
+  };
+
+  struct ARMELFMCAsmInfo : public MCAsmInfo {
+    explicit ARMELFMCAsmInfo();
+  };
+
+} // namespace llvm
+
+#endif
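arm_asm_table is a flat list of pairs, an inline-asm register alias followed by its canonical name, terminated by a 0,0 pair and handed to the printer via AsmTransCBE. A consumer would walk a table in this format roughly like so (sketch only, with a hypothetical helper name; this is not the actual AsmPrinter lookup code):

#include <cstring>

static const char *translateRegName(const char *const *Table,
                                    const char *Name) {
  for (unsigned i = 0; Table[i]; i += 2)   // pairs, 0,0-terminated
    if (std::strcmp(Table[i], Name) == 0)
      return Table[i+1];                   // e.g. "{r0}" -> "r0"
  return 0;                                // no translation
}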
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 66d3df60e0786..2176b2735a2b9 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,10 +1,10 @@
 //====- ARMMachineFunctionInfo.h - ARM machine function info ---*- C++ -*-===//
-// 
+//
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
-// 
+//
 //===----------------------------------------------------------------------===//
 //
 // This file declares ARM-specific per-machine-function information.
@@ -52,10 +52,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   /// enable far jump.
   bool LRSpilledForFarJump;

-  /// R3IsLiveIn - True if R3 is live in to this function.
-  /// FIXME: Remove when register scavenger for Thumb is done.
-  bool R3IsLiveIn;
-
   /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
   /// spill stack offset.
   unsigned FramePtrSpillOffset;
@@ -100,7 +96,7 @@ public:
       hasThumb2(false),
       Align(2U),
       VarArgsRegSaveSize(0), HasStackFrame(false),
-      LRSpilledForFarJump(false), R3IsLiveIn(false),
+      LRSpilledForFarJump(false),
       FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
       GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
       GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
@@ -111,7 +107,7 @@ public:
       hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
       Align(isThumb ? 1U : 2U),
       VarArgsRegSaveSize(0), HasStackFrame(false),
-      LRSpilledForFarJump(false), R3IsLiveIn(false),
+      LRSpilledForFarJump(false),
       FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
       GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
       GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
@@ -119,6 +115,7 @@ public:
       JumpTableUId(0), ConstPoolEntryUId(0) {}

   bool isThumbFunction() const { return isThumb; }
+  bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
   bool isThumb2Function() const { return isThumb && hasThumb2; }

   unsigned getAlign() const { return Align; }
@@ -133,13 +130,9 @@ public:
   bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
   void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }

-  // FIXME: Remove when register scavenger for Thumb is done.
-  bool isR3LiveIn() const { return R3IsLiveIn; }
-  void setR3IsLiveIn(bool l) { R3IsLiveIn = l; }
-
   unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
   void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
- 
+
   unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
   unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
   unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
new file mode 100644
index 0000000000000..5ff7c381bc518
--- /dev/null
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using NEON instructions.
+//
+//===----------------------------------------------------------------------===//

+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5

+// This table is 6561*4 = 26244 bytes in size.
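The 6561 figure is 9^4: each of the four output lanes of a 4-element shuffle selects one of eight input lanes or undef, so the mask is treated as a four-digit base-9 number. Each 32-bit entry packs a cost, an opcode, and two 13-bit sub-shuffle IDs to expand recursively. The lowering code that consumes the table indexes and decodes it roughly as follows (a sketch of the scheme, assuming a ShuffleMask array of lane indices with -1 for undef; not a verbatim copy of the ARM lowering):

// Index: encode the mask as base-9, with 8 standing for undef.
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i)
  PFIndexes[i] = (ShuffleMask[i] < 0) ? 8 : ShuffleMask[i];
unsigned PFTableIndex =
  PFIndexes[0]*9*9*9 + PFIndexes[1]*9*9 + PFIndexes[2]*9 + PFIndexes[3];

// Decode: cost in the top two bits, then the op, then the two
// operand shuffles to generate before applying that op.
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost  =  PFEntry >> 30;
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
unsigned RHSID =  PFEntry        & ((1 << 13) - 1);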
+static const unsigned PerfectShuffleTable[6561+1] = { + 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS + 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS + 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> + 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> + 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS + 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3> + 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> + 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> + 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS + 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> + 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS + 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS + 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> + 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> + 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> + 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> + 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> + 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS + 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> + 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> + 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS + 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> + 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> + 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> + 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> + 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> + 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS + 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> + 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> + 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> + 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> + 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> + 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> + 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> + 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> + 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> + 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> + 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS + 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS + 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> + 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> + 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS + 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS + 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> + 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS + 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> + 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> + 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> + 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> + 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> + 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> + 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> + 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> + 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> + 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> + 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS + 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> + 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> + 
3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS + 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> + 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> + 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> + 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> + 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> + 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0> + 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> + 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> + 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> + 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> + 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> + 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> + 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> + 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS + 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS + 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS + 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> + 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS + 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS + 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS + 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> + 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS + 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> + 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS + 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> + 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> + 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> + 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> + 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> + 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> + 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS + 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> + 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> + 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> + 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> + 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS + 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> + 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> + 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> + 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS + 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS + 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> + 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> + 835584U, // <0,1,2,3>: Cost 0 copy LHS + 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS + 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> + 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> + 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> + 835584U, // <0,1,2,u>: Cost 0 copy LHS + 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> + 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> + 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> + 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> + 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS + 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> + 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> + 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> + 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> + 2568159334U, // <0,1,4,0>: Cost 3 vext1 
<3,0,1,4>, LHS + 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> + 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> + 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> + 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS + 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS + 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS + 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4> + 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS + 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> + 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> + 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> + 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> + 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> + 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> + 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> + 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> + 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> + 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS + 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> + 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> + 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> + 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS + 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> + 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> + 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> + 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> + 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> + 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> + 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> + 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> + 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> + 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> + 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> + 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> + 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> + 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS + 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS + 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS + 835584U, // <0,1,u,3>: Cost 0 copy LHS + 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS + 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS + 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS + 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> + 835584U, // <0,1,u,u>: Cost 0 copy LHS + 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> + 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS + 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS + 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> + 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> + 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> + 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> + 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> + 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS + 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> + 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> + 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> + 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> + 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS + 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> + 
2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> + 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> + 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> + 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> + 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> + 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> + 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> + 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS + 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> + 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> + 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> + 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS + 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> + 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> + 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> + 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> + 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> + 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> + 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> + 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> + 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> + 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS + 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> + 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> + 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> + 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS + 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS + 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS + 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> + 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS + 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> + 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> + 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> + 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> + 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> + 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> + 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> + 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS + 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS + 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> + 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> + 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> + 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> + 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> + 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> + 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> + 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> + 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> + 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> + 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> + 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> + 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> + 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> + 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> + 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2> + 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> + 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> + 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> + 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS + 1678563118U, // <0,2,u,2>: Cost 2 
vuzpl LHS, LHS + 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> + 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS + 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS + 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS + 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS + 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS + 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0> + 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> + 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> + 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> + 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS + 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> + 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> + 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> + 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS + 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> + 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> + 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> + 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> + 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> + 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> + 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> + 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> + 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> + 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS + 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> + 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> + 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> + 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS + 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> + 2598154746U, // <0,3,2,6>: Cost 3 vext1 , <6,2,7,3> + 2598155258U, // <0,3,2,7>: Cost 3 vext1 , <7,0,1,2> + 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS + 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> + 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> + 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> + 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> + 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> + 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> + 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> + 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> + 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> + 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> + 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> + 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> + 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> + 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> + 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> + 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS + 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> + 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> + 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS + 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> + 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> + 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> + 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> + 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> + 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> + 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> + 
2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> + 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> + 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> + 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> + 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> + 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> + 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6> + 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> + 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> + 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> + 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> + 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> + 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> + 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> + 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> + 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> + 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> + 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> + 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> + 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS + 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> + 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> + 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> + 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS + 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> + 2598203898U, // <0,3,u,6>: Cost 3 vext1 , <6,2,7,3> + 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> + 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS + 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> + 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS + 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> + 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> + 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> + 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> + 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS + 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> + 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS + 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS + 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> + 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> + 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> + 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS + 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS + 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS + 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> + 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS + 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS + 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> + 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> + 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> + 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS + 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS + 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS + 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> + 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS + 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> + 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> + 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> + 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> + 3692349954U, 
// <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> + 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> + 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS + 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> + 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> + 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> + 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> + 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> + 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> + 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> + 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS + 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS + 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> + 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS + 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS + 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> + 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> + 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> + 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS + 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> + 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> + 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS + 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> + 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> + 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> + 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> + 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> + 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> + 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> + 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> + 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> + 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS + 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> + 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> + 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> + 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS + 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> + 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> + 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> + 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> + 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS + 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS + 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS + 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> + 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS + 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS + 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS + 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> + 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS + 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> + 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS + 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> + 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5> + 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS + 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> + 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> + 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS + 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, 
LHS + 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS + 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> + 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> + 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> + 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> + 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> + 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> + 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> + 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS + 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS + 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> + 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> + 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> + 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS + 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> + 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> + 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS + 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS + 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> + 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> + 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> + 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> + 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> + 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> + 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> + 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> + 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> + 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> + 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> + 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> + 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> + 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> + 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS + 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> + 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> + 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS + 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> + 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> + 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> + 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> + 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> + 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> + 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> + 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> + 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> + 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS + 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> + 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> + 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> + 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS + 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> + 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> + 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> + 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0> + 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS + 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> + 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> + 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> + 2562510134U, // 
<0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS + 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> + 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> + 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> + 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS + 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS + 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS + 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> + 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> + 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> + 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS + 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> + 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> + 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> + 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS + 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS + 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> + 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> + 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> + 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> + 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> + 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS + 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS + 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS + 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> + 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> + 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> + 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS + 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> + 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> + 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS + 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS + 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS + 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> + 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> + 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> + 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> + 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> + 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> + 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS + 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS + 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> + 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> + 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> + 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> + 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> + 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> + 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> + 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> + 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> + 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS + 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> + 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> + 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2> + 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS + 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS + 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0> + 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS + 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS + 
2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> + 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0> + 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7> + 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0> + 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5> + 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0> + 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7> + 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS + 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> + 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0> + 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3> + 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6> + 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0> + 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4> + 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6> + 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6> + 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7> + 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7> + 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1> + 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> + 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7> + 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0> + 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5> + 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> + 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2> + 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0> + 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1> + 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS + 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS + 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u> + 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0> + 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u> + 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS + 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0> + 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS + 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS + 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0> + 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS + 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0> + 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0> + 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5> + 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6> + 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7> + 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7> + 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS + 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1> + 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1> + 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> + 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5> + 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1> + 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3> + 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7> + 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7> + 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1> + 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS + 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> + 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2> + 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0> + 2586504502U, // <0,7,2,4>: Cost 
3 vext1 <6,0,7,2>, RHS + 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7> + 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2> + 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7> + 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2> + 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2> + 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3> + 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3> + 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3> + 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6> + 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7> + 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> + 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0> + 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7> + 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS + 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> + 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> + 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7> + 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6> + 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS + 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7> + 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5> + 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS + 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0> + 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7> + 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5> + 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7> + 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> + 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7> + 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> + 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0> + 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7> + 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0> + 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> + 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7> + 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7> + 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS + 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> + 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6> + 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0> + 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7> + 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1> + 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0> + 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7> + 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0> + 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS + 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7> + 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7> + 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7> + 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7> + 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u> + 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> + 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u> + 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0> + 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u> + 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS + 2669066421U, // <0,7,u,6>: Cost 3 vext2 , + 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0> + 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u> + 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS + 1544896614U, // <0,u,0,1>: Cost 2 
vext2 <0,2,0,u>, LHS + 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS + 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, + 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS + 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, + 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS + 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0> + 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS + 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1> + 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS + 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS + 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> + 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS + 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS + 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, + 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS + 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS + 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS + 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2> + 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS + 835584U, // <0,u,2,3>: Cost 0 copy LHS + 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS + 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, + 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS + 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2> + 835584U, // <0,u,2,u>: Cost 0 copy LHS + 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2> + 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> + 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u> + 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> + 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6> + 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> + 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> + 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u> + 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> + 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS + 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS + 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS + 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4> + 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS + 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS + 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS + 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, + 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS + 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS + 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0> + 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, + 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, + 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS + 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u> + 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS + 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS + 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS + 2262496983U, // <0,u,6,1>: Cost 3 vrev + 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u> + 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, + 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS + 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u> + 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u> + 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u> + 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u> + 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS + 2562728854U, // <0,u,7,1>: Cost 3 vext1 
<2,0,u,7>, <1,2,3,0> + 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7> + 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u> + 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS + 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, <u,7,5,6> + 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7> + 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7> + 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS + 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS + 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS + 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS + 835584U, // <0,u,u,3>: Cost 0 copy LHS + 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS + 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS + 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS + 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u> + 835584U, // <0,u,u,u>: Cost 0 copy LHS + 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0> + 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1> + 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2> + 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> + 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1> + 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0> + 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7> + 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0> + 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1> + 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS + 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1> + 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3> + 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS + 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1> + 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> + 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2> + 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS + 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> + 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1> + 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0> + 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1> + 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> + 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7> + 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0> + 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2> + 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1> + 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0> + 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1> + 67944550U, // <1,0,3,2>: Cost 1 vrev LHS + 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3> + 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS + 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7> + 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7> + 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3> + 68386972U, // <1,0,3,u>: Cost 1 vrev LHS + 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1> + 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> + 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6> + 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1> + 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1> + 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS + 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1> + 3666383940U, // <1,0,4,7>:
Cost 4 vext1 <7,1,0,4>, <7,1,0,4> + 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS + 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0> + 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS + 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS + 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5> + 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5> + 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0> + 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0> + 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS + 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS + 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1> + 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7> + 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7> + 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6> + 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1> + 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0> + 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0> + 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0> + 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0> + 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> + 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1> + 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7> + 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0> + 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6> + 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0> + 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0> + 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7> + 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0> + 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0> + 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1> + 67985515U, // <1,0,u,2>: Cost 1 vrev LHS + 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1> + 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6> + 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS + 2669082807U, // <1,0,u,6>: Cost 3 vext2 <u,6,1,0>, <u,6,1,0> + 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u> + 68427937U, // <1,0,u,u>: Cost 1 vrev LHS + 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1> + 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS + 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1> + 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2> + 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> + 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1> + 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7> + 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0> + 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1> + 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS + 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0> + 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3> + 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7> + 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7> + 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> + 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS + 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2> + 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1> + 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2> + 2622637734U, // <1,1,2,3>: Cost 3 vext2
<0,u,1,1>, <2,3,0,1> + 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS + 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7> + 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7> + 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0> + 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1> + 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2> + 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1> + 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2> + 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS + 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6> + 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7> + 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7> + 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3> + 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS + 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS + 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4> + 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0> + 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5> + 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS + 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS + 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS + 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4> + 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS + 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1> + 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3> + 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2> + 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7> + 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5> + 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5> + 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0> + 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS + 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7> + 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2> + 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7> + 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3> + 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7> + 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6> + 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5> + 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6> + 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0> + 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0> + 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1> + 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> + 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3> + 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS + 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6> + 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7> + 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0> + 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7> + 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> + 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS + 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, <u,2,3,3> + 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS + 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS + 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS + 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, <u,6,3,7> + 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS + 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS + 2635251712U, // <1,2,0,0>:
Cost 3 vext2 <3,0,1,2>, <0,0,0,0> + 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS + 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2> + 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1> + 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5> + 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7> + 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2> + 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> + 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS + 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2> + 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1> + 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0> + 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS + 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS + 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> + 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7> + 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0> + 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS + 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2> + 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2> + 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2> + 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3> + 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5> + 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7> + 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7> + 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1> + 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3> + 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS + 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> + 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS + 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> + 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS + 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2> + 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6> + 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4> + 2598759198U, // <1,2,4,3>: Cost 3 vext1 <u,1,2,4>, <3,u,1,2> + 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4> + 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS + 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6> + 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> + 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS + 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS + 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> + 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2> + 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS + 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS + 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> + 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0> + 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7> + 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS + 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1> + 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2> + 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3> + 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7> + 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5> + 3763488716U, // <1,2,6,5>:
Cost 4 vext3 <0,u,1,1>, <2,6,5,7> + 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6> + 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2> + 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7> + 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> + 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2> + 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3> + 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1> + 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6> + 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0> + 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1> + 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> + 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> + 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS + 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> + 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS + 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS + 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS + 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0> + 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS + 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2> + 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3> + 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5> + 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6> + 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7> + 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1> + 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS + 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2> + 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1> + 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3> + 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS + 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS + 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7> + 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7> + 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1> + 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS + 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> + 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3> + 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> + 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> + 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> + 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5> + 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> + 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3> + 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> + 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS + 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3> + 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2> + 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3> + 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS + 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> + 2598826490U, // <1,3,3,6>: Cost 3 vext1 , <6,2,7,3> + 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7> + 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS + 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS + 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3> + 3832761290U, // <1,3,4,2>: 
Cost 4 vuzpl <1,2,3,4>, <4,1,2,3> + 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4> + 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS + 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS + 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6> + 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4> + 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS + 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS + 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7> + 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5> + 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5> + 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS + 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5> + 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4> + 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS + 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS + 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0> + 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1> + 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3> + 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7> + 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3> + 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, <u,6,7,5> + 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3> + 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> + 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> + 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> + 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3> + 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7> + 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3> + 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6> + 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5> + 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1> + 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7> + 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1> + 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS + 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u> + 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2> + 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS + 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS + 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS + 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6> + 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS + 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS + 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4> + 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS + 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4> + 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> + 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5> + 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1> + 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2> + 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1> + 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1> + 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2> + 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4> + 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0> + 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6> + 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1> + 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS + 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS + 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0> + 2892402217U, //
<1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS + 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2> + 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3> + 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2> + 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1> + 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4> + 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS + 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7> + 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2> + 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS + 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS + 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4> + 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3> + 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3> + 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6> + 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5> + 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6> + 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3> + 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u> + 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1> + 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4> + 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4> + 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4> + 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4> + 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS + 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6> + 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1> + 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS + 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS + 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5> + 2598913774U, // <1,4,5,2>: Cost 3 vext1 <u,1,4,5>, <2,3,u,1> + 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2> + 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS + 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS + 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS + 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS + 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS + 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1> + 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2> + 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2> + 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS + 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7> + 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7> + 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> + 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS + 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> + 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1> + 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4> + 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4> + 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS + 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0> + 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1> + 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2> + 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4> + 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS + 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS + 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u> + 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1> +
2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, <u,4,5,6> + 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1> + 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS + 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> + 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS + 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> + 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS + 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5> + 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4> + 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5> + 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1> + 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1> + 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS + 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> + 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1> + 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> + 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7> + 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> + 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1> + 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7> + 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS + 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3> + 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1> + 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> + 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> + 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1> + 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5> + 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7> + 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> + 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS + 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1> + 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2> + 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7> + 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2> + 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3> + 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6> + 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5> + 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6> + 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS + 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS + 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> + 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> + 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3> + 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4> + 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> + 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS + 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS + 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6> + 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS + 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS + 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5> + 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5> + 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3> + 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS + 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5> + 2667794530U, // <1,5,5,6>: Cost 3 vext2 <u,4,1,5>, <5,6,7,0> + 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7> + 2557187886U, //
<1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS + 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1> + 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5> + 2667794938U, // <1,5,6,2>: Cost 3 vext2 <u,4,1,5>, <6,2,7,3> + 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4> + 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6> + 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6> + 2667795256U, // <1,5,6,6>: Cost 3 vext2 <u,4,1,5>, <6,6,6,6> + 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0> + 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0> + 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS + 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1> + 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> + 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7> + 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS + 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7> + 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0> + 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1> + 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS + 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, <u,0,1,2> + 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS + 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, <u,2,3,3> + 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, <u,3,0,1> + 1594054682U, // <1,5,u,4>: Cost 2 vext2 <u,4,1,5>, <u,4,1,5> + 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS + 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, <u,6,3,7> + 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS + 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS + 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0> + 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS + 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6> + 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1> + 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5> + 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6> + 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6> + 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS + 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS + 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2> + 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1> + 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0> + 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3> + 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> + 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7> + 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1> + 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS + 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS + 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> + 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0> + 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2> + 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1> + 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS + 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7> + 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> + 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS + 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS + 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS + 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1> + 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3> + 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3> + 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS +
4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, <u,6,7,5> + 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3> + 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7> + 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u> + 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1> + 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0> + 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4> + 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4> + 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5> + 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS + 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS + 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS + 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS + 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2> + 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5> + 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5> + 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5> + 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6> + 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5> + 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0> + 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS + 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS + 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> + 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6> + 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7> + 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0> + 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS + 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7> + 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6> + 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7> + 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7> + 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> + 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> + 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3> + 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7> + 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5> + 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> + 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7> + 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS + 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> + 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> + 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS + 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2> + 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, <u,3,0,1> + 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5> + 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS + 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, <u,6,3,7> + 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7> + 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1> + 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> + 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS + 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1> + 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1> + 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> + 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0> + 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> + 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7> + 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS + 2593128550U, // <1,7,1,0>: Cost 3
vext1 <7,1,7,1>, LHS + 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1> + 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0> + 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1> + 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS + 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> + 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> + 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1> + 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7> + 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS + 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7> + 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2> + 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1> + 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS + 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7> + 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> + 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2> + 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS + 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS + 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7> + 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2> + 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3> + 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS + 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> + 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> + 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2> + 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS + 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> + 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1> + 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4> + 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4> + 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4> + 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS + 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0> + 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6> + 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS + 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS + 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7> + 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2> + 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5> + 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS + 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6> + 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> + 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS + 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS + 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> + 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> + 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7> + 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6> + 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS + 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6> + 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6> + 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0> + 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0> + 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> + 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1> + 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1> + 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0> + 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS + 
2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7> + 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7> + 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7> + 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1> + 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS + 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS + 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2> + 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u> + 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS + 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> + 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> + 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2> + 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS + 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u> + 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS + 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2> + 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u> + 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u> + 1658771190U, // <1,u,0,5>: Cost 2 vext3 <u,0,5,1>, <u,0,5,1> + 2736789248U, // <1,u,0,6>: Cost 3 vext3 <u,7,0,1>, <u,0,6,2> + 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1> + 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS + 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS + 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS + 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7> + 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS + 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS + 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS + 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> + 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS + 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> + 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> + 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> + 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS + 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> + 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS + 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> + 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS + 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 115726126U, // <1,u,3,2>: Cost 1 vrev LHS + 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS + 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS + 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3> + 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS + 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS + 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1> + 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0> + 2263217967U, // <1,u,4,2>: Cost 3 vrev <u,1,2,4> + 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4> + 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS + 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS + 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6> + 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, <u,4,7,6> + 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS + 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS + 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7> + 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS + 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS + 2551434550U, // <1,u,5,4>:
Cost 3 vext1 <0,1,u,5>, RHS + 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS + 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS + 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS + 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u> + 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, <u,6,1,0> + 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3> + 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, <u,6,3,7> + 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4> + 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, <u,6,5,7> + 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u> + 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> + 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> + 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> + 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1> + 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1> + 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7> + 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS + 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u> + 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, <u,7,6,7> + 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7> + 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> + 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS + 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS + 115767091U, // <1,u,u,2>: Cost 1 vrev LHS + 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS + 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS + 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS + 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS + 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS + 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS + 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0> + 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1> + 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2> + 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0> + 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS + 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5> + 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0> + 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7> + 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2> + 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1> + 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0> + 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS + 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS + 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS + 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7> + 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> + 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2> + 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS + 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS + 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2> + 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0> + 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2> + 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS + 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3> + 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2> + 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2> + 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS + 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> + 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1> + 2953625764U, //
<2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2> + 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3> + 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS + 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5> + 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6> + 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7> + 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u> + 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS + 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> + 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6> + 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2> + 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS + 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS + 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS + 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5> + 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6> + 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS + 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5> + 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7> + 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5> + 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> + 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5> + 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0> + 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS + 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5> + 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0> + 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS + 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7> + 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6> + 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6> + 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0> + 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6> + 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7> + 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6> + 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> + 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3> + 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1> + 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0> + 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6> + 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2> + 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0> + 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7> + 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> + 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS + 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1> + 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS + 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2> + 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS + 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS + 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS + 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS + 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS + 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS + 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS + 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0> + 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> + 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS + 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2> + 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0> + 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1> + 
1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> + 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS + 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1> + 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0> + 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3> + 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS + 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7> + 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1> + 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2> + 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3> + 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS + 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1> + 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2> + 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0> + 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS + 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3> + 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7> + 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0> + 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0> + 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS + 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1> + 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2> + 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3> + 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS + 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5> + 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> + 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7> + 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u> + 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5> + 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6> + 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5> + 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> + 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS + 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS + 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS + 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6> + 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4> + 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS + 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> + 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1> + 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7> + 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS + 3740913668U, // <2,1,5,5>: Cost 4 vext2 <u,3,2,1>, <5,5,5,5> + 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5> + 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS + 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7> + 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS + 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2> + 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> + 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS + 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS + 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5> + 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6> + 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1> + 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS + 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2> + 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1> + 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0> + 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7> + 3740915046U,
// <2,1,7,4>: Cost 4 vext2 <u,3,2,1>, <7,4,5,6> + 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7> + 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1> + 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u> + 2669827714U, // <2,1,7,u>: Cost 3 vext2 <u,7,2,1>, <7,u,1,2> + 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS + 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1> + 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2> + 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u> + 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS + 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5> + 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> + 2669828370U, // <2,1,u,7>: Cost 3 vext2 <u,7,2,1>, <u,7,2,1> + 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u> + 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2> + 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS + 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0> + 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2> + 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS + 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7> + 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6> + 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2> + 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2> + 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2> + 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1> + 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0> + 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS + 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS + 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7> + 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3> + 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1> + 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS + 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS + 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2> + 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS + 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3> + 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS + 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7> + 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7> + 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> + 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS + 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1> + 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0> + 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2> + 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS + 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5> + 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5> + 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6> + 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3> + 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS + 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS + 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2> + 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5> + 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5> + 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS + 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS + 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS + 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4> + 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS + 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2> + 2617413328U, // <2,2,5,1>: Cost 3
vext2 <0,0,2,2>, <5,1,7,3> + 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7> + 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS + 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> + 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5> + 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0> + 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS + 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS + 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6> + 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3> + 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3> + 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7> + 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS + 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7> + 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6> + 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2> + 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7> + 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> + 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5> + 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2> + 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS + 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6> + 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7> + 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7> + 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7> + 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1> + 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS + 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS + 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS + 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS + 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS + 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS + 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6> + 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS + 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS + 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS + 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3> + 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7> + 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> + 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0> + 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS + 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS + 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> + 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0> + 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> + 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2> + 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1> + 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> + 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> + 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5> + 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7> + 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1> + 1548986427U, // <2,3,2,u>: Cost 2 vext2 
LHS, <2,u,0,1> + 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3> + 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3> + 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> + 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7> + 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3> + 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> + 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2> + 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS + 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> + 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> + 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2> + 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS + 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS + 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> + 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4> + 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS + 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS + 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4> + 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6> + 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> + 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7> + 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7> + 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> + 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6> + 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5> + 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> + 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7> + 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> + 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1> + 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3> + 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3> + 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1> + 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2> + 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1> + 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, + 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS + 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, + 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, + 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, + 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS + 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, + 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, + 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS + 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4> + 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS + 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4> + 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4> + 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2> + 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1> + 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> + 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1> + 1634962332U, 
// <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> + 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2> + 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1> + 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> + 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS + 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4> + 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7> + 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3> + 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1> + 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS + 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4> + 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3> + 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4> + 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5> + 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4> + 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS + 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS + 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0> + 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS + 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4> + 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1> + 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4> + 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3> + 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4> + 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> + 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> + 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7> + 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> + 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2> + 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1> + 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2> + 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3> + 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4> + 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS + 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS + 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7> + 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS + 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> + 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> + 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> + 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5> + 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS + 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5> + 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS + 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS + 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> + 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> + 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> + 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS + 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS + 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6> + 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1> + 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS + 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> + 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2> + 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4> + 3734967544U, // <2,4,7,3>: Cost 4 vext2 
<7,3,2,4>, <7,3,2,4> + 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6> + 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> + 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1> + 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7> + 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> + 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS + 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS + 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2> + 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2> + 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS + 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5> + 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS + 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS + 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS + 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0> + 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS + 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> + 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> + 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> + 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5> + 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7> + 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS + 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS + 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2> + 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1> + 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> + 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> + 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> + 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7> + 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2> + 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3> + 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5> + 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2> + 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1> + 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2> + 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1> + 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5> + 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1> + 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7> + 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS + 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS + 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS + 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3> + 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5> + 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3> + 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> + 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3> + 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> + 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7> + 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u> + 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS + 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4> + 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> + 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4> + 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS + 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS + 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6> + 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS 
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS + 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS + 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7> + 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5> + 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5> + 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS + 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5> + 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6> + 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7> + 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7> + 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS + 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6> + 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3> + 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6> + 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5> + 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7> + 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6> + 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS + 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS + 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS + 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2> + 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> + 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2> + 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS + 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7> + 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2> + 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS + 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS + 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS + 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS + 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u> + 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u> + 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, + 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS + 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> + 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7> + 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS + 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0> + 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS + 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2> + 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0> + 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6> + 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6> + 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1> + 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2> + 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS + 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> + 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1> + 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0> + 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS + 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> + 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> + 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7> + 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2> + 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS + 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1> + 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3> + 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2> + 2620098214U, // <2,6,2,3>: 
Cost 3 vext2 <0,4,2,6>, <2,3,0,1> + 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6> + 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7> + 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6> + 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS + 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1> + 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2> + 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1> + 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7> + 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3> + 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6> + 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5> + 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> + 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS + 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS + 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2> + 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3> + 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0> + 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4> + 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4> + 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS + 2667875700U, // <2,6,4,6>: Cost 3 vext2 , <4,6,4,6> + 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS + 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS + 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2> + 2667876048U, // <2,6,5,1>: Cost 3 vext2 , <5,1,7,3> + 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7> + 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5> + 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> + 2667876356U, // <2,6,5,5>: Cost 3 vext2 , <5,5,5,5> + 2667876450U, // <2,6,5,6>: Cost 3 vext2 , <5,6,7,0> + 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS + 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS + 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS + 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1> + 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6> + 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3> + 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS + 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5> + 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6> + 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS + 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS + 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1> + 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> + 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2> + 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0> + 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5> + 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7> + 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2> + 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0> + 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1> + 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, + 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS + 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, + 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS + 1594136612U, // <2,6,u,4>: Cost 2 vext2 , + 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS + 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, + 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS + 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS + 2726179825U, // <2,7,0,0>: 
Cost 3 vext3 <7,0,0,2>, <7,0,0,2> + 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> + 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> + 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> + 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2> + 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7> + 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> + 2599760953U, // <2,7,0,7>: Cost 3 vext1 , <7,0,u,2> + 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> + 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> + 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1> + 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0> + 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7> + 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5> + 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7> + 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7> + 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0> + 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> + 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2> + 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7> + 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2> + 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1> + 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6> + 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> + 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> + 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1> + 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7> + 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS + 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> + 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> + 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3> + 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS + 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> + 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> + 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2> + 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS + 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6> + 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4> + 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7> + 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> + 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4> + 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS + 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u> + 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0> + 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS + 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> + 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7> + 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7> + 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5> + 2599800118U, // <2,7,5,4>: Cost 3 vext1 , RHS + 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5> + 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7> + 2599802214U, // <2,7,5,7>: Cost 3 vext1 , <7,4,5,6> + 2599802670U, // <2,7,5,u>: Cost 3 vext1 , LHS + 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS + 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7> + 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3> + 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6> + 2581892406U, // <2,7,6,4>: Cost 3 vext1 
<5,2,7,6>, RHS + 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> + 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> + 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0> + 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS + 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> + 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2> + 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7> + 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7> + 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS + 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5> + 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> + 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7> + 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1> + 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS + 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> + 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2> + 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u> + 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS + 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS + 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> + 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2> + 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS + 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS + 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 1658631909U, // <2,u,0,3>: Cost 2 vext3 , + 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7> + 1658853120U, // <2,u,0,6>: Cost 2 vext3 , + 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS + 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS + 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> + 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> + 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, + 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> + 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS + 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1> + 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS + 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1> + 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS + 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> + 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS + 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS + 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1> + 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3> + 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS + 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5> + 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3> + 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS + 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS + 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS + 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4> + 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5> + 1190213513U, // <2,u,4,3>: 
Cost 2 vrev + 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS + 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS + 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6> + 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS + 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS + 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5> + 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5> + 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, + 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7> + 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS + 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2> + 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, + 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS + 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS + 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1> + 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS + 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u> + 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7> + 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1> + 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2> + 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7> + 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS + 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS + 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS + 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS + 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS + 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS + 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS + 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS + 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> + 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> + 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1> + 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1> + 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7> + 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1> + 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0> + 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2> + 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS + 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0> + 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS + 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3> + 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS + 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7> + 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7> + 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1> + 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS + 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> + 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0> + 2685632702U, // 
<3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> + 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7> + 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7> + 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7> + 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> + 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2> + 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> + 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1> + 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3> + 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6> + 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7> + 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7> + 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1> + 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3> + 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4> + 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> + 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> + 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1> + 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6> + 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2666573172U, // <3,0,4,6>: Cost 3 vext2 , <4,6,4,6> + 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4> + 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> + 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7> + 2666573520U, // <3,0,5,1>: Cost 3 vext2 , <5,1,7,3> + 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS + 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6> + 2666573766U, // <3,0,5,4>: Cost 3 vext2 , <5,4,7,6> + 2666573828U, // <3,0,5,5>: Cost 3 vext2 , <5,5,5,5> + 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7> + 2666573992U, // <3,0,5,7>: Cost 3 vext2 , <5,7,5,7> + 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS + 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7> + 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7> + 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7> + 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2> + 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1> + 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0> + 2666574648U, // <3,0,6,6>: Cost 3 vext2 , <6,6,6,6> + 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0> + 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7> + 2666574842U, // <3,0,7,0>: Cost 3 vext2 , <7,0,1,2> + 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7> + 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0> + 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7> + 2666575206U, // <3,0,7,4>: Cost 3 vext2 , <7,4,5,6> + 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7> + 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3> + 2666575468U, // <3,0,7,7>: Cost 3 vext2 , <7,7,7,7> + 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0> + 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2> + 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> + 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS + 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> + 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> + 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, + 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u> + 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS + 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS + 2618802278U, // <3,1,0,1>: 
Cost 3 vext2 <0,2,3,1>, LHS + 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1> + 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> + 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS + 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1> + 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6> + 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0> + 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> + 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1> + 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1> + 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1> + 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> + 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5> + 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> + 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5> + 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3> + 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3> + 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1> + 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> + 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2> + 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0> + 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS + 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> + 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7> + 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0> + 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0> + 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS + 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> + 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> + 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1> + 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS + 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> + 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> + 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3> + 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> + 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS + 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5> + 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5> + 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5> + 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS + 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS + 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS + 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4> + 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5> + 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1> + 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7> + 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5> + 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> + 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5> + 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7> + 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0> + 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS + 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> + 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1> + 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> + 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7> + 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> + 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5> + 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> + 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7> + 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0> + 2685633806U, // <3,1,6,u>: Cost 
3 vext3 LHS, <1,6,u,7> + 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS + 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1> + 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2> + 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS + 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS + 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7> + 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7> + 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7> + 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS + 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS + 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3> + 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> + 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0> + 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS + 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> + 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> + 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS + 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3> + 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0> + 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS + 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0> + 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> + 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5> + 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7> + 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> + 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0> + 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS + 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2> + 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1> + 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0> + 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> + 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS + 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7> + 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> + 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1> + 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1> + 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1> + 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3> + 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2> + 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> + 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5> + 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> + 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6> + 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3> + 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3> + 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1> + 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0> + 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2> + 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3> + 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5> + 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1> + 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2> + 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0> + 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1> + 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS + 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4> + 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4> + 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> + 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS + 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0> + 
2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4> + 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS + 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS + 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5> + 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> + 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> + 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5> + 2665263108U, // <3,2,5,5>: Cost 3 vext2 , <5,5,5,5> + 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> + 2665263272U, // <3,2,5,7>: Cost 3 vext2 , <5,7,5,7> + 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5> + 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1> + 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3> + 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6> + 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> + 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5> + 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> + 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> + 2665263950U, // <3,2,6,7>: Cost 3 vext2 , <6,7,0,1> + 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> + 2665264122U, // <3,2,7,0>: Cost 3 vext2 , <7,0,1,2> + 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3> + 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2> + 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS + 2665264486U, // <3,2,7,4>: Cost 3 vext2 , <7,4,5,6> + 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7> + 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7> + 2665264748U, // <3,2,7,7>: Cost 3 vext2 , <7,7,7,7> + 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS + 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1> + 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS + 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2> + 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> + 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5> + 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0> + 2665265408U, // <3,2,u,7>: Cost 3 vext2 , + 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1> + 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0> + 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2> + 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0> + 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2> + 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1> + 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2> + 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2> + 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7> + 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2> + 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3> + 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3> + 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3> + 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1> + 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS + 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3> + 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3> + 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3> + 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3> + 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS + 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3> + 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3> + 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0> + 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS + 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4> + 2685634918U, // <3,3,2,6>: Cost 3 
vext3 LHS, <3,2,6,3> + 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3> + 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3> + 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS + 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3> + 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3> + 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS + 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5> + 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7> + 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3> + 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS + 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS + 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4> + 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4> + 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6> + 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4> + 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6> + 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS + 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7> + 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6> + 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS + 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5> + 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5> + 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5> + 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS + 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5> + 2665934946U, // <3,3,5,6>: Cost 3 vext2 , <5,6,7,0> + 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS + 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS + 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7> + 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7> + 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7> + 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7> + 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7> + 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7> + 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6> + 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3> + 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3> + 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS + 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7> + 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7> + 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3> + 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS + 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7> + 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3> + 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7> + 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS + 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS + 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2> + 1592858504U, // <3,3,u,2>: Cost 2 vext2 , + 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS + 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6> + 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3> + 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS + 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS + 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0> + 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS + 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2> + 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4> + 2624799058U, // <3,4,0,4>: Cost 
3 vext2 <1,2,3,4>, <0,4,1,5> + 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1> + 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2> + 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0> + 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS + 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2> + 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1> + 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4> + 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3> + 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS + 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0> + 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> + 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4> + 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4> + 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS + 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3> + 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2> + 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1> + 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3> + 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3> + 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0> + 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4> + 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0> + 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2> + 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4> + 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4> + 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3> + 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1> + 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS + 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS + 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1> + 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2> + 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS + 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4> + 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4> + 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> + 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4> + 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS + 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6> + 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4> + 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS + 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS + 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> + 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5> + 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2> + 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS + 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7> + 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS + 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5> + 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS + 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1> + 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6> + 2666607098U, // <3,4,6,2>: Cost 3 vext2 , <6,2,7,3> + 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6> + 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6> + 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> + 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7> + 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4> + 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2> + 2666607610U, // <3,4,7,0>: Cost 3 vext2 , <7,0,1,2> + 
3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5> + 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4> + 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7> + 2666607974U, // <3,4,7,4>: Cost 3 vext2 , <7,4,5,6> + 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0> + 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0> + 2666608236U, // <3,4,7,7>: Cost 3 vext2 , <7,7,7,7> + 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4> + 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS + 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS + 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u> + 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, + 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS + 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS + 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS + 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u> + 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS + 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0> + 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS + 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5> + 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4> + 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1> + 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> + 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1> + 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0> + 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS + 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS + 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1> + 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5> + 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5> + 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5> + 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3> + 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7> + 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3> + 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3> + 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3> + 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5> + 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2> + 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5> + 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5> + 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3> + 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7> + 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3> + 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5> + 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2> + 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5> + 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4> + 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3> + 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6> + 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5> + 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6> + 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS + 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS + 2600304742U, // <3,5,4,0>: Cost 3 vext1 , LHS + 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5> + 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4> + 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0> + 2600308022U, // <3,5,4,4>: Cost 3 vext1 , RHS + 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS + 2772454710U, // <3,5,4,6>: 
Cost 3 vuzpl <3,4,5,6>, RHS + 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6> + 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6> + 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS + 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3> + 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5> + 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5> + 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS + 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5> + 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6> + 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7> + 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7> + 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1> + 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7> + 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6> + 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4> + 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5> + 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7> + 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7> + 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0> + 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0> + 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS + 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7> + 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2> + 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2> + 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS + 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7> + 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0> + 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7> + 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS + 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS + 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u> + 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2> + 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2> + 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS + 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7> + 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS + 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3> + 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS + 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS + 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2> + 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4> + 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4> + 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2> + 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7> + 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0> + 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS + 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2> + 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3> + 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1> + 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6> + 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1> + 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6> + 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3> + 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3> + 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3> + 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6> + 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS + 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3> + 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6> + 3763581395U, // <3,6,2,3>: Cost 4 vext3 
LHS, <6,2,3,0> + 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6> + 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6> + 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3> + 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3> + 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3> + 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2> + 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3> + 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3> + 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3> + 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6> + 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6> + 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2> + 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS + 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS + 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6> + 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3> + 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5> + 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6> + 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6> + 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6> + 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0> + 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS + 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6> + 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS + 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2> + 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7> + 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> + 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5> + 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6> + 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6> + 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5> + 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS + 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1> + 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3> + 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3> + 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6> + 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4> + 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7> + 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7> + 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7> + 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1> + 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7> + 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7> + 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3> + 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5> + 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1> + 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2> + 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS + 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1> + 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1> + 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2> + 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u> + 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6> + 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5> + 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6> + 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3> + 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, 
<6,u,u,1> + 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0> + 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2> + 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0> + 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5> + 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0> + 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0> + 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1> + 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS + 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2> + 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1> + 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0> + 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7> + 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS + 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7> + 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7> + 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3> + 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7> + 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS + 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3> + 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2> + 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1> + 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS + 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7> + 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7> + 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3> + 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7> + 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2> + 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3> + 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3> + 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3> + 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6> + 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7> + 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7> + 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7> + 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2> + 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS + 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7> + 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7> + 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7> + 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS + 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS + 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4> + 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6> + 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS + 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2> + 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3> + 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3> + 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0> + 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5> + 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5> + 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7> + 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS + 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS + 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1> + 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0> + 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3> + 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0> + 
2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5> + 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4> + 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6> + 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0> + 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7> + 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS + 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7> + 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7> + 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7> + 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS + 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7> + 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7> + 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7> + 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, + 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, + 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS + 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, + 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS + 1595545808U, // <3,7,u,6>: Cost 2 vext2 , + 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS + 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, + 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, + 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, + 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, + 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, + 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, + 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS + 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, + 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u> + 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u> + 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS + 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, + 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS + 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u> + 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u> + 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, + 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS + 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3> + 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u> + 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, + 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, + 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u> + 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, + 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, + 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, + 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3> + 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, + 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS + 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, + 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, + 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, + 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS + 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS + 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS + 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, + 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, + 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, + 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4> + 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, + 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, + 
1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, + 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, + 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS + 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, + 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5> + 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, + 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS + 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5> + 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS + 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, + 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS + 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, + 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6> + 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, + 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, + 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6> + 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, + 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, + 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, + 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS + 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7> + 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2> + 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS + 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS + 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7> + 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, + 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS + 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, + 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, + 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS + 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS + 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, + 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, + 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS + 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, + 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS + 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0> + 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1> + 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2> + 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4> + 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4> + 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0> + 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0> + 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0> + 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1> + 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS + 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4> + 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1> + 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS + 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4> + 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1> + 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1> + 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4> + 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4> + 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4> + 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0> + 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6> + 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7> + 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> + 3668358453U, // <4,0,2,7>: Cost 4 
vext1 <7,4,0,2>, <7,4,0,2> + 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4> + 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS + 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> + 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4> + 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4> + 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS + 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5> + 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7> + 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0> + 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> + 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS + 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5> + 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6> + 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2> + 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS + 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS + 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2> + 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4> + 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS + 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS + 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS + 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2> + 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5> + 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS + 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7> + 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7> + 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5> + 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS + 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS + 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1> + 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS + 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6> + 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS + 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6> + 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6> + 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0> + 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS + 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2> + 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS + 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS + 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0> + 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5> + 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5> + 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0> + 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7> + 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS + 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS + 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS + 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u> + 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS + 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS + 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> + 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u> + 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1> + 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS + 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6> + 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2> + 2622857554U, // 
<4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5> + 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4> + 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1> + 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4> + 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS + 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2> + 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4> + 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4> + 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3> + 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5> + 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1> + 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7> + 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1> + 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3> + 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS + 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4> + 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2> + 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4> + 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS + 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3> + 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7> + 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2> + 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4> + 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS + 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3> + 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4> + 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4> + 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS + 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7> + 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2> + 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3> + 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1> + 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0> + 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5> + 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS + 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5> + 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS + 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4> + 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0> + 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS + 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS + 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2> + 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5> + 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2> + 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS + 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7> + 2600686074U, // <4,1,5,6>: Cost 3 vext1 , <6,2,7,3> + 2600686586U, // <4,1,5,7>: Cost 3 vext1 , <7,0,1,2> + 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS + 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS + 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1> + 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2> + 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS + 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS + 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6> + 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7> + 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1> + 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS 
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> + 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1> + 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1> + 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4> + 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6> + 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4> + 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7> + 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7> + 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> + 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS + 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2> + 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2> + 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4> + 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS + 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS + 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 2600686586U, // <4,1,u,7>: Cost 3 vext1 , <7,0,1,2> + 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS + 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2> + 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS + 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6> + 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2> + 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6> + 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7> + 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4> + 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2> + 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS + 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2> + 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1> + 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0> + 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS + 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> + 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7> + 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3> + 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3> + 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> + 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4> + 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3> + 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2> + 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3> + 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0> + 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7> + 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6> + 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2> + 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3> + 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1> + 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1> + 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2> + 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4> + 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5> + 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4> + 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4> + 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4> + 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1> + 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS + 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4> + 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4> + 2699208469U, // 
<4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4> + 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS + 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS + 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4> + 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0> + 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS + 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS + 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0> + 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2> + 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS + 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS + 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7> + 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7> + 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS + 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS + 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS + 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2> + 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2> + 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS + 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS + 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6> + 2600767994U, // <4,2,6,6>: Cost 3 vext1 , <6,2,7,3> + 2600768506U, // <4,2,6,7>: Cost 3 vext1 , <7,0,1,2> + 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS + 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> + 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2> + 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2> + 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4> + 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4> + 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7> + 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4> + 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7> + 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> + 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS + 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2> + 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2> + 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS + 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS + 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS + 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u> + 2600784890U, // <4,2,u,7>: Cost 3 vext1 , <7,0,1,2> + 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS + 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0> + 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2> + 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4> + 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3> + 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1> + 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0> + 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0> + 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0> + 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2> + 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1> + 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1> + 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4> + 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4> + 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0> + 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3> + 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1> + 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3> + 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4> + 3626770534U, // <4,3,2,0>: 
Cost 4 vext1 <0,4,3,2>, LHS + 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3> + 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2> + 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4> + 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS + 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4> + 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3> + 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3> + 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3> + 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1> + 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1> + 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3> + 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> + 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4> + 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7> + 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7> + 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7> + 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> + 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1> + 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2> + 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4> + 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0> + 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5> + 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6> + 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4> + 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2> + 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1> + 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS + 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5> + 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5> + 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3> + 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS + 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5> + 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5> + 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4> + 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS + 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS + 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6> + 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6> + 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3> + 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS + 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6> + 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6> + 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4> + 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6> + 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1> + 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5> + 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7> + 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7> + 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5> + 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7> + 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7> + 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4> + 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1> + 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS + 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2> + 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u> + 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4> + 2559053110U, // 
<4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS + 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u> + 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u> + 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4> + 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u> + 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4> + 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS + 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS + 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1> + 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0> + 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1> + 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2> + 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0> + 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS + 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2> + 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1> + 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3> + 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3> + 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3> + 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4> + 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3> + 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3> + 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3> + 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4> + 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4> + 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2> + 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4> + 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4> + 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7> + 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4> + 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4> + 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4> + 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2> + 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4> + 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3> + 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> + 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4> + 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5> + 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4> + 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4> + 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> + 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS + 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4> + 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2> + 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4> + 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS + 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS + 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS + 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> + 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS + 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS + 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4> + 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5> + 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5> + 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS + 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS + 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS + 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5> + 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS + 
2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS + 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2> + 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5> + 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6> + 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS + 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6> + 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS + 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4> + 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS + 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2> + 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4> + 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7> + 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4> + 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> + 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4> + 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4> + 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7> + 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> + 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS + 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS + 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS + 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u> + 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS + 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS + 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS + 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u> + 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS + 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0> + 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS + 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5> + 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0> + 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5> + 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7> + 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5> + 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0> + 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS + 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2> + 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5> + 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0> + 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS + 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4> + 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5> + 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6> + 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3> + 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS + 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS + 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3> + 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5> + 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5> + 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5> + 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7> + 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7> + 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS + 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5> + 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2> + 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1> + 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4> + 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3> + 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, 
<3,4,5,0> + 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5> + 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4> + 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5> + 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2> + 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS + 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4> + 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5> + 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4> + 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS + 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS + 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5> + 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6> + 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS + 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS + 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3> + 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4> + 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2> + 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5> + 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5> + 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0> + 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS + 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS + 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS + 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6> + 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6> + 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6> + 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS + 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5> + 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6> + 27705344U, // <4,5,6,7>: Cost 0 copy RHS + 27705344U, // <4,5,6,u>: Cost 0 copy RHS + 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS + 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4> + 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7> + 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5> + 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS + 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7> + 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4> + 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4> + 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS + 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS + 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS + 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, + 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u> + 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS + 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS + 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, + 27705344U, // <4,5,u,7>: Cost 0 copy RHS + 27705344U, // <4,5,u,u>: Cost 0 copy RHS + 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0> + 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS + 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6> + 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0> + 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5> + 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7> + 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0> + 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS + 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS + 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> + 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1> + 2618917782U, 
// <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0> + 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3> + 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5> + 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7> + 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7> + 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS + 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3> + 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4> + 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3> + 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2> + 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1> + 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6> + 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7> + 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7> + 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3> + 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1> + 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> + 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6> + 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2> + 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3> + 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> + 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6> + 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6> + 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4> + 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2> + 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS + 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3> + 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4> + 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4> + 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS + 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS + 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS + 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4> + 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS + 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS + 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3> + 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3> + 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3> + 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6> + 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5> + 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6> + 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS + 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS + 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS + 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2> + 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3> + 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2> + 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6> + 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6> + 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6> + 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS + 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS + 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2> + 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2> + 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7> + 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4> + 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6> + 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> + 3852915914U, // <4,6,7,6>: Cost 4 
vuzpl RHS, <7,2,6,3> + 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> + 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2> + 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS + 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS + 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS + 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, + 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u> + 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS + 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS + 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS + 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS + 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0> + 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS + 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4> + 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4> + 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5> + 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0> + 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7> + 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4> + 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS + 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> + 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1> + 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> + 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5> + 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS + 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7> + 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7> + 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3> + 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1> + 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS + 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3> + 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2> + 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4> + 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7> + 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> + 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7> + 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3> + 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7> + 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2> + 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4> + 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4> + 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3> + 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6> + 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7> + 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> + 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4> + 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7> + 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1> + 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3> + 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7> + 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5> + 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4> + 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS + 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4> + 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7> + 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS + 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2> + 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7> + 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, 
<2,3,4,5> + 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5> + 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6> + 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5> + 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5> + 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7> + 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2> + 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS + 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2> + 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2> + 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2> + 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS + 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6> + 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3> + 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7> + 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS + 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS + 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4> + 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7> + 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4> + 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7> + 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7> + 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7> + 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7> + 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7> + 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS + 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS + 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2> + 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2> + 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS + 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u> + 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3> + 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7> + 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS + 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0> + 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS + 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u> + 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, + 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5> + 2265397305U, // <4,u,0,5>: Cost 3 vrev + 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u> + 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0> + 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS + 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2> + 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1> + 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3> + 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, + 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7> + 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7> + 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, + 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS + 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u> + 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2> + 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u> + 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u> + 2733864859U, // <4,u,2,5>: Cost 3 vext3 , + 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7> + 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, + 1561118822U, // 
<4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u> + 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2> + 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2> + 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u> + 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3> + 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6> + 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>, + 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u> + 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2> + 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS + 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>, + 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4> + 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4> + 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS + 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS + 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS + 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>, + 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS + 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS + 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS + 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5> + 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS + 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS + 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS + 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS + 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS + 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS + 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS + 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2> + 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS + 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6> + 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS + 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6> + 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS + 27705344U, // <4,u,6,7>: Cost 0 copy RHS + 27705344U, // <4,u,6,u>: Cost 0 copy RHS + 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS + 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4> + 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7> + 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u> + 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS + 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> + 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7> + 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> + 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS + 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS + 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS + 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS + 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u> + 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS + 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS + 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS + 27705344U, // <4,u,u,7>: Cost 0 copy RHS + 27705344U, // <4,u,u,u>: Cost 0 copy RHS + 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0> + 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1> + 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2> + 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5> + 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5> + 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0> + 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0> + 3669005700U, // <5,0,0,7>: 
Cost 4 vext1 <7,5,0,0>, <7,5,0,0> + 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2> + 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS + 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1> + 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS + 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7> + 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS + 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1> + 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7> + 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2> + 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS + 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2> + 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> + 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4> + 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5> + 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5> + 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5> + 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4> + 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> + 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5> + 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5> + 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4> + 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5> + 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5> + 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0> + 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4> + 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0> + 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7> + 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5> + 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS + 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> + 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6> + 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5> + 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS + 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS + 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5> + 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> + 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> + 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1> + 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS + 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS + 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0> + 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5> + 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0> + 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0> + 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS + 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS + 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0> + 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS + 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7> + 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6> + 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5> + 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7> + 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6> + 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5> + 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS + 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS + 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0> + 
2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7> + 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2> + 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS + 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0> + 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7> + 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7> + 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS + 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2> + 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5> + 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS + 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5> + 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6> + 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS + 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u> + 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> + 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS + 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0> + 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS + 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2> + 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2> + 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5> + 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0> + 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7> + 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0> + 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS + 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1> + 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1> + 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0> + 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3> + 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5> + 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5> + 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5> + 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5> + 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3> + 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2> + 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3> + 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2> + 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0> + 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5> + 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3> + 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7> + 3808199610U, // <5,1,2,7>: Cost 4 vext3 , <1,2,7,0> + 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0> + 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS + 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3> + 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5> + 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5> + 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5> + 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7> + 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7> + 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5> + 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3> + 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1> + 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5> + 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5> + 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5> + 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4> + 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS + 
2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6> + 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4> + 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1> + 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1> + 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1> + 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1> + 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7> + 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5> + 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5> + 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0> + 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7> + 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1> + 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS + 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7> + 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3> + 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7> + 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS + 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7> + 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6> + 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1> + 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1> + 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS + 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1> + 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1> + 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS + 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS + 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3> + 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6> + 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7> + 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS + 1591662326U, // <5,1,u,0>: Cost 2 vext2 , + 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS + 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5> + 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS + 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5> + 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS + 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, + 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, + 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS + 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0> + 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS + 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2> + 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2> + 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1> + 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1> + 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4> + 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0> + 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS + 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2> + 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2> + 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5> + 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5> + 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS + 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0> + 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3> + 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1> + 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5> + 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS + 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3> + 2687125096U, // <5,2,2,2>: 
Cost 3 vext3 <0,4,1,5>, <2,2,2,2> + 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3> + 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5> + 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7> + 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6> + 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5> + 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3> + 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1> + 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5> + 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5> + 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5> + 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5> + 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5> + 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5> + 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7> + 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5> + 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2> + 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3> + 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5> + 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5> + 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6> + 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS + 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2> + 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4> + 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS + 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS + 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3> + 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7> + 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS + 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS + 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5> + 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7> + 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1> + 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS + 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS + 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3> + 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3> + 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7> + 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5> + 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7> + 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7> + 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1> + 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7> + 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS + 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2> + 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7> + 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS + 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS + 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5> + 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6> + 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7> + 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS + 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1> + 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS + 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u> + 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3> + 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5> + 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS + 2702092405U, // 
<5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5> + 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u> + 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5> + 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0> + 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2> + 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0> + 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2> + 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1> + 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2> + 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0> + 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0> + 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2> + 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3> + 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1> + 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3> + 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5> + 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3> + 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7> + 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7> + 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5> + 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3> + 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1> + 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5> + 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2> + 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4> + 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5> + 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4> + 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3> + 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3> + 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4> + 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1> + 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3> + 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2> + 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3> + 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5> + 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5> + 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7> + 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5> + 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5> + 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5> + 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0> + 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3> + 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5> + 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5> + 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6> + 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5> + 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4> + 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6> + 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS + 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5> + 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5> + 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5> + 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS + 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5> + 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0> + 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5> + 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS + 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS + 
3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6> + 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6> + 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6> + 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS + 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0> + 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6> + 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4> + 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS + 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS + 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7> + 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2> + 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2> + 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS + 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3> + 2601513466U, // <5,3,7,6>: Cost 3 vext1 , <6,2,7,3> + 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7> + 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS + 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS + 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u> + 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2> + 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2> + 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS + 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6> + 2601521658U, // <5,3,u,6>: Cost 3 vext1 , <6,2,7,3> + 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u> + 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS + 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS + 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS + 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2> + 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5> + 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5> + 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1> + 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0> + 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0> + 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS + 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1> + 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4> + 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4> + 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7> + 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4> + 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0> + 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5> + 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1> + 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1> + 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4> + 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4> + 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4> + 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5> + 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4> + 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3> + 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3> + 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5> + 2666752099U, // <5,4,2,u>: Cost 3 vext2 , <2,u,4,5> + 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS + 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4> + 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4> + 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3> + 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4> + 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0> + 2710719634U, 
// <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5> + 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7> + 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4> + 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS + 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4> + 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3> + 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4> + 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4> + 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5> + 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4> + 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4> + 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5> + 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS + 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5> + 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3> + 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2> + 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS + 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5> + 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS + 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS + 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS + 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS + 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7> + 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2> + 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6> + 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS + 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5> + 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7> + 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5> + 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS + 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS + 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4> + 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7> + 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7> + 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS + 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5> + 94817590U, // <5,4,7,6>: Cost 1 vrev RHS + 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7> + 94965064U, // <5,4,7,u>: Cost 1 vrev RHS + 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS + 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u> + 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u> + 2667419628U, // <5,4,u,3>: Cost 3 vext2 , + 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS + 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5> + 94825783U, // <5,4,u,6>: Cost 1 vrev RHS + 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5> + 94973257U, // <5,4,u,u>: Cost 1 vrev RHS + 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0> + 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS + 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2> + 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2> + 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1> + 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0> + 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7> + 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS + 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS + 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2> + 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5> + 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0> + 2646852568U, // 
<5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3> + 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5> + 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7> + 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7> + 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3> + 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5> + 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS + 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3> + 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2> + 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4> + 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3> + 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3> + 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7> + 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7> + 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4> + 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2> + 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5> + 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3> + 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5> + 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6> + 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5> + 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7> + 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1> + 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2> + 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1> + 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5> + 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3> + 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4> + 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> + 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS + 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5> + 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6> + 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5> + 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS + 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3> + 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2> + 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2> + 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS + 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS + 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0> + 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7> + 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS + 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS + 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6> + 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3> + 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6> + 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5> + 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5> + 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6> + 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1> + 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1> + 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS + 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7> + 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5> + 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7> + 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS + 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5> + 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6> + 
2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS + 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS + 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS + 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS + 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5> + 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u> + 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS + 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS + 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, + 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS + 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS + 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0> + 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS + 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2> + 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4> + 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5> + 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6> + 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7> + 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS + 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS + 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2> + 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1> + 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0> + 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3> + 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6> + 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7> + 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7> + 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS + 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6> + 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2> + 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3> + 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2> + 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1> + 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6> + 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6> + 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7> + 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3> + 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6> + 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2> + 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3> + 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6> + 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3> + 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6> + 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6> + 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7> + 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS + 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6> + 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS + 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5> + 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5> + 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5> + 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6> + 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS + 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6> + 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5> + 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS + 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS + 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3> + 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, 
<6,5,2,6> + 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4> + 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6> + 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5> + 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1> + 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS + 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS + 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS + 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4> + 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3> + 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6> + 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS + 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6> + 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6> + 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS + 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS + 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS + 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> + 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> + 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> + 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS + 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6> + 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6> + 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7> + 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS + 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS + 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS + 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> + 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> + 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS + 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS + 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3> + 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2> + 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS + 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0> + 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS + 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2> + 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0> + 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5> + 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7> + 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7> + 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0> + 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS + 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2> + 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1> + 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0> + 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7> + 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS + 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7> + 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7> + 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7> + 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7> + 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7> + 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3> + 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2> + 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1> + 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7> + 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7> + 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7> + 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7> + 2625636411U, // <5,7,2,u>: Cost 3 vext2 
<1,3,5,7>, <2,u,0,1> + 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2> + 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5> + 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1> + 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3> + 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6> + 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0> + 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7> + 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7> + 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2> + 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS + 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7> + 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0> + 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4> + 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS + 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS + 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> + 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7> + 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS + 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS + 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3> + 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3> + 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7> + 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS + 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5> + 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7> + 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS + 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS + 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0> + 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5> + 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2> + 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6> + 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4> + 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u> + 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> + 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7> + 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u> + 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS + 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1> + 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2> + 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3> + 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS + 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7> + 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3> + 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7> + 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS + 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS + 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS + 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, + 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS + 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS + 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS + 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, + 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS + 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS + 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0> + 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS + 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2> + 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, + 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, + 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, + 
2266134675U, // <5,u,0,6>: Cost 3 vrev + 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0> + 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS + 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2> + 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1> + 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS + 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u> + 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u> + 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, + 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u> + 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS + 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS + 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0> + 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> + 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2> + 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, + 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u> + 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u> + 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7> + 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, + 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1> + 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, + 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u> + 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u> + 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3> + 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u> + 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0> + 2734610422U, // <5,u,3,6>: Cost 3 vext3 , + 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u> + 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u> + 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u> + 1661163546U, // <5,u,4,1>: Cost 2 vext3 , + 2734463012U, // <5,u,4,2>: Cost 3 vext3 , + 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, + 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> + 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS + 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> + 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u> + 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS + 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS + 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5> + 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, + 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, + 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS + 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS + 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS + 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS + 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS + 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS + 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS + 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6> + 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, + 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS + 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS + 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> + 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS + 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, + 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS + 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7> + 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2> + 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS + 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS + 
1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> + 118708378U, // <5,u,7,6>: Cost 1 vrev RHS + 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS + 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS + 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS + 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS + 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS + 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS + 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS + 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS + 118716571U, // <5,u,u,6>: Cost 1 vrev RHS + 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS + 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS + 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0> + 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1> + 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2> + 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5> + 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6> + 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0> + 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6> + 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7> + 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2> + 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS + 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0> + 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS + 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6> + 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS + 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1> + 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1> + 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1> + 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS + 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2> + 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6> + 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6> + 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5> + 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6> + 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7> + 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6> + 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2> + 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6> + 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2> + 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4> + 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5> + 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3> + 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6> + 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6> + 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6> + 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7> + 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5> + 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6> + 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5> + 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6> + 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6> + 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6> + 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS + 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0> + 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0> + 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6> + 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS + 2687795620U, // <6,0,5,1>: Cost 3 
vext3 <0,5,1,6>, <0,5,1,6> + 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6> + 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0> + 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6> + 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6> + 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0> + 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS + 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS + 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0> + 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS + 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS + 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5> + 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0> + 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7> + 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0> + 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1> + 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS + 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS + 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0> + 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2> + 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7> + 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS + 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5> + 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0> + 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7> + 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS + 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2> + 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1> + 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS + 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5> + 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6> + 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS + 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u> + 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS + 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS + 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS + 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS + 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6> + 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2> + 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS + 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2> + 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1> + 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0> + 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2> + 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1> + 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1> + 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6> + 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3> + 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6> + 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5> + 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6> + 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1> + 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3> + 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS + 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3> + 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2> + 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0> + 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS + 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, 
<1,2,5,3> + 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3> + 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0> + 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0> + 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS + 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3> + 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6> + 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1> + 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6> + 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7> + 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3> + 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2> + 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3> + 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1> + 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6> + 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4> + 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6> + 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS + 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6> + 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0> + 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1> + 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6> + 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1> + 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7> + 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6> + 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7> + 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6> + 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6> + 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0> + 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS + 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7> + 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS + 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7> + 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6> + 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS + 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS + 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7> + 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6> + 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1> + 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS + 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS + 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7> + 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2> + 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS + 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS + 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5> + 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0> + 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7> + 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS + 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS + 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3> + 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6> + 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0> + 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6> + 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7> + 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u> + 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u> + 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0> + 2646900736U, // <6,2,0,0>: Cost 3 vext2 
<4,u,6,2>, <0,0,0,0> + 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS + 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2> + 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0> + 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6> + 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3> + 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4> + 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0> + 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS + 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1> + 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1> + 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0> + 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS + 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6> + 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7> + 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3> + 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1> + 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1> + 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1> + 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3> + 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2> + 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3> + 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6> + 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7> + 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6> + 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7> + 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3> + 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1> + 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0> + 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6> + 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4> + 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5> + 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6> + 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6> + 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4> + 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1> + 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> + 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u> + 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6> + 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6> + 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6> + 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS + 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0> + 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2> + 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2> + 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3> + 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3> + 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7> + 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6> + 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5> + 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5> + 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0> + 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS + 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6> + 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1> + 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3> + 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6> + 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7> + 
2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5> + 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7> + 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6> + 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1> + 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7> + 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS + 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2> + 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7> + 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS + 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS + 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7> + 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> + 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7> + 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS + 1591744256U, // <6,2,u,0>: Cost 2 vext2 , + 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS + 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6> + 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS + 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5> + 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS + 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0> + 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS + 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS + 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0> + 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2> + 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4> + 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2> + 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2> + 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2> + 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0> + 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0> + 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2> + 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3> + 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1> + 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3> + 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1> + 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> + 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3> + 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0> + 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3> + 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> + 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4> + 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> + 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2> + 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0> + 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6> + 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7> + 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7> + 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6> + 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> + 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1> + 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3> + 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3> + 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3> + 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6> + 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5> + 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7> + 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7> + 2707999179U, // <6,3,3,u>: 
Cost 3 vext3 <3,u,5,6>, <3,3,u,5> + 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS + 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3> + 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3> + 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6> + 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS + 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6> + 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6> + 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4> + 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6> + 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS + 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7> + 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5> + 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5> + 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6> + 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7> + 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6> + 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0> + 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6> + 2602164326U, // <6,3,6,0>: Cost 3 vext1 , LHS + 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3> + 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6> + 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1> + 2602167524U, // <6,3,6,4>: Cost 3 vext1 , <4,4,6,6> + 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7> + 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6> + 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7> + 2602170158U, // <6,3,6,u>: Cost 3 vext1 , LHS + 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS + 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7> + 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7> + 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2> + 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS + 2602176208U, // <6,3,7,5>: Cost 3 vext1 , <5,1,7,3> + 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3> + 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7> + 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS + 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS + 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2> + 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u> + 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2> + 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS + 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6> + 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0> + 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7> + 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS + 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0> + 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS + 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6> + 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1> + 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6> + 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1> + 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2> + 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0> + 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS + 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1> + 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1> + 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0> + 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS + 3628166454U, // <6,4,1,4>: Cost 
4 vext1 <0,6,4,1>, RHS + 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0> + 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3> + 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1> + 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1> + 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4> + 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3> + 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2> + 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1> + 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4> + 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS + 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0> + 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0> + 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4> + 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2> + 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3> + 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6> + 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3> + 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6> + 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6> + 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5> + 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7> + 2668817222U, // <6,4,3,u>: Cost 3 vext2 , <3,u,5,6> + 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS + 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4> + 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4> + 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4> + 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4> + 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS + 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> + 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4> + 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> + 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS + 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3> + 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5> + 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6> + 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS + 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5> + 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS + 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS + 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS + 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS + 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2> + 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2> + 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2> + 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS + 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3> + 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3> + 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2> + 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS + 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS + 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4> + 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5> + 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7> + 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS + 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5> + 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6> + 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7> + 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS + 1504608358U, // 
<6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS + 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS + 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u> + 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6> + 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6> + 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS + 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS + 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS + 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS + 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0> + 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS + 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6> + 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2> + 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1> + 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5> + 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7> + 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0> + 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0> + 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1> + 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5> + 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0> + 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS + 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6> + 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7> + 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4> + 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3> + 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3> + 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS + 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5> + 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2> + 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5> + 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS + 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6> + 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7> + 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS + 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5> + 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2> + 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5> + 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3> + 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3> + 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6> + 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6> + 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0> + 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3> + 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3> + 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS + 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5> + 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5> + 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5> + 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS + 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS + 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0> + 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6> + 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6> + 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS + 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2> + 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2> + 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2> + 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS + 
2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5> + 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6> + 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7> + 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7> + 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1> + 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4> + 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4> + 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4> + 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5> + 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5> + 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6> + 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0> + 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1> + 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS + 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7> + 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7> + 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2> + 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS + 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5> + 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6> + 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS + 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS + 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS + 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u> + 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u> + 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, + 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS + 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS + 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6> + 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u> + 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u> + 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS + 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS + 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4> + 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1> + 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> + 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3> + 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0> + 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS + 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS + 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> + 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1> + 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> + 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS + 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3> + 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> + 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3> + 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3> + 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS + 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6> + 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3> + 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6> + 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1> + 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3> + 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7> + 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7> + 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3> + 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3> + 2646935702U, // <6,6,3,0>: Cost 3 vext2 
<4,u,6,6>, <3,0,1,2> + 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4> + 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6> + 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3> + 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> + 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4> + 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6> + 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS + 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5> + 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS + 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2> + 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6> + 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6> + 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> + 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS + 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0> + 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6> + 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> + 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS + 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3> + 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5> + 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0> + 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> + 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5> + 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0> + 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS + 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS + 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS + 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2> + 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3> + 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2> + 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS + 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3> + 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS + 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7> + 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS + 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS + 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7> + 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7> + 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6> + 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS + 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4> + 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6> + 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS + 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS + 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS + 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS + 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u> + 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS + 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS + 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS + 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS + 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS + 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS + 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS + 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0> + 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> + 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7> 
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2> + 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS + 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> + 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3> + 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> + 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> + 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2> + 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3> + 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2> + 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3> + 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> + 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6> + 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7> + 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1> + 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1> + 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3> + 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1> + 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7> + 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7> + 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1> + 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3> + 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> + 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5> + 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS + 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> + 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7> + 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS + 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> + 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> + 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> + 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> + 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> + 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> + 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2> + 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3> + 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> + 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6> + 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7> + 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> + 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> + 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> + 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> + 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7> + 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> + 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7> + 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> + 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7> + 
1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7> + 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> + 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2> + 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, + 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS + 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, + 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, + 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, + 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS + 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, + 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, + 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS + 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS + 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, + 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> + 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7> + 1193130221U, // <6,u,0,7>: Cost 2 vrev + 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS + 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3> + 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS + 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7> + 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, + 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS + 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS + 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3> + 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1> + 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS + 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u> + 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, + 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1> + 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3> + 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3> + 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, + 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, + 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1> + 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3> + 1661245476U, // <6,u,4,2>: Cost 2 vext3 , + 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, + 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS + 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> + 1661614161U, // <6,u,4,7>: Cost 2 vext3 , + 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS + 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS + 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> + 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, + 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS + 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7> 
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS + 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS + 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2> + 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, + 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS + 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5> + 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS + 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1> + 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS + 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS + 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7> + 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> + 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS + 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS + 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5> + 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7> + 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS + 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS + 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS + 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS + 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, + 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS + 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS + 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS + 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS + 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS + 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> + 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> + 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0> + 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> + 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6> + 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> + 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7> + 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> + 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS + 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5> + 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS + 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7> + 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS + 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7> + 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> + 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0> + 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS + 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> + 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> + 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0> + 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1> + 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> + 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> + 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7> + 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7> + 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> + 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2> + 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> + 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0> + 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> + 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> + 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> + 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0> + 
3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7> + 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0> + 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4> + 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> + 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> + 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4> + 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6> + 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS + 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6> + 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5> + 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> + 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS + 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3> + 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> + 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> + 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> + 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5> + 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> + 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7> + 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> + 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7> + 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> + 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7> + 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0> + 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7> + 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7> + 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6> + 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1> + 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7> + 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2> + 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS + 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS + 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7> + 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6> + 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7> + 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0> + 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7> + 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2> + 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2> + 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1> + 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS + 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u> + 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6> + 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS + 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u> + 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, + 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS + 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS + 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS + 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS + 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2> + 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS + 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> + 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0> + 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1> + 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2> + 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1> + 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1> + 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6> + 1638318920U, // <7,1,1,3>: 
Cost 2 vext3 RHS, <1,1,3,3> + 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5> + 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7> + 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1> + 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5> + 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3> + 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1> + 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3> + 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2> + 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0> + 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5> + 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3> + 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2> + 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0> + 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0> + 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0> + 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3> + 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0> + 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7> + 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5> + 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7> + 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7> + 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3> + 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7> + 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5> + 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5> + 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5> + 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5> + 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5> + 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS + 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1> + 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0> + 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS + 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS + 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7> + 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6> + 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7> + 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS + 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7> + 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1> + 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1> + 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7> + 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7> + 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7> + 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> + 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7> + 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7> + 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7> + 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7> + 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1> + 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7> + 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2> + 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1> + 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3> + 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS + 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS + 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7> + 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0> + 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7> + 3121938539U, // <7,1,7,u>: 
Cost 3 vtrnr <5,7,5,7>, LHS + 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS + 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3> + 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0> + 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7> + 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS + 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7> + 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7> + 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS + 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7> + 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2> + 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2> + 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0> + 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0> + 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6> + 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7> + 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1> + 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2> + 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7> + 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3> + 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0> + 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0> + 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1> + 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3> + 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0> + 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3> + 2602718850U, // <7,2,1,7>: Cost 3 vext1 , <7,u,1,2> + 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1> + 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1> + 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3> + 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3> + 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5> + 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7> + 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6> + 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5> + 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3> + 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1> + 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5> + 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6> + 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7> + 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5> + 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7> + 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6> + 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0> + 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1> + 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6> + 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3> + 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4> + 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5> + 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6> + 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7> + 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0> + 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0> + 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7> + 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7> + 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3> + 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7> + 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7> + 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7> + 3766486907U, // <7,2,5,5>: Cost 4 vext3 
<1,3,5,7>, <2,5,5,7> + 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7> + 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0> + 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7> + 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS + 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3> + 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7> + 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7> + 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS + 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7> + 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7> + 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7> + 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7> + 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1> + 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0> + 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5> + 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS + 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS + 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7> + 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6> + 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7> + 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS + 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1> + 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5> + 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7> + 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5> + 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7> + 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7> + 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0> + 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7> + 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0> + 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2> + 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0> + 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2> + 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1> + 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2> + 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0> + 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0> + 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2> + 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1> + 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1> + 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3> + 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5> + 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6> + 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3> + 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1> + 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5> + 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5> + 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1> + 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0> + 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2> + 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0> + 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5> + 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4> + 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3> + 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3> + 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0> + 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1> + 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3> + 2637031791U, 
// <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3> + 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4> + 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7> + 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3> + 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7> + 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1> + 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2> + 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4> + 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5> + 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5> + 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6> + 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6> + 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4> + 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6> + 2602819686U, // <7,3,5,0>: Cost 3 vext1 , LHS + 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3> + 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3> + 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7> + 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5> + 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7> + 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0> + 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0> + 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3> + 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7> + 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3> + 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3> + 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7> + 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7> + 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7> + 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6> + 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7> + 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3> + 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1> + 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5> + 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6> + 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7> + 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5> + 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7> + 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7> + 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7> + 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1> + 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u> + 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2> + 1593153452U, // <7,3,u,2>: Cost 2 vext2 , + 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3> + 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u> + 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6> + 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3> + 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0> + 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2> + 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0> + 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2> + 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1> + 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5> + 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> + 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> + 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, 
<4,0,7,1> + 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1> + 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2> + 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1> + 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3> + 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7> + 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3> + 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0> + 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3> + 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3> + 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0> + 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1> + 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3> + 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2> + 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1> + 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7> + 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> + 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0> + 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5> + 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0> + 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2> + 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5> + 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1> + 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3> + 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6> + 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4> + 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5> + 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7> + 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4> + 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2> + 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1> + 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4> + 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3> + 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5> + 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> + 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7> + 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5> + 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS + 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7> + 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3> + 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5> + 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS + 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7> + 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS + 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7> + 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS + 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1> + 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5> + 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3> + 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4> + 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6> + 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7> + 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7> + 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> + 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2> + 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2> + 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2> + 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5> + 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6> + 2651641140U, // <7,4,7,4>: Cost 
3 vext2 <5,6,7,4>, <7,4,0,1> + 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> + 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1> + 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7> + 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7> + 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS + 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3> + 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u> + 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6> + 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1> + 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS + 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5> + 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS + 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS + 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS + 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS + 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0> + 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1> + 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1> + 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1> + 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2> + 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS + 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1> + 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1> + 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0> + 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7> + 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5> + 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3> + 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1> + 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> + 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> + 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1> + 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0> + 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7> + 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4> + 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7> + 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> + 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3> + 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3> + 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4> + 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS + 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5> + 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5> + 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3> + 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS + 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5> + 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5> + 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0> + 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0> + 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS + 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7> + 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5> + 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4> + 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS + 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS + 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5> + 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> + 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6> + 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1> + 2652974800U, // <7,5,5,1>: Cost 3 
vext2 <5,u,7,5>, <5,1,7,3> + 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3> + 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3> + 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4> + 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5> + 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> + 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> + 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7> + 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1> + 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7> + 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2> + 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4> + 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5> + 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> + 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> + 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0> + 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0> + 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS + 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3> + 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2> + 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7> + 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS + 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> + 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> + 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1> + 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3> + 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS + 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS + 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0> + 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u> + 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS + 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7> + 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7> + 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0> + 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3> + 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0> + 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4> + 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0> + 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2> + 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0> + 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0> + 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2> + 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS + 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1> + 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1> + 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0> + 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3> + 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS + 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7> + 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> + 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3> + 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1> + 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1> + 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3> + 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2> + 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1> + 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5> + 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7> + 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7> + 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> + 1638470147U, // <7,6,2,u>: Cost 2 vext3 
RHS, <6,2,u,3> + 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2> + 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3> + 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3> + 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3> + 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5> + 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7> + 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6> + 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0> + 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0> + 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1> + 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3> + 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5> + 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4> + 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6> + 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS + 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0> + 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5> + 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS + 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS + 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3> + 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7> + 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4> + 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6> + 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5> + 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0> + 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7> + 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5> + 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1> + 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3> + 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3> + 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4> + 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4> + 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7> + 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6> + 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> + 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7> + 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1> + 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0> + 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2> + 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0> + 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5> + 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4> + 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6> + 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7> + 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1> + 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1> + 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2> + 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0> + 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5> + 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS + 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6> + 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3> + 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1> + 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0> + 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> + 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0> + 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0> + 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> + 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> + 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0> + 
2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2> + 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> + 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> + 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1> + 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3> + 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5> + 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS + 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> + 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> + 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1> + 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3> + 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5> + 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0> + 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2> + 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0> + 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5> + 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3> + 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> + 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3> + 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3> + 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> + 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3> + 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6> + 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7> + 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> + 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3> + 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> + 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7> + 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7> + 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> + 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7> + 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3> + 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4> + 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4> + 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> + 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4> + 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6> + 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> + 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS + 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> + 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3> + 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> + 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> + 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> + 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7> + 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5> + 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7> + 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS + 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> + 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> + 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7> + 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS + 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7> + 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7> + 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0> + 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> + 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS + 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2> + 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2> + 
2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2> + 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS + 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7> + 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7> + 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS + 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS + 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> + 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3> + 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0> + 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS + 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> + 1595840756U, // <7,7,u,6>: Cost 2 vext2 , + 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS + 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, + 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, + 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, + 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, + 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, + 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, + 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS + 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, + 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS + 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1> + 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS + 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, + 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS + 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, + 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> + 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, + 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS + 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2> + 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, + 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, + 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6> + 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, + 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, + 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, + 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, + 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, + 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3> + 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, + 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3> + 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, + 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, + 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, + 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, + 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, + 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, + 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, + 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, + 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, + 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, + 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, + 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, + 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, + 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS + 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> + 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, + 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, + 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> + 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> + 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS + 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, + 564582572U, // <7,u,5,u>: 
Cost 1 vext3 RHS, RHS + 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, + 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, + 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> + 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, + 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6> + 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, + 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> + 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, + 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, + 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, + 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, + 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, + 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7> + 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, + 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7> + 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, + 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS + 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, + 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, + 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS + 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, + 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, + 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, + 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS + 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS + 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS + 135053414U, // : Cost 1 vdup0 LHS + 1611489290U, // : Cost 2 vext3 LHS, <0,0,1,1> + 1611489300U, // : Cost 2 vext3 LHS, <0,0,2,2> + 2568054923U, // : Cost 3 vext1 <3,0,0,0>, <3,0,0,0> + 1481706806U, // : Cost 2 vext1 <0,u,0,0>, RHS + 2555449040U, // : Cost 3 vext1 <0,u,0,0>, <5,1,7,3> + 2591282078U, // : Cost 3 vext1 <6,u,0,0>, <6,u,0,0> + 2591945711U, // : Cost 3 vext1 <7,0,0,0>, <7,0,0,0> + 135053414U, // : Cost 1 vdup0 LHS + 1493655654U, // : Cost 2 vext1 <2,u,0,1>, LHS + 1860550758U, // : Cost 2 vzipl LHS, LHS + 537747563U, // : Cost 1 vext3 LHS, LHS + 2625135576U, // : Cost 3 vext2 <1,2,u,0>, <1,3,1,3> + 1493658934U, // : Cost 2 vext1 <2,u,0,1>, RHS + 2625135760U, // : Cost 3 vext2 <1,2,u,0>, <1,5,3,7> + 1517548447U, // : Cost 2 vext1 <6,u,0,1>, <6,u,0,1> + 2591290362U, // : Cost 3 vext1 <6,u,0,1>, <7,0,1,2> + 537747612U, // : Cost 1 vext3 LHS, LHS + 1611489444U, // : Cost 2 vext3 LHS, <0,2,0,2> + 2685231276U, // : Cost 3 vext3 LHS, <0,2,1,1> + 1994768486U, // : Cost 2 vtrnl LHS, LHS + 2685231294U, // : Cost 3 vext3 LHS, <0,2,3,1> + 1611489484U, // : Cost 2 vext3 LHS, <0,2,4,6> + 2712068310U, // : Cost 3 vext3 RHS, <0,2,5,7> + 2625136570U, // : Cost 3 vext2 <1,2,u,0>, <2,6,3,7> + 2591962097U, // : Cost 3 vext1 <7,0,0,2>, <7,0,0,2> + 1611489516U, // : Cost 2 vext3 LHS, <0,2,u,2> + 2954067968U, // : Cost 3 vzipr LHS, <0,0,0,0> + 2685231356U, // : Cost 3 vext3 LHS, <0,3,1,0> + 72589981U, // : Cost 1 vrev LHS + 2625137052U, // : Cost 3 vext2 <1,2,u,0>, <3,3,3,3> + 2625137154U, // : Cost 3 vext2 <1,2,u,0>, <3,4,5,6> + 2639071848U, // : Cost 3 vext2 <3,5,u,0>, <3,5,u,0> + 2639735481U, // : Cost 3 vext2 <3,6,u,0>, <3,6,u,0> + 2597279354U, // : Cost 3 vext1 <7,u,0,3>, <7,u,0,3> + 73032403U, // : Cost 1 vrev LHS + 2687074636U, // : Cost 3 vext3 <0,4,0,u>, <0,4,0,u> + 1611489618U, // : Cost 2 vext3 LHS, <0,4,1,5> + 1611489628U, // : Cost 2 vext3 LHS, <0,4,2,6> + 3629222038U, // : Cost 4 vext1 <0,u,0,4>, <3,0,1,2> + 2555481398U, // : Cost 3 vext1 <0,u,0,4>, RHS + 1551396150U, // : Cost 2 vext2 <1,2,u,0>, RHS + 2651680116U, // : Cost 3 vext2 <5,6,u,0>, <4,6,4,6> + 2646150600U, // : Cost 3 vext2 <4,7,5,0>, <4,7,5,0> + 1611932050U, // : Cost 2 
vext3 LHS, <0,4,u,6> + 2561458278U, // : Cost 3 vext1 <1,u,0,5>, LHS + 1863532646U, // : Cost 2 vzipl RHS, LHS + 2712068526U, // : Cost 3 vext3 RHS, <0,5,2,7> + 2649689976U, // : Cost 3 vext2 <5,3,u,0>, <5,3,u,0> + 2220237489U, // : Cost 3 vrev <0,u,4,5> + 2651680772U, // : Cost 3 vext2 <5,6,u,0>, <5,5,5,5> + 1577939051U, // : Cost 2 vext2 <5,6,u,0>, <5,6,u,0> + 2830077238U, // : Cost 3 vuzpr <1,u,3,0>, RHS + 1579266317U, // : Cost 2 vext2 <5,u,u,0>, <5,u,u,0> + 2555494502U, // : Cost 3 vext1 <0,u,0,6>, LHS + 2712068598U, // : Cost 3 vext3 RHS, <0,6,1,7> + 1997750374U, // : Cost 2 vtrnl RHS, LHS + 2655662673U, // : Cost 3 vext2 <6,3,u,0>, <6,3,u,0> + 2555497782U, // : Cost 3 vext1 <0,u,0,6>, RHS + 2651681459U, // : Cost 3 vext2 <5,6,u,0>, <6,5,0,u> + 2651681592U, // : Cost 3 vext2 <5,6,u,0>, <6,6,6,6> + 2651681614U, // : Cost 3 vext2 <5,6,u,0>, <6,7,0,1> + 1997750428U, // : Cost 2 vtrnl RHS, LHS + 2567446630U, // : Cost 3 vext1 <2,u,0,7>, LHS + 2567447446U, // : Cost 3 vext1 <2,u,0,7>, <1,2,3,0> + 2567448641U, // : Cost 3 vext1 <2,u,0,7>, <2,u,0,7> + 2573421338U, // : Cost 3 vext1 <3,u,0,7>, <3,u,0,7> + 2567449910U, // : Cost 3 vext1 <2,u,0,7>, RHS + 2651682242U, // : Cost 3 vext2 <5,6,u,0>, <7,5,6,u> + 2591339429U, // : Cost 3 vext1 <6,u,0,7>, <6,u,0,7> + 2651682412U, // : Cost 3 vext2 <5,6,u,0>, <7,7,7,7> + 2567452462U, // : Cost 3 vext1 <2,u,0,7>, LHS + 135053414U, // : Cost 1 vdup0 LHS + 1611489938U, // : Cost 2 vext3 LHS, <0,u,1,1> + 537748125U, // : Cost 1 vext3 LHS, LHS + 2685674148U, // : Cost 3 vext3 LHS, <0,u,3,1> + 1611932338U, // : Cost 2 vext3 LHS, <0,u,4,6> + 1551399066U, // : Cost 2 vext2 <1,2,u,0>, RHS + 1517605798U, // : Cost 2 vext1 <6,u,0,u>, <6,u,0,u> + 2830077481U, // : Cost 3 vuzpr <1,u,3,0>, RHS + 537748179U, // : Cost 1 vext3 LHS, LHS + 1544101961U, // : Cost 2 vext2 <0,0,u,1>, <0,0,u,1> + 1558036582U, // : Cost 2 vext2 <2,3,u,1>, LHS + 2619171051U, // : Cost 3 vext2 <0,2,u,1>, <0,2,u,1> + 1611490038U, // : Cost 2 vext3 LHS, <1,0,3,2> + 2555522358U, // : Cost 3 vext1 <0,u,1,0>, RHS + 2712068871U, // : Cost 3 vext3 RHS, <1,0,5,1> + 2591355815U, // : Cost 3 vext1 <6,u,1,0>, <6,u,1,0> + 2597328512U, // : Cost 3 vext1 <7,u,1,0>, <7,u,1,0> + 1611490083U, // : Cost 2 vext3 LHS, <1,0,u,2> + 1481785446U, // : Cost 2 vext1 <0,u,1,1>, LHS + 202162278U, // : Cost 1 vdup1 LHS + 2555528808U, // : Cost 3 vext1 <0,u,1,1>, <2,2,2,2> + 1611490120U, // : Cost 2 vext3 LHS, <1,1,3,3> + 1481788726U, // : Cost 2 vext1 <0,u,1,1>, RHS + 2689876828U, // : Cost 3 vext3 LHS, <1,1,5,5> + 2591364008U, // : Cost 3 vext1 <6,u,1,1>, <6,u,1,1> + 2592691274U, // : Cost 3 vext1 <7,1,1,1>, <7,1,1,1> + 202162278U, // : Cost 1 vdup1 LHS + 1499709542U, // : Cost 2 vext1 <3,u,1,2>, LHS + 2689876871U, // : Cost 3 vext3 LHS, <1,2,1,3> + 2631116445U, // : Cost 3 vext2 <2,2,u,1>, <2,2,u,1> + 835584U, // : Cost 0 copy LHS + 1499712822U, // : Cost 2 vext1 <3,u,1,2>, RHS + 2689876907U, // : Cost 3 vext3 LHS, <1,2,5,3> + 2631780282U, // : Cost 3 vext2 <2,3,u,1>, <2,6,3,7> + 1523603074U, // : Cost 2 vext1 <7,u,1,2>, <7,u,1,2> + 835584U, // : Cost 0 copy LHS + 1487773798U, // : Cost 2 vext1 <1,u,1,3>, LHS + 1611490264U, // : Cost 2 vext3 LHS, <1,3,1,3> + 2685232094U, // : Cost 3 vext3 LHS, <1,3,2,0> + 2018746470U, // : Cost 2 vtrnr LHS, LHS + 1487777078U, // : Cost 2 vext1 <1,u,1,3>, RHS + 1611490304U, // : Cost 2 vext3 LHS, <1,3,5,7> + 2685674505U, // : Cost 3 vext3 LHS, <1,3,6,7> + 2640407307U, // : Cost 3 vext2 <3,7,u,1>, <3,7,u,1> + 1611490327U, // : Cost 2 vext3 LHS, <1,3,u,3> + 1567992749U, // : Cost 2 vext2 
<4,0,u,1>, <4,0,u,1> + 2693121070U, // : Cost 3 vext3 <1,4,1,u>, <1,4,1,u> + 2693194807U, // : Cost 3 vext3 <1,4,2,u>, <1,4,2,u> + 1152386432U, // : Cost 2 vrev <1,u,3,4> + 2555555126U, // : Cost 3 vext1 <0,u,1,4>, RHS + 1558039862U, // : Cost 2 vext2 <2,3,u,1>, RHS + 2645716371U, // : Cost 3 vext2 <4,6,u,1>, <4,6,u,1> + 2597361284U, // : Cost 3 vext1 <7,u,1,4>, <7,u,1,4> + 1152755117U, // : Cost 2 vrev <1,u,u,4> + 1481818214U, // : Cost 2 vext1 <0,u,1,5>, LHS + 2555560694U, // : Cost 3 vext1 <0,u,1,5>, <1,0,3,2> + 2555561576U, // : Cost 3 vext1 <0,u,1,5>, <2,2,2,2> + 1611490448U, // : Cost 2 vext3 LHS, <1,5,3,7> + 1481821494U, // : Cost 2 vext1 <0,u,1,5>, RHS + 2651025435U, // : Cost 3 vext2 <5,5,u,1>, <5,5,u,1> + 2651689068U, // : Cost 3 vext2 <5,6,u,1>, <5,6,u,1> + 2823966006U, // : Cost 3 vuzpr <0,u,1,1>, RHS + 1611932861U, // : Cost 2 vext3 LHS, <1,5,u,7> + 2555568230U, // : Cost 3 vext1 <0,u,1,6>, LHS + 2689877199U, // : Cost 3 vext3 LHS, <1,6,1,7> + 2712069336U, // : Cost 3 vext3 RHS, <1,6,2,7> + 2685232353U, // : Cost 3 vext3 LHS, <1,6,3,7> + 2555571510U, // : Cost 3 vext1 <0,u,1,6>, RHS + 2689877235U, // : Cost 3 vext3 LHS, <1,6,5,7> + 2657661765U, // : Cost 3 vext2 <6,6,u,1>, <6,6,u,1> + 1584583574U, // : Cost 2 vext2 <6,7,u,1>, <6,7,u,1> + 1585247207U, // : Cost 2 vext2 <6,u,u,1>, <6,u,u,1> + 2561548390U, // : Cost 3 vext1 <1,u,1,7>, LHS + 2561549681U, // : Cost 3 vext1 <1,u,1,7>, <1,u,1,7> + 2573493926U, // : Cost 3 vext1 <3,u,1,7>, <2,3,0,1> + 2042962022U, // : Cost 2 vtrnr RHS, LHS + 2561551670U, // : Cost 3 vext1 <1,u,1,7>, RHS + 2226300309U, // : Cost 3 vrev <1,u,5,7> + 2658325990U, // : Cost 3 vext2 <6,7,u,1>, <7,6,1,u> + 2658326124U, // : Cost 3 vext2 <6,7,u,1>, <7,7,7,7> + 2042962027U, // : Cost 2 vtrnr RHS, LHS + 1481842790U, // : Cost 2 vext1 <0,u,1,u>, LHS + 202162278U, // : Cost 1 vdup1 LHS + 2685674867U, // : Cost 3 vext3 LHS, <1,u,2,0> + 835584U, // : Cost 0 copy LHS + 1481846070U, // : Cost 2 vext1 <0,u,1,u>, RHS + 1611933077U, // : Cost 2 vext3 LHS, <1,u,5,7> + 2685674910U, // : Cost 3 vext3 LHS, <1,u,6,7> + 1523652232U, // : Cost 2 vext1 <7,u,1,u>, <7,u,1,u> + 835584U, // : Cost 0 copy LHS + 1544110154U, // : Cost 2 vext2 <0,0,u,2>, <0,0,u,2> + 1545437286U, // : Cost 2 vext2 <0,2,u,2>, LHS + 1545437420U, // : Cost 2 vext2 <0,2,u,2>, <0,2,u,2> + 2685232589U, // : Cost 3 vext3 LHS, <2,0,3,0> + 2619179346U, // : Cost 3 vext2 <0,2,u,2>, <0,4,1,5> + 2712069606U, // : Cost 3 vext3 RHS, <2,0,5,7> + 2689877484U, // : Cost 3 vext3 LHS, <2,0,6,4> + 2659656273U, // : Cost 3 vext2 <7,0,u,2>, <0,7,2,u> + 1545437853U, // : Cost 2 vext2 <0,2,u,2>, LHS + 1550082851U, // : Cost 2 vext2 <1,0,u,2>, <1,0,u,2> + 2619179828U, // : Cost 3 vext2 <0,2,u,2>, <1,1,1,1> + 2619179926U, // : Cost 3 vext2 <0,2,u,2>, <1,2,3,0> + 2685232671U, // : Cost 3 vext3 LHS, <2,1,3,1> + 2555604278U, // : Cost 3 vext1 <0,u,2,1>, RHS + 2619180176U, // : Cost 3 vext2 <0,2,u,2>, <1,5,3,7> + 2689877564U, // : Cost 3 vext3 LHS, <2,1,6,3> + 2602718850U, // : Cost 3 vext1 , <7,u,1,2> + 1158703235U, // : Cost 2 vrev <2,u,u,1> + 1481867366U, // : Cost 2 vext1 <0,u,2,2>, LHS + 2555609846U, // : Cost 3 vext1 <0,u,2,2>, <1,0,3,2> + 269271142U, // : Cost 1 vdup2 LHS + 1611490930U, // : Cost 2 vext3 LHS, <2,2,3,3> + 1481870646U, // : Cost 2 vext1 <0,u,2,2>, RHS + 2689877640U, // : Cost 3 vext3 LHS, <2,2,5,7> + 2619180986U, // : Cost 3 vext2 <0,2,u,2>, <2,6,3,7> + 2593436837U, // : Cost 3 vext1 <7,2,2,2>, <7,2,2,2> + 269271142U, // : Cost 1 vdup2 LHS + 408134301U, // : Cost 1 vext1 LHS, LHS + 1481876214U, // : Cost 2 
vext1 LHS, <1,0,3,2> + 1481877096U, // : Cost 2 vext1 LHS, <2,2,2,2> + 1880326246U, // : Cost 2 vzipr LHS, LHS + 408137014U, // : Cost 1 vext1 LHS, RHS + 1529654992U, // : Cost 2 vext1 LHS, <5,1,7,3> + 1529655802U, // : Cost 2 vext1 LHS, <6,2,7,3> + 1529656314U, // : Cost 2 vext1 LHS, <7,0,1,2> + 408139566U, // : Cost 1 vext1 LHS, LHS + 1567853468U, // : Cost 2 vext2 <4,0,6,2>, <4,0,6,2> + 2561598362U, // : Cost 3 vext1 <1,u,2,4>, <1,2,3,4> + 2555627214U, // : Cost 3 vext1 <0,u,2,4>, <2,3,4,5> + 2685232918U, // : Cost 3 vext3 LHS, <2,4,3,5> + 2555628854U, // : Cost 3 vext1 <0,u,2,4>, RHS + 1545440566U, // : Cost 2 vext2 <0,2,u,2>, RHS + 1571982740U, // : Cost 2 vext2 <4,6,u,2>, <4,6,u,2> + 2592125957U, // : Cost 3 vext1 <7,0,2,4>, <7,0,2,4> + 1545440809U, // : Cost 2 vext2 <0,2,u,2>, RHS + 2555633766U, // : Cost 3 vext1 <0,u,2,5>, LHS + 2561606550U, // : Cost 3 vext1 <1,u,2,5>, <1,2,3,0> + 2689877856U, // : Cost 3 vext3 LHS, <2,5,2,7> + 2685233000U, // : Cost 3 vext3 LHS, <2,5,3,6> + 1158441059U, // : Cost 2 vrev <2,u,4,5> + 2645725188U, // : Cost 3 vext2 <4,6,u,2>, <5,5,5,5> + 2689877892U, // : Cost 3 vext3 LHS, <2,5,6,7> + 2823900470U, // : Cost 3 vuzpr <0,u,0,2>, RHS + 1158736007U, // : Cost 2 vrev <2,u,u,5> + 1481900134U, // : Cost 2 vext1 <0,u,2,6>, LHS + 2555642614U, // : Cost 3 vext1 <0,u,2,6>, <1,0,3,2> + 2555643496U, // : Cost 3 vext1 <0,u,2,6>, <2,2,2,2> + 1611491258U, // : Cost 2 vext3 LHS, <2,6,3,7> + 1481903414U, // : Cost 2 vext1 <0,u,2,6>, RHS + 2689877964U, // : Cost 3 vext3 LHS, <2,6,5,7> + 2689877973U, // : Cost 3 vext3 LHS, <2,6,6,7> + 2645726030U, // : Cost 3 vext2 <4,6,u,2>, <6,7,0,1> + 1611933671U, // : Cost 2 vext3 LHS, <2,6,u,7> + 1585919033U, // : Cost 2 vext2 <7,0,u,2>, <7,0,u,2> + 2573566710U, // : Cost 3 vext1 <3,u,2,7>, <1,0,3,2> + 2567596115U, // : Cost 3 vext1 <2,u,2,7>, <2,u,2,7> + 1906901094U, // : Cost 2 vzipr RHS, LHS + 2555653430U, // : Cost 3 vext1 <0,u,2,7>, RHS + 2800080230U, // : Cost 3 vuzpl LHS, <7,4,5,6> + 2980643164U, // : Cost 3 vzipr RHS, <0,4,2,6> + 2645726828U, // : Cost 3 vext2 <4,6,u,2>, <7,7,7,7> + 1906901099U, // : Cost 2 vzipr RHS, LHS + 408175266U, // : Cost 1 vext1 LHS, LHS + 1545443118U, // : Cost 2 vext2 <0,2,u,2>, LHS + 269271142U, // : Cost 1 vdup2 LHS + 1611491416U, // : Cost 2 vext3 LHS, <2,u,3,3> + 408177974U, // : Cost 1 vext1 LHS, RHS + 1545443482U, // : Cost 2 vext2 <0,2,u,2>, RHS + 1726339226U, // : Cost 2 vuzpl LHS, RHS + 1529697274U, // : Cost 2 vext1 LHS, <7,0,1,2> + 408180526U, // : Cost 1 vext1 LHS, LHS + 1544781824U, // : Cost 2 vext2 LHS, <0,0,0,0> + 471040156U, // : Cost 1 vext2 LHS, LHS + 1544781988U, // : Cost 2 vext2 LHS, <0,2,0,2> + 2618523900U, // : Cost 3 vext2 LHS, <0,3,1,0> + 1544782162U, // : Cost 2 vext2 LHS, <0,4,1,5> + 2238188352U, // : Cost 3 vrev <3,u,5,0> + 2623169023U, // : Cost 3 vext2 LHS, <0,6,2,7> + 2238335826U, // : Cost 3 vrev <3,u,7,0> + 471040669U, // : Cost 1 vext2 LHS, LHS + 1544782582U, // : Cost 2 vext2 LHS, <1,0,3,2> + 1544782644U, // : Cost 2 vext2 LHS, <1,1,1,1> + 1544782742U, // : Cost 2 vext2 LHS, <1,2,3,0> + 1544782808U, // : Cost 2 vext2 LHS, <1,3,1,3> + 2618524733U, // : Cost 3 vext2 LHS, <1,4,3,5> + 1544782992U, // : Cost 2 vext2 LHS, <1,5,3,7> + 2618524897U, // : Cost 3 vext2 LHS, <1,6,3,7> + 2703517987U, // : Cost 3 vext3 <3,1,7,u>, <3,1,7,u> + 1544783213U, // : Cost 2 vext2 LHS, <1,u,1,3> + 1529716838U, // : Cost 2 vext1 , LHS + 1164167966U, // : Cost 2 vrev <3,u,1,2> + 1544783464U, // : Cost 2 vext2 LHS, <2,2,2,2> + 1544783526U, // : Cost 2 vext2 LHS, <2,3,0,1> + 1529720118U, 
// : Cost 2 vext1 , RHS + 2618525544U, // : Cost 3 vext2 LHS, <2,5,3,6> + 1544783802U, // : Cost 2 vext2 LHS, <2,6,3,7> + 2704181620U, // : Cost 3 vext3 <3,2,7,u>, <3,2,7,u> + 1544783931U, // : Cost 2 vext2 LHS, <2,u,0,1> + 1544784022U, // : Cost 2 vext2 LHS, <3,0,1,2> + 1487922559U, // : Cost 2 vext1 <1,u,3,3>, <1,u,3,3> + 1493895256U, // : Cost 2 vext1 <2,u,3,3>, <2,u,3,3> + 336380006U, // : Cost 1 vdup3 LHS + 1544784386U, // : Cost 2 vext2 LHS, <3,4,5,6> + 2824054478U, // : Cost 3 vuzpr LHS, <2,3,4,5> + 2238286668U, // : Cost 3 vrev <3,u,6,3> + 2954069136U, // : Cost 3 vzipr LHS, <1,5,3,7> + 336380006U, // : Cost 1 vdup3 LHS + 1487929446U, // : Cost 2 vext1 <1,u,3,4>, LHS + 1487930752U, // : Cost 2 vext1 <1,u,3,4>, <1,u,3,4> + 2623171644U, // : Cost 3 vext2 LHS, <4,2,6,0> + 2561673366U, // : Cost 3 vext1 <1,u,3,4>, <3,0,1,2> + 1487932726U, // : Cost 2 vext1 <1,u,3,4>, RHS + 471043382U, // : Cost 1 vext2 LHS, RHS + 1592561012U, // : Cost 2 vext2 LHS, <4,6,4,6> + 2238368598U, // : Cost 3 vrev <3,u,7,4> + 471043625U, // : Cost 1 vext2 LHS, RHS + 2555707494U, // : Cost 3 vext1 <0,u,3,5>, LHS + 1574645465U, // : Cost 2 vext2 <5,1,u,3>, <5,1,u,3> + 2567653106U, // : Cost 3 vext1 <2,u,3,5>, <2,3,u,5> + 2555709954U, // : Cost 3 vext1 <0,u,3,5>, <3,4,5,6> + 1592561606U, // : Cost 2 vext2 LHS, <5,4,7,6> + 1592561668U, // : Cost 2 vext2 LHS, <5,5,5,5> + 1592561762U, // : Cost 2 vext2 LHS, <5,6,7,0> + 1750314294U, // : Cost 2 vuzpr LHS, RHS + 1750314295U, // : Cost 2 vuzpr LHS, RHS + 2623172897U, // : Cost 3 vext2 LHS, <6,0,1,2> + 2561688962U, // : Cost 3 vext1 <1,u,3,6>, <1,u,3,6> + 1581281795U, // : Cost 2 vext2 <6,2,u,3>, <6,2,u,3> + 2706541204U, // : Cost 3 vext3 <3,6,3,u>, <3,6,3,u> + 2623173261U, // : Cost 3 vext2 LHS, <6,4,5,6> + 1164495686U, // : Cost 2 vrev <3,u,5,6> + 1592562488U, // : Cost 2 vext2 LHS, <6,6,6,6> + 1592562510U, // : Cost 2 vext2 LHS, <6,7,0,1> + 1164716897U, // : Cost 2 vrev <3,u,u,6> + 1487954022U, // : Cost 2 vext1 <1,u,3,7>, LHS + 1487955331U, // : Cost 2 vext1 <1,u,3,7>, <1,u,3,7> + 1493928028U, // : Cost 2 vext1 <2,u,3,7>, <2,u,3,7> + 2561697942U, // : Cost 3 vext1 <1,u,3,7>, <3,0,1,2> + 1487957302U, // : Cost 2 vext1 <1,u,3,7>, RHS + 2707352311U, // : Cost 3 vext3 <3,7,5,u>, <3,7,5,u> + 2655024623U, // : Cost 3 vext2 <6,2,u,3>, <7,6,2,u> + 1592563308U, // : Cost 2 vext2 LHS, <7,7,7,7> + 1487959854U, // : Cost 2 vext1 <1,u,3,7>, LHS + 1544787667U, // : Cost 2 vext2 LHS, + 471045934U, // : Cost 1 vext2 LHS, LHS + 1549432709U, // : Cost 2 vext2 LHS, + 336380006U, // : Cost 1 vdup3 LHS + 1544788031U, // : Cost 2 vext2 LHS, + 471046298U, // : Cost 1 vext2 LHS, RHS + 1549433040U, // : Cost 2 vext2 LHS, + 1750314537U, // : Cost 2 vuzpr LHS, RHS + 471046501U, // : Cost 1 vext2 LHS, LHS + 2625167360U, // : Cost 3 vext2 <1,2,u,4>, <0,0,0,0> + 1551425638U, // : Cost 2 vext2 <1,2,u,4>, LHS + 2619195630U, // : Cost 3 vext2 <0,2,u,4>, <0,2,u,4> + 2619343104U, // : Cost 3 vext2 <0,3,1,4>, <0,3,1,4> + 2625167698U, // : Cost 3 vext2 <1,2,u,4>, <0,4,1,5> + 1638329234U, // : Cost 2 vext3 RHS, <4,0,5,1> + 1638329244U, // : Cost 2 vext3 RHS, <4,0,6,2> + 3787803556U, // : Cost 4 vext3 RHS, <4,0,7,1> + 1551426205U, // : Cost 2 vext2 <1,2,u,4>, LHS + 2555748454U, // : Cost 3 vext1 <0,u,4,1>, LHS + 2625168180U, // : Cost 3 vext2 <1,2,u,4>, <1,1,1,1> + 1551426503U, // : Cost 2 vext2 <1,2,u,4>, <1,2,u,4> + 2625168344U, // : Cost 3 vext2 <1,2,u,4>, <1,3,1,3> + 2555751734U, // : Cost 3 vext1 <0,u,4,1>, RHS + 1860554038U, // : Cost 2 vzipl LHS, RHS + 2689879022U, // : Cost 3 vext3 LHS, <4,1,6,3> 
+ 2592248852U, // : Cost 3 vext1 <7,0,4,1>, <7,0,4,1> + 1555408301U, // : Cost 2 vext2 <1,u,u,4>, <1,u,u,4> + 2555756646U, // : Cost 3 vext1 <0,u,4,2>, LHS + 2625168943U, // : Cost 3 vext2 <1,2,u,4>, <2,1,4,u> + 2625169000U, // : Cost 3 vext2 <1,2,u,4>, <2,2,2,2> + 2619197134U, // : Cost 3 vext2 <0,2,u,4>, <2,3,4,5> + 2555759926U, // : Cost 3 vext1 <0,u,4,2>, RHS + 2712071222U, // : Cost 3 vext3 RHS, <4,2,5,3> + 1994771766U, // : Cost 2 vtrnl LHS, RHS + 2592257045U, // : Cost 3 vext1 <7,0,4,2>, <7,0,4,2> + 1994771784U, // : Cost 2 vtrnl LHS, RHS + 2625169558U, // : Cost 3 vext2 <1,2,u,4>, <3,0,1,2> + 2567709594U, // : Cost 3 vext1 <2,u,4,3>, <1,2,3,4> + 2567710817U, // : Cost 3 vext1 <2,u,4,3>, <2,u,4,3> + 2625169820U, // : Cost 3 vext2 <1,2,u,4>, <3,3,3,3> + 2625169922U, // : Cost 3 vext2 <1,2,u,4>, <3,4,5,6> + 2954069710U, // : Cost 3 vzipr LHS, <2,3,4,5> + 2954068172U, // : Cost 3 vzipr LHS, <0,2,4,6> + 3903849472U, // : Cost 4 vuzpr <1,u,3,4>, <1,3,5,7> + 2954068174U, // : Cost 3 vzipr LHS, <0,2,4,u> + 1505919078U, // : Cost 2 vext1 <4,u,4,4>, LHS + 2567717831U, // : Cost 3 vext1 <2,u,4,4>, <1,2,u,4> + 2567719010U, // : Cost 3 vext1 <2,u,4,4>, <2,u,4,4> + 2570373542U, // : Cost 3 vext1 <3,3,4,4>, <3,3,4,4> + 161926454U, // : Cost 1 vdup0 RHS + 1551428918U, // : Cost 2 vext2 <1,2,u,4>, RHS + 1638329572U, // : Cost 2 vext3 RHS, <4,4,6,6> + 2594927963U, // : Cost 3 vext1 <7,4,4,4>, <7,4,4,4> + 161926454U, // : Cost 1 vdup0 RHS + 1493983334U, // : Cost 2 vext1 <2,u,4,5>, LHS + 2689879301U, // : Cost 3 vext3 LHS, <4,5,1,3> + 1493985379U, // : Cost 2 vext1 <2,u,4,5>, <2,u,4,5> + 2567727254U, // : Cost 3 vext1 <2,u,4,5>, <3,0,1,2> + 1493986614U, // : Cost 2 vext1 <2,u,4,5>, RHS + 1863535926U, // : Cost 2 vzipl RHS, RHS + 537750838U, // : Cost 1 vext3 LHS, RHS + 2830110006U, // : Cost 3 vuzpr <1,u,3,4>, RHS + 537750856U, // : Cost 1 vext3 LHS, RHS + 1482047590U, // : Cost 2 vext1 <0,u,4,6>, LHS + 2555790070U, // : Cost 3 vext1 <0,u,4,6>, <1,0,3,2> + 2555790952U, // : Cost 3 vext1 <0,u,4,6>, <2,2,2,2> + 2555791510U, // : Cost 3 vext1 <0,u,4,6>, <3,0,1,2> + 1482050870U, // : Cost 2 vext1 <0,u,4,6>, RHS + 2689879422U, // : Cost 3 vext3 LHS, <4,6,5,7> + 1997753654U, // : Cost 2 vtrnl RHS, RHS + 2712071562U, // : Cost 3 vext3 RHS, <4,6,7,1> + 1482053422U, // : Cost 2 vext1 <0,u,4,6>, LHS + 2567741542U, // : Cost 3 vext1 <2,u,4,7>, LHS + 2567742362U, // : Cost 3 vext1 <2,u,4,7>, <1,2,3,4> + 2567743589U, // : Cost 3 vext1 <2,u,4,7>, <2,u,4,7> + 2573716286U, // : Cost 3 vext1 <3,u,4,7>, <3,u,4,7> + 2567744822U, // : Cost 3 vext1 <2,u,4,7>, RHS + 2712071624U, // : Cost 3 vext3 RHS, <4,7,5,0> + 96808489U, // : Cost 1 vrev RHS + 2651715180U, // : Cost 3 vext2 <5,6,u,4>, <7,7,7,7> + 96955963U, // : Cost 1 vrev RHS + 1482063974U, // : Cost 2 vext1 <0,u,4,u>, LHS + 1551431470U, // : Cost 2 vext2 <1,2,u,4>, LHS + 1494009958U, // : Cost 2 vext1 <2,u,4,u>, <2,u,4,u> + 2555807894U, // : Cost 3 vext1 <0,u,4,u>, <3,0,1,2> + 161926454U, // : Cost 1 vdup0 RHS + 1551431834U, // : Cost 2 vext2 <1,2,u,4>, RHS + 537751081U, // : Cost 1 vext3 LHS, RHS + 2830110249U, // : Cost 3 vuzpr <1,u,3,4>, RHS + 537751099U, // : Cost 1 vext3 LHS, RHS + 2631811072U, // : Cost 3 vext2 <2,3,u,5>, <0,0,0,0> + 1558069350U, // : Cost 2 vext2 <2,3,u,5>, LHS + 2619203823U, // : Cost 3 vext2 <0,2,u,5>, <0,2,u,5> + 2619867456U, // : Cost 3 vext2 <0,3,u,5>, <0,3,u,5> + 1546273106U, // : Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 2733010539U, // : Cost 3 vext3 LHS, <5,0,5,1> + 2597622682U, // : Cost 3 vext1 <7,u,5,0>, <6,7,u,5> + 1176539396U, // : 
Cost 2 vrev <5,u,7,0> + 1558069917U, // : Cost 2 vext2 <2,3,u,5>, LHS + 1505968230U, // : Cost 2 vext1 <4,u,5,1>, LHS + 2624512887U, // : Cost 3 vext2 <1,1,u,5>, <1,1,u,5> + 2631811990U, // : Cost 3 vext2 <2,3,u,5>, <1,2,3,0> + 2618541056U, // : Cost 3 vext2 <0,1,u,5>, <1,3,5,7> + 1505971510U, // : Cost 2 vext1 <4,u,5,1>, RHS + 2627167419U, // : Cost 3 vext2 <1,5,u,5>, <1,5,u,5> + 2579714554U, // : Cost 3 vext1 <4,u,5,1>, <6,2,7,3> + 1638330064U, // : Cost 2 vext3 RHS, <5,1,7,3> + 1638477529U, // : Cost 2 vext3 RHS, <5,1,u,3> + 2561802342U, // : Cost 3 vext1 <1,u,5,2>, LHS + 2561803264U, // : Cost 3 vext1 <1,u,5,2>, <1,3,5,7> + 2631149217U, // : Cost 3 vext2 <2,2,u,5>, <2,2,u,5> + 1558071026U, // : Cost 2 vext2 <2,3,u,5>, <2,3,u,5> + 2561805622U, // : Cost 3 vext1 <1,u,5,2>, RHS + 2714062607U, // : Cost 3 vext3 RHS, <5,2,5,3> + 2631813050U, // : Cost 3 vext2 <2,3,u,5>, <2,6,3,7> + 3092335926U, // : Cost 3 vtrnr <0,u,0,2>, RHS + 1561389191U, // : Cost 2 vext2 <2,u,u,5>, <2,u,u,5> + 2561810534U, // : Cost 3 vext1 <1,u,5,3>, LHS + 2561811857U, // : Cost 3 vext1 <1,u,5,3>, <1,u,5,3> + 2631813474U, // : Cost 3 vext2 <2,3,u,5>, <3,2,5,u> + 2631813532U, // : Cost 3 vext2 <2,3,u,5>, <3,3,3,3> + 2619869698U, // : Cost 3 vext2 <0,3,u,5>, <3,4,5,6> + 3001847002U, // : Cost 3 vzipr LHS, <4,4,5,5> + 2954070530U, // : Cost 3 vzipr LHS, <3,4,5,6> + 2018749750U, // : Cost 2 vtrnr LHS, RHS + 2018749751U, // : Cost 2 vtrnr LHS, RHS + 2573762662U, // : Cost 3 vext1 <3,u,5,4>, LHS + 2620017634U, // : Cost 3 vext2 <0,4,1,5>, <4,1,5,0> + 2573764338U, // : Cost 3 vext1 <3,u,5,4>, <2,3,u,5> + 2573765444U, // : Cost 3 vext1 <3,u,5,4>, <3,u,5,4> + 1570680053U, // : Cost 2 vext2 <4,4,u,5>, <4,4,u,5> + 1558072630U, // : Cost 2 vext2 <2,3,u,5>, RHS + 2645749143U, // : Cost 3 vext2 <4,6,u,5>, <4,6,u,5> + 1638330310U, // : Cost 2 vext3 RHS, <5,4,7,6> + 1558072873U, // : Cost 2 vext2 <2,3,u,5>, RHS + 1506000998U, // : Cost 2 vext1 <4,u,5,5>, LHS + 2561827984U, // : Cost 3 vext1 <1,u,5,5>, <1,5,3,7> + 2579744360U, // : Cost 3 vext1 <4,u,5,5>, <2,2,2,2> + 2579744918U, // : Cost 3 vext1 <4,u,5,5>, <3,0,1,2> + 1506004278U, // : Cost 2 vext1 <4,u,5,5>, RHS + 229035318U, // : Cost 1 vdup1 RHS + 2712072206U, // : Cost 3 vext3 RHS, <5,5,6,6> + 1638330392U, // : Cost 2 vext3 RHS, <5,5,7,7> + 229035318U, // : Cost 1 vdup1 RHS + 1500037222U, // : Cost 2 vext1 <3,u,5,6>, LHS + 2561836436U, // : Cost 3 vext1 <1,u,5,6>, <1,u,5,6> + 2567809133U, // : Cost 3 vext1 <2,u,5,6>, <2,u,5,6> + 1500040006U, // : Cost 2 vext1 <3,u,5,6>, <3,u,5,6> + 1500040502U, // : Cost 2 vext1 <3,u,5,6>, RHS + 2714062935U, // : Cost 3 vext3 RHS, <5,6,5,7> + 2712072288U, // : Cost 3 vext3 RHS, <5,6,6,7> + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 1488101478U, // : Cost 2 vext1 <1,u,5,7>, LHS + 1488102805U, // : Cost 2 vext1 <1,u,5,7>, <1,u,5,7> + 2561844840U, // : Cost 3 vext1 <1,u,5,7>, <2,2,2,2> + 2561845398U, // : Cost 3 vext1 <1,u,5,7>, <3,0,1,2> + 1488104758U, // : Cost 2 vext1 <1,u,5,7>, RHS + 1638330536U, // : Cost 2 vext3 RHS, <5,7,5,7> + 2712072362U, // : Cost 3 vext3 RHS, <5,7,6,0> + 2042965302U, // : Cost 2 vtrnr RHS, RHS + 1488107310U, // : Cost 2 vext1 <1,u,5,7>, LHS + 1488109670U, // : Cost 2 vext1 <1,u,5,u>, LHS + 1488110998U, // : Cost 2 vext1 <1,u,5,u>, <1,u,5,u> + 2561853032U, // : Cost 3 vext1 <1,u,5,u>, <2,2,2,2> + 1500056392U, // : Cost 2 vext1 <3,u,5,u>, <3,u,5,u> + 1488112950U, // : Cost 2 vext1 <1,u,5,u>, RHS + 229035318U, // : Cost 1 vdup1 RHS + 2954111490U, // : Cost 3 vzipr LHS, <3,4,5,6> + 27705344U, // : 
Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 2619211776U, // : Cost 3 vext2 <0,2,u,6>, <0,0,0,0> + 1545470054U, // : Cost 2 vext2 <0,2,u,6>, LHS + 1545470192U, // : Cost 2 vext2 <0,2,u,6>, <0,2,u,6> + 2255958969U, // : Cost 3 vrev <6,u,3,0> + 1546797458U, // : Cost 2 vext2 <0,4,u,6>, <0,4,u,6> + 2720624971U, // : Cost 3 vext3 <6,0,5,u>, <6,0,5,u> + 2256180180U, // : Cost 3 vrev <6,u,6,0> + 2960682294U, // : Cost 3 vzipr <1,2,u,0>, RHS + 1545470621U, // : Cost 2 vext2 <0,2,u,6>, LHS + 1182004127U, // : Cost 2 vrev <6,u,0,1> + 2619212596U, // : Cost 3 vext2 <0,2,u,6>, <1,1,1,1> + 2619212694U, // : Cost 3 vext2 <0,2,u,6>, <1,2,3,0> + 2619212760U, // : Cost 3 vext2 <0,2,u,6>, <1,3,1,3> + 2626511979U, // : Cost 3 vext2 <1,4,u,6>, <1,4,u,6> + 2619212944U, // : Cost 3 vext2 <0,2,u,6>, <1,5,3,7> + 2714063264U, // : Cost 3 vext3 RHS, <6,1,6,3> + 2967326006U, // : Cost 3 vzipr <2,3,u,1>, RHS + 1182594023U, // : Cost 2 vrev <6,u,u,1> + 1506050150U, // : Cost 2 vext1 <4,u,6,2>, LHS + 2579792630U, // : Cost 3 vext1 <4,u,6,2>, <1,0,3,2> + 2619213416U, // : Cost 3 vext2 <0,2,u,6>, <2,2,2,2> + 2619213478U, // : Cost 3 vext2 <0,2,u,6>, <2,3,0,1> + 1506053430U, // : Cost 2 vext1 <4,u,6,2>, RHS + 2633148309U, // : Cost 3 vext2 <2,5,u,6>, <2,5,u,6> + 2619213754U, // : Cost 3 vext2 <0,2,u,6>, <2,6,3,7> + 1638330874U, // : Cost 2 vext3 RHS, <6,2,7,3> + 1638478339U, // : Cost 2 vext3 RHS, <6,2,u,3> + 2619213974U, // : Cost 3 vext2 <0,2,u,6>, <3,0,1,2> + 2255836074U, // : Cost 3 vrev <6,u,1,3> + 2255909811U, // : Cost 3 vrev <6,u,2,3> + 2619214236U, // : Cost 3 vext2 <0,2,u,6>, <3,3,3,3> + 1564715549U, // : Cost 2 vext2 <3,4,u,6>, <3,4,u,6> + 2639121006U, // : Cost 3 vext2 <3,5,u,6>, <3,5,u,6> + 3001847012U, // : Cost 3 vzipr LHS, <4,4,6,6> + 1880329526U, // : Cost 2 vzipr LHS, RHS + 1880329527U, // : Cost 2 vzipr LHS, RHS + 2567864422U, // : Cost 3 vext1 <2,u,6,4>, LHS + 2733011558U, // : Cost 3 vext3 LHS, <6,4,1,3> + 2567866484U, // : Cost 3 vext1 <2,u,6,4>, <2,u,6,4> + 2638458005U, // : Cost 3 vext2 <3,4,u,6>, <4,3,6,u> + 1570540772U, // : Cost 2 vext2 <4,4,6,6>, <4,4,6,6> + 1545473334U, // : Cost 2 vext2 <0,2,u,6>, RHS + 1572015512U, // : Cost 2 vext2 <4,6,u,6>, <4,6,u,6> + 2960715062U, // : Cost 3 vzipr <1,2,u,4>, RHS + 1545473577U, // : Cost 2 vext2 <0,2,u,6>, RHS + 2567872614U, // : Cost 3 vext1 <2,u,6,5>, LHS + 2645757648U, // : Cost 3 vext2 <4,6,u,6>, <5,1,7,3> + 2567874490U, // : Cost 3 vext1 <2,u,6,5>, <2,6,3,7> + 2576501250U, // : Cost 3 vext1 <4,3,6,5>, <3,4,5,6> + 1576660943U, // : Cost 2 vext2 <5,4,u,6>, <5,4,u,6> + 2645757956U, // : Cost 3 vext2 <4,6,u,6>, <5,5,5,5> + 2645758050U, // : Cost 3 vext2 <4,6,u,6>, <5,6,7,0> + 2824080694U, // : Cost 3 vuzpr <0,u,2,6>, RHS + 1182626795U, // : Cost 2 vrev <6,u,u,5> + 1506082918U, // : Cost 2 vext1 <4,u,6,6>, LHS + 2579825398U, // : Cost 3 vext1 <4,u,6,6>, <1,0,3,2> + 2645758458U, // : Cost 3 vext2 <4,6,u,6>, <6,2,7,3> + 2579826838U, // : Cost 3 vext1 <4,u,6,6>, <3,0,1,2> + 1506086198U, // : Cost 2 vext1 <4,u,6,6>, RHS + 2579828432U, // : Cost 3 vext1 <4,u,6,6>, <5,1,7,3> + 296144182U, // : Cost 1 vdup2 RHS + 1638331202U, // : Cost 2 vext3 RHS, <6,6,7,7> + 296144182U, // : Cost 1 vdup2 RHS + 432349286U, // : Cost 1 vext1 RHS, LHS + 1506091766U, // : Cost 2 vext1 RHS, <1,0,3,2> + 1506092648U, // : Cost 2 vext1 RHS, <2,2,2,2> + 1506093206U, // : Cost 2 vext1 RHS, <3,0,1,2> + 432352809U, // : Cost 1 vext1 RHS, RHS + 1506094800U, // : Cost 2 vext1 RHS, <5,1,7,3> + 1506095610U, // : Cost 2 vext1 RHS, <6,2,7,3> + 1906904374U, // : Cost 2 vzipr RHS, RHS + 
432355118U, // : Cost 1 vext1 RHS, LHS + 432357478U, // : Cost 1 vext1 RHS, LHS + 1545475886U, // : Cost 2 vext2 <0,2,u,6>, LHS + 1506100840U, // : Cost 2 vext1 RHS, <2,2,2,2> + 1506101398U, // : Cost 2 vext1 RHS, <3,0,1,2> + 432361002U, // : Cost 1 vext1 RHS, RHS + 1545476250U, // : Cost 2 vext2 <0,2,u,6>, RHS + 296144182U, // : Cost 1 vdup2 RHS + 1880370486U, // : Cost 2 vzipr LHS, RHS + 432363310U, // : Cost 1 vext1 RHS, LHS + 1571356672U, // : Cost 2 vext2 RHS, <0,0,0,0> + 497614950U, // : Cost 1 vext2 RHS, LHS + 1571356836U, // : Cost 2 vext2 RHS, <0,2,0,2> + 2573880146U, // : Cost 3 vext1 <3,u,7,0>, <3,u,7,0> + 1571357010U, // : Cost 2 vext2 RHS, <0,4,1,5> + 1512083716U, // : Cost 2 vext1 <5,u,7,0>, <5,u,7,0> + 2621874741U, // : Cost 3 vext2 <0,6,u,7>, <0,6,u,7> + 2585826298U, // : Cost 3 vext1 <5,u,7,0>, <7,0,1,2> + 497615517U, // : Cost 1 vext2 RHS, LHS + 1571357430U, // : Cost 2 vext2 RHS, <1,0,3,2> + 1571357492U, // : Cost 2 vext2 RHS, <1,1,1,1> + 1571357590U, // : Cost 2 vext2 RHS, <1,2,3,0> + 1552114715U, // : Cost 2 vext2 <1,3,u,7>, <1,3,u,7> + 2573888822U, // : Cost 3 vext1 <3,u,7,1>, RHS + 1553441981U, // : Cost 2 vext2 <1,5,u,7>, <1,5,u,7> + 2627847438U, // : Cost 3 vext2 <1,6,u,7>, <1,6,u,7> + 2727408775U, // : Cost 3 vext3 <7,1,7,u>, <7,1,7,u> + 1555432880U, // : Cost 2 vext2 <1,u,u,7>, <1,u,u,7> + 2629838337U, // : Cost 3 vext2 <2,0,u,7>, <2,0,u,7> + 1188058754U, // : Cost 2 vrev <7,u,1,2> + 1571358312U, // : Cost 2 vext2 RHS, <2,2,2,2> + 1571358374U, // : Cost 2 vext2 RHS, <2,3,0,1> + 2632492869U, // : Cost 3 vext2 <2,4,u,7>, <2,4,u,7> + 2633156502U, // : Cost 3 vext2 <2,5,u,7>, <2,5,u,7> + 1560078311U, // : Cost 2 vext2 <2,6,u,7>, <2,6,u,7> + 2728072408U, // : Cost 3 vext3 <7,2,7,u>, <7,2,7,u> + 1561405577U, // : Cost 2 vext2 <2,u,u,7>, <2,u,u,7> + 1571358870U, // : Cost 2 vext2 RHS, <3,0,1,2> + 2627184913U, // : Cost 3 vext2 <1,5,u,7>, <3,1,5,u> + 2633820523U, // : Cost 3 vext2 <2,6,u,7>, <3,2,6,u> + 1571359132U, // : Cost 2 vext2 RHS, <3,3,3,3> + 1571359234U, // : Cost 2 vext2 RHS, <3,4,5,6> + 1512108295U, // : Cost 2 vext1 <5,u,7,3>, <5,u,7,3> + 1518080992U, // : Cost 2 vext1 <6,u,7,3>, <6,u,7,3> + 2640456465U, // : Cost 3 vext2 <3,7,u,7>, <3,7,u,7> + 1571359518U, // : Cost 2 vext2 RHS, <3,u,1,2> + 1571359634U, // : Cost 2 vext2 RHS, <4,0,5,1> + 2573911067U, // : Cost 3 vext1 <3,u,7,4>, <1,3,u,7> + 2645101622U, // : Cost 3 vext2 RHS, <4,2,5,3> + 2573912918U, // : Cost 3 vext1 <3,u,7,4>, <3,u,7,4> + 1571359952U, // : Cost 2 vext2 RHS, <4,4,4,4> + 497618248U, // : Cost 1 vext2 RHS, RHS + 1571360116U, // : Cost 2 vext2 RHS, <4,6,4,6> + 2645102024U, // : Cost 3 vext2 RHS, <4,7,5,0> + 497618473U, // : Cost 1 vext2 RHS, RHS + 2645102152U, // : Cost 3 vext2 RHS, <5,0,1,2> + 1571360464U, // : Cost 2 vext2 RHS, <5,1,7,3> + 2645102334U, // : Cost 3 vext2 RHS, <5,2,3,4> + 2645102447U, // : Cost 3 vext2 RHS, <5,3,7,0> + 1571360710U, // : Cost 2 vext2 RHS, <5,4,7,6> + 1571360772U, // : Cost 2 vext2 RHS, <5,5,5,5> + 1571360866U, // : Cost 2 vext2 RHS, <5,6,7,0> + 1571360936U, // : Cost 2 vext2 RHS, <5,7,5,7> + 1571361017U, // : Cost 2 vext2 RHS, <5,u,5,7> + 1530044518U, // : Cost 2 vext1 , LHS + 2645103016U, // : Cost 3 vext2 RHS, <6,1,7,2> + 1571361274U, // : Cost 2 vext2 RHS, <6,2,7,3> + 2645103154U, // : Cost 3 vext2 RHS, <6,3,4,5> + 1530047798U, // : Cost 2 vext1 , RHS + 1188386474U, // : Cost 2 vrev <7,u,5,6> + 1571361592U, // : Cost 2 vext2 RHS, <6,6,6,6> + 1571361614U, // : Cost 2 vext2 RHS, <6,7,0,1> + 1571361695U, // : Cost 2 vext2 RHS, <6,u,0,1> + 1571361786U, // : Cost 
2 vext2 RHS, <7,0,1,2> + 2573935616U, // : Cost 3 vext1 <3,u,7,7>, <1,3,5,7> + 2645103781U, // : Cost 3 vext2 RHS, <7,2,2,2> + 2573937497U, // : Cost 3 vext1 <3,u,7,7>, <3,u,7,7> + 1571362150U, // : Cost 2 vext2 RHS, <7,4,5,6> + 1512141067U, // : Cost 2 vext1 <5,u,7,7>, <5,u,7,7> + 1518113764U, // : Cost 2 vext1 <6,u,7,7>, <6,u,7,7> + 363253046U, // : Cost 1 vdup3 RHS + 363253046U, // : Cost 1 vdup3 RHS + 1571362515U, // : Cost 2 vext2 RHS, + 497620782U, // : Cost 1 vext2 RHS, LHS + 1571362693U, // : Cost 2 vext2 RHS, + 1571362748U, // : Cost 2 vext2 RHS, + 1571362879U, // : Cost 2 vext2 RHS, + 497621146U, // : Cost 1 vext2 RHS, RHS + 1571363024U, // : Cost 2 vext2 RHS, + 363253046U, // : Cost 1 vdup3 RHS + 497621349U, // : Cost 1 vext2 RHS, LHS + 135053414U, // : Cost 1 vdup0 LHS + 471081121U, // : Cost 1 vext2 LHS, LHS + 1544822948U, // : Cost 2 vext2 LHS, <0,2,0,2> + 1616140005U, // : Cost 2 vext3 LHS, + 1544823122U, // : Cost 2 vext2 LHS, <0,4,1,5> + 1512157453U, // : Cost 2 vext1 <5,u,u,0>, <5,u,u,0> + 1662220032U, // : Cost 2 vext3 RHS, + 1194457487U, // : Cost 2 vrev + 471081629U, // : Cost 1 vext2 LHS, LHS + 1544823542U, // : Cost 2 vext2 LHS, <1,0,3,2> + 202162278U, // : Cost 1 vdup1 LHS + 537753390U, // : Cost 1 vext3 LHS, LHS + 1544823768U, // : Cost 2 vext2 LHS, <1,3,1,3> + 1494248758U, // : Cost 2 vext1 <2,u,u,1>, RHS + 1544823952U, // : Cost 2 vext2 LHS, <1,5,3,7> + 1518138343U, // : Cost 2 vext1 <6,u,u,1>, <6,u,u,1> + 1640322907U, // : Cost 2 vext3 RHS, + 537753444U, // : Cost 1 vext3 LHS, LHS + 1482309734U, // : Cost 2 vext1 <0,u,u,2>, LHS + 1194031451U, // : Cost 2 vrev + 269271142U, // : Cost 1 vdup2 LHS + 835584U, // : Cost 0 copy LHS + 1482313014U, // : Cost 2 vext1 <0,u,u,2>, RHS + 2618566504U, // : Cost 3 vext2 LHS, <2,5,3,6> + 1544824762U, // : Cost 2 vext2 LHS, <2,6,3,7> + 1638479788U, // : Cost 2 vext3 RHS, + 835584U, // : Cost 0 copy LHS + 408576723U, // : Cost 1 vext1 LHS, LHS + 1482318582U, // : Cost 2 vext1 LHS, <1,0,3,2> + 120371557U, // : Cost 1 vrev LHS + 336380006U, // : Cost 1 vdup3 LHS + 408579382U, // : Cost 1 vext1 LHS, RHS + 1616140271U, // : Cost 2 vext3 LHS, + 1530098170U, // : Cost 2 vext1 LHS, <6,2,7,3> + 1880329544U, // : Cost 2 vzipr LHS, RHS + 408581934U, // : Cost 1 vext1 LHS, LHS + 1488298086U, // : Cost 2 vext1 <1,u,u,4>, LHS + 1488299437U, // : Cost 2 vext1 <1,u,u,4>, <1,u,u,4> + 1659271204U, // : Cost 2 vext3 LHS, + 1194195311U, // : Cost 2 vrev + 161926454U, // : Cost 1 vdup0 RHS + 471084342U, // : Cost 1 vext2 LHS, RHS + 1571368308U, // : Cost 2 vext2 RHS, <4,6,4,6> + 1640323153U, // : Cost 2 vext3 RHS, + 471084585U, // : Cost 1 vext2 LHS, RHS + 1494278246U, // : Cost 2 vext1 <2,u,u,5>, LHS + 1571368656U, // : Cost 2 vext2 RHS, <5,1,7,3> + 1494280327U, // : Cost 2 vext1 <2,u,u,5>, <2,u,u,5> + 1616140415U, // : Cost 2 vext3 LHS, + 1494281526U, // : Cost 2 vext1 <2,u,u,5>, RHS + 229035318U, // : Cost 1 vdup1 RHS + 537753754U, // : Cost 1 vext3 LHS, RHS + 1750355254U, // : Cost 2 vuzpr LHS, RHS + 537753772U, // : Cost 1 vext3 LHS, RHS + 1482342502U, // : Cost 2 vext1 <0,u,u,6>, LHS + 2556084982U, // : Cost 3 vext1 <0,u,u,6>, <1,0,3,2> + 1571369466U, // : Cost 2 vext2 RHS, <6,2,7,3> + 1611938000U, // : Cost 2 vext3 LHS, + 1482345782U, // : Cost 2 vext1 <0,u,u,6>, RHS + 1194359171U, // : Cost 2 vrev + 296144182U, // : Cost 1 vdup2 RHS + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 432496742U, // : Cost 1 vext1 RHS, LHS + 1488324016U, // : Cost 2 vext1 <1,u,u,7>, <1,u,u,7> + 1494296713U, // : Cost 2 vext1 <2,u,u,7>, 
<2,u,u,7>
+  1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+  432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+  1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+  120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+  363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+  432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+  408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+  471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+  537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+  835584U, // <u,u,u,3>: Cost 0 copy LHS
+  408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+  471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+  537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+  27705344U, // <u,u,u,7>: Cost 0 copy RHS
+  835584U, // <u,u,u,u>: Cost 0 copy LHS
+  0
+};
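For orientation while reading the table above: each 32-bit entry packs a complete shuffle recipe, and the lowering in this revision's lib/Target/ARM/ARMISelLowering.cpp unpacks it roughly as sketched below. The helper name decodePerfectShuffle is illustrative only; the index arithmetic and bit fields paraphrase LowerVECTOR_SHUFFLE and GeneratePerfectShuffle rather than quoting this diff.

  // Sketch (not part of the patch): how a PerfectShuffleTable entry is read.
  // The table is indexed by the four result lanes of a 4-element shuffle,
  // each lane 0-7, with 8 standing for the 'u' (undef) seen in the comments.
  static unsigned decodePerfectShuffle(const unsigned *Table,
                                       const unsigned Lanes[4]) {
    unsigned Idx = Lanes[0]*9*9*9 + Lanes[1]*9*9 + Lanes[2]*9 + Lanes[3];
    unsigned PFEntry = Table[Idx];
    unsigned Cost  = PFEntry >> 30;            // 2-bit cost field
    unsigned OpNum = (PFEntry >> 26) & 0xF;    // which op: vrev, vdup, vext, ...
    unsigned LHSID = (PFEntry >> 13) & 0x1FFF; // recipe id for operand 1
    unsigned RHSID = (PFEntry >>  0) & 0x1FFF; // recipe id for operand 2
    (void)OpNum; (void)LHSID; (void)RHSID;     // the real code recurses on these
    return Cost;
  }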
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index f809f37509216..d5bc3f60b01a9 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -13,6 +13,7 @@
 #include "ARM.h"
 #include "ARMAddressingModes.h"
+#include "ARMBaseInstrInfo.h"
 #include "ARMInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
@@ -26,6 +27,7 @@
 #include "llvm/CodeGen/MachineLocation.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -33,1370 +35,7 @@
 #include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
-unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
-  using namespace ARM;
-  switch (RegEnum) {
-  case R0:  case S0:  case D0:  return 0;
-  case R1:  case S1:  case D1:  return 1;
-  case R2:  case S2:  case D2:  return 2;
-  case R3:  case S3:  case D3:  return 3;
-  case R4:  case S4:  case D4:  return 4;
-  case R5:  case S5:  case D5:  return 5;
-  case R6:  case S6:  case D6:  return 6;
-  case R7:  case S7:  case D7:  return 7;
-  case R8:  case S8:  case D8:  return 8;
-  case R9:  case S9:  case D9:  return 9;
-  case R10: case S10: case D10: return 10;
-  case R11: case S11: case D11: return 11;
-  case R12: case S12: case D12: return 12;
-  case SP:  case S13: case D13: return 13;
-  case LR:  case S14: case D14: return 14;
-  case PC:  case S15: case D15: return 15;
-  case S16: return 16;
-  case S17: return 17;
-  case S18: return 18;
-  case S19: return 19;
-  case S20: return 20;
-  case S21: return 21;
-  case S22: return 22;
-  case S23: return 23;
-  case S24: return 24;
-  case S25: return 25;
-  case S26: return 26;
-  case S27: return 27;
-  case S28: return 28;
-  case S29: return 29;
-  case S30: return 30;
-  case S31: return 31;
-  default:
-    assert(0 && "Unknown ARM register!");
-    abort();
-  }
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
-                                                   bool &isSPVFP) {
-  isSPVFP = false;
-
-  using namespace ARM;
-  switch (RegEnum) {
-  default:
-    assert(0 && "Unknown ARM register!");
-    abort();
-  case R0:  case D0:  return 0;
-  case R1:  case D1:  return 1;
-  case R2:  case D2:  return 2;
-  case R3:  case D3:  return 3;
-  case R4:  case D4:  return 4;
-  case R5:  case D5:  return 5;
-  case R6:  case D6:  return 6;
-  case R7:  case D7:  return 7;
-  case R8:  case D8:  return 8;
-  case R9:  case D9:  return 9;
-  case R10: case D10: return 10;
-  case R11: case D11: return 11;
-  case R12: case D12: return 12;
-  case SP:  case D13: return 13;
-  case LR:  case D14: return 14;
-  case PC:  case D15: return 15;
-
-  case S0: case S1: case S2: case S3:
-  case S4: case S5: case S6: case S7:
-  case S8: case S9: case S10: case S11:
-  case S12: case S13: case S14: case S15:
-  case S16: case S17: case S18: case S19:
-  case S20: case S21: case S22: case S23:
-  case S24: case S25: case S26: case S27:
-  case S28: case S29: case S30: case S31: {
-    isSPVFP = true;
-    switch (RegEnum) {
-    default: return 0; // Avoid compile time warning.
-    case S0: return 0;
-    case S1: return 1;
-    case S2: return 2;
-    case S3: return 3;
-    case S4: return 4;
-    case S5: return 5;
-    case S6: return 6;
-    case S7: return 7;
-    case S8: return 8;
-    case S9: return 9;
-    case S10: return 10;
-    case S11: return 11;
-    case S12: return 12;
-    case S13: return 13;
-    case S14: return 14;
-    case S15: return 15;
-    case S16: return 16;
-    case S17: return 17;
-    case S18: return 18;
-    case S19: return 19;
-    case S20: return 20;
-    case S21: return 21;
-    case S22: return 22;
-    case S23: return 23;
-    case S24: return 24;
-    case S25: return 25;
-    case S26: return 26;
-    case S27: return 27;
-    case S28: return 28;
-    case S29: return 29;
-    case S30: return 30;
-    case S31: return 31;
-    }
-  }
-  }
-}
-
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const TargetInstrInfo &tii,
-                                         const ARMSubtarget &sti)
-  : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
-    TII(tii), STI(sti),
-    FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
-}
-
-ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
                                  const ARMSubtarget &sti)
   : ARMBaseRegisterInfo(tii, sti) {
 }
-
-static inline
-const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
-  return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
-}
-
-static inline
-const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
-  return MIB.addReg(0);
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator &MBBI,
-                                        const TargetInstrInfo *TII, DebugLoc dl,
-                                        unsigned DestReg, int Val,
-                                        ARMCC::CondCodes Pred,
-                                        unsigned PredReg) const {
-  MachineFunction &MF = *MBB.getParent();
-  MachineConstantPool *ConstantPool = MF.getConstantPool();
-  Constant *C = ConstantInt::get(Type::Int32Ty, Val);
-  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
-  BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg)
-    .addConstantPoolIndex(Idx)
-    .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
-}
-
-const unsigned*
-ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  static const unsigned CalleeSavedRegs[] = {
-    ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
-    ARM::R7, ARM::R6,  ARM::R5,  ARM::R4,
-
-    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
-    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
-    0
-  };
-
-  static const unsigned DarwinCalleeSavedRegs[] = {
-    // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
-    // register.
-    ARM::LR,  ARM::R7,  ARM::R6, ARM::R5, ARM::R4,
-    ARM::R11, ARM::R10, ARM::R8,
-
-    ARM::D15, ARM::D14, ARM::D13, ARM::D12,
-    ARM::D11, ARM::D10, ARM::D9,  ARM::D8,
-    0
-  };
-  return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
-}
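// (Editorial note, not part of the patch: the class arrays that follow are
// parallel to the register arrays in getCalleeSavedRegs above -- the frame
// lowering code walks both with the same index -- so each slot's class has
// to match its register: GPRRegClass for core registers, DPRRegClass for
// D8-D15, and tGPRRegClass where Thumb-1 code can only reach the low
// registers R4-R7.)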
-
-const TargetRegisterClass* const *
-ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
-    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    0
-  };
-
-  static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
-    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
-    &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
-
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    0
-  };
-
-  static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = {
-    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-    &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-    &ARM::GPRRegClass, &ARM::GPRRegClass,
-
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    0
-  };
-
-  static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={
-    &ARM::GPRRegClass,  &ARM::tGPRRegClass, &ARM::tGPRRegClass,
-    &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass,
-    &ARM::GPRRegClass,  &ARM::GPRRegClass,
-
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
-    0
-  };
-
-  if (STI.isThumb()) {
-    return STI.isTargetDarwin()
-      ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses;
-  }
-  return STI.isTargetDarwin()
-    ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
-}
-
-BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
-  // FIXME: avoid re-calculating this everytime.
-  BitVector Reserved(getNumRegs());
-  Reserved.set(ARM::SP);
-  Reserved.set(ARM::PC);
-  if (STI.isTargetDarwin() || hasFP(MF))
-    Reserved.set(FramePtr);
-  // Some targets reserve R9.
-  if (STI.isR9Reserved())
-    Reserved.set(ARM::R9);
-  return Reserved;
-}
-
-bool
-ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
-  switch (Reg) {
-  default: break;
-  case ARM::SP:
-  case ARM::PC:
-    return true;
-  case ARM::R7:
-  case ARM::R11:
-    if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
-      return true;
-    break;
-  case ARM::R9:
-    return STI.isR9Reserved();
-  }
-
-  return false;
-}
-
-const TargetRegisterClass *ARMBaseRegisterInfo::getPointerRegClass() const {
-  return &ARM::GPRRegClass;
-}
-
-/// getAllocationOrder - Returns the register allocation order for a specified
-/// register class in the form of a pair of TargetRegisterClass iterators.
-std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
-ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
-                                        unsigned HintType, unsigned HintReg,
-                                        const MachineFunction &MF) const {
-  // Alternative register allocation orders when favoring even / odd registers
-  // of register pairs.
-
-  // No FP, R9 is available.
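// (Editorial gloss on the six orderings below: ARM's ldrd/strd want an
// even/odd register pair -- Rt even, Rt2 == Rt+1 -- which is what the
// RegPairEven/RegPairOdd hints ask for.  The variants differ only in
// omitting whichever of R7/R11 is serving as the frame pointer, and in
// omitting R9 when the subtarget reserves it.)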
- static const unsigned GPREven1[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, - ARM::R9, ARM::R11 - }; - static const unsigned GPROdd1[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, - ARM::R8, ARM::R10 - }; - - // FP is R7, R9 is available. - static const unsigned GPREven2[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, - ARM::R9, ARM::R11 - }; - static const unsigned GPROdd2[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, - ARM::R8, ARM::R10 - }; - - // FP is R11, R9 is available. - static const unsigned GPREven3[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, - ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, - ARM::R9 - }; - static const unsigned GPROdd3[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, - ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, - ARM::R8 - }; - - // No FP, R9 is not available. - static const unsigned GPREven4[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, - ARM::R11 - }; - static const unsigned GPROdd4[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, - ARM::R10 - }; - - // FP is R7, R9 is not available. - static const unsigned GPREven5[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R10, - ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, - ARM::R11 - }; - static const unsigned GPROdd5[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R11, - ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, - ARM::R10 - }; - - // FP is R11, R9 is not available. - static const unsigned GPREven6[] = { - ARM::R0, ARM::R2, ARM::R4, ARM::R6, - ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 - }; - static const unsigned GPROdd6[] = { - ARM::R1, ARM::R3, ARM::R5, ARM::R7, - ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 - }; - - - if (HintType == ARMRI::RegPairEven) { - if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) - // It's no longer possible to fulfill this hint. Return the default - // allocation order. - return std::make_pair(RC->allocation_order_begin(MF), - RC->allocation_order_end(MF)); - - if (!STI.isTargetDarwin() && !hasFP(MF)) { - if (!STI.isR9Reserved()) - return std::make_pair(GPREven1, - GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); - else - return std::make_pair(GPREven4, - GPREven4 + (sizeof(GPREven4)/sizeof(unsigned))); - } else if (FramePtr == ARM::R7) { - if (!STI.isR9Reserved()) - return std::make_pair(GPREven2, - GPREven2 + (sizeof(GPREven2)/sizeof(unsigned))); - else - return std::make_pair(GPREven5, - GPREven5 + (sizeof(GPREven5)/sizeof(unsigned))); - } else { // FramePtr == ARM::R11 - if (!STI.isR9Reserved()) - return std::make_pair(GPREven3, - GPREven3 + (sizeof(GPREven3)/sizeof(unsigned))); - else - return std::make_pair(GPREven6, - GPREven6 + (sizeof(GPREven6)/sizeof(unsigned))); - } - } else if (HintType == ARMRI::RegPairOdd) { - if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0) - // It's no longer possible to fulfill this hint. Return the default - // allocation order. 
-      return std::make_pair(RC->allocation_order_begin(MF),
-                            RC->allocation_order_end(MF));
-
-    if (!STI.isTargetDarwin() && !hasFP(MF)) {
-      if (!STI.isR9Reserved())
-        return std::make_pair(GPROdd1,
-                              GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
-      else
-        return std::make_pair(GPROdd4,
-                              GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
-    } else if (FramePtr == ARM::R7) {
-      if (!STI.isR9Reserved())
-        return std::make_pair(GPROdd2,
-                              GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
-      else
-        return std::make_pair(GPROdd5,
-                              GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
-    } else { // FramePtr == ARM::R11
-      if (!STI.isR9Reserved())
-        return std::make_pair(GPROdd3,
-                              GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
-      else
-        return std::make_pair(GPROdd6,
-                              GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
-    }
-  }
-  return std::make_pair(RC->allocation_order_begin(MF),
-                        RC->allocation_order_end(MF));
-}
-
-/// ResolveRegAllocHint - Resolves the specified register allocation hint
-/// to a physical register. Returns the physical register if it is successful.
-unsigned
-ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
-                                         const MachineFunction &MF) const {
-  if (Reg == 0 || !isPhysicalRegister(Reg))
-    return 0;
-  if (Type == 0)
-    return Reg;
-  else if (Type == (unsigned)ARMRI::RegPairOdd)
-    // Odd register.
-    return getRegisterPairOdd(Reg, MF);
-  else if (Type == (unsigned)ARMRI::RegPairEven)
-    // Even register.
-    return getRegisterPairEven(Reg, MF);
-  return 0;
-}
-
-void
-ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
-                                        MachineFunction &MF) const {
-  MachineRegisterInfo *MRI = &MF.getRegInfo();
-  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
-  if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
-       Hint.first == (unsigned)ARMRI::RegPairEven) &&
-      Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
-    // If 'Reg' is one of the even / odd register pair and it's now changed
-    // (e.g. coalesced) into a different register. The other register of the
-    // pair allocation hint must be updated to reflect the relationship
-    // change.
-    unsigned OtherReg = Hint.second;
-    Hint = MRI->getRegAllocationHint(OtherReg);
-    if (Hint.second == Reg)
-      // Make sure the pair has not already divorced.
-      MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
-  }
-}
-
-bool
-ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
-  return true;
-}
-
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register. This is true if the function has variable sized allocas
-/// or if frame pointer elimination is disabled.
-///
-bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  return (NoFramePointerElim ||
-          MFI->hasVarSizedObjects() ||
-          MFI->isFrameAddressTaken());
-}
-
-// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-// not required, we reserve argument space for call sites in the function
-// immediately on entry to the current function. This eliminates the need for
-// add/sub sp brackets around call sites. Returns true if the call frame is
-// included as part of the stack frame.
-bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
-  unsigned CFSize = FFI->getMaxCallFrameSize();
-  // It's not always a good idea to include the call frame as part of the
-  // stack frame.
ARM (especially Thumb) has small immediate offset to - // address the stack frame. So a large call frame can cause poor codegen - // and may even makes it impossible to scavenge a register. - if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 - return false; - - return !MF.getFrameInfo()->hasVarSizedObjects(); -} - -/// emitARMRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in ARM code. -static -void emitARMRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, int NumBytes, - ARMCC::CondCodes Pred, unsigned PredReg, - const TargetInstrInfo &TII, - DebugLoc dl) { - bool isSub = NumBytes < 0; - if (isSub) NumBytes = -NumBytes; - - while (NumBytes) { - unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); - unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); - assert(ThisVal && "Didn't extract field correctly"); - - // We will handle these bits from offset, clear them. - NumBytes &= ~ThisVal; - - // Get the properly encoded SOImmVal field. - int SOImmVal = ARM_AM::getSOImmVal(ThisVal); - assert(SOImmVal != -1 && "Bit extraction didn't work?"); - - // Build the new ADD / SUB. - BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(SOImmVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); - BaseReg = DestReg; - } -} - -static void -emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { - emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII, dl); -} - -void ARMRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. - unsigned PredReg = Old->getOperand(2).getReg(); - emitSPUpdate(MBB, I, TII, dl, -Amount, Pred, PredReg); - } else { - // Note: PredReg is operand 3 for ADJCALLSTACKUP. - unsigned PredReg = Old->getOperand(3).getReg(); - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, TII, dl, Amount, Pred, PredReg); - } - } - } - MBB.erase(I); -} - -/// findScratchRegister - Find a 'free' ARM register. If register scavenger -/// is not being used, R12 is available. Otherwise, try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC, - ARMFunctionInfo *AFI) { - unsigned Reg = RS ? 
RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12;
-  assert (!AFI->isThumbFunction());
-  if (Reg == 0)
-    // Try a already spilled CS register.
-    Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters());
-
-  return Reg;
-}
-
-void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                          int SPAdj, RegScavenger *RS) const{
-  unsigned i = 0;
-  MachineInstr &MI = *II;
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc dl = MI.getDebugLoc();
-
-  while (!MI.getOperand(i).isFI()) {
-    ++i;
-    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
-  }
-
-  unsigned FrameReg = ARM::SP;
-  int FrameIndex = MI.getOperand(i).getIndex();
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MF.getFrameInfo()->getStackSize() + SPAdj;
-
-  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
-    Offset -= AFI->getGPRCalleeSavedArea1Offset();
-  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
-    Offset -= AFI->getGPRCalleeSavedArea2Offset();
-  else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex))
-    Offset -= AFI->getDPRCalleeSavedAreaOffset();
-  else if (hasFP(MF)) {
-    assert(SPAdj == 0 && "Unexpected");
-    // There is alloca()'s in this function, must reference off the frame
-    // pointer instead.
-    FrameReg = getFrameRegister(MF);
-    Offset -= AFI->getFramePtrSpillOffset();
-  }
-
-  unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = MI.getDesc();
-  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
-  bool isSub = false;
-
-  // Memory operands in inline assembly always use AddrMode2.
-  if (Opcode == ARM::INLINEASM)
-    AddrMode = ARMII::AddrMode2;
-
-  if (Opcode == ARM::ADDri) {
-    Offset += MI.getOperand(i+1).getImm();
-    if (Offset == 0) {
-      // Turn it into a move.
-      MI.setDesc(TII.get(ARM::MOVr));
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.RemoveOperand(i+1);
-      return;
-    } else if (Offset < 0) {
-      Offset = -Offset;
-      isSub = true;
-      MI.setDesc(TII.get(ARM::SUBri));
-    }
-
-    // Common case: small offset, fits into instruction.
-    int ImmedOffset = ARM_AM::getSOImmVal(Offset);
-    if (ImmedOffset != -1) {
-      // Replace the FrameIndex with sp / fp
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
-      return;
-    }
-
-    // Otherwise, we fallback to common code below to form the imm offset with
-    // a sequence of ADDri instructions. First though, pull as much of the imm
-    // into this ADDri as possible.
-    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
-    unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
-
-    // We will handle these bits from offset, clear them.
-    Offset &= ~ThisImmVal;
-
-    // Get the properly encoded SOImmVal field.
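// (Editorial worked example of the peeling above: Offset = 0x10004 is not
// a valid so_imm, since an 8-bit value rotated right by an even amount
// cannot cover bit 2 and bit 16 at once.  getSOImmValRotate aims the 8-bit
// window at the low chunk, so ThisImmVal becomes 0x4 and stays in this
// ADDri; the remaining 0x10000 (0x01 rotated right by 16) is materialized
// below through the scratch register.)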
- int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal); - assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); - MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal); - } else { - unsigned ImmIdx = 0; - int InstrOffs = 0; - unsigned NumBits = 0; - unsigned Scale = 1; - switch (AddrMode) { - case ARMII::AddrMode2: { - ImmIdx = i+2; - InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); - if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) - InstrOffs *= -1; - NumBits = 12; - break; - } - case ARMII::AddrMode3: { - ImmIdx = i+2; - InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); - if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) - InstrOffs *= -1; - NumBits = 8; - break; - } - case ARMII::AddrMode5: { - ImmIdx = i+1; - InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); - if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) - InstrOffs *= -1; - NumBits = 8; - Scale = 4; - break; - } - default: - assert(0 && "Unsupported addressing mode!"); - abort(); - break; - } - - Offset += InstrOffs * Scale; - assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); - if (Offset < 0) { - Offset = -Offset; - isSub = true; - } - - // Common case: small offset, fits into instruction. - MachineOperand &ImmOp = MI.getOperand(ImmIdx); - int ImmedOffset = Offset / Scale; - unsigned Mask = (1 << NumBits) - 1; - if ((unsigned)Offset <= Mask * Scale) { - // Replace the FrameIndex with sp - MI.getOperand(i).ChangeToRegister(FrameReg, false); - if (isSub) - ImmedOffset |= 1 << NumBits; - ImmOp.ChangeToImmediate(ImmedOffset); - return; - } - - // Otherwise, it didn't fit. Pull in what we can to simplify the immed. - ImmedOffset = ImmedOffset & Mask; - if (isSub) - ImmedOffset |= 1 << NumBits; - ImmOp.ChangeToImmediate(ImmedOffset); - Offset &= ~(Mask*Scale); - } - - // If we get here, the immediate doesn't fit into the instruction. We folded - // as much as possible above, handle the rest, providing a register that is - // SP+LargeImm. - assert(Offset && "This code isn't needed if offset already handled!"); - - // Insert a set of r12 with the full address: r12 = sp + offset - // If the offset we have is too large to fit into the instruction, we need - // to form it with a series of ADDri's. Do this by taking 8-bit chunks - // out of 'Offset'. - unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI); - if (ScratchReg == 0) - // No register is "free". Scavenge a register. - ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj); - int PIdx = MI.findFirstPredOperandIdx(); - ARMCC::CondCodes Pred = (PIdx == -1) - ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); - unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); - emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg, - isSub ? 
-Offset : Offset, Pred, PredReg, TII, dl); - MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); -} - -static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - } - return (unsigned)Offset; -} - -void -ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - // This tells PEI to spill the FP as if it were any other callee-save register - // to take advantage of the eliminateFrameIndex machinery. This also ensures it - // is spilled in the order specified by getCalleeSavedRegs() to make it easier - // to combine multiple loads / stores. - bool CanEliminateFrame = true; - bool CS1Spilled = false; - bool LRSpilled = false; - unsigned NumGPRSpills = 0; - SmallVector<unsigned, 4> UnspilledCS1GPRs; - SmallVector<unsigned, 4> UnspilledCS2GPRs; - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - - // Don't spill FP if the frame can be eliminated. This is determined - // by scanning the callee-save registers to see if any is used. - const unsigned *CSRegs = getCalleeSavedRegs(); - const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); - for (unsigned i = 0; CSRegs[i]; ++i) { - unsigned Reg = CSRegs[i]; - bool Spilled = false; - if (MF.getRegInfo().isPhysRegUsed(Reg)) { - AFI->setCSRegisterIsSpilled(Reg); - Spilled = true; - CanEliminateFrame = false; - } else { - // Check alias registers too. - for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) { - if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { - Spilled = true; - CanEliminateFrame = false; - } - } - } - - if (CSRegClasses[i] == &ARM::GPRRegClass) { - if (Spilled) { - NumGPRSpills++; - - if (!STI.isTargetDarwin()) { - if (Reg == ARM::LR) - LRSpilled = true; - CS1Spilled = true; - continue; - } - - // Keep track of whether LR and any of R4, R5, R6, and R7 are spilled. - switch (Reg) { - case ARM::LR: - LRSpilled = true; - // Fallthrough - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - CS1Spilled = true; - break; - default: - break; - } - } else { - if (!STI.isTargetDarwin()) { - UnspilledCS1GPRs.push_back(Reg); - continue; - } - - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - UnspilledCS1GPRs.push_back(Reg); - break; - default: - UnspilledCS2GPRs.push_back(Reg); - break; - } - } - } - } - - bool ForceLRSpill = false; - if (!LRSpilled && AFI->isThumbFunction()) { - unsigned FnSize = TII.GetFunctionSizeInBytes(MF); - // Force LR to be spilled if the Thumb function size is > 2048. This enables - // the use of BL to implement a far jump. If it turns out that it's not needed - // then the branch fix up path will undo it. - if (FnSize >= (1 << 11)) { - CanEliminateFrame = false; - ForceLRSpill = true; - } - } - - bool ExtraCSSpill = false; - if (!CanEliminateFrame || hasFP(MF)) { - AFI->setHasStackFrame(true); -
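
The core of estimateStackSize above is an alignment-aware running sum over the frame objects. A self-contained model of the same computation (Obj is a stand-in type invented for this sketch, not an LLVM one):

#include <cstdio>
#include <vector>

struct Obj { unsigned Size, Align; };  // stand-in for a frame object

// Sum object sizes, rounding the running offset up to each object's
// alignment with the same (Offset+Align-1)/Align*Align idiom used above.
static unsigned estimateSize(unsigned MaxFixedOff,
                             const std::vector<Obj> &Objs) {
  unsigned Offset = MaxFixedOff;
  for (const Obj &O : Objs) {
    Offset += O.Size;
    Offset = (Offset + O.Align - 1) / O.Align * O.Align;
  }
  return Offset;
}

int main() {
  // 4 bytes at align 4, then 1 byte padded out to an 8-byte boundary.
  std::printf("%u\n", estimateSize(0, {{4, 4}, {1, 8}}));  // prints 8
  return 0;
}
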
- // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled, - // spill LR as well so we can fold BX_RET into the register restore (LDM). - if (!LRSpilled && CS1Spilled) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); - AFI->setCSRegisterIsSpilled(ARM::LR); - NumGPRSpills++; - UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), - UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); - ForceLRSpill = false; - ExtraCSSpill = true; - } - - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(FramePtr); - NumGPRSpills++; - } - - // If stack and double are 8-byte aligned and we are spilling an odd number - // of GPRs, spill one extra callee save GPR so we won't have to pad between - // the integer and double callee save areas. - unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); - if (TargetAlign == 8 && (NumGPRSpills & 1)) { - if (CS1Spilled && !UnspilledCS1GPRs.empty()) { - for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { - unsigned Reg = UnspilledCS1GPRs[i]; - // Don't spill a high register if the function is Thumb - if (!AFI->isThumbFunction() || - isARMLowRegister(Reg) || Reg == ARM::LR) { - MF.getRegInfo().setPhysRegUsed(Reg); - AFI->setCSRegisterIsSpilled(Reg); - if (!isReservedReg(MF, Reg)) - ExtraCSSpill = true; - break; - } - } - } else if (!UnspilledCS2GPRs.empty() && - !AFI->isThumbFunction()) { - unsigned Reg = UnspilledCS2GPRs.front(); - MF.getRegInfo().setPhysRegUsed(Reg); - AFI->setCSRegisterIsSpilled(Reg); - if (!isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } - } - - // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additional - // callee-saved register or reserve a special spill slot to facilitate - // register scavenging. - if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned Size = estimateStackSize(MF, MFI); - unsigned Limit = (1 << 12) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end();BB != E; ++BB) - for (MachineBasicBlock::iterator I= BB->begin(); I != BB->end(); ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (I->getOperand(i).isFI()) { - unsigned Opcode = I->getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); - unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - if (AddrMode == ARMII::AddrMode3) { - Limit = (1 << 8) - 1; - goto DoneEstimating; - } else if (AddrMode == ARMII::AddrMode5) { - unsigned ThisLimit = ((1 << 8) - 1) * 4; - if (ThisLimit < Limit) - Limit = ThisLimit; - } - } - } - DoneEstimating: - if (Size >= Limit) { - // If any non-reserved CS register isn't spilled, just spill one or two - // extra. That should take care of it! - unsigned NumExtras = TargetAlign / 4; - SmallVector<unsigned, 4> Extras; - while (NumExtras && !UnspilledCS1GPRs.empty()) { - unsigned Reg = UnspilledCS1GPRs.back(); - UnspilledCS1GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { - Extras.push_back(Reg); - NumExtras--; - } - } - while (NumExtras && !UnspilledCS2GPRs.empty()) { - unsigned Reg = UnspilledCS2GPRs.back(); - UnspilledCS2GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { - Extras.push_back(Reg); - NumExtras--; - } - } - if (Extras.size() && NumExtras == 0) { - for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); - AFI->setCSRegisterIsSpilled(Extras[i]); - } - } else { - // Reserve a slot closest to SP or frame pointer.
- const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); - } - } - } - - if (ForceLRSpill) { - MF.getRegInfo().setPhysRegUsed(ARM::LR); - AFI->setCSRegisterIsSpilled(ARM::LR); - AFI->setLRIsSpilledForFarJump(true); - } -} - -/// Move the iterator past the next bunch of callee save load / store ops for -/// the particular spill area (1: integer area 1, 2: integer area 2, -/// 3: fp area, 0: don't care). -static void movePastCSLoadStoreOps(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - int Opc, unsigned Area, - const ARMSubtarget &STI) { - while (MBBI != MBB.end() && - MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) { - if (Area != 0) { - bool Done = false; - unsigned Category = 0; - switch (MBBI->getOperand(0).getReg()) { - case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: - case ARM::LR: - Category = 1; - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - Category = STI.isTargetDarwin() ? 2 : 1; - break; - case ARM::D8: case ARM::D9: case ARM::D10: case ARM::D11: - case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15: - Category = 3; - break; - default: - Done = true; - break; - } - if (Done || Category != Area) - break; - } - - ++MBBI; - } -} - -void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); - unsigned NumBytes = MFI->getStackSize(); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - DebugLoc dl = (MBBI != MBB.end() ? - MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); - - // Determine the size of each callee-save spill area and record which frame - // index belongs to which area. - unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; - int FramePtrSpillFI = 0; - - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, -VARegSaveSize); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); - return; - } - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - int FI = CSI[i].getFrameIdx(); - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - break; - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { - AFI->addGPRCalleeSavedArea2Frame(FI); - GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - } - break; - default: - AFI->addDPRCalleeSavedAreaFrame(FI); - DPRCSSize += 8; - } - } - - // Build the new SUBri to adjust SP for integer callee-save spill area 1. - emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS1Size); - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); - - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::ADDri), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); - AddDefaultCC(AddDefaultPred(MIB)); - } - - // Build the new SUBri to adjust SP for integer callee-save spill area 2.
- emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS2Size); - - // Build the new SUBri to adjust SP for FP callee-save spill area. - movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); - emitSPUpdate(MBB, MBBI, TII, dl, -DPRCSSize); - - // Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); - AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); - AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); - AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - - NumBytes = DPRCSOffset; - if (NumBytes) { - // Insert it after all the callee-save spills. - movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); - emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes); - } - - if (STI.isTargetELF() && hasFP(MF)) { - MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - - AFI->getFramePtrSpillOffset()); - } - - AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); - AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); - AFI->setDPRCalleeSavedAreaSize(DPRCSSize); -} - -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - return ((MI->getOpcode() == ARM::FLDD || - MI->getOpcode() == ARM::LDR) && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); -} - -void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert(MBBI->getOpcode() == ARM::BX_RET && - "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); - int NumBytes = (int)MFI->getStackSize(); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); - } else { - // Unwind MBBI to point to first LDR / FLDD. - const unsigned *CSRegs = getCalleeSavedRegs(); - if (MBBI != MBB.begin()) { - do - --MBBI; - while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); - if (!isCSRestore(MBBI, CSRegs)) - ++MBBI; - } - - // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + - AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize()); - - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (AFI->getGPRCalleeSavedArea2Size() || - AFI->getDPRCalleeSavedAreaSize() || - AFI->getDPRCalleeSavedAreaOffset()|| - hasFP(MF)) { - if (NumBytes) - BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) - .addImm(NumBytes) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); - } - } else if (NumBytes) { - emitSPUpdate(MBB, MBBI, TII, dl, NumBytes); - } - - // Move SP to start of integer callee save spill area 2. 
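
The spill-area offsets computed in the prologue above, and unwound area by area in this epilogue, follow from simple arithmetic over the three save-area sizes. A worked example as a compilable sketch, with assumed sizes (a 64-byte frame; r4-r7 plus lr in area 1, two Darwin area-2 GPRs, two D registers):

#include <cstdio>

int main() {
  unsigned NumBytes   = 64;  // total stack size
  unsigned GPRCS1Size = 20;  // r4-r7 + lr, 4 bytes each
  unsigned GPRCS2Size = 8;   // e.g. r8, r10
  unsigned DPRCSSize  = 16;  // e.g. d8, d9

  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;

  // Locals and spills occupy [0, DPRCSOffset); the three callee-save areas
  // sit above them, with area 1 closest to the incoming SP.
  std::printf("locals=%u dpr@%u gprcs2@%u gprcs1@%u\n",
              DPRCSOffset, DPRCSOffset, GPRCS2Offset, GPRCS1Offset);
  return 0;
}
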
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); - emitSPUpdate(MBB, MBBI, TII, dl, AFI->getDPRCalleeSavedAreaSize()); - - // Move SP to start of integer callee save spill area 1. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); - emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea2Size()); - - // Move SP to SP upon entry to the function. - movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); - emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea1Size()); - } - - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, VARegSaveSize); - -} - -unsigned ARMBaseRegisterInfo::getRARegister() const { - return ARM::LR; -} - -unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const { - if (STI.isTargetDarwin() || hasFP(MF)) - return FramePtr; - return ARM::SP; -} - -unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { - assert(0 && "What is the exception register"); - return 0; -} - -unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { - assert(0 && "What is the exception handler register"); - return 0; -} - -int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); -} - -unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, - const MachineFunction &MF) const { - switch (Reg) { - default: break; - // Return 0 if either register of the pair is a special register. - // So no R12, etc. - case ARM::R1: - return ARM::R0; - case ARM::R3: - // FIXME! - return STI.isThumb() ? 0 : ARM::R2; - case ARM::R5: - return ARM::R4; - case ARM::R7: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; - case ARM::R9: - return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; - case ARM::R11: - return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; - - case ARM::S1: - return ARM::S0; - case ARM::S3: - return ARM::S2; - case ARM::S5: - return ARM::S4; - case ARM::S7: - return ARM::S6; - case ARM::S9: - return ARM::S8; - case ARM::S11: - return ARM::S10; - case ARM::S13: - return ARM::S12; - case ARM::S15: - return ARM::S14; - case ARM::S17: - return ARM::S16; - case ARM::S19: - return ARM::S18; - case ARM::S21: - return ARM::S20; - case ARM::S23: - return ARM::S22; - case ARM::S25: - return ARM::S24; - case ARM::S27: - return ARM::S26; - case ARM::S29: - return ARM::S28; - case ARM::S31: - return ARM::S30; - - case ARM::D1: - return ARM::D0; - case ARM::D3: - return ARM::D2; - case ARM::D5: - return ARM::D4; - case ARM::D7: - return ARM::D6; - case ARM::D9: - return ARM::D8; - case ARM::D11: - return ARM::D10; - case ARM::D13: - return ARM::D12; - case ARM::D15: - return ARM::D14; - } - - return 0; -} - -unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, - const MachineFunction &MF) const { - switch (Reg) { - default: break; - // Return 0 if either register of the pair is a special register. - // So no R12, etc. - case ARM::R0: - return ARM::R1; - case ARM::R2: - // FIXME! - return STI.isThumb() ? 0 : ARM::R3; - case ARM::R4: - return ARM::R5; - case ARM::R6: - return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; - case ARM::R8: - return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; - case ARM::R10: - return isReservedReg(MF, ARM::R11) ? 
0 : ARM::R11; - - case ARM::S0: - return ARM::S1; - case ARM::S2: - return ARM::S3; - case ARM::S4: - return ARM::S5; - case ARM::S6: - return ARM::S7; - case ARM::S8: - return ARM::S9; - case ARM::S10: - return ARM::S11; - case ARM::S12: - return ARM::S13; - case ARM::S14: - return ARM::S15; - case ARM::S16: - return ARM::S17; - case ARM::S18: - return ARM::S19; - case ARM::S20: - return ARM::S21; - case ARM::S22: - return ARM::S23; - case ARM::S24: - return ARM::S25; - case ARM::S26: - return ARM::S27; - case ARM::S28: - return ARM::S29; - case ARM::S30: - return ARM::S31; - - case ARM::D0: - return ARM::D1; - case ARM::D2: - return ARM::D3; - case ARM::D4: - return ARM::D5; - case ARM::D6: - return ARM::D7; - case ARM::D8: - return ARM::D9; - case ARM::D10: - return ARM::D11; - case ARM::D12: - return ARM::D13; - case ARM::D14: - return ARM::D15; - } - - return 0; -} - -#include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 7fe075a65ee8e..041afd0414027 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -16,127 +16,26 @@ #include "ARM.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "ARMGenRegisterInfo.h.inc" +#include "ARMBaseRegisterInfo.h" namespace llvm { class ARMSubtarget; - class TargetInstrInfo; + class ARMBaseInstrInfo; class Type; -/// Register allocation hints. -namespace ARMRI { - enum { - RegPairOdd = 1, - RegPairEven = 2 +namespace ARM { + /// SubregIndex - The index of various subregister classes. Note that + /// these indices must be kept in sync with the class indices in the + /// ARMRegisterInfo.td file. + enum SubregIndex { + SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, + DSUBREG_0 = 5, DSUBREG_1 = 6 }; } -/// isARMLowRegister - Returns true if the register is low register r0-r7. -/// -static inline bool isARMLowRegister(unsigned Reg) { - using namespace ARM; - switch (Reg) { - case R0: case R1: case R2: case R3: - case R4: case R5: case R6: case R7: - return true; - default: - return false; - } -} - -struct ARMBaseRegisterInfo : public ARMGenRegisterInfo { -protected: - const TargetInstrInfo &TII; - const ARMSubtarget &STI; - - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; -public: - ARMBaseRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); - - /// getRegisterNumbering - Given the enum value for some register, e.g. - /// ARM::LR, return the number that it corresponds to (e.g. 14). - static unsigned getRegisterNumbering(unsigned RegEnum); - - /// Same as previous getRegisterNumbering except it returns true in isSPVFP - /// if the register is a single precision VFP register. - static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); - - /// Code Generation virtual methods... 
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - - BitVector getReservedRegs(const MachineFunction &MF) const; - - bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - - const TargetRegisterClass *getPointerRegClass() const; - - std::pair - getAllocationOrder(const TargetRegisterClass *RC, - unsigned HintType, unsigned HintReg, - const MachineFunction &MF) const; - - unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, - const MachineFunction &MF) const; - - void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, - MachineFunction &MF) const; - - bool hasFP(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; - - // Debug information queries. - unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; - - // Exception handling queries. - unsigned getEHExceptionRegister() const; - unsigned getEHHandlerRegister() const; - - int getDwarfRegNum(unsigned RegNum, bool isEH) const; - - bool isLowRegister(unsigned Reg) const; - -private: - unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; - - unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; -}; - struct ARMRegisterInfo : public ARMBaseRegisterInfo { public: - ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); - - /// emitLoadConstPool - Emits a load from constpool to materialize the - /// specified immediate. - void emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo *TII, DebugLoc dl, - unsigned DestReg, int Val, - ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; - - /// Code Generation virtual methods... 
- bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - - bool requiresRegisterScavenging(const MachineFunction &MF) const; - - bool hasReservedCallFrame(MachineFunction &MF) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index a057e5cabf60c..20a7355b76530 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -18,8 +18,8 @@ class ARMReg num, string n, list subregs = []> : Register { let SubRegs = subregs; } -class ARMFReg num, string n> : Register { - field bits<5> Num; +class ARMFReg num, string n> : Register { + field bits<6> Num; let Namespace = "ARM"; } @@ -58,10 +58,11 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">; def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">; def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">; def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; +def SDummy : ARMFReg<63, "sINVALID">; // Aliases of the F* registers used to hold 64-bit fp values (doubles) def D0 : ARMReg< 0, "d0", [S0, S1]>; -def D1 : ARMReg< 1, "d1", [S2, S3]>; +def D1 : ARMReg< 1, "d1", [S2, S3]>; def D2 : ARMReg< 2, "d2", [S4, S5]>; def D3 : ARMReg< 3, "d3", [S6, S7]>; def D4 : ARMReg< 4, "d4", [S8, S9]>; @@ -78,18 +79,18 @@ def D14 : ARMReg<14, "d14", [S28, S29]>; def D15 : ARMReg<15, "d15", [S30, S31]>; // VFP3 defines 16 additional double registers -def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d16">; -def D18 : ARMFReg<18, "d16">; def D19 : ARMFReg<19, "d16">; -def D20 : ARMFReg<20, "d16">; def D21 : ARMFReg<21, "d16">; -def D22 : ARMFReg<22, "d16">; def D23 : ARMFReg<23, "d16">; -def D24 : ARMFReg<24, "d16">; def D25 : ARMFReg<25, "d16">; -def D26 : ARMFReg<26, "d16">; def D27 : ARMFReg<27, "d16">; -def D28 : ARMFReg<28, "d16">; def D29 : ARMFReg<29, "d16">; -def D30 : ARMFReg<30, "d16">; def D31 : ARMFReg<31, "d16">; +def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; +def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">; +def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">; +def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">; +def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">; +def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">; +def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; +def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; // Advanced SIMD (NEON) defines 16 quad-word aliases def Q0 : ARMReg< 0, "q0", [D0, D1]>; -def Q1 : ARMReg< 1, "q1", [D2, D3]>; +def Q1 : ARMReg< 1, "q1", [D2, D3]>; def Q2 : ARMReg< 2, "q2", [D4, D5]>; def Q3 : ARMReg< 3, "q3", [D6, D7]>; def Q4 : ARMReg< 4, "q4", [D8, D9]>; @@ -106,7 +107,9 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; +def CPSR : ARMReg<0, "cpsr">; + +def FPSCR : ARMReg<1, "fpscr">; // Register classes. 
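
The register definitions above encode a fixed aliasing scheme: each D register overlaps a pair of S registers (d0-d15 only; the VFP3-only d16-d31 have no single-precision subregisters, which is what the SDummy placeholder added above exists to model), and each Q register overlaps a pair of D registers. A small sketch that prints the same overlap table:

#include <cstdio>

int main() {
  for (int Q = 0; Q < 16; ++Q) {
    int DLo = 2 * Q, DHi = 2 * Q + 1;
    std::printf("q%-2d = [d%d, d%d]", Q, DLo, DHi);
    if (DHi < 16)  // only d0-d15 decompose further into S registers
      std::printf("  d%d = [s%d, s%d], d%d = [s%d, s%d]",
                  DLo, 2 * DLo, 2 * DLo + 1, DHi, 2 * DHi, 2 * DHi + 1);
    std::printf("\n");
  }
  return 0;
}
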
// @@ -158,6 +161,13 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, ARM::R4, ARM::R5, ARM::R6, ARM::R8, ARM::R10,ARM::R11, ARM::R7 }; + // FP is R7, R9 is available as callee-saved register. + // This is used by non-Darwin platform in Thumb mode. + static const unsigned ARM_GPR_AO_5[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, + ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 }; GPRClass::iterator GPRClass::allocation_order_begin(const MachineFunction &MF) const { @@ -171,6 +181,8 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, } else { if (Subtarget.isR9Reserved()) return ARM_GPR_AO_2; + else if (Subtarget.isThumb()) + return ARM_GPR_AO_5; else return ARM_GPR_AO_1; } @@ -191,6 +203,8 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, } else { if (Subtarget.isR9Reserved()) I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned)); + else if (Subtarget.isThumb()) + I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned)); else I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned)); } @@ -240,32 +254,45 @@ def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31]>; +// Subset of SPR which can be used as a source of NEON scalars for 16-bit +// operations +def SPR_8 : RegisterClass<"ARM", [f32], 32, + [S0, S1, S2, S3, S4, S5, S6, S7, + S8, S9, S10, S11, S12, S13, S14, S15]>; + +// Dummy f32 regclass to represent impossible subreg indices. +def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> { + let CopyCost = -1; +} + // Scalar double precision floating point / generic 64-bit vector register // class. // ARM requires only word alignment for double. It's more performant if it // is double-word alignment though. def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15]> { - let SubRegClassList = [SPR, SPR]; + D8, D9, D10, D11, D12, D13, D14, D15, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31]> { + let SubRegClassList = [SPR_INVALID, SPR_INVALID]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ // VFP2 - static const unsigned ARM_DPR_VFP2[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10, ARM::D11, + static const unsigned ARM_DPR_VFP2[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10, ARM::D11, ARM::D12, ARM::D13, ARM::D14, ARM::D15 }; // VFP3 static const unsigned ARM_DPR_VFP3[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10, ARM::D11, + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10, ARM::D11, ARM::D12, ARM::D13, ARM::D14, ARM::D15, - ARM::D16, ARM::D17, ARM::D18, ARM::D15, + ARM::D16, ARM::D17, ARM::D18, ARM::D19, ARM::D20, ARM::D21, ARM::D22, ARM::D23, ARM::D24, ARM::D25, ARM::D26, ARM::D27, ARM::D28, ARM::D29, ARM::D30, ARM::D31 }; @@ -290,11 +317,34 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, }]; } +// Subset of DPR that are accessible with VFP2 (and so that also have +// 32-bit SPR subregs). 
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v2i32, v2f32], 64, + [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15]> { + let SubRegClassList = [SPR, SPR]; +} + +// Subset of DPR which can be used as a source of NEON scalars for 16-bit +// operations +def DPR_8 : RegisterClass<"ARM", [f64, v4i16, v2f32], 64, + [D0, D1, D2, D3, D4, D5, D6, D7]> { + let SubRegClassList = [SPR_8, SPR_8]; +} + // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { - let SubRegClassList = [SPR, SPR, SPR, SPR, DPR, DPR]; + let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID, + DPR, DPR]; +} + +// Subset of QPR that have 32-bit SPR subregs. +def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> { + let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2]; } // Condition code registers. @@ -341,4 +391,3 @@ def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], [D1, D3, D5, D7, D9, D11, D13, D15, D17, D19, D21, D23, D25, D27, D29, D31]>; - diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 75fa707f9f93d..fc4c5f5830b00 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -10,26 +10,151 @@ //===----------------------------------------------------------------------===// // Functional units across ARM processors // -def FU_iALU : FuncUnit; // Integer alu unit -def FU_iLdSt : FuncUnit; // Integer load / store unit -def FU_FpALU : FuncUnit; // FP alu unit -def FU_FpLdSt : FuncUnit; // FP load / store unit -def FU_Br : FuncUnit; // Branch unit +def FU_Issue : FuncUnit; // issue +def FU_Pipe0 : FuncUnit; // pipeline 0 +def FU_Pipe1 : FuncUnit; // pipeline 1 +def FU_LdSt0 : FuncUnit; // pipeline 0 load/store +def FU_LdSt1 : FuncUnit; // pipeline 1 load/store +def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe +def FU_NLSPipe : FuncUnit; // NEON LS pipe //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // -def IIC_iALU : InstrItinClass; -def IIC_iLoad : InstrItinClass; -def IIC_iStore : InstrItinClass; -def IIC_fpALU : InstrItinClass; -def IIC_fpLoad : InstrItinClass; -def IIC_fpStore : InstrItinClass; -def IIC_Br : InstrItinClass; +def IIC_iALUx : InstrItinClass; +def IIC_iALUi : InstrItinClass; +def IIC_iALUr : InstrItinClass; +def IIC_iALUsi : InstrItinClass; +def IIC_iALUsr : InstrItinClass; +def IIC_iUNAr : InstrItinClass; +def IIC_iUNAsi : InstrItinClass; +def IIC_iUNAsr : InstrItinClass; +def IIC_iCMPi : InstrItinClass; +def IIC_iCMPr : InstrItinClass; +def IIC_iCMPsi : InstrItinClass; +def IIC_iCMPsr : InstrItinClass; +def IIC_iMOVi : InstrItinClass; +def IIC_iMOVr : InstrItinClass; +def IIC_iMOVsi : InstrItinClass; +def IIC_iMOVsr : InstrItinClass; +def IIC_iCMOVi : InstrItinClass; +def IIC_iCMOVr : InstrItinClass; +def IIC_iCMOVsi : InstrItinClass; +def IIC_iCMOVsr : InstrItinClass; +def IIC_iMUL16 : InstrItinClass; +def IIC_iMAC16 : InstrItinClass; +def IIC_iMUL32 : InstrItinClass; +def IIC_iMAC32 : InstrItinClass; +def IIC_iMUL64 : InstrItinClass; +def IIC_iMAC64 : InstrItinClass; +def IIC_iLoadi : InstrItinClass; +def IIC_iLoadr : InstrItinClass; +def IIC_iLoadsi : InstrItinClass; +def IIC_iLoadiu : InstrItinClass; +def IIC_iLoadru : InstrItinClass; +def IIC_iLoadsiu : InstrItinClass; +def 
IIC_iLoadm : InstrItinClass; +def IIC_iStorei : InstrItinClass; +def IIC_iStorer : InstrItinClass; +def IIC_iStoresi : InstrItinClass; +def IIC_iStoreiu : InstrItinClass; +def IIC_iStoreru : InstrItinClass; +def IIC_iStoresiu : InstrItinClass; +def IIC_iStorem : InstrItinClass; +def IIC_Br : InstrItinClass; +def IIC_fpSTAT : InstrItinClass; +def IIC_fpUNA32 : InstrItinClass; +def IIC_fpUNA64 : InstrItinClass; +def IIC_fpCMP32 : InstrItinClass; +def IIC_fpCMP64 : InstrItinClass; +def IIC_fpCVTSD : InstrItinClass; +def IIC_fpCVTDS : InstrItinClass; +def IIC_fpCVTIS : InstrItinClass; +def IIC_fpCVTID : InstrItinClass; +def IIC_fpCVTSI : InstrItinClass; +def IIC_fpCVTDI : InstrItinClass; +def IIC_fpALU32 : InstrItinClass; +def IIC_fpALU64 : InstrItinClass; +def IIC_fpMUL32 : InstrItinClass; +def IIC_fpMUL64 : InstrItinClass; +def IIC_fpMAC32 : InstrItinClass; +def IIC_fpMAC64 : InstrItinClass; +def IIC_fpDIV32 : InstrItinClass; +def IIC_fpDIV64 : InstrItinClass; +def IIC_fpSQRT32 : InstrItinClass; +def IIC_fpSQRT64 : InstrItinClass; +def IIC_fpLoad32 : InstrItinClass; +def IIC_fpLoad64 : InstrItinClass; +def IIC_fpLoadm : InstrItinClass; +def IIC_fpStore32 : InstrItinClass; +def IIC_fpStore64 : InstrItinClass; +def IIC_fpStorem : InstrItinClass; +def IIC_VLD1 : InstrItinClass; +def IIC_VLD2 : InstrItinClass; +def IIC_VLD3 : InstrItinClass; +def IIC_VLD4 : InstrItinClass; +def IIC_VST : InstrItinClass; +def IIC_VUNAD : InstrItinClass; +def IIC_VUNAQ : InstrItinClass; +def IIC_VBIND : InstrItinClass; +def IIC_VBINQ : InstrItinClass; +def IIC_VMOVImm : InstrItinClass; +def IIC_VMOVD : InstrItinClass; +def IIC_VMOVQ : InstrItinClass; +def IIC_VMOVIS : InstrItinClass; +def IIC_VMOVID : InstrItinClass; +def IIC_VMOVISL : InstrItinClass; +def IIC_VMOVSI : InstrItinClass; +def IIC_VMOVDI : InstrItinClass; +def IIC_VPERMD : InstrItinClass; +def IIC_VPERMQ : InstrItinClass; +def IIC_VPERMQ3 : InstrItinClass; +def IIC_VMACD : InstrItinClass; +def IIC_VMACQ : InstrItinClass; +def IIC_VRECSD : InstrItinClass; +def IIC_VRECSQ : InstrItinClass; +def IIC_VCNTiD : InstrItinClass; +def IIC_VCNTiQ : InstrItinClass; +def IIC_VUNAiD : InstrItinClass; +def IIC_VUNAiQ : InstrItinClass; +def IIC_VQUNAiD : InstrItinClass; +def IIC_VQUNAiQ : InstrItinClass; +def IIC_VBINiD : InstrItinClass; +def IIC_VBINiQ : InstrItinClass; +def IIC_VSUBiD : InstrItinClass; +def IIC_VSUBiQ : InstrItinClass; +def IIC_VBINi4D : InstrItinClass; +def IIC_VBINi4Q : InstrItinClass; +def IIC_VSHLiD : InstrItinClass; +def IIC_VSHLiQ : InstrItinClass; +def IIC_VSHLi4D : InstrItinClass; +def IIC_VSHLi4Q : InstrItinClass; +def IIC_VPALiD : InstrItinClass; +def IIC_VPALiQ : InstrItinClass; +def IIC_VMULi16D : InstrItinClass; +def IIC_VMULi32D : InstrItinClass; +def IIC_VMULi16Q : InstrItinClass; +def IIC_VMULi32Q : InstrItinClass; +def IIC_VMACi16D : InstrItinClass; +def IIC_VMACi32D : InstrItinClass; +def IIC_VMACi16Q : InstrItinClass; +def IIC_VMACi32Q : InstrItinClass; +def IIC_VEXTD : InstrItinClass; +def IIC_VEXTQ : InstrItinClass; +def IIC_VTB1 : InstrItinClass; +def IIC_VTB2 : InstrItinClass; +def IIC_VTB3 : InstrItinClass; +def IIC_VTB4 : InstrItinClass; +def IIC_VTBX1 : InstrItinClass; +def IIC_VTBX2 : InstrItinClass; +def IIC_VTBX3 : InstrItinClass; +def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
def GenericItineraries : ProcessorItineraries<[]>; + include "ARMScheduleV6.td" +include "ARMScheduleV7.td" diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 596a57f8aefd5..1ace718c9e178 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -1,4 +1,4 @@ -//===- ARMSchedule.td - ARM v6 Scheduling Definitions ------*- tablegen -*-===// +//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -11,12 +11,4 @@ // //===----------------------------------------------------------------------===// -def V6Itineraries : ProcessorItineraries<[ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> -]>; +// TODO: Add model for an ARM11 diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td new file mode 100644 index 0000000000000..e56581395237e --- /dev/null +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -0,0 +1,587 @@ +//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM v7 processors. +// +//===----------------------------------------------------------------------===// + +// +// Scheduling information derived from "Cortex-A8 Technical Reference Manual". +// +// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 +// +def CortexA8Itineraries : ProcessorItineraries<[ + + // Two fully-pipelined integer ALU pipelines + // + // No operand cycles + InstrItinData]>, + // + // Binary Instructions that produce a result + InstrItinData], [2, 2]>, + InstrItinData], [2, 2, 2]>, + InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + // + // Compare instructions + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + // + // Move instructions, unconditional + InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1, 1]>, + // + // Move instructions, conditional + InstrItinData], [2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + + // Integer multiply pipeline + // Result written in E5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // + InstrItinData], [5, 1, 1]>, + InstrItinData, + InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData, + InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>, + InstrItinData, + InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData, + InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData, + InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, + + // Integer load pipeline + // + // loads have an extra cycle of latency, but are fully pipelined + // use FU_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + 
InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData, + InstrStage<2, [FU_Pipe0], 0>, + InstrStage<2, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>]>, + + // Integer store pipeline + // + // use FU_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData, + InstrStage<2, [FU_Pipe0], 0>, + InstrStage<2, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0]>]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData]>, + + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. 
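
Each InstrItinData entry in this model pairs a list of pipeline stages (cycle count per set of candidate functional units) with per-operand cycle numbers. A hedged C++ model of what one entry expresses, using the shape of the single-precision FP MAC entry below ([7, 1]: result defined at cycle 7, source read at cycle 1); the struct names here are invented for the sketch:

#include <cstdio>
#include <vector>

struct Stage { unsigned Cycles; const char *Units; };

struct Itinerary {
  std::vector<Stage> Stages;           // pipeline occupancy, in issue order
  std::vector<unsigned> OperandCycles; // def/use cycle per operand
};

int main() {
  // One dual-issue slot, then one NEON-pipe stage; the destination is
  // available 7 cycles in, and the sources are read at cycle 1.
  Itinerary FPMac32 = {
    {{1, "FU_Pipe0|FU_Pipe1"}, {1, "FU_NPipe"}},
    {7, 1}
  };
  std::printf("def-to-use latency: %u cycles\n", FPMac32.OperandCycles[0]);
  return 0;
}
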
+ // + // FP Special Register to Integer Register File Move + InstrItinData, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Single-precision FP Unary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP Unary + InstrItinData, + InstrStage<4, [FU_NPipe], 0>, + InstrStage<4, [FU_NLSPipe]>]>, + // + // Single-precision FP Compare + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP Compare + InstrItinData, + InstrStage<4, [FU_NPipe], 0>, + InstrStage<4, [FU_NLSPipe]>]>, + // + // Single to Double FP Convert + InstrItinData, + InstrStage<7, [FU_NPipe], 0>, + InstrStage<7, [FU_NLSPipe]>]>, + // + // Double to Single FP Convert + InstrItinData, + InstrStage<5, [FU_NPipe], 0>, + InstrStage<5, [FU_NLSPipe]>]>, + // + // Single-Precision FP to Integer Convert + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData, + InstrStage<8, [FU_NPipe], 0>, + InstrStage<8, [FU_NLSPipe]>]>, + // + // Integer to Single-Precision FP Convert + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData, + InstrStage<8, [FU_NPipe], 0>, + InstrStage<8, [FU_NLSPipe]>]>, + // + // Single-precision FP ALU + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP ALU + InstrItinData, + InstrStage<9, [FU_NPipe], 0>, + InstrStage<9, [FU_NLSPipe]>]>, + // + // Single-precision FP Multiply + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP Multiply + InstrItinData, + InstrStage<11, [FU_NPipe], 0>, + InstrStage<11, [FU_NLSPipe]>]>, + // + // Single-precision FP MAC + InstrItinData, + InstrStage<1, [FU_NPipe]>], [7, 1]>, + // + // Double-precision FP MAC + InstrItinData, + InstrStage<19, [FU_NPipe], 0>, + InstrStage<19, [FU_NLSPipe]>]>, + // + // Single-precision FP DIV + InstrItinData, + InstrStage<20, [FU_NPipe], 0>, + InstrStage<20, [FU_NLSPipe]>]>, + // + // Double-precision FP DIV + InstrItinData, + InstrStage<29, [FU_NPipe], 0>, + InstrStage<29, [FU_NLSPipe]>]>, + // + // Single-precision FP SQRT + InstrItinData, + InstrStage<19, [FU_NPipe], 0>, + InstrStage<19, [FU_NLSPipe]>]>, + // + // Double-precision FP SQRT + InstrItinData, + InstrStage<29, [FU_NPipe], 0>, + InstrStage<29, [FU_NLSPipe]>]>, + // + // Single-precision FP Load + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Double-precision FP Load + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // FP Load Multiple + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [FU_Pipe0], 0>, + InstrStage<2, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Single-precision FP Store + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Double-precision FP Store + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<1, [FU_Pipe0], 0>, + InstrStage<1, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, 
+ InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // FP Store Multiple + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, + InstrStage<2, [FU_Pipe0], 0>, + InstrStage<2, [FU_Pipe1]>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + + // NEON + // Issue through integer pipeline, and execute in NEON unit. + // + // VLD1 + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // VLD2 + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>, + // + // VLD3 + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + InstrItinData, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_LdSt0], 0>, + InstrStage<1, [FU_NLSPipe]>]>, + // + // Double-register FP Unary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData, + InstrStage<2, [FU_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData, + InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, + // + // Move Immediate + InstrItinData, + InstrStage<1, [FU_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData, + InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData, + InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData, + InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData, + InstrStage<1, [FU_NLSPipe]>], [20, 1]>, + // + // Double-precision to Integer Move + InstrItinData, + InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, + // + // Integer to Lane Move + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, + // + // Double-register Permute + InstrItinData, + InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData, + InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>, + // + // Quad-register FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData, + InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>, + 
// + // Double-register Reciprical Step + InstrItinData, + InstrStage<1, [FU_NPipe]>], [9, 2, 2]>, + // + // Quad-register Reciprical Step + InstrItinData, + InstrStage<2, [FU_NPipe]>], [10, 2, 2]>, + // + // Double-register Integer Count + InstrItinData, + InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Count + // Result written in N3, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData, + InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, + // + // Double-register Integer Unary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData, + InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData, + InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData, + InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, + // + // Double-register Integer Subtract + InstrItinData, + InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData, + InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, + // + // Double-register Integer Shift + InstrItinData, + InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData, + InstrStage<2, [FU_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData, + InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData, + InstrStage<2, [FU_NPipe]>], [5, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData, + InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData, + InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, + // + // Double-register Integer Multiply (.8, .16) + InstrItinData, + InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, + // + // Double-register Integer Multiply (.32) + InstrItinData, + InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData, + InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData, + InstrStage<1, [FU_NPipe]>, + InstrStage<2, [FU_NLSPipe], 0>, + InstrStage<3, [FU_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData, + InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData, + InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData, + InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData, + InstrStage<1, [FU_NPipe]>, + InstrStage<2, [FU_NLSPipe], 0>, + InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>, + // + // Double-register VEXT + InstrItinData, + InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>, + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>, + InstrItinData, + InstrStage<1, 
[FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>, + InstrItinData, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> +]>; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e611088574137..cf1ee3f029532 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -13,8 +13,7 @@ #include "ARMSubtarget.h" #include "ARMGenSubtarget.inc" -#include "llvm/Module.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/GlobalValue.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -22,13 +21,19 @@ using namespace llvm; static cl::opt ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); +static cl::opt +UseNEONFP("arm-use-neon-fp", + cl::desc("Use NEON for single-precision FP"), + cl::init(false), cl::Hidden); -ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, +ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb) : ARMArchVersion(V4T) , ARMFPUType(None) + , UseNEONForSinglePrecisionFP(UseNEONFP) , IsThumb(isThumb) , ThumbMode(Thumb1) + , PostRAScheduler(false) , IsR9Reserved(ReserveR9) , stackAlignment(4) , CPUString("generic") @@ -45,7 +50,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. - const std::string& TT = M.getTargetTriple(); unsigned Len = TT.length(); unsigned Idx = 0; @@ -75,14 +79,14 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, } } + // Thumb2 implies at least V6T2. + if (ARMArchVersion < V6T2 && ThumbMode >= Thumb2) + ARMArchVersion = V6T2; + if (Len >= 10) { if (TT.find("-darwin") != std::string::npos) // arm-darwin TargetType = isDarwin; - } else if (TT.empty()) { -#if defined(__APPLE__) - TargetType = isDarwin; -#endif } if (TT.find("eabi") != std::string::npos) @@ -93,4 +97,61 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, if (isTargetDarwin()) IsR9Reserved = ReserveR9 | (ARMArchVersion < V6); + + // Set CPU specific features. + if (CPUString == "cortex-a8") { + PostRAScheduler = true; + if (UseNEONFP.getPosition() == 0) + UseNEONForSinglePrecisionFP = true; + } +} + +/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. +bool +ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { + if (RelocM == Reloc::Static) + return false; + + // GV with ghost linkage (in JIT lazy compilation mode) do not require an + // extra load from stub. + bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + + if (!isTargetDarwin()) { + // Extra load is needed for all externally visible. + if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) + return false; + return true; + } else { + if (RelocM == Reloc::PIC_) { + // If this is a strong reference to a definition, it is definitely not + // through a stub. 
+ if (!isDecl && !GV->isWeakForLinker()) + return false; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return true; + + // If symbol visibility is hidden, we have a stub for common symbol + // references and external declarations. + if (isDecl || GV->hasCommonLinkage()) + // Hidden $non_lazy_ptr reference. + return true; + + return false; + } else { + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (!isDecl && !GV->isWeakForLinker()) + return false; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return true; + } + } + + return false; } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 5110b3157c482..7098fd4f36ba5 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -15,11 +15,12 @@ #define ARMSUBTARGET_H #include "llvm/Target/TargetInstrItineraries.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtarget.h" #include <string> namespace llvm { -class Module; +class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: @@ -43,12 +44,20 @@ protected: /// ARMFPUType - Floating Point Unit type. ARMFPEnum ARMFPUType; + /// UseNEONForSinglePrecisionFP - True if the NEONFP attribute has been + /// specified. Use the method useNEONForSinglePrecisionFP() to + /// determine if NEON should actually be used. + bool UseNEONForSinglePrecisionFP; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; /// ThumbMode - Indicates supported Thumb version. ThumbTypeEnum ThumbMode; + /// PostRAScheduler - True if using post-register-allocation scheduler. + bool PostRAScheduler; + /// IsR9Reserved - True if R9 is not available as a general purpose register. bool IsR9Reserved; @@ -61,7 +70,7 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; - + public: enum { isELF, isDarwin @@ -73,9 +82,9 @@ protected: } TargetABI; /// This constructor initializes the data members to match that - /// of the specified module. + /// of the specified triple. /// - ARMSubtarget(const Module &M, const std::string &FS, bool isThumb); + ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call.
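Taken together, the two branches of GVIsIndirectSymbol above form a small decision table. The following is a standalone restatement of that logic, sketch only: plain bools stand in for llvm::GlobalValue queries, and every name here is illustrative rather than part of the patch.

    // Sketch: mirrors the GVIsIndirectSymbol rules above; not LLVM API.
    struct SymInfo {
      bool IsDecl;           // declaration, not yet read from bitcode
      bool IsWeakForLinker;  // weak/linkonce definition
      bool LocalLinkage;
      bool HiddenVisibility;
      bool CommonLinkage;
    };

    bool needsIndirectLoad(const SymInfo &S, bool IsDarwin, bool IsPIC,
                           bool IsStatic) {
      if (IsStatic)
        return false;                    // Reloc::Static: always direct.
      if (!IsDarwin)                     // ELF: every externally visible
        return !(S.LocalLinkage || S.HiddenVisibility); // symbol needs a load.
      if (!S.IsDecl && !S.IsWeakForLinker)
        return false;                    // strong definition: no stub.
      if (!S.HiddenVisibility)
        return true;                     // normal $non_lazy_ptr reference.
      // Hidden symbols get a $non_lazy_ptr only for declarations and
      // common symbols, and only under PIC.
      return IsPIC && (S.IsDecl || S.CommonLinkage);
    }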
@@ -99,6 +108,8 @@ protected: bool hasVFP2() const { return ARMFPUType >= VFPv2; } bool hasVFP3() const { return ARMFPUType >= VFPv3; } bool hasNEON() const { return ARMFPUType >= NEON; } + bool useNEONForSinglePrecisionFP() const { + return hasNEON() && UseNEONForSinglePrecisionFP; } bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } @@ -108,14 +119,18 @@ protected: bool isThumb() const { return IsThumb; } bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); } - bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); } bool hasThumb2() const { return ThumbMode >= Thumb2; } bool isR9Reserved() const { return IsR9Reserved; } const std::string & getCPUString() const { return CPUString; } + + /// enablePostRAScheduler - From TargetSubtarget, return true to + /// enable the post-RA scheduler. + bool enablePostRAScheduler() const { return PostRAScheduler; } - /// getInstrItins - Return the instruction itineraries based on subtarget + /// getInstrItins - Return the instruction itineraries /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } @@ -123,6 +138,10 @@ protected: /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. unsigned getStackAlignment() const { return stackAlignment; } + + /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect + /// symbol. + bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 23447332198da..32ddc20a56041 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,188 +11,122 @@ //===----------------------------------------------------------------------===// #include "ARMTargetMachine.h" -#include "ARMTargetAsmInfo.h" +#include "ARMMCAsmInfo.h" #include "ARMFrameInfo.h" #include "ARM.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, - cl::desc("Disable load store optimization pass")); -static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, - cl::desc("Disable if-conversion pass")); - -/// ARMTargetMachineModule - Note that this is used on hosts that cannot link -/// in a library unless there are references into the library. In particular, -/// it seems that it is not possible to get things to work on Win32 without -/// this. Though it is unused, do not remove it. -extern "C" int ARMTargetMachineModule; -int ARMTargetMachineModule = 0; - -// Register the target. -static RegisterTarget<ARMTargetMachine> X("arm", "ARM"); -static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb"); - -// Force static initialization. -extern "C" void LLVMInitializeARMTarget() { } - -// No assembler printer by default -ARMBaseTargetMachine::AsmPrinterCtorFn ARMBaseTargetMachine::AsmPrinterCtor = 0; - -/// ThumbTargetMachine - Create a Thumb architecture model.
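The isThumb2/hasThumb2 distinction above rests on the ordered-enum idiom: the enumerators are declared in increasing capability order, so a single comparison answers "at least this feature level". A compilable sketch under that assumption follows; the type and field names are illustrative, not the LLVM ones.

    #include <cassert>

    enum ARMFPEnum { FPNone, VFPv2, VFPv3, NEONFP };
    enum ThumbTypeEnum { Thumb1, Thumb2 };

    struct Subtarget {
      ARMFPEnum FPU;
      bool InThumbMode;         // currently emitting Thumb code
      ThumbTypeEnum ThumbMode;  // highest Thumb version the core supports

      bool hasVFP3() const { return FPU >= VFPv3; }  // VFPv3 or NEON
      bool hasNEON() const { return FPU >= NEONFP; }
      // isThumb2: we are emitting Thumb2 code right now.
      bool isThumb2() const { return InThumbMode && ThumbMode == Thumb2; }
      // hasThumb2: the core supports Thumb2, even while emitting ARM code.
      bool hasThumb2() const { return ThumbMode >= Thumb2; }
    };

    int main() {
      Subtarget S = { NEONFP, false, Thumb2 };
      assert(S.hasVFP3() && S.hasNEON());
      assert(S.hasThumb2() && !S.isThumb2()); // ARM mode on a Thumb2 core
    }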
-/// -unsigned ThumbTargetMachine::getJITMatchQuality() { -#if defined(__thumb__) - return 10; -#endif - return 0; +static const MCAsmInfo *createMCAsmInfo(const Target &T, + const StringRef &TT) { + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + case Triple::Darwin: + return new ARMMCAsmInfoDarwin(); + default: + return new ARMELFMCAsmInfo(); + } } -unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) { - std::string TT = M.getTargetTriple(); - // Match thumb-foo-bar, as well as things like thumbv5blah-* - if (TT.size() >= 6 && - (TT.substr(0, 6) == "thumb-" || TT.substr(0, 6) == "thumbv")) - return 20; - // If the target triple is something non-thumb, we don't match. - if (!TT.empty()) return 0; +extern "C" void LLVMInitializeARMTarget() { + // Register the target. + RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); + RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget); - if (M.getEndianness() == Module::LittleEndian && - M.getPointerSize() == Module::Pointer32) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target - - return getJITMatchQuality()/2; + // Register the target asm info. + RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo); + RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo); } /// TargetMachine ctor - Create an ARM architecture model. /// -ARMBaseTargetMachine::ARMBaseTargetMachine(const Module &M, +ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, + const std::string &TT, const std::string &FS, bool isThumb) - : Subtarget(M, FS, isThumb), + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS, isThumb), FrameInfo(Subtarget), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); } -ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS) - : ARMBaseTargetMachine(M, FS, false), InstrInfo(Subtarget), +ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32") : std::string("e-p:32:32-f64:64:64-i64:64:64")), TLInfo(*this) { } -ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS) - : ARMBaseTargetMachine(M, FS, true), +ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : ARMBaseTargetMachine(T, TT, FS, true), + InstrInfo(Subtarget.hasThumb2() + ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) + : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-" "i16:16:32-i8:8:32-i1:8:32-a:0:32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-a:0:32")), TLInfo(*this) { - // Create the appropriate type of Thumb InstrInfo - if (Subtarget.hasThumb2()) - InstrInfo = new Thumb2InstrInfo(Subtarget); - else - InstrInfo = new Thumb1InstrInfo(Subtarget); -} - -unsigned ARMTargetMachine::getJITMatchQuality() { -#if defined(__arm__) - return 10; -#endif - return 0; -} - -unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) { - std::string TT = M.getTargetTriple(); - // Match arm-foo-bar, as well as things like armv5blah-* - if (TT.size() >= 4 && - (TT.substr(0, 4) == "arm-" || TT.substr(0, 4) == "armv")) - return 20; - // If the target triple is something non-arm, we don't match.
- if (!TT.empty()) return 0; - - if (M.getEndianness() == Module::LittleEndian && - M.getPointerSize() == Module::Pointer32) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target - - return getJITMatchQuality()/2; } -const TargetAsmInfo *ARMBaseTargetMachine::createTargetAsmInfo() const { - switch (Subtarget.TargetType) { - case ARMSubtarget::isDarwin: - return new ARMDarwinTargetAsmInfo(*this); - case ARMSubtarget::isELF: - return new ARMELFTargetAsmInfo(*this); - default: - return new ARMGenericTargetAsmInfo(*this); - } -} - // Pass Pipeline Configuration bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - PM.add(createARMISelDag(*this)); + PM.add(createARMISelDag(*this, OptLevel)); return false; } bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - // FIXME: temporarily disabling load / store optimization pass for Thumb mode. - if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) + if (Subtarget.hasNEON()) + PM.add(createNEONPreAllocPass()); + + // FIXME: temporarily disabling load / store optimization pass for Thumb1. + if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); return true; } -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - // FIXME: temporarily disabling load / store optimization pass for Thumb mode. - if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) +bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // FIXME: temporarily disabling load / store optimization pass for Thumb1. + if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); - if (OptLevel != CodeGenOpt::None && - !DisableIfConversion && !Subtarget.isThumb()) - PM.add(createIfConverterPass()); - - PM.add(createARMConstantIslandPass()); return true; } -bool ARMBaseTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { - // Output assembly language. - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, Verbose)); +bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // FIXME: temporarily disabling load / store optimization pass for Thumb1. + if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) + PM.add(createIfConverterPass()); + + if (Subtarget.isThumb2()) { + PM.add(createThumb2ITBlockPass()); + PM.add(createThumb2SizeReductionPass()); + } - return false; + PM.add(createARMConstantIslandPass()); + return true; } - bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE) { // FIXME: Move this to TargetJITInfo! if (DefRelocModel == Reloc::Default) @@ -200,18 +134,11 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, // Machine code emitter pass for ARM. 
PM.add(createARMCodeEmitterPass(*this, MCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } - return false; } bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! if (DefRelocModel == Reloc::Default) @@ -219,43 +146,42 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } + return false; +} + +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE) { + // FIXME: Move this to TargetJITInfo! + if (DefRelocModel == Reloc::Default) + setRelocationModel(Reloc::Static); + // Machine code emitter pass for ARM. + PM.add(createARMObjectCodeEmitterPass(*this, OCE)); return false; } bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE) { // Machine code emitter pass for ARM. PM.add(createARMCodeEmitterPass(*this, MCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } - return false; } bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } - return false; } +bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE) { + // Machine code emitter pass for ARM. + PM.add(createARMObjectCodeEmitterPass(*this, OCE)); + return false; +} diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index a0df54d6d5282..71a53488f164c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -16,7 +16,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameInfo.h" #include "ARMInstrInfo.h" #include "ARMFrameInfo.h" #include "ARMJITInfo.h" @@ -27,8 +26,6 @@ namespace llvm { -class Module; - class ARMBaseTargetMachine : public LLVMTargetMachine { protected: ARMSubtarget Subtarget; @@ -39,16 +36,9 @@ private: InstrItineraryData InstrItins; Reloc::Model DefRelocModel; // Reloc model before it's overridden. -protected: - // To avoid having target depend on the asmprinter stuff libraries, asmprinter - // set this functions to ctor pointer at startup time if they are linked in. 
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, - ARMBaseTargetMachine &tm, - bool verbose); - static AsmPrinterCtorFn AsmPrinterCtor; - public: - ARMBaseTargetMachine(const Module &M, const std::string &FS, bool isThumb); + ARMBaseTargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool isThumb); virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual ARMJITInfo *getJITInfo() { return &JITInfo; } @@ -57,34 +47,26 @@ public: return InstrItins; } - static void registerAsmPrinter(AsmPrinterCtorFn F) { - AsmPrinterCtor = F; - } - - static unsigned getModuleMatchQuality(const Module &M); - static unsigned getJITMatchQuality(); - - virtual const TargetAsmInfo *createTargetAsmInfo() const; - // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE); + MachineCodeEmitter &MCE); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + JITCodeEmitter &MCE); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &MCE); + ObjectCodeEmitter &OCE); virtual bool addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE); virtual bool addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &MCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE); }; /// ARMTargetMachine - ARM target machine. @@ -94,7 +76,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine { const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: - ARMTargetMachine(const Module &M, const std::string &FS); + ARMTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); virtual const ARMRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); @@ -106,9 +89,6 @@ public: virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } - - static unsigned getJITMatchQuality(); - static unsigned getModuleMatchQuality(const Module &M); }; /// ThumbTargetMachine - Thumb target machine. 
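The addInstSelector/addPreRegAlloc/addPreSched2/addPreEmitPass overrides declared above follow the template-method pattern: a generic driver calls virtual hooks at fixed points in the pipeline, and each target adds passes only where it needs them. A minimal sketch of that scheme, with hypothetical names rather than the real LLVM API:

    #include <string>
    #include <vector>

    struct PassManager {
      std::vector<std::string> Passes;
      void add(const std::string &P) { Passes.push_back(P); }
    };

    struct TargetMachineBase {
      virtual ~TargetMachineBase() {}
      // Hook points; targets override only the ones they care about.
      virtual bool addPreRegAlloc(PassManager &) { return false; }
      virtual bool addPreSched2(PassManager &) { return false; }
      virtual bool addPreEmitPass(PassManager &) { return false; }
      void buildPipeline(PassManager &PM) {
        addPreRegAlloc(PM);  // before register allocation
        addPreSched2(PM);    // before the post-RA scheduler
        addPreEmitPass(PM);  // right before code emission
      }
    };

    struct ARMLikeTargetMachine : TargetMachineBase {
      bool addPreEmitPass(PassManager &PM) /*override*/ {
        PM.add("if-converter");      // cf. createIfConverterPass above
        PM.add("constant-islands");  // cf. createARMConstantIslandPass
        return true;
      }
    };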
@@ -120,7 +100,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine { const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: - ThumbTargetMachine(const Module &M, const std::string &FS); + ThumbTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo virtual const ARMBaseRegisterInfo *getRegisterInfo() const { @@ -134,9 +115,6 @@ public: /// returns either Thumb1InstrInfo or Thumb2InstrInfo virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } - - static unsigned getJITMatchQuality(); - static unsigned getModuleMatchQuality(const Module &M); }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h new file mode 100644 index 0000000000000..9703403db22a4 --- /dev/null +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -0,0 +1,39 @@ +//===-- llvm/Target/ARMTargetObjectFile.h - ARM Object Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H +#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H + +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + + class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF { + public: + ARMElfTargetObjectFile() : TargetLoweringObjectFileELF() {} + + void Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { + StaticCtorSection = + getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + } + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp new file mode 100644 index 0000000000000..7438ea9c79f3d --- /dev/null +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -0,0 +1,618 @@ +//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmLexer.h" +#include "llvm/MC/MCAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +namespace { +struct ARMOperand; + +// The shift types for register controlled shifts in ARM memory addressing +enum ShiftType { + Lsl, + Lsr, + Asr, + Ror, + Rrx +}; + +class ARMAsmParser : public TargetAsmParser { + MCAsmParser &Parser; + +private: + MCAsmParser &getParser() const { return Parser; } + + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + + bool ParseRegister(ARMOperand &Op); + + bool ParseRegisterList(ARMOperand &Op); + + bool ParseMemory(ARMOperand &Op); + + bool ParseShift(enum ShiftType *St, const MCExpr *&ShiftAmount); + + bool ParseOperand(ARMOperand &Op); + + bool ParseDirectiveWord(unsigned Size, SMLoc L); + + // TODO - For now hacked versions of the next two are in this file to + // allow some parser testing until the table gen versions are implemented. + + /// @name Auto-generated Match Functions + /// { + bool MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, + MCInst &Inst); + + /// MatchRegisterName - Match the given string to a register name and return + /// its register number, or -1 if there is no match. To allow return values + /// to be used directly in register lists, ARM registers have values between + /// 0 and 15. + int MatchRegisterName(const StringRef &Name); + + /// } + + +public: + ARMAsmParser(const Target &T, MCAsmParser &_Parser) + : TargetAsmParser(T), Parser(_Parser) {} + + virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + + virtual bool ParseDirective(AsmToken DirectiveID); +}; + +} // end anonymous namespace + +namespace { + +/// ARMOperand - Instances of this class represent a parsed ARM machine +/// instruction.
+struct ARMOperand { + enum { + Token, + Register, + Immediate, + Memory + } Kind; + + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNum; + bool Writeback; + } Reg; + + struct { + const MCExpr *Val; + } Imm; + + // This is for all forms of ARM address expressions + struct { + unsigned BaseRegNum; + bool OffsetIsReg; + const MCExpr *Offset; // used when OffsetIsReg is false + unsigned OffsetRegNum; // used when OffsetIsReg is true + bool OffsetRegShifted; // only used when OffsetIsReg is true + enum ShiftType ShiftType; // used when OffsetRegShifted is true + const MCExpr *ShiftAmount; // used when OffsetRegShifted is true + bool Preindexed; + bool Postindexed; + bool Negative; // only used when OffsetIsReg is true + bool Writeback; + } Mem; + + }; + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + bool isToken() const {return Kind == Token; } + + bool isReg() const { return Kind == Register; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + static ARMOperand CreateToken(StringRef Str) { + ARMOperand Res; + Res.Kind = Token; + Res.Tok.Data = Str.data(); + Res.Tok.Length = Str.size(); + return Res; + } + + static ARMOperand CreateReg(unsigned RegNum, bool Writeback) { + ARMOperand Res; + Res.Kind = Register; + Res.Reg.RegNum = RegNum; + Res.Reg.Writeback = Writeback; + return Res; + } + + static ARMOperand CreateImm(const MCExpr *Val) { + ARMOperand Res; + Res.Kind = Immediate; + Res.Imm.Val = Val; + return Res; + } + + static ARMOperand CreateMem(unsigned BaseRegNum, bool OffsetIsReg, + const MCExpr *Offset, unsigned OffsetRegNum, + bool OffsetRegShifted, enum ShiftType ShiftType, + const MCExpr *ShiftAmount, bool Preindexed, + bool Postindexed, bool Negative, bool Writeback) { + ARMOperand Res; + Res.Kind = Memory; + Res.Mem.BaseRegNum = BaseRegNum; + Res.Mem.OffsetIsReg = OffsetIsReg; + Res.Mem.Offset = Offset; + Res.Mem.OffsetRegNum = OffsetRegNum; + Res.Mem.OffsetRegShifted = OffsetRegShifted; + Res.Mem.ShiftType = ShiftType; + Res.Mem.ShiftAmount = ShiftAmount; + Res.Mem.Preindexed = Preindexed; + Res.Mem.Postindexed = Postindexed; + Res.Mem.Negative = Negative; + Res.Mem.Writeback = Writeback; + return Res; + } +}; + +} // end anonymous namespace. + +// Try to parse a register name. The token must be an Identifier when called, +// and if it is a register name a Reg operand is created, the token is eaten +// and false is returned. Else true is returned and no token is eaten. +// TODO this is likely to change to allow different register types and or to +// parse for a specific register type. +bool ARMAsmParser::ParseRegister(ARMOperand &Op) { + const AsmToken &Tok = getLexer().getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + + // FIXME: Validate register for the current architecture; we have to do + // validation later, so maybe there is no need for this here. + int RegNum; + + RegNum = MatchRegisterName(Tok.getString()); + if (RegNum == -1) + return true; + getLexer().Lex(); // Eat identifier token. 
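A usage note for the ARMOperand factories defined above (illustrative only, not part of the patch): operands are built and passed around by value, and the Kind tag guards every accessor.

    ARMOperand Tok = ARMOperand::CreateToken("add");
    assert(Tok.isToken() && Tok.getToken() == "add");

    ARMOperand Reg = ARMOperand::CreateReg(/*RegNum=*/3, /*Writeback=*/false);
    assert(Reg.isReg() && Reg.getReg() == 3);
    // Calling Reg.getImm() here would trip the "Invalid access!" assert.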
+ + bool Writeback = false; + const AsmToken &ExclaimTok = getLexer().getTok(); + if (ExclaimTok.is(AsmToken::Exclaim)) { + Writeback = true; + getLexer().Lex(); // Eat exclaim token + } + + Op = ARMOperand::CreateReg(RegNum, Writeback); + + return false; +} + +// Try to parse a register list. The first token must be a '{' when called +// for now. +bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { + assert(getLexer().getTok().is(AsmToken::LCurly) && + "Token is not a Left Curly Brace"); + getLexer().Lex(); // Eat left curly brace token. + + const AsmToken &RegTok = getLexer().getTok(); + SMLoc RegLoc = RegTok.getLoc(); + if (RegTok.isNot(AsmToken::Identifier)) + return Error(RegLoc, "register expected"); + int RegNum = MatchRegisterName(RegTok.getString()); + if (RegNum == -1) + return Error(RegLoc, "register expected"); + getLexer().Lex(); // Eat identifier token. + unsigned RegList = 1 << RegNum; + + int HighRegNum = RegNum; + // TODO ranges like "{Rn-Rm}" + while (getLexer().getTok().is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. + + const AsmToken &RegTok = getLexer().getTok(); + SMLoc RegLoc = RegTok.getLoc(); + if (RegTok.isNot(AsmToken::Identifier)) + return Error(RegLoc, "register expected"); + int RegNum = MatchRegisterName(RegTok.getString()); + if (RegNum == -1) + return Error(RegLoc, "register expected"); + + if (RegList & (1 << RegNum)) + Warning(RegLoc, "register duplicated in register list"); + else if (RegNum <= HighRegNum) + Warning(RegLoc, "register not in ascending order in register list"); + RegList |= 1 << RegNum; + HighRegNum = RegNum; + + getLexer().Lex(); // Eat identifier token. + } + const AsmToken &RCurlyTok = getLexer().getTok(); + if (RCurlyTok.isNot(AsmToken::RCurly)) + return Error(RCurlyTok.getLoc(), "'}' expected"); + getLexer().Lex(); // Eat right curly brace token. + + return false; +} + +// Try to parse an ARM memory expression. It must start with a '[' token. +// TODO Only preindexing and postindexing addressing are started, unindexed +// with option, etc are still to do. +bool ARMAsmParser::ParseMemory(ARMOperand &Op) { + assert(getLexer().getTok().is(AsmToken::LBrac) && + "Token is not a Left Bracket"); + getLexer().Lex(); // Eat left bracket token. + + const AsmToken &BaseRegTok = getLexer().getTok(); + if (BaseRegTok.isNot(AsmToken::Identifier)) + return Error(BaseRegTok.getLoc(), "register expected"); + int BaseRegNum = MatchRegisterName(BaseRegTok.getString()); + if (BaseRegNum == -1) + return Error(BaseRegTok.getLoc(), "register expected"); + getLexer().Lex(); // Eat identifier token. + + bool Preindexed = false; + bool Postindexed = false; + bool OffsetIsReg = false; + bool Negative = false; + bool Writeback = false; + + // First look for preindexed address forms: + // [Rn, +/-Rm] + // [Rn, #offset] + // [Rn, +/-Rm, shift] + // that is, after the "[Rn" we have so far, see if the next token is a comma. + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + Preindexed = true; + getLexer().Lex(); // Eat comma token. + + const AsmToken &NextTok = getLexer().getTok(); + if (NextTok.is(AsmToken::Plus)) + getLexer().Lex(); // Eat plus token. + else if (NextTok.is(AsmToken::Minus)) { + Negative = true; + getLexer().Lex(); // Eat minus token + } + + // See if there is a register following the "[Rn," we have so far.
+ const AsmToken &OffsetRegTok = getLexer().getTok(); + int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + bool OffsetRegShifted = false; + enum ShiftType ShiftType; + const MCExpr *ShiftAmount; + const MCExpr *Offset; + if (OffsetRegNum != -1) { + OffsetIsReg = true; + getLexer().Lex(); // Eat identifier token for the offset register. + // Look for a comma then a shift + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. + + const AsmToken &Tok = getLexer().getTok(); + if (ParseShift(&ShiftType, ShiftAmount)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // "[Rn," we have so far was not followed by "Rm" + // Look for #offset following the "[Rn," + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(Offset)) + return true; + } + const AsmToken &RBracTok = getLexer().getTok(); + if (RBracTok.isNot(AsmToken::RBrac)) + return Error(RBracTok.getLoc(), "']' expected"); + getLexer().Lex(); // Eat right bracket token. + + const AsmToken &ExclaimTok = getLexer().getTok(); + if (ExclaimTok.is(AsmToken::Exclaim)) { + Writeback = true; + getLexer().Lex(); // Eat exclaim token + } + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, + OffsetRegShifted, ShiftType, ShiftAmount, + Preindexed, Postindexed, Negative, Writeback); + return false; + } + // The "[Rn" we have so far was not followed by a comma. + else if (Tok.is(AsmToken::RBrac)) { + // These are the post-indexing addressing forms: + // [Rn], #offset + // [Rn], +/-Rm + // [Rn], +/-Rm, shift + // that is, a ']' follows after the "[Rn". + Postindexed = true; + Writeback = true; + getLexer().Lex(); // Eat right bracket token. + + const AsmToken &CommaTok = getLexer().getTok(); + if (CommaTok.isNot(AsmToken::Comma)) + return Error(CommaTok.getLoc(), "',' expected"); + getLexer().Lex(); // Eat comma token. + + const AsmToken &NextTok = getLexer().getTok(); + if (NextTok.is(AsmToken::Plus)) + getLexer().Lex(); // Eat plus token. + else if (NextTok.is(AsmToken::Minus)) { + Negative = true; + getLexer().Lex(); // Eat minus token + } + + // See if there is a register following the "[Rn]," we have so far. + const AsmToken &OffsetRegTok = getLexer().getTok(); + int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + bool OffsetRegShifted = false; + enum ShiftType ShiftType; + const MCExpr *ShiftAmount; + const MCExpr *Offset; + if (OffsetRegNum != -1) { + OffsetIsReg = true; + getLexer().Lex(); // Eat identifier token for the offset register. + // Look for a comma then a shift + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. + + const AsmToken &Tok = getLexer().getTok(); + if (ParseShift(&ShiftType, ShiftAmount)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // "[Rn]," we have so far was not followed by "Rm" + // Look for #offset following the "[Rn]," + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token.
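For reference, these are the operand spellings the pre-indexed branch above and the post-indexed branch here are working toward. The examples are assumed test inputs, not part of the patch; their shapes match the comments in the code.

    // Hypothetical ParseMemory inputs, one per addressing form.
    static const char *const MemOperandExamples[] = {
      "[r1]",               // plain base register
      "[r1, #4]",           // pre-indexed, immediate offset
      "[r1, r2]",           // pre-indexed, register offset
      "[r1, -r2, lsl #2]",  // pre-indexed, negated shifted register
      "[r1, #4]!",          // pre-indexed with writeback
      "[r1], #4",           // post-indexed, immediate offset
      "[r1], r2, lsl #2",   // post-indexed, shifted register offset
    };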
+ + if (getParser().ParseExpression(Offset)) + return true; + } + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, + OffsetRegShifted, ShiftType, ShiftAmount, + Preindexed, Postindexed, Negative, Writeback); + return false; + } + + return true; +} + +/// ParseShift as one of these two: +/// ( lsl | lsr | asr | ror ) , # shift_amount +/// rrx +/// and returns false if it parses a shift, otherwise it returns true. +bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return true; + const StringRef &ShiftName = Tok.getString(); + if (ShiftName == "lsl" || ShiftName == "LSL") + *St = Lsl; + else if (ShiftName == "lsr" || ShiftName == "LSR") + *St = Lsr; + else if (ShiftName == "asr" || ShiftName == "ASR") + *St = Asr; + else if (ShiftName == "ror" || ShiftName == "ROR") + *St = Ror; + else if (ShiftName == "rrx" || ShiftName == "RRX") + *St = Rrx; + else + return true; + getLexer().Lex(); // Eat shift type token. + + // For all but a Rotate right there must be a '#' and a shift amount + if (*St != Rrx) { + // Look for # following the shift type + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(ShiftAmount)) + return true; + } + + return false; +} + +// A hack to allow some testing +int ARMAsmParser::MatchRegisterName(const StringRef &Name) { + if (Name == "r0" || Name == "R0") + return 0; + else if (Name == "r1" || Name == "R1") + return 1; + else if (Name == "r2" || Name == "R2") + return 2; + else if (Name == "r3" || Name == "R3") + return 3; + else if (Name == "r4" || Name == "R4") + return 4; + else if (Name == "r5" || Name == "R5") + return 5; + else if (Name == "r6" || Name == "R6") + return 6; + else if (Name == "r7" || Name == "R7") + return 7; + else if (Name == "r8" || Name == "R8") + return 8; + else if (Name == "r9" || Name == "R9") + return 9; + else if (Name == "r10" || Name == "R10") + return 10; + else if (Name == "r11" || Name == "R11" || Name == "fp") + return 11; + else if (Name == "r12" || Name == "R12" || Name == "ip") + return 12; + else if (Name == "r13" || Name == "R13" || Name == "sp") + return 13; + else if (Name == "r14" || Name == "R14" || Name == "lr") + return 14; + else if (Name == "r15" || Name == "R15" || Name == "pc") + return 15; + return -1; +} + +// A hack to allow some testing +bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, + MCInst &Inst) { + struct ARMOperand Op0 = Operands[0]; + assert(Op0.Kind == ARMOperand::Token && "First operand not a Token"); + const StringRef &Mnemonic = Op0.getToken(); + if (Mnemonic == "add" || + Mnemonic == "stmfd" || + Mnemonic == "str" || + Mnemonic == "ldmfd" || + Mnemonic == "ldr" || + Mnemonic == "mov" || + Mnemonic == "sub") + return false; + + return true; +} + +// TODO - this is a work in progress +bool ARMAsmParser::ParseOperand(ARMOperand &Op) { + switch (getLexer().getKind()) { + case AsmToken::Identifier: + if (!ParseRegister(Op)) + return false; + // TODO parse other operands that start with an identifier like labels + return Error(getLexer().getTok().getLoc(), "labels not yet supported"); + case AsmToken::LBrac: + if (!ParseMemory(Op)) + return false; + case AsmToken::LCurly: + if (!ParseRegisterList(Op)) + return false; + case AsmToken::Hash: + // #42 ->
immediate. + // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate + getLexer().Lex(); + const MCExpr *Val; + if (getParser().ParseExpression(Val)) + return true; + Op = ARMOperand::CreateImm(Val); + return false; + default: + return Error(getLexer().getTok().getLoc(), "unexpected token in operand"); + } +} + +bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { + SmallVector Operands; + + Operands.push_back(ARMOperand::CreateToken(Name)); + + SMLoc Loc = getLexer().getTok().getLoc(); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + + // Read the first operand. + Operands.push_back(ARMOperand()); + if (ParseOperand(Operands.back())) + return true; + + while (getLexer().is(AsmToken::Comma)) { + getLexer().Lex(); // Eat the comma. + + // Parse and remember the operand. + Operands.push_back(ARMOperand()); + if (ParseOperand(Operands.back())) + return true; + } + } + if (!MatchInstruction(Operands, Inst)) + return false; + + Error(Loc, "ARMAsmParser::ParseInstruction only partly implemented"); + return true; +} + +bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(4, DirectiveID.getLoc()); + return true; +} + +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + getLexer().Lex(); + } + } + + getLexer().Lex(); + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeARMAsmParser() { + RegisterAsmParser<ARMAsmParser> X(TheARMTarget); + RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); +} diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt new file mode 100644 index 0000000000000..308c6cff8da90 --- /dev/null +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMARMAsmParser + ARMAsmParser.cpp + ) + diff --git a/lib/Target/ARM/AsmParser/Makefile b/lib/Target/ARM/AsmParser/Makefile new file mode 100644 index 0000000000000..97e56126d8eb5 --- /dev/null +++ b/lib/Target/ARM/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMAsmParser + +# Hack: we need to include 'main' ARM target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 434a19abef62d..546731b00d3c0 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -1,5 +1,3 @@ -//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===// -// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -21,23 +19,30 @@ #include "ARMMachineFunctionInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" -#include "llvm/MDNode.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" #include using namespace llvm; @@ -45,7 +50,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); namespace { class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { - DwarfWriter *DW; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when printing asm code for different targets. @@ -68,22 +72,18 @@ namespace { /// GVNonLazyPtrs - Keeps the set of GlobalValues that require /// non-lazy-pointers for indirect access. - StringSet<> GVNonLazyPtrs; + StringMap GVNonLazyPtrs; /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden /// visibility that require non-lazy-pointers for indirect access. - StringSet<> HiddenGVNonLazyPtrs; - - /// FnStubs - Keeps the set of external function GlobalAddresses that the - /// asm printer should generate stubs for. - StringSet<> FnStubs; + StringMap HiddenGVNonLazyPtrs; /// True if asm printer is printing a series of CONSTPOOL_ENTRY. 
bool InCPMode; public: - explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), DW(0), AFI(NULL), MCP(NULL), + explicit ARMAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL), InCPMode(false) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } @@ -110,6 +110,7 @@ namespace { const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); + void printThumbITMask(const MachineInstr *MI, int OpNum); void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum); void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum, unsigned Scale); @@ -118,10 +119,10 @@ namespace { void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum); void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum); - void printT2SOImmOperand(const MachineInstr *MI, int OpNum); void printT2SOOperand(const MachineInstr *MI, int OpNum); void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum); void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum); + void printT2AddrModeImm8s4Operand(const MachineInstr *MI, int OpNum); void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum); void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum); @@ -132,6 +133,9 @@ namespace { void printCPInstOperand(const MachineInstr *MI, int OpNum, const char *Modifier); void printJTBlockOperand(const MachineInstr *MI, int OpNum); + void printJT2BlockOperand(const MachineInstr *MI, int OpNum); + void printTBAddrMode(const MachineInstr *MI, int OpNum); + void printNoHashImmediate(const MachineInstr *MI, int OpNum); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); @@ -139,12 +143,14 @@ namespace { unsigned AsmVariant, const char *ExtraCode); - void printModuleLevelGV(const GlobalVariable* GVar); - bool printInstruction(const MachineInstr *MI); // autogenerated. + void PrintGlobalVariable(const GlobalVariable* GVar); + void printInstruction(const MachineInstr *MI); // autogenerated. + static const char *getRegisterName(unsigned RegNo); + void printMachineInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); - bool doInitialization(Module &M); bool doFinalization(Module &M); + void EmitStartOfAsmFile(Module &M); /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. @@ -153,24 +159,35 @@ namespace { ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); GlobalValue *GV = ACPV->getGV(); - std::string Name = GV ? Mang->getValueName(GV) : TAI->getGlobalPrefix(); - if (!GV) - Name += ACPV->getSymbol(); - if (ACPV->isNonLazyPointer()) { - if (GV->hasHiddenVisibility()) - HiddenGVNonLazyPtrs.insert(Name); - else - GVNonLazyPtrs.insert(Name); - printSuffixedName(Name, "$non_lazy_ptr"); - } else if (ACPV->isStub()) { - FnStubs.insert(Name); - printSuffixedName(Name, "$stub"); + std::string Name; + + if (ACPV->isLSDA()) { + SmallString<16> LSDAName; + raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + "_LSDA_" << getFunctionNumber(); + Name = LSDAName.str(); + } else if (GV) { + bool isIndirect = Subtarget->isTargetDarwin() && + Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); + if (!isIndirect) + Name = Mang->getMangledName(GV); + else { + // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ std::string SymName = Mang->getMangledName(GV); + Name = Mang->getMangledName(GV, "$non_lazy_ptr", true); + if (GV->hasHiddenVisibility()) + HiddenGVNonLazyPtrs[SymName] = Name; + else + GVNonLazyPtrs[SymName] = Name; + } } else - O << Name; + Name = Mang->makeNameProper(ACPV->getSymbol()); + O << Name; + if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; if (ACPV->getPCAdjustment() != 0) { - O << "-(" << TAI->getPrivateGlobalPrefix() << "PC" - << utostr(ACPV->getLabelId()) + O << "-(" << MAI->getPrivateGlobalPrefix() << "PC" + << ACPV->getLabelId() << "+" << (unsigned)ACPV->getPCAdjustment(); if (ACPV->mustAddCurrentAddress()) O << "-."; @@ -178,7 +195,7 @@ namespace { } O << "\n"; } - + void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); AU.setPreservesAll(); @@ -205,38 +222,39 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // NOTE: we don't print out constant pools here, they are handled as // instructions. - O << "\n"; + O << '\n'; + // Print out labels for the function. const Function *F = MF.getFunction(); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + switch (F->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); + default: llvm_unreachable("Unknown linkage type!"); case Function::PrivateLinkage: case Function::InternalLinkage: - SwitchToTextSection("\t.text", F); break; case Function::ExternalLinkage: - SwitchToTextSection("\t.text", F); O << "\t.globl\t" << CurrentFnName << "\n"; break; + case Function::LinkerPrivateLinkage: case Function::WeakAnyLinkage: case Function::WeakODRLinkage: case Function::LinkOnceAnyLinkage: case Function::LinkOnceODRLinkage: if (Subtarget->isTargetDarwin()) { - SwitchToTextSection( - ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F); O << "\t.globl\t" << CurrentFnName << "\n"; O << "\t.weak_definition\t" << CurrentFnName << "\n"; } else { - O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + O << MAI->getWeakRefDirective() << CurrentFnName << "\n"; } break; } printVisibility(CurrentFnName, F->getVisibility()); + unsigned FnAlign = 1 << MF.getAlignment(); // MF alignment is log2. if (AFI->isThumbFunction()) { - EmitAlignment(MF.getAlignment(), F, AFI->getAlign()); + EmitAlignment(FnAlign, F, AFI->getAlign()); O << "\t.code\t16\n"; O << "\t.thumb_func"; if (Subtarget->isTargetDarwin()) @@ -244,7 +262,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { O << "\n"; InCPMode = false; } else { - EmitAlignment(MF.getAlignment(), F); + EmitAlignment(FnAlign, F); } O << CurrentFnName << ":\n"; @@ -266,8 +284,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { I != E; ++I) { // Print a label for the basic block. if (I != MF.begin()) { - printBasicBlockLabel(I, true, true, VerboseAsm); - O << '\n'; + EmitBasicBlockStart(I); } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { @@ -276,14 +293,12 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } } - if (TAI->hasDotTypeDotSizeDirective()) + if (MAI->hasDotTypeDotSizeDirective()) O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n"; // Emit post-function debug information. 
DW->EndFunction(&MF); - O.flush(); - return false; } @@ -298,37 +313,39 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 O << '{' - << TRI->getAsmName(DRegLo) << "-" << TRI->getAsmName(DRegHi) + << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}'; + } else if (Modifier && strcmp(Modifier, "lane") == 0) { + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); + unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, + &ARM::DPR_VFP2RegClass); + O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; } else { - O << TRI->getAsmName(Reg); + O << getRegisterName(Reg); } } else - assert(0 && "not implemented"); + llvm_unreachable("not implemented"); break; } case MachineOperand::MO_Immediate: { - if (!Modifier || strcmp(Modifier, "no_hash") != 0) - O << "#"; - - O << MO.getImm(); + int64_t Imm = MO.getImm(); + O << '#'; + if (Modifier) { + if (strcmp(Modifier, "lo16") == 0) + O << ":lower16:"; + else if (strcmp(Modifier, "hi16") == 0) + O << ":upper16:"; + } + O << Imm; break; } case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB()); + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); GlobalValue *GV = MO.getGlobal(); - std::string Name = Mang->getValueName(GV); - bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || - GV->hasLinkOnceLinkage()); - if (isExt && isCallOp && Subtarget->isTargetDarwin() && - TM.getRelocationModel() != Reloc::Static) { - printSuffixedName(Name, "$stub"); - FnStubs.insert(Name); - } else - O << Name; + O << Mang->getMangledName(GV); printOffset(MO.getOffset()); @@ -339,25 +356,20 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, } case MachineOperand::MO_ExternalSymbol: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - std::string Name(TAI->getGlobalPrefix()); - Name += MO.getSymbolName(); - if (isCallOp && Subtarget->isTargetDarwin() && - TM.getRelocationModel() != Reloc::Static) { - printSuffixedName(Name, "$stub"); - FnStubs.insert(Name); - } else - O << Name; + std::string Name = Mang->makeNameProper(MO.getSymbolName()); + + O << Name; if (isCallOp && Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) O << "(PLT)"; break; } case MachineOperand::MO_ConstantPoolIndex: - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); break; case MachineOperand::MO_JumpTableIndex: - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); break; default: @@ -365,9 +377,12 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, } } -static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, - const TargetAsmInfo *TAI) { - assert(V < (1 << 12) && "Not a valid so_imm value!"); +static void printSOImm(formatted_raw_ostream &O, int64_t V, bool VerboseAsm, + const MCAsmInfo *MAI) { + // Break it up into two parts that make up a shifter immediate. 
+ V = ARM_AM::getSOImmVal(V); + assert(V != -1 && "Not a valid so_imm value!"); + unsigned Imm = ARM_AM::getSOImmValImm(V); unsigned Rot = ARM_AM::getSOImmValRot(V); @@ -377,7 +392,7 @@ static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, O << "#" << Imm << ", " << Rot; // Pretty printed version. if (VerboseAsm) - O << ' ' << TAI->getCommentString() + O << ' ' << MAI->getCommentString() << ' ' << (int)ARM_AM::rotr32(Imm, Rot); } else { O << "#" << Imm; @@ -389,7 +404,7 @@ static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) { const MachineOperand &MO = MI->getOperand(OpNum); assert(MO.isImm() && "Not a valid so_imm value!"); - printSOImm(O, MO.getImm(), VerboseAsm, TAI); + printSOImm(O, MO.getImm(), VerboseAsm, MAI); } /// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' @@ -399,15 +414,15 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { assert(MO.isImm() && "Not a valid so_imm value!"); unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm()); unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm()); - printSOImm(O, ARM_AM::getSOImmVal(V1), VerboseAsm, TAI); + printSOImm(O, V1, VerboseAsm, MAI); O << "\n\torr"; printPredicateOperand(MI, 2); O << " "; - printOperand(MI, 0); + printOperand(MI, 0); O << ", "; - printOperand(MI, 0); + printOperand(MI, 0); O << ", "; - printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI); + printSOImm(O, V2, VerboseAsm, MAI); } // so_reg is a 4-operand unit corresponding to register forms of the A5.1 @@ -420,8 +435,7 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) { const MachineOperand &MO2 = MI->getOperand(Op+1); const MachineOperand &MO3 = MI->getOperand(Op+2); - assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << getRegisterName(MO1.getReg()); // Print the shift opc. O << ", " @@ -429,8 +443,7 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) { << " "; if (MO2.getReg()) { - assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); - O << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + O << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); } else { O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); @@ -447,7 +460,7 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { return; } - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. 
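The so_imm printing above leans on the classic ARM "shifter operand" immediate encoding: an 8-bit value rotated right by an even amount. A self-contained sketch of the encode/decode round trip follows; the helper names are illustrative, not the ARM_AM API.

    #include <cassert>
    #include <cstdint>

    static uint32_t rotr32(uint32_t V, unsigned Amt) {
      Amt &= 31;
      return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
    }

    // Return the 12-bit encoding (rot4 << 8 | imm8), or -1 if V is not
    // representable as an 8-bit value rotated right by 2 * rot4.
    static int encodeSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2)
        if ((rotr32(V, 32 - Rot) & ~0xFFu) == 0)  // V == rotr32(imm8, Rot)
          return int(((Rot / 2) << 8) | rotr32(V, 32 - Rot));
      return -1;
    }

    int main() {
      int Enc = encodeSOImm(0xFF000000u);
      assert(Enc != -1);
      unsigned Imm = Enc & 0xFF, Rot = 2 * ((unsigned(Enc) >> 8) & 0xF);
      assert(rotr32(Imm, Rot) == 0xFF000000u); // decode reproduces the value
    }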
@@ -460,8 +473,8 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { O << ", " << (char)ARM_AM::getAM2Op(MO3.getImm()) - << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; - + << getRegisterName(MO2.getReg()); + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) O << ", " << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) @@ -483,8 +496,8 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ } O << (char)ARM_AM::getAM2Op(MO2.getImm()) - << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; - + << getRegisterName(MO1.getReg()); + if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) O << ", " << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) @@ -495,18 +508,18 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); const MachineOperand &MO3 = MI->getOperand(Op+2); - + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << "[" << getRegisterName(MO1.getReg()); if (MO2.getReg()) { O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm()) - << TM.getRegisterInfo()->get(MO2.getReg()).AsmName + << getRegisterName(MO2.getReg()) << "]"; return; } - + if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" << (char)ARM_AM::getAM3Op(MO3.getImm()) @@ -520,7 +533,7 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ if (MO1.getReg()) { O << (char)ARM_AM::getAM3Op(MO2.getImm()) - << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + << getRegisterName(MO1.getReg()); return; } @@ -530,7 +543,7 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ << (char)ARM_AM::getAM3Op(MO2.getImm()) << ImmOffs; } - + void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, const char *Modifier) { const MachineOperand &MO1 = MI->getOperand(Op); @@ -538,11 +551,18 @@ void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { if (MO1.getReg() == ARM::SP) { + // FIXME bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_RET); + MI->getOpcode() == ARM::LDM_RET || + MI->getOpcode() == ARM::t2LDM || + MI->getOpcode() == ARM::t2LDM_RET); O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); } else O << ARM_AM::getAMSubModeStr(Mode); + } else if (Modifier && strcmp(Modifier, "wide") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Mode == ARM_AM::ia) + O << ".w"; } else { printOperand(MI, Op); if (ARM_AM::getAM4WBFlag(MO2.getImm())) @@ -559,7 +579,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, printOperand(MI, Op); return; } - + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); if (Modifier && strcmp(Modifier, "submode") == 0) { @@ -573,14 +593,14 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, return; } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. 
- O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << getRegisterName(MO1.getReg()); if (ARM_AM::getAM5WBFlag(MO2.getImm())) O << "!"; return; } - - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; - + + O << "[" << getRegisterName(MO1.getReg()); + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" << (char)ARM_AM::getAM5Op(MO2.getImm()) @@ -595,13 +615,13 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO3 = MI->getOperand(Op+2); // FIXME: No support yet for specifying alignment. - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]"; + O << "[" << getRegisterName(MO1.getReg()) << "]"; if (ARM_AM::getAM6WBFlag(MO3.getImm())) { if (MO2.getReg() == 0) O << "!"; else - O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + O << ", " << getRegisterName(MO2.getReg()); } } @@ -614,7 +634,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, const MachineOperand &MO1 = MI->getOperand(Op); assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]"; + O << "[pc, +" << getRegisterName(MO1.getReg()) << "]"; } void @@ -629,12 +649,27 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) { //===--------------------------------------------------------------------===// +void +ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) { + // (3 - the number of trailing zeros) is the number of then / else. + unsigned Mask = MI->getOperand(Op).getImm(); + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = (Mask & (1 << Pos)) == 0; + if (T) + O << 't'; + else + O << 'e'; + } +} + void ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; - O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName << "]"; + O << "[" << getRegisterName(MO1.getReg()); + O << ", " << getRegisterName(MO2.getReg()) << "]"; } void @@ -649,9 +684,9 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, return; } - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << "[" << getRegisterName(MO1.getReg()); if (MO3.getReg()) - O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).AsmName; + O << ", " << getRegisterName(MO3.getReg()); else if (unsigned ImmOffs = MO2.getImm()) { O << ", #" << ImmOffs; if (Scale > 1) @@ -676,7 +711,7 @@ ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) { void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) O << ", #" << ImmOffs << " * 4"; O << "]"; @@ -684,20 +719,6 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { //===--------------------------------------------------------------------===// -/// printT2SOImmOperand - T2SOImm is: -/// 1. a 4-bit splat control value and 8 bit immediate value -/// 2. 
a 5-bit rotate amount and a non-zero 8-bit immediate value -/// represented by a normalizedin 7-bit value (msb is always 1) -void ARMAsmPrinter::printT2SOImmOperand(const MachineInstr *MI, int OpNum) { - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isImm() && "Not a valid so_imm value!"); - - unsigned Imm = ARM_AM::getT2SOImmValDecode(MO.getImm()); - // Always print the immediate directly, as the "rotate" form - // is deprecated in some contexts. - O << "#" << Imm; -} - // Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 // register with shift forms. // REG 0 0 - e.g. R5 @@ -708,7 +729,7 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum) { unsigned Reg = MO1.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - O << TM.getRegisterInfo()->getAsmName(Reg); + O << getRegisterName(Reg); // Print the shift opc. O << ", " @@ -724,7 +745,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << "[" << getRegisterName(MO1.getReg()); unsigned OffImm = MO2.getImm(); if (OffImm) // Don't print +0. @@ -737,7 +758,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << "[" << getRegisterName(MO1.getReg()); int32_t OffImm = (int32_t)MO2.getImm(); // Don't print +0. @@ -748,6 +769,22 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, O << "]"; } +void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, + int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm() / 4; + // Don't print +0. 
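+  // The operand holds the byte offset; it is printed below as "<imm/4> * 4"
+  // so the required multiple-of-4 scaling stays visible in the output.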
+ if (OffImm < 0) + O << ", #-" << -OffImm << " * 4"; + else if (OffImm > 0) + O << ", #+" << OffImm << " * 4"; + O << "]"; +} + void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum) { const MachineOperand &MO1 = MI->getOperand(OpNum); @@ -765,17 +802,15 @@ void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, const MachineOperand &MO2 = MI->getOperand(OpNum+1); const MachineOperand &MO3 = MI->getOperand(OpNum+2); - O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << "[" << getRegisterName(MO1.getReg()); - if (MO2.getReg()) { - O << ", +" - << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + assert(MO2.getReg() && "Invalid so_reg load / store address!"); + O << ", " << getRegisterName(MO2.getReg()); - unsigned ShAmt = MO3.getImm(); - if (ShAmt) { - assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); - O << ", lsl #" << ShAmt; - } + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; } O << "]"; } @@ -799,14 +834,17 @@ void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) { int Id = (int)MI->getOperand(OpNum).getImm(); - O << TAI->getPrivateGlobalPrefix() << "PC" << Id; + O << MAI->getPrivateGlobalPrefix() << "PC" << Id; } void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) { O << "{"; - for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { + // Always skip the first operand, it's the optional (and implicit writeback). + for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isImplicit()) + continue; + if ((int)i != OpNum+1) O << ", "; printOperand(MI, i); - if (i != e-1) O << ", "; } O << "}"; } @@ -818,14 +856,14 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, // data itself. 
if (!strcmp(Modifier, "label")) { unsigned ID = MI->getOperand(OpNum).getImm(); - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << ID << ":\n"; } else { assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); unsigned CPI = MI->getOperand(OpNum).getIndex(); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; - + if (MCPE.isMachineConstantPoolEntry()) { EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); } else { @@ -835,57 +873,119 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum, } void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) { + assert(!Subtarget->isThumb2() && "Thumb2 should use double-jump jumptables!"); + const MachineOperand &MO1 = MI->getOperand(OpNum); const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id unsigned JTI = MO1.getIndex(); - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm() << ":\n"; - const char *JTEntryDirective = TAI->getJumpTableDirective(); - if (!JTEntryDirective) - JTEntryDirective = TAI->getData32bitsDirective(); + const char *JTEntryDirective = MAI->getData32bitsDirective(); const MachineFunction *MF = MI->getParent()->getParent(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); const std::vector &JT = MJTI->getJumpTables(); const std::vector &JTBBs = JT[JTI].MBBs; - bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; - std::set JTSets; + bool UseSet= MAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; + SmallPtrSet JTSets; for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; - if (UseSet && JTSets.insert(MBB).second) + bool isNew = JTSets.insert(MBB); + + if (UseSet && isNew) printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); O << JTEntryDirective << ' '; if (UseSet) - O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm() << "_set_" << MBB->getNumber(); else if (TM.getRelocationModel() == Reloc::PIC_) { - printBasicBlockLabel(MBB, false, false, false); - // If the arch uses custom Jump Table directives, don't calc relative to JT - if (!TAI->getJumpTableDirective()) - O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" - << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); - } else - printBasicBlockLabel(MBB, false, false, false); + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); + } else { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + } + if (i != e-1) + O << '\n'; + } +} + +void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id + unsigned JTI = MO1.getIndex(); + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() << ":\n"; + + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector &JT = MJTI->getJumpTables(); + const std::vector &JTBBs = JT[JTI].MBBs; + bool ByteOffset = false, HalfWordOffset = false; + if (MI->getOpcode() == ARM::t2TBB) + ByteOffset = true; + 
else if (MI->getOpcode() == ARM::t2TBH) + HalfWordOffset = true; + + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (ByteOffset) + O << MAI->getData8bitsDirective(); + else if (HalfWordOffset) + O << MAI->getData16bitsDirective(); + if (ByteOffset || HalfWordOffset) { + O << '('; + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << "-" << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() << ")/2"; + } else { + O << "\tb.w "; + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + } if (i != e-1) O << '\n'; } + + // Make sure the instruction that follows TBB is 2-byte aligned. + // FIXME: Constant island pass should insert an "ALIGN" instruction instead. + if (ByteOffset && (JTBBs.size() & 1)) { + O << '\n'; + EmitAlignment(1); + } +} + +void ARMAsmPrinter::printTBAddrMode(const MachineInstr *MI, int OpNum) { + O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg()); + if (MI->getOpcode() == ARM::t2TBH) + O << ", lsl #1"; + O << ']'; } +void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) { + O << MI->getOperand(OpNum).getImm(); +} bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode){ // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. - + switch (ExtraCode[0]) { default: return true; // Unknown modifier. - case 'a': // Don't print "#" before a global var name or constant. - case 'c': // Don't print "$" before a global var name or constant. - printOperand(MI, OpNum, "no_hash"); + case 'a': // Print as a memory address. + if (MI->getOperand(OpNum).isReg()) { + O << "[" << getRegisterName(MI->getOperand(OpNum).getReg()) << "]"; + return false; + } + // Fallthrough + case 'c': // Don't print "#" before an immediate operand. + if (!MI->getOperand(OpNum).isImm()) + return true; + printNoHashImmediate(MI, OpNum); return false; case 'P': // Print a VFP double precision register. printOperand(MI, OpNum); @@ -898,7 +998,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (TM.getTargetData()->isBigEndian()) break; // Fallthrough - case 'H': // Write second word of DI / DF reference. + case 'H': // Write second word of DI / DF reference. // Verify that this operand has two consecutive registers. if (!MI->getOperand(OpNum).isReg() || OpNum+1 == MI->getNumOperands() || @@ -907,7 +1007,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, ++OpNum; // Return the high-part. } } - + printOperand(MI, OpNum); return false; } @@ -917,7 +1017,10 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, const char *ExtraCode) { if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. - printAddrMode2Operand(MI, OpNum); + + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline asm memory operand"); + O << "[" << getRegisterName(MO.getReg()) << "]"; return false; } @@ -938,16 +1041,47 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { }} // Call the autogenerated instruction printer routines. 
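+  // Bracket the instruction with debug-location processing: open the source
+  // location before printing and close it again afterwards. With verbose
+  // assembly, EmitComments() tags the instruction with its source location.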
+ processDebugLoc(MI, true); printInstruction(MI); + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + O << '\n'; + processDebugLoc(MI, false); } -bool ARMAsmPrinter::doInitialization(Module &M) { - - bool Result = AsmPrinter::doInitialization(M); - DW = getAnalysisIfAvailable(); +void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (Subtarget->isTargetDarwin()) { + Reloc::Model RelocM = TM.getRelocationModel(); + if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) { + // Declare all the text sections up front (before the DWARF sections + // emitted by AsmPrinter::doInitialization) so the assembler will keep + // them together at the beginning of the object file. This helps + // avoid out-of-range branches that are due a fundamental limitation of + // the way symbol offsets are encoded with the current Darwin ARM + // relocations. + TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); + OutStreamer.SwitchSection(TLOFMacho.getTextSection()); + OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); + OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); + if (RelocM == Reloc::DynamicNoPIC) { + const MCSection *sect = + TLOFMacho.getMachOSection("__TEXT", "__symbol_stub4", + MCSectionMachO::S_SYMBOL_STUBS, + 12, SectionKind::getText()); + OutStreamer.SwitchSection(sect); + } else { + const MCSection *sect = + TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub4", + MCSectionMachO::S_SYMBOL_STUBS, + 16, SectionKind::getText()); + OutStreamer.SwitchSection(sect); + } + } + } - // Thumb-2 instructions are supported only in unified assembler syntax mode. - if (Subtarget->hasThumb2()) + // Use unified assembler syntax mode for Thumb. + if (Subtarget->isThumb()) O << "\t.syntax unified\n"; // Emit ARM Build Attributes @@ -975,22 +1109,16 @@ bool ARMAsmPrinter::doInitialization(Module &M) { O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n" << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n"; + // Hard float. Use both S and D registers and conform to AAPCS-VFP. + if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_HardFP_use << ", 3\n" + << "\t.eabi_attribute " << ARMBuildAttrs::ABI_VFP_args << ", 1\n"; + // FIXME: Should we signal R9 usage? } - - return Result; -} - -/// PrintUnmangledNameSafely - Print out the printable characters in the name. -/// Don't print things like \\n or \\0. 
-static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { - for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); - Name != E; ++Name) - if (isprint(*Name)) - OS << *Name; } -void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { +void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { const TargetData *TD = TM.getTargetData(); if (!GVar->hasInitializer()) // External global require no code @@ -1009,10 +1137,8 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { return; } - std::string name = Mang->getValueName(GVar); + std::string name = Mang->getMangledName(GVar); Constant *C = GVar->getInitializer(); - if (isa(C) || isa(C)) - return; const Type *Type = C->getType(); unsigned Size = TD->getTypeAllocSize(Type); unsigned Align = TD->getPreferredAlignmentLog(GVar); @@ -1023,14 +1149,16 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { if (Subtarget->isTargetELF()) O << "\t.type " << name << ",%object\n"; - if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() && - !(isDarwin && - TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) { - // FIXME: This seems to be pretty darwin-specific + const MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GVar, Mang, TM); + OutStreamer.SwitchSection(TheSection); + // FIXME: get this stuff from section kind flags. + if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() && + // Don't put things that should go in the cstring section into "comm". + !TheSection->getKind().isMergeableCString()) { if (GVar->hasExternalLinkage()) { - SwitchToSection(TAI->SectionForGlobal(GVar)); - if (const char *Directive = TAI->getZeroFillDirective()) { + if (const char *Directive = MAI->getZeroFillDirective()) { O << "\t.globl\t" << name << "\n"; O << Directive << "__DATA, __common, " << name << ", " << Size << ", " << Align << "\n"; @@ -1043,57 +1171,56 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { if (isDarwin) { if (GVar->hasLocalLinkage()) { - O << TAI->getLCOMMDirective() << name << "," << Size + O << MAI->getLCOMMDirective() << name << "," << Size << ',' << Align; } else if (GVar->hasCommonLinkage()) { - O << TAI->getCOMMDirective() << name << "," << Size + O << MAI->getCOMMDirective() << name << "," << Size << ',' << Align; } else { - SwitchToSection(TAI->SectionForGlobal(GVar)); + OutStreamer.SwitchSection(TheSection); O << "\t.globl " << name << '\n' - << TAI->getWeakDefDirective() << name << '\n'; + << MAI->getWeakDefDirective() << name << '\n'; EmitAlignment(Align, GVar); O << name << ":"; if (VerboseAsm) { - O << "\t\t\t\t" << TAI->getCommentString() << ' '; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t\t\t" << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); } O << '\n'; EmitGlobalConstant(C); return; } - } else if (TAI->getLCOMMDirective() != NULL) { + } else if (MAI->getLCOMMDirective() != NULL) { if (GVar->hasLocalLinkage()) { - O << TAI->getLCOMMDirective() << name << "," << Size; + O << MAI->getLCOMMDirective() << name << "," << Size; } else { - O << TAI->getCOMMDirective() << name << "," << Size; - if (TAI->getCOMMDirectiveTakesAlignment()) - O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + O << MAI->getCOMMDirective() << name << "," << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? 
(1 << Align) : Align); } } else { - SwitchToSection(TAI->SectionForGlobal(GVar)); if (GVar->hasLocalLinkage()) O << "\t.local\t" << name << "\n"; - O << TAI->getCOMMDirective() << name << "," << Size; - if (TAI->getCOMMDirectiveTakesAlignment()) - O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + O << MAI->getCOMMDirective() << name << "," << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << "," << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); } if (VerboseAsm) { - O << "\t\t" << TAI->getCommentString() << " "; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t" << MAI->getCommentString() << " "; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); } O << "\n"; return; } } - SwitchToSection(TAI->SectionForGlobal(GVar)); switch (GVar->getLinkage()) { - case GlobalValue::CommonLinkage: - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkerPrivateLinkage: if (isDarwin) { O << "\t.globl " << name << "\n" << "\t.weak_definition " << name << "\n"; @@ -1101,28 +1228,27 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { O << "\t.weak " << name << "\n"; } break; - case GlobalValue::AppendingLinkage: - // FIXME: appending linkage variables should go into a section of - // their name or something. For now, just emit them as external. - case GlobalValue::ExternalLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: O << "\t.globl " << name << "\n"; - // FALL THROUGH - case GlobalValue::PrivateLinkage: - case GlobalValue::InternalLinkage: break; - default: - assert(0 && "Unknown linkage type!"); + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: break; + default: + llvm_unreachable("Unknown linkage type!"); } EmitAlignment(Align, GVar); O << name << ":"; if (VerboseAsm) { - O << "\t\t\t\t" << TAI->getCommentString() << " "; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t\t\t" << MAI->getCommentString() << " "; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); } O << "\n"; - if (TAI->hasDotTypeDotSizeDirective()) + if (MAI->hasDotTypeDotSizeDirective()) O << "\t.size " << name << ", " << Size << "\n"; EmitGlobalConstant(C); @@ -1131,83 +1257,36 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { bool ARMAsmPrinter::doFinalization(Module &M) { - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I); - if (Subtarget->isTargetDarwin()) { - SwitchToDataSection(""); - - // Output stubs for dynamically-linked functions - for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); - i != e; ++i) { - if (TM.getRelocationModel() == Reloc::PIC_) - SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs," - "none,16", 0); - else - SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs," - "none,12", 0); + // All darwin targets use mach-o. 
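+    // Grab the Mach-O-specific object-file lowering so the non-lazy symbol
+    // pointer sections can be switched to below.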
+ TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); - EmitAlignment(2); - O << "\t.code\t32\n"; - - const char *p = i->getKeyData(); - printSuffixedName(p, "$stub"); - O << ":\n"; - O << "\t.indirect_symbol " << p << "\n"; - O << "\tldr ip, "; - printSuffixedName(p, "$slp"); - O << "\n"; - if (TM.getRelocationModel() == Reloc::PIC_) { - printSuffixedName(p, "$scv"); - O << ":\n"; - O << "\tadd ip, pc, ip\n"; - } - O << "\tldr pc, [ip, #0]\n"; - printSuffixedName(p, "$slp"); - O << ":\n"; - O << "\t.long\t"; - printSuffixedName(p, "$lazy_ptr"); - if (TM.getRelocationModel() == Reloc::PIC_) { - O << "-("; - printSuffixedName(p, "$scv"); - O << "+8)\n"; - } else - O << "\n"; - SwitchToDataSection(".lazy_symbol_pointer", 0); - printSuffixedName(p, "$lazy_ptr"); - O << ":\n"; - O << "\t.indirect_symbol " << p << "\n"; - O << "\t.long\tdyld_stub_binding_helper\n"; - } - O << "\n"; + O << '\n'; // Output non-lazy-pointers for external and common global variables. if (!GVNonLazyPtrs.empty()) { - SwitchToDataSection("\t.non_lazy_symbol_pointer", 0); - for (StringSet<>::iterator i = GVNonLazyPtrs.begin(), - e = GVNonLazyPtrs.end(); i != e; ++i) { - const char *p = i->getKeyData(); - printSuffixedName(p, "$non_lazy_ptr"); - O << ":\n"; - O << "\t.indirect_symbol " << p << "\n"; + // Switch with ".non_lazy_symbol_pointer" directive. + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + EmitAlignment(2); + for (StringMap::iterator I = GVNonLazyPtrs.begin(), + E = GVNonLazyPtrs.end(); I != E; ++I) { + O << I->second << ":\n"; + O << "\t.indirect_symbol " << I->getKeyData() << "\n"; O << "\t.long\t0\n"; } } if (!HiddenGVNonLazyPtrs.empty()) { - SwitchToSection(TAI->getDataSection()); - for (StringSet<>::iterator i = HiddenGVNonLazyPtrs.begin(), - e = HiddenGVNonLazyPtrs.end(); i != e; ++i) { - const char *p = i->getKeyData(); - EmitAlignment(2); - printSuffixedName(p, "$non_lazy_ptr"); - O << ":\n"; - O << "\t.long " << p << "\n"; + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(2); + for (StringMap::iterator I = HiddenGVNonLazyPtrs.begin(), + E = HiddenGVNonLazyPtrs.end(); I != E; ++I) { + O << I->second << ":\n"; + O << "\t.long " << I->getKeyData() << "\n"; } } - // Funny Darwin hack: This flag tells the linker that no global symbols // contain code that falls through to other global symbols (e.g. the obvious // implementation of multiple entry points). If this doesn't occur, the @@ -1219,24 +1298,8 @@ bool ARMAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } -/// createARMCodePrinterPass - Returns a pass that prints the ARM -/// assembly code for a MachineFunction to the given output stream, -/// using the given target machine description. This should work -/// regardless of whether the function is in SSA form. -/// -FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o, - ARMBaseTargetMachine &tm, - bool verbose) { - return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); -} - -namespace { - static struct Register { - Register() { - ARMBaseTargetMachine::registerAsmPrinter(createARMCodePrinterPass); - } - } Registrator; -} - // Force static initialization. 
-extern "C" void LLVMInitializeARMAsmPrinter() { } +extern "C" void LLVMInitializeARMAsmPrinter() { + RegisterAsmPrinter X(TheARMTarget); + RegisterAsmPrinter Y(TheThumbTarget); +} diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile index ce36cec47b6e2..208beccce8a31 100644 --- a/lib/Target/ARM/AsmPrinter/Makefile +++ b/lib/Target/ARM/AsmPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## +##===- lib/Target/ARM/AsmPrinter/Makefile ------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 9c46fe0484b66..6e09eb2ff4d50 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -12,6 +12,8 @@ tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) add_llvm_target(ARMCodeGen + ARMBaseInstrInfo.cpp + ARMBaseRegisterInfo.cpp ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp @@ -20,14 +22,17 @@ add_llvm_target(ARMCodeGen ARMISelLowering.cpp ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp + ARMMCAsmInfo.cpp ARMRegisterInfo.cpp ARMSubtarget.cpp - ARMTargetAsmInfo.cpp ARMTargetMachine.cpp + NEONPreAllocPass.cpp Thumb1InstrInfo.cpp Thumb1RegisterInfo.cpp + Thumb2ITBlockPass.cpp Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp + Thumb2SizeReduction.cpp ) target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index 9a3b9be5b3454..a8dd38cb362e2 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -18,6 +18,6 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ ARMGenCodeEmitter.inc ARMGenCallingConv.inc -DIRS = AsmPrinter +DIRS = AsmPrinter AsmParser TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp new file mode 100644 index 0000000000000..821b872ac7cd1 --- /dev/null +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -0,0 +1,394 @@ +//===-- NEONPreAllocPass.cpp - Allocate adjacent NEON registers--*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "neon-prealloc" +#include "ARM.h" +#include "ARMInstrInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +namespace { + class VISIBILITY_HIDDEN NEONPreAllocPass : public MachineFunctionPass { + const TargetInstrInfo *TII; + + public: + static char ID; + NEONPreAllocPass() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "NEON register pre-allocation pass"; + } + + private: + bool PreAllocNEONRegisters(MachineBasicBlock &MBB); + }; + + char NEONPreAllocPass::ID = 0; +} + +static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, + unsigned &Offset, unsigned &Stride) { + // Default to unit stride with no offset. 
+ Stride = 1; + Offset = 0; + + switch (Opcode) { + default: + break; + + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2d64: + case ARM::VLD2LNd8: + case ARM::VLD2LNd16: + case ARM::VLD2LNd32: + FirstOpnd = 0; + NumRegs = 2; + return true; + + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + FirstOpnd = 0; + NumRegs = 4; + return true; + + case ARM::VLD2LNq16a: + case ARM::VLD2LNq32a: + FirstOpnd = 0; + NumRegs = 2; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD2LNq16b: + case ARM::VLD2LNq32b: + FirstOpnd = 0; + NumRegs = 2; + Offset = 1; + Stride = 2; + return true; + + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD3d64: + case ARM::VLD3LNd8: + case ARM::VLD3LNd16: + case ARM::VLD3LNd32: + FirstOpnd = 0; + NumRegs = 3; + return true; + + case ARM::VLD3q8a: + case ARM::VLD3q16a: + case ARM::VLD3q32a: + FirstOpnd = 0; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD3q8b: + case ARM::VLD3q16b: + case ARM::VLD3q32b: + FirstOpnd = 0; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + + case ARM::VLD3LNq16a: + case ARM::VLD3LNq32a: + FirstOpnd = 0; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD3LNq16b: + case ARM::VLD3LNq32b: + FirstOpnd = 0; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4d64: + case ARM::VLD4LNd8: + case ARM::VLD4LNd16: + case ARM::VLD4LNd32: + FirstOpnd = 0; + NumRegs = 4; + return true; + + case ARM::VLD4q8a: + case ARM::VLD4q16a: + case ARM::VLD4q32a: + FirstOpnd = 0; + NumRegs = 4; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD4q8b: + case ARM::VLD4q16b: + case ARM::VLD4q32b: + FirstOpnd = 0; + NumRegs = 4; + Offset = 1; + Stride = 2; + return true; + + case ARM::VLD4LNq16a: + case ARM::VLD4LNq32a: + FirstOpnd = 0; + NumRegs = 4; + Offset = 0; + Stride = 2; + return true; + + case ARM::VLD4LNq16b: + case ARM::VLD4LNq32b: + FirstOpnd = 0; + NumRegs = 4; + Offset = 1; + Stride = 2; + return true; + + case ARM::VST2d8: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2d64: + case ARM::VST2LNd8: + case ARM::VST2LNd16: + case ARM::VST2LNd32: + FirstOpnd = 3; + NumRegs = 2; + return true; + + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + FirstOpnd = 3; + NumRegs = 4; + return true; + + case ARM::VST2LNq16a: + case ARM::VST2LNq32a: + FirstOpnd = 3; + NumRegs = 2; + Offset = 0; + Stride = 2; + return true; + + case ARM::VST2LNq16b: + case ARM::VST2LNq32b: + FirstOpnd = 3; + NumRegs = 2; + Offset = 1; + Stride = 2; + return true; + + case ARM::VST3d8: + case ARM::VST3d16: + case ARM::VST3d32: + case ARM::VST3d64: + case ARM::VST3LNd8: + case ARM::VST3LNd16: + case ARM::VST3LNd32: + FirstOpnd = 3; + NumRegs = 3; + return true; + + case ARM::VST3q8a: + case ARM::VST3q16a: + case ARM::VST3q32a: + FirstOpnd = 4; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VST3q8b: + case ARM::VST3q16b: + case ARM::VST3q32b: + FirstOpnd = 4; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + + case ARM::VST3LNq16a: + case ARM::VST3LNq32a: + FirstOpnd = 3; + NumRegs = 3; + Offset = 0; + Stride = 2; + return true; + + case ARM::VST3LNq16b: + case ARM::VST3LNq32b: + FirstOpnd = 3; + NumRegs = 3; + Offset = 1; + Stride = 2; + return true; + + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4d64: + case ARM::VST4LNd8: + case ARM::VST4LNd16: + case ARM::VST4LNd32: + 
FirstOpnd = 3; + NumRegs = 4; + return true; + + case ARM::VST4q8a: + case ARM::VST4q16a: + case ARM::VST4q32a: + FirstOpnd = 4; + NumRegs = 4; + Offset = 0; + Stride = 2; + return true; + + case ARM::VST4q8b: + case ARM::VST4q16b: + case ARM::VST4q32b: + FirstOpnd = 4; + NumRegs = 4; + Offset = 1; + Stride = 2; + return true; + + case ARM::VST4LNq16a: + case ARM::VST4LNq32a: + FirstOpnd = 3; + NumRegs = 4; + Offset = 0; + Stride = 2; + return true; + + case ARM::VST4LNq16b: + case ARM::VST4LNq32b: + FirstOpnd = 3; + NumRegs = 4; + Offset = 1; + Stride = 2; + return true; + + case ARM::VTBL2: + FirstOpnd = 1; + NumRegs = 2; + return true; + + case ARM::VTBL3: + FirstOpnd = 1; + NumRegs = 3; + return true; + + case ARM::VTBL4: + FirstOpnd = 1; + NumRegs = 4; + return true; + + case ARM::VTBX2: + FirstOpnd = 2; + NumRegs = 2; + return true; + + case ARM::VTBX3: + FirstOpnd = 2; + NumRegs = 3; + return true; + + case ARM::VTBX4: + FirstOpnd = 2; + NumRegs = 4; + return true; + } + + return false; +} + +bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + for (; MBBI != E; ++MBBI) { + MachineInstr *MI = &*MBBI; + unsigned FirstOpnd, NumRegs, Offset, Stride; + if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) + continue; + + MachineBasicBlock::iterator NextI = next(MBBI); + for (unsigned R = 0; R < NumRegs; ++R) { + MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + + // For now, just assign a fixed set of adjacent registers. + // This leaves plenty of room for future improvements. + static const unsigned NEONDRegs[] = { + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 + }; + MO.setReg(NEONDRegs[Offset + R * Stride]); + + if (MO.isUse()) { + // Insert a copy from VirtReg. + TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg, + ARM::DPRRegisterClass, ARM::DPRRegisterClass); + if (MO.isKill()) { + MachineInstr *CopyMI = prior(MBBI); + CopyMI->findRegisterUseOperand(VirtReg)->setIsKill(); + } + MO.setIsKill(); + } else if (MO.isDef() && !MO.isDead()) { + // Add a copy to VirtReg. + TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(), + ARM::DPRRegisterClass, ARM::DPRRegisterClass); + } + } + } + + return Modified; +} + +bool NEONPreAllocPass::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + Modified |= PreAllocNEONRegisters(MBB); + } + + return Modified; +} + +/// createNEONPreAllocPass - returns an instance of the NEON register +/// pre-allocation pass. +FunctionPass *llvm::createNEONPreAllocPass() { + return new NEONPreAllocPass(); +} diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index 4d3200b445c12..a961a576f40d1 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -226,3 +226,31 @@ etc. Almost all Thumb instructions clobber condition code. //===---------------------------------------------------------------------===// Add ldmia, stmia support. + +//===---------------------------------------------------------------------===// + +Thumb load / store address mode offsets are scaled. 
The values kept in the
+instruction operands are pre-scale values. This probably ought to be changed
+to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
+
+//===---------------------------------------------------------------------===//
+
+We need to make (some of the) Thumb1 instructions predicable. That will allow
+shrinking of predicated Thumb2 instructions. To allow this, we need to be able
+to toggle the 's' bit since they do not set CPSR when they are inside IT blocks.
+
+//===---------------------------------------------------------------------===//
+
+Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
+
+//===---------------------------------------------------------------------===//
+
+Thumb1 immediate fields sometimes keep pre-scaled values. See
+Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and
+Thumb2.
+
+//===---------------------------------------------------------------------===//
+
+Rather than having tBR_JTr print a ".align 2" and constant island pass pad it,
+add a target-specific ALIGN instruction instead. That way, GetInstSizeInBytes
+won't have to over-estimate. It can also be used for the loop alignment pass.
diff --git a/lib/Target/ARM/README-Thumb2.txt b/lib/Target/ARM/README-Thumb2.txt
new file mode 100644
index 0000000000000..e7c2552d9e4c7
--- /dev/null
+++ b/lib/Target/ARM/README-Thumb2.txt
@@ -0,0 +1,6 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend (Thumb2 specific).
+//===---------------------------------------------------------------------===//
+
+Make sure jumptable destinations are below the jumptable in order to make use
+of tbb / tbh.
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index f3377f91ab964..8fb1da30088f9
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -537,3 +537,66 @@ Split out LDR (literal) from normal ARM LDR instruction. Also consider
 splitting LDR into imm12 and so_reg forms. This allows us to clean up some
 code. e.g. ARMLoadStoreOptimizer does not need to look at LDR (literal) and
 LDR (so_reg) while ARMConstantIslandPass only needs to worry about LDR (literal).
+
+//===---------------------------------------------------------------------===//
+
+We need to fix constant isel for ARMv6t2 to use MOVT.
+
+//===---------------------------------------------------------------------===//
+
+Constant island pass should make use of full range SoImm values for LEApcrel.
+Be careful though as the last attempt caused infinite looping on lencod.
+
+//===---------------------------------------------------------------------===//
+
+Predication issue. This function:
+
+extern unsigned array[ 128 ];
+int foo( int x ) {
+  int y;
+  y = array[ x & 127 ];
+  if ( x & 128 )
+    y = 123456789 & ( y >> 2 );
+  else
+    y = 123456789 & y;
+  return y;
+}
+
+compiles to:
+
+_foo:
+    and r1, r0, #127
+    ldr r2, LCPI1_0
+    ldr r2, [r2]
+    ldr r1, [r2, +r1, lsl #2]
+    mov r2, r1, lsr #2
+    tst r0, #128
+    moveq r2, r1
+    ldr r0, LCPI1_1
+    and r0, r2, r0
+    bx lr
+
+It would be better to do something like this, to fold the shift into the
+conditional move:
+
+    and r1, r0, #127
+    ldr r2, LCPI1_0
+    ldr r2, [r2]
+    ldr r1, [r2, +r1, lsl #2]
+    tst r0, #128
+    movne r1, r1, lsr #2
+    ldr r0, LCPI1_1
+    and r0, r1, r0
+    bx lr
+
+This saves an instruction and a register.
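+
+As a standalone sanity check that the folded form is equivalent, here is a
+small host-side C++ snippet (illustrative only -- it is not part of the
+backend, and the names ref/folded are made up):
+
+  #include <cassert>
+  #include <cstdint>
+
+  // Original codegen: compute the shifted value eagerly, then select it.
+  static uint32_t ref(uint32_t y, uint32_t x) {
+    uint32_t sel = (x & 128) ? (y >> 2) : y;
+    return 123456789u & sel;
+  }
+
+  // Folded form: predicate the shift itself ("movne r1, r1, lsr #2").
+  static uint32_t folded(uint32_t y, uint32_t x) {
+    uint32_t sel = y;
+    if (x & 128)
+      sel = y >> 2;   // only performed when the test succeeds
+    return 123456789u & sel;
+  }
+
+  int main() {
+    for (uint32_t x = 0; x < 256; ++x)
+      for (uint32_t y = 0; y < 4096; ++y)
+        assert(ref(y, x) == folded(y, x));
+    return 0;
+  }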
+ +//===---------------------------------------------------------------------===// + +add/sub/and/or + i32 imm can be simplified by folding part of the immediate +into the operation. + +//===---------------------------------------------------------------------===// + +It might be profitable to cse MOVi16 if there are lots of 32-bit immediates +with the same bottom half. diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp new file mode 100644 index 0000000000000..163a0a9875849 --- /dev/null +++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -0,0 +1,23 @@ +//===-- ARMTargetInfo.cpp - ARM Target Implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheARMTarget, llvm::TheThumbTarget; + +extern "C" void LLVMInitializeARMTargetInfo() { + RegisterTarget + X(TheARMTarget, "arm", "ARM"); + + RegisterTarget + Y(TheThumbTarget, "thumb", "Thumb"); +} diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..3910bb02e2196 --- /dev/null +++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMARMInfo + ARMTargetInfo.cpp + ) + +add_dependencies(LLVMARMInfo ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/TargetInfo/Makefile b/lib/Target/ARM/TargetInfo/Makefile new file mode 100644 index 0000000000000..6292ab14b3469 --- /dev/null +++ b/lib/Target/ARM/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/ARM/TargetInfo/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index e13a8117bf2f5..7eed30edf25c3 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -22,63 +22,29 @@ using namespace llvm; -Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { +Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) { } -bool Thumb1InstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. 
- - unsigned oc = MI.getOpcode(); - switch (oc) { - default: - return false; - case ARM::tMOVr: - case ARM::tMOVhir2lor: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2hir: - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid Thumb MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } -} - -unsigned Thumb1InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::tRestore: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } +unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -unsigned Thumb1InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case ARM::tSpill: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } +bool +Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case ARM::tBX_RET: + case ARM::tBX_RET_vararg: + case ARM::tPOP_RET: + case ARM::tB: + case ARM::tBR_JTr: + return true; + default: break; } - return 0; + + return false; } bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, @@ -91,15 +57,15 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (DestRC == ARM::GPRRegisterClass) { if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); return true; } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); return true; } } else if (DestRC == ARM::tGPRRegisterClass) { if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); return true; } else if (SrcRC == ARM::tGPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); @@ -120,17 +86,19 @@ canFoldMemoryOperand(const MachineInstr *MI, switch (Opc) { default: break; case ARM::tMOVr: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2lor: - case ARM::tMOVhir2hir: { + case ARM::tMOVtgpr2gpr: + case ARM::tMOVgpr2tgpr: + case ARM::tMOVgpr2gpr: { if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); - if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + !isARMLowRegister(SrcReg)) // tSpill cannot take a high register operand. return false; } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + if (TargetRegisterInfo::isPhysicalRegister(DstReg) && + !isARMLowRegister(DstReg)) // tRestore cannot target a high register operand. 
return false; } @@ -148,36 +116,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); + assert((RC == ARM::tGPRRegisterClass || + (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + isARMLowRegister(SrcReg))) && "Unknown regclass!"); if (RC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tSpill)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); } } -void Thumb1InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const{ - DebugLoc DL = DebugLoc::getUnknownLoc(); - unsigned Opc = 0; - - assert(RC == ARM::GPRRegisterClass && "Unknown regclass!"); - if (RC == ARM::GPRRegisterClass) { - Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR; - } - - MachineInstrBuilder MIB = - BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; -} - void Thumb1InstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, @@ -185,33 +134,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); + assert((RC == ARM::tGPRRegisterClass || + (TargetRegisterInfo::isPhysicalRegister(DestReg) && + isARMLowRegister(DestReg))) && "Unknown regclass!"); if (RC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) - .addFrameIndex(FI).addImm(0); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + .addFrameIndex(FI).addImm(0)); } } -void Thumb1InstrInfo:: -loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - unsigned Opc = 0; - - if (RC == ARM::GPRRegisterClass) { - Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; - } - - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; -} - bool Thumb1InstrInfo:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -223,6 +155,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, if (MI != MBB.end()) DL = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); + AddDefaultPred(MIB); + MIB.addReg(0); // No write back. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); // Add the callee-saved register as live-in. It's killed at the spill. @@ -242,7 +176,12 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return false; bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; - MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); + DebugLoc DL = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP)); + AddDefaultPred(MIB); + MIB.addReg(0); // No write back. 
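+  // Count the registers actually added to the pop; it is illegal to emit a
+  // pop instruction with no register operands, so the instruction is only
+  // inserted when at least one register made it onto the list.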
+ + bool NumRegs = 0; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { @@ -250,15 +189,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (isVarArg) continue; Reg = ARM::PC; - PopMI->setDesc(get(ARM::tPOP_RET)); + (*MIB).setDesc(get(ARM::tPOP_RET)); MI = MBB.erase(MI); } - PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); + MIB.addReg(Reg, getDefRegState(true)); + ++NumRegs; } // It's illegal to emit pop instruction without operands. - if (PopMI->getNumOperands() > 0) - MBB.insert(MI, PopMI); + if (NumRegs) + MBB.insert(MI, &*MIB); return true; } @@ -274,27 +214,30 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, switch (Opc) { default: break; case ARM::tMOVr: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2lor: - case ARM::tMOVhir2hir: { + case ARM::tMOVtgpr2gpr: + case ARM::tMOVgpr2tgpr: + case ARM::tMOVgpr2gpr: { if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); - if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + !isARMLowRegister(SrcReg)) // tSpill cannot take a high register operand. break; - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); + NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) + if (TargetRegisterInfo::isPhysicalRegister(DstReg) && + !isARMLowRegister(DstReg)) // tRestore cannot target a high register operand. break; bool isDead = MI->getOperand(0).isDead(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) - .addFrameIndex(FI).addImm(0); + NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) + .addReg(DstReg, + RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0)); } break; } diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 1bfa1d0bdc34c..13cc5787b5b91 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -27,6 +27,13 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb1InstrInfo(const ARMSubtarget &STI); + // Return the non-pre/post incrementing version of 'Opc'. Return 0 + // if there is not such an opcode. + unsigned getUnindexedOpcode(unsigned Opc) const; + + // Return true if the block does not fall through. + bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
@@ -40,14 +47,6 @@ public: MachineBasicBlock::iterator MI, const std::vector &CSI) const; - bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SrcReg, @@ -58,21 +57,11 @@ public: unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const; - void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; - void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl &Ops) const; @@ -80,7 +69,7 @@ public: MachineInstr* MI, const SmallVectorImpl &Ops, int FrameIndex) const; - + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, const SmallVectorImpl &Ops, diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 92f01d1006dd6..3c896da4c0ca0 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -13,12 +13,15 @@ #include "ARM.h" #include "ARMAddressingModes.h" +#include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "Thumb1RegisterInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,14 +33,11 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt -ThumbRegScavenging("enable-thumb-reg-scavenging", - cl::Hidden, - cl::desc("Enable register scavenging on Thumb")); - -Thumb1RegisterInfo::Thumb1RegisterInfo(const TargetInstrInfo &tii, +Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { } @@ -46,20 +46,24 @@ Thumb1RegisterInfo::Thumb1RegisterInfo(const TargetInstrInfo &tii, /// specified immediate. 
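+/// The new signature threads the debug location, an optional subregister
+/// index, and the predicate operands (Pred, PredReg) through to the
+/// constant-pool load (tLDRcp) built below.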
void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Val, - const TargetInstrInfo *TII, - DebugLoc dl) const { + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, + int Val, + ARMCC::CondCodes Pred, + unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get(Type::Int32Ty, Val); + Constant *C = ConstantInt::get( + Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); - BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg) - .addConstantPoolIndex(Idx); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRcp)) + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } const TargetRegisterClass* -Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { +Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const { if (isARMLowRegister(Reg)) return ARM::tGPRRegisterClass; switch (Reg) { @@ -75,9 +79,16 @@ Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { bool Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return ThumbRegScavenging; + return true; +} + +bool +Thumb1RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) + const { + return true; } + bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); @@ -91,6 +102,7 @@ bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } + /// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize /// a destreg = basereg + immediate in Thumb code. Materialize the immediate /// in a register using mov / mvn sequences or load the immediate from a @@ -103,6 +115,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, const TargetInstrInfo &TII, const Thumb1RegisterInfo& MRI, DebugLoc dl) { + MachineFunction &MF = *MBB.getParent(); bool isHigh = !isARMLowRegister(DestReg) || (BaseReg != 0 && !isARMLowRegister(BaseReg)); bool isSub = false; @@ -117,31 +130,31 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, unsigned LdReg = DestReg; if (DestReg == ARM::SP) { assert(BaseReg == ARM::SP && "Unexpected!"); - LdReg = ARM::R3; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R3, RegState::Kill); + LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); } if (NumBytes <= 255 && NumBytes >= 0) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) + .addImm(NumBytes); else if (NumBytes < 0 && NumBytes >= -255) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) + .addImm(NumBytes); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) .addReg(LdReg, RegState::Kill); } else - MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl); + MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes); // Emit add / sub. int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? 
ARM::tADDhirr : ARM::tADDrr); - const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, - TII.get(Opc), DestReg); + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); + if (Opc != ARM::tADDhirr) + MIB = AddDefaultT1CC(MIB); if (DestReg == ARM::SP || isSub) MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); else MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); - if (DestReg == ARM::SP) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) - .addReg(ARM::R12, RegState::Kill); + AddDefaultPred(MIB); } /// calcNumMI - Returns the number of instructions required to materialize @@ -187,6 +200,8 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, unsigned Scale = 1; int Opc = 0; int ExtraOpc = 0; + bool NeedCC = false; + bool NeedPred = false; if (DestReg == BaseReg && BaseReg == ARM::SP) { assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); @@ -213,7 +228,16 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, if (DestReg != BaseReg) DstNotEqBase = true; NumBits = 8; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + if (DestReg == ARM::SP) { + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + NumBits = 7; + Scale = 4; + } else { + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + NumBits = 8; + NeedPred = NeedCC = true; + } isTwoAddr = true; } @@ -233,8 +257,10 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, unsigned Chunk = (1 << 3) - 1; unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; Bytes -= ThisVal; - BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal); + const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); + const MachineInstrBuilder MIB = + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)); + AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) .addReg(BaseReg, RegState::Kill); @@ -248,13 +274,22 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Bytes -= ThisVal; ThisVal /= Scale; // Build the new tADD / tSUB. - if (isTwoAddr) - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(DestReg).addImm(ThisVal); + if (isTwoAddr) { + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); + if (NeedCC) + MIB = AddDefaultT1CC(MIB); + MIB .addReg(DestReg).addImm(ThisVal); + if (NeedPred) + MIB = AddDefaultPred(MIB); + } else { bool isKill = BaseReg != ARM::SP; - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); + if (NeedCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); + if (NeedPred) + MIB = AddDefaultPred(MIB); BaseReg = DestReg; if (Opc == ARM::tADDrSPi) { @@ -265,15 +300,17 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Scale = 1; Chunk = ((1 << NumBits) - 1) * Scale; Opc = isSub ? 
ARM::tSUBi8 : ARM::tADDi8; - isTwoAddr = true; + NeedPred = NeedCC = isTwoAddr = true; } } } - if (ExtraOpc) - BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3); + if (ExtraOpc) { + const TargetInstrDesc &TID = TII.get(ExtraOpc); + AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) + .addReg(DestReg, RegState::Kill) + .addImm(((unsigned)NumBytes) & 3)); + } } static void emitSPUpdate(MachineBasicBlock &MBB, @@ -329,16 +366,64 @@ static void emitThumbConstant(MachineBasicBlock &MBB, int Chunk = (1 << 8) - 1; int ThisVal = (Imm > Chunk) ? Chunk : Imm; Imm -= ThisVal; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); + AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), + DestReg)) + .addImm(ThisVal)); if (Imm > 0) emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); - if (isSub) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) - .addReg(DestReg, RegState::Kill); + if (isSub) { + const TargetInstrDesc &TID = TII.get(ARM::tRSB); + AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) + .addReg(DestReg, RegState::Kill)); + } } -void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ +static void removeOperands(MachineInstr &MI, unsigned i) { + unsigned Op = i; + for (unsigned e = MI.getNumOperands(); i != e; ++i) + MI.RemoveOperand(Op); +} + +int Thumb1RegisterInfo:: +rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int Offset, + unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const +{ + // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo + // version then can pull out Thumb1 specific parts here + return 0; +} + +/// saveScavengerRegister - Save the register so it can be used by the +/// register scavenger. Return true. +bool Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const { + // Thumb1 can't use the emergency spill slot on the stack because + // ldr/str immediate offsets must be positive, and if we're referencing + // off the frame pointer (if, for example, there are alloca() calls in + // the function, the offset will be negative. Use R12 instead since that's + // a call clobbered register that we know won't be used in Thumb1 mode. + + TII.copyRegToReg(MBB, I, ARM::R12, Reg, ARM::GPRRegisterClass, RC); + return true; +} + +/// restoreScavengerRegister - restore a registers saved by +// saveScavengerRegister(). +void Thumb1RegisterInfo::restoreScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const { + TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); +} + +unsigned +Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const{ + unsigned VReg = 0; unsigned i = 0; MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); @@ -380,7 +465,7 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Scale = 1; if (FrameReg != ARM::SP) { Opcode = ARM::tADDi3; - MI.setDesc(TII.get(ARM::tADDi3)); + MI.setDesc(TII.get(Opcode)); NumBits = 3; } else { NumBits = 8; @@ -391,19 +476,26 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Offset == 0) { // Turn it into a move. 
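The saveScavengerRegister/restoreScavengerRegister pair above parks the scavenged value in R12 rather than in an emergency stack slot because Thumb-1 load/store immediates cannot reach below the base register, and R12 is call-clobbered and unused in Thumb-1 mode. A standalone encodability check, assuming the tLDR/tSTR imm5 form (word-scaled, unsigned):

// Thumb-1 tLDR/tSTR encode a 5-bit, word-scaled, unsigned offset, so a
// frame-pointer-relative slot at a negative offset is unreachable.
static bool fitsThumb1LdrStrImm(int ByteOffset) {
  return ByteOffset >= 0 && ByteOffset <= 31 * 4 && (ByteOffset & 3) == 0;
}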
- MI.setDesc(TII.get(ARM::tMOVhir2lor)); + MI.setDesc(TII.get(ARM::tMOVgpr2tgpr)); MI.getOperand(i).ChangeToRegister(FrameReg, false); MI.RemoveOperand(i+1); - return; + return 0; } // Common case: small offset, fits into instruction. unsigned Mask = (1 << NumBits) - 1; if (((Offset / Scale) & ~Mask) == 0) { // Replace the FrameIndex with sp / fp - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); - return; + if (Opcode == ARM::tADDi3) { + removeOperands(MI, i); + MachineInstrBuilder MIB(&MI); + AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) + .addImm(Offset / Scale)); + } else { + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + } + return 0; } unsigned DestReg = MI.getOperand(0).getReg(); @@ -415,15 +507,21 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, *this, dl); MBB.erase(II); - return; + return 0; } if (Offset > 0) { // Translate r0 = add sp, imm to // r0 = add sp, 255*4 // r0 = add r0, (imm - 255*4) - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Mask); + if (Opcode == ARM::tADDi3) { + removeOperands(MI, i); + MachineInstrBuilder MIB(&MI); + AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); + } else { + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Mask); + } Offset = (Offset - Mask * Scale); MachineBasicBlock::iterator NII = next(II); emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, @@ -433,11 +531,16 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // r0 = -imm (this is then translated into a series of instructons) // r0 = add r0, sp emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); + MI.setDesc(TII.get(ARM::tADDhirr)); MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + if (Opcode == ARM::tADDi3) { + MachineInstrBuilder MIB(&MI); + AddDefaultPred(MIB); + } } - return; + return 0; } else { unsigned ImmIdx = 0; int InstrOffs = 0; @@ -452,8 +555,7 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, break; } default: - assert(0 && "Unsupported addressing mode!"); - abort(); + llvm_unreachable("Unsupported addressing mode!"); break; } @@ -468,7 +570,7 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Replace the FrameIndex with sp MI.getOperand(i).ChangeToRegister(FrameReg, false); ImmOp.ChangeToImmediate(ImmedOffset); - return; + return 0; } bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; @@ -495,6 +597,11 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // SP+LargeImm. assert(Offset && "This code isn't needed if offset already handled!"); + // Remove predicate first. + int PIdx = MI.findFirstPredOperandIdx(); + if (PIdx != -1) + removeOperands(MI, PIdx); + if (Desc.mayLoad()) { // Use the destination register to materialize sp + offset. 
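The "small offset" fast path above is just a mask test after scaling; sketched standalone (a simplified helper, with NumBits and Scale chosen as in the surrounding code):

// tADDrSPi reaches 0..1020 in steps of 4 (NumBits = 8, Scale = 4);
// tADDi3 reaches 0..7 (NumBits = 3, Scale = 1).
static bool offsetFits(int Offset, unsigned NumBits, unsigned Scale) {
  if (Offset < 0 || Offset % (int)Scale != 0)
    return false;
  unsigned Mask = (1u << NumBits) - 1;
  return (((unsigned)Offset / Scale) & ~Mask) == 0;
}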
unsigned TmpReg = MI.getOperand(0).getReg(); @@ -504,12 +611,14 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, Offset, false, TII, *this, dl); else { - emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl); + emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); UseRR = true; } - } else + } else { emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, *this, dl); + } + MI.setDesc(TII.get(ARM::tLDR)); MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); if (UseRR) @@ -518,52 +627,37 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else // tLDR has an extra register operand. MI.addOperand(MachineOperand::CreateReg(0, false)); } else if (Desc.mayStore()) { - // FIXME! This is horrific!!! We need register scavenging. - // Our temporary workaround has marked r3 unavailable. Of course, r3 is - // also a ABI register so it's possible that is is the register that is - // being storing here. If that's the case, we do the following: - // r12 = r2 - // Use r2 to materialize sp + offset - // str r3, r2 - // r2 = r12 - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned TmpReg = ARM::R3; - bool UseRR = false; - if (ValReg == ARM::R3) { - BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R2, RegState::Kill); - TmpReg = ARM::R2; - } - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - if (Opcode == ARM::tSpill) { - if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); - else { - emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl); - UseRR = true; - } - } else - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); - MI.setDesc(TII.get(ARM::tSTR)); - MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); - if (UseRR) // Use [reg, reg] addrmode. - MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); - else // tSTR has an extra register operand. - MI.addOperand(MachineOperand::CreateReg(0, false)); - - MachineBasicBlock::iterator NII = next(II); - if (ValReg == ARM::R3) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2) - .addReg(ARM::R12, RegState::Kill); - if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) - BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) - .addReg(ARM::R12, RegState::Kill); + VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass); + assert (Value && "Frame index virtual allocated, but Value arg is NULL!"); + *Value = Offset; + bool UseRR = false; + + if (Opcode == ARM::tSpill) { + if (FrameReg == ARM::SP) + emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg, + Offset, false, TII, *this, dl); + else { + emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); + UseRR = true; + } + } else + emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, + *this, dl); + MI.setDesc(TII.get(ARM::tSTR)); + MI.getOperand(i).ChangeToRegister(VReg, false, false, true); + if (UseRR) // Use [reg, reg] addrmode. + MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); + else // tSTR has an extra register operand. + MI.addOperand(MachineOperand::CreateReg(0, false)); } else assert(false && "Unexpected opcode!"); + + // Add predicate back if it's needed. 
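With the hard-coded R3 juggling gone, the store path above materializes the address into a fresh virtual register and reports the materialized offset through the Value out-parameter; prologue/epilogue insertion later lets the register scavenger assign a physical register for it and potentially reuse it for other users of the same offset. A toy model of that return contract (simplified stand-ins for the LLVM types, not the actual API):

// eliminateFrameIndex now returns the virtual register it created (0 if
// none) and writes the sp/fp offset that register holds into *Value.
struct FrameElimResult {
  unsigned VReg; // fresh virtual register needing a scavenged phys reg
  int Value;     // offset materialized into VReg, tracked for reuse
};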
+ if (MI.getDesc().isPredicable()) { + MachineInstrBuilder MIB(&MI); + AddDefaultPred(MIB); + } + return VReg; } void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { @@ -577,15 +671,6 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); - // Check if R3 is live in. It might have to be used as a scratch register. - for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(), - E = MF.getRegInfo().livein_end(); I != E; ++I) { - if (I->first == ARM::R3) { - AFI->setR3IsLiveIn(true); - break; - } - } - // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. NumBytes = (NumBytes + 3) & ~3; MFI->setStackSize(NumBytes); @@ -647,8 +732,7 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const { // Darwin ABI requires FP to point to the stack slot that contains the // previous FP. if (STI.isTargetDarwin() || hasFP(MF)) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addFrameIndex(FramePtrSpillFI).addImm(0); } @@ -729,7 +813,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, TII, *this, dl); else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(FramePtr); } else { if (MBBI->getOpcode() == ARM::tBX_RET && @@ -745,11 +829,14 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, if (VARegSaveSize) { // Epilogue for vararg functions: pop LR to R3 and branch off it. // FIXME: Verify this is still ok when R3 is no longer being reserved. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(0) // No write back. + .addReg(ARM::R3, RegState::Define); emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) + .addReg(ARM::R3, RegState::Kill); MBB.erase(MBBI); } } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 6d4f1f0bf5e27..bb7a6199d10d6 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -20,28 +20,28 @@ namespace llvm { class ARMSubtarget; - class TargetInstrInfo; + class ARMBaseInstrInfo; class Type; struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { public: - Thumb1RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. - void emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Val, - const TargetInstrInfo *TII, - DebugLoc dl) const; + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, int Val, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) const; /// Code Generation virtual methods... 
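Call sites pick up ARMCC::AL and no predicate register by default, so unconditional users stay terse while predicated users pass the extra operands explicitly. A compilable sketch of the same defaulting pattern (a hypothetical stub, not the LLVM API):

namespace ARMCC { enum CondCodes { EQ = 0, AL = 14 }; }

// Stub mirroring the defaulted trailing parameters of emitLoadConstPool.
static void emitLoadConstPoolStub(unsigned DestReg, unsigned SubIdx, int Val,
                                  ARMCC::CondCodes Pred = ARMCC::AL,
                                  unsigned PredReg = 0) {
  (void)DestReg; (void)SubIdx; (void)Val; (void)Pred; (void)PredReg;
}

int main() {
  emitLoadConstPoolStub(0, 0, 1234);               // unpredicated
  emitLoadConstPoolStub(0, 0, 1234, ARMCC::EQ, 3); // predicated form
  return 0;
}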
const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; - - bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const; @@ -49,8 +49,23 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + // rewrite MI to access 'Offset' bytes from the FP. Return the offset that + // could not be handled directly in MI. + int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int Offset, + unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const; + + bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const; + void restoreScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + unsigned Reg) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp new file mode 100644 index 0000000000000..98b5cbdfb98f4 --- /dev/null +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -0,0 +1,158 @@ +//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "thumb2-it" +#include "ARM.h" +#include "ARMMachineFunctionInfo.h" +#include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumITs, "Number of IT blocks inserted"); + +namespace { + struct VISIBILITY_HIDDEN Thumb2ITBlockPass : public MachineFunctionPass { + static char ID; + Thumb2ITBlockPass() : MachineFunctionPass(&ID) {} + + const Thumb2InstrInfo *TII; + ARMFunctionInfo *AFI; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "Thumb IT blocks insertion pass"; + } + + private: + MachineBasicBlock::iterator + SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineInstr *MI, DebugLoc dl, + unsigned PredReg, ARMCC::CondCodes CC); + bool InsertITBlocks(MachineBasicBlock &MBB); + }; + char Thumb2ITBlockPass::ID = 0; +} + +static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){ + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) + return ARMCC::AL; + return llvm::getInstrPredicate(MI, PredReg); +} + +MachineBasicBlock::iterator +Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr *MI, + DebugLoc dl, unsigned PredReg, + ARMCC::CondCodes CC) { + // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here. 
+ // The only reason it was a single instruction was so it could be + // re-materialized. We want to split it before this and the thumb2 + // size reduction pass to make sure the IT mask is correct and expose + // width reduction opportunities. It doesn't make sense to do this in a + // separate pass so here it is. + unsigned DstReg = MI->getOperand(0).getReg(); + bool DstDead = MI->getOperand(0).isDead(); // Is this possible? + unsigned Imm = MI->getOperand(1).getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg) + .addImm(Lo16).addImm(CC).addReg(PredReg); + BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)) + .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg); + --MBBI; + --MBBI; + MI->eraseFromParent(); + return MBBI; +} + +bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineInstr *MI = &*MBBI; + DebugLoc dl = MI->getDebugLoc(); + unsigned PredReg = 0; + ARMCC::CondCodes CC = getPredicate(MI, PredReg); + + if (MI->getOpcode() == ARM::t2MOVi32imm) { + MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC); + continue; + } + + if (CC == ARMCC::AL) { + ++MBBI; + continue; + } + + // Insert an IT instruction. + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT)) + .addImm(CC); + ++MBBI; + + // Finalize IT mask. + ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); + unsigned Mask = 0, Pos = 3; + while (MBBI != E && Pos) { + MachineInstr *NMI = &*MBBI; + DebugLoc ndl = NMI->getDebugLoc(); + unsigned NPredReg = 0; + ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); + if (NMI->getOpcode() == ARM::t2MOVi32imm) { + MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC); + continue; + } + + if (NCC == OCC) { + Mask |= (1 << Pos); + } else if (NCC != CC) + break; + --Pos; + ++MBBI; + } + Mask |= (1 << Pos); + MIB.addImm(Mask); + Modified = true; + ++NumITs; + } + + return Modified; +} + +bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { + const TargetMachine &TM = Fn.getTarget(); + AFI = Fn.getInfo(); + TII = static_cast(TM.getInstrInfo()); + + if (!AFI->isThumbFunction()) + return false; + + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + Modified |= InsertITBlocks(MBB); + } + + return Modified; +} + +/// createThumb2ITBlockPass - Returns an instance of the Thumb2 IT blocks +/// insertion pass. 
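The mask-building loop above encodes up to three follow-on instructions after the IT's first slot: an "else" (opposite condition) sets the bit at the current position, a "then" leaves it clear, and one final terminating bit marks where the block ends. The same scheme, standalone (this mirrors the pass's simplified encoding, not the final ARMv7 assembler form):

// followers: up to three 'T' (same cond) / 'E' (opposite cond) slots
// after the leading instruction of the IT block.
static unsigned computeITMask(const char *followers) {
  unsigned Mask = 0, Pos = 3;
  for (const char *p = followers; *p && Pos; ++p, --Pos)
    if (*p == 'E')
      Mask |= 1u << Pos;
  Mask |= 1u << Pos; // terminating bit, exactly as in InsertITBlocks
  return Mask;
}
// computeITMask("")  == 0x8 (IT),   computeITMask("T")  == 0x4 (ITT)
// computeITMask("E") == 0xC (ITE),  computeITMask("TE") == 0x6 (ITTE)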
+FunctionPass *llvm::createThumb2ITBlockPass() { + return new Thumb2ITBlockPass(); +} diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 35d09fdac3852..264601bf4143e 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -13,6 +13,7 @@ #include "ARMInstrInfo.h" #include "ARM.h" +#include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -22,127 +23,62 @@ using namespace llvm; -Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { +Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) { } -bool Thumb2InstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned& SrcSubIdx, unsigned& DstSubIdx) const { - SrcSubIdx = DstSubIdx = 0; // No sub-registers. - - unsigned oc = MI.getOpcode(); - switch (oc) { - default: - return false; - // FIXME: Thumb2 - case ARM::tMOVr: - case ARM::tMOVhir2lor: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2hir: - assert(MI.getDesc().getNumOperands() >= 2 && - MI.getOperand(0).isReg() && - MI.getOperand(1).isReg() && - "Invalid Thumb MOV instruction"); - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - return true; - } -} - -unsigned Thumb2InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - // FIXME: Thumb2 - case ARM::tRestore: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } +unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { + // FIXME return 0; } -unsigned Thumb2InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - // FIXME: Thumb2 - case ARM::tSpill: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } +bool +Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case ARM::t2LDM_RET: + case ARM::t2B: // Uncond branch. + case ARM::t2BR_JT: // Jumptable branch. + case ARM::t2TBB: // Table branch byte. + case ARM::t2TBH: // Table branch halfword. + case ARM::tBR_JTr: // Jumptable branch (16-bit version). 
+ case ARM::tBX_RET: + case ARM::tBX_RET_vararg: + case ARM::tPOP_RET: + case ARM::tB: + return true; + default: break; } - return 0; + + return false; } -bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const { +bool +Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - // FIXME: Thumb2 - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg); - return true; - } - } else if (DestRC == ARM::tGPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); - return true; - } - } - - return false; -} - -bool Thumb2InstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const { - if (Ops.size() != 1) return false; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2lor: - case ARM::tMOVhir2hir: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - return false; - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. - return false; - } + if (DestRC == ARM::GPRRegisterClass && + SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); + return true; + } else if (DestRC == ARM::GPRRegisterClass && + SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); + return true; + } else if (DestRC == ARM::tGPRRegisterClass && + SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); return true; - } } - return false; + // Handle SPR, DPR, and QPR copies. + return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC); } void Thumb2InstrInfo:: @@ -152,36 +88,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); - - // FIXME: Thumb2 - if (RC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tSpill)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - } -} - -void Thumb2InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const{ - DebugLoc DL = DebugLoc::getUnknownLoc(); - unsigned Opc = 0; - - // FIXME: Thumb2. Is GPRRegClass here correct? 
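The rewritten copyRegToReg above replaces the old tMOVhir2lor/tMOVlor2hir opcode names with explicit gpr/tgpr pairs and forwards every case it does not handle (tGPR-to-tGPR and the SPR/DPR/QPR copies) to ARMBaseInstrInfo. Reduced to a dispatch table (standalone illustration; Lo stands for tGPR, i.e. r0-r7, and Any for GPR):

enum RC { Lo, Any }; // Lo = tGPR (r0-r7), Any = GPR

static const char *pickThumb2CopyOpcode(RC Dst, RC Src) {
  if (Dst == Any && Src == Any) return "tMOVgpr2gpr";
  if (Dst == Any && Src == Lo)  return "tMOVtgpr2gpr";
  if (Dst == Lo  && Src == Any) return "tMOVgpr2tgpr";
  return 0; // Lo<-Lo and FP/vector copies fall back to ARMBaseInstrInfo
}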
- assert(RC == ARM::GPRRegisterClass && "Unknown regclass!"); if (RC == ARM::GPRRegisterClass) { - Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR; + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); + return; } - MachineInstrBuilder MIB = - BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; + ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC); } void Thumb2InstrInfo:: @@ -191,122 +105,381 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - // FIXME: Thumb2 - assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!"); - - if (RC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) - .addFrameIndex(FI).addImm(0); + if (RC == ARM::GPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) + .addFrameIndex(FI).addImm(0)); + return; } + + ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC); } -void Thumb2InstrInfo:: -loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - DebugLoc DL = DebugLoc::getUnknownLoc(); - unsigned Opc = 0; - // FIXME: Thumb2. Is GPRRegClass ok here? - if (RC == ARM::GPRRegisterClass) { - Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; +void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + // If profitable, use a movw or movt to materialize the offset. + // FIXME: Use the scavenger to grab a scratch register. + if (DestReg != ARM::SP && DestReg != BaseReg && + NumBytes >= 4096 && + ARM_AM::getT2SOImmVal(NumBytes) == -1) { + bool Fits = false; + if (NumBytes < 65536) { + // Use a movw to materialize the 16-bit constant. + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg) + .addImm(NumBytes) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + Fits = true; + } else if ((NumBytes & 0xffff) == 0) { + // Use a movt to materialize the 32-bit constant. + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg) + .addReg(DestReg) + .addImm(NumBytes >> 16) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + Fits = true; + } + + if (Fits) { + if (isSub) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg) + .addReg(BaseReg, RegState::Kill) + .addReg(DestReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + } else { + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg) + .addReg(DestReg, RegState::Kill) + .addReg(BaseReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + } + return; + } } - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; -} + while (NumBytes) { + unsigned ThisVal = NumBytes; + unsigned Opc = 0; + if (DestReg == ARM::SP && BaseReg != ARM::SP) { + // mov sp, rn. Note t2MOVr cannot be used. 
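When the byte count is large and not a valid Thumb-2 modified immediate, emitT2RegPlusImmediate above prefers a movw (any 16-bit constant) or movt (constants with a zero low half) followed by a register-register add/sub. The decision condensed into a standalone predicate (isT2SOImm stands in for ARM_AM::getT2SOImmVal(V) != -1):

#include <stdint.h>

// destIsScratch: DestReg is neither SP nor the base register, so it can
// hold the materialized constant before the final add/sub.
static bool preferMovwMovt(uint32_t NumBytes, bool isT2SOImm,
                           bool destIsScratch) {
  if (!destIsScratch || NumBytes < 4096 || isT2SOImm)
    return false;
  return NumBytes < 65536               // movw covers any 16-bit value
      || (NumBytes & 0xffff) == 0;      // movt covers zero-low-half values
}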
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg); + BaseReg = ARM::SP; + continue; + } -bool Thumb2InstrInfo:: -spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI) const { - if (CSI.empty()) - return false; + if (BaseReg == ARM::SP) { + // sub sp, sp, #imm7 + if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { + assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + // FIXME: Fix Thumb1 immediate encoding. + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg).addImm(ThisVal/4); + NumBytes = 0; + continue; + } + + // sub rd, sp, so_imm + Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi; + if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { + NumBytes = 0; + } else { + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = CountLeadingZeros_32(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); + } + } else { + assert(DestReg != ARM::SP && BaseReg != ARM::SP); + Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { + NumBytes = 0; + } else if (ThisVal < 4096) { + Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + NumBytes = 0; + } else { + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = CountLeadingZeros_32(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); + } + } - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); - for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. - MBB.addLiveIn(Reg); - MIB.addReg(Reg, RegState::Kill); + // Build the new ADD / SUB. + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm(ThisVal))); + + BaseReg = DestReg; } - return true; } -bool Thumb2InstrInfo:: -restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI) const { - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); - if (CSI.empty()) - return false; - - bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; - MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); - for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - if (Reg == ARM::LR) { - // Special epilogue for vararg functions. 
See emitEpilogue - if (isVarArg) - continue; - Reg = ARM::PC; - PopMI->setDesc(get(ARM::tPOP_RET)); - MI = MBB.erase(MI); - } - PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); +static unsigned +negativeOffsetOpcode(unsigned opcode) +{ + switch (opcode) { + case ARM::t2LDRi12: return ARM::t2LDRi8; + case ARM::t2LDRHi12: return ARM::t2LDRHi8; + case ARM::t2LDRBi12: return ARM::t2LDRBi8; + case ARM::t2LDRSHi12: return ARM::t2LDRSHi8; + case ARM::t2LDRSBi12: return ARM::t2LDRSBi8; + case ARM::t2STRi12: return ARM::t2STRi8; + case ARM::t2STRBi12: return ARM::t2STRBi8; + case ARM::t2STRHi12: return ARM::t2STRHi8; + + case ARM::t2LDRi8: + case ARM::t2LDRHi8: + case ARM::t2LDRBi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRSBi8: + case ARM::t2STRi8: + case ARM::t2STRBi8: + case ARM::t2STRHi8: + return opcode; + + default: + break; } - // It's illegal to emit pop instruction without operands. - if (PopMI->getNumOperands() > 0) - MBB.insert(MI, PopMI); + return 0; +} + +static unsigned +positiveOffsetOpcode(unsigned opcode) +{ + switch (opcode) { + case ARM::t2LDRi8: return ARM::t2LDRi12; + case ARM::t2LDRHi8: return ARM::t2LDRHi12; + case ARM::t2LDRBi8: return ARM::t2LDRBi12; + case ARM::t2LDRSHi8: return ARM::t2LDRSHi12; + case ARM::t2LDRSBi8: return ARM::t2LDRSBi12; + case ARM::t2STRi8: return ARM::t2STRi12; + case ARM::t2STRBi8: return ARM::t2STRBi12; + case ARM::t2STRHi8: return ARM::t2STRHi12; + + case ARM::t2LDRi12: + case ARM::t2LDRHi12: + case ARM::t2LDRBi12: + case ARM::t2LDRSHi12: + case ARM::t2LDRSBi12: + case ARM::t2STRi12: + case ARM::t2STRBi12: + case ARM::t2STRHi12: + return opcode; - return true; + default: + break; + } + + return 0; } -MachineInstr *Thumb2InstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVlor2hir: - case ARM::tMOVhir2lor: - case ARM::tMOVhir2hir: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - break; - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. 
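The two opcode tables above exist because Thumb-2 splits immediate addressing by sign: the i12 forms take an unsigned 12-bit offset that is added, while the i8 forms take an 8-bit magnitude that is subtracted. rewriteT2FrameIndex below flips between them based on the folded offset's sign, roughly:

// Sketch of the sign dispatch feeding negativeOffsetOpcode /
// positiveOffsetOpcode above (a simplified standalone form).
static unsigned pickOffsetBits(int &Offset, bool &isSub) {
  if (Offset < 0) {
    isSub = true;        // switch to the *i8 opcode via the first table
    Offset = -Offset;    // i8 encodes the magnitude
    return 8;
  }
  isSub = false;         // switch to the *i12 opcode via the second table
  return 12;
}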
- break; - bool isDead = MI->getOperand(0).isDead(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) - .addFrameIndex(FI).addImm(0); - } +static unsigned +immediateOffsetOpcode(unsigned opcode) +{ + switch (opcode) { + case ARM::t2LDRs: return ARM::t2LDRi12; + case ARM::t2LDRHs: return ARM::t2LDRHi12; + case ARM::t2LDRBs: return ARM::t2LDRBi12; + case ARM::t2LDRSHs: return ARM::t2LDRSHi12; + case ARM::t2LDRSBs: return ARM::t2LDRSBi12; + case ARM::t2STRs: return ARM::t2STRi12; + case ARM::t2STRBs: return ARM::t2STRBi12; + case ARM::t2STRHs: return ARM::t2STRHi12; + + case ARM::t2LDRi12: + case ARM::t2LDRHi12: + case ARM::t2LDRBi12: + case ARM::t2LDRSHi12: + case ARM::t2LDRSBi12: + case ARM::t2STRi12: + case ARM::t2STRBi12: + case ARM::t2STRHi12: + case ARM::t2LDRi8: + case ARM::t2LDRHi8: + case ARM::t2LDRBi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRSBi8: + case ARM::t2STRi8: + case ARM::t2STRBi8: + case ARM::t2STRHi8: + return opcode; + + default: break; } + + return 0; +} + +bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrModeT2_i12. + if (Opcode == ARM::INLINEASM) + AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? + + if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { + Offset += MI.getOperand(FrameRegIdx+1).getImm(); + + bool isSP = FrameReg == ARM::SP; + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::tMOVgpr2gpr)); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(FrameRegIdx+1); + Offset = 0; + return true; + } + + if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri)); + } else { + MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri)); + } + + // Common case: small offset, fits into instruction. + if (ARM_AM::getT2SOImmVal(Offset) != -1) { + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); + Offset = 0; + return true; + } + // Another common case: imm12. + if (Offset < 4096) { + unsigned NewOpc = isSP + ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12) + : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12); + MI.setDesc(TII.get(NewOpc)); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); + Offset = 0; + return true; + } + + // Otherwise, extract 8 adjacent bits from the immediate into this + // t2ADDri/t2SUBri. + unsigned RotAmt = CountLeadingZeros_32(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + assert(ARM_AM::getT2SOImmVal(ThisImmVal) != -1 && + "Bit extraction didn't work?"); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); + } else { + + // AddrMode4 cannot handle any offset. + if (AddrMode == ARMII::AddrMode4) + return false; + + // AddrModeT2_so cannot handle any offset. If there is no offset + // register then we change to an immediate version. 
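The "extract 8 adjacent bits" step above peels off the largest prefix of the offset that is still a valid Thumb-2 modified immediate, leaving the rest for a follow-up instruction. A standalone version with local stand-ins for CountLeadingZeros_32 and ARM_AM::rotr32:

#include <stdint.h>

static uint32_t rotr32(uint32_t V, unsigned Amt) {
  return Amt == 0 ? V : (V >> Amt) | (V << (32 - Amt));
}

static unsigned clz32(uint32_t V) { // precondition: V != 0
  unsigned N = 0;
  for (uint32_t M = 0x80000000u; !(V & M); M >>= 1)
    ++N;
  return N;
}

// Peel the 8 bits starting at the most significant set bit; the result
// is always encodable as a T2 modified immediate (8 bits, rotated).
static uint32_t takeT2Chunk(uint32_t &Imm) {
  uint32_t Chunk = Imm & rotr32(0xff000000u, clz32(Imm));
  Imm &= ~Chunk;
  return Chunk;
}
// e.g. Imm = 0x12345: takeT2Chunk peels off 0x12200, leaving 0x145.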
+ unsigned NewOpc = Opcode; + if (AddrMode == ARMII::AddrModeT2_so) { + unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg(); + if (OffsetReg != 0) { + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + return Offset == 0; + } + + MI.RemoveOperand(FrameRegIdx+1); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(0); + NewOpc = immediateOffsetOpcode(Opcode); + AddrMode = ARMII::AddrModeT2_i12; + } + + unsigned NumBits = 0; + unsigned Scale = 1; + if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) { + // i8 supports only negative, and i12 supports only positive, so + // based on Offset sign convert Opcode to the appropriate + // instruction + Offset += MI.getOperand(FrameRegIdx+1).getImm(); + if (Offset < 0) { + NewOpc = negativeOffsetOpcode(Opcode); + NumBits = 8; + isSub = true; + Offset = -Offset; + } else { + NewOpc = positiveOffsetOpcode(Opcode); + NumBits = 12; + } + } else { + // VFP and NEON address modes. + int InstrOffs = 0; + if (AddrMode == ARMII::AddrMode5) { + const MachineOperand &OffOp = MI.getOperand(FrameRegIdx+1); + InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm()); + if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub) + InstrOffs *= -1; + } + NumBits = 8; + Scale = 4; + Offset += InstrOffs * 4; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + if (Offset < 0) { + Offset = -Offset; + isSub = true; + } + } + + if (NewOpc != Opcode) + MI.setDesc(TII.get(NewOpc)); + + MachineOperand &ImmOp = MI.getOperand(FrameRegIdx+1); + + // Attempt to fold address computation + // Common case: small offset, fits into instruction. + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with fp/sp + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + if (isSub) { + if (AddrMode == ARMII::AddrMode5) + // FIXME: Not consistent. + ImmedOffset |= 1 << NumBits; + else + ImmedOffset = -ImmedOffset; + } + ImmOp.ChangeToImmediate(ImmedOffset); + Offset = 0; + return true; + } + + // Otherwise, offset doesn't fit. Pull in what we can to simplify + ImmedOffset = ImmedOffset & Mask; + if (isSub) { + if (AddrMode == ARMII::AddrMode5) + // FIXME: Not consistent. + ImmedOffset |= 1 << NumBits; + else { + ImmedOffset = -ImmedOffset; + if (ImmedOffset == 0) + // Change the opcode back if the encoded offset is zero. + MI.setDesc(TII.get(positiveOffsetOpcode(NewOpc))); + } + } + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); } - return NewMI; + Offset = (isSub) ? -Offset : Offset; + return Offset == 0; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 84dcb49a6eed0..f3688c0084ae6 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -27,66 +27,34 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo { public: explicit Thumb2InstrInfo(const ARMSubtarget &STI); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } - - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI) const; - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI) const; + // Return the non-pre/post incrementing version of 'Opc'. 
Return 0 + // if there is not such an opcode. + unsigned getUnindexedOpcode(unsigned Opc) const; - bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + // Return true if the block does not fall through. + bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC) const; - void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC) const; + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; - void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC) const; + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; - void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; - } + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). 
+ /// + const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } }; } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 0f0c0e41fc5ad..6c4c15dfe3546 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -13,12 +13,15 @@ #include "ARM.h" #include "ARMAddressingModes.h" +#include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "Thumb2InstrInfo.h" #include "Thumb2RegisterInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,14 +33,10 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -static cl::opt -Thumb2RegScavenging("enable-thumb2-reg-scavenging", - cl::Hidden, - cl::desc("Enable register scavenging on Thumb-2")); - -Thumb2RegisterInfo::Thumb2RegisterInfo(const TargetInstrInfo &tii, +Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) : ARMBaseRegisterInfo(tii, sti) { } @@ -46,710 +45,23 @@ Thumb2RegisterInfo::Thumb2RegisterInfo(const TargetInstrInfo &tii, /// specified immediate. void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Val, - const TargetInstrInfo *TII, - DebugLoc dl) const { + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, + int Val, + ARMCC::CondCodes Pred, + unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get(Type::Int32Ty, Val); + Constant *C = ConstantInt::get( + Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); - BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg) - .addConstantPoolIndex(Idx); -} - -const TargetRegisterClass* -Thumb2RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { - if (isARMLowRegister(Reg)) - return ARM::tGPRRegisterClass; - switch (Reg) { - default: - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: - return ARM::GPRRegisterClass; - } - - return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); -} - -bool -Thumb2RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return Thumb2RegScavenging; -} - -bool Thumb2RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - unsigned CFSize = FFI->getMaxCallFrameSize(); - // It's not always a good idea to include the call frame as part of the - // stack frame. ARM (especially Thumb) has small immediate offset to - // address the stack frame. So a large call frame can cause poor codegen - // and may even makes it impossible to scavenge a register. - if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 - return false; - - return !MF.getFrameInfo()->hasVarSizedObjects(); -} - -/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. Materialize the immediate -/// in a register using mov / mvn sequences or load the immediate from a -/// constpool entry. 
-static -void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, - int NumBytes, bool CanChangeCC, - const TargetInstrInfo &TII, - const Thumb2RegisterInfo& MRI, - DebugLoc dl) { - bool isHigh = !isARMLowRegister(DestReg) || - (BaseReg != 0 && !isARMLowRegister(BaseReg)); - bool isSub = false; - // Subtract doesn't have high register version. Load the negative value - // if either base or dest register is a high register. Also, if do not - // issue sub as part of the sequence if condition register is to be - // preserved. - if (NumBytes < 0 && !isHigh && CanChangeCC) { - isSub = true; - NumBytes = -NumBytes; - } - unsigned LdReg = DestReg; - if (DestReg == ARM::SP) { - assert(BaseReg == ARM::SP && "Unexpected!"); - LdReg = ARM::R3; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) - .addReg(ARM::R3, RegState::Kill); - } - - if (NumBytes <= 255 && NumBytes >= 0) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); - else if (NumBytes < 0 && NumBytes >= -255) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) - .addReg(LdReg, RegState::Kill); - } else - MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl); - - // Emit add / sub. - int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); - const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, - TII.get(Opc), DestReg); - if (DestReg == ARM::SP || isSub) - MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); - else - MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); - if (DestReg == ARM::SP) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) - .addReg(ARM::R12, RegState::Kill); -} - -/// calcNumMI - Returns the number of instructions required to materialize -/// the specific add / sub r, c instruction. -static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, - unsigned NumBits, unsigned Scale) { - unsigned NumMIs = 0; - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - - if (Opc == ARM::tADDrSPi) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - NumMIs++; - NumBits = 8; - Scale = 1; // Followed by a number of tADDi8. - Chunk = ((1 << NumBits) - 1) * Scale; - } - - NumMIs += Bytes / Chunk; - if ((Bytes % Chunk) != 0) - NumMIs++; - if (ExtraOpc) - NumMIs++; - return NumMIs; -} - -/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. -static -void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, unsigned BaseReg, - int NumBytes, const TargetInstrInfo &TII, - const Thumb2RegisterInfo& MRI, - DebugLoc dl) { - bool isSub = NumBytes < 0; - unsigned Bytes = (unsigned)NumBytes; - if (isSub) Bytes = -NumBytes; - bool isMul4 = (Bytes & 3) == 0; - bool isTwoAddr = false; - bool DstNotEqBase = false; - unsigned NumBits = 1; - unsigned Scale = 1; - int Opc = 0; - int ExtraOpc = 0; - - if (DestReg == BaseReg && BaseReg == ARM::SP) { - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; - Opc = isSub ? 
ARM::tSUBspi : ARM::tADDspi; - isTwoAddr = true; - } else if (!isSub && BaseReg == ARM::SP) { - // r1 = add sp, 403 - // => - // r1 = add sp, 100 * 4 - // r1 = add r1, 3 - if (!isMul4) { - Bytes &= ~3; - ExtraOpc = ARM::tADDi3; - } - NumBits = 8; - Scale = 4; - Opc = ARM::tADDrSPi; - } else { - // sp = sub sp, c - // r1 = sub sp, c - // r8 = sub sp, c - if (DestReg != BaseReg) - DstNotEqBase = true; - NumBits = 8; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - isTwoAddr = true; - } - - unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); - unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; - if (NumMIs > Threshold) { - // This will expand into too many instructions. Load the immediate from a - // constpool entry. - emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, - MRI, dl); - return; - } - - if (DstNotEqBase) { - if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { - // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) - unsigned Chunk = (1 << 3) - 1; - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal); - } else { - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill); - } - BaseReg = DestReg; - } - - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - while (Bytes) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - ThisVal /= Scale; - // Build the new tADD / tSUB. - if (isTwoAddr) - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(DestReg).addImm(ThisVal); - else { - bool isKill = BaseReg != ARM::SP; - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - BaseReg = DestReg; - - if (Opc == ARM::tADDrSPi) { - // r4 = add sp, imm - // r4 = add r4, imm - // ... - NumBits = 8; - Scale = 1; - Chunk = ((1 << NumBits) - 1) * Scale; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - isTwoAddr = true; - } - } - } - - if (ExtraOpc) - BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3); -} - -static void emitSPUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - const Thumb2RegisterInfo &MRI, - int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); -} - -void Thumb2RegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - if (!hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc dl = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - // Replace the pseudo instruction with a new instruction... 
- unsigned Opc = Old->getOpcode(); - if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { - emitSPUpdate(MBB, I, TII, dl, *this, -Amount); - } else { - assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); - emitSPUpdate(MBB, I, TII, dl, *this, Amount); - } - } - } - MBB.erase(I); -} - -/// emitThumbConstant - Emit a series of instructions to materialize a -/// constant. -static void emitThumbConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Imm, - const TargetInstrInfo &TII, - const Thumb2RegisterInfo& MRI, - DebugLoc dl) { - bool isSub = Imm < 0; - if (isSub) Imm = -Imm; - - int Chunk = (1 << 8) - 1; - int ThisVal = (Imm > Chunk) ? Chunk : Imm; - Imm -= ThisVal; - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); - if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); - if (isSub) - BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) - .addReg(DestReg, RegState::Kill); -} - -void Thumb2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ - unsigned i = 0; - MachineInstr &MI = *II; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - ARMFunctionInfo *AFI = MF.getInfo(); - DebugLoc dl = MI.getDebugLoc(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - unsigned FrameReg = ARM::SP; - int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MF.getFrameInfo()->getStackSize() + SPAdj; - - if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (hasFP(MF)) { - assert(SPAdj == 0 && "Unexpected"); - // There is alloca()'s in this function, must reference off the frame - // pointer instead. - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - } - - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = MI.getDesc(); - unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - - if (Opcode == ARM::tADDrSPi) { - Offset += MI.getOperand(i+1).getImm(); - - // Can't use tADDrSPi if it's based off the frame pointer. - unsigned NumBits = 0; - unsigned Scale = 1; - if (FrameReg != ARM::SP) { - Opcode = ARM::tADDi3; - MI.setDesc(TII.get(ARM::tADDi3)); - NumBits = 3; - } else { - NumBits = 8; - Scale = 4; - assert((Offset & 3) == 0 && - "Thumb add/sub sp, #imm immediate must be multiple of 4!"); - } - - if (Offset == 0) { - // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVhir2lor)); - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.RemoveOperand(i+1); - return; - } - - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if (((Offset / Scale) & ~Mask) == 0) { - // Replace the FrameIndex with sp / fp - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); - return; - } - - unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Bytes = (Offset > 0) ? Offset : -Offset; - unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); - // MI would expand into a large number of instructions. Don't try to - // simplify the immediate. 
-    if (NumMIs > 2) {
-      emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
-                                *this, dl);
-      MBB.erase(II);
-      return;
-    }
-
-    if (Offset > 0) {
-      // Translate r0 = add sp, imm to
-      // r0 = add sp, 255*4
-      // r0 = add r0, (imm - 255*4)
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.getOperand(i+1).ChangeToImmediate(Mask);
-      Offset = (Offset - Mask * Scale);
-      MachineBasicBlock::iterator NII = next(II);
-      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
-                                *this, dl);
-    } else {
-      // Translate r0 = add sp, -imm to
-      // r0 = -imm (this is then translated into a series of instructions)
-      // r0 = add r0, sp
-      emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
-      MI.setDesc(TII.get(ARM::tADDhirr));
-      MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
-      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
-    }
-    return;
-  } else {
-    unsigned ImmIdx = 0;
-    int InstrOffs = 0;
-    unsigned NumBits = 0;
-    unsigned Scale = 1;
-    switch (AddrMode) {
-    case ARMII::AddrModeT1_s: {
-      ImmIdx = i+1;
-      InstrOffs = MI.getOperand(ImmIdx).getImm();
-      NumBits = (FrameReg == ARM::SP) ? 8 : 5;
-      Scale = 4;
-      break;
-    }
-    default:
-      assert(0 && "Unsupported addressing mode!");
-      abort();
-      break;
-    }
-
-    Offset += InstrOffs * Scale;
-    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
-
-    // Common case: small offset, fits into instruction.
-    MachineOperand &ImmOp = MI.getOperand(ImmIdx);
-    int ImmedOffset = Offset / Scale;
-    unsigned Mask = (1 << NumBits) - 1;
-    if ((unsigned)Offset <= Mask * Scale) {
-      // Replace the FrameIndex with sp
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      ImmOp.ChangeToImmediate(ImmedOffset);
-      return;
-    }
-
-    bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
-    if (AddrMode == ARMII::AddrModeT1_s) {
-      // Thumb tLDRspi, tSTRspi. These will change to instructions that use
-      // a different base register.
-      NumBits = 5;
-      Mask = (1 << NumBits) - 1;
-    }
-    // If this is a thumb spill / restore, we will be using a constpool load to
-    // materialize the offset.
-    if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore)
-      ImmOp.ChangeToImmediate(0);
-    else {
-      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
-      ImmedOffset = ImmedOffset & Mask;
-      ImmOp.ChangeToImmediate(ImmedOffset);
-      Offset &= ~(Mask*Scale);
-    }
-  }
-
-  // If we get here, the immediate doesn't fit into the instruction. We folded
-  // as much as possible above; handle the rest, providing a register that is
-  // SP+LargeImm.
-  assert(Offset && "This code isn't needed if offset already handled!");
-
-  if (Desc.mayLoad()) {
-    // Use the destination register to materialize sp + offset.
-    unsigned TmpReg = MI.getOperand(0).getReg();
-    bool UseRR = false;
-    if (Opcode == ARM::tRestore) {
-      if (FrameReg == ARM::SP)
-        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
-                                 Offset, false, TII, *this, dl);
-      else {
-        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
-        UseRR = true;
-      }
-    } else
-      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
-                                *this, dl);
-    MI.setDesc(TII.get(ARM::tLDR));
-    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
-    if (UseRR)
-      // Use [reg, reg] addrmode.
-      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
-    else // tLDR has an extra register operand.
-      MI.addOperand(MachineOperand::CreateReg(0, false));
-  } else if (Desc.mayStore()) {
-    // FIXME! This is horrific!!! We need register scavenging.
-    // Our temporary workaround has marked r3 unavailable. Of course, r3 is
-    // also an ABI register, so it's possible that it is the register being
-    // stored here. If that's the case, we do the following:
-    // r12 = r2
-    // Use r2 to materialize sp + offset
-    // str r3, r2
-    // r2 = r12
-    unsigned ValReg = MI.getOperand(0).getReg();
-    unsigned TmpReg = ARM::R3;
-    bool UseRR = false;
-    if (ValReg == ARM::R3) {
-      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
-        .addReg(ARM::R2, RegState::Kill);
-      TmpReg = ARM::R2;
-    }
-    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
-      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
-        .addReg(ARM::R3, RegState::Kill);
-    if (Opcode == ARM::tSpill) {
-      if (FrameReg == ARM::SP)
-        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
-                                 Offset, false, TII, *this, dl);
-      else {
-        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
-        UseRR = true;
-      }
-    } else
-      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
-                                *this, dl);
-    MI.setDesc(TII.get(ARM::tSTR));
-    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
-    if (UseRR) // Use [reg, reg] addrmode.
-      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
-    else // tSTR has an extra register operand.
-      MI.addOperand(MachineOperand::CreateReg(0, false));
-
-    MachineBasicBlock::iterator NII = next(II);
-    if (ValReg == ARM::R3)
-      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
-        .addReg(ARM::R12, RegState::Kill);
-    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
-      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
-        .addReg(ARM::R12, RegState::Kill);
-  } else
-    assert(false && "Unexpected opcode!");
-}
-
-void Thumb2RegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
-  unsigned NumBytes = MFI->getStackSize();
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  DebugLoc dl = (MBBI != MBB.end() ?
-                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
-
-  // Check if R3 is live in. It might have to be used as a scratch register.
-  for (MachineRegisterInfo::livein_iterator I = MF.getRegInfo().livein_begin(),
-       E = MF.getRegInfo().livein_end(); I != E; ++I) {
-    if (I->first == ARM::R3) {
-      AFI->setR3IsLiveIn(true);
-      break;
-    }
-  }
-
-  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
-  NumBytes = (NumBytes + 3) & ~3;
-  MFI->setStackSize(NumBytes);
-
-  // Determine the sizes of the callee-save spill areas and record which frame
-  // index belongs to which callee-save spill area.
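The code that follows carves the (already 4-byte-aligned) frame into three areas: GPR area 1 (r4-r7, lr), GPR area 2 (r8-r11, Darwin only), and the DPR area, laid out from the top of the frame downward. A worked example of that offset arithmetic, using made-up sizes rather than anything from this patch:

    #include <cstdio>

    int main() {
      unsigned NumBytes   = 64; // aligned stack size for the whole frame
      unsigned GPRCS1Size = 20; // e.g. r4-r7 + lr spilled
      unsigned GPRCS2Size = 8;  // e.g. r8 + r11 (Darwin)
      unsigned DPRCSSize  = 16; // e.g. two d-registers

      // Mirrors the offset computation in emitPrologue below.
      unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
      unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
      unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;

      std::printf("DPR area at %u, GPR area 2 at %u, GPR area 1 at %u\n",
                  DPRCSOffset, GPRCS2Offset, GPRCS1Offset);
      // The remaining DPRCSOffset bytes (locals) are allocated with one
      // final sp update after the callee-save spills.
      return 0;
    }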
- unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; - int FramePtrSpillFI = 0; - - if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); - return; - } - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - int FI = CSI[i].getFrameIdx(); - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - break; - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - if (STI.isTargetDarwin()) { - AFI->addGPRCalleeSavedArea2Frame(FI); - GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); - GPRCS1Size += 4; - } - break; - default: - AFI->addDPRCalleeSavedAreaFrame(FI); - DPRCSSize += 8; - } - } - - if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { - ++MBBI; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); - } - - // Darwin ABI requires FP to point to the stack slot that contains the - // previous FP. - if (STI.isTargetDarwin() || hasFP(MF)) { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); - } - - // Determine starting offsets of spill areas. - unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); - AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); - AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); - AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - - NumBytes = DPRCSOffset; - if (NumBytes) { - // Insert it after all the callee-save spills. - emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes); - } - - if (STI.isTargetELF() && hasFP(MF)) { - MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - - AFI->getFramePtrSpillOffset()); - } - - AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); - AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); - AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci)) + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0); } -static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - return (MI->getOpcode() == ARM::tRestore && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); -} - -void Thumb2RegisterInfo::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = prior(MBB.end()); - assert((MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && - "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo(); - unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); - int NumBytes = (int)MFI->getStackSize(); - - if (!AFI->hasStackFrame()) { - if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); - } else { - // Unwind MBBI to point to first LDR / FLDD. 
- const unsigned *CSRegs = getCalleeSavedRegs(); - if (MBBI != MBB.begin()) { - do - --MBBI; - while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); - if (!isCSRestore(MBBI, CSRegs)) - ++MBBI; - } - - // Move SP to start of FP callee save spill area. - NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + - AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize()); - - if (hasFP(MF)) { - NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; - // Reset SP based on frame pointer only if the stack frame extends beyond - // frame pointer stack slot or target is ELF and the function has FP. - if (NumBytes) - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, - TII, *this, dl); - else - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) - .addReg(FramePtr); - } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - prior(MBBI)->getOpcode() == ARM::tPOP) { - MachineBasicBlock::iterator PMBBI = prior(MBBI); - emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes); - } else - emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); - } - } - - if (VARegSaveSize) { - // Epilogue for vararg functions: pop LR to R3 and branch off it. - // FIXME: Verify this is still ok when R3 is no longer being reserved. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); - - emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); - - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); - MBB.erase(MBBI); - } +bool Thumb2RegisterInfo:: +requiresRegisterScavenging(const MachineFunction &MF) const { + return true; } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index d379c31508330..a63c60b73b804 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -20,40 +20,23 @@ namespace llvm { class ARMSubtarget; - class TargetInstrInfo; + class ARMBaseInstrInfo; class Type; struct Thumb2RegisterInfo : public ARMBaseRegisterInfo { public: - Thumb2RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Val, - const TargetInstrInfo *TII, - DebugLoc dl) const; - - /// Code Generation virtual methods... 
- const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; - - bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, int Val, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; - - bool hasReservedCallFrame(MachineFunction &MF) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; }; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp new file mode 100644 index 0000000000000..b8879d2ed1fd0 --- /dev/null +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -0,0 +1,685 @@ +//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "t2-reduce-size" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMBaseInstrInfo.h" +#include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); +STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); +STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); + +static cl::opt ReduceLimit("t2-reduce-limit", + cl::init(-1), cl::Hidden); +static cl::opt ReduceLimit2Addr("t2-reduce-limit2", + cl::init(-1), cl::Hidden); +static cl::opt ReduceLimitLdSt("t2-reduce-limit3", + cl::init(-1), cl::Hidden); + +namespace { + /// ReduceTable - A static table with information on mapping from wide + /// opcodes to narrow + struct ReduceEntry { + unsigned WideOpc; // Wide opcode + unsigned NarrowOpc1; // Narrow opcode to transform to + unsigned NarrowOpc2; // Narrow opcode when it's two-address + uint8_t Imm1Limit; // Limit of immediate field (bits) + uint8_t Imm2Limit; // Limit of immediate field when it's two-address + unsigned LowRegs1 : 1; // Only possible if low-registers are used + unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr) + unsigned PredCC1 : 2; // 0 - If predicated, cc is on and vice versa. + // 1 - No cc field. + // 2 - Always set CPSR. + unsigned PredCC2 : 2; + unsigned Special : 1; // Needs to be dealt with specially + }; + + static const ReduceEntry ReduceTable[] = { + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S + { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, + { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, + // Note: immediate scale is 4. 
+ { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0 }, + { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 }, + { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 }, + { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 }, + { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0 }, + { ARM::t2CMPzri,ARM::tCMPzi8, 0, 8, 0, 1, 0, 2,0, 0 }, + { ARM::t2CMPzrr,ARM::tCMPzhir,0, 0, 0, 0, 0, 2,0, 0 }, + { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, + // FIXME: adr.n immediate offset must be multiple of 4. + //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, + { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, + { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, + // FIXME: Do we need the 16-bit 'S' variant? + { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, + { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, + { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 0, 0,1, 0 }, + { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 }, + { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 }, + { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 }, + { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 }, + { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 }, + { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 }, + { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 }, + + // FIXME: Clean this up after splitting each Thumb load / store opcode + // into multiple ones. 
+ { ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRHi12,ARM::tLDRH, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRHi12,ARM::tSTRH, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRHs, ARM::tSTRH, 0, 0, 0, 1, 0, 0,0, 1 }, + + { ARM::t2LDM_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2LDM, ARM::tLDM, ARM::tPOP, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, + }; + + class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass { + public: + static char ID; + Thumb2SizeReduce(); + + const Thumb2InstrInfo *TII; + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "Thumb2 instruction size reduction pass"; + } + + private: + /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. + DenseMap ReduceOpcodeMap; + + bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, + bool is2Addr, ARMCC::CondCodes Pred, + bool LiveCPSR, bool &HasCC, bool &CCDead); + + bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry); + + bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, bool LiveCPSR); + + /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address + /// instruction. + bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR); + + /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit + /// non-two-address instruction. + bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR); + + /// ReduceMBB - Reduce width of instructions in the specified basic block. + bool ReduceMBB(MachineBasicBlock &MBB); + }; + char Thumb2SizeReduce::ID = 0; +} + +Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) { + for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { + unsigned FromOpc = ReduceTable[i].WideOpc; + if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) + assert(false && "Duplicated entries?"); + } +} + +static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) { + for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs) + if (*Regs == ARM::CPSR) + return true; + return false; +} + +bool +Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, + bool is2Addr, ARMCC::CondCodes Pred, + bool LiveCPSR, bool &HasCC, bool &CCDead) { + if ((is2Addr && Entry.PredCC2 == 0) || + (!is2Addr && Entry.PredCC1 == 0)) { + if (Pred == ARMCC::AL) { + // Not predicated, must set CPSR. + if (!HasCC) { + // Original instruction was not setting CPSR, but CPSR is not + // currently live anyway. It's ok to set it. The CPSR def is + // dead though. + if (!LiveCPSR) { + HasCC = true; + CCDead = true; + return true; + } + return false; + } + } else { + // Predicated, must not set CPSR. 
+      if (HasCC)
+        return false;
+    }
+  } else if ((is2Addr && Entry.PredCC2 == 2) ||
+             (!is2Addr && Entry.PredCC1 == 2)) {
+    // Old opcode has an optional def of CPSR.
+    if (HasCC)
+      return true;
+    // If the old opcode does not have an implicit CPSR def, then it's not ok
+    // since the new opcode's CPSR def is not meant to be thrown away.
+    // e.g. CMP.
+    if (!HasImplicitCPSRDef(MI->getDesc()))
+      return false;
+    HasCC = true;
+  } else {
+    // 16-bit instruction does not set CPSR.
+    if (HasCC)
+      return false;
+  }
+
+  return true;
+}
+
+static bool VerifyLowRegs(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  bool isPCOk = (Opc == ARM::t2LDM_RET) || (Opc == ARM::t2LDM);
+  bool isLROk = (Opc == ARM::t2STM);
+  bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || Reg == ARM::CPSR)
+      continue;
+    if (isPCOk && Reg == ARM::PC)
+      continue;
+    if (isLROk && Reg == ARM::LR)
+      continue;
+    if (isSPOk && Reg == ARM::SP)
+      continue;
+    if (!isARMLowRegister(Reg))
+      return false;
+  }
+  return true;
+}
+
+bool
+Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
+                                  const ReduceEntry &Entry) {
+  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
+    return false;
+
+  unsigned Scale = 1;
+  bool HasImmOffset = false;
+  bool HasShift = false;
+  bool isLdStMul = false;
+  unsigned Opc = Entry.NarrowOpc1;
+  unsigned OpNum = 3; // First 'rest' of operands.
+  switch (Entry.WideOpc) {
+  default:
+    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
+  case ARM::t2LDRi12:
+  case ARM::t2STRi12:
+    Scale = 4;
+    HasImmOffset = true;
+    break;
+  case ARM::t2LDRBi12:
+  case ARM::t2STRBi12:
+    HasImmOffset = true;
+    break;
+  case ARM::t2LDRHi12:
+  case ARM::t2STRHi12:
+    Scale = 2;
+    HasImmOffset = true;
+    break;
+  case ARM::t2LDRs:
+  case ARM::t2LDRBs:
+  case ARM::t2LDRHs:
+  case ARM::t2LDRSBs:
+  case ARM::t2LDRSHs:
+  case ARM::t2STRs:
+  case ARM::t2STRBs:
+  case ARM::t2STRHs:
+    HasShift = true;
+    OpNum = 4;
+    break;
+  case ARM::t2LDM_RET:
+  case ARM::t2LDM:
+  case ARM::t2STM: {
+    OpNum = 0;
+    unsigned BaseReg = MI->getOperand(0).getReg();
+    unsigned Mode = MI->getOperand(1).getImm();
+    if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) {
+      Opc = Entry.NarrowOpc2;
+      OpNum = 2;
+    } else if (Entry.WideOpc == ARM::t2LDM_RET ||
+               !isARMLowRegister(BaseReg) ||
+               !ARM_AM::getAM4WBFlag(Mode) ||
+               ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) {
+      return false;
+    }
+    isLdStMul = true;
+    break;
+  }
+  }
+
+  unsigned OffsetReg = 0;
+  bool OffsetKill = false;
+  if (HasShift) {
+    OffsetReg  = MI->getOperand(2).getReg();
+    OffsetKill = MI->getOperand(2).isKill();
+    if (MI->getOperand(3).getImm())
+      // Thumb1 addressing mode doesn't support shift.
+      return false;
+  }
+
+  unsigned OffsetImm = 0;
+  if (HasImmOffset) {
+    OffsetImm = MI->getOperand(2).getImm();
+    unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale;
+    if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset)
+      // Make sure the immediate field fits.
+      return false;
+  }
+
+  // Add the 16-bit load / store instruction.
+  // FIXME: The Thumb1 addressing mode encodes both an immediate and a
+  // register offset.
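The immediate check just above is the heart of the load/store narrowing: a Thumb-1 offset must be aligned to the access size and fit in Imm1Limit bits after scaling. A standalone sketch of that test (names are illustrative, not from the patch):

    #include <cassert>

    // True if a byte offset can be carried by a narrow load / store whose
    // immediate field is imm1Limit bits wide with the given scale.
    static bool narrowOffsetFits(unsigned offsetImm, unsigned imm1Limit,
                                 unsigned scale) {
      unsigned maxOffset = ((1u << imm1Limit) - 1) * scale;
      return (offsetImm & (scale - 1)) == 0 && offsetImm <= maxOffset;
    }

    int main() {
      assert(narrowOffsetFits(124, 5, 4));  // tLDR: 5-bit, word-scaled max
      assert(!narrowOffsetFits(126, 5, 4)); // misaligned
      assert(!narrowOffsetFits(128, 5, 4)); // out of range
      assert(narrowOffsetFits(62, 5, 2));   // tLDRH: halfword-scaled
    }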
+ DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); + if (!isLdStMul) { + MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1)); + if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) { + // tLDRSB and tLDRSH do not have an immediate offset field. On the other + // hand, it must have an offset register. + // FIXME: Remove this special case. + MIB.addImm(OffsetImm/Scale); + } + assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); + + MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); + } + + // Transfer the rest of operands. + for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) + MIB.addOperand(MI->getOperand(OpNum)); + + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); + + MBB.erase(MI); + ++NumLdSts; + return true; +} + +bool +Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR) { + if (Entry.LowRegs1 && !VerifyLowRegs(MI)) + return false; + + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayLoad() || TID.mayStore()) + return ReduceLoadStore(MBB, MI, Entry); + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::t2ADDSri: + case ARM::t2ADDSrr: { + unsigned PredReg = 0; + if (getInstrPredicate(MI, PredReg) == ARMCC::AL) { + switch (Opc) { + default: break; + case ARM::t2ADDSri: { + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) + return true; + // fallthrough + } + case ARM::t2ADDSrr: + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + } + } + break; + } + case ARM::t2RSBri: + case ARM::t2RSBSri: + if (MI->getOperand(2).getImm() == 0) + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + break; + } + return false; +} + +bool +Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR) { + + if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) + return false; + + const TargetInstrDesc &TID = MI->getDesc(); + unsigned Reg0 = MI->getOperand(0).getReg(); + unsigned Reg1 = MI->getOperand(1).getReg(); + if (Reg0 != Reg1) + return false; + if (Entry.LowRegs2 && !isARMLowRegister(Reg0)) + return false; + if (Entry.Imm2Limit) { + unsigned Imm = MI->getOperand(2).getImm(); + unsigned Limit = (1 << Entry.Imm2Limit) - 1; + if (Imm > Limit) + return false; + } else { + unsigned Reg2 = MI->getOperand(2).getReg(); + if (Entry.LowRegs2 && !isARMLowRegister(Reg2)) + return false; + } + + // Check if it's possible / necessary to transfer the predicate. + const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + bool SkipPred = false; + if (Pred != ARMCC::AL) { + if (!NewTID.isPredicable()) + // Can't transfer predicate, fail. + return false; + } else { + SkipPred = !NewTID.isPredicable(); + } + + bool HasCC = false; + bool CCDead = false; + if (TID.hasOptionalDef()) { + unsigned NumOps = TID.getNumOperands(); + HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); + if (HasCC && MI->getOperand(NumOps-1).isDead()) + CCDead = true; + } + if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) + return false; + + // Add the 16-bit instruction. 
+ DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); + MIB.addOperand(MI->getOperand(0)); + if (NewTID.hasOptionalDef()) { + if (HasCC) + AddDefaultT1CC(MIB, CCDead); + else + AddNoT1CC(MIB); + } + + // Transfer the rest of operands. + unsigned NumOps = TID.getNumOperands(); + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + if (i < NumOps && TID.OpInfo[i].isOptionalDef()) + continue; + if (SkipPred && TID.OpInfo[i].isPredicate()) + continue; + MIB.addOperand(MI->getOperand(i)); + } + + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); + + MBB.erase(MI); + ++Num2Addrs; + return true; +} + +bool +Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, + const ReduceEntry &Entry, + bool LiveCPSR) { + if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) + return false; + + unsigned Limit = ~0U; + unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1; + if (Entry.Imm1Limit) + Limit = ((1 << Entry.Imm1Limit) - 1) * Scale; + + const TargetInstrDesc &TID = MI->getDesc(); + for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + if (TID.OpInfo[i].isPredicate()) + continue; + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (!Reg || Reg == ARM::CPSR) + continue; + if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP) + continue; + if (Entry.LowRegs1 && !isARMLowRegister(Reg)) + return false; + } else if (MO.isImm() && + !TID.OpInfo[i].isPredicate()) { + if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0) + return false; + } + } + + // Check if it's possible / necessary to transfer the predicate. + const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + bool SkipPred = false; + if (Pred != ARMCC::AL) { + if (!NewTID.isPredicable()) + // Can't transfer predicate, fail. + return false; + } else { + SkipPred = !NewTID.isPredicable(); + } + + bool HasCC = false; + bool CCDead = false; + if (TID.hasOptionalDef()) { + unsigned NumOps = TID.getNumOperands(); + HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR); + if (HasCC && MI->getOperand(NumOps-1).isDead()) + CCDead = true; + } + if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) + return false; + + // Add the 16-bit instruction. + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); + MIB.addOperand(MI->getOperand(0)); + if (NewTID.hasOptionalDef()) { + if (HasCC) + AddDefaultT1CC(MIB, CCDead); + else + AddNoT1CC(MIB); + } + + // Transfer the rest of operands. + unsigned NumOps = TID.getNumOperands(); + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + if (i < NumOps && TID.OpInfo[i].isOptionalDef()) + continue; + if ((TID.getOpcode() == ARM::t2RSBSri || + TID.getOpcode() == ARM::t2RSBri) && i == 2) + // Skip the zero immediate operand, it's now implicit. + continue; + bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate()); + if (SkipPred && isPred) + continue; + const MachineOperand &MO = MI->getOperand(i); + if (Scale > 1 && !isPred && MO.isImm()) + MIB.addImm(MO.getImm() / Scale); + else { + if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR) + // Skip implicit def of CPSR. Either it's modeled as an optional + // def now or it's already an implicit def on the new instruction. 
+        continue;
+      MIB.addOperand(MO);
+    }
+  }
+  if (!TID.isPredicable() && NewTID.isPredicable())
+    AddDefaultPred(MIB);
+
+  DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
+
+  MBB.erase(MI);
+  ++NumNarrows;
+  return true;
+}
+
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
+  bool HasDef = false;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isUse())
+      continue;
+    if (MO.getReg() != ARM::CPSR)
+      continue;
+    if (!MO.isDead())
+      HasDef = true;
+  }
+
+  return HasDef || LiveCPSR;
+}
+
+static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isDef())
+      continue;
+    if (MO.getReg() != ARM::CPSR)
+      continue;
+    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
+    if (MO.isKill()) {
+      LiveCPSR = false;
+      break;
+    }
+  }
+
+  return LiveCPSR;
+}
+
+bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  bool LiveCPSR = false;
+  // Yes, CPSR could be livein.
+  for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(),
+         E = MBB.livein_end(); I != E; ++I) {
+    if (*I == ARM::CPSR) {
+      LiveCPSR = true;
+      break;
+    }
+  }
+
+  MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
+  MachineBasicBlock::iterator NextMII;
+  for (; MII != E; MII = NextMII) {
+    NextMII = next(MII);
+
+    MachineInstr *MI = &*MII;
+    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
+
+    unsigned Opcode = MI->getOpcode();
+    DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
+    if (OPI != ReduceOpcodeMap.end()) {
+      const ReduceEntry &Entry = ReduceTable[OPI->second];
+      // Ignore "special" cases for now.
+      if (Entry.Special) {
+        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
+          Modified = true;
+          MachineBasicBlock::iterator I = prior(NextMII);
+          MI = &*I;
+        }
+        goto ProcessNext;
+      }
+
+      // Try to transform to a 16-bit two-address instruction.
+      if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
+        Modified = true;
+        MachineBasicBlock::iterator I = prior(NextMII);
+        MI = &*I;
+        goto ProcessNext;
+      }
+
+      // Try to transform to a 16-bit non-two-address instruction.
+      if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
+        Modified = true;
+        MachineBasicBlock::iterator I = prior(NextMII);
+        MI = &*I;
+      }
+    }
+
+  ProcessNext:
+    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
+  }
+
+  return Modified;
+}
+
+bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+  const TargetMachine &TM = MF.getTarget();
+  TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+
+  bool Modified = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+    Modified |= ReduceMBB(*I);
+  return Modified;
+}
+
+/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
+/// reduction pass.
+FunctionPass *llvm::createThumb2SizeReductionPass() { + return new Thumb2SizeReduce(); +} diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h index 0818e25b33a20..b8a06459e1cde 100644 --- a/lib/Target/Alpha/Alpha.h +++ b/lib/Target/Alpha/Alpha.h @@ -22,20 +22,22 @@ namespace llvm { class AlphaTargetMachine; class FunctionPass; class MachineCodeEmitter; - class raw_ostream; + class ObjectCodeEmitter; + class formatted_raw_ostream; FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM); - FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS, - TargetMachine &TM, - bool Verbose); FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM); FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM, MachineCodeEmitter &MCE); FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, - JITCodeEmitter &JCE); + JITCodeEmitter &JCE); + FunctionPass *createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM, + ObjectCodeEmitter &OCE); FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm); FunctionPass *createAlphaBranchSelectionPass(); + extern Target TheAlphaTarget; + } // end namespace llvm; // Defines symbolic names for Alpha registers. This defines a mapping from diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td index e3748c6a09f39..6efdf554e1760 100644 --- a/lib/Target/Alpha/Alpha.td +++ b/lib/Target/Alpha/Alpha.td @@ -29,6 +29,12 @@ def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true", include "AlphaRegisterInfo.td" +//===----------------------------------------------------------------------===// +// Calling Convention Description +//===----------------------------------------------------------------------===// + +include "AlphaCallingConv.td" + //===----------------------------------------------------------------------===// // Schedule Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp index aca8ca7348978..719ffaec3eaff 100644 --- a/lib/Target/Alpha/AlphaBranchSelector.cpp +++ b/lib/Target/Alpha/AlphaBranchSelector.cpp @@ -17,7 +17,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/Compiler.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" using namespace llvm; namespace { diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td new file mode 100644 index 0000000000000..38ada69c2425e --- /dev/null +++ b/lib/Target/Alpha/AlphaCallingConv.td @@ -0,0 +1,37 @@ +//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for Alpha architecture. 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Alpha Return Value Calling Convention +//===----------------------------------------------------------------------===// +def RetCC_Alpha : CallingConv<[ + // i64 is returned in register R0 + CCIfType<[i64], CCAssignToReg<[R0]>>, + + // f32 / f64 are returned in F0/F1 + CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>> +]>; + +//===----------------------------------------------------------------------===// +// Alpha Argument Calling Conventions +//===----------------------------------------------------------------------===// +def CC_Alpha : CallingConv<[ + // The first 6 arguments are passed in registers, whether integer or + // floating-point + CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21], + [F16, F17, F18, F19, F20, F21]>>, + + CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21], + [R16, R17, R18, R19, R20, R21]>>, + + // Stack slots are 8 bytes in size and 8-byte aligned. + CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>> +]>; diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index f50f007c20764..8023add979142 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -19,16 +19,19 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { - + class AlphaCodeEmitter { MachineCodeEmitter &MCE; public: @@ -57,7 +60,7 @@ namespace { public: static char ID; explicit Emitter(TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(&ID), AlphaCodeEmitter(mce), + : MachineFunctionPass(&ID), AlphaCodeEmitter(mce), II(0), TM(tm), MCE(mce) {} Emitter(TargetMachine &tm, CodeEmitter &mce, const AlphaInstrInfo& ii) : MachineFunctionPass(&ID), AlphaCodeEmitter(mce), @@ -69,8 +72,6 @@ namespace { return "Alpha Machine Code Emitter"; } - void emitInstruction(const MachineInstr &MI); - private: void emitBasicBlock(MachineBasicBlock &MBB); }; @@ -91,6 +92,10 @@ FunctionPass *llvm::createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, JITCodeEmitter &JCE) { return new Emitter(TM, JCE); } +FunctionPass *llvm::createAlphaObjectCodeEmitterPass(AlphaTargetMachine &TM, + ObjectCodeEmitter &OCE) { + return new Emitter(TM, OCE); +} template bool Emitter::runOnMachineFunction(MachineFunction &MF) { @@ -111,6 +116,7 @@ void Emitter::emitBasicBlock(MachineBasicBlock &MBB) { for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { const MachineInstr &MI = *I; + MCE.processDebugLoc(MI.getDebugLoc(), true); switch(MI.getOpcode()) { default: MCE.emitWordLE(getBinaryCodeForInstr(*I)); @@ -119,8 +125,10 @@ void Emitter::emitBasicBlock(MachineBasicBlock &MBB) { case Alpha::PCLABEL: case Alpha::MEMLABEL: case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::KILL: break; //skip these } + MCE.processDebugLoc(MI.getDebugLoc(), false); } } @@ -159,13 +167,12 @@ static unsigned getAlphaRegNumber(unsigned Reg) { case Alpha::R30 : case Alpha::F30 : return 30; case Alpha::R31 : case 
Alpha::F31 : return 31; default: - assert(0 && "Unhandled reg"); - abort(); + llvm_unreachable("Unhandled reg"); } } unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) { + const MachineOperand &MO) { unsigned rv = 0; // Return value; defaults to 0 for unhandled cases // or things that get fixed up later by the JIT. @@ -175,7 +182,7 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isImm()) { rv = MO.getImm(); } else if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) { - DOUT << MO << " is a relocated op for " << MI << "\n"; + DEBUG(errs() << MO << " is a relocated op for " << MI << "\n"); unsigned Reloc = 0; int Offset = 0; bool useGOT = false; @@ -211,8 +218,7 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, Offset = MI.getOperand(3).getImm(); break; default: - assert(0 && "unknown relocatable instruction"); - abort(); + llvm_unreachable("unknown relocatable instruction"); } if (MO.isGlobal()) MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), @@ -229,14 +235,14 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, } else if (MO.isMBB()) { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Alpha::reloc_bsr, MO.getMBB())); - }else { - cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; - abort(); + } else { +#ifndef NDEBUG + errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; +#endif + llvm_unreachable(0); } return rv; } #include "AlphaGenCodeEmitter.inc" - - diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index e3f631a1f5be4..e3587fb2c90fb 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -26,9 +26,12 @@ #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -114,7 +117,7 @@ namespace { uint64_t complow = 1 << (63 - at); uint64_t comphigh = 1 << (64 - at); //cerr << x << ":" << complow << ":" << comphigh << "\n"; - if (abs(complow - x) <= abs(comphigh - x)) + if (abs64(complow - x) <= abs64(comphigh - x)) return complow; else return comphigh; @@ -208,7 +211,6 @@ private: /// GOT address into a register. /// SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } @@ -216,7 +218,6 @@ SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() { /// getGlobalRetAddr - Grab the return address. 
/// SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() { - MachineFunction *MF = BB->getParent(); unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF); return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode(); } @@ -269,8 +270,8 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0, Chain.getValue(1)); SDNode *CNode = - CurDAG->getTargetNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag, - Chain, Chain.getValue(1)); + CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Flag, + Chain, Chain.getValue(1)); Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64, SDValue(CNode, 1)); return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain); @@ -278,8 +279,8 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { case ISD::READCYCLECOUNTER: { SDValue Chain = N->getOperand(0); - return CurDAG->getTargetNode(Alpha::RPCC, dl, MVT::i64, MVT::Other, - Chain); + return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other, + Chain); } case ISD::Constant: { @@ -302,10 +303,11 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true break; //(zext (LDAH (LDA))) //Else use the constant pool - ConstantInt *C = ConstantInt::get(Type::Int64Ty, uval); + ConstantInt *C = ConstantInt::get( + Type::getInt64Ty(*CurDAG->getContext()), uval); SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64); - SDNode *Tmp = CurDAG->getTargetNode(Alpha::LDAHr, dl, MVT::i64, CPI, - SDValue(getGlobalBaseReg(), 0)); + SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI, + SDValue(getGlobalBaseReg(), 0)); return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other, CPI, SDValue(Tmp, 0), CurDAG->getEntryNode()); } @@ -313,7 +315,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { case ISD::ConstantFP: { ConstantFPSDNode *CN = cast(N); bool isDouble = N->getValueType(0) == MVT::f64; - MVT T = isDouble ? MVT::f64 : MVT::f32; + EVT T = isDouble ? MVT::f64 : MVT::f32; if (CN->getValueAPF().isPosZero()) { return CurDAG->SelectNodeTo(N, isDouble ? 
Alpha::CPYST : Alpha::CPYSS, T, CurDAG->getRegister(Alpha::F31, T), @@ -323,7 +325,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { T, CurDAG->getRegister(Alpha::F31, T), CurDAG->getRegister(Alpha::F31, T)); } else { - abort(); + llvm_report_error("Unhandled FP constant type"); } break; } @@ -336,7 +338,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { bool rev = false; bool inv = false; switch(CC) { - default: DEBUG(N->dump(CurDAG)); assert(0 && "Unknown FP comparison!"); + default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!"); case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ: Opc = Alpha::CMPTEQ; break; case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT: @@ -356,48 +358,29 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { }; SDValue tmp1 = N->getOperand(rev?1:0); SDValue tmp2 = N->getOperand(rev?0:1); - SDNode *cmp = CurDAG->getTargetNode(Opc, dl, MVT::f64, tmp1, tmp2); + SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2); if (inv) - cmp = CurDAG->getTargetNode(Alpha::CMPTEQ, dl, - MVT::f64, SDValue(cmp, 0), - CurDAG->getRegister(Alpha::F31, MVT::f64)); + cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl, + MVT::f64, SDValue(cmp, 0), + CurDAG->getRegister(Alpha::F31, MVT::f64)); switch(CC) { case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE: case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: { - SDNode* cmp2 = CurDAG->getTargetNode(Alpha::CMPTUN, dl, MVT::f64, - tmp1, tmp2); - cmp = CurDAG->getTargetNode(Alpha::ADDT, dl, MVT::f64, - SDValue(cmp2, 0), SDValue(cmp, 0)); + SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64, + tmp1, tmp2); + cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64, + SDValue(cmp2, 0), SDValue(cmp, 0)); break; } default: break; } - SDNode* LD = CurDAG->getTargetNode(Alpha::FTOIT, dl, - MVT::i64, SDValue(cmp, 0)); - return CurDAG->getTargetNode(Alpha::CMPULT, dl, MVT::i64, - CurDAG->getRegister(Alpha::R31, MVT::i64), - SDValue(LD,0)); - } - break; - - case ISD::SELECT: - if (N->getValueType(0).isFloatingPoint() && - (N->getOperand(0).getOpcode() != ISD::SETCC || - !N->getOperand(0).getOperand(1).getValueType().isFloatingPoint())) { - //This should be the condition not covered by the Patterns - //FIXME: Don't have SelectCode die, but rather return something testable - // so that things like this can be caught in fall though code - //move int to fp - bool isDouble = N->getValueType(0) == MVT::f64; - SDValue cond = N->getOperand(0); - SDValue TV = N->getOperand(1); - SDValue FV = N->getOperand(2); - - SDNode* LD = CurDAG->getTargetNode(Alpha::ITOFT, dl, MVT::f64, cond); - return CurDAG->getTargetNode(isDouble?Alpha::FCMOVNET:Alpha::FCMOVNES, - dl, MVT::f64, FV, TV, SDValue(LD,0)); + SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl, + MVT::i64, SDValue(cmp, 0)); + return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64, + CurDAG->getRegister(Alpha::R31, MVT::i64), + SDValue(LD,0)); } break; @@ -422,11 +405,11 @@ SDNode *AlphaDAGToDAGISel::Select(SDValue Op) { if (get_zapImm(mask)) { SDValue Z = - SDValue(CurDAG->getTargetNode(Alpha::ZAPNOTi, dl, MVT::i64, - N->getOperand(0).getOperand(0), - getI64Imm(get_zapImm(mask))), 0); - return CurDAG->getTargetNode(Alpha::SRLr, dl, MVT::i64, Z, - getI64Imm(sval)); + SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64, + N->getOperand(0).getOperand(0), + getI64Imm(get_zapImm(mask))), 0); + return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z, + getI64Imm(sval)); } } break; @@ -443,95 +426,26 @@ void AlphaDAGToDAGISel::SelectCALL(SDValue Op) 
{ SDNode *N = Op.getNode(); SDValue Chain = N->getOperand(0); SDValue Addr = N->getOperand(1); - SDValue InFlag(0,0); // Null incoming flag value. + SDValue InFlag = N->getOperand(N->getNumOperands() - 1); DebugLoc dl = N->getDebugLoc(); - std::vector CallOperands; - std::vector TypeOperands; - - //grab the arguments - for(int i = 2, e = N->getNumOperands(); i < e; ++i) { - TypeOperands.push_back(N->getOperand(i).getValueType()); - CallOperands.push_back(N->getOperand(i)); - } - int count = N->getNumOperands() - 2; - - static const unsigned args_int[] = {Alpha::R16, Alpha::R17, Alpha::R18, - Alpha::R19, Alpha::R20, Alpha::R21}; - static const unsigned args_float[] = {Alpha::F16, Alpha::F17, Alpha::F18, - Alpha::F19, Alpha::F20, Alpha::F21}; - - for (int i = 6; i < count; ++i) { - unsigned Opc = Alpha::WTF; - if (TypeOperands[i].isInteger()) { - Opc = Alpha::STQ; - } else if (TypeOperands[i] == MVT::f32) { - Opc = Alpha::STS; - } else if (TypeOperands[i] == MVT::f64) { - Opc = Alpha::STT; - } else - assert(0 && "Unknown operand"); - - SDValue Ops[] = { CallOperands[i], getI64Imm((i - 6) * 8), - CurDAG->getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64), - Chain }; - Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 4), 0); - } - for (int i = 0; i < std::min(6, count); ++i) { - if (TypeOperands[i].isInteger()) { - Chain = CurDAG->getCopyToReg(Chain, dl, args_int[i], - CallOperands[i], InFlag); - InFlag = Chain.getValue(1); - } else if (TypeOperands[i] == MVT::f32 || TypeOperands[i] == MVT::f64) { - Chain = CurDAG->getCopyToReg(Chain, dl, args_float[i], - CallOperands[i], InFlag); - InFlag = Chain.getValue(1); - } else - assert(0 && "Unknown operand"); - } - - // Finally, once everything is in registers to pass to the call, emit the - // call itself. 
if (Addr.getOpcode() == AlphaISD::GPRelLo) { SDValue GOT = SDValue(getGlobalBaseReg(), 0); Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag); InFlag = Chain.getValue(1); - Chain = SDValue(CurDAG->getTargetNode(Alpha::BSR, dl, MVT::Other, - MVT::Flag, Addr.getOperand(0), - Chain, InFlag), 0); + Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other, + MVT::Flag, Addr.getOperand(0), + Chain, InFlag), 0); } else { Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag); InFlag = Chain.getValue(1); - Chain = SDValue(CurDAG->getTargetNode(Alpha::JSR, dl, MVT::Other, - MVT::Flag, Chain, InFlag), 0); + Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other, + MVT::Flag, Chain, InFlag), 0); } InFlag = Chain.getValue(1); - std::vector CallResults; - - switch (N->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected ret value!"); - case MVT::Other: break; - case MVT::i64: - Chain = CurDAG->getCopyFromReg(Chain, dl, - Alpha::R0, MVT::i64, InFlag).getValue(1); - CallResults.push_back(Chain.getValue(0)); - break; - case MVT::f32: - Chain = CurDAG->getCopyFromReg(Chain, dl, - Alpha::F0, MVT::f32, InFlag).getValue(1); - CallResults.push_back(Chain.getValue(0)); - break; - case MVT::f64: - Chain = CurDAG->getCopyFromReg(Chain, dl, - Alpha::F0, MVT::f64, InFlag).getValue(1); - CallResults.push_back(Chain.getValue(0)); - break; - } - - CallResults.push_back(Chain); - for (unsigned i = 0, e = CallResults.size(); i != e; ++i) - ReplaceUses(Op.getValue(i), CallResults[i]); + ReplaceUses(Op.getValue(0), Chain); + ReplaceUses(Op.getValue(1), InFlag); } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index fa0b65609fba6..b3f865cf4a83a 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -13,17 +13,22 @@ #include "AlphaISelLowering.h" #include "AlphaTargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/Intrinsics.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; /// AddLiveIn - This helper function adds the specified physical register to the @@ -37,14 +42,15 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, return VReg; } -AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) { +AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) + : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the TargetLowering object. 
//I am having problems with shr n i8 1 setShiftAmountType(MVT::i64); setBooleanContents(ZeroOrOneBooleanContent); - + setUsesGlobalOffsetTable(true); - + addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass); addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass); @@ -54,24 +60,26 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand); - + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); - + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); @@ -85,7 +93,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setOperationAction(ISD::BSWAP , MVT::i64, Expand); setOperationAction(ISD::ROTL , MVT::i64, Expand); setOperationAction(ISD::ROTR , MVT::i64, Expand); - + setOperationAction(ISD::SREM , MVT::i64, Custom); setOperationAction(ISD::UREM , MVT::i64, Custom); setOperationAction(ISD::SDIV , MVT::i64, Custom); @@ -99,6 +107,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); // We don't support sin/cos/sqrt/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -123,7 +134,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // Not implemented yet. 
   // Not implemented yet.
-  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
@@ -141,8 +152,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
   setOperationAction(ISD::VAARG, MVT::Other, Custom);
   setOperationAction(ISD::VAARG, MVT::i32, Custom);
 
-  setOperationAction(ISD::RET, MVT::Other, Custom);
-
   setOperationAction(ISD::JumpTable, MVT::i64, Custom);
   setOperationAction(ISD::JumpTable, MVT::i32, Custom);
 
@@ -159,7 +168,7 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM)
   computeRegisterProperties();
 }
 
-MVT AlphaTargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType AlphaTargetLowering::getSetCCResultType(EVT VT) const {
   return MVT::i64;
 }
 
@@ -187,13 +196,13 @@ unsigned AlphaTargetLowering::getFunctionAlignment(const Function *F) const {
 }
 
 static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
-  MVT PtrVT = Op.getValueType();
+  EVT PtrVT = Op.getValueType();
   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
   SDValue Zero = DAG.getConstant(0, PtrVT);
   // FIXME there isn't really any debug info here
   DebugLoc dl = Op.getDebugLoc();
-  
+
   SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
                            DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
   SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
@@ -219,43 +228,205 @@ static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
 //  //#define GP $29
 //  //#define SP $30
 
-static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
-                                     int &VarArgsBase,
-                                     int &VarArgsOffset) {
+#include "AlphaGenCallingConv.inc"
+
+SDValue
+AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+                               CallingConv::ID CallConv, bool isVarArg,
+                               bool isTailCall,
+                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               DebugLoc dl, SelectionDAG &DAG,
+                               SmallVectorImpl<SDValue> &InVals) {
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 ArgLocs, *DAG.getContext());
+
+  CCInfo.AnalyzeCallOperands(Outs, CC_Alpha);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+                                                      getPointerTy(), true));
+
+  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+  SDValue StackPtr;
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    SDValue Arg = Outs[i].Val;
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default: assert(0 && "Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    }
+
+    // Arguments that can be passed on register must be kept at RegsToPass
+    // vector
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      assert(VA.isMemLoc());
+
+      if (StackPtr.getNode() == 0)
+        StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64);
+
+      SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                   StackPtr,
+                                   DAG.getIntPtrConstant(VA.getLocMemOffset()));
+
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         PseudoSourceValue::getStack(), 0));
+    }
+  }
+
+  // Transform all store nodes into one single node because all store nodes are
+  // independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag in
+  // necessary since all emited instructions must be stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getConstant(NumBytes, getPointerTy(), true),
+                             DAG.getConstant(0, getPointerTy(), true),
+                             InFlag);
+  InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+                         Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+                                     CallingConv::ID CallConv, bool isVarArg,
+                                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                                     DebugLoc dl, SelectionDAG &DAG,
+                                     SmallVectorImpl<SDValue> &InVals) {
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+                 *DAG.getContext());
+
+  CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha);
+
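The getValue() indices used by the loop below follow from CopyFromReg's result numbering; as a reminder (a minimal sketch, not part of the patch; Reg and VT are placeholders):

    // A CopyFromReg glued to an input flag yields three results:
    //   0: the value read out of the physical register
    //   1: the updated chain
    //   2: the output flag, gluing the next copy to this one
    SDValue Copy = DAG.getCopyFromReg(Chain, dl, Reg, VT, InFlag);
    SDValue Val  = Copy.getValue(0);   // what gets pushed into InVals
    Chain        = Copy.getValue(1);
    InFlag       = Copy.getValue(2);
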
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+
+    Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                               VA.getLocVT(), InFlag).getValue(1);
+    SDValue RetValue = Chain.getValue(0);
+    InFlag = Chain.getValue(2);
+
+    // If this is an 8/16/32-bit value, it is really passed promoted to 64
+    // bits. Insert an assert[sz]ext to capture this, then truncate to the
+    // right size.
+    if (VA.getLocInfo() == CCValAssign::SExt)
+      RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+    else if (VA.getLocInfo() == CCValAssign::ZExt)
+      RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+
+    if (VA.getLocInfo() != CCValAssign::Full)
+      RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+    InVals.push_back(RetValue);
+  }
+
+  return Chain;
+}
+
+SDValue
+AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
+                                          CallingConv::ID CallConv, bool isVarArg,
+                                          const SmallVectorImpl<ISD::InputArg>
+                                            &Ins,
+                                          DebugLoc dl, SelectionDAG &DAG,
+                                          SmallVectorImpl<SDValue> &InVals) {
+
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
 
-  std::vector<SDValue> ArgValues;
-  SDValue Root = Op.getOperand(0);
-  DebugLoc dl = Op.getDebugLoc();
 
   unsigned args_int[] = {
     Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
   unsigned args_float[] = {
     Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
-  
-  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) {
+
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
     SDValue argt;
-    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
+    EVT ObjectVT = Ins[ArgNo].VT;
     SDValue ArgVal;
 
     if (ArgNo < 6) {
-      switch (ObjectVT.getSimpleVT()) {
+      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: assert(false && "Invalid value type!");
      case MVT::f64:
-        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo], 
+        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
                                       &Alpha::F8RCRegClass);
-        ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
        break;
      case MVT::f32:
-        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo], 
+        args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
                                       &Alpha::F4RCRegClass);
-        ArgVal = DAG.getCopyFromReg(Root, dl, args_float[ArgNo], ObjectVT);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
        break;
      case MVT::i64:
-        args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo], 
+        args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
                                     &Alpha::GPRCRegClass);
-        ArgVal = DAG.getCopyFromReg(Root, dl, args_int[ArgNo], MVT::i64);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
        break;
      }
     } else { //more args
@@ -265,60 +436,58 @@ static SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
       // Create the SelectionDAG nodes corresponding to a load
       //from this parameter
       SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
-      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
+      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
     }
-    ArgValues.push_back(ArgVal);
+    InVals.push_back(ArgVal);
   }
 
   // If the functions takes variable number of arguments, copy all regs to stack
-  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
   if (isVarArg) {
-    VarArgsOffset = (Op.getNode()->getNumValues()-1) * 8;
+    VarArgsOffset = Ins.size() * 8;
     std::vector<SDValue> LS;
     for (int i = 0; i < 6; ++i) {
       if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
         args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
-      SDValue argt = DAG.getCopyFromReg(Root, dl, args_int[i], MVT::i64);
+      SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
       int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
       if (i == 0) VarArgsBase = FI;
       SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
-      LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
 
       if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
         args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
-      argt = DAG.getCopyFromReg(Root, dl, args_float[i], MVT::f64);
+      argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
       FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
       SDFI = DAG.getFrameIndex(FI, MVT::i64);
-      LS.push_back(DAG.getStore(Root, dl, argt, SDFI, NULL, 0));
+      LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
     }
 
     //Set up a token factor with all the stack traffic
-    Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
   }
 
-  ArgValues.push_back(Root);
-
-  // Return the new list of results.
-  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
-                     &ArgValues[0], ArgValues.size());
+  return Chain;
 }
 
-static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue Copy = DAG.getCopyToReg(Op.getOperand(0), dl, Alpha::R26,
-                                  DAG.getNode(AlphaISD::GlobalRetAddr,
-                                              DebugLoc::getUnknownLoc(),
-                                              MVT::i64),
-                                  SDValue());
-  switch (Op.getNumOperands()) {
+SDValue
+AlphaTargetLowering::LowerReturn(SDValue Chain,
+                                 CallingConv::ID CallConv, bool isVarArg,
+                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                 DebugLoc dl, SelectionDAG &DAG) {
+
+  SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
+                                  DAG.getNode(AlphaISD::GlobalRetAddr,
+                                              DebugLoc::getUnknownLoc(),
+                                              MVT::i64),
+                                  SDValue());
+  switch (Outs.size()) {
   default:
-    assert(0 && "Do not know how to return this many arguments!");
-    abort();
-  case 1:
+    llvm_unreachable("Do not know how to return this many arguments!");
+  case 0:
     break;
     //return SDValue(); // ret void is legal
-  case 3: {
-    MVT ArgVT = Op.getOperand(1).getValueType();
+  case 1: {
+    EVT ArgVT = Outs[0].Val.getValueType();
     unsigned ArgReg;
     if (ArgVT.isInteger())
       ArgReg = Alpha::R0;
@@ -326,14 +495,14 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
       assert(ArgVT.isFloatingPoint());
       ArgReg = Alpha::F0;
     }
-    Copy = DAG.getCopyToReg(Copy, dl, ArgReg, 
-                            Op.getOperand(1), Copy.getValue(1));
+    Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
+                            Outs[0].Val, Copy.getValue(1));
     if (DAG.getMachineFunction().getRegInfo().liveout_empty())
       DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
     break;
   }
-  case 5: {
-    MVT ArgVT = Op.getOperand(1).getValueType();
+  case 2: {
+    EVT ArgVT = Outs[0].Val.getValueType();
     unsigned ArgReg1, ArgReg2;
     if (ArgVT.isInteger()) {
       ArgReg1 = Alpha::R0;
@@ -343,104 +512,25 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
       ArgReg1 = Alpha::F0;
       ArgReg2 = Alpha::F1;
     }
-    Copy = DAG.getCopyToReg(Copy, dl, ArgReg1, 
-                            Op.getOperand(1), Copy.getValue(1));
-    if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), 
+    Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
+                            Outs[0].Val, Copy.getValue(1));
+    if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
                   DAG.getMachineFunction().getRegInfo().liveout_end(),
                   ArgReg1) == DAG.getMachineFunction().getRegInfo().liveout_end())
       DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
-    Copy = DAG.getCopyToReg(Copy, dl, ArgReg2, 
-                            Op.getOperand(3), Copy.getValue(1));
-    if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), 
+    Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
+                            Outs[1].Val, Copy.getValue(1));
+    if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
                   DAG.getMachineFunction().getRegInfo().liveout_end(),
                   ArgReg2) == DAG.getMachineFunction().getRegInfo().liveout_end())
       DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
     break;
   }
   }
-  return DAG.getNode(AlphaISD::RET_FLAG, dl, 
+  return DAG.getNode(AlphaISD::RET_FLAG, dl,
                      MVT::Other, Copy, Copy.getValue(1));
 }
 
-std::pair<SDValue, SDValue>
-AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
-                                 bool RetSExt, bool RetZExt, bool isVarArg,
-                                 bool isInreg, unsigned NumFixedArgs,
-                                 unsigned CallingConv,
-                                 bool isTailCall, SDValue Callee,
-                                 ArgListTy &Args, SelectionDAG &DAG,
-                                 DebugLoc dl) {
-  int NumBytes = 0;
-  if (Args.size() > 6)
-    NumBytes = (Args.size() - 6) * 8;
-
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
-  std::vector<SDValue> args_to_use;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i)
-  {
-    switch (getValueType(Args[i].Ty).getSimpleVT()) {
-    default: assert(0 && "Unexpected ValueType for argument!");
-    case MVT::i1:
-    case MVT::i8:
-    case MVT::i16:
-    case MVT::i32:
-      // Promote the integer to 64 bits. If the input type is signed use a
-      // sign extend, otherwise use a zero extend.
-      if (Args[i].isSExt)
-        Args[i].Node = DAG.getNode(ISD::SIGN_EXTEND, dl,
-                                   MVT::i64, Args[i].Node);
-      else if (Args[i].isZExt)
-        Args[i].Node = DAG.getNode(ISD::ZERO_EXTEND, dl,
-                                   MVT::i64, Args[i].Node);
-      else
-        Args[i].Node = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Args[i].Node);
-      break;
-    case MVT::i64:
-    case MVT::f64:
-    case MVT::f32:
-      break;
-    }
-    args_to_use.push_back(Args[i].Node);
-  }
-
-  std::vector<MVT> RetVals;
-  MVT RetTyVT = getValueType(RetTy);
-  MVT ActualRetTyVT = RetTyVT;
-  if (RetTyVT.getSimpleVT() >= MVT::i1 && RetTyVT.getSimpleVT() <= MVT::i32)
-    ActualRetTyVT = MVT::i64;
-
-  if (RetTyVT != MVT::isVoid)
-    RetVals.push_back(ActualRetTyVT);
-  RetVals.push_back(MVT::Other);
-
-  std::vector<SDValue> Ops;
-  Ops.push_back(Chain);
-  Ops.push_back(Callee);
-  Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
-  SDValue TheCall = DAG.getNode(AlphaISD::CALL, dl,
-                                RetVals, &Ops[0], Ops.size());
-  Chain = TheCall.getValue(RetTyVT != MVT::isVoid);
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(0, true), SDValue());
-  SDValue RetVal = TheCall;
-
-  if (RetTyVT != ActualRetTyVT) {
-    ISD::NodeType AssertKind = ISD::DELETED_NODE;
-    if (RetSExt)
-      AssertKind = ISD::AssertSext;
-    else if (RetZExt)
-      AssertKind = ISD::AssertZext;
-
-    if (AssertKind != ISD::DELETED_NODE)
-      RetVal = DAG.getNode(AssertKind, dl, MVT::i64, RetVal,
-                           DAG.getValueType(RetTyVT));
-
-    RetVal = DAG.getNode(ISD::TRUNCATE, dl, RetTyVT, RetVal);
-  }
-
-  return std::make_pair(RetVal, Chain);
-}
-
 void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
                                      SDValue &DataPtr, SelectionDAG &DAG) {
   Chain = N->getOperand(0);
@@ -475,12 +565,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
 SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
   switch (Op.getOpcode()) {
-  default: assert(0 && "Wasn't expecting to be able to lower this!");
-  case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG,
-                                                           VarArgsBase,
-                                                           VarArgsOffset);
-
-  case ISD::RET: return LowerRET(Op,DAG);
+  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
   case ISD::JumpTable: return LowerJumpTable(Op, DAG);
 
   case ISD::INTRINSIC_WO_CHAIN: {
@@ -488,11 +573,40 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     switch (IntNo) {
     default: break;    // Don't custom lower most intrinsics.
     case Intrinsic::alpha_umulh:
-      return DAG.getNode(ISD::MULHU, dl, MVT::i64, 
+      return DAG.getNode(ISD::MULHU, dl, MVT::i64,
                          Op.getOperand(1), Op.getOperand(2));
     }
   }
 
+  case ISD::SRL_PARTS: {
+    SDValue ShOpLo = Op.getOperand(0);
+    SDValue ShOpHi = Op.getOperand(1);
+    SDValue ShAmt  = Op.getOperand(2);
+    SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64,
+                             DAG.getConstant(64, MVT::i64), ShAmt);
+    SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm,
+                                DAG.getConstant(0, MVT::i64), ISD::SETLE);
+    // if 64 - shAmt <= 0
+    SDValue Hi_Neg = DAG.getConstant(0, MVT::i64);
+    SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64,
+                                    DAG.getConstant(0, MVT::i64), bm);
+    SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg);
+    // else
+    SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm);
+    SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt);
+    SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt);
+    Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries);
+    // Merge
+    SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos);
+    SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos);
+    SDValue Ops[2] = { Lo, Hi };
+    return DAG.getMergeValues(Ops, 2, dl);
+  }
+  //  case ISD::SRA_PARTS:
+
+  //  case ISD::SHL_PARTS:
+
   case ISD::SINT_TO_FP: {
     assert(Op.getOperand(0).getValueType() == MVT::i64 &&
            "Unhandled SINT_TO_FP type in custom expander!");
@@ -509,7 +623,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     if (!isDouble) //Promote
       src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
-    
+
     src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
 
     return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, src);
@@ -519,14 +633,14 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     Constant *C = CP->getConstVal();
     SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
     // FIXME there isn't really any debug info here
-    
+
     SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
                              DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
     SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
     return Lo;
   }
   case ISD::GlobalTLSAddress:
-    assert(0 && "TLS not implemented for Alpha.");
+    llvm_unreachable("TLS not implemented for Alpha.");
   case ISD::GlobalAddress: {
     GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
     GlobalValue *GV = GSDN->getGlobal();
@@ -540,11 +654,11 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
       SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
       return Lo;
     } else
-      return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA, 
+      return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
                          DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
   }
   case ISD::ExternalSymbol: {
-    return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, 
+    return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
                        DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
                                                    ->getSymbol(), MVT::i64),
                        DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
@@ -554,7 +668,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::SREM:
     //Expand only on constant case
     if (Op.getOperand(1).getOpcode() == ISD::Constant) {
-      MVT VT = Op.getNode()->getValueType(0);
+      EVT VT = Op.getNode()->getValueType(0);
       SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
         BuildUDIV(Op.getNode(), DAG, NULL) :
         BuildSDIV(Op.getNode(), DAG, NULL);
@@ -567,7 +681,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::UDIV:
     if (Op.getValueType().isInteger()) {
       if (Op.getOperand(1).getOpcode() == ISD::Constant)
-        return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL) 
+        return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
                                            : BuildUDIV(Op.getNode(), DAG, NULL);
       const char* opstr = 0;
       switch (Op.getOpcode()) {
@@ -601,12 +715,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     SDValue SrcP = Op.getOperand(2);
     const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
     const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-    
+
     SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0);
     SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0);
-    SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, 
+    SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
                              DAG.getConstant(8, MVT::i64));
-    Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, 
+    Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
                          NP, NULL,0, MVT::i32);
     SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
                               DAG.getConstant(8, MVT::i64));
@@ -616,7 +730,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     SDValue Chain = Op.getOperand(0);
     SDValue VAListP = Op.getOperand(1);
     const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-    
+
     // vastart stores the address of the VarArgsBase and VarArgsOffset
     SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64);
     SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0);
@@ -625,13 +739,13 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64),
                              SA2, NULL, 0, MVT::i32);
   }
-  case ISD::RETURNADDR: 
+  case ISD::RETURNADDR:
     return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(),
                        MVT::i64);
   //FIXME: implement
   case ISD::FRAMEADDR:  break;
   }
-  
+
   return SDValue();
 }
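The SRL_PARTS expansion added to LowerOperation above builds a 128-bit logical right shift out of 64-bit pieces. The same case split in plain C++, as a standalone cross-check (a sketch, not code from the patch; assumes 0 < ShAmt < 128, since a shift by the full word width is undefined in C++ just as it is target-dependent in the DAG form):

    #include <cstdint>

    void srl_parts(uint64_t Lo, uint64_t Hi, unsigned ShAmt,
                   uint64_t &OutLo, uint64_t &OutHi) {
      int bm = 64 - (int)ShAmt;        // how many Hi bits spill into Lo
      if (bm <= 0) {                   // ShAmt >= 64: Lo comes from Hi alone
        OutHi = 0;
        OutLo = Hi >> (unsigned)-bm;   // -bm == ShAmt - 64
      } else {                         // ShAmt < 64: OR the spilled Hi bits in
        uint64_t carries = Hi << bm;
        OutHi = Hi >> ShAmt;
        OutLo = (Lo >> ShAmt) | carries;
      }
    }
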
@@ -655,7 +769,7 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
 
 /// getConstraintType - Given a constraint letter, return the type of
 /// constraint it is for this target.
-AlphaTargetLowering::ConstraintType 
+AlphaTargetLowering::ConstraintType
 AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
@@ -670,37 +784,37 @@ AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
 
 std::vector<unsigned> AlphaTargetLowering::
 getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                  MVT VT) const {
+                                  EVT VT) const {
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
     default: break;  // Unknown constriant letter
-    case 'f': 
+    case 'f':
      return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
                                   Alpha::F3 , Alpha::F4 , Alpha::F5 ,
-                                  Alpha::F6 , Alpha::F7 , Alpha::F8 , 
-                                  Alpha::F9 , Alpha::F10, Alpha::F11, 
-                                  Alpha::F12, Alpha::F13, Alpha::F14, 
-                                  Alpha::F15, Alpha::F16, Alpha::F17, 
-                                  Alpha::F18, Alpha::F19, Alpha::F20, 
-                                  Alpha::F21, Alpha::F22, Alpha::F23, 
-                                  Alpha::F24, Alpha::F25, Alpha::F26, 
-                                  Alpha::F27, Alpha::F28, Alpha::F29, 
+                                  Alpha::F6 , Alpha::F7 , Alpha::F8 ,
+                                  Alpha::F9 , Alpha::F10, Alpha::F11,
+                                  Alpha::F12, Alpha::F13, Alpha::F14,
+                                  Alpha::F15, Alpha::F16, Alpha::F17,
+                                  Alpha::F18, Alpha::F19, Alpha::F20,
+                                  Alpha::F21, Alpha::F22, Alpha::F23,
+                                  Alpha::F24, Alpha::F25, Alpha::F26,
+                                  Alpha::F27, Alpha::F28, Alpha::F29,
                                   Alpha::F30, Alpha::F31, 0);
-    case 'r': 
-     return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 , 
-                                  Alpha::R3 , Alpha::R4 , Alpha::R5 , 
-                                  Alpha::R6 , Alpha::R7 , Alpha::R8 , 
-                                  Alpha::R9 , Alpha::R10, Alpha::R11, 
-                                  Alpha::R12, Alpha::R13, Alpha::R14, 
-                                  Alpha::R15, Alpha::R16, Alpha::R17, 
-                                  Alpha::R18, Alpha::R19, Alpha::R20, 
-                                  Alpha::R21, Alpha::R22, Alpha::R23, 
-                                  Alpha::R24, Alpha::R25, Alpha::R26, 
-                                  Alpha::R27, Alpha::R28, Alpha::R29, 
+    case 'r':
+     return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
+                                  Alpha::R3 , Alpha::R4 , Alpha::R5 ,
+                                  Alpha::R6 , Alpha::R7 , Alpha::R8 ,
+                                  Alpha::R9 , Alpha::R10, Alpha::R11,
+                                  Alpha::R12, Alpha::R13, Alpha::R14,
+                                  Alpha::R15, Alpha::R16, Alpha::R17,
+                                  Alpha::R18, Alpha::R19, Alpha::R20,
+                                  Alpha::R21, Alpha::R22, Alpha::R23,
+                                  Alpha::R24, Alpha::R25, Alpha::R26,
+                                  Alpha::R27, Alpha::R28, Alpha::R29,
                                   Alpha::R30, Alpha::R31, 0);
     }
   }
-  
+
   return std::vector<unsigned>();
 }
 //===----------------------------------------------------------------------===//
@@ -709,7 +823,8 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
 
 MachineBasicBlock *
 AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
-                                                 MachineBasicBlock *BB) const {
+                                                 MachineBasicBlock *BB,
+                  DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   assert((MI->getOpcode() == Alpha::CAS32 ||
           MI->getOpcode() == Alpha::CAS64 ||
@@ -719,10 +834,10 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
           MI->getOpcode() == Alpha::SWAP64) &&
          "Unexpected instr type to insert");
 
-  bool is32 = MI->getOpcode() == Alpha::CAS32 || 
+  bool is32 = MI->getOpcode() == Alpha::CAS32 ||
       MI->getOpcode() == Alpha::LAS32 ||
       MI->getOpcode() == Alpha::SWAP32;
-  
+
   //Load locked store conditional for atomic ops take on the same form
   //start:
   //ll
@@ -734,30 +849,35 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   DebugLoc dl = MI->getDebugLoc();
   MachineFunction::iterator It = BB;
   ++It;
-  
+
   MachineBasicBlock *thisMBB = BB;
   MachineFunction *F = BB->getParent();
   MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
 
+  // Inform sdisel of the edge changes.
+  for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
+         E = BB->succ_end(); I != E; ++I)
+    EM->insert(std::make_pair(*I, sinkMBB));
+
   sinkMBB->transferSuccessors(thisMBB);
 
   F->insert(It, llscMBB);
   F->insert(It, sinkMBB);
 
   BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
-  
+
   unsigned reg_res = MI->getOperand(0).getReg(),
     reg_ptr = MI->getOperand(1).getReg(),
     reg_v2 = MI->getOperand(2).getReg(),
     reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
 
-  BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L), 
+  BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
           reg_res).addImm(0).addReg(reg_ptr);
 
   switch (MI->getOpcode()) {
   case Alpha::CAS32:
   case Alpha::CAS64: {
-    unsigned reg_cmp 
+    unsigned reg_cmp
       = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
     BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
       .addReg(reg_v2).addReg(reg_res);
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index 492536735454d..b580c9d71264a 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -62,12 +62,11 @@ namespace llvm {
   class AlphaTargetLowering : public TargetLowering {
     int VarArgsOffset;  // What is the offset to the first vaarg
     int VarArgsBase;    // What is the base FrameIndex
-    bool useITOF;
   public:
     explicit AlphaTargetLowering(TargetMachine &TM);
 
     /// getSetCCResultType - Get the SETCC result ValueType
-    virtual MVT getSetCCResultType(MVT VT) const;
+    virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
 
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
@@ -82,24 +81,21 @@ namespace llvm {
     // Friendly names for dumps
     const char *getTargetNodeName(unsigned Opcode) const;
 
-    /// LowerCallTo - This hook lowers an abstract call to a function into an
-    /// actual call.
-    virtual std::pair<SDValue, SDValue>
-    LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
-                bool isVarArg, bool isInreg, unsigned NumFixedArgs, unsigned CC,
-                bool isTailCall, SDValue Callee, ArgListTy &Args,
-                SelectionDAG &DAG, DebugLoc dl);
+    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+                            CallingConv::ID CallConv, bool isVarArg,
+                            const SmallVectorImpl<ISD::InputArg> &Ins,
+                            DebugLoc dl, SelectionDAG &DAG,
+                            SmallVectorImpl<SDValue> &InVals);
 
     ConstraintType getConstraintType(const std::string &Constraint) const;
 
     std::vector<unsigned>
       getRegClassForInlineAsmConstraint(const std::string &Constraint,
-                                        MVT VT) const;
-
-    bool hasITOF() { return useITOF; }
+                                        EVT VT) const;
 
     MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
-                                                   MachineBasicBlock *BB) const;
+                                                   MachineBasicBlock *BB,
+                    DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
@@ -111,6 +107,26 @@ namespace llvm {
     void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
                     SelectionDAG &DAG);
 
+    virtual SDValue
+      LowerFormalArguments(SDValue Chain,
+                           CallingConv::ID CallConv, bool isVarArg,
+                           const SmallVectorImpl<ISD::InputArg> &Ins,
+                           DebugLoc dl, SelectionDAG &DAG,
+                           SmallVectorImpl<SDValue> &InVals);
+
+    virtual SDValue
+      LowerCall(SDValue Chain, SDValue Callee,
+                CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<ISD::InputArg> &Ins,
+                DebugLoc dl, SelectionDAG &DAG,
+                SmallVectorImpl<SDValue> &InVals);
+
+    virtual SDValue
+      LowerReturn(SDValue Chain,
+                  CallingConv::ID CallConv, bool isVarArg,
+                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                  DebugLoc dl, SelectionDAG &DAG);
   };
 }
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 76a594fba4567..86173ff2721b5 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
 AlphaInstrInfo::AlphaInstrInfo()
@@ -200,29 +201,7 @@ AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
       .addReg(SrcReg, getKillRegState(isKill))
       .addFrameIndex(FrameIdx).addReg(Alpha::F31);
   else
-    abort();
-}
-
-void AlphaInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
-                                    bool isKill,
-                                    SmallVectorImpl<MachineOperand> &Addr,
-                                    const TargetRegisterClass *RC,
-                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  unsigned Opc = 0;
-  if (RC == Alpha::F4RCRegisterClass)
-    Opc = Alpha::STS;
-  else if (RC == Alpha::F8RCRegisterClass)
-    Opc = Alpha::STT;
-  else if (RC == Alpha::GPRCRegisterClass)
-    Opc = Alpha::STQ;
-  else
-    abort();
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  MachineInstrBuilder MIB =
-    BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
-  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
-    MIB.addOperand(Addr[i]);
-  NewMIs.push_back(MIB);
+    llvm_unreachable("Unhandled register class");
 }
 
 void
@@ -245,28 +224,7 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
       .addFrameIndex(FrameIdx).addReg(Alpha::F31);
   else
-    abort();
-}
-
-void AlphaInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
-                                     SmallVectorImpl<MachineOperand> &Addr,
-                                     const TargetRegisterClass *RC,
-                                     SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  unsigned Opc = 0;
-  if (RC == Alpha::F4RCRegisterClass)
-    Opc = Alpha::LDS;
-  else if (RC == Alpha::F8RCRegisterClass)
-    Opc = Alpha::LDT;
-  else if (RC == Alpha::GPRCRegisterClass)
-    Opc = Alpha::LDQ;
-  else
-    abort();
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  MachineInstrBuilder MIB =
-    BuildMI(MF, DL, get(Opc), DestReg);
-  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
-    MIB.addOperand(Addr[i]);
-  NewMIs.push_back(MIB);
+    llvm_unreachable("Unhandled register class");
 }
 
 MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -331,7 +289,7 @@ static unsigned AlphaRevCondCode(unsigned Opcode) {
   case Alpha::FBLE: return Alpha::FBGT;
   case Alpha::FBLT: return Alpha::FBGE;
   default:
-    assert(0 && "Unknown opcode");
+    llvm_unreachable("Unknown opcode");
   }
   return 0; // Not reached
}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index ea0988553acc7..274f452ab74da 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -54,20 +54,10 @@ public:
                                    unsigned SrcReg, bool isKill, int FrameIndex,
                                    const TargetRegisterClass *RC) const;
 
-  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
-                              SmallVectorImpl<MachineOperand> &Addr,
-                              const TargetRegisterClass *RC,
-                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned DestReg, int FrameIndex,
                                     const TargetRegisterClass *RC) const;
-
-  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
-                               SmallVectorImpl<MachineOperand> &Addr,
-                               const TargetRegisterClass *RC,
-                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
 
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index e73bdf9f6e915..3b98206e5b1f9 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -702,7 +702,7 @@ def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
 
 //misc FP selects
 //Select double
-      
+
 def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
       (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
 def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
@@ -791,12 +791,14 @@ def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
 
 let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in
-def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",[], s_ftoi>; //Floating to integer move, S_floating
+def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
+            [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
 let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in
 def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
             [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
 let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
-def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",[], s_itof>; //Integer to floating move, S_floating
+def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
+            [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
 let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in
 def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
             [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
@@ -818,6 +820,10 @@ let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in
 def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
             [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
 
+def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf),
+      (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>;
+def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
+      (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>;
 
 /////////////////////////////////////////////////////////
 //Branching
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
index ba7478e90ecc8..d32813552f012 100644
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -16,8 +16,9 @@
 #include "AlphaRelocations.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/Config/alloca.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdlib>
 using namespace llvm;
 
@@ -57,12 +58,12 @@ static void EmitBranchToAt(void *At, void *To) {
 
   AtI[0] = BUILD_OR(0, 27, 27);
 
-  DOUT << "Stub targeting " << To << "\n";
+  DEBUG(errs() << "Stub targeting " << To << "\n");
 
   for (int x = 1; x <= 8; ++x) {
     AtI[2*x - 1] = BUILD_SLLi(27,27,8);
     unsigned d = (Fn >> (64 - 8 * x)) & 0x00FF;
-    //DOUT << "outputing " << hex << d << dec << "\n";
+    //DEBUG(errs() << "outputing " << hex << d << dec << "\n");
     AtI[2*x] = BUILD_ORi(27, 27, d);
   }
   AtI[17] = BUILD_JMP(31,27,0); //jump, preserving ra, and setting pv
@@ -71,7 +72,7 @@ static void EmitBranchToAt(void *At, void *To) {
 
 void AlphaJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
   //FIXME
-  assert(0);
+  llvm_unreachable(0);
 }
 
 static TargetJITInfo::JITCompilerFn JITCompilerFunction;
@@ -86,12 +87,12 @@ extern "C" {
 
     //rewrite the stub to an unconditional branch
     if (((unsigned*)CameFromStub)[18] == 0x00FFFFFF) {
-      DOUT << "Came from a stub, rewriting\n";
+      DEBUG(errs() << "Came from a stub, rewriting\n");
      EmitBranchToAt(CameFromStub, Target);
     } else {
-      DOUT << "confused, didn't come from stub at " << CameFromStub
-           << " old jump vector " << oldpv
-           << " new jump vector " << Target << "\n";
+      DEBUG(errs() << "confused, didn't come from stub at " << CameFromStub
+                   << " old jump vector " << oldpv
+                   << " new jump vector " << Target << "\n");
     }
 
     //Change pv to new Target
@@ -184,8 +185,7 @@ extern "C" {
     );
#else
   void AlphaCompilationCallback() {
-    cerr << "Cannot call AlphaCompilationCallback() on a non-Alpha arch!\n";
-    abort();
+    llvm_unreachable("Cannot call AlphaCompilationCallback() on a non-Alpha arch!");
   }
#endif
 }
@@ -199,7 +199,7 @@ void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn,
   for (int x = 0; x < 19; ++ x)
     JCE.emitWordLE(0);
   EmitBranchToAt(Addr, Fn);
-  DOUT << "Emitting Stub to " << Fn << " at [" << Addr << "]\n";
+  DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n");
   return JCE.finishGVStub(F);
 }
 
@@ -241,34 +241,34 @@ void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
     long idx = 0;
     bool doCommon = true;
     switch ((Alpha::RelocationType)MR->getRelocationType()) {
-    default: assert(0 && "Unknown relocation type!");
+    default: llvm_unreachable("Unknown relocation type!");
     case Alpha::reloc_literal:
      //This is a LDQl
      idx = MR->getGOTIndex();
-      DOUT << "Literal relocation to slot " << idx;
+      DEBUG(errs() << "Literal relocation to slot " << idx);
      idx = (idx - GOToffset) * 8;
-      DOUT << " offset " << idx << "\n";
+      DEBUG(errs() << " offset " << idx << "\n");
      break;
     case Alpha::reloc_gprellow:
      idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
      idx = getLower16(idx);
-      DOUT << "gprellow relocation offset " << idx << "\n";
-      DOUT << " Pointer is " << (void*)MR->getResultPointer()
-           << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+      DEBUG(errs() << "gprellow relocation offset " << idx << "\n");
+      DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
+                   << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
      break;
     case Alpha::reloc_gprelhigh:
      idx = (unsigned char*)MR->getResultPointer() - &GOTBase[GOToffset * 8];
      idx = getUpper16(idx);
-      DOUT << "gprelhigh relocation offset " << idx << "\n";
-      DOUT << " Pointer is " << (void*)MR->getResultPointer()
-           << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n";
+      DEBUG(errs() << "gprelhigh relocation offset " << idx << "\n");
+      DEBUG(errs() << " Pointer is " << (void*)MR->getResultPointer()
+                   << " GOT is " << (void*)&GOTBase[GOToffset * 8] << "\n");
      break;
     case Alpha::reloc_gpdist:
      switch (*RelocPos >> 26) {
      case 0x09: //LDAH
        idx = &GOTBase[GOToffset * 8] - (unsigned char*)RelocPos;
        idx = getUpper16(idx);
-        DOUT << "LDAH: " << idx << "\n";
+        DEBUG(errs() << "LDAH: " << idx << "\n");
        //add the relocation to the map
        gpdistmap[std::make_pair(Function, MR->getConstantVal())] = RelocPos;
        break;
@@ -278,10 +278,10 @@ void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
        idx = &GOTBase[GOToffset * 8] -
          (unsigned char*)gpdistmap[std::make_pair(Function, MR->getConstantVal())];
        idx = getLower16(idx);
-        DOUT << "LDA: " << idx << "\n";
+        DEBUG(errs() << "LDA: " << idx << "\n");
        break;
      default:
-        assert(0 && "Cannot handle gpdist yet");
+        llvm_unreachable("Cannot handle gpdist yet");
      }
      break;
     case Alpha::reloc_bsr: {
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
new file mode 100644
index 0000000000000..b652a5305a018
--- /dev/null
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
@@ -0,0 +1,22 @@
+//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the AlphaMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCAsmInfo.h"
+using namespace llvm;
+
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) {
+  AlignmentIsInBytes = false;
+  PrivateGlobalPrefix = "$";
+  PICJumpTableDirective = ".gprel32";
+  WeakRefDirective = "\t.weak\t";
+}
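Those four properties are the whole of the Alpha customization; everything else keeps the MCAsmInfo defaults. A rough usage sketch (hypothetical driver code, not from the patch; the accessors are the same ones the asm printer below uses, and the triple string is a placeholder):

    AlphaMCAsmInfo MAI(TheAlphaTarget, "alpha");
    const char *Prefix = MAI.getPrivateGlobalPrefix();  // "$" -> "$CPI0_0", "$JTI0_1"
    const char *Weak   = MAI.getWeakRefDirective();     // "\t.weak\t"
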
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/AlphaMCAsmInfo.h
new file mode 100644
index 0000000000000..c27065d28427b
--- /dev/null
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.h
@@ -0,0 +1,29 @@
+//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the AlphaMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHATARGETASMINFO_H
+#define ALPHATARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class Target;
+  class StringRef;
+
+  struct AlphaMCAsmInfo : public MCAsmInfo {
+    explicit AlphaMCAsmInfo(const Target &T, const StringRef &TT);
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 0ff53c7cc309c..98e97304c64e4 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -28,6 +28,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include <cstdlib>
@@ -149,8 +151,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 //variable locals
 //<- SP
 
-void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                            int SPAdj, RegScavenger *RS) const {
+unsigned
+AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                       int SPAdj, int *Value,
+                                       RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   unsigned i = 0;
@@ -172,16 +176,16 @@ void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // Now add the frame object offset to the offset from the virtual frame index.
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
 
-  DOUT << "FI: " << FrameIndex << " Offset: " << Offset << "\n";
+  DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n");
 
   Offset += MF.getFrameInfo()->getStackSize();
 
-  DOUT << "Corrected Offset " << Offset
-       << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n";
+  DEBUG(errs() << "Corrected Offset " << Offset
+               << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
 
   if (Offset > IMM_HIGH || Offset < IMM_LOW) {
-    DOUT << "Unconditionally using R28 for evil purposes Offset: "
-         << Offset << "\n";
+    DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
+                 << Offset << "\n");
     //so in this case, we need to use a temporary register, and move the
     //original inst off the SP/FP
     //fix up the old:
@@ -195,6 +199,7 @@ void AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   } else {
     MI.getOperand(i).ChangeToImmediate(Offset);
   }
+  return 0;
 }
 
@@ -244,8 +249,10 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
     BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
       .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
   } else {
-    cerr << "Too big a stack frame at " << NumBytes << "\n";
-    abort();
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Too big a stack frame at " + NumBytes;
+    llvm_report_error(Msg.str());
   }
 
   //now if we need to, save the old FP and set the new
@@ -294,14 +301,16 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF,
       BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
         .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
     } else {
-      cerr << "Too big a stack frame at " << NumBytes << "\n";
-      abort();
+      std::string msg;
+      raw_string_ostream Msg(msg);
+      Msg << "Too big a stack frame at " + NumBytes;
+      llvm_report_error(Msg.str());
     }
   }
 }
 
 unsigned AlphaRegisterInfo::getRARegister() const {
-  assert(0 && "What is the return address register");
+  llvm_unreachable("What is the return address register");
   return 0;
 }
 
@@ -310,17 +319,17 @@ unsigned
 AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
 }
 
 unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
-  assert(0 && "What is the exception register");
+  llvm_unreachable("What is the exception register");
   return 0;
 }
 
 unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
-  assert(0 && "What is the exception handler register");
+  llvm_unreachable("What is the exception handler register");
   return 0;
 }
 
 int AlphaRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
-  assert(0 && "What is the dwarf register number");
+  llvm_unreachable("What is the dwarf register number");
   return -1;
 }
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 5012fe8ccd1ec..66f089873d781 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -41,8 +41,9 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
 
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
-                           int SPAdj, RegScavenger *RS = NULL) const;
+  unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+                               int SPAdj, int *Value = NULL,
+                               RegScavenger *RS = NULL) const;
 
   //void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
index d5a9365d75c11..bda7104ab9268 100644
--- a/lib/Target/Alpha/AlphaSubtarget.cpp
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -16,7 +16,7 @@
 #include "AlphaGenSubtarget.inc"
 using namespace llvm;
 
-AlphaSubtarget::AlphaSubtarget(const Module &M, const std::string &FS)
+AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &FS)
   : HasCT(false) {
   std::string CPU = "generic";
 
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
index 0a944cb0a634b..f0eb93c6cba2e 100644
--- a/lib/Target/Alpha/AlphaSubtarget.h
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -20,7 +20,6 @@
 #include <string>
 
 namespace llvm {
-class Module;
 
 class AlphaSubtarget : public TargetSubtarget {
 protected:
@@ -31,9 +30,9 @@ protected:
 public:
   /// This constructor initializes the data members to match that
-  /// of the specified module.
+  /// of the specified triple.
   ///
-  AlphaSubtarget(const Module &M, const std::string &FS);
+  AlphaSubtarget(const std::string &TT, const std::string &FS);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 060089cbb6d6f..b8bc13b630979 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -12,60 +12,26 @@
 
 #include "Alpha.h"
 #include "AlphaJITInfo.h"
-#include "AlphaTargetAsmInfo.h"
+#include "AlphaMCAsmInfo.h"
 #include "AlphaTargetMachine.h"
-#include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
 
-// Register the targets
-static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
-
-// No assembler printer by default
-AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0;
-
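The match-quality scoring deleted below is what the TargetRegistry replaces: clients now pick a target by triple after running the initializer. A minimal sketch of the lookup side (assumed-typical registry usage, not code from this patch):

    LLVMInitializeAlphaTarget();   // runs the Register* constructors below
    std::string Err;
    const Target *T = TargetRegistry::lookupTarget("alpha-unknown-linux-gnu", Err);
    if (T)
      TargetMachine *TM = T->createTargetMachine("alpha-unknown-linux-gnu", "");
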
-extern "C" void LLVMInitializeAlphaTarget() { } - -const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const { - return new AlphaTargetAsmInfo(*this); -} - -unsigned AlphaTargetMachine::getModuleMatchQuality(const Module &M) { - // We strongly match "alpha*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 5 && TT[0] == 'a' && TT[1] == 'l' && TT[2] == 'p' && - TT[3] == 'h' && TT[4] == 'a') - return 20; - // If the target triple is something non-alpha, we don't match. - if (!TT.empty()) return 0; - - if (M.getEndianness() == Module::LittleEndian && - M.getPointerSize() == Module::Pointer64) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target - - return getJITMatchQuality()/2; -} - -unsigned AlphaTargetMachine::getJITMatchQuality() { -#ifdef __alpha - return 10; -#else - return 0; -#endif +extern "C" void LLVMInitializeAlphaTarget() { + // Register the target. + RegisterTargetMachine X(TheAlphaTarget); + RegisterAsmInfo Y(TheAlphaTarget); } -AlphaTargetMachine::AlphaTargetMachine(const Module &M, const std::string &FS) - : DataLayout("e-f128:128:128"), +AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : LLVMTargetMachine(T, TT), + DataLayout("e-f128:128:128"), FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), JITInfo(*this), - Subtarget(M, FS), + Subtarget(TT, FS), TLInfo(*this) { setRelocationModel(Reloc::PIC_); } @@ -84,51 +50,40 @@ bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // Must run branch selection immediately preceding the asm printer PM.add(createAlphaBranchSelectionPass()); - return false; -} -bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { PM.add(createAlphaLLRPPass(*this)); - // Output assembly language. 
-  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-  if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, Verbose));
   return false;
 }
 bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                         CodeGenOpt::Level OptLevel,
-                                        bool DumpAsm, MachineCodeEmitter &MCE) {
+                                        MachineCodeEmitter &MCE) {
   PM.add(createAlphaCodeEmitterPass(*this, MCE));
-  if (DumpAsm) {
-    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-    if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, true));
-  }
   return false;
 }
 bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                         CodeGenOpt::Level OptLevel,
-                                        bool DumpAsm, JITCodeEmitter &JCE) {
+                                        JITCodeEmitter &JCE) {
   PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
-  if (DumpAsm) {
-    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-    if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, true));
-  }
+  return false;
+}
+bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                        CodeGenOpt::Level OptLevel,
+                                        ObjectCodeEmitter &OCE) {
+  PM.add(createAlphaObjectCodeEmitterPass(*this, OCE));
   return false;
 }
 bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
                                               CodeGenOpt::Level OptLevel,
-                                              bool DumpAsm,
                                               MachineCodeEmitter &MCE) {
-  return addCodeEmitter(PM, OptLevel, DumpAsm, MCE);
+  return addCodeEmitter(PM, OptLevel, MCE);
 }
 bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
                                               CodeGenOpt::Level OptLevel,
-                                              bool DumpAsm, JITCodeEmitter &JCE) {
-  return addCodeEmitter(PM, OptLevel, DumpAsm, JCE);
+                                              JITCodeEmitter &JCE) {
+  return addCodeEmitter(PM, OptLevel, JCE);
+}
+bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                              CodeGenOpt::Level OptLevel,
+                                              ObjectCodeEmitter &OCE) {
+  return addCodeEmitter(PM, OptLevel, OCE);
 }
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 26684c7778a4f..f03e9388f7def 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -34,18 +34,9 @@ class AlphaTargetMachine : public LLVMTargetMachine {
   AlphaSubtarget Subtarget;
   AlphaTargetLowering TLInfo;
 
-protected:
-  virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
-  // To avoid having target depend on the asmprinter stuff libraries, asmprinter
-  // set this functions to ctor pointer at startup time if they are linked in.
-  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
-                                            TargetMachine &tm,
-                                            bool verbose);
-  static AsmPrinterCtorFn AsmPrinterCtor;
-
 public:
-  AlphaTargetMachine(const Module &M, const std::string &FS);
+  AlphaTargetMachine(const Target &T, const std::string &TT,
+                     const std::string &FS);
 
   virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
@@ -61,31 +52,24 @@ public:
     return &JITInfo;
   }
 
-  static unsigned getJITMatchQuality();
-  static unsigned getModuleMatchQuality(const Module &M);
-
   // Pass Pipeline Configuration
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-  virtual bool addAssemblyEmitter(PassManagerBase &PM,
-                                  CodeGenOpt::Level OptLevel,
-                                  bool Verbose, raw_ostream &Out);
   virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
-                              bool DumpAsm, MachineCodeEmitter &MCE);
+                              MachineCodeEmitter &MCE);
+  virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+                              JITCodeEmitter &JCE);
   virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
-                              bool DumpAsm, JITCodeEmitter &JCE);
+                              ObjectCodeEmitter &JCE);
   virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
                                     CodeGenOpt::Level OptLevel,
-                                    bool DumpAsm,
                                     MachineCodeEmitter &MCE);
   virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
                                     CodeGenOpt::Level OptLevel,
-                                    bool DumpAsm,
                                     JITCodeEmitter &JCE);
-
-  static void registerAsmPrinter(AsmPrinterCtorFn F) {
-    AsmPrinterCtor = F;
-  }
+  virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
+                                    CodeGenOpt::Level OptLevel,
+                                    ObjectCodeEmitter &OCE);
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 982ef5e851948..d8e8b79f5398a 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -17,16 +17,20 @@
 #include "AlphaInstrInfo.h"
 #include "AlphaTargetMachine.h"
 #include "llvm/Module.h"
-#include "llvm/MDNode.h"
 #include "llvm/Type.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
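printInstruction and getRegisterName in the printer class below are no longer written by hand; the generated AlphaGenAsmWriter.inc supplies them. Roughly the shape of the generated half (an assumption for illustration only; the real tables are emitted by tblgen from AlphaInstrInfo.td):

    // Generated: physical register number -> asm name.
    const char *AlphaAsmPrinter::getRegisterName(unsigned RegNo) {
      static const char *const Names[] = { /* "$0", "$1", ... from the .td */ };
      return Names[RegNo];
    }
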
@@ -37,21 +41,22 @@ namespace {
     /// Unique incrementer for label values for referencing Global values.
     ///
-    explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm,
-                             const TargetAsmInfo *T, bool V)
+    explicit AlphaAsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
+                             const MCAsmInfo *T, bool V)
       : AsmPrinter(o, tm, T, V) {}
 
     virtual const char *getPassName() const {
       return "Alpha Assembly Printer";
     }
-    bool printInstruction(const MachineInstr *MI);
+    void printInstruction(const MachineInstr *MI);
+    static const char *getRegisterName(unsigned RegNo);
+
     void printOp(const MachineOperand &MO, bool IsCallOp = false);
     void printOperand(const MachineInstr *MI, int opNum);
-    void printBaseOffsetPair (const MachineInstr *MI, int i, bool brackets=true);
-    void printModuleLevelGV(const GlobalVariable* GVar);
+    void printBaseOffsetPair(const MachineInstr *MI, int i, bool brackets=true);
+    void PrintGlobalVariable(const GlobalVariable *GVar);
     bool runOnMachineFunction(MachineFunction &F);
-    bool doInitialization(Module &M);
-    bool doFinalization(Module &M);
+    void EmitStartOfAsmFile(Module &M);
 
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                          unsigned AsmVariant, const char *ExtraCode);
@@ -62,17 +67,6 @@ namespace {
   };
 } // end of anonymous namespace
 
-/// createAlphaCodePrinterPass - Returns a pass that prints the Alpha
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description. This should work
-/// regardless of whether the function is in SSA form.
-///
-FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o,
-                                               TargetMachine &tm,
-                                               bool verbose) {
-  return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
-}
-
 #include "AlphaGenAsmWriter.inc"
 
 void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
@@ -81,7 +75,7 @@ void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
   if (MO.getType() == MachineOperand::MO_Register) {
     assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
            "Not physreg??");
-    O << TM.getRegisterInfo()->get(MO.getReg()).AsmName;
+    O << getRegisterName(MO.getReg());
   } else if (MO.isImm()) {
     O << MO.getImm();
     assert(MO.getImm() < (1 << 30));
@@ -92,24 +86,21 @@ void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum)
 
 void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
-  const TargetRegisterInfo &RI = *TM.getRegisterInfo();
-
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
-    O << RI.get(MO.getReg()).AsmName;
+    O << getRegisterName(MO.getReg());
     return;
 
   case MachineOperand::MO_Immediate:
-    cerr << "printOp() does not handle immediate values\n";
-    abort();
+    llvm_unreachable("printOp() does not handle immediate values");
     return;
 
   case MachineOperand::MO_MachineBasicBlock:
-    printBasicBlockLabel(MO.getMBB());
+    GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
     return;
 
   case MachineOperand::MO_ConstantPoolIndex:
-    O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
       << MO.getIndex();
     return;
 
@@ -117,14 +108,12 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
     O << MO.getSymbolName();
     return;
 
-  case MachineOperand::MO_GlobalAddress: {
-    GlobalValue *GV = MO.getGlobal();
-    O << Mang->getValueName(GV);
+  case MachineOperand::MO_GlobalAddress:
+    O << Mang->getMangledName(MO.getGlobal());
     return;
-  }
 
   case MachineOperand::MO_JumpTableIndex:
-    O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
       << '_' << MO.getIndex();
return; @@ -151,13 +140,14 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out labels for the function. const Function *F = MF.getFunction(); - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); EmitAlignment(MF.getAlignment(), F); switch (F->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); + default: llvm_unreachable("Unknown linkage type!"); case Function::InternalLinkage: // Symbols default to internal. case Function::PrivateLinkage: + case Function::LinkerPrivateLinkage: break; case Function::ExternalLinkage: O << "\t.globl " << CurrentFnName << "\n"; @@ -166,7 +156,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { case Function::WeakODRLinkage: case Function::LinkOnceAnyLinkage: case Function::LinkOnceODRLinkage: - O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + O << MAI->getWeakRefDirective() << CurrentFnName << "\n"; break; } @@ -180,17 +170,19 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { if (I != MF.begin()) { - printBasicBlockLabel(I, true, true); - O << '\n'; + EmitBasicBlockStart(I); } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { // Print the assembly for the instruction. ++EmittedInsts; - if (!printInstruction(II)) { - assert(0 && "Unhandled instruction in asm writer!"); - abort(); - } + processDebugLoc(II, true); + printInstruction(II); + + if (VerboseAsm && !II->getDebugLoc().isUnknown()) + EmitComments(*II); + O << '\n'; + processDebugLoc(II, false); } } @@ -200,17 +192,15 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { return false; } -bool AlphaAsmPrinter::doInitialization(Module &M) -{ - if(TM.getSubtarget().hasCT()) +void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (TM.getSubtarget().hasCT()) O << "\t.arch ev6\n"; //This might need to be ev67, so leave this test here else O << "\t.arch ev6\n"; O << "\t.set noat\n"; - return AsmPrinter::doInitialization(M); } -void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { +void AlphaAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) { const TargetData *TD = TM.getTargetData(); if (!GVar->hasInitializer()) return; // External global require no code @@ -219,15 +209,14 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { if (EmitSpecialLLVMGlobal(GVar)) return; - std::string name = Mang->getValueName(GVar); + std::string name = Mang->getMangledName(GVar); Constant *C = GVar->getInitializer(); - if (isa(C) || isa(C)) - return; unsigned Size = TD->getTypeAllocSize(C->getType()); unsigned Align = TD->getPreferredAlignmentLog(GVar); // 0: Switch to section - SwitchToSection(TAI->SectionForGlobal(GVar)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); // 1: Check visibility printVisibility(name, GVar->getVisibility()); @@ -239,23 +228,22 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::CommonLinkage: - O << TAI->getWeakRefDirective() << name << '\n'; + O << MAI->getWeakRefDirective() << name << '\n'; break; case GlobalValue::AppendingLinkage: case GlobalValue::ExternalLinkage: - O << TAI->getGlobalDirective() << name << "\n"; + O << MAI->getGlobalDirective() << name << "\n"; break; case GlobalValue::InternalLinkage: case 
GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: break; default: - assert(0 && "Unknown linkage type!"); - cerr << "Unknown linkage type!\n"; - abort(); + llvm_unreachable("Unknown linkage type!"); } // 3: Type, Size, Align - if (TAI->hasDotTypeDotSizeDirective()) { + if (MAI->hasDotTypeDotSizeDirective()) { O << "\t.type\t" << name << ", @object\n"; O << "\t.size\t" << name << ", " << Size << "\n"; } @@ -268,14 +256,6 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { O << '\n'; } -bool AlphaAsmPrinter::doFinalization(Module &M) { - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I); - - return AsmPrinter::doFinalization(M); -} - /// PrintAsmOperand - Print out an operand for an inline asm expression. /// bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -298,12 +278,6 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } // Force static initialization. -extern "C" void LLVMInitializeAlphaAsmPrinter() { } - -namespace { - static struct Register { - Register() { - AlphaTargetMachine::registerAsmPrinter(createAlphaCodePrinterPass); - } - } Registrator; +extern "C" void LLVMInitializeAlphaAsmPrinter() { + RegisterAsmPrinter X(TheAlphaTarget); } diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile index c5b3e946695b7..3c64a3c606f39 100644 --- a/lib/Target/Alpha/AsmPrinter/Makefile +++ b/lib/Target/Alpha/AsmPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/Alpha/Makefile ---------------------------*- Makefile -*-===## +##===- lib/Target/Alpha/AsmPrinter/Makefile ----------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index 2a382d5cadf33..b4f41aebd8db0 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -8,6 +8,7 @@ tablegen(AlphaGenInstrInfo.inc -gen-instr-desc) tablegen(AlphaGenCodeEmitter.inc -gen-emitter) tablegen(AlphaGenAsmWriter.inc -gen-asm-writer) tablegen(AlphaGenDAGISel.inc -gen-dag-isel) +tablegen(AlphaGenCallingConv.inc -gen-callingconv) tablegen(AlphaGenSubtarget.inc -gen-subtarget) add_llvm_target(AlphaCodeGen @@ -18,9 +19,9 @@ add_llvm_target(AlphaCodeGen AlphaISelLowering.cpp AlphaJITInfo.cpp AlphaLLRP.cpp + AlphaMCAsmInfo.cpp AlphaRegisterInfo.cpp AlphaSubtarget.cpp - AlphaTargetAsmInfo.cpp AlphaTargetMachine.cpp ) diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile index d6c82c7d7435c..d2d71097410b2 100644 --- a/lib/Target/Alpha/Makefile +++ b/lib/Target/Alpha/Makefile @@ -15,8 +15,8 @@ BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \ AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \ AlphaGenInstrInfo.inc AlphaGenCodeEmitter.inc \ AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \ - AlphaGenSubtarget.inc + AlphaGenCallingConv.inc AlphaGenSubtarget.inc -DIRS = AsmPrinter +DIRS = AsmPrinter TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp new file mode 100644 index 0000000000000..f7099b9ae9753 --- /dev/null +++ b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "Alpha.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+llvm::Target llvm::TheAlphaTarget;
+
+extern "C" void LLVMInitializeAlphaTargetInfo() {
+  RegisterTarget<Triple::alpha, /*HasJIT=*/true>
+    X(TheAlphaTarget, "alpha", "Alpha [experimental]");
+}
diff --git a/lib/Target/Alpha/TargetInfo/CMakeLists.txt b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000000000..2a7291b90aeb9
--- /dev/null
+++ b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAlphaInfo
+  AlphaTargetInfo.cpp
+  )
+
+add_dependencies(LLVMAlphaInfo AlphaCodeGenTable_gen)
diff --git a/lib/Target/Alpha/TargetInfo/Makefile b/lib/Target/Alpha/TargetInfo/Makefile
new file mode 100644
index 0000000000000..de01d7f8e8ef3
--- /dev/null
+++ b/lib/Target/Alpha/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Alpha/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
new file mode 100644
index 0000000000000..91fd5dde5a237
--- /dev/null
+++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
@@ -0,0 +1,242 @@
+//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
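The RegisterTarget and RegisterAsmPrinter hooks above replace the per-target static constructor pointers that this patch deletes. As a rough standalone model of that registry pattern (ToyTarget and registerAsmPrinter are illustrative names, not LLVM's TargetRegistry API):

#include <functional>
#include <iostream>
#include <map>
#include <string>

struct ToyTarget {
  std::function<void()> AsmPrinterCtor; // filled in by the printer library
};

// One central registry keyed by target name, instead of a static member
// poked into every TargetMachine subclass.
static std::map<std::string, ToyTarget> &registry() {
  static std::map<std::string, ToyTarget> R;
  return R;
}

static void registerAsmPrinter(const std::string &Name,
                               std::function<void()> Ctor) {
  registry()[Name].AsmPrinterCtor = std::move(Ctor);
}

int main() {
  registerAsmPrinter("alpha", [] { std::cout << "alpha asm printer\n"; });
  registry()["alpha"].AsmPrinterCtor(); // looked up by target, not by class
}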
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Blackfin.h" +#include "BlackfinInstrInfo.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN BlackfinAsmPrinter : public AsmPrinter { + public: + BlackfinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *MAI, bool V) + : AsmPrinter(O, TM, MAI, V) {} + + virtual const char *getPassName() const { + return "Blackfin Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int opNum); + void printMemoryOperand(const MachineInstr *MI, int opNum); + void printInstruction(const MachineInstr *MI); // autogenerated. + static const char *getRegisterName(unsigned RegNo); + + void emitLinkage(const std::string &n, GlobalValue::LinkageTypes l); + bool runOnMachineFunction(MachineFunction &F); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + void PrintGlobalVariable(const GlobalVariable* GVar); + }; +} // end of anonymous namespace + +#include "BlackfinGenAsmWriter.inc" + +extern "C" void LLVMInitializeBlackfinAsmPrinter() { + RegisterAsmPrinter X(TheBlackfinTarget); +} + +void BlackfinAsmPrinter::emitLinkage(const std::string &name, + GlobalValue::LinkageTypes l) { + switch (l) { + default: llvm_unreachable("Unknown linkage type!"); + case GlobalValue::InternalLinkage: // Symbols default to internal. + case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: + break; + case GlobalValue::ExternalLinkage: + O << MAI->getGlobalDirective() << name << "\n"; + break; + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + O << MAI->getGlobalDirective() << name << "\n"; + O << MAI->getWeakDefDirective() << name << "\n"; + break; + } +} + +void BlackfinAsmPrinter::PrintGlobalVariable(const GlobalVariable* GV) { + const TargetData *TD = TM.getTargetData(); + + if (!GV->hasInitializer() || EmitSpecialLLVMGlobal(GV)) + return; + + std::string name = Mang->getMangledName(GV); + Constant *C = GV->getInitializer(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang, + TM)); + emitLinkage(name, GV->getLinkage()); + EmitAlignment(TD->getPreferredAlignmentLog(GV), GV); + printVisibility(name, GV->getVisibility()); + + O << "\t.type " << name << ", STT_OBJECT\n"; + O << "\t.size " << name << ',' << TD->getTypeAllocSize(C->getType()) << '\n'; + O << name << ":\n"; + EmitGlobalConstant(C); +} + +/// runOnMachineFunction - This uses the printInstruction() +/// method to print assembly for each instruction. 
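PrintGlobalVariable above boils down to a fixed directive sequence: switch section, emit linkage, align, type/size, then the label and payload. A minimal sketch assuming an ELF-style assembler; printGlobal and its arguments are hypothetical, and .zero stands in for the real initializer:

#include <iostream>
#include <string>

// Directive sequence for one global, in the order the printer emits it.
static void printGlobal(const std::string &Name, unsigned Size,
                        unsigned AlignLog2, bool External) {
  if (External)
    std::cout << "\t.globl " << Name << '\n';
  std::cout << "\t.align " << AlignLog2 << '\n'; // log2, as EmitAlignment takes
  std::cout << "\t.type " << Name << ", STT_OBJECT\n";
  std::cout << "\t.size " << Name << ", " << Size << '\n';
  std::cout << Name << ":\n";
  std::cout << "\t.zero " << Size << '\n';       // initializer stand-in
}

int main() { printGlobal("counter", 4, 2, true); }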
+/// +bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + EmitConstantPool(MF.getConstantPool()); + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + const Function *F = MF.getFunction(); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitAlignment(2, F); + emitLinkage(CurrentFnName, F->getLinkage()); + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.type\t" << CurrentFnName << ", STT_FUNC\n" + << CurrentFnName << ":\n"; + + if (DW) + DW->BeginFunction(&MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + processDebugLoc(II, true); + + printInstruction(II); + if (VerboseAsm && !II->getDebugLoc().isUnknown()) + EmitComments(*II); + O << '\n'; + + processDebugLoc(II, false); + ++EmittedInsts; + } + } + + O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n"; + + if (DW) + DW->EndFunction(&MF); + + return false; +} + +void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand (opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Virtual registers should be already mapped!"); + O << getRegisterName(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + O << MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: + O << Mang->getMangledName(MO.getGlobal()); + printOffset(MO.getOffset()); + break; + case MachineOperand::MO_ExternalSymbol: + O << Mang->makeNameProper(MO.getSymbolName()); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + << MO.getIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + default: + llvm_unreachable(""); + break; + } +} + +void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum) { + printOperand(MI, opNum); + + if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) + return; + + O << " + "; + printOperand(MI, opNum+1); +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. 
+ case 'r': + break; + } + } + + printOperand(MI, OpNo); + + return false; +} + +bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier + + O << '['; + printOperand(MI, OpNo); + O << ']'; + + return false; +} diff --git a/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt b/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000000000..795aebfe2b8ea --- /dev/null +++ b/lib/Target/Blackfin/AsmPrinter/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMBlackfinAsmPrinter + BlackfinAsmPrinter.cpp + ) +add_dependencies(LLVMBlackfinAsmPrinter BlackfinCodeGenTable_gen) diff --git a/lib/Target/Blackfin/AsmPrinter/Makefile b/lib/Target/Blackfin/AsmPrinter/Makefile new file mode 100644 index 0000000000000..091d4df0bcb87 --- /dev/null +++ b/lib/Target/Blackfin/AsmPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/Blackfin/AsmPrinter/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMBlackfinAsmPrinter + +# Hack: we need to include 'main' Blackfin target directory to grab private +# headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h new file mode 100644 index 0000000000000..ec1fa8689ded7 --- /dev/null +++ b/lib/Target/Blackfin/Blackfin.h @@ -0,0 +1,38 @@ +//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Blackfin back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_BLACKFIN_H +#define TARGET_BLACKFIN_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + + class FunctionPass; + class BlackfinTargetMachine; + + FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM, + CodeGenOpt::Level OptLevel); + extern Target TheBlackfinTarget; + +} // end namespace llvm + +// Defines symbolic names for Blackfin registers. This defines a mapping from +// register name to register number. +#include "BlackfinGenRegisterNames.inc" + +// Defines symbolic names for the Blackfin instructions. +#include "BlackfinGenInstrNames.inc" + +#endif diff --git a/lib/Target/Blackfin/Blackfin.td b/lib/Target/Blackfin/Blackfin.td new file mode 100644 index 0000000000000..b9046383fa6a9 --- /dev/null +++ b/lib/Target/Blackfin/Blackfin.td @@ -0,0 +1,201 @@ +//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Blackfin Subtarget features. +//===----------------------------------------------------------------------===// + +def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true", + "Build for SDRAM">; + +def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true", + "Assume instruction cache lookaside buffers are enabled at runtime">; + +//===----------------------------------------------------------------------===// +// Bugs in the silicon becomes workarounds in the compiler. +// See http://www.analog.com/ for the full list of IC anomalies. +//===----------------------------------------------------------------------===// + +def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true", + "Work around 05000074 - " + "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">; + +def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true", + "Work around 05000244 - " + "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">; + +def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true", + "Work around 05000245 - " + "Access in the Shadow of a Conditional Branch">; + +def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true", + "Work around 05000257 - " + "Interrupt/Exception During Short Hardware Loop">; + +def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true", + "Work around 05000283 - " + "System MMR Write Is Stalled Indefinitely when Killed">; + +def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true", + "Work around 05000312 - " + "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">; + +def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly", + "wa_killed_mmr", "true", + "Work around 05000315 - " + "Killed System MMR Write Completes Erroneously on Next System MMR Access">; + +def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true", + "Work around 05000371 - " + "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">; + +def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true", + "Work around 05000426 - " + "Speculative Fetches of Indirect-Pointer Instructions">; + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "BlackfinRegisterInfo.td" +include "BlackfinCallingConv.td" +include "BlackfinInstrInfo.td" + +def BlackfinInstrInfo : InstrInfo {} + +//===----------------------------------------------------------------------===// +// Blackfin processors supported. 
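Each silicon anomaly workaround above becomes a subtarget predicate the code generator can query before emitting the affected pattern. A minimal sketch of feature bits; the mapping of anomalies to bits and the bf532 set below are hypothetical (the real per-core lists live in the multiclasses that follow):

#include <cstdint>
#include <iostream>

enum : uint32_t {
  WA_MI_SHIFT = 1u << 0, // 05000074
  WA_SPECLD   = 1u << 1, // 05000245
  WA_RETS     = 1u << 2, // 05000371
};

// Codegen asks the subtarget whether a workaround applies.
static bool needsRetsWorkaround(uint32_t Features) {
  return (Features & WA_RETS) != 0;
}

int main() {
  uint32_t bf532 = WA_MI_SHIFT | WA_SPECLD | WA_RETS; // illustrative set
  std::cout << "pad short subroutines: " << needsRetsWorkaround(bf532) << '\n';
}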
+//===----------------------------------------------------------------------===// + +class Proc Features> + : Processor; + +def : Proc<"generic", "", []>; + +multiclass Core Features> { + def : Proc; + def : Proc; + def : Proc; +} + +multiclass CoreEdinburgh + : Core { + def : Proc; + def : Proc; + def : Proc; + def : Proc; +} +multiclass CoreBraemar + : Core { + def : Proc; + def : Proc; +} +multiclass CoreStirling + : Core { + def : Proc; + def : Proc; + def : Proc; +} +multiclass CoreMoab + : Core { + def : Proc; + def : Proc; + def : Proc; + def : Proc; +} +multiclass CoreTeton + : Core { + def : Proc; + def : Proc; +} +multiclass CoreKookaburra + : Core { + def : Proc; + def : Proc; + def : Proc; +} +multiclass CoreMockingbird + : Core { + def : Proc; + def : Proc; +} +multiclass CoreBrodie + : Core { + def : Proc; + def : Proc; +} + +defm BF512 : CoreBrodie<"bf512">; +defm BF514 : CoreBrodie<"bf514">; +defm BF516 : CoreBrodie<"bf516">; +defm BF518 : CoreBrodie<"bf518">; +defm BF522 : CoreMockingbird<"bf522">; +defm BF523 : CoreKookaburra<"bf523">; +defm BF524 : CoreMockingbird<"bf524">; +defm BF525 : CoreKookaburra<"bf525">; +defm BF526 : CoreMockingbird<"bf526">; +defm BF527 : CoreKookaburra<"bf527">; +defm BF531 : CoreEdinburgh<"bf531">; +defm BF532 : CoreEdinburgh<"bf532">; +defm BF533 : CoreEdinburgh<"bf533">; +defm BF534 : CoreBraemar<"bf534">; +defm BF536 : CoreBraemar<"bf536">; +defm BF537 : CoreBraemar<"bf537">; +defm BF538 : CoreStirling<"bf538">; +defm BF539 : CoreStirling<"bf539">; +defm BF542 : CoreMoab<"bf542">; +defm BF544 : CoreMoab<"bf544">; +defm BF548 : CoreMoab<"bf548">; +defm BF549 : CoreMoab<"bf549">; +defm BF561 : CoreTeton<"bf561">; + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def Blackfin : Target { + // Pull in Instruction Info: + let InstructionSet = BlackfinInstrInfo; +} diff --git a/lib/Target/Blackfin/BlackfinCallingConv.td b/lib/Target/Blackfin/BlackfinCallingConv.td new file mode 100644 index 0000000000000..0abc84c3c405e --- /dev/null +++ b/lib/Target/Blackfin/BlackfinCallingConv.td @@ -0,0 +1,30 @@ +//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the Blackfin architectures. +// +//===----------------------------------------------------------------------===// + +// Blackfin C Calling convention. +def CC_Blackfin : CallingConv<[ + CCIfType<[i16], CCPromoteToType>, + CCIfSRet>, + CCAssignToReg<[R0, R1, R2]>, + CCAssignToStack<4, 4> +]>; + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// Blackfin C return-value convention. 
+def RetCC_Blackfin : CallingConv<[ + CCIfType<[i16], CCPromoteToType>, + CCAssignToReg<[R0, R1]> +]>; diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp new file mode 100644 index 0000000000000..fc62a1884b1f3 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -0,0 +1,191 @@ +//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the Blackfin target. +// +//===----------------------------------------------------------------------===// + +#include "Blackfin.h" +#include "BlackfinISelLowering.h" +#include "BlackfinTargetMachine.h" +#include "BlackfinRegisterInfo.h" +#include "llvm/Intrinsics.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +/// BlackfinDAGToDAGISel - Blackfin specific code to select blackfin machine +/// instructions for SelectionDAG operations. +namespace { + class BlackfinDAGToDAGISel : public SelectionDAGISel { + /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we + /// can make the right decision when generating code for different targets. + //const BlackfinSubtarget &Subtarget; + public: + BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel) {} + + virtual void InstructionSelect(); + + virtual const char *getPassName() const { + return "Blackfin DAG->DAG Pattern Instruction Selection"; + } + + // Include the pieces autogenerated from the target description. +#include "BlackfinGenDAGISel.inc" + + private: + SDNode *Select(SDValue Op); + bool SelectADDRspii(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Offset); + + // Walk the DAG after instruction selection, fixing register class issues. + void FixRegisterClasses(SelectionDAG &DAG); + + const BlackfinInstrInfo &getInstrInfo() { + return *static_cast(TM).getInstrInfo(); + } + const BlackfinRegisterInfo *getRegisterInfo() { + return static_cast(TM).getRegisterInfo(); + } + }; +} // end anonymous namespace + +FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new BlackfinDAGToDAGISel(TM, OptLevel); +} + +/// InstructionSelect - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void BlackfinDAGToDAGISel::InstructionSelect() { + // Select target instructions for the DAG. + SelectRoot(*CurDAG); + DEBUG(errs() << "Selected selection DAG before regclass fixup:\n"); + DEBUG(CurDAG->dump()); + FixRegisterClasses(*CurDAG); +} + +SDNode *BlackfinDAGToDAGISel::Select(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + if (N->isMachineOpcode()) + return NULL; // Already selected. 
+ + switch (N->getOpcode()) { + default: break; + case ISD::FrameIndex: { + // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or ADDpp + // SP, Px + int FI = cast(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); + return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); + } + } + + return SelectCode(Op); +} + +bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Op, + SDValue Addr, + SDValue &Base, + SDValue &Offset) { + FrameIndexSDNode *FIN = 0; + if ((FIN = dyn_cast(Addr))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + if (Addr.getOpcode() == ISD::ADD) { + ConstantSDNode *CN = 0; + if ((FIN = dyn_cast(Addr.getOperand(0))) && + (CN = dyn_cast(Addr.getOperand(1))) && + (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { + // Constant positive word offset from frame index + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); + return true; + } + } + return false; +} + +static inline bool isCC(const TargetRegisterClass *RC) { + return RC == &BF::AnyCCRegClass || BF::AnyCCRegClass.hasSubClass(RC); +} + +static inline bool isDCC(const TargetRegisterClass *RC) { + return RC == &BF::DRegClass || BF::DRegClass.hasSubClass(RC) || isCC(RC); +} + +static void UpdateNodeOperand(SelectionDAG &DAG, + SDNode *N, + unsigned Num, + SDValue Val) { + SmallVector ops(N->op_begin(), N->op_end()); + ops[Num] = Val; + SDValue New = DAG.UpdateNodeOperands(SDValue(N, 0), ops.data(), ops.size()); + DAG.ReplaceAllUsesWith(N, New.getNode()); +} + +// After instruction selection, insert COPY_TO_REGCLASS nodes to help in +// choosing the proper register classes. 
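SelectADDRspii accepts exactly two shapes: a bare frame index, or a frame index plus a non-negative, word-aligned constant; everything else falls through to other addressing modes. A standalone model over a toy address type, not SelectionDAG:

#include <cstdint>
#include <iostream>

// Either FI alone (IsAdd false) or FI + Off (IsAdd true).
struct Addr { bool IsAdd; int FI; int64_t Off; };

static bool selectADDRspii(const Addr &A, int &Base, int64_t &Offset) {
  if (!A.IsAdd) { Base = A.FI; Offset = 0; return true; }
  if (A.Off >= 0 && A.Off % 4 == 0) { // constant positive word offset
    Base = A.FI;
    Offset = A.Off;
    return true;
  }
  return false; // let another addressing mode handle it
}

int main() {
  int B; int64_t O;
  Addr a{true, 3, 8};
  std::cout << selectADDRspii(a, B, O) << " FI" << B << '+' << O << '\n';
}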
+void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) { + const BlackfinInstrInfo &TII = getInstrInfo(); + const BlackfinRegisterInfo *TRI = getRegisterInfo(); + DAG.AssignTopologicalOrder(); + HandleSDNode Dummy(DAG.getRoot()); + + for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(); + NI != DAG.allnodes_end(); ++NI) { + if (NI->use_empty() || !NI->isMachineOpcode()) + continue; + const TargetInstrDesc &DefTID = TII.get(NI->getMachineOpcode()); + for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) { + if (!UI->isMachineOpcode()) + continue; + + if (UI.getUse().getResNo() >= DefTID.getNumDefs()) + continue; + const TargetRegisterClass *DefRC = + DefTID.OpInfo[UI.getUse().getResNo()].getRegClass(TRI); + + const TargetInstrDesc &UseTID = TII.get(UI->getMachineOpcode()); + if (UseTID.getNumDefs()+UI.getOperandNo() >= UseTID.getNumOperands()) + continue; + const TargetRegisterClass *UseRC = + UseTID.OpInfo[UseTID.getNumDefs()+UI.getOperandNo()].getRegClass(TRI); + if (!DefRC || !UseRC) + continue; + // We cannot copy CC <-> !(CC/D) + if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) { + SDNode *Copy = + DAG.getMachineNode(TargetInstrInfo::COPY_TO_REGCLASS, + NI->getDebugLoc(), + MVT::i32, + UI.getUse().get(), + DAG.getTargetConstant(BF::DRegClassID, MVT::i32)); + UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0)); + } + } + } + DAG.setRoot(Dummy.getValue()); +} + diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp new file mode 100644 index 0000000000000..4b321ec0fda17 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -0,0 +1,614 @@ +//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interfaces that Blackfin uses to lower LLVM code +// into a selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "BlackfinISelLowering.h" +#include "BlackfinTargetMachine.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "BlackfinGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + +BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) + : TargetLowering(TM, new TargetLoweringObjectFileELF()) { + setShiftAmountType(MVT::i16); + setBooleanContents(ZeroOrOneBooleanContent); + setStackPointerRegisterToSaveRestore(BF::SP); + setIntDivIsCheap(false); + + // Set up the legal register classes. 
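The copy-insertion walk above only fires for def/use pairs whose register classes cannot be copied directly: CC may reach D, but a CC value feeding a non-D, non-CC operand needs a COPY_TO_REGCLASS through D. A sketch of the predicate it relies on (the enum values are stand-ins for the real register classes):

#include <iostream>

enum RC { D, P, CC };

static bool isDCC(RC C) { return C == D || C == CC; }

// We cannot copy CC <-> !(CC/D) directly, mirroring the check above.
static bool needsCopy(RC Def, RC Use) {
  return (Def == CC && !isDCC(Use)) || (Use == CC && !isDCC(Def));
}

int main() {
  std::cout << needsCopy(CC, P) << '\n'; // 1: insert a copy through D
  std::cout << needsCopy(CC, D) << '\n'; // 0: direct copy is legal
}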
+ addRegisterClass(MVT::i32, BF::DRegisterClass); + addRegisterClass(MVT::i16, BF::D16RegisterClass); + + computeRegisterProperties(); + + // Blackfin doesn't have i1 loads or stores + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + + // i16 registers don't do much + setOperationAction(ISD::AND, MVT::i16, Promote); + setOperationAction(ISD::OR, MVT::i16, Promote); + setOperationAction(ISD::XOR, MVT::i16, Promote); + setOperationAction(ISD::CTPOP, MVT::i16, Promote); + // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote + // immediately. + setOperationAction(ISD::CTLZ, MVT::i16, Promote); + setOperationAction(ISD::CTTZ, MVT::i16, Promote); + setOperationAction(ISD::SETCC, MVT::i16, Promote); + + // Blackfin has no division + setOperationAction(ISD::SDIV, MVT::i16, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i16, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i16, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i16, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i16, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + + // No carry-in operations. + setOperationAction(ISD::ADDE, MVT::i32, Custom); + setOperationAction(ISD::SUBE, MVT::i32, Custom); + + // Blackfin has no intrinsics for these particular operations. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // i32 has native CTPOP, but not CTLZ/CTTZ + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + + // READCYCLECOUNTER needs special type legalization. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); + + // We don't have line number support yet. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); + setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + + // Use the default implementation. 
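The long run of setOperationAction calls builds what is effectively an (opcode, type) to action table that the legalizer consults, defaulting to Legal. A minimal model of such a table; the opcode and type codes are made up:

#include <iostream>
#include <map>
#include <utility>

enum Action { Legal, Promote, Expand, Custom };

using Key = std::pair<int, int>; // (opcode, simple value type)

static std::map<Key, Action> Actions = {
  {{/*SDIV*/ 1, /*i32*/ 32}, Expand},  // no divider: expand to a libcall
  {{/*AND*/  2, /*i16*/ 16}, Promote}, // i16 logic promotes to i32
  {{/*ADDE*/ 3, /*i32*/ 32}, Custom},  // handled by a custom lowering hook
};

// Legalizer lookup: anything not listed is already legal.
static Action getAction(int Op, int VT) {
  auto It = Actions.find({Op, VT});
  return It == Actions.end() ? Legal : It->second;
}

int main() { std::cout << getAction(1, 32) << '\n'; } // prints 2 (Expand)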
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); +} + +const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case BFISD::CALL: return "BFISD::CALL"; + case BFISD::RET_FLAG: return "BFISD::RET_FLAG"; + case BFISD::Wrapper: return "BFISD::Wrapper"; + } +} + +MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const { + // SETCC always sets the CC register. Technically that is an i1 register, but + // that type is not legal, so we treat it as an i32 register. + return MVT::i32; +} + +SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) { + DebugLoc DL = Op.getDebugLoc(); + GlobalValue *GV = cast(Op)->getGlobal(); + + Op = DAG.getTargetGlobalAddress(GV, MVT::i32); + return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); +} + +SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { + DebugLoc DL = Op.getDebugLoc(); + int JTI = cast(Op)->getIndex(); + + Op = DAG.getTargetJumpTable(JTI, MVT::i32); + return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); +} + +SDValue +BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space + CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + if (VA.isRegLoc()) { + EVT RegVT = VA.getLocVT(); + TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ? + BF::PRegisterClass : BF::DRegisterClass; + assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState"); + assert(RC->hasType(RegVT) && "Unexpected regclass in CCState"); + + unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); + MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + + // If this is an 8 or 16-bit value, it is really passed promoted to 32 + // bits. Insert an assert[sz]ext to capture this, then truncate to the + // right size. 
+ if (VA.getLocInfo() == CCValAssign::SExt) + ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + else if (VA.getLocInfo() == CCValAssign::ZExt) + ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + + if (VA.getLocInfo() != CCValAssign::Full) + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + + InVals.push_back(ArgValue); + } else { + assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); + unsigned ObjSize = VA.getLocVT().getStoreSize(); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset()); + SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + } + } + + return Chain; +} + +SDValue +BlackfinTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { + + // CCValAssign - represent the assignment of the return value to locations. + SmallVector RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), + RVLocs, *DAG.getContext()); + + // Analize return values. + CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin); + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue Opi = Outs[i].Val; + + // Expand to i32 if necessary + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi); + break; + case CCValAssign::ZExt: + Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi); + break; + case CCValAssign::AExt: + Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi); + break; + } + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue()); + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + } + + if (Flag.getNode()) { + return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + } else { + return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain); + } +} + +SDValue +BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs, + *DAG.getContext()); + CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space + CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin); + + // Get the size of the outgoing arguments stack space requirement. + unsigned ArgsSize = CCInfo.getNextStackOffset(); + + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); + SmallVector, 8> RegsToPass; + SmallVector MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. 
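The Assert[SZ]ext-plus-truncate dance used for arguments here, and again for call results below, is the promote/truncate round trip for i16 values: the value travels sign- or zero-extended in a 32-bit location and a truncate recovers the narrow value. Plain casts model it in C++:

#include <cstdint>
#include <iostream>

// ISD::TRUNCATE after AssertSext: top bits are known sign bits.
static int16_t recoverSExt(int32_t promoted) {
  return static_cast<int16_t>(promoted);
}

// ISD::TRUNCATE after AssertZext: top bits are known zero.
static uint16_t recoverZExt(uint32_t promoted) {
  return static_cast<uint16_t>(promoted);
}

int main() {
  std::cout << recoverSExt(-5) << ' ' << recoverZExt(0xFFFBu) << '\n';
}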
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = Outs[i].Val; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + } + + // Arguments that can be passed on register must be kept at + // RegsToPass vector + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); + int Offset = VA.getLocMemOffset(); + assert(Offset%4 == 0 && "Unaligned LocMemOffset"); + assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type"); + SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32); + SDValue OffsetN = DAG.getIntPtrConstant(Offset); + OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, + PseudoSourceValue::getStack(), + Offset)); + } + } + + // Transform all store nodes into one single node because + // all store nodes are independent of each other. + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emited instructions must be + // stuck together. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); + + std::vector NodeTys; + NodeTys.push_back(MVT::Other); // Returns a chain + NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. + SDValue Ops[] = { Chain, Callee, InFlag }; + Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops, + InFlag.getNode() ? 3 : 2); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), + DAG.getIntPtrConstant(0, true), InFlag); + InFlag = Chain.getValue(1); + + // Assign locations to each value returned by this call. + SmallVector RVLocs; + CCState RVInfo(CallConv, isVarArg, DAG.getTarget(), RVLocs, + *DAG.getContext()); + + RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &RV = RVLocs[i]; + unsigned Reg = RV.getLocReg(); + + Chain = DAG.getCopyFromReg(Chain, dl, Reg, + RVLocs[i].getLocVT(), InFlag); + SDValue Val = Chain.getValue(0); + InFlag = Chain.getValue(2); + Chain = Chain.getValue(1); + + // Callee is responsible for extending any i16 return values. 
+ switch (RV.getLocInfo()) { + case CCValAssign::SExt: + Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val, + DAG.getValueType(RV.getValVT())); + break; + case CCValAssign::ZExt: + Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val, + DAG.getValueType(RV.getValVT())); + break; + default: + break; + } + + // Truncate to valtype + if (RV.getLocInfo() != CCValAssign::Full) + Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val); + InVals.push_back(Val); + } + + return Chain; +} + +// Expansion of ADDE / SUBE. This is a bit involved since blackfin doesn't have +// add-with-carry instructions. +SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) { + // Operands: lhs, rhs, carry-in (AC0 flag) + // Results: sum, carry-out (AC0 flag) + DebugLoc dl = Op.getDebugLoc(); + + unsigned Opcode = Op.getOpcode()==ISD::ADDE ? BF::ADD : BF::SUB; + + // zext incoming carry flag in AC0 to 32 bits + SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32, + /* flag= */ Op.getOperand(2)); + CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32, + SDValue(CarryIn, 0)); + + // Add operands, produce sum and carry flag + SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag, + Op.getOperand(0), Op.getOperand(1)); + + // Store intermediate carry from Sum + SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32, + /* flag= */ SDValue(Sum, 1)); + + // Add incoming carry, again producing an output flag + Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Flag, + SDValue(Sum, 0), SDValue(CarryIn, 0)); + + // Update AC0 with the intermediate carry, producing a flag. + SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Flag, + SDValue(Carry1, 0)); + + // Compose (i32, flag) pair + SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) }; + return DAG.getMergeValues(ops, 2, dl); +} + +SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + default: + Op.getNode()->dump(); + llvm_unreachable("Should not custom lower this!"); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + llvm_unreachable("TLS not implemented for Blackfin."); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + // Frame & Return address. Currently unimplemented + case ISD::FRAMEADDR: return SDValue(); + case ISD::RETURNADDR: return SDValue(); + case ISD::ADDE: + case ISD::SUBE: return LowerADDE(Op, DAG); + } +} + +void +BlackfinTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) { + DebugLoc dl = N->getDebugLoc(); + switch (N->getOpcode()) { + default: + llvm_unreachable("Do not know how to custom type legalize this operation!"); + return; + case ISD::READCYCLECOUNTER: { + // The low part of the cycle counter is in CYCLES, the high part in + // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read + // CYCLES2 last. + SDValue TheChain = N->getOperand(0); + SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32); + SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32); + // Use a buildpair to merge the two 32-bit values into a 64-bit one. + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi)); + // Outgoing chain. If we were to use the chain from lo instead, it would be + // possible to entirely eliminate the CYCLES2 read in (i32 (trunc + // readcyclecounter)). 
Unfortunately this could possibly delay the CYCLES2 + // read beyond the next CYCLES read, leading to invalid results. + Results.push_back(hi.getValue(1)); + return; + } + } +} + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned BlackfinTargetLowering::getFunctionAlignment(const Function *F) const { + return 2; +} + +//===----------------------------------------------------------------------===// +// Blackfin Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +BlackfinTargetLowering::ConstraintType +BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() != 1) + return TargetLowering::getConstraintType(Constraint); + + switch (Constraint[0]) { + // Standard constraints + case 'r': + return C_RegisterClass; + + // Blackfin-specific constraints + case 'a': + case 'd': + case 'z': + case 'D': + case 'W': + case 'e': + case 'b': + case 'v': + case 'f': + case 'c': + case 't': + case 'u': + case 'k': + case 'x': + case 'y': + case 'w': + return C_RegisterClass; + case 'A': + case 'B': + case 'C': + case 'Z': + case 'Y': + return C_Register; + } + + // Not implemented: q0-q7, qA. Use {R2} etc instead + + return TargetLowering::getConstraintType(Constraint); +} + +/// getRegForInlineAsmConstraint - Return register no and class for a C_Register +/// constraint. +std::pair BlackfinTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + typedef std::pair Pair; + using namespace BF; + + if (Constraint.size() != 1) + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + + switch (Constraint[0]) { + // Standard constraints + case 'r': + return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass); + + // Blackfin-specific constraints + case 'a': return Pair(0U, PRegisterClass); + case 'd': return Pair(0U, DRegisterClass); + case 'e': return Pair(0U, AccuRegisterClass); + case 'A': return Pair(A0, AccuRegisterClass); + case 'B': return Pair(A1, AccuRegisterClass); + case 'b': return Pair(0U, IRegisterClass); + case 'v': return Pair(0U, BRegisterClass); + case 'f': return Pair(0U, MRegisterClass); + case 'C': return Pair(CC, JustCCRegisterClass); + case 'x': return Pair(0U, GRRegisterClass); + case 'w': return Pair(0U, ALLRegisterClass); + case 'Z': return Pair(P3, PRegisterClass); + case 'Y': return Pair(P1, PRegisterClass); + } + + // Not implemented: q0-q7, qA. Use {R2} etc instead. 
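Two of the lowerings above are easy to check with a standalone model: LowerADDE's carry composition, and the CYCLES/CYCLES2 latch ordering in ReplaceNodeResults. All names below are illustrative, not hardware or LLVM definitions:

#include <cstdint>
#include <iostream>

// Two plain adds stand in for the missing add-with-carry; the intermediate
// carries can never both be set for a + b + cin with cin <= 1, so OR-ing
// them (the OR_ac0_cc step) yields the correct carry-out.
static void addWithCarry(uint32_t a, uint32_t b, uint32_t cin,
                         uint32_t &sum, uint32_t &carryOut) {
  uint32_t t = a + b;
  uint32_t c1 = t < a ? 1u : 0u;   // carry of the first add
  sum = t + cin;
  uint32_t c2 = sum < t ? 1u : 0u; // carry of the second add
  carryOut = c1 | c2;
}

// Reading LO latches HI, so LO must be read (and chained) first or the
// combined 64-bit value can tear, exactly the ordering argued above.
struct CycleCounter {
  uint32_t LO = 0xFFFFFFF0u, HI = 7u, Latched = 0u;
  uint32_t readLo() { Latched = HI; return LO; } // CYCLES read
  uint32_t readHi() { return Latched; }          // CYCLES2 read
  uint64_t read64() {
    uint64_t lo = readLo();                      // must stay ordered first
    return (uint64_t(readHi()) << 32) | lo;
  }
};

int main() {
  uint32_t s, c;
  addWithCarry(0xFFFFFFFFu, 0u, 1u, s, c);
  std::cout << s << " carry=" << c << '\n';      // prints: 0 carry=1
  std::cout << CycleCounter().read64() << '\n';
}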
+ // Constraints z, D, W, c, t, u, k, and y use non-existing classes, defer to + // getRegClassForInlineAsmConstraint() + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +std::vector BlackfinTargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + using namespace BF; + + if (Constraint.size() != 1) + return std::vector(); + + switch (Constraint[0]) { + case 'z': return make_vector(P0, P1, P2, 0); + case 'D': return make_vector(R0, R2, R4, R6, 0); + case 'W': return make_vector(R1, R3, R5, R7, 0); + case 'c': return make_vector(I0, I1, I2, I3, + B0, B1, B2, B3, + L0, L1, L2, L3, 0); + case 't': return make_vector(LT0, LT1, 0); + case 'u': return make_vector(LB0, LB1, 0); + case 'k': return make_vector(LC0, LC1, 0); + case 'y': return make_vector(RETS, RETN, RETI, RETX, RETE, + ASTAT, SEQSTAT, USP, 0); + } + + return std::vector(); +} + +bool BlackfinTargetLowering:: +isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The Blackfin target isn't yet aware of offsets. + return false; +} diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h new file mode 100644 index 0000000000000..cdbc7d258c313 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -0,0 +1,81 @@ +//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Blackfin uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef BLACKFIN_ISELLOWERING_H +#define BLACKFIN_ISELLOWERING_H + +#include "llvm/Target/TargetLowering.h" +#include "Blackfin.h" + +namespace llvm { + + namespace BFISD { + enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + CALL, // A call instruction. + RET_FLAG, // Return with a flag operand. + Wrapper // Address wrapper + }; + } + + class BlackfinTargetLowering : public TargetLowering { + int VarArgsFrameOffset; // Frame offset to start of varargs area. 
+ public: + BlackfinTargetLowering(TargetMachine &TM); + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual void ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG); + + int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + + ConstraintType getConstraintType(const std::string &Constraint) const; + std::pair + getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + std::vector + getRegClassForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + const char *getTargetNodeName(unsigned Opcode) const; + unsigned getFunctionAlignment(const Function *F) const; + + private: + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerADDE(SDValue Op, SelectionDAG &DAG); + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); + }; +} // end namespace llvm + +#endif // BLACKFIN_ISELLOWERING_H diff --git a/lib/Target/Blackfin/BlackfinInstrFormats.td b/lib/Target/Blackfin/BlackfinInstrFormats.td new file mode 100644 index 0000000000000..d8e6e252e7875 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinInstrFormats.td @@ -0,0 +1,34 @@ +//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +class InstBfin pattern> + : Instruction { + field bits<32> Inst; + + let Namespace = "BF"; + + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; +} + +// Single-word (16-bit) instructions +class F1 pattern> + : InstBfin { +} + +// Double-word (32-bit) instructions +class F2 pattern> + : InstBfin { +} diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp new file mode 100644 index 0000000000000..3fd5d4dc0bf1f --- /dev/null +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -0,0 +1,280 @@ +//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Blackfin implementation of the TargetInstrInfo class. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinInstrInfo.h"
+#include "BlackfinSubtarget.h"
+#include "Blackfin.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "BlackfinGenInstrInfo.inc"
+
+using namespace llvm;
+
+BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
+  : TargetInstrInfoImpl(BlackfinInsts, array_lengthof(BlackfinInsts)),
+    RI(ST, *this),
+    Subtarget(ST) {}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+bool BlackfinInstrInfo::isMoveInstr(const MachineInstr &MI,
+                                    unsigned &SrcReg,
+                                    unsigned &DstReg,
+                                    unsigned &SrcSR,
+                                    unsigned &DstSR) const {
+  SrcSR = DstSR = 0; // No sub-registers.
+  switch (MI.getOpcode()) {
+  case BF::MOVE:
+  case BF::MOVE_ncccc:
+  case BF::MOVE_ccncc:
+  case BF::MOVECC_zext:
+  case BF::MOVECC_nz:
+    DstReg = MI.getOperand(0).getReg();
+    SrcReg = MI.getOperand(1).getReg();
+    return true;
+  case BF::SLL16i:
+    if (MI.getOperand(2).getImm() != 0)
+      return false;
+    DstReg = MI.getOperand(0).getReg();
+    SrcReg = MI.getOperand(1).getReg();
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                                int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case BF::LOAD32fi:
+  case BF::LOAD16fi:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case BF::STORE32fi:
+  case BF::STORE16fi:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+unsigned BlackfinInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+             MachineBasicBlock *TBB,
+             MachineBasicBlock *FBB,
+             const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME: this should probably have a DebugLoc operand.
+  DebugLoc dl = DebugLoc::getUnknownLoc();
+
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "Branch conditions have one component!");
+
+  if (Cond.empty()) {
+    // Unconditional branch?
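+    // An empty condition vector means the caller asked for a plain
+    // unconditional jump to TBB; the return value is the number of branch
+    // instructions actually inserted.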
+ assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, dl, get(BF::JUMPa)).addMBB(TBB); + return 1; + } + + // Conditional branch. + llvm_unreachable("Implement conditional branches!"); +} + +static bool inClass(const TargetRegisterClass &Test, + unsigned Reg, + const TargetRegisterClass *RC) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Test.contains(Reg); + else + return &Test==RC || Test.hasSubClass(RC); +} + +bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + + if (inClass(BF::ALLRegClass, DestReg, DestRC) && + inClass(BF::ALLRegClass, SrcReg, SrcRC)) { + BuildMI(MBB, I, dl, get(BF::MOVE), DestReg).addReg(SrcReg); + return true; + } + + if (inClass(BF::D16RegClass, DestReg, DestRC) && + inClass(BF::D16RegClass, SrcReg, SrcRC)) { + BuildMI(MBB, I, dl, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0); + return true; + } + + if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) && + inClass(BF::DRegClass, DestReg, DestRC)) { + if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { + BuildMI(MBB, I, dl, get(BF::MOVENCC_z), DestReg).addReg(SrcReg); + BuildMI(MBB, I, dl, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0); + } else { + BuildMI(MBB, I, dl, get(BF::MOVECC_zext), DestReg).addReg(SrcReg); + } + return true; + } + + if (inClass(BF::AnyCCRegClass, DestReg, DestRC) && + inClass(BF::DRegClass, SrcReg, SrcRC)) { + if (inClass(BF::NotCCRegClass, DestReg, DestRC)) + BuildMI(MBB, I, dl, get(BF::SETEQri_not), DestReg).addReg(SrcReg); + else + BuildMI(MBB, I, dl, get(BF::MOVECC_nz), DestReg).addReg(SrcReg); + return true; + } + + if (inClass(BF::NotCCRegClass, DestReg, DestRC) && + inClass(BF::JustCCRegClass, SrcReg, SrcRC)) { + BuildMI(MBB, I, dl, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg); + return true; + } + + if (inClass(BF::JustCCRegClass, DestReg, DestRC) && + inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { + BuildMI(MBB, I, dl, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg); + return true; + } + + llvm_unreachable((std::string("Bad regclasses for reg-to-reg copy: ")+ + SrcRC->getName() + " -> " + DestRC->getName()).c_str()); + return false; +} + +void +BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, + bool isKill, + int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = I != MBB.end() ? 
+                   I->getDebugLoc() : DebugLoc::getUnknownLoc();
+
+  if (inClass(BF::DPRegClass, SrcReg, RC)) {
+    BuildMI(MBB, I, DL, get(BF::STORE32fi))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI)
+      .addImm(0);
+    return;
+  }
+
+  if (inClass(BF::D16RegClass, SrcReg, RC)) {
+    BuildMI(MBB, I, DL, get(BF::STORE16fi))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI)
+      .addImm(0);
+    return;
+  }
+
+  if (inClass(BF::AnyCCRegClass, SrcReg, RC)) {
+    BuildMI(MBB, I, DL, get(BF::STORE8fi))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI)
+      .addImm(0);
+    return;
+  }
+
+  llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+
+                    RC->getName()).c_str());
+}
+
+void BlackfinInstrInfo::
+storeRegToAddr(MachineFunction &MF,
+               unsigned SrcReg,
+               bool isKill,
+               SmallVectorImpl<MachineOperand> &Addr,
+               const TargetRegisterClass *RC,
+               SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  llvm_unreachable("storeRegToAddr not implemented");
+}
+
+void
+BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator I,
+                                        unsigned DestReg,
+                                        int FI,
+                                        const TargetRegisterClass *RC) const {
+  DebugLoc DL = I != MBB.end() ?
+    I->getDebugLoc() : DebugLoc::getUnknownLoc();
+  if (inClass(BF::DPRegClass, DestReg, RC)) {
+    BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
+      .addFrameIndex(FI)
+      .addImm(0);
+    return;
+  }
+
+  if (inClass(BF::D16RegClass, DestReg, RC)) {
+    BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg)
+      .addFrameIndex(FI)
+      .addImm(0);
+    return;
+  }
+
+  if (inClass(BF::AnyCCRegClass, DestReg, RC)) {
+    BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg)
+      .addFrameIndex(FI)
+      .addImm(0);
+    return;
+  }
+
+  llvm_unreachable("Cannot load regclass from stack slot");
+}
+
+void BlackfinInstrInfo::
+loadRegFromAddr(MachineFunction &MF,
+                unsigned DestReg,
+                SmallVectorImpl<MachineOperand> &Addr,
+                const TargetRegisterClass *RC,
+                SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  llvm_unreachable("loadRegFromAddr not implemented");
+}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
new file mode 100644
index 0000000000000..ea3429c1014a9
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -0,0 +1,80 @@
+//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Blackfin implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFININSTRUCTIONINFO_H
+#define BLACKFININSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "BlackfinRegisterInfo.h"
+
+namespace llvm {
+
+  class BlackfinInstrInfo : public TargetInstrInfoImpl {
+    const BlackfinRegisterInfo RI;
+    const BlackfinSubtarget& Subtarget;
+  public:
+    explicit BlackfinInstrInfo(BlackfinSubtarget &ST);
+
+    /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+    /// such, whenever a client has an instance of instruction info, it should
+    /// always be able to get register info as well (through this method).
+    virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
+
+    virtual bool isMoveInstr(const MachineInstr &MI,
+                             unsigned &SrcReg, unsigned &DstReg,
+                             unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+    virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                         int &FrameIndex) const;
+
+    virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                        int &FrameIndex) const;
+
+    virtual unsigned
+    InsertBranch(MachineBasicBlock &MBB,
+                 MachineBasicBlock *TBB,
+                 MachineBasicBlock *FBB,
+                 const SmallVectorImpl<MachineOperand> &Cond) const;
+
+    virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I,
+                              unsigned DestReg, unsigned SrcReg,
+                              const TargetRegisterClass *DestRC,
+                              const TargetRegisterClass *SrcRC) const;
+
+    virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     unsigned SrcReg, bool isKill,
+                                     int FrameIndex,
+                                     const TargetRegisterClass *RC) const;
+
+    virtual void storeRegToAddr(MachineFunction &MF,
+                                unsigned SrcReg, bool isKill,
+                                SmallVectorImpl<MachineOperand> &Addr,
+                                const TargetRegisterClass *RC,
+                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+    virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      unsigned DestReg, int FrameIndex,
+                                      const TargetRegisterClass *RC) const;
+
+    virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                                 SmallVectorImpl<MachineOperand> &Addr,
+                                 const TargetRegisterClass *RC,
+                                 SmallVectorImpl<MachineInstr*> &NewMIs) const;
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
new file mode 100644
index 0000000000000..934b18864cb5e
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -0,0 +1,873 @@
+//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Blackfin instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "BlackfinInstrFormats.td"
+
+// These are target-independent nodes, but have target-specific formats.
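+// CALLSEQ_START/CALLSEQ_END bracket every call and are matched by the
+// ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos defined below; the flag (glue)
+// operands keep the stack adjustment ordered with the call itself.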
+def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_BfinCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+                                          SDTCisVT<1, i32> ]>;
+
+def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
+                              [SDNPHasChain, SDNPOutFlag]>;
+def BfinCallseqEnd   : SDNode<"ISD::CALLSEQ_END",   SDT_BfinCallSeqEnd,
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
+                      [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
+                    [SDNPHasChain, SDNPOptInFlag]>;
+
+def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
+
+//===----------------------------------------------------------------------===//
+// Transformations
+//===----------------------------------------------------------------------===//
+
+def trailingZeros_xform : SDNodeXForm<imm, [{
+  return getTargetConstant(N->getAPIntValue().countTrailingZeros(),
+                           MVT::i32);
+}]>;
+
+def trailingOnes_xform : SDNodeXForm<imm, [{
+  return getTargetConstant(N->getAPIntValue().countTrailingOnes(),
+                           MVT::i32);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+  return getTargetConstant((unsigned short)N->getZExtValue(), MVT::i16);
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+  return getTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Immediates
+//===----------------------------------------------------------------------===//
+
+def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
+def uimm3 : PatLeaf<(imm), [{return isUint<3>(N->getZExtValue());}]>;
+def uimm4 : PatLeaf<(imm), [{return isUint<4>(N->getZExtValue());}]>;
+def uimm5 : PatLeaf<(imm), [{return isUint<5>(N->getZExtValue());}]>;
+
+def uimm5m2 : PatLeaf<(imm), [{
+  uint64_t value = N->getZExtValue();
+  return value % 2 == 0 && isUint<5>(value);
+}]>;
+
+def uimm6m4 : PatLeaf<(imm), [{
+  uint64_t value = N->getZExtValue();
+  return value % 4 == 0 && isUint<6>(value);
+}]>;
+
+def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
+def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
+def uimm16 : PatLeaf<(imm), [{return isUint<16>(N->getZExtValue());}]>;
+
+def ximm16 : PatLeaf<(imm), [{
+  int64_t value = N->getSExtValue();
+  return value < (1<<16) && value >= -(1<<15);
+}]>;
+
+def imm17m2 : PatLeaf<(imm), [{
+  int64_t value = N->getSExtValue();
+  return value % 2 == 0 && isInt<17>(value);
+}]>;
+
+def imm18m4 : PatLeaf<(imm), [{
+  int64_t value = N->getSExtValue();
+  return value % 4 == 0 && isInt<18>(value);
+}]>;
+
+// 32-bit bitmask transformed to a bit number
+def uimm5mask : Operand<i32>, PatLeaf<(imm), [{
+  return isPowerOf2_32(N->getZExtValue());
+}], trailingZeros_xform>;
+
+// 32-bit inverse bitmask transformed to a bit number
+def uimm5imask : Operand<i32>, PatLeaf<(imm), [{
+  return isPowerOf2_32(~N->getZExtValue());
+}], trailingOnes_xform>;
+
+//===----------------------------------------------------------------------===//
+// Operands
+//===----------------------------------------------------------------------===//
+
+def calltarget : Operand<iPTR>;
+
+def brtarget : Operand<OtherVT>;
+
+// Addressing modes
+def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
+
+// Address operands
+def MEMii : Operand<i32> {
+  let PrintMethod = "printMemoryOperand";
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+// Pseudo instructions.
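+// Pseudos have no machine encoding of their own; they are either rewritten
+// during frame index elimination (the FI loads/stores below) or printed as
+// the multi-instruction sequences spelled out in their asm strings.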
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstBfin<outs, ins, asmstr, pattern>;
+
+let Defs = [SP], Uses = [SP] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+                              "${:comment}ADJCALLSTACKDOWN $amt",
+                              [(BfinCallseqStart timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+                            "${:comment}ADJCALLSTACKUP $amt1 $amt2",
+                            [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-9. Program Flow Control Instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1 in {
+
+let isIndirectBranch = 1 in
+def JUMPp : F1<(outs), (ins P:$target),
+               "JUMP ($target);",
+               [(brind P:$target)]>;
+
+// TODO JUMP (PC-P)
+
+// NOTE: assembler chooses between JUMP.S and JUMP.L
+def JUMPa : F1<(outs), (ins brtarget:$target),
+               "jump $target;",
+               [(br bb:$target)]>;
+
+def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target),
+                "if $cc jump $target;",
+                [(brcond AnyCC:$cc, bb:$target)]>;
+}
+
+let isCall = 1,
+    Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in {
+def CALLa: F1<(outs), (ins calltarget:$func, variable_ops),
+              "call $func;", []>;
+def CALLp: F1<(outs), (ins P:$func, variable_ops),
+              "call ($func);", [(BfinCall P:$func)]>;
+}
+
+let isReturn = 1,
+    isTerminator = 1,
+    Uses = [RETS] in
+def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-10. Load / Store Instructions
+//===----------------------------------------------------------------------===//
+
+// Immediate constant loads
+
+// sext immediate, i32 D/P regs
+def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src),
+                 "$dst = $src (x);",
+                 [(set DP:$dst, imm7:$src)]>;
+
+// zext immediate, i32 reg groups 0-3
+def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+                   "$dst = $src (z);",
+                   [(set GR:$dst, uimm16:$src)]>;
+
+// sext immediate, i32 reg groups 0-3
+def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src),
+                  "$dst = $src (x);",
+                  [(set GR:$dst, imm16:$src)]>;
+
+// Pseudo-instruction for loading a general 32-bit constant.
+def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+                      "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);",
+                      [(set GR:$dst, imm:$src)]>;
+
+def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src),
+                      "$dst.h = $src; $dst.l = $src;", []>;
+
+
+// 16-bit immediate, i16 reg groups 0-3
+def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src),
+                "$dst = $src;", []>;
+
+def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)),
+          (LOAD32sym tglobaladdr:$addr)>;
+
+def : Pat<(BfinWrapper (i32 tjumptable:$addr)),
+          (LOAD32sym tjumptable:$addr)>;
+
+// We cannot copy from GR16 to D16, and codegen wants to insert copies if we
+// emit GR16 instructions. As a hack, we use this fake instruction instead.
+def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src),
+                    "$dst = $src;",
+                    [(set D16:$dst, ximm16:$src)]>;
+
+// Memory loads with patterns
+
+def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr),
+                "$dst = [$ptr];",
+                [(set DP:$dst, (load P:$ptr))]>;
+
+// Pseudo-instruction for loading a stack slot
+def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem),
+                     "${:comment}FI $dst = [$mem];",
+                     [(set DP:$dst, (load ADDRspii:$mem))]>;
+
+// Note: Expands to multiple insns
+def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem),
+                     "${:comment}FI $dst = [$mem];",
+                     [(set D16:$dst, (load ADDRspii:$mem))]>;
+
+// Pseudo-instruction for loading a stack slot, used for AnyCC regs.
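+// The CC bits have no load/store form of their own, so the spill slot is
+// accessed as a byte through a scratch D register: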
+// Replaced with Load D + CC=D
+def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem),
+                    "${:comment}FI $dst = B[$mem];",
+                    [(set AnyCC:$dst, (load ADDRspii:$mem))]>;
+
+def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+                        "$dst = [$ptr + $off];",
+                        [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>;
+
+def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
+                        "$dst = [$ptr + $off];",
+                        [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>;
+
+def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr),
+                    "$dst = W[$ptr] (z);",
+                    [(set D:$dst, (zextloadi16 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi16 P:$ptr)),(LOAD32p_16z P:$ptr)>;
+
+def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+                            "$dst = w[$ptr + $off] (z);",
+                            [(set D:$dst, (zextloadi16 (add P:$ptr,
+                                                        uimm5m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))),
+          (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+                            "$dst = w[$ptr + $off] (z);",
+                            [(set D:$dst,
+                                  (zextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))),
+          (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>;
+
+def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr),
+                    "$dst = w[$ptr] (x);",
+                    [(set D:$dst, (sextloadi16 P:$ptr))]>;
+
+def LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+                            "$dst = w[$ptr + $off] (x);",
+                            [(set D:$dst,
+                                  (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>;
+
+def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+                            "$dst = w[$ptr + $off] (x);",
+                            [(set D:$dst,
+                                  (sextloadi16 (add P:$ptr, imm17m2:$off)))]>;
+
+def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr),
+                 "$dst = w[$ptr];",
+                 [(set D16:$dst, (load PI:$ptr))]>;
+
+def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
+                   "$dst = B[$ptr] (z);",
+                   [(set D:$dst, (zextloadi8 P:$ptr))]>;
+
+def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
+def : Pat<(i16 (extloadi8 P:$ptr)),
+          (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+def : Pat<(i16 (zextloadi8 P:$ptr)),
+          (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+
+def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+                         "$dst = b[$ptr + $off] (z);",
+                         [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
+          (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
+def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
+          (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+                          bfin_subreg_lo16)>;
+def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
+          (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
+                          bfin_subreg_lo16)>;
+
+def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
+                   "$dst = b[$ptr] (x);",
+                   [(set D:$dst, (sextloadi8 P:$ptr))]>;
+
+def : Pat<(i16 (sextloadi8 P:$ptr)),
+          (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), bfin_subreg_lo16)>;
+
+def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
+                         "$dst = b[$ptr + $off] (x);",
+                         [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>;
+
+def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
+          (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
+                          bfin_subreg_lo16)>;
+// Memory loads without patterns
+
+let mayLoad = 1 in {
+
+multiclass LOAD_incdec<RegisterClass drc, RegisterClass prc,
+                       string mem="", string suf=";"> {
+  def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+                !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>;
+  def _dec : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
+                !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>;
+}
+multiclass LOAD_incdecpost<RegisterClass drc, RegisterClass prc,
+                           string mem="", string suf=";">
+  : LOAD_incdec<drc, prc, mem, suf> {
+  def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off),
+                 !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf),
+                 []>;
+}
+
+defm LOAD32p:    LOAD_incdec<DP, P>;
+defm LOAD32i:    LOAD_incdec<D, I>;
+defm LOAD8z32p:  LOAD_incdec<D, P, "b", " (z);">;
+defm LOAD8s32p:  LOAD_incdec<D, P, "b", " (x);">;
+defm LOADhi:     LOAD_incdec<D16, P, "w">;
+defm LOAD16z32p: LOAD_incdecpost<D, P, "w", " (z);">;
+defm LOAD16s32p: LOAD_incdecpost<D, P, "w", " (x);">;
+
+def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+                     "$dst = [$ptr ++ $off];", []>;
+
+// Note: $fp MUST be FP
+def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off),
+                         "$dst = [$fp - $off];", []>;
+
+def LOAD32i: F1<(outs D:$dst), (ins I:$ptr),
+                "$dst = [$ptr];", []>;
+def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off),
+                     "$dst = [$ptr ++ $off];", []>;
+
+def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
+                    "$dst = w[$ptr ++ $off];", []>;
+
+}
+
+// Memory stores with patterns
+def STORE32p: F1<(outs), (ins DP:$val, P:$ptr),
+                 "[$ptr] = $val;",
+                 [(store DP:$val, P:$ptr)]>;
+
+// Pseudo-instructions for storing to a stack slot
+def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem),
+                      "${:comment}FI [$mem] = $val;",
+                      [(store DP:$val, ADDRspii:$mem)]>;
+
+// Note: This stack-storing pseudo-instruction is expanded to multiple insns
+def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem),
+                      "${:comment}FI [$mem] = $val;",
+                      [(store D16:$val, ADDRspii:$mem)]>;
+
+// Pseudo-instructions for storing AnyCC register to a stack slot.
+// Replaced with D=CC + STORE byte
+def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem),
+                     "${:comment}FI b[$mem] = $val;",
+                     [(store AnyCC:$val, ADDRspii:$mem)]>;
+
+def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+                         "[$ptr + $off] = $val;",
+                         [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>;
+
+def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
+                         "[$ptr + $off] = $val;",
+                         [(store DP:$val, (add P:$ptr, imm18m4:$off))]>;
+
+def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr),
+                  "w[$ptr] = $val;",
+                  [(store D16:$val, PI:$ptr)]>;
+
+def STORE8p: F1<(outs), (ins D:$val, P:$ptr),
+                "b[$ptr] = $val;",
+                [(truncstorei8 D:$val, P:$ptr)]>;
+
+def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off),
+                      "b[$ptr + $off] = $val;",
+                      [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>;
+
+let Constraints = "$ptr = $ptr_wb" in {
+
+multiclass STORE_incdec<RegisterClass drc, RegisterClass prc,
+                        int off=4, string pre=""> {
+  def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+                !strconcat(pre, "[$ptr++] = $val;"),
+                [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>;
+  def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
+                !strconcat(pre, "[$ptr--] = $val;"),
+                [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr,
+                                    (ineg off)))]>;
+}
+
+defm STORE32p: STORE_incdec<D, P>;
+defm STORE16i: STORE_incdec<D16, I, 2, "w">;
+defm STORE8p:  STORE_incdec<D, P, 1, "b">;
+
+def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off),
+                      "[$ptr ++ $off] = $val;",
+                      [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>;
+
+def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off),
+                      "w[$ptr ++ $off] = $val;",
+                      [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>;
+}
+
+// Memory stores without patterns
+
+let mayStore = 1 in {
+
+// Note: only works for $fp == FP
+def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off),
+                          "[$fp - $off] = $val;", []>;
+
+def STORE32i: F1<(outs), (ins D:$val, I:$ptr),
+                 "[$ptr] = $val;", []>;
+
+def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
+                     "[$ptr++] = $val;", []>;
+
+def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val,
I:$ptr), + "[$ptr--] = $val;", []>; + +def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off), + "[$ptr ++ $off] = $val;", []>; +} + +def : Pat<(truncstorei16 D:$val, PI:$ptr), + (STORE16pi (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$val, D), + bfin_subreg_lo16), PI:$ptr)>; + +def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr), + (STORE16pi (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$val, D), + bfin_subreg_hi16), PI:$ptr)>; + +def : Pat<(truncstorei8 D16L:$val, P:$ptr), + (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS D16L:$val, D16L), + bfin_subreg_lo16), + P:$ptr)>; + +//===----------------------------------------------------------------------===// +// Table C-11. Move Instructions. +//===----------------------------------------------------------------------===// + +def MOVE: F1<(outs ALL:$dst), (ins ALL:$src), + "$dst = $src;", + []>; + +let isTwoAddress = 1 in +def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc), + "if $cc $dst = $src2;", + [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>; + +let Defs = [AZ, AN, AC0, V] in { +def MOVEzext: F1<(outs D:$dst), (ins D16L:$src), + "$dst = $src (z);", + [(set D:$dst, (zext D16L:$src))]>; + +def MOVEsext: F1<(outs D:$dst), (ins D16L:$src), + "$dst = $src (x);", + [(set D:$dst, (sext D16L:$src))]>; + +def MOVEzext8: F1<(outs D:$dst), (ins D:$src), + "$dst = $src.b (z);", + [(set D:$dst, (and D:$src, 0xff))]>; + +def MOVEsext8: F1<(outs D:$dst), (ins D:$src), + "$dst = $src.b (x);", + [(set D:$dst, (sext_inreg D:$src, i8))]>; + +} + +def : Pat<(sext_inreg D16L:$src, i8), + (EXTRACT_SUBREG (MOVEsext8 + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + D16L:$src, + bfin_subreg_lo16)), + bfin_subreg_lo16)>; + +def : Pat<(sext_inreg D:$src, i16), + (MOVEsext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>; + +def : Pat<(and D:$src, 0xffff), + (MOVEzext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>; + +def : Pat<(i32 (anyext D16L:$src)), + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS D16L:$src, D16L), + bfin_subreg_lo16)>; + +// TODO Dreg = Dreg_byte (X/Z) + +// TODO Accumulator moves + +//===----------------------------------------------------------------------===// +// Table C-12. Stack Control Instructions +//===----------------------------------------------------------------------===// + +let Uses = [SP], Defs = [SP] in { +def PUSH: F1<(outs), (ins ALL:$src), + "[--sp] = $src;", []> { let mayStore = 1; } + +// NOTE: POP does not work for DP regs, use LOAD instead +def POP: F1<(outs ALL:$dst), (ins), + "$dst = [sp++];", []> { let mayLoad = 1; } +} + +// TODO: push/pop multiple + +def LINK: F2<(outs), (ins i32imm:$amount), + "link $amount;", []>; + +def UNLINK: F2<(outs), (ins), + "unlink;", []>; + +//===----------------------------------------------------------------------===// +// Table C-13. 
Control Code Bit Management Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SETCC<PatFrag opnode, PatFrag invnode, string cond, string suf=";"> {
+  def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
+              !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+              [(set JustCC:$cc, (opnode D:$a, D:$b))]>;
+
+  def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b),
+              !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+              [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>;
+
+  def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b),
+              !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+              []>;
+
+  def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b),
+                  !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
+                  [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>;
+}
+
+defm SETEQ  : SETCC<seteq,  setne,  "==">;
+defm SETLT  : SETCC<setlt,  setge,  "<">;
+defm SETLE  : SETCC<setle,  setgt,  "<=">;
+defm SETULT : SETCC<setult, setuge, "<",  " (iu)">;
+defm SETULE : SETCC<setule, setugt, "<=", " (iu)">;
+
+def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b),
+                 "cc = $a == $b;",
+                 [(set NotCC:$cc, (setne D:$a, D:$b))]>;
+
+def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>;
+def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>;
+def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>;
+def : Pat<(setuge D:$a, D:$b), (SETULEdd D:$b, D:$a)>;
+
+// TODO: compare pointer for P-P comparisons
+// TODO: compare accumulator
+
+let Defs = [AC0] in
+def OR_ac0_cc : F1<(outs), (ins JustCC:$cc),
+                   "ac0 \\|= cc;", []>;
+
+let Uses = [AC0] in
+def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins),
+                     "cc = ac0;", []>;
+
+def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb),
+                    "cc = !cc;", []>;
+
+def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
+                    "cc = !cc;", []>;
+
+def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
+                     "$dst = $cc;",
+                     [(set D:$dst, (zext JustCC:$cc))]>;
+
+def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
+                   "$dst = cc;", []>;
+
+def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src),
+                   "cc = $src;",
+                   [(set AnyCC:$cc, (setne D:$src, 0))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-14. Logical Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+            "$dst = $src1 & $src2;",
+            [(set D:$dst, (and D:$src1, D:$src2))]>;
+
+def NOT: F1<(outs D:$dst), (ins D:$src),
+            "$dst = ~$src;",
+            [(set D:$dst, (not D:$src))]>;
+
+def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+           "$dst = $src1 \\| $src2;",
+           [(set D:$dst, (or D:$src1, D:$src2))]>;
+
+def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+            "$dst = $src1 ^ $src2;",
+            [(set D:$dst, (xor D:$src1, D:$src2))]>;
+
+// missing: BXOR, BXORSHIFT
+
+//===----------------------------------------------------------------------===//
+// Table C-15. Bit Operations Instructions
+//===----------------------------------------------------------------------===//
+
+let isTwoAddress = 1 in {
+def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
+               "bitclr($dst, $src2);",
+               [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
+
+def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+               "bitset($dst, $src2);",
+               [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>;
+
+def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
+               "bittgl($dst, $src2);",
+               [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>;
+}
+
+def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+               "cc = bittst($src1, $src2);",
+               [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2),
+                                        (i32 0)))]>;
+
+def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
+                "cc = !bittst($src1, $src2);",
+                [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2),
+                                         (i32 0)))]>;
+
+// TODO: DEPOSIT, EXTRACT, BITMUX
+
+def ONES: F2<(outs D16L:$dst), (ins D:$src),
+             "$dst = ones $src;",
+             [(set D16L:$dst, (trunc (ctpop D:$src)))]>;
+
+def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>;
+
+//===----------------------------------------------------------------------===//
+// Table C-16. Shift / Rotate Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHIFT32<SDNode opnode, string ops> {
+  def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount),
+             !subst("XX", ops, "$dst XX= $amount;"),
+             [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>;
+  def r : F1<(outs D:$dst), (ins D:$src, D:$amount),
+             !subst("XX", ops, "$dst XX= $amount;"),
+             [(set D:$dst, (opnode D:$src, D:$amount))]>;
+}
+
+let Defs = [AZ, AN, V, VS],
+    isTwoAddress = 1 in {
+defm SRA : SHIFT32<sra, ">>>">;
+defm SRL : SHIFT32<srl, ">>">;
+defm SLL : SHIFT32<shl, "<<">;
+}
+
+// TODO: automatic switching between 2-addr and 3-addr (?)
+
+let Defs = [AZ, AN, V, VS] in {
+def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+               "$dst = lshift $src by $amount;",
+               [(set D:$dst, (shl D:$src, D16L:$amount))]>;
+
+// Arithmetic left-shift = saturating on overflow.
+def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
+               "$dst = ashift $src by $amount;",
+               [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>;
+
+def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+               "$dst = $src >>> $amount;",
+               [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>;
+
+def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+               "$dst = $src >> $amount;",
+               [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>;
+
+// Arithmetic left-shift = saturating on overflow.
+def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+               "$dst = ashift $src BY $amount;",
+               [(set D16:$dst, (srl D16:$src, (ineg D16L:$amount)))]>;
+
+def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
+               "$dst = $src << $amount;",
+               [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>;
+
+def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
+               "$dst = lshift $src by $amount;",
+               [(set D16:$dst, (shl D16:$src, D16L:$amount))]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Table C-17. Arithmetic Operations Instructions
+//===----------------------------------------------------------------------===//
+
+// TODO: ABS
+
+let Defs = [AZ, AN, AC0, V, VS] in {
+
+def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+            "$dst = $src1 + $src2;",
+            [(set D:$dst, (add D:$src1, D:$src2))]>;
+
+def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+              "$dst = $src1 + $src2;",
+              [(set D16:$dst, (add D16:$src1, D16:$src2))]>;
+
+let isTwoAddress = 1 in
+def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
+                "$dst += $src2;",
+                [(set D:$dst, (add D:$src1, imm7:$src2))]>;
+
+def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+            "$dst = $src1 - $src2;",
+            [(set D:$dst, (sub D:$src1, D:$src2))]>;
+
+def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+              "$dst = $src1 - $src2;",
+              [(set D16:$dst, (sub D16:$src1, D16:$src2))]>;
+
+}
+
+def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>;
+def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>;
+
+let Defs = [AZ, AN, V, VS] in
+def NEG: F1<(outs D:$dst), (ins D:$src),
+            "$dst = -$src;",
+            [(set D:$dst, (ineg D:$src))]>;
+
+// No pattern: it would confuse isel to have two i32 = i32+i32 patterns.
+def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
+              "$dst = $src1 + $src2;", []>;
+
+let isTwoAddress = 1 in
+def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
+                   "$dst += $src2;", []>;
+
+let Defs = [AZ, AN, V] in
+def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2),
+                  "$dst = $src1 + $src2 (rnd20);", []>;
+
+let Defs = [V, VS] in {
+def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+              "$dst = $src1 * $src2 (is);",
+              [(set D16:$dst, (mul D16:$src1, D16:$src2))]>;
+
+def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
+               "$dst = $src1 * $src2 (ih);",
+               [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>;
+
+def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+                 "$dst = $src1 * $src2 (is);",
+                 [(set D:$dst, (mul (sext D16:$src1), (sext D16:$src2)))]>;
+
+def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
+                 "$dst = $src1 * $src2 (is);",
+                 [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>;
+}
+
+
+let isTwoAddress = 1 in
+def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
+              "$dst *= $src2;",
+              [(set D:$dst, (mul D:$src1, D:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// Table C-18. External Event Management Instructions
+//===----------------------------------------------------------------------===//
+
+def IDLE     : F1<(outs), (ins), "idle;",     [(int_bfin_idle)]>;
+def CSYNC    : F1<(outs), (ins), "csync;",    [(int_bfin_csync)]>;
+def SSYNC    : F1<(outs), (ins), "ssync;",    [(int_bfin_ssync)]>;
+def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>;
+def CLI      : F1<(outs D:$mask), (ins), "cli $mask;", []>;
+def STI      : F1<(outs), (ins D:$mask), "sti $mask;", []>;
+def RAISE    : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>;
+def EXCPT    : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>;
+def NOP      : F1<(outs), (ins), "nop;", []>;
+def MNOP     : F2<(outs), (ins), "mnop;", []>;
+def ABORT    : F1<(outs), (ins), "abort;", []>;
+
+//===----------------------------------------------------------------------===//
+// Table C-19. Cache Control Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Table C-20. Video Pixel Operations Instructions
+//===----------------------------------------------------------------------===//
+
+def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+                "$dst = align8($src1, $src2);",
+                [(set D:$dst, (or (shl D:$src1, (i32 24)),
+                                  (srl D:$src2, (i32 8))))]>;
+
+def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+                 "$dst = align16($src1, $src2);",
+                 [(set D:$dst, (or (shl D:$src1, (i32 16)),
+                                   (srl D:$src2, (i32 16))))]>;
+
+def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
+                 "$dst = align24($src1, $src2);",
+                 [(set D:$dst, (or (shl D:$src1, (i32 8)),
+                                   (srl D:$src2, (i32 24))))]>;
+
+def DISALGNEXCPT : F2<(outs), (ins), "disalignexcpt;", []>;
+
+// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA,
+//       BYTEPACK, BYTEUNPACK
+
+// Table C-21. Vector Operations Instructions
+
+// Patterns
+def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
+          (CALLa tglobaladdr:$dst)>;
+def : Pat<(BfinCall (i32 texternalsym:$dst)),
+          (CALLa texternalsym:$dst)>;
+
+def : Pat<(sext JustCC:$cc),
+          (NEG (MOVECC_zext JustCC:$cc))>;
+def : Pat<(anyext JustCC:$cc),
+          (MOVECC_zext JustCC:$cc)>;
+def : Pat<(i16 (zext JustCC:$cc)),
+          (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
+def : Pat<(i16 (sext JustCC:$cc)),
+          (EXTRACT_SUBREG (NEG (MOVECC_zext JustCC:$cc)), bfin_subreg_lo16)>;
+def : Pat<(i16 (anyext JustCC:$cc)),
+          (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
+
+def : Pat<(i16 (trunc D:$src)),
+          (EXTRACT_SUBREG (COPY_TO_REGCLASS D:$src, D), bfin_subreg_lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
new file mode 100644
index 0000000000000..6d0f66cd7a5d1
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
@@ -0,0 +1,21 @@
+//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the BlackfinMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCAsmInfo.h"
+
+using namespace llvm;
+
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) {
+  GlobalPrefix = "_";
+  CommentString = "//";
+}
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
new file mode 100644
index 0000000000000..0efc29523067b
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
@@ -0,0 +1,29 @@
+//===-- BlackfinMCAsmInfo.h - Blackfin asm properties ---------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BlackfinMCAsmInfo class.
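+// The .cpp sets GlobalPrefix to "_" and CommentString to "//" to match the
+// Blackfin assembler's conventions.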
+// +//===----------------------------------------------------------------------===// + +#ifndef BLACKFINTARGETASMINFO_H +#define BLACKFINTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + struct BlackfinMCAsmInfo : public MCAsmInfo { + explicit BlackfinMCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp new file mode 100644 index 0000000000000..8c0a58aca41ec --- /dev/null +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -0,0 +1,472 @@ +//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Blackfin implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#include "Blackfin.h" +#include "BlackfinRegisterInfo.h" +#include "BlackfinSubtarget.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Type.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st, + const TargetInstrInfo &tii) + : BlackfinGenRegisterInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP), + Subtarget(st), + TII(tii) {} + +const unsigned* +BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + using namespace BF; + static const unsigned CalleeSavedRegs[] = { + FP, + R4, R5, R6, R7, + P3, P4, P5, + 0 }; + return CalleeSavedRegs; +} + +const TargetRegisterClass* const *BlackfinRegisterInfo:: +getCalleeSavedRegClasses(const MachineFunction *MF) const { + using namespace BF; + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { + &PRegClass, + &DRegClass, &DRegClass, &DRegClass, &DRegClass, + &PRegClass, &PRegClass, &PRegClass, + 0 }; + return CalleeSavedRegClasses; +} + +BitVector +BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + using namespace BF; + BitVector Reserved(getNumRegs()); + Reserved.set(AZ); + Reserved.set(AN); + Reserved.set(AQ); + Reserved.set(AC0); + Reserved.set(AC1); + Reserved.set(AV0); + Reserved.set(AV0S); + Reserved.set(AV1); + Reserved.set(AV1S); + Reserved.set(V); + Reserved.set(VS); + Reserved.set(CYCLES).set(CYCLES2); + Reserved.set(L0); + Reserved.set(L1); + Reserved.set(L2); + Reserved.set(L3); + Reserved.set(SP); + Reserved.set(RETS); + if (hasFP(MF)) + Reserved.set(FP); + return Reserved; +} + +const TargetRegisterClass* +BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { + assert(isPhysicalRegister(reg) && "reg must be a physical register"); + + // Pick the smallest register class of the right type that contains + // this physreg. 
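+  // For example, a query for R0 with MVT::i32 would consider D, DP and ALL,
+  // among others, and settle on D as the smallest class containing R0.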
+ const TargetRegisterClass* BestRC = 0; + for (regclass_iterator I = regclass_begin(), E = regclass_end(); + I != E; ++I) { + const TargetRegisterClass* RC = *I; + if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && + (!BestRC || RC->getNumRegs() < BestRC->getNumRegs())) + BestRC = RC; + } + + assert(BestRC && "Couldn't find the register class"); + return BestRC; +} + +// hasFP - Return true if the specified function should have a dedicated frame +// pointer register. This is true if the function has variable sized allocas or +// if frame pointer elimination is disabled. +bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasCalls() || MFI->hasVarSizedObjects(); +} + +bool BlackfinRegisterInfo:: +requiresRegisterScavenging(const MachineFunction &MF) const { + return true; +} + +// Emit instructions to add delta to D/P register. ScratchReg must be of the +// same class as Reg (P). +void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, + unsigned Reg, + unsigned ScratchReg, + int delta) const { + if (!delta) + return; + if (isInt<7>(delta)) { + BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg) + .addReg(Reg) // No kill on two-addr operand + .addImm(delta); + return; + } + + // We must load delta into ScratchReg and add that. + loadConstant(MBB, I, DL, ScratchReg, delta); + if (BF::PRegClass.contains(Reg)) { + assert(BF::PRegClass.contains(ScratchReg) && + "ScratchReg must be a P register"); + BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg) + .addReg(Reg, RegState::Kill) + .addReg(ScratchReg, RegState::Kill); + } else { + assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register"); + assert(BF::DRegClass.contains(ScratchReg) && + "ScratchReg must be a D register"); + BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg) + .addReg(Reg, RegState::Kill) + .addReg(ScratchReg, RegState::Kill); + } +} + +// Emit instructions to load a constant into D/P register +void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, + unsigned Reg, + int value) const { + if (isInt<7>(value)) { + BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value); + return; + } + + if (isUint<16>(value)) { + BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value); + return; + } + + if (isInt<16>(value)) { + BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value); + return; + } + + // We must split into halves + BuildMI(MBB, I, DL, + TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_hi16)) + .addImm((value >> 16) & 0xffff) + .addReg(Reg, RegState::ImplicitDefine); + BuildMI(MBB, I, DL, + TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_lo16)) + .addImm(value & 0xffff) + .addReg(Reg, RegState::ImplicitKill) + .addReg(Reg, RegState::ImplicitDefine); +} + +void BlackfinRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + if (Amount != 0) { + assert(Amount%4 == 0 && "Unaligned call frame size"); + if (I->getOpcode() == BF::ADJCALLSTACKDOWN) { + adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount); + } else { + assert(I->getOpcode() == BF::ADJCALLSTACKUP && + "Unknown call frame pseudo instruction"); + adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount); + } + } + } + MBB.erase(I); +} + +/// 
findScratchRegister - Find a 'free' register. Try for a call-clobbered +/// register first and then a spilled callee-saved register if that fails. +static unsigned findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) { + assert(RS && "Register scavenging must be on"); + unsigned Reg = RS->FindUnusedReg(RC); + if (Reg == 0) + Reg = RS->scavengeRegister(RC, II, SPAdj); + return Reg; +} + +unsigned +BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned FIPos; + for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) { + assert(FIPos < MI.getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + int FrameIndex = MI.getOperand(FIPos).getIndex(); + assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm()); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MI.getOperand(FIPos+1).getImm(); + unsigned BaseReg = BF::FP; + if (hasFP(MF)) { + assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer"); + } else { + BaseReg = BF::SP; + Offset += MF.getFrameInfo()->getStackSize() + SPAdj; + } + + bool isStore = false; + + switch (MI.getOpcode()) { + case BF::STORE32fi: + isStore = true; + case BF::LOAD32fi: { + assert(Offset%4 == 0 && "Unaligned i32 stack access"); + assert(FIPos==1 && "Bad frame index operand"); + MI.getOperand(FIPos).ChangeToRegister(BaseReg, false); + MI.getOperand(FIPos+1).setImm(Offset); + if (isUint<6>(Offset)) { + MI.setDesc(TII.get(isStore + ? BF::STORE32p_uimm6m4 + : BF::LOAD32p_uimm6m4)); + return 0; + } + if (BaseReg == BF::FP && isUint<7>(-Offset)) { + MI.setDesc(TII.get(isStore + ? BF::STORE32fp_nimm7m4 + : BF::LOAD32fp_nimm7m4)); + MI.getOperand(FIPos+1).setImm(-Offset); + return 0; + } + if (isInt<18>(Offset)) { + MI.setDesc(TII.get(isStore + ? BF::STORE32p_imm18m4 + : BF::LOAD32p_imm18m4)); + return 0; + } + // Use RegScavenger to calculate proper offset... + MI.dump(); + llvm_unreachable("Stack frame offset too big"); + break; + } + case BF::ADDpp: { + assert(MI.getOperand(0).isReg() && "ADD instruction needs a register"); + unsigned DestReg = MI.getOperand(0).getReg(); + // We need to produce a stack offset in a P register. We emit: + // P0 = offset; + // P0 = BR + P0; + assert(FIPos==1 && "Bad frame index operand"); + loadConstant(MBB, II, DL, DestReg, Offset); + MI.getOperand(1).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(2).ChangeToRegister(BaseReg, false); + break; + } + case BF::STORE16fi: + isStore = true; + case BF::LOAD16fi: { + assert(Offset%2 == 0 && "Unaligned i16 stack access"); + assert(FIPos==1 && "Bad frame index operand"); + // We need a P register to use as an address + unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj); + assert(ScratchReg && "Could not scavenge register"); + loadConstant(MBB, II, DL, ScratchReg, Offset); + BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg) + .addReg(ScratchReg, RegState::Kill) + .addReg(BaseReg); + MI.setDesc(TII.get(isStore ? BF::STORE16pi : BF::LOAD16pi)); + MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true); + MI.RemoveOperand(2); + break; + } + case BF::STORE8fi: { + // This is an AnyCC spill, we need a scratch register. 
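+    // CC cannot be stored directly: it is first copied into the scavenged D
+    // register (toggling bit 0 for NCC) and the byte store goes through that.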
+    assert(FIPos==1 && "Bad frame index operand");
+    MachineOperand SpillReg = MI.getOperand(0);
+    unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+    assert(ScratchReg && "Could not scavenge register");
+    if (SpillReg.getReg()==BF::NCC) {
+      BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg)
+        .addOperand(SpillReg);
+      BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg)
+        .addReg(ScratchReg).addImm(0);
+    } else {
+      BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg)
+        .addOperand(SpillReg);
+    }
+    // STORE D
+    MI.setDesc(TII.get(BF::STORE8p_imm16));
+    MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true);
+    MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+    MI.getOperand(FIPos+1).setImm(Offset);
+    break;
+  }
+  case BF::LOAD8fi: {
+    // This is a restore, we need a scratch register.
+    assert(FIPos==1 && "Bad frame index operand");
+    MachineOperand SpillReg = MI.getOperand(0);
+    unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
+    assert(ScratchReg && "Could not scavenge register");
+    MI.setDesc(TII.get(BF::LOAD32p_imm16_8z));
+    MI.getOperand(0).ChangeToRegister(ScratchReg, true);
+    MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
+    MI.getOperand(FIPos+1).setImm(Offset);
+    ++II;
+    if (SpillReg.getReg()==BF::CC) {
+      // CC = D
+      BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC)
+        .addReg(ScratchReg, RegState::Kill);
+    } else {
+      // Restore NCC (CC = D==0)
+      BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC)
+        .addReg(ScratchReg, RegState::Kill)
+        .addImm(0);
+    }
+    break;
+  }
+  default:
+    llvm_unreachable("Cannot eliminate frame index");
+    break;
+  }
+  return 0;
+}
+
+void BlackfinRegisterInfo::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                     RegScavenger *RS) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetRegisterClass *RC = BF::DPRegisterClass;
+  if (requiresRegisterScavenging(MF)) {
+    // Reserve a slot close to SP or frame pointer.
+    RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                       RC->getAlignment()));
+  }
+}
+
+void BlackfinRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+}
+
+// Emit a prologue that sets up a stack frame.
+// On function entry, R0-R2 and P0 may hold arguments.
+// R3, P1, and P2 may be used as scratch registers.
+void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB.
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
MBBI->getDebugLoc() + : DebugLoc::getUnknownLoc()); + + int FrameSize = MFI->getStackSize(); + if (FrameSize%4) { + FrameSize = (FrameSize+3) & ~3; + MFI->setStackSize(FrameSize); + } + + if (!hasFP(MF)) { + assert(!MFI->hasCalls() && + "FP elimination on a non-leaf function is not supported"); + adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize); + return; + } + + // emit a LINK instruction + if (FrameSize <= 0x3ffff) { + BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize); + return; + } + + // Frame is too big, do a manual LINK: + // [--SP] = RETS; + // [--SP] = FP; + // FP = SP; + // P1 = -FrameSize; + // SP = SP + P1; + BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) + .addReg(BF::RETS, RegState::Kill); + BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) + .addReg(BF::FP, RegState::Kill); + BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP) + .addReg(BF::SP); + loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize); + BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP) + .addReg(BF::SP, RegState::Kill) + .addReg(BF::P1, RegState::Kill); + +} + +void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + DebugLoc dl = MBBI->getDebugLoc(); + + int FrameSize = MFI->getStackSize(); + assert(FrameSize%4 == 0 && "Misaligned frame size"); + + if (!hasFP(MF)) { + assert(!MFI->hasCalls() && + "FP elimination on a non-leaf function is not supported"); + adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize); + return; + } + + // emit an UNLINK instruction + BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK)); +} + +unsigned BlackfinRegisterInfo::getRARegister() const { + return BF::RETS; +} + +unsigned BlackfinRegisterInfo::getFrameRegister(MachineFunction &MF) const { + return hasFP(MF) ? BF::FP : BF::SP; +} + +unsigned BlackfinRegisterInfo::getEHExceptionRegister() const { + llvm_unreachable("What is the exception register"); + return 0; +} + +unsigned BlackfinRegisterInfo::getEHHandlerRegister() const { + llvm_unreachable("What is the exception handler register"); + return 0; +} + +int BlackfinRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + llvm_unreachable("What is the dwarf register number"); + return -1; +} + +#include "BlackfinGenRegisterInfo.inc" + diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h new file mode 100644 index 0000000000000..501f504d06bf5 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -0,0 +1,104 @@ +//===- BlackfinRegisterInfo.h - Blackfin Register Information ..-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Blackfin implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef BLACKFINREGISTERINFO_H +#define BLACKFINREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "BlackfinGenRegisterInfo.h.inc" + +namespace llvm { + + class BlackfinSubtarget; + class TargetInstrInfo; + class Type; + + // Subregister indices, keep in sync with BlackfinRegisterInfo.td + enum BfinSubregIdx { + bfin_subreg_lo16 = 1, + bfin_subreg_hi16 = 2, + bfin_subreg_lo32 = 3 + }; + + struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo { + BlackfinSubtarget &Subtarget; + const TargetInstrInfo &TII; + + BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii); + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + // getSubReg implemented by tablegen + + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const { + return &BF::PRegClass; + } + + const TargetRegisterClass *getPhysicalRegisterRegClass(unsigned reg, + EVT VT) const; + + bool hasFP(const MachineFunction &MF) const; + + // bool hasReservedCallFrame(MachineFunction &MF) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getRARegister() const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; + + // Utility functions + void adjustRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, + unsigned Reg, + unsigned ScratchReg, + int delta) const; + void loadConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, + unsigned Reg, + int value) const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td new file mode 100644 index 0000000000000..642d10f5aa67f --- /dev/null +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -0,0 +1,385 @@ +//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the Blackfin register file +//===----------------------------------------------------------------------===// + +// Registers are identified with 3-bit group and 3-bit ID numbers. 
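+// (Illustrative aside, not part of the original patch: the two 3-bit fields
+// defined below combine into a single 6-bit register code, roughly
+// (Group << 3) | Num, in the encodings of instructions that can address the
+// whole register file.)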
+
+class BlackfinReg<string n> : Register<n> {
+  field bits<3> Group;
+  field bits<3> Num;
+  let Namespace = "BF";
+}
+
+// Rc - 1-bit registers
+class Rc<bits<5> bitno, string n> : BlackfinReg<n> {
+  field bits<5> BitNum = bitno;
+}
+
+// Rs - 16-bit integer registers
+class Rs<bits<3> group, bits<3> num, bits<1> hi, string n> : BlackfinReg<n> {
+  let Group = group;
+  let Num = num;
+  field bits<1> High = hi;
+}
+
+// Ri - 32-bit integer registers with subregs
+class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
+  let Group = group;
+  let Num = num;
+}
+
+// Ra - 40-bit accumulator registers
+class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
+  let SubRegs = subs;
+  let Group = 4;
+  let Num = num;
+}
+
+// Two halves of a 32-bit register
+multiclass Rss<bits<3> group, bits<3> num, string n> {
+  def H : Rs<group, num, 1, !strconcat(n, ".h")>;
+  def L : Rs<group, num, 0, !strconcat(n, ".l")>;
+}
+
+// Rii - 32-bit integer registers with subregs
+class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
+  : BlackfinReg<n> {
+  let SubRegs = subs;
+  let Group = group;
+  let Num = num;
+}
+
+// Status bits are all part of ASTAT
+def AZ : Rc<0, "az">;
+def AN : Rc<1, "an">;
+def CC : Rc<5, "cc">, DwarfRegNum<[34]>;
+def NCC : Rc<5, "!cc"> { let Aliases = [CC]; }
+def AQ : Rc<6, "aq">;
+def AC0 : Rc<12, "ac0">;
+def AC1 : Rc<13, "ac1">;
+def AV0 : Rc<16, "av0">;
+def AV0S : Rc<17, "av0s">;
+def AV1 : Rc<18, "av1">;
+def AV1S : Rc<19, "av1s">;
+def V : Rc<24, "v">;
+def VS : Rc<25, "vs">;
+// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD
+
+// Group 0: Integer registers
+defm R0 : Rss<0, 0, "r0">;
+def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>;
+defm R1 : Rss<0, 1, "r1">;
+def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>;
+defm R2 : Rss<0, 2, "r2">;
+def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>;
+defm R3 : Rss<0, 3, "r3">;
+def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>;
+defm R4 : Rss<0, 4, "r4">;
+def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>;
+defm R5 : Rss<0, 5, "r5">;
+def R5 : Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>;
+defm R6 : Rss<0, 6, "r6">;
+def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>;
+defm R7 : Rss<0, 7, "r7">;
+def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>;
+
+// Group 1: Pointer registers
+defm P0 : Rss<1, 0, "p0">;
+def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>;
+defm P1 : Rss<1, 1, "p1">;
+def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>;
+defm P2 : Rss<1, 2, "p2">;
+def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>;
+defm P3 : Rss<1, 3, "p3">;
+def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>;
+defm P4 : Rss<1, 4, "p4">;
+def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>;
+defm P5 : Rss<1, 5, "p5">;
+def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>;
+defm SP : Rss<1, 6, "sp">;
+def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>;
+defm FP : Rss<1, 7, "fp">;
+def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>;
+
+// Group 2: Index registers
+defm I0 : Rss<2, 0, "i0">;
+def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>;
+defm I1 : Rss<2, 1, "i1">;
+def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>;
+defm I2 : Rss<2, 2, "i2">;
+def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>;
+defm I3 : Rss<2, 3, "i3">;
+def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>;
+defm M0 : Rss<2, 4, "m0">;
+def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>;
+defm M1 : Rss<2, 5, "m1">;
+def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>;
+defm M2 : Rss<2, 6, "m2">;
+def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>;
+defm M3 : Rss<2, 7, "m3">;
+def M3 : Rii<2, 7, "m3", 
[M3H, M3L]>, DwarfRegNum<[23]>; + +// Group 3: Cyclic indexing registers +defm B0 : Rss<3, 0, "b0">; +def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>; +defm B1 : Rss<3, 1, "b1">; +def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>; +defm B2 : Rss<3, 2, "b2">; +def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>; +defm B3 : Rss<3, 3, "b3">; +def B3 : Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>; +defm L0 : Rss<3, 4, "l0">; +def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>; +defm L1 : Rss<3, 5, "l1">; +def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>; +defm L2 : Rss<3, 6, "l2">; +def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>; +defm L3 : Rss<3, 7, "l3">; +def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>; + +// Accumulators +def A0X : Ri <4, 0, "a0.x">; +defm A0 : Rss<4, 1, "a0">; +def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>; +def A0 : Ra <0, "a0", [A0X, A0W]>; + +def A1X : Ri <4, 2, "a1.x">; +defm A1 : Rss<4, 3, "a1">; +def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>; +def A1 : Ra <2, "a1", [A1X, A1W]>; + +def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>; +def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>; +def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>; +def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>; +def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>; + +def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> { + let SubRegs = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]; +} + +def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>; +def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>; +def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>; +def SYSCFG : Ri<7, 2, "syscfg">; +def CYCLES : Ri<6, 6, "cycles">; +def CYCLES2 : Ri<6, 7, "cycles2">; + +// Hardware loops +def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>; +def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>; +def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>; +def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>; +def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>; +def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>; + +// Subregs are: +// 1: .L +// 2: .H +// 3: .W (32 low bits of 40-bit accu) +// Keep in sync with enum in BlackfinRegisterInfo.h +def bfin_subreg_lo16 : PatLeaf<(i32 1)>; +def bfin_subreg_hi16 : PatLeaf<(i32 2)>; +def bfin_subreg_32bit : PatLeaf<(i32 3)>; + +def : SubRegSet<1, + [R0, R1, R2, R3, R4, R5, R6, R7, + P0, P1, P2, P3, P4, P5, SP, FP, + I0, I1, I2, I3, M0, M1, M2, M3, + B0, B1, B2, B3, L0, L1, L2, L3], + [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L, + P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL, + I0L, I1L, I2L, I3L, M0L, M1L, M2L, M3L, + B0L, B1L, B2L, B3L, L0L, L1L, L2L, L3L]>; + +def : SubRegSet<2, + [R0, R1, R2, R3, R4, R5, R6, R7, + P0, P1, P2, P3, P4, P5, SP, FP, + I0, I1, I2, I3, M0, M1, M2, M3, + B0, B1, B2, B3, L0, L1, L2, L3], + [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H, + P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH, + I0H, I1H, I2H, I3H, M0H, M1H, M2H, M3H, + B0H, B1H, B2H, B3H, L0H, L1H, L2H, L3H]>; + +def : SubRegSet<1, [A0, A0W, A1, A1W], [A0L, A0L, A1L, A1L]>; +def : SubRegSet<2, [A0, A0W, A1, A1W], [A0H, A0H, A1H, A1H]>; + +// Register classes. 
+def D16 : RegisterClass<"BF", [i16], 16, + [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, + R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L]>; + +def D16L : RegisterClass<"BF", [i16], 16, + [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L]>; + +def D16H : RegisterClass<"BF", [i16], 16, + [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H]>; + +def P16 : RegisterClass<"BF", [i16], 16, + [P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L, + P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>; + +def P16L : RegisterClass<"BF", [i16], 16, + [P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>; + +def P16H : RegisterClass<"BF", [i16], 16, + [P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>; + +def DP16 : RegisterClass<"BF", [i16], 16, + [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, + R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L, + P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L, + P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>; + +def DP16L : RegisterClass<"BF", [i16], 16, + [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L, + P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>; + +def DP16H : RegisterClass<"BF", [i16], 16, + [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H, + P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>; + +def GR16 : RegisterClass<"BF", [i16], 16, + [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L, + R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L, + P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L, + P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL, + I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L, + M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L, + B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L, + L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>; + +def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { + let SubRegClassList = [D16L, D16H]; +} + +def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> { + let SubRegClassList = [P16L, P16H]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + PClass::iterator + PClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + return allocation_order_begin(MF) + + (RI->hasFP(MF) ? 7 : 6); + } + }]; +} + +def I : RegisterClass<"BF", [i32], 32, [I0, I1, I2, I3]>; +def M : RegisterClass<"BF", [i32], 32, [M0, M1, M2, M3]>; +def B : RegisterClass<"BF", [i32], 32, [B0, B1, B2, B3]>; +def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>; + +def DP : RegisterClass<"BF", [i32], 32, + [R0, R1, R2, R3, R4, R5, R6, R7, + P0, P1, P2, P3, P4, P5, FP, SP]> { + let SubRegClassList = [DP16L, DP16H]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + DPClass::iterator + DPClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + return allocation_order_begin(MF) + + (RI->hasFP(MF) ? 15 : 14); + } + }]; +} + +def GR : RegisterClass<"BF", [i32], 32, + [R0, R1, R2, R3, R4, R5, R6, R7, + P0, P1, P2, P3, P4, P5, + I0, I1, I2, I3, M0, M1, M2, M3, + B0, B1, B2, B3, L0, L1, L2, L3, + FP, SP]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GRClass::iterator + GRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + return allocation_order_begin(MF) + + (RI->hasFP(MF) ? 
31 : 30); + } + }]; +} + +def ALL : RegisterClass<"BF", [i32], 32, + [R0, R1, R2, R3, R4, R5, R6, R7, + P0, P1, P2, P3, P4, P5, + I0, I1, I2, I3, M0, M1, M2, M3, + B0, B1, B2, B3, L0, L1, L2, L3, + FP, SP, + A0X, A0W, A1X, A1W, ASTAT, RETS, + LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2, + USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + ALLClass::iterator + ALLClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + return allocation_order_begin(MF) + + (RI->hasFP(MF) ? 31 : 30); + } + }]; +} + +def PI : RegisterClass<"BF", [i32], 32, + [P0, P1, P2, P3, P4, P5, I0, I1, I2, I3, FP, SP]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + PIClass::iterator + PIClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + return allocation_order_begin(MF) + + (RI->hasFP(MF) ? 11 : 10); + } + }]; +} + +// We are going to pretend that CC and !CC are 32-bit registers, even though +// they only can hold 1 bit. +let CopyCost = -1, Size = 8 in { +def JustCC : RegisterClass<"BF", [i32], 8, [CC]>; +def NotCC : RegisterClass<"BF", [i32], 8, [NCC]>; +def AnyCC : RegisterClass<"BF", [i32], 8, [CC, NCC]> { + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + AnyCCClass::iterator + AnyCCClass::allocation_order_end(const MachineFunction &MF) const { + return allocation_order_begin(MF)+1; + } + }]; +} +def StatBit : RegisterClass<"BF", [i1], 8, + [AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]>; +} + +// Should be i40, but that isn't defined. It is not a legal type yet anyway. +def Accu : RegisterClass<"BF", [i64], 64, [A0, A1]>; diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp new file mode 100644 index 0000000000000..e104c5245a9e5 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinSubtarget.cpp @@ -0,0 +1,36 @@ +//===- BlackfinSubtarget.cpp - BLACKFIN Subtarget Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the blackfin specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "BlackfinSubtarget.h" +#include "BlackfinGenSubtarget.inc" + +using namespace llvm; + +BlackfinSubtarget::BlackfinSubtarget(const std::string &TT, + const std::string &FS) + : sdram(false), + icplb(false), + wa_mi_shift(false), + wa_csync(false), + wa_specld(false), + wa_mmr_stall(false), + wa_lcregs(false), + wa_hwloop(false), + wa_ind_call(false), + wa_killed_mmr(false), + wa_rets(false) +{ + std::string CPU = "generic"; + // Parse features string. 
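+  // (ParseSubtargetFeatures is generated by tblgen from Blackfin.td; it
+  // decodes the FS feature string and sets the workaround flags that were
+  // initialized above.)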
+  ParseSubtargetFeatures(FS, CPU);
+}
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h
new file mode 100644
index 0000000000000..d667fe26519ba
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinSubtarget.h
@@ -0,0 +1,45 @@
+//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the BLACKFIN specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFIN_SUBTARGET_H
+#define BLACKFIN_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include <string>
+
+namespace llvm {
+
+  class BlackfinSubtarget : public TargetSubtarget {
+    bool sdram;
+    bool icplb;
+    bool wa_mi_shift;
+    bool wa_csync;
+    bool wa_specld;
+    bool wa_mmr_stall;
+    bool wa_lcregs;
+    bool wa_hwloop;
+    bool wa_ind_call;
+    bool wa_killed_mmr;
+    bool wa_rets;
+  public:
+    BlackfinSubtarget(const std::string &TT, const std::string &FS);
+
+    /// ParseSubtargetFeatures - Parses features string setting specified
+    /// subtarget options. Definition of function is auto generated by tblgen.
+    std::string ParseSubtargetFeatures(const std::string &FS,
+                                       const std::string &CPU);
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
new file mode 100644
index 0000000000000..47ba2fe28f582
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -0,0 +1,42 @@
+//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinTargetMachine.h"
+#include "Blackfin.h"
+#include "BlackfinMCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeBlackfinTarget() {
+  RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
+  RegisterAsmInfo<BlackfinMCAsmInfo> Y(TheBlackfinTarget);
+
+}
+
+BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
+                                             const std::string &TT,
+                                             const std::string &FS)
+  : LLVMTargetMachine(T, TT),
+    DataLayout("e-p:32:32-i64:32-f64:32"),
+    Subtarget(TT, FS),
+    TLInfo(*this),
+    InstrInfo(Subtarget),
+    FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) {
+}
+
+bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
+                                            CodeGenOpt::Level OptLevel) {
+  PM.add(createBlackfinISelDag(*this, OptLevel));
+  return false;
+}
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
new file mode 100644
index 0000000000000..73ed3143f5309
--- /dev/null
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -0,0 +1,54 @@
+//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Blackfin specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINTARGETMACHINE_H
+#define BLACKFINTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "BlackfinInstrInfo.h"
+#include "BlackfinSubtarget.h"
+#include "BlackfinISelLowering.h"
+
+namespace llvm {
+
+  class BlackfinTargetMachine : public LLVMTargetMachine {
+    const TargetData DataLayout;
+    BlackfinSubtarget Subtarget;
+    BlackfinTargetLowering TLInfo;
+    BlackfinInstrInfo InstrInfo;
+    TargetFrameInfo FrameInfo;
+  public:
+    BlackfinTargetMachine(const Target &T, const std::string &TT,
+                          const std::string &FS);
+
+    virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
+    virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
+    virtual const BlackfinSubtarget *getSubtargetImpl() const {
+      return &Subtarget;
+    }
+    virtual const BlackfinRegisterInfo *getRegisterInfo() const {
+      return &InstrInfo.getRegisterInfo();
+    }
+    virtual BlackfinTargetLowering* getTargetLowering() const {
+      return const_cast<BlackfinTargetLowering*>(&TLInfo);
+    }
+    virtual const TargetData *getTargetData() const { return &DataLayout; }
+    virtual bool addInstSelector(PassManagerBase &PM,
+                                 CodeGenOpt::Level OptLevel);
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
new file mode 100644
index 0000000000000..6c3b2447a6946
--- /dev/null
+++ b/lib/Target/Blackfin/CMakeLists.txt
@@ -0,0 +1,21 @@
+set(LLVM_TARGET_DEFINITIONS Blackfin.td)
+
+tablegen(BlackfinGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(BlackfinGenRegisterNames.inc -gen-register-enums)
+tablegen(BlackfinGenRegisterInfo.inc -gen-register-desc)
+tablegen(BlackfinGenInstrNames.inc -gen-instr-enums)
+tablegen(BlackfinGenInstrInfo.inc -gen-instr-desc)
+tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
+tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
+tablegen(BlackfinGenSubtarget.inc -gen-subtarget)
+tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
+
+add_llvm_target(BlackfinCodeGen
+  BlackfinInstrInfo.cpp
+  BlackfinISelDAGToDAG.cpp
+  BlackfinISelLowering.cpp
+  BlackfinMCAsmInfo.cpp
+  BlackfinRegisterInfo.cpp
+  BlackfinSubtarget.cpp
+  BlackfinTargetMachine.cpp
+  )
diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile
new file mode 100644
index 0000000000000..c0c1bce793d0a
--- /dev/null
+++ b/lib/Target/Blackfin/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Blackfin/Makefile ------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMBlackfinCodeGen
+TARGET = Blackfin
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \
+                BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \
+		BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \
+		BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \
+		BlackfinGenCallingConv.inc
+
+DIRS = AsmPrinter TargetInfo
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/Blackfin/README.txt b/lib/Target/Blackfin/README.txt
new file mode 100644
index 0000000000000..b4c8227cd645e
--- /dev/null
+++ b/lib/Target/Blackfin/README.txt
@@ -0,0 +1,244 @@
+//===-- README.txt - Notes for Blackfin Target ------------------*- org -*-===//
+
+* Condition codes
+** DONE Problem with asymmetric SETCC operations
+The instruction
+
+  CC = R0 < 2
+
+is not symmetric - there is no R0 > 2 instruction. On the other hand, IF CC
+JUMP can take both CC and !CC as a condition. We cannot pattern-match (brcond
+(not cc), target) because the DAG optimizer removes that kind of thing.
+
+This is handled by creating a pseudo-register NCC that aliases CC. Register
+classes JustCC and NotCC are used to control the inversion of CC.
+
+** DONE CC as an i32 register
+The AnyCC register class pretends to hold i32 values. It can only represent the
+values 0 and 1, but we can copy to and from the D class. This hack makes it
+possible to represent the setcc instruction without having i1 as a legal type.
+
+In most cases, the CC register is set by a "CC = .." or BITTST instruction, and
+then used in a conditional branch or move. The code generator thinks it is
+moving 32 bits, but the value stays in CC. In other cases, the result of a
+comparison is actually used as an i32 number, and CC will be copied to a D
+register.
+
+* Stack frames
+** TODO Use Push/Pop instructions
+We should use the push/pop instructions when saving callee-saved
+registers. They are smaller, and we may even use push multiple instructions.
+
+** TODO requiresRegisterScavenging
+We need more intelligence in determining when the scavenger is needed. We
+should keep track of:
+- Spilling D16 registers
+- Spilling AnyCC registers
+
+* Assembler
+** TODO Implement PrintGlobalVariable
+** TODO Remove LOAD32sym
+It's a hack combining two instructions by concatenation.
+
+* Inline Assembly
+
+These are the GCC constraints from bfin/constraints.md:
+
+| Code  | Register class                            | LLVM |
+|-------+-------------------------------------------+------|
+| a     | P                                         | C    |
+| d     | D                                         | C    |
+| z     | Call clobbered P (P0, P1, P2)             | X    |
+| D     | EvenD                                     | X    |
+| W     | OddD                                      | X    |
+| e     | Accu                                      | C    |
+| A     | A0                                        | S    |
+| B     | A1                                        | S    |
+| b     | I                                         | C    |
+| v     | B                                         | C    |
+| f     | M                                         | C    |
+| c     | Circular I, B, L                          | X    |
+| C     | JustCC                                    | S    |
+| t     | LoopTop                                   | X    |
+| u     | LoopBottom                                | X    |
+| k     | LoopCount                                 | X    |
+| x     | GR                                        | C    |
+| y     | RET*, ASTAT, SEQSTAT, USP                 | X    |
+| w     | ALL                                       | C    |
+| Z     | The FD-PIC GOT pointer (P3)               | S    |
+| Y     | The FD-PIC function pointer register (P1) | S    |
+| q0-q7 | R0-R7 individually                        |      |
+| qA    | P0                                        |      |
+|-------+-------------------------------------------+------|
+| Code  | Constant                                  |      |
+|-------+-------------------------------------------+------|
+| J     | 1<<N                                      |      |

[...]

+Target llvm::TheBlackfinTarget;
+
+extern "C" void LLVMInitializeBlackfinTargetInfo() {
+  RegisterTarget<Triple::bfin> X(TheBlackfinTarget, "bfin",
+                                 "Analog Devices Blackfin [experimental]");
+}
diff --git a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000000000..5ca80604f63cd
--- /dev/null
+++ b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMBlackfinInfo
+  BlackfinTargetInfo.cpp
+  )
+
+add_dependencies(LLVMBlackfinInfo BlackfinCodeGenTable_gen)
diff --git a/lib/Target/Blackfin/TargetInfo/Makefile b/lib/Target/Blackfin/TargetInfo/Makefile
new file mode 100644
index 0000000000000..c49cfbe690776
--- /dev/null
+++ b/lib/Target/Blackfin/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Blackfin/TargetInfo/Makefile -------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMBlackfinInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 294c6d35beabc..fe63edf3ff68c 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -24,43 +24,36 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/ConstantsScanner.h"
 #include "llvm/Analysis/FindUsedTypes.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Target/TargetMachineRegistry.h"
-#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetRegistry.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/Mangler.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/System/Host.h"
 #include "llvm/Config/config.h"
 #include <algorithm>
 #include <sstream>
 using namespace llvm;
 
-/// CBackendTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int CBackendTargetMachineModule;
-int CBackendTargetMachineModule = 0;
-
-// Register the target.
-static RegisterTarget<CTargetMachine> X("c", "C backend");
-
-// Force static initialization.
-extern "C" void LLVMInitializeCBackendTarget() { }
+extern "C" void LLVMInitializeCBackendTarget() {
+  // Register the target.
+  RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
+}
 
 namespace {
   /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
@@ -88,12 +81,12 @@ namespace {
   /// CWriter - This class is the main chunk of code that converts an LLVM
   /// module to a C translation unit.
   class CWriter : public FunctionPass, public InstVisitor<CWriter> {
-    raw_ostream &Out;
+    formatted_raw_ostream &Out;
     IntrinsicLowering *IL;
     Mangler *Mang;
     LoopInfo *LI;
     const Module *TheModule;
-    const TargetAsmInfo* TAsm;
+    const MCAsmInfo* TAsm;
     const TargetData* TD;
     std::map<const Type *, std::string> TypeNames;
     std::map<const ConstantFP *, unsigned> FPConstantMap;
@@ -101,12 +94,14 @@ namespace {
     std::set<const Argument*> ByValParams;
     unsigned FPCounter;
     unsigned OpaqueCounter;
+    DenseMap<const Value*, unsigned> AnonValueNumbers;
+    unsigned NextAnonValueNumber;
 
   public:
     static char ID;
-    explicit CWriter(raw_ostream &o)
+    explicit CWriter(formatted_raw_ostream &o)
       : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0),
-        TheModule(0), TAsm(0), TD(0), OpaqueCounter(0) {
+        TheModule(0), TAsm(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) {
       FPCounter = 0;
     }
 
@@ -149,24 +144,26 @@ namespace {
       return false;
     }
 
-    raw_ostream &printType(raw_ostream &Out, const Type *Ty,
-                           bool isSigned = false,
-                           const std::string &VariableName = "",
-                           bool IgnoreName = false,
-                           const AttrListPtr &PAL = AttrListPtr());
+    raw_ostream &printType(formatted_raw_ostream &Out,
+                           const Type *Ty,
+                           bool isSigned = false,
+                           const std::string &VariableName = "",
+                           bool IgnoreName = false,
+                           const AttrListPtr &PAL = AttrListPtr());
     std::ostream &printType(std::ostream &Out, const Type *Ty,
                             bool isSigned = false,
                             const std::string &VariableName = "",
                             bool IgnoreName = false,
                             const AttrListPtr &PAL = AttrListPtr());
-    raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
-                                 bool isSigned,
-                                 const std::string &NameSoFar = "");
+    raw_ostream &printSimpleType(formatted_raw_ostream &Out,
+                                 const Type *Ty,
+                                 bool isSigned,
+                                 const std::string &NameSoFar = "");
    std::ostream &printSimpleType(std::ostream &Out, const Type *Ty,
                                  bool isSigned,
                                  const std::string &NameSoFar = "");
 
-    void printStructReturnPointerFunctionType(raw_ostream &Out,
+    void printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
                                               const AttrListPtr &PAL,
                                               const PointerType *Ty);
 
@@ -239,7 +236,7 @@ namespace {
       // Must be an expression, must be used exactly once.  If it is dead, we
       // emit it inline where it would go.
-      if (I.getType() == Type::VoidTy || !I.hasOneUse() ||
+      if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
           isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
           isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
           isa<InsertValueInst>(I))
@@ -286,11 +283,11 @@ namespace {
     void visitBranchInst(BranchInst &I);
     void visitSwitchInst(SwitchInst &I);
     void visitInvokeInst(InvokeInst &I) {
-      assert(0 && "Lowerinvoke pass didn't work!");
+      llvm_unreachable("Lowerinvoke pass didn't work!");
     }
 
     void visitUnwindInst(UnwindInst &I) {
-      assert(0 && "Lowerinvoke pass didn't work!");
+      llvm_unreachable("Lowerinvoke pass didn't work!");
     }
     void visitUnreachableInst(UnreachableInst &I);
 
@@ -321,8 +318,10 @@ namespace {
     void visitExtractValueInst(ExtractValueInst &I);
 
     void visitInstruction(Instruction &I) {
-      cerr << "C Writer does not know about " << I;
-      abort();
+#ifndef NDEBUG
+      errs() << "C Writer does not know about " << I;
+#endif
+      llvm_unreachable(0);
     }
 
     void outputLValue(Instruction *I) {
@@ -430,7 +429,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
 /// printStructReturnPointerFunctionType - This is like printType for a struct
 /// return type, except, instead of printing the type as void (*)(Struct*, ...)
 /// print it as "Struct (*)(...)", for struct return functions.
-void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
+void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
                                                    const AttrListPtr &PAL,
                                                    const PointerType *TheTy) {
   const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
@@ -466,7 +465,8 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
 }
 
 raw_ostream &
-CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
+CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty,
+                         bool isSigned,
                          const std::string &NameSoFar) {
   assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) &&
          "Invalid type for printSimpleType");
@@ -505,8 +505,10 @@ CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
   }
 
   default:
-    cerr << "Unknown primitive type: " << *Ty << "\n";
-    abort();
+#ifndef NDEBUG
+    errs() << "Unknown primitive type: " << *Ty << "\n";
+#endif
+    llvm_unreachable(0);
   }
 }
 
@@ -550,17 +552,20 @@ CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
   }
 
   default:
-    cerr << "Unknown primitive type: " << *Ty << "\n";
-    abort();
+#ifndef NDEBUG
+    errs() << "Unknown primitive type: " << *Ty << "\n";
+#endif
+    llvm_unreachable(0);
   }
 }
 
 // Pass the Type* and the variable name and this prints out the variable
 // declaration.
 //
-raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
-                                bool isSigned, const std::string &NameSoFar,
-                                bool IgnoreName, const AttrListPtr &PAL) {
+raw_ostream &CWriter::printType(formatted_raw_ostream &Out,
+                                const Type *Ty,
+                                bool isSigned, const std::string &NameSoFar,
+                                bool IgnoreName, const AttrListPtr &PAL) {
   if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) {
     printSimpleType(Out, Ty, isSigned, NameSoFar);
     return Out;
@@ -652,8 +657,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
     return Out << TyName << ' ' << NameSoFar;
   }
   default:
-    assert(0 && "Unhandled case in getTypeProps!");
-    abort();
+    llvm_unreachable("Unhandled case in getTypeProps!");
   }
 
   return Out;
@@ -756,8 +760,7 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
     return Out << TyName << ' ' << NameSoFar;
   }
   default:
-    assert(0 && "Unhandled case in getTypeProps!");
-    abort();
+    llvm_unreachable("Unhandled case in getTypeProps!");
  }
 
   return Out;
@@ -769,7 +772,8 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
   // ubytes or an array of sbytes with positive values.
   //
   const Type *ETy = CPA->getType()->getElementType();
-  bool isString = (ETy == Type::Int8Ty || ETy == Type::Int8Ty);
+  bool isString = (ETy == Type::getInt8Ty(CPA->getContext()) ||
+                   ETy == Type::getInt8Ty(CPA->getContext()));
 
   // Make sure the last character is a null char, as automatically added by C
   if (isString && (CPA->getNumOperands() == 0 ||
@@ -855,10 +859,11 @@ void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
 static bool isFPCSafeToPrint(const ConstantFP *CFP) {
   bool ignored;
   // Do long doubles in hex for now.
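   // (Only float and double are treated as safe to print in decimal form;
   // all other FP types take the hex-constant path in
   // printFloatingPointConstants.)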
-  if (CFP->getType() != Type::FloatTy && CFP->getType() != Type::DoubleTy)
+  if (CFP->getType() != Type::getFloatTy(CFP->getContext()) &&
+      CFP->getType() != Type::getDoubleTy(CFP->getContext()))
     return false;
   APFloat APF = APFloat(CFP->getValueAPF());  // copy
-  if (CFP->getType() == Type::FloatTy)
+  if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
     APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
 #if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
   char Buffer[100];
@@ -916,7 +921,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
     Out << ')';
     break;
   default:
-    assert(0 && "Invalid cast opcode");
+    llvm_unreachable("Invalid cast opcode");
   }
 
   // Print the source type cast
@@ -946,7 +951,7 @@ void CWriter::printCast(unsigned opc, const Type *SrcTy, const Type *DstTy) {
   case Instruction::FPToUI:
     break; // These don't need a source cast.
   default:
-    assert(0 && "Invalid cast opcode");
+    llvm_unreachable("Invalid cast opcode");
     break;
   }
 }
@@ -970,12 +975,12 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       Out << "(";
       printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
       if (CE->getOpcode() == Instruction::SExt &&
-          CE->getOperand(0)->getType() == Type::Int1Ty) {
+          CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
         // Make sure we really sext from bool here by subtracting from 0
         Out << "0-";
       }
       printConstant(CE->getOperand(0), Static);
-      if (CE->getType() == Type::Int1Ty &&
+      if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
           (CE->getOpcode() == Instruction::Trunc ||
            CE->getOpcode() == Instruction::FPToUI ||
            CE->getOpcode() == Instruction::FPToSI ||
@@ -1055,10 +1060,10 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
         case ICmpInst::ICMP_UGT: Out << " > "; break;
         case ICmpInst::ICMP_SGE:
         case ICmpInst::ICMP_UGE: Out << " >= "; break;
-        default: assert(0 && "Illegal ICmp predicate");
+        default: llvm_unreachable("Illegal ICmp predicate");
         }
         break;
-      default: assert(0 && "Illegal opcode here!");
+      default: llvm_unreachable("Illegal opcode here!");
       }
       printConstantWithCast(CE->getOperand(1), CE->getOpcode());
       if (NeedsClosingParens)
@@ -1076,7 +1081,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       else {
         const char* op = 0;
         switch (CE->getPredicate()) {
-        default: assert(0 && "Illegal FCmp predicate");
+        default: llvm_unreachable("Illegal FCmp predicate");
         case FCmpInst::FCMP_ORD: op = "ord"; break;
         case FCmpInst::FCMP_UNO: op = "uno"; break;
         case FCmpInst::FCMP_UEQ: op = "ueq"; break;
@@ -1104,9 +1109,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       return;
     }
     default:
-      cerr << "CWriter Error: Unhandled constant expression: "
+#ifndef NDEBUG
+      errs() << "CWriter Error: Unhandled constant expression: "
            << *CE << "\n";
-      abort();
+#endif
+      llvm_unreachable(0);
     }
   } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
     Out << "((";
@@ -1122,9 +1129,9 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
   if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
     const Type* Ty = CI->getType();
-    if (Ty == Type::Int1Ty)
+    if (Ty == Type::getInt1Ty(CPV->getContext()))
       Out << (CI->getZExtValue() ? '1' : '0');
-    else if (Ty == Type::Int32Ty)
+    else if (Ty == Type::getInt32Ty(CPV->getContext()))
       Out << CI->getZExtValue() << 'u';
     else if (Ty->getPrimitiveSizeInBits() > 32)
       Out << CI->getZExtValue() << "ull";
@@ -1151,15 +1158,17 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
     if (I != FPConstantMap.end()) {
       // Because of FP precision problems we must load from a stack allocated
       // value that holds the value in hex.
-      Out << "(*(" << (FPC->getType() == Type::FloatTy ? "float" :
-                       FPC->getType() == Type::DoubleTy ? "double" :
+      Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
+                       "float" :
+                       FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
+                       "double" :
                        "long double")
           << "*)&FPConstant" << I->second << ')';
     } else {
       double V;
-      if (FPC->getType() == Type::FloatTy)
+      if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
         V = FPC->getValueAPF().convertToFloat();
-      else if (FPC->getType() == Type::DoubleTy)
+      else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
        V = FPC->getValueAPF().convertToDouble();
       else {
         // Long double.  Convert the number to double, discarding precision.
@@ -1189,7 +1198,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
           std::string Num(&Buffer[0], &Buffer[6]);
           unsigned long Val = strtoul(Num.c_str(), 0, 16);
 
-          if (FPC->getType() == Type::FloatTy)
+          if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
             Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
                 << Buffer << "\") /*nan*/ ";
           else
@@ -1198,7 +1207,8 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
       } else if (IsInf(V)) {
         // The value is Inf
         if (V < 0) Out << '-';
-        Out << "LLVM_INF" << (FPC->getType() == Type::FloatTy ? "F" : "")
+        Out << "LLVM_INF" <<
+            (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
             << " /*inf*/ ";
       } else {
         std::string Num;
@@ -1312,8 +1322,10 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
     }
     // FALL THROUGH
   default:
-    cerr << "Unknown constant type: " << *CPV << "\n";
-    abort();
+#ifndef NDEBUG
+    errs() << "Unknown constant type: " << *CPV << "\n";
+#endif
+    llvm_unreachable(0);
   }
 }
 
@@ -1359,7 +1371,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
   }
   if (NeedsExplicitCast) {
     Out << "((";
-    if (Ty->isInteger() && Ty != Type::Int1Ty)
+    if (Ty->isInteger() && Ty != Type::getInt1Ty(Ty->getContext()))
       printSimpleType(Out, Ty, TypeIsSigned);
     else
       printType(Out, Ty); // not integer, sign doesn't matter
@@ -1419,33 +1431,36 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
 }
 
 std::string CWriter::GetValueName(const Value *Operand) {
-  std::string Name;
-
-  if (!isa<GlobalValue>(Operand) && Operand->getName() != "") {
-    std::string VarName;
-
-    Name = Operand->getName();
-    VarName.reserve(Name.capacity());
-
-    for (std::string::iterator I = Name.begin(), E = Name.end();
-         I != E; ++I) {
-      char ch = *I;
+  // Mangle globals with the standard mangler interface for LLC compatibility.
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand))
+    return Mang->getMangledName(GV);
+
+  std::string Name = Operand->getName();
+
+  if (Name.empty()) { // Assign unique names to local temporaries.
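+    // (A fresh map entry is value-initialized to zero, so the first lookup
+    // of a nameless value assigns the next number and later lookups reuse
+    // it, giving the value a stable tmp__N name.)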
+    unsigned &No = AnonValueNumbers[Operand];
+    if (No == 0)
+      No = ++NextAnonValueNumber;
+    Name = "tmp__" + utostr(No);
+  }
+
+  std::string VarName;
+  VarName.reserve(Name.capacity());
 
-      if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
-            (ch >= '0' && ch <= '9') || ch == '_')) {
-        char buffer[5];
-        sprintf(buffer, "_%x_", ch);
-        VarName += buffer;
-      } else
-        VarName += ch;
-    }
+  for (std::string::iterator I = Name.begin(), E = Name.end();
+       I != E; ++I) {
+    char ch = *I;
 
-    Name = "llvm_cbe_" + VarName;
-  } else {
-    Name = Mang->getValueName(Operand);
+    if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+          (ch >= '0' && ch <= '9') || ch == '_')) {
+      char buffer[5];
+      sprintf(buffer, "_%x_", ch);
+      VarName += buffer;
+    } else
+      VarName += ch;
   }
 
-  return Name;
+  return "llvm_cbe_" + VarName;
 }
 
 /// writeInstComputationInline - Emit the computation for the specified
@@ -1454,19 +1469,22 @@ void CWriter::writeInstComputationInline(Instruction &I) {
   // We can't currently support integer types other than 1, 8, 16, 32, 64.
   // Validate this.
   const Type *Ty = I.getType();
-  if (Ty->isInteger() && (Ty!=Type::Int1Ty && Ty!=Type::Int8Ty &&
-       Ty!=Type::Int16Ty && Ty!=Type::Int32Ty && Ty!=Type::Int64Ty)) {
-      cerr << "The C backend does not currently support integer "
-           << "types of widths other than 1, 8, 16, 32, 64.\n";
-      cerr << "This is being tracked as PR 4158.\n";
-      abort();
+  if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) &&
+        Ty!=Type::getInt8Ty(I.getContext()) &&
+        Ty!=Type::getInt16Ty(I.getContext()) &&
+        Ty!=Type::getInt32Ty(I.getContext()) &&
+        Ty!=Type::getInt64Ty(I.getContext()))) {
+      llvm_report_error("The C backend does not currently support integer "
+                        "types of widths other than 1, 8, 16, 32, 64.\n"
+                        "This is being tracked as PR 4158.");
   }
 
   // If this is a non-trivial bool computation, make sure to truncate down to
   // a 1 bit value.  This is important because we want "add i1 x, y" to return
   // "0" when x and y are true, not "2" for example.
   bool NeedBoolTrunc = false;
-  if (I.getType() == Type::Int1Ty && !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
+  if (I.getType() == Type::getInt1Ty(I.getContext()) &&
+      !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
     NeedBoolTrunc = true;
 
   if (NeedBoolTrunc)
@@ -1615,7 +1633,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
   // If the operand was a pointer, convert to a large integer type.
   const Type* OpTy = Operand->getType();
   if (isa<PointerType>(OpTy))
-    OpTy = TD->getIntPtrType();
+    OpTy = TD->getIntPtrType(Operand->getContext());
 
   Out << "((";
   printSimpleType(Out, OpTy, castIsSigned);
@@ -1627,13 +1645,13 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
 // generateCompilerSpecificCode - This is where we add conditional compilation
 // directives to cater to specific compilers as need be.
 //
-static void generateCompilerSpecificCode(raw_ostream& Out,
+static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
                                          const TargetData *TD) {
   // Alloca is hard to get, and we don't want to include stdlib.h here.
Out << "/* get a declaration for alloca */\n" << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n" << "#define alloca(x) __builtin_alloca((x))\n" - << "#define _alloca(x) __builtin_alloca((x))\n" + << "#define _alloca(x) __builtin_alloca((x))\n" << "#elif defined(__APPLE__)\n" << "extern void *__builtin_alloca(unsigned long);\n" << "#define alloca(x) __builtin_alloca(x)\n" @@ -1646,7 +1664,7 @@ static void generateCompilerSpecificCode(raw_ostream& Out, << "extern void *__builtin_alloca(unsigned int);\n" << "#endif\n" << "#define alloca(x) __builtin_alloca(x)\n" - << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)\n" + << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n" << "#define alloca(x) __builtin_alloca(x)\n" << "#elif defined(_MSC_VER)\n" << "#define inline _inline\n" @@ -1803,8 +1821,34 @@ static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) { return NotSpecial; } +// PrintEscapedString - Print each character of the specified string, escaping +// it if it is not printable or if it is an escape char. +static void PrintEscapedString(const char *Str, unsigned Length, + raw_ostream &Out) { + for (unsigned i = 0; i != Length; ++i) { + unsigned char C = Str[i]; + if (isprint(C) && C != '\\' && C != '"') + Out << C; + else if (C == '\\') + Out << "\\\\"; + else if (C == '\"') + Out << "\\\""; + else if (C == '\t') + Out << "\\t"; + else + Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F); + } +} + +// PrintEscapedString - Print each character of the specified string, escaping +// it if it is not printable or if it is an escape char. +static void PrintEscapedString(const std::string &Str, raw_ostream &Out) { + PrintEscapedString(Str.c_str(), Str.size(), Out); +} bool CWriter::doInitialization(Module &M) { + FunctionPass::doInitialization(M); + // Initialize TheModule = &M; @@ -1855,6 +1899,29 @@ bool CWriter::doInitialization(Module &M) { // First output all the declarations for the program, because C requires // Functions & globals to be declared before they are used. // + if (!M.getModuleInlineAsm().empty()) { + Out << "/* Module asm statements */\n" + << "asm("; + + // Split the string into lines, to make it easier to read the .ll file. + std::string Asm = M.getModuleInlineAsm(); + size_t CurPos = 0; + size_t NewLine = Asm.find_first_of('\n', CurPos); + while (NewLine != std::string::npos) { + // We found a newline, print the portion of the asm string from the + // last newline up to this newline. + Out << "\""; + PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine), + Out); + Out << "\\n\"\n"; + CurPos = NewLine+1; + NewLine = Asm.find_first_of('\n', CurPos); + } + Out << "\""; + PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); + Out << "\");\n" + << "/* End Module asm statements */\n"; + } // Loop over the symbol table, emitting all named constants... 
printModuleTypes(M.getTypeSymbolTable()); @@ -1910,7 +1977,7 @@ bool CWriter::doInitialization(Module &M) { Out << " __HIDDEN__"; if (I->hasName() && I->getName()[0] == 1) - Out << " LLVM_ASM(\"" << I->getName().c_str()+1 << "\")"; + Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; Out << ";\n"; } @@ -2085,20 +2152,20 @@ void CWriter::printFloatingPointConstants(const Constant *C) { FPConstantMap[FPC] = FPCounter; // Number the FP constants - if (FPC->getType() == Type::DoubleTy) { + if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) { double Val = FPC->getValueAPF().convertToDouble(); uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); Out << "static const ConstantDoubleTy FPConstant" << FPCounter++ << " = 0x" << utohexstr(i) << "ULL; /* " << Val << " */\n"; - } else if (FPC->getType() == Type::FloatTy) { + } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) { float Val = FPC->getValueAPF().convertToFloat(); uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt(). getZExtValue(); Out << "static const ConstantFloatTy FPConstant" << FPCounter++ << " = 0x" << utohexstr(i) << "U; /* " << Val << " */\n"; - } else if (FPC->getType() == Type::X86_FP80Ty) { + } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) { // api needed to prevent premature destruction APInt api = FPC->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); @@ -2106,7 +2173,8 @@ void CWriter::printFloatingPointConstants(const Constant *C) { << " = { 0x" << utohexstr(p[0]) << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}" << "}; /* Long double constant */\n"; - } else if (FPC->getType() == Type::PPC_FP128Ty) { + } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) || + FPC->getType() == Type::getFP128Ty(FPC->getContext())) { APInt api = FPC->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Out << "static const ConstantFP128Ty FPConstant" << FPCounter++ @@ -2115,7 +2183,7 @@ void CWriter::printFloatingPointConstants(const Constant *C) { << "}; /* Long double constant */\n"; } else { - assert(0 && "Unknown float type!"); + llvm_unreachable("Unknown float type!"); } } @@ -2215,6 +2283,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { case CallingConv::X86_FastCall: Out << "__attribute__((fastcall)) "; break; + default: + break; } // Loop over the arguments, printing them... @@ -2351,7 +2421,8 @@ void CWriter::printFunction(Function &F) { printType(Out, AI->getAllocatedType(), false, GetValueName(AI)); Out << "; /* Address-exposed local */\n"; PrintedVar = true; - } else if (I->getType() != Type::VoidTy && !isInlinableInst(*I)) { + } else if (I->getType() != Type::getVoidTy(F.getContext()) && + !isInlinableInst(*I)) { Out << " "; printType(Out, I->getType(), false, GetValueName(&*I)); Out << ";\n"; @@ -2428,7 +2499,8 @@ void CWriter::printBasicBlock(BasicBlock *BB) { for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E; ++II) { if (!isInlinableInst(*II) && !isDirectAlloca(II)) { - if (II->getType() != Type::VoidTy && !isInlineAsm(*II)) + if (II->getType() != Type::getVoidTy(BB->getContext()) && + !isInlineAsm(*II)) outputLValue(II); else Out << " "; @@ -2603,8 +2675,9 @@ void CWriter::visitBinaryOperator(Instruction &I) { // We must cast the results of binary operations which might be promoted. 
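   // (C's usual arithmetic conversions evaluate i8/i16 arithmetic in int and
   // may evaluate float in double, so the result has to be cast back to the
   // exact width of the LLVM type.)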
bool needsCast = false; - if ((I.getType() == Type::Int8Ty) || (I.getType() == Type::Int16Ty) - || (I.getType() == Type::FloatTy)) { + if ((I.getType() == Type::getInt8Ty(I.getContext())) || + (I.getType() == Type::getInt16Ty(I.getContext())) + || (I.getType() == Type::getFloatTy(I.getContext()))) { needsCast = true; Out << "(("; printType(Out, I.getType(), false); @@ -2623,9 +2696,9 @@ void CWriter::visitBinaryOperator(Instruction &I) { Out << ")"; } else if (I.getOpcode() == Instruction::FRem) { // Output a call to fmod/fmodf instead of emitting a%b - if (I.getType() == Type::FloatTy) + if (I.getType() == Type::getFloatTy(I.getContext())) Out << "fmodf("; - else if (I.getType() == Type::DoubleTy) + else if (I.getType() == Type::getDoubleTy(I.getContext())) Out << "fmod("; else // all 3 flavors of long double Out << "fmodl("; @@ -2663,7 +2736,11 @@ void CWriter::visitBinaryOperator(Instruction &I) { case Instruction::Shl : Out << " << "; break; case Instruction::LShr: case Instruction::AShr: Out << " >> "; break; - default: cerr << "Invalid operator type!" << I; abort(); + default: +#ifndef NDEBUG + errs() << "Invalid operator type!" << I; +#endif + llvm_unreachable(0); } writeOperandWithCast(I.getOperand(1), I.getOpcode()); @@ -2700,7 +2777,11 @@ void CWriter::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_SLT: Out << " < "; break; case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: Out << " > "; break; - default: cerr << "Invalid icmp predicate!" << I; abort(); + default: +#ifndef NDEBUG + errs() << "Invalid icmp predicate!" << I; +#endif + llvm_unreachable(0); } writeOperandWithCast(I.getOperand(1), I); @@ -2724,7 +2805,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) { const char* op = 0; switch (I.getPredicate()) { - default: assert(0 && "Illegal FCmp predicate"); + default: llvm_unreachable("Illegal FCmp predicate"); case FCmpInst::FCMP_ORD: op = "ord"; break; case FCmpInst::FCMP_UNO: op = "uno"; break; case FCmpInst::FCMP_UEQ: op = "ueq"; break; @@ -2752,7 +2833,7 @@ void CWriter::visitFCmpInst(FCmpInst &I) { static const char * getFloatBitCastField(const Type *Ty) { switch (Ty->getTypeID()) { - default: assert(0 && "Invalid Type"); + default: llvm_unreachable("Invalid Type"); case Type::FloatTyID: return "Float"; case Type::DoubleTyID: return "Double"; case Type::IntegerTyID: { @@ -2784,12 +2865,13 @@ void CWriter::visitCastInst(CastInst &I) { printCast(I.getOpcode(), SrcTy, DstTy); // Make a sext from i1 work by subtracting the i1 from 0 (an int). - if (SrcTy == Type::Int1Ty && I.getOpcode() == Instruction::SExt) + if (SrcTy == Type::getInt1Ty(I.getContext()) && + I.getOpcode() == Instruction::SExt) Out << "0-"; writeOperand(I.getOperand(0)); - if (DstTy == Type::Int1Ty && + if (DstTy == Type::getInt1Ty(I.getContext()) && (I.getOpcode() == Instruction::Trunc || I.getOpcode() == Instruction::FPToUI || I.getOpcode() == Instruction::FPToSI || @@ -3020,10 +3102,12 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << ", "; // Output the last argument to the enclosing function. 
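    // (va_start needs the address of the last named parameter; a varargs
    // function with no named parameters has nothing to point at, which is the
    // unsupported case diagnosed below.)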
    if (I.getParent()->getParent()->arg_empty()) {
-      cerr << "The C backend does not currently support zero "
+      std::string msg;
+      raw_string_ostream Msg(msg);
+      Msg << "The C backend does not currently support zero "
            << "argument varargs functions, such as '"
-           << I.getParent()->getParent()->getName() << "'!\n";
-      abort();
+           << I.getParent()->getParent()->getName() << "'!";
+      llvm_report_error(Msg.str());
     }
     writeOperand(--I.getParent()->getParent()->arg_end());
     Out << ')';
@@ -3092,16 +3176,15 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
   case Intrinsic::dbg_stoppoint: {
     // If we use writeOperand directly we get a "u" suffix which is rejected
     // by gcc.
-    std::stringstream SPIStr;
     DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
-    SPI.getDirectory()->print(SPIStr);
+    std::string dir;
+    GetConstantStringInfo(SPI.getDirectory(), dir);
+    std::string file;
+    GetConstantStringInfo(SPI.getFileName(), file);
     Out << "\n#line "
         << SPI.getLine()
-        << " \"";
-    Out << SPIStr.str();
-    SPIStr.clear();
-    SPI.getFileName()->print(SPIStr);
-    Out << SPIStr.str() << "\"\n";
+        << " \""
+        << dir << '/' << file << "\"\n";
     return true;
   }
   case Intrinsic::x86_sse_cmp_ss:
@@ -3113,7 +3196,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
     Out << ')';
     // Multiple GCC builtins multiplex onto this intrinsic.
     switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) {
-    default: assert(0 && "Invalid llvm.x86.sse.cmp!");
+    default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
    case 0: Out << "__builtin_ia32_cmpeq"; break;
    case 1: Out << "__builtin_ia32_cmplt"; break;
    case 2: Out << "__builtin_ia32_cmple"; break;
@@ -3159,27 +3242,25 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
 
   const char *const *table = 0;
 
-  //Grab the translation table from TargetAsmInfo if it exists
+  // Grab the translation table from MCAsmInfo if it exists.
   if (!TAsm) {
+    std::string Triple = TheModule->getTargetTriple();
+    if (Triple.empty())
+      Triple = llvm::sys::getHostTriple();
+
     std::string E;
-    const TargetMachineRegistry::entry* Match =
-      TargetMachineRegistry::getClosestStaticTargetForModule(*TheModule, E);
-    if (Match) {
-      //Per platform Target Machines don't exist, so create it
-      // this must be done only once
-      const TargetMachine* TM = Match->CtorFn(*TheModule, "");
-      TAsm = TM->getTargetAsmInfo();
-    }
+    if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
+      TAsm = Match->createAsmInfo(Triple);
   }
   if (TAsm)
     table = TAsm->getAsmCBE();
 
-  //Search the translation table if it exists
+  // Search the translation table if it exists.
   for (int i = 0; table && table[i]; i += 2)
     if (c.Codes[0] == table[i])
       return table[i+1];
 
-  //default is identity
+  // Default is identity.
return c.Codes[0]; } @@ -3215,7 +3296,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { std::vector Constraints = as->ParseConstraints(); std::vector > ResultVals; - if (CI.getType() == Type::VoidTy) + if (CI.getType() == Type::getVoidTy(CI.getContext())) ; else if (const StructType *ST = dyn_cast(CI.getType())) { for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) @@ -3325,7 +3406,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { } void CWriter::visitMallocInst(MallocInst &I) { - assert(0 && "lowerallocations pass didn't work!"); + llvm_unreachable("lowerallocations pass didn't work!"); } void CWriter::visitAllocaInst(AllocaInst &I) { @@ -3342,7 +3423,7 @@ void CWriter::visitAllocaInst(AllocaInst &I) { } void CWriter::visitFreeInst(FreeInst &I) { - assert(0 && "lowerallocations pass didn't work!"); + llvm_unreachable("lowerallocations pass didn't work!"); } void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, @@ -3603,7 +3684,7 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { //===----------------------------------------------------------------------===// bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - raw_ostream &o, + formatted_raw_ostream &o, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { if (FileType != TargetMachine::AssemblyFile) return true; diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index 8b262455ad34a..715bbdaf0c87c 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -20,23 +20,20 @@ namespace llvm { struct CTargetMachine : public TargetMachine { - const TargetData DataLayout; // Calculates type size & alignment - - CTargetMachine(const Module &M, const std::string &FS) - : DataLayout(&M) {} + CTargetMachine(const Target &T, const std::string &TT, const std::string &FS) + : TargetMachine(T) {} virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out, + virtual bool addPassesToEmitWholeFile(PassManager &PM, + formatted_raw_ostream &Out, CodeGenFileType FileType, CodeGenOpt::Level OptLevel); - - // This class always works, but must be requested explicitly on - // llc command line. - static unsigned getModuleMatchQuality(const Module &M) { return 0; } - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const TargetData *getTargetData() const { return 0; } }; +extern Target TheCBackendTarget; + } // End llvm namespace diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile index 336de0c6f440b..3b5ef0f34692c 100644 --- a/lib/Target/CBackend/Makefile +++ b/lib/Target/CBackend/Makefile @@ -9,6 +9,9 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCBackend + +DIRS = TargetInfo + include $(LEVEL)/Makefile.common CompileCommonOpts += -Wno-format diff --git a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp new file mode 100644 index 0000000000000..f7e8ff254848e --- /dev/null +++ b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "CTargetMachine.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheCBackendTarget; + +extern "C" void LLVMInitializeCBackendTargetInfo() { + RegisterTarget<> X(TheCBackendTarget, "c", "C backend"); +} diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..5b35fa7c065b3 --- /dev/null +++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMCBackendInfo + CBackendTargetInfo.cpp + ) + diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/CBackend/TargetInfo/Makefile new file mode 100644 index 0000000000000..d4d5e15b40bb1 --- /dev/null +++ b/lib/Target/CBackend/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/CBackend/TargetInfo/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMCBackendInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 7cffd0e53c178..8769ee297b653 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -1,18 +1,14 @@ add_llvm_library(LLVMTarget - DarwinTargetAsmInfo.cpp - ELFTargetAsmInfo.cpp SubtargetFeature.cpp Target.cpp - TargetAsmInfo.cpp TargetData.cpp TargetELFWriterInfo.cpp TargetFrameInfo.cpp TargetInstrInfo.cpp + TargetIntrinsicInfo.cpp + TargetLoweringObjectFile.cpp TargetMachOWriterInfo.cpp TargetMachine.cpp - TargetMachineRegistry.cpp TargetRegisterInfo.cpp TargetSubtarget.cpp ) - -# TODO: Support other targets besides X86. See Makefile. 
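The new TargetInfo sublibrary above is the entire registration story for a backend that only needs to be selectable by name. The same scheme for a hypothetical target "foo" (TheFooTarget and the entry-point name are illustrative stand-ins; the C backend's real code is in the hunk above):

    #include "llvm/Target/TargetRegistry.h"

    namespace llvm { Target TheFooTarget; }  // one global Target object
    using namespace llvm;

    // A plain C entry point lets drivers initialize targets without relying
    // on C++ static constructors; RegisterTarget<> files the target under
    // its short name plus a human-readable description.
    extern "C" void LLVMInitializeFooTargetInfo() {
      RegisterTarget<> X(TheFooTarget, "foo", "Foo backend");
    }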
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt index 9684e63a60de4..1e508fe18908c 100644 --- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt +++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt @@ -4,6 +4,6 @@ include_directories( ) add_llvm_library(LLVMCellSPUAsmPrinter - SPUAsmPrinter.cpp + SPUAsmPrinter.cpp ) add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen) \ No newline at end of file diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile index dd56df71a5dee..69639efca748d 100644 --- a/lib/Target/CellSPU/AsmPrinter/Makefile +++ b/lib/Target/CellSPU/AsmPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/CellSPU/Makefile -------------------------*- Makefile -*-===## +##===- lib/Target/CellSPU/AsmPrinter/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 2847d0b8393b1..0f8d5393ab845 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -19,25 +19,29 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/MDNode.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Support/Mangler.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" #include using namespace llvm; @@ -49,8 +53,8 @@ namespace { class VISIBILITY_HIDDEN SPUAsmPrinter : public AsmPrinter { std::set FnStubs, GVStubs; public: - explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, bool V) : + explicit SPUAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) : AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { @@ -62,10 +66,10 @@ namespace { } /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. This method returns true if the - /// machine instruction was sufficiently described to print it, otherwise it - /// returns false. - bool printInstruction(const MachineInstr *MI); + /// from the instruction set description. 
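Two related interface changes land here: tablegen's printInstruction no longer returns a success flag, and tablegen now also emits a static getRegisterName, so printers stop reaching through TM.getRegisterInfo() for AsmName strings; the new declarations continue immediately below. A toy sketch of the call-site change, where the stub table is invented and the real body would come from the generated SPUGenAsmWriter.inc:

    #include "llvm/Support/raw_ostream.h"

    // Stub standing in for the tablegen-generated accessor.
    static const char *getRegisterName(unsigned RegNo) {
      static const char *const Names[] = { "$0", "$1", "$2", "$3" };
      return RegNo < 4 ? Names[RegNo] : "$?";  // toy table for illustration
    }

    static void printReg(llvm::raw_ostream &O, unsigned RegNo) {
      // was: O << TM.getRegisterInfo()->get(RegNo).AsmName;
      O << getRegisterName(RegNo);             // static, table-driven lookup
    }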
+ void printInstruction(const MachineInstr *MI); + static const char *getRegisterName(unsigned RegNo); + void printMachineInstruction(const MachineInstr *MI); void printOp(const MachineOperand &MO); @@ -76,14 +80,13 @@ namespace { unsigned RegNo = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); - O << TM.getRegisterInfo()->get(RegNo).AsmName; + O << getRegisterName(RegNo); } void printOperand(const MachineInstr *MI, unsigned OpNo) { const MachineOperand &MO = MI->getOperand(OpNo); if (MO.isReg()) { - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg())&&"Not physreg??"); - O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + O << getRegisterName(MO.getReg()); } else if (MO.isImm()) { O << MO.getImm(); } else { @@ -150,8 +153,7 @@ namespace { // the value contained in the register. For this reason, the darwin // assembler requires that we print r0 as 0 (no r) when used as the base. const MachineOperand &MO = MI->getOperand(OpNo); - O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; - O << ", "; + O << getRegisterName(MO.getReg()) << ", "; printOperand(MI, OpNo+1); } @@ -264,7 +266,7 @@ namespace { && "Invalid negated immediate rotate 7-bit argument"); O << -value; } else { - assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); } } @@ -275,31 +277,25 @@ namespace { && "Invalid negated immediate rotate 7-bit argument"); O << -value; } else { - assert(0 &&"Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); } } virtual bool runOnMachineFunction(MachineFunction &F) = 0; - //! Assembly printer cleanup after function has been emitted - virtual bool doFinalization(Module &M) = 0; }; /// LinuxAsmPrinter - SPU assembly printer, customized for Linux class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter { - DwarfWriter *DW; public: - explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM, - const TargetAsmInfo *T, bool V) - : SPUAsmPrinter(O, TM, T, V), DW(0) {} + explicit LinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) + : SPUAsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "STI CBEA SPU Assembly Printer"; } bool runOnMachineFunction(MachineFunction &F); - bool doInitialization(Module &M); - //! Dump globals, perform cleanup after function emission - bool doFinalization(Module &M); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -309,7 +305,7 @@ namespace { } //! 
Emit a global variable according to its section and type - void printModuleLevelGV(const GlobalVariable* GVar); + void PrintGlobalVariable(const GlobalVariable* GVar); }; } // end of anonymous namespace @@ -319,35 +315,34 @@ namespace { void SPUAsmPrinter::printOp(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Immediate: - cerr << "printOp() does not handle immediate values\n"; - abort(); + llvm_report_error("printOp() does not handle immediate values"); return; case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB()); + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); return; case MachineOperand::MO_JumpTableIndex: - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); return; case MachineOperand::MO_ConstantPoolIndex: - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); return; case MachineOperand::MO_ExternalSymbol: // Computing the address of an external symbol, not calling it. if (TM.getRelocationModel() != Reloc::Static) { - std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); + std::string Name(MAI->getGlobalPrefix()); Name += MO.getSymbolName(); GVStubs.insert(Name); O << "L" << Name << "$non_lazy_ptr"; return; } - O << TAI->getGlobalPrefix() << MO.getSymbolName(); + O << MAI->getGlobalPrefix() << MO.getSymbolName(); return; case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. GlobalValue *GV = MO.getGlobal(); - std::string Name = Mang->getValueName(GV); + std::string Name = Mang->getMangledName(GV); // External or weakly linked global variables need non-lazily-resolved // stubs @@ -410,15 +405,18 @@ bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; + processDebugLoc(MI, true); printInstruction(MI); + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + processDebugLoc(MI, false); + O << '\n'; } /// runOnMachineFunction - This uses the printMachineInstruction() /// method to print assembly for each instruction. /// -bool -LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) -{ +bool LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; SetupMachineFunction(MF); @@ -430,12 +428,13 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) // Print out labels for the function. const Function *F = MF.getFunction(); - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); EmitAlignment(MF.getAlignment(), F); switch (F->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); + default: llvm_unreachable("Unknown linkage type!"); case Function::PrivateLinkage: + case Function::LinkerPrivateLinkage: case Function::InternalLinkage: // Symbols default to internal. break; case Function::ExternalLinkage: @@ -460,8 +459,7 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) I != E; ++I) { // Print a label for the basic block. 
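The printMachineInstruction hunk above shows the new per-instruction emission protocol: debug-location bookkeeping brackets each instruction, verbose-mode comments are appended, and the printer now emits the trailing newline itself. Mirroring the patch, under the assumption of an AsmPrinter subclass with the usual O, VerboseAsm, and EmittedInsts members (basic-block label emission continues just below):

    // Member of a hypothetical AsmPrinter subclass:
    void printMachineInstruction(const MachineInstr *MI) {
      ++EmittedInsts;
      processDebugLoc(MI, true);       // open a debug range before printing
      printInstruction(MI);            // tablegen'd body, no newline anymore
      if (VerboseAsm && !MI->getDebugLoc().isUnknown())
        EmitComments(*MI);             // append the source-location comment
      processDebugLoc(MI, false);      // close the range after printing
      O << '\n';                       // the printer owns the line break
    }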
if (I != MF.begin()) { - printBasicBlockLabel(I, true, true); - O << '\n'; + EmitBasicBlockStart(I); } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { @@ -483,29 +481,13 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) } -bool LinuxAsmPrinter::doInitialization(Module &M) { - bool Result = AsmPrinter::doInitialization(M); - DW = getAnalysisIfAvailable(); - SwitchToTextSection("\t.text"); - return Result; -} - -/// PrintUnmangledNameSafely - Print out the printable characters in the name. -/// Don't print things like \\n or \\0. -static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { - for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); - Name != E; ++Name) - if (isprint(*Name)) - OS << *Name; -} - /*! Emit a global variable according to its section, alignment, etc. \note This code was shamelessly copied from the PowerPC's assembly printer, which sort of screams for some kind of refactorization of common code. */ -void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { +void LinuxAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) { const TargetData *TD = TM.getTargetData(); if (!GVar->hasInitializer()) @@ -515,18 +497,17 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { if (EmitSpecialLLVMGlobal(GVar)) return; - std::string name = Mang->getValueName(GVar); + std::string name = Mang->getMangledName(GVar); printVisibility(name, GVar->getVisibility()); Constant *C = GVar->getInitializer(); - if (isa(C) || isa(C)) - return; const Type *Type = C->getType(); unsigned Size = TD->getTypeAllocSize(Type); unsigned Align = TD->getPreferredAlignmentLog(GVar); - SwitchToSection(TAI->SectionForGlobal(GVar)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); if (C->isNullValue() && /* FIXME: Verify correct */ !GVar->hasSection() && @@ -540,12 +521,12 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { O << name << ":\n"; O << "\t.zero " << Size << '\n'; } else if (GVar->hasLocalLinkage()) { - O << TAI->getLCOMMDirective() << name << ',' << Size; + O << MAI->getLCOMMDirective() << name << ',' << Size; } else { O << ".comm " << name << ',' << Size; } - O << "\t\t" << TAI->getCommentString() << " '"; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t" << MAI->getCommentString() << " '"; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); O << "'\n"; return; } @@ -570,48 +551,23 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { << "\t.type " << name << ", @object\n"; // FALL THROUGH case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: case GlobalValue::InternalLinkage: break; default: - cerr << "Unknown linkage type!"; - abort(); + llvm_report_error("Unknown linkage type!"); } EmitAlignment(Align, GVar); - O << name << ":\t\t\t\t" << TAI->getCommentString() << " '"; - PrintUnmangledNameSafely(GVar, O); + O << name << ":\t\t\t\t" << MAI->getCommentString() << " '"; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); O << "'\n"; EmitGlobalConstant(C); O << '\n'; } -bool LinuxAsmPrinter::doFinalization(Module &M) { - // Print out module-level global variables here. 
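PrintUnmangledNameSafely can be deleted above because WriteAsOperand already prints a value's name with unprintable characters handled, which is exactly what the comment-emission hunks switch to; the removed doFinalization loop continues below. A minimal sketch of the replacement idiom (emitNameComment is a hypothetical wrapper):

    #include "llvm/Assembly/Writer.h"
    #include "llvm/GlobalVariable.h"
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void emitNameComment(raw_ostream &O, const GlobalVariable *GV,
                                const char *Comment) {
      O << "\t\t" << Comment << " '";
      // PrintType=false: emit just the escaped name, not a typed operand
      // like "i32* @x"; the Module argument lets names resolve consistently.
      WriteAsOperand(O, GV, /*PrintType=*/false, GV->getParent());
      O << "'\n";
    }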
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I); - - return AsmPrinter::doFinalization(M); -} - -/// createSPUCodePrinterPass - Returns a pass that prints the Cell SPU -/// assembly code for a MachineFunction to the given output stream, in a format -/// that the Linux SPU assembler can deal with. -/// -FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o, - SPUTargetMachine &tm, - bool verbose) { - return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); -} - // Force static initialization. -extern "C" void LLVMInitializeCellSPUAsmPrinter() { } - -namespace { - static struct Register { - Register() { - SPUTargetMachine::registerAsmPrinter(createSPUAsmPrinterPass); - } - } Registrator; +extern "C" void LLVMInitializeCellSPUAsmPrinter() { + RegisterAsmPrinter<LinuxAsmPrinter> X(TheCellSPUTarget); } diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index 8a558459802e2..0cb6676d7df71 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -17,9 +17,9 @@ add_llvm_target(CellSPUCodeGen SPUInstrInfo.cpp SPUISelDAGToDAG.cpp SPUISelLowering.cpp + SPUMCAsmInfo.cpp SPURegisterInfo.cpp SPUSubtarget.cpp - SPUTargetAsmInfo.cpp SPUTargetMachine.cpp ) diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile index a460db3cfeda8..8415168aea200 100644 --- a/lib/Target/CellSPU/Makefile +++ b/lib/Target/CellSPU/Makefile @@ -17,6 +17,6 @@ BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \ SPUGenInstrInfo.inc SPUGenDAGISel.inc \ SPUGenSubtarget.inc SPUGenCallingConv.inc -DIRS = AsmPrinter +DIRS = AsmPrinter TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h index 10d1110f1ad16..02713b5402daf 100644 --- a/lib/Target/CellSPU/SPU.h +++ b/lib/Target/CellSPU/SPU.h @@ -21,12 +21,9 @@ namespace llvm { class SPUTargetMachine; class FunctionPass; - class raw_ostream; + class formatted_raw_ostream; FunctionPass *createSPUISelDag(SPUTargetMachine &TM); - FunctionPass *createSPUAsmPrinterPass(raw_ostream &o, - SPUTargetMachine &tm, - bool verbose); /*--== Utility functions/predicates/etc used all over the place: --==*/ //! Predicate test for a signed 10-bit value @@ -92,6 +89,9 @@ namespace llvm { inline bool isU10Constant(uint64_t Value) { return (Value == (Value & 0x3ff)); } + + extern Target TheCellSPUTarget; + } // Defines symbolic names for the SPU instructions.
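With the Registrator struct gone, asm-printer setup above becomes an explicit, idempotent C hook paired with the TargetInfo hook. How a tool would pull both in, assuming the CellSPU libraries are linked (the lookup step is sketched only in the comment):

    // Real entry points from the hunks above; declarations suffice here.
    extern "C" void LLVMInitializeCellSPUTargetInfo();
    extern "C" void LLVMInitializeCellSPUAsmPrinter();

    int main() {
      LLVMInitializeCellSPUTargetInfo();   // registers TheCellSPUTarget
      LLVMInitializeCellSPUAsmPrinter();   // attaches the asm printer factory
      // ... resolve the target by triple or name via TargetRegistry and
      // construct a TargetMachine from it ...
      return 0;
    }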
diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp index caaa71a422fb8..9dbab1da99021 100644 --- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp +++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/Debug.h" - +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -115,7 +115,8 @@ SPUHazardRecognizer::getHazardType(SUnit *SU) if (mustBeOdd && !EvenOdd) retval = Hazard; - DOUT << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " << retval << "\n"; + DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " + << retval << "\n"); EvenOdd ^= 1; return retval; #else @@ -129,7 +130,7 @@ void SPUHazardRecognizer::EmitInstruction(SUnit *SU) void SPUHazardRecognizer::AdvanceCycle() { - DOUT << "SPUHazardRecognizer::AdvanceCycle\n"; + DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n"); } void SPUHazardRecognizer::EmitNoop() diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 779d75d0218ac..1f9e5fcc4a7fa 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -30,9 +30,12 @@ #include "llvm/Constants.h" #include "llvm/GlobalValue.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -100,7 +103,7 @@ namespace { bool isIntS16Immediate(ConstantSDNode *CN, short &Imm) { - MVT vt = CN->getValueType(0); + EVT vt = CN->getValueType(0); Imm = (short) CN->getZExtValue(); if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) { return true; @@ -129,7 +132,7 @@ namespace { static bool isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) { - MVT vt = FPN->getValueType(0); + EVT vt = FPN->getValueType(0); if (vt == MVT::f32) { int val = FloatToBits(FPN->getValueAPF().convertToFloat()); int sval = (int) ((val << 16) >> 16); @@ -151,10 +154,10 @@ namespace { } //===------------------------------------------------------------------===// - //! MVT to "useful stuff" mapping structure: + //! EVT to "useful stuff" mapping structure: struct valtype_map_s { - MVT VT; + EVT VT; unsigned ldresult_ins; /// LDRESULT instruction (0 = undefined) bool ldresult_imm; /// LDRESULT instruction requires immediate? 
unsigned lrinst; /// LR instruction @@ -178,7 +181,7 @@ namespace { const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); - const valtype_map_s *getValueTypeMapEntry(MVT VT) + const valtype_map_s *getValueTypeMapEntry(EVT VT) { const valtype_map_s *retval = 0; for (size_t i = 0; i < n_valtype_map; ++i) { @@ -191,10 +194,11 @@ namespace { #ifndef NDEBUG if (retval == 0) { - cerr << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for " - << VT.getMVTString() - << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for " + << VT.getEVTString(); + llvm_report_error(Msg.str()); } #endif @@ -249,10 +253,10 @@ namespace { SPUtli(*tm.getTargetLowering()) { } - virtual bool runOnFunction(Function &Fn) { + virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; - SelectionDAGISel::runOnFunction(Fn); + SelectionDAGISel::runOnMachineFunction(MF); return true; } @@ -274,8 +278,8 @@ namespace { } SDNode *emitBuildVector(SDValue build_vec) { - MVT vecVT = build_vec.getValueType(); - MVT eltVT = vecVT.getVectorElementType(); + EVT vecVT = build_vec.getValueType(); + EVT eltVT = vecVT.getVectorElementType(); SDNode *bvNode = build_vec.getNode(); DebugLoc dl = bvNode->getDebugLoc(); @@ -319,19 +323,19 @@ namespace { SDNode *Select(SDValue Op); //! Emit the instruction sequence for i64 shl - SDNode *SelectSHLi64(SDValue &Op, MVT OpVT); + SDNode *SelectSHLi64(SDValue &Op, EVT OpVT); //! Emit the instruction sequence for i64 srl - SDNode *SelectSRLi64(SDValue &Op, MVT OpVT); + SDNode *SelectSRLi64(SDValue &Op, EVT OpVT); //! Emit the instruction sequence for i64 sra - SDNode *SelectSRAi64(SDValue &Op, MVT OpVT); + SDNode *SelectSRAi64(SDValue &Op, EVT OpVT); //! Emit the necessary sequence for loading i64 constants: - SDNode *SelectI64Constant(SDValue &Op, MVT OpVT, DebugLoc dl); + SDNode *SelectI64Constant(SDValue &Op, EVT OpVT, DebugLoc dl); //! Alternate instruction emit sequence for loading i64 constants - SDNode *SelectI64Constant(uint64_t i64const, MVT OpVT, DebugLoc dl); + SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl); //! 
Returns true if the address N is an A-form (local store) address bool SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, @@ -375,7 +379,7 @@ namespace { break; case 'v': // not offsetable #if 1 - assert(0 && "InlineAsmMemoryOperand 'v' constraint not handled."); + llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled."); #else SelectAddrIdxOnly(Op, Op, Op0, Op1); #endif @@ -430,23 +434,21 @@ bool SPUDAGToDAGISel::SelectAFormAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Index) { // These match the addr256k operand type: - MVT OffsVT = MVT::i16; + EVT OffsVT = MVT::i16; SDValue Zero = CurDAG->getTargetConstant(0, OffsVT); switch (N.getOpcode()) { case ISD::Constant: case ISD::ConstantPool: case ISD::GlobalAddress: - cerr << "SPU SelectAFormAddr: Constant/Pool/Global not lowered.\n"; - abort(); + llvm_report_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered."); /*NOTREACHED*/ case ISD::TargetConstant: case ISD::TargetGlobalAddress: case ISD::TargetJumpTable: - cerr << "SPUSelectAFormAddr: Target Constant/Pool/Global not wrapped as " - << "A-form address.\n"; - abort(); + llvm_report_error("SPUSelectAFormAddr: Target Constant/Pool/Global " + "not wrapped as A-form address."); /*NOTREACHED*/ case SPUISD::AFormAddr: @@ -512,13 +514,13 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, SDValue &Index, int minOffset, int maxOffset) { unsigned Opc = N.getOpcode(); - MVT PtrTy = SPUtli.getPointerTy(); + EVT PtrTy = SPUtli.getPointerTy(); if (Opc == ISD::FrameIndex) { // Stack frame index must be less than 512 (divided by 16): FrameIndexSDNode *FIN = dyn_cast(N); int FI = int(FIN->getIndex()); - DEBUG(cerr << "SelectDFormAddr: ISD::FrameIndex = " + DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " << FI << "\n"); if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { Base = CurDAG->getTargetConstant(0, PtrTy); @@ -543,7 +545,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, if (Op0.getOpcode() == ISD::FrameIndex) { FrameIndexSDNode *FIN = dyn_cast(Op0); int FI = int(FIN->getIndex()); - DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { @@ -564,7 +566,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDValue Op, SDValue N, SDValue &Base, if (Op1.getOpcode() == ISD::FrameIndex) { FrameIndexSDNode *FIN = dyn_cast(Op1); int FI = int(FIN->getIndex()); - DEBUG(cerr << "SelectDFormAddr: ISD::ADD offset = " << offset + DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); if (SPUFrameInfo::FItoStackOffset(FI) < maxOffset) { @@ -690,7 +692,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { unsigned Opc = N->getOpcode(); int n_ops = -1; unsigned NewOpc; - MVT OpVT = Op.getValueType(); + EVT OpVT = Op.getValueType(); SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); @@ -711,8 +713,9 @@ SPUDAGToDAGISel::Select(SDValue Op) { } else { NewOpc = SPU::Ar32; Ops[0] = CurDAG->getRegister(SPU::R1, Op.getValueType()); - Ops[1] = SDValue(CurDAG->getTargetNode(SPU::ILAr32, dl, Op.getValueType(), - TFI, Imm0), 0); + Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, + Op.getValueType(), TFI, Imm0), + 0); n_ops = 2; } } else if (Opc == ISD::Constant && OpVT == MVT::i64) { @@ -723,17 +726,17 @@ SPUDAGToDAGISel::Select(SDValue Op) { } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) && OpVT == MVT::i64) { SDValue Op0 = 
Op.getOperand(0); - MVT Op0VT = Op0.getValueType(); - MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); - MVT OpVecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); + EVT Op0VT = Op0.getValueType(); + EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), + Op0VT, (128 / Op0VT.getSizeInBits())); + EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); SDValue shufMask; - switch (Op0VT.getSimpleVT()) { + switch (Op0VT.getSimpleVT().SimpleTy) { default: - cerr << "CellSPU Select: Unhandled zero/any extend MVT\n"; - abort(); + llvm_report_error("CellSPU Select: Unhandled zero/any extend EVT"); /*NOTREACHED*/ - break; case MVT::i32: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, CurDAG->getConstant(0x80808080, MVT::i32), @@ -811,8 +814,8 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (shift_amt >= 32) { SDNode *hi32 = - CurDAG->getTargetNode(SPU::ORr32_r64, dl, OpVT, - Op0.getOperand(0)); + CurDAG->getMachineNode(SPU::ORr32_r64, dl, OpVT, + Op0.getOperand(0)); shift_amt -= 32; if (shift_amt > 0) { @@ -823,8 +826,8 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (Op0.getOpcode() == ISD::SRL) Opc = SPU::ROTMr32; - hi32 = CurDAG->getTargetNode(Opc, dl, OpVT, SDValue(hi32, 0), - shift); + hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0), + shift); } return hi32; @@ -856,10 +859,10 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (OpVT == MVT::v2f64) Opc = SPU::DFNMSv2f64; - return CurDAG->getTargetNode(Opc, dl, OpVT, - Op00.getOperand(0), - Op00.getOperand(1), - Op0.getOperand(1)); + return CurDAG->getMachineNode(Opc, dl, OpVT, + Op00.getOperand(0), + Op00.getOperand(1), + Op0.getOperand(1)); } } @@ -876,43 +879,44 @@ SPUDAGToDAGISel::Select(SDValue Op) { negConst, negConst)); } - return CurDAG->getTargetNode(Opc, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + return CurDAG->getMachineNode(Opc, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); } else if (Opc == ISD::FABS) { if (OpVT == MVT::f64) { SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); - return CurDAG->getTargetNode(SPU::ANDfabs64, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); } else if (OpVT == MVT::v2f64) { SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, absConst, absConst); SDNode *signMask = emitBuildVector(absVec); - return CurDAG->getTargetNode(SPU::ANDfabsvec, dl, OpVT, - Op.getOperand(0), SDValue(signMask, 0)); + return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT, + Op.getOperand(0), SDValue(signMask, 0)); } } else if (Opc == SPUISD::LDRESULT) { // Custom select instructions for LDRESULT - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); SDValue Arg = N->getOperand(0); SDValue Chain = N->getOperand(1); SDNode *Result; const valtype_map_s *vtm = getValueTypeMapEntry(VT); if (vtm->ldresult_ins == 0) { - cerr << "LDRESULT for unsupported type: " - << VT.getMVTString() - << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "LDRESULT for unsupported type: " + << VT.getEVTString(); + llvm_report_error(Msg.str()); } Opc = vtm->ldresult_ins; if (vtm->ldresult_imm) { SDValue Zero = CurDAG->getTargetConstant(0, VT); - Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain); + Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Zero, Chain); } else { - 
Result = CurDAG->getTargetNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain); + Result = CurDAG->getMachineNode(Opc, dl, VT, MVT::Other, Arg, Arg, Chain); } return Result; @@ -923,7 +927,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { // SPUInstrInfo catches the following patterns: // (SPUindirect (SPUhi ...), (SPUlo ...)) // (SPUindirect $sp, imm) - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); RegisterSDNode *RN; @@ -948,7 +952,7 @@ SPUDAGToDAGISel::Select(SDValue Op) { if (N->hasOneUse()) return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops); else - return CurDAG->getTargetNode(NewOpc, dl, OpVT, Ops, n_ops); + return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops); } else return SelectCode(Op); } @@ -966,24 +970,25 @@ SPUDAGToDAGISel::Select(SDValue Op) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) { +SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, EVT OpVT) { SDValue Op0 = Op.getOperand(0); - MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftAmtVT = ShiftAmt.getValueType(); + EVT ShiftAmtVT = ShiftAmt.getValueType(); SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; SDValue SelMaskVal; DebugLoc dl = Op.getDebugLoc(); - VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0); + VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); - SelMask = CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal); - ZeroFill = CurDAG->getTargetNode(SPU::ILv2i64, dl, VecVT, - CurDAG->getTargetConstant(0, OpVT)); - VecOp0 = CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT, - SDValue(ZeroFill, 0), - SDValue(VecOp0, 0), - SDValue(SelMask, 0)); + SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal); + ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT, + CurDAG->getTargetConstant(0, OpVT)); + VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, + SDValue(ZeroFill, 0), + SDValue(VecOp0, 0), + SDValue(SelMask, 0)); if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { unsigned bytes = unsigned(CN->getZExtValue()) >> 3; @@ -991,35 +996,35 @@ SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) { if (bytes > 0) { Shift = - CurDAG->getTargetNode(SPU::SHLQBYIv2i64, dl, VecVT, - SDValue(VecOp0, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT, + SDValue(VecOp0, 0), + CurDAG->getTargetConstant(bytes, ShiftAmtVT)); } if (bits > 0) { Shift = - CurDAG->getTargetNode(SPU::SHLQBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : VecOp0), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT, + SDValue((Shift != 0 ? 
Shift : VecOp0), 0), + CurDAG->getTargetConstant(bits, ShiftAmtVT)); } } else { SDNode *Bytes = - CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(3, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(3, ShiftAmtVT)); SDNode *Bits = - CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(7, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(7, ShiftAmtVT)); Shift = - CurDAG->getTargetNode(SPU::SHLQBYv2i64, dl, VecVT, - SDValue(VecOp0, 0), SDValue(Bytes, 0)); + CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT, + SDValue(VecOp0, 0), SDValue(Bytes, 0)); Shift = - CurDAG->getTargetNode(SPU::SHLQBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(Bits, 0)); + CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT, + SDValue(Shift, 0), SDValue(Bits, 0)); } - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); } /*! @@ -1031,15 +1036,16 @@ SPUDAGToDAGISel::SelectSHLi64(SDValue &Op, MVT OpVT) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) { +SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, EVT OpVT) { SDValue Op0 = Op.getOperand(0); - MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftAmtVT = ShiftAmt.getValueType(); + EVT ShiftAmtVT = ShiftAmt.getValueType(); SDNode *VecOp0, *Shift = 0; DebugLoc dl = Op.getDebugLoc(); - VecOp0 = CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op0); + VecOp0 = CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op0); if (ConstantSDNode *CN = dyn_cast(ShiftAmt)) { unsigned bytes = unsigned(CN->getZExtValue()) >> 3; @@ -1047,45 +1053,45 @@ SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) { if (bytes > 0) { Shift = - CurDAG->getTargetNode(SPU::ROTQMBYIv2i64, dl, VecVT, - SDValue(VecOp0, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT, + SDValue(VecOp0, 0), + CurDAG->getTargetConstant(bytes, ShiftAmtVT)); } if (bits > 0) { Shift = - CurDAG->getTargetNode(SPU::ROTQMBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? Shift : VecOp0), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT, + SDValue((Shift != 0 ? Shift : VecOp0), 0), + CurDAG->getTargetConstant(bits, ShiftAmtVT)); } } else { SDNode *Bytes = - CurDAG->getTargetNode(SPU::ROTMIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(3, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(3, ShiftAmtVT)); SDNode *Bits = - CurDAG->getTargetNode(SPU::ANDIr32, dl, ShiftAmtVT, - ShiftAmt, - CurDAG->getTargetConstant(7, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(7, ShiftAmtVT)); // Ensure that the shift amounts are negated! 
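Every getTargetNode call in these i64 shift selectors becomes getMachineNode with identical arguments; only the name changed, plus a more precise MachineSDNode result type that still converts to SDNode*. One call site in isolation, assuming a SelectionDAG and the tablegen'd SPU opcode enum are available (emitOrAcrossVec is a hypothetical wrapper); the negated-shift sequence continues below.

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Assumes SPU::ORi64_v2i64 from the generated SPUGenInstrNames.inc.
    static SDNode *emitOrAcrossVec(SelectionDAG *CurDAG, DebugLoc dl,
                                   EVT VT, SDValue V) {
      // was: CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, VT, V);
      return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, VT, V);
    }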
- Bytes = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT, - SDValue(Bytes, 0), - CurDAG->getTargetConstant(0, ShiftAmtVT)); + Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, + SDValue(Bytes, 0), + CurDAG->getTargetConstant(0, ShiftAmtVT)); - Bits = CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT, - SDValue(Bits, 0), - CurDAG->getTargetConstant(0, ShiftAmtVT)); + Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, + SDValue(Bits, 0), + CurDAG->getTargetConstant(0, ShiftAmtVT)); Shift = - CurDAG->getTargetNode(SPU::ROTQMBYv2i64, dl, VecVT, - SDValue(VecOp0, 0), SDValue(Bytes, 0)); + CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT, + SDValue(VecOp0, 0), SDValue(Bytes, 0)); Shift = - CurDAG->getTargetNode(SPU::ROTQMBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(Bits, 0)); + CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT, + SDValue(Shift, 0), SDValue(Bits, 0)); } - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); } /*! @@ -1097,33 +1103,34 @@ SPUDAGToDAGISel::SelectSRLi64(SDValue &Op, MVT OpVT) { * @return The SDNode with the entire instruction sequence */ SDNode * -SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { +SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, EVT OpVT) { // Promote Op0 to vector - MVT VecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits())); + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); SDValue ShiftAmt = Op.getOperand(1); - MVT ShiftAmtVT = ShiftAmt.getValueType(); + EVT ShiftAmtVT = ShiftAmt.getValueType(); DebugLoc dl = Op.getDebugLoc(); SDNode *VecOp0 = - CurDAG->getTargetNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0)); + CurDAG->getMachineNode(SPU::ORv2i64_i64, dl, VecVT, Op.getOperand(0)); SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); SDNode *SignRot = - CurDAG->getTargetNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64, - SDValue(VecOp0, 0), SignRotAmt); + CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64, + SDValue(VecOp0, 0), SignRotAmt); SDNode *UpperHalfSign = - CurDAG->getTargetNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0)); + CurDAG->getMachineNode(SPU::ORi32_v4i32, dl, MVT::i32, SDValue(SignRot, 0)); SDNode *UpperHalfSignMask = - CurDAG->getTargetNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0)); + CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0)); SDNode *UpperLowerMask = - CurDAG->getTargetNode(SPU::FSMBIv2i64, dl, VecVT, - CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); + CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, + CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); SDNode *UpperLowerSelect = - CurDAG->getTargetNode(SPU::SELBv2i64, dl, VecVT, - SDValue(UpperHalfSignMask, 0), - SDValue(VecOp0, 0), - SDValue(UpperLowerMask, 0)); + CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, + SDValue(UpperHalfSignMask, 0), + SDValue(VecOp0, 0), + SDValue(UpperLowerMask, 0)); SDNode *Shift = 0; @@ -1134,46 +1141,46 @@ SPUDAGToDAGISel::SelectSRAi64(SDValue &Op, MVT OpVT) { if (bytes > 0) { bytes = 31 - bytes; Shift = - CurDAG->getTargetNode(SPU::ROTQBYIv2i64, dl, VecVT, - SDValue(UpperLowerSelect, 0), - CurDAG->getTargetConstant(bytes, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT, + SDValue(UpperLowerSelect, 0), + CurDAG->getTargetConstant(bytes, ShiftAmtVT)); } if (bits > 0) { bits = 8 - bits; Shift = - CurDAG->getTargetNode(SPU::ROTQBIIv2i64, dl, VecVT, - SDValue((Shift != 0 ? 
Shift : UpperLowerSelect), 0), - CurDAG->getTargetConstant(bits, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT, + SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0), + CurDAG->getTargetConstant(bits, ShiftAmtVT)); } } else { SDNode *NegShift = - CurDAG->getTargetNode(SPU::SFIr32, dl, ShiftAmtVT, - ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); + CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, + ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); Shift = - CurDAG->getTargetNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT, - SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0)); + CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT, + SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0)); Shift = - CurDAG->getTargetNode(SPU::ROTQBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(NegShift, 0)); + CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT, - SDValue(Shift, 0), SDValue(NegShift, 0)); } - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(Shift, 0)); } /*! Do the magic necessary to load an i64 constant */ -SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, MVT OpVT, +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDValue& Op, EVT OpVT, DebugLoc dl) { ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode()); return SelectI64Constant(CN->getZExtValue(), OpVT, dl); } -SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT, +SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, DebugLoc dl) { - MVT OpVecVT = MVT::getVectorVT(OpVT, 2); + EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2); SDValue i64vec = SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl); @@ -1186,8 +1193,8 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT, SDValue Op0 = i64vec.getOperand(0); ReplaceUses(i64vec, Op0); - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(Op0), 0)); + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, + SDValue(emitBuildVector(Op0), 0)); } else if (i64vec.getOpcode() == SPUISD::SHUFB) { SDValue lhs = i64vec.getOperand(0); SDValue rhs = i64vec.getOperand(1); @@ -1225,14 +1232,14 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, MVT OpVT, SDValue(lhsNode, 0), SDValue(rhsNode, 0), SDValue(shufMaskNode, 0))); - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(shufNode, 0)); + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, + SDValue(shufNode, 0)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { - return CurDAG->getTargetNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(emitBuildVector(i64vec), 0)); + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, + SDValue(emitBuildVector(i64vec), 0)); } else { - cerr << "SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec condition\n"; - abort(); + llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec " "condition"); } } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index d8a77766bd598..aaf07838fb683 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -15,8 +15,9 @@ #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameInfo.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/VectorExtras.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" #include
"llvm/CodeGen/MachineFrameInfo.h" @@ -24,13 +25,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOptions.h" - +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -39,10 +40,10 @@ using namespace llvm; namespace { std::map node_names; - //! MVT mapping to useful data for Cell SPU + //! EVT mapping to useful data for Cell SPU struct valtype_map_s { - const MVT valtype; - const int prefslot_byte; + EVT valtype; + int prefslot_byte; }; const valtype_map_s valtype_map[] = { @@ -58,7 +59,7 @@ namespace { const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); - const valtype_map_s *getValueTypeMapEntry(MVT VT) { + const valtype_map_s *getValueTypeMapEntry(EVT VT) { const valtype_map_s *retval = 0; for (size_t i = 0; i < n_valtype_map; ++i) { @@ -70,10 +71,11 @@ namespace { #ifndef NDEBUG if (retval == 0) { - cerr << "getValueTypeMapEntry returns NULL for " - << VT.getMVTString() - << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "getValueTypeMapEntry returns NULL for " + << VT.getEVTString(); + llvm_report_error(Msg.str()); } #endif @@ -98,8 +100,8 @@ namespace { TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { - MVT ArgVT = Op.getOperand(i).getValueType(); - const Type *ArgTy = ArgVT.getTypeForMVT(); + EVT ArgVT = Op.getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; @@ -110,10 +112,13 @@ namespace { TLI.getPointerTy()); // Splice the libcall in wherever FindInputOutputChains tells us to. - const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT(); + const Type *RetTy = + Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, - 0, CallingConv::C, false, Callee, Args, DAG, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op.getDebugLoc()); return CallInfo.first; @@ -121,9 +126,8 @@ namespace { } SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) - : TargetLowering(TM), - SPUTM(TM) -{ + : TargetLowering(TM, new TargetLoweringObjectFileELF()), + SPUTM(TM) { // Fold away setcc operations if possible. 
setPow2DivIsCheap(); @@ -151,6 +155,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + setTruncStoreAction(MVT::i128, MVT::i64, Expand); + setTruncStoreAction(MVT::i128, MVT::i32, Expand); + setTruncStoreAction(MVT::i128, MVT::i16, Expand); + setTruncStoreAction(MVT::i128, MVT::i8, Expand); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // SPU constant load actions are custom lowered: setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Custom); @@ -158,7 +169,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // SPU's loads and stores have to be custom lowered: for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; ++sctype) { - MVT VT = (MVT::SimpleValueType)sctype; + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -167,20 +178,20 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, VT, Custom); for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { - MVT StoreVT = (MVT::SimpleValueType) stype; + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; setTruncStoreAction(VT, StoreVT, Expand); } } for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; ++sctype) { - MVT VT = (MVT::SimpleValueType) sctype; + MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { - MVT StoreVT = (MVT::SimpleValueType) stype; + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; setTruncStoreAction(VT, StoreVT, Expand); } } @@ -199,11 +210,37 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // SPU has no intrinsics for these particular operations: setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - // SPU has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // SPU has no division/remainder instructions + setOperationAction(ISD::SREM, MVT::i8, Expand); + setOperationAction(ISD::UREM, MVT::i8, Expand); + setOperationAction(ISD::SDIV, MVT::i8, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Expand); + setOperationAction(ISD::SDIVREM, MVT::i8, Expand); + setOperationAction(ISD::UDIVREM, MVT::i8, Expand); + setOperationAction(ISD::SREM, MVT::i16, Expand); + setOperationAction(ISD::UREM, MVT::i16, Expand); + setOperationAction(ISD::SDIV, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i16, Expand); + setOperationAction(ISD::SDIVREM, MVT::i16, Expand); + setOperationAction(ISD::UDIVREM, MVT::i16, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, 
Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i128, Expand); + setOperationAction(ISD::UREM, MVT::i128, Expand); + setOperationAction(ISD::SDIV, MVT::i128, Expand); + setOperationAction(ISD::UDIV, MVT::i128, Expand); + setOperationAction(ISD::SDIVREM, MVT::i128, Expand); + setOperationAction(ISD::UDIVREM, MVT::i128, Expand); // We don't support sin/cos/sqrt/fmod setOperationAction(ISD::FSIN , MVT::f64, Expand); @@ -283,11 +320,19 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i16, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i64, Custom); + setOperationAction(ISD::CTPOP, MVT::i128, Expand); + setOperationAction(ISD::CTTZ , MVT::i8, Expand); + setOperationAction(ISD::CTTZ , MVT::i16, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); setOperationAction(ISD::CTTZ , MVT::i64, Expand); + setOperationAction(ISD::CTTZ , MVT::i128, Expand); + setOperationAction(ISD::CTLZ , MVT::i8, Promote); + setOperationAction(ISD::CTLZ , MVT::i16, Promote); setOperationAction(ISD::CTLZ , MVT::i32, Legal); + setOperationAction(ISD::CTLZ , MVT::i64, Expand); + setOperationAction(ISD::CTLZ , MVT::i128, Expand); // SPU has a version of select that implements (a&~c)|(b&c), just like // select ought to work: @@ -305,10 +350,21 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // Custom lower i128 -> i64 truncates setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); + // Custom lower i32/i64 -> i128 sign extend + setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); // SPU has a legal FP -> signed INT instruction for f32, but for f64, need // to expand to a libcall, hence the custom lowering: setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); // FDIV on SPU requires custom lowering setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall @@ -339,16 +395,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // appropriate instructions to materialize the address. for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; ++sctype) { - MVT VT = (MVT::SimpleValueType)sctype; + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; setOperationAction(ISD::GlobalAddress, VT, Custom); setOperationAction(ISD::ConstantPool, VT, Custom); setOperationAction(ISD::JumpTable, VT, Custom); } - // RET must be custom lowered, to meet ABI requirements - setOperationAction(ISD::RET, MVT::Other, Custom); - // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); @@ -385,7 +438,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; // add/sub are legal for all supported vector VT's. 
setOperationAction(ISD::ADD, VT, Legal); @@ -461,9 +514,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; - node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; - node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL"; - node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; @@ -490,9 +540,11 @@ unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const { // Return the Cell SPU's SETCC result type //===----------------------------------------------------------------------===// -MVT SPUTargetLowering::getSetCCResultType(MVT VT) const { +MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const { // i16 and i32 are valid SETCC result types - return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32); + return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? + VT.getSimpleVT().SimpleTy : + MVT::i32); } //===----------------------------------------------------------------------===// @@ -525,9 +577,9 @@ static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { LoadSDNode *LN = cast(Op); SDValue the_chain = LN->getChain(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - MVT InVT = LN->getMemoryVT(); - MVT OutVT = Op.getValueType(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT InVT = LN->getMemoryVT(); + EVT OutVT = Op.getValueType(); ISD::LoadExtType ExtType = LN->getExtensionType(); unsigned alignment = LN->getAlignment(); const valtype_map_s *vtm = getValueTypeMapEntry(InVT); @@ -632,7 +684,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Convert the loaded v16i8 vector to the appropriate vector type // specified by the operand: - MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits())); + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + InVT, (128 / InVT.getSizeInBits())); result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result)); @@ -665,11 +718,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_INC: case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: - cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " + { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " "UNINDEXED\n"; - cerr << (unsigned) LN->getAddressingMode() << "\n"; - abort(); - /*NOTREACHED*/ + Msg << (unsigned) LN->getAddressingMode(); + llvm_report_error(Msg.str()); + /*NOTREACHED*/ + } } return SDValue(); @@ -685,17 +742,19 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { StoreSDNode *SN = cast(Op); SDValue Value = SN->getValue(); - MVT VT = Value.getValueType(); - MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT VT = Value.getValueType(); + EVT StVT = (!SN->isTruncatingStore() ? 
VT : SN->getMemoryVT()); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned alignment = SN->getAlignment(); switch (SN->getAddressingMode()) { case ISD::UNINDEXED: { // The vector type we really want to load from the 16-byte chunk. - MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())), - stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits())); + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + VT, (128 / VT.getSizeInBits())), + stVecVT = EVT::getVectorVT(*DAG.getContext(), + StVT, (128 / StVT.getSizeInBits())); SDValue alignLoadVec; SDValue basePtr = SN->getBasePtr(); @@ -790,9 +849,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // to the stack pointer, which is always aligned. #if !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - cerr << "CellSPU LowerSTORE: basePtr = "; + errs() << "CellSPU LowerSTORE: basePtr = "; basePtr.getNode()->dump(&DAG); - cerr << "\n"; + errs() << "\n"; } #endif @@ -815,9 +874,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { const SDValue &currentRoot = DAG.getRoot(); DAG.setRoot(result); - cerr << "------- CellSPU:LowerStore result:\n"; + errs() << "------- CellSPU:LowerStore result:\n"; DAG.dump(); - cerr << "-------\n"; + errs() << "-------\n"; DAG.setRoot(currentRoot); } #endif @@ -830,20 +889,24 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_INC: case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: - cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " + { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than " "UNINDEXED\n"; - cerr << (unsigned) SN->getAddressingMode() << "\n"; - abort(); - /*NOTREACHED*/ + Msg << (unsigned) SN->getAddressingMode(); + llvm_report_error(Msg.str()); + /*NOTREACHED*/ + } } return SDValue(); }
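// Aside: a reduced sketch of the reporting idiom this patch adopts in place
// of cerr/abort(); the helper name and message here are illustrative only.
// llvm_report_error() (llvm/Support/ErrorHandling.h in this revision) runs
// any installed error handler instead of aborting outright:
static void reportUnsupportedAddrMode(unsigned Mode) {
  std::string msg;
  raw_string_ostream Msg(msg);
  Msg << "unsupported addressing mode: " << Mode;
  llvm_report_error(Msg.str()); // does not return
}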
//! Generate the address of a constant pool entry. -SDValue +static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); @@ -863,9 +926,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } } - assert(0 && - "LowerConstantPool: Relocation model other than static" - " not supported."); + llvm_unreachable("LowerConstantPool: Relocation model other than static" + " not supported."); return SDValue(); } @@ -877,7 +939,7 @@ SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); SDValue Zero = DAG.getConstant(0, PtrVT); @@ -895,14 +957,14 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } } - assert(0 && - "LowerJumpTable: Relocation model other than static not supported."); + llvm_unreachable("LowerJumpTable: Relocation model other than static" + " not supported."); return SDValue(); } static SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); @@ -920,9 +982,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } else { - cerr << "LowerGlobalAddress: Relocation model other than static not " - << "supported.\n"; - abort(); + llvm_report_error("LowerGlobalAddress: Relocation model other than static" " not supported."); /*NOTREACHED*/ } @@ -932,7 +993,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { //! Custom lower double precision floating point constants static SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); // FIXME there is no actual debug info here DebugLoc dl = Op.getDebugLoc(); @@ -952,16 +1013,17 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) { return SDValue(); } -static SDValue -LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) -{ +SDValue +SPUTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SmallVector ArgValues; - SDValue Root = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; - DebugLoc dl = Op.getDebugLoc(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); @@ -970,24 +1032,24 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) unsigned ArgRegIdx = 0; unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Add DAG nodes to load the arguments or copy them out of registers.
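// In outline (a simplified sketch of the loop that follows): each incoming
// value is either copied out of one of the SPU argument registers or, once
// those are exhausted, loaded from a fixed stack object:
//
//   if (ArgRegIdx < NumArgRegs) {                      // passed in a register
//     unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
//     RegInfo.addLiveIn(ArgRegs[ArgRegIdx++], VReg);
//     ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
//   } else {                                           // passed on the stack
//     int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
//     ArgVal = DAG.getLoad(ObjectVT, dl, Chain,
//                          DAG.getFrameIndex(FI, PtrVT), NULL, 0);
//     ArgOffset += StackSlotSize;
//   }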
- for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1; - ArgNo != e; ++ArgNo) { - MVT ObjectVT = Op.getValue(ArgNo).getValueType(); + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; SDValue ArgVal; if (ArgRegIdx < NumArgRegs) { const TargetRegisterClass *ArgRegClass; - switch (ObjectVT.getSimpleVT()) { + switch (ObjectVT.getSimpleVT().SimpleTy) { default: { - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << ObjectVT.getMVTString() - << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "LowerFormalArguments Unhandled argument type: " + << ObjectVT.getEVTString(); + llvm_report_error(Msg.str()); } case MVT::i8: ArgRegClass = &SPU::R8CRegClass; @@ -1022,7 +1084,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); - ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++ArgRegIdx; } else { // We need to load the argument to a virtual register if we determined @@ -1030,13 +1092,13 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); ArgOffset += StackSlotSize; } - ArgValues.push_back(ArgVal); + InVals.push_back(ArgVal); // Update the chain - Root = ArgVal.getOperand(0); + Chain = ArgVal.getOperand(0); } // vararg handling: @@ -1051,23 +1113,19 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0); - Root = Store.getOperand(0); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); + Chain = Store.getOperand(0); MemOps.push_back(Store); // Increment address by stack slot size for the next stored argument ArgOffset += StackSlotSize; } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); } - ArgValues.push_back(Root); - - // Return the new list of results. 
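// Aside: the hook migration in one picture. The removed return below fuses
// every argument and the chain into a single MERGE_VALUES node, i.e. (a
// sketch of the old protocol, operand names illustrative):
//
//   SDValue Ops[] = { Arg0, Arg1, /* ... */ Chain };
//   return DAG.getNode(ISD::MERGE_VALUES, dl, VTList, Ops, NumOps);
//
// The new LowerFormalArguments() instead appends each value to InVals and
// returns only the updated Chain: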
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()); + return Chain; } /// isLSAAddress - Return the immediate to use if the specified @@ -1084,19 +1142,23 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); } -static SDValue -LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { - CallSDNode *TheCall = cast(Op.getNode()); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - unsigned NumOps = TheCall->getNumArgs(); +SDValue +SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + + const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); + unsigned NumOps = Outs.size(); unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); - DebugLoc dl = TheCall->getDebugLoc(); // Handy pointer type - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Accumulate how many bytes are to be pushed on the stack, including the // linkage area, and parameter passing area. According to the SPU ABI, @@ -1119,15 +1181,15 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = TheCall->getArg(i); + SDValue Arg = Outs[i].Val; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - switch (Arg.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected ValueType for argument!"); + switch (Arg.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i8: case MVT::i16: case MVT::i32: @@ -1193,7 +1255,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { GlobalValue *GV = G->getGlobal(); - MVT CalleeVT = Callee.getValueType(); + EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); @@ -1217,7 +1279,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - MVT CalleeVT = Callee.getValueType(); + EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); @@ -1251,50 +1313,46 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), DAG.getIntPtrConstant(0, true), InFlag); - if (TheCall->getValueType(0) != MVT::Other) + if (!Ins.empty()) InFlag = Chain.getValue(1); - SDValue ResultVals[3]; - unsigned NumResults = 0; + // If the function returns void, just return the chain. + if (Ins.empty()) + return Chain; // If the call has results, copy the values out of the ret val registers. 
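// A sketch of the copy-out pattern used below: SPU::R3 carries the primary
// return value, and every CopyFromReg consumes the glue operand (InFlag) so
// the copies stay pinned after the call node; getValue(0) is the copied
// value, getValue(1) the new chain:
//
//   Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
//                              InFlag).getValue(1);
//   InVals.push_back(Chain.getValue(0));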
- switch (TheCall->getValueType(0).getSimpleVT()) { - default: assert(0 && "Unexpected ret value!"); + switch (Ins[0].VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ret value!"); case MVT::Other: break; case MVT::i32: - if (TheCall->getValueType(1) == MVT::i32) { + if (Ins.size() > 1 && Ins[1].VT == MVT::i32) { Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, MVT::i32, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); + InVals.push_back(Chain.getValue(0)); Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, Chain.getValue(2)).getValue(1); - ResultVals[1] = Chain.getValue(0); - NumResults = 2; + InVals.push_back(Chain.getValue(0)); } else { Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + InVals.push_back(Chain.getValue(0)); } break; case MVT::i64: Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + InVals.push_back(Chain.getValue(0)); break; case MVT::i128: Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + InVals.push_back(Chain.getValue(0)); break; case MVT::f32: case MVT::f64: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0), + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + InVals.push_back(Chain.getValue(0)); break; case MVT::v2f64: case MVT::v2i64: @@ -1302,31 +1360,25 @@ LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0), + Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, InFlag).getValue(1); - ResultVals[0] = Chain.getValue(0); - NumResults = 1; + InVals.push_back(Chain.getValue(0)); break; } - // If the function returns void, just return the chain. - if (NumResults == 0) - return Chain; - - // Otherwise, merge everything together with a MERGE_VALUES node. - ResultVals[NumResults++] = Chain; - SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl); - return Res.getValue(Op.getResNo()); + return Chain; } -static SDValue -LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { +SDValue +SPUTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { + SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - DebugLoc dl = Op.getDebugLoc(); - CCState CCInfo(CC, isVarArg, TM, RVLocs); - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC_SPU); // If this is the first return lowered for this function, add the regs to the // liveout set for the function. @@ -1335,7 +1387,6 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - SDValue Chain = Op.getOperand(0); SDValue Flag; // Copy the result values into the output registers. 
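// For orientation: RetCC_SPU is the tblgen-generated calling-convention
// function, and AnalyzeReturn above used it to assign one CCValAssign
// location per returned value. The loop below is then mechanical, one
// CopyToReg per location with the glue value threaded through Flag:
//
//   for (unsigned i = 0; i != RVLocs.size(); ++i) {
//     Chain = DAG.getCopyToReg(Chain, dl, RVLocs[i].getLocReg(),
//                              Outs[i].Val, Flag);
//     Flag = Chain.getValue(1);
//   }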
@@ -1343,7 +1394,7 @@ LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Op.getOperand(i*2+1), Flag); + Outs[i].Val, Flag); Flag = Chain.getValue(1); } @@ -1384,7 +1435,7 @@ getVecImm(SDNode *N) { /// and the value fits into an unsigned 18-bit constant, and if so, return the /// constant SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { uint64_t Value = CN->getZExtValue(); if (ValueType == MVT::i64) { @@ -1406,7 +1457,7 @@ SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, /// and the value fits into a signed 16-bit constant, and if so, return the /// constant SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { int64_t Value = CN->getSExtValue(); if (ValueType == MVT::i64) { @@ -1429,7 +1480,7 @@ SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, /// and the value fits into a signed 10-bit constant, and if so, return the /// constant SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { int64_t Value = CN->getSExtValue(); if (ValueType == MVT::i64) { @@ -1455,7 +1506,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, /// constant vectors. Thus, we test to see if the upper and lower bytes are the /// same value. SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { int Value = (int) CN->getZExtValue(); if (ValueType == MVT::i16 @@ -1474,7 +1525,7 @@ SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, /// and the value fits into a signed 16-bit constant, and if so, return the /// constant SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType) { + EVT ValueType) { if (ConstantSDNode *CN = getVecImm(N)) { uint64_t Value = CN->getZExtValue(); if ((ValueType == MVT::i32 @@ -1505,10 +1556,10 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { } //! Lower a BUILD_VECTOR instruction creatively: -SDValue +static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT EltVT = VT.getVectorElementType(); + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); BuildVectorSDNode *BCN = dyn_cast(Op.getNode()); assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); @@ -1528,13 +1579,15 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { uint64_t SplatBits = APSplatBits.getZExtValue(); - switch (VT.getSimpleVT()) { - default: - cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " - << VT.getMVTString() - << "\n"; - abort(); + switch (VT.getSimpleVT().SimpleTy) { + default: { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + << VT.getEVTString(); + llvm_report_error(Msg.str()); /*NOTREACHED*/ + } case MVT::v4f32: { uint32_t Value32 = uint32_t(SplatBits); assert(SplatBitSize == 32 @@ -1591,7 +1644,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { /*! 
*/ SDValue -SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, +SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, DebugLoc dl) { uint32_t upper = uint32_t(SplatVal >> 32); uint32_t lower = uint32_t(SplatVal); @@ -1704,8 +1757,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If we have a single element being moved from V1 to V2, this can be handled // using the C*[DX] compute mask instructions, but the vector elements have // to be monotonically increasing with one exception element. - MVT VecVT = V1.getValueType(); - MVT EltVT = VecVT.getVectorElementType(); + EVT VecVT = V1.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); unsigned EltsFromV2 = 0; unsigned V2Elt = 0; unsigned V2EltIdx0 = 0; @@ -1725,7 +1778,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { V2EltIdx0 = 2; } else - assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); + llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); for (unsigned i = 0; i != MaxElts; ++i) { if (SVN->getMaskElt(i) < 0) @@ -1770,7 +1823,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Initialize temporary register to 0 SDValue InitTempReg = DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT)); @@ -1816,13 +1869,13 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *CN = cast(Op0.getNode()); SmallVector ConstVecValues; - MVT VT; + EVT VT; size_t n_copies; // Create a constant vector: - switch (Op.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected constant value type in " - "LowerSCALAR_TO_VECTOR"); + switch (Op.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected constant value type in " + "LowerSCALAR_TO_VECTOR"); case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; @@ -1839,8 +1892,8 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { &ConstVecValues[0], ConstVecValues.size()); } else { // Otherwise, copy the value from one register to another: - switch (Op0.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR"); + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); case MVT::i8: case MVT::i16: case MVT::i32: @@ -1855,7 +1908,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue N = Op.getOperand(0); SDValue Elt = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); @@ -1867,13 +1920,13 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // sanity checks: if (VT == MVT::i8 && EltNo >= 16) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); else if (VT == MVT::i16 && EltNo >= 8) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); + 
llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); else if (VT == MVT::i32 && EltNo >= 4) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); else if (VT == MVT::i64 && EltNo >= 2) - assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { // i32 and i64: Element 0 is the preferred slot @@ -1884,7 +1937,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { int prefslot_begin = -1, prefslot_end = -1; int elt_byte = EltNo * VT.getSizeInBits() / 8; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Invalid value type!"); case MVT::i8: { @@ -1910,7 +1963,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { assert(prefslot_begin != -1 && prefslot_end != -1 && "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); - unsigned int ShufBytes[16]; + unsigned int ShufBytes[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; for (int i = 0; i < 16; ++i) { // zero fill uppper part of preferred slot, don't care about the // other slots: @@ -1946,10 +2001,10 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { } else { // Variable index: Rotate the requested element into slot 0, then replicate // slot 0 across the vector - MVT VecVT = N.getValueType(); + EVT VecVT = N.getValueType(); if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { - cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n"; - abort(); + llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + "vector type!"); } // Make life easier by making sure the index is zero-extended to i32 @@ -1974,10 +2029,10 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // consistency with the notion of a unified register set) SDValue replicate; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: - cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n"; - abort(); + llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + "type"); /*NOTREACHED*/ case MVT::i8: { SDValue factor = DAG.getConstant(0x00000000, MVT::i32); @@ -2021,12 +2076,12 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue ValOp = Op.getOperand(1); SDValue IdxOp = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); ConstantSDNode *CN = cast(IdxOp); assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), @@ -2047,12 +2102,12 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, { SDValue N0 = Op.getOperand(0); // Everything has at least one operand DebugLoc dl = Op.getDebugLoc(); - MVT ShiftVT = TLI.getShiftAmountTy(); + EVT ShiftVT = TLI.getShiftAmountTy(); assert(Op.getValueType() == MVT::i8); switch (Opc) { default: - assert(0 && "Unhandled i8 math operator"); + llvm_unreachable("Unhandled i8 math operator"); /*NOTREACHED*/ break; case ISD::ADD: { @@ -2078,7 +2133,7 @@ 
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, case ISD::ROTR: case ISD::ROTL: { SDValue N1 = Op.getOperand(1); - MVT N1VT = N1.getValueType(); + EVT N1VT = N1.getValueType(); N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); if (!N1VT.bitsEq(ShiftVT)) { @@ -2101,7 +2156,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, case ISD::SRL: case ISD::SHL: { SDValue N1 = Op.getOperand(1); - MVT N1VT = N1.getValueType(); + EVT N1VT = N1.getValueType(); N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); if (!N1VT.bitsEq(ShiftVT)) { @@ -2118,7 +2173,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, } case ISD::SRA: { SDValue N1 = Op.getOperand(1); - MVT N1VT = N1.getValueType(); + EVT N1VT = N1.getValueType(); N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); if (!N1VT.bitsEq(ShiftVT)) { @@ -2151,7 +2206,7 @@ static SDValue LowerByteImmed(SDValue Op, SelectionDAG &DAG) { SDValue ConstVec; SDValue Arg; - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); ConstVec = Op.getOperand(0); @@ -2202,11 +2257,12 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) { ones per byte, which then have to be accumulated. */ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); + EVT VT = Op.getValueType(); + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + VT, (128 / VT.getSizeInBits())); DebugLoc dl = Op.getDebugLoc(); - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Invalid value type!"); case MVT::i8: { @@ -2312,9 +2368,9 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { */ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SPUTargetLowering &TLI) { - MVT OpVT = Op.getValueType(); + EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType(); + EVT Op0VT = Op0.getValueType(); if ((OpVT == MVT::i32 && Op0VT == MVT::f64) || OpVT == MVT::i64) { @@ -2338,9 +2394,9 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, */ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, SPUTargetLowering &TLI) { - MVT OpVT = Op.getValueType(); + EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType(); + EVT Op0VT = Op0.getValueType(); if ((OpVT == MVT::f64 && Op0VT == MVT::i32) || Op0VT == MVT::i64) { @@ -2369,12 +2425,12 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, SDValue lhs = Op.getOperand(0); SDValue rhs = Op.getOperand(1); - MVT lhsVT = lhs.getValueType(); + EVT lhsVT = lhs.getValueType(); assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); - MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); + EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); - MVT IntVT(MVT::i64); + EVT IntVT(MVT::i64); // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently // selected to a NOP: @@ -2458,9 +2514,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, case ISD::SETONE: compareOp = ISD::SETNE; break; default: - cerr << "CellSPU ISel Select: unimplemented f64 condition\n"; - abort(); - break; + llvm_report_error("CellSPU ISel Select: unimplemented f64 condition"); } SDValue result = @@ -2497,7 +2551,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG 
&DAG, const TargetLowering &TLI) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue lhs = Op.getOperand(0); SDValue rhs = Op.getOperand(1); SDValue trueval = Op.getOperand(2); @@ -2526,14 +2580,15 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) { // Type to truncate to - MVT VT = Op.getValueType(); - MVT::SimpleValueType simpleVT = VT.getSimpleVT(); - MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); + EVT VT = Op.getValueType(); + MVT simpleVT = VT.getSimpleVT(); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), + VT, (128 / VT.getSizeInBits())); DebugLoc dl = Op.getDebugLoc(); // Type to truncate from SDValue Op0 = Op.getOperand(0); - MVT Op0VT = Op0.getValueType(); + EVT Op0VT = Op0.getValueType(); if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { // Create shuffle mask, least significant doubleword of quadword @@ -2555,6 +2610,61 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) return SDValue(); // Leave the truncate unmolested } +/*! + * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic + * algorithm is to duplicate the sign bit using rotmai to generate at + * least one byte full of sign bits. Then propagate the "sign-byte" into + * the leftmost words and the i64/i32 into the rightmost words using shufb. + * + * @param Op The sext operand + * @param DAG The current DAG + * @return The SDValue with the entire instruction sequence + */ +static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) +{ + DebugLoc dl = Op.getDebugLoc(); + + // Type to extend to + MVT OpVT = Op.getValueType().getSimpleVT(); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), + OpVT, (128 / OpVT.getSizeInBits())); + + // Type to extend from + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType().getSimpleVT(); + + // The type to extend to needs to be a i128 and + // the type to extend from needs to be i64 or i32. + assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && + "LowerSIGN_EXTEND: input and/or output operand have wrong size"); + + // Create shuffle mask + unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 + unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 + unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 + SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + DAG.getConstant(mask1, MVT::i32), + DAG.getConstant(mask1, MVT::i32), + DAG.getConstant(mask2, MVT::i32), + DAG.getConstant(mask3, MVT::i32)); + + // Word wise arithmetic right shift to generate at least one byte + // that contains sign bits. + MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; + SDValue sraVal = DAG.getNode(ISD::SRA, + dl, + mvt, + DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), + DAG.getConstant(31, MVT::i32)); + + // Shuffle bytes - Copy the sign bits into the upper 64 bits + // and the input value into the lower 64 bits. + SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, + DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask); + + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle); +} + //! Custom (target-specific) lowering entry point /*! 
This is where LLVM's DAG selection process calls to do target-specific @@ -2564,15 +2674,17 @@ SDValue SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { unsigned Opc = (unsigned) Op.getOpcode(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); switch (Opc) { default: { - cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; - cerr << "Op.getOpcode() = " << Opc << "\n"; - cerr << "*Op.getNode():\n"; +#ifndef NDEBUG + errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; + errs() << "Op.getOpcode() = " << Opc << "\n"; + errs() << "*Op.getNode():\n"; Op.getNode()->dump(); - abort(); +#endif + llvm_unreachable(0); } case ISD::LOAD: case ISD::EXTLOAD: @@ -2589,12 +2701,6 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); case ISD::ConstantFP: return LowerConstantFP(Op, DAG); - case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); - case ISD::CALL: - return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl()); - case ISD::RET: - return LowerRET(Op, DAG, getTargetMachine()); // i8, i64 math ops: case ISD::ADD: @@ -2651,6 +2757,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); + + case ISD::SIGN_EXTEND: + return LowerSIGN_EXTEND(Op, DAG); } return SDValue(); @@ -2662,13 +2771,13 @@ void SPUTargetLowering::ReplaceNodeResults(SDNode *N, { #if 0 unsigned Opc = (unsigned) N->getOpcode(); - MVT OpVT = N->getValueType(0); + EVT OpVT = N->getValueType(0); switch (Opc) { default: { - cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; - cerr << "Op.getOpcode() = " << Opc << "\n"; - cerr << "*Op.getNode():\n"; + errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; + errs() << "Op.getOpcode() = " << Opc << "\n"; + errs() << "*Op.getNode():\n"; N->dump(); abort(); /*NOTREACHED*/ @@ -2692,8 +2801,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(0); // everything has at least one operand - MVT NodeVT = N->getValueType(0); // The node's value type - MVT Op0VT = Op0.getValueType(); // The first operand's result + EVT NodeVT = N->getValueType(0); // The node's value type + EVT Op0VT = Op0.getValueType(); // The first operand's result SDValue Result; // Initially, empty result DebugLoc dl = N->getDebugLoc(); @@ -2722,7 +2831,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const #if !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - cerr << "\n" + errs() << "\n" << "Replace: (add (SPUindirect , ), 0)\n" << "With: (SPUindirect , )\n"; } @@ -2738,7 +2847,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const #if !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - cerr << "\n" + errs() << "\n" << "Replace: (add (SPUindirect , " << CN1->getSExtValue() << "), " << CN0->getSExtValue() << ")\n" << "With: (SPUindirect , " @@ -2762,11 +2871,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const // Types must match, however... 
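// Aside: the folds in PerformDAGCombine share one shape: match a small node
// pattern, then rebuild a cheaper equivalent. The offset fold seen earlier,
// for instance, collapses (add (SPUindirect base, off1), off2) into a single
// indirect address (sketch only):
//
//   SDValue folded =
//       DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, Op0.getOperand(0),
//                   DAG.getConstant(CN0->getSExtValue() + CN1->getSExtValue(),
//                                   Op0VT));
//   return folded; // the DAG combiner substitutes this for node N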
#if !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - cerr << "\nReplace: "; + errs() << "\nReplace: "; N->dump(&DAG); - cerr << "\nWith: "; + errs() << "\nWith: "; Op0.getNode()->dump(&DAG); - cerr << "\n"; + errs() << "\n"; } #endif @@ -2781,11 +2890,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const // (SPUindirect (SPUaform , 0), 0) -> // (SPUaform , 0) - DEBUG(cerr << "Replace: "); + DEBUG(errs() << "Replace: "); DEBUG(N->dump(&DAG)); - DEBUG(cerr << "\nWith: "); + DEBUG(errs() << "\nWith: "); DEBUG(Op0.getNode()->dump(&DAG)); - DEBUG(cerr << "\n"); + DEBUG(errs() << "\n"); return Op0; } @@ -2798,7 +2907,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const #if !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { - cerr << "\n" + errs() << "\n" << "Replace: (SPUindirect (add , ), 0)\n" << "With: (SPUindirect , )\n"; } @@ -2813,9 +2922,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const } case SPUISD::SHLQUAD_L_BITS: case SPUISD::SHLQUAD_L_BYTES: - case SPUISD::VEC_SHL: - case SPUISD::VEC_SRL: - case SPUISD::VEC_SRA: case SPUISD::ROTBYTES_LEFT: { SDValue Op1 = N->getOperand(1); @@ -2860,11 +2966,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const // Otherwise, return unchanged. #ifndef NDEBUG if (Result.getNode()) { - DEBUG(cerr << "\nReplace.SPU: "); + DEBUG(errs() << "\nReplace.SPU: "); DEBUG(N->dump(&DAG)); - DEBUG(cerr << "\nWith: "); + DEBUG(errs() << "\nWith: "); DEBUG(Result.getNode()->dump(&DAG)); - DEBUG(cerr << "\n"); + DEBUG(errs() << "\n"); } #endif @@ -2895,7 +3001,7 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const std::pair SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const + EVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters @@ -2943,9 +3049,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case SPUISD::VEC2PREFSLOT: case SPUISD::SHLQUAD_L_BITS: case SPUISD::SHLQUAD_L_BYTES: - case SPUISD::VEC_SHL: - case SPUISD::VEC_SRL: - case SPUISD::VEC_SRA: case SPUISD::VEC_ROTL: case SPUISD::VEC_ROTR: case SPUISD::ROTBYTES_LEFT: @@ -2963,7 +3066,7 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; case ISD::SETCC: { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { VT = MVT::i32; diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index b1583f4ee2d61..ab349bb7851f8 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -43,9 +43,6 @@ namespace llvm { VEC2PREFSLOT, ///< Extract element 0 SHLQUAD_L_BITS, ///< Rotate quad left, by bits SHLQUAD_L_BYTES, ///< Rotate quad left, by bytes - VEC_SHL, ///< Vector shift left - VEC_SRL, ///< Vector shift right (logical) - VEC_SRA, ///< Vector shift right (arithmetic) VEC_ROTL, ///< Vector rotate left VEC_ROTR, ///< Vector rotate right ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI) @@ -64,22 +61,22 @@ namespace llvm { //! 
Utility functions specific to CellSPU: namespace SPU { SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType); + EVT ValueType); SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType); + EVT ValueType); SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType); + EVT ValueType); SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType); + EVT ValueType); SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, - MVT ValueType); + EVT ValueType); SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG); SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG); SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM); - //! Simplify a MVT::v2i64 constant splat to CellSPU-ready form - SDValue LowerV2I64Splat(MVT OpVT, SelectionDAG &DAG, uint64_t splat, + //! Simplify an EVT::v2i64 constant splat to CellSPU-ready form + SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat, DebugLoc dl); } @@ -109,7 +106,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - Return the ValueType for ISD::SETCC - virtual MVT getSetCCResultType(MVT VT) const; + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; //! Custom lowering hooks virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); @@ -134,7 +131,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, bool hasMemory, @@ -150,6 +147,28 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG); }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index e629c8d31aaf1..ecce8e3e93160 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -17,8 +17,9 @@ #include "SPUTargetMachine.h" #include "SPUGenInstrInfo.inc" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/Streams.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -313,8 +314,7 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, } else if (RC == SPU::VECREGRegisterClass) { opc = (isValidFrameIdx) ?
SPU::STQDv16i8 : SPU::STQXv16i8; } else { - assert(0 && "Unknown regclass!"); - abort(); + llvm_unreachable("Unknown regclass!"); } DebugLoc DL = DebugLoc::getUnknownLoc(); @@ -323,43 +323,6 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); } -void SPUInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - cerr << "storeRegToAddr() invoked!\n"; - abort(); - - if (Addr[0].isFI()) { - /* do what storeRegToStackSlot does here */ - } else { - unsigned Opc = 0; - if (RC == SPU::GPRCRegisterClass) { - /* Opc = PPC::STW; */ - } else if (RC == SPU::R16CRegisterClass) { - /* Opc = PPC::STD; */ - } else if (RC == SPU::R32CRegisterClass) { - /* Opc = PPC::STFD; */ - } else if (RC == SPU::R32FPRegisterClass) { - /* Opc = PPC::STFD; */ - } else if (RC == SPU::R64FPRegisterClass) { - /* Opc = PPC::STFS; */ - } else if (RC == SPU::VECREGRegisterClass) { - /* Opc = PPC::STVX; */ - } else { - assert(0 && "Unknown regclass!"); - abort(); - } - DebugLoc DL = DebugLoc::getUnknownLoc(); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - } -} - void SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -385,8 +348,7 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, } else if (RC == SPU::VECREGRegisterClass) { opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8; } else { - assert(0 && "Unknown regclass in loadRegFromStackSlot!"); - abort(); + llvm_unreachable("Unknown regclass in loadRegFromStackSlot!"); } DebugLoc DL = DebugLoc::getUnknownLoc(); @@ -394,47 +356,6 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); } -/*! - \note We are really pessimistic here about what kind of a load we're doing. - */ -void SPUInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) - const { - cerr << "loadRegToAddr() invoked!\n"; - abort(); - - if (Addr[0].isFI()) { - /* do what loadRegFromStackSlot does here... */ - } else { - unsigned Opc = 0; - if (RC == SPU::R8CRegisterClass) { - /* do brilliance here */ - } else if (RC == SPU::R16CRegisterClass) { - /* Opc = PPC::LWZ; */ - } else if (RC == SPU::R32CRegisterClass) { - /* Opc = PPC::LD; */ - } else if (RC == SPU::R32FPRegisterClass) { - /* Opc = PPC::LFD; */ - } else if (RC == SPU::R64FPRegisterClass) { - /* Opc = PPC::LFS; */ - } else if (RC == SPU::VECREGRegisterClass) { - /* Opc = PPC::LVX; */ - } else if (RC == SPU::GPRCRegisterClass) { - /* Opc = something else! */ - } else { - assert(0 && "Unknown regclass!"); - abort(); - } - DebugLoc DL = DebugLoc::getUnknownLoc(); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - } -} - //! Return true if the specified load or store can be folded bool SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, @@ -543,7 +464,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, } else if (isCondBranch(LastInst)) { // Block ends with fall-through condbranch. 
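// Aside: the AnalyzeBranch contract in brief. On success the hook fills in
// TBB (the taken successor), FBB for a trailing unconditional branch, and
// Cond with operands that InsertBranch() can replay later; this target
// smuggles the branch opcode itself through Cond[0] as an immediate:
//
//   Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
//   Cond.push_back(LastInst->getOperand(0)); // the condition register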
TBB = LastInst->getOperand(1).getMBB(); - DEBUG(cerr << "Pushing LastInst: "); + DEBUG(errs() << "Pushing LastInst: "); DEBUG(LastInst->dump()); Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); Cond.push_back(LastInst->getOperand(0)); @@ -564,7 +485,7 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, // If the block ends with a conditional and unconditional branch, handle it. if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) { TBB = SecondLastInst->getOperand(1).getMBB(); - DEBUG(cerr << "Pushing SecondLastInst: "); + DEBUG(errs() << "Pushing SecondLastInst: "); DEBUG(SecondLastInst->dump()); Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); Cond.push_back(SecondLastInst->getOperand(0)); @@ -596,7 +517,7 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 0; // Remove the first branch. - DEBUG(cerr << "Removing branch: "); + DEBUG(errs() << "Removing branch: "); DEBUG(I->dump()); I->eraseFromParent(); I = MBB.end(); @@ -608,7 +529,7 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 1; // Remove the second branch. - DEBUG(cerr << "Removing second branch: "); + DEBUG(errs() << "Removing second branch: "); DEBUG(I->dump()); I->eraseFromParent(); return 2; @@ -632,14 +553,14 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR)); MIB.addMBB(TBB); - DEBUG(cerr << "Inserted one-way uncond branch: "); + DEBUG(errs() << "Inserted one-way uncond branch: "); DEBUG((*MIB).dump()); } else { // Conditional branch MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); MIB.addReg(Cond[1].getReg()).addMBB(TBB); - DEBUG(cerr << "Inserted one-way cond branch: "); + DEBUG(errs() << "Inserted one-way cond branch: "); DEBUG((*MIB).dump()); } return 1; @@ -651,9 +572,9 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MIB.addReg(Cond[1].getReg()).addMBB(TBB); MIB2.addMBB(FBB); - DEBUG(cerr << "Inserted conditional branch: "); + DEBUG(errs() << "Inserted conditional branch: "); DEBUG((*MIB).dump()); - DEBUG(cerr << "part 2: "); + DEBUG(errs() << "part 2: "); DEBUG((*MIB2).dump()); return 2; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index ffb40875ff103..c644a117965ce 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -68,24 +68,12 @@ namespace llvm { unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const; - //! Store a register to an address, based on its register class - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - //! Load a register from a stack slot, based on its register class. virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; - //! Loqad a register from an address, based on its register class - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - //! 
Return true if the specified load or store can be folded virtual bool canFoldMemoryOperand(const MachineInstr *MI, diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 63eb85a2921e6..09849da45ae27 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -4430,13 +4430,6 @@ def : Pat<(v4i32 v4i32Imm:$imm), def : Pat<(i8 imm:$imm), (ILHr8 imm:$imm)>; -//===----------------------------------------------------------------------===// -// Call instruction patterns: -//===----------------------------------------------------------------------===// -// Return void -def : Pat<(ret), - (RET)>; - //===----------------------------------------------------------------------===// // Zero/Any/Sign extensions //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp new file mode 100644 index 0000000000000..1c921ab87ff20 --- /dev/null +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -0,0 +1,40 @@ +//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the SPUMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "SPUMCAsmInfo.h" +using namespace llvm; + +SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { + ZeroDirective = "\t.space\t"; + SetDirective = "\t.set"; + Data64bitsDirective = "\t.quad\t"; + AlignmentIsInBytes = false; + LCOMMDirective = "\t.lcomm\t"; + + PCSymbol = "."; + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + + // Has leb128, .loc and .file + HasLEB128 = true; + HasDotLocAndDotFile = true; + + SupportsDebugInformation = true; + NeedsSet = true; + + // Exception handling is not supported on CellSPU (think about it: you only + // have 256K for code+data. Would you support exception handling?) + ExceptionsType = ExceptionHandling::None; +} + diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/SPUMCAsmInfo.h new file mode 100644 index 0000000000000..8d75ea84116a5 --- /dev/null +++ b/lib/Target/CellSPU/SPUMCAsmInfo.h @@ -0,0 +1,28 @@ +//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the SPUMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SPUTARGETASMINFO_H +#define SPUTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + struct SPULinuxMCAsmInfo : public MCAsmInfo { + explicit SPULinuxMCAsmInfo(const Target &T, const StringRef &TT); + }; +} // namespace llvm + +#endif /* SPUTARGETASMINFO_H */ diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 87c4115d1b189..c722e4b006ea4 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -87,9 +87,9 @@ def SPUshlquad_l_bits: SDNode<"SPUISD::SHLQUAD_L_BITS", SPUvecshift_type, []>; def SPUshlquad_l_bytes: SDNode<"SPUISD::SHLQUAD_L_BYTES", SPUvecshift_type, []>; // Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only): -def SPUvec_shl: SDNode<"SPUISD::VEC_SHL", SPUvecshift_type, []>; -def SPUvec_srl: SDNode<"SPUISD::VEC_SRL", SPUvecshift_type, []>; -def SPUvec_sra: SDNode<"SPUISD::VEC_SRA", SPUvecshift_type, []>; +def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>; +def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>; +def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>; def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index e031048e7ccb3..8412006124ccb 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -35,7 +35,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include @@ -176,8 +178,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { case SPU::R126: return 126; case SPU::R127: return 127; default: - cerr << "Unhandled reg in SPURegisterInfo::getRegisterNumbering!\n"; - abort(); + llvm_report_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); } } @@ -218,8 +219,8 @@ SPURegisterInfo::getNumArgRegs() /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. 
-const TargetRegisterClass * SPURegisterInfo::getPointerRegClass() const -{ +const TargetRegisterClass * +SPURegisterInfo::getPointerRegClass(unsigned Kind) const { return &SPU::R32CRegClass; } @@ -325,9 +326,9 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, MBB.erase(I); } -void +unsigned SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const + int *Value, RegScavenger *RS) const { unsigned i = 0; MachineInstr &MI = *II; @@ -364,12 +365,13 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, SPOp.ChangeToRegister(SPU::R1, false); if (Offset > SPUFrameInfo::maxFrameOffset() || Offset < SPUFrameInfo::minFrameOffset()) { - cerr << "Large stack adjustment (" + errs() << "Large stack adjustment (" << Offset << ") in SPURegisterInfo::eliminateFrameIndex."; } else { MO.ChangeToImmediate(Offset); } + return 0; } /// determineFrameLayout - Determine the size of the frame and maximum call @@ -485,8 +487,10 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - cerr << "Unhandled frame size: " << FrameSize << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unhandled frame size: " << FrameSize; + llvm_report_error(Msg.str()); } if (hasDebugInfo) { @@ -577,8 +581,10 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - cerr << "Unhandled frame size: " << FrameSize << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unhandled frame size: " << FrameSize; + llvm_report_error(Msg.str()); } } } diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 5b6e9ec68cdb4..1d9d07e9b3e1b 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -43,7 +43,8 @@ namespace llvm { /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. - virtual const TargetRegisterClass *getPointerRegClass() const; + virtual const TargetRegisterClass * + getPointerRegClass(unsigned Kind = 0) const; //! Return the array of callee-saved registers virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const; @@ -62,8 +63,9 @@ namespace llvm { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; //! Convert frame indicies into machine operands - void eliminateFrameIndex(MachineBasicBlock::iterator II, int, - RegScavenger *RS) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + int *Value = NULL, + RegScavenger *RS = NULL) const; //! 
Determine the frame's layour void determineFrameLayout(MachineFunction &MF) const; diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp index 0a1c2f75cfe5e..0f18b7fa8b26a 100644 --- a/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/lib/Target/CellSPU/SPUSubtarget.cpp @@ -13,15 +13,11 @@ #include "SPUSubtarget.h" #include "SPU.h" -#include "llvm/Module.h" -#include "llvm/Target/TargetMachine.h" #include "SPUGenSubtarget.inc" using namespace llvm; -SPUSubtarget::SPUSubtarget(const TargetMachine &tm, const Module &M, - const std::string &FS) : - TM(tm), +SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) : StackAlignment(16), ProcDirective(SPU::DEFAULT_PROC), UseLargeMem(false) diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h index b6a34099b2f7d..94ac73ce39f82 100644 --- a/lib/Target/CellSPU/SPUSubtarget.h +++ b/lib/Target/CellSPU/SPUSubtarget.h @@ -20,9 +20,7 @@ #include namespace llvm { - class Module; class GlobalValue; - class TargetMachine; namespace SPU { enum { @@ -33,8 +31,6 @@ namespace llvm { class SPUSubtarget : public TargetSubtarget { protected: - const TargetMachine &TM; - /// stackAlignment - The minimum alignment known to hold of the stack frame /// on entry to the function and which must be maintained by every function. unsigned StackAlignment; @@ -52,10 +48,9 @@ namespace llvm { public: /// This constructor initializes the data members to match that - /// of the specified module. + /// of the specified triple. /// - SPUSubtarget(const TargetMachine &TM, const Module &M, - const std::string &FS); + SPUSubtarget(const std::string &TT, const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 2470972ca4961..6500067849dbd 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -13,62 +13,36 @@ #include "SPU.h" #include "SPURegisterNames.h" -#include "SPUTargetAsmInfo.h" +#include "SPUMCAsmInfo.h" #include "SPUTargetMachine.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetMachineRegistry.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -namespace { - // Register the targets - RegisterTarget - CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]"); +extern "C" void LLVMInitializeCellSPUTarget() { + // Register the target. + RegisterTargetMachine X(TheCellSPUTarget); + RegisterAsmInfo Y(TheCellSPUTarget); } -// No assembler printer by default -SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0; - -// Force static initialization. -extern "C" void LLVMInitializeCellSPUTarget() { } - const std::pair * SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const { NumEntries = 1; return &LR[0]; } -const TargetAsmInfo * -SPUTargetMachine::createTargetAsmInfo() const -{ - return new SPULinuxTargetAsmInfo(*this); -} - -unsigned -SPUTargetMachine::getModuleMatchQuality(const Module &M) -{ - // We strongly match "spu-*" or "cellspu-*". 
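// Aside: under the TargetRegistry scheme that replaces this hand-rolled
// matching, clients resolve a target from a triple string instead, roughly
// (sketch; error handling elided):
//
//   std::string Err;
//   const Target *T =
//       TargetRegistry::lookupTarget("cellspu-unknown-elf", Err);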
- std::string TT = M.getTargetTriple(); - if ((TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "spu") - || (TT.size() == 7 && std::string(TT.begin(), TT.begin()+7) == "cellspu") - || (TT.size() >= 4 && std::string(TT.begin(), TT.begin()+4) == "spu-") - || (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "cellspu-")) - return 20; - - return 0; // No match at all... -} - -SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS) - : Subtarget(*this, M, FS), +SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameInfo(*this), TLInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) -{ + InstrItins(Subtarget.getInstrItineraryData()) { // For the time being, use static relocations, since there's really no // support for PIC yet. setRelocationModel(Reloc::Static); @@ -78,22 +52,9 @@ SPUTargetMachine::SPUTargetMachine(const Module &M, const std::string &FS) // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool -SPUTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) -{ +bool SPUTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { // Install an instruction selector. PM.add(createSPUISelDag(*this)); return false; } - -bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { - // Output assembly language. - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, Verbose)); - return false; -} diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 4c28521317b9d..9fdcfe9ab619b 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -35,19 +35,9 @@ class SPUTargetMachine : public LLVMTargetMachine { SPUFrameInfo FrameInfo; SPUTargetLowering TLInfo; InstrItineraryData InstrItins; - -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - - // To avoid having target depend on the asmprinter stuff libraries, asmprinter - // set this functions to ctor pointer at startup time if they are linked in. - typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, - SPUTargetMachine &tm, - bool verbose); - static AsmPrinterCtorFn AsmPrinterCtor; - public: - SPUTargetMachine(const Module &M, const std::string &FS); + SPUTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); /// Return the subtarget implementation object virtual const SPUSubtarget *getSubtargetImpl() const { @@ -66,12 +56,6 @@ public: virtual TargetJITInfo *getJITInfo() { return NULL; } - - //! Module match function - /*! - Module matching function called by TargetMachineRegistry(). 
- */ - static unsigned getModuleMatchQuality(const Module &M); virtual SPUTargetLowering *getTargetLowering() const { return const_cast(&TLInfo); @@ -92,13 +76,6 @@ public: // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); - - static void registerAsmPrinter(AsmPrinterCtorFn F) { - AsmPrinterCtor = F; - } }; } // end namespace llvm diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..928d0fe97e0d2 --- /dev/null +++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMCellSPUInfo + CellSPUTargetInfo.cpp + ) + +add_dependencies(LLVMCellSPUInfo CellSPUCodeGenTable_gen) diff --git a/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp new file mode 100644 index 0000000000000..049ea236e9922 --- /dev/null +++ b/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SPU.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheCellSPUTarget; + +extern "C" void LLVMInitializeCellSPUTargetInfo() { + RegisterTarget + X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]"); +} diff --git a/lib/Target/CellSPU/TargetInfo/Makefile b/lib/Target/CellSPU/TargetInfo/Makefile new file mode 100644 index 0000000000000..9cb6827b43233 --- /dev/null +++ b/lib/Target/CellSPU/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/CellSPU/TargetInfo/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMCellSPUInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 28f58e86f6234..14ad451074a53 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -23,13 +23,12 @@ #include "llvm/Pass.h" #include "llvm/PassManager.h" #include "llvm/TypeSymbolTable.h" -#include "llvm/Target/TargetMachineRegistry.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Streams.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include #include @@ -71,19 +70,10 @@ static cl::opt NameToGenerate("cppfor", cl::Optional, cl::desc("Specify the name of the thing to generate"), cl::init("!bad!")); -/// CppBackendTargetMachineModule - Note that this is used on hosts -/// that cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. -extern "C" int CppBackendTargetMachineModule; -int CppBackendTargetMachineModule = 0; - -// Register the target. -static RegisterTarget X("cpp", "C++ backend"); - -// Force static initialization. -extern "C" void LLVMInitializeCppBackendTarget() { } +extern "C" void LLVMInitializeCppBackendTarget() { + // Register the target. + RegisterTargetMachine X(TheCppBackendTarget); +} namespace { typedef std::vector TypeList; @@ -97,7 +87,7 @@ namespace { /// CppWriter - This class is the main chunk of code that converts an LLVM /// module to a C++ translation unit. class CppWriter : public ModulePass { - raw_ostream &Out; + formatted_raw_ostream &Out; const Module *TheModule; uint64_t uniqueNum; TypeMap TypeNames; @@ -112,7 +102,7 @@ namespace { public: static char ID; - explicit CppWriter(raw_ostream &o) : + explicit CppWriter(formatted_raw_ostream &o) : ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {} virtual const char *getPassName() const { return "C++ backend"; } @@ -133,7 +123,7 @@ namespace { private: void printLinkageType(GlobalValue::LinkageTypes LT); void printVisibilityType(GlobalValue::VisibilityTypes VisTypes); - void printCallingConv(unsigned cc); + void printCallingConv(CallingConv::ID cc); void printEscapedString(const std::string& str); void printCFP(const ConstantFP* CFP); @@ -165,7 +155,7 @@ namespace { }; static unsigned indent_level = 0; - inline raw_ostream& nl(raw_ostream& Out, int delta = 0) { + inline formatted_raw_ostream& nl(formatted_raw_ostream& Out, int delta = 0) { Out << "\n"; if (delta >= 0 || indent_level >= unsigned(-delta)) indent_level += delta; @@ -220,8 +210,7 @@ namespace { } void CppWriter::error(const std::string& msg) { - cerr << msg << "\n"; - exit(2); + llvm_report_error(msg); } // printCFP - Print a floating point constant .. 
very carefully :) @@ -230,9 +219,9 @@ namespace { void CppWriter::printCFP(const ConstantFP *CFP) { bool ignored; APFloat APF = APFloat(CFP->getValueAPF()); // copy - if (CFP->getType() == Type::FloatTy) + if (CFP->getType() == Type::getFloatTy(CFP->getContext())) APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get("; + Out << "ConstantFP::get(getGlobalContext(), "; Out << "APFloat("; #if HAVE_PRINTF_A char Buffer[100]; @@ -241,7 +230,7 @@ namespace { !strncmp(Buffer, "-0x", 3) || !strncmp(Buffer, "+0x", 3)) && APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { - if (CFP->getType() == Type::DoubleTy) + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) Out << "BitsToDouble(" << Buffer << ")"; else Out << "BitsToFloat((float)" << Buffer << ")"; @@ -259,11 +248,11 @@ namespace { ((StrVal[0] == '-' || StrVal[0] == '+') && (StrVal[1] >= '0' && StrVal[1] <= '9'))) && (CFP->isExactlyValue(atof(StrVal.c_str())))) { - if (CFP->getType() == Type::DoubleTy) + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) Out << StrVal; else Out << StrVal << "f"; - } else if (CFP->getType() == Type::DoubleTy) + } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) Out << "BitsToDouble(0x" << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) << "ULL) /* " << StrVal << " */"; @@ -279,7 +268,7 @@ namespace { Out << ")"; } - void CppWriter::printCallingConv(unsigned cc){ + void CppWriter::printCallingConv(CallingConv::ID cc){ // Print the calling convention. switch (cc) { case CallingConv::C: Out << "CallingConv::C"; break; @@ -296,6 +285,8 @@ namespace { Out << "GlobalValue::InternalLinkage"; break; case GlobalValue::PrivateLinkage: Out << "GlobalValue::PrivateLinkage"; break; + case GlobalValue::LinkerPrivateLinkage: + Out << "GlobalValue::LinkerPrivateLinkage"; break; case GlobalValue::AvailableExternallyLinkage: Out << "GlobalValue::AvailableExternallyLinkage "; break; case GlobalValue::LinkOnceAnyLinkage: @@ -325,7 +316,7 @@ namespace { void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { switch (VisType) { - default: assert(0 && "Unknown GVar visibility"); + default: llvm_unreachable("Unknown GVar visibility"); case GlobalValue::DefaultVisibility: Out << "GlobalValue::DefaultVisibility"; break; @@ -357,20 +348,21 @@ namespace { // First, handle the primitive types .. 
easy if (Ty->isPrimitiveType() || Ty->isInteger()) { switch (Ty->getTypeID()) { - case Type::VoidTyID: return "Type::VoidTy"; + case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())"; case Type::IntegerTyID: { unsigned BitWidth = cast(Ty)->getBitWidth(); - return "IntegerType::get(" + utostr(BitWidth) + ")"; + return "IntegerType::get(getGlobalContext(), " + utostr(BitWidth) + ")"; } - case Type::X86_FP80TyID: return "Type::X86_FP80Ty"; - case Type::FloatTyID: return "Type::FloatTy"; - case Type::DoubleTyID: return "Type::DoubleTy"; - case Type::LabelTyID: return "Type::LabelTy"; + case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(getGlobalContext())"; + case Type::FloatTyID: return "Type::getFloatTy(getGlobalContext())"; + case Type::DoubleTyID: return "Type::getDoubleTy(getGlobalContext())"; + case Type::LabelTyID: return "Type::getLabelTy(getGlobalContext())"; default: error("Invalid primitive type"); break; } - return "Type::VoidTy"; // shouldn't be returned, but make it sensible + // shouldn't be returned, but make it sensible + return "Type::getVoidTy(getGlobalContext())"; } // Now, see if we've seen the type before and return that @@ -436,7 +428,10 @@ namespace { } else { name = getTypePrefix(val->getType()); } - name += (val->hasName() ? val->getName() : utostr(uniqueNum++)); + if (val->hasName()) + name += val->getName(); + else + name += utostr(uniqueNum++); sanitize(name); NameSet::iterator NI = UsedNames.find(name); if (NI != UsedNames.end()) @@ -477,6 +472,7 @@ namespace { HANDLE_ATTR(Nest); HANDLE_ATTR(ReadNone); HANDLE_ATTR(ReadOnly); + HANDLE_ATTR(InlineHint); HANDLE_ATTR(NoInline); HANDLE_ATTR(AlwaysInline); HANDLE_ATTR(OptimizeForSize); @@ -519,7 +515,8 @@ namespace { if (TI != TypeStack.end()) { TypeMap::const_iterator I = UnresolvedTypes.find(Ty); if (I == UnresolvedTypes.end()) { - Out << "PATypeHolder " << typeName << "_fwd = OpaqueType::get();"; + Out << "PATypeHolder " << typeName; + Out << "_fwd = OpaqueType::get(getGlobalContext());"; nl(Out); UnresolvedTypes[Ty] = typeName; } @@ -579,6 +576,7 @@ namespace { nl(Out); } Out << "StructType* " << typeName << " = StructType::get(" + << "mod->getContext(), " << typeName << "_fields, /*isPacked=*/" << (ST->isPacked() ? 
"true" : "false") << ");"; nl(Out); @@ -618,7 +616,8 @@ namespace { break; } case Type::OpaqueTyID: { - Out << "OpaqueType* " << typeName << " = OpaqueType::get();"; + Out << "OpaqueType* " << typeName; + Out << " = OpaqueType::get(getGlobalContext());"; nl(Out); break; } @@ -753,9 +752,10 @@ namespace { if (const ConstantInt *CI = dyn_cast(CV)) { std::string constValue = CI->getValue().toString(10, true); - Out << "ConstantInt* " << constName << " = ConstantInt::get(APInt(" - << cast(CI->getType())->getBitWidth() << ", \"" - << constValue << "\", " << constValue.length() << ", 10));"; + Out << "ConstantInt* " << constName + << " = ConstantInt::get(getGlobalContext(), APInt(" + << cast(CI->getType())->getBitWidth() + << ", StringRef(\"" << constValue << "\"), 10));"; } else if (isa(CV)) { Out << "ConstantAggregateZero* " << constName << " = ConstantAggregateZero::get(" << typeName << ");"; @@ -767,8 +767,11 @@ namespace { printCFP(CFP); Out << ";"; } else if (const ConstantArray *CA = dyn_cast(CV)) { - if (CA->isString() && CA->getType()->getElementType() == Type::Int8Ty) { - Out << "Constant* " << constName << " = ConstantArray::get(\""; + if (CA->isString() && + CA->getType()->getElementType() == + Type::getInt8Ty(CA->getContext())) { + Out << "Constant* " << constName << + " = ConstantArray::get(getGlobalContext(), \""; std::string tmp = CA->getAsString(); bool nullTerminate = false; if (tmp[tmp.length()-1] == 0) { @@ -839,12 +842,12 @@ namespace { << getCppName(CE->getOperand(0)) << ", " << "&" << constName << "_indices[0], " << constName << "_indices.size()" - << " );"; + << ");"; } else if (CE->isCast()) { printConstant(CE->getOperand(0)); Out << "Constant* " << constName << " = ConstantExpr::getCast("; switch (CE->getOpcode()) { - default: assert(0 && "Invalid cast opcode"); + default: llvm_unreachable("Invalid cast opcode"); case Instruction::Trunc: Out << "Instruction::Trunc"; break; case Instruction::ZExt: Out << "Instruction::ZExt"; break; case Instruction::SExt: Out << "Instruction::SExt"; break; @@ -995,13 +998,13 @@ namespace { void CppWriter::printVariableHead(const GlobalVariable *GV) { nl(Out) << "GlobalVariable* " << getCppName(GV); if (is_inline) { - Out << " = mod->getGlobalVariable("; + Out << " = mod->getGlobalVariable(getGlobalContext(), "; printEscapedString(GV->getName()); Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; nl(Out) << "if (!" << getCppName(GV) << ") {"; in(); nl(Out) << getCppName(GV); } - Out << " = new GlobalVariable("; + Out << " = new GlobalVariable(/*Module=*/*mod, "; nl(Out) << "/*Type=*/"; printCppName(GV->getType()->getElementType()); Out << ","; @@ -1016,8 +1019,7 @@ namespace { } nl(Out) << "/*Name=*/\""; printEscapedString(GV->getName()); - Out << "\","; - nl(Out) << "mod);"; + Out << "\");"; nl(Out); if (GV->hasSection()) { @@ -1095,7 +1097,7 @@ namespace { case Instruction::Ret: { const ReturnInst* ret = cast(I); - Out << "ReturnInst::Create(" + Out << "ReturnInst::Create(getGlobalContext(), " << (ret->getReturnValue() ? 
opNames[0] + ", " : "") << bbname << ");"; break; } @@ -1159,8 +1161,9 @@ namespace { << bbname << ");"; break; } - case Instruction::Unreachable:{ + case Instruction::Unreachable: { Out << "new UnreachableInst(" + << "getGlobalContext(), " << bbname << ");"; break; } @@ -1210,7 +1213,7 @@ namespace { break; } case Instruction::FCmp: { - Out << "FCmpInst* " << iName << " = new FCmpInst("; + Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", "; switch (cast(I)->getPredicate()) { case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; @@ -1232,11 +1235,11 @@ namespace { } Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; printEscapedString(I->getName()); - Out << "\", " << bbname << ");"; + Out << "\");"; break; } case Instruction::ICmp: { - Out << "ICmpInst* " << iName << " = new ICmpInst("; + Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", "; switch (cast(I)->getPredicate()) { case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; @@ -1252,7 +1255,7 @@ namespace { } Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; printEscapedString(I->getName()); - Out << "\", " << bbname << ");"; + Out << "\");"; break; } case Instruction::Malloc: { @@ -1680,7 +1683,8 @@ namespace { for (Function::const_iterator BI = F->begin(), BE = F->end(); BI != BE; ++BI) { std::string bbname(getCppName(BI)); - Out << "BasicBlock* " << bbname << " = BasicBlock::Create(\""; + Out << "BasicBlock* " << bbname << + " = BasicBlock::Create(getGlobalContext(), \""; if (BI->hasName()) printEscapedString(BI->getName()); Out << "\"," << getCppName(BI->getParent()) << ",0);"; @@ -1799,6 +1803,7 @@ namespace { void CppWriter::printProgram(const std::string& fname, const std::string& mName) { + Out << "#include \n"; Out << "#include \n"; Out << "#include \n"; Out << "#include \n"; @@ -1808,8 +1813,8 @@ namespace { Out << "#include \n"; Out << "#include \n"; Out << "#include \n"; + Out << "#include \n"; Out << "#include \n"; - Out << "#include \n"; Out << "#include \n"; Out << "#include \n"; Out << "#include \n"; @@ -1821,7 +1826,6 @@ namespace { Out << "int main(int argc, char**argv) {\n"; Out << " Module* Mod = " << fname << "();\n"; Out << " verifyModule(*Mod, PrintMessageAction);\n"; - Out << " outs().flush();\n"; Out << " PassManager PM;\n"; Out << " PM.add(createPrintModulePass(&outs()));\n"; Out << " PM.run(*Mod);\n"; @@ -1836,7 +1840,7 @@ namespace { nl(Out,1) << "// Module Construction"; nl(Out) << "Module* mod = new Module(\""; printEscapedString(mName); - Out << "\");"; + Out << "\", getGlobalContext());"; if (!TheModule->getTargetTriple().empty()) { nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");"; } @@ -2014,7 +2018,7 @@ char CppWriter::ID = 0; //===----------------------------------------------------------------------===// bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - raw_ostream &o, + formatted_raw_ostream &o, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { if (FileType != TargetMachine::AssemblyFile) return true; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index db4bc0e722c89..1f74f76b5ac14 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -19,25 +19,24 @@ namespace llvm { -class raw_ostream; +class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { 
- const TargetData DataLayout; // Calculates type size & alignment - - CPPTargetMachine(const Module &M, const std::string &FS) - : DataLayout(&M) {} + CPPTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : TargetMachine(T) {} virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out, + virtual bool addPassesToEmitWholeFile(PassManager &PM, + formatted_raw_ostream &Out, CodeGenFileType FileType, CodeGenOpt::Level OptLevel); - // This class always works, but shouldn't be the default in most cases. - static unsigned getModuleMatchQuality(const Module &M) { return 1; } - - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const TargetData *getTargetData() const { return 0; } }; +extern Target TheCppBackendTarget; + } // End llvm namespace diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile index ca7e1a82c8089..dc9cf48c8b1e8 100644 --- a/lib/Target/CppBackend/Makefile +++ b/lib/Target/CppBackend/Makefile @@ -9,6 +9,9 @@ LEVEL = ../../.. LIBRARYNAME = LLVMCppBackend + +DIRS = TargetInfo + include $(LEVEL)/Makefile.common CompileCommonOpts += -Wno-format diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..edaf5d3cb1886 --- /dev/null +++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMCppBackendInfo + CppBackendTargetInfo.cpp + ) + diff --git a/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp new file mode 100644 index 0000000000000..d0aeb12499c53 --- /dev/null +++ b/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp @@ -0,0 +1,26 @@ +//===-- CppBackendTargetInfo.cpp - CppBackend Target Implementation -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "CPPTargetMachine.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheCppBackendTarget; + +static unsigned CppBackend_TripleMatchQuality(const std::string &TT) { + // This class always works, but shouldn't be the default in most cases. + return 1; +} + +extern "C" void LLVMInitializeCppBackendTargetInfo() { + TargetRegistry::RegisterTarget(TheCppBackendTarget, "cpp", + "C++ backend", + &CppBackend_TripleMatchQuality); +} diff --git a/lib/Target/CppBackend/TargetInfo/Makefile b/lib/Target/CppBackend/TargetInfo/Makefile new file mode 100644 index 0000000000000..6e682838daec8 --- /dev/null +++ b/lib/Target/CppBackend/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/CppBackend/TargetInfo/Makefile -----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMCppBackendInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index ee73c381cd4ac..26d637b4347b3 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -19,44 +19,35 @@ #include "llvm/TypeSymbolTable.h" #include "llvm/Analysis/ConstantsScanner.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/Passes.h" +using namespace llvm; -namespace { +namespace llvm { // TargetMachine for the MSIL struct VISIBILITY_HIDDEN MSILTarget : public TargetMachine { - const TargetData DataLayout; // Calculates type size & alignment - - MSILTarget(const Module &M, const std::string &FS) - : DataLayout(&M) {} + MSILTarget(const Target &T, const std::string &TT, const std::string &FS) + : TargetMachine(T) {} virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, raw_ostream &Out, + virtual bool addPassesToEmitWholeFile(PassManager &PM, + formatted_raw_ostream &Out, CodeGenFileType FileType, CodeGenOpt::Level OptLevel); - // This class always works, but shouldn't be the default in most cases. - static unsigned getModuleMatchQuality(const Module &M) { return 1; } - - virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const TargetData *getTargetData() const { return 0; } }; } -/// MSILTargetMachineModule - Note that this is used on hosts that -/// cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. -extern "C" int MSILTargetMachineModule; -int MSILTargetMachineModule = 0; -
-static RegisterTarget<MSILTarget> X("msil", "MSIL backend"); -
-// Force static initialization. -extern "C" void LLVMInitializeMSILTarget() { } +extern "C" void LLVMInitializeMSILTarget() { + // Register the target. + RegisterTargetMachine<MSILTarget> X(TheMSILTarget); +} bool MSILModule::runOnModule(Module &M) { ModulePtr = &M; @@ -239,8 +230,17 @@ bool MSILWriter::isZeroValue(const Value* V) { std::string MSILWriter::getValueName(const Value* V) { + std::string Name; + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) + Name = Mang->getMangledName(GV); + else { + unsigned &No = AnonValueNumbers[V]; + if (No == 0) No = ++NextAnonValueNumber; + Name = "tmp" + utostr(No); + } + // Putting the name in quotes allows control and space characters.
- return "'"+Mang->getValueName(V)+"'"; + return "'"+Name+"'"; } @@ -257,11 +257,20 @@ std::string MSILWriter::getLabelName(const std::string& Name) { std::string MSILWriter::getLabelName(const Value* V) { - return getLabelName(Mang->getValueName(V)); + std::string Name; + if (const GlobalValue *GV = dyn_cast(V)) + Name = Mang->getMangledName(GV); + else { + unsigned &No = AnonValueNumbers[V]; + if (No == 0) No = ++NextAnonValueNumber; + Name = "tmp" + utostr(No); + } + + return getLabelName(Name); } -std::string MSILWriter::getConvModopt(unsigned CallingConvID) { +std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) { switch (CallingConvID) { case CallingConv::C: case CallingConv::Cold: @@ -272,8 +281,8 @@ std::string MSILWriter::getConvModopt(unsigned CallingConvID) { case CallingConv::X86_StdCall: return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) "; default: - cerr << "CallingConvID = " << CallingConvID << '\n'; - assert(0 && "Unsupported calling convention"); + errs() << "CallingConvID = " << CallingConvID << '\n'; + llvm_unreachable("Unsupported calling convention"); } return ""; // Not reached } @@ -318,8 +327,8 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { case Type::DoubleTyID: return "float64 "; default: - cerr << "Type = " << *Ty << '\n'; - assert(0 && "Invalid primitive type"); + errs() << "Type = " << *Ty << '\n'; + llvm_unreachable("Invalid primitive type"); } return ""; // Not reached } @@ -346,8 +355,8 @@ std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, return getArrayTypeName(Ty->getTypeID(),Ty); return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' "; default: - cerr << "Type = " << *Ty << '\n'; - assert(0 && "Invalid type in getTypeName()"); + errs() << "Type = " << *Ty << '\n'; + llvm_unreachable("Invalid type in getTypeName()"); } return ""; // Not reached } @@ -390,8 +399,8 @@ std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand, case Type::PointerTyID: return "i"+utostr(TD->getTypeAllocSize(Ty)); default: - cerr << "TypeID = " << Ty->getTypeID() << '\n'; - assert(0 && "Invalid type in TypeToPostfix()"); + errs() << "TypeID = " << Ty->getTypeID() << '\n'; + llvm_unreachable("Invalid type in TypeToPostfix()"); } return ""; // Not reached } @@ -406,7 +415,7 @@ void MSILWriter::printConvToPtr() { printSimpleInstruction("conv.u8"); break; default: - assert(0 && "Module use not supporting pointer size"); + llvm_unreachable("Module use not supporting pointer size"); } } @@ -417,15 +426,15 @@ void MSILWriter::printPtrLoad(uint64_t N) { printSimpleInstruction("ldc.i4",utostr(N).c_str()); // FIXME: Need overflow test? if (!isUInt32(N)) { - cerr << "Value = " << utostr(N) << '\n'; - assert(0 && "32-bit pointer overflowed"); + errs() << "Value = " << utostr(N) << '\n'; + llvm_unreachable("32-bit pointer overflowed"); } break; case Module::Pointer64: printSimpleInstruction("ldc.i8",utostr(N).c_str()); break; default: - assert(0 && "Module use not supporting pointer size"); + llvm_unreachable("Module use not supporting pointer size"); } } @@ -460,8 +469,8 @@ void MSILWriter::printConstLoad(const Constant* C) { // Undefined constant value = NULL. 
printPtrLoad(0); } else { - cerr << "Constant = " << *C << '\n'; - assert(0 && "Invalid constant value"); + errs() << "Constant = " << *C << '\n'; + llvm_unreachable("Invalid constant value"); } Out << '\n'; } @@ -509,8 +518,8 @@ void MSILWriter::printValueLoad(const Value* V) { printConstantExpr(cast<ConstantExpr>(V)); break; default: - cerr << "Value = " << *V << '\n'; - assert(0 && "Invalid value location"); + errs() << "Value = " << *V << '\n'; + llvm_unreachable("Invalid value location"); } } @@ -524,8 +533,8 @@ printSimpleInstruction("stloc",getValueName(V).c_str()); break; default: - cerr << "Value = " << *V << '\n'; - assert(0 && "Invalid value location"); + errs() << "Value = " << *V << '\n'; + llvm_unreachable("Invalid value location"); } } @@ -651,12 +660,19 @@ void MSILWriter::printIndirectSave(const Type* Ty) { void MSILWriter::printCastInstruction(unsigned int Op, const Value* V, - const Type* Ty) { + const Type* Ty, const Type* SrcTy) { std::string Tmp(""); printValueLoad(V); switch (Op) { // Signed case Instruction::SExt: + // If sign extending int, convert first from unsigned to signed + // with the same bit size - because otherwise we will lose the sign. + if (SrcTy) { + Tmp = "conv."+getTypePostfix(SrcTy,false,true); + printSimpleInstruction(Tmp.c_str()); + } + // FALLTHROUGH case Instruction::SIToFP: case Instruction::FPToSI: Tmp = "conv."+getTypePostfix(Ty,false,true); @@ -679,8 +695,8 @@ void MSILWriter::printCastInstruction(unsigned int Op, const Value* V, // FIXME: meaning that ld*/st* instruction do not change data format. break; default: - cerr << "Opcode = " << Op << '\n'; - assert(0 && "Invalid conversion instruction"); + errs() << "Opcode = " << Op << '\n'; + llvm_unreachable("Invalid conversion instruction"); } } @@ -770,8 +786,8 @@ void MSILWriter::printFunctionCall(const Value* FnVal, else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst)) Name = getConvModopt(Invoke->getCallingConv()); else { - cerr << "Instruction = " << Inst->getName() << '\n'; - assert(0 && "Need \"Invoke\" or \"Call\" instruction only"); + errs() << "Instruction = " << Inst->getName() << '\n'; + llvm_unreachable("Need \"Invoke\" or \"Call\" instruction only"); } if (const Function* F = dyn_cast<Function>(FnVal)) { // Direct call. @@ -804,7 +820,8 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { // Save as pointer type "void*" printValueLoad(Inst->getOperand(1)); printSimpleInstruction("ldloca",Name.c_str()); - printIndirectSave(PointerType::getUnqual(IntegerType::get(8))); + printIndirectSave(PointerType::getUnqual( + IntegerType::get(Inst->getContext(), 8))); break; case Intrinsic::vaend: // Close argument list handle.
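
The hunks above and below apply one recurring substitution: diagnostics go to errs() instead of the removed cerr stream, and assert(0 && ...) or abort() becomes llvm_unreachable() or llvm_report_error(). A minimal self-contained sketch of that idiom follows; the helper rejectValue is hypothetical and only illustrates the pattern, it is not part of this patch.

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Hypothetical helper showing the replacement idiom used throughout
    // the MSIL and CellSPU writers in this patch.
    static void rejectValue(const char *What, int ID) {
      // errs() is LLVM's raw_ostream bound to stderr, replacing cerr.
      errs() << What << " = " << ID << '\n';
      // Unlike assert(0 && "..."), which compiles away under NDEBUG,
      // llvm_unreachable() still prints its message and aborts in
      // release builds.
      llvm_unreachable("Unsupported value");
    }

For conditions that are reachable user errors rather than compiler bugs, the patch uses llvm_report_error(msg) instead, which invokes any installed error handler before exiting.
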
@@ -818,8 +835,8 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator"); break; default: - cerr << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n'; - assert(0 && "Invalid intrinsic function"); + errs() << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n'; + llvm_unreachable("Invalid intrinsic function"); } } @@ -877,12 +894,13 @@ void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left, break; case ICmpInst::ICMP_UGT: printBinaryInstruction("cgt.un",Left,Right); + break; case ICmpInst::ICMP_SGT: printBinaryInstruction("cgt",Left,Right); break; default: - cerr << "Predicate = " << Predicate << '\n'; - assert(0 && "Invalid icmp predicate"); + errs() << "Predicate = " << Predicate << '\n'; + llvm_unreachable("Invalid icmp predicate"); } } @@ -976,7 +994,7 @@ printSimpleInstruction("or"); break; default: - assert(0 && "Illegal FCmp predicate"); + llvm_unreachable("Illegal FCmp predicate"); } } @@ -1024,7 +1042,8 @@ void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) { "instance typedref [mscorlib]System.ArgIterator::GetNextArg()"); printSimpleInstruction("refanyval","void*"); std::string Name = - "ldind."+getTypePostfix(PointerType::getUnqual(IntegerType::get(8)),false); + "ldind."+getTypePostfix(PointerType::getUnqual( + IntegerType::get(Inst->getContext(), 8)),false); printSimpleInstruction(Name.c_str()); } @@ -1132,9 +1151,13 @@ void MSILWriter::printInstruction(const Instruction* Inst) { case Instruction::Store: printIndirectSave(Inst->getOperand(1), Inst->getOperand(0)); break; + case Instruction::SExt: + printCastInstruction(Inst->getOpcode(),Left, + cast<CastInst>(Inst)->getDestTy(), + cast<CastInst>(Inst)->getSrcTy()); + break; case Instruction::Trunc: case Instruction::ZExt: - case Instruction::SExt: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::UIToFP: @@ -1169,10 +1192,10 @@ printAllocaInstruction(cast<AllocaInst>(Inst)); break; case Instruction::Malloc: - assert(0 && "LowerAllocationsPass used"); + llvm_unreachable("LowerAllocationsPass used"); break; case Instruction::Free: - assert(0 && "LowerAllocationsPass used"); + llvm_unreachable("LowerAllocationsPass used"); break; case Instruction::Unreachable: printSimpleInstruction("ldstr", "\"Unreachable instruction\""); @@ -1184,8 +1207,8 @@ printVAArgInstruction(cast<VAArgInst>(Inst)); break; default: - cerr << "Instruction = " << Inst->getName() << '\n'; - assert(0 && "Unsupported instruction"); + errs() << "Instruction = " << Inst->getName() << '\n'; + llvm_unreachable("Unsupported instruction"); } } @@ -1216,7 +1239,7 @@ void MSILWriter::printBasicBlock(const BasicBlock* BB) { // Print instruction printInstruction(Inst); // Save result - if (Inst->getType()!=Type::VoidTy) { + if (Inst->getType()!=Type::getVoidTy(BB->getContext())) { // Do not save value after invoke, it is done in the "try" block if (Inst->getOpcode()==Instruction::Invoke) continue; printValueSave(Inst); @@ -1245,7 +1268,7 @@ void MSILWriter::printLocalVariables(const Function& F) { Ty = PointerType::getUnqual(AI->getAllocatedType()); Name = getValueName(AI); Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n"; - } else if (I->getType()!=Type::VoidTy) { + } else if (I->getType()!=Type::getVoidTy(F.getContext())) { // Operation result.
Ty = I->getType(); Name = getValueName(&*I); @@ -1372,8 +1395,8 @@ void MSILWriter::printConstantExpr(const ConstantExpr* CE) { printBinaryInstruction("shr",left,right); break; default: - cerr << "Expression = " << *CE << "\n"; - assert(0 && "Invalid constant expression"); + errs() << "Expression = " << *CE << "\n"; + llvm_unreachable("Invalid constant expression"); } } @@ -1406,8 +1429,8 @@ void MSILWriter::printStaticInitializerList() { postfix = "stind."+postfix; printSimpleInstruction(postfix.c_str()); } else { - cerr << "Constant = " << *I->constant << '\n'; - assert(0 && "Invalid static initializer"); + errs() << "Constant = " << *I->constant << '\n'; + llvm_unreachable("Invalid static initializer"); } } } @@ -1470,8 +1493,8 @@ unsigned int MSILWriter::getBitWidth(const Type* Ty) { case 64: return N; default: - cerr << "Bits = " << N << '\n'; - assert(0 && "Unsupported integer width"); + errs() << "Bits = " << N << '\n'; + llvm_unreachable("Unsupported integer width"); } return 0; // Not reached } @@ -1528,12 +1551,12 @@ void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) { // Null pointer initialization if (TySize==4) Out << "int32 (0)"; else if (TySize==8) Out << "int64 (0)"; - else assert(0 && "Invalid pointer size"); + else llvm_unreachable("Invalid pointer size"); } break; default: - cerr << "TypeID = " << Ty->getTypeID() << '\n'; - assert(0 && "Invalid type in printStaticConstant()"); + errs() << "TypeID = " << Ty->getTypeID() << '\n'; + llvm_unreachable("Invalid type in printStaticConstant()"); } // Increase offset. Offset += TySize; @@ -1555,8 +1578,8 @@ void MSILWriter::printStaticInitializer(const Constant* C, Out << getTypeName(C->getType()); break; default: - cerr << "Type = " << *C << "\n"; - assert(0 && "Invalid constant type"); + errs() << "Type = " << *C << "\n"; + llvm_unreachable("Invalid constant type"); } // Print initializer std::string label = Name; @@ -1595,17 +1618,18 @@ void MSILWriter::printGlobalVariables() { const char* MSILWriter::getLibraryName(const Function* F) { - return getLibraryForSymbol(F->getName().c_str(), true, F->getCallingConv()); + return getLibraryForSymbol(F->getName(), true, F->getCallingConv()); } const char* MSILWriter::getLibraryName(const GlobalVariable* GV) { - return getLibraryForSymbol(Mang->getValueName(GV).c_str(), false, 0); + return getLibraryForSymbol(Mang->getMangledName(GV), false, CallingConv::C); } -const char* MSILWriter::getLibraryForSymbol(const char* Name, bool isFunction, - unsigned CallingConv) { +const char* MSILWriter::getLibraryForSymbol(const StringRef &Name, + bool isFunction, + CallingConv::ID CallingConv) { // TODO: Read *.def file with function and library definitions. return "MSVCRT.DLL"; } @@ -1654,11 +1678,10 @@ void MSILWriter::printExternals() { E = ModulePtr->global_end(); I!=E; ++I) { if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue; // Use "LoadLibrary"/"GetProcAddress" to retrieve the variable address.
- std::string Label = "not_null$_"+utostr(getUniqID()); std::string Tmp = getTypeName(I->getType())+getValueName(&*I); printSimpleInstruction("ldsflda",Tmp.c_str()); Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n"; - Out << "\tldstr\t\"" << Mang->getValueName(&*I) << "\"\n"; + Out << "\tldstr\t\"" << Mang->getMangledName(&*I) << "\"\n"; printSimpleInstruction("call","void* $MSIL_Import(string,string)"); printIndirectSave(I->getType()); } @@ -1671,7 +1694,8 @@ void MSILWriter::printExternals() { // External Interface declaration //===----------------------------------------------------------------------===// -bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, raw_ostream &o, +bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, + formatted_raw_ostream &o, CodeGenFileType FileType, CodeGenOpt::Level OptLevel) { diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h index 45f5579bfb341..2280a3bed9151 100644 --- a/lib/Target/MSIL/MSILWriter.h +++ b/lib/Target/MSIL/MSILWriter.h @@ -13,24 +13,24 @@ #ifndef MSILWRITER_H #define MSILWRITER_H +#include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/FindUsedTypes.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetMachineRegistry.h" #include "llvm/Support/Mangler.h" -#include -using namespace llvm; -namespace { +namespace llvm { + extern Target TheMSILTarget; class MSILModule : public ModulePass { Module *ModulePtr; @@ -56,7 +56,7 @@ namespace { }; - class MSILWriter : public FunctionPass { + class MSILWriter : public FunctionPass { struct StaticInitializer { const Constant* constant; uint64_t offset; @@ -75,7 +75,7 @@ namespace { } public: - raw_ostream &Out; + formatted_raw_ostream &Out; Module* ModulePtr; const TargetData* TD; Mangler* Mang; @@ -85,7 +85,11 @@ namespace { StaticInitList; const std::set* UsedTypes; static char ID; - MSILWriter(raw_ostream &o) : FunctionPass(&ID), Out(o) { + DenseMap AnonValueNumbers; + unsigned NextAnonValueNumber; + + MSILWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o), + NextAnonValueNumber(0) { UniqID = 0; } @@ -130,7 +134,7 @@ namespace { std::string getLabelName(const std::string& Name); - std::string getConvModopt(unsigned CallingConvID); + std::string getConvModopt(CallingConv::ID CallingConvID); std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty); @@ -183,7 +187,7 @@ namespace { void printIndirectSave(const Type* Ty); void printCastInstruction(unsigned int Op, const Value* V, - const Type* Ty); + const Type* Ty, const Type* SrcTy=0); void printGepInstruction(const Value* V, gep_type_iterator I, gep_type_iterator E); @@ -244,11 +248,12 @@ namespace { const char* getLibraryName(const GlobalVariable* GV); - const char* getLibraryForSymbol(const char* Name, bool isFunction, - unsigned CallingConv); + const char* getLibraryForSymbol(const StringRef &Name, bool isFunction, + CallingConv::ID CallingConv); void printExternals(); }; + } #endif diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile index 94265edf98c51..8057cc7480390 100644 --- a/lib/Target/MSIL/Makefile +++ b/lib/Target/MSIL/Makefile @@ -9,6 +9,9 @@ LEVEL = 
../../.. LIBRARYNAME = LLVMMSIL + +DIRS = TargetInfo + include $(LEVEL)/Makefile.common CompileCommonOpts := $(CompileCommonOpts) -Wno-format diff --git a/lib/Target/MSIL/TargetInfo/CMakeLists.txt b/lib/Target/MSIL/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..9f0c3a09341a9 --- /dev/null +++ b/lib/Target/MSIL/TargetInfo/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMSILInfo + MSILTargetInfo.cpp + ) + diff --git a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp new file mode 100644 index 0000000000000..dfd42814e51cc --- /dev/null +++ b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp @@ -0,0 +1,26 @@ +//===-- MSILTargetInfo.cpp - MSIL Target Implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MSILWriter.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheMSILTarget; + +static unsigned MSIL_TripleMatchQuality(const std::string &TT) { + // This class always works, but shouldn't be the default in most cases. + return 1; +} + +extern "C" void LLVMInitializeMSILTargetInfo() { + TargetRegistry::RegisterTarget(TheMSILTarget, "msil", + "MSIL backend", + &MSIL_TripleMatchQuality); +} diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile new file mode 100644 index 0000000000000..30b0950db0f75 --- /dev/null +++ b/lib/Target/MSIL/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/MSIL/TargetInfo/Makefile -----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMSILInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/AsmPrinter/CMakeLists.txt b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000000000..6e6688746463e --- /dev/null +++ b/lib/Target/MSP430/AsmPrinter/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMSP430AsmPrinter + MSP430AsmPrinter.cpp + ) +add_dependencies(LLVMMSP430AsmPrinter MSP430CodeGenTable_gen) diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp new file mode 100644 index 0000000000000..852019febf5e8 --- /dev/null +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -0,0 +1,281 @@ +//===-- MSP430AsmPrinter.cpp - MSP430 LLVM assembly writer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the MSP430 assembly language. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "MSP430.h" +#include "MSP430InstrInfo.h" +#include "MSP430MCAsmInfo.h" +#include "MSP430TargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter { + public: + MSP430AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *MAI, bool V) + : AsmPrinter(O, TM, MAI, V) {} + + virtual const char *getPassName() const { + return "MSP430 Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int OpNum, + const char* Modifier = 0); + void printSrcMemOperand(const MachineInstr *MI, int OpNum, + const char* Modifier = 0); + void printCCOperand(const MachineInstr *MI, int OpNum); + void printInstruction(const MachineInstr *MI); // autogenerated. + static const char *getRegisterName(unsigned RegNo); + + void printMachineInstruction(const MachineInstr * MI); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode); + bool PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode); + + void emitFunctionHeader(const MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &F); + + virtual void PrintGlobalVariable(const GlobalVariable *GV) { + // FIXME: No support for global variables? + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + } + }; +} // end of anonymous namespace + +#include "MSP430GenAsmWriter.inc" + + +void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + const Function *F = MF.getFunction(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + + unsigned FnAlign = MF.getAlignment(); + EmitAlignment(FnAlign, F); + + switch (F->getLinkage()) { + default: llvm_unreachable("Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. 
+ case Function::PrivateLinkage: + case Function::LinkerPrivateLinkage: + break; + case Function::ExternalLinkage: + O << "\t.globl\t" << CurrentFnName << '\n'; + break; + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + O << "\t.weak\t" << CurrentFnName << '\n'; + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.type\t" << CurrentFnName << ",@function\n" + << CurrentFnName << ":\n"; +} + +bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + O << "\n\n"; + + // Print the 'header' of function + emitFunctionHeader(MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) + // Print the assembly for the instruction. + printMachineInstruction(II); + } + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; + + // We didn't modify anything + return false; +} + +void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + processDebugLoc(MI, true); + + // Call the autogenerated instruction printer routines. + printInstruction(MI); + + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + O << '\n'; + + processDebugLoc(MI, false); +} + +void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + const char* Modifier) { + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << getRegisterName(MO.getReg()); + return; + case MachineOperand::MO_Immediate: + if (!Modifier || strcmp(Modifier, "nohash")) + O << '#'; + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: { + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + std::string Name = Mang->getMangledName(MO.getGlobal()); + uint64_t Offset = MO.getOffset(); + + O << (isMemOp ? '&' : '#'); + if (Offset) + O << '(' << Offset << '+'; + + O << Name; + if (Offset) + O << ')'; + + return; + } + case MachineOperand::MO_ExternalSymbol: { + bool isMemOp = Modifier && !strcmp(Modifier, "mem"); + std::string Name(MAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + + O << (isMemOp ? 
'&' : '#') << Name; + + return; + } + default: + llvm_unreachable("Not implemented yet!"); + } +} + +void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, + const char* Modifier) { + const MachineOperand &Base = MI->getOperand(OpNum); + const MachineOperand &Disp = MI->getOperand(OpNum+1); + + if (Base.isGlobal()) + printOperand(MI, OpNum, "mem"); + else if (Disp.isImm() && !Base.getReg()) + printOperand(MI, OpNum); + else if (Base.getReg()) { + if (Disp.getImm()) { + printOperand(MI, OpNum + 1, "nohash"); + O << '('; + printOperand(MI, OpNum); + O << ')'; + } else { + O << '@'; + printOperand(MI, OpNum); + } + } else + llvm_unreachable("Unsupported memory operand"); +} + +void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) { + unsigned CC = MI->getOperand(OpNum).getImm(); + + switch (CC) { + default: + llvm_unreachable("Unsupported CC code"); + break; + case MSP430::COND_E: + O << "eq"; + break; + case MSP430::COND_NE: + O << "ne"; + break; + case MSP430::COND_HS: + O << "hs"; + break; + case MSP430::COND_LO: + O << "lo"; + break; + case MSP430::COND_GE: + O << "ge"; + break; + case MSP430::COND_L: + O << 'l'; + break; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool MSP430AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + printOperand(MI, OpNo); + return false; +} + +bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) { + return true; // Unknown modifier. + } + printSrcMemOperand(MI, OpNo); + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeMSP430AsmPrinter() { + RegisterAsmPrinter X(TheMSP430Target); +} diff --git a/lib/Target/MSP430/AsmPrinter/Makefile b/lib/Target/MSP430/AsmPrinter/Makefile new file mode 100644 index 0000000000000..4f340c673358e --- /dev/null +++ b/lib/Target/MSP430/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/MSP430/AsmPrinter/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMSP430AsmPrinter + +# Hack: we need to include 'main' MSP430 target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
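
With the RegisterAsmPrinter call just above, every MSP430 component is now found through the TargetRegistry rather than the deleted getModuleMatchQuality() machinery. The sketch below shows how a client tool wires this up; the main() shell is illustrative only, and the TargetInfo initializer is assumed to follow the same pattern the patch establishes for CellSPU and MSIL.

    // Illustrative driver: each LLVMInitialize* entry point registers one
    // piece of the backend with the TargetRegistry.
    extern "C" void LLVMInitializeMSP430TargetInfo(); // the Target entry
    extern "C" void LLVMInitializeMSP430Target();     // TargetMachine/AsmInfo
    extern "C" void LLVMInitializeMSP430AsmPrinter(); // assembly printer

    int main() {
      LLVMInitializeMSP430TargetInfo();
      LLVMInitializeMSP430Target();
      LLVMInitializeMSP430AsmPrinter();
      // Clients then locate the target by triple, e.g. through
      // TargetRegistry::lookupTarget("msp430-generic-generic", Error).
      return 0;
    }
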
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 67017733cd9cf..60e0bb1856c32 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -10,14 +10,14 @@ tablegen(MSP430GenDAGISel.inc -gen-dag-isel) tablegen(MSP430GenCallingConv.inc -gen-callingconv) tablegen(MSP430GenSubtarget.inc -gen-subtarget) -add_llvm_target(MSP430 - MSP430AsmPrinter.cpp - MSP430FrameInfo.cpp +add_llvm_target(MSP430CodeGen MSP430InstrInfo.cpp MSP430ISelDAGToDAG.cpp MSP430ISelLowering.cpp + MSP430MCAsmInfo.cpp MSP430RegisterInfo.cpp MSP430Subtarget.cpp - MSP430TargetAsmInfo.cpp MSP430TargetMachine.cpp ) + +target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG) diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h index fc13c9e875f1c..d9f5f8629541f 100644 --- a/lib/Target/MSP430/MSP430.h +++ b/lib/Target/MSP430/MSP430.h @@ -20,13 +20,13 @@ namespace llvm { class MSP430TargetMachine; class FunctionPass; - class raw_ostream; + class formatted_raw_ostream; FunctionPass *createMSP430ISelDag(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel); - FunctionPass *createMSP430CodePrinterPass(raw_ostream &o, - MSP430TargetMachine &tm, - bool verbose); + + extern Target TheMSP430Target; + } // end namespace llvm; // Defines symbolic names for MSP430 registers. diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index bf49ec0bff462..4195a88f8de00 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -28,8 +28,14 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" + using namespace llvm; +STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); + /// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine /// instructions for SelectionDAG operations. /// @@ -50,10 +56,15 @@ namespace { return "MSP430 DAG->DAG Pattern Instruction Selection"; } + virtual bool + SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector &OutOps); + // Include the pieces autogenerated from the target description. #include "MSP430GenDAGISel.inc" private: + void PreprocessForRMW(); SDNode *Select(SDValue Op); bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp); @@ -120,21 +131,155 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr, } +bool MSP430DAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector &OutOps) { + SDValue Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (!SelectAddr(Op, Op, Op0, Op1)) + return true; + break; + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; +} + +/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand +/// and move load below the TokenFactor. Replace store's chain operand with +/// load's chain result. +/// Shamelessly stolen from X86. 
+static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, + SDValue Store, SDValue TF) { + SmallVector Ops; + bool isRMW = false; + SDValue TF0, TF1, NewTF; + for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) + if (Load.getNode() == TF.getOperand(i).getNode()) { + TF0 = Load.getOperand(0); + Ops.push_back(TF0); + } else { + TF1 = TF.getOperand(i); + Ops.push_back(TF1); + if (LoadSDNode* LD = dyn_cast(TF1)) + isRMW = !LD->isVolatile(); + } + + if (isRMW && TF1.getOperand(0).getNode() == TF0.getNode()) + NewTF = TF0; + else + NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); + + SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, + Load.getOperand(1), + Load.getOperand(2)); + CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), + Store.getOperand(2), Store.getOperand(3)); +} + +/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The chain +/// produced by the load must only be used by the store's chain operand, +/// otherwise this may produce a cycle in the DAG. +/// Shamelessly stolen from X86. FIXME: Should we make this function common? +static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, + SDValue &Load) { + if (N.getOpcode() == ISD::BIT_CONVERT) + N = N.getOperand(0); + + LoadSDNode *LD = dyn_cast(N); + if (!LD || LD->isVolatile()) + return false; + if (LD->getAddressingMode() != ISD::UNINDEXED) + return false; + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) + return false; + + if (N.hasOneUse() && + LD->hasNUsesOfValue(1, 1) && + N.getOperand(1) == Address && + LD->isOperandOf(Chain.getNode())) { + Load = N; + return true; + } + return false; +} + +/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. +/// Shamelessly stolen from X86. +void MSP430DAGToDAGISel::PreprocessForRMW() { + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) { + if (!ISD::isNON_TRUNCStore(I)) + continue; + + SDValue Chain = I->getOperand(0); + if (Chain.getNode()->getOpcode() != ISD::TokenFactor) + continue; + + SDValue N1 = I->getOperand(1); // Value to store + SDValue N2 = I->getOperand(2); // Address of store + + if (!N1.hasOneUse()) + continue; + + bool RModW = false; + SDValue Load; + unsigned Opcode = N1.getNode()->getOpcode(); + switch (Opcode) { + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADDC: + case ISD::ADDE: { + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + RModW = isRMWLoad(N10, Chain, N2, Load); + + if (!RModW && isRMWLoad(N11, Chain, N2, Load)) { + // Swap the operands, making the RMW load the first operand seems + // to help selection and prevent token chain loops. + N1 = CurDAG->UpdateNodeOperands(N1, N11, N10); + RModW = true; + } + break; + } + case ISD::SUB: + case ISD::SUBC: + case ISD::SUBE: { + SDValue N10 = N1.getOperand(0); + RModW = isRMWLoad(N10, Chain, N2, Load); + break; + } + } + + if (RModW) { + MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); + ++NumLoadMoved; + } + } +} /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void MSP430DAGToDAGISel::InstructionSelect() { + PreprocessForRMW(); + + DEBUG(errs() << "Selection DAG after RMW preprocessing:\n"); + DEBUG(CurDAG->dump()); + DEBUG(BB->dump()); // Codegen the basic block. 
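
To make the intent of PreprocessForRMW above concrete: it looks for store(op(load X), X) shapes whose load hangs off a TokenFactor, and rewrites the chain so selection can fold load, operation, and store into one memory-operand instruction. Source like the following, which is illustrative and not taken from the patch, produces exactly that shape.

    // A classic read-modify-write. After MoveBelowTokenFactor the DAG is
    // store(add(load &counter, 1), &counter) on a straight-line chain,
    // which can select to a single MSP430 instruction such as
    // "add.w #1, &counter".
    static int counter;          // non-volatile, so isRMWLoad accepts it
    void bump() { counter = counter + 1; }
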
-#ifndef NDEBUG - DOUT << "===== Instruction selection begins:\n"; - Indent = 0; -#endif + DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(Indent = 0); SelectRoot(*CurDAG); -#ifndef NDEBUG - DOUT << "===== Instruction selection ends:\n"; -#endif + DEBUG(errs() << "===== Instruction selection ends:\n"); CurDAG->RemoveDeadNodes(); } @@ -144,21 +289,17 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Dump information about the Node being selected - #ifndef NDEBUG - DOUT << std::string(Indent, ' ') << "Selecting: "; + DEBUG(errs().indent(Indent) << "Selecting: "); DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; - Indent += 2; - #endif + DEBUG(errs() << "\n"); + DEBUG(Indent += 2); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "== "; - DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; - Indent -= 2; - #endif + DEBUG(errs().indent(Indent-2) << "== "; + Node->dump(CurDAG); + errs() << "\n"); + DEBUG(Indent -= 2); return NULL; } @@ -172,23 +313,21 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { if (Node->hasOneUse()) return CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16, TFI, CurDAG->getTargetConstant(0, MVT::i16)); - return CurDAG->getTargetNode(MSP430::ADD16ri, dl, MVT::i16, - TFI, CurDAG->getTargetConstant(0, MVT::i16)); + return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16, + TFI, CurDAG->getTargetConstant(0, MVT::i16)); } } // Select the default instruction SDNode *ResNode = SelectCode(Op); - #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(errs() << std::string(Indent-2, ' ') << "=> "); if (ResNode == NULL || ResNode == Op.getNode()) DEBUG(Op.getNode()->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); - DOUT << "\n"; - Indent -= 2; - #endif + DEBUG(errs() << "\n"); + DEBUG(Indent -= 2); return ResNode; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 91a8663a632c3..b56f069b54de3 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -31,12 +31,16 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" using namespace llvm; MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : - TargetLowering(tm), Subtarget(*tm.getSubtargetImpl()), TM(tm) { + TargetLowering(tm, new TargetLoweringObjectFileELF()), + Subtarget(*tm.getSubtargetImpl()), TM(tm) { // Set up the register classes. 
addRegisterClass(MVT::i8, MSP430::GR8RegisterClass); @@ -77,7 +81,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::ROTR, MVT::i8, Expand); setOperationAction(ISD::ROTL, MVT::i16, Expand); setOperationAction(ISD::ROTR, MVT::i16, Expand); - setOperationAction(ISD::RET, MVT::Other, Custom); setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -92,6 +95,24 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand); + + setOperationAction(ISD::CTTZ, MVT::i8, Expand); + setOperationAction(ISD::CTTZ, MVT::i16, Expand); + setOperationAction(ISD::CTLZ, MVT::i8, Expand); + setOperationAction(ISD::CTLZ, MVT::i16, Expand); + setOperationAction(ISD::CTPOP, MVT::i8, Expand); + setOperationAction(ISD::CTPOP, MVT::i16, Expand); + + setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // FIXME: Implement efficiently multiplication by a constant setOperationAction(ISD::MUL, MVT::i16, Expand); @@ -110,19 +131,16 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); case ISD::SHL: // FALLTHROUGH case ISD::SRL: case ISD::SRA: return LowerShifts(Op, DAG); - case ISD::RET: return LowerRET(Op, DAG); - case ISD::CALL: return LowerCALL(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); default: - assert(0 && "unimplemented operand"); + llvm_unreachable("unimplemented operand"); return SDValue(); } } @@ -132,33 +150,85 @@ unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; } +//===----------------------------------------------------------------------===// +// MSP430 Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+TargetLowering::ConstraintType +MSP430TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return C_RegisterClass; + default: + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair<unsigned, const TargetRegisterClass*> +MSP430TargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const { + if (Constraint.size() == 1) { + // GCC Constraint Letters + switch (Constraint[0]) { + default: break; + case 'r': // GENERAL_REGS + if (VT == MVT::i8) + return std::make_pair(0U, MSP430::GR8RegisterClass); + + return std::make_pair(0U, MSP430::GR16RegisterClass); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// #include "MSP430GenCallingConv.inc" -SDValue MSP430TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, - SelectionDAG &DAG) { - unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - switch (CC) { +SDValue +MSP430TargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + + switch (CallConv) { default: - assert(0 && "Unsupported calling convention"); + llvm_unreachable("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: - return LowerCCCArguments(Op, DAG); + return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } } -SDValue MSP430TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { - CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); - unsigned CallingConv = TheCall->getCallingConv(); - switch (CallingConv) { +SDValue +MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + + switch (CallConv) { default: - assert(0 && "Unsupported calling convention"); + llvm_unreachable("Unsupported calling convention"); case CallingConv::Fast: case CallingConv::C: - return LowerCCCCallTo(Op, DAG, CallingConv); + return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, + Outs, Ins, dl, DAG, InVals); } } @@ -166,40 +236,46 @@ SDValue MSP430TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { /// generate load operations for arguments places on the stack. // FIXME: struct return stuff // FIXME: varargs -SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op, - SelectionDAG &DAG) { +SDValue +MSP430TargetLowering::LowerCCCArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SDValue Root = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; - unsigned CC = MF.getFunction()->getCallingConv(); - DebugLoc dl = Op.getDebugLoc(); // Assign locations to all of the incoming arguments.
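// Editorial illustration (a sketch, not part of the original patch): the two
// hooks above implement the GCC 'r' constraint for MSP430 inline assembly,
// mapping i8 operands onto GR8 and everything else onto GR16. A hypothetical
// user-level snippet that would exercise this path:
//
//   int x = 42;
//   asm("inc.w %0" : "+r"(x));   // 'r' resolves to a GR16 register here
//
// The 'm' constraint is handled separately by SelectInlineAsmMemoryOperand in
// MSP430ISelDAGToDAG.cpp, which reuses SelectAddr to produce the base-register
// plus displacement operand pair.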
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MSP430); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430); assert(!isVarArg && "Varargs not supported yet"); - SmallVector<SDValue, 16> ArgValues; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isRegLoc()) { // Arguments passed in registers - MVT RegVT = VA.getLocVT(); - switch (RegVT.getSimpleVT()) { - default: - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << RegVT.getSimpleVT() - << "\n"; - abort(); + EVT RegVT = VA.getLocVT(); + switch (RegVT.getSimpleVT().SimpleTy) { + default: + { +#ifndef NDEBUG + errs() << "LowerFormalArguments Unhandled argument type: " + << RegVT.getSimpleVT().SimpleTy << "\n"; +#endif + llvm_unreachable(0); + } case MVT::i16: unsigned VReg = RegInfo.createVirtualRegister(MSP430::GR16RegisterClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); - SDValue ArgValue = DAG.getCopyFromReg(Root, dl, VReg, RegVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); // If this is an 8-bit value, it is really passed promoted to 16 // bits. Insert an assert[sz]ext to capture this, then truncate to the @@ -214,7 +290,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op, if (VA.getLocInfo() != CCValAssign::Full) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); - ArgValues.push_back(ArgValue); + InVals.push_back(ArgValue); } } else { // Sanity check @@ -222,8 +298,8 @@ SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op, // Load the argument to a virtual register unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; if (ObjSize > 2) { - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << VA.getLocVT().getSimpleVT() + errs() << "LowerFormalArguments Unhandled argument type: " + << VA.getLocVT().getSimpleVT().SimpleTy + << "\n"; } // Create the frame index object for this incoming parameter... @@ -232,30 +308,29 @@ SDValue MSP430TargetLowering::LowerCCCArguments(SDValue Op, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); - ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0)); } } - ArgValues.push_back(Root); - - // Return the new list of results. - return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); + return Chain; } -SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { +SDValue +MSP430TargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG) { + // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - DebugLoc dl = Op.getDebugLoc(); // CCState - Info about the registers and stack slot. - CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); - // Analize return values of ISD::RET - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_MSP430); + // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MSP430); // If this is the first return lowered for this function, add the regs to the // liveout set for the function. @@ -265,8 +340,6 @@ SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - // The chain is always operand #0 - SDValue Chain = Op.getOperand(0); SDValue Flag; // Copy the result values into the output registers. @@ -274,10 +347,8 @@ SDValue MSP430TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - // ISD::RET => ret chain, (regnum1,val1), ... - // So i*2+1 index only the regnums Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Op.getOperand(i*2+1), Flag); + Outs[i].Val, Flag); // Guarantee that all emitted copies are stuck together, // avoiding something bad. @@ -294,19 +365,21 @@ /// LowerCCCCallTo - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: sret. -SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, - unsigned CC) { - CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - bool isVarArg = TheCall->isVarArg(); - DebugLoc dl = Op.getDebugLoc(); - +SDValue +MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> + &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(TheCall, CC_MSP430); + CCInfo.AnalyzeCallOperands(Outs, CC_MSP430); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -322,12 +395,11 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - // Arguments start after the 5 first operands of ISD::CALL - SDValue Arg = TheCall->getArg(i); + SDValue Arg = Outs[i].Val; // Promote the value if needed. switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); @@ -412,50 +484,43 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, // Handle result values, copying them out of physregs into vregs that we // return. - return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), - Op.getResNo()); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, + DAG, InVals); } -/// LowerCallResult - Lower the result values of an ISD::CALL into the -/// appropriate copies out of appropriate physical registers. This assumes that -/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call -/// being lowered. Returns a SDNode with the same number of values as the -/// ISD::CALL.
-SDNode* +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +/// +SDValue MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallSDNode *TheCall, - unsigned CallingConv, - SelectionDAG &DAG) { - bool isVarArg = TheCall->isVarArg(); - DebugLoc dl = TheCall->getDebugLoc(); + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(TheCall, RetCC_MSP430); - SmallVector<SDValue, 8> ResultVals; + CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); - ResultVals.push_back(Chain.getValue(0)); + InVals.push_back(Chain.getValue(0)); } - ResultVals.push_back(Chain); - - // Merge everything together with a MERGE_VALUES node. - return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), - &ResultVals[0], ResultVals.size()).getNode(); + return Chain; } SDValue MSP430TargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) { unsigned Opc = Op.getOpcode(); SDNode* N = Op.getNode(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); // We currently only lower shifts of constant argument. @@ -511,7 +576,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, unsigned &TargetCC, // FIXME: Handle jump negative someday TargetCC = MSP430::COND_INVALID; switch (CC) { - default: assert(0 && "Invalid integer condition!"); + default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: TargetCC = MSP430::COND_E; // aka COND_Z break; @@ -585,7 +650,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) { SDValue Val = Op.getOperand(0); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); assert(VT == MVT::i16 && "Only support i16 for now!"); @@ -616,7 +681,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { MachineBasicBlock* MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == MSP430::Select16 || @@ -646,6 +712,10 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addImm(MI->getOperand(3).getImm()); F->insert(I, copy0MBB); F->insert(I, copy1MBB); + // Inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + EM->insert(std::make_pair(*SI, copy1MBB)); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select.
copy1MBB->transferSuccessors(BB); diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 4a90a0eb26390..fdbc384f1df06 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -33,7 +33,7 @@ namespace llvm { /// Y = RRC X, rotate right via carry RRC, - /// CALL/TAILCALL - These operations represent an abstract call + /// CALL - These operations represent an abstract call /// instruction, which includes a bunch of information. CALL, @@ -77,10 +77,6 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); - SDValue LowerRET(SDValue Op, SelectionDAG &DAG); - SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG); SDValue LowerShifts(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); @@ -88,16 +84,58 @@ namespace llvm { SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG); - SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, - unsigned CC); - SDNode* LowerCallResult(SDValue Chain, SDValue InFlag, - CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG); + TargetLowering::ConstraintType + getConstraintType(const std::string &Constraint) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; private: + SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + SDValue LowerCCCArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG); + const MSP430Subtarget &Subtarget; const MSP430TargetMachine &TM; }; diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 91112c3d732ff..37fbb6d9999bc 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -44,7 +45,7 @@ void
MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIdx).addImm(0) .addReg(SrcReg, getKillRegState(isKill)); else - assert(0 && "Cannot store this register to stack slot!"); + llvm_unreachable("Cannot store this register to stack slot!"); } void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -61,7 +62,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(MSP430::MOV8rm)) .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0); else - assert(0 && "Cannot store this register to stack slot!"); + llvm_unreachable("Cannot store this register to stack slot!"); } bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB, @@ -171,7 +172,7 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // Conditional branch. unsigned Count = 0; - assert(0 && "Implement conditional branches!"); + llvm_unreachable("Implement conditional branches!"); return Count; } diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 39c08e40be463..f7e0d2bad6382 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -155,7 +155,7 @@ let isCall = 1 in let Defs = [R12W, R13W, R14W, R15W, SRW], Uses = [SPW] in { def CALLi : Pseudo<(outs), (ins i16imm:$dst, variable_ops), - "call\t${dst:call}", [(MSP430call imm:$dst)]>; + "call\t$dst", [(MSP430call imm:$dst)]>; def CALLr : Pseudo<(outs), (ins GR16:$dst, variable_ops), "call\t$dst", [(MSP430call GR16:$dst)]>; def CALLm : Pseudo<(outs), (ins memsrc:$dst, variable_ops), @@ -243,6 +243,13 @@ def MOV16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), "mov.w\t{$src, $dst}", [(store GR16:$src, addr:$dst)]>; +def MOV8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "mov.b\t{$src, $dst}", + [(store (i8 (load addr:$src)), addr:$dst)]>; +def MOV16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "mov.w\t{$src, $dst}", + [(store (i16 (load addr:$src)), addr:$dst)]>; + //===----------------------------------------------------------------------===// // Arithmetic Instructions @@ -671,30 +678,26 @@ def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), let isTwoAddress = 0 in { def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), "bis.b\t{$src, $dst}", - [(store (or (load addr:$dst), GR8:$src), addr:$dst), - (implicit SRW)]>; + [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>; def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), "bis.w\t{$src, $dst}", - [(store (or (load addr:$dst), GR16:$src), addr:$dst), - (implicit SRW)]>; + [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>; def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), "bis.b\t{$src, $dst}", - [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst), - (implicit SRW)]>; + [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>; def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), "bis.w\t{$src, $dst}", - [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst), - (implicit SRW)]>; + [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>; def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), "bis.b\t{$src, $dst}", - [(store (or (load addr:$dst), (i8 (load addr:$src))), addr:$dst), - (implicit SRW)]>; + [(store (or (i8 (load addr:$dst)), + (i8 (load addr:$src))), addr:$dst)]>; def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), "bis.w\t{$src, $dst}", - [(store (or (load addr:$dst), (i16 (load addr:$src))), addr:$dst), - (implicit SRW)]>; + [(store (or (i16 (load 
addr:$dst)), + (i16 (load addr:$src))), addr:$dst)]>; } } // isTwoAddress = 1 @@ -722,59 +725,6 @@ def CMP16im : Pseudo<(outs), (ins i16imm:$src1, memsrc:$src2), "cmp.w\t{$src1, $src2}", [(MSP430cmp (i16 imm:$src1), (load addr:$src2)), (implicit SRW)]>; -// FIXME: imm is allowed only on src operand, not on dst. - -//def CMP8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2), -// "cmp.b\t{$src1, $src2}", -// [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>; -//def CMP16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2), -// "cmp.w\t{$src1, $src2}", -// [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>; - -//def CMP8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2), -// "cmp.b\t{$src1, $src2}", -// [(MSP430cmp (load addr:$src1), (i8 imm:$src2)), (implicit SRW)]>; -//def CMP16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2), -// "cmp.w\t{$src1, $src2}", -// [(MSP430cmp (load addr:$src1), (i16 imm:$src2)), (implicit SRW)]>; - - -// Imm 0, +1, +2, +4, +8 are encoded via constant generator registers. -// That's why we can use them as dest operands. -// We don't define new class for them, since they would need special encoding -// in the future. - -def CMP8ri0 : Pseudo<(outs), (ins GR8:$src1), - "cmp.b\t{$src1, #0}", - [(MSP430cmp GR8:$src1, 0), (implicit SRW)]>; -def CMP16ri0: Pseudo<(outs), (ins GR16:$src1), - "cmp.w\t{$src1, #0}", - [(MSP430cmp GR16:$src1, 0), (implicit SRW)]>; -def CMP8ri1 : Pseudo<(outs), (ins GR8:$src1), - "cmp.b\t{$src1, #1}", - [(MSP430cmp GR8:$src1, 1), (implicit SRW)]>; -def CMP16ri1: Pseudo<(outs), (ins GR16:$src1), - "cmp.w\t{$src1, #1}", - [(MSP430cmp GR16:$src1, 1), (implicit SRW)]>; -def CMP8ri2 : Pseudo<(outs), (ins GR8:$src1), - "cmp.b\t{$src1, #2}", - [(MSP430cmp GR8:$src1, 2), (implicit SRW)]>; -def CMP16ri2: Pseudo<(outs), (ins GR16:$src1), - "cmp.w\t{$src1, #2}", - [(MSP430cmp GR16:$src1, 2), (implicit SRW)]>; -def CMP8ri4 : Pseudo<(outs), (ins GR8:$src1), - "cmp.b\t{$src1, #4}", - [(MSP430cmp GR8:$src1, 4), (implicit SRW)]>; -def CMP16ri4: Pseudo<(outs), (ins GR16:$src1), - "cmp.w\t{$src1, #4}", - [(MSP430cmp GR16:$src1, 4), (implicit SRW)]>; -def CMP8ri8 : Pseudo<(outs), (ins GR8:$src1), - "cmp.b\t{$src1, #8}", - [(MSP430cmp GR8:$src1, 8), (implicit SRW)]>; -def CMP16ri8: Pseudo<(outs), (ins GR16:$src1), - "cmp.w\t{$src1, #8}", - [(MSP430cmp GR16:$src1, 8), (implicit SRW)]>; - def CMP8rm : Pseudo<(outs), (ins GR8:$src1, memsrc:$src2), "cmp.b\t{$src1, $src2}", [(MSP430cmp GR8:$src1, (load addr:$src2)), (implicit SRW)]>; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp new file mode 100644 index 0000000000000..069313e2ef0b1 --- /dev/null +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -0,0 +1,20 @@ +//===-- MSP430MCAsmInfo.cpp - MSP430 asm properties -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MSP430MCAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "MSP430MCAsmInfo.h" +using namespace llvm; + +MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { + AlignmentIsInBytes = false; + AllowNameToStartWithDigit = true; +} diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MSP430MCAsmInfo.h new file mode 100644 index 0000000000000..8318029ae78d4 --- /dev/null +++ b/lib/Target/MSP430/MSP430MCAsmInfo.h @@ -0,0 +1,28 @@ +//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MSP430MCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430TARGETASMINFO_H +#define MSP430TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + struct MSP430MCAsmInfo : public MCAsmInfo { + explicit MSP430MCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index d40bac73eab87..1a5893e4bfbc2 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -45,7 +46,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -const TargetRegisterClass* const* +const TargetRegisterClass *const * MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { static const TargetRegisterClass * const CalleeSavedRegClasses[] = { &MSP430::GR16RegClass, &MSP430::GR16RegClass, @@ -58,8 +59,7 @@ MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { return CalleeSavedRegClasses; } -BitVector -MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { +BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // Mark 4 special registers as reserved. @@ -75,7 +75,8 @@ MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass* MSP430RegisterInfo::getPointerRegClass() const { +const TargetRegisterClass * +MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const { return &MSP430::GR16RegClass; } @@ -146,9 +147,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void +unsigned MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -186,7 +188,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(BasePtr, false); if (Offset == 0) - return; + return 0; // We need to materialize the offset via add instruction. 
unsigned DstReg = MI.getOperand(0).getReg(); @@ -197,11 +199,12 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg) .addReg(DstReg).addImm(Offset); - return; + return 0; } MI.getOperand(i).ChangeToRegister(BasePtr, false); MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; } void @@ -291,7 +294,7 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, switch (RetOpcode) { case MSP430::RET: break; // These are ok default: - assert(0 && "Can only insert epilog into returning blocks"); + llvm_unreachable("Can only insert epilog into returning blocks"); } // Get the number of bytes to allocate from the FrameInfo @@ -310,7 +313,6 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, NumBytes = StackSize - CSSize; // Skip the callee-saved pop instructions. - MachineBasicBlock::iterator LastCSPop = MBBI; while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); @@ -327,7 +329,16 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF, // mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); if (MFI->hasVarSizedObjects()) { - assert(0 && "Not implemented yet!"); + BuildMI(MBB, MBBI, DL, + TII.get(MSP430::MOV16rr), MSP430::SPW).addReg(MSP430::FPW); + if (CSSize) { + MachineInstr *MI = + BuildMI(MBB, MBBI, DL, + TII.get(MSP430::SUB16ri), MSP430::SPW) + .addReg(MSP430::SPW).addImm(CSSize); + // The SRW implicit def is dead. + MI->getOperand(3).setIsDead(); + } } else { // adjust stack pointer back: SPW += numbytes if (NumBytes) { @@ -349,7 +360,7 @@ unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const { } int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - assert(0 && "Not implemented yet!"); + llvm_unreachable("Not implemented yet!"); return 0; } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index a210e36e001d9..5f3a216866b79 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -40,7 +40,7 @@ public: getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; BitVector getReservedRegs(const MachineFunction &MF) const; - const TargetRegisterClass* getPointerRegClass() const; + const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const; @@ -49,8 +49,9 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index ef9e10339bc3f..1346cb9a04dc8 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -14,12 +14,10 @@ #include "MSP430Subtarget.h" #include "MSP430.h" #include "MSP430GenSubtarget.inc" -#include "llvm/Target/TargetMachine.h" using namespace llvm; -MSP430Subtarget::MSP430Subtarget(const TargetMachine &TM, const Module &M, - const std::string &FS) { +MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &FS) { std::string CPU = "generic"; // Parse features string. 
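// Editorial illustration (a sketch, not part of the original patch): the
// emitEpilogue change above replaces the old "Not implemented yet!" abort for
// frames with variable-sized objects. Instead of adding back a static stack
// size, SPW is restored from the frame pointer and then backed up over the
// callee-saved spill area, so the emitted epilogue looks roughly like
//
//   mov.w r4, r1        ; SPW = FPW, deallocating the dynamic area
//   sub.w #CSSize, r1   ; re-expose the callee-saved spill slots for the pops
//
// using the usual FPW=r4, SPW=r1 mapping; CSSize here stands for the
// callee-saved spill size, and the subtraction is only emitted when non-zero.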
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 96c8108b71bc8..1070544f0773e 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -19,17 +19,14 @@ #include <string> namespace llvm { -class Module; -class TargetMachine; class MSP430Subtarget : public TargetSubtarget { bool ExtendedInsts; public: /// This constructor initializes the data members to match that - /// of the specified module. + /// of the specified triple. /// - MSP430Subtarget(const TargetMachine &TM, const Module &M, - const std::string &FS); + MSP430Subtarget(const std::string &TT, const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index dd09d43da984d..5e21f8ea29ef8 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -12,43 +12,30 @@ //===----------------------------------------------------------------------===// #include "MSP430.h" -#include "MSP430TargetAsmInfo.h" +#include "MSP430MCAsmInfo.h" #include "MSP430TargetMachine.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetMachineRegistry.h" - +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -/// MSP430TargetMachineModule - Note that this is used on hosts that -/// cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. -extern "C" int MSP430TargetMachineModule; -int MSP430TargetMachineModule = 0; - - -// Register the targets -static RegisterTarget<MSP430TargetMachine> -X("msp430", "MSP430 [experimental]"); - -// Force static initialization. -extern "C" void LLVMInitializeMSP430Target() { } +extern "C" void LLVMInitializeMSP430Target() { + // Register the target. + RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target); + RegisterAsmInfo<MSP430MCAsmInfo> Z(TheMSP430Target); +} -MSP430TargetMachine::MSP430TargetMachine(const Module &M, +MSP430TargetMachine::MSP430TargetMachine(const Target &T, + const std::string &TT, const std::string &FS) : - Subtarget(*this, M, FS), + LLVMTargetMachine(T, TT), + Subtarget(TT, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } -const TargetAsmInfo *MSP430TargetMachine::createTargetAsmInfo() const { - return new MSP430TargetAsmInfo(*this); -} bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { @@ -57,23 +44,3 @@ bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM, return false; } -bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { - // Output assembly language.
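// Editorial note with a sketch (not part of the original patch): the hunk
// above moves MSP430 onto the new TargetRegistry. The Target object declared
// in MSP430.h is filled in by the registration templates, and clients no
// longer probe getModuleMatchQuality but look targets up by triple, roughly:
//
//   std::string Err;
//   const Target *T = TargetRegistry::lookupTarget("msp430-elf", Err);
//   if (T)
//     TargetMachine *TM = T->createTargetMachine("msp430-elf", "");
//
// The triple string and empty feature string here are hypothetical; the
// actual triple matching is done by RegisterTarget<Triple::msp430> in the new
// TargetInfo library shown further down.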
- PM.add(createMSP430CodePrinterPass(Out, *this, Verbose)); - return false; -} - -unsigned MSP430TargetMachine::getModuleMatchQuality(const Module &M) { - std::string TT = M.getTargetTriple(); - - // We strongly match msp430 - if (TT.size() >= 6 && TT[0] == 'm' && TT[1] == 's' && TT[2] == 'p' && - TT[3] == '4' && TT[4] == '3' && TT[5] == '0') - return 20; - - return 0; -} - diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index d9ffa2b5ac8ff..d38614018c847 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -37,11 +37,9 @@ class MSP430TargetMachine : public LLVMTargetMachine { // any MSP430 specific FrameInfo class. TargetFrameInfo FrameInfo; -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - public: - MSP430TargetMachine(const Module &M, const std::string &FS); + MSP430TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -57,10 +55,6 @@ public: } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, bool Verbose, - raw_ostream &Out); - static unsigned getModuleMatchQuality(const Module &M); }; // MSP430TargetMachine. } // end namespace llvm diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile index 45cb3aa45b85a..4b18bc9ab428d 100644 --- a/lib/Target/MSP430/Makefile +++ b/lib/Target/MSP430/Makefile @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../.. -LIBRARYNAME = LLVMMSP430 +LIBRARYNAME = LLVMMSP430CodeGen TARGET = MSP430 # Make sure that tblgen is run, first thing. @@ -17,5 +17,7 @@ BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \ MSP430GenDAGISel.inc MSP430GenCallingConv.inc \ MSP430GenSubtarget.inc +DIRS = AsmPrinter TargetInfo + include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..1d408d0cb5be0 --- /dev/null +++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMSP430Info + MSP430TargetInfo.cpp + ) + +add_dependencies(LLVMMSP430Info MSP430Table_gen) diff --git a/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp new file mode 100644 index 0000000000000..f9ca5c49c9790 --- /dev/null +++ b/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp @@ -0,0 +1,20 @@ +//===-- MSP430TargetInfo.cpp - MSP430 Target Implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MSP430.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheMSP430Target; + +extern "C" void LLVMInitializeMSP430TargetInfo() { + RegisterTarget<Triple::msp430> + X(TheMSP430Target, "msp430", "MSP430 [experimental]"); +} diff --git a/lib/Target/MSP430/TargetInfo/Makefile b/lib/Target/MSP430/TargetInfo/Makefile new file mode 100644 index 0000000000000..abb08f2548ee1 --- /dev/null +++ b/lib/Target/MSP430/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/MSP430/TargetInfo/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMSP430Info + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt index 197cc2921edbe..56c68a6b41600 100644 --- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt +++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt @@ -4,6 +4,6 @@ include_directories( ) add_llvm_library(LLVMMipsAsmPrinter - MipsAsmPrinter.cpp + MipsAsmPrinter.cpp ) add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) \ No newline at end of file diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index cb4047988eb91..ccf9ee518d331 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -22,24 +22,28 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/MDNode.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include <cctype> using namespace llvm; @@ -50,8 +54,8 @@ namespace { class VISIBILITY_HIDDEN MipsAsmPrinter : public AsmPrinter { const MipsSubtarget *Subtarget; public: - explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM, - const TargetAsmInfo *T, bool V) + explicit MipsAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) : AsmPrinter(O, TM, T, V) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } @@ -68,34 +72,25 @@ namespace { const char *Modifier = 0); void printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier = 0); - void printModuleLevelGV(const GlobalVariable* GVar); + void PrintGlobalVariable(const GlobalVariable *GVar);
void printSavedRegsBitmask(MachineFunction &MF); void printHex32(unsigned int Value); - const char *emitCurrentABIString(void); + const char *emitCurrentABIString(); void emitFunctionStart(MachineFunction &MF); void emitFunctionEnd(MachineFunction &MF); void emitFrameDirective(MachineFunction &MF); - bool printInstruction(const MachineInstr *MI); // autogenerated. + void printInstruction(const MachineInstr *MI); // autogenerated. + static const char *getRegisterName(unsigned RegNo); + bool runOnMachineFunction(MachineFunction &F); - bool doInitialization(Module &M); - bool doFinalization(Module &M); + void EmitStartOfAsmFile(Module &M); }; } // end of anonymous namespace #include "MipsGenAsmWriter.inc" -/// createMipsCodePrinterPass - Returns a pass that prints the MIPS -/// assembly code for a MachineFunction to the given output stream, -/// using the given target machine description. This should work -/// regardless of whether the function is in SSA form. -FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o, - MipsTargetMachine &tm, - bool verbose) { - return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); -} - //===----------------------------------------------------------------------===// // // Mips Asm Directives @@ -186,9 +181,7 @@ printHex32(unsigned int Value) //===----------------------------------------------------------------------===// /// Frame Directive -void MipsAsmPrinter:: -emitFrameDirective(MachineFunction &MF) -{ +void MipsAsmPrinter::emitFrameDirective(MachineFunction &MF) { const TargetRegisterInfo &RI = *TM.getRegisterInfo(); unsigned stackReg = RI.getFrameRegister(MF); @@ -196,16 +189,14 @@ emitFrameDirective(MachineFunction &MF) unsigned stackSize = MF.getFrameInfo()->getStackSize(); - O << "\t.frame\t" << '$' << LowercaseString(RI.get(stackReg).AsmName) + O << "\t.frame\t" << '$' << LowercaseString(getRegisterName(stackReg)) << ',' << stackSize << ',' - << '$' << LowercaseString(RI.get(returnReg).AsmName) + << '$' << LowercaseString(getRegisterName(returnReg)) << '\n'; } /// Emit Set directives. -const char * MipsAsmPrinter:: -emitCurrentABIString(void) -{ +const char *MipsAsmPrinter::emitCurrentABIString() { switch(Subtarget->getTargetABI()) { case MipsSubtarget::O32: return "abi32"; case MipsSubtarget::O64: return "abiO64"; @@ -215,17 +206,15 @@ emitCurrentABIString(void) default: break; } - assert(0 && "Unknown Mips ABI"); + llvm_unreachable("Unknown Mips ABI"); return NULL; } /// Emit the directives used by GAS on the start of functions -void MipsAsmPrinter:: -emitFunctionStart(MachineFunction &MF) -{ +void MipsAsmPrinter::emitFunctionStart(MachineFunction &MF) { // Print out the label for the function. 
const Function *F = MF.getFunction(); - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); // 2 bits aligned EmitAlignment(MF.getAlignment(), F); @@ -235,7 +224,7 @@ emitFunctionStart(MachineFunction &MF) printVisibility(CurrentFnName, F->getVisibility()); - if ((TAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux()) + if ((MAI->hasDotTypeDotSizeDirective()) && Subtarget->isLinux()) O << "\t.type\t" << CurrentFnName << ", @function\n"; O << CurrentFnName << ":\n"; @@ -247,9 +236,7 @@ emitFunctionStart(MachineFunction &MF) } /// Emit the directives used by GAS on the end of functions -void MipsAsmPrinter:: -emitFunctionEnd(MachineFunction &MF) -{ +void MipsAsmPrinter::emitFunctionEnd(MachineFunction &MF) { // There are instruction for this macros, but they must // always be at the function end, and we can't emit and // break with BB logic. @@ -257,15 +244,13 @@ emitFunctionEnd(MachineFunction &MF) O << "\t.set\treorder\n"; O << "\t.end\t" << CurrentFnName << '\n'; - if (TAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux()) + if (MAI->hasDotTypeDotSizeDirective() && !Subtarget->isLinux()) O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; } /// runOnMachineFunction - This uses the printMachineInstruction() /// method to print assembly for each instruction. -bool MipsAsmPrinter:: -runOnMachineFunction(MachineFunction &MF) -{ +bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; SetupMachineFunction(MF); @@ -287,14 +272,21 @@ runOnMachineFunction(MachineFunction &MF) // Print a label for the basic block. if (I != MF.begin()) { - printBasicBlockLabel(I, true, true); - O << '\n'; + EmitBasicBlockStart(I); } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { + processDebugLoc(II, true); + // Print the assembly for the instruction. printInstruction(II); + + if (VerboseAsm && !II->getDebugLoc().isUnknown()) + EmitComments(*II); + O << '\n'; + + processDebugLoc(II, false); ++EmittedInsts; } @@ -310,10 +302,8 @@ runOnMachineFunction(MachineFunction &MF) } // Print out an operand for an inline asm expression. -bool MipsAsmPrinter:: -PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode) -{ +bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant,const char *ExtraCode){ // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. @@ -322,57 +312,33 @@ PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -void MipsAsmPrinter:: -printOperand(const MachineInstr *MI, int opNum) -{ +void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); - const TargetRegisterInfo &RI = *TM.getRegisterInfo(); bool closeP = false; - bool isPIC = (TM.getRelocationModel() == Reloc::PIC_); - bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large); - - // %hi and %lo used on mips gas to load global addresses on - // static code. %got is used to load global addresses when - // using PIC_. %call16 is used to load direct call targets - // on PIC_ and small code size. %call_lo and %call_hi load - // direct call targets on PIC_ and large code size. 
- if (MI->getOpcode() == Mips::LUi && !MO.isReg() && !MO.isImm()) { - if ((isPIC) && (isCodeLarge)) - O << "%call_hi("; - else - O << "%hi("; + + if (MO.getTargetFlags()) closeP = true; - } else if ((MI->getOpcode() == Mips::ADDiu) && !MO.isReg() && !MO.isImm()) { - const MachineOperand &firstMO = MI->getOperand(opNum-1); - if (firstMO.getReg() == Mips::GP) - O << "%gp_rel("; + + switch(MO.getTargetFlags()) { + case MipsII::MO_GPREL: O << "%gp_rel("; break; + case MipsII::MO_GOT_CALL: O << "%call16("; break; + case MipsII::MO_GOT: + if (MI->getOpcode() == Mips::LW) + O << "%got("; else O << "%lo("; - closeP = true; - } else if ((isPIC) && (MI->getOpcode() == Mips::LW) && - (!MO.isReg()) && (!MO.isImm())) { - const MachineOperand &firstMO = MI->getOperand(opNum-1); - const MachineOperand &lastMO = MI->getOperand(opNum+1); - if ((firstMO.isReg()) && (lastMO.isReg())) { - if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() == Mips::GP) - && (!isCodeLarge)) - O << "%call16("; - else if ((firstMO.getReg() != Mips::T9) && (lastMO.getReg() == Mips::GP)) - O << "%got("; - else if ((firstMO.getReg() == Mips::T9) && (lastMO.getReg() != Mips::GP) - && (isCodeLarge)) - O << "%call_lo("; - closeP = true; - } + break; + case MipsII::MO_ABS_HILO: + if (MI->getOpcode() == Mips::LUi) + O << "%hi("; + else + O << "%lo("; + break; } - - switch (MO.getType()) - { + + switch (MO.getType()) { case MachineOperand::MO_Register: - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - O << '$' << LowercaseString (RI.get(MO.getReg()).AsmName); - else - O << '$' << MO.getReg(); + O << '$' << LowercaseString(getRegisterName(MO.getReg())); break; case MachineOperand::MO_Immediate: @@ -380,14 +346,11 @@ printOperand(const MachineInstr *MI, int opNum) break; case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB()); + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: - { - const GlobalValue *GV = MO.getGlobal(); - O << Mang->getValueName(GV); - } + O << Mang->getMangledName(MO.getGlobal()); break; case MachineOperand::MO_ExternalSymbol: @@ -395,25 +358,23 @@ printOperand(const MachineInstr *MI, int opNum) break; case MachineOperand::MO_JumpTableIndex: - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); break; case MachineOperand::MO_ConstantPoolIndex: - O << TAI->getPrivateGlobalPrefix() << "CPI" + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); break; default: - O << ""; abort (); break; + llvm_unreachable(""); } if (closeP) O << ")"; } -void MipsAsmPrinter:: -printUnsignedImm(const MachineInstr *MI, int opNum) -{ +void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); if (MO.getType() == MachineOperand::MO_Immediate) O << (unsigned short int)MO.getImm(); @@ -422,8 +383,7 @@ printUnsignedImm(const MachineInstr *MI, int opNum) } void MipsAsmPrinter:: -printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) -{ +printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) { // when using stack locations for not load/store instructions // print the same way as all normal 3 operand instructions. 
if (Modifier && !strcmp(Modifier, "stackloc")) { @@ -443,17 +403,14 @@ printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) } void MipsAsmPrinter:: -printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) -{ +printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) { const MachineOperand& MO = MI->getOperand(opNum); O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } -bool MipsAsmPrinter:: -doInitialization(Module &M) -{ - Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix()); - +void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { + // FIXME: Use SwitchSection. + // Tell the assembler which ABI we are using O << "\t.section .mdebug." << emitCurrentABIString() << '\n'; @@ -464,12 +421,9 @@ doInitialization(Module &M) // return to previous section O << "\t.previous" << '\n'; - - return false; // success } -void MipsAsmPrinter:: -printModuleLevelGV(const GlobalVariable* GVar) { +void MipsAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) { const TargetData *TD = TM.getTargetData(); if (!GVar->hasInitializer()) return; @@ -480,10 +434,8 @@ printModuleLevelGV(const GlobalVariable* GVar) { return; O << "\n\n"; - std::string name = Mang->getValueName(GVar); + std::string name = Mang->getMangledName(GVar); Constant *C = GVar->getInitializer(); - if (isa<MDNode>(C) || isa<MDString>(C)) - return; const Type *CTy = C->getType(); unsigned Size = TD->getTypeAllocSize(CTy); const ConstantArray *CVA = dyn_cast<ConstantArray>(C); @@ -503,7 +455,8 @@ printModuleLevelGV(const GlobalVariable* GVar) { printVisibility(name, GVar->getVisibility()); - SwitchToSection(TAI->SectionForGlobal(GVar)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); if (C->isNullValue() && !GVar->hasSection()) { if (!GVar->isThreadLocal() && @@ -513,8 +466,8 @@ printModuleLevelGV(const GlobalVariable* GVar) { if (GVar->hasLocalLinkage()) O << "\t.local\t" << name << '\n'; - O << TAI->getCOMMDirective() << name << ',' << Size; - if (TAI->getCOMMDirectiveTakesAlignment()) + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) O << ',' << (1 << Align); O << '\n'; @@ -536,29 +489,27 @@ printModuleLevelGV(const GlobalVariable* GVar) { // or something. For now, just emit them as external.
case GlobalValue::ExternalLinkage: // If external or appending, declare as a global symbol - O << TAI->getGlobalDirective() << name << '\n'; + O << MAI->getGlobalDirective() << name << '\n'; // Fall Through case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: case GlobalValue::InternalLinkage: if (CVA && CVA->isCString()) printSizeAndType = false; break; case GlobalValue::GhostLinkage: - cerr << "Should not have any unmaterialized functions!\n"; - abort(); + llvm_unreachable("Should not have any unmaterialized functions!"); case GlobalValue::DLLImportLinkage: - cerr << "DLLImport linkage is not supported by this target!\n"; - abort(); + llvm_unreachable("DLLImport linkage is not supported by this target!"); case GlobalValue::DLLExportLinkage: - cerr << "DLLExport linkage is not supported by this target!\n"; - abort(); + llvm_unreachable("DLLExport linkage is not supported by this target!"); default: - assert(0 && "Unknown linkage type!"); + llvm_unreachable("Unknown linkage type!"); } EmitAlignment(Align, GVar); - if (TAI->hasDotTypeDotSizeDirective() && printSizeAndType) { + if (MAI->hasDotTypeDotSizeDirective() && printSizeAndType) { O << "\t.type " << name << ",@object\n"; O << "\t.size " << name << ',' << Size << '\n'; } @@ -567,26 +518,9 @@ printModuleLevelGV(const GlobalVariable* GVar) { EmitGlobalConstant(C); } -bool MipsAsmPrinter:: -doFinalization(Module &M) -{ - // Print out module-level global variables here. - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) - printModuleLevelGV(I); - - O << '\n'; - - return AsmPrinter::doFinalization(M); -} - -namespace { - static struct Register { - Register() { - MipsTargetMachine::registerAsmPrinter(createMipsCodePrinterPass); - } - } Registrator; -} // Force static initialization. 
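A note for readers of this hunk: the Registrator struct removed here relied on a static initializer firing when the library happened to be linked in, which is exactly the fragility the new TargetRegistry scheme avoids. The following is a rough sketch of what the RegisterAsmPrinter helper used just below presumably does; the allocator signature is an assumption based on the r84119-era TargetRegistry, not something this patch shows:

// Sketch only: a helper whose constructor hands TargetRegistry a factory,
// so the printer is created on demand instead of at static-init time.
template <class AsmPrinterImpl>
struct RegisterAsmPrinterSketch {
  RegisterAsmPrinterSketch(Target &T) {
    TargetRegistry::RegisterAsmPrinter(T, &Allocator);
  }
  static AsmPrinter *Allocator(formatted_raw_ostream &OS, TargetMachine &TM,
                               const MCAsmInfo *MAI, bool Verbose) {
    return new AsmPrinterImpl(OS, TM, MAI, Verbose);
  }
};
// Usage, mirroring the new LLVMInitializeMipsAsmPrinter below:
//   RegisterAsmPrinterSketch<MipsAsmPrinter> X(TheMipsTarget);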
-extern "C" void LLVMInitializeMipsAsmPrinter() { } +extern "C" void LLVMInitializeMipsAsmPrinter() { + RegisterAsmPrinter X(TheMipsTarget); + RegisterAsmPrinter Y(TheMipselTarget); +} diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index d27e6f174d088..0e3bf5a96d408 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -15,10 +15,11 @@ add_llvm_target(MipsCodeGen MipsInstrInfo.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp + MipsMCAsmInfo.cpp MipsRegisterInfo.cpp MipsSubtarget.cpp - MipsTargetAsmInfo.cpp MipsTargetMachine.cpp + MipsTargetObjectFile.cpp ) target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 48ab5f994704c..078034532d504 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -17,7 +17,7 @@ BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtarget.inc -DIRS = AsmPrinter +DIRS = AsmPrinter TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 9b22a91b1609d..a9ab050d6f0dc 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -21,13 +21,14 @@ namespace llvm { class MipsTargetMachine; class FunctionPass; class MachineCodeEmitter; - class raw_ostream; + class formatted_raw_ostream; FunctionPass *createMipsISelDag(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); - FunctionPass *createMipsCodePrinterPass(raw_ostream &OS, - MipsTargetMachine &TM, - bool Verbose); + + extern Target TheMipsTarget; + extern Target TheMipselTarget; + } // end namespace llvm; // Defines symbolic names for Mips registers. This defines a mapping from diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 53de1bbea66e4..cc20dd7b4ff63 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -32,6 +32,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -106,22 +108,16 @@ private: /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void MipsDAGToDAGISel:: -InstructionSelect() -{ +void MipsDAGToDAGISel::InstructionSelect() { DEBUG(BB->dump()); // Codegen the basic block. - #ifndef NDEBUG - DOUT << "===== Instruction selection begins:\n"; - Indent = 0; - #endif + DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(Indent = 0); // Select target instructions for the DAG. SelectRoot(*CurDAG); - #ifndef NDEBUG - DOUT << "===== Instruction selection ends:\n"; - #endif + DEBUG(errs() << "===== Instruction selection ends:\n"); CurDAG->RemoveDeadNodes(); } @@ -129,7 +125,6 @@ InstructionSelect() /// getGlobalBaseReg - Output the instructions required to put the /// GOT address into a register. SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } @@ -186,29 +181,23 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) /// Select instructions not customized! 
Used for /// expanded, promoted and normal instructions -SDNode* MipsDAGToDAGISel:: -Select(SDValue N) -{ +SDNode* MipsDAGToDAGISel::Select(SDValue N) { SDNode *Node = N.getNode(); unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected - #ifndef NDEBUG - DOUT << std::string(Indent, ' ') << "Selecting: "; - DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; - Indent += 2; - #endif + DEBUG(errs().indent(Indent) << "Selecting: "; + Node->dump(CurDAG); + errs() << "\n"); + DEBUG(Indent += 2); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "== "; - DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; - Indent -= 2; - #endif + DEBUG(errs().indent(Indent-2) << "== "; + Node->dump(CurDAG); + errs() << "\n"); + DEBUG(Indent -= 2); return NULL; } @@ -242,10 +231,10 @@ Select(SDValue N) SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - MVT VT = LHS.getValueType(); - SDNode *Carry = CurDAG->getTargetNode(Mips::SLTu, dl, VT, Ops, 2); - SDNode *AddCarry = CurDAG->getTargetNode(Mips::ADDu, dl, VT, - SDValue(Carry,0), RHS); + EVT VT = LHS.getValueType(); + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2); + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, + SDValue(Carry,0), RHS); return CurDAG->SelectNodeTo(N.getNode(), MOp, VT, MVT::Flag, LHS, SDValue(AddCarry,0)); @@ -265,13 +254,13 @@ Select(SDValue N) else Op = (Opcode == ISD::UDIVREM ? Mips::DIVu : Mips::DIV); - SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2); + SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2); SDValue InFlag = SDValue(Node, 0); - SDNode *Lo = CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32, - MVT::Flag, InFlag); + SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, + MVT::Flag, InFlag); InFlag = SDValue(Lo,1); - SDNode *Hi = CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag); + SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); if (!N.getValue(0).use_empty()) ReplaceUses(N.getValue(0), SDValue(Lo,0)); @@ -290,15 +279,15 @@ Select(SDValue N) SDValue MulOp2 = Node->getOperand(1); unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - SDNode *MulNode = CurDAG->getTargetNode(MulOp, dl, - MVT::Flag, MulOp1, MulOp2); + SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, + MVT::Flag, MulOp1, MulOp2); SDValue InFlag = SDValue(MulNode, 0); if (MulOp == ISD::MUL) - return CurDAG->getTargetNode(Mips::MFLO, dl, MVT::i32, InFlag); + return CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, InFlag); else - return CurDAG->getTargetNode(Mips::MFHI, dl, MVT::i32, InFlag); + return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); } /// Div/Rem operations @@ -317,10 +306,10 @@ Select(SDValue N) Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu); MOp = Mips::MFHI; } - SDNode *Node = CurDAG->getTargetNode(Op, dl, MVT::Flag, Op1, Op2); + SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Flag, Op1, Op2); SDValue InFlag = SDValue(Node, 0); - return CurDAG->getTargetNode(MOp, dl, MVT::i32, InFlag); + return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag); } // Get target GOT address. @@ -333,7 +322,6 @@ Select(SDValue N) /// be loaded with 3 instructions. 
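Nearly every change in Select() below is the mechanical rename of CurDAG->getTargetNode to CurDAG->getMachineNode, plus MVT becoming EVT. A minimal sketch of the new spelling, with illustrative operands (LHS and RHS are placeholders, not values taken from this patch):

// Multiply, then read the Lo register; the MVT::Flag result of the first
// node is threaded into the second node as an input flag operand.
SDNode *Mul = CurDAG->getMachineNode(Mips::MULT, dl, MVT::Flag, LHS, RHS);
SDNode *Lo  = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
                                     SDValue(Mul, 0));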
case MipsISD::JmpLink: { if (TM.getRelocationModel() == Reloc::PIC_) { - //bool isCodeLarge = (TM.getCodeModel() == CodeModel::Large); SDValue Chain = Node->getOperand(0); SDValue Callee = Node->getOperand(1); SDValue T9Reg = CurDAG->getRegister(Mips::T9, MVT::i32); @@ -347,7 +335,7 @@ Select(SDValue N) // Use load to get GOT target SDValue Ops[] = { Callee, GPReg, Chain }; - SDValue Load = SDValue(CurDAG->getTargetNode(Mips::LW, dl, MVT::i32, + SDValue Load = SDValue(CurDAG->getMachineNode(Mips::LW, dl, MVT::i32, MVT::Other, Ops, 3), 0); Chain = Load.getValue(1); @@ -358,7 +346,7 @@ Select(SDValue N) Chain = CurDAG->getCopyToReg(Chain, dl, T9Reg, Callee, InFlag); // Emit Jump and Link Register - SDNode *ResNode = CurDAG->getTargetNode(Mips::JALR, dl, MVT::Other, + SDNode *ResNode = CurDAG->getMachineNode(Mips::JALR, dl, MVT::Other, MVT::Flag, T9Reg, Chain); Chain = SDValue(ResNode, 0); InFlag = SDValue(ResNode, 1); @@ -372,15 +360,13 @@ Select(SDValue N) // Select the default instruction SDNode *ResNode = SelectCode(N); - #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; + DEBUG(errs().indent(Indent-2) << "=> "); if (ResNode == NULL || ResNode == N.getNode()) DEBUG(N.getNode()->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); - DOUT << "\n"; - Indent -= 2; - #endif + DEBUG(errs() << "\n"); + DEBUG(Indent -= 2); return ResNode; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 3d2e2b7a773d2..ab8790ad7dbd4 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-lower" - #include "MipsISelLowering.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" +#include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -31,13 +31,11 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -const char *MipsTargetLowering:: -getTargetNodeName(unsigned Opcode) const -{ - switch (Opcode) - { +const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { case MipsISD::JmpLink : return "MipsISD::JmpLink"; case MipsISD::Hi : return "MipsISD::Hi"; case MipsISD::Lo : return "MipsISD::Lo"; @@ -54,8 +52,8 @@ getTargetNodeName(unsigned Opcode) const } MipsTargetLowering:: -MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) -{ +MipsTargetLowering(MipsTargetMachine &TM) + : TargetLowering(TM, new MipsTargetObjectFile()) { Subtarget = &TM.getSubtarget(); // Mips does not have i1 type, so use i32 for @@ -82,6 +80,10 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + // MIPS doesn't have extending float->double load/store + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // Used by legalize types to correctly generate the setcc result. 
// Without this, every float setcc comes with a AND/OR with the result, // we don't want this, since the fpcmp result goes to a flag register, @@ -91,7 +93,6 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - setOperationAction(ISD::RET, MVT::Other, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); @@ -119,11 +120,20 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f32, Expand); // We don't have line number support yet. setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); @@ -154,7 +164,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM) computeRegisterProperties(); } -MVT MipsTargetLowering::getSetCCResultType(MVT VT) const { +MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -170,16 +180,13 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::AND: return LowerANDOR(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::CALL: return LowerCALL(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::OR: return LowerANDOR(Op, DAG); - case ISD::RET: return LowerRET(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); } @@ -202,37 +209,6 @@ AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC) return VReg; } -// A address must be loaded from a small section if its size is less than the -// small section size threshold. Data in this section must be addressed using -// gp_rel operator. -bool MipsTargetLowering::IsInSmallSection(unsigned Size) { - return (Size > 0 && (Size <= Subtarget->getSSectionThreshold())); -} - -// Discover if this global address can be placed into small data/bss section. 
-bool MipsTargetLowering::IsGlobalInSmallSection(GlobalValue *GV) -{ - const TargetData *TD = getTargetData(); - const GlobalVariable *GVA = dyn_cast(GV); - - if (!GVA) - return false; - - const Type *Ty = GV->getType()->getElementType(); - unsigned Size = TD->getTypeAllocSize(Ty); - - // if this is a internal constant string, there is a special - // section for it, but not in small data/bss. - if (GVA->hasInitializer() && GV->hasLocalLinkage()) { - Constant *C = GVA->getInitializer(); - const ConstantArray *CVA = dyn_cast(C); - if (CVA && CVA->isCString()) - return false; - } - - return IsInSmallSection(Size); -} - // Get fp branch code (not opcode) from condition code. static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) @@ -247,7 +223,7 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) { switch(BC) { default: - assert(0 && "Unknown branch code"); + llvm_unreachable("Unknown branch code"); case Mips::BRANCH_T : return Mips::BC1T; case Mips::BRANCH_F : return Mips::BC1F; case Mips::BRANCH_TL : return Mips::BC1TL; @@ -257,7 +233,7 @@ static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) { static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { switch (CC) { - default: assert(0 && "Unknown fp condition code!"); + default: llvm_unreachable("Unknown fp condition code!"); case ISD::SETEQ: case ISD::SETOEQ: return Mips::FCOND_EQ; case ISD::SETUNE: return Mips::FCOND_OGL; @@ -283,7 +259,8 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); bool isFPCmp = false; DebugLoc dl = MI->getDebugLoc(); @@ -331,9 +308,12 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) + e = BB->succ_end(); i != e; ++i) { + EM->insert(std::make_pair(*i, sinkMBB)); sinkMBB->addSuccessor(*i); + } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. 
while(!BB->succ_empty()) @@ -508,29 +488,34 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) Cond, True, False, CCNode); } -SDValue MipsTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) -{ +SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); - if (!Subtarget->hasABICall()) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { SDVTList VTs = DAG.getVTList(MVT::i32); - SDValue Ops[] = { GA }; + + MipsTargetObjectFile &TLOF = (MipsTargetObjectFile&)getObjFileLowering(); + // %gp_rel relocation - if (!isa<Function>(GV) && IsGlobalInSmallSection(GV)) { - SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, Ops, 1); + if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { + SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, + MipsII::MO_GPREL); + SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); } // %hi/%lo relocation - SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1); + SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, + MipsII::MO_ABS_HILO); + SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - } else { // Abicall relocations, TODO: make this cleaner. + } else { + SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, + MipsII::MO_GOT); SDValue ResNode = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), GA, NULL, 0); // On functions and global targets not internally linked only - @@ -541,14 +526,14 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo); } - assert(0 && "Dont know how to handle GlobalAddress"); + llvm_unreachable("Don't know how to handle GlobalAddress"); return SDValue(0,0); } SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { - assert(0 && "TLS not implemented for MIPS."); + llvm_unreachable("TLS not implemented for MIPS."); return SDValue(); // Not reached } @@ -559,15 +544,17 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) SDValue HiPart; // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); + bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + unsigned char OpFlag = IsPIC ?
MipsII::MO_GOT : MipsII::MO_ABS_HILO; - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { - SDVTList VTs = DAG.getVTList(MVT::i32); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); + + if (IsPIC) { SDValue Ops[] = { JTI }; - HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, Ops, 1); + HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0); @@ -583,7 +570,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue ResNode; ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); Constant *C = N->getConstVal(); - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + MipsII::MO_ABS_HILO); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -592,8 +580,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) // but the asm printer currently doesn't support this feature without // hacking it. This feature should come soon so we can uncomment the // stuff below. - //if (!Subtarget->hasABICall() && - // IsInSmallSection(getTargetData()->getTypeAllocSize(C->getType()))) { + //if (IsInSmallSection(C->getType())) { // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP); // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); @@ -608,13 +595,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) //===----------------------------------------------------------------------===// // Calling Convention Implementation -// -// The lower operations present on calling convention works on this order: -// LowerCALL (virt regs --> phys regs, virt regs --> stack) -// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) -// LowerRET (virt regs --> phys regs) -// LowerCALL (phys regs --> virt regs) -// //===----------------------------------------------------------------------===// #include "MipsGenCallingConv.inc" @@ -632,8 +612,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) // go to stack. //===----------------------------------------------------------------------===// -static bool CC_MipsO32(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, +static bool CC_MipsO32(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { static const unsigned IntRegsSize=4, FloatRegsSize=2; @@ -699,38 +679,38 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, } //===----------------------------------------------------------------------===// -// CALL Calling Convention Implementation +// Call Calling Convention Implementation //===----------------------------------------------------------------------===// -/// LowerCALL - functions arguments are copied from virtual regs to +/// LowerCall - function arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isVarArg, isTailCall.
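For orientation before the rewritten call lowering below: one step of an O32-style argument assignment has roughly the following shape. This is a sketch built on the generic CCState helpers of the period, not the patch's CC_MipsO32 above, which additionally handles f32/f64 values and the register-shadowing rules:

// Place one 32-bit integer argument: the first four words go in $a0..$a3,
// later ones get 4-byte stack slots (beyond the fixed 16-byte area).
static const unsigned IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
if (unsigned Reg = State.AllocateReg(IntRegs, 4)) {
  State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
  unsigned Offset = State.AllocateStack(4, 4);
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
return false; // in the CC_* convention, returning false means "handled"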
-SDValue MipsTargetLowering:: -LowerCALL(SDValue Op, SelectionDAG &DAG) -{ - MachineFunction &MF = DAG.getMachineFunction(); - - CallSDNode *TheCall = cast(Op.getNode()); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - bool isVarArg = TheCall->isVarArg(); - unsigned CC = TheCall->getCallingConv(); - DebugLoc dl = TheCall->getDebugLoc(); +SDValue +MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); // To meet O32 ABI, Mips must always allocate 16 bytes on // the stack (even if less than 4 are used as arguments) if (Subtarget->isABI_O32()) { - int VTsize = MVT(MVT::i32).getSizeInBits()/8; + int VTsize = EVT(MVT::i32).getSizeInBits()/8; MFI->CreateFixedObject(VTsize, (VTsize*3)); - CCInfo.AnalyzeCallOperands(TheCall, CC_MipsO32); + CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); } else - CCInfo.AnalyzeCallOperands(TheCall, CC_Mips); + CCInfo.AnalyzeCallOperands(Outs, CC_Mips); // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -747,12 +727,12 @@ LowerCALL(SDValue Op, SelectionDAG &DAG) // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - SDValue Arg = TheCall->getArg(i); + SDValue Arg = Outs[i].Val; CCValAssign &VA = ArgLocs[i]; // Promote the value if needed. switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: if (Subtarget->isABI_O32() && VA.isRegLoc()) { if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32) @@ -825,10 +805,13 @@ LowerCALL(SDValue Op, SelectionDAG &DAG) // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. + unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), + getPointerTy(), 0, OpFlag); else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + getPointerTy(), OpFlag); // MipsJmpLink = #chain, #target_address, #opt_in_flags... // = Chain, Callee, Reg#1, Reg#2, ... @@ -859,7 +842,7 @@ LowerCALL(SDValue Op, SelectionDAG &DAG) // Create a stack location to hold GP when PIC is used. This stack // location is used on function prologue to save GP and also after all // emited CALL's to restore GP. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + if (IsPIC) { // Function can have an arbitrary number of calls, so // hold the LastArgStackLoc with the biggest offset. 
int FI; @@ -887,75 +870,69 @@ LowerCALL(SDValue Op, SelectionDAG &DAG) // Handle result values, copying them out of physregs into vregs that we // return. - return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), Op.getResNo()); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, + Ins, dl, DAG, InVals); } -/// LowerCallResult - Lower the result values of an ISD::CALL into the -/// appropriate copies out of appropriate physical registers. This assumes that -/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call -/// being lowered. Returns a SDNode with the same number of values as the -/// ISD::CALL. -SDNode *MipsTargetLowering:: -LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG) { - - bool isVarArg = TheCall->isVarArg(); - DebugLoc dl = TheCall->getDebugLoc(); +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue +MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { // Assign locations to each value returned by this call. SmallVector RVLocs; - CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(TheCall, RetCC_Mips); - SmallVector ResultVals; + CCInfo.AnalyzeCallResult(Ins, RetCC_Mips); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), - RVLocs[i].getValVT(), InFlag).getValue(1); + RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); - ResultVals.push_back(Chain.getValue(0)); + InVals.push_back(Chain.getValue(0)); } - - ResultVals.push_back(Chain); - // Merge everything together with a MERGE_VALUES node. - return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), - &ResultVals[0], ResultVals.size()).getNode(); + return Chain; } //===----------------------------------------------------------------------===// -// FORMAL_ARGUMENTS Calling Convention Implementation +// Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// -/// LowerFORMAL_ARGUMENTS - transform physical registers into +/// LowerFormalArguments - transform physical registers into /// virtual registers and generate load operations for /// arguments places on the stack. /// TODO: isVarArg -SDValue MipsTargetLowering:: -LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) -{ - SDValue Root = Op.getOperand(0); +SDValue +MipsTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); - DebugLoc dl = Op.getDebugLoc(); - - bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); if (Subtarget->isABI_O32()) - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_MipsO32); + CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); else - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_Mips); + CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); - SmallVector ArgValues; SDValue StackPtr; unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16); @@ -965,7 +942,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) // Arguments stored on registers if (VA.isRegLoc()) { - MVT RegVT = VA.getLocVT(); + EVT RegVT = VA.getLocVT(); TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) @@ -976,12 +953,12 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) if (!Subtarget->isSingleFloat()) RC = Mips::AFGR64RegisterClass; } else - assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering"); + llvm_unreachable("RegVT not supported by LowerFormalArguments Lowering"); // Transform the arguments stored on // physical registers into virtual ones unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then @@ -1005,14 +982,14 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) { unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg()+1, RC); - SDValue ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg2, RegVT); + SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); SDValue Hi = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue); SDValue Lo = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, ArgValue2); ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi); } } - ArgValues.push_back(ArgValue); + InVals.push_back(ArgValue); // To meet ABI, when VARARGS are passed on registers, the registers // must have their values written to the caller stack frame. @@ -1034,7 +1011,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) // emit ISD::STORE whichs stores the // parameter value to a stack Location - ArgValues.push_back(DAG.getStore(Root, dl, ArgValue, PtrOff, NULL, 0)); + InVals.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0)); } } else { // VA.isRegLoc() @@ -1057,7 +1034,7 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); } } @@ -1070,36 +1047,33 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32)); MipsFI->setSRetReturnReg(Reg); } - SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]); - Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root); + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } - ArgValues.push_back(Root); - - // Return the new list of results. 
- return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); + return Chain; } //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// -SDValue MipsTargetLowering:: -LowerRET(SDValue Op, SelectionDAG &DAG) -{ +SDValue +MipsTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG) { + // CCValAssign - represent the assignment of // the return value to a location SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - DebugLoc dl = Op.getDebugLoc(); // CCState - Info about the registers and stack slot. - CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); - // Analize return values of ISD::RET - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Mips); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_Mips); // If this is the first return lowered for this function, add // the regs to the liveout set for the function. @@ -1109,8 +1083,6 @@ LowerRET(SDValue Op, SelectionDAG &DAG) DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - // The chain is always operand #0 - SDValue Chain = Op.getOperand(0); SDValue Flag; // Copy the result values into the output registers. @@ -1118,10 +1090,8 @@ LowerRET(SDValue Op, SelectionDAG &DAG) CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - // ISD::RET => ret chain, (regnum1,val1), ... - // So i*2+1 index only the regnums Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Op.getOperand(i*2+1), Flag); + Outs[i].Val, Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad @@ -1138,7 +1108,7 @@ LowerRET(SDValue Op, SelectionDAG &DAG) unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) - assert(0 && "sret virtual register not created in the entry block"); + llvm_unreachable("sret virtual register not created in the entry block"); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag); @@ -1188,7 +1158,7 @@ getConstraintType(const std::string &Constraint) const /// return a list of registers that can be used to satisfy the constraint. /// This should only be used for C_RegisterClass constraints. std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -1210,7 +1180,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const /// pointer.
std::vector MipsTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const + EVT VT) const { if (Constraint.size() != 1) return std::vector(); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 9ad4895ce6e58..dddba4291d289 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -66,8 +66,8 @@ namespace llvm { //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// - class MipsTargetLowering : public TargetLowering - { + + class MipsTargetLowering : public TargetLowering { public: explicit MipsTargetLowering(MipsTargetMachine &TM); @@ -80,7 +80,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - get the ISD::SETCC result ValueType - MVT getSetCCResultType(MVT VT) const; + MVT::SimpleValueType getSetCCResultType(EVT VT) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; @@ -88,40 +88,62 @@ namespace llvm { // Subtarget Info const MipsSubtarget *Subtarget; + // Lower Operand helpers - SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG); - bool IsGlobalInSmallSection(GlobalValue *GV); - bool IsInSmallSection(unsigned Size); + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); // Lower Operand specifics SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG); SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerRET(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap *EM) const; // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; virtual bool isOffsetFoldingLegal(const 
GlobalAddressSDNode *GA) const; }; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index e16fd8e400c5d..91599043cb2c2 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "MipsGenInstrInfo.inc" using namespace llvm; @@ -208,29 +209,6 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(0).addFrameIndex(FI); } -void MipsInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, SmallVectorImpl &Addr, - const TargetRegisterClass *RC, SmallVectorImpl &NewMIs) const -{ - unsigned Opc; - if (RC == Mips::CPURegsRegisterClass) - Opc = Mips::SW; - else if (RC == Mips::FGR32RegisterClass) - Opc = Mips::SWC1; - else { - assert(RC == Mips::AFGR64RegisterClass); - Opc = Mips::SDC1; - } - - DebugLoc DL = DebugLoc::getUnknownLoc(); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; -} - void MipsInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, @@ -251,28 +229,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI); } -void MipsInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - unsigned Opc; - if (RC == Mips::CPURegsRegisterClass) - Opc = Mips::LW; - else if (RC == Mips::FGR32RegisterClass) - Opc = Mips::LWC1; - else { - assert(RC == Mips::AFGR64RegisterClass); - Opc = Mips::LDC1; - } - - DebugLoc DL = DebugLoc::getUnknownLoc(); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; -} - MachineInstr *MipsInstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, @@ -372,7 +328,7 @@ static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc) unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case Mips::COND_E : return Mips::BEQ; case Mips::COND_NE : return Mips::BNE; case Mips::COND_GZ : return Mips::BGTZ; @@ -421,7 +377,7 @@ unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC) Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case Mips::COND_E : return Mips::COND_NE; case Mips::COND_NE : return Mips::COND_E; case Mips::COND_GZ : return Mips::COND_LEZ; diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 6655c6749fdf7..249d3de3b7000 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -15,6 +15,7 @@ #define MIPSINSTRUCTIONINFO_H #include "Mips.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "MipsRegisterInfo.h" @@ -92,7 +93,7 @@ namespace Mips { inline static const char *MipsFCCToString(Mips::CondCode CC) { switch (CC) { - default: assert(0 && "Unknown condition 
code"); + default: llvm_unreachable("Unknown condition code"); case FCOND_F: case FCOND_T: return "f"; case FCOND_UN: @@ -129,6 +130,38 @@ namespace Mips { } } +/// MipsII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MipsII { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // Mips Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. + MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL, + + /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HILO + + }; +} + class MipsInstrInfo : public TargetInstrInfoImpl { MipsTargetMachine &TM; const MipsRegisterInfo RI; @@ -182,21 +215,11 @@ public: unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const; - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, const SmallVectorImpl &Ops, diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp new file mode 100644 index 0000000000000..60ef1c9e4fef3 --- /dev/null +++ b/lib/Target/Mips/MipsMCAsmInfo.cpp @@ -0,0 +1,27 @@ +//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MipsMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "MipsMCAsmInfo.h" +using namespace llvm; + +MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) { + AlignmentIsInBytes = false; + COMMDirectiveTakesAlignment = true; + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; + PrivateGlobalPrefix = "$"; + CommentString = "#"; + ZeroDirective = "\t.space\t"; + PICJumpTableDirective = "\t.gpword\t"; +} diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h new file mode 100644 index 0000000000000..33a4b5edb258f --- /dev/null +++ b/lib/Target/Mips/MipsMCAsmInfo.h @@ -0,0 +1,30 @@ +//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MipsMCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSTARGETASMINFO_H +#define MIPSTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + class MipsMCAsmInfo : public MCAsmInfo { + public: + explicit MipsMCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index ac3cdfd38e16c..949c78aebc938 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -57,7 +57,7 @@ private: /// to be used on emitPrologue and processFunctionBeforeFrameFinalized. MipsFIHolder GPHolder; - /// On LowerFORMAL_ARGUMENTS the stack size is unknown, so the Stack + /// On LowerFormalArguments the stack size is unknown, so the Stack /// Pointer Offset calculation of "not in register arguments" must be /// postponed to emitPrologue. SmallVector FnLoadArgs; @@ -65,7 +65,7 @@ private: // When VarArgs, we must write registers back to caller stack, preserving // on register arguments. Since the stack size is unknown on - // LowerFORMAL_ARGUMENTS, the Stack Pointer Offset calculation must be + // LowerFormalArguments, the Stack Pointer Offset calculation must be // postponed to emitPrologue. SmallVector FnStoreVarArgs; bool HasStoreVarArgs; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 579d4db6422f6..d2289e9cdbaa9 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -31,6 +31,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" @@ -79,12 +81,12 @@ getRegisterNumbering(unsigned RegEnum) case Mips::SP : case Mips::F29: return 29; case Mips::FP : case Mips::F30: case Mips::D15: return 30; case Mips::RA : case Mips::F31: return 31; - default: assert(0 && "Unknown register number!"); + default: llvm_unreachable("Unknown register number!"); } return 0; // Not reached } -unsigned MipsRegisterInfo::getPICCallReg(void) { return Mips::T9; } +unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } //===----------------------------------------------------------------------===// // Callee Saved Registers methods @@ -210,7 +212,7 @@ getReservedRegs(const MachineFunction &MF) const // The emitted instruction will be something like: // lw REGX, 16+StackSize(SP) // -// Since the total stack size is unknown on LowerFORMAL_ARGUMENTS, all +// Since the total stack size is unknown on LowerFormalArguments, all // stack references (ObjectOffset) created to reference the function // arguments, are negative numbers. This way, on eliminateFrameIndex it's // possible to detect those references and the offsets are adjusted to @@ -232,7 +234,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const int TopCPUSavedRegOff = -1, TopFPUSavedRegOff = -1; // Replace the dummy '0' SPOffset by the negative offsets, as explained on - // LowerFORMAL_ARGUMENTS. Leaving '0' for while is necessary to avoid + // LowerFormalArguments. 
Leaving '0' for a while is necessary to avoid - the approach done by calculateFrameObjectOffsets to the stack frame. MipsFI->adjustLoadArgsFI(MFI); MipsFI->adjustStoreVarArgsFI(MFI); @@ -346,9 +348,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // FrameIndex represents objects inside an abstract stack. // We must replace FrameIndex with a stack/frame pointer // direct reference. -void MipsRegisterInfo:: -eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const +unsigned MipsRegisterInfo:: +eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + int *Value, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); @@ -360,34 +362,27 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, "Instr doesn't have FrameIndex operand!"); } - #ifndef NDEBUG - DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n"; - DOUT << "<--------->\n"; - MI.print(DOUT); - #endif + DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n"; + errs() << "<--------->\n" << MI); int FrameIndex = MI.getOperand(i).getIndex(); int stackSize = MF.getFrameInfo()->getStackSize(); int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex); - #ifndef NDEBUG - DOUT << "FrameIndex : " << FrameIndex << "\n"; - DOUT << "spOffset : " << spOffset << "\n"; - DOUT << "stackSize : " << stackSize << "\n"; - #endif + DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n" + << "spOffset : " << spOffset << "\n" + << "stackSize : " << stackSize << "\n"); - // as explained on LowerFORMAL_ARGUMENTS, detect negative offsets + // as explained on LowerFormalArguments, detect negative offsets // and adjust SPOffsets considering the final stack size. int Offset = ((spOffset < 0) ? (stackSize + (-(spOffset+4))) : (spOffset)); Offset += MI.getOperand(i-1).getImm(); - #ifndef NDEBUG - DOUT << "Offset : " << Offset << "\n"; - DOUT << "<--------->\n"; - #endif + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); MI.getOperand(i-1).ChangeToImmediate(Offset); MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + return 0; } void MipsRegisterInfo:: @@ -515,19 +510,19 @@ getFrameRegister(MachineFunction &MF) const { unsigned MipsRegisterInfo:: getEHExceptionRegister() const { - assert(0 && "What is the exception register"); + llvm_unreachable("What is the exception register"); return 0; } unsigned MipsRegisterInfo:: getEHHandlerRegister() const { - assert(0 && "What is the exception handler register"); + llvm_unreachable("What is the exception handler register"); return 0; } int MipsRegisterInfo:: getDwarfRegNum(unsigned RegNum, bool isEH) const { - assert(0 && "What is the dwarf register number"); + llvm_unreachable("What is the dwarf register number"); return -1; } diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 808e995b4ed3d..122f786656b47 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -34,7 +34,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { static unsigned getRegisterNumbering(unsigned RegEnum); /// Get PIC indirect call register - static unsigned getPICCallReg(void); + static unsigned getPICCallReg(); /// Adjust the Mips stack frame.
void adjustMipsStackFrame(MachineFunction &MF) const; @@ -54,8 +54,9 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { MachineBasicBlock::iterator I) const; /// Stack Frame Processing Methods - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 4245f274f8f04..db114da00d734 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -14,37 +14,20 @@ #include "MipsSubtarget.h" #include "Mips.h" #include "MipsGenSubtarget.inc" -#include "llvm/Module.h" -#include "llvm/Support/CommandLine.h" using namespace llvm; -static cl::opt<bool> -NotABICall("disable-mips-abicall", cl::Hidden, - cl::desc("Disable code for SVR4-style dynamic objects")); -static cl::opt<bool> -AbsoluteCall("enable-mips-absolute-call", cl::Hidden, - cl::desc("Enable absolute call within abicall")); -static cl::opt<unsigned> -SSThreshold("mips-ssection-threshold", cl::Hidden, - cl::desc("Small data and bss section threshold size (default=8)"), - cl::init(8)); -MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M, - const std::string &FS, bool little) : +MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS, + bool little) : MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false), - IsFP64bit(false), IsGP64bit(false), HasVFPU(false), HasABICall(true), - HasAbsoluteCall(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), - HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false) + IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), + HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), + HasSwap(false), HasBitCount(false) { std::string CPU = "mips1"; MipsArchVersion = Mips1; // Parse features string. ParseSubtargetFeatures(FS, CPU); - const std::string& TT = M.getTargetTriple(); - - // Small section size threshold - SSectionThreshold = SSThreshold; // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; @@ -65,13 +48,4 @@ MipsSubtarget::MipsSubtarget(const TargetMachine &TM, const Module &M, HasSwap = true; HasCondMov = true; } - - // Abicall is the default for O32 ABI, but is disabled within EABI and in - // static code. - if (NotABICall || isABI_EABI() || (TM.getRelocationModel() == Reloc::Static)) - HasABICall = false; - - // TODO: disable when handling 64 bit symbols in the future. - if (HasABICall && AbsoluteCall) - HasAbsoluteCall = true; } diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 61c37c1d377e6..1d6f87d8c0630 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -20,7 +20,6 @@ #include <string> namespace llvm { -class Module; class MipsSubtarget : public TargetSubtarget { @@ -58,20 +57,9 @@ protected: // HasVFPU - Processor has a vector floating point unit. bool HasVFPU; - // IsABICall - Enable SRV4 code for SVR4-style dynamic objects - bool HasABICall; - - // HasAbsoluteCall - Enable code that is not fully position-independent. - // Only works with HasABICall enabled. - bool HasAbsoluteCall; - // isLinux - Target system is Linux. If false, we consider ELF OS for now.
bool IsLinux; - // Put global and static items less than or equal to SSectionThreshold - // bytes into the small data or bss section. The default is 8. - unsigned SSectionThreshold; - /// Features related to the presence of specific instructions. // HasSEInReg - SEB and SEH (signext in register) instructions. @@ -103,9 +91,8 @@ public: unsigned getTargetABI() const { return MipsABI; } /// This constructor initializes the data members to match that - /// of the specified module. - MipsSubtarget(const TargetMachine &TM, const Module &M, - const std::string &FS, bool little); + /// of the specified triple. + MipsSubtarget(const std::string &TT, const std::string &FS, bool little); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -121,10 +108,7 @@ public: bool isSingleFloat() const { return IsSingleFloat; }; bool isNotSingleFloat() const { return !IsSingleFloat; }; bool hasVFPU() const { return HasVFPU; }; - bool hasABICall() const { return HasABICall; }; - bool hasAbsoluteCall() const { return HasAbsoluteCall; }; bool isLinux() const { return IsLinux; }; - unsigned getSSectionThreshold() const { return SSectionThreshold; } /// Features related to the presence of specific instructions. bool hasSEInReg() const { return HasSEInReg; }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 4675536ce2a5e..4fa5450df138d 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -12,35 +12,18 @@ //===----------------------------------------------------------------------===// #include "Mips.h" -#include "MipsTargetAsmInfo.h" +#include "MipsMCAsmInfo.h" #include "MipsTargetMachine.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -/// MipsTargetMachineModule - Note that this is used on hosts that -/// cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. -extern "C" int MipsTargetMachineModule; -int MipsTargetMachineModule = 0; - -// Register the target. -static RegisterTarget X("mips", "Mips"); -static RegisterTarget Y("mipsel", "Mipsel"); - -MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0; - - -// Force static initialization. -extern "C" void LLVMInitializeMipsTarget() { } - -const TargetAsmInfo *MipsTargetMachine:: -createTargetAsmInfo() const -{ - return new MipsTargetAsmInfo(*this); +extern "C" void LLVMInitializeMipsTarget() { + // Register the target. + RegisterTargetMachine X(TheMipsTarget); + RegisterTargetMachine Y(TheMipselTarget); + RegisterAsmInfo A(TheMipsTarget); + RegisterAsmInfo B(TheMipselTarget); } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -51,17 +34,22 @@ createTargetAsmInfo() const // an easier handling. // Using CodeModel::Large enables different CALL behavior. MipsTargetMachine:: -MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle=false): - Subtarget(*this, M, FS, isLittle), +MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, + bool isLittle=false): + LLVMTargetMachine(T, TT), + Subtarget(TT, FS, isLittle), DataLayout(isLittle ? 
std::string("e-p:32:32:32-i8:8:32-i16:16:32") : std::string("E-p:32:32:32-i8:8:32-i16:16:32")), InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this) -{ + TLInfo(*this) { // Abicall enables PIC by default - if (Subtarget.hasABICall()) - setRelocationModel(Reloc::PIC_); + if (getRelocationModel() == Reloc::Default) { + if (Subtarget.isABI_O32()) + setRelocationModel(Reloc::PIC_); + else + setRelocationModel(Reloc::Static); + } // TODO: create an option to enable long calls, like -mlong-calls, // that would be our CodeModel::Large. It must not work with Abicall. @@ -70,43 +58,9 @@ MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle=false): } MipselTargetMachine:: -MipselTargetMachine(const Module &M, const std::string &FS) : - MipsTargetMachine(M, FS, true) {} - -// return 0 and must specify -march to gen MIPS code. -unsigned MipsTargetMachine:: -getModuleMatchQuality(const Module &M) -{ - // We strongly match "mips*-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 5 && std::string(TT.begin(), TT.begin()+5) == "mips-") - return 20; - - if (TT.size() >= 13 && std::string(TT.begin(), - TT.begin()+13) == "mipsallegrex-") - return 20; - - return 0; -} - -// return 0 and must specify -march to gen MIPSEL code. -unsigned MipselTargetMachine:: -getModuleMatchQuality(const Module &M) -{ - // We strongly match "mips*el-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 7 && std::string(TT.begin(), TT.begin()+7) == "mipsel-") - return 20; - - if (TT.size() >= 15 && std::string(TT.begin(), - TT.begin()+15) == "mipsallegrexel-") - return 20; - - if (TT.size() == 3 && std::string(TT.begin(), TT.begin()+3) == "psp") - return 20; - - return 0; -} +MipselTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) : + MipsTargetMachine(T, TT, FS, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. @@ -126,14 +80,3 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) PM.add(createMipsDelaySlotFillerPass(*this)); return true; } - -// Implements the AssemblyEmitter for the target. Must return -// true if AssemblyEmitter is supported -bool MipsTargetMachine:: -addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out) { - // Output assembly language. - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - PM.add(AsmPrinterCtor(Out, *this, Verbose)); - return false; -} diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 95e5be40f7512..c3428be48f592 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -22,7 +22,7 @@ #include "llvm/Target/TargetFrameInfo.h" namespace llvm { - class raw_ostream; + class formatted_raw_ostream; class MipsTargetMachine : public LLVMTargetMachine { MipsSubtarget Subtarget; @@ -30,24 +30,9 @@ namespace llvm { MipsInstrInfo InstrInfo; TargetFrameInfo FrameInfo; MipsTargetLowering TLInfo; - - protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - protected: - // To avoid having target depend on the asmprinter stuff libraries, - // asmprinter set this functions to ctor pointer at startup time if they are - // linked in. 
- typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, - MipsTargetMachine &tm, - bool verbose); - static AsmPrinterCtorFn AsmPrinterCtor; - public: - MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle); - - static void registerAsmPrinter(AsmPrinterCtorFn F) { - AsmPrinterCtor = F; - } + MipsTargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool isLittle); virtual const MipsInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -66,25 +51,19 @@ namespace llvm { return const_cast(&TLInfo); } - static unsigned getModuleMatchQuality(const Module &M); - // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); }; /// MipselTargetMachine - Mipsel target machine. /// class MipselTargetMachine : public MipsTargetMachine { public: - MipselTargetMachine(const Module &M, const std::string &FS); - - static unsigned getModuleMatchQuality(const Module &M); + MipselTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp new file mode 100644 index 0000000000000..85e9d65a32b18 --- /dev/null +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -0,0 +1,93 @@ +//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MipsTargetObjectFile.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +static cl::opt +SSThreshold("mips-ssection-threshold", cl::Hidden, + cl::desc("Small data and bss section threshold size (default=8)"), + cl::init(8)); + +void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + SmallDataSection = + getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + SmallBSSSection = + getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + +} + +// A address must be loaded from a small section if its size is less than the +// small section size threshold. Data in this section must be addressed using +// gp_rel operator. +static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= SSThreshold; +} + +bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const { + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global address should be +/// placed into small data/bss section. +bool MipsTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. 
+ const GlobalVariable *GVA = dyn_cast(GV); + if (!GVA) + return false; + + // We can only do this for datarel or BSS objects for now. + if (!Kind.isBSS() && !Kind.isDataRel()) + return false; + + // If this is a internal constant string, there is a special + // section for it, but not in small data/bss. + if (Kind.isMergeable1ByteCString()) + return false; + + const Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); +} + + + +const MCSection *MipsTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*" + // sections? + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h new file mode 100644 index 0000000000000..32e0436f0c97c --- /dev/null +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -0,0 +1,41 @@ +//===-- llvm/Target/MipsTargetObjectFile.h - Mips Object Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H +#define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H + +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + + class MipsTargetObjectFile : public TargetLoweringObjectFileELF { + const MCSection *SmallDataSection; + const MCSection *SmallBSSSection; + public: + + void Initialize(MCContext &Ctx, const TargetMachine &TM); + + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, SectionKind Kind)const; + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine &TM) const; + + // TODO: Classify globals as mips wishes. + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..6e5d56ba4ae78 --- /dev/null +++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMipsInfo + MipsTargetInfo.cpp + ) + +add_dependencies(LLVMMipsInfo MipsCodeGenTable_gen) diff --git a/lib/Target/Mips/TargetInfo/Makefile b/lib/Target/Mips/TargetInfo/Makefile new file mode 100644 index 0000000000000..32f4e1695b1d7 --- /dev/null +++ b/lib/Target/Mips/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Mips/TargetInfo/Makefile -----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
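Summing up the small-section policy implemented in MipsTargetObjectFile above as a hedged standalone sketch; the default of 8 comes from the -mips-ssection-threshold option in the hunk:

#include <cstdint>

// A defined global of data-rel or BSS kind goes to .sdata/.sbss only when
// its allocated size is within the threshold; such data is then addressable
// through the gp-relative operator.
static bool fitsSmallSection(uint64_t Size, unsigned Threshold = 8) {
  return Size > 0 && Size <= Threshold; // zero-sized objects never qualify
}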
+# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMipsInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp new file mode 100644 index 0000000000000..cc3d61e4e71d3 --- /dev/null +++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -0,0 +1,21 @@ +//===-- MipsTargetInfo.cpp - Mips Target Implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Mips.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheMipsTarget, llvm::TheMipselTarget; + +extern "C" void LLVMInitializeMipsTargetInfo() { + RegisterTarget X(TheMipsTarget, "mips", "Mips"); + + RegisterTarget Y(TheMipselTarget, "mipsel", "Mipsel"); +} diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000000000..2e1b809b92d7d --- /dev/null +++ b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMPIC16AsmPrinter + PIC16AsmPrinter.cpp + ) +add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen) diff --git a/lib/Target/PIC16/AsmPrinter/Makefile b/lib/Target/PIC16/AsmPrinter/Makefile new file mode 100644 index 0000000000000..f4db57e607163 --- /dev/null +++ b/lib/Target/PIC16/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/PIC16/AsmPrinter/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMPIC16AsmPrinter + +# Hack: we need to include 'main' pic16 target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp new file mode 100644 index 0000000000000..3f415afc10902 --- /dev/null +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -0,0 +1,484 @@ +//===-- PIC16AsmPrinter.cpp - PIC16 LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PIC16 assembly language. 
+// +//===----------------------------------------------------------------------===// + +#include "PIC16AsmPrinter.h" +#include "MCSectionPIC16.h" +#include "PIC16MCAsmInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" +#include +using namespace llvm; + +#include "PIC16GenAsmWriter.inc" + +PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) +: AsmPrinter(O, TM, T, V), DbgInfo(O, T) { + PTLI = static_cast(TM.getTargetLowering()); + PMAI = static_cast(T); + PTOF = (PIC16TargetObjectFile*)&PTLI->getObjFileLowering(); +} + +bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + processDebugLoc(MI, true); + printInstruction(MI); + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + O << '\n'; + processDebugLoc(MI, false); + return true; +} + +/// runOnMachineFunction - This emits the frame section, autos section and +/// assembly for each instruction. Also takes care of function begin debug +/// directive and file begin debug directive (if required) for the function. +/// +bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + // This calls the base class function required to be called at beginning + // of runOnMachineFunction. + SetupMachineFunction(MF); + + // Get the mangled name. + const Function *F = MF.getFunction(); + CurrentFnName = Mang->getMangledName(F); + + // Emit the function frame (args and temps). + EmitFunctionFrame(MF); + + DbgInfo.BeginFunction(MF); + + // Emit the autos section of function. + EmitAutos(CurrentFnName); + + // Now emit the instructions of function in its code section. + const MCSection *fCodeSection = + getObjFileLowering().getSectionForFunction(CurrentFnName); + // Start the Code Section. + O << "\n"; + OutStreamer.SwitchSection(fCodeSection); + + // Emit the frame address of the function at the beginning of code. + O << "\tretlw low(" << PAN::getFrameLabel(CurrentFnName) << ")\n"; + O << "\tretlw high(" << PAN::getFrameLabel(CurrentFnName) << ")\n"; + + // Emit function start label. + O << CurrentFnName << ":\n"; + + DebugLoc CurDL; + O << "\n"; + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + + // Print a label for the basic block. + if (I != MF.begin()) { + EmitBasicBlockStart(I); + } + + // Print a basic block. + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + + // Emit the line directive if source line changed. + const DebugLoc DL = II->getDebugLoc(); + if (!DL.isUnknown() && DL != CurDL) { + DbgInfo.ChangeDebugLoc(MF, DL); + CurDL = DL; + } + + // Print the assembly for the instruction. + printMachineInstruction(II); + } + } + + // Emit function end debug directives. + DbgInfo.EndFunction(MF); + + return false; // we didn't modify anything. +} + + +// printOperand - print operand of insn. 
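One detail worth highlighting in runOnMachineFunction above: the function's frame address is published through two retlw instructions emitted ahead of the entry label. A sketch of just that emission; the actual label text comes from PAN::getFrameLabel and is treated here as an opaque string, and the "@foo.frame.#" spelling below is an assumed example:

#include <iostream>
#include <ostream>
#include <string>

static void emitFramePreamble(std::ostream &O, const std::string &FrameLabel,
                              const std::string &FnName) {
  O << "\tretlw low(" << FrameLabel << ")\n"   // low byte of frame address
    << "\tretlw high(" << FrameLabel << ")\n"  // high byte
    << FnName << ":\n";                        // real entry follows
}

int main() {
  emitFramePreamble(std::cout, "@foo.frame.#", "foo"); // hypothetical names
  return 0;
}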
+void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << getRegisterName(MO.getReg()); + return; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + return; + + case MachineOperand::MO_GlobalAddress: { + std::string Sname = Mang->getMangledName(MO.getGlobal()); + // FIXME: currently we do not have a memcpy def coming in the module + // by any chance, as we do not link in those as .bc lib. So these calls + // are always external and it is safe to emit an extern. + if (PAN::isMemIntrinsic(Sname)) { + LibcallDecls.push_back(createESName(Sname)); + } + + O << Sname; + break; + } + case MachineOperand::MO_ExternalSymbol: { + const char *Sname = MO.getSymbolName(); + + // If its a libcall name, record it to decls section. + if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) { + LibcallDecls.push_back(Sname); + } + + // Record a call to intrinsic to print the extern declaration for it. + std::string Sym = Sname; + if (PAN::isMemIntrinsic(Sym)) { + Sym = PAN::addPrefix(Sym); + LibcallDecls.push_back(createESName(Sym)); + } + + O << Sym; + break; + } + case MachineOperand::MO_MachineBasicBlock: + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); + return; + + default: + llvm_unreachable(" Operand type not supported."); + } +} + +/// printCCOperand - Print the cond code operand. +/// +void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { + int CC = (int)MI->getOperand(opNum).getImm(); + O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); +} + +// This function is used to sort the decls list. +// should return true if s1 should come before s2. +static bool is_before(const char *s1, const char *s2) { + return strcmp(s1, s2) <= 0; +} + +// This is used by list::unique below. +// unique will filter out duplicates if it knows them. +static bool is_duplicate(const char *s1, const char *s2) { + return !strcmp(s1, s2); +} + +/// printLibcallDecls - print the extern declarations for compiler +/// intrinsics. +/// +void PIC16AsmPrinter::printLibcallDecls() { + // If no libcalls used, return. + if (LibcallDecls.empty()) return; + + O << MAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n"; + // Remove duplicate entries. + LibcallDecls.sort(is_before); + LibcallDecls.unique(is_duplicate); + + for (std::list::const_iterator I = LibcallDecls.begin(); + I != LibcallDecls.end(); I++) { + O << MAI->getExternDirective() << *I << "\n"; + O << MAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n"; + O << MAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n"; + } + O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n"; +} + +/// doInitialization - Perform Module level initializations here. +/// One task that we do here is to sectionize all global variables. +/// The MemSelOptimizer pass depends on the sectionizing. +/// +bool PIC16AsmPrinter::doInitialization(Module &M) { + bool Result = AsmPrinter::doInitialization(M); + + // FIXME:: This is temporary solution to generate the include file. + // The processor should be passed to llc as in input and the header file + // should be generated accordingly. + O << "\n\t#include P16F1937.INC\n"; + + // Set the section names for all globals. 
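printLibcallDecls above de-duplicates with list::sort plus list::unique. One caveat: std::list::sort formally requires a strict weak ordering, and the patch's is_before returns strcmp(s1, s2) <= 0, which is not one (a string compares as "before" itself). A standalone version using the strict comparator:

#include <cstring>
#include <list>

static bool is_strictly_before(const char *s1, const char *s2) {
  return std::strcmp(s1, s2) < 0; // strict weak ordering, as sort() expects
}
static bool is_duplicate(const char *s1, const char *s2) {
  return std::strcmp(s1, s2) == 0;
}

int main() {
  std::list<const char *> Decls = {"@sra.i16", "@memcpy", "@sra.i16"};
  Decls.sort(is_strictly_before);
  Decls.unique(is_duplicate); // leaves "@memcpy", "@sra.i16"
  return Decls.size() == 2 ? 0 : 1;
}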
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { + const MCSection *S = getObjFileLowering().SectionForGlobal(I, Mang, TM); + + I->setSection(((const MCSectionPIC16*)S)->getName()); + } + + DbgInfo.BeginModule(M); + EmitFunctionDecls(M); + EmitUndefinedVars(M); + EmitDefinedVars(M); + EmitIData(M); + EmitUData(M); + EmitRomData(M); + return Result; +} + +/// Emit extern decls for functions imported from other modules, and emit +/// global declarations for function defined in this module and which are +/// available to other modules. +/// +void PIC16AsmPrinter::EmitFunctionDecls(Module &M) { + // Emit declarations for external functions. + O <<"\n"<getCommentString() << "Function Declarations - BEGIN." <<"\n"; + for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) { + if (I->isIntrinsic()) + continue; + + std::string Name = Mang->getMangledName(I); + if (Name.compare("@abort") == 0) + continue; + + if (!I->isDeclaration() && !I->hasExternalLinkage()) + continue; + + // Do not emit memcpy, memset, and memmove here. + // Calls to these routines can be generated in two ways, + // 1. User calling the standard lib function + // 2. Codegen generating these calls for llvm intrinsics. + // In the first case a prototype is alread availale, while in + // second case the call is via and externalsym and the prototype is missing. + // So declarations for these are currently always getting printing by + // tracking both kind of references in printInstrunction. + if (I->isDeclaration() && PAN::isMemIntrinsic(Name)) continue; + + const char *directive = I->isDeclaration() ? MAI->getExternDirective() : + MAI->getGlobalDirective(); + + O << directive << Name << "\n"; + O << directive << PAN::getRetvalLabel(Name) << "\n"; + O << directive << PAN::getArgsLabel(Name) << "\n"; + } + + O << MAI->getCommentString() << "Function Declarations - END." <<"\n"; +} + +// Emit variables imported from other Modules. +void PIC16AsmPrinter::EmitUndefinedVars(Module &M) { + std::vector Items = PTOF->ExternalVarDecls->Items; + if (!Items.size()) return; + + O << "\n" << MAI->getCommentString() << "Imported Variables - BEGIN" << "\n"; + for (unsigned j = 0; j < Items.size(); j++) { + O << MAI->getExternDirective() << Mang->getMangledName(Items[j]) << "\n"; + } + O << MAI->getCommentString() << "Imported Variables - END" << "\n"; +} + +// Emit variables defined in this module and are available to other modules. +void PIC16AsmPrinter::EmitDefinedVars(Module &M) { + std::vector Items = PTOF->ExternalVarDefs->Items; + if (!Items.size()) return; + + O << "\n" << MAI->getCommentString() << "Exported Variables - BEGIN" << "\n"; + for (unsigned j = 0; j < Items.size(); j++) { + O << MAI->getGlobalDirective() << Mang->getMangledName(Items[j]) << "\n"; + } + O << MAI->getCommentString() << "Exported Variables - END" << "\n"; +} + +// Emit initialized data placed in ROM. +void PIC16AsmPrinter::EmitRomData(Module &M) { + // Print ROM Data section. 
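The skips in EmitFunctionDecls above can be modeled compactly. A sketch with a plain struct standing in for llvm::Function; the field and function names here are illustrative only:

#include <string>

struct FnInfo {
  bool IsIntrinsic;
  bool IsDeclaration;
  bool HasExternalLinkage;
  std::string Name;
};

// Mirrors the loop above: intrinsics and @abort are never declared,
// internal definitions stay private, and declared mem intrinsics are left
// to the operand printer's bookkeeping.
static bool needsDecl(const FnInfo &F, bool IsMemIntrinsic) {
  if (F.IsIntrinsic) return false;
  if (F.Name == "@abort") return false;
  if (!F.IsDeclaration && !F.HasExternalLinkage) return false;
  if (F.IsDeclaration && IsMemIntrinsic) return false;
  return true;
}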
+ const std::vector &ROSections = PTOF->ROSections; + for (unsigned i = 0; i < ROSections.size(); i++) { + const std::vector &Items = ROSections[i]->Items; + if (!Items.size()) continue; + O << "\n"; + OutStreamer.SwitchSection(PTOF->ROSections[i]->S_); + for (unsigned j = 0; j < Items.size(); j++) { + O << Mang->getMangledName(Items[j]); + Constant *C = Items[j]->getInitializer(); + int AddrSpace = Items[j]->getType()->getAddressSpace(); + EmitGlobalConstant(C, AddrSpace); + } + } +} + +bool PIC16AsmPrinter::doFinalization(Module &M) { + printLibcallDecls(); + EmitRemainingAutos(); + DbgInfo.EndModule(M); + O << "\n\t" << "END\n"; + return AsmPrinter::doFinalization(M); +} + +void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { + const Function *F = MF.getFunction(); + std::string FuncName = Mang->getMangledName(F); + const TargetData *TD = TM.getTargetData(); + // Emit the data section name. + O << "\n"; + + const MCSection *fPDataSection = + getObjFileLowering().getSectionForFunctionFrame(CurrentFnName); + OutStreamer.SwitchSection(fPDataSection); + + // Emit function frame label + O << PAN::getFrameLabel(CurrentFnName) << ":\n"; + + const Type *RetType = F->getReturnType(); + unsigned RetSize = 0; + if (RetType->getTypeID() != Type::VoidTyID) + RetSize = TD->getTypeAllocSize(RetType); + + //Emit function return value space + // FIXME: Do not emit RetvalLable when retsize is zero. To do this + // we will need to avoid printing a global directive for Retval label + // in emitExternandGloblas. + if(RetSize > 0) + O << PAN::getRetvalLabel(CurrentFnName) << " RES " << RetSize << "\n"; + else + O << PAN::getRetvalLabel(CurrentFnName) << ": \n"; + + // Emit variable to hold the space for function arguments + unsigned ArgSize = 0; + for (Function::const_arg_iterator argi = F->arg_begin(), + arge = F->arg_end(); argi != arge ; ++argi) { + const Type *Ty = argi->getType(); + ArgSize += TD->getTypeAllocSize(Ty); + } + + O << PAN::getArgsLabel(CurrentFnName) << " RES " << ArgSize << "\n"; + + // Emit temporary space + int TempSize = PTLI->GetTmpSize(); + if (TempSize > 0) + O << PAN::getTempdataLabel(CurrentFnName) << " RES " << TempSize << '\n'; +} + +void PIC16AsmPrinter::EmitIData(Module &M) { + + // Print all IDATA sections. + const std::vector &IDATASections = PTOF->IDATASections; + for (unsigned i = 0; i < IDATASections.size(); i++) { + O << "\n"; + if (IDATASections[i]->S_->getName().find("llvm.") != std::string::npos) + continue; + OutStreamer.SwitchSection(IDATASections[i]->S_); + std::vector Items = IDATASections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string Name = Mang->getMangledName(Items[j]); + Constant *C = Items[j]->getInitializer(); + int AddrSpace = Items[j]->getType()->getAddressSpace(); + O << Name; + EmitGlobalConstant(C, AddrSpace); + } + } +} + +void PIC16AsmPrinter::EmitUData(Module &M) { + const TargetData *TD = TM.getTargetData(); + + // Print all BSS sections. 
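EmitFunctionFrame above sizes the args block by summing getTypeAllocSize over the formals and reserves it with a RES directive. A minimal model with plain integers in place of TargetData; the label spelling in the trailing comment is an assumed example:

#include <numeric>
#include <vector>

// RES reserves Size bytes at the current location, so the args block is
// simply the sum of the ABI sizes of the formal parameters.
static unsigned argsBlockSize(const std::vector<unsigned> &ArgSizes) {
  return std::accumulate(ArgSizes.begin(), ArgSizes.end(), 0u);
}
// e.g. an (i8, i16) signature would emit something like "foo.args.# RES 3"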
+ const std::vector &BSSSections = PTOF->BSSSections; + for (unsigned i = 0; i < BSSSections.size(); i++) { + O << "\n"; + OutStreamer.SwitchSection(BSSSections[i]->S_); + std::vector Items = BSSSections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string Name = Mang->getMangledName(Items[j]); + Constant *C = Items[j]->getInitializer(); + const Type *Ty = C->getType(); + unsigned Size = TD->getTypeAllocSize(Ty); + + O << Name << " RES " << Size << "\n"; + } + } +} + +void PIC16AsmPrinter::EmitAutos(std::string FunctName) { + // Section names for all globals are already set. + const TargetData *TD = TM.getTargetData(); + + // Now print Autos section for this function. + std::string SectionName = PAN::getAutosSectionName(FunctName); + const std::vector &AutosSections = PTOF->AutosSections; + for (unsigned i = 0; i < AutosSections.size(); i++) { + O << "\n"; + if (AutosSections[i]->S_->getName() == SectionName) { + // Set the printing status to true + AutosSections[i]->setPrintedStatus(true); + OutStreamer.SwitchSection(AutosSections[i]->S_); + const std::vector &Items = AutosSections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string VarName = Mang->getMangledName(Items[j]); + Constant *C = Items[j]->getInitializer(); + const Type *Ty = C->getType(); + unsigned Size = TD->getTypeAllocSize(Ty); + // Emit memory reserve directive. + O << VarName << " RES " << Size << "\n"; + } + break; + } + } +} + +// Print autos that were not printed during the code printing of functions. +// As the functions might themselves would have got deleted by the optimizer. +void PIC16AsmPrinter::EmitRemainingAutos() { + const TargetData *TD = TM.getTargetData(); + + // Now print Autos section for this function. + std::vector AutosSections = PTOF->AutosSections; + for (unsigned i = 0; i < AutosSections.size(); i++) { + + // if the section is already printed then don't print again + if (AutosSections[i]->isPrinted()) + continue; + + // Set status as printed + AutosSections[i]->setPrintedStatus(true); + + O << "\n"; + OutStreamer.SwitchSection(AutosSections[i]->S_); + const std::vector &Items = AutosSections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string VarName = Mang->getMangledName(Items[j]); + Constant *C = Items[j]->getInitializer(); + const Type *Ty = C->getType(); + unsigned Size = TD->getTypeAllocSize(Ty); + // Emit memory reserve directive. + O << VarName << " RES " << Size << "\n"; + } + } +} + + +extern "C" void LLVMInitializePIC16AsmPrinter() { + RegisterAsmPrinter X(ThePIC16Target); +} + + diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h new file mode 100644 index 0000000000000..2dd4600b76c2e --- /dev/null +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -0,0 +1,80 @@ +//===-- PIC16AsmPrinter.h - PIC16 LLVM assembly writer ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PIC16 assembly language. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PIC16ASMPRINTER_H +#define PIC16ASMPRINTER_H + +#include "PIC16.h" +#include "PIC16TargetMachine.h" +#include "PIC16DebugInfo.h" +#include "PIC16MCAsmInfo.h" +#include "PIC16TargetObjectFile.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include +#include + +namespace llvm { + class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { + public: + explicit PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V); + private: + virtual const char *getPassName() const { + return "PIC16 Assembly Printer"; + } + + PIC16TargetObjectFile &getObjFileLowering() const { + return (PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); + } + + bool runOnMachineFunction(MachineFunction &F); + void printOperand(const MachineInstr *MI, int opNum); + void printCCOperand(const MachineInstr *MI, int opNum); + void printInstruction(const MachineInstr *MI); // definition autogenerated. + static const char *getRegisterName(unsigned RegNo); + + bool printMachineInstruction(const MachineInstr *MI); + void EmitFunctionDecls (Module &M); + void EmitUndefinedVars (Module &M); + void EmitDefinedVars (Module &M); + void EmitIData (Module &M); + void EmitUData (Module &M); + void EmitAutos (std::string FunctName); + void EmitRemainingAutos (); + void EmitRomData (Module &M); + void EmitFunctionFrame(MachineFunction &MF); + void printLibcallDecls(); + protected: + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + /// PrintGlobalVariable - Emit the specified global variable and its + /// initializer to the output stream. + virtual void PrintGlobalVariable(const GlobalVariable *GV) { + // PIC16 doesn't use normal hooks for this. + } + + private: + PIC16TargetObjectFile *PTOF; + PIC16TargetLowering *PTLI; + PIC16DbgInfo DbgInfo; + const PIC16MCAsmInfo *PMAI; + std::list LibcallDecls; // List of extern decls. + }; +} // end of namespace + +#endif diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt index 00d737af4c2ef..0ee88f9cda6f0 100644 --- a/lib/Target/PIC16/CMakeLists.txt +++ b/lib/Target/PIC16/CMakeLists.txt @@ -11,14 +11,14 @@ tablegen(PIC16GenCallingConv.inc -gen-callingconv) tablegen(PIC16GenSubtarget.inc -gen-subtarget) add_llvm_target(PIC16 - PIC16AsmPrinter.cpp PIC16DebugInfo.cpp PIC16InstrInfo.cpp PIC16ISelDAGToDAG.cpp PIC16ISelLowering.cpp PIC16MemSelOpt.cpp + PIC16MCAsmInfo.cpp PIC16RegisterInfo.cpp PIC16Subtarget.cpp - PIC16TargetAsmInfo.cpp PIC16TargetMachine.cpp + PIC16TargetObjectFile.cpp ) diff --git a/lib/Target/PIC16/MCSectionPIC16.h b/lib/Target/PIC16/MCSectionPIC16.h new file mode 100644 index 0000000000000..352be99d71c2f --- /dev/null +++ b/lib/Target/PIC16/MCSectionPIC16.h @@ -0,0 +1,88 @@ +//===- MCSectionPIC16.h - PIC16-specific section representation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MCSectionPIC16 class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PIC16SECTION_H +#define LLVM_PIC16SECTION_H + +#include "llvm/MC/MCSection.h" + +namespace llvm { + + /// MCSectionPIC16 - Represents a physical section in PIC16 COFF. + /// Contains data objects. + /// + class MCSectionPIC16 : public MCSection { + /// Name of the section to uniquely identify it. + std::string Name; + + /// User can specify an address at which a section should be placed. + /// Negative value here means user hasn't specified any. + int Address; + + /// Overlay information - Sections with same color can be overlaid on + /// one another. + int Color; + + /// Conatined data objects. + std::vectorItems; + + /// Total size of all data objects contained here. + unsigned Size; + + MCSectionPIC16(const StringRef &name, SectionKind K, int addr, int color) + : MCSection(K), Name(name), Address(addr), Color(color) { + } + + public: + /// Return the name of the section. + const std::string &getName() const { return Name; } + + /// Return the Address of the section. + int getAddress() const { return Address; } + + /// Return the Color of the section. + int getColor() const { return Color; } + + /// PIC16 Terminology for section kinds is as below. + /// UDATA - BSS + /// IDATA - initialized data (equiv to Metadata) + /// ROMDATA - ReadOnly. + /// UDATA_OVR - Sections that can be overlaid. Section of such type is + /// used to contain function autos an frame. We can think of + /// it as equiv to llvm ThreadBSS) + /// So, let's have some convenience functions to Map PIC16 Section types + /// to SectionKind just for the sake of better readability. + static SectionKind UDATA_Kind() { return SectionKind::getBSS(); } + static SectionKind IDATA_Kind() { return SectionKind::getMetadata(); } + static SectionKind ROMDATA_Kind() { return SectionKind::getReadOnly(); } + static SectionKind UDATA_OVR_Kind() { return SectionKind::getThreadBSS(); } + + // If we could just do getKind() == UDATA_Kind() ? + bool isUDATA_Kind() { return getKind().isBSS(); } + bool isIDATA_Kind() { return getKind().isMetadata(); } + bool isROMDATA_Kind() { return getKind().isMetadata(); } + bool isUDATA_OVR_Kind() { return getKind().isThreadBSS(); } + + /// This would be the only way to create a section. + static MCSectionPIC16 *Create(const StringRef &Name, SectionKind K, + int Address, int Color, MCContext &Ctx); + + /// Override this as PIC16 has its own way of printing switching + /// to a section. + virtual void PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/PIC16/Makefile b/lib/Target/PIC16/Makefile index c429324cc2d13..f913675da8928 100644 --- a/lib/Target/PIC16/Makefile +++ b/lib/Target/PIC16/Makefile @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../.. -LIBRARYNAME = LLVMPIC16 +LIBRARYNAME = LLVMPIC16CodeGen TARGET = PIC16 # Make sure that tblgen is run, first thing. 
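The kind mapping declared in MCSectionPIC16 above, restated as a compilable table. Note an apparent inconsistency in the hunk itself: isROMDATA_Kind() tests isMetadata() even though ROMDATA_Kind() is SectionKind::getReadOnly(); the sketch follows the declared mapping and keeps ROMDATA read-only:

#include <cassert>

enum class PIC16Kind { UDATA, IDATA, ROMDATA, UDATA_OVR };
enum class LLVMKind  { BSS, Metadata, ReadOnly, ThreadBSS };

static LLVMKind sectionKindFor(PIC16Kind K) {
  switch (K) {
  case PIC16Kind::UDATA:     return LLVMKind::BSS;       // uninitialized data
  case PIC16Kind::IDATA:     return LLVMKind::Metadata;  // initialized data
  case PIC16Kind::ROMDATA:   return LLVMKind::ReadOnly;  // program-memory constants
  case PIC16Kind::UDATA_OVR: return LLVMKind::ThreadBSS; // overlayable autos/frames
  }
  return LLVMKind::BSS; // unreachable
}

int main() {
  assert(sectionKindFor(PIC16Kind::ROMDATA) == LLVMKind::ReadOnly);
  return 0;
}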
@@ -17,5 +17,7 @@ BUILT_SOURCES = PIC16GenRegisterInfo.h.inc PIC16GenRegisterNames.inc \ PIC16GenDAGISel.inc PIC16GenCallingConv.inc \ PIC16GenSubtarget.inc +DIRS = AsmPrinter TargetInfo + include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index 7940648928a76..8a3704d7071e0 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -15,8 +15,8 @@ #ifndef LLVM_TARGET_PIC16_H #define LLVM_TARGET_PIC16_H +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" -#include #include #include #include @@ -26,7 +26,7 @@ namespace llvm { class PIC16TargetMachine; class FunctionPass; class MachineCodeEmitter; - class raw_ostream; + class formatted_raw_ostream; namespace PIC16CC { enum CondCodes { @@ -83,7 +83,7 @@ namespace PIC16CC { // initialized globals - @idata..# // Function frame - @.frame_section. // Function autos - @.autos_section. - // Declarations - @section.0 + // Declarations - Enclosed in comments. No section for them. //---------------------------------------------------------- // Tags used to mangle different names. @@ -221,17 +221,29 @@ namespace PIC16CC { return Func1 + tag + "# CODE"; } - // udata and idata section names are generated by a given number. + // udata, romdata and idata section names are generated by a given number. // @udata..# - static std::string getUdataSectionName(unsigned num) { + static std::string getUdataSectionName(unsigned num, + std::string prefix = "") { std::ostringstream o; - o << getTagName(PREFIX_SYMBOL) << "udata." << num << ".# UDATA"; + o << getTagName(PREFIX_SYMBOL) << prefix << "udata." << num + << ".# UDATA"; return o.str(); } - static std::string getIdataSectionName(unsigned num) { + static std::string getRomdataSectionName(unsigned num, + std::string prefix = "") { std::ostringstream o; - o << getTagName(PREFIX_SYMBOL) << "idata." << num << ".# IDATA"; + o << getTagName(PREFIX_SYMBOL) << prefix << "romdata." << num + << ".# ROMDATA"; + return o.str(); + } + + static std::string getIdataSectionName(unsigned num, + std::string prefix = "") { + std::ostringstream o; + o << getTagName(PREFIX_SYMBOL) << prefix << "idata." << num + << ".# IDATA"; return o.str(); } @@ -242,6 +254,15 @@ namespace PIC16CC { return false; } + inline static bool isMemIntrinsic (const std::string &Name) { + if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 || + Name.compare("@memmove") == 0) { + return true; + } + + return false; + } + inline static bool isLocalToFunc (std::string &Func, std::string &Var) { if (! isLocalName(Var)) return false; @@ -295,7 +316,7 @@ namespace PIC16CC { inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) { switch (CC) { - default: assert(0 && "Unknown condition code"); + default: llvm_unreachable("Unknown condition code"); case PIC16CC::NE: return "ne"; case PIC16CC::EQ: return "eq"; case PIC16CC::LT: return "lt"; @@ -311,7 +332,7 @@ namespace PIC16CC { inline static bool isSignedComparison(PIC16CC::CondCodes CC) { switch (CC) { - default: assert(0 && "Unknown condition code"); + default: llvm_unreachable("Unknown condition code"); case PIC16CC::NE: case PIC16CC::EQ: case PIC16CC::LT: @@ -330,11 +351,12 @@ namespace PIC16CC { FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM); - FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS, - PIC16TargetMachine &TM, - bool Verbose); - // Banksel optimzer pass. + // Banksel optimizer pass. 
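The numbered section-name generators above all follow one pattern: symbol-prefix tag, optional prefix, base name, number, then the PIC16 section tag. A standalone model; the "@" value for getTagName(PREFIX_SYMBOL) is inferred from the "@udata..#" and "@idata..#" examples in this header's comments:

#include <sstream>
#include <string>

static std::string mkSectionName(const std::string &tagPrefix,
                                 const std::string &prefix,
                                 const char *base, unsigned num,
                                 const char *kind) {
  std::ostringstream o;
  o << tagPrefix << prefix << base << "." << num << ".# " << kind;
  return o.str();
}
// mkSectionName("@", "", "romdata", 2, "ROMDATA") -> "@romdata.2.# ROMDATA"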
FunctionPass *createPIC16MemSelOptimizerPass(); + + extern Target ThePIC16Target; + extern Target TheCooperTarget; + } // end namespace llvm; // Defines symbolic names for PIC16 registers. This defines a mapping from diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 430058805ddcf..961caedbf9aba 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -1,3 +1,4 @@ + //===-- PIC16DebugInfo.cpp - Implementation for PIC16 Debug Information ======// // // The LLVM Compiler Infrastructure @@ -15,8 +16,10 @@ #include "PIC16DebugInfo.h" #include "llvm/GlobalVariable.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/DebugLoc.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; @@ -25,11 +28,11 @@ using namespace llvm; void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TagName) { - if (Ty.isBasicType(Ty.getTag())) + if (Ty.isBasicType()) PopulateBasicTypeInfo (Ty, TypeNo); - else if (Ty.isDerivedType(Ty.getTag())) + else if (Ty.isDerivedType()) PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); - else if (Ty.isCompositeType(Ty.getTag())) + else if (Ty.isCompositeType()) PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName); else { TypeNo = PIC16Dbg::T_NULL; @@ -41,8 +44,7 @@ void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, /// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty. /// void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) { - std::string Name = ""; - Ty.getName(Name); + std::string Name = Ty.getName(); unsigned short BaseTy = GetTypeDebugNumber(Name); TypeNo = TypeNo << PIC16Dbg::S_BASIC; TypeNo = TypeNo | (0xffff & BaseTy); @@ -67,7 +69,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, // We also need to encode the the information about the base type of // pointer in TypeNo. - DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom(); + DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom(); PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName); } @@ -76,7 +78,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TagName) { - DICompositeType CTy = DICompositeType(Ty.getGV()); + DICompositeType CTy = DICompositeType(Ty.getNode()); DIArray Elements = CTy.getTypeArray(); unsigned short size = 1; unsigned short Dimension[4]={0,0,0,0}; @@ -85,7 +87,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo, if (Element.getTag() == dwarf::DW_TAG_subrange_type) { TypeNo = TypeNo << PIC16Dbg::S_DERIVED; TypeNo = TypeNo | PIC16Dbg::DT_ARY; - DISubrange SubRange = DISubrange(Element.getGV()); + DISubrange SubRange = DISubrange(Element.getNode()); Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1; // Each dimension is represented by 2 bytes starting at byte 9. 
Aux[8+i*2+0] = Dimension[i]; @@ -108,16 +110,20 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TagName) { - DICompositeType CTy = DICompositeType(Ty.getGV()); + DICompositeType CTy = DICompositeType(Ty.getNode()); TypeNo = TypeNo << PIC16Dbg::S_BASIC; if (Ty.getTag() == dwarf::DW_TAG_structure_type) TypeNo = TypeNo | PIC16Dbg::T_STRUCT; else TypeNo = TypeNo | PIC16Dbg::T_UNION; - CTy.getName(TagName); + TagName = CTy.getName(); // UniqueSuffix is .number where number is obtained from // llvm.dbg.composite. - std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18); + // FIXME: This will break when composite type is not represented by + // llvm.dbg.composite* global variable. Since we need to revisit + // PIC16DebugInfo implementation anyways after the MDNodes based + // framework is done, let us continue with the way it is. + std::string UniqueSuffix = "." + Ty.getNode()->getNameStr().substr(18); TagName += UniqueSuffix; unsigned short size = CTy.getSizeInBits()/8; // 7th and 8th byte represent size. @@ -200,12 +206,14 @@ short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) { /// required initializations. void PIC16DbgInfo::BeginModule(Module &M) { // Emit file directive for module. - GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit"); - if (CU) { + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + if (DbgFinder.compile_unit_count() != 0) { + // FIXME : What if more then one CUs are present in a module ? + MDNode *CU = *DbgFinder.compile_unit_begin(); EmitDebugDirectives = true; SwitchToCU(CU); } - // Emit debug info for decls of composite types. EmitCompositeTypeDecls(M); } @@ -233,10 +241,11 @@ void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) { // Retreive the first valid debug Loc and process it. const DebugLoc &DL = GetDebugLocForFunction(MF); - ChangeDebugLoc(MF, DL, true); - - EmitFunctBeginDI(MF.getFunction()); - + // Emit debug info only if valid debug info is available. + if (!DL.isUnknown()) { + ChangeDebugLoc(MF, DL, true); + EmitFunctBeginDI(MF.getFunction()); + } // Set current line to 0 so that.line directive is genearted after .bf. CurLine = 0; } @@ -249,7 +258,7 @@ void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF, if (! EmitDebugDirectives) return; assert (! DL.isUnknown() && "can't change to invalid debug loc"); - GlobalVariable *CU = MF.getDebugLocTuple(DL).CompileUnit; + MDNode *CU = MF.getDebugLocTuple(DL).Scope; unsigned line = MF.getDebugLocTuple(DL).Line; SwitchToCU(CU); @@ -268,7 +277,10 @@ void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) { /// void PIC16DbgInfo::EndFunction(const MachineFunction &MF) { if (! EmitDebugDirectives) return; - EmitFunctEndDI(MF.getFunction(), CurLine); + const DebugLoc &DL = GetDebugLocForFunction(MF); + // Emit debug info only if valid debug info is available. + if (!DL.isUnknown()) + EmitFunctEndDI(MF.getFunction(), CurLine); } /// EndModule - Emit .eof for end of module. @@ -283,7 +295,7 @@ void PIC16DbgInfo::EndModule(Module &M) { /// composite type. 
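The aux-record packing used for composite types above, isolated into a sketch. Byte positions follow the hunk's comments: the size occupies the 7th and 8th bytes little-endian, and each array dimension starts two bytes apart from byte 9:

static void packCompositeAux(int Aux[], unsigned short size,
                             const unsigned short Dim[], unsigned NumDims) {
  Aux[6] = size & 0xff; // low byte of size (7th byte)
  Aux[7] = size >> 8;   // high byte of size (8th byte)
  for (unsigned i = 0; i < NumDims; ++i)
    Aux[8 + i * 2] = Dim[i]; // dimension i, first of its two bytes
}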
/// void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, - std::string UniqueSuffix) { + std::string SuffixNo) { unsigned long Value = 0; DIArray Elements = CTy.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) { @@ -292,24 +304,22 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, bool HasAux = false; int ElementAux[PIC16Dbg::AuxSize] = { 0 }; std::string TagName = ""; - std::string ElementName; - GlobalVariable *GV = Element.getGV(); - DIDerivedType DITy(GV); - DITy.getName(ElementName); + DIDerivedType DITy(Element.getNode()); + const char *ElementName = DITy.getName(); unsigned short ElementSize = DITy.getSizeInBits()/8; // Get mangleddd name for this structure/union element. - std::string MangMemName = ElementName + UniqueSuffix; + std::string MangMemName = ElementName + SuffixNo; PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName); short Class = 0; if( CTy.getTag() == dwarf::DW_TAG_union_type) Class = PIC16Dbg::C_MOU; else if (CTy.getTag() == dwarf::DW_TAG_structure_type) Class = PIC16Dbg::C_MOS; - EmitSymbol(MangMemName, Class, TypeNo, Value); + EmitSymbol(MangMemName.c_str(), Class, TypeNo, Value); if (CTy.getTag() == dwarf::DW_TAG_structure_type) Value += ElementSize; if (HasAux) - EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TagName); + EmitAuxEntry(MangMemName.c_str(), ElementAux, PIC16Dbg::AuxSize, TagName); } } @@ -317,48 +327,48 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, /// and union declarations. /// void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { - for(iplist::iterator I = M.getGlobalList().begin(), - E = M.getGlobalList().end(); I != E; I++) { - // Structures and union declaration's debug info has llvm.dbg.composite - // in its name. - if(I->getName().find("llvm.dbg.composite") != std::string::npos) { - GlobalVariable *GV = cast(I); - DICompositeType CTy(GV); - if (CTy.getTag() == dwarf::DW_TAG_union_type || - CTy.getTag() == dwarf::DW_TAG_structure_type ) { - std::string name; - CTy.getName(name); - std::string DIVar = I->getName(); - // Get the number after llvm.dbg.composite and make UniqueSuffix from - // it. - std::string UniqueSuffix = "." + DIVar.substr(18); - std::string MangledCTyName = name + UniqueSuffix; - unsigned short size = CTy.getSizeInBits()/8; - int Aux[PIC16Dbg::AuxSize] = {0}; - // 7th and 8th byte represent size of structure/union. - Aux[6] = size & 0xff; - Aux[7] = size >> 8; - // Emit .def for structure/union tag. - if( CTy.getTag() == dwarf::DW_TAG_union_type) - EmitSymbol(MangledCTyName, PIC16Dbg::C_UNTAG); - else if (CTy.getTag() == dwarf::DW_TAG_structure_type) - EmitSymbol(MangledCTyName, PIC16Dbg::C_STRTAG); - - // Emit auxiliary debug information for structure/union tag. - EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize); - - // Emit members. - EmitCompositeTypeElements (CTy, UniqueSuffix); - - // Emit mangled Symbol for end of structure/union. 
- std::string EOSSymbol = ".eos" + UniqueSuffix; - EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS); - EmitAuxEntry(EOSSymbol, Aux, PIC16Dbg::AuxSize, MangledCTyName); - } + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + for (DebugInfoFinder::iterator I = DbgFinder.type_begin(), + E = DbgFinder.type_end(); I != E; ++I) { + DICompositeType CTy(*I); + if (CTy.isNull()) + continue; + if (CTy.getTag() == dwarf::DW_TAG_union_type || + CTy.getTag() == dwarf::DW_TAG_structure_type ) { + const char *Name = CTy.getName(); + // Get the number after llvm.dbg.composite and make UniqueSuffix from + // it. + std::string DIVar = CTy.getNode()->getNameStr(); + std::string UniqueSuffix = "." + DIVar.substr(18); + std::string MangledCTyName = Name + UniqueSuffix; + unsigned short size = CTy.getSizeInBits()/8; + int Aux[PIC16Dbg::AuxSize] = {0}; + // 7th and 8th byte represent size of structure/union. + Aux[6] = size & 0xff; + Aux[7] = size >> 8; + // Emit .def for structure/union tag. + if( CTy.getTag() == dwarf::DW_TAG_union_type) + EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_UNTAG); + else if (CTy.getTag() == dwarf::DW_TAG_structure_type) + EmitSymbol(MangledCTyName.c_str(), PIC16Dbg::C_STRTAG); + + // Emit auxiliary debug information for structure/union tag. + EmitAuxEntry(MangledCTyName.c_str(), Aux, PIC16Dbg::AuxSize); + + // Emit members. + EmitCompositeTypeElements (CTy, UniqueSuffix); + + // Emit mangled Symbol for end of structure/union. + std::string EOSSymbol = ".eos" + UniqueSuffix; + EmitSymbol(EOSSymbol.c_str(), PIC16Dbg::C_EOS); + EmitAuxEntry(EOSSymbol.c_str(), Aux, PIC16Dbg::AuxSize, + MangledCTyName.c_str()); } } } + /// EmitFunctBeginDI - Emit .bf for function. /// void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { @@ -425,31 +435,26 @@ void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short /// EmitVarDebugInfo - Emit debug information for all variables. /// void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { - GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables"); - if (!Root) - return; - - Constant *RootC = cast(*Root->use_begin()); - for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end(); - UI != UE; ++UI) { - for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end(); - UUI != UUE; ++UUI) { - DIGlobalVariable DIGV(cast(*UUI)); - DIType Ty = DIGV.getType(); - unsigned short TypeNo = 0; - bool HasAux = false; - int Aux[PIC16Dbg::AuxSize] = { 0 }; - std::string TagName = ""; - std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName(); - PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName); - // Emit debug info only if type information is availaible. - if (TypeNo != PIC16Dbg::T_NULL) { - O << "\n\t.type " << VarName << ", " << TypeNo; - short ClassNo = getStorageClass(DIGV); - O << "\n\t.class " << VarName << ", " << ClassNo; - if (HasAux) - EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName); - } + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + + for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), + E = DbgFinder.global_variable_end(); I != E; ++I) { + DIGlobalVariable DIGV(*I); + DIType Ty = DIGV.getType(); + unsigned short TypeNo = 0; + bool HasAux = false; + int Aux[PIC16Dbg::AuxSize] = { 0 }; + std::string TagName = ""; + std::string VarName = MAI->getGlobalPrefix()+DIGV.getGlobal()->getNameStr(); + PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName); + // Emit debug info only if type information is availaible. 
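The UniqueSuffix derivation above leans on "llvm.dbg.composite" being exactly 18 characters long, a fragility the FIXME in the hunk already admits. A self-checking sketch:

#include <cassert>
#include <string>

static std::string uniqueSuffix(const std::string &DIVarName) {
  return "." + DIVarName.substr(18); // strlen("llvm.dbg.composite") == 18
}

int main() {
  assert(uniqueSuffix("llvm.dbg.composite7") == ".7");
  return 0;
}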
+ if (TypeNo != PIC16Dbg::T_NULL) { + O << "\n\t.type " << VarName << ", " << TypeNo; + short ClassNo = getStorageClass(DIGV); + O << "\n\t.class " << VarName << ", " << ClassNo; + if (HasAux) + EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName); } } O << "\n"; @@ -457,12 +462,12 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { /// SwitchToCU - Switch to a new compilation unit. /// -void PIC16DbgInfo::SwitchToCU(GlobalVariable *CU) { +void PIC16DbgInfo::SwitchToCU(MDNode *CU) { // Get the file path from CU. DICompileUnit cu(CU); - std::string DirName, FileName; - std::string FilePath = cu.getDirectory(DirName) + "/" + - cu.getFilename(FileName); + std::string DirName = cu.getDirectory(); + std::string FileName = cu.getFilename(); + std::string FilePath = DirName + "/" + FileName; // Nothing to do if source file is still same. if ( FilePath == CurFile ) return; diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h index d126d851b50e0..54e27c7c33771 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.h +++ b/lib/Target/PIC16/PIC16DebugInfo.h @@ -16,8 +16,6 @@ #include "llvm/Analysis/DebugInfo.h" #include "llvm/Module.h" -#include "llvm/Target/TargetAsmInfo.h" -#include namespace llvm { class MachineFunction; @@ -90,11 +88,11 @@ namespace llvm { }; } - class raw_ostream; + class formatted_raw_ostream; class PIC16DbgInfo { - raw_ostream &O; - const TargetAsmInfo *TAI; + formatted_raw_ostream &O; + const MCAsmInfo *MAI; std::string CurFile; unsigned CurLine; @@ -103,7 +101,8 @@ namespace llvm { bool EmitDebugDirectives; public: - PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) { + PIC16DbgInfo(formatted_raw_ostream &o, const MCAsmInfo *T) + : O(o), MAI(T) { CurFile = ""; CurLine = 0; EmitDebugDirectives = false; @@ -118,7 +117,7 @@ namespace llvm { private: - void SwitchToCU (GlobalVariable *CU); + void SwitchToCU (MDNode *CU); void SwitchToLine (unsigned Line, bool IsInBeginFunction = false); void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux, @@ -144,8 +143,7 @@ namespace llvm { short getStorageClass(DIGlobalVariable DIGV); void EmitFunctBeginDI(const Function *F); void EmitCompositeTypeDecls(Module &M); - void EmitCompositeTypeElements (DICompositeType CTy, - std::string UniqueSuffix); + void EmitCompositeTypeElements (DICompositeType CTy, std::string Suffix); void EmitFunctEndDI(const Function *F, unsigned Line); void EmitAuxEntry(const std::string VarName, int Aux[], int num = PIC16Dbg::AuxSize, std::string TagName = ""); diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index 6c2b8ec9747a8..cc57d12c9042a 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -13,6 +13,8 @@ #define DEBUG_TYPE "pic16-isel" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "PIC16ISelDAGToDAG.h" #include "llvm/Support/Debug.h" diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index 83abed3958a4a..3a2f6b47b37e1 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -31,7 +31,7 @@ class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { /// PIC16Lowering - This object fully describes how to lower LLVM code to an /// PIC16-specific SelectionDAG. 
- PIC16TargetLowering PIC16Lowering; + PIC16TargetLowering &PIC16Lowering; public: explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) : diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 0d24f61c49a83..bf986b1354c54 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pic16-lower" - #include "PIC16ISelLowering.h" +#include "PIC16TargetObjectFile.h" #include "PIC16TargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -30,7 +31,7 @@ using namespace llvm; static const char *getIntrinsicName(unsigned opcode) { std::string Basename; switch(opcode) { - default: assert (0 && "do not know intrinsic name"); + default: llvm_unreachable("do not know intrinsic name"); // Arithmetic Right shift for integer types. case PIC16ISD::SRA_I8: Basename = "sra.i8"; break; case RTLIB::SRA_I16: Basename = "sra.i16"; break; @@ -114,22 +115,48 @@ static const char *getIntrinsicName(unsigned opcode) { std::string Fullname = prefix + tagname + Basename; // The name has to live through program life. - char *tmp = new char[Fullname.size() + 1]; - strcpy (tmp, Fullname.c_str()); - - return tmp; + return createESName(Fullname); +} + +// getStdLibCallName - Get the name for the standard library function. +static const char *getStdLibCallName(unsigned opcode) { + std::string BaseName; + switch(opcode) { + case RTLIB::COS_F32: BaseName = "cos"; + break; + case RTLIB::SIN_F32: BaseName = "sin"; + break; + case RTLIB::MEMCPY: BaseName = "memcpy"; + break; + case RTLIB::MEMSET: BaseName = "memset"; + break; + case RTLIB::MEMMOVE: BaseName = "memmove"; + break; + default: llvm_unreachable("do not know std lib call name"); + } + std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL); + std::string LibCallName = prefix + BaseName; + + // The name has to live through program life. + return createESName(LibCallName); } // PIC16TargetLowering Constructor. 
PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) - : TargetLowering(TM), TmpSize(0) { + : TargetLowering(TM, new PIC16TargetObjectFile()), TmpSize(0) { Subtarget = &TM.getSubtarget(); addRegisterClass(MVT::i8, PIC16::GPRRegisterClass); setShiftAmountType(MVT::i8); - setShiftAmountFlavor(Extend); + + // Std lib call names + setLibcallName(RTLIB::COS_F32, getStdLibCallName(RTLIB::COS_F32)); + setLibcallName(RTLIB::SIN_F32, getStdLibCallName(RTLIB::SIN_F32)); + setLibcallName(RTLIB::MEMCPY, getStdLibCallName(RTLIB::MEMCPY)); + setLibcallName(RTLIB::MEMSET, getStdLibCallName(RTLIB::MEMSET)); + setLibcallName(RTLIB::MEMMOVE, getStdLibCallName(RTLIB::MEMMOVE)); // SRA library call names setPIC16LibcallName(PIC16ISD::SRA_I8, getIntrinsicName(PIC16ISD::SRA_I8)); @@ -226,6 +253,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) setOperationAction(ISD::STORE, MVT::i8, Legal); setOperationAction(ISD::STORE, MVT::i16, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); setOperationAction(ISD::ADDE, MVT::i8, Custom); setOperationAction(ISD::ADDC, MVT::i8, Custom); @@ -240,46 +268,27 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) setOperationAction(ISD::XOR, MVT::i8, Custom); setOperationAction(ISD::FrameIndex, MVT::i16, Custom); - setOperationAction(ISD::CALL, MVT::i16, Custom); - setOperationAction(ISD::RET, MVT::Other, Custom); - setOperationAction(ISD::MUL, MVT::i8, Custom); - setOperationAction(ISD::MUL, MVT::i16, Expand); - setOperationAction(ISD::MUL, MVT::i32, Expand); + setOperationAction(ISD::MUL, MVT::i8, Custom); setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::MULHU, MVT::i8, Expand); - setOperationAction(ISD::MULHU, MVT::i16, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::MULHS, MVT::i8, Expand); - setOperationAction(ISD::MULHS, MVT::i16, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::SRA, MVT::i8, Custom); - setOperationAction(ISD::SRA, MVT::i16, Expand); - setOperationAction(ISD::SRA, MVT::i32, Expand); setOperationAction(ISD::SHL, MVT::i8, Custom); - setOperationAction(ISD::SHL, MVT::i16, Expand); - setOperationAction(ISD::SHL, MVT::i32, Expand); setOperationAction(ISD::SRL, MVT::i8, Custom); - setOperationAction(ISD::SRL, MVT::i16, Expand); - setOperationAction(ISD::SRL, MVT::i32, Expand); + + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // PIC16 does not support shift parts - setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i8, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i8, Expand); - setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand); - setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i8, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); // PIC16 does not have a SETCC, expand it to SELECT_CC. 
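A brief aside on the constructor hunk above: setLibcallName() is how a target reroutes a standard RTLIB call to its own runtime symbol, and the string it receives is stored by pointer rather than copied. A minimal sketch of the idiom under the 2.6-era API; ToyTargetLowering, the TLOF argument, and the "@"-prefixed names are illustrative, not taken from this patch:

#include "llvm/Target/TargetLowering.h"

namespace {
// Hypothetical target, shown only for the setLibcallName() idiom.
class ToyTargetLowering : public llvm::TargetLowering {
public:
  ToyTargetLowering(llvm::TargetMachine &TM,
                    llvm::TargetLoweringObjectFile *TLOF)
      : llvm::TargetLowering(TM, TLOF) {
    // The pointer is stored, not copied, so the name must outlive this
    // object; that is why the patch funnels its mangled names through
    // createESName(), which returns a permanently allocated copy.
    setLibcallName(llvm::RTLIB::MEMCPY, "@memcpy");  // "@" prefix illustrative
    setLibcallName(llvm::RTLIB::SIN_F32, "@sin");
  }
};
} // end anonymous namespace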
@@ -356,7 +365,8 @@ static void PopulateResults(SDValue N, SmallVectorImpl&Results) { Results.push_back(N); } -MVT PIC16TargetLowering::getSetCCResultType(MVT ValType) const { +MVT::SimpleValueType +PIC16TargetLowering::getSetCCResultType(EVT ValType) const { return MVT::i8; } @@ -379,7 +389,7 @@ PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) { SDValue PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, - MVT RetVT, const SDValue *Ops, + EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, SelectionDAG &DAG, DebugLoc dl) { @@ -389,17 +399,20 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForMVT(); + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; Args.push_back(Entry); } - SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i8); - const Type *RetTy = RetVT.getTypeForMVT(); + SDValue Callee = DAG.getExternalSymbol(getPIC16LibcallName(Call), MVT::i16); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair CallInfo = LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, CallingConv::C, false, Callee, Args, DAG, dl); + false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); return CallInfo.first; } @@ -429,6 +442,7 @@ const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const { case PIC16ISD::SUBCC: return "PIC16ISD::SUBCC"; case PIC16ISD::SELECT_ICC: return "PIC16ISD::SELECT_ICC"; case PIC16ISD::BRCOND: return "PIC16ISD::BRCOND"; + case PIC16ISD::RET: return "PIC16ISD::RET"; case PIC16ISD::Dummy: return "PIC16ISD::Dummy"; } } @@ -502,7 +516,7 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { SDValue Chain = St->getChain(); SDValue Src = St->getValue(); SDValue Ptr = St->getBasePtr(); - MVT ValueType = Src.getValueType(); + EVT ValueType = Src.getValueType(); unsigned StoreOffset = 0; DebugLoc dl = N->getDebugLoc(); @@ -519,6 +533,10 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { SDValue SrcLo, SrcHi; GetExpandedParts(Src, DAG, SrcLo, SrcHi); SDValue ChainLo = Chain, ChainHi = Chain; + // FIXME: This makes unsafe assumptions. The Chain may be a TokenFactor + // created for an unrelated purpose, in which case it may not have + // exactly two operands. Also, even if it does have two operands, they + // may not be the low and high parts of an aligned load that was split. if (Chain.getOpcode() == ISD::TokenFactor) { ChainLo = Chain.getOperand(0); ChainHi = Chain.getOperand(1); @@ -546,16 +564,19 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { GetExpandedParts(SrcHi, DAG, SrcHi1, SrcHi2); SDValue ChainLo = Chain, ChainHi = Chain; + // FIXME: This makes unsafe assumptions; see the FIXME above. if (Chain.getOpcode() == ISD::TokenFactor) { ChainLo = Chain.getOperand(0); ChainHi = Chain.getOperand(1); } SDValue ChainLo1 = ChainLo, ChainLo2 = ChainLo, ChainHi1 = ChainHi, ChainHi2 = ChainHi; + // FIXME: This makes unsafe assumptions; see the FIXME above. if (ChainLo.getOpcode() == ISD::TokenFactor) { ChainLo1 = ChainLo.getOperand(0); ChainLo2 = ChainLo.getOperand(1); } + // FIXME: This makes unsafe assumptions; see the FIXME above. 
if (ChainHi.getOpcode() == ISD::TokenFactor) { ChainHi1 = ChainHi.getOperand(0); ChainHi2 = ChainHi.getOperand(1); @@ -583,8 +604,26 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { getChain(Store3), getChain(Store4)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetLo, RetHi); - } - else { + } else if (ValueType == MVT::i64) { + SDValue SrcLo, SrcHi; + GetExpandedParts(Src, DAG, SrcLo, SrcHi); + SDValue ChainLo = Chain, ChainHi = Chain; + // FIXME: This makes unsafe assumptions; see the FIXME above. + if (Chain.getOpcode() == ISD::TokenFactor) { + ChainLo = Chain.getOperand(0); + ChainHi = Chain.getOperand(1); + } + SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL, + 0 + StoreOffset); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(4, Ptr.getValueType())); + SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL, + 1 + StoreOffset); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, + Store2); + } else { assert (0 && "value type not supported"); return SDValue(); } @@ -660,7 +699,7 @@ void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo, SDValue &Hi) { SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - MVT NewVT = getTypeToTransformTo(N->getValueType(0)); + EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); // Extract the lo component. Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NewVT, Op, @@ -808,7 +847,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { SDValue Load, Offset; SDVTList Tys; - MVT VT, NewVT; + EVT VT, NewVT; SDValue PtrLo, PtrHi; unsigned LoadOffset; @@ -821,7 +860,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { unsigned NumLoads = VT.getSizeInBits() / 8; std::vector PICLoads; unsigned iter; - MVT MemVT = LD->getMemoryVT(); + EVT MemVT = LD->getMemoryVT(); if(ISD::isNON_EXTLoad(N)) { for (iter=0; itergetMemoryVT(); + EVT MemVT = LD->getMemoryVT(); unsigned MemBytes = MemVT.getSizeInBits() / 8; // if MVT::i1 is extended to MVT::i8 then MemBytes will be zero // So set it to one @@ -945,6 +984,19 @@ SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { return Call; } +SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { + // We should have handled larger operands in type legalizer itself. + assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower"); + + SDNode *N = Op.getNode(); + SmallVector Ops(2); + Ops[0] = N->getOperand(0); + Ops[1] = N->getOperand(1); + SDValue Call = MakePIC16Libcall(PIC16ISD::MUL_I8, N->getValueType(0), + &Ops[0], 2, true, DAG, N->getDebugLoc()); + return Call; +} + void PIC16TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl&Results, @@ -953,12 +1005,8 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N, SDValue Res; unsigned i; switch (Op.getOpcode()) { - case ISD::FORMAL_ARGUMENTS: - Res = LowerFORMAL_ARGUMENTS(Op, DAG); break; case ISD::LOAD: Res = ExpandLoad(Op.getNode(), DAG); break; - case ISD::CALL: - Res = LowerCALL(Op, DAG); break; default: { // All other operations are handled in LowerOperation. 
Res = LowerOperation(Op, DAG); @@ -978,8 +1026,6 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N, SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG); case ISD::ADD: case ISD::ADDC: case ISD::ADDE: @@ -992,6 +1038,8 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { return ExpandLoad(Op.getNode(), DAG); case ISD::STORE: return ExpandStore(Op.getNode(), DAG); + case ISD::MUL: + return LowerMUL(Op, DAG); case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -1000,10 +1048,6 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::AND: case ISD::XOR: return LowerBinOp(Op, DAG); - case ISD::CALL: - return LowerCALL(Op, DAG); - case ISD::RET: - return LowerRET(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: @@ -1048,12 +1092,12 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, } SDValue PIC16TargetLowering:: -LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag, +LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, - SelectionDAG &DAG) { - CallSDNode *TheCall = dyn_cast(Op); - unsigned NumOps = TheCall->getNumArgs(); - DebugLoc dl = TheCall->getDebugLoc(); + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG) { + unsigned NumOps = Outs.size(); // If call has no arguments then do nothing and return. if (NumOps == 0) @@ -1064,10 +1108,10 @@ LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag, SDValue Arg, StoreRet; // For PIC16 ABI the arguments come after the return value. - unsigned RetVals = TheCall->getNumRetVals(); + unsigned RetVals = Ins.size(); for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) { // Get the arguments - Arg = TheCall->getArg(i); + Arg = Outs[i].Val; Ops.clear(); Ops.push_back(Chain); @@ -1087,16 +1131,14 @@ LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag, } SDValue PIC16TargetLowering:: -LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel, - SDValue InFlag, SelectionDAG &DAG) { - CallSDNode *TheCall = dyn_cast(Op); - unsigned NumOps = TheCall->getNumArgs(); - DebugLoc dl = TheCall->getDebugLoc(); +LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { + unsigned NumOps = Outs.size(); std::string Name; SDValue Arg, StoreAt; - MVT ArgVT; + EVT ArgVT; unsigned Size=0; - unsigned ArgCount=0; // If call has no arguments then do nothing and return. 
if (NumOps == 0) @@ -1114,9 +1156,9 @@ LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel, std::vector Ops; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - for (unsigned i=ArgCount, Offset = 0; igetArg(i); + Arg = Outs[i].Val; StoreOffset = (Offset + AddressOffset); // Store the argument on frame @@ -1144,12 +1186,12 @@ LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue ArgLabel, } SDValue PIC16TargetLowering:: -LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag, - SDValue DataAddr_Lo, SDValue DataAddr_Hi, - SelectionDAG &DAG) { - CallSDNode *TheCall = dyn_cast(Op); - DebugLoc dl = TheCall->getDebugLoc(); - unsigned RetVals = TheCall->getNumRetVals(); +LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, + SDValue DataAddr_Lo, SDValue DataAddr_Hi, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + unsigned RetVals = Ins.size(); // If call does not have anything to return // then do nothing and go back. @@ -1157,7 +1199,6 @@ LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag, return Chain; // Call has something to return - std::vector ResultVals; SDValue LoadRet; SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other, MVT::Flag); @@ -1167,23 +1208,20 @@ LowerIndirectCallReturn (SDValue Op, SDValue Chain, SDValue InFlag, InFlag); InFlag = getOutFlag(LoadRet); Chain = getChain(LoadRet); - ResultVals.push_back(LoadRet); + InVals.push_back(LoadRet); } - ResultVals.push_back(Chain); - SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl); - return Res; + return Chain; } SDValue PIC16TargetLowering:: -LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue RetLabel, - SDValue InFlag, SelectionDAG &DAG) { - CallSDNode *TheCall = dyn_cast(Op); - DebugLoc dl = TheCall->getDebugLoc(); +LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + // Currently handling primitive types only. 
They will come in // i8 parts - unsigned RetVals = TheCall->getNumRetVals(); - - std::vector ResultVals; + unsigned RetVals = Ins.size(); // Return immediately if the return type is void if (RetVals == 0) @@ -1209,30 +1247,20 @@ LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue RetLabel, Chain = getChain(LoadRet); Offset++; - ResultVals.push_back(LoadRet); + InVals.push_back(LoadRet); } - // To return use MERGE_VALUES - ResultVals.push_back(Chain); - SDValue Res = DAG.getMergeValues(&ResultVals[0], ResultVals.size(), dl); - return Res; + return Chain; } -SDValue PIC16TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { - SDValue Chain = Op.getOperand(0); - DebugLoc dl = Op.getDebugLoc(); - - if (Op.getNumOperands() == 1) // return void - return Op; +SDValue +PIC16TargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { - // return should have odd number of operands - if ((Op.getNumOperands() % 2) == 0 ) { - assert(0 && "Do not know how to return this many arguments!"); - abort(); - } - // Number of values to return - unsigned NumRet = (Op.getNumOperands() / 2); + unsigned NumRet = Outs.size(); // Function returns value always on stack with the offset starting // from 0 @@ -1246,68 +1274,13 @@ SDValue PIC16TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; for(unsigned i=0;igetOperand(2*i + 1); + RetVal = Outs[i].Val; Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal, ES, BS, DAG.getConstant (i, MVT::i8)); } - return DAG.getNode(ISD::RET, dl, MVT::Other, Chain); -} - -// CALL node may have some operands non-legal to PIC16. Generate new CALL -// node with all the operands legal. -// Currently only Callee operand of the CALL node is non-legal. This function -// legalizes the Callee operand and uses all other operands as are to generate -// new CALL node. - -SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) { - CallSDNode *TheCall = dyn_cast(Op); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - DebugLoc dl = TheCall->getDebugLoc(); - unsigned i =0; - - assert(Callee.getValueType() == MVT::i16 && - "Don't know how to legalize this call node!!!"); - assert(Callee.getOpcode() == ISD::BUILD_PAIR && - "Don't know how to legalize this call node!!!"); - - if (isDirectAddress(Callee)) { - // Come here for direct calls - Callee = Callee.getOperand(0).getOperand(0); - } else { - // Come here for indirect calls - SDValue Lo, Hi; - // Indirect addresses. Get the hi and lo parts of ptr. 
- GetExpandedParts(Callee, DAG, Lo, Hi); - // Connect Lo and Hi parts of the callee with the PIC16Connect - Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi); - } - std::vector Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Add the call arguments and their flags - unsigned NumArgs = TheCall->getNumArgs(); - for(i=0;igetArg(i)); - Ops.push_back(TheCall->getArgFlagsVal(i)); - } - std::vector NodeTys; - unsigned NumRets = TheCall->getNumRetVals(); - for(i=0;igetRetValType(i)); - - // Return a Chain as well - NodeTys.push_back(MVT::Other); - - SDVTList VTs = DAG.getVTList(&NodeTys[0], NodeTys.size()); - // Generate new call with all the operands legal - return DAG.getCall(TheCall->getCallingConv(), dl, - TheCall->isVarArg(), TheCall->isTailCall(), - TheCall->isInreg(), VTs, &Ops[0], Ops.size(), - TheCall->getNumFixedArgs()); + return DAG.getNode(PIC16ISD::RET, dl, MVT::Other, Chain); } void PIC16TargetLowering:: @@ -1372,36 +1345,40 @@ GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, DataAddr_Hi = DAG.getNode(PIC16ISD::MTHI, dl, MVT::i8, Call, OperFlag); } +SDValue +PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { -SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { - CallSDNode *TheCall = dyn_cast(Op); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - DebugLoc dl = TheCall->getDebugLoc(); - if (Callee.getValueType() == MVT::i16 && - Callee.getOpcode() == ISD::BUILD_PAIR) { - // Control should come here only from TypeLegalizer for lowering - - // Legalize the non-legal arguments of call and return the - // new call with legal arguments. - return LegalizeCALL(Op, DAG); - } - // Control should come here from Legalize DAG. - // Here all the operands of CALL node should be legal. - - // If this is an indirect call then to pass the arguments - // and read the return value back, we need the data address - // of the function being called. - // To get the data address two more calls need to be made. + assert(Callee.getValueType() == MVT::i16 && + "Don't know how to legalize this call node!!!"); // The flag to track if this is a direct or indirect call. bool IsDirectCall = true; - unsigned RetVals = TheCall->getNumRetVals(); - unsigned NumArgs = TheCall->getNumArgs(); + unsigned RetVals = Ins.size(); + unsigned NumArgs = Outs.size(); SDValue DataAddr_Lo, DataAddr_Hi; - if (Callee.getOpcode() == PIC16ISD::PIC16Connect) { + if (!isa(Callee) && + !isa(Callee)) { IsDirectCall = false; // This is indirect call + + // If this is an indirect call then to pass the arguments + // and read the return value back, we need the data address + // of the function being called. + // To get the data address two more calls need to be made. + + // Come here for indirect calls + SDValue Lo, Hi; + // Indirect addresses. Get the hi and lo parts of ptr. + GetExpandedParts(Callee, DAG, Lo, Hi); + // Connect Lo and Hi parts of the callee with the PIC16Connect + Callee = DAG.getNode(PIC16ISD::PIC16Connect, dl, MVT::i8, Lo, Hi); + // Read DataAddress only if we have to pass arguments or // read return value. if ((RetVals > 0) || (NumArgs > 0)) @@ -1457,12 +1434,13 @@ SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Pass the argument to function before making the call. 
SDValue CallArgs; if (IsDirectCall) { - CallArgs = LowerDirectCallArguments(Op, Chain, ArgLabel, OperFlag, DAG); + CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag, + Outs, dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } else { - CallArgs = LowerIndirectCallArguments(Op, Chain, OperFlag, DataAddr_Lo, - DataAddr_Hi, DAG); + CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo, + DataAddr_Hi, Outs, Ins, dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } @@ -1483,10 +1461,11 @@ SDValue PIC16TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { // Lower the return value reading after the call. if (IsDirectCall) - return LowerDirectCallReturn(Op, Chain, RetLabel, OperFlag, DAG); + return LowerDirectCallReturn(RetLabel, Chain, OperFlag, + Ins, dl, DAG, InVals); else - return LowerIndirectCallReturn(Op, Chain, OperFlag, DataAddr_Lo, - DataAddr_Hi, DAG); + return LowerIndirectCallReturn(Chain, OperFlag, DataAddr_Lo, + DataAddr_Hi, Ins, dl, DAG, InVals); } bool PIC16TargetLowering::isDirectLoad(const SDValue Op) { @@ -1591,11 +1570,20 @@ SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { SDValue NewVal = ConvertToMemOperand (Op.getOperand(0), DAG, dl); SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Flag); - if (Op.getOpcode() == ISD::SUBE) - return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1), - Op.getOperand(2)); - else - return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1)); + switch (Op.getOpcode()) { + default: + assert (0 && "Opcode unknown."); + case ISD::SUBE: + return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1), + Op.getOperand(2)); + break; + case ISD::SUBC: + return DAG.getNode(Op.getOpcode(), dl, Tys, NewVal, Op.getOperand(1)); + break; + case ISD::SUB: + return DAG.getNode(Op.getOpcode(), dl, MVT::i8, NewVal, Op.getOperand(1)); + break; + } } void PIC16TargetLowering::InitReservedFrameCount(const Function *F) { @@ -1609,17 +1597,19 @@ void PIC16TargetLowering::InitReservedFrameCount(const Function *F) { ReservedFrameCount = NumArgs + 1; } -// LowerFORMAL_ARGUMENTS - Argument values are loaded from the +// LowerFormalArguments - Argument values are loaded from the // .args + offset. All arguments are already broken to leaglized // types, so the offset just runs from 0 to NumArgVals - 1. -SDValue PIC16TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, - SelectionDAG &DAG) { - SmallVector ArgValues; - unsigned NumArgVals = Op.getNode()->getNumValues() - 1; - DebugLoc dl = Op.getDebugLoc(); - SDValue Chain = Op.getOperand(0); // Formal arguments' chain - +SDValue +PIC16TargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl &InVals) { + unsigned NumArgVals = Ins.size(); // Get the callee's name to create the .args label to pass args. MachineFunction &MF = DAG.getMachineFunction(); @@ -1643,13 +1633,10 @@ SDValue PIC16TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SDValue PICLoad = DAG.getNode(PIC16ISD::PIC16LdArg, dl, VTs, Chain, ES, BS, Offset); Chain = getChain(PICLoad); - ArgValues.push_back(PICLoad); + InVals.push_back(PICLoad); } - // Return a MERGE_VALUE node. - ArgValues.push_back(Op.getOperand(0)); - return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); + return Chain; } // Perform DAGCombine of PIC16Load. 
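For orientation on the new hooks used above: targets of this era implement LowerFormalArguments/LowerCall/LowerReturn instead of lowering custom FORMAL_ARGUMENTS/CALL/RET nodes, and the contract for the first hook is to append one value per Ins entry to InVals and return the (possibly updated) chain. A minimal sketch of that contract, assuming a hypothetical ToyTargetLowering derived from TargetLowering:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

SDValue
ToyTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv,
                                        bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg> &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {
  // Illustrative only: materialize an UNDEF for every formal argument.
  // A real target loads each value from registers or stack slots, the way
  // the PIC16 code above loads each i8 part from its .args section.
  for (unsigned i = 0, e = Ins.size(); i != e; ++i)
    InVals.push_back(DAG.getUNDEF(Ins[i].VT));
  return Chain; // one InVals entry per Ins entry, then hand back the chain
}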
@@ -1697,7 +1684,7 @@ SDValue PIC16TargetLowering::PerformDAGCombine(SDNode *N, static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) { switch (CC) { - default: assert(0 && "Unknown condition code!"); + default: llvm_unreachable("Unknown condition code!"); case ISD::SETNE: return PIC16CC::NE; case ISD::SETEQ: return PIC16CC::EQ; case ISD::SETGT: return PIC16CC::GT; @@ -1826,7 +1813,8 @@ SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { MachineBasicBlock * PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm(); DebugLoc dl = MI->getDebugLoc(); @@ -1852,9 +1840,18 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by transferring all successors of the current + // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - sinkMBB->transferSuccessors(BB); + // Also inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while (!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index b40ea12c15f6a..286ed2411ef89 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -52,6 +52,7 @@ namespace llvm { SUBCC, // Compare for equality or inequality. SELECT_ICC, // Psuedo to be caught in schedular and expanded to brcond. BRCOND, // Conditional branch. + RET, // Return. Dummy }; @@ -81,39 +82,45 @@ namespace llvm { /// DAG node. 
virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT getSetCCResultType(MVT ValType) const; - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); + virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG); + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); SDValue LowerADD(SDValue Op, SelectionDAG &DAG); SDValue LowerSUB(SDValue Op, SelectionDAG &DAG); SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); - SDValue LowerRET(SDValue Op, SelectionDAG &DAG); // Call returns SDValue - LowerDirectCallReturn(SDValue Op, SDValue Chain, SDValue FrameAddress, - SDValue InFlag, SelectionDAG &DAG); + LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); SDValue - LowerIndirectCallReturn(SDValue Op, SDValue Chain, SDValue InFlag, - SDValue DataAddr_Lo, SDValue DataAddr_Hi, - SelectionDAG &DAG); + LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, + SDValue DataAddr_Lo, SDValue DataAddr_Hi, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); // Call arguments SDValue - LowerDirectCallArguments(SDValue Op, SDValue Chain, SDValue FrameAddress, - SDValue InFlag, SelectionDAG &DAG); + LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); SDValue - LowerIndirectCallArguments(SDValue Op, SDValue Chain, SDValue InFlag, + LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, - SelectionDAG &DAG); + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG); SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC, SelectionDAG &DAG, DebugLoc dl); virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap *EM) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); @@ -124,6 +131,28 @@ namespace llvm { SmallVectorImpl &Results, SelectionDAG &DAG); + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); + SDValue ExpandStore(SDNode *N, SelectionDAG &DAG); SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG); SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG); @@ -174,12 +203,6 @@ namespace llvm { void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES, int &Offset); - - // CALL node should have all legal operands only. Legalize all non-legal - // operands of CALL node and then return the new call will all operands - // legal. 
- SDValue LegalizeCALL(SDValue Op, SelectionDAG &DAG); - // For indirect calls data address of the callee frame need to be // extracted. This function fills the arguments DataAddr_Lo and // DataAddr_Hi with the address of the callee frame. @@ -209,7 +232,7 @@ namespace llvm { const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call); // Make PIC16 Libcall. - SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, MVT RetVT, + SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, SelectionDAG &DAG, DebugLoc dl); diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 8418423fa06a6..cb0c41bc0b5cc 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" #include @@ -104,7 +105,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addImm(1); // Emit banksel for it. } else - assert(0 && "Can't store this register to stack slot"); + llvm_unreachable("Can't store this register to stack slot"); } void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -144,7 +145,7 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(1); // Emit banksel for it. } else - assert(0 && "Can't load this register from stack slot"); + llvm_unreachable("Can't load this register from stack slot"); } bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB, diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td index a054bdcbe3c3d..250ca0a373f21 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.td +++ b/lib/Target/PIC16/PIC16InstrInfo.td @@ -115,6 +115,8 @@ def PIC16Brcond : SDNode<"PIC16ISD::BRCOND", SDT_PIC16Brcond, def PIC16Selecticc : SDNode<"PIC16ISD::SELECT_ICC", SDT_PIC16Selecticc, [SDNPInFlag]>; +def PIC16ret : SDNode<"PIC16ISD::RET", SDTNone, [SDNPHasChain]>; + //===----------------------------------------------------------------------===// // PIC16 Operand Definitions. //===----------------------------------------------------------------------===// @@ -375,8 +377,9 @@ def subfw_2: SUBFW<0, "subwf", subc>; let Uses = [STATUS] in def subfwb: SUBFW<0, "subwfb", sube>; // With Borrow. -def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>; } +let Defs = [STATUS], isTerminator = 1 in +def subfw_cc: SUBFW<0, "subwf", PIC16Subcc>; // [F] -= W ; let mayStore = 1 in @@ -425,8 +428,9 @@ class SUBLW opcode, SDNode OpNode> : let Defs = [STATUS] in { def sublw_1 : SUBLW<0, sub>; def sublw_2 : SUBLW<0, subc>; -def sublw_cc : SUBLW<0, PIC16Subcc>; } +let Defs = [STATUS], isTerminator = 1 in +def sublw_cc : SUBLW<0, PIC16Subcc>; // Call instruction. let isCall = 1, @@ -489,8 +493,9 @@ def pagesel : // Return insn. +let isTerminator = 1, isBarrier = 1, isReturn = 1 in def Return : - ControlFormat<0, (outs), (ins), "return", [(ret)]>; + ControlFormat<0, (outs), (ins), "return", [(PIC16ret)]>; //===----------------------------------------------------------------------===// // PIC16 Replacment Patterns. 
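The EmitInstrWithCustomInserter hunk earlier in this patch replaces sinkMBB->transferSuccessors(BB) with an explicit loop so that the new EM map can record, for each moved successor, which block now feeds its PHI nodes. That bookkeeping, pulled out into a standalone helper for illustration (the helper name is assumed; the loop bodies mirror the patch):

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// Move all successors of BB onto Sink and tell the selector's edge map
// about the redirection, as the patched custom inserter does.
static void redirectSuccessors(MachineBasicBlock *BB, MachineBasicBlock *Sink,
                               DenseMap<MachineBasicBlock*,
                                        MachineBasicBlock*> *EM) {
  for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
         E = BB->succ_end(); I != E; ++I) {
    EM->insert(std::make_pair(*I, Sink)); // PHIs in *I now draw from Sink
    Sink->addSuccessor(*I);
  }
  while (!BB->succ_empty()) // detach BB so it can take its new successors
    BB->removeSuccessor(BB->succ_begin());
}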
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
new file mode 100644
index 0000000000000..a17d1a8b1c25c
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
@@ -0,0 +1,58 @@
+//===-- PIC16MCAsmInfo.cpp - PIC16 asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PIC16MCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16MCAsmInfo.h"
+
+// FIXME: Layering violation to get enums and static function, should be moved
+// to separate headers.
+#include "PIC16.h"
+#include "PIC16ISelLowering.h"
+using namespace llvm;
+
+PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) {
+  CommentString = ";";
+  GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
+  GlobalDirective = "\tglobal\t";
+  ExternDirective = "\textern\t";
+
+  Data8bitsDirective = " db ";
+  Data16bitsDirective = " dw ";
+  Data32bitsDirective = " dl ";
+  Data64bitsDirective = NULL;
+  ZeroDirective = NULL;
+  AsciiDirective = " dt ";
+  AscizDirective = NULL;
+
+  RomData8bitsDirective = " dw ";
+  RomData16bitsDirective = " rom_di ";
+  RomData32bitsDirective = " rom_dl ";
+
+
+  // Set it to false because we need to generate the C file name and not the
+  // bc file name.
+  HasSingleParameterDotFile = false;
+}
+
+const char *PIC16MCAsmInfo::getDataASDirective(unsigned Size,
+                                               unsigned AS) const {
+  if (AS != PIC16ISD::ROM_SPACE)
+    return 0;
+
+  switch (Size) {
+  case 8:  return RomData8bitsDirective;
+  case 16: return RomData16bitsDirective;
+  case 32: return RomData32bitsDirective;
+  default: return NULL;
+  }
+}
+
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h
new file mode 100644
index 0000000000000..e84db8532a151
--- /dev/null
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.h
@@ -0,0 +1,35 @@
+//=====-- PIC16MCAsmInfo.h - PIC16 asm properties -------------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PIC16MCAsmInfo class.
+// +//===----------------------------------------------------------------------===// + +#ifndef PIC16TARGETASMINFO_H +#define PIC16TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + class PIC16MCAsmInfo : public MCAsmInfo { + const char *RomData8bitsDirective; + const char *RomData16bitsDirective; + const char *RomData32bitsDirective; + public: + PIC16MCAsmInfo(const Target &T, const StringRef &TT); + + virtual const char *getDataASDirective(unsigned size, unsigned AS) const; + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index 43d47ae5292f8..c9ebb5756cda4 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -22,7 +22,7 @@ #define DEBUG_TYPE "pic16-codegen" #include "PIC16.h" #include "PIC16InstrInfo.h" -#include "PIC16TargetAsmInfo.h" +#include "PIC16MCAsmInfo.h" #include "PIC16TargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp index eb758d8543d07..47087ab3cb943 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp @@ -16,7 +16,7 @@ #include "PIC16.h" #include "PIC16RegisterInfo.h" #include "llvm/ADT/BitVector.h" - +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -51,10 +51,13 @@ bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const { return false; } -void PIC16RegisterInfo:: +unsigned PIC16RegisterInfo:: eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS) const -{ /* NOT YET IMPLEMENTED */ } + int *Value, RegScavenger *RS) const +{ + /* NOT YET IMPLEMENTED */ + return 0; +} void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const { /* NOT YET IMPLEMENTED */ } @@ -65,17 +68,17 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const int PIC16RegisterInfo:: getDwarfRegNum(unsigned RegNum, bool isEH) const { - assert(0 && "Not keeping track of debug information yet!!"); + llvm_unreachable("Not keeping track of debug information yet!!"); return -1; } unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const { - assert(0 && "PIC16 Does not have any frame register"); + llvm_unreachable("PIC16 Does not have any frame register"); return 0; } unsigned PIC16RegisterInfo::getRARegister() const { - assert(0 && "PIC16 Does not have any return address register"); + llvm_unreachable("PIC16 Does not have any return address register"); return 0; } diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h index 83689d0486b13..8aa5a10732e1d 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ b/lib/Target/PIC16/PIC16RegisterInfo.h @@ -48,8 +48,9 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo { virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual bool hasFP(const MachineFunction &MF) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS=NULL) const; + virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, int *Value = NULL, + RegScavenger *RS=NULL) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/PIC16/PIC16Subtarget.cpp b/lib/Target/PIC16/PIC16Subtarget.cpp index db8a5d84a4bff..33fc3fb169945 100644 --- a/lib/Target/PIC16/PIC16Subtarget.cpp +++ 
b/lib/Target/PIC16/PIC16Subtarget.cpp
@@ -16,7 +16,7 @@
 using namespace llvm;
 
-PIC16Subtarget::PIC16Subtarget(const Module &M, const std::string &FS, 
+PIC16Subtarget::PIC16Subtarget(const std::string &TT, const std::string &FS, 
                                bool Cooper)
   :IsCooper(Cooper)
 {
diff --git a/lib/Target/PIC16/PIC16Subtarget.h b/lib/Target/PIC16/PIC16Subtarget.h
index e5147a0cf8928..81e3783d72999 100644
--- a/lib/Target/PIC16/PIC16Subtarget.h
+++ b/lib/Target/PIC16/PIC16Subtarget.h
@@ -19,7 +19,6 @@
 #include <string>
 
 namespace llvm {
-class Module;
 
 class PIC16Subtarget : public TargetSubtarget {
 
@@ -28,9 +27,9 @@ class PIC16Subtarget : public TargetSubtarget {
 public:
   /// This constructor initializes the data members to match that
-  /// of the specified module.
+  /// of the specified triple.
   ///
-  PIC16Subtarget(const Module &M, const std::string &FS, bool Cooper);
+  PIC16Subtarget(const std::string &TT, const std::string &FS, bool Cooper);
 
   /// isCooper - Returns true if the target ISA is Cooper.
   bool isCooper() const { return IsCooper; }
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index 77ad1882ca9e7..08307e7cef3f3 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -12,51 +12,32 @@
 //===----------------------------------------------------------------------===//
 
 #include "PIC16.h"
-#include "PIC16TargetAsmInfo.h"
+#include "PIC16MCAsmInfo.h"
 #include "PIC16TargetMachine.h"
-#include "llvm/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetMachineRegistry.h"
+#include "llvm/Target/TargetRegistry.h"
 
 using namespace llvm;
 
-/// PIC16TargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int PIC16TargetMachineModule;
-int PIC16TargetMachineModule = 0;
-
-
-// Register the targets
-static RegisterTarget<PIC16TargetMachine>
-X("pic16", "PIC16 14-bit [experimental].");
-static RegisterTarget<CooperTargetMachine>
-Y("cooper", "PIC16 Cooper [experimental].");
+extern "C" void LLVMInitializePIC16Target() {
+  // Register the target. Currently the codegen works for the
+  // enhanced PIC16 mid-range.
+  RegisterTargetMachine<PIC16TargetMachine> X(ThePIC16Target);
+  RegisterAsmInfo<PIC16MCAsmInfo> A(ThePIC16Target);
+}
 
-// Force static initialization.
-extern "C" void LLVMInitializePIC16Target() { }
 
-// PIC16TargetMachine - Traditional PIC16 Machine.
-PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
-                                       bool Cooper)
-: Subtarget(M, FS, Cooper),
+// PIC16TargetMachine - Enhanced PIC16 mid-range Machine. May also represent
+// a Traditional Machine if 'Trad' is true.
+PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
+                                       const std::string &FS, bool Trad)
+: LLVMTargetMachine(T, TT),
+  Subtarget(TT, FS, Trad),
   DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"), 
   InstrInfo(*this), TLInfo(*this),
   FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
 
-// CooperTargetMachine - Uses the same PIC16TargetMachine, but makes IsCooper
-// as true.
-CooperTargetMachine::CooperTargetMachine(const Module &M, const std::string &FS) - : PIC16TargetMachine(M, FS, true) {} - - -const TargetAsmInfo *PIC16TargetMachine::createTargetAsmInfo() const { - return new PIC16TargetAsmInfo(*this); -} bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { @@ -65,15 +46,7 @@ bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM, return false; } -bool PIC16TargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out) { - // Output assembly language. - PM.add(createPIC16CodePrinterPass(Out, *this, Verbose)); - return false; -} - -bool PIC16TargetMachine::addPostRegAlloc(PassManagerBase &PM, +bool PIC16TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createPIC16MemSelOptimizerPass()); return true; // -print-machineinstr should print after this. diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h index 7f62d5c13d64b..b11fdd5dba503 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.h +++ b/lib/Target/PIC16/PIC16TargetMachine.h @@ -37,12 +37,9 @@ class PIC16TargetMachine : public LLVMTargetMachine { // any PIC16 specific FrameInfo class. TargetFrameInfo FrameInfo; -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - public: - PIC16TargetMachine(const Module &M, const std::string &FS, - bool Cooper = false); + PIC16TargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool Cooper = false); virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual const PIC16InstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -59,18 +56,9 @@ public: virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); - virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // PIC16TargetMachine. -/// CooperTargetMachine -class CooperTargetMachine : public PIC16TargetMachine { -public: - CooperTargetMachine(const Module &M, const std::string &FS); -}; // CooperTargetMachine. - } // end namespace llvm #endif diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp new file mode 100644 index 0000000000000..a2a4c09d29781 --- /dev/null +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -0,0 +1,440 @@ +//===-- PIC16TargetObjectFile.cpp - PIC16 object files --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "PIC16TargetObjectFile.h"
+#include "MCSectionPIC16.h"
+#include "PIC16ISelLowering.h"
+#include "PIC16TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+MCSectionPIC16 *MCSectionPIC16::Create(const StringRef &Name, SectionKind K,
+                                       int Address, int Color, MCContext &Ctx) {
+  return new (Ctx) MCSectionPIC16(Name, K, Address, Color);
+}
+
+
+void MCSectionPIC16::PrintSwitchToSection(const MCAsmInfo &MAI,
+                                          raw_ostream &OS) const {
+  OS << getName() << '\n';
+}
+
+
+
+
+PIC16TargetObjectFile::PIC16TargetObjectFile()
+  : ExternalVarDecls(0), ExternalVarDefs(0) {
+}
+
+const MCSectionPIC16 *PIC16TargetObjectFile::
+getPIC16Section(const char *Name, SectionKind Kind, 
+                int Address, int Color) const {
+  MCSectionPIC16 *&Entry = SectionsByName[Name];
+  if (Entry)
+    return Entry;
+
+  return Entry = MCSectionPIC16::Create(Name, Kind, Address, Color,
+                                        getContext());
+}
+
+
+void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){
+  TargetLoweringObjectFile::Initialize(Ctx, tm);
+  TM = &tm;
+
+  BSSSection = getPIC16Section("udata.# UDATA", MCSectionPIC16::UDATA_Kind());
+  ReadOnlySection = getPIC16Section("romdata.# ROMDATA",
+                                    MCSectionPIC16::ROMDATA_Kind());
+  DataSection = getPIC16Section("idata.# IDATA", MCSectionPIC16::IDATA_Kind());
+
+  // Needed because otherwise a .text symbol is emitted by DwarfWriter
+  // in BeginModule, and gpasm complains about that .text symbol.
+  TextSection = getPIC16Section("", SectionKind::getText());
+
+  ROSections.push_back(new PIC16Section((MCSectionPIC16*)ReadOnlySection));
+
+  // FIXME: I don't know what the classification of these sections really is.
+  // These aren't really objects belonging to any section. Just emit them
+  // in AsmPrinter and remove this code from here.
+  ExternalVarDecls = new PIC16Section(getPIC16Section("ExternalVarDecls",
+                                      SectionKind::getMetadata()));
+  ExternalVarDefs = new PIC16Section(getPIC16Section("ExternalVarDefs",
+                                     SectionKind::getMetadata()));
+}
+
+const MCSection *PIC16TargetObjectFile::
+getSectionForFunction(const std::string &FnName) const {
+  std::string T = PAN::getCodeSectionName(FnName);
+  return getPIC16Section(T.c_str(), SectionKind::getText());
+}
+
+
+const MCSection *PIC16TargetObjectFile::
+getSectionForFunctionFrame(const std::string &FnName) const {
+  std::string T = PAN::getFrameSectionName(FnName);
+  return getPIC16Section(T.c_str(), SectionKind::getDataRel());
+}
+
+const MCSection *
+PIC16TargetObjectFile::getBSSSectionForGlobal(const GlobalVariable *GV) const {
+  assert(GV->hasInitializer() && "This global doesn't need space");
+  Constant *C = GV->getInitializer();
+  assert(C->isNullValue() && "Uninitialized global has a non-zero initializer");
+
+  // Find how much space this global needs.
+  const TargetData *TD = TM->getTargetData();
+  const Type *Ty = C->getType(); 
+  unsigned ValSize = TD->getTypeAllocSize(Ty);
+ 
+  // Go through all BSS Sections and assign this variable
+  // to the first available section having enough space.
+  PIC16Section *FoundBSS = NULL;
+  for (unsigned i = 0; i < BSSSections.size(); i++) {
+    if (DataBankSize - BSSSections[i]->Size >= ValSize) {
+      FoundBSS = BSSSections[i];
+      break;
+    }
+  }
+
+  // No BSS section spacious enough was found. Create a new one.
+  if (!FoundBSS) {
+    std::string name = PAN::getUdataSectionName(BSSSections.size());
+    const MCSectionPIC16 *NewSection
+      = getPIC16Section(name.c_str(), MCSectionPIC16::UDATA_Kind());
+
+    FoundBSS = new PIC16Section(NewSection);
+
+    // Add this newly created BSS section to the list of BSSSections.
+    BSSSections.push_back(FoundBSS);
+  }
+
+  // Insert the GV into this BSS.
+  FoundBSS->Items.push_back(GV);
+  FoundBSS->Size += ValSize;
+  return FoundBSS->S_;
+}
+
+const MCSection *
+PIC16TargetObjectFile::getIDATASectionForGlobal(const GlobalVariable *GV) const{
+  assert(GV->hasInitializer() && "This global doesn't need space");
+  Constant *C = GV->getInitializer();
+  assert(!C->isNullValue() && "Initialized global has a zero initializer");
+  assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE &&
+         "can split initialized RAM data only");
+
+  // Find how much space this global needs.
+  const TargetData *TD = TM->getTargetData();
+  const Type *Ty = C->getType(); 
+  unsigned ValSize = TD->getTypeAllocSize(Ty);
+ 
+  // Go through all IDATA Sections and assign this variable
+  // to the first available section having enough space.
+  PIC16Section *FoundIDATA = NULL;
+  for (unsigned i = 0; i < IDATASections.size(); i++) {
+    if (DataBankSize - IDATASections[i]->Size >= ValSize) {
+      FoundIDATA = IDATASections[i];
+      break;
+    }
+  }
+
+  // No IDATA section spacious enough was found. Create a new one.
+  if (!FoundIDATA) {
+    std::string name = PAN::getIdataSectionName(IDATASections.size());
+    const MCSectionPIC16 *NewSection =
+      getPIC16Section(name.c_str(), MCSectionPIC16::IDATA_Kind());
+
+    FoundIDATA = new PIC16Section(NewSection);
+
+    // Add this newly created IDATA section to the list of IDATASections.
+    IDATASections.push_back(FoundIDATA);
+  }
+
+  // Insert the GV into this IDATA.
+  FoundIDATA->Items.push_back(GV);
+  FoundIDATA->Size += ValSize;
+  return FoundIDATA->S_;
+}
+
+// Get the section for an automatic variable of a function.
+// For PIC16 they are globals only with mangled names.
+const MCSection *
+PIC16TargetObjectFile::getSectionForAuto(const GlobalVariable *GV) const {
+
+  const std::string name = PAN::getSectionNameForSym(GV->getName());
+
+  // Go through all Auto Sections and assign this variable
+  // to the appropriate section.
+  PIC16Section *FoundAutoSec = NULL;
+  for (unsigned i = 0; i < AutosSections.size(); i++) {
+    if (AutosSections[i]->S_->getName() == name) {
+      FoundAutoSec = AutosSections[i];
+      break;
+    }
+  }
+
+  // No Auto section was found. Create a new one.
+  if (!FoundAutoSec) {
+    const MCSectionPIC16 *NewSection = 
+      getPIC16Section(name.c_str(), MCSectionPIC16::UDATA_OVR_Kind());
+
+    FoundAutoSec = new PIC16Section(NewSection);
+
+    // Add this newly created autos section to the list of AutosSections.
+    AutosSections.push_back(FoundAutoSec);
+  }
+
+  // Insert the auto into this section.
+  FoundAutoSec->Items.push_back(GV);
+
+  return FoundAutoSec->S_;
+}
+
+
+// Override the default implementation to put the true globals into
+// multiple data sections if required.
+const MCSection *
+PIC16TargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV1,
+                                              SectionKind Kind,
+                                              Mangler *Mang,
+                                              const TargetMachine &TM) const {
+  // We select the section based on the initializer here, so it really
+  // has to be a GlobalVariable.
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GV1); 
+  if (!GV)
+    return TargetLoweringObjectFile::SelectSectionForGlobal(GV1, Kind, Mang,TM);
+
+  // Record External Var Decls.
+ if (GV->isDeclaration()) { + ExternalVarDecls->Items.push_back(GV); + return ExternalVarDecls->S_; + } + + assert(GV->hasInitializer() && "A def without initializer?"); + + // First, if this is an automatic variable for a function, get the section + // name for it and return. + std::string name = GV->getName(); + if (PAN::isLocalName(name)) + return getSectionForAuto(GV); + + // Record Exteranl Var Defs. + if (GV->hasExternalLinkage() || GV->hasCommonLinkage()) + ExternalVarDefs->Items.push_back(GV); + + // See if this is an uninitialized global. + const Constant *C = GV->getInitializer(); + if (C->isNullValue()) + return getBSSSectionForGlobal(GV); + + // If this is initialized data in RAM. Put it in the correct IDATA section. + if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) + return getIDATASectionForGlobal(GV); + + // This is initialized data in rom, put it in the readonly section. + if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) + return getROSectionForGlobal(GV); + + // Else let the default implementation take care of it. + return TargetLoweringObjectFile::SelectSectionForGlobal(GV, Kind, Mang,TM); +} + +PIC16TargetObjectFile::~PIC16TargetObjectFile() { + for (unsigned i = 0; i < BSSSections.size(); i++) + delete BSSSections[i]; + for (unsigned i = 0; i < IDATASections.size(); i++) + delete IDATASections[i]; + for (unsigned i = 0; i < AutosSections.size(); i++) + delete AutosSections[i]; + for (unsigned i = 0; i < ROSections.size(); i++) + delete ROSections[i]; + delete ExternalVarDecls; + delete ExternalVarDefs; +} + + +/// getSpecialCasedSectionGlobals - Allow the target to completely override +/// section assignment of a global. +const MCSection *PIC16TargetObjectFile:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(GV->hasSection()); + + if (const GlobalVariable *GVar = cast(GV)) { + std::string SectName = GVar->getSection(); + // If address for a variable is specified, get the address and create + // section. + std::string AddrStr = "Address="; + if (SectName.compare(0, AddrStr.length(), AddrStr) == 0) { + std::string SectAddr = SectName.substr(AddrStr.length()); + return CreateSectionForGlobal(GVar, Mang, SectAddr); + } + + // Create the section specified with section attribute. + return CreateSectionForGlobal(GVar, Mang); + } + + return getPIC16Section(GV->getSection().c_str(), Kind); +} + +// Create a new section for global variable. If Addr is given then create +// section at that address else create by name. +const MCSection * +PIC16TargetObjectFile::CreateSectionForGlobal(const GlobalVariable *GV, + Mangler *Mang, + const std::string &Addr) const { + // See if this is an uninitialized global. + const Constant *C = GV->getInitializer(); + if (C->isNullValue()) + return CreateBSSSectionForGlobal(GV, Addr); + + // If this is initialized data in RAM. Put it in the correct IDATA section. + if (GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) + return CreateIDATASectionForGlobal(GV, Addr); + + // This is initialized data in rom, put it in the readonly section. + if (GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE) + return CreateROSectionForGlobal(GV, Addr); + + // Else let the default implementation take care of it. + return TargetLoweringObjectFile::SectionForGlobal(GV, Mang, *TM); +} + +// Create uninitialized section for a variable. 
+const MCSection * +PIC16TargetObjectFile::CreateBSSSectionForGlobal(const GlobalVariable *GV, + std::string Addr) const { + assert(GV->hasInitializer() && "This global doesn't need space"); + assert(GV->getInitializer()->isNullValue() && + "Uninitialized global has non-zero initializer"); + std::string Name; + // If address is given then create a section at that address else create a + // section by the section name specified in GV. + PIC16Section *FoundBSS = NULL; + if (Addr.empty()) { + Name = GV->getSection() + " UDATA"; + for (unsigned i = 0; i < BSSSections.size(); i++) { + if (BSSSections[i]->S_->getName() == Name) { + FoundBSS = BSSSections[i]; + break; + } + } + } else { + std::string Prefix = GV->getNameStr() + "." + Addr + "."; + Name = PAN::getUdataSectionName(BSSSections.size(), Prefix) + " " + Addr; + } + + PIC16Section *NewBSS = FoundBSS; + if (NewBSS == NULL) { + const MCSectionPIC16 *NewSection = + getPIC16Section(Name.c_str(), MCSectionPIC16::UDATA_Kind()); + NewBSS = new PIC16Section(NewSection); + BSSSections.push_back(NewBSS); + } + + // Insert the GV into this BSS. + NewBSS->Items.push_back(GV); + + // We do not want to put any GV without an explicit section into this section, + // so set its size to DataBankSize. + NewBSS->Size = DataBankSize; + return NewBSS->S_; +} + +// Get the ROM section for a variable. Currently there can be only one ROM +// section unless a variable explicitly requests a section. +const MCSection * +PIC16TargetObjectFile::getROSectionForGlobal(const GlobalVariable *GV) const { + ROSections[0]->Items.push_back(GV); + return ROSections[0]->S_; +} + +// Create initialized data section for a variable. +const MCSection * +PIC16TargetObjectFile::CreateIDATASectionForGlobal(const GlobalVariable *GV, + std::string Addr) const { + assert(GV->hasInitializer() && "This global doesn't need space"); + assert(!GV->getInitializer()->isNullValue() && + "initialized global has zero initializer"); + assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE && + "can be used for initialized RAM data only"); + + std::string Name; + // If address is given then create a section at that address else create a + // section by the section name specified in GV. + PIC16Section *FoundIDATASec = NULL; + if (Addr.empty()) { + Name = GV->getSection() + " IDATA"; + for (unsigned i = 0; i < IDATASections.size(); i++) { + if (IDATASections[i]->S_->getName() == Name) { + FoundIDATASec = IDATASections[i]; + break; + } + } + } else { + std::string Prefix = GV->getNameStr() + "." + Addr + "."; + Name = PAN::getIdataSectionName(IDATASections.size(), Prefix) + " " + Addr; + } + + PIC16Section *NewIDATASec = FoundIDATASec; + if (NewIDATASec == NULL) { + const MCSectionPIC16 *NewSection = + getPIC16Section(Name.c_str(), MCSectionPIC16::IDATA_Kind()); + NewIDATASec = new PIC16Section(NewSection); + IDATASections.push_back(NewIDATASec); + } + // Insert the GV into this IDATA Section. + NewIDATASec->Items.push_back(GV); + // We do not want to put any GV without an explicit section into this section, + // so set its size to DataBankSize. + NewIDATASec->Size = DataBankSize; + return NewIDATASec->S_; +} +
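getExplicitSectionGlobal above recognizes one special section-attribute spelling: a string of the form "Address=<addr>" pins the variable to a fixed location, and anything else is treated as a plain section name. A small sketch of just that prefix check, with a hypothetical parseAddressAttr helper:

    #include <string>

    // Returns true and extracts the address text when the section attribute
    // requests fixed placement, e.g. "Address=0x120" -> "0x120".
    static bool parseAddressAttr(const std::string &SectName, std::string &Addr) {
      const std::string Prefix = "Address=";
      if (SectName.compare(0, Prefix.length(), Prefix) != 0)
        return false; // ordinary named section
      Addr = SectName.substr(Prefix.length());
      return true;
    }

+// Create a section in ROM for a variable.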
+const MCSection * +PIC16TargetObjectFile::CreateROSectionForGlobal(const GlobalVariable *GV, + std::string Addr) const { + assert(GV->getType()->getAddressSpace() == PIC16ISD::ROM_SPACE && + "can be used for ROM data only"); + + std::string Name; + // If address is given then create a section at that address else create a + // section by the section name specified in GV. + PIC16Section *FoundROSec = NULL; + if (Addr.empty()) { + Name = GV->getSection() + " ROMDATA"; + for (unsigned i = 1; i < ROSections.size(); i++) { + if (ROSections[i]->S_->getName() == Name) { + FoundROSec = ROSections[i]; + break; + } + } + } else { + std::string Prefix = GV->getNameStr() + "." + Addr + "."; + Name = PAN::getRomdataSectionName(ROSections.size(), Prefix) + " " + Addr; + } + + PIC16Section *NewRomSec = FoundROSec; + if (NewRomSec == NULL) { + const MCSectionPIC16 *NewSection = + getPIC16Section(Name.c_str(), MCSectionPIC16::ROMDATA_Kind()); + NewRomSec = new PIC16Section(NewSection); + ROSections.push_back(NewRomSec); + } + + // Insert the GV into this ROM Section. + NewRomSec->Items.push_back(GV); + return NewRomSec->S_; +} + diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h new file mode 100644 index 0000000000000..75f6cced0ab82 --- /dev/null +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -0,0 +1,120 @@ +//===-- PIC16TargetObjectFile.h - PIC16 Object Info -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_PIC16_TARGETOBJECTFILE_H +#define LLVM_TARGET_PIC16_TARGETOBJECTFILE_H + +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/ADT/StringMap.h" +#include <string> +#include <vector> + +namespace llvm { + class GlobalVariable; + class Module; + class PIC16TargetMachine; + class MCSectionPIC16; + + enum { DataBankSize = 80 }; + + /// PIC16 splits the global data into multiple udata and idata sections. + /// Each udata and idata section needs to contain a list of globals that + /// they contain, in order to avoid scanning over all the global values + /// again and printing only those that match the current section. + /// Keeping values inside the sections makes printing a section much easier. + /// + /// FIXME: MOVE ALL THIS STUFF TO MCSectionPIC16. + /// + struct PIC16Section { + const MCSectionPIC16 *S_; // Connection to actual Section. + unsigned Size; // Total size of the objects contained. + bool SectionPrinted; + std::vector<const GlobalVariable*> Items; + + PIC16Section(const MCSectionPIC16 *s) { + S_ = s; + Size = 0; + SectionPrinted = false; + } + bool isPrinted() const { return SectionPrinted; } + void setPrintedStatus(bool status) { SectionPrinted = status; } + }; + + class PIC16TargetObjectFile : public TargetLoweringObjectFile {
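The class below keeps every section it has handed out in a by-name cache, so repeated requests for the same section name return the same object. A sketch of that get-or-create pattern, using std::map as a stand-in for LLVM's StringMap and a hypothetical SectionInfo record:

    #include <map>
    #include <string>

    struct SectionInfo {
      std::string Name; // stand-in for the real MCSection data
    };

    // Look the name up first; allocate only on the first request, so callers
    // always share one object per section name (mirroring getPIC16Section).
    static SectionInfo *getOrCreateSection(std::map<std::string, SectionInfo*> &Cache,
                                           const std::string &Name) {
      SectionInfo *&Entry = Cache[Name];
      if (!Entry) {
        Entry = new SectionInfo();
        Entry->Name = Name;
      }
      return Entry;
    }

+ /// SectionsByName - Bindings of names to allocated sections.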
+ mutable StringMap<MCSectionPIC16*> SectionsByName; + + const TargetMachine *TM; + + const MCSectionPIC16 *getPIC16Section(const char *Name, + SectionKind K, + int Address = -1, + int Color = -1) const; + public: + mutable std::vector<PIC16Section*> BSSSections; + mutable std::vector<PIC16Section*> IDATASections; + mutable std::vector<PIC16Section*> AutosSections; + mutable std::vector<PIC16Section*> ROSections; + mutable PIC16Section *ExternalVarDecls; + mutable PIC16Section *ExternalVarDefs; + + PIC16TargetObjectFile(); + ~PIC16TargetObjectFile(); + + void Initialize(MCContext &Ctx, const TargetMachine &TM); + + + virtual const MCSection * + getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + virtual const MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine&) const; + + const MCSection *getSectionForFunction(const std::string &FnName) const; + const MCSection *getSectionForFunctionFrame(const std::string &FnName) const; + + + private: + std::string getSectionNameForSym(const std::string &Sym) const; + + const MCSection *getBSSSectionForGlobal(const GlobalVariable *GV) const; + const MCSection *getIDATASectionForGlobal(const GlobalVariable *GV) const; + const MCSection *getSectionForAuto(const GlobalVariable *GV) const; + const MCSection *CreateBSSSectionForGlobal(const GlobalVariable *GV, + std::string Addr = "") const; + const MCSection *CreateIDATASectionForGlobal(const GlobalVariable *GV, + std::string Addr = "") const; + const MCSection *getROSectionForGlobal(const GlobalVariable *GV) const; + const MCSection *CreateROSectionForGlobal(const GlobalVariable *GV, + std::string Addr = "") const; + const MCSection *CreateSectionForGlobal(const GlobalVariable *GV, + Mangler *Mang, + const std::string &Addr = "") const; + public: + void SetSectionForGVs(Module &M); + const std::vector<PIC16Section*> &getBSSSections() const { + return BSSSections; + } + const std::vector<PIC16Section*> &getIDATASections() const { + return IDATASections; + } + const std::vector<PIC16Section*> &getAutosSections() const { + return AutosSections; + } + const std::vector<PIC16Section*> &getROSections() const { + return ROSections; + } + + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/PIC16/TargetInfo/CMakeLists.txt b/lib/Target/PIC16/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..bfc6ff4e8e2e9 --- /dev/null +++ b/lib/Target/PIC16/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMPIC16Info + PIC16TargetInfo.cpp + ) + +add_dependencies(LLVMPIC16Info PIC16Table_gen) diff --git a/lib/Target/PIC16/TargetInfo/Makefile b/lib/Target/PIC16/TargetInfo/Makefile new file mode 100644 index 0000000000000..76609f66d6521 --- /dev/null +++ b/lib/Target/PIC16/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/PIC16/TargetInfo/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMPIC16Info + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp new file mode 100644 index 0000000000000..46cc81967ebdd --- /dev/null +++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp @@ -0,0 +1,21 @@ +//===-- PIC16TargetInfo.cpp - PIC16 Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PIC16.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::ThePIC16Target, llvm::TheCooperTarget; + +extern "C" void LLVMInitializePIC16TargetInfo() { + RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]"); + + RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]"); +} diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 7f1673cf462e2..a0fba86fa6b23 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -24,7 +24,6 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/MDNode.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" @@ -32,16 +31,22 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" @@ -52,13 +57,40 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); namespace { class VISIBILITY_HIDDEN PPCAsmPrinter : public AsmPrinter { protected: - StringSet<> FnStubs, GVStubs, HiddenGVStubs; + struct FnStubInfo { + std::string Stub, LazyPtr, AnonSymbol; + + FnStubInfo() {} + + void Init(const GlobalValue *GV, Mangler *Mang) { + // Already initialized. + if (!Stub.empty()) return; + Stub = Mang->getMangledName(GV, "$stub", true); + LazyPtr = Mang->getMangledName(GV, "$lazy_ptr", true); + AnonSymbol = Mang->getMangledName(GV, "$stub$tmp", true); + } + + void Init(const std::string &GV, Mangler *Mang) { + // Already initialized. 
+ if (!Stub.empty()) return; + Stub = Mang->makeNameProper(GV + "$stub", + Mangler::Private); + LazyPtr = Mang->makeNameProper(GV + "$lazy_ptr", + Mangler::Private); + AnonSymbol = Mang->makeNameProper(GV + "$stub$tmp", + Mangler::Private); + } + }; + + StringMap<FnStubInfo> FnStubs; + StringMap<std::string> GVStubs, HiddenGVStubs, TOC; const PPCSubtarget &Subtarget; + uint64_t LabelID; public: - explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, bool V) + explicit PPCAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) : AsmPrinter(O, TM, T, V), - Subtarget(TM.getSubtarget<PPCSubtarget>()) {} + Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {} virtual const char *getPassName() const { return "PowerPC Assembly Printer"; @@ -70,7 +102,7 @@ namespace { unsigned enumRegToMachineReg(unsigned enumReg) { switch (enumReg) { - default: assert(0 && "Unhandled register!"); break; + default: llvm_unreachable("Unhandled register!"); case PPC::CR0: return 0; case PPC::CR1: return 1; case PPC::CR2: return 2; @@ -80,14 +112,16 @@ namespace { case PPC::CR6: return 6; case PPC::CR7: return 7; } - abort(); + llvm_unreachable(0); } /// printInstruction - This method is automatically generated by tablegen /// from the instruction set description. This method returns true if the /// machine instruction was sufficiently described to print it, otherwise it /// returns false. - bool printInstruction(const MachineInstr *MI); + void printInstruction(const MachineInstr *MI); + static const char *getRegisterName(unsigned RegNo); + void printMachineInstruction(const MachineInstr *MI); void printOp(const MachineOperand &MO); @@ -117,7 +151,7 @@ namespace { return; } - const char *RegName = TM.getRegisterInfo()->get(RegNo).AsmName; + const char *RegName = getRegisterName(RegNo); // Linux assembler (Others?) does not take register mnemonics. // FIXME - What about special registers used in mfspr/mtspr? if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName); @@ -190,16 +224,16 @@ namespace { GlobalValue *GV = MO.getGlobal(); if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. - std::string Name = Mang->getValueName(GV); - FnStubs.insert(Name); - printSuffixedName(Name, "$stub"); + FnStubInfo &FnInfo = FnStubs[Mang->getMangledName(GV)]; + FnInfo.Init(GV, Mang); + O << FnInfo.Stub; return; } } if (MO.getType() == MachineOperand::MO_ExternalSymbol) { - std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); - FnStubs.insert(Name); - printSuffixedName(Name, "$stub"); + FnStubInfo &FnInfo = FnStubs[Mang->makeNameProper(MO.getSymbolName())]; + FnInfo.Init(MO.getSymbolName(), Mang); + O << FnInfo.Stub; return; } } @@ -281,20 +315,39 @@ namespace { printOperand(MI, OpNo+1); } + void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + + assert(MO.getType() == MachineOperand::MO_GlobalAddress); + + GlobalValue *GV = MO.getGlobal(); + + std::string Name = Mang->getMangledName(GV); +
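The lookup that follows mints one small private label per referenced global (".LC0", ".LC1", ...) and caches it in the TOC map, which is what lets doFinalization later emit exactly one .tc entry per symbol. A standalone sketch of that cache, assuming hypothetical names and std::map in place of the StringMap used here:

    #include <map>
    #include <sstream>
    #include <string>

    // Return the TOC label for Sym, minting ".LC<N>" on first use only.
    static std::string getTOCLabel(std::map<std::string, std::string> &TOC,
                                   unsigned &NextID, const std::string &Sym) {
      std::map<std::string, std::string>::iterator I = TOC.find(Sym);
      if (I != TOC.end())
        return I->second; // label already assigned
      std::ostringstream OS;
      OS << ".LC" << NextID++; // ".L" mirrors a typical private-global prefix
      TOC[Sym] = OS.str();
      return TOC[Sym];
    }

+ // Map symbol -> label of TOC entry.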
+ if (TOC.count(Name) == 0) { + std::string Label; + Label += MAI->getPrivateGlobalPrefix(); + Label += "C"; + Label += utostr(LabelID++); + + TOC[Name] = Label; + } + + O << TOC[Name] << "@toc"; + } + void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier); virtual bool runOnMachineFunction(MachineFunction &F) = 0; - virtual bool doFinalization(Module &M) = 0; - - virtual void EmitExternalGlobal(const GlobalVariable *GV); }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter { public: - explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, bool V) + explicit PPCLinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) : PPCAsmPrinter(O, TM, T, V){} virtual const char *getPassName() const { @@ -311,16 +364,16 @@ namespace { PPCAsmPrinter::getAnalysisUsage(AU); } - void printModuleLevelGV(const GlobalVariable* GVar); + void PrintGlobalVariable(const GlobalVariable *GVar); }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac /// OS X class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter { - raw_ostream &OS; + formatted_raw_ostream &OS; public: - explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM, - const TargetAsmInfo *T, bool V) + explicit PPCDarwinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) : PPCAsmPrinter(O, TM, T, V), OS(O) {} virtual const char *getPassName() const { @@ -328,8 +381,8 @@ namespace { } bool runOnMachineFunction(MachineFunction &F); - bool doInitialization(Module &M); bool doFinalization(Module &M); + void EmitStartOfAsmFile(Module &M); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -338,7 +391,7 @@ namespace { PPCAsmPrinter::getAnalysisUsage(AU); } - void printModuleLevelGV(const GlobalVariable* GVar); + void PrintGlobalVariable(const GlobalVariable *GVar); }; } // end of anonymous namespace @@ -348,54 +401,52 @@ namespace { void PPCAsmPrinter::printOp(const MachineOperand &MO) { switch (MO.getType()) { case MachineOperand::MO_Immediate: - cerr << "printOp() does not handle immediate values\n"; - abort(); - return; + llvm_unreachable("printOp() does not handle immediate values"); case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB()); + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); return; case MachineOperand::MO_JumpTableIndex: - O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); // FIXME: PIC relocation model return; case MachineOperand::MO_ConstantPoolIndex: - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' << MO.getIndex(); return; - case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_ExternalSymbol: { // Computing the address of an external symbol, not calling it. 
+ std::string Name(MAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + if (TM.getRelocationModel() != Reloc::Static) { - std::string Name(TAI->getGlobalPrefix()); Name += MO.getSymbolName(); - GVStubs.insert(Name); - printSuffixedName(Name, "$non_lazy_ptr"); - return; + GVStubs[Name] = Name+"$non_lazy_ptr"; + Name += "$non_lazy_ptr"; } - O << TAI->getGlobalPrefix() << MO.getSymbolName(); + O << Name; return; + } case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. GlobalValue *GV = MO.getGlobal(); - std::string Name = Mang->getValueName(GV); + std::string Name; // External or weakly linked global variables need non-lazily-resolved stubs - if (TM.getRelocationModel() != Reloc::Static) { - if (GV->isDeclaration() || GV->isWeakForLinker()) { - if (GV->hasHiddenVisibility()) { - if (GV->isDeclaration() || GV->hasCommonLinkage() || - GV->hasAvailableExternallyLinkage()) { - HiddenGVStubs.insert(Name); - printSuffixedName(Name, "$non_lazy_ptr"); - } else { - O << Name; - } - } else { - GVStubs.insert(Name); - printSuffixedName(Name, "$non_lazy_ptr"); - } - return; + if (TM.getRelocationModel() != Reloc::Static && + (GV->isDeclaration() || GV->isWeakForLinker())) { + if (!GV->hasHiddenVisibility()) { + Name = Mang->getMangledName(GV, "$non_lazy_ptr", true); + GVStubs[Mang->getMangledName(GV)] = Name; + } else if (GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) { + Name = Mang->getMangledName(GV, "$non_lazy_ptr", true); + HiddenGVStubs[Mang->getMangledName(GV)] = Name; + } else { + Name = Mang->getMangledName(GV); } + } else { + Name = Mang->getMangledName(GV); } O << Name; @@ -409,22 +460,6 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) { } } -/// EmitExternalGlobal - In this case we need to use the indirect symbol. -/// -void PPCAsmPrinter::EmitExternalGlobal(const GlobalVariable *GV) { - std::string Name; - getGlobalLinkName(GV, Name); - if (TM.getRelocationModel() != Reloc::Static) { - if (GV->hasHiddenVisibility()) - HiddenGVStubs.insert(Name); - else - GVStubs.insert(Name); - printSuffixedName(Name, "$non_lazy_ptr"); - return; - } - O << Name; -} - /// PrintAsmOperand - Print out an operand for an inline asm expression. /// bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -461,15 +496,19 @@ bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } +// At the moment, all inline asm memory operands are a single register. +// In any case, the output of this routine should always be just one +// assembler operand. + bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode) { if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. - if (MI->getOperand(OpNo).isReg()) - printMemRegReg(MI, OpNo); - else - printMemRegImm(MI, OpNo); + assert (MI->getOperand(OpNo).isReg()); + O << "0("; + printOperand(MI, OpNo); + O << ")"; return false; } @@ -505,6 +544,8 @@ void PPCAsmPrinter::printPredicateOperand(const MachineInstr *MI, unsigned OpNo, /// void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; + + processDebugLoc(MI, true); // Check for slwi/srwi mnemonics. 
if (MI->getOpcode() == PPC::RLWINM) { @@ -549,12 +590,13 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { } } - if (printInstruction(MI)) - return; // Printer was automatically generated + printInstruction(MI); + + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + O << '\n'; - assert(0 && "Unhandled instruction in asm writer!"); - abort(); - return; + processDebugLoc(MI, false); } /// runOnMachineFunction - This uses the printMachineInstruction() @@ -571,10 +613,10 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out labels for the function. const Function *F = MF.getFunction(); - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); switch (F->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); + default: llvm_unreachable("Unknown linkage type!"); case Function::PrivateLinkage: case Function::InternalLinkage: // Symbols default to internal. break; @@ -582,6 +624,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { O << "\t.global\t" << CurrentFnName << '\n' << "\t.type\t" << CurrentFnName << ", @function\n"; break; + case Function::LinkerPrivateLinkage: case Function::WeakAnyLinkage: case Function::WeakODRLinkage: case Function::LinkOnceAnyLinkage: @@ -594,7 +637,19 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); EmitAlignment(MF.getAlignment(), F); - O << CurrentFnName << ":\n"; + + if (Subtarget.isPPC64()) { + // Emit an official procedure descriptor. + // FIXME 64-bit SVR4: Use MCSection here? + O << "\t.section\t\".opd\",\"aw\"\n"; + O << "\t.align 3\n"; + O << CurrentFnName << ":\n"; + O << "\t.quad .L." << CurrentFnName << ",.TOC.@tocbase\n"; + O << "\t.previous\n"; + O << ".L." << CurrentFnName << ":\n"; + } else { + O << CurrentFnName << ":\n"; + } // Emit pre-function debug information. DW->BeginFunction(&MF); @@ -604,8 +659,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { I != E; ++I) { // Print a label for the basic block. if (I != MF.begin()) { - printBasicBlockLabel(I, true, true); - O << '\n'; + EmitBasicBlockStart(I); } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { @@ -619,27 +673,16 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out jump tables referenced by the function. EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); // Emit post-function debug information. DW->EndFunction(&MF); - O.flush(); - // We didn't modify anything. return false; } -/// PrintUnmangledNameSafely - Print out the printable characters in the name. -/// Don't print things like \\n or \\0. 
-static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { - for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); - Name != E; ++Name) - if (isprint(*Name)) - OS << *Name; -} - -void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { +void PPCLinuxAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) { const TargetData *TD = TM.getTargetData(); if (!GVar->hasInitializer()) @@ -649,18 +692,17 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { if (EmitSpecialLLVMGlobal(GVar)) return; - std::string name = Mang->getValueName(GVar); + std::string name = Mang->getMangledName(GVar); printVisibility(name, GVar->getVisibility()); Constant *C = GVar->getInitializer(); - if (isa<MDNode>(C) || isa<MDString>(C)) - return; const Type *Type = C->getType(); unsigned Size = TD->getTypeAllocSize(Type); unsigned Align = TD->getPreferredAlignmentLog(GVar); - SwitchToSection(TAI->SectionForGlobal(GVar)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); if (C->isNullValue() && /* FIXME: Verify correct */ !GVar->hasSection() && (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() || GVar->isWeakForLinker()) && /* FIXME: Verify correct */ true) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. if (GVar->hasExternalLinkage()) { O << "\t.global " << name << '\n'; O << "\t.type " << name << ", @object\n"; O << name << ":\n"; O << "\t.zero " << Size << '\n'; } else if (GVar->hasLocalLinkage()) { - O << TAI->getLCOMMDirective() << name << ',' << Size; + O << MAI->getLCOMMDirective() << name << ',' << Size; } else { O << ".comm " << name << ',' << Size; } if (VerboseAsm) { - O << "\t\t" << TAI->getCommentString() << " '"; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t" << MAI->getCommentString() << " '"; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); O << "'"; } O << '\n'; @@ -693,6 +735,7 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::CommonLinkage: + case GlobalValue::LinkerPrivateLinkage: O << "\t.global " << name << '\n' << "\t.type " << name << ", @object\n" << "\t.weak " << name << '\n'; @@ -709,15 +752,14 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { case GlobalValue::PrivateLinkage: break; default: - cerr << "Unknown linkage type!"; - abort(); + llvm_unreachable("Unknown linkage type!"); } EmitAlignment(Align, GVar); O << name << ":"; if (VerboseAsm) { - O << "\t\t\t\t" << TAI->getCommentString() << " '"; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t\t\t" << MAI->getCommentString() << " '"; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); O << "'"; } O << '\n'; @@ -727,10 +769,20 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { } bool PPCLinuxAsmPrinter::doFinalization(Module &M) { - // Print out module-level global variables here. - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I); + const TargetData *TD = TM.getTargetData(); + + bool isPPC64 = TD->getPointerSizeInBits() == 64; + + if (isPPC64 && !TOC.empty()) { + // FIXME 64-bit SVR4: Use MCSection here? + O << "\t.section\t\".toc\",\"aw\"\n"; + + for (StringMap<std::string>::iterator I = TOC.begin(), E = TOC.end(); + I != E; ++I) { + O << I->second << ":\n"; + O << "\t.tc " << I->getKeyData() << "[TC]," << I->getKeyData() << '\n'; + } + } return AsmPrinter::doFinalization(M); } @@ -749,10 +801,10 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out labels for the function.
const Function *F = MF.getFunction(); - SwitchToSection(TAI->SectionForGlobal(F)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); switch (F->getLinkage()) { - default: assert(0 && "Unknown linkage type!"); + default: llvm_unreachable("Unknown linkage type!"); case Function::PrivateLinkage: case Function::InternalLinkage: // Symbols default to internal. break; @@ -763,6 +815,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { case Function::WeakODRLinkage: case Function::LinkOnceAnyLinkage: case Function::LinkOnceODRLinkage: + case Function::LinkerPrivateLinkage: O << "\t.globl\t" << CurrentFnName << '\n'; O << "\t.weak_definition\t" << CurrentFnName << '\n'; break; @@ -789,8 +842,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { I != E; ++I) { // Print a label for the basic block. if (I != MF.begin()) { - printBasicBlockLabel(I, true, true, VerboseAsm); - O << '\n'; + EmitBasicBlockStart(I); } for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { @@ -810,7 +862,7 @@ } -bool PPCDarwinAsmPrinter::doInitialization(Module &M) { +void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { static const char *const CPUDirectives[] = { "", "ppc", @@ -833,26 +885,28 @@ assert(Directive <= PPC::DIR_64 && "Directive out of range."); O << "\t.machine " << CPUDirectives[Directive] << '\n'; - bool Result = AsmPrinter::doInitialization(M); - assert(MMI); - // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. - SwitchToTextSection("\t.section __TEXT,__textcoal_nt,coalesced," - "pure_instructions"); + TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); if (TM.getRelocationModel() == Reloc::PIC_) { - SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs," - "pure_instructions,32"); + OutStreamer.SwitchSection( + TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 32, SectionKind::getText())); } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { - SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs," - "pure_instructions,16"); + OutStreamer.SwitchSection( + TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 16, SectionKind::getText())); } - SwitchToSection(TAI->getTextSection()); - - return Result; + OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); } -void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { +void PPCDarwinAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) { const TargetData *TD = TM.getTargetData(); if (!GVar->hasInitializer()) @@ -869,8 +923,7 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { return; } - std::string name = Mang->getValueName(GVar); - + std::string name = Mang->getMangledName(GVar); printVisibility(name, GVar->getVisibility()); Constant *C = GVar->getInitializer(); @@ -878,13 +931,17 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { unsigned Size = TD->getTypeAllocSize(Type); unsigned Align = TD->getPreferredAlignmentLog(GVar); -
SwitchToSection(TAI->SectionForGlobal(GVar)); + const MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GVar, Mang, TM); + OutStreamer.SwitchSection(TheSection); + /// FIXME: Drive this off the section! if (C->isNullValue() && /* FIXME: Verify correct */ !GVar->hasSection() && (GVar->hasLocalLinkage() || GVar->hasExternalLinkage() || GVar->isWeakForLinker()) && - TAI->SectionKindForGlobal(GVar) != SectionKind::RODataMergeStr) { + // Don't put things that should go in the cstring section into "comm". + !TheSection->getKind().isMergeableCString()) { if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. if (GVar->hasExternalLinkage()) { @@ -892,15 +949,15 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { O << "\t.zerofill __DATA, __common, " << name << ", " << Size << ", " << Align; } else if (GVar->hasLocalLinkage()) { - O << TAI->getLCOMMDirective() << name << ',' << Size << ',' << Align; + O << MAI->getLCOMMDirective() << name << ',' << Size << ',' << Align; } else if (!GVar->hasCommonLinkage()) { O << "\t.globl " << name << '\n' - << TAI->getWeakDefDirective() << name << '\n'; + << MAI->getWeakDefDirective() << name << '\n'; EmitAlignment(Align, GVar); O << name << ":"; if (VerboseAsm) { - O << "\t\t\t\t" << TAI->getCommentString() << " "; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t\t\t" << MAI->getCommentString() << " "; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); } O << '\n'; EmitGlobalConstant(C); @@ -912,8 +969,8 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { O << ',' << Align; } if (VerboseAsm) { - O << "\t\t" << TAI->getCommentString() << " '"; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t" << MAI->getCommentString() << " '"; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); O << "'"; } O << '\n'; @@ -926,6 +983,7 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::CommonLinkage: + case GlobalValue::LinkerPrivateLinkage: O << "\t.globl " << name << '\n' << "\t.weak_definition " << name << '\n'; break; @@ -940,15 +998,14 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { case GlobalValue::PrivateLinkage: break; default: - cerr << "Unknown linkage type!"; - abort(); + llvm_unreachable("Unknown linkage type!"); } EmitAlignment(Align, GVar); O << name << ":"; if (VerboseAsm) { - O << "\t\t\t\t" << TAI->getCommentString() << " '"; - PrintUnmangledNameSafely(GVar, O); + O << "\t\t\t\t" << MAI->getCommentString() << " '"; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); O << "'"; } O << '\n'; @@ -960,141 +1017,110 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { bool PPCDarwinAsmPrinter::doFinalization(Module &M) { const TargetData *TD = TM.getTargetData(); - // Print out module-level global variables here. - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I); - bool isPPC64 = TD->getPointerSizeInBits() == 64; + // Darwin/PPC always uses mach-o. 
+ TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + + + const MCSection *LSPSection = 0; + if (!FnStubs.empty()) // .lazy_symbol_pointer + LSPSection = TLOFMacho.getLazySymbolPointerSection(); + + // Output stubs for dynamically-linked functions - if (TM.getRelocationModel() == Reloc::PIC_) { - for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); - i != e; ++i) { - SwitchToTextSection("\t.section __TEXT,__picsymbolstub1,symbol_stubs," - "pure_instructions,32"); + if (TM.getRelocationModel() == Reloc::PIC_ && !FnStubs.empty()) { + const MCSection *StubSection = + TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 32, SectionKind::getText()); + for (StringMap<FnStubInfo>::iterator I = FnStubs.begin(), E = FnStubs.end(); + I != E; ++I) { + OutStreamer.SwitchSection(StubSection); EmitAlignment(4); - const char *p = i->getKeyData(); - bool hasQuote = p[0]=='\"'; - printSuffixedName(p, "$stub"); - O << ":\n"; - O << "\t.indirect_symbol " << p << '\n'; + const FnStubInfo &Info = I->second; + O << Info.Stub << ":\n"; + O << "\t.indirect_symbol " << I->getKeyData() << '\n'; O << "\tmflr r0\n"; - O << "\tbcl 20,31,"; - if (hasQuote) - O << "\"L0$" << &p[1]; - else - O << "L0$" << p; - O << '\n'; - if (hasQuote) - O << "\"L0$" << &p[1]; - else - O << "L0$" << p; - O << ":\n"; + O << "\tbcl 20,31," << Info.AnonSymbol << '\n'; + O << Info.AnonSymbol << ":\n"; O << "\tmflr r11\n"; - O << "\taddis r11,r11,ha16("; - printSuffixedName(p, "$lazy_ptr"); - O << "-"; - if (hasQuote) - O << "\"L0$" << &p[1]; - else - O << "L0$" << p; + O << "\taddis r11,r11,ha16(" << Info.LazyPtr << "-" << Info.AnonSymbol; O << ")\n"; O << "\tmtlr r0\n"; - if (isPPC64) - O << "\tldu r12,lo16("; - else - O << "\tlwzu r12,lo16("; - printSuffixedName(p, "$lazy_ptr"); - O << "-"; - if (hasQuote) - O << "\"L0$" << &p[1]; - else - O << "L0$" << p; - O << ")(r11)\n"; + O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16("; + O << Info.LazyPtr << "-" << Info.AnonSymbol << ")(r11)\n"; O << "\tmtctr r12\n"; O << "\tbctr\n"; - SwitchToDataSection(".lazy_symbol_pointer"); - printSuffixedName(p, "$lazy_ptr"); - O << ":\n"; - O << "\t.indirect_symbol " << p << '\n'; - if (isPPC64) - O << "\t.quad dyld_stub_binding_helper\n"; - else - O << "\t.long dyld_stub_binding_helper\n"; + + OutStreamer.SwitchSection(LSPSection); + O << Info.LazyPtr << ":\n"; + O << "\t.indirect_symbol " << I->getKeyData() << '\n'; + O << (isPPC64 ?
"\t.quad" : "\t.long") << " dyld_stub_binding_helper\n"; } - } else { - for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); - i != e; ++i) { - SwitchToTextSection("\t.section __TEXT,__symbol_stub1,symbol_stubs," - "pure_instructions,16"); + } else if (!FnStubs.empty()) { + const MCSection *StubSection = + TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 16, SectionKind::getText()); + + for (StringMap::iterator I = FnStubs.begin(), E = FnStubs.end(); + I != E; ++I) { + OutStreamer.SwitchSection(StubSection); EmitAlignment(4); - const char *p = i->getKeyData(); - printSuffixedName(p, "$stub"); - O << ":\n"; - O << "\t.indirect_symbol " << p << '\n'; - O << "\tlis r11,ha16("; - printSuffixedName(p, "$lazy_ptr"); - O << ")\n"; - if (isPPC64) - O << "\tldu r12,lo16("; - else - O << "\tlwzu r12,lo16("; - printSuffixedName(p, "$lazy_ptr"); - O << ")(r11)\n"; + const FnStubInfo &Info = I->second; + O << Info.Stub << ":\n"; + O << "\t.indirect_symbol " << I->getKeyData() << '\n'; + O << "\tlis r11,ha16(" << Info.LazyPtr << ")\n"; + O << (isPPC64 ? "\tldu" : "\tlwzu") << " r12,lo16("; + O << Info.LazyPtr << ")(r11)\n"; O << "\tmtctr r12\n"; O << "\tbctr\n"; - SwitchToDataSection(".lazy_symbol_pointer"); - printSuffixedName(p, "$lazy_ptr"); - O << ":\n"; - O << "\t.indirect_symbol " << p << '\n'; - if (isPPC64) - O << "\t.quad dyld_stub_binding_helper\n"; - else - O << "\t.long dyld_stub_binding_helper\n"; + OutStreamer.SwitchSection(LSPSection); + O << Info.LazyPtr << ":\n"; + O << "\t.indirect_symbol " << I->getKeyData() << '\n'; + O << (isPPC64 ? "\t.quad" : "\t.long") << " dyld_stub_binding_helper\n"; } } O << '\n'; - if (TAI->doesSupportExceptionHandling() && MMI) { + if (MAI->doesSupportExceptionHandling() && MMI) { // Add the (possibly multiple) personalities to the set of global values. // Only referenced functions get into the Personalities list. const std::vector &Personalities = MMI->getPersonalities(); for (std::vector::const_iterator I = Personalities.begin(), - E = Personalities.end(); I != E; ++I) - if (*I) GVStubs.insert("_" + (*I)->getName()); + E = Personalities.end(); I != E; ++I) { + if (*I) + GVStubs[Mang->getMangledName(*I)] = + Mang->getMangledName(*I, "$non_lazy_ptr", true); + } } - // Output stubs for external and common global variables. + // Output macho stubs for external and common global variables. if (!GVStubs.empty()) { - SwitchToDataSection(".non_lazy_symbol_pointer"); - for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end(); - i != e; ++i) { - std::string p = i->getKeyData(); - printSuffixedName(p, "$non_lazy_ptr"); - O << ":\n"; - O << "\t.indirect_symbol " << p << '\n'; - if (isPPC64) - O << "\t.quad\t0\n"; - else - O << "\t.long\t0\n"; + // Switch with ".non_lazy_symbol_pointer" directive. + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + EmitAlignment(isPPC64 ? 3 : 2); + + for (StringMap::iterator I = GVStubs.begin(), + E = GVStubs.end(); I != E; ++I) { + O << I->second << ":\n"; + O << "\t.indirect_symbol " << I->getKeyData() << '\n'; + O << (isPPC64 ? "\t.quad\t0\n" : "\t.long\t0\n"); } } if (!HiddenGVStubs.empty()) { - SwitchToSection(TAI->getDataSection()); - for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end(); - i != e; ++i) { - std::string p = i->getKeyData(); - EmitAlignment(isPPC64 ? 
3 : 2); - printSuffixedName(p, "$non_lazy_ptr"); - O << ":\n"; - if (isPPC64) - O << "\t.quad\t"; - else - O << "\t.long\t"; - O << p << '\n'; + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(isPPC64 ? 3 : 2); + for (StringMap<std::string>::iterator I = HiddenGVStubs.begin(), + E = HiddenGVStubs.end(); I != E; ++I) { + O << I->second << ":\n"; + O << (isPPC64 ? "\t.quad\t" : "\t.long\t") << I->getKeyData() << '\n'; } } @@ -1114,28 +1140,19 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { /// for a MachineFunction to the given output stream, in a format that the /// Darwin assembler can deal with. /// -FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o, - PPCTargetMachine &tm, - bool verbose) { +static AsmPrinter *createPPCAsmPrinterPass(formatted_raw_ostream &o, + TargetMachine &tm, + const MCAsmInfo *tai, + bool verbose) { const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>(); - if (Subtarget->isDarwin()) { - return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); - } else { - return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); - } -} - -namespace { - static struct Register { - Register() { - PPCTargetMachine::registerAsmPrinter(createPPCAsmPrinterPass); - } - } Registrator; + if (Subtarget->isDarwin()) + return new PPCDarwinAsmPrinter(o, tm, tai, verbose); + return new PPCLinuxAsmPrinter(o, tm, tai, verbose); } -extern "C" int PowerPCAsmPrinterForceLink; -int PowerPCAsmPrinterForceLink = 0; - // Force static initialization. -extern "C" void LLVMInitializePowerPCAsmPrinter() { } +extern "C" void LLVMInitializePowerPCAsmPrinter() { + TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass); + TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass); +} diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index a6479d81a6f11..bdd6d36239808 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -20,10 +20,10 @@ add_llvm_target(PowerPCCodeGen PPCISelLowering.cpp PPCJITInfo.cpp PPCMachOWriterInfo.cpp + PPCMCAsmInfo.cpp PPCPredicates.cpp PPCRegisterInfo.cpp PPCSubtarget.cpp - PPCTargetAsmInfo.cpp PPCTargetMachine.cpp ) diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index db688970e9e71..4015d4aa190d4 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -17,6 +17,6 @@ BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtarget.inc PPCGenCallingConv.inc -DIRS = AsmPrinter +DIRS = AsmPrinter TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index f6c3469908b9b..7b98268bd83d0 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -24,16 +24,21 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; class MachineCodeEmitter; - class raw_ostream; + class ObjectCodeEmitter; + class formatted_raw_ostream; FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); -FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, PPCTargetMachine &TM, - bool Verbose); FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM, MachineCodeEmitter &MCE); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, - JITCodeEmitter &MCE); + JITCodeEmitter &MCE); +FunctionPass *createPPCObjectCodeEmitterPass(PPCTargetMachine &TM, + ObjectCodeEmitter &OCE); + +extern Target ThePPC32Target; +extern Target ThePPC64Target;
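LLVMInitializePowerPCAsmPrinter above shows the new registration scheme: instead of a static Register object wired through a per-target hook, each target installs an AsmPrinter factory in the TargetRegistry, keyed by its Target instance. A sketch of the same pattern for a hypothetical target (TheDemoTarget and the empty factory body are placeholders; the factory signature is the one this patch introduces, so linking this sketch would still require a real target definition):

    #include "llvm/Target/TargetRegistry.h"

    using namespace llvm;

    extern Target TheDemoTarget; // hypothetical Target instance, defined elsewhere

    static AsmPrinter *createDemoAsmPrinterPass(formatted_raw_ostream &O,
                                                TargetMachine &TM,
                                                const MCAsmInfo *MAI,
                                                bool Verbose) {
      return 0; // a real factory would construct the target's AsmPrinter here
    }

    extern "C" void LLVMInitializeDemoAsmPrinter() {
      // The driver later resolves a triple to TheDemoTarget and calls back
      // into this factory to build the printer.
      TargetRegistry::RegisterAsmPrinter(TheDemoTarget, createDemoAsmPrinterPass);
    }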
+ } // end namespace llvm; // Defines symbolic names for PowerPC registers. This defines a mapping from diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index cd6018de490b7..0675293e1144d 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -19,12 +19,15 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/ObjectCodeEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -55,8 +58,7 @@ namespace { template <class CodeEmitter> class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass, - public PPCCodeEmitter - { + public PPCCodeEmitter { TargetMachine &TM; CodeEmitter &MCE; @@ -88,9 +90,10 @@ namespace { template <class CodeEmitter> char Emitter<CodeEmitter>::ID = 0; } - + /// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code /// to the specified MCE object. + FunctionPass *llvm::createPPCCodeEmitterPass(PPCTargetMachine &TM, MachineCodeEmitter &MCE) { return new Emitter<MachineCodeEmitter>(TM, MCE); @@ -101,6 +104,11 @@ FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, return new Emitter<JITCodeEmitter>(TM, JCE); } +FunctionPass *llvm::createPPCObjectCodeEmitterPass(PPCTargetMachine &TM, + ObjectCodeEmitter &OCE) { + return new Emitter<ObjectCodeEmitter>(TM, OCE); +} + template <class CodeEmitter> bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || @@ -121,9 +129,10 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { template <class CodeEmitter> void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { MCE.StartMachineBasicBlock(&MBB); - + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ const MachineInstr &MI = *I; + MCE.processDebugLoc(MI.getDebugLoc(), true); switch (MI.getOpcode()) { default: MCE.emitWordBE(getBinaryCodeForInstr(MI)); @@ -133,6 +142,7 @@ void Emitter<CodeEmitter>::emitBasicBlock(MachineBasicBlock &MBB) { MCE.emitLabel(MI.getOperand(0).getImm()); break; case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::KILL: break; // pseudo opcode, no side effects case PPC::MovePCtoLR: case PPC::MovePCtoLR8: @@ -141,6 +151,7 @@ MCE.emitWordBE(0x48000005); // bl 1 break; } + MCE.processDebugLoc(MI.getDebugLoc(), false); } } @@ -172,7 +183,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); } switch (MI.getOpcode()) { - default: MI.dump(); assert(0 && "Unknown instruction for relocation!"); + default: MI.dump(); llvm_unreachable("Unknown instruction for relocation!"); case PPC::LIS: case PPC::LIS8: case PPC::ADDIS: @@ -193,7 +204,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, case PPC::LWZ8: case PPC::LFS: case PPC::LFD: - + // Stores.
case PPC::STB: case PPC::STB8: @@ -214,7 +225,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, break; } } - + MachineRelocation R; if (MO.isGlobal()) { R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, @@ -231,7 +242,7 @@ R = MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), Reloc, MO.getIndex(), 0); } - + // If in PIC mode, we need to encode the negated address of the // 'movepctolr' into the unrelocated field. After relocation, we'll have // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm @@ -242,7 +253,7 @@ R.setConstantVal(-(intptr_t)MovePCtoLROffset - 4); } MCE.addRelocation(R); - + } else if (MO.isMBB()) { unsigned Reloc = 0; unsigned Opcode = MI.getOpcode(); @@ -252,15 +263,17 @@ Reloc = PPC::reloc_pcrel_bx; else // BCC instruction Reloc = PPC::reloc_pcrel_bcx; + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Reloc, MO.getMBB())); } else { - cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; - abort(); +#ifndef NDEBUG + errs() << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; +#endif + llvm_unreachable(0); } return rv; } #include "PPCGenCodeEmitter.inc" - diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 770a560ccf4ed..65f113e6fb9af 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -31,33 +31,32 @@ public: /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. - static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) { + static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) { if (isDarwinABI) - return LP64 ? 16 : 8; + return isPPC64 ? 16 : 8; // SVR4 ABI: - return 4; + return isPPC64 ? 16 : 4; } /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) { + static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { // For the Darwin ABI: // Use the TOC save slot in the PowerPC linkage area for saving the frame // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 // is treated as a caller saved register.) if (isDarwinABI) - return LP64 ? 40 : 20; + return isPPC64 ? 40 : 20; - // SVR4 ABI: - // Save it right before the link register + // SVR4 ABI: First slot in the general register save area. return -4U; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// - static unsigned getLinkageSize(bool LP64, bool isDarwinABI) { - if (isDarwinABI) - return 6 * (LP64 ? 8 : 4); + static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { + if (isDarwinABI || isPPC64) + return 6 * (isPPC64 ? 8 : 4); // SVR4 ABI: return 8; @@ -65,118 +64,222 @@ public: /// getMinCallArgumentsSize - Return the size of the minimum PowerPC ABI /// argument area. - static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) { + static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) { - // For the Darwin ABI: + // For the Darwin ABI / 64-bit SVR4 ABI: // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if it is varargs.
// Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - if (isDarwinABI) - return 8 * (LP64 ? 8 : 4); + if (isDarwinABI || isPPC64) + return 8 * (isPPC64 ? 8 : 4); - // SVR4 ABI: + // 32-bit SVR4 ABI: // There is no default stack allocated for the 8 first GPR arguments. return 0; } /// getMinCallFrameSize - Return the minimum size a call frame can be using /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) { + static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) { // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(LP64, isDarwinABI) + - getMinCallArgumentsSize(LP64, isDarwinABI); + return getLinkageSize(isPPC64, isDarwinABI) + + getMinCallArgumentsSize(isPPC64, isDarwinABI); } // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. - const std::pair<unsigned, int> * + const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const { // Early exit if not using the SVR4 ABI. if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { NumEntries = 0; return 0; } - - static const std::pair<unsigned, int> Offsets[] = { + + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. - std::pair<unsigned, int>(PPC::F31, -8), - std::pair<unsigned, int>(PPC::F30, -16), - std::pair<unsigned, int>(PPC::F29, -24), - std::pair<unsigned, int>(PPC::F28, -32), - std::pair<unsigned, int>(PPC::F27, -40), - std::pair<unsigned, int>(PPC::F26, -48), - std::pair<unsigned, int>(PPC::F25, -56), - std::pair<unsigned, int>(PPC::F24, -64), - std::pair<unsigned, int>(PPC::F23, -72), - std::pair<unsigned, int>(PPC::F22, -80), - std::pair<unsigned, int>(PPC::F21, -88), - std::pair<unsigned, int>(PPC::F20, -96), - std::pair<unsigned, int>(PPC::F19, -104), - std::pair<unsigned, int>(PPC::F18, -112), - std::pair<unsigned, int>(PPC::F17, -120), - std::pair<unsigned, int>(PPC::F16, -128), - std::pair<unsigned, int>(PPC::F15, -136), - std::pair<unsigned, int>(PPC::F14, -144), - + {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + // General register save area offsets. - std::pair<unsigned, int>(PPC::R31, -4), - std::pair<unsigned, int>(PPC::R30, -8), - std::pair<unsigned, int>(PPC::R29, -12), - std::pair<unsigned, int>(PPC::R28, -16), - std::pair<unsigned, int>(PPC::R27, -20), - std::pair<unsigned, int>(PPC::R26, -24), - std::pair<unsigned, int>(PPC::R25, -28), - std::pair<unsigned, int>(PPC::R24, -32), - std::pair<unsigned, int>(PPC::R23, -36), - std::pair<unsigned, int>(PPC::R22, -40), - std::pair<unsigned, int>(PPC::R21, -44), - std::pair<unsigned, int>(PPC::R20, -48), - std::pair<unsigned, int>(PPC::R19, -52), - std::pair<unsigned, int>(PPC::R18, -56), - std::pair<unsigned, int>(PPC::R17, -60), - std::pair<unsigned, int>(PPC::R16, -64), - std::pair<unsigned, int>(PPC::R15, -68), - std::pair<unsigned, int>(PPC::R14, -72), + {PPC::R31, -4}, + {PPC::R30, -8}, + {PPC::R29, -12}, + {PPC::R28, -16}, + {PPC::R27, -20}, + {PPC::R26, -24}, + {PPC::R25, -28}, + {PPC::R24, -32}, + {PPC::R23, -36}, + {PPC::R22, -40}, + {PPC::R21, -44}, + {PPC::R20, -48}, + {PPC::R19, -52}, + {PPC::R18, -56}, + {PPC::R17, -60}, + {PPC::R16, -64}, + {PPC::R15, -68}, + {PPC::R14, -72}, // CR save area offset. // FIXME SVR4: Disable CR save area for now.
-// std::pair<unsigned, int>(PPC::CR2, -4), -// std::pair<unsigned, int>(PPC::CR3, -4), -// std::pair<unsigned, int>(PPC::CR4, -4), -// std::pair<unsigned, int>(PPC::CR2LT, -4), -// std::pair<unsigned, int>(PPC::CR2GT, -4), -// std::pair<unsigned, int>(PPC::CR2EQ, -4), -// std::pair<unsigned, int>(PPC::CR2UN, -4), -// std::pair<unsigned, int>(PPC::CR3LT, -4), -// std::pair<unsigned, int>(PPC::CR3GT, -4), -// std::pair<unsigned, int>(PPC::CR3EQ, -4), -// std::pair<unsigned, int>(PPC::CR3UN, -4), -// std::pair<unsigned, int>(PPC::CR4LT, -4), -// std::pair<unsigned, int>(PPC::CR4GT, -4), -// std::pair<unsigned, int>(PPC::CR4EQ, -4), -// std::pair<unsigned, int>(PPC::CR4UN, -4), +// {PPC::CR2, -4}, +// {PPC::CR3, -4}, +// {PPC::CR4, -4}, +// {PPC::CR2LT, -4}, +// {PPC::CR2GT, -4}, +// {PPC::CR2EQ, -4}, +// {PPC::CR2UN, -4}, +// {PPC::CR3LT, -4}, +// {PPC::CR3GT, -4}, +// {PPC::CR3EQ, -4}, +// {PPC::CR3UN, -4}, +// {PPC::CR4LT, -4}, +// {PPC::CR4GT, -4}, +// {PPC::CR4EQ, -4}, +// {PPC::CR4UN, -4}, // VRSAVE save area offset. - std::pair<unsigned, int>(PPC::VRSAVE, -4), - + {PPC::VRSAVE, -4}, + // Vector register save area - std::pair<unsigned, int>(PPC::V31, -16), - std::pair<unsigned, int>(PPC::V30, -32), - std::pair<unsigned, int>(PPC::V29, -48), - std::pair<unsigned, int>(PPC::V28, -64), - std::pair<unsigned, int>(PPC::V27, -80), - std::pair<unsigned, int>(PPC::V26, -96), - std::pair<unsigned, int>(PPC::V25, -112), - std::pair<unsigned, int>(PPC::V24, -128), - std::pair<unsigned, int>(PPC::V23, -144), - std::pair<unsigned, int>(PPC::V22, -160), - std::pair<unsigned, int>(PPC::V21, -176), - std::pair<unsigned, int>(PPC::V20, -192) + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192} }; - - NumEntries = array_lengthof(Offsets); - - return Offsets; + + static const SpillSlot Offsets64[] = { + // Floating-point register save area offsets. + {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. + // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit + // mode? + {PPC::R31, -4}, + {PPC::R30, -12}, + {PPC::R29, -20}, + {PPC::R28, -28}, + {PPC::R27, -36}, + {PPC::R26, -44}, + {PPC::R25, -52}, + {PPC::R24, -60}, + {PPC::R23, -68}, + {PPC::R22, -76}, + {PPC::R21, -84}, + {PPC::R20, -92}, + {PPC::R19, -100}, + {PPC::R18, -108}, + {PPC::R17, -116}, + {PPC::R16, -124}, + {PPC::R15, -132}, + {PPC::R14, -140}, + + {PPC::X31, -8}, + {PPC::X30, -16}, + {PPC::X29, -24}, + {PPC::X28, -32}, + {PPC::X27, -40}, + {PPC::X26, -48}, + {PPC::X25, -56}, + {PPC::X24, -64}, + {PPC::X23, -72}, + {PPC::X22, -80}, + {PPC::X21, -88}, + {PPC::X20, -96}, + {PPC::X19, -104}, + {PPC::X18, -112}, + {PPC::X17, -120}, + {PPC::X16, -128}, + {PPC::X15, -136}, + {PPC::X14, -144}, + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. +// {PPC::CR2, -4}, +// {PPC::CR3, -4}, +// {PPC::CR4, -4}, +// {PPC::CR2LT, -4}, +// {PPC::CR2GT, -4}, +// {PPC::CR2EQ, -4}, +// {PPC::CR2UN, -4}, +// {PPC::CR3LT, -4}, +// {PPC::CR3GT, -4}, +// {PPC::CR3EQ, -4}, +// {PPC::CR3UN, -4}, +// {PPC::CR4LT, -4}, +// {PPC::CR4GT, -4}, +// {PPC::CR4EQ, -4}, +// {PPC::CR4UN, -4}, + + // VRSAVE save area offset.
+ {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192} + }; + + if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + NumEntries = array_lengthof(Offsets64); + + return Offsets64; + } else { + NumEntries = array_lengthof(Offsets); + + return Offsets; + } } }; diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index ec3e757651f40..6af7e0ffbc1a0 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -17,6 +17,8 @@ #include "PPCInstrInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -51,7 +53,7 @@ PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) } void PPCHazardRecognizer970::EndDispatchGroup() { - DOUT << "=== Start of dispatch group\n"; + DEBUG(errs() << "=== Start of dispatch group\n"); NumIssued = 0; // Structural hazard info. @@ -141,7 +143,7 @@ getHazardType(SUnit *SU) { return Hazard; switch (InstrType) { - default: assert(0 && "Unknown instruction type!"); + default: llvm_unreachable("Unknown instruction type!"); case PPCII::PPC970_FXU: case PPCII::PPC970_LSU: case PPCII::PPC970_FPU: @@ -167,7 +169,7 @@ getHazardType(SUnit *SU) { if (isLoad && NumStores) { unsigned LoadSize; switch (Opcode) { - default: assert(0 && "Unknown load!"); + default: llvm_unreachable("Unknown load!"); case PPC::LBZ: case PPC::LBZU: case PPC::LBZX: case PPC::LBZ8: case PPC::LBZU8: @@ -235,7 +237,7 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { if (isStore) { unsigned ThisStoreSize; switch (Opcode) { - default: assert(0 && "Unknown store instruction!"); + default: llvm_unreachable("Unknown store instruction!"); case PPC::STB: case PPC::STB8: case PPC::STBU: case PPC::STBU8: case PPC::STBX: case PPC::STBX8: diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 823e3162191ea..8fa6a6614b97a 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -20,6 +20,7 @@ #include "PPCHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -31,6 +32,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace { @@ -49,17 +52,12 @@ namespace { PPCLowering(*TM.getTargetLowering()), PPCSubTarget(*TM.getSubtargetImpl()) {} - virtual bool runOnFunction(Function &Fn) { - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit.
- if (Fn.hasAvailableExternallyLinkage) - return false; - + virtual bool runOnMachineFunction(MachineFunction &MF) { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; - SelectionDAGISel::runOnFunction(Fn); + SelectionDAGISel::runOnMachineFunction(MF); - InsertVRSaveCode(Fn); + InsertVRSaveCode(MF); return true; } @@ -145,30 +143,14 @@ } /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + /// inline asm expressions. It is always correct to compute the value into + /// a register. The case of adding a (possibly relocatable) constant to a + /// register can be improved, but it is wrong to substitute Reg+Reg for + /// Reg in an asm, because the load or store opcode would have to change. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { - SDValue Op0, Op1; - switch (ConstraintCode) { - default: return true; - case 'm': // memory - if (!SelectAddrIdx(Op, Op, Op0, Op1)) - SelectAddrImm(Op, Op, Op0, Op1); - break; - case 'o': // offsetable - if (!SelectAddrImm(Op, Op, Op0, Op1)) { - Op0 = Op; - Op1 = getSmallIPtrImm(0); - } - break; - case 'v': // not offsetable - SelectAddrIdxOnly(Op, Op, Op0, Op1); - break; - } - - OutOps.push_back(Op0); - OutOps.push_back(Op1); + OutOps.push_back(Op); return false; } @@ -179,7 +161,7 @@ /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. virtual void InstructionSelect(); - void InsertVRSaveCode(Function &Fn); + void InsertVRSaveCode(MachineFunction &MF); virtual const char *getPassName() const { return "PowerPC DAG->DAG Pattern Instruction Selection"; @@ -216,13 +198,12 @@ void PPCDAGToDAGISel::InstructionSelect() { /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, /// check to see if we need to save/restore VRSAVE. If so, do it. -void PPCDAGToDAGISel::InsertVRSaveCode(Function &F) { +void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) { // Check to see if this function uses vector registers, which means we have to // save and restore the VRSAVE register and update it with the regs we use. // // In this case, there will be virtual registers of vector type type created // by the scheduler. Detect them now. - MachineFunction &Fn = MachineFunction::get(&F); bool HasVectorVReg = false; for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, e = RegInfo->getLastVirtReg()+1; i != e; ++i) @@ -285,7 +266,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { if (!GlobalBaseReg) { const TargetInstrInfo &TII = *TM.getInstrInfo(); // Insert the set of GlobalBaseReg into the first MBB of the function - MachineBasicBlock &FirstMBB = BB->getParent()->front(); + MachineBasicBlock &FirstMBB = MF->front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); DebugLoc dl = DebugLoc::getUnknownLoc(); @@ -488,7 +469,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SH &= 31; SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); } } return 0; @@ -507,12 +488,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, if (isInt32Immediate(RHS, Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it.
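// (cmplwi encodes an unsigned 16-bit immediate and cmpwi a signed one, so between the two forms any halfword constant can be absorbed directly into the compare instead of being materialized in a register first.)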
if (isUInt16(Imm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS, - getI32Imm(Imm & 0xFFFF)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, + getI32Imm(Imm & 0xFFFF)), 0); // If this is a 16-bit signed immediate, fold it. if (isInt16((int)Imm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS, - getI32Imm(Imm & 0xFFFF)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, + getI32Imm(Imm & 0xFFFF)), 0); // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: @@ -523,22 +504,22 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, // xoris r0,r3,0x1234 // cmplwi cr0,r0,0x5678 // beq cr0,L6 - SDValue Xor(CurDAG->getTargetNode(PPC::XORIS, dl, MVT::i32, LHS, - getI32Imm(Imm >> 16)), 0); - return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, Xor, - getI32Imm(Imm & 0xFFFF)), 0); + SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, + getI32Imm(Imm >> 16)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, + getI32Imm(Imm & 0xFFFF)), 0); } Opc = PPC::CMPLW; } else if (ISD::isUnsignedIntSetCC(CC)) { if (isInt32Immediate(RHS, Imm) && isUInt16(Imm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPLWI, dl, MVT::i32, LHS, - getI32Imm(Imm & 0xFFFF)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, + getI32Imm(Imm & 0xFFFF)), 0); Opc = PPC::CMPLW; } else { short SImm; if (isIntS16Immediate(RHS, SImm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPWI, dl, MVT::i32, LHS, - getI32Imm((int)SImm & 0xFFFF)), + return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, + getI32Imm((int)SImm & 0xFFFF)), 0); Opc = PPC::CMPW; } @@ -548,12 +529,12 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, if (isInt64Immediate(RHS.getNode(), Imm)) { // SETEQ/SETNE comparison with 16-bit immediate, fold it. if (isUInt16(Imm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS, - getI32Imm(Imm & 0xFFFF)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, + getI32Imm(Imm & 0xFFFF)), 0); // If this is a 16-bit signed immediate, fold it. 
if (isInt16(Imm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS, - getI32Imm(Imm & 0xFFFF)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, + getI32Imm(Imm & 0xFFFF)), 0); // For non-equality comparisons, the default code would materialize the // constant, then compare against it, like this: @@ -565,23 +546,23 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, // cmpldi cr0,r0,0x5678 // beq cr0,L6 if (isUInt32(Imm)) { - SDValue Xor(CurDAG->getTargetNode(PPC::XORIS8, dl, MVT::i64, LHS, - getI64Imm(Imm >> 16)), 0); - return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, Xor, - getI64Imm(Imm & 0xFFFF)), 0); + SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, + getI64Imm(Imm >> 16)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, + getI64Imm(Imm & 0xFFFF)), 0); } } Opc = PPC::CMPLD; } else if (ISD::isUnsignedIntSetCC(CC)) { if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPLDI, dl, MVT::i64, LHS, - getI64Imm(Imm & 0xFFFF)), 0); + return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, + getI64Imm(Imm & 0xFFFF)), 0); Opc = PPC::CMPLD; } else { short SImm; if (isIntS16Immediate(RHS, SImm)) - return SDValue(CurDAG->getTargetNode(PPC::CMPDI, dl, MVT::i64, LHS, - getI64Imm(SImm & 0xFFFF)), + return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, + getI64Imm(SImm & 0xFFFF)), 0); Opc = PPC::CMPD; } @@ -591,7 +572,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); Opc = PPC::FCMPUD; } - return SDValue(CurDAG->getTargetNode(Opc, dl, MVT::i32, LHS, RHS), 0); + return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { @@ -600,8 +581,8 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) { case ISD::SETONE: case ISD::SETOLE: case ISD::SETOGE: - assert(0 && "Should be lowered by legalize!"); - default: assert(0 && "Unknown condition!"); abort(); + llvm_unreachable("Should be lowered by legalize!"); + default: llvm_unreachable("Unknown condition!"); case ISD::SETOEQ: case ISD::SETEQ: return PPC::PRED_EQ; case ISD::SETUNE: @@ -632,7 +613,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { Invert = false; Other = -1; switch (CC) { - default: assert(0 && "Unknown condition!"); abort(); + default: llvm_unreachable("Unknown condition!"); case ISD::SETOLT: case ISD::SETLT: return 0; // Bit #0 = SETOLT case ISD::SETOGT: @@ -651,7 +632,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { case ISD::SETOGE: case ISD::SETOLE: case ISD::SETONE: - assert(0 && "Invalid branch code: should be expanded by legalize"); + llvm_unreachable("Invalid branch code: should be expanded by legalize"); // These are invalid for floating point. Assume integer. 
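// (The index returned here selects a bit within the four-bit CR field: 0 = LT, 1 = GT, 2 = EQ, 3 = SO/UN, which is why the unordered predicates below reuse the same slots as their ordered counterparts.)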
case ISD::SETULT: return 0; case ISD::SETUGT: return 1; @@ -673,14 +654,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { switch (CC) { default: break; case ISD::SETEQ: { - Op = SDValue(CurDAG->getTargetNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); + Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } case ISD::SETNE: { SDValue AD = - SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, - Op, getI32Imm(~0U)), 0); + SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + Op, getI32Imm(~0U)), 0); return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); } @@ -690,8 +671,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { } case ISD::SETGT: { SDValue T = - SDValue(CurDAG->getTargetNode(PPC::NEG, dl, MVT::i32, Op), 0); - T = SDValue(CurDAG->getTargetNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); + SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); + T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } @@ -701,31 +682,31 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { switch (CC) { default: break; case ISD::SETEQ: - Op = SDValue(CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, - Op, getI32Imm(1)), 0); + Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + Op, getI32Imm(1)), 0); return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, - SDValue(CurDAG->getTargetNode(PPC::LI, dl, - MVT::i32, - getI32Imm(0)), 0), + SDValue(CurDAG->getMachineNode(PPC::LI, dl, + MVT::i32, + getI32Imm(0)), 0), Op.getValue(1)); case ISD::SETNE: { - Op = SDValue(CurDAG->getTargetNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); - SDNode *AD = CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, - Op, getI32Imm(~0U)); + Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); + SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + Op, getI32Imm(~0U)); return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, SDValue(AD, 1)); } case ISD::SETLT: { - SDValue AD = SDValue(CurDAG->getTargetNode(PPC::ADDI, dl, MVT::i32, Op, - getI32Imm(1)), 0); - SDValue AN = SDValue(CurDAG->getTargetNode(PPC::AND, dl, MVT::i32, AD, - Op), 0); + SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, + getI32Imm(1)), 0); + SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, + Op), 0); SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - Op = SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), + Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1)); @@ -748,10 +729,10 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { InFlag).getValue(1); if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1) - IntCR = SDValue(CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, - CCReg), 0); + IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, + CCReg), 0); else - IntCR = SDValue(CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, CCReg), 0); + IntCR = 
SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0); SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; @@ -760,7 +741,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { // Get the specified bit. SDValue Tmp = - SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); if (Inv) { assert(OtherCondIdx == -1 && "Can't have split plus negation"); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); @@ -772,7 +753,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { // Get the other bit of the comparison. Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31); SDValue OtherCond = - SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond); } @@ -825,17 +806,17 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { // Simple value. if (isInt16(Imm)) { // Just the Lo bits. - Result = CurDAG->getTargetNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); } else if (Lo) { // Handle the Hi bits. unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; - Result = CurDAG->getTargetNode(OpC, dl, MVT::i64, getI32Imm(Hi)); + Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); // And Lo bits. - Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo)); } else { // Just the Hi bits. - Result = CurDAG->getTargetNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); } // If no shift, we're done. @@ -843,19 +824,20 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { // Shift for next step if the upper 32-bits were not zero. if (Imm) { - Result = CurDAG->getTargetNode(PPC::RLDICR, dl, MVT::i64, - SDValue(Result, 0), - getI32Imm(Shift), getI32Imm(63 - Shift)); + Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, + SDValue(Result, 0), + getI32Imm(Shift), + getI32Imm(63 - Shift)); } // Add in the last bits as required. if ((Hi = (Remainder >> 16) & 0xFFFF)) { - Result = CurDAG->getTargetNode(PPC::ORIS8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Hi)); + Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Hi)); } if ((Lo = Remainder & 0xFFFF)) { - Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo)); } return Result; @@ -875,18 +857,18 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { if (N->hasOneUse()) return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI, getSmallIPtrImm(0)); - return CurDAG->getTargetNode(Opc, dl, Op.getValueType(), TFI, - getSmallIPtrImm(0)); + return CurDAG->getMachineNode(Opc, dl, Op.getValueType(), TFI, + getSmallIPtrImm(0)); } case PPCISD::MFCR: { SDValue InFlag = N->getOperand(1); // Use MFOCRF if supported. 
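// (mfocrf moves only the single CR field being read, while plain mfcr transfers all eight fields; on the 970-class processors gated on here, the narrow form is presumably the cheaper of the two.)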
if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1) - IntCR = SDValue(CurDAG->getTargetNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, - CCReg), 0); + IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, + CCReg), 0); else - IntCR = SDValue(CurDAG->getTargetNode(PPC::MFCR, dl, MVT::i32, CCReg), 0); + IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0); SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; @@ -760,7 +741,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { // Get the specified bit. SDValue Tmp = - SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); if (Inv) { assert(OtherCondIdx == -1 && "Can't have split plus negation"); return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); @@ -772,7 +753,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDValue Op) { // Get the other bit of the comparison. Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31); SDValue OtherCond = - SDValue(CurDAG->getTargetNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); + SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0); return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond); } @@ -825,17 +806,17 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { // Simple value. if (isInt16(Imm)) { // Just the Lo bits. - Result = CurDAG->getTargetNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo)); } else if (Lo) { // Handle the Hi bits. unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8; - Result = CurDAG->getTargetNode(OpC, dl, MVT::i64, getI32Imm(Hi)); + Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi)); // And Lo bits. - Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo)); } else { // Just the Hi bits. - Result = CurDAG->getTargetNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); + Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi)); } // If no shift, we're done. @@ -843,19 +824,20 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { // Shift for next step if the upper 32-bits were not zero. if (Imm) { - Result = CurDAG->getTargetNode(PPC::RLDICR, dl, MVT::i64, - SDValue(Result, 0), - getI32Imm(Shift), getI32Imm(63 - Shift)); + Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, + SDValue(Result, 0), + getI32Imm(Shift), + getI32Imm(63 - Shift)); } // Add in the last bits as required. if ((Hi = (Remainder >> 16) & 0xFFFF)) { - Result = CurDAG->getTargetNode(PPC::ORIS8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Hi)); + Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Hi)); } if ((Lo = Remainder & 0xFFFF)) { - Result = CurDAG->getTargetNode(PPC::ORI8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Lo)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo)); } return Result; @@ -875,18 +857,18 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { if (N->hasOneUse()) return CurDAG->SelectNodeTo(N, Opc, Op.getValueType(), TFI, getSmallIPtrImm(0)); - return CurDAG->getTargetNode(Opc, dl, Op.getValueType(), TFI, - getSmallIPtrImm(0)); + return CurDAG->getMachineNode(Opc, dl, Op.getValueType(), TFI, + getSmallIPtrImm(0)); } case PPCISD::MFCR: { SDValue InFlag = N->getOperand(1); // Use MFOCRF if supported.
PPC::LHAU8 : PPC::LHZU8; break; @@ -964,11 +946,11 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; // FIXME: PPC64 - return CurDAG->getTargetNode(Opcode, dl, LD->getValueType(0), - PPCLowering.getPointerTy(), - MVT::Other, Ops, 3); + return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), + PPCLowering.getPointerTy(), + MVT::Other, Ops, 3); } else { - assert(0 && "R+R preindex loads not supported yet!"); + llvm_unreachable("R+R preindex loads not supported yet!"); } } @@ -1008,7 +990,7 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { SDValue Ops[] = { N->getOperand(0).getOperand(0), N->getOperand(0).getOperand(1), getI32Imm(0), getI32Imm(MB),getI32Imm(ME) }; - return CurDAG->getTargetNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); + return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); } } @@ -1058,8 +1040,8 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { // FIXME: Implement this optzn for PPC64. N->getValueType(0) == MVT::i32) { SDNode *Tmp = - CurDAG->getTargetNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, - N->getOperand(0), getI32Imm(~0U)); + CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Flag, + N->getOperand(0), getI32Imm(~0U)); return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), N->getOperand(0), SDValue(Tmp, 1)); @@ -1109,51 +1091,10 @@ SDNode *PPCDAGToDAGISel::Select(SDValue Op) { SDValue Chain = N->getOperand(0); SDValue Target = N->getOperand(1); unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; - Chain = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Target, - Chain), 0); + Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target, + Chain), 0); return CurDAG->SelectNodeTo(N, PPC::BCTR, MVT::Other, Chain); } - case ISD::DECLARE: { - SDValue Chain = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1); - - // FIXME: We need to handle this for VLAs. - if (!FINode) { - ReplaceUses(Op.getValue(0), Chain); - return NULL; - } - - if (N2.getOpcode() == ISD::ADD) { - if (N2.getOperand(0).getOpcode() == ISD::ADD && - N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg && - N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Hi && - N2.getOperand(1).getOpcode() == PPCISD::Lo) - N2 = N2.getOperand(0).getOperand(1).getOperand(0); - else if (N2.getOperand(0).getOpcode() == ISD::ADD && - N2.getOperand(0).getOperand(0).getOpcode() == PPCISD::GlobalBaseReg && - N2.getOperand(0).getOperand(1).getOpcode() == PPCISD::Lo && - N2.getOperand(1).getOpcode() == PPCISD::Hi) - N2 = N2.getOperand(0).getOperand(1).getOperand(0); - else if (N2.getOperand(0).getOpcode() == PPCISD::Hi && - N2.getOperand(1).getOpcode() == PPCISD::Lo) - N2 = N2.getOperand(0).getOperand(0); - } - - // If we don't have a global address here, the debug info is mangled, just - // drop it.
- if (!isa<GlobalAddressSDNode>(N2)) { - ReplaceUses(Op.getValue(0), Chain); - return NULL; - } - int FI = cast<FrameIndexSDNode>(N1)->getIndex(); - GlobalValue *GV = cast<GlobalAddressSDNode>(N2)->getGlobal(); - SDValue Tmp1 = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy()); - return CurDAG->SelectNodeTo(N, TargetInstrInfo::DECLARE, - MVT::Other, Tmp1, Tmp2, Chain); - } } return SelectCode(Op); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1c6b2877889f5..3920b3815098d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -31,21 +31,24 @@ #include "llvm/Intrinsics.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/DerivedTypes.h" using namespace llvm; -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT, + EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT, + EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); @@ -54,8 +57,15 @@ static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc", cl::desc("enable preincrement load/store generation on PPC (experimental)"), cl::Hidden); +static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { + if (TM.getSubtargetImpl()->isDarwin()) + return new TargetLoweringObjectFileMachO(); + return new TargetLoweringObjectFileELF(); +} + + PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) - : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) { + : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { setPow2DivIsCheap(); @@ -193,9 +203,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); - // RET must be custom lowered, to meet ABI requirements. - setOperationAction(ISD::RET , MVT::Other, Custom); - // TRAP is legal. setOperationAction(ISD::TRAP, MVT::Other, Legal); @@ -205,8 +212,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with the SVR4 ABI - if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) + // VAARG is custom lowered with the 32-bit SVR4 ABI. + if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI() + && !TM.getSubtarget<PPCSubtarget>().isPPC64()) setOperationAction(ISD::VAARG, MVT::Other, Custom); else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -276,7 +284,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // will selectively turn on ones that can be effectively codegen'd.
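// (The loop below first registers the handful of universally supported vector operations as Legal for every vector value type and expands the rest; the AltiVec-specific setOperationAction calls that follow then override individual entries where real instructions exist.)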
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD , VT, Legal); @@ -412,6 +420,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; + case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; @@ -421,6 +430,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; + case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; @@ -438,12 +448,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::MTFSF: return "PPCISD::MTFSF"; - case PPCISD::TAILCALL: return "PPCISD::TAILCALL"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; } } -MVT PPCTargetLowering::getSetCCResultType(MVT VT) const { +MVT::SimpleValueType PPCTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } @@ -900,7 +909,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; - Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0); + Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } @@ -1012,7 +1021,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; - Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0); + Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0); return true; } } @@ -1038,7 +1047,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!EnablePPCPreinc) return false; SDValue Ptr; - MVT VT; + EVT VT; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); @@ -1086,7 +1095,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); @@ -1120,7 +1129,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, } SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); SDValue Zero = DAG.getConstant(0, PtrVT); @@ -1154,13 +1163,13 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { - assert(0 && "TLS not implemented for PPC."); + llvm_unreachable("TLS not implemented for PPC."); return SDValue(); // Not reached } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); @@ -1170,6 +1179,13 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, const TargetMachine &TM = DAG.getTarget(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero); @@ -1191,7 +1207,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); - if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV)) + if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) return Lo; // If the global is weak or external, we have to go through the lazy @@ -1208,7 +1224,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { // fold the new nodes. if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { if (C->isNullValue() && CC == ISD::SETEQ) { - MVT VT = Op.getOperand(0).getValueType(); + EVT VT = Op.getOperand(0).getValueType(); SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { VT = MVT::i32; @@ -1232,9 +1248,9 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { // condition register, reading it back out, and masking the correct bit. The // normal approach here uses sub to do this instead of xor. Using xor exposes // the result to other bit-twiddling opportunities.
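// For example, (seteq x, y) becomes (seteq (xor x, y), 0), and the xor is then an ordinary integer node the DAG combiner can fold further.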
- MVT LHSVT = Op.getOperand(0).getValueType(); + EVT LHSVT = Op.getOperand(0).getValueType(); if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); @@ -1249,7 +1265,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget) { - assert(0 && "VAARG not yet implemented for the SVR4 ABI!"); + llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!"); return SDValue(); // Not reached } @@ -1260,10 +1276,11 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { SDValue Nest = Op.getOperand(3); // 'nest' parameter value DebugLoc dl = Op.getDebugLoc(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); const Type *IntPtrTy = - DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType(); + DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType( + *DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -1281,8 +1298,9 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) std::pair<SDValue, SDValue> CallResult = - LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false, - false, false, 0, CallingConv::C, false, + LowerCallTo(Chain, Op.getValueType().getTypeForEVT(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), Args, DAG, dl); @@ -1300,16 +1318,16 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { DebugLoc dl = Op.getDebugLoc(); - if (Subtarget.isDarwinABI()) { + if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } - // For the SVR4 ABI we follow the layout of the va_list struct. + // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated.
// // typedef struct { @@ -1338,7 +1356,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); @@ -1380,15 +1398,15 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, +static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { return true; } -static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, EVT &ValVT, + EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -1414,8 +1432,8 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, return false; } -static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, +static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT, + EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { @@ -1442,29 +1460,20 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, } /// GetFPR - Get the set of FP registers that should be allocated for arguments, -/// depending on which subtarget is selected. -static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { - if (Subtarget.isDarwinABI()) { - static const unsigned FPR[] = { - PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 - }; - return FPR; - } - - +/// on Darwin. +static const unsigned *GetFPR() { static const unsigned FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8 + PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; + return FPR; } /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. 
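/// For a plain i64 argument this is 8 bytes; a byval aggregate instead /// contributes its getByValSize(), presumably rounded up to a multiple of /// PtrByteSize so the following argument stays naturally aligned.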
-static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags, +static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { - MVT ArgVT = Arg.getValueType(); unsigned ArgSize = ArgVT.getSizeInBits()/8; if (Flags.isByVal()) ArgSize = Flags.getByValSize(); @@ -1474,14 +1483,31 @@ } SDValue -PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, - SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget) { - // SVR4 ABI Stack Frame Layout: +PPCTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { + return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); + } else { + return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, + dl, DAG, InVals); + } +} + +SDValue +PPCTargetLowering::LowerFormalArguments_SVR4( + SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + + // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ // +--> | Back chain | // | +-----------------------------------+ @@ -1512,25 +1538,21 @@ MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - SmallVector<SDValue, 8> ArgValues; - SDValue Root = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; - DebugLoc dl = Op.getDebugLoc(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. - unsigned CC = MF.getFunction()->getCallingConv(); - bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); + bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast)); unsigned PtrByteSize = 4; // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4); + CCInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1538,11 +1560,11 @@ // Arguments stored in registers. if (VA.isRegLoc()) { TargetRegisterClass *RC; - MVT ValVT = VA.getValVT(); + EVT ValVT = VA.getValVT(); - switch (ValVT.getSimpleVT()) { + switch (ValVT.getSimpleVT().SimpleTy) { default: - assert(0 && "ValVT not supported by FORMAL_ARGUMENTS Lowering"); + llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i32: RC = PPC::GPRCRegisterClass; break; @@ -1562,9 +1584,9 @@ // Transform the arguments stored in physical registers into virtual ones.
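// (addLiveIn records VA.getLocReg() as live into the function and hands back a fresh virtual register of class RC; the CopyFromReg below then yields the argument as an SDValue the rest of the DAG can use.)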
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); - SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, ValVT); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); - ArgValues.push_back(ArgValue); + InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); @@ -1575,7 +1597,7 @@ // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); } } @@ -1583,12 +1605,13 @@ // Aggregates passed by value are stored in the local variable space of the // caller's stack frame, right above the parameter list area. SmallVector<CCValAssign, 16> ByValArgLocs; - CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), + ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); @@ -1632,7 +1655,7 @@ // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + - NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; + NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, CCInfo.getNextStackOffset()); @@ -1645,7 +1668,7 @@ unsigned GPRIndex = 0; for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1658,7 +1681,7 @@ for (; GPRIndex != NumGPArgRegs; ++GPRIndex) { unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store @@ -1666,18 +1689,18 @@ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } - // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is - // set. + // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 + // is set. // The double arguments are stored to the VarArgsFrameIndex // on the stack.
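// (Each f64 slot is eight bytes, which is why the loops below advance FIN by EVT(MVT::f64).getSizeInBits()/8 after every store.)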
unsigned FPRIndex = 0; for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); - SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } @@ -1685,47 +1708,40 @@ for (; FPRIndex != NumFPArgRegs; ++FPRIndex) { unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store - SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, + SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); - - ArgValues.push_back(Root); - - // Return the new list of results. - return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); + return Chain; } SDValue -PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, - SelectionDAG &DAG, - int &VarArgsFrameIndex, - const PPCSubtarget &Subtarget) { +PPCTargetLowering::LowerFormalArguments_Darwin( + SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - SmallVector<SDValue, 8> ArgValues; - SDValue Root = Op.getOperand(0); - bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; - DebugLoc dl = Op.getDebugLoc(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. - unsigned CC = MF.getFunction()->getCallingConv(); - bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast)); + bool isImmutable = !(PerformTailCallOpt && (CallConv==CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true); @@ -1741,7 +1757,7 @@ PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(Subtarget); + static const unsigned *FPR = GetFPR(); static const unsigned VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, @@ -1765,12 +1781,11 @@ // entire point of the following loop.
unsigned VecArgOffset = ArgOffset; if (!isVarArg && !isPPC64) { - for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { - MVT ObjectVT = Op.getValue(ArgNo).getValueType(); + EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; - ISD::ArgFlagsTy Flags = - cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); + ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Flags.isByVal()) { // ObjSize is the true size, ArgSize rounded up to multiple of regs. @@ -1781,8 +1796,8 @@ continue; } - switch(ObjectVT.getSimpleVT()) { - default: assert(0 && "Unhandled argument type!"); + switch(ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); case MVT::i32: case MVT::f32: VecArgOffset += isPPC64 ? 8 : 4; @@ -1811,15 +1826,13 @@ SmallVector<SDValue, 8> MemOps; unsigned nAltivecParamsAtEnd = 0; - for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1; - ArgNo != e; ++ArgNo) { + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; - MVT ObjectVT = Op.getValue(ArgNo).getValueType(); + EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; unsigned ArgSize = ObjSize; - ISD::ArgFlagsTy Flags = - cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); + ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; unsigned CurArgOffset = ArgOffset; @@ -1828,13 +1841,13 @@ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { if (isVarArg || isPPC64) { MinReservedArea = ((MinReservedArea+15)/16)*16; - MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), + MinReservedArea += CalculateStackSlotSize(ObjectVT, Flags, PtrByteSize); } else nAltivecParamsAtEnd++; } else // Calculate min reserved area. - MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), + MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, Flags, PtrByteSize); @@ -1852,11 +1865,11 @@ // The value of the object is its address. int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgValues.push_back(FIN); + InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, NULL, 0, ObjSize==1 ?
MVT::i8 : MVT::i16 ); MemOps.push_back(Store); @@ -1875,7 +1888,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); ++GPR_idx; @@ -1888,13 +1901,13 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, continue; } - switch (ObjectVT.getSimpleVT()) { - default: assert(0 && "Unhandled argument type!"); + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); ++GPR_idx; } else { needsLoad = true; @@ -1908,7 +1921,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); - ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { // PPC64 passes i8, i16, and i32 values in i64 registers. Promote @@ -1949,7 +1962,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, else VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); - ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else { needsLoad = true; @@ -1966,7 +1979,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, // except in varargs functions. if (VR_idx != Num_VR_Regs) { unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); - ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { ArgOffset += PtrByteSize; @@ -1974,7 +1987,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, GPR_idx++; } ArgOffset += 16; - GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); + GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? } ++VR_idx; } else { @@ -2000,10 +2013,10 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, CurArgOffset + (ArgSize - ObjSize), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); } - ArgValues.push_back(ArgVal); + InVals.push_back(ArgVal); } // Set the size that is at least reserved in caller of this function. 
Tail @@ -2045,7 +2058,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, else VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store @@ -2055,14 +2068,10 @@ } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); - - ArgValues.push_back(Root); + Chain = DAG.getNode(ISD::TokenFactor, dl, + MVT::Other, &MemOps[0], MemOps.size()); - // Return the new list of results. - return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), - &ArgValues[0], ArgValues.size()); + return Chain; } /// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus @@ -2072,13 +2081,14 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, bool isPPC64, bool isVarArg, unsigned CC, - CallSDNode *TheCall, + const SmallVectorImpl<ISD::OutputArg> + &Outs, unsigned &nAltivecParamsAtEnd) { // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true); - unsigned NumOps = TheCall->getNumArgs(); + unsigned NumOps = Outs.size(); unsigned PtrByteSize = isPPC64 ? 8 : 4; // Add up all the space actually used. @@ -2089,9 +2099,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // 16-byte aligned. nAltivecParamsAtEnd = 0; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = TheCall->getArg(i); - ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); - MVT ArgVT = Arg.getValueType(); + SDValue Arg = Outs[i].Val; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Arg.getValueType(); // Varargs Altivec parameters are padded to a 16 byte boundary. if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { @@ -2104,7 +2114,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. NumBytes = ((NumBytes+15)/16)*16; } - NumBytes += CalculateStackSlotSize(Arg, Flags, PtrByteSize); + NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); } // Allow for Altivec parameters at the end, if needed. @@ -2149,40 +2159,37 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall, return SPDiff; } -/// IsEligibleForTailCallElimination - Check to see whether the next instruction -/// following the call is a return. A function is eligible if caller/callee -/// calling conventions match, currently only fastcc supports tail calls, and -/// the function CALL is immediatly followed by a RET. +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. Targets which want to do tail call +/// optimization should implement this function. bool -PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall, - SDValue Ret, +PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { // Variable argument functions are not supported.
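// (Beyond that, the code below requires both caller and callee to use fastcc, rejects any byval argument, and under PIC only accepts callees known to be local, i.e. with hidden or protected visibility.)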
- if (!PerformTailCallOpt || TheCall->isVarArg()) + if (isVarArg) return false; - if (CheckTailCallReturnConstraints(TheCall, Ret)) { - MachineFunction &MF = DAG.getMachineFunction(); - unsigned CallerCC = MF.getFunction()->getCallingConv(); - unsigned CalleeCC = TheCall->getCallingConv(); - if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { - // Functions containing by val parameters are not supported. - for (unsigned i = 0; i != TheCall->getNumArgs(); i++) { - ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); - if (Flags.isByVal()) return false; - } + MachineFunction &MF = DAG.getMachineFunction(); + CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); + if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { + // Functions containing by val parameters are not supported. + for (unsigned i = 0; i != Ins.size(); i++) { + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Flags.isByVal()) return false; + } - SDValue Callee = TheCall->getCallee(); - // Non PIC/GOT tail calls are supported. - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) - return true; + // Non PIC/GOT tail calls are supported. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return true; - // At the moment we can only do local tail calls (in same module, hidden - // or protected) if we are generating PIC. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) - return G->getGlobal()->hasHiddenVisibility() - || G->getGlobal()->hasProtectedVisibility(); - } + // At the moment we can only do local tail calls (in same module, hidden + // or protected) if we are generating PIC. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) + return G->getGlobal()->hasHiddenVisibility() + || G->getGlobal()->hasProtectedVisibility(); } return false; @@ -2251,13 +2258,13 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewRetAddrLoc); - MVT VT = isPPC64 ? MVT::i64 : MVT::i32; + EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, PseudoSourceValue::getFixedStack(NewRetAddr), 0); - // When using the SVR4 ABI there is no need to move the FP stack slot - // as the FP is never overwritten. + // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack + // slot as the FP is never overwritten. if (isDarwinABI) { int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); @@ -2279,7 +2286,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); - MVT VT = isPPC64 ? MVT::i64 : MVT::i32; + EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; Info.Arg = Arg; @@ -2300,13 +2307,13 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, DebugLoc dl) { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. - MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; + EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); Chain = SDValue(LROpOut.getNode(), 1); - // When using the SVR4 ABI there is no need to load the FP stack slot - // as the FP is never overwritten. 
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack + // slot as the FP is never overwritten. if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); @@ -2340,7 +2347,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, bool isVector, SmallVector &MemOpChains, SmallVector& TailCallArguments, DebugLoc dl) { - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); if (!isTailCall) { if (isVector) { SDValue StackPtr; @@ -2389,9 +2396,9 @@ static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall, SmallVector, 8> &RegsToPass, - SmallVector &Ops, std::vector &NodeTys, + SmallVector &Ops, std::vector &NodeTys, bool isSVR4ABI) { - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. @@ -2444,102 +2451,145 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, return CallOpc; } -static SDValue LowerCallReturn(SDValue Op, SelectionDAG &DAG, TargetMachine &TM, - CallSDNode *TheCall, SDValue Chain, - SDValue InFlag) { - bool isVarArg = TheCall->isVarArg(); - DebugLoc dl = TheCall->getDebugLoc(); - SmallVector ResultVals; +SDValue +PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + SmallVector RVLocs; - unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); - CCState CCRetInfo(CallerCC, isVarArg, TM, RVLocs); - CCRetInfo.AnalyzeCallResult(TheCall, RetCC_PPC); + CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; - MVT VT = VA.getValVT(); + EVT VT = VA.getValVT(); assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VT, InFlag).getValue(1); - ResultVals.push_back(Chain.getValue(0)); + InVals.push_back(Chain.getValue(0)); InFlag = Chain.getValue(2); } - // If the function returns void, just return the chain. - if (RVLocs.empty()) - return Chain; - - // Otherwise, merge everything together with a MERGE_VALUES node. 
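
The physreg-copy loop in the new LowerCallResult above relies on the usual chain/glue discipline. Isolated from the CCState bookkeeping, the per-register pattern is (Reg and VT stand in for the CCValAssign data):

    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, VT, InFlag);
    Chain  = Val.getValue(1);  // result 1: the updated chain
    InFlag = Val.getValue(2);  // result 2: glue keeping copies back-to-back
    InVals.push_back(Val);     // result 0: the returned value itself
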
- ResultVals.push_back(Chain); - SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), - &ResultVals[0], ResultVals.size()); - return Res.getValue(Op.getResNo()); + return Chain; } -static -SDValue FinishCall(SelectionDAG &DAG, CallSDNode *TheCall, TargetMachine &TM, - SmallVector, 8> &RegsToPass, - SDValue Op, SDValue InFlag, SDValue Chain, SDValue &Callee, - int SPDiff, unsigned NumBytes) { - unsigned CC = TheCall->getCallingConv(); - DebugLoc dl = TheCall->getDebugLoc(); - bool isTailCall = TheCall->isTailCall() - && CC == CallingConv::Fast && PerformTailCallOpt; - - std::vector NodeTys; +SDValue +PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, + bool isTailCall, bool isVarArg, + SelectionDAG &DAG, + SmallVector, 8> + &RegsToPass, + SDValue InFlag, SDValue Chain, + SDValue &Callee, + int SPDiff, unsigned NumBytes, + const SmallVectorImpl &Ins, + SmallVectorImpl &InVals) { + std::vector NodeTys; SmallVector Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, isTailCall, RegsToPass, Ops, NodeTys, - TM.getSubtarget().isSVR4ABI()); + PPCSubTarget.isSVR4ABI()); // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCRegisterInfo::eliminateCallFramePseudoInstr. int BytesCalleePops = - (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; + (CallConv==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; if (InFlag.getNode()) Ops.push_back(InFlag); // Emit tail call. if (isTailCall) { - assert(InFlag.getNode() && - "Flag must be set. Depend on flag being set in LowerRET"); - Chain = DAG.getNode(PPCISD::TAILCALL, dl, - TheCall->getVTList(), &Ops[0], Ops.size()); - return SDValue(Chain.getNode(), Op.getResNo()); + // If this is the first return lowered for this function, add the regs + // to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, + *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, RetCC_PPC); + for (unsigned i = 0; i != RVLocs.size(); ++i) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + assert(((Callee.getOpcode() == ISD::Register && + cast(Callee)->getReg() == PPC::CTR) || + Callee.getOpcode() == ISD::TargetExternalSymbol || + Callee.getOpcode() == ISD::TargetGlobalAddress || + isa(Callee)) && + "Expecting an global address, external symbol, absolute value or register"); + + return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); } Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); + // Add a NOP immediately after the branch instruction when using the 64-bit + // SVR4 ABI. At link time, if caller and callee are in a different module and + // thus have a different TOC, the call will be replaced with a call to a stub + // function which saves the current TOC, loads the TOC of the callee and + // branches to the callee. The NOP will be replaced with a load instruction + // which restores the TOC of the caller from the TOC save slot of the current + // stack frame. If caller and callee belong to the same module (and have the + // same TOC), the NOP will remain unchanged. + if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { + // Insert NOP. 
+ InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag); + } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag); - if (TheCall->getValueType(0) != MVT::Other) + if (!Ins.empty()) InFlag = Chain.getValue(1); - return LowerCallReturn(Op, DAG, TM, TheCall, Chain, InFlag); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, + Ins, dl, DAG, InVals); +} + +SDValue +PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { + return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, Ins, + dl, DAG, InVals); + } else { + return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, + isTailCall, Outs, Ins, + dl, DAG, InVals); + } } -SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, - TargetMachine &TM) { - // See PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4() for a description - // of the SVR4 ABI stack frame layout. - CallSDNode *TheCall = cast(Op.getNode()); - SDValue Chain = TheCall->getChain(); - bool isVarArg = TheCall->isVarArg(); - unsigned CC = TheCall->getCallingConv(); - assert((CC == CallingConv::C || - CC == CallingConv::Fast) && "Unknown calling convention!"); - bool isTailCall = TheCall->isTailCall() - && CC == CallingConv::Fast && PerformTailCallOpt; - SDValue Callee = TheCall->getCallee(); - DebugLoc dl = TheCall->getDebugLoc(); - - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); +SDValue +PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description + // of the 32-bit SVR4 ABI stack frame layout. + + assert((!isTailCall || + (CallConv == CallingConv::Fast && PerformTailCallOpt)) && + "IsEligibleForTailCallOptimization missed a case!"); + + assert((CallConv == CallingConv::C || + CallConv == CallingConv::Fast) && "Unknown calling convention!"); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); unsigned PtrByteSize = 4; MachineFunction &MF = DAG.getMachineFunction(); @@ -2549,7 +2599,7 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (PerformTailCallOpt && CC==CallingConv::Fast) + if (PerformTailCallOpt && CallConv==CallingConv::Fast) MF.getInfo()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage @@ -2558,7 +2608,8 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, // Assign locations to all of the outgoing arguments. SmallVector ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. 
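
The NOP gluing in FinishCall above deserves a concrete picture. On 64-bit SVR4 the emitted call site is conceptually the pair below; 40(r1) is the conventional TOC save slot in that ABI's linkage area, stated here as background rather than something this hunk establishes.

    //   bl   callee     # may be redirected to a linker stub when the callee
    //                   # lives in another module with a different TOC
    //   nop             # patched by the linker to: ld r2, 40(r1)
    // In the DAG this costs exactly one glued node after the call:
    InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag);
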
CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize); @@ -2567,15 +2618,14 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, // Handle fixed and variable vector arguments differently. // Fixed vector arguments go into registers as long as registers are // available. Variable vector arguments always go into memory. - unsigned NumArgs = TheCall->getNumArgs(); - unsigned NumFixedArgs = TheCall->getNumFixedArgs(); + unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = TheCall->getArg(i).getValueType(); - ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i); + EVT ArgVT = Outs[i].Val.getValueType(); + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; - if (i < NumFixedArgs) { + if (Outs[i].IsFixed) { Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } else { @@ -2584,24 +2634,27 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, } if (Result) { - cerr << "Call operand #" << i << " has unhandled type " - << ArgVT.getMVTString() << "\n"; - abort(); +#ifndef NDEBUG + errs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString() << "\n"; +#endif + llvm_unreachable(0); } } } else { // All arguments are treated the same. - CCInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4); + CCInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4); } // Assign locations to all of the outgoing aggregate by value arguments. SmallVector ByValArgLocs; - CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs); + CCState CCByValInfo(CallConv, isVarArg, getTargetMachine(), ByValArgLocs, + *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); - CCByValInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4_ByVal); + CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are @@ -2637,8 +2690,8 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = TheCall->getArg(i); - ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); + SDValue Arg = Outs[i].Val; + ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isByVal()) { // Argument is an aggregate which is passed by value, thus we need to @@ -2712,7 +2765,7 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, // Set CR6 to true if this is a vararg call. 
if (isVarArg) { - SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0); + SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0); Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag); InFlag = Chain.getValue(1); } @@ -2722,24 +2775,23 @@ SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, false, TailCallArguments); } - return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, - SPDiff, NumBytes); + return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, + Ins, InVals); } -SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, - TargetMachine &TM) { - CallSDNode *TheCall = cast(Op.getNode()); - SDValue Chain = TheCall->getChain(); - bool isVarArg = TheCall->isVarArg(); - unsigned CC = TheCall->getCallingConv(); - bool isTailCall = TheCall->isTailCall() - && CC == CallingConv::Fast && PerformTailCallOpt; - SDValue Callee = TheCall->getCallee(); - unsigned NumOps = TheCall->getNumArgs(); - DebugLoc dl = TheCall->getDebugLoc(); - - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); +SDValue +PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + + unsigned NumOps = Outs.size(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 8 : 4; @@ -2750,7 +2802,7 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). - if (PerformTailCallOpt && CC==CallingConv::Fast) + if (PerformTailCallOpt && CallConv==CallingConv::Fast) MF.getInfo()->setHasFastCall(); unsigned nAltivecParamsAtEnd = 0; @@ -2759,13 +2811,19 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CC, TheCall, + CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, + Outs, nAltivecParamsAtEnd); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); + // To protect arguments on the stack from being clobbered in a tail call, + // force all the loads to happen before doing any other lowering. + if (isTailCall) + Chain = DAG.getStackArgumentTokenFactor(Chain); + // Adjust the stack pointer for the new arguments... 
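
Two details just above are easy to miss. First, vararg calls materialize condition-register bit 6 with a CRSET machine node (note the getTargetNode to getMachineNode rename); the SVR4 ABI uses that bit to tell a variadic callee whether floating-point arguments arrived in registers, and setting it unconditionally is conservative but safe. Second, a tail call reuses the caller's argument area, so values previously loaded from that area must be forced to load before the call sequence starts:

    // Order all pending loads from the stack argument area ahead of the
    // call, so overwriting the area cannot clobber an in-flight load.
    if (isTailCall)
      Chain = DAG.getStackArgumentTokenFactor(Chain);
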
// These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); @@ -2801,7 +2859,7 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(Subtarget); + static const unsigned *FPR = GetFPR(); static const unsigned VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, @@ -2818,9 +2876,8 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { - bool inMem = false; - SDValue Arg = TheCall->getArg(i); - ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i); + SDValue Arg = Outs[i].Val; + ISD::ArgFlagsTy Flags = Outs[i].Flags; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. @@ -2843,7 +2900,7 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, if (Size==1 || Size==2) { // Very small objects are passed right-justified. // Everything else is passed left-justified. - MVT VT = (Size==1) ? MVT::i8 : MVT::i16; + EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, NULL, 0, VT); @@ -2895,8 +2952,8 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, continue; } - switch (Arg.getValueType().getSimpleVT()) { - default: assert(0 && "Unexpected ValueType for argument!"); + switch (Arg.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { @@ -2905,7 +2962,6 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, isPPC64, isTailCall, false, MemOpChains, TailCallArguments, dl); - inMem = true; } ArgOffset += PtrByteSize; break; @@ -2945,7 +3001,6 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, isPPC64, isTailCall, false, MemOpChains, TailCallArguments, dl); - inMem = true; } if (isPPC64) ArgOffset += 8; @@ -3017,8 +3072,8 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, ArgOffset = ((ArgOffset+15)/16)*16; ArgOffset += 12*16; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = TheCall->getArg(i); - MVT ArgType = Arg.getValueType(); + SDValue Arg = Outs[i].Val; + EVT ArgType = Arg.getValueType(); if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { if (++j > NumVRs) { @@ -3051,18 +3106,21 @@ SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, FPOp, true, TailCallArguments); } - return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee, - SPDiff, NumBytes); + return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, + RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, + Ins, InVals); } -SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, - TargetMachine &TM) { +SDValue +PPCTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { + SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - 
DebugLoc dl = Op.getDebugLoc(); - CCState CCInfo(CC, isVarArg, TM, RVLocs); - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC_PPC); // If this is the first return lowered for this function, add the regs to the // liveout set for the function. @@ -3071,37 +3129,6 @@ SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - SDValue Chain = Op.getOperand(0); - - Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL); - if (Chain.getOpcode() == PPCISD::TAILCALL) { - SDValue TailCall = Chain; - SDValue TargetAddress = TailCall.getOperand(1); - SDValue StackAdjustment = TailCall.getOperand(2); - - assert(((TargetAddress.getOpcode() == ISD::Register && - cast(TargetAddress)->getReg() == PPC::CTR) || - TargetAddress.getOpcode() == ISD::TargetExternalSymbol || - TargetAddress.getOpcode() == ISD::TargetGlobalAddress || - isa(TargetAddress)) && - "Expecting an global address, external symbol, absolute value or register"); - - assert(StackAdjustment.getOpcode() == ISD::Constant && - "Expecting a const value"); - - SmallVector Operands; - Operands.push_back(Chain.getOperand(0)); - Operands.push_back(TargetAddress); - Operands.push_back(StackAdjustment); - // Copy registers used by the call. Last operand is a flag so it is not - // copied. - for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) { - Operands.push_back(Chain.getOperand(i)); - } - return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0], - Operands.size()); - } - SDValue Flag; // Copy the result values into the output registers. @@ -3109,7 +3136,7 @@ SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Op.getOperand(i*2+1), Flag); + Outs[i].Val, Flag); Flag = Chain.getValue(1); } @@ -3125,7 +3152,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); // Get the corect type for pointers. - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Construct the stack pointer operand. bool IsPPC64 = Subtarget.isPPC64(); @@ -3153,7 +3180,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); bool isDarwinABI = PPCSubTarget.isDarwinABI(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -3177,7 +3204,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = PPCSubTarget.isPPC64(); bool isDarwinABI = PPCSubTarget.isDarwinABI(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -3207,7 +3234,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, DebugLoc dl = Op.getDebugLoc(); // Get the corect type for pointers. 
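
LowerReturn above keeps the familiar glued CopyToReg chain; stripped of the CCState setup it reduces to the loop below (names as in the hunk). The deleted block that pattern-matched a preceding PPCISD::TAILCALL is no longer needed because FinishCall now emits PPCISD::TC_RETURN directly.

    SDValue Flag;
    for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
      CCValAssign &VA = RVLocs[i];
      // Glue successive copies so nothing is scheduled between them.
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Outs[i].Val, Flag);
      Flag = Chain.getValue(1);
    }
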
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, PtrVT), Size); @@ -3232,8 +3259,8 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { // Cannot handle SETEQ/SETNE. if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op; - MVT ResVT = Op.getValueType(); - MVT CmpVT = Op.getOperand(0).getValueType(); + EVT ResVT = Op.getValueType(); + EVT CmpVT = Op.getOperand(0).getValueType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); DebugLoc dl = Op.getDebugLoc(); @@ -3302,8 +3329,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); SDValue Tmp; - switch (Op.getValueType().getSimpleVT()) { - default: assert(0 && "Unhandled FP_TO_INT type in custom expander!"); + switch (Op.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : PPCISD::FCTIDZ, @@ -3350,20 +3377,23 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack // then lfd it and fcfid it. - MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *FrameInfo = MF.getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(8, 8); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32, Op.getOperand(0)); // STD the extended value into the stack slot. - MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, 0, 8, 8); - SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other, - DAG.getEntryNode(), Ext64, FIdx, - DAG.getMemOperand(MO)); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, 0, 8, 8); + SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; + SDValue Store = + DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), + Ops, 4, MVT::i64, MMO); // Load the value as a double. 
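
The SINT_TO_FP hunk above shows the new memory-node idiom: instead of wrapping memory information in a DAG.getMemOperand() operand, a MachineMemOperand* is attached to the node itself via getMemIntrinsicNode. One caveat worth flagging: the Ops array holds three values while the call passes a count of 4; assuming the hunk mirrors upstream faithfully, 3 is the count the array actually provides.

    // Memory semantics now travel on the node, not as an extra operand.
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
                              MachineMemOperand::MOStore, 0, 8, 8);
    SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx };
    SDValue Store =
      DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other),
                              Ops, 3 /* see note above */, MVT::i64, MMO);
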
SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0); @@ -3396,9 +3426,9 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { */ MachineFunction &MF = DAG.getMachineFunction(); - MVT VT = Op.getValueType(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - std::vector NodeTys; + EVT VT = Op.getValueType(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + std::vector NodeTys; SDValue MFFSreg, InFlag; // Save FP Control Word to register @@ -3437,7 +3467,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); assert(Op.getNumOperands() == 3 && @@ -3449,7 +3479,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); - MVT AmtVT = Amt.getValueType(); + EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, AmtVT), Amt); @@ -3466,7 +3496,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && @@ -3478,7 +3508,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); - MVT AmtVT = Amt.getValueType(); + EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, AmtVT), Amt); @@ -3496,7 +3526,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && @@ -3506,7 +3536,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); - MVT AmtVT = Amt.getValueType(); + EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, AmtVT), Amt); @@ -3529,21 +3559,21 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { /// BuildSplatI - Build a canonical splati of Val with an element size of /// SplatSize. Cast the result to VT. -static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT, +static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); - static const MVT VTys[] = { // canonical VT to use for each size. + static const EVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; - MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; + EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. if (Val == -1) SplatSize = 1; - MVT CanonicalVT = VTys[SplatSize-1]; + EVT CanonicalVT = VTys[SplatSize-1]; // Build a canonical splat for this value. 
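
The three *_PARTS lowerings above all lean on one identity. For a 2W-bit value {Hi,Lo} shifted by 0 <= Amt < 2W, and W-bit shifts that read as zero once the amount reaches W (the PPCshl/PPCsrl/PPCsra nodes map to slw/srw/sraw, which behave exactly that way for amounts 32-63, unlike the target-independent ISD shifts, whose oversized amounts are undefined):

    // SHL_PARTS:
    //   OutLo =  Lo << Amt
    //   OutHi = (Hi << Amt) | (Lo >> (W - Amt)) | (Lo << (Amt - W))
    // Exactly one of the two Lo terms survives, depending on Amt < W.
    // SRL_PARTS mirrors this with Hi and Lo exchanged, and SRA_PARTS uses
    // the same skeleton plus a select, since sign-filled bits cannot
    // simply be OR-merged.
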
SDValue Elt = DAG.getConstant(Val, MVT::i32); @@ -3558,7 +3588,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT, /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, SelectionDAG &DAG, DebugLoc dl, - MVT DestVT = MVT::Other) { + EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, MVT::i32), LHS, RHS); @@ -3568,7 +3598,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, SDValue Op2, SelectionDAG &DAG, - DebugLoc dl, MVT DestVT = MVT::Other) { + DebugLoc dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); @@ -3578,7 +3608,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, - MVT VT, SelectionDAG &DAG, DebugLoc dl) { + EVT VT, SelectionDAG &DAG, DebugLoc dl) { // Force LHS/RHS to be the right type. LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS); @@ -3789,7 +3819,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, int ShufIdxs[16]; switch (OpNum) { - default: assert(0 && "Unknown i32 permute!"); + default: llvm_unreachable("Unknown i32 permute!"); case OP_VMRGHW: ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; @@ -3825,7 +3855,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, case OP_VSLDOI12: return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } - MVT VT = OpLHS.getValueType(); + EVT VT = OpLHS.getValueType(); OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS); OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS); SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); @@ -3842,7 +3872,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast(Op); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be @@ -3939,7 +3969,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. 
- MVT EltVT = V1.getValueType().getVectorElementType(); + EVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; @@ -4026,7 +4056,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(3), // RHS DAG.getConstant(CompareOpc, MVT::i32) }; - std::vector VTs; + std::vector VTs; VTs.push_back(Op.getOperand(2).getValueType()); VTs.push_back(MVT::Flag); SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); @@ -4076,7 +4106,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. @@ -4141,8 +4171,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { } return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { - assert(0 && "Unknown mul to lower!"); - abort(); + llvm_unreachable("Unknown mul to lower!"); } } @@ -4150,7 +4179,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - default: assert(0 && "Wasn't expecting to be able to lower this!"); + default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -4165,24 +4194,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); - case ISD::FORMAL_ARGUMENTS: - if (PPCSubTarget.isSVR4ABI()) { - return LowerFORMAL_ARGUMENTS_SVR4(Op, DAG, VarArgsFrameIndex, - VarArgsStackOffset, VarArgsNumGPR, - VarArgsNumFPR, PPCSubTarget); - } else { - return LowerFORMAL_ARGUMENTS_Darwin(Op, DAG, VarArgsFrameIndex, - PPCSubTarget); - } - - case ISD::CALL: - if (PPCSubTarget.isSVR4ABI()) { - return LowerCALL_SVR4(Op, DAG, PPCSubTarget, getTargetMachine()); - } else { - return LowerCALL_Darwin(Op, DAG, PPCSubTarget, getTargetMachine()); - } - - case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); @@ -4234,7 +4245,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, // This sequence changes FPSCR to do round-to-zero, adds the two halves // of the long double, and puts FPSCR back the way it was. We do not // actually model FPSCR. 
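
The deleted ISD::FORMAL_ARGUMENTS, ISD::CALL and ISD::RET cases above are the visible end of the interface change driving most of this file: calls and returns no longer exist as generic DAG nodes for LowerOperation to dispatch on, and SelectionDAG construction invokes the target's virtual hooks directly. Roughly, from the caller's side (a sketch of the generic framework's usage, not code from this patch):

    SmallVector<SDValue, 8> InVals;
    // The builder hands argument/result descriptions (Outs/Ins) to the
    // target and receives the produced values plus the updated chain.
    Chain = TLI.LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
                          Outs, Ins, dl, DAG, InVals);
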
- std::vector NodeTys; + std::vector NodeTys; SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; NodeTys.push_back(MVT::f64); // Return register @@ -4480,7 +4491,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -4516,9 +4528,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by transferring all successors of the current + // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - sinkMBB->transferSuccessors(BB); + // Also inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while (!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -4812,7 +4833,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = exitMBB; BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg); } else { - assert(0 && "Unexpected instr type to insert"); + llvm_unreachable("Unexpected instr type to insert"); } F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. @@ -4903,7 +4924,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } // Turn STORE (BSWAP) -> sthbrx/stwbrx. - if (N->getOperand(1).getOpcode() == ISD::BSWAP && + if (cast(N)->isUnindexed() && + N->getOperand(1).getOpcode() == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (N->getOperand(1).getValueType() == MVT::i32 || N->getOperand(1).getValueType() == MVT::i16)) { @@ -4912,9 +4934,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); - return DAG.getNode(PPCISD::STBRX, dl, MVT::Other, N->getOperand(0), - BSwapOp, N->getOperand(2), N->getOperand(3), - DAG.getValueType(N->getOperand(1).getValueType())); + SDValue Ops[] = { + N->getOperand(0), BSwapOp, N->getOperand(2), + DAG.getValueType(N->getOperand(1).getValueType()) + }; + return + DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), + Ops, array_lengthof(Ops), + cast(N)->getMemoryVT(), + cast(N)->getMemOperand()); } break; case ISD::BSWAP: @@ -4925,17 +4953,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. 
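
The new DenseMap parameter to EmitInstrWithCustomInserter above exists so instruction selection can fix up PHI nodes after a select pseudo is expanded into a diamond: every edge that used to leave BB now leaves sinkMBB, and the map records that renaming for sdisel. The order of operations matters; condensed:

    // 1. Give sinkMBB every old successor of BB, recording the renaming so
    //    PHIs built later point at sinkMBB instead of BB.
    for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
         E = BB->succ_end(); I != E; ++I) {
      EM->insert(std::make_pair(*I, sinkMBB));
      sinkMBB->addSuccessor(*I);
    }
    // 2. Only then detach BB and rewire it through the diamond.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);
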
- std::vector VTs; - VTs.push_back(MVT::i32); - VTs.push_back(MVT::Other); - SDValue MO = DAG.getMemOperand(LD->getMemOperand()); SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr - MO, // MemOperand DAG.getValueType(N->getValueType(0)) // VT }; - SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4); + SDValue BSLoad = + DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, + DAG.getVTList(MVT::i32, MVT::Other), Ops, 3, + LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; @@ -5035,7 +5061,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. - std::vector VTs; + std::vector VTs; SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare @@ -5090,7 +5116,7 @@ void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. - if (cast(Op.getOperand(3))->getVT() == MVT::i16) + if (cast(Op.getOperand(2))->getVT() == MVT::i16) KnownZero = 0xFFFF0000; break; } @@ -5138,7 +5164,7 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { @@ -5187,7 +5213,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, if (!CST) return; // Must be an immediate to match. unsigned Value = CST->getZExtValue(); switch (Letter) { - default: assert(0 && "Unknown constraint letter!"); + default: llvm_unreachable("Unknown constraint letter!"); case 'I': // "I" is a signed 16-bit constant. if ((short)Value == (int)Value) Result = DAG.getTargetConstant(Value, Op.getValueType()); @@ -5304,7 +5330,7 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { if (cast(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); - MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; MachineFunction &MF = DAG.getMachineFunction(); @@ -5326,7 +5352,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { return false; } -MVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, +EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr, SelectionDAG &DAG) const { if (this->PPCSubTarget.isPPC64()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 962bbb144dff8..ac72d8765b102 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -41,8 +41,7 @@ namespace llvm { FCTIDZ, FCTIWZ, /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to, - /// then a SRCVALUE for the address. + /// chain, then an f64 value to store, then an address to store it to. STFIWX, // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking @@ -60,6 +59,8 @@ namespace llvm { /// though these are usually folded into other nodes. 
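
A subtle knock-on fix earlier in this hunk: with the SRCVALUE/MemOperand operand gone, the VT operand of LBRX moved from index 3 to index 2, so computeMaskedBitsForTargetNode must read the new slot. The known-zero mask itself is unchanged, since lhbrx zero-fills the upper half of the register:

    case PPCISD::LBRX:
      // lhbrx loads 16 bits and byte-swaps them into the low half; the
      // upper 16 bits of the 32-bit result are therefore known zero.
      if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
        KnownZero = 0xFFFF0000;
      break;
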
Hi, Lo, + TOC_ENTRY, + /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to /// compute an allocation on the stack. @@ -78,12 +79,12 @@ namespace llvm { /// registers. EXTSW_32, - /// STD_32 - This is the STD instruction for use with "32-bit" registers. - STD_32, - /// CALL - A direct function call. CALL_Darwin, CALL_SVR4, + /// NOP - Special NOP which follows 64-bit SVR4 calls. + NOP, + /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, @@ -119,18 +120,6 @@ namespace llvm { /// an optional input flag argument. COND_BRANCH, - /// CHAIN = STBRX CHAIN, GPRC, Ptr, SRCVALUE, Type - This is a - /// byte-swapping store instruction. It byte-swaps the low "Type" bits of - /// the GPRC input, then stores it through Ptr. Type can be either i16 or - /// i32. - STBRX, - - /// GPRC, CHAIN = LBRX CHAIN, Ptr, SRCVALUE, Type - This is a - /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, - /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 - /// or i32. - LBRX, - // The following 5 instructions are used only as part of the // long double-to-int conversion sequence. @@ -160,14 +149,27 @@ namespace llvm { /// indexed. This is used to implement atomic operations. STCX, - /// TAILCALL - Indicates a tail call should be taken. - TAILCALL, /// TC_RETURN - A tail call return. /// operand #0 chain /// operand #1 callee (register or absolute) /// operand #2 stack adjustment /// operand #3 optional in flag - TC_RETURN + TC_RETURN, + + /// STD_32 - This is the STD instruction for use with "32-bit" registers. + STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, + + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a + /// byte-swapping store instruction. It byte-swaps the low "Type" bits of + /// the GPRC input, then stores it through Ptr. Type can be either i16 or + /// i32. + STBRX, + + /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a + /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, + /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 + /// or i32. + LBRX }; } @@ -232,7 +234,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT getSetCCResultType(MVT VT) const; + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address @@ -286,7 +288,8 @@ namespace llvm { unsigned Depth = 0) const; virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap *EM) const; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; @@ -297,7 +300,7 @@ namespace llvm { ConstraintType getConstraintType(const std::string &Constraint) const; std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual @@ -327,16 +330,16 @@ namespace llvm { /// the offset of the target addressing mode. virtual bool isLegalAddressImmediate(GlobalValue *GV) const; - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. 
Target which want to do tail call - /// optimization should implement this function. - virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall, - SDValue Ret, - SelectionDAG &DAG) const; + virtual bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl &Ins, + SelectionDAG& DAG) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align, + virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr, SelectionDAG &DAG) const; @@ -370,20 +373,6 @@ namespace llvm { SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, int VarArgsStackOffset, unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); - SDValue LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, SelectionDAG &DAG, - int &VarArgsFrameIndex, - int &VarArgsStackOffset, - unsigned &VarArgsNumGPR, - unsigned &VarArgsNumFPR, - const PPCSubtarget &Subtarget); - SDValue LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, SelectionDAG &DAG, - int &VarArgsFrameIndex, - const PPCSubtarget &Subtarget); - SDValue LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, TargetMachine &TM); - SDValue LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget, TargetMachine &TM); - SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM); SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget); SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, @@ -400,6 +389,71 @@ namespace llvm { SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall, + bool isVarArg, + SelectionDAG &DAG, + SmallVector, 8> + &RegsToPass, + SDValue InFlag, SDValue Chain, + SDValue &Callee, + int SPDiff, unsigned NumBytes, + const SmallVectorImpl &Ins, + SmallVectorImpl &InVals); + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); + + SDValue + LowerFormalArguments_Darwin(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + SDValue + LowerFormalArguments_SVR4(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + SDValue + LowerCall_Darwin(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + SDValue 
+ LowerCall_SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 3823e537f11d1..0f68fb939dc03 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -68,7 +68,7 @@ let isCall = 1, PPC970_Unit = 7, F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR8,CTR8, - CR0,CR1,CR5,CR6,CR7] in { + CR0,CR1,CR5,CR6,CR7,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_Darwin : IForm<18, 0, 1, @@ -94,7 +94,7 @@ let isCall = 1, PPC970_Unit = 7, F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, LR8,CTR8, - CR0,CR1,CR5,CR6,CR7] in { + CR0,CR1,CR5,CR6,CR7,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, @@ -123,6 +123,8 @@ def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCnop), + (NOP)>; // Atomic operations let usesCustomDAGSchedInserter = 1 in { @@ -327,14 +329,15 @@ def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "add $rT, $rA, $rB", IntGeneral, [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; +let Defs = [CARRY] in { def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "addc $rT, $rA, $rB", IntGeneral, [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>, PPC970_DGroup_Cracked; -def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "adde $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; - +def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), + "addic $rD, $rA, $imm", IntGeneral, + [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; +} def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "addi $rD, $rA, $imm", IntGeneral, [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; @@ -342,36 +345,41 @@ def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntGeneral, [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; +let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IntGeneral, [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>; -def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subf $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subfc $rT, $rA, $rB", IntGeneral, [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>, PPC970_DGroup_Cracked; - -def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), - "subfe $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; +} +def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subf $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; +def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), + "neg $rT, $rA", IntGeneral, + [(set G8RC:$rT, (ineg G8RC:$rA))]>; +let Uses = [CARRY], Defs = [CARRY] in { +def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, 
G8RC:$rB), + "adde $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addme $rT, $rA", IntGeneral, [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>; def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addze $rT, $rA", IntGeneral, [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; -def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), - "neg $rT, $rA", IntGeneral, - [(set G8RC:$rT, (ineg G8RC:$rA))]>; +def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), + "subfe $rT, $rA, $rB", IntGeneral, + [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfme $rT, $rA", IntGeneral, [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>; def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfze $rT, $rA", IntGeneral, [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; - +} def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), @@ -396,9 +404,11 @@ def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srd $rA, $rS, $rB", IntRotateD, [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64; +let Defs = [CARRY] in { def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srad $rA, $rS, $rB", IntRotateD, [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64; +} def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), "extsb $rA, $rS", IntGeneral, @@ -418,9 +428,11 @@ def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), "extsw $rA, $rS", IntGeneral, [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64; +let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), "sradi $rA, $rS, $SH", IntRotateD, [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; +} def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), "cntlzd $rA, $rS", IntGeneral, [(set G8RC:$rA, (ctlz G8RC:$rS))]>; @@ -543,6 +555,10 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; +def LDtoc: DSForm_1<58, 0, (outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "ld $rD, $disp($reg)", LdStLD, + [(set G8RC:$rD, + (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrBuilder.h b/lib/Target/PowerPC/PPCInstrBuilder.h index 1de69116cd58c..b424d1101416f 100644 --- a/lib/Target/PowerPC/PPCInstrBuilder.h +++ b/lib/Target/PowerPC/PPCInstrBuilder.h @@ -29,7 +29,7 @@ namespace llvm { /// reference has base register as the FrameIndex offset until it is resolved. /// This allows a constant offset to be specified as well... 
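
The recurring let Defs = [CARRY] and let Uses = [CARRY] blocks above model the XER carry bit as an explicit register, so the scheduler can no longer separate or reorder an addc/adde or subfc/subfe pair. SRAD and SRADI sit in the Defs lists too because the PowerPC arithmetic right shifts clobber CA, and the call instructions now list CARRY as clobbered since the callee may change it. Informally, paraphrasing the ISA rather than quoting this patch:

    // sradi rA, rS, SH sets the carry used to round signed divisions:
    //   CA = (rS < 0) && ((rS & ((1 << SH) - 1)) != 0)
    // i.e. CA is set exactly when a negative value shifts out nonzero bits.
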
/// -inline const MachineInstrBuilder& +static inline const MachineInstrBuilder& addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, bool mem = true) { if (mem) diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 87c612ab74e67..0083598cf18b5 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -20,7 +20,9 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCAsmInfo.h" using namespace llvm; extern cl::opt EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. @@ -485,8 +487,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, .addReg(PPC::R0) .addReg(PPC::R0)); } else { - assert(0 && "Unknown regclass!"); - abort(); + llvm_unreachable("Unknown regclass!"); } return false; @@ -509,45 +510,6 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); } -void PPCInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const{ - if (Addr[0].isFI()) { - if (StoreRegToStackSlot(MF, SrcReg, isKill, - Addr[0].getIndex(), RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo(); - FuncInfo->setSpillsCR(); - } - - return; - } - - DebugLoc DL = DebugLoc::getUnknownLoc(); - unsigned Opc = 0; - if (RC == PPC::GPRCRegisterClass) { - Opc = PPC::STW; - } else if (RC == PPC::G8RCRegisterClass) { - Opc = PPC::STD; - } else if (RC == PPC::F8RCRegisterClass) { - Opc = PPC::STFD; - } else if (RC == PPC::F4RCRegisterClass) { - Opc = PPC::STFS; - } else if (RC == PPC::VRRCRegisterClass) { - Opc = PPC::STVX; - } else { - assert(0 && "Unknown regclass!"); - abort(); - } - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; -} - void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, @@ -634,8 +596,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(PPC::R0) .addReg(PPC::R0)); } else { - assert(0 && "Unknown regclass!"); - abort(); + llvm_unreachable("Unknown regclass!"); } } @@ -653,41 +614,6 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); } -void PPCInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs)const{ - if (Addr[0].isFI()) { - LoadRegFromStackSlot(MF, DebugLoc::getUnknownLoc(), - DestReg, Addr[0].getIndex(), RC, NewMIs); - return; - } - - unsigned Opc = 0; - if (RC == PPC::GPRCRegisterClass) { - assert(DestReg != PPC::LR && "Can't handle this yet!"); - Opc = PPC::LWZ; - } else if (RC == PPC::G8RCRegisterClass) { - assert(DestReg != PPC::LR8 && "Can't handle this yet!"); - Opc = PPC::LD; - } else if (RC == PPC::F8RCRegisterClass) { - Opc = PPC::LFD; - } else if (RC == PPC::F4RCRegisterClass) { - Opc = PPC::LFS; - } else if (RC == PPC::VRRCRegisterClass) { - Opc = PPC::LVX; - } else { - assert(0 && "Unknown regclass!"); - abort(); - } - DebugLoc DL = DebugLoc::getUnknownLoc(); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), 
DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; -} - /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -842,7 +768,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::INLINEASM: { // Inline Asm: Variable size. const MachineFunction *MF = MI->getParent()->getParent(); const char *AsmStr = MI->getOperand(0).getSymbolName(); - return MF->getTarget().getTargetAsmInfo()->getInlineAsmLength(AsmStr); + return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); } case PPC::DBG_LABEL: case PPC::EH_LABEL: diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 492634c979eb3..bb0dc15a79222 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -121,20 +121,10 @@ public: unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const; - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; - - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 7af59a2ecaf76..dc5db6ff59e3a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -35,11 +35,11 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT> ]>; -def SDT_PPClbrx : SDTypeProfile<1, 3, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT> +def SDT_PPClbrx : SDTypeProfile<1, 2, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; -def SDT_PPCstbrx : SDTypeProfile<0, 4, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT> +def SDT_PPCstbrx : SDTypeProfile<0, 3, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPClarx : SDTypeProfile<1, 1, [ @@ -53,6 +53,8 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; +def SDT_PPCnop : SDTypeProfile<0, 0, []>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. 
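// Usage sketch for the addFrameReference() helper declared above: it
// appends a FrameIndex operand (plus a constant offset) to a partially
// built instruction, and the FrameIndex is rewritten into a real base
// register and offset later by eliminateFrameIndex(). A hedged example,
// assuming TII, MBB, MI, dl, SrcReg, isKill and FrameIdx are in scope:
addFrameReference(BuildMI(MBB, MI, dl, TII.get(PPC::STW))
                    .addReg(SrcReg, getKillRegState(isKill)),
                  FrameIdx);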
// @@ -85,6 +87,7 @@ def PPCfsel : SDNode<"PPCISD::FSEL", def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; +def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; @@ -111,6 +114,7 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, @@ -125,9 +129,6 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInFlag]>; -def PPCtailcall : SDNode<"PPCISD::TAILCALL", SDT_PPCCall, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; - def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; @@ -309,6 +310,10 @@ def memrix : Operand { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); } +def tocentry : Operand { + let PrintMethod = "printTOCEntryLabel"; + let MIOperandInfo = (ops i32imm:$imm); +} // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. @@ -421,7 +426,7 @@ let isCall = 1, PPC970_Unit = 7, LR,CTR, CR0,CR1,CR5,CR6,CR7, CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, - CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { + CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_Darwin : IForm<18, 0, 1, @@ -448,7 +453,7 @@ let isCall = 1, PPC970_Unit = 7, LR,CTR, CR0,CR1,CR5,CR6,CR7, CR0LT,CR0GT,CR0EQ,CR0UN,CR1LT,CR1GT,CR1EQ,CR1UN,CR5LT,CR5GT,CR5EQ, - CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in { + CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN,CARRY] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_SVR4 : IForm<18, 0, 1, @@ -736,10 +741,10 @@ def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), "lhbrx $rD, $src", LdStGeneral, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i16))]>; + [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), "lwbrx $rD, $src", LdStGeneral, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, srcvalue:$sv, i32))]>; + [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), "lfsx $frD, $src", LdStLFDU, @@ -832,11 +837,11 @@ def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), } def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStGeneral, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i16)]>, + [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), "stwbrx $rS, $dst", LdStGeneral, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, srcvalue:$dummy, i32)]>, + [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, 
(outs), (ins F8RC:$frS, memrr:$dst), @@ -864,6 +869,7 @@ let PPC970_Unit = 1 in { // FXU Operations. def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addi $rD, $rA, $imm", IntGeneral, [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; +let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>, @@ -871,6 +877,7 @@ def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic. $rD, $rA, $imm", IntGeneral, []>; +} def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntGeneral, [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; @@ -881,9 +888,11 @@ def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>; +let Defs = [CARRY] in { def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IntGeneral, [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; +} let isReMaterializable = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), @@ -956,15 +965,19 @@ def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "srw $rA, $rS, $rB", IntGeneral, [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>; +let Defs = [CARRY] in { def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "sraw $rA, $rS, $rB", IntShift, [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>; } +} let PPC970_Unit = 1 in { // FXU Operations. +let Defs = [CARRY] in { def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), "srawi $rA, $rS, $SH", IntShift, [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>; +} def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), "cntlzw $rA, $rS", IntGeneral, [(set GPRC:$rA, (ctlz GPRC:$rS))]>; @@ -1159,13 +1172,12 @@ let PPC970_Unit = 1 in { // FXU Operations. 
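// Note on PPCtoc_entry, LDtoc and the tocentry operand above: under the
// 64-bit SVR4 ABI, globals are reached through the TOC, anchored by the
// reserved r2 (X2). A minimal sketch of the lowering side, assuming PtrVT
// is MVT::i64; the helper name is hypothetical and not part of the patch:
static SDValue lowerGlobalViaTOC(SelectionDAG &DAG, DebugLoc dl,
                                 const GlobalValue *GV, EVT PtrVT) {
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT);
  // Selected as LDtoc, i.e. "ld $rD, <toc entry>(r2)".
  return DAG.getNode(PPCISD::TOC_ENTRY, dl, PtrVT, GA,
                     DAG.getRegister(PPC::X2, PtrVT));
}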
def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "add $rT, $rA, $rB", IntGeneral, [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>; +let Defs = [CARRY] in { def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "addc $rT, $rA, $rB", IntGeneral, [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>, PPC970_DGroup_Cracked; -def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "adde $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; +} def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "divw $rT, $rA, $rB", IntDivW, [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>, @@ -1186,22 +1198,28 @@ def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subf $rT, $rA, $rB", IntGeneral, [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>; +let Defs = [CARRY] in { def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subfc $rT, $rA, $rB", IntGeneral, [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>, PPC970_DGroup_Cracked; -def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), - "subfe $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; +} +def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), + "neg $rT, $rA", IntGeneral, + [(set GPRC:$rT, (ineg GPRC:$rA))]>; +let Uses = [CARRY], Defs = [CARRY] in { +def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "adde $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addme $rT, $rA", IntGeneral, [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>; def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addze $rT, $rA", IntGeneral, [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; -def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), - "neg $rT, $rA", IntGeneral, - [(set GPRC:$rT, (ineg GPRC:$rA))]>; +def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), + "subfe $rT, $rA, $rB", IntGeneral, + [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfme $rT, $rA", IntGeneral, [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>; @@ -1209,6 +1227,7 @@ def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfze $rT, $rA", IntGeneral, [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; } +} // A-Form instructions. Most of the instructions executed in the FPU are of // this type. 
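// Note on the SDT_PPClbrx/SDT_PPCstbrx change earlier in this file: the
// byte-swapped load/store nodes dropped their explicit srcvalue operand,
// so they are now built from just chain, pointer and the value type, with
// memory information carried separately as a MachineMemOperand. A hedged
// sketch of node construction after the change (helper name hypothetical;
// the chain result is value #1 of the node):
static SDValue buildLHBRX(SelectionDAG &DAG, DebugLoc dl,
                          SDValue Chain, SDValue Ptr) {
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue Ops[] = { Chain, Ptr, DAG.getValueType(MVT::i16) };
  return DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 3);
}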
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 7486d74958889..ef25d92f719ae 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -18,6 +18,8 @@ #include "llvm/Function.h" #include "llvm/System/Memory.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static TargetJITInfo::JITCompilerFn JITCompilerFunction; @@ -197,8 +199,7 @@ asm( ); #else void PPC32CompilationCallback() { - assert(0 && "This is not a power pc, you can't execute this!"); - abort(); + llvm_unreachable("This is not a power pc, you can't execute this!"); } #endif @@ -264,8 +265,7 @@ asm( ); #else void PPC64CompilationCallback() { - assert(0 && "This is not a power pc, you can't execute this!"); - abort(); + llvm_unreachable("This is not a power pc, you can't execute this!"); } #endif @@ -383,7 +383,7 @@ void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); switch ((PPC::RelocationType)MR->getRelocationType()) { - default: assert(0 && "Unknown relocation type!"); + default: llvm_unreachable("Unknown relocation type!"); case PPC::reloc_pcrel_bx: // PC-relative relocation for b and bl instructions. ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp new file mode 100644 index 0000000000000..c87879b2a332a --- /dev/null +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -0,0 +1,58 @@ +//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MCAsmInfoDarwin properties. +// +//===----------------------------------------------------------------------===// + +#include "PPCMCAsmInfo.h" +using namespace llvm; + +PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { + PCSymbol = "."; + CommentString = ";"; + ExceptionsType = ExceptionHandling::Dwarf; + + if (!is64Bit) + Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. + AssemblerDialect = 1; // New-Style mnemonics. +} + +PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) { + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + UsedDirective = "\t# .no_dead_strip\t"; + WeakRefDirective = "\t.weak\t"; + + // Uses '.section' before '.bss' directive + UsesELFSectionDirectiveForBSS = true; + + // Debug Information + AbsoluteDebugSectionOffsets = true; + SupportsDebugInformation = true; + + PCSymbol = "."; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + + // Exceptions handling + if (!is64Bit) + ExceptionsType = ExceptionHandling::Dwarf; + AbsoluteEHSectionOffsets = false; + + ZeroDirective = "\t.space\t"; + SetDirective = "\t.set"; + Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; + AlignmentIsInBytes = false; + LCOMMDirective = "\t.lcomm\t"; + AssemblerDialect = 0; // Old-Style mnemonics. 
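// Note on the assert(0) + abort() -> llvm_unreachable() conversions made
// throughout this patch: llvm_unreachable() reports the message, is marked
// noreturn, and does not compile away under -DNDEBUG the way assert()
// does, so the invariant stays enforced in release builds. Typical shape
// (fragment, assuming a PPC relocation switch like the ones above):
switch ((PPC::RelocationType)MR->getRelocationType()) {
default: llvm_unreachable("Unknown relocation type!");
case PPC::reloc_pcrel_bx:
  // ... handle each known relocation kind ...
  break;
}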
+} + diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.h b/lib/Target/PowerPC/PPCMCAsmInfo.h new file mode 100644 index 0000000000000..96ae6fbba0e43 --- /dev/null +++ b/lib/Target/PowerPC/PPCMCAsmInfo.h @@ -0,0 +1,31 @@ +//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCAsmInfoDarwin class. +// +//===----------------------------------------------------------------------===// + +#ifndef PPCTARGETASMINFO_H +#define PPCTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfoDarwin.h" + +namespace llvm { + + struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + explicit PPCMCAsmInfoDarwin(bool is64Bit); + }; + + struct PPCLinuxMCAsmInfo : public MCAsmInfo { + explicit PPCLinuxMCAsmInfo(bool is64Bit); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp index 3bfa6d7191057..4c14454096caa 100644 --- a/lib/Target/PowerPC/PPCMachOWriterInfo.cpp +++ b/lib/Target/PowerPC/PPCMachOWriterInfo.cpp @@ -16,6 +16,7 @@ #include "PPCTargetMachine.h" #include "llvm/CodeGen/MachORelocation.h" #include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/ErrorHandling.h" #include using namespace llvm; @@ -46,9 +47,9 @@ unsigned PPCMachOWriterInfo::GetTargetRelocation(MachineRelocation &MR, Addr = (uintptr_t)MR.getResultPointer() + ToAddr; switch ((PPC::RelocationType)MR.getRelocationType()) { - default: assert(0 && "Unknown PPC relocation type!"); + default: llvm_unreachable("Unknown PPC relocation type!"); case PPC::reloc_absolute_low_ix: - assert(0 && "Unhandled PPC relocation type!"); + llvm_unreachable("Unhandled PPC relocation type!"); break; case PPC::reloc_vanilla: { diff --git a/lib/Target/PowerPC/PPCPredicates.cpp b/lib/Target/PowerPC/PPCPredicates.cpp index 08a281259e1fa..12bb0a143406b 100644 --- a/lib/Target/PowerPC/PPCPredicates.cpp +++ b/lib/Target/PowerPC/PPCPredicates.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #include "PPCPredicates.h" +#include "llvm/Support/ErrorHandling.h" #include using namespace llvm; PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { switch (Opcode) { - default: assert(0 && "Unknown PPC branch opcode!"); + default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_NE; case PPC::PRED_NE: return PPC::PRED_EQ; case PPC::PRED_LT: return PPC::PRED_GE; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 97b1c57d79786..cf5c7c0f598ab 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -37,7 +37,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include @@ -111,8 +113,7 @@ unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) { case R30: case X30: case F30: case V30: case CR7EQ: return 30; case R31: case X31: case F31: case V31: case CR7UN: return 31; default: - cerr << "Unhandled reg in PPCRegisterInfo::getRegisterNumbering!\n"; - abort(); 
+ llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); } } @@ -139,11 +140,11 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. -const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const { +const TargetRegisterClass * +PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; - else - return &PPC::GPRCRegClass; + return &PPC::GPRCRegClass; } const unsigned* @@ -173,7 +174,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; - + + // 32-bit SVR4 calling convention. static const unsigned SVR4_CalleeSavedRegs[] = { PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, @@ -199,7 +201,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::LR, 0 + 0 }; // 64-bit Darwin calling convention. static const unsigned Darwin64_CalleeSavedRegs[] = { @@ -226,12 +228,41 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR8, 0 }; + + // 64-bit SVR4 calling convention. + static const unsigned SVR4_64_CalleeSavedRegs[] = { + PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + + PPC::VRSAVE, + + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + 0 + }; if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : Darwin32_CalleeSavedRegs; - - return SVR4_CalleeSavedRegs; + + return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; } const TargetRegisterClass* const* @@ -266,6 +297,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; + // 32-bit SVR4 calling convention. static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { &PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, @@ -294,7 +326,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, &PPC::CRBITRCRegClass, - &PPC::GPRCRegClass, 0 + 0 }; // 64-bit Darwin calling convention. @@ -326,12 +358,45 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::G8RCRegClass, 0 }; + + // 64-bit SVR4 calling convention. 
+ static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = { + &PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass, + + &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, + + &PPC::VRSAVERCRegClass, + + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + + 0 + }; if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : Darwin32_CalleeSavedRegClasses; - return SVR4_CalleeSavedRegClasses; + return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses + : SVR4_CalleeSavedRegClasses; } // needsFP - Return true if the specified function should have a dedicated frame @@ -363,9 +428,9 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R13); // Small Data Area pointer register } - // On PPC64, r13 is the thread pointer. Never allocate this register. Note - // that this is over conservative, as it also prevents allocation of R31 when - // the FP is not needed. + // On PPC64, r13 is the thread pointer. Never allocate this register. + // Note that this is over conservative, as it also prevents allocation of R31 + // when the FP is not needed. if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); Reserved.set(PPC::R31); @@ -377,6 +442,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); Reserved.set(PPC::X31); + + // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. + if (Subtarget.isSVR4ABI()) { + Reserved.set(PPC::X2); + } } if (needsFP(MF)) @@ -457,7 +527,7 @@ static unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, const TargetRegisterClass *RC, int SPAdj) { assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC, true); + unsigned Reg = RS->FindUnusedReg(RC); // FIXME: move ARM callee-saved reg scan to target independent code, then // search for already spilled CS register here. if (Reg == 0) @@ -629,8 +699,10 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, MBB.erase(II); } -void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); // Get the instruction. 
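// Note on the SVR4_64_* tables above: the 64-bit SVR4 ABI now has its own
// callee-saved set (X14-X31, F14-F31, CR2-CR4, VRSAVE, V20-V31 plus the
// CR bit aliases); r2/X2 never appears because it is reserved for the TOC,
// and the SVR4 lists no longer name LR, which is handled separately.
// Clients walk the returned null-terminated array, e.g. (hypothetical use,
// with CSRegs a SmallVector<unsigned, 32>):
for (const unsigned *R = TRI->getCalleeSavedRegs(&MF); *R; ++R)
  CSRegs.push_back(*R);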
@@ -669,14 +741,14 @@ void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { lowerDynamicAlloc(II, SPAdj, RS); - return; + return 0; } // Special case for pseudo-op SPILL_CR. if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default. if (OpC == PPC::SPILL_CR) { lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; + return 0; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -718,7 +790,7 @@ void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); - return; + return 0; } // The offset doesn't fit into a single register, scavenge one to build the @@ -758,6 +830,7 @@ void PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned StackReg = MI.getOperand(FIOperandNo).getReg(); MI.getOperand(OperandBase).ChangeToRegister(StackReg, false); MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false); + return 0; } /// VRRegNo - Map from a numbered VR register to its enum value. @@ -910,7 +983,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // don't have a frame pointer, calls, or dynamic alloca then we do not need // to adjust the stack pointer (we fit in the Red Zone). bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); - // FIXME SVR4 The SVR4 ABI has no red zone. + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. @@ -1005,7 +1078,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (!Subtarget.isSVR4ABI()) { return; } - + // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); const std::vector &CSI = FFI->getCalleeSavedInfo(); @@ -1016,16 +1089,19 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) } unsigned MinGPR = PPC::R31; + unsigned MinG8R = PPC::X31; unsigned MinFPR = PPC::F31; unsigned MinVR = PPC::V31; bool HasGPSaveArea = false; + bool HasG8SaveArea = false; bool HasFPSaveArea = false; bool HasCRSaveArea = false; bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; SmallVector GPRegs; + SmallVector G8Regs; SmallVector FPRegs; SmallVector VRegs; @@ -1041,6 +1117,14 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinGPR) { MinGPR = Reg; } + } else if (RC == PPC::G8RCRegisterClass) { + HasG8SaveArea = true; + + G8Regs.push_back(CSI[i]); + + if (Reg < MinG8R) { + MinG8R = Reg; + } } else if (RC == PPC::F8RCRegisterClass) { HasFPSaveArea = true; @@ -1064,7 +1148,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) MinVR = Reg; } } else { - assert(0 && "Unknown RegisterClass!"); + llvm_unreachable("Unknown RegisterClass!"); } } @@ -1103,7 +1187,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // General register save area starts right below the Floating-point // register save area. - if (HasGPSaveArea) { + if (HasGPSaveArea || HasG8SaveArea) { // Move general register save area spill slots down, taking into account // the size of the Floating-point register save area. 
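// Note on the eliminateFrameIndex() changes above: the hook now returns
// unsigned and takes an int *Value out-parameter to match the updated
// TargetRegisterInfo interface; PPC reports nothing through either (every
// path simply returns 0 after rewriting the FrameIndex operand in place).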
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { @@ -1112,7 +1196,22 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } - LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4; + // Move general register save area spill slots down, taking into account + // the size of the Floating-point register save area. + for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { + int FI = G8Regs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + unsigned MinReg = std::min(getRegisterNumbering(MinGPR), + getRegisterNumbering(MinG8R)); + + if (Subtarget.isPPC64()) { + LowerBound -= (31 - MinReg + 1) * 8; + } else { + LowerBound -= (31 - MinReg + 1) * 4; + } } // The CR save area is below the general register save area. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index ddaefdd2a37c5..1689bc224fb6c 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -37,7 +37,7 @@ public: /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. - virtual const TargetRegisterClass *getPointerRegClass() const; + virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; @@ -66,8 +66,9 @@ public: int SPAdj, RegScavenger *RS) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, int SPAdj, RegScavenger *RS) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index bac8e3aed8eb8..049e893e82edf 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -255,6 +255,11 @@ def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66]>; // VRsave register def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[107]>; +// Carry bit. In the architecture this is really bit 0 of the XER register +// (which really is SPR register 1); this is the only bit interesting to a +// compiler. +def CARRY: SPR<1, "ca">, DwarfRegNum<[0]>; + // FP rounding mode: bits 30 and 31 of the FP status and control register // This is not allocated as a normal register; it appears only in // Uses and Defs. The ABI says it needs to be preserved by a function, @@ -280,7 +285,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32, let MethodBodies = [{ GPRCClass::iterator GPRCClass::allocation_order_begin(const MachineFunction &MF) const { - // In Linux, r2 is reserved for the OS. + // 32-bit SVR4 ABI: r2 is reserved for the OS. + // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. if (!MF.getTarget().getSubtarget().isDarwin()) return begin()+1; @@ -291,7 +297,7 @@ def GPRC : RegisterClass<"PPC", [i32], 32, // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is overconservative, as it also prevents allocation of // R31 when the FP is not needed. - // When using the SVR4 ABI, r13 is reserved for the Small Data Area + // When using the 32-bit SVR4 ABI, r13 is reserved for the Small Data Area // pointer. 
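// Worked example for the save-area sizing above: if the lowest-numbered
// saved general-purpose register is r28 (so MinReg == 28), the GPR save
// area covers (31 - 28 + 1) = 4 slots, i.e. 32 bytes on a 64-bit target
// or 16 bytes on a 32-bit one, and LowerBound moves down by that amount
// before the CR save area is laid out below it.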
const PPCSubtarget &Subtarget = MF.getTarget().getSubtarget(); @@ -318,6 +324,10 @@ def G8RC : RegisterClass<"PPC", [i64], 64, let MethodBodies = [{ G8RCClass::iterator G8RCClass::allocation_order_begin(const MachineFunction &MF) const { + // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. + if (!MF.getTarget().getSubtarget().isDarwin()) + return begin()+1; + return begin(); } G8RCClass::iterator @@ -372,4 +382,6 @@ def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>; def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>; - +def CARRYRC : RegisterClass<"PPC", [i32], 32, [CARRY]> { + let CopyCost = -1; +} diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 425d8e6195c6a..f75e7814526f7 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -13,7 +13,7 @@ #include "PPCSubtarget.h" #include "PPC.h" -#include "llvm/Module.h" +#include "llvm/GlobalValue.h" #include "llvm/Target/TargetMachine.h" #include "PPCGenSubtarget.inc" #include @@ -57,10 +57,9 @@ static const char *GetCurrentPowerPCCPU() { #endif -PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M, - const std::string &FS, bool is64Bit) - : TM(tm) - , StackAlignment(16) +PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, + bool is64Bit) + : StackAlignment(16) , DarwinDirective(PPC::DIR_NONE) , IsGigaProcessor(false) , Has64BitSupport(false) @@ -95,7 +94,6 @@ PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M, // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. - const std::string &TT = M.getTargetTriple(); if (TT.length() > 7) { // Determine which version of darwin this is. size_t DarwinPos = TT.find("-darwin"); @@ -105,24 +103,11 @@ PPCSubtarget::PPCSubtarget(const TargetMachine &tm, const Module &M, else DarwinVers = 8; // Minimum supported darwin is Tiger. } - } else if (TT.empty()) { - // Try to autosense the subtarget from the host compiler. -#if defined(__APPLE__) -#if __APPLE_CC__ > 5400 - DarwinVers = 9; // GCC 5400+ is Leopard. -#else - DarwinVers = 8; // Minimum supported darwin is Tiger. -#endif -#endif } // Set up darwin-specific properties. - if (isDarwin()) { + if (isDarwin()) HasLazyResolverStubs = true; - AsmFlavor = NewMnemonic; - } else { - AsmFlavor = OldMnemonic; - } } /// SetJITMode - This is called to inform the subtarget info that we are @@ -138,7 +123,8 @@ void PPCSubtarget::SetJITMode() { /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. -bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV) const { +bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV, + const TargetMachine &TM) const { // We never have stubs if HasLazyResolverStubs=false or if in static mode.
if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static) return false; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index f633cc6d2da4c..02c8ad79bd38f 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -39,18 +39,11 @@ namespace PPC { }; } -class Module; class GlobalValue; class TargetMachine; class PPCSubtarget : public TargetSubtarget { -public: - enum AsmWriterFlavorTy { - OldMnemonic, NewMnemonic, Unset - }; protected: - const TargetMachine &TM; - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned StackAlignment; @@ -61,9 +54,6 @@ protected: /// Which cpu directive was used. unsigned DarwinDirective; - /// AsmFlavor - Which PPC asm dialect to use. - AsmWriterFlavorTy AsmFlavor; - /// Used by the ISel to turn in optimizations for POWER4-derived architectures bool IsGigaProcessor; bool Has64BitSupport; @@ -79,10 +69,9 @@ protected: unsigned char DarwinVers; // Is any darwin-ppc platform. public: /// This constructor initializes the data members to match that - /// of the specified module. + /// of the specified triple. /// - PPCSubtarget(const TargetMachine &TM, const Module &M, - const std::string &FS, bool is64Bit); + PPCSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -132,7 +121,8 @@ public: /// hasLazyResolverStub - Return true if accesses to the specified global have /// to go through a dyld lazy resolution stub. This means that an extra load /// is required to get the address of the global. - bool hasLazyResolverStub(const GlobalValue *GV) const; + bool hasLazyResolverStub(const GlobalValue *GV, + const TargetMachine &TM) const; // Specific obvious features. bool hasFSQRT() const { return HasFSQRT; } @@ -148,12 +138,9 @@ public: /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. unsigned getDarwinVers() const { return DarwinVers; } - bool isDarwinABI() const { return isDarwin() || IsPPC64; } - bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; } + bool isDarwinABI() const { return isDarwin(); } + bool isSVR4ABI() const { return !isDarwin(); } - unsigned getAsmFlavor() const { - return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0; - } }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index e9073d63a152f..3371954c30f1a 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -12,96 +12,38 @@ //===----------------------------------------------------------------------===// #include "PPC.h" -#include "PPCTargetAsmInfo.h" +#include "PPCMCAsmInfo.h" #include "PPCTargetMachine.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetMachineRegistry.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/FormattedStream.h" using namespace llvm; -/// PowerPCTargetMachineModule - Note that this is used on hosts that -/// cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. 
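// Note on the PPCSubtarget changes above: the subtarget is now constructed
// from an explicit triple string rather than from a Module, and
// hasLazyResolverStub() receives the TargetMachine it previously cached as
// a member. A hedged sketch of standalone construction under the new
// interface (GV and TM assumed in scope):
PPCSubtarget ST("powerpc64-unknown-linux-gnu", /*FS=*/"", /*is64Bit=*/true);
bool NeedsStub = ST.hasLazyResolverStub(GV, TM);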
-extern "C" int PowerPCTargetMachineModule; -int PowerPCTargetMachineModule = 0; - -// Register the targets -static RegisterTarget -X("ppc32", "PowerPC 32"); -static RegisterTarget -Y("ppc64", "PowerPC 64"); - -// Force static initialization. -extern "C" void LLVMInitializePowerPCTarget() { } - -// No assembler printer by default -PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0; - -const TargetAsmInfo *PPCTargetMachine::createTargetAsmInfo() const { - if (Subtarget.isDarwin()) - return new PPCDarwinTargetAsmInfo(*this); - else - return new PPCLinuxTargetAsmInfo(*this); -} - -unsigned PPC32TargetMachine::getJITMatchQuality() { -#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__) - if (sizeof(void*) == 4) - return 10; -#endif - return 0; -} -unsigned PPC64TargetMachine::getJITMatchQuality() { -#if defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) || defined(__PPC__) - if (sizeof(void*) == 8) - return 10; -#endif - return 0; -} - -unsigned PPC32TargetMachine::getModuleMatchQuality(const Module &M) { - // We strongly match "powerpc-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 8 && std::string(TT.begin(), TT.begin()+8) == "powerpc-") - return 20; - - // If the target triple is something non-powerpc, we don't match. - if (!TT.empty()) return 0; +static const MCAsmInfo *createMCAsmInfo(const Target &T, + const StringRef &TT) { + Triple TheTriple(TT); + bool isPPC64 = TheTriple.getArch() == Triple::ppc64; + if (TheTriple.getOS() == Triple::Darwin) + return new PPCMCAsmInfoDarwin(isPPC64); + return new PPCLinuxMCAsmInfo(isPPC64); - if (M.getEndianness() == Module::BigEndian && - M.getPointerSize() == Module::Pointer32) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target - - return getJITMatchQuality()/2; } -unsigned PPC64TargetMachine::getModuleMatchQuality(const Module &M) { - // We strongly match "powerpc64-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 10 && std::string(TT.begin(), TT.begin()+10) == "powerpc64-") - return 20; - - if (M.getEndianness() == Module::BigEndian && - M.getPointerSize() == Module::Pointer64) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target +extern "C" void LLVMInitializePowerPCTarget() { + // Register the targets + RegisterTargetMachine A(ThePPC32Target); + RegisterTargetMachine B(ThePPC64Target); - return getJITMatchQuality()/2; + RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo); + RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo); } -PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS, - bool is64Bit) - : Subtarget(*this, M, FS, is64Bit), +PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool is64Bit) + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this), InstrItins(Subtarget.getInstrItineraryData()), MachOWriterInfo(*this) { @@ -118,13 +60,15 @@ PPCTargetMachine::PPCTargetMachine(const Module &M, const std::string &FS, /// groups, which typically degrades performance. 
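// Note on createMCAsmInfo() above: asm properties are now selected by
// parsing the triple with llvm::Triple instead of by virtual dispatch on
// the TargetMachine. For example (expected values in comments):
Triple T("powerpc64-apple-darwin9");
bool isPPC64  = T.getArch() == Triple::ppc64;  // true
bool isDarwin = T.getOS()   == Triple::Darwin; // true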
bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } -PPC32TargetMachine::PPC32TargetMachine(const Module &M, const std::string &FS) - : PPCTargetMachine(M, FS, false) { +PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : PPCTargetMachine(T, TT, FS, false) { } -PPC64TargetMachine::PPC64TargetMachine(const Module &M, const std::string &FS) - : PPCTargetMachine(M, FS, true) { +PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : PPCTargetMachine(T, TT, FS, true) { } @@ -146,20 +90,36 @@ bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, return false; } -bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, Verbose)); +bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + MachineCodeEmitter &MCE) { + // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. + // FIXME: This should be moved to TargetJITInfo!! + if (Subtarget.isPPC64()) { + // We use PIC codegen in ppc64 mode, because otherwise we'd have to use many + // instructions to materialize arbitrary global variable + function + + // constant pool addresses. + setRelocationModel(Reloc::PIC_); + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + DisableJumpTables = true; + } else { + setRelocationModel(Reloc::Static); + } + + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho + // writing? + Subtarget.SetJITMode(); + + // Machine code emitter pass for PowerPC. + PM.add(createPPCCodeEmitterPass(*this, MCE)); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE) { + JITCodeEmitter &JCE) { // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. // FIXME: This should be moved to TargetJITInfo!! if (Subtarget.isPPC64()) { @@ -179,19 +139,14 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, Subtarget.SetJITMode(); // Machine code emitter pass for PowerPC. - PM.add(createPPCCodeEmitterPass(*this, MCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } + PM.add(createPPCJITCodeEmitterPass(*this, JCE)); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE) { + ObjectCodeEmitter &OCE) { // The JIT should use the static relocation model in ppc32 mode, PIC in ppc64. // FIXME: This should be moved to TargetJITInfo!! if (Subtarget.isPPC64()) { @@ -211,43 +166,33 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, Subtarget.SetJITMode(); // Machine code emitter pass for PowerPC. - PM.add(createPPCJITCodeEmitterPass(*this, JCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } + PM.add(createPPCObjectCodeEmitterPass(*this, OCE)); return false; } bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE) { // Machine code emitter pass for PowerPC. 
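// Note on the block above, which repeats in each code-emitter override:
// the JIT pins the relocation model before emission because PPC64 needs
// PIC to materialize arbitrary 64-bit global, function and constant-pool
// addresses, while PPC32 can stay static; disabling jump tables is a
// stopgap for the PPC64 JIT. Condensed, the policy is:
if (Subtarget.isPPC64()) {
  setRelocationModel(Reloc::PIC_);
  DisableJumpTables = true;  // PPC64 JIT can't handle jump tables yet
} else {
  setRelocationModel(Reloc::Static);
}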
PM.add(createPPCCodeEmitterPass(*this, MCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } - return false; } bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE) { // Machine code emitter pass for PowerPC. PM.add(createPPCJITCodeEmitterPass(*this, JCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } + return false; +} +bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE) { + // Machine code emitter pass for PowerPC. + PM.add(createPPCObjectCodeEmitterPass(*this, OCE)); return false; } + diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index c693bf42a3e06..3399ac89188fa 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -39,18 +39,9 @@ class PPCTargetMachine : public LLVMTargetMachine { InstrItineraryData InstrItins; PPCMachOWriterInfo MachOWriterInfo; -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - - // To avoid having target depend on the asmprinter stuff libraries, asmprinter - // set this functions to ctor pointer at startup time if they are linked in. - typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, - PPCTargetMachine &tm, - bool verbose); - static AsmPrinterCtorFn AsmPrinterCtor; - public: - PPCTargetMachine(const Module &M, const std::string &FS, bool is64Bit); + PPCTargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; } @@ -71,26 +62,24 @@ public: return &MachOWriterInfo; } - static void registerAsmPrinter(AsmPrinterCtorFn F) { - AsmPrinterCtor = F; - } - // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE); + MachineCodeEmitter &MCE); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + JITCodeEmitter &JCE); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE); + ObjectCodeEmitter &OCE); virtual bool addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE); + MachineCodeEmitter &MCE); virtual bool addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE); + JITCodeEmitter &JCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE); virtual bool getEnableTailMergeDefault() const; }; @@ -98,20 +87,16 @@ public: /// class PPC32TargetMachine : public PPCTargetMachine { public: - PPC32TargetMachine(const Module &M, const std::string &FS); - - static unsigned getJITMatchQuality(); - static unsigned getModuleMatchQuality(const Module &M); + PPC32TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); }; 
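// Note on the constructor changes above: PPC32TargetMachine now receives
// the Target and triple directly, and the getJITMatchQuality() /
// getModuleMatchQuality() heuristics are gone because target selection
// moved into the TargetRegistry. A hedged sketch of the client-side lookup:
std::string Error;
const Target *T = TargetRegistry::lookupTarget("powerpc-apple-darwin9",
                                               Error);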
/// PPC64TargetMachine - PowerPC 64-bit target machine. /// class PPC64TargetMachine : public PPCTargetMachine { public: - PPC64TargetMachine(const Module &M, const std::string &FS); - - static unsigned getJITMatchQuality(); - static unsigned getModuleMatchQuality(const Module &M); + PPC64TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); }; } // end namespace llvm diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 6e9e6c74e8f3f..f5e50fc808a8d 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -149,7 +149,7 @@ http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html Implement Newton-Rhapson method for improving estimate instructions to the correct accuracy, and implementing divide as multiply by reciprocal when it has -more than one use. Itanium will want this too. +more than one use. Itanium would want this too. ===-------------------------------------------------------------------------=== diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..058d599a4af06 --- /dev/null +++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMPowerPCInfo + PowerPCTargetInfo.cpp + ) + +add_dependencies(LLVMPowerPCInfo PowerPCCodeGenTable_gen) diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile new file mode 100644 index 0000000000000..a101aa4a44958 --- /dev/null +++ b/lib/Target/PowerPC/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/PowerPC/TargetInfo/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMPowerPCInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp new file mode 100644 index 0000000000000..ad607d0ade6ab --- /dev/null +++ b/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp @@ -0,0 +1,23 @@ +//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::ThePPC32Target, llvm::ThePPC64Target; + +extern "C" void LLVMInitializePowerPCTargetInfo() { + RegisterTarget + X(ThePPC32Target, "ppc32", "PowerPC 32"); + + RegisterTarget + Y(ThePPC64Target, "ppc64", "PowerPC 64"); +} diff --git a/lib/Target/README.txt b/lib/Target/README.txt index f68cf0e40df0c..89ea9d0afc42c 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -197,13 +197,6 @@ _bar: addic r3,r3,-1 //===---------------------------------------------------------------------===// -Legalize should lower ctlz like this: - ctlz(x) = popcnt((x-1) & ~x) - -on targets that have popcnt but not ctlz. 
itanium, what else? - -//===---------------------------------------------------------------------===// - quantum_sigma_x in 462.libquantum contains the following loop: for(i=0; isize; i++) @@ -227,7 +220,20 @@ so cool to turn it into something like: ... which would only do one 32-bit XOR per loop iteration instead of two. It would also be nice to recognize the reg->size doesn't alias reg->node[i], but -alas... +alas. + +//===---------------------------------------------------------------------===// + +This should be optimized to one 'and' and one 'or', from PR4216: + +define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp { +entry: + %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; [#uses=1] + %0 = and i32 %bf.prev.low, -65536 ; [#uses=1] + %1 = and i32 %bf.prev.lo.cleared10, 40186 ; [#uses=1] + %2 = or i32 %1, %0 ; [#uses=1] + ret i32 %2 +} //===---------------------------------------------------------------------===// @@ -335,11 +341,6 @@ when it is declared U32. //===---------------------------------------------------------------------===// -Promote for i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit -regs and bswap, like itanium. - -//===---------------------------------------------------------------------===// - LSR should know what GPR types a target has. This code: volatile short X, Y; // globals @@ -349,24 +350,22 @@ void foo(int N) { for (i = 0; i < N; i++) { X = i; Y = i*4; } } -produces two identical IV's (after promotion) on PPC/ARM: +produces two near identical IV's (after promotion) on PPC/ARM: -LBB1_1: @bb.preheader - mov r3, #0 - mov r2, r3 - mov r1, r3 -LBB1_2: @bb - ldr r12, LCPI1_0 - ldr r12, [r12] - strh r2, [r12] - ldr r12, LCPI1_1 - ldr r12, [r12] - strh r3, [r12] - add r1, r1, #1 <- [0,+,1] - add r3, r3, #4 - add r2, r2, #1 <- [0,+,1] - cmp r1, r0 - bne LBB1_2 @bb +LBB1_2: + ldr r3, LCPI1_0 + ldr r3, [r3] + strh r2, [r3] + ldr r3, LCPI1_1 + ldr r3, [r3] + strh r1, [r3] + add r1, r1, #4 + add r2, r2, #1 <- [0,+,1] + sub r0, r0, #1 <- [0,-,1] + cmp r0, #0 + bne LBB1_2 + +LSR should reuse the "+" IV for the exit test. //===---------------------------------------------------------------------===// @@ -600,25 +599,6 @@ implementations of ceil/floor/rint. //===---------------------------------------------------------------------===// -This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043 -contains a testcase that compiles down to: - - %struct.XMM128 = type { <4 x float> } -.. - %src = alloca %struct.XMM128 -.. - %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>* - %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0 - store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16 - %tmp66 = load <4 x float>* %tmp65, align 16 - %tmp71 = add <4 x float> %tmp66, %tmp66 - -If the mid-level optimizer turned the bitcast of pointer + store of tmp5899 -into a bitcast of the vector value and a store to the pointer, then the -store->load could be easily removed. - -//===---------------------------------------------------------------------===// - Consider: int test() { @@ -1123,16 +1103,6 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". //===---------------------------------------------------------------------===// -We would like to do the following transform in the instcombiner: - - -X/C -> X/-C - -However, this isn't valid if (-X) overflows. We can implement this when we -have the concept of a "C signed subtraction" operator that which is undefined -on overflow. 
- -//===---------------------------------------------------------------------===// - This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 @@ -1311,6 +1281,8 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge) llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS] + //===---------------------------------------------------------------------===// Type based alias analysis: @@ -1318,31 +1290,25 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 //===---------------------------------------------------------------------===// -When GVN/PRE finds a store of float* to a must aliases pointer when expecting -an int*, it should turn it into a bitcast. This is a nice generalization of -the SROA hack that would apply to other cases, e.g.: - -int foo(int C, int *P, float X) { - if (C) { - bar(); - *P = 42; - } else - *(float*)P = X; - - return *P; -} - - -One example (that requires crazy phi translation) is: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS] - -//===---------------------------------------------------------------------===// - A/B get pinned to the stack because we turn an if/then into a select instead of PRE'ing the load/store. This may be fixable in instcombine: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892 +struct X { int i; }; +int foo (int x) { + struct X a; + struct X b; + struct X *p; + a.i = 1; + b.i = 2; + if (x) + p = &a; + else + p = &b; + return p->i; +} +//===---------------------------------------------------------------------===// Interesting missed case because of control flow flattening (should be 2 loads): http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 @@ -1675,5 +1641,6 @@ entry: Instcombine should be able to optimize away the loads (and thus the globals). 
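The testcase for the note above is elided from this hunk, so the following C++ sketch is only a guess at the shape of the missed pattern, not the actual input: a read-only global with internal linkage whose loads should constant-fold, after which the global itself becomes dead.

  namespace {
    const int Table[4] = {1, 2, 3, 4};  // internal linkage, never written
  }
  int lookup() { return Table[2]; }     // should fold to 'return 3;'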
+See also PR4973 //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile index f12a6ac39891c..a856828ce4014 100644 --- a/lib/Target/Sparc/AsmPrinter/Makefile +++ b/lib/Target/Sparc/AsmPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/Sparc/Makefile ---------------------------*- Makefile -*-===## +##===- lib/Target/Sparc/AsmPrinter/Makefile ----------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 71bd0dee20685..a3e5fba928f09 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -19,18 +19,22 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" -#include "llvm/MDNode.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Support/Mangler.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" #include "llvm/Support/MathExtras.h" #include #include @@ -49,45 +53,36 @@ namespace { ValueMapTy NumberForBB; unsigned BBNumber; public: - explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM, - const TargetAsmInfo *T, bool V) + explicit SparcAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) : AsmPrinter(O, TM, T, V), BBNumber(0) {} virtual const char *getPassName() const { return "Sparc Assembly Printer"; } - void printModuleLevelGV(const GlobalVariable* GVar); + void PrintGlobalVariable(const GlobalVariable *GVar); void printOperand(const MachineInstr *MI, int opNum); void printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier = 0); void printCCOperand(const MachineInstr *MI, int opNum); - bool printInstruction(const MachineInstr *MI); // autogenerated. + void printInstruction(const MachineInstr *MI); // autogenerated. + static const char *getRegisterName(unsigned RegNo); + bool runOnMachineFunction(MachineFunction &F); - bool doInitialization(Module &M); - bool doFinalization(Module &M); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); + + void emitFunctionHeader(const MachineFunction &MF); + bool printGetPCX(const MachineInstr *MI, unsigned OpNo); }; } // end of anonymous namespace #include "SparcGenAsmWriter.inc" -/// createSparcCodePrinterPass - Returns a pass that prints the SPARC -/// assembly code for a MachineFunction to the given output stream, -/// using the given target machine description. This should work -/// regardless of whether the function is in SSA form. 
-/// -FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o, - TargetMachine &tm, - bool verbose) { - return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); -} - - /// runOnMachineFunction - This uses the printInstruction() /// method to print assembly for each instruction. /// @@ -103,17 +98,11 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // BBs the same name. (If you have a better way, please let me know!) O << "\n\n"; - - // Print out the label for the function. - const Function *F = MF.getFunction(); - SwitchToSection(TAI->SectionForGlobal(F)); - EmitAlignment(MF.getAlignment(), F); - O << "\t.globl\t" << CurrentFnName << '\n'; - - printVisibility(CurrentFnName, F->getVisibility()); - - O << "\t.type\t" << CurrentFnName << ", #function\n"; - O << CurrentFnName << ":\n"; + emitFunctionHeader(MF); + + + // Emit pre-function debug information. + DW->BeginFunction(&MF); // Number each basic block so that we can consistently refer to them // in PC-relative references. @@ -129,24 +118,65 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { I != E; ++I) { // Print a label for the basic block. if (I != MF.begin()) { - printBasicBlockLabel(I, true, true); - O << '\n'; + EmitBasicBlockStart(I); } for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { // Print the assembly for the instruction. + processDebugLoc(II, true); printInstruction(II); + + if (VerboseAsm && !II->getDebugLoc().isUnknown()) + EmitComments(*II); + O << '\n'; + processDebugLoc(II, false); ++EmittedInsts; } } + // Emit post-function debug information. + DW->EndFunction(&MF); + // We didn't modify anything. + O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; return false; } +void SparcAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + const Function *F = MF.getFunction(); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitAlignment(MF.getAlignment(), F); + + switch (F->getLinkage()) { + default: llvm_unreachable("Unknown linkage type"); + case Function::PrivateLinkage: + case Function::InternalLinkage: + // Function is internal. 
+ break; + case Function::DLLExportLinkage: + case Function::ExternalLinkage: + // Function is externally visible + O << "\t.global\t" << CurrentFnName << '\n'; + break; + case Function::LinkerPrivateLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + // Function is weak + O << "\t.weak\t" << CurrentFnName << '\n' ; + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.type\t" << CurrentFnName << ", #function\n"; + O << CurrentFnName << ":\n"; +} + + void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand (opNum); - const TargetRegisterInfo &RI = *TM.getRegisterInfo(); bool CloseParen = false; if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) { O << "%hi("; @@ -158,33 +188,27 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { } switch (MO.getType()) { case MachineOperand::MO_Register: - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - O << "%" << LowercaseString (RI.get(MO.getReg()).AsmName); - else - O << "%reg" << MO.getReg(); + O << "%" << LowercaseString(getRegisterName(MO.getReg())); break; case MachineOperand::MO_Immediate: O << (int)MO.getImm(); break; case MachineOperand::MO_MachineBasicBlock: - printBasicBlockLabel(MO.getMBB()); + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); return; case MachineOperand::MO_GlobalAddress: - { - const GlobalValue *GV = MO.getGlobal(); - O << Mang->getValueName(GV); - } + O << Mang->getMangledName(MO.getGlobal()); break; case MachineOperand::MO_ExternalSymbol: O << MO.getSymbolName(); break; case MachineOperand::MO_ConstantPoolIndex: - O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); break; default: - O << ""; abort (); break; + llvm_unreachable(""); } if (CloseParen) O << ")"; } @@ -218,28 +242,42 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, } } -void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { - int CC = (int)MI->getOperand(opNum).getImm(); - O << SPARCCondCodeToString((SPCC::CondCodes)CC); -} +bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) { + std::string operand = ""; + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + default: assert(0 && "Operand is not a register "); + case MachineOperand::MO_Register: + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Operand is not a physical register "); + operand = "%" + LowercaseString(getRegisterName(MO.getReg())); + break; + } -bool SparcAsmPrinter::doInitialization(Module &M) { - Mang = new Mangler(M, "", TAI->getPrivateGlobalPrefix()); - return false; // success -} + unsigned bbNum = NumberForBB[MI->getParent()->getBasicBlock()]; -bool SparcAsmPrinter::doFinalization(Module &M) { - // Print out module-level global variables here. 
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I); + O << '\n' << ".LLGETPCH" << bbNum << ":\n"; + O << "\tcall\t.LLGETPC" << bbNum << '\n' ; - O << '\n'; + O << "\t sethi\t" + << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << operand << '\n' ; + + O << ".LLGETPC" << bbNum << ":\n" ; + O << "\tor\t" << operand + << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << operand << '\n'; + O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; + + return true; +} - return AsmPrinter::doFinalization(M); +void SparcAsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { + int CC = (int)MI->getOperand(opNum).getImm(); + O << SPARCCondCodeToString((SPCC::CondCodes)CC); } -void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { +void SparcAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { const TargetData *TD = TM.getTargetData(); if (!GVar->hasInitializer()) @@ -250,16 +288,15 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { return; O << "\n\n"; - std::string name = Mang->getValueName(GVar); + std::string name = Mang->getMangledName(GVar); Constant *C = GVar->getInitializer(); - if (isa(C) || isa(C)) - return; unsigned Size = TD->getTypeAllocSize(C->getType()); unsigned Align = TD->getPreferredAlignment(GVar); printVisibility(name, GVar->getVisibility()); - SwitchToSection(TAI->SectionForGlobal(GVar)); + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); if (C->isNullValue() && !GVar->hasSection()) { if (!GVar->isThreadLocal() && @@ -269,8 +306,8 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { if (GVar->hasLocalLinkage()) O << "\t.local " << name << '\n'; - O << TAI->getCOMMDirective() << name << ',' << Size; - if (TAI->getCOMMDirectiveTakesAlignment()) + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) O << ',' << (1 << Align); O << '\n'; @@ -292,27 +329,25 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { // their name or something. For now, just emit them as external. 
case GlobalValue::ExternalLinkage: // If external or appending, declare as a global symbol - O << TAI->getGlobalDirective() << name << '\n'; + O << MAI->getGlobalDirective() << name << '\n'; // FALL THROUGH case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: case GlobalValue::InternalLinkage: break; case GlobalValue::GhostLinkage: - cerr << "Should not have any unmaterialized functions!\n"; - abort(); + llvm_unreachable("Should not have any unmaterialized functions!"); case GlobalValue::DLLImportLinkage: - cerr << "DLLImport linkage is not supported by this target!\n"; - abort(); + llvm_unreachable("DLLImport linkage is not supported by this target!"); case GlobalValue::DLLExportLinkage: - cerr << "DLLExport linkage is not supported by this target!\n"; - abort(); + llvm_unreachable("DLLExport linkage is not supported by this target!"); default: - assert(0 && "Unknown linkage type!"); + llvm_unreachable("Unknown linkage type!"); } EmitAlignment(Align, GVar); - if (TAI->hasDotTypeDotSizeDirective()) { + if (MAI->hasDotTypeDotSizeDirective()) { O << "\t.type " << name << ",#object\n"; O << "\t.size " << name << ',' << Size << '\n'; } @@ -355,13 +390,7 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -namespace { - static struct Register { - Register() { - SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass); - } - } Registrator; -} - // Force static initialization. -extern "C" void LLVMInitializeSparcAsmPrinter() { } +extern "C" void LLVMInitializeSparcAsmPrinter() { + RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); +} diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index eb045e242b794..74f320a00035f 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -16,9 +16,9 @@ add_llvm_target(SparcCodeGen SparcInstrInfo.cpp SparcISelDAGToDAG.cpp SparcISelLowering.cpp + SparcMCAsmInfo.cpp SparcRegisterInfo.cpp SparcSubtarget.cpp - SparcTargetAsmInfo.cpp SparcTargetMachine.cpp ) diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp index f72a4c4645c15..88b0927b35500 100644 --- a/lib/Target/Sparc/FPMover.cpp +++ b/lib/Target/Sparc/FPMover.cpp @@ -20,6 +20,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumFpDs , "Number of instructions translated"); @@ -75,7 +77,7 @@ static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg, OddReg = OddHalvesOfPairs[i]; return; } - assert(0 && "Can't find reg"); + llvm_unreachable("Can't find reg"); } /// runOnMachineBasicBlock - Fixup FpMOVD instructions in this MBB. @@ -108,16 +110,16 @@ bool FPMover::runOnMachineBasicBlock(MachineBasicBlock &MBB) { else if (MI->getOpcode() == SP::FpABSD) MI->setDesc(TII->get(SP::FABSS)); else - assert(0 && "Unknown opcode!"); + llvm_unreachable("Unknown opcode!"); MI->getOperand(0).setReg(EvenDestReg); MI->getOperand(1).setReg(EvenSrcReg); - DOUT << "FPMover: the modified instr is: " << *MI; + DEBUG(errs() << "FPMover: the modified instr is: " << *MI); // Insert copy for the other half of the double.
if (DestDReg != SrcDReg) { MI = BuildMI(MBB, I, dl, TM.getInstrInfo()->get(SP::FMOVS), OddDestReg) .addReg(OddSrcReg); - DOUT << "FPMover: the inserted instr is: " << *MI; + DEBUG(errs() << "FPMover: the inserted instr is: " << *MI); } ++NumFpDs; } diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index fdf6afaee0768..6714b4dadb29e 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \ SparcGenInstrInfo.inc SparcGenAsmWriter.inc \ SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc -DIRS = AsmPrinter +DIRS = AsmPrinter TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index c7d0ca8a0875e..bb5155e1c263b 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -15,19 +15,21 @@ #ifndef TARGET_SPARC_H #define TARGET_SPARC_H +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" #include <cassert> namespace llvm { class FunctionPass; class SparcTargetMachine; - class raw_ostream; + class formatted_raw_ostream; FunctionPass *createSparcISelDag(SparcTargetMachine &TM); - FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM, - bool Verbose); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); FunctionPass *createSparcFPMoverPass(TargetMachine &TM); + + extern Target TheSparcTarget; + } // end namespace llvm; // Defines symbolic names for Sparc registers. This defines a mapping from @@ -83,7 +85,7 @@ namespace llvm { inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) { switch (CC) { - default: assert(0 && "Unknown condition code"); + default: llvm_unreachable("Unknown condition code"); case SPCC::ICC_NE: return "ne"; case SPCC::ICC_E: return "e"; case SPCC::ICC_G: return "g"; diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index c9bd62d0e20d6..a1a4a8ef52c27 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -17,6 +17,8 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -32,10 +34,13 @@ class SparcDAGToDAGISel : public SelectionDAGISel { /// Subtarget - Keep a pointer to the Sparc Subtarget around so that we can /// make the right decision when generating code for different targets. const SparcSubtarget &Subtarget; + SparcTargetMachine& TM; + MachineBasicBlock *CurBB; public: - explicit SparcDAGToDAGISel(SparcTargetMachine &TM) - : SelectionDAGISel(TM), - Subtarget(TM.getSubtarget<SparcSubtarget>()) { + explicit SparcDAGToDAGISel(SparcTargetMachine &tm) + : SelectionDAGISel(tm), + Subtarget(tm.getSubtarget<SparcSubtarget>()), + TM(tm) { } SDNode *Select(SDValue Op); @@ -61,6 +66,9 @@ public: // Include the pieces autogenerated from the target description. #include "SparcGenDAGISel.inc" + +private: + SDNode* getGlobalBaseReg(); }; } // end anonymous namespace @@ -68,12 +76,18 @@ public: /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void SparcDAGToDAGISel::InstructionSelect() { DEBUG(BB->dump()); - + CurBB = BB; // Select target instructions for the DAG.
SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); } +SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { + MachineFunction *MF = CurBB->getParent(); + unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); +} + bool SparcDAGToDAGISel::SelectADDRri(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { @@ -147,6 +161,9 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) { switch (N->getOpcode()) { default: break; + case SPISD::GLOBAL_BASE_REG: + return getGlobalBaseReg(); + case ISD::SDIV: case ISD::UDIV: { // FIXME: should use a custom expander to expose the SRA to the dag. @@ -156,12 +173,12 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) { // Set the Y register to the high-part. SDValue TopPart; if (N->getOpcode() == ISD::SDIV) { - TopPart = SDValue(CurDAG->getTargetNode(SP::SRAri, dl, MVT::i32, DivLHS, + TopPart = SDValue(CurDAG->getMachineNode(SP::SRAri, dl, MVT::i32, DivLHS, CurDAG->getTargetConstant(31, MVT::i32)), 0); } else { TopPart = CurDAG->getRegister(SP::G0, MVT::i32); } - TopPart = SDValue(CurDAG->getTargetNode(SP::WRYrr, dl, MVT::Flag, TopPart, + TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Flag, TopPart, CurDAG->getRegister(SP::G0, MVT::i32)), 0); // FIXME: Handle div by immediate. @@ -175,8 +192,8 @@ SDNode *SparcDAGToDAGISel::Select(SDValue Op) { SDValue MulLHS = N->getOperand(0); SDValue MulRHS = N->getOperand(1); unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr; - SDNode *Mul = CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::Flag, - MulLHS, MulRHS); + SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Flag, + MulLHS, MulRHS); // The high part is in the Y register. return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1)); return NULL; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 4c3efde36fe19..164770d72df77 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -21,7 +21,9 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -31,18 +33,21 @@ using namespace llvm; #include "SparcGenCallingConv.inc" -static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { +SDValue +SparcTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG) { + // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - DebugLoc dl = Op.getDebugLoc(); // CCState - Info about the registers and stack slot. - CCState CCInfo(CC, isVarArg, DAG.getTarget(), RVLocs); + CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), + RVLocs, *DAG.getContext()); - // Analyze return values of ISD::RET - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_Sparc32); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32); // If this is the first return lowered for this function, add the regs to the // liveout set for the function.
@@ -52,7 +57,6 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - SDValue Chain = Op.getOperand(0); SDValue Flag; // Copy the result values into the output registers. @@ -60,10 +64,8 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - // ISD::RET => ret chain, (regnum1,val1), ... - // So i*2+1 index only the regnums. Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Op.getOperand(i*2+1), Flag); + Outs[i].Val, Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); @@ -74,55 +76,64 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain); } -/// LowerArguments - V8 uses a very simple ABI, where all values are passed in -/// either one or two GPRs, including FP values. TODO: we should pass FP values -/// in FP registers for fastcc functions. -void -SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, - SmallVectorImpl &ArgValues, - DebugLoc dl) { +/// LowerFormalArguments - V8 uses a very simple ABI, where all values are +/// passed in either one or two GPRs, including FP values. TODO: we should +/// pass FP values in FP registers for fastcc functions. +SDValue +SparcTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32); + static const unsigned ArgRegs[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; - const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6; unsigned ArgOffset = 68; - SDValue Root = DAG.getRoot(); - std::vector OutChains; - - for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { - MVT ObjectVT = getValueType(I->getType()); - - switch (ObjectVT.getSimpleVT()) { - default: assert(0 && "Unhandled argument type!"); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + SDValue ArgValue; + CCValAssign &VA = ArgLocs[i]; + // FIXME: We ignore the register assignments of AnalyzeFormalArguments + // because it doesn't know how to split a double into two i32 registers. + EVT ObjectVT = VA.getValVT(); + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: - if (I->use_empty()) { // Argument is dead. + if (!Ins[i].Used) { // Argument is dead. 
if (CurArgReg < ArgRegEnd) ++CurArgReg; - ArgValues.push_back(DAG.getUNDEF(ObjectVT)); + InVals.push_back(DAG.getUNDEF(ObjectVT)); } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); - SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); if (ObjectVT != MVT::i32) { unsigned AssertOp = ISD::AssertSext; Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg, DAG.getValueType(ObjectVT)); Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg); } - ArgValues.push_back(Arg); + InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { - Load = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); + Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; @@ -130,63 +141,63 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8); FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); - Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Root, FIPtr, + Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, NULL, 0, ObjectVT); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } - ArgValues.push_back(Load); + InVals.push_back(Load); } ArgOffset += 4; break; case MVT::f32: - if (I->use_empty()) { // Argument is dead. + if (!Ins[i].Used) { // Argument is dead. if (CurArgReg < ArgRegEnd) ++CurArgReg; - ArgValues.push_back(DAG.getUNDEF(ObjectVT)); + InVals.push_back(DAG.getUNDEF(ObjectVT)); } else if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR // FP value is passed in an integer register. unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VReg); - SDValue Arg = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg); - ArgValues.push_back(Arg); + InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load = DAG.getLoad(MVT::f32, dl, Root, FIPtr, NULL, 0); - ArgValues.push_back(Load); + SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); + InVals.push_back(Load); } ArgOffset += 4; break; case MVT::i64: case MVT::f64: - if (I->use_empty()) { // Argument is dead. + if (!Ins[i].Used) { // Argument is dead. 
if (CurArgReg < ArgRegEnd) ++CurArgReg; if (CurArgReg < ArgRegEnd) ++CurArgReg; - ArgValues.push_back(DAG.getUNDEF(ObjectVT)); + InVals.push_back(DAG.getUNDEF(ObjectVT)); } else { SDValue HiVal; if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi); - HiVal = DAG.getCopyFromReg(Root, dl, VRegHi, MVT::i32); + HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - HiVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); + HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); } SDValue LoVal; if (CurArgReg < ArgRegEnd) { // Lives in an incoming GPR unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo); - LoVal = DAG.getCopyFromReg(Root, dl, VRegLo, MVT::i32); + LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - LoVal = DAG.getLoad(MVT::i32, dl, Root, FIPtr, NULL, 0); + LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); } // Compose the two halves together into an i64 unit. @@ -197,7 +208,7 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, if (ObjectVT == MVT::f64) WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue); - ArgValues.push_back(WholeValue); + InVals.push_back(WholeValue); } ArgOffset += 8; break; @@ -205,10 +216,12 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, } // Store remaining ArgRegs to the stack if this is a varargs function. - if (F.isVarArg()) { + if (isVarArg) { // Remember the vararg offset for the va_start implementation. VarArgsFrameOffset = ArgOffset; + std::vector OutChains; + for (; CurArgReg != ArgRegEnd; ++CurArgReg) { unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(*CurArgReg, VReg); @@ -220,26 +233,31 @@ SparcTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG, OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); ArgOffset += 4; } + + if (!OutChains.empty()) { + OutChains.push_back(Chain); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); + } } - if (!OutChains.empty()) - DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size())); + return Chain; } -static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { - CallSDNode *TheCall = cast(Op.getNode()); - unsigned CallingConv = TheCall->getCallingConv(); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - bool isVarArg = TheCall->isVarArg(); - DebugLoc dl = TheCall->getDebugLoc(); +SDValue +SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { #if 0 // Analyze operands of the call, assigning locations to each operand. 
SmallVector ArgLocs; - CCState CCInfo(CallingConv, isVarArg, DAG.getTarget(), ArgLocs); - CCInfo.AnalyzeCallOperands(Op.getNode(), CC_Sparc32); + CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs); + CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32); // Get the size of the outgoing arguments stack space requirement. unsigned ArgsSize = CCInfo.getNextStackOffset(); @@ -249,9 +267,9 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { // Count the size of the outgoing arguments. unsigned ArgsSize = 0; - for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { - switch (TheCall->getArg(i).getValueType().getSimpleVT()) { - default: assert(0 && "Unknown value type!"); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown value type!"); case MVT::i1: case MVT::i8: case MVT::i16: @@ -283,13 +301,11 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - - // Arguments start after the 5 first operands of ISD::CALL - SDValue Arg = TheCall->getArg(i); + SDValue Arg = Outs[i].Val; // Promote the value if needed. switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg); @@ -325,13 +341,13 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { }; unsigned ArgOffset = 68; - for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; ++i) { - SDValue Val = TheCall->getArg(i); - MVT ObjectVT = Val.getValueType(); + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + SDValue Val = Outs[i].Val; + EVT ObjectVT = Val.getValueType(); SDValue ValToStore(0, 0); unsigned ObjSize; - switch (ObjectVT.getSimpleVT()) { - default: assert(0 && "Unhandled argument type!"); + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled argument type!"); case MVT::i32: ObjSize = 4; @@ -446,7 +462,7 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); - std::vector NodeTys; + std::vector NodeTys; NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. SDValue Ops[] = { Chain, Callee, InFlag }; @@ -459,10 +475,10 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { // Assign locations to each value returned by this call. SmallVector RVLocs; - CCState RVInfo(CallingConv, isVarArg, DAG.getTarget(), RVLocs); + CCState RVInfo(CallConv, isVarArg, DAG.getTarget(), + RVLocs, *DAG.getContext()); - RVInfo.AnalyzeCallResult(TheCall, RetCC_Sparc32); - SmallVector ResultVals; + RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -475,15 +491,10 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { Chain = DAG.getCopyFromReg(Chain, dl, Reg, RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); - ResultVals.push_back(Chain.getValue(0)); + InVals.push_back(Chain.getValue(0)); } - ResultVals.push_back(Chain); - - // Merge everything together with a MERGE_VALUES node. 
- return DAG.getNode(ISD::MERGE_VALUES, dl, - TheCall->getVTList(), &ResultVals[0], - ResultVals.size()); + return Chain; } @@ -496,7 +507,7 @@ static SDValue LowerCALL(SDValue Op, SelectionDAG &DAG) { /// condition. static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) { switch (CC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ISD::SETEQ: return SPCC::ICC_E; case ISD::SETNE: return SPCC::ICC_NE; case ISD::SETLT: return SPCC::ICC_L; @@ -514,7 +525,7 @@ static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) { /// FCC condition. static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { switch (CC) { - default: assert(0 && "Unknown fp condition code!"); + default: llvm_unreachable("Unknown fp condition code!"); case ISD::SETEQ: case ISD::SETOEQ: return SPCC::FCC_E; case ISD::SETNE: @@ -538,9 +549,8 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { } } - SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) - : TargetLowering(TM) { + : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. addRegisterClass(MVT::i32, SP::IntRegsRegisterClass); @@ -635,9 +645,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); - // RET must be custom lowered, to meet ABI requirements - setOperationAction(ISD::RET , MVT::Other, Custom); - // VASTART needs to be custom lowered to use the VarArgsFrameIndex. setOperationAction(ISD::VASTART , MVT::Other, Custom); // VAARG needs to be lowered to not do unaligned accesses for doubles. @@ -654,7 +661,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); - setOperationAction(ISD::DECLARE, MVT::Other, Expand); setStackPointerRegisterToSaveRestore(SP::O6); @@ -734,17 +740,29 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } } -static SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) { +SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) { GlobalValue *GV = cast(Op)->getGlobal(); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); + + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); + + SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, + getPointerTy()); + SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); + SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, + GlobalBase, RelAddr); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + AbsAddr, NULL, 0); } -static SDValue LowerCONSTANTPOOL(SDValue Op, SelectionDAG &DAG) { +SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) { ConstantPoolSDNode *N = cast(Op); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); @@ -752,7 +770,16 @@ static SDValue LowerCONSTANTPOOL(SDValue Op, SelectionDAG &DAG) { SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); SDValue Lo = 
DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); - return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); + + SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, + getPointerTy()); + SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi); + SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, + GlobalBase, RelAddr); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + AbsAddr, NULL, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -787,7 +814,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { // Get the condition flag. SDValue CompareFlag; if (LHS.getValueType() == MVT::i32) { - std::vector VTs; + std::vector VTs; VTs.push_back(MVT::i32); VTs.push_back(MVT::Flag); SDValue Ops[2] = { LHS, RHS }; @@ -818,7 +845,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { SDValue CompareFlag; if (LHS.getValueType() == MVT::i32) { - std::vector VTs; + std::vector VTs; VTs.push_back(LHS.getValueType()); // subcc returns a value VTs.push_back(MVT::Flag); SDValue Ops[2] = { LHS, RHS }; @@ -849,7 +876,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); @@ -900,14 +927,14 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - default: assert(0 && "Should not custom lower this!"); + default: llvm_unreachable("Should not custom lower this!"); // Frame & Return address. Currently unimplemented case ISD::RETURNADDR: return SDValue(); case ISD::FRAMEADDR: return SDValue(); case ISD::GlobalTLSAddress: - assert(0 && "TLS not implemented for Sparc."); - case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); - case ISD::ConstantPool: return LowerCONSTANTPOOL(Op, DAG); + llvm_unreachable("TLS not implemented for Sparc."); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -915,21 +942,20 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::VASTART: return LowerVASTART(Op, DAG, *this); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::CALL: return LowerCALL(Op, DAG); - case ISD::RET: return LowerRET(Op, DAG); } } MachineBasicBlock * SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned BROpcode; unsigned CC; DebugLoc dl = MI->getDebugLoc(); // Figure out the conditional branch opcode to use for this select_cc. 
switch (MI->getOpcode()) { - default: assert(0 && "Unknown SELECT_CC!"); + default: llvm_unreachable("Unknown SELECT_CC!"); case SP::SELECT_CC_Int_ICC: case SP::SELECT_CC_FP_ICC: case SP::SELECT_CC_DFP_ICC: @@ -964,9 +990,18 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by transferring all successors of the current + // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - sinkMBB->transferSuccessors(BB); + // Also inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while (!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -1011,7 +1046,7 @@ SparcTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -1024,7 +1059,7 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, std::vector SparcTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint.size() != 1) return std::vector(); @@ -1050,5 +1085,5 @@ SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned SparcTargetLowering::getFunctionAlignment(const Function *) const { - return 4; + return 2; } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 27ce1b76cc79d..55781be8b5b1b 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -35,7 +35,8 @@ namespace llvm { ITOF, // Int to FP within a FP register. CALL, // A call instruction. - RET_FLAG // Return with a flag operand. + RET_FLAG, // Return with a flag operand. + GLOBAL_BASE_REG // Global base reg for PIC }; } @@ -57,25 +58,49 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; - virtual void LowerArguments(Function &F, SelectionDAG &DAG, - SmallVectorImpl &ArgValues, - DebugLoc dl); virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap *EM) const; virtual const char *getTargetNodeName(unsigned Opcode) const; ConstraintType getConstraintType(const std::string &Constraint) const; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG); + + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 12c286af9428d..8667bca7fe96f 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -17,7 +17,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" #include "SparcGenInstrInfo.inc" +#include "SparcMachineFunctionInfo.h" using namespace llvm; SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST) @@ -160,30 +163,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)); else - assert(0 && "Can't store this register to stack slot"); -} - -void SparcInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const { - unsigned Opc = 0; - DebugLoc DL = DebugLoc::getUnknownLoc(); - if (RC == SP::IntRegsRegisterClass) - Opc = SP::STri; - else if (RC == SP::FPRegsRegisterClass) - Opc = SP::STFri; - else if (RC == SP::DFPRegsRegisterClass) - Opc = SP::STDFri; - else - assert(0 && "Can't load this register"); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - MIB.addReg(SrcReg, getKillRegState(isKill)); - NewMIs.push_back(MIB); - return; + llvm_unreachable("Can't store this register to stack slot"); } void SparcInstrInfo:: @@ -200,28 +180,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, else if (RC == SP::DFPRegsRegisterClass) BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0); else - assert(0 && "Can't load this register from stack slot"); -} - -void SparcInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const { - unsigned Opc = 0; - if (RC == SP::IntRegsRegisterClass) - Opc = SP::LDri; - else if (RC == SP::FPRegsRegisterClass) - Opc = SP::LDFri; - else if (RC == SP::DFPRegsRegisterClass) - Opc = SP::LDDFri; - else - assert(0 && "Can't load this register"); - DebugLoc DL = DebugLoc::getUnknownLoc(); - MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); - for (unsigned i = 0, e = Addr.size(); i != e; ++i) - MIB.addOperand(Addr[i]); - NewMIs.push_back(MIB); - return; + llvm_unreachable("Can't load this register from stack slot"); } MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -278,3 +237,25 @@ MachineInstr
*SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NewMI; } + +unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const +{ + SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>(); + unsigned GlobalBaseReg = SparcFI->getGlobalBaseReg(); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + + GlobalBaseReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); + + + DebugLoc dl = DebugLoc::getUnknownLoc(); + + BuildMI(FirstMBB, MBBI, dl, get(SP::GETPCX), GlobalBaseReg); + SparcFI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index ab661b991d74e..345674bacf37e 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -81,20 +81,10 @@ public: unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const; - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; - - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl<MachineOperand> &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, @@ -107,6 +97,8 @@ public: MachineInstr* LoadMI) const { return 0; } + + unsigned getGlobalBaseReg(MachineFunction *MF) const; }; } diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 2d6c9209e6aef..44821b810b145 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -117,7 +117,7 @@ def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>; def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>; def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>; -// These are target-independent nodes, but have target-specific formats. +// These are target-independent nodes, but have target-specific formats.
def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -134,6 +134,10 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall, def retflag : SDNode<"SPISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; +def getPCX : Operand { + let PrintMethod = "printGetPCX"; +} + //===----------------------------------------------------------------------===// // SPARC Flag Conditions //===----------------------------------------------------------------------===// @@ -207,6 +211,11 @@ multiclass F3_12np Op3Val> { class Pseudo pattern> : InstSP; +// GETPCX for PIC +let Defs = [O7], Uses = [O7] in { + def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >; +} + let Defs = [O6], Uses = [O6] in { def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), "!ADJCALLSTACKDOWN $amt", @@ -431,18 +440,23 @@ def LEA_ADDri : F3_2<2, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "add ${addr:arith}, $dst", [(set IntRegs:$dst, ADDRri:$addr)]>; - -defm ADDCC : F3_12<"addcc", 0b010000, addc>; + +let Defs = [ICC] in + defm ADDCC : F3_12<"addcc", 0b010000, addc>; + defm ADDX : F3_12<"addx", 0b001000, adde>; // Section B.15 - Subtract Instructions, p. 110 defm SUB : F3_12 <"sub" , 0b000100, sub>; defm SUBX : F3_12 <"subx" , 0b001100, sube>; -defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>; -def SUBXCCrr: F3_1<2, 0b011100, - (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), - "subxcc $b, $c, $dst", []>; +let Defs = [ICC] in { + defm SUBCC : F3_12 <"subcc", 0b010100, SPcmpicc>; + + def SUBXCCrr: F3_1<2, 0b011100, + (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), + "subxcc $b, $c, $dst", []>; +} // Section B.18 - Multiply Instructions, p. 113 defm UMUL : F3_12np<"umul", 0b001010>; @@ -471,11 +485,12 @@ let isBarrier = 1 in def BA : BranchSP<0b1000, (ins brtarget:$dst), "ba $dst", [(br bb:$dst)]>; - + // FIXME: the encoding for the JIT should look at the condition field. -def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), - "b$cc $dst", - [(SPbricc bb:$dst, imm:$cc)]>; +let Uses = [ICC] in + def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc), + "b$cc $dst", + [(SPbricc bb:$dst, imm:$cc)]>; // Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121 @@ -489,9 +504,10 @@ class FPBranchSP cc, dag ins, string asmstr, list pattern> } // FIXME: the encoding for the JIT should look at the condition field. -def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc), - "fb$cc $dst", - [(SPbrfcc bb:$dst, imm:$cc)]>; +let Uses = [FCC] in + def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc), + "fb$cc $dst", + [(SPbrfcc bb:$dst, imm:$cc)]>; // Section B.24 - Call and Link Instruction, p. 125 @@ -633,15 +649,16 @@ def FDIVD : F3_3<2, 0b110100, 0b001001110, // Note 2: the result of a FCMP is not available until the 2nd cycle // after the instr is retired, but there is no interlock. This behavior // is modelled with a forced noop after the instruction. 
-def FCMPS : F3_3<2, 0b110101, 0b001010001, - (outs), (ins FPRegs:$src1, FPRegs:$src2), - "fcmps $src1, $src2\n\tnop", - [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>; -def FCMPD : F3_3<2, 0b110101, 0b001010010, - (outs), (ins DFPRegs:$src1, DFPRegs:$src2), - "fcmpd $src1, $src2\n\tnop", - [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>; - +let Defs = [FCC] in { + def FCMPS : F3_3<2, 0b110101, 0b001010001, + (outs), (ins FPRegs:$src1, FPRegs:$src2), + "fcmps $src1, $src2\n\tnop", + [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>; + def FCMPD : F3_3<2, 0b110101, 0b001010010, + (outs), (ins DFPRegs:$src1, DFPRegs:$src2), + "fcmpd $src1, $src2\n\tnop", + [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>; +} //===----------------------------------------------------------------------===// // V9 Instructions @@ -754,8 +771,6 @@ def : Pat<(call tglobaladdr:$dst), def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>; -def : Pat<(ret), (RETL)>; - // Map integer extload's to zextloads. def : Pat<(i32 (extloadi1 ADDRrr:$src)), (LDUBrr ADDRrr:$src)>; def : Pat<(i32 (extloadi1 ADDRri:$src)), (LDUBri ADDRri:$src)>; diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp new file mode 100644 index 0000000000000..b67537c17881c --- /dev/null +++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp @@ -0,0 +1,38 @@ +//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the SparcMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "SparcMCAsmInfo.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; // .xword is only supported by V9. + ZeroDirective = "\t.skip\t"; + CommentString = "!"; + COMMDirectiveTakesAlignment = true; + HasLEB128 = true; + AbsoluteDebugSectionOffsets = true; + SupportsDebugInformation = true; + + SunStyleELFSectionSwitchSyntax = true; + UsesELFSectionDirectiveForBSS = true; + + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + + PrivateGlobalPrefix = ".L"; +} + + diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/SparcMCAsmInfo.h new file mode 100644 index 0000000000000..12d6ef4a6f187 --- /dev/null +++ b/lib/Target/Sparc/SparcMCAsmInfo.h @@ -0,0 +1,28 @@ +//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the SparcMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SPARCTARGETASMINFO_H +#define SPARCTARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + struct SparcELFMCAsmInfo : public MCAsmInfo { + explicit SparcELFMCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h new file mode 100644 index 0000000000000..e457235ff6a64 --- /dev/null +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -0,0 +1,32 @@ +//===- SparcMachineFunctionInfo.h - Sparc Machine Function Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares Sparc specific per-machine-function information. +// +//===----------------------------------------------------------------------===// +#ifndef SPARCMACHINEFUNCTIONINFO_H +#define SPARCMACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + + class SparcMachineFunctionInfo : public MachineFunctionInfo { + private: + unsigned GlobalBaseReg; + public: + SparcMachineFunctionInfo() : GlobalBaseReg(0) {} + SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + }; +} + +#endif diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 59efb19ab9c5a..7883260e14c0b 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Type.h" #include "llvm/ADT/BitVector.h" @@ -75,8 +76,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; @@ -112,6 +115,7 @@ void SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(SP::G1, false); MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1)); } + return 0; } void SparcRegisterInfo:: @@ -168,28 +172,25 @@ void SparcRegisterInfo::emitEpilogue(MachineFunction &MF, } unsigned SparcRegisterInfo::getRARegister() const { - assert(0 && "What is the return address register"); - return 0; + return SP::I7; } unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const { - assert(0 && "What is the frame register"); - return SP::G1; + return SP::I6; } unsigned SparcRegisterInfo::getEHExceptionRegister() const { - assert(0 && "What is the exception register"); + llvm_unreachable("What is the exception register"); return 0; } unsigned SparcRegisterInfo::getEHHandlerRegister() const { - assert(0 && "What is the exception handler register"); + llvm_unreachable("What is the exception handler register"); return 0; } int 
SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - assert(0 && "What is the dwarf register number"); - return -1; + return SparcGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } #include "SparcGenRegisterInfo.inc" diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index fc863f3b28f00..753b1c0492937 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -43,8 +43,9 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index e3a50ca42bbb3..2b05c19bf1480 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -16,6 +16,10 @@ class SparcReg<string n> : Register<n> { let Namespace = "SP"; } +class SparcCtrlReg<string n>: Register<n> { + let Namespace = "SP"; +} + // Registers are identified with 5-bit ID numbers. // Ri - 32-bit integer registers class Ri<bits<5> num, string n> : SparcReg<n> { @@ -31,6 +35,10 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> { let SubRegs = subregs; } +// Control Registers +def ICC : SparcCtrlReg<"ICC">; +def FCC : SparcCtrlReg<"FCC">; + // Integer registers def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; @@ -46,7 +54,7 @@ def O2 : Ri<10, "O2">, DwarfRegNum<[10]>; def O3 : Ri<11, "O3">, DwarfRegNum<[11]>; def O4 : Ri<12, "O4">, DwarfRegNum<[12]>; def O5 : Ri<13, "O5">, DwarfRegNum<[13]>; -def O6 : Ri<14, "O6">, DwarfRegNum<[14]>; +def O6 : Ri<14, "SP">, DwarfRegNum<[14]>; def O7 : Ri<15, "O7">, DwarfRegNum<[15]>; def L0 : Ri<16, "L0">, DwarfRegNum<[16]>; def L1 : Ri<17, "L1">, DwarfRegNum<[17]>; @@ -62,7 +70,7 @@ def I2 : Ri<26, "I2">, DwarfRegNum<[26]>; def I3 : Ri<27, "I3">, DwarfRegNum<[27]>; def I4 : Ri<28, "I4">, DwarfRegNum<[28]>; def I5 : Ri<29, "I5">, DwarfRegNum<[29]>; -def I6 : Ri<30, "I6">, DwarfRegNum<[30]>; +def I6 : Ri<30, "FP">, DwarfRegNum<[30]>; def I7 : Ri<31, "I7">, DwarfRegNum<[31]>; // Floating-point registers diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index aaddbff073ad5..8a88cc076429a 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -22,7 +22,7 @@ namespace { cl::desc("Enable V9 instructions in the V8 target")); } -SparcSubtarget::SparcSubtarget(const Module &M, const std::string &FS) { +SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS) { // Set the default features.
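// (Editor's note: the conservative V8 defaults set here are presumably then
// refined from the FS feature-string argument; the new TT triple parameter
// replaces the Module-based detection that this patch removes.)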
IsV9 = false; V8DeprecatedInsts = false; diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index e5a5ba47f1064..43770343d3348 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -18,14 +18,13 @@ #include <string> namespace llvm { - class Module; - + class SparcSubtarget : public TargetSubtarget { bool IsV9; bool V8DeprecatedInsts; bool IsVIS; public: - SparcSubtarget(const Module &M, const std::string &FS); + SparcSubtarget(const std::string &TT, const std::string &FS); bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 1343bccadf548..3a381151f946e 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -10,63 +10,30 @@ // //===----------------------------------------------------------------------===// -#include "SparcTargetAsmInfo.h" +#include "SparcMCAsmInfo.h" #include "SparcTargetMachine.h" #include "Sparc.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -// Register the target. -static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC"); +extern "C" void LLVMInitializeSparcTarget() { + // Register the target. + RegisterTargetMachine<SparcTargetMachine> X(TheSparcTarget); + RegisterAsmInfo<SparcELFMCAsmInfo> Y(TheSparcTarget); -// No assembler printer by default -SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0; - - -// Force static initialization. -extern "C" void LLVMInitializeSparcTarget() { } - -const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const { - // FIXME: Handle Solaris subtarget someday :) - return new SparcELFTargetAsmInfo(*this); } /// SparcTargetMachine ctor - Create an ILP32 architecture model /// -SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS) - : DataLayout("E-p:32:32-f128:128:128"), - Subtarget(M, FS), TLInfo(*this), InstrInfo(Subtarget), +SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : LLVMTargetMachine(T, TT), + DataLayout("E-p:32:32-f128:128:128"), + Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } -unsigned SparcTargetMachine::getModuleMatchQuality(const Module &M) { - std::string TT = M.getTargetTriple(); - if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "sparc-") - return 20; - - // If the target triple is something non-sparc, we don't match. - if (!TT.empty()) return 0; - - if (M.getEndianness() == Module::BigEndian && - M.getPointerSize() == Module::Pointer32) -#ifdef __sparc__ - return 20; // BE/32 ==> Prefer sparc on sparc -#else - return 5; // BE/32 ==> Prefer ppc elsewhere -#endif - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target - -#if defined(__sparc__) - return 10; -#else - return 0; -#endif -} - bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createSparcISelDag(*this)); @@ -82,14 +49,3 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, PM.add(createSparcDelaySlotFillerPass(*this)); return true; } - -bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { - // Output assembly language.
- assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, Verbose)); - return false; -} diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index ee55d3ce774d0..cce55105e76ed 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -23,27 +23,15 @@ namespace llvm { -class Module; - class SparcTargetMachine : public LLVMTargetMachine { const TargetData DataLayout; // Calculates type size & alignment SparcSubtarget Subtarget; SparcTargetLowering TLInfo; SparcInstrInfo InstrInfo; TargetFrameInfo FrameInfo; - -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - - // To avoid having target depend on the asmprinter stuff libraries, asmprinter - // set this functions to ctor pointer at startup time if they are linked in. - typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, - TargetMachine &tm, - bool verbose); - static AsmPrinterCtorFn AsmPrinterCtor; - public: - SparcTargetMachine(const Module &M, const std::string &FS); + SparcTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } @@ -55,18 +43,10 @@ public: return const_cast<SparcTargetLowering*>(&TLInfo); } virtual const TargetData *getTargetData() const { return &DataLayout; } - static unsigned getModuleMatchQuality(const Module &M); // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); - - static void registerAsmPrinter(AsmPrinterCtorFn F) { - AsmPrinterCtor = F; - } }; } // end namespace llvm diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..870b56a6ea1be --- /dev/null +++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMSparcInfo + SparcTargetInfo.cpp + ) + +add_dependencies(LLVMSparcInfo SparcCodeGenTable_gen) diff --git a/lib/Target/Sparc/TargetInfo/Makefile b/lib/Target/Sparc/TargetInfo/Makefile new file mode 100644 index 0000000000000..641ed87160c75 --- /dev/null +++ b/lib/Target/Sparc/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Sparc/TargetInfo/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMSparcInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
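# (Editor's note: TargetInfo is split into its own small library so that,
# presumably, tools can register and enumerate the Sparc target without
# linking the full code generator.)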
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp new file mode 100644 index 0000000000000..5d697bd23a61c --- /dev/null +++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- SparcTargetInfo.cpp - Sparc Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Sparc.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheSparcTarget; + +extern "C" void LLVMInitializeSparcTargetInfo() { + RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc"); +} diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index f9370256c602f..664a43cbcca7b 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -12,10 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/SubtargetFeature.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Streams.h" #include -#include #include #include using namespace llvm; @@ -145,22 +144,22 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize, unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize); // Print the CPU table. - cerr << "Available CPUs for this target:\n\n"; + errs() << "Available CPUs for this target:\n\n"; for (size_t i = 0; i != CPUTableSize; i++) - cerr << " " << CPUTable[i].Key + errs() << " " << CPUTable[i].Key << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ') << " - " << CPUTable[i].Desc << ".\n"; - cerr << "\n"; + errs() << "\n"; // Print the Feature table.
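// (Editor's sketch of the format produced below, using a hypothetical
// feature entry; each row pads the key to the longest name and appends the
// description:
//   Available features for this target:
//
//     v9 - Enable SPARC-V9 instructions.
// )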
- cerr << "Available features for this target:\n\n"; + errs() << "Available features for this target:\n\n"; for (size_t i = 0; i != FeatTableSize; i++) - cerr << " " << FeatTable[i].Key + errs() << " " << FeatTable[i].Key << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ') << " - " << FeatTable[i].Desc << ".\n"; - cerr << "\n"; + errs() << "\n"; - cerr << "Use +feature to enable a feature, or -feature to disable it.\n" + errs() << "Use +feature to enable a feature, or -feature to disable it.\n" << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n"; exit(1); } @@ -283,10 +282,9 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize); } } else { - cerr << "'" << Features[0] - << "' is not a recognized processor for this target" - << " (ignoring processor)" - << "\n"; + errs() << "'" << Features[0] + << "' is not a recognized processor for this target" + << " (ignoring processor)\n"; } // Iterate through each feature for (size_t i = 1; i < Features.size(); i++) { @@ -314,10 +312,9 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize); } } else { - cerr << "'" << Feature - << "' is not a recognized feature for this target" - << " (ignoring feature)" - << "\n"; + errs() << "'" << Feature + << "' is not a recognized feature for this target" + << " (ignoring feature)\n"; } } @@ -340,25 +337,23 @@ void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table, if (Entry) { return Entry->Value; } else { - cerr << "'" << Features[0] - << "' is not a recognized processor for this target" - << " (ignoring processor)" - << "\n"; + errs() << "'" << Features[0] + << "' is not a recognized processor for this target" + << " (ignoring processor)\n"; return NULL; } } /// print - Print feature string. /// -void SubtargetFeatures::print(std::ostream &OS) const { - for (size_t i = 0; i < Features.size(); i++) { +void SubtargetFeatures::print(raw_ostream &OS) const { + for (size_t i = 0, e = Features.size(); i != e; ++i) OS << Features[i] << " "; - } OS << "\n"; } /// dump - Dump feature info. /// void SubtargetFeatures::dump() const { - print(*cerr.stream()); + print(errs()); } diff --git a/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt b/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000000000..c6be83a61080e --- /dev/null +++ b/lib/Target/SystemZ/AsmPrinter/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMSystemZAsmPrinter + SystemZAsmPrinter.cpp + ) +add_dependencies(LLVMSystemZAsmPrinter SystemZCodeGenTable_gen) diff --git a/lib/Target/SystemZ/AsmPrinter/Makefile b/lib/Target/SystemZ/AsmPrinter/Makefile new file mode 100644 index 0000000000000..9a350dfe62e7d --- /dev/null +++ b/lib/Target/SystemZ/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/SystemZ/AsmPrinter/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMSystemZAsmPrinter + +# Hack: we need to include 'main' SystemZ target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp new file mode 100644 index 0000000000000..a128992934be9 --- /dev/null +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -0,0 +1,391 @@ +//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the SystemZ assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" + +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN SystemZAsmPrinter : public AsmPrinter { + public: + SystemZAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *MAI, bool V) + : AsmPrinter(O, TM, MAI, V) {} + + virtual const char *getPassName() const { + return "SystemZ Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int OpNum, + const char* Modifier = 0); + void printPCRelImmOperand(const MachineInstr *MI, int OpNum); + void printRIAddrOperand(const MachineInstr *MI, int OpNum, + const char* Modifier = 0); + void printRRIAddrOperand(const MachineInstr *MI, int OpNum, + const char* Modifier = 0); + void printS16ImmOperand(const MachineInstr *MI, int OpNum) { + O << (int16_t)MI->getOperand(OpNum).getImm(); + } + void printS32ImmOperand(const MachineInstr *MI, int OpNum) { + O << (int32_t)MI->getOperand(OpNum).getImm(); + } + + void printInstruction(const MachineInstr *MI); // autogenerated. 
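// (Editor's note: printInstruction and getRegisterName are generated by
// tblgen from the .td files; see the #include "SystemZGenAsmWriter.inc"
// just after this class.)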
+ static const char *getRegisterName(unsigned RegNo); + + void printMachineInstruction(const MachineInstr * MI); + + void emitFunctionHeader(const MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &F); + void PrintGlobalVariable(const GlobalVariable* GVar); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + } + }; +} // end of anonymous namespace + +#include "SystemZGenAsmWriter.inc" + +void SystemZAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + unsigned FnAlign = MF.getAlignment(); + const Function *F = MF.getFunction(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + + EmitAlignment(FnAlign, F); + + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. + case Function::PrivateLinkage: + case Function::LinkerPrivateLinkage: + break; + case Function::ExternalLinkage: + O << "\t.globl\t" << CurrentFnName << '\n'; + break; + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + O << "\t.weak\t" << CurrentFnName << '\n'; + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + O << "\t.type\t" << CurrentFnName << ",@function\n" + << CurrentFnName << ":\n"; +} + +bool SystemZAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + O << "\n\n"; + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print the 'header' of function + emitFunctionHeader(MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. + EmitBasicBlockStart(I); + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) + // Print the assembly for the instruction. + printMachineInstruction(II); + } + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n'; + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // We didn't modify anything + return false; +} + +void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + processDebugLoc(MI, true); + + // Call the autogenerated instruction printer routines. + printInstruction(MI); + + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + O << '\n'; + + processDebugLoc(MI, false); +} + +void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){ + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getMangledName(GV); + + O << Name; + + // Assemble calls via PLT for externally visible symbols if PIC. 
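// (Editor's note: i.e. only symbols that may still be preempted at link
// time get the @PLT suffix; the hidden/protected/local cases tested below
// bind directly.)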
+ if (TM.getRelocationModel() == Reloc::PIC_ && + !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && + !GV->hasLocalLinkage()) + O << "@PLT"; + + printOffset(MO.getOffset()); + return; + } + case MachineOperand::MO_ExternalSymbol: { + std::string Name(MAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + O << Name; + + if (TM.getRelocationModel() == Reloc::PIC_) + O << "@PLT"; + + return; + } + default: + assert(0 && "Not implemented yet!"); + } +} + + +void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + const char* Modifier) { + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: { + assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + "Virtual registers should be already mapped!"); + unsigned Reg = MO.getReg(); + if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { + if (strncmp(Modifier + 7, "even", 4) == 0) + Reg = TRI->getSubReg(Reg, SystemZ::SUBREG_EVEN); + else if (strncmp(Modifier + 7, "odd", 3) == 0) + Reg = TRI->getSubReg(Reg, SystemZ::SUBREG_ODD); + else + assert(0 && "Invalid subreg modifier"); + } + + O << '%' << getRegisterName(Reg); + return; + } + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); + return; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' + << MO.getIndex(); + + return; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); + + printOffset(MO.getOffset()); + break; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getMangledName(GV); + + O << Name; + break; + } + case MachineOperand::MO_ExternalSymbol: { + std::string Name(MAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + O << Name; + break; + } + default: + assert(0 && "Not implemented yet!"); + } + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case SystemZII::MO_NO_FLAG: + break; + case SystemZII::MO_GOTENT: O << "@GOTENT"; break; + case SystemZII::MO_PLT: O << "@PLT"; break; + } + + printOffset(MO.getOffset()); +} + +void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum, + const char* Modifier) { + const MachineOperand &Base = MI->getOperand(OpNum); + + // Print displacement operand. + printOperand(MI, OpNum+1); + + // Print base operand (if any) + if (Base.getReg()) { + O << '('; + printOperand(MI, OpNum); + O << ')'; + } +} + +void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum, + const char* Modifier) { + const MachineOperand &Base = MI->getOperand(OpNum); + const MachineOperand &Index = MI->getOperand(OpNum+2); + + // Print displacement operand. + printOperand(MI, OpNum+1); + + // Print base operand (if any) + if (Base.getReg()) { + O << '('; + printOperand(MI, OpNum); + if (Index.getReg()) { + O << ','; + printOperand(MI, OpNum+2); + } + O << ')'; + } else + assert(!Index.getReg() && "Should allocate base register first!"); +} + +void SystemZAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) + return; // External globals require no code + + // Check to see if this is a special global used by LLVM, if so, emit it.
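// (Editor's note: "special" means names such as llvm.used or
// llvm.global_ctors, which EmitSpecialLLVMGlobal lowers by itself.)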
+ if (EmitSpecialLLVMGlobal(GVar)) + return; + + std::string name = Mang->getMangledName(GVar); + Constant *C = GVar->getInitializer(); + unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Align = std::max(1U, TD->getPreferredAlignmentLog(GVar)); + + printVisibility(name, GVar->getVisibility()); + + O << "\t.type\t" << name << ",@object\n"; + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); + + if (C->isNullValue() && !GVar->hasSection() && + !GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << '\n'; + + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + + if (VerboseAsm) { + O << "\t\t" << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + return; + } + + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + O << "\t.weak\t" << name << '\n'; + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << '\n'; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + } + + // Use 16-bit alignment by default to simplify a bunch of stuff + EmitAlignment(Align, GVar, 1); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << name << ", " << Size << '\n'; + + EmitGlobalConstant(C); +} + +// Force static initialization.
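// (Editor's note: this extern "C" hook is the entry point that
// InitializeAllAsmPrinters()-style initialization calls; RegisterAsmPrinter
// below attaches the SystemZAsmPrinter factory to TheSystemZTarget.)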
+extern "C" void LLVMInitializeSystemZAsmPrinter() { + RegisterAsmPrinter X(TheSystemZTarget); +} diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt new file mode 100644 index 0000000000000..81e51d89ad9f5 --- /dev/null +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_TARGET_DEFINITIONS SystemZ.td) + +tablegen(SystemZGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(SystemZGenRegisterNames.inc -gen-register-enums) +tablegen(SystemZGenRegisterInfo.inc -gen-register-desc) +tablegen(SystemZGenInstrNames.inc -gen-instr-enums) +tablegen(SystemZGenInstrInfo.inc -gen-instr-desc) +tablegen(SystemZGenAsmWriter.inc -gen-asm-writer) +tablegen(SystemZGenDAGISel.inc -gen-dag-isel) +tablegen(SystemZGenCallingConv.inc -gen-callingconv) +tablegen(SystemZGenSubtarget.inc -gen-subtarget) + +add_llvm_target(SystemZCodeGen + SystemZISelDAGToDAG.cpp + SystemZISelLowering.cpp + SystemZInstrInfo.cpp + SystemZMCAsmInfo.cpp + SystemZRegisterInfo.cpp + SystemZSubtarget.cpp + SystemZTargetMachine.cpp + ) + +target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG) diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile new file mode 100644 index 0000000000000..f1097ebcf3b7a --- /dev/null +++ b/lib/Target/SystemZ/Makefile @@ -0,0 +1,22 @@ +##===- lib/Target/SystemZ/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMSystemZCodeGen +TARGET = SystemZ + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \ + SystemZGenRegisterInfo.inc SystemZGenInstrNames.inc \ + SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \ + SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc + +DIRS = AsmPrinter TargetInfo + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h new file mode 100644 index 0000000000000..ea5240a10c9a0 --- /dev/null +++ b/lib/Target/SystemZ/SystemZ.h @@ -0,0 +1,61 @@ +//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM SystemZ backend. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_SystemZ_H +#define LLVM_TARGET_SystemZ_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class SystemZTargetMachine; + class FunctionPass; + class formatted_raw_ostream; + + namespace SystemZCC { + // SystemZ specific condition code. These correspond to SYSTEMZ_*_COND in + // SystemZInstrInfo.td. They must be kept in synch. 
+ enum CondCodes { + O = 0, + H = 1, + NLE = 2, + L = 3, + NHE = 4, + LH = 5, + NE = 6, + E = 7, + NLH = 8, + HE = 9, + NL = 10, + LE = 11, + NH = 12, + NO = 13, + INVALID = -1 + }; + } + + FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel); + + extern Target TheSystemZTarget; + +} // end namespace llvm; + +// Defines symbolic names for SystemZ registers. +// This defines a mapping from register name to register number. +#include "SystemZGenRegisterNames.inc" + +// Defines symbolic names for the SystemZ instructions. +#include "SystemZGenInstrNames.inc" + +#endif diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td new file mode 100644 index 0000000000000..4c08c087225e6 --- /dev/null +++ b/lib/Target/SystemZ/SystemZ.td @@ -0,0 +1,61 @@ +//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the SystemZ target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Subtarget Features. +//===----------------------------------------------------------------------===// +def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true", + "Support Z10 instructions">; + +//===----------------------------------------------------------------------===// +// SystemZ supported processors. 
+//===----------------------------------------------------------------------===// +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +def : Proc<"z9", []>; +def : Proc<"z10", [FeatureZ10]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "SystemZRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Calling Convention Description +//===----------------------------------------------------------------------===// + +include "SystemZCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "SystemZInstrInfo.td" +include "SystemZInstrFP.td" + +def SystemZInstrInfo : InstrInfo {} + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def SystemZ : Target { + let InstructionSet = SystemZInstrInfo; +} + diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td new file mode 100644 index 0000000000000..c799a9e501aa2 --- /dev/null +++ b/lib/Target/SystemZ/SystemZCallingConv.td @@ -0,0 +1,46 @@ +//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for SystemZ architecture. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// SystemZ Return Value Calling Convention +//===----------------------------------------------------------------------===// +def RetCC_SystemZ : CallingConv<[ + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // i64 is returned in register R2 + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>, + + // f32 / f64 are returned in F0 + CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>> +]>; + +//===----------------------------------------------------------------------===// +// SystemZ Argument Calling Conventions +//===----------------------------------------------------------------------===// +def CC_SystemZ : CallingConv<[ + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // The first 5 integer arguments of non-varargs functions are passed in + // integer registers. + CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>, + + // The first 4 floating point arguments of non-varargs functions are passed + // in FP registers. + CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, + CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>, + + // Integer values get stored in stack slots that are 8 bytes in + // size and 8-byte aligned.
+ CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>> +]>; diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp new file mode 100644 index 0000000000000..028ee8986a374 --- /dev/null +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -0,0 +1,829 @@ +//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the SystemZ target. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZISelLowering.h" +#include "SystemZTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static const unsigned subreg_even32 = 1; +static const unsigned subreg_odd32 = 2; +static const unsigned subreg_even = 3; +static const unsigned subreg_odd = 4; + +namespace { + /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's + /// instead of register numbers for the leaves of the matched tree. + struct SystemZRRIAddressMode { + enum { + RegBase, + FrameIndexBase + } BaseType; + + struct { // This is really a union, discriminated by BaseType! + SDValue Reg; + int FrameIndex; + } Base; + + SDValue IndexReg; + int64_t Disp; + bool isRI; + + SystemZRRIAddressMode(bool RI = false) + : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) { + } + + void dump() { + errs() << "SystemZRRIAddressMode " << this << '\n'; + if (BaseType == RegBase) { + errs() << "Base.Reg "; + if (Base.Reg.getNode() != 0) + Base.Reg.getNode()->dump(); + else + errs() << "nul"; + errs() << '\n'; + } else { + errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'; + } + if (!isRI) { + errs() << "IndexReg "; + if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); + else errs() << "nul"; + } + errs() << " Disp " << Disp << '\n'; + } + }; +} + +/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine +/// instructions for SelectionDAG operations. +/// +namespace { + class SystemZDAGToDAGISel : public SelectionDAGISel { + SystemZTargetLowering &Lowering; + const SystemZSubtarget &Subtarget; + + void getAddressOperandsRI(const SystemZRRIAddressMode &AM, + SDValue &Base, SDValue &Disp); + void getAddressOperands(const SystemZRRIAddressMode &AM, + SDValue &Base, SDValue &Disp, + SDValue &Index); + + public: + SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel), + Lowering(*TM.getTargetLowering()), + Subtarget(*TM.getSubtargetImpl()) { } + + virtual void InstructionSelect(); + + virtual const char *getPassName() const { + return "SystemZ DAG->DAG Pattern Instruction Selection"; + } + + /// getI8Imm - Return a target constant with the specified value, of type + /// i8. 
+ inline SDValue getI8Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i8); + } + + /// getI16Imm - Return a target constant with the specified value, of type + /// i16. + inline SDValue getI16Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i16); + } + + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + // Include the pieces autogenerated from the target description. + #include "SystemZGenDAGISel.inc" + + private: + bool SelectAddrRI12Only(SDValue Op, SDValue& Addr, + SDValue &Base, SDValue &Disp); + bool SelectAddrRI12(SDValue Op, SDValue& Addr, + SDValue &Base, SDValue &Disp, + bool is12BitOnly = false); + bool SelectAddrRI(SDValue Op, SDValue& Addr, + SDValue &Base, SDValue &Disp); + bool SelectAddrRRI12(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index); + bool SelectAddrRRI20(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index); + bool SelectLAAddr(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index); + + SDNode *Select(SDValue Op); + + bool TryFoldLoad(SDValue P, SDValue N, + SDValue &Base, SDValue &Disp, SDValue &Index); + + bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM, + bool is12Bit, unsigned Depth = 0); + bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM); + bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM, + bool is12Bit); + + #ifndef NDEBUG + unsigned Indent; + #endif + }; +} // end anonymous namespace + +/// createSystemZISelDag - This pass converts a legalized DAG into a +/// SystemZ-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new SystemZDAGToDAGISel(TM, OptLevel); +} + +/// isImmSExt20 - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// sign extension from a 20-bit value. If so, this returns true and the +/// immediate. +static bool isImmSExt20(int64_t Val, int64_t &Imm) { + if (Val >= -524288 && Val <= 524287) { + Imm = Val; + return true; + } + return false; +} + +/// isImmZExt12 - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// zero extension from a 12-bit value. If so, this returns true and the +/// immediate. +static bool isImmZExt12(int64_t Val, int64_t &Imm) { + if (Val >= 0 && Val <= 0xFFF) { + Imm = Val; + return true; + } + return false; +} + +/// MatchAddress - Add the specified node to the specified addressing mode, +/// returning true if it cannot be done. This just pattern matches for the +/// addressing mode. +bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM, + bool is12Bit, unsigned Depth) { + DebugLoc dl = N.getDebugLoc(); + DEBUG(errs() << "MatchAddress: "; AM.dump()); + // Limit recursion. + if (Depth > 5) + return MatchAddressBase(N, AM); + + // FIXME: We can perform better here. If we have something like + // (shift (add A, imm), N), we can try to reassociate stuff and fold shift of + // imm into addressing mode. + switch (N.getOpcode()) { + default: break; + case ISD::Constant: { + int64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); + int64_t Imm = 0; + bool Match = (is12Bit ?
+ isImmZExt12(AM.Disp + Val, Imm) : + isImmSExt20(AM.Disp + Val, Imm)); + if (Match) { + AM.Disp = Imm; + return false; + } + break; + } + + case ISD::FrameIndex: + if (AM.BaseType == SystemZRRIAddressMode::RegBase && + AM.Base.Reg.getNode() == 0) { + AM.BaseType = SystemZRRIAddressMode::FrameIndexBase; + AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); + return false; + } + break; + + case ISD::SUB: { + // Given A-B, if A can be completely folded into the address and + // the index field with the index field unused, use -B as the index. + // This is a win if A has multiple parts that can be folded into + // the address. Also, this saves a mov if the base register has + // other uses, since it avoids a two-address sub instruction, however + // it costs an additional mov if the index register has other uses. + + // Test if the LHS of the sub can be folded. + SystemZRRIAddressMode Backup = AM; + if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) { + AM = Backup; + break; + } + // Test if the index field is free for use. + if (AM.IndexReg.getNode() || AM.isRI) { + AM = Backup; + break; + } + + // If the base is a register with multiple uses, this transformation may + // save a mov. Otherwise it's probably better not to do it. + if (AM.BaseType == SystemZRRIAddressMode::RegBase && + (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) { + AM = Backup; + break; + } + + // Ok, the transformation is legal and appears profitable. Go for it. + SDValue RHS = N.getNode()->getOperand(1); + SDValue Zero = CurDAG->getConstant(0, N.getValueType()); + SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); + AM.IndexReg = Neg; + + // Insert the new nodes into the topological ordering. + if (Zero.getNode()->getNodeId() == -1 || + Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), Zero.getNode()); + Zero.getNode()->setNodeId(N.getNode()->getNodeId()); + } + if (Neg.getNode()->getNodeId() == -1 || + Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), Neg.getNode()); + Neg.getNode()->setNodeId(N.getNode()->getNodeId()); + } + return false; + } + + case ISD::ADD: { + SystemZRRIAddressMode Backup = AM; + if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) && + !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1)) + return false; + AM = Backup; + if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) && + !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) + return false; + AM = Backup; + + // If we couldn't fold both operands into the address at the same time, + // see if we can just put each operand into a register and fold at least + // the add. + if (!AM.isRI && + AM.BaseType == SystemZRRIAddressMode::RegBase && + !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) { + AM.Base.Reg = N.getNode()->getOperand(0); + AM.IndexReg = N.getNode()->getOperand(1); + return false; + } + break; + } + + case ISD::OR: + // Handle "X | C" as "X + C" iff X is known to have C bits clear. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + SystemZRRIAddressMode Backup = AM; + int64_t Offset = CN->getSExtValue(); + int64_t Imm = 0; + bool MatchOffset = (is12Bit ? + isImmZExt12(AM.Disp + Offset, Imm) : + isImmSExt20(AM.Disp + Offset, Imm)); + // The resultant disp must fit in 12 or 20-bits. + if (MatchOffset && + // LHS should be an addr mode. + !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) && + // Check to see if the LHS & C is zero.
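// (Editor's note: when no set bits overlap, X | C equals X + C, so the
// constant can be folded into the displacement exactly as an ADD would
// be; the MaskedValueIsZero call below proves the non-overlap.)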
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { + AM.Disp = Imm; + return false; + } + AM = Backup; + } + break; + } + + return MatchAddressBase(N, AM); +} + +/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the +/// specified addressing mode without any further recursion. +bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N, + SystemZRRIAddressMode &AM) { + // Is the base register already occupied? + if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) { + // If so, check to see if the index register is set. + if (AM.IndexReg.getNode() == 0 && !AM.isRI) { + AM.IndexReg = N; + return false; + } + + // Otherwise, we cannot select it. + return true; + } + + // Default, generate it as a register. + AM.BaseType = SystemZRRIAddressMode::RegBase; + AM.Base.Reg = N; + return false; +} + +void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM, + SDValue &Base, SDValue &Disp) { + if (AM.BaseType == SystemZRRIAddressMode::RegBase) + Base = AM.Base.Reg; + else + Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()); + Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64); +} + +void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM, + SDValue &Base, SDValue &Disp, + SDValue &Index) { + getAddressOperandsRI(AM, Base, Disp); + Index = AM.IndexReg; +} + +/// Returns true if the address can be represented by a base register plus +/// an unsigned 12-bit displacement [r+imm]. +bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue Op, SDValue& Addr, + SDValue &Base, SDValue &Disp) { + return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true); +} + +bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr, + SDValue &Base, SDValue &Disp, + bool is12BitOnly) { + SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true); + bool Done = false; + + if (!Addr.hasOneUse()) { + unsigned Opcode = Addr.getOpcode(); + if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { + // If we are able to fold N into addressing mode, then we'll allow it even + // if N has multiple uses. In general, addressing computation is used as + // addresses by all of its uses. But watch out for CopyToReg uses, that + // means the address computation is liveout. It will be computed by a LA + // so we want to avoid computing the address twice. + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + if (UI->getOpcode() == ISD::CopyToReg) { + MatchAddressBase(Addr, AM12); + Done = true; + break; + } + } + } + } + if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true)) + return false; + + // Check whether we can match stuff using 20-bit displacements + if (!Done && !is12BitOnly && + !MatchAddress(Addr, AM20, /* is12Bit */ false)) + if (AM12.Disp == 0 && AM20.Disp != 0) + return false; + + DEBUG(errs() << "MatchAddress (final): "; AM12.dump()); + + EVT VT = Addr.getValueType(); + if (AM12.BaseType == SystemZRRIAddressMode::RegBase) { + if (!AM12.Base.Reg.getNode()) + AM12.Base.Reg = CurDAG->getRegister(0, VT); + } + + assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!"); + + getAddressOperandsRI(AM12, Base, Disp); + + return true; +} + +/// Returns true if the address can be represented by a base register plus +/// a signed 20-bit displacement [r+imm].
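/// (Editor's note: unlike the unsigned 12-bit D(B) form matched above, this
/// maps to the long-displacement encodings where disp may be any signed
/// 20-bit value; see isImmSExt20 earlier in the file.)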
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr, + SDValue &Base, SDValue &Disp) { + SystemZRRIAddressMode AM(/*isRI*/true); + bool Done = false; + + if (!Addr.hasOneUse()) { + unsigned Opcode = Addr.getOpcode(); + if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { + // If we are able to fold N into addressing mode, then we'll allow it even + // if N has multiple uses. In general, addressing computation is used as + // addresses by all of its uses. But watch out for CopyToReg uses, that + // means the address computation is liveout. It will be computed by a LA + // so we want to avoid computing the address twice. + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + if (UI->getOpcode() == ISD::CopyToReg) { + MatchAddressBase(Addr, AM); + Done = true; + break; + } + } + } + } + if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false)) + return false; + + DEBUG(errs() << "MatchAddress (final): "; AM.dump()); + + EVT VT = Addr.getValueType(); + if (AM.BaseType == SystemZRRIAddressMode::RegBase) { + if (!AM.Base.Reg.getNode()) + AM.Base.Reg = CurDAG->getRegister(0, VT); + } + + assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!"); + + getAddressOperandsRI(AM, Base, Disp); + + return true; +} + +/// Returns true if the address can be represented by a base register plus +/// index register plus an unsigned 12-bit displacement [base + idx + imm]. +bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index) { + SystemZRRIAddressMode AM20, AM12; + bool Done = false; + + if (!Addr.hasOneUse()) { + unsigned Opcode = Addr.getOpcode(); + if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { + // If we are able to fold N into addressing mode, then we'll allow it even + // if N has multiple uses. In general, addressing computation is used as + // addresses by all of its uses. But watch out for CopyToReg uses, that + // means the address computation is liveout. It will be computed by a LA + // so we want to avoid computing the address twice. + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + if (UI->getOpcode() == ISD::CopyToReg) { + MatchAddressBase(Addr, AM12); + Done = true; + break; + } + } + } + } + if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true)) + return false; + + // Check whether we can match stuff using 20-bit displacements + if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false)) + if (AM12.Disp == 0 && AM20.Disp != 0) + return false; + + DEBUG(errs() << "MatchAddress (final): "; AM12.dump()); + + EVT VT = Addr.getValueType(); + if (AM12.BaseType == SystemZRRIAddressMode::RegBase) { + if (!AM12.Base.Reg.getNode()) + AM12.Base.Reg = CurDAG->getRegister(0, VT); + } + + if (!AM12.IndexReg.getNode()) + AM12.IndexReg = CurDAG->getRegister(0, VT); + + getAddressOperands(AM12, Base, Disp, Index); + + return true; +} + +/// Returns true if the address can be represented by a base register plus +/// index register plus a signed 20-bit displacement [base + idx + imm].
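/// (Editor's sketch: this is the full base + index + displacement form of
/// z/Architecture addressing; a hypothetical 'lg %r1, 8(%r3,%r2)' adds
/// index %r3, base %r2 and displacement 8.)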
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index) { + SystemZRRIAddressMode AM; + bool Done = false; + + if (!Addr.hasOneUse()) { + unsigned Opcode = Addr.getOpcode(); + if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { + // If we are able to fold N into addressing mode, then we'll allow it even + // if N has multiple uses. In general, addressing computation is used as + // addresses by all of its uses. But watch out for CopyToReg uses, that + // means the address computation is liveout. It will be computed by a LA + // so we want to avoid computing the address twice. + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + if (UI->getOpcode() == ISD::CopyToReg) { + MatchAddressBase(Addr, AM); + Done = true; + break; + } + } + } + } + if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false)) + return false; + + DEBUG(errs() << "MatchAddress (final): "; AM.dump()); + + EVT VT = Addr.getValueType(); + if (AM.BaseType == SystemZRRIAddressMode::RegBase) { + if (!AM.Base.Reg.getNode()) + AM.Base.Reg = CurDAG->getRegister(0, VT); + } + + if (!AM.IndexReg.getNode()) + AM.IndexReg = CurDAG->getRegister(0, VT); + + getAddressOperands(AM, Base, Disp, Index); + + return true; +} + +/// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing +/// mode it matches can be cost effectively emitted as an LA/LAY instruction. +bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr, + SDValue &Base, SDValue &Disp, SDValue &Index) { + SystemZRRIAddressMode AM; + + if (MatchAddress(Addr, AM, false)) + return false; + + EVT VT = Addr.getValueType(); + unsigned Complexity = 0; + if (AM.BaseType == SystemZRRIAddressMode::RegBase) + if (AM.Base.Reg.getNode()) + Complexity = 1; + else + AM.Base.Reg = CurDAG->getRegister(0, VT); + else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase) + Complexity = 4; + + if (AM.IndexReg.getNode()) + Complexity += 1; + else + AM.IndexReg = CurDAG->getRegister(0, VT); + + if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) + Complexity += 1; + + if (Complexity > 2) { + getAddressOperands(AM, Base, Disp, Index); + return true; + } + + return false; +} + +bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, + SDValue &Base, SDValue &Disp, SDValue &Index) { + if (ISD::isNON_EXTLoad(N.getNode()) && + N.hasOneUse() && + IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) + return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); + return false; +} + +/// InstructionSelect - This callback is invoked by +/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. +void SystemZDAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + // Codegen the basic block. + DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(Indent = 0); + SelectRoot(*CurDAG); + DEBUG(errs() << "===== Instruction selection ends:\n"); + + CurDAG->RemoveDeadNodes(); +} + +SDNode *SystemZDAGToDAGISel::Select(SDValue Op) { + SDNode *Node = Op.getNode(); + EVT NVT = Node->getValueType(0); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opcode = Node->getOpcode(); + + // Dump information about the Node being selected + DEBUG(errs().indent(Indent) << "Selecting: "; + Node->dump(CurDAG); + errs() << "\n"); + DEBUG(Indent += 2); + + // If we have a custom node, we already have selected! 
+ if (Node->isMachineOpcode()) { + DEBUG(errs().indent(Indent-2) << "== "; + Node->dump(CurDAG); + errs() << "\n"); + DEBUG(Indent -= 2); + return NULL; // Already selected. + } + + switch (Opcode) { + default: break; + case ISD::SDIVREM: { + unsigned Opc, MOpc; + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + EVT ResVT; + bool is32Bit = false; + switch (NVT.getSimpleVT().SimpleTy) { + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m; + ResVT = MVT::v2i64; + is32Bit = true; + break; + case MVT::i64: + Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m; + ResVT = MVT::v2i64; + break; + } + + SDValue Tmp0, Tmp1, Tmp2; + bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + + // Prepare the dividend + SDNode *Dividend; + if (is32Bit) + Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0); + else + Dividend = N0.getNode(); + + // Insert prepared dividend into suitable 'subreg' + SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + dl, ResVT); + Dividend = + CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + SDValue(Tmp, 0), SDValue(Dividend, 0), + CurDAG->getTargetConstant(subreg_odd, MVT::i32)); + + SDNode *Result; + SDValue DivVal = SDValue(Dividend, 0); + if (foldedLoad) { + SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; + Result = CurDAG->getMachineNode(MOpc, dl, ResVT, + Ops, array_lengthof(Ops)); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + } else { + Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1); + } + + // Copy the division (odd subreg) result, if it is needed. + if (!Op.getValue(0).use_empty()) { + unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(SubRegIdx, + MVT::i32)); + + ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + DEBUG(errs().indent(Indent-2) << "=> "; + Result->dump(CurDAG); + errs() << "\n"); + } + + // Copy the remainder (even subreg) result, if it is needed. + if (!Op.getValue(1).use_empty()) { + unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); + SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(SubRegIdx, + MVT::i32)); + + ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + DEBUG(errs().indent(Indent-2) << "=> "; + Result->dump(CurDAG); + errs() << "\n"); + } + +#ifndef NDEBUG + Indent -= 2; +#endif + + return NULL; + } + case ISD::UDIVREM: { + unsigned Opc, MOpc, ClrOpc; + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + EVT ResVT; + + bool is32Bit = false; + switch (NVT.getSimpleVT().SimpleTy) { + default: assert(0 && "Unsupported VT!"); + case MVT::i32: + Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m; + ClrOpc = SystemZ::MOV64Pr0_even; + ResVT = MVT::v2i32; + is32Bit = true; + break; + case MVT::i64: + Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m; + ClrOpc = SystemZ::MOV128r0_even; + ResVT = MVT::v2i64; + break; + } + + SDValue Tmp0, Tmp1, Tmp2; + bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2); + + // Prepare the dividend + SDNode *Dividend = N0.getNode(); + + // Insert prepared dividend into suitable 'subreg' + SDNode *Tmp = CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + dl, ResVT); + { + unsigned SubRegIdx = (is32Bit ? 
subreg_odd32 : subreg_odd); + Dividend = + CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT, + SDValue(Tmp, 0), SDValue(Dividend, 0), + CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); + } + + // Zero out even subreg + Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0)); + + SDValue DivVal = SDValue(Dividend, 0); + SDNode *Result; + if (foldedLoad) { + SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; + Result = CurDAG->getMachineNode(MOpc, dl,ResVT, + Ops, array_lengthof(Ops)); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(Result, 0)); + } else { + Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1); + } + + // Copy the division (odd subreg) result, if it is needed. + if (!Op.getValue(0).use_empty()) { + unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd); + SDNode *Div = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(SubRegIdx, + MVT::i32)); + ReplaceUses(Op.getValue(0), SDValue(Div, 0)); + DEBUG(errs().indent(Indent-2) << "=> "; + Result->dump(CurDAG); + errs() << "\n"); + } + + // Copy the remainder (even subreg) result, if it is needed. + if (!Op.getValue(1).use_empty()) { + unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even); + SDNode *Rem = CurDAG->getMachineNode(TargetInstrInfo::EXTRACT_SUBREG, + dl, NVT, + SDValue(Result, 0), + CurDAG->getTargetConstant(SubRegIdx, + MVT::i32)); + ReplaceUses(Op.getValue(1), SDValue(Rem, 0)); + DEBUG(errs().indent(Indent-2) << "=> "; + Result->dump(CurDAG); + errs() << "\n"); + } + +#ifndef NDEBUG + Indent -= 2; +#endif + + return NULL; + } + } + + // Select the default instruction + SDNode *ResNode = SelectCode(Op); + + DEBUG(errs().indent(Indent-2) << "=> "; + if (ResNode == NULL || ResNode == Op.getNode()) + Op.getNode()->dump(CurDAG); + else + ResNode->dump(CurDAG); + errs() << "\n"; + ); + DEBUG(Indent -= 2); + + return ResNode; +} diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp new file mode 100644 index 0000000000000..07e0d8305806b --- /dev/null +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -0,0 +1,843 @@ +//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation -----==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZTargetLowering class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-lower" + +#include "SystemZISelLowering.h" +#include "SystemZ.h" +#include "SystemZTargetMachine.h" +#include "SystemZSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/VectorExtras.h" +using namespace llvm; + +SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : + TargetLowering(tm, new TargetLoweringObjectFileELF()), + Subtarget(*tm.getSubtargetImpl()), TM(tm) { + + RegInfo = TM.getRegisterInfo(); + + // Set up the register classes. + addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass); + addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass); + addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass); + addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass); + + if (!UseSoftFloat) { + addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass); + addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass); + + addLegalFPImmediate(APFloat(+0.0)); // lzer + addLegalFPImmediate(APFloat(+0.0f)); // lzdr + addLegalFPImmediate(APFloat(-0.0)); // lzer + lner + addLegalFPImmediate(APFloat(-0.0f)); // lzdr + lndr + } + + // Compute derived properties from the register classes + computeRegisterProperties(); + + // Set shifts properties + setShiftAmountType(MVT::i64); + + // Provide all sorts of operation actions + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + + setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + + setStackPointerRegisterToSaveRestore(SystemZ::R15D); + setSchedulingPreference(SchedulingForLatency); + setBooleanContents(ZeroOrOneBooleanContent); + + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::i64, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + 
setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i64, Expand); + setOperationAction(ISD::CTLZ, MVT::i32, Promote); + setOperationAction(ISD::CTLZ, MVT::i64, Legal); + + // FIXME: Can we lower these 2 efficiently? + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::i64, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::i64, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + + // FIXME: Can we support these natively? + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + + // Lower some FP stuff + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + // We have only 64-bit bitconverts + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); +} + +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + case ISD::BR_CC: return LowerBR_CC(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + default: + llvm_unreachable("Should not custom lower this!"); + return SDValue(); + } +} + +//===----------------------------------------------------------------------===// +// SystemZ Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
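+/// For example (illustrative snippet), in inline assembly such as
+///   asm("lgr %0, %1" : "=r"(Dst) : "r"(Src));
+/// the letter 'r' is reported as C_RegisterClass by the routine below.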
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      return C_RegisterClass;
+    default:
+      break;
+    }
+  }
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SystemZTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+                             EVT VT) const {
+  if (Constraint.size() == 1) {
+    // GCC Constraint Letters
+    switch (Constraint[0]) {
+    default: break;
+    case 'r':   // GENERAL_REGS
+      if (VT == MVT::i32)
+        return std::make_pair(0U, SystemZ::GR32RegisterClass);
+      else if (VT == MVT::i128)
+        return std::make_pair(0U, SystemZ::GR128RegisterClass);
+
+      return std::make_pair(0U, SystemZ::GR64RegisterClass);
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
+
+SDValue
+SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
+                                            CallingConv::ID CallConv,
+                                            bool isVarArg,
+                                            const SmallVectorImpl<ISD::InputArg>
+                                              &Ins,
+                                            DebugLoc dl,
+                                            SelectionDAG &DAG,
+                                            SmallVectorImpl<SDValue> &InVals) {
+
+  switch (CallConv) {
+  default:
+    llvm_unreachable("Unsupported calling convention");
+  case CallingConv::C:
+  case CallingConv::Fast:
+    return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+  }
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+                                 CallingConv::ID CallConv, bool isVarArg,
+                                 bool isTailCall,
+                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                 const SmallVectorImpl<ISD::InputArg> &Ins,
+                                 DebugLoc dl, SelectionDAG &DAG,
+                                 SmallVectorImpl<SDValue> &InVals) {
+
+  switch (CallConv) {
+  default:
+    llvm_unreachable("Unsupported calling convention");
+  case CallingConv::Fast:
+  case CallingConv::C:
+    return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
+                          Outs, Ins, dl, DAG, InVals);
+  }
+}
+
+/// LowerCCCArguments - transform physical registers into virtual registers and
+/// generate load operations for arguments placed on the stack.
+// FIXME: struct return stuff
+// FIXME: varargs
+SDValue
+SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
+                                         CallingConv::ID CallConv,
+                                         bool isVarArg,
+                                         const SmallVectorImpl<ISD::InputArg>
+                                           &Ins,
+                                         DebugLoc dl,
+                                         SelectionDAG &DAG,
+                                         SmallVectorImpl<SDValue> &InVals) {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+  // Assign locations to all of the incoming arguments.
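+  // (Note: CC_SystemZ below is the TableGen-generated routine pulled in via
+  // SystemZGenCallingConv.inc above; on this ABI the first integer arguments
+  // are assumed to land in R2D and up, with the remainder assigned
+  // CCValAssign::MemLoc stack slots.)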
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+  if (isVarArg)
+    llvm_report_error("Varargs not supported yet");
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    SDValue ArgValue;
+    CCValAssign &VA = ArgLocs[i];
+    EVT LocVT = VA.getLocVT();
+    if (VA.isRegLoc()) {
+      // Arguments passed in registers
+      TargetRegisterClass *RC;
+      switch (LocVT.getSimpleVT().SimpleTy) {
+      default:
+#ifndef NDEBUG
+        errs() << "LowerFormalArguments Unhandled argument type: "
+               << LocVT.getSimpleVT().SimpleTy
+               << "\n";
+#endif
+        llvm_unreachable(0);
+      case MVT::i64:
+        RC = SystemZ::GR64RegisterClass;
+        break;
+      case MVT::f32:
+        RC = SystemZ::FP32RegisterClass;
+        break;
+      case MVT::f64:
+        RC = SystemZ::FP64RegisterClass;
+        break;
+      }
+
+      unsigned VReg = RegInfo.createVirtualRegister(RC);
+      RegInfo.addLiveIn(VA.getLocReg(), VReg);
+      ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+    } else {
+      // Sanity check
+      assert(VA.isMemLoc());
+
+      // Create the nodes corresponding to a load from this parameter slot.
+      // Create the frame index object for this incoming parameter...
+      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
+                                      VA.getLocMemOffset());
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+      ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
+                             PseudoSourceValue::getFixedStack(FI), 0);
+    }
+
+    // If this is an 8/16/32-bit value, it is really passed promoted to 64
+    // bits. Insert an assert[sz]ext to capture this, then truncate to the
+    // right size.
+    if (VA.getLocInfo() == CCValAssign::SExt)
+      ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
+                             DAG.getValueType(VA.getValVT()));
+    else if (VA.getLocInfo() == CCValAssign::ZExt)
+      ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
+                             DAG.getValueType(VA.getValVT()));
+
+    if (VA.getLocInfo() != CCValAssign::Full)
+      ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+
+    InVals.push_back(ArgValue);
+  }
+
+  return Chain;
+}
+
+/// LowerCCCCallTo - function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: sret.
+SDValue
+SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
+                                      CallingConv::ID CallConv, bool isVarArg,
+                                      bool isTailCall,
+                                      const SmallVectorImpl<ISD::OutputArg>
+                                        &Outs,
+                                      const SmallVectorImpl<ISD::InputArg> &Ins,
+                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SmallVectorImpl<SDValue> &InVals) {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Offset to the first argument stack slot.
+  const unsigned FirstArgOffset = 160;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 ArgLocs, *DAG.getContext());
+
+  CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+                                                      getPointerTy(), true));
+
+  SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
+  SmallVector<SDValue, 12> MemOpChains;
+  SDValue StackPtr;
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    SDValue Arg = Outs[i].Val;
+
+    // Promote the value if needed.
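+    // (Illustration: an i32 argument assigned to a 64-bit location is
+    // widened here; which of SIGN_EXTEND/ZERO_EXTEND/ANY_EXTEND is used
+    // follows the CCValAssign::LocInfo that CC_SystemZ computed.)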
+    switch (VA.getLocInfo()) {
+      default: assert(0 && "Unknown loc info!");
+      case CCValAssign::Full: break;
+      case CCValAssign::SExt:
+        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+      case CCValAssign::ZExt:
+        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+      case CCValAssign::AExt:
+        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+        break;
+    }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      assert(VA.isMemLoc());
+
+      if (StackPtr.getNode() == 0)
+        StackPtr =
+          DAG.getCopyFromReg(Chain, dl,
+                             (RegInfo->hasFP(MF) ?
+                              SystemZ::R11D : SystemZ::R15D),
+                             getPointerTy());
+
+      unsigned Offset = FirstArgOffset + VA.getLocMemOffset();
+      SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                   StackPtr,
+                                   DAG.getIntPtrConstant(Offset));
+
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         PseudoSourceValue::getStack(),
+                                         Offset));
+    }
+  }
+
+  // Transform all store nodes into one single node because all store nodes are
+  // independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into registers. The InFlag
+  // is necessary since all emitted instructions must be stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+  // Returns a chain & a flag for retval copy to use.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+  SmallVector<SDValue, 10> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getConstant(NumBytes, getPointerTy(), true),
+                             DAG.getConstant(0, getPointerTy(), true),
+                             InFlag);
+  InFlag = Chain.getValue(1);
+
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+                         DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+                                       CallingConv::ID CallConv, bool isVarArg,
+                                       const SmallVectorImpl<ISD::InputArg>
+                                         &Ins,
+                                       DebugLoc dl, SelectionDAG &DAG,
+                                       SmallVectorImpl<SDValue> &InVals) {
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+                 *DAG.getContext());
+
+  CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+
+    Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
+                               VA.getLocVT(), InFlag).getValue(1);
+    SDValue RetValue = Chain.getValue(0);
+    InFlag = Chain.getValue(2);
+
+    // If this is an 8/16/32-bit value, it is really passed promoted to 64
+    // bits. Insert an assert[sz]ext to capture this, then truncate to the
+    // right size.
+    if (VA.getLocInfo() == CCValAssign::SExt)
+      RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+    else if (VA.getLocInfo() == CCValAssign::ZExt)
+      RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
+                             DAG.getValueType(VA.getValVT()));
+
+    if (VA.getLocInfo() != CCValAssign::Full)
+      RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
+
+    InVals.push_back(RetValue);
+  }
+
+  return Chain;
+}
+
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain,
+                                   CallingConv::ID CallConv, bool isVarArg,
+                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                   DebugLoc dl, SelectionDAG &DAG) {
+
+  // CCValAssign - represent the assignment of the return value to a location
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+  // If this is the first return lowered for this function, add the regs to the
+  // liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    SDValue ResValue = Outs[i].Val;
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // If this is an 8/16/32-bit value, it should really be passed promoted
+    // to 64 bits.
+    if (VA.getLocInfo() == CCValAssign::SExt)
+      ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue);
+    else if (VA.getLocInfo() == CCValAssign::ZExt)
+      ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue);
+    else if (VA.getLocInfo() == CCValAssign::AExt)
+      ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue);
+
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag);
+
+    // Guarantee that all emitted copies are stuck together so that nothing
+    // can be scheduled in between them.
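+    // (The glue value read out just below enforces this: each CopyToReg
+    // consumes the previous copy's glue result, so the scheduler cannot
+    // interleave other nodes.)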
+ Flag = Chain.getValue(1); + } + + if (Flag.getNode()) + return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + + // Return Void + return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain); +} + +SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue &SystemZCC, + SelectionDAG &DAG) { + // FIXME: Emit a test if RHS is zero + + bool isUnsigned = false; + SystemZCC::CondCodes TCC; + switch (CC) { + default: + llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: + TCC = SystemZCC::E; + break; + case ISD::SETUEQ: + TCC = SystemZCC::NLH; + break; + case ISD::SETNE: + case ISD::SETONE: + TCC = SystemZCC::NE; + break; + case ISD::SETUNE: + TCC = SystemZCC::LH; + break; + case ISD::SETO: + TCC = SystemZCC::O; + break; + case ISD::SETUO: + TCC = SystemZCC::NO; + break; + case ISD::SETULE: + if (LHS.getValueType().isFloatingPoint()) { + TCC = SystemZCC::NH; + break; + } + isUnsigned = true; // FALLTHROUGH + case ISD::SETLE: + case ISD::SETOLE: + TCC = SystemZCC::LE; + break; + case ISD::SETUGE: + if (LHS.getValueType().isFloatingPoint()) { + TCC = SystemZCC::NL; + break; + } + isUnsigned = true; // FALLTHROUGH + case ISD::SETGE: + case ISD::SETOGE: + TCC = SystemZCC::HE; + break; + case ISD::SETUGT: + if (LHS.getValueType().isFloatingPoint()) { + TCC = SystemZCC::NLE; + break; + } + isUnsigned = true; // FALLTHROUGH + case ISD::SETGT: + case ISD::SETOGT: + TCC = SystemZCC::H; + break; + case ISD::SETULT: + if (LHS.getValueType().isFloatingPoint()) { + TCC = SystemZCC::NHE; + break; + } + isUnsigned = true; // FALLTHROUGH + case ISD::SETLT: + case ISD::SETOLT: + TCC = SystemZCC::L; + break; + } + + SystemZCC = DAG.getConstant(TCC, MVT::i32); + + DebugLoc dl = LHS.getDebugLoc(); + return DAG.getNode((isUnsigned ? 
SystemZISD::UCMP : SystemZISD::CMP),
+                     dl, MVT::Flag, LHS, RHS);
+}
+
+
+SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  SDValue SystemZCC;
+  SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+  return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(),
+                     Chain, Dest, SystemZCC, Flag);
+}
+
+SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue TrueV = Op.getOperand(2);
+  SDValue FalseV = Op.getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  DebugLoc dl = Op.getDebugLoc();
+
+  SDValue SystemZCC;
+  SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
+
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+  SmallVector<SDValue, 4> Ops;
+  Ops.push_back(TrueV);
+  Ops.push_back(FalseV);
+  Ops.push_back(SystemZCC);
+  Ops.push_back(Flag);
+
+  return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size());
+}
+
+SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
+                                                  SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+  bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+  bool ExtraLoadRequired =
+    Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false);
+
+  SDValue Result;
+  if (!IsPic && !ExtraLoadRequired) {
+    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+    Offset = 0;
+  } else {
+    unsigned char OpFlags = 0;
+    if (ExtraLoadRequired)
+      OpFlags = SystemZII::MO_GOTENT;
+
+    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+  }
+
+  Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
+                       getPointerTy(), Result);
+
+  if (ExtraLoadRequired)
+    Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+                         PseudoSourceValue::getGOT(), 0);
+
+  // If there was a non-zero offset that we didn't fold, create an explicit
+  // addition for it.
+  if (Offset != 0)
+    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
+                         DAG.getConstant(Offset, getPointerTy()));
+
+  return Result;
+}
+
+// FIXME: PIC here
+SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
+                                              SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
+
+  return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+
+// FIXME: PIC here
+// FIXME: This is just a dirty hack. We need to lower the constant pool
+// properly.
+SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
+                                                 SelectionDAG &DAG) {
+  DebugLoc dl = Op.getDebugLoc();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+
+  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+                                             CP->getAlignment(),
+                                             CP->getOffset());
+
+  return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch (Opcode) {
+  case SystemZISD::RET_FLAG:           return "SystemZISD::RET_FLAG";
+  case SystemZISD::CALL:               return "SystemZISD::CALL";
+  case SystemZISD::BRCOND:             return "SystemZISD::BRCOND";
+  case SystemZISD::CMP:                return "SystemZISD::CMP";
+  case SystemZISD::UCMP:               return "SystemZISD::UCMP";
+  case SystemZISD::SELECT:             return "SystemZISD::SELECT";
+  case SystemZISD::PCRelativeWrapper:  return "SystemZISD::PCRelativeWrapper";
+  default: return NULL;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock*
+SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                   MachineBasicBlock *BB,
+                   DenseMap<MachineBasicBlock*,
+                            MachineBasicBlock*> *EM) const {
+  const SystemZInstrInfo &TII = *TM.getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  assert((MI->getOpcode() == SystemZ::Select32 ||
+          MI->getOpcode() == SystemZ::SelectF32 ||
+          MI->getOpcode() == SystemZ::Select64 ||
+          MI->getOpcode() == SystemZ::SelectF64) &&
+         "Unexpected instr type to insert");
+
+  // To "insert" a SELECT instruction, we actually have to insert the diamond
+  // control-flow pattern.  The incoming instruction knows the destination vreg
+  // to set, the condition code register to branch on, the true/false values to
+  // select between, and a branch opcode to use.
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator I = BB;
+  ++I;
+
+  //  thisMBB:
+  //  ...
+  //   TrueVal = ...
+  //   cmpTY ccX, r1, r2
+  //   jCC copy1MBB
+  //   fallthrough --> copy0MBB
+  MachineBasicBlock *thisMBB = BB;
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+  SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
+  BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
+  F->insert(I, copy0MBB);
+  F->insert(I, copy1MBB);
+  // Inform sdisel of the edge changes.
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         SE = BB->succ_end(); SI != SE; ++SI)
+    EM->insert(std::make_pair(*SI, copy1MBB));
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the new block which will contain the Phi node for the select.
+  copy1MBB->transferSuccessors(BB);
+  // Next, add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(copy1MBB);
+
+  //  copy0MBB:
+  //   %FalseValue = ...
+  //   # fallthrough to copy1MBB
+  BB = copy0MBB;
+
+  // Update machine-CFG edges
+  BB->addSuccessor(copy1MBB);
+
+  //  copy1MBB:
+  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+  //  ...
+  BB = copy1MBB;
+  BuildMI(BB, dl, TII.get(SystemZ::PHI),
+          MI->getOperand(0).getReg())
+    .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
+    .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
+
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+ return BB; +} diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h new file mode 100644 index 0000000000000..c2c24bc1f3abd --- /dev/null +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -0,0 +1,141 @@ +//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that SystemZ uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H +#define LLVM_TARGET_SystemZ_ISELLOWERING_H + +#include "SystemZ.h" +#include "SystemZRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + namespace SystemZISD { + enum { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// Return with a flag operand. Operand 0 is the chain operand. + RET_FLAG, + + /// CALL - These operations represent an abstract call + /// instruction, which includes a bunch of information. + CALL, + + /// PCRelativeWrapper - PC relative address + PCRelativeWrapper, + + /// CMP, UCMP - Compare instruction + CMP, + UCMP, + + /// BRCOND - Conditional branch. Operand 0 is chain operand, operand 1 is + /// the block to branch if condition is true, operand 2 is condition code + /// and operand 3 is the flag operand produced by a CMP instruction. + BRCOND, + + /// SELECT - Operands 0 and 1 are selection variables, operand 2 is + /// condition code and operand 3 is the flag operand. + SELECT + }; + } + + class SystemZSubtarget; + class SystemZTargetMachine; + + class SystemZTargetLowering : public TargetLowering { + public: + explicit SystemZTargetLowering(SystemZTargetMachine &TM); + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// getTargetNodeName - This method returns the name of a target specific + /// DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getFunctionAlignment - Return the Log2 alignment of this function. 
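+    /// (Log2: the value 1 returned below thus means 2-byte alignment, the
+    /// minimum for SystemZ, whose instructions are halfword-aligned.)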
+    virtual unsigned getFunctionAlignment(const Function *F) const {
+      return 1;
+    }
+
+    std::pair<unsigned, const TargetRegisterClass*>
+    getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
+    TargetLowering::ConstraintType
+    getConstraintType(const std::string &Constraint) const;
+
+    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+
+    SDValue EmitCmp(SDValue LHS, SDValue RHS,
+                    ISD::CondCode CC, SDValue &SystemZCC,
+                    SelectionDAG &DAG);
+
+
+    MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
+                                                   MachineBasicBlock *BB,
+                     DenseMap<MachineBasicBlock*,
+                              MachineBasicBlock*> *EM) const;
+
+  private:
+    SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
+                           CallingConv::ID CallConv, bool isVarArg,
+                           bool isTailCall,
+                           const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           const SmallVectorImpl<ISD::InputArg> &Ins,
+                           DebugLoc dl, SelectionDAG &DAG,
+                           SmallVectorImpl<SDValue> &InVals);
+
+    SDValue LowerCCCArguments(SDValue Chain,
+                              CallingConv::ID CallConv,
+                              bool isVarArg,
+                              const SmallVectorImpl<ISD::InputArg> &Ins,
+                              DebugLoc dl,
+                              SelectionDAG &DAG,
+                              SmallVectorImpl<SDValue> &InVals);
+
+    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+                            CallingConv::ID CallConv, bool isVarArg,
+                            const SmallVectorImpl<ISD::InputArg> &Ins,
+                            DebugLoc dl, SelectionDAG &DAG,
+                            SmallVectorImpl<SDValue> &InVals);
+
+    virtual SDValue
+      LowerFormalArguments(SDValue Chain,
+                           CallingConv::ID CallConv, bool isVarArg,
+                           const SmallVectorImpl<ISD::InputArg> &Ins,
+                           DebugLoc dl, SelectionDAG &DAG,
+                           SmallVectorImpl<SDValue> &InVals);
+    virtual SDValue
+      LowerCall(SDValue Chain, SDValue Callee,
+                CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<ISD::InputArg> &Ins,
+                DebugLoc dl, SelectionDAG &DAG,
+                SmallVectorImpl<SDValue> &InVals);
+
+    virtual SDValue
+      LowerReturn(SDValue Chain,
+                  CallingConv::ID CallConv, bool isVarArg,
+                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                  DebugLoc dl, SelectionDAG &DAG);
+
+    const SystemZSubtarget &Subtarget;
+    const SystemZTargetMachine &TM;
+    const SystemZRegisterInfo *RegInfo;
+  };
+} // namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 0000000000000..b69d2f6ce9ff8
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,128 @@
+//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+// The BuildMem function may be used with the BuildMI function to add entire
+// memory references in a single, typed, function call.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Base, Displacement, Index.
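+//
+// For example (illustrative), a 64-bit store of R2D to offset 16 off the
+// stack pointer would be built as:
+//   BuildMI(MBB, MI, DL, TII.get(SystemZ::MOV64mr))
+//     .addReg(SystemZ::R15D).addImm(16).addReg(0)   // base, disp, index
+//     .addReg(SystemZ::R2D);                        // value to store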
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZINSTRBUILDER_H
+#define SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// SystemZAddressMode - This struct holds a generalized full SystemZ address
+/// mode. The base register can be a frame index, which will eventually be
+/// replaced with R15 or R11 and Disp adjusted accordingly.
+struct SystemZAddressMode {
+  enum {
+    RegBase,
+    FrameIndexBase
+  } BaseType;
+
+  union {
+    unsigned Reg;
+    int FrameIndex;
+  } Base;
+
+  unsigned IndexReg;
+  int32_t Disp;
+  GlobalValue *GV;
+
+  SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
+    Base.Reg = 0;
+  }
+};
+
+/// addDirectMem - This function is used to add a direct memory reference to
+/// the current instruction -- that is, a dereference of an address in a
+/// register, with no index or displacement.
+///
+static inline const MachineInstrBuilder &
+addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
+  // Because memory references are always represented with 3
+  // values, this adds: Reg, [0, NoReg] to the instruction.
+  return MIB.addReg(Reg).addImm(0).addReg(0);
+}
+
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, int Offset) {
+  return MIB.addImm(Offset).addReg(0);
+}
+
+/// addRegOffset - This function is used to add a memory reference of the form
+/// [Reg + Offset], i.e., one with no index register, but with a
+/// displacement. An example is: 10(%r15).
+///
+static inline const MachineInstrBuilder &
+addRegOffset(const MachineInstrBuilder &MIB,
+             unsigned Reg, bool isKill, int Offset) {
+  return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
+}
+
+/// addRegReg - This function is used to add a memory reference of the form:
+/// [Reg + Reg].
+static inline const MachineInstrBuilder &
+addRegReg(const MachineInstrBuilder &MIB,
+          unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) {
+  return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0)
+    .addReg(Reg2, getKillRegState(isKill2));
+}
+
+static inline const MachineInstrBuilder &
+addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) {
+  if (AM.BaseType == SystemZAddressMode::RegBase)
+    MIB.addReg(AM.Base.Reg);
+  else if (AM.BaseType == SystemZAddressMode::FrameIndexBase)
+    MIB.addFrameIndex(AM.Base.FrameIndex);
+  else
+    assert(0);
+
+  return MIB.addImm(AM.Disp).addReg(AM.IndexReg);
+}
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. The
+/// reference uses the frame index as its base register until the frame index
+/// is resolved to a concrete register and offset. A constant offset relative
+/// to the object may be specified as well...
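+/// A typical use, mirroring the spill code in SystemZInstrInfo.cpp later in
+/// this patch:
+///   addFrameReference(BuildMI(MBB, MI, DL, get(SystemZ::MOV64mr)), FrameIdx)
+///     .addReg(SrcReg, getKillRegState(isKill));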
+/// +static inline const MachineInstrBuilder & +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { + MachineInstr *MI = MIB; + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const TargetInstrDesc &TID = MI->getDesc(); + unsigned Flags = 0; + if (TID.mayLoad()) + Flags |= MachineMemOperand::MOLoad; + if (TID.mayStore()) + Flags |= MachineMemOperand::MOStore; + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + Flags, Offset, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + return addOffset(MIB.addFrameIndex(FI), Offset) + .addMemOperand(MMO); +} + +} // End llvm namespace + +#endif diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td new file mode 100644 index 0000000000000..8a202d4523a58 --- /dev/null +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -0,0 +1,340 @@ +//===- SystemZInstrFP.td - SystemZ FP Instruction defs --------*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the SystemZ (binary) floating point instructions in +// TableGen format. +// +//===----------------------------------------------------------------------===// + +// FIXME: multiclassify! + +//===----------------------------------------------------------------------===// +// FP Pattern fragments + +def fpimm0 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + +def fpimmneg0 : PatLeaf<(fpimm), [{ + return N->isExactlyValue(-0.0); +}]>; + +let usesCustomDAGSchedInserter = 1 in { + def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc), + "# SelectF32 PSEUDO", + [(set FP32:$dst, + (SystemZselect FP32:$src1, FP32:$src2, imm:$cc))]>; + def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc), + "# SelectF64 PSEUDO", + [(set FP64:$dst, + (SystemZselect FP64:$src1, FP64:$src2, imm:$cc))]>; +} + +//===----------------------------------------------------------------------===// +// Move Instructions + +// Floating point constant loads. 
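+// (lzer/lzdr zero an FP register without a memory access, which is why +0.0
+// is registered as a legal FP immediate in SystemZISelLowering.cpp; -0.0 is
+// then formed by negation -- see the fpimmneg0 patterns at the end of this
+// file.)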
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins), + "lzer\t{$dst}", + [(set FP32:$dst, fpimm0)]>; +def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins), + "lzdr\t{$dst}", + [(set FP64:$dst, fpimm0)]>; +} + +let neverHasSideEffects = 1 in { +def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), + "ler\t{$dst, $src}", + []>; +def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), + "ldr\t{$dst, $src}", + []>; +} + +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src), + "le\t{$dst, $src}", + [(set FP32:$dst, (load rriaddr12:$src))]>; +def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src), + "ley\t{$dst, $src}", + [(set FP32:$dst, (load rriaddr:$src))]>; +def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src), + "ld\t{$dst, $src}", + [(set FP64:$dst, (load rriaddr12:$src))]>; +def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src), + "ldy\t{$dst, $src}", + [(set FP64:$dst, (load rriaddr:$src))]>; +} + +def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src), + "ste\t{$src, $dst}", + [(store FP32:$src, rriaddr12:$dst)]>; +def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src), + "stey\t{$src, $dst}", + [(store FP32:$src, rriaddr:$dst)]>; +def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src), + "std\t{$src, $dst}", + [(store FP64:$src, rriaddr12:$dst)]>; +def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src), + "stdy\t{$src, $dst}", + [(store FP64:$src, rriaddr:$dst)]>; + +def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), + "cpsdr\t{$dst, $src2, $src1}", + [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>; +def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), + "cpsdr\t{$dst, $src2, $src1}", + [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions + + +let Defs = [PSW] in { +def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), + "lcebr\t{$dst, $src}", + [(set FP32:$dst, (fneg FP32:$src)), + (implicit PSW)]>; +def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), + "lcdbr\t{$dst, $src}", + [(set FP64:$dst, (fneg FP64:$src)), + (implicit PSW)]>; + +def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), + "lpebr\t{$dst, $src}", + [(set FP32:$dst, (fabs FP32:$src)), + (implicit PSW)]>; +def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), + "lpdbr\t{$dst, $src}", + [(set FP64:$dst, (fabs FP64:$src)), + (implicit PSW)]>; + +def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), + "lnebr\t{$dst, $src}", + [(set FP32:$dst, (fneg(fabs FP32:$src))), + (implicit PSW)]>; +def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), + "lndbr\t{$dst, $src}", + [(set FP64:$dst, (fneg(fabs FP64:$src))), + (implicit PSW)]>; +} + +let isTwoAddress = 1 in { +let Defs = [PSW] in { +let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y +def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), + "aebr\t{$dst, $src2}", + [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)), + (implicit PSW)]>; +def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), + "adbr\t{$dst, $src2}", + [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)), + (implicit PSW)]>; +} + +def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2), + "aeb\t{$dst, $src2}", + [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +def FADD64rm : 
Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), + "adb\t{$dst, $src2}", + [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; + +def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), + "sebr\t{$dst, $src2}", + [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)), + (implicit PSW)]>; +def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), + "sdbr\t{$dst, $src2}", + [(set FP64:$dst, (fsub FP64:$src1, FP64:$src2)), + (implicit PSW)]>; + +def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2), + "seb\t{$dst, $src2}", + [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), + "sdb\t{$dst, $src2}", + [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +} // Defs = [PSW] + +let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y +def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), + "meebr\t{$dst, $src2}", + [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>; +def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), + "mdbr\t{$dst, $src2}", + [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>; +} + +def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2), + "meeb\t{$dst, $src2}", + [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>; +def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), + "mdb\t{$dst, $src2}", + [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>; + +def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3), + "maebr\t{$dst, $src3, $src2}", + [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3), + FP32:$src1))]>; +def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3), + "maeb\t{$dst, $src3, $src2}", + [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2), + FP32:$src3), + FP32:$src1))]>; + +def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3), + "madbr\t{$dst, $src3, $src2}", + [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3), + FP64:$src1))]>; +def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3), + "madb\t{$dst, $src3, $src2}", + [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2), + FP64:$src3), + FP64:$src1))]>; + +def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3), + "msebr\t{$dst, $src3, $src2}", + [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3), + FP32:$src1))]>; +def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3), + "mseb\t{$dst, $src3, $src2}", + [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2), + FP32:$src3), + FP32:$src1))]>; + +def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3), + "msdbr\t{$dst, $src3, $src2}", + [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3), + FP64:$src1))]>; +def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3), + "msdb\t{$dst, $src3, $src2}", + [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2), + FP64:$src3), + FP64:$src1))]>; + +def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), + "debr\t{$dst, $src2}", + [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>; +def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), + "ddbr\t{$dst, $src2}", + [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>; + +def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, 
rriaddr12:$src2), + "deb\t{$dst, $src2}", + [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>; +def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), + "ddb\t{$dst, $src2}", + [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>; + +} // isTwoAddress = 1 + +def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), + "sqebr\t{$dst, $src}", + [(set FP32:$dst, (fsqrt FP32:$src))]>; +def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), + "sqdbr\t{$dst, $src}", + [(set FP64:$dst, (fsqrt FP64:$src))]>; + +def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src), + "sqeb\t{$dst, $src}", + [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>; +def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src), + "sqdb\t{$dst, $src}", + [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>; + +def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins FP64:$src), + "ledbr\t{$dst, $src}", + [(set FP32:$dst, (fround FP64:$src))]>; + +def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src), + "ldebr\t{$dst, $src}", + [(set FP64:$dst, (fextend FP32:$src))]>; +def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src), + "ldeb\t{$dst, $src}", + [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>; + +let Defs = [PSW] in { +def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src), + "cefbr\t{$dst, $src}", + [(set FP32:$dst, (sint_to_fp GR32:$src)), + (implicit PSW)]>; +def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src), + "cegbr\t{$dst, $src}", + [(set FP32:$dst, (sint_to_fp GR64:$src)), + (implicit PSW)]>; + +def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src), + "cdfbr\t{$dst, $src}", + [(set FP64:$dst, (sint_to_fp GR32:$src)), + (implicit PSW)]>; +def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src), + "cdgbr\t{$dst, $src}", + [(set FP64:$dst, (sint_to_fp GR64:$src)), + (implicit PSW)]>; + +def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src), + "cfebr\t{$dst, 5, $src}", + [(set GR32:$dst, (fp_to_sint FP32:$src)), + (implicit PSW)]>; +def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src), + "cfdbr\t{$dst, 5, $src}", + [(set GR32:$dst, (fp_to_sint FP64:$src)), + (implicit PSW)]>; + +def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src), + "cgebr\t{$dst, 5, $src}", + [(set GR64:$dst, (fp_to_sint FP32:$src)), + (implicit PSW)]>; +def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src), + "cgdbr\t{$dst, 5, $src}", + [(set GR64:$dst, (fp_to_sint FP64:$src)), + (implicit PSW)]>; +} // Defs = [PSW] + +def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src), + "lgdr\t{$dst, $src}", + [(set GR64:$dst, (bitconvert FP64:$src))]>; +def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src), + "ldgr\t{$dst, $src}", + [(set FP64:$dst, (bitconvert GR64:$src))]>; + +//===----------------------------------------------------------------------===// +// Test instructions (like AND but do not produce any result) + +// Integer comparisons +let Defs = [PSW] in { +def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2), + "cebr\t$src1, $src2", + [(SystemZcmp FP32:$src1, FP32:$src2), (implicit PSW)]>; +def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2), + "cdbr\t$src1, $src2", + [(SystemZcmp FP64:$src1, FP64:$src2), (implicit PSW)]>; + +def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2), + "ceb\t$src1, $src2", + [(SystemZcmp FP32:$src1, (load rriaddr12:$src2)), + (implicit PSW)]>; +def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2), + "cdb\t$src1, $src2", + [(SystemZcmp FP64:$src1, (load rriaddr12:$src2)), + (implicit PSW)]>; 
+} // Defs = [PSW] + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// Floating point constant -0.0 +def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>; +def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td new file mode 100644 index 0000000000000..b4a8993c19717 --- /dev/null +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -0,0 +1,133 @@ +//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format val> { + bits<5> Value = val; +} + +def Pseudo : Format<0>; +def EForm : Format<1>; +def IForm : Format<2>; +def RIForm : Format<3>; +def RIEForm : Format<4>; +def RILForm : Format<5>; +def RISForm : Format<6>; +def RRForm : Format<7>; +def RREForm : Format<8>; +def RRFForm : Format<9>; +def RRRForm : Format<10>; +def RRSForm : Format<11>; +def RSForm : Format<12>; +def RSIForm : Format<13>; +def RSILForm : Format<14>; +def RSYForm : Format<15>; +def RXForm : Format<16>; +def RXEForm : Format<17>; +def RXFForm : Format<18>; +def RXYForm : Format<19>; +def SForm : Format<20>; +def SIForm : Format<21>; +def SILForm : Format<22>; +def SIYForm : Format<23>; +def SSForm : Format<24>; +def SSEForm : Format<25>; +def SSFForm : Format<26>; + +class InstSystemZ op, Format f, dag outs, dag ins> : Instruction { + let Namespace = "SystemZ"; + + bits<16> Opcode = op; + + Format Form = f; + bits<5> FormBits = Form.Value; + + dag OutOperandList = outs; + dag InOperandList = ins; +} + +class I8 op, Format f, dag outs, dag ins, string asmstr, + list pattern> + : InstSystemZ<0, f, outs, ins> { + let Opcode{0-7} = op; + let Opcode{8-15} = 0; + + let Pattern = pattern; + let AsmString = asmstr; +} + +class I12 op, Format f, dag outs, dag ins, string asmstr, + list pattern> + : InstSystemZ<0, f, outs, ins> { + let Opcode{0-11} = op; + let Opcode{12-15} = 0; + + let Pattern = pattern; + let AsmString = asmstr; +} + +class I16 op, Format f, dag outs, dag ins, string asmstr, + list pattern> + : InstSystemZ { + let Pattern = pattern; + let AsmString = asmstr; +} + +class RRI op, dag outs, dag ins, string asmstr, list pattern> + : I8; + +class RII op, dag outs, dag ins, string asmstr, list pattern> + : I12; + +class RILI op, dag outs, dag ins, string asmstr, list pattern> + : I12; + +class RREI op, dag outs, dag ins, string asmstr, list pattern> + : I16; + +class RXI op, dag outs, dag ins, string asmstr, list pattern> + : I8 { + let AddedComplexity = 1; +} + +class RXYI op, dag outs, dag ins, string asmstr, list pattern> + : I16; + +class RSI op, dag outs, dag ins, string asmstr, list pattern> + : I8 { + let AddedComplexity = 1; +} + +class RSYI op, dag outs, dag ins, string asmstr, list pattern> + : I16; + +class SII op, dag outs, dag ins, string asmstr, list pattern> + : I8 { + let AddedComplexity = 1; +} + +class SIYI op, dag outs, dag ins, string asmstr, list pattern> + : I16; + +class SILI op, dag outs, dag ins, string 
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<0, Pseudo, outs, ins> {
+
+  let Pattern = pattern;
+  let AsmString = asmstr;
+}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 0000000000000..236711cc0bcc7
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,648 @@
+//===- SystemZInstrInfo.cpp - SystemZ Instruction Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZGenInstrInfo.inc"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+  : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)),
+    RI(tm, *this), TM(tm) {
+  // Fill the spill offsets map
+  static const unsigned SpillOffsTab[][2] = {
+    { SystemZ::R2D,  0x10 },
+    { SystemZ::R3D,  0x18 },
+    { SystemZ::R4D,  0x20 },
+    { SystemZ::R5D,  0x28 },
+    { SystemZ::R6D,  0x30 },
+    { SystemZ::R7D,  0x38 },
+    { SystemZ::R8D,  0x40 },
+    { SystemZ::R9D,  0x48 },
+    { SystemZ::R10D, 0x50 },
+    { SystemZ::R11D, 0x58 },
+    { SystemZ::R12D, 0x60 },
+    { SystemZ::R13D, 0x68 },
+    { SystemZ::R14D, 0x70 },
+    { SystemZ::R15D, 0x78 }
+  };
+
+  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+
+  for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
+    RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
+}
+
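The constructor above hard-codes the ABI save-area offsets of the callee-saved GPRs (%r2 at 16(%r15) through %r15 at 120(%r15)). A minimal sketch of how the resulting map is meant to be consulted, assuming the IndexedMap<unsigned> declared in the header; the printOffset consumer is hypothetical:

    // Sketch: look up the fixed save-area offset of a callee-saved GPR.
    // Registers without an entry read back as 0, the IndexedMap default,
    // which no callee-saved GPR uses as a real offset here.
    unsigned Off = RegSpillOffsets[SystemZ::R6D]; // 0x30 per the table above
    if (Off)
      printOffset(Off); // hypothetical; spill code uses it as disp(%r15)
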
+/// isGVStub - Return true if the GV requires an extra load to get the
+/// real address.
+static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
+  return TM.getSubtarget<SystemZSubtarget>().GVRequiresExtraLoad(GV, TM, false);
+}
+
+void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                           unsigned SrcReg, bool isKill, int FrameIdx,
+                                           const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  unsigned Opc = 0;
+  if (RC == &SystemZ::GR32RegClass ||
+      RC == &SystemZ::ADDR32RegClass)
+    Opc = SystemZ::MOV32mr;
+  else if (RC == &SystemZ::GR64RegClass ||
+           RC == &SystemZ::ADDR64RegClass) {
+    Opc = SystemZ::MOV64mr;
+  } else if (RC == &SystemZ::FP32RegClass) {
+    Opc = SystemZ::FMOV32mr;
+  } else if (RC == &SystemZ::FP64RegClass) {
+    Opc = SystemZ::FMOV64mr;
+  } else if (RC == &SystemZ::GR64PRegClass) {
+    Opc = SystemZ::MOV64Pmr;
+  } else if (RC == &SystemZ::GR128RegClass) {
+    Opc = SystemZ::MOV128mr;
+  } else
+    llvm_unreachable("Unsupported regclass to store");
+
+  addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
+    .addReg(SrcReg, getKillRegState(isKill));
+}
+
+void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator MI,
+                                            unsigned DestReg, int FrameIdx,
+                                            const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  unsigned Opc = 0;
+  if (RC == &SystemZ::GR32RegClass ||
+      RC == &SystemZ::ADDR32RegClass)
+    Opc = SystemZ::MOV32rm;
+  else if (RC == &SystemZ::GR64RegClass ||
+           RC == &SystemZ::ADDR64RegClass) {
+    Opc = SystemZ::MOV64rm;
+  } else if (RC == &SystemZ::FP32RegClass) {
+    Opc = SystemZ::FMOV32rm;
+  } else if (RC == &SystemZ::FP64RegClass) {
+    Opc = SystemZ::FMOV64rm;
+  } else if (RC == &SystemZ::GR64PRegClass) {
+    Opc = SystemZ::MOV64Prm;
+  } else if (RC == &SystemZ::GR128RegClass) {
+    Opc = SystemZ::MOV128rm;
+  } else
+    llvm_unreachable("Unsupported regclass to load");
+
+  addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
+}
+
+bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned DestReg, unsigned SrcReg,
+                                    const TargetRegisterClass *DestRC,
+                                    const TargetRegisterClass *SrcRC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  // Determine if DstRC and SrcRC have a common superclass.
+ const TargetRegisterClass *CommonRC = DestRC; + if (DestRC == SrcRC) + /* Same regclass for source and dest */; + else if (CommonRC->hasSuperClass(SrcRC)) + CommonRC = SrcRC; + else if (!CommonRC->hasSubClass(SrcRC)) + CommonRC = 0; + + if (CommonRC) { + if (CommonRC == &SystemZ::GR64RegClass || + CommonRC == &SystemZ::ADDR64RegClass) { + BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); + } else if (CommonRC == &SystemZ::GR32RegClass || + CommonRC == &SystemZ::ADDR32RegClass) { + BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); + } else if (CommonRC == &SystemZ::GR64PRegClass) { + BuildMI(MBB, I, DL, get(SystemZ::MOV64rrP), DestReg).addReg(SrcReg); + } else if (CommonRC == &SystemZ::GR128RegClass) { + BuildMI(MBB, I, DL, get(SystemZ::MOV128rr), DestReg).addReg(SrcReg); + } else if (CommonRC == &SystemZ::FP32RegClass) { + BuildMI(MBB, I, DL, get(SystemZ::FMOV32rr), DestReg).addReg(SrcReg); + } else if (CommonRC == &SystemZ::FP64RegClass) { + BuildMI(MBB, I, DL, get(SystemZ::FMOV64rr), DestReg).addReg(SrcReg); + } else { + return false; + } + + return true; + } + + if ((SrcRC == &SystemZ::GR64RegClass && + DestRC == &SystemZ::ADDR64RegClass) || + (DestRC == &SystemZ::GR64RegClass && + SrcRC == &SystemZ::ADDR64RegClass)) { + BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); + return true; + } else if ((SrcRC == &SystemZ::GR32RegClass && + DestRC == &SystemZ::ADDR32RegClass) || + (DestRC == &SystemZ::GR32RegClass && + SrcRC == &SystemZ::ADDR32RegClass)) { + BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); + return true; + } + + return false; +} + +bool +SystemZInstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + switch (MI.getOpcode()) { + default: + return false; + case SystemZ::MOV32rr: + case SystemZ::MOV64rr: + case SystemZ::MOV64rrP: + case SystemZ::MOV128rr: + case SystemZ::FMOV32rr: + case SystemZ::FMOV64rr: + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "invalid register-register move instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SrcSubIdx = MI.getOperand(1).getSubReg(); + DstSubIdx = MI.getOperand(0).getSubReg(); + return true; + } +} + +unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case SystemZ::MOV32rm: + case SystemZ::MOV32rmy: + case SystemZ::MOV64rm: + case SystemZ::MOVSX32rm8: + case SystemZ::MOVSX32rm16y: + case SystemZ::MOVSX64rm8: + case SystemZ::MOVSX64rm16: + case SystemZ::MOVSX64rm32: + case SystemZ::MOVZX32rm8: + case SystemZ::MOVZX32rm16: + case SystemZ::MOVZX64rm8: + case SystemZ::MOVZX64rm16: + case SystemZ::MOVZX64rm32: + case SystemZ::FMOV32rm: + case SystemZ::FMOV32rmy: + case SystemZ::FMOV64rm: + case SystemZ::FMOV64rmy: + case SystemZ::MOV64Prm: + case SystemZ::MOV64Prmy: + case SystemZ::MOV128rm: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && MI->getOperand(3).isReg() && + MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case SystemZ::MOV32mr: + case SystemZ::MOV32mry: + case SystemZ::MOV64mr: + case 
SystemZ::MOV32m8r:
+  case SystemZ::MOV32m8ry:
+  case SystemZ::MOV32m16r:
+  case SystemZ::MOV32m16ry:
+  case SystemZ::MOV64m8r:
+  case SystemZ::MOV64m8ry:
+  case SystemZ::MOV64m16r:
+  case SystemZ::MOV64m16ry:
+  case SystemZ::MOV64m32r:
+  case SystemZ::MOV64m32ry:
+  case SystemZ::FMOV32mr:
+  case SystemZ::FMOV32mry:
+  case SystemZ::FMOV64mr:
+  case SystemZ::FMOV64mry:
+  case SystemZ::MOV64Pmr:
+  case SystemZ::MOV64Pmry:
+  case SystemZ::MOV128mr:
+    if (MI->getOperand(0).isFI() &&
+        MI->getOperand(1).isImm() && MI->getOperand(2).isReg() &&
+        MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) {
+      FrameIndex = MI->getOperand(0).getIndex();
+      return MI->getOperand(3).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+bool
+SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                            MachineBasicBlock::iterator MI,
+                                            const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  unsigned CalleeFrameSize = 0;
+
+  // Scan the callee-saved registers and find the bounds of the register spill
+  // area.
+  unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+    if (RegClass != &SystemZ::FP64RegClass) {
+      unsigned Offset = RegSpillOffsets[Reg];
+      CalleeFrameSize += 8;
+      if (StartOffset > Offset) {
+        LowReg = Reg; StartOffset = Offset;
+      }
+      if (EndOffset < Offset) {
+        HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
+      }
+    }
+  }
+
+  // Save information for the epilogue inserter.
+  MFI->setCalleeSavedFrameSize(CalleeFrameSize);
+  MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
+
+  // Save GPRs
+  if (StartOffset) {
+    // Build a store instruction. Use a STORE MULTIPLE instruction if there is
+    // more than one register to store; otherwise use a single STORE.
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
+                                SystemZ::MOV64mr : SystemZ::MOV64mrm)));
+
+    // Add store operands.
+    MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+    if (LowReg == HighReg)
+      MIB.addReg(0);
+    MIB.addReg(LowReg, RegState::Kill);
+    if (LowReg != HighReg)
+      MIB.addReg(HighReg, RegState::Kill);
+
+    // Do a second scan, marking the registers as killed by the instruction.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      // Add the callee-saved register as live-in. It's killed at the spill.
+      MBB.addLiveIn(Reg);
+      if (Reg != LowReg && Reg != HighReg)
+        MIB.addReg(Reg, RegState::ImplicitKill);
+    }
+  }
+
+  // Save FPRs
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+    if (RegClass == &SystemZ::FP64RegClass) {
+      MBB.addLiveIn(Reg);
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass);
+    }
+  }
+
+  return true;
+}
+
+bool
+SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                              MachineBasicBlock::iterator MI,
+                                              const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+  // Restore FP registers
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+    if (RegClass == &SystemZ::FP64RegClass)
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+  }
+
+  // Restore GP registers
+  unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
+  unsigned StartOffset = RegSpillOffsets[LowReg];
+
+  if (StartOffset) {
+    // Build a load instruction. Use a LOAD MULTIPLE instruction if there is
+    // more than one register to load; otherwise use a single LOAD.
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DL, get((LowReg == HighReg ?
+                                SystemZ::MOV64rm : SystemZ::MOV64rmm)));
+    // Add load operands.
+    MIB.addReg(LowReg, RegState::Define);
+    if (LowReg != HighReg)
+      MIB.addReg(HighReg, RegState::Define);
+
+    MIB.addReg((RegInfo->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D));
+    MIB.addImm(StartOffset);
+    if (LowReg == HighReg)
+      MIB.addReg(0);
+
+    // Do a second scan, marking the remaining registers as defined by the
+    // instruction.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      if (Reg != LowReg && Reg != HighReg)
+        MIB.addReg(Reg, RegState::ImplicitDefine);
+    }
+  }
+
+  return true;
+}
+
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(Cond.size() == 1 && "Invalid Xbranch condition!");
+
+  SystemZCC::CondCodes CC = static_cast<SystemZCC::CondCodes>(Cond[0].getImm());
+  Cond[0].setImm(getOppositeCondition(CC));
+  return false;
+}
+
+bool SystemZInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+  if (MBB.empty()) return false;
+
+  switch (MBB.back().getOpcode()) {
+  case SystemZ::RET:   // Return.
+  case SystemZ::JMP:   // Uncond branch.
+  case SystemZ::JMPr:  // Indirect branch.
+    return true;
+  default: return false;
+  }
+}
+
+bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isTerminator()) return false;
+
+  // Conditional branch is a special case.
+  if (TID.isBranch() && !TID.isBarrier())
+    return true;
+  if (!TID.isPredicable())
+    return true;
+  return !isPredicated(MI);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    // Working from the bottom, when we see a non-terminator
+    // instruction, we're done.
+    if (!isUnpredicatedTerminator(I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled
+    // by this analysis.
+    if (!I->getDesc().isBranch())
+      return true;
+
+    // Handle unconditional branches.
+    if (I->getOpcode() == SystemZ::JMP) {
+      if (!AllowModify) {
+        TBB = I->getOperand(0).getMBB();
+        continue;
+      }
+
+      // If the block has any instructions after a JMP, delete them.
+      while (next(I) != MBB.end())
+        next(I)->eraseFromParent();
+      Cond.clear();
+      FBB = 0;
+
+      // Delete the JMP if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+        TBB = 0;
+        I->eraseFromParent();
+        I = MBB.end();
+        continue;
+      }
+
+      // TBB is used to indicate the unconditional destination.
+      TBB = I->getOperand(0).getMBB();
+      continue;
+    }
+
+    // Handle conditional branches.
+    SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode());
+    if (BranchCode == SystemZCC::INVALID)
+      return true;    // Can't handle indirect branch.
+
+    // Working from the bottom, handle the first conditional branch.
+    if (Cond.empty()) {
+      FBB = TBB;
+      TBB = I->getOperand(0).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(BranchCode));
+      continue;
+    }
+
+    // Handle subsequent conditional branches. Only handle the case where all
+    // conditional branches branch to the same destination.
+    assert(Cond.size() == 1);
+    assert(TBB);
+
+    // Only handle the case where all conditional branches branch to
+    // the same destination.
+    if (TBB != I->getOperand(0).getMBB())
+      return true;
+
+    SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm();
+    // If the conditions are the same, we can leave them alone.
+    if (OldBranchCode == BranchCode)
+      continue;
+
+    return true;
+  }
+
+  return false;
+}
+
+unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+
+  while (I != MBB.begin()) {
+    --I;
+    if (I->getOpcode() != SystemZ::JMP &&
+        getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
+      break;
+    // Remove the branch.
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+
+  return Count;
+}
+
+unsigned
+SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                               MachineBasicBlock *FBB,
+                               const SmallVectorImpl<MachineOperand> &Cond) const {
+  // FIXME: this should probably have a DebugLoc operand
+  DebugLoc dl = DebugLoc::getUnknownLoc();
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "SystemZ branch conditions have one component!");
+
+  if (Cond.empty()) {
+    // Unconditional branch?
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, dl, get(SystemZ::JMP)).addMBB(TBB);
+    return 1;
+  }
+
+  // Conditional branch.
+  unsigned Count = 0;
+  SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
+  BuildMI(&MBB, dl, getBrCond(CC)).addMBB(TBB);
+  ++Count;
+
+  if (FBB) {
+    // Two-way conditional branch. Insert the second branch.
+ BuildMI(&MBB, dl, get(SystemZ::JMP)).addMBB(FBB); + ++Count; + } + return Count; +} + +const TargetInstrDesc& +SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const { + switch (CC) { + default: + llvm_unreachable("Unknown condition code!"); + case SystemZCC::O: return get(SystemZ::JO); + case SystemZCC::H: return get(SystemZ::JH); + case SystemZCC::NLE: return get(SystemZ::JNLE); + case SystemZCC::L: return get(SystemZ::JL); + case SystemZCC::NHE: return get(SystemZ::JNHE); + case SystemZCC::LH: return get(SystemZ::JLH); + case SystemZCC::NE: return get(SystemZ::JNE); + case SystemZCC::E: return get(SystemZ::JE); + case SystemZCC::NLH: return get(SystemZ::JNLH); + case SystemZCC::HE: return get(SystemZ::JHE); + case SystemZCC::NL: return get(SystemZ::JNL); + case SystemZCC::LE: return get(SystemZ::JLE); + case SystemZCC::NH: return get(SystemZ::JNH); + case SystemZCC::NO: return get(SystemZ::JNO); + } +} + +SystemZCC::CondCodes +SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const { + switch (Opc) { + default: return SystemZCC::INVALID; + case SystemZ::JO: return SystemZCC::O; + case SystemZ::JH: return SystemZCC::H; + case SystemZ::JNLE: return SystemZCC::NLE; + case SystemZ::JL: return SystemZCC::L; + case SystemZ::JNHE: return SystemZCC::NHE; + case SystemZ::JLH: return SystemZCC::LH; + case SystemZ::JNE: return SystemZCC::NE; + case SystemZ::JE: return SystemZCC::E; + case SystemZ::JNLH: return SystemZCC::NLH; + case SystemZ::JHE: return SystemZCC::HE; + case SystemZ::JNL: return SystemZCC::NL; + case SystemZ::JLE: return SystemZCC::LE; + case SystemZ::JNH: return SystemZCC::NH; + case SystemZ::JNO: return SystemZCC::NO; + } +} + +SystemZCC::CondCodes +SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const { + switch (CC) { + default: + llvm_unreachable("Invalid condition!"); + case SystemZCC::O: return SystemZCC::NO; + case SystemZCC::H: return SystemZCC::NH; + case SystemZCC::NLE: return SystemZCC::LE; + case SystemZCC::L: return SystemZCC::NL; + case SystemZCC::NHE: return SystemZCC::HE; + case SystemZCC::LH: return SystemZCC::NLH; + case SystemZCC::NE: return SystemZCC::E; + case SystemZCC::E: return SystemZCC::NE; + case SystemZCC::NLH: return SystemZCC::LH; + case SystemZCC::HE: return SystemZCC::NHE; + case SystemZCC::NL: return SystemZCC::L; + case SystemZCC::LE: return SystemZCC::NLE; + case SystemZCC::NH: return SystemZCC::H; + case SystemZCC::NO: return SystemZCC::O; + } +} + +const TargetInstrDesc& +SystemZInstrInfo::getLongDispOpc(unsigned Opc) const { + switch (Opc) { + default: + llvm_unreachable("Don't have long disp version of this instruction"); + case SystemZ::MOV32mr: return get(SystemZ::MOV32mry); + case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy); + case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y); + case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry); + case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry); + case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry); + case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry); + case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry); + case SystemZ::MOV8mi: return get(SystemZ::MOV8miy); + case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy); + case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy); + case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy); + case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry); + case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry); + case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy); + case SystemZ::FMOV64rm: return 
get(SystemZ::FMOV64rmy);
+  case SystemZ::MOV64Pmr:    return get(SystemZ::MOV64Pmry);
+  case SystemZ::MOV64Prm:    return get(SystemZ::MOV64Prmy);
+  }
+}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 0000000000000..e16d704164e0e
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,119 @@
+//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
+#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+/// SystemZII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace SystemZII {
+  enum {
+    //===------------------------------------------------------------------===//
+    // SystemZ Specific MachineOperand flags.
+
+    MO_NO_FLAG = 0,
+
+    /// MO_GOTENT - On a symbol operand this indicates that the immediate is
+    /// the offset to the location of the symbol name from the base of the GOT.
+    ///
+    ///    SYMBOL_LABEL @GOTENT
+    MO_GOTENT = 1,
+
+    /// MO_PLT - On a symbol operand this indicates that the immediate is
+    /// offset to the PLT entry of symbol name from the current code location.
+    ///
+    ///    SYMBOL_LABEL @PLT
+    MO_PLT = 2
+  };
+}
+
+class SystemZInstrInfo : public TargetInstrInfoImpl {
+  const SystemZRegisterInfo RI;
+  SystemZTargetMachine &TM;
+  IndexedMap<unsigned> RegSpillOffsets;
+public:
+  explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+  bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned DestReg, unsigned SrcReg,
+                    const TargetRegisterClass *DestRC,
+                    const TargetRegisterClass *SrcRC) const;
+
+  bool isMoveInstr(const MachineInstr& MI,
+                   unsigned &SrcReg, unsigned &DstReg,
+                   unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+  unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+  unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned SrcReg, bool isKill,
+                                   int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned DestReg, int FrameIdx,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         const std::vector<CalleeSavedInfo> &CSI) const;
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                           const std::vector<CalleeSavedInfo> &CSI) const;
+
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
+                             MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const;
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond) const;
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+  SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
+  SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
+  const TargetInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
+  const TargetInstrDesc& getLongDispOpc(unsigned Opc) const;
+
+  const TargetInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
+    if (Offset < 0 || Offset >= 4096)
+      return getLongDispOpc(Opc);
+    else
+      return get(Opc);
+  }
+};
+
+}
+
+#endif
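getMemoryInstr above captures the classic s390 displacement split: the base RX/RS/SI encodings carry only a 12-bit unsigned displacement, so any offset outside [0, 4096) has to fall back to the 20-bit signed long-displacement variants that getLongDispOpc returns. A hedged sketch of a caller, assuming TII, MBB, MI, DL, DestReg, BaseReg and Offset are in scope:

    // Sketch: choose between l (MOV32rm, 12-bit disp) and ly (MOV32rmy,
    // 20-bit disp) from the final displacement, then build the load.
    // Operand order follows isLoadFromStackSlot: base, displacement, index.
    const TargetInstrDesc &Desc = TII.getMemoryInstr(SystemZ::MOV32rm, Offset);
    BuildMI(MBB, MI, DL, Desc, DestReg)
        .addReg(BaseReg)
        .addImm(Offset)
        .addReg(0);  // no index register
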
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 0000000000000..56d75ddfc0c70
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,1155 @@
+//===- SystemZInstrInfo.td - SystemZ Instruction defs ---------*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SystemZ instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SystemZ Instruction Predicate Definitions.
+def IsZ10 : Predicate<"Subtarget.isZ10()">;
+
+include "SystemZInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Type Constraints.
+//===----------------------------------------------------------------------===//
+class SDTCisI8<int OpNum>  : SDTCisVT<OpNum, i8>;
+class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
+class SDTCisI32<int OpNum> : SDTCisVT<OpNum, i32>;
+class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
+
+//===----------------------------------------------------------------------===//
+// Type Profiles.
+//===----------------------------------------------------------------------===//
+def SDT_SystemZCall         : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
+def SDT_SystemZCallSeqEnd   : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
+def SDT_CmpTest             : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_BrCond              : SDTypeProfile<0, 2,
+                                            [SDTCisVT<0, OtherVT>,
+                                             SDTCisI8<1>]>;
+def SDT_SelectCC            : SDTypeProfile<1, 3,
+                                            [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+                                             SDTCisI8<3>]>;
+def SDT_Address             : SDTypeProfile<1, 1,
+                                            [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+
+//===----------------------------------------------------------------------===//
+// SystemZ Specific Node Definitions.
+//===----------------------------------------------------------------------===//
+def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+                            [SDNPHasChain, SDNPOptInFlag]>;
+def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
+                         [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+def SystemZcallseq_start :
+                 SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
+                        [SDNPHasChain, SDNPOutFlag]>;
+def SystemZcallseq_end :
+                 SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
+                        [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest, [SDNPOutFlag]>;
+def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest, [SDNPOutFlag]>;
+def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
+                           [SDNPHasChain, SDNPInFlag]>;
+def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC, [SDNPInFlag]>;
+def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
+
+
+include "SystemZOperands.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction list..
+
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+                              "#ADJCALLSTACKDOWN",
+                              [(SystemZcallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP   : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+                              "#ADJCALLSTACKUP",
+                              [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
+
+let usesCustomDAGSchedInserter = 1 in {
+  def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
+                        "# Select32 PSEUDO",
+                        [(set GR32:$dst,
+                              (SystemZselect GR32:$src1, GR32:$src2, imm:$cc))]>;
+  def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
+                        "# Select64 PSEUDO",
+                        [(set GR64:$dst,
+                              (SystemZselect GR64:$src1, GR64:$src2, imm:$cc))]>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Control Flow Instructions...
+//
+
+// FIXME: Provide proper encoding!
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { + def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>; +} + +let isBranch = 1, isTerminator = 1 in { + let isBarrier = 1 in { + def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>; + + let isIndirectBranch = 1 in + def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>; + } + + let Uses = [PSW] in { + def JO : Pseudo<(outs), (ins brtarget:$dst), + "jo\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O)]>; + def JH : Pseudo<(outs), (ins brtarget:$dst), + "jh\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H)]>; + def JNLE: Pseudo<(outs), (ins brtarget:$dst), + "jnle\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE)]>; + def JL : Pseudo<(outs), (ins brtarget:$dst), + "jl\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L)]>; + def JNHE: Pseudo<(outs), (ins brtarget:$dst), + "jnhe\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE)]>; + def JLH : Pseudo<(outs), (ins brtarget:$dst), + "jlh\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH)]>; + def JNE : Pseudo<(outs), (ins brtarget:$dst), + "jne\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE)]>; + def JE : Pseudo<(outs), (ins brtarget:$dst), + "je\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E)]>; + def JNLH: Pseudo<(outs), (ins brtarget:$dst), + "jnlh\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH)]>; + def JHE : Pseudo<(outs), (ins brtarget:$dst), + "jhe\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE)]>; + def JNL : Pseudo<(outs), (ins brtarget:$dst), + "jnl\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL)]>; + def JLE : Pseudo<(outs), (ins brtarget:$dst), + "jle\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE)]>; + def JNH : Pseudo<(outs), (ins brtarget:$dst), + "jnh\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH)]>; + def JNO : Pseudo<(outs), (ins brtarget:$dst), + "jno\t$dst", + [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO)]>; + } // Uses = [PSW] +} // isBranch = 1 + +//===----------------------------------------------------------------------===// +// Call Instructions... +// + +let isCall = 1 in + // All calls clobber the non-callee saved registers. Uses for argument + // registers are added manually. + let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, + F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in { + def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops), + "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>; + def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops), + "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>; + } + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +let isReMaterializable = 1 in +// FIXME: Provide imm12 variant +// FIXME: Address should be halfword aligned... 
+def LA64r : RXI<0x47, + (outs GR64:$dst), (ins laaddr:$src), + "lay\t{$dst, $src}", + [(set GR64:$dst, laaddr:$src)]>; +def LA64rm : RXYI<0x71E3, + (outs GR64:$dst), (ins i64imm:$src), + "larl\t{$dst, $src}", + [(set GR64:$dst, + (SystemZpcrelwrapper tglobaladdr:$src))]>; + +let neverHasSideEffects = 1 in +def NOP : Pseudo<(outs), (ins), "# no-op", []>; + +//===----------------------------------------------------------------------===// +// Move Instructions + +let neverHasSideEffects = 1 in { +def MOV32rr : RRI<0x18, + (outs GR32:$dst), (ins GR32:$src), + "lr\t{$dst, $src}", + []>; +def MOV64rr : RREI<0xB904, + (outs GR64:$dst), (ins GR64:$src), + "lgr\t{$dst, $src}", + []>; +def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src), + "# MOV128 PSEUDO!\n" + "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n" + "\tlgr\t${dst:subreg_even}, ${src:subreg_even}", + []>; +def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), + "# MOV64P PSEUDO!\n" + "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n" + "\tlr\t${dst:subreg_even}, ${src:subreg_even}", + []>; +} + +def MOVSX64rr32 : RREI<0xB914, + (outs GR64:$dst), (ins GR32:$src), + "lgfr\t{$dst, $src}", + [(set GR64:$dst, (sext GR32:$src))]>; +def MOVZX64rr32 : RREI<0xB916, + (outs GR64:$dst), (ins GR32:$src), + "llgfr\t{$dst, $src}", + [(set GR64:$dst, (zext GR32:$src))]>; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { +def MOV32ri16 : RII<0x8A7, + (outs GR32:$dst), (ins s16imm:$src), + "lhi\t{$dst, $src}", + [(set GR32:$dst, immSExt16:$src)]>; +def MOV64ri16 : RII<0x9A7, + (outs GR64:$dst), (ins s16imm64:$src), + "lghi\t{$dst, $src}", + [(set GR64:$dst, immSExt16:$src)]>; + +def MOV64rill16 : RII<0xFA5, + (outs GR64:$dst), (ins i64imm:$src), + "llill\t{$dst, $src}", + [(set GR64:$dst, i64ll16:$src)]>; +def MOV64rilh16 : RII<0xEA5, + (outs GR64:$dst), (ins i64imm:$src), + "llilh\t{$dst, $src}", + [(set GR64:$dst, i64lh16:$src)]>; +def MOV64rihl16 : RII<0xDA5, + (outs GR64:$dst), (ins i64imm:$src), + "llihl\t{$dst, $src}", + [(set GR64:$dst, i64hl16:$src)]>; +def MOV64rihh16 : RII<0xCA5, + (outs GR64:$dst), (ins i64imm:$src), + "llihh\t{$dst, $src}", + [(set GR64:$dst, i64hh16:$src)]>; + +def MOV64ri32 : RILI<0x1C0, + (outs GR64:$dst), (ins s32imm64:$src), + "lgfi\t{$dst, $src}", + [(set GR64:$dst, immSExt32:$src)]>; +def MOV64rilo32 : RILI<0xFC0, + (outs GR64:$dst), (ins i64imm:$src), + "llilf\t{$dst, $src}", + [(set GR64:$dst, i64lo32:$src)]>; +def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src), + "llihf\t{$dst, $src}", + [(set GR64:$dst, i64hi32:$src)]>; +} + +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +def MOV32rm : RXI<0x58, + (outs GR32:$dst), (ins rriaddr12:$src), + "l\t{$dst, $src}", + [(set GR32:$dst, (load rriaddr12:$src))]>; +def MOV32rmy : RXYI<0x58E3, + (outs GR32:$dst), (ins rriaddr:$src), + "ly\t{$dst, $src}", + [(set GR32:$dst, (load rriaddr:$src))]>; +def MOV64rm : RXYI<0x04E3, + (outs GR64:$dst), (ins rriaddr:$src), + "lg\t{$dst, $src}", + [(set GR64:$dst, (load rriaddr:$src))]>; +def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src), + "# MOV64P PSEUDO!\n" + "\tl\t${dst:subreg_odd}, $src\n" + "\tl\t${dst:subreg_even}, 4+$src", + [(set GR64P:$dst, (load rriaddr12:$src))]>; +def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src), + "# MOV64P PSEUDO!\n" + "\tly\t${dst:subreg_odd}, $src\n" + "\tly\t${dst:subreg_even}, 4+$src", + [(set GR64P:$dst, (load rriaddr:$src))]>; +def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src), + "# MOV128 PSEUDO!\n" + 
"\tlg\t${dst:subreg_odd}, $src\n" + "\tlg\t${dst:subreg_even}, 8+$src", + [(set GR128:$dst, (load rriaddr:$src))]>; +} + +def MOV32mr : RXI<0x50, + (outs), (ins rriaddr12:$dst, GR32:$src), + "st\t{$src, $dst}", + [(store GR32:$src, rriaddr12:$dst)]>; +def MOV32mry : RXYI<0x50E3, + (outs), (ins rriaddr:$dst, GR32:$src), + "sty\t{$src, $dst}", + [(store GR32:$src, rriaddr:$dst)]>; +def MOV64mr : RXYI<0x24E3, + (outs), (ins rriaddr:$dst, GR64:$src), + "stg\t{$src, $dst}", + [(store GR64:$src, rriaddr:$dst)]>; +def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src), + "# MOV64P PSEUDO!\n" + "\tst\t${src:subreg_odd}, $dst\n" + "\tst\t${src:subreg_even}, 4+$dst", + [(store GR64P:$src, rriaddr12:$dst)]>; +def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src), + "# MOV64P PSEUDO!\n" + "\tsty\t${src:subreg_odd}, $dst\n" + "\tsty\t${src:subreg_even}, 4+$dst", + [(store GR64P:$src, rriaddr:$dst)]>; +def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src), + "# MOV128 PSEUDO!\n" + "\tstg\t${src:subreg_odd}, $dst\n" + "\tstg\t${src:subreg_even}, 8+$dst", + [(store GR128:$src, rriaddr:$dst)]>; + +def MOV8mi : SII<0x92, + (outs), (ins riaddr12:$dst, i32i8imm:$src), + "mvi\t{$dst, $src}", + [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>; +def MOV8miy : SIYI<0x52EB, + (outs), (ins riaddr:$dst, i32i8imm:$src), + "mviy\t{$dst, $src}", + [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>; + +let AddedComplexity = 2 in { +def MOV16mi : SILI<0xE544, + (outs), (ins riaddr12:$dst, s16imm:$src), + "mvhhi\t{$dst, $src}", + [(truncstorei16 (i32 i32immSExt16:$src), riaddr12:$dst)]>, + Requires<[IsZ10]>; +def MOV32mi16 : SILI<0xE54C, + (outs), (ins riaddr12:$dst, s32imm:$src), + "mvhi\t{$dst, $src}", + [(store (i32 immSExt16:$src), riaddr12:$dst)]>, + Requires<[IsZ10]>; +def MOV64mi16 : SILI<0xE548, + (outs), (ins riaddr12:$dst, s32imm64:$src), + "mvghi\t{$dst, $src}", + [(store (i64 immSExt16:$src), riaddr12:$dst)]>, + Requires<[IsZ10]>; +} + +// sexts +def MOVSX32rr8 : RREI<0xB926, + (outs GR32:$dst), (ins GR32:$src), + "lbr\t{$dst, $src}", + [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>; +def MOVSX64rr8 : RREI<0xB906, + (outs GR64:$dst), (ins GR64:$src), + "lgbr\t{$dst, $src}", + [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>; +def MOVSX32rr16 : RREI<0xB927, + (outs GR32:$dst), (ins GR32:$src), + "lhr\t{$dst, $src}", + [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>; +def MOVSX64rr16 : RREI<0xB907, + (outs GR64:$dst), (ins GR64:$src), + "lghr\t{$dst, $src}", + [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>; + +// extloads +def MOVSX32rm8 : RXYI<0x76E3, + (outs GR32:$dst), (ins rriaddr:$src), + "lb\t{$dst, $src}", + [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>; +def MOVSX32rm16 : RXI<0x48, + (outs GR32:$dst), (ins rriaddr12:$src), + "lh\t{$dst, $src}", + [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>; +def MOVSX32rm16y : RXYI<0x78E3, + (outs GR32:$dst), (ins rriaddr:$src), + "lhy\t{$dst, $src}", + [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>; +def MOVSX64rm8 : RXYI<0x77E3, + (outs GR64:$dst), (ins rriaddr:$src), + "lgb\t{$dst, $src}", + [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>; +def MOVSX64rm16 : RXYI<0x15E3, + (outs GR64:$dst), (ins rriaddr:$src), + "lgh\t{$dst, $src}", + [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>; +def MOVSX64rm32 : RXYI<0x14E3, + (outs GR64:$dst), (ins rriaddr:$src), + "lgf\t{$dst, $src}", + [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>; + +def MOVZX32rm8 : RXYI<0x94E3, + (outs GR32:$dst), (ins rriaddr:$src), + "llc\t{$dst, 
$src}", + [(set GR32:$dst, (zextloadi32i8 rriaddr:$src))]>; +def MOVZX32rm16 : RXYI<0x95E3, + (outs GR32:$dst), (ins rriaddr:$src), + "llh\t{$dst, $src}", + [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>; +def MOVZX64rm8 : RXYI<0x90E3, + (outs GR64:$dst), (ins rriaddr:$src), + "llgc\t{$dst, $src}", + [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>; +def MOVZX64rm16 : RXYI<0x91E3, + (outs GR64:$dst), (ins rriaddr:$src), + "llgh\t{$dst, $src}", + [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>; +def MOVZX64rm32 : RXYI<0x16E3, + (outs GR64:$dst), (ins rriaddr:$src), + "llgf\t{$dst, $src}", + [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>; + +// truncstores +def MOV32m8r : RXI<0x42, + (outs), (ins rriaddr12:$dst, GR32:$src), + "stc\t{$src, $dst}", + [(truncstorei8 GR32:$src, rriaddr12:$dst)]>; + +def MOV32m8ry : RXYI<0x72E3, + (outs), (ins rriaddr:$dst, GR32:$src), + "stcy\t{$src, $dst}", + [(truncstorei8 GR32:$src, rriaddr:$dst)]>; + +def MOV32m16r : RXI<0x40, + (outs), (ins rriaddr12:$dst, GR32:$src), + "sth\t{$src, $dst}", + [(truncstorei16 GR32:$src, rriaddr12:$dst)]>; + +def MOV32m16ry : RXYI<0x70E3, + (outs), (ins rriaddr:$dst, GR32:$src), + "sthy\t{$src, $dst}", + [(truncstorei16 GR32:$src, rriaddr:$dst)]>; + +def MOV64m8r : RXI<0x42, + (outs), (ins rriaddr12:$dst, GR64:$src), + "stc\t{$src, $dst}", + [(truncstorei8 GR64:$src, rriaddr12:$dst)]>; + +def MOV64m8ry : RXYI<0x72E3, + (outs), (ins rriaddr:$dst, GR64:$src), + "stcy\t{$src, $dst}", + [(truncstorei8 GR64:$src, rriaddr:$dst)]>; + +def MOV64m16r : RXI<0x40, + (outs), (ins rriaddr12:$dst, GR64:$src), + "sth\t{$src, $dst}", + [(truncstorei16 GR64:$src, rriaddr12:$dst)]>; + +def MOV64m16ry : RXYI<0x70E3, + (outs), (ins rriaddr:$dst, GR64:$src), + "sthy\t{$src, $dst}", + [(truncstorei16 GR64:$src, rriaddr:$dst)]>; + +def MOV64m32r : RXI<0x50, + (outs), (ins rriaddr12:$dst, GR64:$src), + "st\t{$src, $dst}", + [(truncstorei32 GR64:$src, rriaddr12:$dst)]>; + +def MOV64m32ry : RXYI<0x50E3, + (outs), (ins rriaddr:$dst, GR64:$src), + "sty\t{$src, $dst}", + [(truncstorei32 GR64:$src, rriaddr:$dst)]>; + +// multiple regs moves +// FIXME: should we use multiple arg nodes? 
+def MOV32mrm : RSYI<0x90EB, + (outs), (ins riaddr:$dst, GR32:$from, GR32:$to), + "stmy\t{$from, $to, $dst}", + []>; +def MOV64mrm : RSYI<0x24EB, + (outs), (ins riaddr:$dst, GR64:$from, GR64:$to), + "stmg\t{$from, $to, $dst}", + []>; +def MOV32rmm : RSYI<0x90EB, + (outs GR32:$from, GR32:$to), (ins riaddr:$dst), + "lmy\t{$from, $to, $dst}", + []>; +def MOV64rmm : RSYI<0x04EB, + (outs GR64:$from, GR64:$to), (ins riaddr:$dst), + "lmg\t{$from, $to, $dst}", + []>; + +let isReMaterializable = 1, isAsCheapAsAMove = 1, isTwoAddress = 1 in { +def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), + "lhi\t${dst:subreg_even}, 0", + []>; +def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src), + "lghi\t${dst:subreg_even}, 0", + []>; +} + +// Byte swaps +def BSWAP32rr : RREI<0xB91F, + (outs GR32:$dst), (ins GR32:$src), + "lrvr\t{$dst, $src}", + [(set GR32:$dst, (bswap GR32:$src))]>; +def BSWAP64rr : RREI<0xB90F, + (outs GR64:$dst), (ins GR64:$src), + "lrvgr\t{$dst, $src}", + [(set GR64:$dst, (bswap GR64:$src))]>; + +// FIXME: this is invalid pattern for big-endian +//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src), +// "lrvh\t{$dst, $src}", +// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>; +def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src), + "lrv\t{$dst, $src}", + [(set GR32:$dst, (bswap (load rriaddr:$src)))]>; +def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src), + "lrvg\t{$dst, $src}", + [(set GR64:$dst, (bswap (load rriaddr:$src)))]>; + +//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src), +// "strvh\t{$src, $dst}", +// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>; +def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src), + "strv\t{$src, $dst}", + [(store (bswap GR32:$src), rriaddr:$dst)]>; +def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src), + "strvg\t{$src, $dst}", + [(store (bswap GR64:$src), rriaddr:$dst)]>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions + +let Defs = [PSW] in { +def NEG32rr : RRI<0x13, + (outs GR32:$dst), (ins GR32:$src), + "lcr\t{$dst, $src}", + [(set GR32:$dst, (ineg GR32:$src)), + (implicit PSW)]>; +def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src), + "lcgr\t{$dst, $src}", + [(set GR64:$dst, (ineg GR64:$src)), + (implicit PSW)]>; +def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src), + "lcgfr\t{$dst, $src}", + [(set GR64:$dst, (ineg (sext GR32:$src))), + (implicit PSW)]>; +} + +let isTwoAddress = 1 in { + +let Defs = [PSW] in { + +let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y +def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "ar\t{$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, GR32:$src2)), + (implicit PSW)]>; +def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "agr\t{$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), + (implicit PSW)]>; +} + +def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), + "a\t{$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), + "ay\t{$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; +def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), + "ag\t{$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; + + 
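Everything inside this isTwoAddress block maps to a destructive two-operand hardware encoding, so $dst and $src1 are tied and must end up in the same physical register, as the sketch below illustrates. A minimal, hypothetical example of building one of these instructions by hand (TII, MBB, MI, DL and the register variables are assumed to be in scope):

    // Sketch: ADD32rr lowers to "ar %dst, %src2", which overwrites its
    // first operand; the two-address pass therefore rewrites
    //   %dst = ADD32rr %a, %b
    // into a copy of %a into %dst followed by the destructive add.
    BuildMI(MBB, MI, DL, TII.get(SystemZ::ADD32rr), DstReg)
        .addReg(AReg)   // $src1, tied to $dst
        .addReg(BReg);  // $src2
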
+def ADD32ri16 : RII<0xA7A, + (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2), + "ahi\t{$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)), + (implicit PSW)]>; +def ADD32ri : RILI<0xC29, + (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2), + "afi\t{$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, imm:$src2)), + (implicit PSW)]>; +def ADD64ri16 : RILI<0xA7B, + (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2), + "aghi\t{$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)), + (implicit PSW)]>; +def ADD64ri32 : RILI<0xC28, + (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), + "agfi\t{$dst, $src2}", + [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)), + (implicit PSW)]>; + +let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y +def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "alr\t{$dst, $src2}", + [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>; +def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "algr\t{$dst, $src2}", + [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>; +} + +def ADC32ri : RILI<0xC2B, + (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2), + "alfi\t{$dst, $src2}", + [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>; +def ADC64ri32 : RILI<0xC2A, + (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), + "algfi\t{$dst, $src2}", + [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>; + +let Uses = [PSW] in { +def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "alcr\t{$dst, $src2}", + [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)), + (implicit PSW)]>; +def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "alcgr\t{$dst, $src2}", + [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)), + (implicit PSW)]>; +} + +let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y +def AND32rr : RRI<0x14, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "nr\t{$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>; +def AND64rr : RREI<0xB980, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "ngr\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>; +} + +def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), + "n\t{$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), + "ny\t{$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; +def AND64rm : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), + "ng\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; + +def AND32rill16 : RII<0xA57, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "nill\t{$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>; +def AND64rill16 : RII<0xA57, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "nill\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>; + +def AND32rilh16 : RII<0xA56, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "nilh\t{$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>; +def AND64rilh16 : RII<0xA56, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "nilh\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>; + +def AND64rihl16 : RII<0xA55, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "nihl\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>; +def AND64rihh16 : RII<0xA54, + 
(outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "nihh\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>; + +def AND32ri : RILI<0xC0B, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "nilf\t{$dst, $src2}", + [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>; +def AND64rilo32 : RILI<0xC0B, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "nilf\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>; +def AND64rihi32 : RILI<0xC0A, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "nihf\t{$dst, $src2}", + [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>; + +let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y +def OR32rr : RRI<0x16, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "or\t{$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>; +def OR64rr : RREI<0xB981, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "ogr\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>; +} + +def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), + "o\t{$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), + "oy\t{$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; +def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), + "og\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; + + // FIXME: Provide proper encoding! +def OR32ri16 : RII<0xA5B, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "oill\t{$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>; +def OR32ri16h : RII<0xA5A, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "oilh\t{$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>; +def OR32ri : RILI<0xC0D, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "oilf\t{$dst, $src2}", + [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>; + +def OR64rill16 : RII<0xA5B, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "oill\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>; +def OR64rilh16 : RII<0xA5A, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "oilh\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>; +def OR64rihl16 : RII<0xA59, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "oihl\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>; +def OR64rihh16 : RII<0xA58, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "oihh\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>; + +def OR64rilo32 : RILI<0xC0D, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "oilf\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>; +def OR64rihi32 : RILI<0xC0C, + (outs GR64:$dst), (ins GR64:$src1, i64imm:$src2), + "oihf\t{$dst, $src2}", + [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>; + +def SUB32rr : RRI<0x1B, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "sr\t{$dst, $src2}", + [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>; +def SUB64rr : RREI<0xB909, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "sgr\t{$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>; + +def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), + "s\t{$dst, $src2}", + [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, 
rriaddr:$src2), + "sy\t{$dst, $src2}", + [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; +def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), + "sg\t{$dst, $src2}", + [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; + +def SBC32rr : RRI<0x1F, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "slr\t{$dst, $src2}", + [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>; +def SBC64rr : RREI<0xB90B, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "slgr\t{$dst, $src2}", + [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>; + +def SBC32ri : RILI<0xC25, + (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2), + "sllfi\t{$dst, $src2}", + [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>; +def SBC64ri32 : RILI<0xC24, + (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), + "slgfi\t{$dst, $src2}", + [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>; + +let Uses = [PSW] in { +def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "slbr\t{$dst, $src2}", + [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)), + (implicit PSW)]>; +def SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "slbgr\t{$dst, $src2}", + [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)), + (implicit PSW)]>; +} + +let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y +def XOR32rr : RRI<0x17, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "xr\t{$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>; +def XOR64rr : RREI<0xB982, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "xgr\t{$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>; +} + +def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), + "x\t{$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))), + (implicit PSW)]>; +def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), + "xy\t{$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; +def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), + "xg\t{$dst, $src2}", + [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))), + (implicit PSW)]>; + +def XOR32ri : RILI<0xC07, + (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), + "xilf\t{$dst, $src2}", + [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>; + +} // Defs = [PSW] + +let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y +def MUL32rr : RREI<0xB252, + (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), + "msr\t{$dst, $src2}", + [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>; +def MUL64rr : RREI<0xB90C, + (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "msgr\t{$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>; +} + +def MUL64rrP : RRI<0x1C, + (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), + "mr\t{$dst, $src2}", + []>; +def UMUL64rrP : RREI<0xB996, + (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), + "mlr\t{$dst, $src2}", + []>; +def UMUL128rrP : RREI<0xB986, + (outs GR128:$dst), (ins GR128:$src1, GR64:$src2), + "mlgr\t{$dst, $src2}", + []>; + +def MUL32ri16 : RII<0xA7C, + (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2), + "mhi\t{$dst, $src2}", + [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>; +def MUL64ri16 : RII<0xA7D, + (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2), + "mghi\t{$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>; + +let AddedComplexity = 2 in { +def MUL32ri : RILI<0xC21, + (outs GR32:$dst), (ins GR32:$src1, 
s32imm:$src2), + "msfi\t{$dst, $src2}", + [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>, + Requires<[IsZ10]>; +def MUL64ri32 : RILI<0xC20, + (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), + "msgfi\t{$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>, + Requires<[IsZ10]>; +} + +def MUL32rm : RXI<0x71, + (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), + "ms\t{$dst, $src2}", + [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>; +def MUL32rmy : RXYI<0xE351, + (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), + "msy\t{$dst, $src2}", + [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>; +def MUL64rm : RXYI<0xE30C, + (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), + "msg\t{$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>; + +def MULSX64rr32 : RREI<0xB91C, + (outs GR64:$dst), (ins GR64:$src1, GR32:$src2), + "msgfr\t{$dst, $src2}", + [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>; + +def SDIVREM32r : RREI<0xB91D, + (outs GR128:$dst), (ins GR128:$src1, GR32:$src2), + "dsgfr\t{$dst, $src2}", + []>; +def SDIVREM64r : RREI<0xB90D, + (outs GR128:$dst), (ins GR128:$src1, GR64:$src2), + "dsgr\t{$dst, $src2}", + []>; + +def UDIVREM32r : RREI<0xB997, + (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), + "dlr\t{$dst, $src2}", + []>; +def UDIVREM64r : RREI<0xB987, + (outs GR128:$dst), (ins GR128:$src1, GR64:$src2), + "dlgr\t{$dst, $src2}", + []>; +let mayLoad = 1 in { +def SDIVREM32m : RXYI<0xE31D, + (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), + "dsgf\t{$dst, $src2}", + []>; +def SDIVREM64m : RXYI<0xE30D, + (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), + "dsg\t{$dst, $src2}", + []>; + +def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2), + "dl\t{$dst, $src2}", + []>; +def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), + "dlg\t{$dst, $src2}", + []>; +} // mayLoad +} // isTwoAddress = 1 + +//===----------------------------------------------------------------------===// +// Shifts + +let isTwoAddress = 1 in +def SRL32rri : RSI<0x88, + (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), + "srl\t{$src, $amt}", + [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>; +def SRL64rri : RSYI<0xEB0C, + (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), + "srlg\t{$dst, $src, $amt}", + [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>; + +let isTwoAddress = 1 in +def SHL32rri : RSI<0x89, + (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), + "sll\t{$src, $amt}", + [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>; +def SHL64rri : RSYI<0xEB0D, + (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), + "sllg\t{$dst, $src, $amt}", + [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>; + +let Defs = [PSW] in { +let isTwoAddress = 1 in +def SRA32rri : RSI<0x8A, + (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), + "sra\t{$src, $amt}", + [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)), + (implicit PSW)]>; + +def SRA64rri : RSYI<0xEB0A, + (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), + "srag\t{$dst, $src, $amt}", + [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)), + (implicit PSW)]>; +} // Defs = [PSW] + +def ROTL32rri : RSYI<0xEB1D, + (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), + "rll\t{$dst, $src, $amt}", + [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>; +def ROTL64rri : RSYI<0xEB1C, + (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), + "rllg\t{$dst, $src, $amt}", + [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>; + 
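As in the hardware ISA, the shift and rotate amounts above are address-style operands (riaddr/riaddr32): a base register plus a displacement whose low bits give the count, so a constant amount is expressed with base register 0. A hedged sketch of emitting a constant shift by hand, assuming the same (base, displacement) operand order the spill code above uses and the usual TII/MBB/MI/DL and registers in scope:

    // Sketch: "sll %dst, 3". SHL32rri is two-address (source tied to
    // destination), and its riaddr32 amount is (base, displacement);
    // base register 0 means no base, leaving a constant count of 3.
    BuildMI(MBB, MI, DL, TII.get(SystemZ::SHL32rri), DstReg)
        .addReg(SrcReg)  // tied source
        .addReg(0)       // no base register
        .addImm(3);      // displacement = shift amount
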
+//===----------------------------------------------------------------------===//
+// Test instructions (set PSW, but produce no other result)
+
+// Integer comparisons
+let Defs = [PSW] in {
+def CMP32rr : RRI<0x19,
+                  (outs), (ins GR32:$src1, GR32:$src2),
+                  "cr\t$src1, $src2",
+                  [(SystemZcmp GR32:$src1, GR32:$src2),
+                   (implicit PSW)]>;
+def CMP64rr : RREI<0xB920,
+                   (outs), (ins GR64:$src1, GR64:$src2),
+                   "cgr\t$src1, $src2",
+                   [(SystemZcmp GR64:$src1, GR64:$src2),
+                    (implicit PSW)]>;
+
+def CMP32ri : RILI<0xC2D,
+                   (outs), (ins GR32:$src1, s32imm:$src2),
+                   "cfi\t$src1, $src2",
+                   [(SystemZcmp GR32:$src1, imm:$src2),
+                    (implicit PSW)]>;
+def CMP64ri32 : RILI<0xC2C,
+                     (outs), (ins GR64:$src1, s32imm64:$src2),
+                     "cgfi\t$src1, $src2",
+                     [(SystemZcmp GR64:$src1, i64immSExt32:$src2),
+                      (implicit PSW)]>;
+
+def CMP32rm : RXI<0x59,
+                  (outs), (ins GR32:$src1, rriaddr12:$src2),
+                  "c\t$src1, $src2",
+                  [(SystemZcmp GR32:$src1, (load rriaddr12:$src2)),
+                   (implicit PSW)]>;
+def CMP32rmy : RXYI<0xE359,
+                    (outs), (ins GR32:$src1, rriaddr:$src2),
+                    "cy\t$src1, $src2",
+                    [(SystemZcmp GR32:$src1, (load rriaddr:$src2)),
+                     (implicit PSW)]>;
+def CMP64rm : RXYI<0xE320,
+                   (outs), (ins GR64:$src1, rriaddr:$src2),
+                   "cg\t$src1, $src2",
+                   [(SystemZcmp GR64:$src1, (load rriaddr:$src2)),
+                    (implicit PSW)]>;
+
+def UCMP32rr : RRI<0x15,
+                   (outs), (ins GR32:$src1, GR32:$src2),
+                   "clr\t$src1, $src2",
+                   [(SystemZucmp GR32:$src1, GR32:$src2),
+                    (implicit PSW)]>;
+def UCMP64rr : RREI<0xB921,
+                    (outs), (ins GR64:$src1, GR64:$src2),
+                    "clgr\t$src1, $src2",
+                    [(SystemZucmp GR64:$src1, GR64:$src2),
+                     (implicit PSW)]>;
+
+def UCMP32ri : RILI<0xC2F,
+                    (outs), (ins GR32:$src1, i32imm:$src2),
+                    "clfi\t$src1, $src2",
+                    [(SystemZucmp GR32:$src1, imm:$src2),
+                     (implicit PSW)]>;
+def UCMP64ri32 : RILI<0xC2E,
+                      (outs), (ins GR64:$src1, i64i32imm:$src2),
+                      "clgfi\t$src1, $src2",
+                      [(SystemZucmp GR64:$src1, i64immZExt32:$src2),
+                       (implicit PSW)]>;
+
+def UCMP32rm : RXI<0x55,
+                   (outs), (ins GR32:$src1, rriaddr12:$src2),
+                   "cl\t$src1, $src2",
+                   [(SystemZucmp GR32:$src1, (load rriaddr12:$src2)),
+                    (implicit PSW)]>;
+def UCMP32rmy : RXYI<0xE355,
+                     (outs), (ins GR32:$src1, rriaddr:$src2),
+                     "cly\t$src1, $src2",
+                     [(SystemZucmp GR32:$src1, (load rriaddr:$src2)),
+                      (implicit PSW)]>;
+def UCMP64rm : RXYI<0xE321,
+                    (outs), (ins GR64:$src1, rriaddr:$src2),
+                    "clg\t$src1, $src2",
+                    [(SystemZucmp GR64:$src1, (load rriaddr:$src2)),
+                     (implicit PSW)]>;
+
+// cgfr/cgf perform signed comparisons, so they match SystemZcmp.
+def CMPSX64rr32 : RREI<0xB930,
+                       (outs), (ins GR64:$src1, GR32:$src2),
+                       "cgfr\t$src1, $src2",
+                       [(SystemZcmp GR64:$src1, (sext GR32:$src2)),
+                        (implicit PSW)]>;
+def UCMPZX64rr32 : RREI<0xB931,
+                        (outs), (ins GR64:$src1, GR32:$src2),
+                        "clgfr\t$src1, $src2",
+                        [(SystemZucmp GR64:$src1, (zext GR32:$src2)),
+                         (implicit PSW)]>;
+
+def CMPSX64rm32 : RXYI<0xE330,
+                       (outs), (ins GR64:$src1, rriaddr:$src2),
+                       "cgf\t$src1, $src2",
+                       [(SystemZcmp GR64:$src1, (sextloadi64i32 rriaddr:$src2)),
+                        (implicit PSW)]>;
+def UCMPZX64rm32 : RXYI<0xE331,
+                        (outs), (ins GR64:$src1, rriaddr:$src2),
+                        "clgf\t$src1, $src2",
+                        [(SystemZucmp GR64:$src1, (zextloadi64i32 rriaddr:$src2)),
+                         (implicit PSW)]>;
+
+// FIXME: Add other crazy ucmp forms
+
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Other crazy stuff
+let Defs = [PSW] in {
+// flogr finds the leftmost one bit; the even register of the result pair
+// receives the bit position, which the ctlz pattern below extracts.
+def FLOGR64 : RREI<0xB983,
+                   (outs GR128:$dst), (ins GR64:$src),
+                   "flogr\t{$dst, $src}",
+                   []>;
+} // Defs = [PSW]
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns. 
+//===----------------------------------------------------------------------===//
+
+// ConstPools, JumpTables
+def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>;
+def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>;
+
+// anyext
+def : Pat<(i64 (anyext GR32:$src)),
+          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
+
+// calls
+def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>;
+def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// FIXME: use add/sub tricks with 32767/-32768
+
+// Arbitrary immediate support.
+def : Pat<(i32 imm:$src),
+          (EXTRACT_SUBREG (MOV64ri32 (i64 imm:$src)), subreg_32bit)>;
+
+// Implement in terms of LLIHF/OILF: load the high 32 bits into bits 32-63,
+// then OR in the low 32 bits.
+def : Pat<(i64 imm:$imm),
+          (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>;
+
+// trunc patterns
+def : Pat<(i32 (trunc GR64:$src)),
+          (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR64:$src, i32),
+          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// extload patterns
+def : Pat<(extloadi32i8  rriaddr:$src), (MOVZX32rm8  rriaddr:$src)>;
+def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i8  rriaddr:$src), (MOVZX64rm8  rriaddr:$src)>;
+def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>;
+def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>;
+
+// muls - the widening multiply puts its result in an even/odd register pair;
+// the high half of the product is extracted from the even register.
+def : Pat<(mulhs GR32:$src1, GR32:$src2),
+          (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+                                                   GR32:$src1, subreg_odd32),
+                                    GR32:$src2),
+                          subreg_even32)>;
+
+def : Pat<(mulhu GR32:$src1, GR32:$src2),
+          (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+                                                    GR32:$src1, subreg_odd32),
+                                     GR32:$src2),
+                          subreg_even32)>;
+def : Pat<(mulhu GR64:$src1, GR64:$src2),
+          (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+                                                     GR64:$src1, subreg_odd),
+                                      GR64:$src2),
+                          subreg_even)>;
+
+def : Pat<(ctlz GR64:$src),
+          (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>;
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
new file mode 100644
index 0000000000000..8ea11c95b27da
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@@ -0,0 +1,26 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SystemZMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) {
+  AlignmentIsInBytes = true;
+
+  PrivateGlobalPrefix = ".L";
+  WeakRefDirective = "\t.weak\t";
+  SetDirective = "\t.set\t";
+  PCSymbol = ".";
+
+  NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits";
+}
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h
new file mode 100644
index 0000000000000..3bebcb74e37c4
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h
@@ -0,0 +1,29 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SystemZMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZTARGETASMINFO_H
+#define SystemZTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class Target;
+  class StringRef;
+
+  struct SystemZMCAsmInfo : public MCAsmInfo {
+    explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT);
+  };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 0000000000000..e47d41962ea89
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,50 @@
+//==- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares SystemZ-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
+#define SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SystemZMachineFunctionInfo - This class is derived from MachineFunctionInfo
+/// and contains private SystemZ target-specific information for each
+/// MachineFunction.
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+  /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
+  /// stack frame in bytes.
+  unsigned CalleeSavedFrameSize;
+
+  /// LowReg - Low register of range of callee-saved registers to store.
+  unsigned LowReg;
+
+  /// HighReg - High register of range of callee-saved registers to store.
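+  /// (LowReg and HighReg delimit the contiguous range of callee-saved GPRs
+  /// that the prologue and epilogue store and reload with a single
+  /// multi-register move - see the MOV64mrm/MOV64rmm handling in
+  /// SystemZRegisterInfo.cpp.)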
+  unsigned HighReg;
+public:
+  SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
+
+  SystemZMachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+
+  unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+  void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+  unsigned getLowReg() const { return LowReg; }
+  void setLowReg(unsigned Reg) { LowReg = Reg; }
+
+  unsigned getHighReg() const { return HighReg; }
+  void setHighReg(unsigned Reg) { HighReg = Reg; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 0000000000000..156cace9c3746
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,306 @@
+//=====- SystemZOperands.td - SystemZ Operands defs ---------*- tblgen-*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the various SystemZ instruction operands.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Pattern Stuff.
+//===----------------------------------------------------------------------===//
+
+// SystemZ specific condition code. These correspond to CondCode in
+// SystemZ.h. They must be kept in sync.
+def SYSTEMZ_COND_O   : PatLeaf<(i8 0)>;
+def SYSTEMZ_COND_H   : PatLeaf<(i8 1)>;
+def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>;
+def SYSTEMZ_COND_L   : PatLeaf<(i8 3)>;
+def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>;
+def SYSTEMZ_COND_LH  : PatLeaf<(i8 5)>;
+def SYSTEMZ_COND_NE  : PatLeaf<(i8 6)>;
+def SYSTEMZ_COND_E   : PatLeaf<(i8 7)>;
+def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>;
+def SYSTEMZ_COND_HE  : PatLeaf<(i8 9)>;
+def SYSTEMZ_COND_NL  : PatLeaf<(i8 10)>;
+def SYSTEMZ_COND_LE  : PatLeaf<(i8 11)>;
+def SYSTEMZ_COND_NH  : PatLeaf<(i8 12)>;
+def SYSTEMZ_COND_NO  : PatLeaf<(i8 13)>;
+
+def LO8 : SDNodeXForm<imm, [{
+  // Transformation function: return the low 8 bits.
+  return getI8Imm(N->getZExtValue() & 0x00000000000000FFULL);
+}]>;
+
+def LL16 : SDNodeXForm<imm, [{
+  // Transformation function: return the low 16 bits.
+  return getI16Imm(N->getZExtValue() & 0x000000000000FFFFULL);
+}]>;
+
+def LH16 : SDNodeXForm<imm, [{
+  // Transformation function: return bits 16-31.
+  return getI16Imm((N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16);
+}]>;
+
+def HL16 : SDNodeXForm<imm, [{
+  // Transformation function: return bits 32-47.
+  return getI16Imm((N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32);
+}]>;
+
+def HH16 : SDNodeXForm<imm, [{
+  // Transformation function: return bits 48-63.
+  return getI16Imm((N->getZExtValue() & 0xFFFF000000000000ULL) >> 48);
+}]>;
+
+def LO32 : SDNodeXForm<imm, [{
+  // Transformation function: return the low 32 bits.
+  return getI32Imm(N->getZExtValue() & 0x00000000FFFFFFFFULL);
+}]>;
+
+def HI32 : SDNodeXForm<imm, [{
+  // Transformation function: return bits 32-63.
+  return getI32Imm(N->getZExtValue() >> 32);
+}]>;
+
+def i32ll16 : PatLeaf<(i32 imm), [{
+  // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16
+  // bits set.
+  return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16 : PatLeaf<(i32 imm), [{
+  // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set.
+  return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i32ll16c : PatLeaf<(i32 imm), [{
+  // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set.
+  return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i32lh16c : PatLeaf<(i32 imm), [{
+  // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16
+  // bits set.
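+  // (For example, 0x1234FFFF qualifies: its low 16 bits are all ones, so an
+  // AND with it leaves the low half untouched, and the LH16 xform extracts
+  // the interesting half, 0x1234, for the immediate field.)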
+  return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64ll16 : PatLeaf<(i64 imm), [{
+  // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16
+  // bits set.
+  return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16 : PatLeaf<(i64 imm), [{
+  // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set.
+  return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16 : PatLeaf<(i64 imm), [{
+  // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set.
+  return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16 : PatLeaf<(i64 imm), [{
+  // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set.
+  return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue());
+}], HH16>;
+
+def i64ll16c : PatLeaf<(i64 imm), [{
+  // i64ll16c predicate - true if the 64-bit immediate has all bits set
+  // except (possibly) the rightmost 16 bits.
+  return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue());
+}], LL16>;
+
+def i64lh16c : PatLeaf<(i64 imm), [{
+  // i64lh16c predicate - true if the 64-bit immediate has all bits set
+  // except (possibly) bits 16-31.
+  return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue());
+}], LH16>;
+
+def i64hl16c : PatLeaf<(i64 imm), [{
+  // i64hl16c predicate - true if the 64-bit immediate has all bits set
+  // except (possibly) bits 32-47.
+  return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue());
+}], HL16>;
+
+def i64hh16c : PatLeaf<(i64 imm), [{
+  // i64hh16c predicate - true if the 64-bit immediate has all bits set
+  // except (possibly) bits 48-63.
+  return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue());
+}], HH16>;
+
+def immSExt16 : PatLeaf<(imm), [{
+  // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended
+  // field.
+  if (N->getValueType(0) == MVT::i64) {
+    uint64_t val = N->getZExtValue();
+    return ((int64_t)val == (int16_t)val);
+  } else if (N->getValueType(0) == MVT::i32) {
+    uint32_t val = N->getZExtValue();
+    return ((int32_t)val == (int16_t)val);
+  }
+
+  return false;
+}], LL16>;
+
+def immSExt32 : PatLeaf<(i64 imm), [{
+  // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended
+  // field.
+  uint64_t val = N->getZExtValue();
+  return ((int64_t)val == (int32_t)val);
+}], LO32>;
+
+def i64lo32 : PatLeaf<(i64 imm), [{
+  // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
+  // bits set.
+  return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32 : PatLeaf<(i64 imm), [{
+  // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
+  return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], HI32>;
+
+def i64lo32c : PatLeaf<(i64 imm), [{
+  // i64lo32c predicate - true if the 64-bit immediate has all bits set
+  // except (possibly) the rightmost 32 bits.
+  return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue());
+}], LO32>;
+
+def i64hi32c : PatLeaf<(i64 imm), [{
+  // i64hi32c predicate - true if the 64-bit immediate has all bits set
+  // except (possibly) bits 32-63.
+  return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue());
+}], HI32>;
+
+def i32immSExt8 : PatLeaf<(i32 imm), [{
+  // i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
+  // sign extended field.
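+  // (For instance, 0x0000007F and 0xFFFFFF80 (-128) both pass, while
+  // 0x00000080 (+128) does not: sign-extending its low byte 0x80 would give
+  // 0xFFFFFF80, not 0x00000080.)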
+  return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+}], LO8>;
+
+def i32immSExt16 : PatLeaf<(i32 imm), [{
+  // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit
+  // sign extended field.
+  return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue();
+}], LL16>;
+
+def i64immSExt32 : PatLeaf<(i64 imm), [{
+  // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+  // sign extended field.
+  return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
+}], LO32>;
+
+def i64immZExt32 : PatLeaf<(i64 imm), [{
+  // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+  // zero extended field.
+  return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
+}], LO32>;
+
+// extloads
+def extloadi32i8   : PatFrag<(ops node:$ptr), (i32 (extloadi8  node:$ptr))>;
+def extloadi32i16  : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
+def extloadi64i8   : PatFrag<(ops node:$ptr), (i64 (extloadi8  node:$ptr))>;
+def extloadi64i16  : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
+def extloadi64i32  : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
+
+def sextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (sextloadi8  node:$ptr))>;
+def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
+def sextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (sextloadi8  node:$ptr))>;
+def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
+def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
+
+def zextloadi32i8  : PatFrag<(ops node:$ptr), (i32 (zextloadi8  node:$ptr))>;
+def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
+def zextloadi64i8  : PatFrag<(ops node:$ptr), (i64 (zextloadi8  node:$ptr))>;
+def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
+def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
+
+// A couple of more descriptive operand definitions.
+// 32-bits but only 8 bits are significant.
+def i32i8imm  : Operand<i32>;
+// 32-bits but only 16 bits are significant.
+def i32i16imm : Operand<i32>;
+// 64-bits but only 32 bits are significant.
+def i64i32imm : Operand<i64>;
+// Branch targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+
+// Unsigned i12
+def u12imm : Operand<i32> {
+  let PrintMethod = "printU12ImmOperand";
+}
+def u12imm64 : Operand<i64> {
+  let PrintMethod = "printU12ImmOperand";
+}
+
+// Signed i16
+def s16imm : Operand<i32> {
+  let PrintMethod = "printS16ImmOperand";
+}
+def s16imm64 : Operand<i64> {
+  let PrintMethod = "printS16ImmOperand";
+}
+// Signed i20
+def s20imm : Operand<i32> {
+  let PrintMethod = "printS20ImmOperand";
+}
+def s20imm64 : Operand<i64> {
+  let PrintMethod = "printS20ImmOperand";
+}
+// Signed i32
+def s32imm : Operand<i32> {
+  let PrintMethod = "printS32ImmOperand";
+}
+def s32imm64 : Operand<i64> {
+  let PrintMethod = "printS32ImmOperand";
+}
+
+def imm_pcrel : Operand<i64> {
+  let PrintMethod = "printPCRelImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// SystemZ Operand Definitions.
+//===----------------------------------------------------------------------===//
+
+// Address operands
+
+// riaddr := reg + imm
+def riaddr32 : Operand<i64>,
+               ComplexPattern<i64, 2, "SelectAddrRI12Only", []> {
+  let PrintMethod = "printRIAddrOperand";
+  let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp);
+}
+
+def riaddr12 : Operand<i64>,
+               ComplexPattern<i64, 2, "SelectAddrRI12", []> {
+  let PrintMethod = "printRIAddrOperand";
+  let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp);
+}
+
+def riaddr : Operand<i64>,
+             ComplexPattern<i64, 2, "SelectAddrRI", []> {
+  let PrintMethod = "printRIAddrOperand";
+  let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp);
+}
+
+//===----------------------------------------------------------------------===//
+
+// rriaddr := reg + reg + imm
+def rriaddr12 : Operand<i64>,
+                ComplexPattern<i64, 3, "SelectAddrRRI12", [], []> {
+  let PrintMethod = "printRRIAddrOperand";
+  let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index);
+}
+def rriaddr : Operand<i64>,
+              ComplexPattern<i64, 3, "SelectAddrRRI20", [], []> {
+  let PrintMethod = "printRRIAddrOperand";
+  let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
+def laaddr : Operand<i64>,
+             ComplexPattern<i64, 3, "SelectLAAddr",
+                            [add, sub, or, frameindex], []> {
+  let PrintMethod = "printRRIAddrOperand";
+  let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
+}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 0000000000000..38460a63712d6
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,343 @@
+//===- SystemZRegisterInfo.cpp - SystemZ Register Information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/BitVector.h"
+using namespace llvm;
+
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
+                                         const SystemZInstrInfo &tii)
+  : SystemZGenRegisterInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
+    TM(tm), TII(tii) {
+}
+
+const unsigned*
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  static const unsigned CalleeSavedRegs[] = {
+    SystemZ::R6D,  SystemZ::R7D,  SystemZ::R8D,  SystemZ::R9D,
+    SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
+    SystemZ::R14D, SystemZ::R15D,
+    SystemZ::F8L,  SystemZ::F9L,  SystemZ::F10L, SystemZ::F11L,
+    SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+    0
+  };
+
+  return CalleeSavedRegs;
+}
+
+const TargetRegisterClass* const*
+SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+  static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
+    &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+    &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+    &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+    &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+    &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
+    &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
+    &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
+    &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
+    &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0
+  };
+  return CalleeSavedRegClasses;
+}
+
+BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+  if (hasFP(MF))
+    Reserved.set(SystemZ::R11D);
+  Reserved.set(SystemZ::R14D);
+  Reserved.set(SystemZ::R15D);
+  return Reserved;
+}
+
+/// hasFP - Return true if the specified function should have a dedicated
+/// frame pointer register. This is true if the function has variable sized
+/// allocas or if frame pointer elimination is disabled.
+bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+void SystemZRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  MBB.erase(I);
+}
+
+int SystemZRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
+  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  SystemZMachineFunctionInfo *SystemZMFI =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
+  uint64_t StackSize = MFI->getStackSize();
+
+  // Fixed objects are really located in the "previous" frame.
+  if (FI < 0)
+    StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+  Offset += StackSize - TFI.getOffsetOfLocalArea();
+
+  // Skip the register save area if we generated the stack frame.
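+  // (A worked example, assuming a zero OffsetAdjustment: the local-area
+  // offset on this target is -160 - the size of the s390x register save
+  // area, as set up in SystemZTargetMachine - so a leaf function with
+  // StackSize == 0, no calls, and an object at ObjectOffset -8 gets
+  //   Offset = -8 + (0 - (-160)) = 152,
+  // and the adjustment below is skipped, making 152 the final displacement
+  // off R15.)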
+  if (StackSize || MFI->hasCalls())
+    Offset -= TFI.getOffsetOfLocalArea();
+
+  return Offset;
+}
+
+unsigned
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                         int SPAdj, int *Value,
+                                         RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Unexpected");
+
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  int FrameIndex = MI.getOperand(i).getIndex();
+
+  unsigned BasePtr = (hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
+
+  // This must be part of an rri or ri operand memory reference. Replace the
+  // FrameIndex with the base register BasePtr and add the frame offset to
+  // the displacement field.
+  MI.getOperand(i).ChangeToRegister(BasePtr, false);
+
+  // Offset is either a 12-bit unsigned or a 20-bit signed integer.
+  // FIXME: handle "too long" displacements.
+  int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+
+  // Check whether the displacement is too long to fit into a 12-bit
+  // zero-extended field and pick the matching memory instruction variant.
+  MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
+
+  MI.getOperand(i+1).ChangeToImmediate(Offset);
+  return 0;
+}
+
+void
+SystemZRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                       RegScavenger *RS) const {
+  // Determine whether R14/R15 will ever be clobbered inside the function,
+  // and if so, mark them as callee-saved.
+  MachineFrameInfo *FFI = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Check whether high FPRs are ever used; if so, we need to save R15 as
+  // well.
+  static const unsigned HighFPRs[] = {
+    SystemZ::F8L,  SystemZ::F9L,  SystemZ::F10L, SystemZ::F11L,
+    SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
+    SystemZ::F8S,  SystemZ::F9S,  SystemZ::F10S, SystemZ::F11S,
+    SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
+  };
+
+  bool HighFPRsUsed = false;
+  for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
+    HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
+
+  if (FFI->hasCalls())
+    /* FIXME: function is varargs */
+    /* FIXME: function grabs RA */
+    /* FIXME: function calls eh_return */
+    MRI.setPhysRegUsed(SystemZ::R14D);
+
+  if (HighFPRsUsed ||
+      FFI->hasCalls() ||
+      FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
+      FFI->hasVarSizedObjects() // Function calls dynamic alloca's
+      /* FIXME: function is varargs */)
+    MRI.setPhysRegUsed(SystemZ::R15D);
+}
+
+/// emitSPUpdate - Emit a series of instructions to increment / decrement the
+/// stack pointer by a constant value.
+static
+void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+                  int64_t NumBytes, const TargetInstrInfo &TII) {
+  unsigned Opc; uint64_t Chunk;
+  bool isSub = NumBytes < 0;
+  uint64_t Offset = isSub ? -NumBytes : NumBytes;
+
+  if (Offset >= (1LL << 15) - 1) {
+    Opc = SystemZ::ADD64ri32;
+    Chunk = (1LL << 31) - 1;
+  } else {
+    Opc = SystemZ::ADD64ri16;
+    Chunk = (1LL << 15) - 1;
+  }
+
+  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+                 DebugLoc::getUnknownLoc());
+
+  while (Offset) {
+    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
+    MachineInstr *MI =
+      BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
+        .addReg(SystemZ::R15D).addImm((isSub ? -(int64_t)ThisVal : ThisVal));
+    // The PSW implicit def is dead.
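+    // (The add is only performing stack-pointer arithmetic here; nothing
+    // reads the condition code it sets, so marking the PSW def dead keeps
+    // later passes from treating the flags as live.)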
+    MI->getOperand(3).setIsDead();
+    Offset -= ThisVal;
+  }
+}
+
+void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
+  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  SystemZMachineFunctionInfo *SystemZMFI =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
+                 DebugLoc::getUnknownLoc());
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  // Note that the area for callee-saved registers is already allocated, thus
+  // we need to 'undo' that part of the stack movement.
+  uint64_t StackSize = MFI->getStackSize();
+  StackSize -= SystemZMFI->getCalleeSavedFrameSize();
+
+  uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
+
+  // Skip the callee-saved push instructions.
+  while (MBBI != MBB.end() &&
+         (MBBI->getOpcode() == SystemZ::MOV64mr ||
+          MBBI->getOpcode() == SystemZ::MOV64mrm))
+    ++MBBI;
+
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+
+  // adjust stack pointer: R15 -= numbytes
+  if (StackSize || MFI->hasCalls()) {
+    assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+           "Invalid stack frame calculation!");
+    emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
+  }
+
+  if (hasFP(MF)) {
+    // Update R11 with the new base value...
+    BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
+      .addReg(SystemZ::R15D);
+
+    // Mark the FramePtr as live-in in every block except the entry.
+    for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+         I != E; ++I)
+      I->addLiveIn(SystemZ::R11D);
+
+  }
+}
+
+void SystemZRegisterInfo::emitEpilogue(MachineFunction &MF,
+                                       MachineBasicBlock &MBB) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  SystemZMachineFunctionInfo *SystemZMFI =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  unsigned RetOpcode = MBBI->getOpcode();
+
+  switch (RetOpcode) {
+  case SystemZ::RET: break;  // These are ok
+  default:
+    assert(0 && "Can only insert epilog into returning blocks");
+  }
+
+  // Get the number of bytes to allocate from the FrameInfo.
+  // Note that the area for callee-saved registers is already allocated, thus
+  // we need to 'undo' that part of the stack movement.
+  uint64_t StackSize =
+    MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
+  uint64_t NumBytes = StackSize - TFI.getOffsetOfLocalArea();
+
+  // Skip over the terminator instructions at the end of the block.
+  while (MBBI != MBB.begin()) {
+    MachineBasicBlock::iterator PI = prior(MBBI);
+    --MBBI;
+    if (!PI->getDesc().isTerminator())
+      break;
+  }
+
+  // When the callee-saved restores were emitted, the stack frame was not yet
+  // finalized (and thus the stack size was unknown). Adjust the restore
+  // offset now that the full stack size is known.
+  if (StackSize || MFI->hasCalls()) {
+    assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
+            MBBI->getOpcode() == SystemZ::MOV64rm) &&
+           "Expected to see callee-save register restore code");
+    assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
+           "Invalid stack frame calculation!");
+
+    unsigned i = 0;
+    MachineInstr &MI = *MBBI;
+    while (!MI.getOperand(i).isImm()) {
+      ++i;
+      assert(i < MI.getNumOperands() && "Unexpected restore code!");
+    }
+
+    uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
+    // If Offset does not fit into the 20-bit signed displacement field, we
+    // need to emit some additional code...
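+    // (A 20-bit signed displacement covers -524288 .. 524287, i.e. 2^19 - 1
+    // on the positive side. For, say, Offset == 600000 the code below first
+    // bumps R15 by 600000 - 524287 = 75713 and then performs the restore
+    // with the maximal in-range displacement of 524287.)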
+ if (Offset > 524287) { + // Fold the displacement into load instruction as much as possible. + NumBytes = Offset - 524287; + Offset = 524287; + emitSPUpdate(MBB, MBBI, NumBytes, TII); + } + + MI.getOperand(i).ChangeToImmediate(Offset); + } +} + +unsigned SystemZRegisterInfo::getRARegister() const { + assert(0 && "What is the return address register"); + return 0; +} + +unsigned SystemZRegisterInfo::getFrameRegister(MachineFunction &MF) const { + assert(0 && "What is the frame register"); + return 0; +} + +unsigned SystemZRegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned SystemZRegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +int SystemZRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + assert(0 && "What is the dwarf register number"); + return -1; +} + +#include "SystemZGenRegisterInfo.inc" diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h new file mode 100644 index 0000000000000..b22b05da401e5 --- /dev/null +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -0,0 +1,82 @@ +//===- SystemZRegisterInfo.h - SystemZ Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SystemZ implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SystemZREGISTERINFO_H +#define SystemZREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "SystemZGenRegisterInfo.h.inc" + +namespace llvm { + +namespace SystemZ { + /// SubregIndex - The index of various sized subregister classes. Note that + /// these indices must be kept in sync with the class indices in the + /// SystemZRegisterInfo.td file. + enum SubregIndex { + SUBREG_32BIT = 1, SUBREG_EVEN = 1, SUBREG_ODD = 2 + }; +} + +class SystemZSubtarget; +class SystemZInstrInfo; +class Type; + +struct SystemZRegisterInfo : public SystemZGenRegisterInfo { + SystemZTargetMachine &TM; + const SystemZInstrInfo &TII; + + SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii); + + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const { return true; } + bool hasFP(const MachineFunction &MF) const; + + int getFrameIndexOffset(MachineFunction &MF, int FI) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; + + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + + // Exception handling queries. 
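+  // (Note: the corresponding implementations in SystemZRegisterInfo.cpp are
+  // still placeholders that simply assert, as are the debug-information
+  // queries above.)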
+  unsigned getEHExceptionRegister() const;
+  unsigned getEHHandlerRegister() const;
+
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 0000000000000..8795847a6c3c6
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,490 @@
+//===- SystemZRegisterInfo.td - The SystemZ Register File ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+  let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+  let Namespace = "SystemZ";
+}
+
+// We identify all our registers with a 4-bit ID, for consistency's sake.
+
+// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<4> num, string n> : SystemZReg<n> {
+  field bits<4> Num = num;
+}
+
+// GPR64 - One of the 16 64-bit general-purpose registers
+class GPR64<bits<4> num, string n, list<Register> subregs,
+            list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+  field bits<4> Num = num;
+  let Aliases = aliases;
+}
+
+// GPR128 - 8 even-odd register pairs
+class GPR128<bits<4> num, string n, list<Register> subregs,
+             list<Register> aliases = []>
+ : SystemZRegWithSubregs<n, subregs> {
+  field bits<4> Num = num;
+  let Aliases = aliases;
+}
+
+// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers
+class FPRS<bits<4> num, string n> : SystemZReg<n> {
+  field bits<4> Num = num;
+}
+
+// FPRL - One of the 16 64-bit floating-point registers
+class FPRL<bits<4> num, string n, list<Register> subregs>
+ : SystemZRegWithSubregs<n, subregs> {
+  field bits<4> Num = num;
+}
+
+// General-purpose registers
+def R0W  : GPR32< 0,  "r0">, DwarfRegNum<[0]>;
+def R1W  : GPR32< 1,  "r1">, DwarfRegNum<[1]>;
+def R2W  : GPR32< 2,  "r2">, DwarfRegNum<[2]>;
+def R3W  : GPR32< 3,  "r3">, DwarfRegNum<[3]>;
+def R4W  : GPR32< 4,  "r4">, DwarfRegNum<[4]>;
+def R5W  : GPR32< 5,  "r5">, DwarfRegNum<[5]>;
+def R6W  : GPR32< 6,  "r6">, DwarfRegNum<[6]>;
+def R7W  : GPR32< 7,  "r7">, DwarfRegNum<[7]>;
+def R8W  : GPR32< 8,  "r8">, DwarfRegNum<[8]>;
+def R9W  : GPR32< 9,  "r9">, DwarfRegNum<[9]>;
+def R10W : GPR32<10, "r10">, DwarfRegNum<[10]>;
+def R11W : GPR32<11, "r11">, DwarfRegNum<[11]>;
+def R12W : GPR32<12, "r12">, DwarfRegNum<[12]>;
+def R13W : GPR32<13, "r13">, DwarfRegNum<[13]>;
+def R14W : GPR32<14, "r14">, DwarfRegNum<[14]>;
+def R15W : GPR32<15, "r15">, DwarfRegNum<[15]>;
+
+def R0D  : GPR64< 0,  "r0", [R0W]>,  DwarfRegNum<[0]>;
+def R1D  : GPR64< 1,  "r1", [R1W]>,  DwarfRegNum<[1]>;
+def R2D  : GPR64< 2,  "r2", [R2W]>,  DwarfRegNum<[2]>;
+def R3D  : GPR64< 3,  "r3", [R3W]>,  DwarfRegNum<[3]>;
+def R4D  : GPR64< 4,  "r4", [R4W]>,  DwarfRegNum<[4]>;
+def R5D  : GPR64< 5,  "r5", [R5W]>,  DwarfRegNum<[5]>;
+def R6D  : GPR64< 6,  "r6", [R6W]>,  DwarfRegNum<[6]>;
+def R7D  : GPR64< 7,  "r7", [R7W]>,  DwarfRegNum<[7]>;
+def R8D  : GPR64< 8,  "r8", [R8W]>,  DwarfRegNum<[8]>;
+def R9D  : GPR64< 9,  "r9", [R9W]>,  DwarfRegNum<[9]>;
+def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>;
+def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>;
+def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
+def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
+def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
+def R15D : 
GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>; + +// Register pairs +def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>; +def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>; +def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>; +def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>, DwarfRegNum<[6]>; +def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>, DwarfRegNum<[8]>; +def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>, DwarfRegNum<[10]>; +def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>, DwarfRegNum<[12]>; +def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>; + +def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>; +def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>; +def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>; +def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>, DwarfRegNum<[6]>; +def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>, DwarfRegNum<[8]>; +def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>, DwarfRegNum<[10]>; +def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>, DwarfRegNum<[12]>; +def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>, DwarfRegNum<[14]>; + +// Floating-point registers +def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>; +def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>; +def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>; +def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>; +def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>; +def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>; +def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>; +def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>; +def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>; +def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>; +def F10S : FPRS<10, "f10">, DwarfRegNum<[26]>; +def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>; +def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>; +def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>; +def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>; +def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>; + +def F0L : FPRL< 0, "f0", [F0S]>, DwarfRegNum<[16]>; +def F1L : FPRL< 1, "f1", [F1S]>, DwarfRegNum<[17]>; +def F2L : FPRL< 2, "f2", [F2S]>, DwarfRegNum<[18]>; +def F3L : FPRL< 3, "f3", [F3S]>, DwarfRegNum<[19]>; +def F4L : FPRL< 4, "f4", [F4S]>, DwarfRegNum<[20]>; +def F5L : FPRL< 5, "f5", [F5S]>, DwarfRegNum<[21]>; +def F6L : FPRL< 6, "f6", [F6S]>, DwarfRegNum<[22]>; +def F7L : FPRL< 7, "f7", [F7S]>, DwarfRegNum<[23]>; +def F8L : FPRL< 8, "f8", [F8S]>, DwarfRegNum<[24]>; +def F9L : FPRL< 9, "f9", [F9S]>, DwarfRegNum<[25]>; +def F10L : FPRL<10, "f10", [F10S]>, DwarfRegNum<[26]>; +def F11L : FPRL<11, "f11", [F11S]>, DwarfRegNum<[27]>; +def F12L : FPRL<12, "f12", [F12S]>, DwarfRegNum<[28]>; +def F13L : FPRL<13, "f13", [F13S]>, DwarfRegNum<[29]>; +def F14L : FPRL<14, "f14", [F14S]>, DwarfRegNum<[30]>; +def F15L : FPRL<15, "f15", [F15S]>, DwarfRegNum<[31]>; + +// Status register +def PSW : SystemZReg<"psw">; + +def subreg_32bit : PatLeaf<(i32 1)>; +def subreg_even32 : PatLeaf<(i32 1)>; +def subreg_odd32 : PatLeaf<(i32 2)>; +def subreg_even : PatLeaf<(i32 3)>; +def subreg_odd : PatLeaf<(i32 4)>; + +def : SubRegSet<1, [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, + R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D], + [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, + R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>; + +def : SubRegSet<3, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], + [R0D, R2D, R4D, R6D, R8D, R10D, R12D, R14D]>; + +def : SubRegSet<4, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], + [R1D, R3D, R5D, R7D, R9D, R11D, R13D, R15D]>; + +def : SubRegSet<1, [R0P, R2P, 
R4P, R6P, R8P, R10P, R12P, R14P], + [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>; + +def : SubRegSet<2, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P], + [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>; + +def : SubRegSet<1, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], + [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>; + +def : SubRegSet<2, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q], + [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>; + +/// Register classes +def GR32 : RegisterClass<"SystemZ", [i32], 32, + // Volatile registers + [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W, + // Frame pointer, sometimes allocable + R11W, + // Volatile, but not allocable + R14W, R15W]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_REG32[] = { + SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, + SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, SystemZ::R11W, + SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, + SystemZ::R6W, SystemZ::R14W, SystemZ::R13W + }; + static const unsigned SystemZ_REG32_nofp[] = { + SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, + SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, /* No R11W */ + SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, + SystemZ::R6W, SystemZ::R14W, SystemZ::R13W + }; + GR32Class::iterator + GR32Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_REG32_nofp; + else + return SystemZ_REG32; + } + GR32Class::iterator + GR32Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned)); + else + return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned)); + } + }]; +} + +/// Registers used to generate address. Everything except R0. 
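+/// (In z/Architecture address computations a base or index field of zero
+/// means "no register" rather than "the contents of R0", so R0 can never
+/// participate in an address - hence these separate ADDR register classes.)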
+def ADDR32 : RegisterClass<"SystemZ", [i32], 32, + // Volatile registers + [R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W, + // Frame pointer, sometimes allocable + R11W, + // Volatile, but not allocable + R14W, R15W]> +{ + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_ADDR32[] = { + SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, + SystemZ::R5W, /* No R0W */ SystemZ::R12W, SystemZ::R11W, + SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, + SystemZ::R6W, SystemZ::R14W, SystemZ::R13W + }; + static const unsigned SystemZ_ADDR32_nofp[] = { + SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W, + SystemZ::R5W, /* No R0W */ SystemZ::R12W, /* No R11W */ + SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W, + SystemZ::R6W, SystemZ::R14W, SystemZ::R13W + }; + ADDR32Class::iterator + ADDR32Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_ADDR32_nofp; + else + return SystemZ_ADDR32; + } + ADDR32Class::iterator + ADDR32Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned)); + else + return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned)); + } + }]; +} + +def GR64 : RegisterClass<"SystemZ", [i64], 64, + // Volatile registers + [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D, + // Frame pointer, sometimes allocable + R11D, + // Volatile, but not allocable + R14D, R15D]> +{ + let SubRegClassList = [GR32]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_REG64[] = { + SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, + SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, SystemZ::R11D, + SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, + SystemZ::R6D, SystemZ::R14D, SystemZ::R13D + }; + static const unsigned SystemZ_REG64_nofp[] = { + SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, + SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, /* No R11D */ + SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, + SystemZ::R6D, SystemZ::R14D, SystemZ::R13D + }; + GR64Class::iterator + GR64Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_REG64_nofp; + else + return SystemZ_REG64; + } + GR64Class::iterator + GR64Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned)); + else + return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned)); + } + }]; +} + +def ADDR64 : RegisterClass<"SystemZ", [i64], 64, + // Volatile registers + [R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D, + // Frame pointer, sometimes allocable + R11D, + // Volatile, but not allocable + R14D, R15D]> +{ + 
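+  // As in the other SystemZ register classes, the MethodProtos/MethodBodies
+  // hooks below let allocation_order_begin/end pick between two allocation
+  // orders: when the function needs a frame pointer, the _nofp variant (with
+  // R11D removed) is returned, so the frame pointer is never handed out by
+  // the register allocator.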
let SubRegClassList = [ADDR32]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_ADDR64[] = { + SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, + SystemZ::R5D, /* No R0D */ SystemZ::R12D, SystemZ::R11D, + SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, + SystemZ::R6D, SystemZ::R14D, SystemZ::R13D + }; + static const unsigned SystemZ_ADDR64_nofp[] = { + SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, + SystemZ::R5D, /* No R0D */ SystemZ::R12D, /* No R11D */ + SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D, + SystemZ::R6D, SystemZ::R14D, SystemZ::R13D + }; + ADDR64Class::iterator + ADDR64Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_ADDR64_nofp; + else + return SystemZ_ADDR64; + } + ADDR64Class::iterator + ADDR64Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned)); + else + return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned)); + } + }]; +} + +// Even-odd register pairs +def GR64P : RegisterClass<"SystemZ", [v2i32], 64, + [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> +{ + let SubRegClassList = [GR32, GR32]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_REG64P[] = { + SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, SystemZ::R10P, + SystemZ::R8P, SystemZ::R6P }; + static const unsigned SystemZ_REG64P_nofp[] = { + SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, /* NO R10P */ + SystemZ::R8P, SystemZ::R6P }; + GR64PClass::iterator + GR64PClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_REG64P_nofp; + else + return SystemZ_REG64P; + } + GR64PClass::iterator + GR64PClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned)); + else + return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned)); + } + }]; +} + +def GR128 : RegisterClass<"SystemZ", [v2i64], 128, + [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> +{ + let SubRegClassList = [GR32, GR32, GR64, GR64]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_REG128[] = { + SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, SystemZ::R10Q, + SystemZ::R8Q, SystemZ::R6Q }; + static const unsigned SystemZ_REG128_nofp[] = { + SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, /* NO R10Q */ + SystemZ::R8Q, SystemZ::R6Q }; + GR128Class::iterator + GR128Class::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if 
(RI->hasFP(MF)) + return SystemZ_REG128_nofp; + else + return SystemZ_REG128; + } + GR128Class::iterator + GR128Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->hasFP(MF)) + return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned)); + else + return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned)); + } + }]; +} + +def FP32 : RegisterClass<"SystemZ", [f32], 32, + [F0S, F1S, F2S, F3S, F4S, F5S, F6S, F7S, + F8S, F9S, F10S, F11S, F12S, F13S, F14S, F15S]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_REGFP32[] = { + SystemZ::F0S, SystemZ::F2S, SystemZ::F4S, SystemZ::F6S, + SystemZ::F1S, SystemZ::F3S, SystemZ::F5S, SystemZ::F7S, + SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, + SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S }; + FP32Class::iterator + FP32Class::allocation_order_begin(const MachineFunction &MF) const { + return SystemZ_REGFP32; + } + FP32Class::iterator + FP32Class::allocation_order_end(const MachineFunction &MF) const { + return SystemZ_REGFP32 + (sizeof(SystemZ_REGFP32) / sizeof(unsigned)); + } + }]; +} + +def FP64 : RegisterClass<"SystemZ", [f64], 64, + [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L, + F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> { + let SubRegClassList = [FP32]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + static const unsigned SystemZ_REGFP64[] = { + SystemZ::F0L, SystemZ::F2L, SystemZ::F4L, SystemZ::F6L, + SystemZ::F1L, SystemZ::F3L, SystemZ::F5L, SystemZ::F7L, + SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L, + SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L }; + FP64Class::iterator + FP64Class::allocation_order_begin(const MachineFunction &MF) const { + return SystemZ_REGFP64; + } + FP64Class::iterator + FP64Class::allocation_order_end(const MachineFunction &MF) const { + return SystemZ_REGFP64 + (sizeof(SystemZ_REGFP64) / sizeof(unsigned)); + } + }]; +} + +// Status flags registers. +def CCR : RegisterClass<"SystemZ", [i64], 64, [PSW]> { + let CopyCost = -1; // Don't allow copying of status registers. +} diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp new file mode 100644 index 0000000000000..a8b5e1f186797 --- /dev/null +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -0,0 +1,47 @@ +//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZ specific subclass of TargetSubtarget. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "SystemZ.h"
+#include "SystemZGenSubtarget.inc"
+#include "llvm/GlobalValue.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+                                   const std::string &FS):
+  HasZ10Insts(false) {
+  std::string CPU = "z9";
+
+  // Parse features string.
+  ParseSubtargetFeatures(FS, CPU);
+}
+
+/// True if accessing the GV requires an extra load.
+bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV,
+                                           const TargetMachine& TM,
+                                           bool isDirectCall) const {
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    // An extra load is needed for all externally visible globals.
+    if (isDirectCall)
+      return false;
+
+    if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
+      return false;
+
+    return true;
+  }
+
+  return false;
+}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 0000000000000..405d6e91b7ee5
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,45 @@
+//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
+#define LLVM_TARGET_SystemZ_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+
+#include <string>
+
+namespace llvm {
+class GlobalValue;
+class TargetMachine;
+
+class SystemZSubtarget : public TargetSubtarget {
+  bool HasZ10Insts;
+public:
+  /// This constructor initializes the data members to match that
+  /// of the specified triple.
+  ///
+  SystemZSubtarget(const std::string &TT, const std::string &FS);
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options. Definition of function is auto generated by tblgen.
+  std::string ParseSubtargetFeatures(const std::string &FS,
+                                     const std::string &CPU);
+
+  bool isZ10() const { return HasZ10Insts; }
+
+  bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
+                           bool isDirectCall) const;
+};
+} // End llvm namespace
+
+#endif // LLVM_TARGET_SystemZ_SUBTARGET_H
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 0000000000000..990e0031c5ec9
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,44 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "SystemZTargetMachine.h"
+#include "SystemZ.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+  // Register the target.
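+  // (The two Register* objects below are the usual TargetRegistry idiom:
+  // their constructors run at initialization time and hook the SystemZ
+  // TargetMachine and MCAsmInfo factories onto TheSystemZTarget, which tools
+  // like llc then look up by name or triple.)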
+ RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); + RegisterAsmInfo<SystemZMCAsmInfo> Y(TheSystemZTarget); +} + +/// SystemZTargetMachine ctor - Create an ILP64 architecture model +/// +SystemZTargetMachine::SystemZTargetMachine(const Target &T, + const std::string &TT, + const std::string &FS) + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS), + DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" + "-f64:64:64-f128:128:128-a0:16:16"), + InstrInfo(*this), TLInfo(*this), + FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) { + + if (getRelocationModel() == Reloc::Default) + setRelocationModel(Reloc::Static); +} + +bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // Install an instruction selector. + PM.add(createSystemZISelDag(*this, OptLevel)); + return false; +} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h new file mode 100644 index 0000000000000..551aeb5a3e47b --- /dev/null +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -0,0 +1,61 @@ +//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SystemZ specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H +#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H + +#include "SystemZInstrInfo.h" +#include "SystemZISelLowering.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +/// SystemZTargetMachine +/// +class SystemZTargetMachine : public LLVMTargetMachine { + SystemZSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + SystemZInstrInfo InstrInfo; + SystemZTargetLowering TLInfo; + + // SystemZ does not have any call stack frame, so there is no + // SystemZ-specific TargetFrameInfo subclass. + TargetFrameInfo FrameInfo; +public: + SystemZTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); + + virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } + + virtual const SystemZRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + + virtual SystemZTargetLowering *getTargetLowering() const { + return const_cast<SystemZTargetLowering*>(&TLInfo); + } + + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); +}; // SystemZTargetMachine. + +} // end namespace llvm + +#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H diff --git a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..743d8d322d054 --- /dev/null +++ b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) + +add_llvm_library(LLVMSystemZInfo + SystemZTargetInfo.cpp + ) + +add_dependencies(LLVMSystemZInfo SystemZCodeGenTable_gen) diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile new file mode 100644 index 0000000000000..0be80eb4e6ad3 --- /dev/null +++ b/lib/Target/SystemZ/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMSystemZInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp new file mode 100644 index 0000000000000..8272b1188201c --- /dev/null +++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheSystemZTarget; + +extern "C" void LLVMInitializeSystemZTargetInfo() { + RegisterTarget<Triple::systemz> X(TheSystemZTarget, "systemz", "SystemZ"); +} diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index ed544b73eaeda..cc6be9fa7abbb 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -41,7 +41,7 @@ unsigned LLVMPointerSize(LLVMTargetDataRef TD) { } LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) { - return wrap(unwrap(TD)->getIntPtrType()); + return wrap(unwrap(TD)->getIntPtrType(getGlobalContext())); } unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) { diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 7b843df7422dd..5bcd6583635be 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/System/Mutex.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringExtras.h" @@ -155,13 +156,13 @@ const TargetAlignElem TargetData::InvalidAlignmentElem =

    @verbatim<type><size>:<abi_align>:<pref_align>@endverbatim: Numeric type alignment. Type is - one of i|f|v|a, corresponding to integer, floating point, vector (aka - packed) or aggregate. Size indicates the size, e.g., 32 or 64 bits. + one of i|f|v|a, corresponding to integer, floating point, vector, or + aggregate. Size indicates the size, e.g., 32 or 64 bits. \p - The default string, fully specified is: + The default string, fully specified, is:

    - "E-p:64:64:64-a0:0:0-f32:32:32-f64:0:64" - "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:0:64" + "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64" + "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64" "-v64:64:64-v128:128:128"

    Note that in the case of aggregates, 0 is the default ABI and preferred @@ -171,6 +172,7 @@ const TargetAlignElem TargetData::InvalidAlignmentElem = void TargetData::init(const std::string &TargetDescription) { std::string temp = TargetDescription; + LayoutMap = 0; LittleEndian = false; PointerMemSize = 8; PointerABIAlign = 8; @@ -184,9 +186,9 @@ void TargetData::init(const std::string &TargetDescription) { setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64 setAlignment(FLOAT_ALIGN, 4, 4, 32); // float setAlignment(FLOAT_ALIGN, 8, 8, 64); // double - setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32 + setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ... setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... - setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct, union, class, ... + setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct while (!temp.empty()) { std::string token = getToken(temp, "-"); @@ -316,61 +318,30 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, : Alignments[BestMatchIdx].PrefAlign; } -namespace { - -/// LayoutInfo - The lazy cache of structure layout information maintained by -/// TargetData. Note that the struct types must have been free'd before -/// llvm_shutdown is called (and thus this is deallocated) because all the -/// targets with cached elements should have been destroyed. -/// -typedef std::pair<const TargetData*, const StructType*> LayoutKey; - -struct DenseMapLayoutKeyInfo { - static inline LayoutKey getEmptyKey() { return LayoutKey(0, 0); } - static inline LayoutKey getTombstoneKey() { - return LayoutKey((TargetData*)(intptr_t)-1, 0); - } - static unsigned getHashValue(const LayoutKey &Val) { - return DenseMapInfo<void*>::getHashValue(Val.first) ^ - DenseMapInfo<void*>::getHashValue(Val.second); - } - static bool isEqual(const LayoutKey &LHS, const LayoutKey &RHS) { - return LHS == RHS; - } - - static bool isPod() { return true; } -}; - -typedef DenseMap<LayoutKey, StructLayout*, DenseMapLayoutKeyInfo> LayoutInfoTy; - -} - -static ManagedStatic<LayoutInfoTy> LayoutInfo; -static ManagedStatic<sys::SmartMutex<true> > LayoutLock; +typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; TargetData::~TargetData() { - if (!LayoutInfo.isConstructed()) + if (!LayoutMap) return; - sys::SmartScopedLock<true> Lock(&*LayoutLock); // Remove any layouts for this TD. - LayoutInfoTy &TheMap = *LayoutInfo; + LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap); for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) { - if (I->first.first == this) { - I->second->~StructLayout(); - free(I->second); - TheMap.erase(I++); - } else { - ++I; - } + I->second->~StructLayout(); + free(I->second); + TheMap.erase(I++); } + + delete static_cast<LayoutInfoTy*>(LayoutMap); } const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { - LayoutInfoTy &TheMap = *LayoutInfo; + if (!LayoutMap) + LayoutMap = static_cast<void*>(new LayoutInfoTy()); + + LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap); - sys::SmartScopedLock<true> Lock(&*LayoutLock); - StructLayout *&SL = TheMap[LayoutKey(this, Ty)]; + StructLayout *&SL = TheMap[Ty]; if (SL) return SL; // Otherwise, create the struct layout. Because it is variable length, we @@ -392,10 +363,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { /// removed, this method must be called whenever a StructType is removed to /// avoid a dangling pointer in this cache. void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { - if (!LayoutInfo.isConstructed()) return; // No cache. + if (!LayoutMap) return; // No cache.
- sys::SmartScopedLock<true> Lock(&*LayoutLock); - LayoutInfoTy::iterator I = LayoutInfo->find(LayoutKey(this, Ty)); + LayoutInfoTy* LayoutInfo = static_cast<LayoutInfoTy*>(LayoutMap); + LayoutInfoTy::iterator I = LayoutInfo->find(Ty); if (I == LayoutInfo->end()) return; I->second->~StructLayout(); @@ -453,7 +424,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { case Type::VectorTyID: return cast<VectorType>(Ty)->getBitWidth(); default: - assert(0 && "TargetData::getTypeSizeInBits(): Unsupported type"); + llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type"); break; } return 0; @@ -508,7 +479,7 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { AlignType = VECTOR_ALIGN; break; default: - assert(0 && "Bad type for getAlignment!!!"); + llvm_unreachable("Bad type for getAlignment!!!"); break; } @@ -540,8 +511,8 @@ unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const { /// getIntPtrType - Return an unsigned integer type that is the same size as, /// or greater than, the host pointer size. -const IntegerType *TargetData::getIntPtrType() const { - return IntegerType::get(getPointerSizeInBits()); +const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { + return IntegerType::get(C, getPointerSizeInBits()); } @@ -555,7 +526,8 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, TI = gep_type_begin(ptrTy, Indices, Indices+NumIndices); for (unsigned CurIDX = 0; CurIDX != NumIndices; ++CurIDX, ++TI) { if (const StructType *STy = dyn_cast<StructType>(*TI)) { - assert(Indices[CurIDX]->getType() == Type::Int32Ty && + assert(Indices[CurIDX]->getType() == + Type::getInt32Ty(ptrTy->getContext()) && "Illegal struct idx"); unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue(); diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index ceaea0c2027ce..094a57edb419a 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -12,11 +12,29 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Constant.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; +//===----------------------------------------------------------------------===// +// TargetOperandInfo +//===----------------------------------------------------------------------===// + +/// getRegClass - Get the register class for the operand, handling resolution +/// of "symbolic" pointer register classes etc. If this is not a register +/// operand, this returns null. +const TargetRegisterClass * +TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const { + if (isLookupPtrRegClass()) + return TRI->getPointerRegClass(RegClass); + return TRI->getRegClass(RegClass); +} + +//===----------------------------------------------------------------------===// +// TargetInstrInfo +//===----------------------------------------------------------------------===// + TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc, unsigned numOpcodes) : Descriptors(Desc), NumOpcodes(numOpcodes) { @@ -25,6 +43,14 @@ TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc, TargetInstrInfo::~TargetInstrInfo() { } +/// insertNoop - Insert a noop into the instruction stream at the specified +/// point.
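+/// The base implementation below simply aborts; targets whose schedulers or +/// hazard recognizers actually request noop insertion must override it.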
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + llvm_unreachable("Target didn't implement insertNoop!"); +} + + bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { const TargetInstrDesc &TID = MI->getDesc(); if (!TID.isTerminator()) return false; @@ -37,14 +63,33 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { return !isPredicated(MI); } -/// getInstrOperandRegClass - Return register class of the operand of an -/// instruction of the specified TargetInstrDesc. -const TargetRegisterClass* -llvm::getInstrOperandRegClass(const TargetRegisterInfo *TRI, - const TargetInstrDesc &II, unsigned Op) { - if (Op >= II.getNumOperands()) - return NULL; - if (II.OpInfo[Op].isLookupPtrRegClass()) - return TRI->getPointerRegClass(); - return TRI->getRegClass(II.OpInfo[Op].RegClass); + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run until the next SeparatorChar or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorChar or newlines. +/// Variable-length instructions are not handled here; this function +/// may be overridden in the target code to do that. +unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const { + + + // Count the number of instructions in the asm. + bool atInsnStart = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (*Str == '\n' || *Str == MAI.getSeparatorChar()) + atInsnStart = true; + if (atInsnStart && !isspace(*Str)) { + Length += MAI.getMaxInstLength(); + atInsnStart = false; + } + if (atInsnStart && strncmp(Str, MAI.getCommentString(), + strlen(MAI.getCommentString())) == 0) + atInsnStart = false; + } + + return Length; } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp new file mode 100644 index 0000000000000..c1aab9921fb22 --- /dev/null +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -0,0 +1,1089 @@ +//===-- llvm/Target/TargetLoweringObjectFile.cpp - Object File Info -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Code +//===----------------------------------------------------------------------===// + +TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) { + TextSection = 0; + DataSection = 0; + BSSSection = 0; + ReadOnlySection = 0; + StaticCtorSection = 0; + StaticDtorSection = 0; + LSDASection = 0; + EHFrameSection = 0; + + DwarfAbbrevSection = 0; + DwarfInfoSection = 0; + DwarfLineSection = 0; + DwarfFrameSection = 0; + DwarfPubNamesSection = 0; + DwarfPubTypesSection = 0; + DwarfDebugInlineSection = 0; + DwarfStrSection = 0; + DwarfLocSection = 0; + DwarfARangesSection = 0; + DwarfRangesSection = 0; + DwarfMacroInfoSection = 0; +} + +TargetLoweringObjectFile::~TargetLoweringObjectFile() { +} + +static bool isSuitableForBSS(const GlobalVariable *GV) { + Constant *C = GV->getInitializer(); + + // Must have zero initializer. + if (!C->isNullValue()) + return false; + + // Leave constant zeros in readonly constant sections, so they can be shared. + if (GV->isConstant()) + return false; + + // If the global has an explicit section specified, don't put it in BSS. + if (!GV->getSection().empty()) + return false; + + // If -nozero-initialized-in-bss is specified, don't ever use BSS. + if (NoZerosInBSS) + return false; + + // Otherwise, put it in BSS! + return true; +} + +/// IsNullTerminatedString - Return true if the specified constant (which is +/// known to have a type that is an array of 1/2/4 byte elements) ends with a +/// nul value and contains no other nuls in it. +static bool IsNullTerminatedString(const Constant *C) { + const ArrayType *ATy = cast<ArrayType>(C->getType()); + + // First check: do we have a constant array terminated with zero? + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) { + if (ATy->getNumElements() == 0) return false; + + ConstantInt *Null = + dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1)); + if (Null == 0 || Null->getZExtValue() != 0) + return false; // Not null terminated. + + // Verify that the null doesn't occur anywhere else in the string. + for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i) + // Reject constantexpr elements etc. + if (!isa<ConstantInt>(CVA->getOperand(i)) || + CVA->getOperand(i) == Null) + return false; + return true; + } + + // Another possibility: [1 x i8] zeroinitializer + if (isa<ConstantAggregateZero>(C)) + return ATy->getNumElements() == 1; + + return false; +} + +/// getKindForGlobal - This is a top-level target-independent classifier for +/// a global variable. Given a global variable and information from TM, it +/// classifies the global in a variety of ways that make various target +/// implementations simpler. The target implementation is free to ignore this +/// extra info of course.
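+/// For example, a nul-terminated i8 array initializer is classified as a +/// mergeable 1-byte C string, while a zero-initialized, non-constant global +/// is classified as BSS.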
+SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, + const TargetMachine &TM){ + assert(!GV->isDeclaration() && !GV->hasAvailableExternallyLinkage() && + "Can only be used for global definitions"); + + Reloc::Model ReloModel = TM.getRelocationModel(); + + // Early exit - functions should always be in text sections. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + if (GVar == 0) + return SectionKind::getText(); + + // Handle thread-local data first. + if (GVar->isThreadLocal()) { + if (isSuitableForBSS(GVar)) + return SectionKind::getThreadBSS(); + return SectionKind::getThreadData(); + } + + // The variable can easily be put in the BSS section. + if (isSuitableForBSS(GVar)) + return SectionKind::getBSS(); + + Constant *C = GVar->getInitializer(); + + // If the global is marked constant, we can put it into a mergeable section, + // a mergeable string section, or general .data if it contains relocations. + if (GVar->isConstant()) { + // If the initializer for the global contains something that requires a + // relocation, then we may have to drop this into a writable data section + // even though it is marked const. + switch (C->getRelocationInfo()) { + default: llvm_unreachable("unknown relocation info kind"); + case Constant::NoRelocation: + // If the initializer is a null-terminated string, put it in a "cstring" + // section of the right width. + if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { + if (const IntegerType *ITy = + dyn_cast<IntegerType>(ATy->getElementType())) { + if ((ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16 || + ITy->getBitWidth() == 32) && + IsNullTerminatedString(C)) { + if (ITy->getBitWidth() == 8) + return SectionKind::getMergeable1ByteCString(); + if (ITy->getBitWidth() == 16) + return SectionKind::getMergeable2ByteCString(); + + assert(ITy->getBitWidth() == 32 && "Unknown width"); + return SectionKind::getMergeable4ByteCString(); + } + } + } + + // Otherwise, just drop it into a mergeable constant section. If we have + // a section for this size, use it, otherwise use the arbitrary sized + // mergeable section. + switch (TM.getTargetData()->getTypeAllocSize(C->getType())) { + case 4: return SectionKind::getMergeableConst4(); + case 8: return SectionKind::getMergeableConst8(); + case 16: return SectionKind::getMergeableConst16(); + default: return SectionKind::getMergeableConst(); + } + + case Constant::LocalRelocation: + // In the static relocation model, the linker will resolve all addresses, + // so the relocation entries will actually be constants by the time the + // app starts up. However, we can't put this into a mergeable section, + // because the linker doesn't take relocations into consideration when it + // tries to merge entries in the section. + if (ReloModel == Reloc::Static) + return SectionKind::getReadOnly(); + + // Otherwise, the dynamic linker needs to fix it up, put it in the + // writable data.rel.local section. + return SectionKind::getReadOnlyWithRelLocal(); + + case Constant::GlobalRelocations: + // In the static relocation model, the linker will resolve all addresses, + // so the relocation entries will actually be constants by the time the + // app starts up. However, we can't put this into a mergeable section, + // because the linker doesn't take relocations into consideration when it + // tries to merge entries in the section. + if (ReloModel == Reloc::Static) + return SectionKind::getReadOnly(); + + // Otherwise, the dynamic linker needs to fix it up, put it in the + // writable data.rel section.
+ return SectionKind::getReadOnlyWithRel(); + } + } + + // Okay, this isn't a constant. If the initializer for the global is going + // to require a runtime relocation by the dynamic linker, put it into a more + // specific section to improve startup time of the app. This coalesces these + // globals together onto fewer pages, improving the locality of the dynamic + // linker. + if (ReloModel == Reloc::Static) + return SectionKind::getDataNoRel(); + + switch (C->getRelocationInfo()) { + default: llvm_unreachable("unknown relocation info kind"); + case Constant::NoRelocation: + return SectionKind::getDataNoRel(); + case Constant::LocalRelocation: + return SectionKind::getDataRelLocal(); + case Constant::GlobalRelocations: + return SectionKind::getDataRel(); + } +} + +/// SectionForGlobal - This method computes the appropriate section to emit +/// the specified global variable or function definition. This should not +/// be passed external (or available externally) globals. +const MCSection *TargetLoweringObjectFile:: +SectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, + const TargetMachine &TM) const { + // Select section name. + if (GV->hasSection()) + return getExplicitSectionGlobal(GV, Kind, Mang, TM); + + + // Use default section depending on the 'type' of global + return SelectSectionForGlobal(GV, Kind, Mang, TM); +} + + +// Lame default implementation. Calculate the section name for global. +const MCSection * +TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine &TM) const{ + assert(!Kind.isThreadLocal() && "Doesn't support TLS"); + + if (Kind.isText()) + return getTextSection(); + + if (Kind.isBSS() && BSSSection != 0) + return BSSSection; + + if (Kind.isReadOnly() && ReadOnlySection != 0) + return ReadOnlySection; + + return getDataSection(); +} + +/// getSectionForConstant - Given a mergable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection * +TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const { + if (Kind.isReadOnly() && ReadOnlySection != 0) + return ReadOnlySection; + + return DataSection; +} + +/// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a +/// pc-relative reference to the specified global variable from exception +/// handling information. In addition to the symbol, this returns +/// by-reference: +/// +/// IsIndirect - True if the returned symbol is actually a stub that contains +/// the address of the symbol, false if the symbol is the global itself. +/// +/// IsPCRel - True if the symbol reference is already pc-relative, false if +/// the caller needs to subtract off the address of the reference from the +/// symbol. +/// +const MCExpr *TargetLoweringObjectFile:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const { + // The generic implementation of this just returns a direct reference to the + // symbol. 
+ IsIndirect = false; + IsPCRel = false; + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, false); + return MCSymbolRefExpr::Create(Name.str(), getContext()); +} + + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; + +TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { + // If we have the section uniquing map, free it. + delete (ELFUniqueMapTy*)UniquingMap; +} + +const MCSection *TargetLoweringObjectFileELF:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) const { + if (UniquingMap == 0) + UniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionELF *&Entry = Map[Section]; + if (Entry) return Entry; + + return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, + getContext()); +} + +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((ELFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + BSSSection = + getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + + TLSBSSSection = + getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + + DataRelSection = + getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + DataRelLocalSection = + getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); + + DataRelROSection = + getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + + DataRelROLocalSection = + getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); + + MergeableConst4Section = + getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); + + MergeableConst8Section = + getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); + + MergeableConst16Section = + getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); + + StaticCtorSection = + getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, +
MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + StaticDtorSection = + getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Exception Handling Sections. + + // FIXME: We're emitting LSDA info into a readonly section on ELF, even though + // it contains relocatable pointers. In PIC mode, this is probably a big + // runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + EHFrameSection = + getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Debug Info Sections. + DwarfAbbrevSection = + getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfInfoSection = + getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLineSection = + getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfFrameSection = + getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfStrSection = + getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLocSection = + getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfARangesSection = + getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfRangesSection = + getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); +} + + +static SectionKind +getELFKindForNamedSection(const char *Name, SectionKind K) { + if (Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. 
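+  // For example, ".tbss.foo" and ".gnu.linkonce.tb.foo" are recognized as +  // thread-local BSS below, while a name that matches nothing keeps the kind +  // passed in by the caller. (The name "foo" here is purely illustrative.)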
+ if (strcmp(Name, ".bss") == 0 || + strncmp(Name, ".bss.", 5) == 0 || + strncmp(Name, ".gnu.linkonce.b.", 16) == 0 || + strncmp(Name, ".llvm.linkonce.b.", 17) == 0 || + strcmp(Name, ".sbss") == 0 || + strncmp(Name, ".sbss.", 6) == 0 || + strncmp(Name, ".gnu.linkonce.sb.", 17) == 0 || + strncmp(Name, ".llvm.linkonce.sb.", 18) == 0) + return SectionKind::getBSS(); + + if (strcmp(Name, ".tdata") == 0 || + strncmp(Name, ".tdata.", 7) == 0 || + strncmp(Name, ".gnu.linkonce.td.", 17) == 0 || + strncmp(Name, ".llvm.linkonce.td.", 18) == 0) + return SectionKind::getThreadData(); + + if (strcmp(Name, ".tbss") == 0 || + strncmp(Name, ".tbss.", 6) == 0 || + strncmp(Name, ".gnu.linkonce.tb.", 17) == 0 || + strncmp(Name, ".llvm.linkonce.tb.", 18) == 0) + return SectionKind::getThreadBSS(); + + return K; +} + + +static unsigned +getELFSectionType(const char *Name, SectionKind K) { + + if (strcmp(Name, ".init_array") == 0) + return MCSectionELF::SHT_INIT_ARRAY; + + if (strcmp(Name, ".fini_array") == 0) + return MCSectionELF::SHT_FINI_ARRAY; + + if (strcmp(Name, ".preinit_array") == 0) + return MCSectionELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return MCSectionELF::SHT_NOBITS; + + return MCSectionELF::SHT_PROGBITS; +} + + +static unsigned +getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= MCSectionELF::SHF_ALLOC; + + if (K.isText()) + Flags |= MCSectionELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= MCSectionELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= MCSectionELF::SHF_TLS; + + // K.isMergeableConst() is left out to honour PR4650 + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= MCSectionELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= MCSectionELF::SHF_STRINGS; + + return Flags; +} + + +const MCSection *TargetLoweringObjectFileELF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + const char *SectionName = GV->getSection().c_str(); + + // Infer section flags from the section name if we can. + Kind = getELFKindForNamedSection(SectionName, Kind); + + return getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); +} + +static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) return ".gnu.linkonce.t."; + if (Kind.isReadOnly()) return ".gnu.linkonce.r."; + + if (Kind.isThreadData()) return ".gnu.linkonce.td."; + if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; + + if (Kind.isBSS()) return ".gnu.linkonce.b."; + if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; + if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; + if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".gnu.linkonce.d.rel.ro."; +} + +const MCSection *TargetLoweringObjectFileELF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. 
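+  // For instance, a weak read-only global ends up in its own +  // ".gnu.linkonce.r.<mangled name>" section.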
+ if (GV->isWeakForLinker()) { + const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + std::string Name = Mang->makeNameProper(GV->getNameStr()); + + return getELFSection((Prefix+Name).c_str(), + getELFSectionType((Prefix+Name).c_str(), Kind), + getELFSectionFlags(Kind), + Kind); + } + + if (Kind.isText()) return TextSection; + + if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString() || + Kind.isMergeable4ByteCString()) { + + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! + unsigned Align = + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); + + const char *SizeSpec = ".rodata.str1."; + if (Kind.isMergeable2ByteCString()) + SizeSpec = ".rodata.str2."; + else if (Kind.isMergeable4ByteCString()) + SizeSpec = ".rodata.str4."; + else + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + + + std::string Name = SizeSpec + utostr(Align); + return getELFSection(Name.c_str(), MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + return ReadOnlySection; // .const + } + + if (Kind.isReadOnly()) return ReadOnlySection; + + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isThreadBSS()) return TLSBSSSection; + + if (Kind.isBSS()) return BSSSection; + + if (Kind.isDataNoRel()) return DataSection; + if (Kind.isDataRelLocal()) return DataRelLocalSection; + if (Kind.isDataRel()) return DataRelSection; + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +/// getSectionForConstant - Given a mergeable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection *TargetLoweringObjectFileELF:: +getSectionForConstant(SectionKind Kind) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; + +TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)UniquingMap; +} + + +const MCSectionMachO *TargetLoweringObjectFileMachO:: +getMachOSection(const StringRef &Segment, const StringRef &Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) const { + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section; if so, this should + // be diagnosed by the client as an error.
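+  // The uniquing key has the form "Segment,Section", e.g. "__TEXT,__text".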
+ + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, + Reserved2, Kind, getContext()); +} + + +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((MachOUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + TextSection // .text + = getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + DataSection // .data + = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + + CStringSection // .cstring + = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. + SixteenByteConstantSection = 0; + if (TM.getRelocationModel() != Reloc::Static && + TM.getTargetData()->getPointerSize() == 32) + SixteenByteConstantSection = // .literal16 + getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + + TextCoalSection + = getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataCoalSection + = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataSection // .const_data + = getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + + + LazySymbolPointerSection + = getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection + = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + } else { + StaticCtorSection + = getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__DATA", "__mod_term_func", + 
MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); + EHFrameSection = + getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + + // Debug Information. + DwarfAbbrevSection = + getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' has an invalid section specifier '" + GV->getSection()+ + "': " + ErrorCode + "."); + // Fall back to dropping it into the data section. + return DataSection; + } + + // Get the section. + const MCSectionMachO *S = + getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with llvm_report_error. 
+ llvm_report_error("Global variable '" + GV->getNameStr() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +const MCSection *TargetLoweringObjectFileMachO:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); + + if (Kind.isText()) + return GV->isWeakForLinker() ? TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on whether it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. + if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString()) { + if (TM.getTargetData()->getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) { + if (Kind.isMergeable1ByteCString()) + return CStringSection; + assert(Kind.isMergeable2ByteCString()); + return UStringSection; + } + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +const MCSection * +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. + if (Kind.isDataRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide +/// not to emit the UsedDirective for some symbols in llvm.used. +// FIXME: REMOVE this (rdar://7071300) +bool TargetLoweringObjectFileMachO:: +shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { + /// On Darwin, internally linked data beginning with "L" or "l" does not have + /// the directive emitted (this occurs in ObjC metadata). + if (!GV) return false; + + // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. + if (GV->hasLocalLinkage() && !isa<Function>(GV)) { + // FIXME: ObjC metadata is currently emitted as internal symbols that have + // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and + // this horrible hack can go away. + const std::string &Name = Mang->getMangledName(GV); + if (Name[0] == 'L' || Name[0] == 'l') + return false; + } + + return true; +} + +const MCExpr *TargetLoweringObjectFileMachO:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const { + // The mach-o version of this method defaults to returning a stub reference.
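+  // For a global "foo" this yields a reference to "foo$non_lazy_ptr" (with +  // the mangler's usual prefix applied); "foo" here is purely illustrative.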
+ IsIndirect = true; + IsPCRel = false; + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + return MCSymbolRefExpr::Create(Name.str(), getContext()); +} + + +//===----------------------------------------------------------------------===// +// COFF +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; + +TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { + delete (COFFUniqueMapTy*)UniquingMap; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +getCOFFSection(const char *Name, bool isDirective, SectionKind Kind) const { + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new COFFUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionCOFF *&Entry = Map[Name]; + if (Entry) return Entry; + + return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +} + +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((COFFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); + DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); + StaticCtorSection = + getCOFFSection(".ctors", false, SectionKind::getDataRel()); + StaticDtorSection = + getCOFFSection(".dtors", false, SectionKind::getDataRel()); + + // FIXME: We're emitting LSDA info into a readonly section on COFF, even + // though it contains relocatable pointers. In PIC mode, this is probably a + // big runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); + EHFrameSection = + getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); + + // Debug info. + // FIXME: Don't use 'directive' mode here.
+ DwarfAbbrevSection = + getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", + true, SectionKind::getMetadata()); + DwarfInfoSection = + getCOFFSection("\t.section\t.debug_info,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLineSection = + getCOFFSection("\t.section\t.debug_line,\"dr\"", + true, SectionKind::getMetadata()); + DwarfFrameSection = + getCOFFSection("\t.section\t.debug_frame,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubNamesSection = + getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubTypesSection = + getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", + true, SectionKind::getMetadata()); + DwarfStrSection = + getCOFFSection("\t.section\t.debug_str,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLocSection = + getCOFFSection("\t.section\t.debug_loc,\"dr\"", + true, SectionKind::getMetadata()); + DwarfARangesSection = + getCOFFSection("\t.section\t.debug_aranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfRangesSection = + getCOFFSection("\t.section\t.debug_ranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfMacroInfoSection = + getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", + true, SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileCOFF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + return getCOFFSection(GV->getSection().c_str(), false, Kind); +} + +static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text$linkonce"; + if (Kind.isWriteable()) + return ".data$linkonce"; + return ".rdata$linkonce"; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Doesn't support TLS"); + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. 
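+  // For instance, a weak read-only global is placed in its own +  // ".rdata$linkonce<mangled name>" section.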
+ if (GV->isWeakForLinker()) { + const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); + std::string Name = Mang->makeNameProper(GV->getNameStr()); + return getCOFFSection((Prefix+Name).c_str(), false, Kind); + } + + if (Kind.isText()) + return getTextSection(); + + return getDataSection(); +} + diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index c487cb805306a..fec59b5e2b50d 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -33,7 +33,10 @@ namespace llvm { FloatABI::ABIType FloatABIType; bool NoImplicitFloat; bool NoZerosInBSS; - bool ExceptionHandling; + bool DwarfExceptionHandling; + bool SjLjExceptionHandling; + bool JITEmitDebugInfo; + bool JITEmitDebugInfoToDisk; bool UnwindTablesMandatory; Reloc::Model RelocationModel; CodeModel::Model CMModel; @@ -104,9 +107,32 @@ DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::location(NoZerosInBSS), cl::init(false)); static cl::opt<bool, true> -EnableExceptionHandling("enable-eh", +EnableDwarfExceptionHandling("enable-eh", cl::desc("Emit DWARF exception handling (default if target supports)"), - cl::location(ExceptionHandling), + cl::location(DwarfExceptionHandling), + cl::init(false)); +static cl::opt<bool, true> +EnableSjLjExceptionHandling("enable-sjlj-eh", + cl::desc("Emit SJLJ exception handling (default if target supports)"), + cl::location(SjLjExceptionHandling), + cl::init(false)); +// In debug builds, make this default to true. +#ifdef NDEBUG +#define EMIT_DEBUG false +#else +#define EMIT_DEBUG true +#endif +static cl::opt<bool, true> +EmitJitDebugInfo("jit-emit-debug", + cl::desc("Emit debug information to debugger"), + cl::location(JITEmitDebugInfo), + cl::init(EMIT_DEBUG)); +#undef EMIT_DEBUG +static cl::opt<bool, true> +EmitJitDebugInfoToDisk("jit-emit-debug-to-disk", + cl::Hidden, + cl::desc("Emit debug info objfiles to disk"), + cl::location(JITEmitDebugInfoToDisk), cl::init(false)); static cl::opt<bool, true> EnableUnwindTables("unwind-tables", @@ -176,8 +202,8 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", // TargetMachine Class // -TargetMachine::TargetMachine() - : AsmInfo(0) { +TargetMachine::TargetMachine(const Target &T) + : TheTarget(T), AsmInfo(0) { // Typically it will be subtargets that will adjust FloatABIType from Default // to Soft or Hard. if (UseSoftFloat) @@ -237,4 +263,3 @@ namespace llvm { return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; } } - diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index a84fdaa4a8029..fac67e2e1aaf1 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -40,10 +40,10 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, TargetRegisterInfo::~TargetRegisterInfo() {} /// getPhysicalRegisterRegClass - Returns the Register Class of a physical -/// register of the given type. If type is MVT::Other, then just return any +/// register of the given type. If type is EVT::Other, then just return any /// register class the register belongs to.
const TargetRegisterClass * -TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, MVT VT) const { +TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { assert(isPhysicalRegister(reg) && "reg must be a physical register"); // Pick the most super register class of the right type that contains @@ -62,14 +62,14 @@ TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, MVT VT) const { /// getAllocatableSetForRC - Toggle the bits that represent allocatable /// registers for the specific register class. -static void getAllocatableSetForRC(MachineFunction &MF, +static void getAllocatableSetForRC(const MachineFunction &MF, const TargetRegisterClass *RC, BitVector &R){ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), E = RC->allocation_order_end(MF); I != E; ++I) R.set(*I); } -BitVector TargetRegisterInfo::getAllocatableSet(MachineFunction &MF, +BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC) const { BitVector Allocatable(NumRegs); if (RC) { @@ -85,7 +85,7 @@ BitVector TargetRegisterInfo::getAllocatableSet(MachineFunction &MF, /// getFrameIndexOffset - Returns the displacement from the frame register to /// the stack frame of the specified index. This is the default implementation -/// which is likely incorrect for the target. +/// which is overridden for some targets. int TargetRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt new file mode 100644 index 0000000000000..034d5aba83270 --- /dev/null +++ b/lib/Target/X86/AsmParser/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMX86AsmParser + X86AsmParser.cpp + ) +add_dependencies(LLVMX86AsmParser X86CodeGenTable_gen) diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile new file mode 100644 index 0000000000000..25fb0a2836dba --- /dev/null +++ b/lib/Target/X86/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/X86/AsmParser/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86AsmParser + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp new file mode 100644 index 0000000000000..c357b4d0dee15 --- /dev/null +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -0,0 +1,479 @@ +//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmLexer.h" +#include "llvm/MC/MCAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +namespace { +struct X86Operand; + +class X86ATTAsmParser : public TargetAsmParser { + MCAsmParser &Parser; + +private: + MCAsmParser &getParser() const { return Parser; } + + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + + bool ParseRegister(X86Operand &Op); + + bool ParseOperand(X86Operand &Op); + + bool ParseMemOperand(X86Operand &Op); + + bool ParseDirectiveWord(unsigned Size, SMLoc L); + + /// @name Auto-generated Match Functions + /// { + + bool MatchInstruction(SmallVectorImpl &Operands, + MCInst &Inst); + + /// MatchRegisterName - Match the given string to a register name, or 0 if + /// there is no match. + unsigned MatchRegisterName(const StringRef &Name); + + /// } + +public: + X86ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : TargetAsmParser(T), Parser(_Parser) {} + + virtual bool ParseInstruction(const StringRef &Name, MCInst &Inst); + + virtual bool ParseDirective(AsmToken DirectiveID); +}; + +} // end anonymous namespace + + +namespace { + +/// X86Operand - Instances of this class represent a parsed X86 machine +/// instruction. +struct X86Operand { + enum { + Token, + Register, + Immediate, + Memory + } Kind; + + union { + struct { + const char *Data; + unsigned Length; + } Tok; + + struct { + unsigned RegNo; + } Reg; + + struct { + const MCExpr *Val; + } Imm; + + struct { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + } Mem; + }; + + StringRef getToken() const { + assert(Kind == Token && "Invalid access!"); + return StringRef(Tok.Data, Tok.Length); + } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNo; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + const MCExpr *getMemDisp() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Disp; + } + unsigned getMemSegReg() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.SegReg; + } + unsigned getMemBaseReg() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.BaseReg; + } + unsigned getMemIndexReg() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.IndexReg; + } + unsigned getMemScale() const { + assert(Kind == Memory && "Invalid access!"); + return Mem.Scale; + } + + bool isToken() const {return Kind == Token; } + + bool isImm() const { return Kind == Immediate; } + + bool isImmSExt8() const { + // Accept immediates which fit in 8 bits when sign extended, and + // non-absolute immediates. 
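+  // A worked example of the check below: -1 and 127 survive a round-trip
+  // through int8_t unchanged and are accepted, while 128 truncates to -128
+  // and is rejected.  Symbolic (non-constant) immediates are conservatively
+  // accepted as well.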
+ if (!isImm()) + return false; + + if (const MCConstantExpr *CE = dyn_cast(getImm())) { + int64_t Value = CE->getValue(); + return Value == (int64_t) (int8_t) Value; + } + + return true; + } + + bool isMem() const { return Kind == Memory; } + + bool isReg() const { return Kind == Register; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + + void addImmSExt8Operands(MCInst &Inst, unsigned N) const { + // FIXME: Support user customization of the render method. + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateExpr(getImm())); + } + + void addMemOperands(MCInst &Inst, unsigned N) const { + assert((N == 4 || N == 5) && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); + Inst.addOperand(MCOperand::CreateImm(getMemScale())); + Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); + Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + + // FIXME: What a hack. + if (N == 5) + Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); + } + + static X86Operand CreateToken(StringRef Str) { + X86Operand Res; + Res.Kind = Token; + Res.Tok.Data = Str.data(); + Res.Tok.Length = Str.size(); + return Res; + } + + static X86Operand CreateReg(unsigned RegNo) { + X86Operand Res; + Res.Kind = Register; + Res.Reg.RegNo = RegNo; + return Res; + } + + static X86Operand CreateImm(const MCExpr *Val) { + X86Operand Res; + Res.Kind = Immediate; + Res.Imm.Val = Val; + return Res; + } + + static X86Operand CreateMem(unsigned SegReg, const MCExpr *Disp, + unsigned BaseReg, unsigned IndexReg, + unsigned Scale) { + // We should never just have a displacement, that would be an immediate. + assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); + + // The scale should always be one of {1,2,4,8}. + assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && + "Invalid scale!"); + X86Operand Res; + Res.Kind = Memory; + Res.Mem.SegReg = SegReg; + Res.Mem.Disp = Disp; + Res.Mem.BaseReg = BaseReg; + Res.Mem.IndexReg = IndexReg; + Res.Mem.Scale = Scale; + return Res; + } +}; + +} // end anonymous namespace. + + +bool X86ATTAsmParser::ParseRegister(X86Operand &Op) { + const AsmToken &TokPercent = getLexer().getTok(); + (void)TokPercent; // Avoid warning when assertions are disabled. + assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!"); + getLexer().Lex(); // Eat percent token. + + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return Error(Tok.getLoc(), "invalid register name"); + + // FIXME: Validate register for the current architecture; we have to do + // validation later, so maybe there is no need for this here. + unsigned RegNo; + + RegNo = MatchRegisterName(Tok.getString()); + if (RegNo == 0) + return Error(Tok.getLoc(), "invalid register name"); + + Op = X86Operand::CreateReg(RegNo); + getLexer().Lex(); // Eat identifier token. + + return false; +} + +bool X86ATTAsmParser::ParseOperand(X86Operand &Op) { + switch (getLexer().getKind()) { + default: + return ParseMemOperand(Op); + case AsmToken::Percent: + // FIXME: if a segment register, this could either be just the seg reg, or + // the start of a memory operand. + return ParseRegister(Op); + case AsmToken::Dollar: { + // $42 -> immediate. 
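+    // Anything ParseExpression accepts may follow the '$', e.g. "$42" or
+    // "$(2+3)"; the parsed MCExpr is wrapped as an immediate operand below.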
+    getLexer().Lex();
+    const MCExpr *Val;
+    if (getParser().ParseExpression(Val))
+      return true;
+    Op = X86Operand::CreateImm(Val);
+    return false;
+  }
+  }
+}
+
+/// ParseMemOperand: segment: disp(basereg, indexreg, scale)
+bool X86ATTAsmParser::ParseMemOperand(X86Operand &Op) {
+  // FIXME: If SegReg ':'  (e.g. %gs:), eat and remember.
+  unsigned SegReg = 0;
+
+  // We have to disambiguate a parenthesized expression "(4+5)" from the start
+  // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
+  // only way to do this without lookahead is to eat the ( and see what is after
+  // it.
+  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+  if (getLexer().isNot(AsmToken::LParen)) {
+    if (getParser().ParseExpression(Disp)) return true;
+
+    // After parsing the base expression we could either have a parenthesized
+    // memory address or not.  If not, return now.  If so, eat the (.
+    if (getLexer().isNot(AsmToken::LParen)) {
+      // Unless we have a segment register, treat this as an immediate.
+      if (SegReg)
+        Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+      else
+        Op = X86Operand::CreateImm(Disp);
+      return false;
+    }
+
+    // Eat the '('.
+    getLexer().Lex();
+  } else {
+    // Okay, we have a '('.  We don't know if this is an expression or not,
+    // so we have to eat the '(' to see beyond it.
+    getLexer().Lex(); // Eat the '('.
+
+    if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
+      // Nothing to do here, fall into the code below with the '(' part of the
+      // memory operand consumed.
+    } else {
+      // It must be a parenthesized expression, parse it now.
+      if (getParser().ParseParenExpression(Disp))
+        return true;
+
+      // After parsing the base expression we could either have a parenthesized
+      // memory address or not.  If not, return now.  If so, eat the (.
+      if (getLexer().isNot(AsmToken::LParen)) {
+        // Unless we have a segment register, treat this as an immediate.
+        if (SegReg)
+          Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 1);
+        else
+          Op = X86Operand::CreateImm(Disp);
+        return false;
+      }
+
+      // Eat the '('.
+      getLexer().Lex();
+    }
+  }
+
+  // If we reached here, then we just ate the ( of the memory operand.  Process
+  // the rest of the memory operand.
+  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+
+  if (getLexer().is(AsmToken::Percent)) {
+    if (ParseRegister(Op))
+      return true;
+    BaseReg = Op.getReg();
+  }
+
+  if (getLexer().is(AsmToken::Comma)) {
+    getLexer().Lex(); // Eat the comma.
+
+    // Following the comma we should have either an index register, or a scale
+    // value. We don't support the latter form, but we want to parse it
+    // correctly.
+    //
+    // Note that even though it would be completely consistent to support
+    // syntax like "1(%eax,,1)", the assembler doesn't.
+    if (getLexer().is(AsmToken::Percent)) {
+      if (ParseRegister(Op))
+        return true;
+      IndexReg = Op.getReg();
+
+      if (getLexer().isNot(AsmToken::RParen)) {
+        // Parse the scale amount:
+        //  ::= ',' [scale-expression]
+        if (getLexer().isNot(AsmToken::Comma))
+          return true;
+        getLexer().Lex(); // Eat the comma.
+
+        if (getLexer().isNot(AsmToken::RParen)) {
+          SMLoc Loc = getLexer().getTok().getLoc();
+
+          int64_t ScaleVal;
+          if (getParser().ParseAbsoluteExpression(ScaleVal))
+            return true;
+
+          // Validate the scale amount.
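+          // The x86 SIB byte encodes the scale in two bits, so 1, 2, 4 and
+          // 8 are the only representable values, e.g. "1(%eax,%ebx,4)".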
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) + return Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); + Scale = (unsigned)ScaleVal; + } + } + } else if (getLexer().isNot(AsmToken::RParen)) { + // Otherwise we have the unsupported form of a scale amount without an + // index. + SMLoc Loc = getLexer().getTok().getLoc(); + + int64_t Value; + if (getParser().ParseAbsoluteExpression(Value)) + return true; + + return Error(Loc, "cannot have scale factor without index register"); + } + } + + // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. + if (getLexer().isNot(AsmToken::RParen)) + return Error(getLexer().getTok().getLoc(), + "unexpected token in memory operand"); + getLexer().Lex(); // Eat the ')'. + + Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale); + return false; +} + +bool X86ATTAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { + SmallVector Operands; + + Operands.push_back(X86Operand::CreateToken(Name)); + + SMLoc Loc = getLexer().getTok().getLoc(); + if (getLexer().isNot(AsmToken::EndOfStatement)) { + + // Parse '*' modifier. + if (getLexer().is(AsmToken::Star)) { + getLexer().Lex(); // Eat the star. + Operands.push_back(X86Operand::CreateToken("*")); + } + + // Read the first operand. + Operands.push_back(X86Operand()); + if (ParseOperand(Operands.back())) + return true; + + while (getLexer().is(AsmToken::Comma)) { + getLexer().Lex(); // Eat the comma. + + // Parse and remember the operand. + Operands.push_back(X86Operand()); + if (ParseOperand(Operands.back())) + return true; + } + } + + if (!MatchInstruction(Operands, Inst)) + return false; + + // FIXME: We should give nicer diagnostics about the exact failure. + + Error(Loc, "unrecognized instruction"); + return true; +} + +bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) { + StringRef IDVal = DirectiveID.getIdentifier(); + if (IDVal == ".word") + return ParseDirectiveWord(2, DirectiveID.getLoc()); + return true; +} + +/// ParseDirectiveWord +/// ::= .word [ expression (, expression)* ] +bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + getParser().getStreamer().EmitValue(Value, Size); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + // FIXME: Improve diagnostic. + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); + getLexer().Lex(); + } + } + + getLexer().Lex(); + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeX86AsmParser() { + RegisterAsmParser X(TheX86_32Target); + RegisterAsmParser Y(TheX86_64Target); +} + +#include "X86GenAsmMatcher.inc" diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt index a28c8266b82e1..b70a587ec4e24 100644 --- a/lib/Target/X86/AsmPrinter/CMakeLists.txt +++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMX86AsmPrinter - X86ATTAsmPrinter.cpp X86ATTInstPrinter.cpp X86AsmPrinter.cpp - X86IntelAsmPrinter.cpp + X86IntelInstPrinter.cpp + X86MCInstLower.cpp ) -add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) \ No newline at end of file +add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen) diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile index ba89ac69bf68e..2368761ac9f45 100644 --- a/lib/Target/X86/AsmPrinter/Makefile +++ b/lib/Target/X86/AsmPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/X86/Makefile -----------------------------*- Makefile -*-===## +##===- lib/Target/X86/AsmPrinter/Makefile ------------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index fa0ee753f02c5..bc70ffe8d633c 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -13,10 +13,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" +#include "X86ATTInstPrinter.h" #include "llvm/MC/MCInst.h" -#include "X86ATTAsmPrinter.h" -#include "llvm/Target/TargetAsmInfo.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "X86GenInstrNames.inc" using namespace llvm; // Include the auto-generated portion of the assembly writer. @@ -25,9 +28,11 @@ using namespace llvm; #include "X86GenAsmWriter.inc" #undef MachineInstr -void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) { +void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } + +void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { - default: assert(0 && "Invalid ssecc argument!"); + default: llvm_unreachable("Invalid ssecc argument!"); case 0: O << "eq"; break; case 1: O << "lt"; break; case 2: O << "le"; break; @@ -39,61 +44,36 @@ void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) { } } - -void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) { - assert(0 && - "This is only used for MOVPC32r, should lower before asm printing!"); -} - - /// print_pcrel_imm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value. These print slightly differently, for /// example, a $ is not emitted. -void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { +void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isImm()) O << Op.getImm(); - else if (Op.isMBBLabel()) - // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel - // should eventually call into this code, not the other way around. 
- O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction() - << '_' << Op.getMBBLabelBlock(); - else - assert(0 && "Unknown pcrel immediate operand"); + else { + assert(Op.isExpr() && "unknown pcrel immediate operand"); + Op.getExpr()->print(O, &MAI); + } } - -void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo, - const char *Modifier) { +void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { assert(Modifier == 0 && "Modifiers should not be used"); const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - O << '%'; - unsigned Reg = Op.getReg(); -#if 0 - if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT VT = (strcmp(Modifier+6,"64") == 0) ? - MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : - ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8)); - Reg = getX86SubSuperRegister(Reg, VT); - } -#endif - O << TRI->getAsmName(Reg); - return; + O << '%' << getRegisterName(Op.getReg()); } else if (Op.isImm()) { - //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem"))) + O << '$' << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); O << '$'; - O << Op.getImm(); - return; + Op.getExpr()->print(O, &MAI); } - - O << "<>"; } -void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { - +void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { const MCOperand &BaseReg = MI->getOperand(Op); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); @@ -103,19 +83,11 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) O << DispVal; } else { - abort(); - //assert(DispSpec.isGlobal() || DispSpec.isCPI() || - // DispSpec.isJTI() || DispSpec.isSymbol()); - //printOperand(MI, Op+3, "mem"); + assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); + DispSpec.getExpr()->print(O, &MAI); } if (IndexReg.getReg() || BaseReg.getReg()) { - // There are cases where we can end up with ESP/RSP in the indexreg slot. - // If this happens, swap the base/index register to support assemblers that - // don't work when the index is *SP. - // FIXME: REMOVE THIS. - assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP); - O << '('; if (BaseReg.getReg()) printOperand(MI, Op); @@ -131,9 +103,9 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { } } -void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) { - const MCOperand &Segment = MI->getOperand(Op+4); - if (Segment.getReg()) { +void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op) { + // If this has a segment register, print it. + if (MI->getOperand(Op+4).getReg()) { printOperand(MI, Op+4); O << ':'; } diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h new file mode 100644 index 0000000000000..5f28fa46f5f83 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -0,0 +1,86 @@ +//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an X86 MCInst to AT&T style .s file syntax. 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86_ATT_INST_PRINTER_H +#define X86_ATT_INST_PRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class MCOperand; + +class X86ATTInstPrinter : public MCInstPrinter { +public: + X86ATTInstPrinter(raw_ostream &O, const MCAsmInfo &MAI) + : MCInstPrinter(O, MAI) {} + + + virtual void printInst(const MCInst *MI); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + void printMemReference(const MCInst *MI, unsigned Op); + void printLeaMemReference(const MCInst *MI, unsigned Op); + void printSSECC(const MCInst *MI, unsigned Op); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo); + + void printopaquemem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + + void printi8mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi16mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printi128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf32mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf64mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf80mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printf128mem(const MCInst *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } + void printlea32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MCInst *MI, unsigned OpNo) { + printLeaMemReference(MI, OpNo); + } +}; + +} + +#endif diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index e5d80a4cbdec1..2a0290db97e96 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- X86AsmPrinter.cpp - Convert X86 LLVM IR to X86 assembly -----------===// +//===-- X86AsmPrinter.cpp - Convert X86 LLVM code to AT&T assembly --------===// // // The LLVM Compiler Infrastructure // @@ -7,42 +7,937 @@ // //===----------------------------------------------------------------------===// // -// This file the shared super class printer that converts from our internal -// representation of machine-dependent LLVM code to Intel and AT&T format -// assembly language. -// This printer is the output mechanism used by `llc'. +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to AT&T format assembly +// language. This printer is the output mechanism used by `llc'. 
// //===----------------------------------------------------------------------===// -#include "X86ATTAsmPrinter.h" -#include "X86IntelAsmPrinter.h" -#include "X86Subtarget.h" +#define DEBUG_TYPE "asm-printer" +#include "X86AsmPrinter.h" +#include "X86ATTInstPrinter.h" +#include "X86IntelInstPrinter.h" +#include "X86MCInstLower.h" +#include "X86.h" +#include "X86COFF.h" +#include "X86COFFMachineModuleInfo.h" +#include "X86MachineFunctionInfo.h" +#include "X86TargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; -/// createX86CodePrinterPass - Returns a pass that prints the X86 assembly code -/// for a MachineFunction to the given output stream, using the given target -/// machine description. +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +//===----------------------------------------------------------------------===// +// Primitive Helper Functions. +//===----------------------------------------------------------------------===// + +void X86AsmPrinter::printMCInst(const MCInst *MI) { + if (MAI->getAssemblerDialect() == 0) + X86ATTInstPrinter(O, *MAI).printInstruction(MI); + else + X86IntelInstPrinter(O, *MAI).printInstruction(MI); +} + +void X86AsmPrinter::PrintPICBaseSymbol() const { + // FIXME: Gross const cast hack. + X86AsmPrinter *AP = const_cast(this); + X86MCInstLower(OutContext, 0, *AP).GetPICBaseSymbol()->print(O, MAI); +} + +void X86AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + unsigned FnAlign = MF.getAlignment(); + const Function *F = MF.getFunction(); + + if (Subtarget->isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo(); + COFFMMI.DecorateCygMingName(CurrentFnName, F, *TM.getTargetData()); + } + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + EmitAlignment(FnAlign, F); + + switch (F->getLinkage()) { + default: llvm_unreachable("Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. 
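+  // Internal and private functions get no .globl directive, so their
+  // symbols stay local to the emitted object file.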
+  case Function::PrivateLinkage:
+    break;
+  case Function::DLLExportLinkage:
+  case Function::ExternalLinkage:
+    O << "\t.globl\t" << CurrentFnName << '\n';
+    break;
+  case Function::LinkerPrivateLinkage:
+  case Function::LinkOnceAnyLinkage:
+  case Function::LinkOnceODRLinkage:
+  case Function::WeakAnyLinkage:
+  case Function::WeakODRLinkage:
+    if (Subtarget->isTargetDarwin()) {
+      O << "\t.globl\t" << CurrentFnName << '\n';
+      O << MAI->getWeakDefDirective() << CurrentFnName << '\n';
+    } else if (Subtarget->isTargetCygMing()) {
+      O << "\t.globl\t" << CurrentFnName << "\n"
+           "\t.linkonce discard\n";
+    } else {
+      O << "\t.weak\t" << CurrentFnName << '\n';
+    }
+    break;
+  }
+
+  printVisibility(CurrentFnName, F->getVisibility());
+
+  if (Subtarget->isTargetELF())
+    O << "\t.type\t" << CurrentFnName << ",@function\n";
+  else if (Subtarget->isTargetCygMing()) {
+    O << "\t.def\t " << CurrentFnName
+      << ";\t.scl\t" <<
+      (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT)
+      << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
+      << ";\t.endef\n";
+  }
+
+  O << CurrentFnName << ':';
+  if (VerboseAsm) {
+    O.PadToColumn(MAI->getCommentColumn());
+    O << MAI->getCommentString() << ' ';
+    WriteAsOperand(O, F, /*PrintType=*/false, F->getParent());
+  }
+  O << '\n';
+
+  // Add some workaround for linkonce linkage on Cygwin/MinGW.
+  if (Subtarget->isTargetCygMing() &&
+      (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
+    O << "Lllvm$workaround$fake$stub$" << CurrentFnName << ":\n";
+}
+
+/// runOnMachineFunction - This uses the printMachineInstruction()
+/// method to print assembly for each instruction.
+///
+bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  const Function *F = MF.getFunction();
+  this->MF = &MF;
+  CallingConv::ID CC = F->getCallingConv();
+
+  SetupMachineFunction(MF);
+  O << "\n\n";
+
+  if (Subtarget->isTargetCOFF()) {
+    X86COFFMachineModuleInfo &COFFMMI =
+      MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+
+    // Populate function information map.  Don't want to populate
+    // non-stdcall or non-fastcall functions' information right now.
+    if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall)
+      COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>());
+  }
+
+  // Print out constants referenced by the function.
+  EmitConstantPool(MF.getConstantPool());
+
+  // Print the 'header' of the function.
+  emitFunctionHeader(MF);
+
+  // Emit pre-function debug and/or EH information.
+  if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling())
+    DW->BeginFunction(&MF);
+
+  // Print out code for the function.
+  bool hasAnyRealCode = false;
+  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    // Print a label for the basic block.
+    EmitBasicBlockStart(I);
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      // Print the assembly for the instruction.
+      if (!II->isLabel())
+        hasAnyRealCode = true;
+      printMachineInstruction(II);
+    }
+  }
+
+  if (Subtarget->isTargetDarwin() && !hasAnyRealCode) {
+    // If the function is empty, then we need to emit *something*. Otherwise,
+    // the function's label might be associated with something that it wasn't
+    // meant to be associated with. We emit a noop in this situation.
+    // We are assuming inline asms are code.
+    O << "\tnop\n";
+  }
+
+  if (MAI->hasDotTypeDotSizeDirective())
+    O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
+
+  // Emit post-function debug information.
+ if (MAI->doesSupportDebugInformation() || MAI->doesSupportExceptionHandling()) + DW->EndFunction(&MF); + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // We didn't modify anything. + return false; +} + +/// printSymbolOperand - Print a raw symbol reference operand. This handles +/// jump tables, constant pools, global address and external symbols, all of +/// which print to a label with various suffixes for relocation types etc. +void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { + switch (MO.getType()) { + default: llvm_unreachable("unknown symbol type!"); + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' + << MO.getIndex(); + break; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); + printOffset(MO.getOffset()); + break; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + + const char *Suffix = ""; + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) + Suffix = "$stub"; + else if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || + MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) + Suffix = "$non_lazy_ptr"; + + std::string Name = Mang->getMangledName(GV, Suffix, Suffix[0] != '\0'); + if (Subtarget->isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo(); + COFFMMI.DecorateCygMingName(Name, GV, *TM.getTargetData()); + } + + // Handle dllimport linkage. + if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) + Name = "__imp_" + Name; + + if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY || + MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) { + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, true); + NameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + + const MCSymbol *&StubSym = + MMI->getObjFileInfo().getGVStubEntry(Sym); + if (StubSym == 0) { + NameStr.clear(); + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } + } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, true); + NameStr += "$non_lazy_ptr"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + const MCSymbol *&StubSym = + MMI->getObjFileInfo().getHiddenGVStubEntry(Sym); + if (StubSym == 0) { + NameStr.clear(); + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } + } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, true); + NameStr += "$stub"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(NameStr.str()); + const MCSymbol *&StubSym = + MMI->getObjFileInfo().getFnStubEntry(Sym); + if (StubSym == 0) { + NameStr.clear(); + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } + } + + // If the name begins with a dollar-sign, enclose it in parens. We do this + // to avoid having it look like an integer immediate to the assembler. 
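+  // For example, a global named "$foo" is emitted as "($foo)"; a bare
+  // "$foo" would look like an immediate operand in AT&T syntax.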
+ if (Name[0] == '$') + O << '(' << Name << ')'; + else + O << Name; + + printOffset(MO.getOffset()); + break; + } + case MachineOperand::MO_ExternalSymbol: { + std::string Name = Mang->makeNameProper(MO.getSymbolName()); + if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { + Name += "$stub"; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(Name); + const MCSymbol *&StubSym = + MMI->getObjFileInfo().getFnStubEntry(Sym); + if (StubSym == 0) { + Name.erase(Name.end()-5, Name.end()); + StubSym = OutContext.GetOrCreateSymbol(Name); + } + } + + // If the name begins with a dollar-sign, enclose it in parens. We do this + // to avoid having it look like an integer immediate to the assembler. + if (Name[0] == '$') + O << '(' << Name << ')'; + else + O << Name; + break; + } + } + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + break; + case X86II::MO_DARWIN_NONLAZY: + case X86II::MO_DLLIMPORT: + case X86II::MO_DARWIN_STUB: + // These affect the name of the symbol, not any suffix. + break; + case X86II::MO_GOT_ABSOLUTE_ADDRESS: + O << " + [.-"; + PrintPICBaseSymbol(); + O << ']'; + break; + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + O << '-'; + PrintPICBaseSymbol(); + break; + case X86II::MO_TLSGD: O << "@TLSGD"; break; + case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break; + case X86II::MO_TPOFF: O << "@TPOFF"; break; + case X86II::MO_NTPOFF: O << "@NTPOFF"; break; + case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break; + case X86II::MO_GOT: O << "@GOT"; break; + case X86II::MO_GOTOFF: O << "@GOTOFF"; break; + case X86II::MO_PLT: O << "@PLT"; break; + } +} + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. These print slightly differently, for +/// example, a $ is not emitted. +void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: llvm_unreachable("Unknown pcrel immediate operand"); + case MachineOperand::MO_Immediate: + O << MO.getImm(); + return; + case MachineOperand::MO_MachineBasicBlock: + GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI); + return; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + printSymbolOperand(MO); + return; + } +} + + +void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + const char *Modifier) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: llvm_unreachable("unknown operand type!"); + case MachineOperand::MO_Register: { + O << '%'; + unsigned Reg = MO.getReg(); + if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { + EVT VT = (strcmp(Modifier+6,"64") == 0) ? + MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : + ((strcmp(Modifier+6,"16") == 0) ? 
MVT::i16 : MVT::i8)); + Reg = getX86SubSuperRegister(Reg, VT); + } + O << X86ATTInstPrinter::getRegisterName(Reg); + return; + } + + case MachineOperand::MO_Immediate: + O << '$' << MO.getImm(); + return; + + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: { + O << '$'; + printSymbolOperand(MO); + break; + } + } +} + +void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) { + unsigned char value = MI->getOperand(Op).getImm(); + assert(value <= 7 && "Invalid ssecc argument!"); + switch (value) { + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + +void X86AsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier) { + const MachineOperand &BaseReg = MI->getOperand(Op); + const MachineOperand &IndexReg = MI->getOperand(Op+2); + const MachineOperand &DispSpec = MI->getOperand(Op+3); + + // If we really don't want to print out (rip), don't. + bool HasBaseReg = BaseReg.getReg() != 0; + if (HasBaseReg && Modifier && !strcmp(Modifier, "no-rip") && + BaseReg.getReg() == X86::RIP) + HasBaseReg = false; + + // HasParenPart - True if we will print out the () part of the mem ref. + bool HasParenPart = IndexReg.getReg() || HasBaseReg; + + if (DispSpec.isImm()) { + int DispVal = DispSpec.getImm(); + if (DispVal || !HasParenPart) + O << DispVal; + } else { + assert(DispSpec.isGlobal() || DispSpec.isCPI() || + DispSpec.isJTI() || DispSpec.isSymbol()); + printSymbolOperand(MI->getOperand(Op+3)); + } + + if (HasParenPart) { + assert(IndexReg.getReg() != X86::ESP && + "X86 doesn't allow scaling by ESP"); + + O << '('; + if (HasBaseReg) + printOperand(MI, Op, Modifier); + + if (IndexReg.getReg()) { + O << ','; + printOperand(MI, Op+2, Modifier); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + if (ScaleVal != 1) + O << ',' << ScaleVal; + } + O << ')'; + } +} + +void X86AsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op, + const char *Modifier) { + assert(isMem(MI, Op) && "Invalid memory reference!"); + const MachineOperand &Segment = MI->getOperand(Op+4); + if (Segment.getReg()) { + printOperand(MI, Op+4, Modifier); + O << ':'; + } + printLeaMemReference(MI, Op, Modifier); +} + +void X86AsmPrinter::printPICJumpTableSetLabel(unsigned uid, + const MachineBasicBlock *MBB) const { + if (!MAI->getSetDirective()) + return; + + // We don't need .set machinery if we have GOT-style relocations + if (Subtarget->isPICStyleGOT()) + return; + + O << MAI->getSetDirective() << ' ' << MAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << "_set_" << MBB->getNumber() << ','; + + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + + if (Subtarget->isPICStyleRIPRel()) + O << '-' << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << uid << '\n'; + else { + O << '-'; + PrintPICBaseSymbol(); + O << '\n'; + } +} + + +void X86AsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) { + PrintPICBaseSymbol(); + O << '\n'; + PrintPICBaseSymbol(); + O << ':'; +} + +void X86AsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned uid) const { + const char *JTEntryDirective = MJTI->getEntrySize() == 4 ? 
+ MAI->getData32bitsDirective() : MAI->getData64bitsDirective(); + + O << JTEntryDirective << ' '; + + if (Subtarget->isPICStyleRIPRel() || Subtarget->isPICStyleStubPIC()) { + O << MAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << uid << "_set_" << MBB->getNumber(); + } else if (Subtarget->isPICStyleGOT()) { + GetMBBSymbol(MBB->getNumber())->print(O, MAI); + O << "@GOTOFF"; + } else + GetMBBSymbol(MBB->getNumber())->print(O, MAI); +} + +bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) { + unsigned Reg = MO.getReg(); + switch (Mode) { + default: return true; // Unknown mode. + case 'b': // Print QImode register + Reg = getX86SubSuperRegister(Reg, MVT::i8); + break; + case 'h': // Print QImode high register + Reg = getX86SubSuperRegister(Reg, MVT::i8, true); + break; + case 'w': // Print HImode register + Reg = getX86SubSuperRegister(Reg, MVT::i16); + break; + case 'k': // Print SImode register + Reg = getX86SubSuperRegister(Reg, MVT::i32); + break; + case 'q': // Print DImode register + Reg = getX86SubSuperRegister(Reg, MVT::i64); + break; + } + + O << '%' << X86ATTInstPrinter::getRegisterName(Reg); + return false; +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. /// -FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o, - X86TargetMachine &tm, - bool verbose) { - const X86Subtarget *Subtarget = &tm.getSubtarget(); +bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'a': // This is an address. Currently only 'i' and 'r' are expected. + if (MO.isImm()) { + O << MO.getImm(); + return false; + } + if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { + printSymbolOperand(MO); + return false; + } + if (MO.isReg()) { + O << '('; + printOperand(MI, OpNo); + O << ')'; + return false; + } + return true; + + case 'c': // Don't print "$" before a global var name or constant. + if (MO.isImm()) + O << MO.getImm(); + else if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) + printSymbolOperand(MO); + else + printOperand(MI, OpNo); + return false; + + case 'A': // Print '*' before a register (it must be a register) + if (MO.isReg()) { + O << '*'; + printOperand(MI, OpNo); + return false; + } + return true; + + case 'b': // Print QImode register + case 'h': // Print QImode high register + case 'w': // Print HImode register + case 'k': // Print SImode register + case 'q': // Print DImode register + if (MO.isReg()) + return printAsmMRegister(MO, ExtraCode[0]); + printOperand(MI, OpNo); + return false; + + case 'P': // This is the operand of a call, treat specially. + print_pcrel_imm(MI, OpNo); + return false; + + case 'n': // Negate the immediate or print a '-' before the operand. + // Note: this is a temporary solution. It should be handled target + // independently as part of the 'MC' work. 
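+      // e.g. an immediate operand of 5 is printed as "-5"; for anything
+      // else we emit a leading '-' and fall through to printOperand.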
+ if (MO.isImm()) { + O << -MO.getImm(); + return false; + } + O << '-'; + } + } - if (Subtarget->isFlavorIntel()) - return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); - return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); + printOperand(MI, OpNo); + return false; } -namespace { - static struct Register { - Register() { - X86TargetMachine::registerAsmPrinter(createX86CodePrinterPass); +bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'b': // Print QImode register + case 'h': // Print QImode high register + case 'w': // Print HImode register + case 'k': // Print SImode register + case 'q': // Print SImode register + // These only apply to registers, ignore on mem. + break; + case 'P': // Don't print @PLT, but do print as memory. + printMemReference(MI, OpNo, "no-rip"); + return false; } - } Registrator; + } + printMemReference(MI, OpNo); + return false; +} + + + +/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in +/// AT&T syntax to the current output stream. +/// +void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + processDebugLoc(MI, true); + + printInstructionThroughMCStreamer(MI); + + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + EmitComments(*MI); + O << '\n'; + + processDebugLoc(MI, false); } -extern "C" int X86AsmPrinterForceLink; -int X86AsmPrinterForceLink = 0; +void X86AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) { + if (Subtarget->isTargetDarwin() && + TM.getRelocationModel() == Reloc::Static) { + if (GVar->getName() == "llvm.global_ctors") + O << ".reference .constructors_used\n"; + else if (GVar->getName() == "llvm.global_dtors") + O << ".reference .destructors_used\n"; + } + return; + } + + const TargetData *TD = TM.getTargetData(); + + std::string name = Mang->getMangledName(GVar); + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + printVisibility(name, GVar->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type\t" << name << ",@object\n"; + + + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GVar, TM); + const MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GVar, GVKind, Mang, TM); + OutStreamer.SwitchSection(TheSection); + + // FIXME: get this stuff from section kind flags. + if (C->isNullValue() && !GVar->hasSection() && + // Don't put things that should go in the cstring section into "comm". + !TheSection->getKind().isMergeableCString()) { + if (GVar->hasExternalLinkage()) { + if (const char *Directive = MAI->getZeroFillDirective()) { + O << "\t.globl " << name << '\n'; + O << Directive << "__DATA, __common, " << name << ", " + << Size << ", " << Align << '\n'; + return; + } + } + + if (!GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. 
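+      // .comm and .lcomm place zero-initialized data in the BSS; Darwin's
+      // .lcomm additionally takes an alignment argument, appended below.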
+ + if (MAI->getLCOMMDirective() != NULL) { + if (GVar->hasLocalLinkage()) { + O << MAI->getLCOMMDirective() << name << ',' << Size; + if (Subtarget->isTargetDarwin()) + O << ',' << Align; + } else if (Subtarget->isTargetDarwin() && !GVar->hasCommonLinkage()) { + O << "\t.globl " << name << '\n' + << MAI->getWeakDefDirective() << name << '\n'; + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + EmitGlobalConstant(C); + return; + } else { + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + } else { + if (!Subtarget->isTargetCygMing()) { + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << '\n'; + } + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + return; + } + } + + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::LinkerPrivateLinkage: + if (Subtarget->isTargetDarwin()) { + O << "\t.globl " << name << '\n' + << MAI->getWeakDefDirective() << name << '\n'; + } else if (Subtarget->isTargetCygMing()) { + O << "\t.globl\t" << name << "\n" + "\t.linkonce same_size\n"; + } else { + O << "\t.weak\t" << name << '\n'; + } + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << '\n'; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + llvm_unreachable("Unknown linkage type!"); + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm){ + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + + EmitGlobalConstant(C); + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << name << ", " << Size << '\n'; +} + +void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetDarwin()) { + // All darwin targets use mach-o. + TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); + + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo(); + + // Output stubs for dynamically-linked functions. 
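+    // Each __jump_table entry is 5 bytes wide (the reserved size passed to
+    // getMachOSection below), so the five "hlt" bytes are placeholders that
+    // dyld can overwrite with a jmp once the symbol is bound.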
+ MachineModuleInfoMachO::SymbolListTy Stubs; + + Stubs = MMIMacho.GetFnStubList(); + if (!Stubs.empty()) { + const MCSection *TheSection = + TLOFMacho.getMachOSection("__IMPORT", "__jump_table", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 5, SectionKind::getMetadata()); + OutStreamer.SwitchSection(TheSection); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n" << "\t.indirect_symbol "; + // Get the MCSymbol without the $stub suffix. + Stubs[i].second->print(O, MAI); + O << "\n\thlt ; hlt ; hlt ; hlt ; hlt\n"; + } + O << '\n'; + + Stubs.clear(); + } + + // Output stubs for external and common global variables. + Stubs = MMIMacho.GetGVStubList(); + if (!Stubs.empty()) { + const MCSection *TheSection = + TLOFMacho.getMachOSection("__IMPORT", "__pointers", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + OutStreamer.SwitchSection(TheSection); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n\t.indirect_symbol "; + Stubs[i].second->print(O, MAI); + O << "\n\t.long\t0\n"; + } + Stubs.clear(); + } + + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(2); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n" << MAI->getData32bitsDirective(); + Stubs[i].second->print(O, MAI); + O << '\n'; + } + Stubs.clear(); + } + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never + // generates code that does this, it is always safe to set. + O << "\t.subsections_via_symbols\n"; + } + + if (Subtarget->isTargetCOFF()) { + // Necessary for dllexport support + std::vector DLLExportedFns, DLLExportedGlobals; + + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo(); + TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast(getObjFileLowering()); + + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedFns.push_back(Mang->getMangledName(I)); + + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->hasDLLExportLinkage()) + DLLExportedGlobals.push_back(Mang->getMangledName(I)); + + if (Subtarget->isTargetCygMing()) { + // Emit type information for external functions + for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(), + E = COFFMMI.stub_end(); I != E; ++I) { + O << "\t.def\t " << I->getKeyData() + << ";\t.scl\t" << COFF::C_EXT + << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) + << ";\t.endef\n"; + } + } + + // Output linker support code for dllexported globals on windows. 
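+    // The .drectve section holds linker command-line fragments, so every
+    // " -export:name" string emitted below behaves like an export option
+    // passed to the COFF linker.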
+    if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+      OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve",
+                                                      true,
+                                                   SectionKind::getMetadata()));
+
+      for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i)
+        O << "\t.ascii \" -export:" << DLLExportedGlobals[i] << ",data\"\n";
+
+      for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i)
+        O << "\t.ascii \" -export:" << DLLExportedFns[i] << "\"\n";
+    }
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+                                             unsigned SyntaxVariant,
+                                             const MCAsmInfo &MAI,
+                                             raw_ostream &O) {
+  if (SyntaxVariant == 0)
+    return new X86ATTInstPrinter(O, MAI);
+  if (SyntaxVariant == 1)
+    return new X86IntelInstPrinter(O, MAI);
+  return 0;
+}
 
 // Force static initialization.
-extern "C" void LLVMInitializeX86AsmPrinter() { }
+extern "C" void LLVMInitializeX86AsmPrinter() {
+  RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
+  RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+
+  TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,createX86MCInstPrinter);
+  TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,createX86MCInstPrinter);
+}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
new file mode 100644
index 0000000000000..0351829b0856f
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -0,0 +1,150 @@
+//===-- X86AsmPrinter.h - Convert X86 LLVM code to assembly -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AT&T assembly code printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86ASMPRINTER_H
+#define X86ASMPRINTER_H
+
+#include "../X86.h"
+#include "../X86MachineFunctionInfo.h"
+#include "../X86TargetMachine.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MachineJumpTableInfo;
+class MCContext;
+class MCInst;
+class MCStreamer;
+class MCSymbol;
+
+class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
+  const X86Subtarget *Subtarget;
+ public:
+  explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+                         const MCAsmInfo *T, bool V)
+    : AsmPrinter(O, TM, T, V) {
+    Subtarget = &TM.getSubtarget<X86Subtarget>();
+  }
+
+  virtual const char *getPassName() const {
+    return "X86 AT&T-Style Assembly Printer";
+  }
+
+  const X86Subtarget &getSubtarget() const { return *Subtarget; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    AU.addRequired<MachineModuleInfo>();
+    AU.addRequired<DwarfWriter>();
+    AsmPrinter::getAnalysisUsage(AU);
+  }
+
+
+  virtual void EmitEndOfAsmFile(Module &M);
+
+  void printInstructionThroughMCStreamer(const MachineInstr *MI);
+
+
+  void printMCInst(const MCInst *MI);
+
+  void printSymbolOperand(const MachineOperand &MO);
+
+
+
+  // These methods are used by the tablegen'erated instruction printer.
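+  // Each printFOOmem method matches the PrintMethod of the corresponding
+  // operand class in the .td files, e.g. printi32mem for i32mem operands.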
+  void printOperand(const MachineInstr *MI, unsigned OpNo,
+                    const char *Modifier = 0);
+  void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
+  void printopaquemem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+
+  void printi8mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi16mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi32mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi64mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf32mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf64mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf80mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf128mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo);
+  }
+  void printlea64mem(const MachineInstr *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo);
+  }
+  void printlea64_32mem(const MachineInstr *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo, "subreg64");
+  }
+
+  bool printAsmMRegister(const MachineOperand &MO, char Mode);
+  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                       unsigned AsmVariant, const char *ExtraCode);
+  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                             unsigned AsmVariant, const char *ExtraCode);
+
+  void printMachineInstruction(const MachineInstr *MI);
+  void printSSECC(const MachineInstr *MI, unsigned Op);
+  void printMemReference(const MachineInstr *MI, unsigned Op,
+                         const char *Modifier=NULL);
+  void printLeaMemReference(const MachineInstr *MI, unsigned Op,
+                            const char *Modifier=NULL);
+  void printPICJumpTableSetLabel(unsigned uid,
+                                 const MachineBasicBlock *MBB) const;
+  void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
+                                 const MachineBasicBlock *MBB) const {
+    AsmPrinter::printPICJumpTableSetLabel(uid, uid2, MBB);
+  }
+  void printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                              const MachineBasicBlock *MBB,
+                              unsigned uid) const;
+
+  void printPICLabel(const MachineInstr *MI, unsigned Op);
+  void PrintGlobalVariable(const GlobalVariable* GVar);
+
+  void PrintPICBaseSymbol() const;
+
+  bool runOnMachineFunction(MachineFunction &F);
+
+  void emitFunctionHeader(const MachineFunction &MF);
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
new file mode 100644
index 0000000000000..fde5902357b28
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -0,0 +1,131 @@
+//===-- X86IntelInstPrinter.cpp - Intel assembly instruction printing -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as Intel-style
+// assembly.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "X86IntelInstPrinter.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "X86GenInstrNames.inc" +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. +#define MachineInstr MCInst +#define NO_ASM_WRITER_BOILERPLATE +#include "X86GenAsmWriter1.inc" +#undef MachineInstr + +void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } + +void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { + switch (MI->getOperand(Op).getImm()) { + default: llvm_unreachable("Invalid ssecc argument!"); + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + } +} + +/// print_pcrel_imm - This is used to print an immediate value that ends up +/// being encoded as a pc-relative value. +void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) + O << Op.getImm(); + else { + assert(Op.isExpr() && "unknown pcrel immediate operand"); + Op.getExpr()->print(O, &MAI); + } +} + +static void PrintRegName(raw_ostream &O, StringRef RegName) { + for (unsigned i = 0, e = RegName.size(); i != e; ++i) + O << (char)toupper(RegName[i]); +} + +void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { + assert(Modifier == 0 && "Modifiers should not be used"); + + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + PrintRegName(O, getRegisterName(Op.getReg())); + } else if (Op.isImm()) { + O << Op.getImm(); + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI); + } +} + +void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) { + const MCOperand &BaseReg = MI->getOperand(Op); + unsigned ScaleVal = MI->getOperand(Op+1).getImm(); + const MCOperand &IndexReg = MI->getOperand(Op+2); + const MCOperand &DispSpec = MI->getOperand(Op+3); + + O << '['; + + bool NeedPlus = false; + if (BaseReg.getReg()) { + printOperand(MI, Op); + NeedPlus = true; + } + + if (IndexReg.getReg()) { + if (NeedPlus) O << " + "; + if (ScaleVal != 1) + O << ScaleVal << '*'; + printOperand(MI, Op+2); + NeedPlus = true; + } + + + if (!DispSpec.isImm()) { + if (NeedPlus) O << " + "; + assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); + DispSpec.getExpr()->print(O, &MAI); + } else { + int64_t DispVal = DispSpec.getImm(); + if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg())) { + if (NeedPlus) { + if (DispVal > 0) + O << " + "; + else { + O << " - "; + DispVal = -DispVal; + } + } + O << DispVal; + } + } + + O << ']'; +} + +void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op) { + // If this has a segment register, print it. 
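+  // (Editor's note, a sketch of the operand convention assumed here: Op+0 is
+  // the base register, Op+1 the scale immediate, Op+2 the index register,
+  // Op+3 the displacement, and Op+4 the segment register, so e.g.
+  // FS:[ESI + 4*EAX + 8] travels as {ESI, 4, EAX, 8, FS}.)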
+ if (MI->getOperand(Op+4).getReg()) { + printOperand(MI, Op+4); + O << ':'; + } + printLeaMemReference(MI, Op); +} diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h new file mode 100644 index 0000000000000..1976177eb13c0 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -0,0 +1,99 @@ +//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an X86 MCInst to intel style .s file syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_INTEL_INST_PRINTER_H +#define X86_INTEL_INST_PRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class MCOperand; + +class X86IntelInstPrinter : public MCInstPrinter { +public: + X86IntelInstPrinter(raw_ostream &O, const MCAsmInfo &MAI) + : MCInstPrinter(O, MAI) {} + + virtual void printInst(const MCInst *MI); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + void printMemReference(const MCInst *MI, unsigned Op); + void printLeaMemReference(const MCInst *MI, unsigned Op); + void printSSECC(const MCInst *MI, unsigned Op); + void print_pcrel_imm(const MCInst *MI, unsigned OpNo); + + void printopaquemem(const MCInst *MI, unsigned OpNo) { + O << "OPAQUE PTR "; + printMemReference(MI, OpNo); + } + + void printi8mem(const MCInst *MI, unsigned OpNo) { + O << "BYTE PTR "; + printMemReference(MI, OpNo); + } + void printi16mem(const MCInst *MI, unsigned OpNo) { + O << "WORD PTR "; + printMemReference(MI, OpNo); + } + void printi32mem(const MCInst *MI, unsigned OpNo) { + O << "DWORD PTR "; + printMemReference(MI, OpNo); + } + void printi64mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printMemReference(MI, OpNo); + } + void printi128mem(const MCInst *MI, unsigned OpNo) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo); + } + void printf32mem(const MCInst *MI, unsigned OpNo) { + O << "DWORD PTR "; + printMemReference(MI, OpNo); + } + void printf64mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printMemReference(MI, OpNo); + } + void printf80mem(const MCInst *MI, unsigned OpNo) { + O << "XWORD PTR "; + printMemReference(MI, OpNo); + } + void printf128mem(const MCInst *MI, unsigned OpNo) { + O << "XMMWORD PTR "; + printMemReference(MI, OpNo); + } + void printlea32mem(const MCInst *MI, unsigned OpNo) { + O << "DWORD PTR "; + printLeaMemReference(MI, OpNo); + } + void printlea64mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printLeaMemReference(MI, OpNo); + } + void printlea64_32mem(const MCInst *MI, unsigned OpNo) { + O << "QWORD PTR "; + printLeaMemReference(MI, OpNo); + } +}; + +} + +#endif diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp new file mode 100644 index 0000000000000..5ccddf57e7abd --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -0,0 +1,485 @@ +//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed 
under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower X86 MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCInstLower.h"
+#include "X86AsmPrinter.h"
+#include "X86MCAsmInfo.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+
+const X86Subtarget &X86MCInstLower::getSubtarget() const {
+  return AsmPrinter.getSubtarget();
+}
+
+MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
+  assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
+  return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
+  SmallString<60> Name;
+  raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix()
+                            << AsmPrinter.getFunctionNumber() << "$pb";
+  return Ctx.GetOrCreateSymbol(Name.str());
+}
+
+
+/// GetGlobalAddressSymbol - Lower an MO_GlobalAddress operand to an MCSymbol.
+MCSymbol *X86MCInstLower::
+GetGlobalAddressSymbol(const MachineOperand &MO) const {
+  const GlobalValue *GV = MO.getGlobal();
+
+  bool isImplicitlyPrivate = false;
+  if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
+      MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
+      MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE ||
+      MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
+    isImplicitlyPrivate = true;
+
+  SmallString<128> Name;
+  Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+
+  if (getSubtarget().isTargetCygMing()) {
+    X86COFFMachineModuleInfo &COFFMMI =
+      AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
+    COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData());
+  }
+
+  switch (MO.getTargetFlags()) {
+  default: llvm_unreachable("Unknown target flag on GV operand");
+  case X86II::MO_NO_FLAG:                // No flag.
+  case X86II::MO_PIC_BASE_OFFSET:        // Doesn't modify symbol name.
+    break;
+  case X86II::MO_DLLIMPORT: {
+    // Handle dllimport linkage.
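+    // (Editor's illustration.) A call to "__declspec(dllimport) void foo()"
+    // must go through the import-table slot, so the already-mangled "_foo"
+    // becomes "__imp__foo" here, assuming the usual "_" win32 global prefix.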
+ const char *Prefix = "__imp_"; + Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); + break; + } + case X86II::MO_DARWIN_NONLAZY: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { + Name += "$non_lazy_ptr"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + + const MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { + Name += "$non_lazy_ptr"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + case X86II::MO_DARWIN_STUB: { + Name += "$stub"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + // FIXME: These probably should be a modifier on the symbol or something?? + case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *X86MCInstLower:: +GetExternalSymbolSymbol(const MachineOperand &MO) const { + SmallString<128> Name; + Name += AsmPrinter.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + case X86II::MO_GOT_ABSOLUTE_ADDRESS: // Doesn't modify symbol name. + case X86II::MO_PIC_BASE_OFFSET: // Doesn't modify symbol name. + break; + case X86II::MO_DLLIMPORT: { + // Handle dllimport linkage. + const char *Prefix = "__imp_"; + Name.insert(Name.begin(), Prefix, Prefix+strlen(Prefix)); + break; + } + case X86II::MO_DARWIN_STUB: { + Name += "$stub"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str()); + const MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym); + + if (StubSym == 0) { + Name.erase(Name.end()-5, Name.end()); + StubSym = Ctx.GetOrCreateSymbol(Name.str()); + } + return Sym; + } + // FIXME: These probably should be a modifier on the symbol or something?? 
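+  // (Editor's sketch.) These suffixes are assembler-level relocation
+  // specifiers, e.g. general-dynamic TLS on x86-64 emits
+  //   leaq x@TLSGD(%rip), %rdi
+  // and the "@TLSGD" is what selects the relocation in the assembler.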
+ case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *X86MCInstLower::GetJumpTableSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "JTI" + << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + break; + // FIXME: These probably should be a modifier on the symbol or something?? + case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + + +MCSymbol *X86MCInstLower:: +GetConstantPoolIndexSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << AsmPrinter.MAI->getPrivateGlobalPrefix() << "CPI" + << AsmPrinter.getFunctionNumber() << '_' << MO.getIndex(); + + switch (MO.getTargetFlags()) { + default: + llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + break; + // FIXME: These probably should be a modifier on the symbol or something?? + case X86II::MO_TLSGD: Name += "@TLSGD"; break; + case X86II::MO_GOTTPOFF: Name += "@GOTTPOFF"; break; + case X86II::MO_INDNTPOFF: Name += "@INDNTPOFF"; break; + case X86II::MO_TPOFF: Name += "@TPOFF"; break; + case X86II::MO_NTPOFF: Name += "@NTPOFF"; break; + case X86II::MO_GOTPCREL: Name += "@GOTPCREL"; break; + case X86II::MO_GOT: Name += "@GOT"; break; + case X86II::MO_GOTOFF: Name += "@GOTOFF"; break; + case X86II::MO_PLT: Name += "@PLT"; break; + } + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + case X86II::MO_NO_FLAG: // No flag. + + // These affect the name of the symbol, not any suffix. 
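+  // (Editor's note.) E.g. an MO_GOT operand has already been materialized as
+  // "sym@GOT" by GetGlobalAddressSymbol above, so the plain MCSymbolRefExpr
+  // created here needs no further wrapping for these cases.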
+ case X86II::MO_DARWIN_NONLAZY: + case X86II::MO_DLLIMPORT: + case X86II::MO_DARWIN_STUB: + case X86II::MO_TLSGD: + case X86II::MO_GOTTPOFF: + case X86II::MO_INDNTPOFF: + case X86II::MO_TPOFF: + case X86II::MO_NTPOFF: + case X86II::MO_GOTPCREL: + case X86II::MO_GOT: + case X86II::MO_GOTOFF: + case X86II::MO_PLT: + break; + case X86II::MO_PIC_BASE_OFFSET: + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: + // Subtract the pic base. + Expr = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), + Ctx); + break; + } + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), Ctx), + Ctx); + return MCOperand::CreateExpr(Expr); +} + + + +static void lower_subreg32(MCInst *MI, unsigned OpNo) { + // Convert registers in the addr mode according to subreg32. + unsigned Reg = MI->getOperand(OpNo).getReg(); + if (Reg != 0) + MI->getOperand(OpNo).setReg(getX86SubSuperRegister(Reg, MVT::i32)); +} + +static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { + // Convert registers in the addr mode according to subreg64. + for (unsigned i = 0; i != 4; ++i) { + if (!MI->getOperand(OpNo+i).isReg()) continue; + + unsigned Reg = MI->getOperand(OpNo+i).getReg(); + if (Reg == 0) continue; + + MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); + } +} + + + +void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + AsmPrinter.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } + + // Handle a few special cases to eliminate operand modifiers. + switch (OutMI.getOpcode()) { + case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. 
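+    // (Editor's illustration.) The selected form still carries 32-bit
+    // registers in its address mode; rewriting them with
+    // getX86SubSuperRegister to their 64-bit super-registers turns e.g. an
+    // EDI base into RDI, giving the encodable "leal 4(%rdi), %eax".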
+ lower_lea64_32mem(&OutMI, 1); + break; + case X86::MOV16r0: + OutMI.setOpcode(X86::MOV32r0); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX16rr8: + OutMI.setOpcode(X86::MOVZX32rr8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX16rm8: + OutMI.setOpcode(X86::MOVZX32rm8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVSX16rr8: + OutMI.setOpcode(X86::MOVSX32rr8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVSX16rm8: + OutMI.setOpcode(X86::MOVSX32rm8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rr32: + OutMI.setOpcode(X86::MOV32rr); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rm32: + OutMI.setOpcode(X86::MOV32rm); + lower_subreg32(&OutMI, 0); + break; + case X86::MOV64ri64i32: + OutMI.setOpcode(X86::MOV32ri); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rr8: + OutMI.setOpcode(X86::MOVZX32rr8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rm8: + OutMI.setOpcode(X86::MOVZX32rm8); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rr16: + OutMI.setOpcode(X86::MOVZX32rr16); + lower_subreg32(&OutMI, 0); + break; + case X86::MOVZX64rm16: + OutMI.setOpcode(X86::MOVZX32rm16); + lower_subreg32(&OutMI, 0); + break; + } +} + + + +void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { + X86MCInstLower MCInstLowering(OutContext, Mang, *this); + switch (MI->getOpcode()) { + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + case TargetInstrInfo::GC_LABEL: + printLabel(MI); + return; + case TargetInstrInfo::INLINEASM: + O << '\t'; + printInlineAsm(MI); + return; + case TargetInstrInfo::IMPLICIT_DEF: + printImplicitDef(MI); + return; + case TargetInstrInfo::KILL: + return; + case X86::MOVPC32r: { + MCInst TmpInst; + // This is a pseudo op for a two instruction sequence with a label, which + // looks like: + // call "L1$pb" + // "L1$pb": + // popl %esi + + // Emit the call. + MCSymbol *PICBase = MCInstLowering.GetPICBaseSymbol(); + TmpInst.setOpcode(X86::CALLpcrel32); + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, + OutContext))); + printMCInst(&TmpInst); + O << '\n'; + + // Emit the label. + OutStreamer.EmitLabel(PICBase); + + // popl $reg + TmpInst.setOpcode(X86::POP32r); + TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); + printMCInst(&TmpInst); + return; + } + + case X86::ADD32ri: { + // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. + if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) + break; + + // Okay, we have something like: + // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) + + // For this, we want to print something like: + // MYGLOBAL + (. - PICBASE) + // However, we can't generate a ".", so just emit a new label here and refer + // to it. We know that this operand flag occurs at most once per function. + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << "picbaseref" << getFunctionNumber(); + MCSymbol *DotSym = OutContext.GetOrCreateSymbol(Name.str()); + OutStreamer.EmitLabel(DotSym); + + // Now that we have emitted the label, lower the complex operand expression. 
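+    // (Editor's sketch of the expression assembled below; names are
+    // illustrative.) The result is
+    //   MYGLOBAL + (picbaseref0 - "L0$pb")
+    // i.e. OpSym plus the difference between the label just emitted and the
+    // PIC base, standing in for the "." we cannot print.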
+ MCSymbol *OpSym = MCInstLowering.GetExternalSymbolSymbol(MI->getOperand(2)); + + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + const MCExpr *PICBase = + MCSymbolRefExpr::Create(MCInstLowering.GetPICBaseSymbol(), OutContext); + DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); + + DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + DotExpr, OutContext); + + MCInst TmpInst; + TmpInst.setOpcode(X86::ADD32ri); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + TmpInst.addOperand(MCOperand::CreateExpr(DotExpr)); + printMCInst(&TmpInst); + return; + } + } + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + + printMCInst(&TmpInst); +} + diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h new file mode 100644 index 0000000000000..fa25b906d5437 --- /dev/null +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -0,0 +1,54 @@ +//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_MCINSTLOWER_H +#define X86_MCINSTLOWER_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { + class MCContext; + class MCInst; + class MCOperand; + class MCSymbol; + class MachineInstr; + class MachineModuleInfoMachO; + class MachineOperand; + class Mangler; + class X86AsmPrinter; + class X86Subtarget; + +/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 
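+///
+/// Typical use, mirroring printInstructionThroughMCStreamer in
+/// X86MCInstLower.cpp:
+///   X86MCInstLower MCInstLowering(OutContext, Mang, *this);
+///   MCInst TmpInst;
+///   MCInstLowering.Lower(MI, TmpInst);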
+class VISIBILITY_HIDDEN X86MCInstLower { + MCContext &Ctx; + Mangler *Mang; + X86AsmPrinter &AsmPrinter; + + const X86Subtarget &getSubtarget() const; +public: + X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter) + : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {} + + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + + MCSymbol *GetPICBaseSymbol() const; + + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; + MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + +private: + MachineModuleInfoMachO &getMachOMMI() const; +}; + +} + +#endif diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 7ea0e5170d27a..3ad65fbedc54a 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -7,13 +7,15 @@ tablegen(X86GenInstrNames.inc -gen-instr-enums) tablegen(X86GenInstrInfo.inc -gen-instr-desc) tablegen(X86GenAsmWriter.inc -gen-asm-writer) tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) +tablegen(X86GenAsmMatcher.inc -gen-asm-matcher) tablegen(X86GenDAGISel.inc -gen-dag-isel) tablegen(X86GenFastISel.inc -gen-fast-isel) tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) -add_llvm_target(X86CodeGen +set(sources X86CodeEmitter.cpp + X86COFFMachineModuleInfo.cpp X86ELFWriterInfo.cpp X86FloatingPoint.cpp X86FloatingPointRegKill.cpp @@ -21,11 +23,19 @@ add_llvm_target(X86CodeGen X86ISelLowering.cpp X86InstrInfo.cpp X86JITInfo.cpp + X86MCAsmInfo.cpp X86RegisterInfo.cpp X86Subtarget.cpp - X86TargetAsmInfo.cpp X86TargetMachine.cpp + X86TargetObjectFile.cpp X86FastISel.cpp ) +if( CMAKE_CL_64 ) + enable_language(ASM_MASM) + set(sources ${sources} X86CompilationCallback_Win64.asm) +endif() + +add_llvm_target(X86CodeGen ${sources}) + target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG) diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 44f1c5d5a509a..220831d88db3a 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -13,11 +13,11 @@ TARGET = X86 # Make sure that tblgen is run, first thing. BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenRegisterInfo.inc X86GenInstrNames.inc \ - X86GenInstrInfo.inc X86GenAsmWriter.inc \ + X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \ X86GenAsmWriter1.inc X86GenDAGISel.inc \ X86GenFastISel.inc \ X86GenCallingConv.inc X86GenSubtarget.inc -DIRS = AsmPrinter +DIRS = AsmPrinter AsmParser TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index ad12137c89134..e8f7c5d6dd223 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -249,3 +249,52 @@ lowered return value, and it would free non-C frontends from a complication only required by a C-based ABI. 
//===---------------------------------------------------------------------===// + +We get a redundant zero extension for code like this: + +int mask[1000]; +int foo(unsigned x) { + if (x < 10) + x = x * 45; + else + x = x * 78; + return mask[x]; +} + +_foo: +LBB1_0: ## entry + cmpl $9, %edi + jbe LBB1_3 ## bb +LBB1_1: ## bb1 + imull $78, %edi, %eax +LBB1_2: ## bb2 + movl %eax, %eax <---- + movq _mask@GOTPCREL(%rip), %rcx + movl (%rcx,%rax,4), %eax + ret +LBB1_3: ## bb + imull $45, %edi, %eax + jmp LBB1_2 ## bb2 + +Before regalloc, we have: + + %reg1025 = IMUL32rri8 %reg1024, 45, %EFLAGS + JMP mbb + Successors according to CFG: 0x203afb0 (#3) + +bb1: 0x203af60, LLVM BB @0x1e02310, ID#2: + Predecessors according to CFG: 0x203aec0 (#0) + %reg1026 = IMUL32rri8 %reg1024, 78, %EFLAGS + Successors according to CFG: 0x203afb0 (#3) + +bb2: 0x203afb0, LLVM BB @0x1e02340, ID#3: + Predecessors according to CFG: 0x203af10 (#1) 0x203af60 (#2) + %reg1027 = PHI %reg1025, mbb, + %reg1026, mbb + %reg1029 = MOVZX64rr32 %reg1027 + +so we'd have to know that IMUL32rri8 leaves the high word zero extended and to +be able to recognize the zero extend. This could also presumably be implemented +if we have whole-function selectiondags. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 4464878ce2173..046d35ce5b69d 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1932,3 +1932,23 @@ Replacing an icmp+select with a shift should always be considered profitable in instcombine. //===---------------------------------------------------------------------===// + +Re-implement atomic builtins __sync_add_and_fetch() and __sync_sub_and_fetch +properly. + +When the return value is not used (i.e. only care about the value in the +memory), x86 does not have to use add to implement these. Instead, it can use +add, sub, inc, dec instructions with the "lock" prefix. + +This is currently implemented using a bit of instruction selection trick. The +issue is the target independent pattern produces one output and a chain and we +want to map it into one that just output a chain. The current trick is to select +it into a MERGE_VALUES with the first definition being an implicit_def. The +proper solution is to add new ISD opcodes for the no-output variant. DAG +combiner can then transform the node before it gets to target node selection. + +Problem #2 is we are adding a whole bunch of x86 atomic instructions when in +fact these instructions are identical to the non-lock versions. We need a way to +add target specific information to target nodes and have this information +carried over to machine instructions. Asm printer (or JIT) can use this +information to add the "lock" prefix. diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000000000..90be9f58cc73f --- /dev/null +++ b/lib/Target/X86/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
)
+
+add_llvm_library(LLVMX86Info
+  X86TargetInfo.cpp
+  )
+
+add_dependencies(LLVMX86Info X86CodeGenTable_gen)
diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile
new file mode 100644
index 0000000000000..6677d4bdfde1d
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/X86/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Info
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
new file mode 100644
index 0000000000000..08d4d84f8a8ad
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -0,0 +1,23 @@
+//===-- X86TargetInfo.cpp - X86 Target Implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+
+extern "C" void LLVMInitializeX86TargetInfo() {
+  RegisterTarget<Triple::x86, /*HasJIT=*/true>
+    X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+
+  RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
+    Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+}
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 22de3f6425637..a1671185afb43 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -22,8 +22,10 @@ namespace llvm {
 class X86TargetMachine;
 class FunctionPass;
 class MachineCodeEmitter;
+class MCCodeEmitter;
 class JITCodeEmitter;
-class raw_ostream;
+class Target;
+class formatted_raw_ostream;
 
 /// createX86ISelDag - This pass converts a legalized DAG into a
 /// X86-specific DAG, ready for instruction scheduling.
@@ -42,13 +44,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
 ///
 FunctionPass *createX87FPRegKillInserterPass();
 
-/// createX86CodePrinterPass - Returns a pass that prints the X86
-/// assembly code for a MachineFunction to the given output stream,
-/// using the given target machine description.
-///
-FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm,
-                                       bool Verbose);
-
 /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
 /// to the specified MCE object.
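// (Editor's sketch.) The removed printer-as-pass entry point above is
// superseded by registry-based construction; a target initializer now
// registers the printer instead, as in LLVMInitializeX86AsmPrinter():
//   RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);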
@@ -56,6 +51,10 @@ FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, MachineCodeEmitter &MCE); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createX86ObjectCodeEmitterPass(X86TargetMachine &TM, + ObjectCodeEmitter &OCE); + +MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically @@ -68,6 +67,8 @@ FunctionPass *createEmitX86CodeToMemory(); /// FunctionPass *createX86MaxStackAlignmentCalculatorPass(); +extern Target TheX86_32Target, TheX86_64Target; + } // End llvm namespace // Defines symbolic names for X86 registers. This defines a mapping from diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 47861d5a67dc9..da467fe6aa725 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -19,12 +19,17 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // X86 Subtarget features. //===----------------------------------------------------------------------===// - + +def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", + "Enable conditional move instructions">; + def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", "Enable MMX instructions">; def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", - [FeatureMMX]>; + // SSE codegen depends on cmovs, and all + // SSE1+ processors support them. + [FeatureMMX, FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", [FeatureSSE1]>; @@ -76,8 +81,8 @@ def : Proc<"i586", []>; def : Proc<"pentium", []>; def : Proc<"pentium-mmx", [FeatureMMX]>; def : Proc<"i686", []>; -def : Proc<"pentiumpro", []>; -def : Proc<"pentium2", [FeatureMMX]>; +def : Proc<"pentiumpro", [FeatureCMOV]>; +def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>; def : Proc<"pentium3", [FeatureSSE1]>; def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; @@ -178,21 +183,34 @@ include "X86CallingConv.td" // Assembly Printers //===----------------------------------------------------------------------===// +// Currently the X86 assembly parser only supports ATT syntax. +def ATTAsmParser : AsmParser { + string AsmParserClassName = "ATTAsmParser"; + int Variant = 0; + + // Discard comments in assembly strings. + string CommentDelimiter = "#"; + + // Recognize hard coded registers. + string RegisterPrefix = "%"; +} + // The X86 target supports two different syntaxes for emitting machine code. // This is controlled by the -x86-asm-syntax={att|intel} def ATTAsmWriter : AsmWriter { - string AsmWriterClassName = "ATTAsmPrinter"; + string AsmWriterClassName = "ATTInstPrinter"; int Variant = 0; } def IntelAsmWriter : AsmWriter { - string AsmWriterClassName = "IntelAsmPrinter"; + string AsmWriterClassName = "IntelInstPrinter"; int Variant = 1; } - def X86 : Target { // Information about the instructions... 
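+  // (Editor's note.) Writer variant 0 (ATTAsmWriter) is generated into
+  // X86GenAsmWriter.inc and variant 1 (IntelAsmWriter) into
+  // X86GenAsmWriter1.inc (see -asmwriternum=1 in CMakeLists.txt above);
+  // -x86-asm-syntax={att|intel} selects between them at run time.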
   let InstructionSet = X86InstrInfo;
+  let AssemblyParsers = [ATTAsmParser];
+  let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
 }
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
new file mode 100644
index 0000000000000..01c4fcfa1bfeb
--- /dev/null
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -0,0 +1,123 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86COFFMachineModuleInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) {
+}
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
+
+}
+
+void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F,
+                                            const X86MachineFunctionInfo &Val) {
+  FunctionInfoMap[F] = Val;
+}
+
+
+
+static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
+                                                    const TargetData &TD) {
+  X86MachineFunctionInfo Info;
+  uint64_t Size = 0;
+
+  switch (F->getCallingConv()) {
+  case CallingConv::X86_StdCall:
+    Info.setDecorationStyle(StdCall);
+    break;
+  case CallingConv::X86_FastCall:
+    Info.setDecorationStyle(FastCall);
+    break;
+  default:
+    return Info;
+  }
+
+  unsigned argNum = 1;
+  for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+       AI != AE; ++AI, ++argNum) {
+    const Type* Ty = AI->getType();
+
+    // 'Dereference' type in case of byval parameter attribute
+    if (F->paramHasAttr(argNum, Attribute::ByVal))
+      Ty = cast<PointerType>(Ty)->getElementType();
+
+    // Size should be aligned to DWORD boundary
+    Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+  }
+
+  // We're not supporting tooooo huge arguments :)
+  Info.setBytesToPopOnReturn((unsigned int)Size);
+  return Info;
+}
+
+
+/// DecorateCygMingName - Query FunctionInfoMap and use this information for
+/// various name decorations for Cygwin and MingW.
+void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name,
+                                                   const GlobalValue *GV,
+                                                   const TargetData &TD) {
+  const Function *F = dyn_cast<Function>(GV);
+  if (!F) return;
+
+  // Save function name for later type emission.
+  if (F->isDeclaration())
+    CygMingStubs.insert(StringRef(Name.data(), Name.size()));
+
+  // We don't want to decorate non-stdcall or non-fastcall functions right now
+  CallingConv::ID CC = F->getCallingConv();
+  if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
+    return;
+
+  const X86MachineFunctionInfo *Info;
+
+  FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F);
+  if (info_item == FunctionInfoMap.end()) {
+    // Calculate appropriate function info and populate map
+    FunctionInfoMap[F] = calculateFunctionInfo(F, TD);
+    Info = &FunctionInfoMap[F];
+  } else {
+    Info = &info_item->second;
+  }
+
+  if (Info->getDecorationStyle() == None) return;
+  const FunctionType *FT = F->getFunctionType();
+
+  // "Pure" variadic functions do not receive @0 suffix.
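+  // Decoration examples (editor's illustration): "int __stdcall f(int, int)"
+  // becomes "_f@8" and "int __fastcall g(int)" becomes "@g@4", while a
+  // variadic stdcall function keeps its plain "_f".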
+  if (!FT->isVarArg() || FT->getNumParams() == 0 ||
+      (FT->getNumParams() == 1 && F->hasStructRetAttr()))
+    raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn();
+
+  if (Info->getDecorationStyle() == FastCall) {
+    if (Name[0] == '_')
+      Name[0] = '@';
+    else
+      Name.insert(Name.begin(), '@');
+  }
+}
+
+/// DecorateCygMingName - Query FunctionInfoMap and use this information for
+/// various name decorations for Cygwin and MingW.
+void X86COFFMachineModuleInfo::DecorateCygMingName(std::string &Name,
+                                                   const GlobalValue *GV,
+                                                   const TargetData &TD) {
+  SmallString<128> NameStr(Name.begin(), Name.end());
+  DecorateCygMingName(NameStr, GV, TD);
+  Name.assign(NameStr.begin(), NameStr.end());
+}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
new file mode 100644
index 0000000000000..afd552563d919
--- /dev/null
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -0,0 +1,67 @@
+//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an MMI implementation for X86 COFF (windows) targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86COFF_MACHINEMODULEINFO_H
+#define X86COFF_MACHINEMODULEINFO_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/StringSet.h"
+
+namespace llvm {
+  class X86MachineFunctionInfo;
+  class TargetData;
+
+/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
+/// for X86 COFF targets.
+class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
+  StringSet<> CygMingStubs;
+
+  // We have to propagate some information about MachineFunction to the
+  // AsmPrinter. That is fine while we are printing the function itself, since
+  // we have access to the MachineFunction and can get the appropriate
+  // MachineFunctionInfo. Unfortunately, it is not possible when we are
+  // printing a reference to a Function (e.g. calling it and so on): there is
+  // no way to get the corresponding MachineFunction, which may not even have
+  // been created yet. That's why we collect all the necessary information in
+  // this additional structure up front.
+  //
+  // This structure is used e.g. for name decoration of stdcall and fastcall
+  // functions, since the decoration depends on the arguments' size.
+  typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap;
+  FMFInfoMap FunctionInfoMap;
+
+public:
+  X86COFFMachineModuleInfo(const MachineModuleInfo &);
+  ~X86COFFMachineModuleInfo();
+
+
+  void DecorateCygMingName(std::string &Name, const GlobalValue *GV,
+                           const TargetData &TD);
+  void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV,
+                           const TargetData &TD);
+
+  void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val);
+
+
+  typedef StringSet<>::const_iterator stub_iterator;
+  stub_iterator stub_begin() const { return CygMingStubs.begin(); }
+  stub_iterator stub_end() const { return CygMingStubs.end(); }
+
+
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index e9fcbd5a4895f..d77f0390b10c2 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[
 
 // X86-Win64 C return-value convention.
 def RetCC_X86_Win64_C : CallingConv<[
   // The X86-Win64 calling convention always returns __m64 values in RAX.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>,
+  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
 
   // And FP in XMM0 only.
   CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -137,26 +137,26 @@ def CC_X86_64_C : CallingConv<[
   // The 'nest' parameter, if any, is passed in R10.
   CCIfNest<CCAssignToReg<[R10]>>,
 
+  // The first 6 v1i64 vector arguments are passed in GPRs on Darwin.
+  CCIfType<[v1i64],
+            CCIfSubtarget<"isTargetDarwin()",
+            CCBitConvertToType<i64>>>,
+
   // The first 6 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
   CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
-
-  // The first 8 FP/Vector arguments are passed in XMM registers.
-  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-            CCIfSubtarget<"hasSSE1()",
-            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
   // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
   // registers on Darwin.
   CCIfType<[v8i8, v4i16, v2i32, v2f32],
             CCIfSubtarget<"isTargetDarwin()",
             CCIfSubtarget<"hasSSE2()",
-            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>>,
+            CCPromoteToType<v2i64>>>>,
 
-  // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
-  CCIfType<[v1i64],
-            CCIfSubtarget<"isTargetDarwin()",
-            CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
+  // The first 8 FP/Vector arguments are passed in XMM registers.
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+            CCIfSubtarget<"hasSSE1()",
+            CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
@@ -184,6 +184,13 @@ def CC_X86_Win64_C : CallingConv<[
   // The 'nest' parameter, if any, is passed in R10.
   CCIfNest<CCAssignToReg<[R10]>>,
 
+  // 128 bit vectors are passed by pointer
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+  // The first 4 MMX vector arguments are passed in GPRs.
+  CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+           CCBitConvertToType<i64>>,
+
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
@@ -195,24 +202,16 @@ def CC_X86_Win64_C : CallingConv<[
                                           CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
                                                                   [RCX , RDX , R8 , R9 ]>>,
 
-  // The first 4 MMX vector arguments are passed in GPRs.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
-           CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
-                                   [XMM0, XMM1, XMM2, XMM3]>>,
-
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
-  // 16-byte aligned if there are no more registers to hold them.
-  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
+  // 8-byte aligned if there are no more registers to hold them.
+  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
 
   // Long doubles get stack slots whose size and alignment depends on the
   // subtarget.
   CCIfType<[f80], CCAssignToStack<0, 0>>,
 
-  // Vectors get 16-byte stack slots that are 16-byte aligned.
-  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
-  // __m64 vectors get 8-byte stack slots that are 16-byte aligned.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
+  // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
 ]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index d5846a049afbc..f942f3f851070 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -22,21 +22,27 @@
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/MachineCodeEmitter.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 STATISTIC(NumEmitted, "Number of machine instructions emitted");
 
 namespace {
-template<class CodeEmitter>
+  template<class CodeEmitter>
   class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass {
     const X86InstrInfo  *II;
     const TargetData    *TD;
@@ -67,6 +73,7 @@ template<class CodeEmitter>
                            const TargetInstrDesc *Desc);
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
       AU.addRequired<MachineModuleInfo>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
@@ -83,7 +90,7 @@ template<class CodeEmitter>
                            intptr_t PCAdj = 0);
 
     void emitDisplacementField(const MachineOperand *RelocOp, int DispVal,
-                               intptr_t PCAdj = 0);
+                               intptr_t Adj = 0, bool IsPCRel = true);
 
     void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField);
     void emitRegModRMByte(unsigned RegOpcodeField);
@@ -95,29 +102,27 @@ template<class CodeEmitter>
                           intptr_t PCAdj = 0);
 
     unsigned getX86RegNum(unsigned RegNo) const;
-
-    bool gvNeedsNonLazyPtr(const GlobalValue *GV);
   };
 
 template<class CodeEmitter>
   char Emitter<CodeEmitter>::ID = 0;
-}
+} // end anonymous namespace.
 
 /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
 /// to the specified templated MachineCodeEmitter object.
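/// (Editor's note.) The factories below each instantiate the same templated
/// pass with a different emitter, e.g.:
///   return new Emitter<MachineCodeEmitter>(TM, MCE);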
-namespace llvm { - -FunctionPass *createX86CodeEmitterPass(X86TargetMachine &TM, - MachineCodeEmitter &MCE) { +FunctionPass *llvm::createX86CodeEmitterPass(X86TargetMachine &TM, + MachineCodeEmitter &MCE) { return new Emitter(TM, MCE); } -FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, - JITCodeEmitter &JCE) { +FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE) { return new Emitter(TM, JCE); } - -} // end namespace llvm +FunctionPass *llvm::createX86ObjectCodeEmitterPass(X86TargetMachine &TM, + ObjectCodeEmitter &OCE) { + return new Emitter(TM, OCE); +} template bool Emitter::runOnMachineFunction(MachineFunction &MF) { @@ -130,7 +135,8 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { IsPIC = TM.getRelocationModel() == Reloc::PIC_; do { - DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n"; + DEBUG(errs() << "JITTing function '" + << MF.getFunction()->getName() << "'\n"); MCE.startFunction(MF); for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { @@ -172,7 +178,7 @@ void Emitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t PCAdj /* = 0 */, bool NeedStub /* = false */, bool Indirect /* = false */) { - intptr_t RelocCST = 0; + intptr_t RelocCST = Disp; if (Reloc == X86::reloc_picrel_word) RelocCST = PICBaseOffset; else if (Reloc == X86::reloc_pcrel_word) @@ -291,53 +297,61 @@ static bool isDisp8(int Value) { return Value == (signed char)Value; } -template -bool Emitter::gvNeedsNonLazyPtr(const GlobalValue *GV) { - // For Darwin, simulate the linktime GOT by using the same non-lazy-pointer +static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp, + const TargetMachine &TM) { + // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer // mechanism as 32-bit mode. - return (!Is64BitMode || TM.getSubtarget().isTargetDarwin()) && - TM.getSubtarget().GVRequiresExtraLoad(GV, TM, false); + if (TM.getSubtarget().is64Bit() && + !TM.getSubtarget().isTargetDarwin()) + return false; + + // Return true if this is a reference to a stub containing the address of the + // global, not the global itself. + return isGlobalStubReference(GVOp.getTargetFlags()); } template void Emitter::emitDisplacementField(const MachineOperand *RelocOp, - int DispVal, intptr_t PCAdj) { + int DispVal, + intptr_t Adj /* = 0 */, + bool IsPCRel /* = true */) { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. if (!RelocOp) { emitConstant(DispVal, 4); return; } - + // Otherwise, this is something that requires a relocation. Emit it as such // now. + unsigned RelocType = Is64BitMode ? + (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); if (RelocOp->isGlobal()) { // In 64-bit static small code model, we could potentially emit absolute. - // But it's probably not beneficial. + // But it's probably not beneficial. If the MCE supports using RIP directly + // do it, otherwise fallback to absolute (this is determined by IsPCRel). // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? 
X86::reloc_picrel_word : X86::reloc_absolute_word);
     bool NeedStub = isa<Function>(RelocOp->getGlobal());
-    bool Indirect = gvNeedsNonLazyPtr(RelocOp->getGlobal());
-    emitGlobalAddress(RelocOp->getGlobal(), rt, RelocOp->getOffset(),
-                      PCAdj, NeedStub, Indirect);
+    bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
+    emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
+                      Adj, NeedStub, Indirect);
+  } else if (RelocOp->isSymbol()) {
+    emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
   } else if (RelocOp->isCPI()) {
-    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
-    emitConstPoolAddress(RelocOp->getIndex(), rt,
-                         RelocOp->getOffset(), PCAdj);
-  } else if (RelocOp->isJTI()) {
-    unsigned rt = Is64BitMode ? X86::reloc_pcrel_word : X86::reloc_picrel_word;
-    emitJumpTableAddress(RelocOp->getIndex(), rt, PCAdj);
+    emitConstPoolAddress(RelocOp->getIndex(), RelocType,
+                         RelocOp->getOffset(), Adj);
   } else {
-    assert(0 && "Unknown value to relocate!");
+    assert(RelocOp->isJTI() && "Unexpected machine operand!");
+    emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj);
   }
 }
 
 template<class CodeEmitter>
 void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
-                                            unsigned Op, unsigned RegOpcodeField,
-                                            intptr_t PCAdj) {
+                                            unsigned Op,unsigned RegOpcodeField,
+                                            intptr_t PCAdj) {
   const MachineOperand &Op3 = MI.getOperand(Op+3);
   int DispVal = 0;
   const MachineOperand *DispForReloc = 0;
@@ -345,15 +359,17 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
 
   // Figure out what sort of displacement we have to handle here.
   if (Op3.isGlobal()) {
     DispForReloc = &Op3;
+  } else if (Op3.isSymbol()) {
+    DispForReloc = &Op3;
   } else if (Op3.isCPI()) {
-    if (Is64BitMode || IsPIC) {
+    if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
       DispForReloc = &Op3;
     } else {
       DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex());
       DispVal += Op3.getOffset();
     }
   } else if (Op3.isJTI()) {
-    if (Is64BitMode || IsPIC) {
+    if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) {
       DispForReloc = &Op3;
     } else {
       DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex());
@@ -368,17 +384,23 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
 
   unsigned BaseReg = Base.getReg();
 
+  // Indicate whether the displacement should use a pcrel or an absolute
+  // reference by default. MCEs able to resolve addresses on-the-fly use
+  // pcrel by default, while others, unless explicitly asked to use RIP,
+  // use absolute references.
+  bool IsPCRel = MCE.earlyResolveAddresses() ? true : false;
+
   // Is a SIB byte needed?
+  // If no BaseReg, issue a RIP relative instruction only if the MCE can
+  // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
+  // 2-7) and absolute references.
   if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
-      IndexReg.getReg() == 0 &&
-      (BaseReg == 0 || BaseReg == X86::RIP ||
-       getX86RegNum(BaseReg) != N86::ESP)) {
-    if (BaseReg == 0 ||
-        BaseReg == X86::RIP) {  // Just a displacement?
+      IndexReg.getReg() == 0 &&
+      ((BaseReg == 0 && MCE.earlyResolveAddresses()) || BaseReg == X86::RIP ||
+       (BaseReg != 0 && getX86RegNum(BaseReg) != N86::ESP))) {
+    if (BaseReg == 0 || BaseReg == X86::RIP) {  // Just a displacement?
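+      // (Editor's note.) ModRM mod=00, r/m=101 encodes [disp32] in 32-bit
+      // mode but [RIP+disp32] in 64-bit mode; e.g. "mov 0(%rip), %eax"
+      // encodes as 8b 05 00 00 00 00.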
// Emit special case [disp32] encoding MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); - - emitDisplacementField(DispForReloc, DispVal, PCAdj); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); } else { unsigned BaseRegNo = getX86RegNum(BaseReg); if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { @@ -391,7 +413,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, } else { // Emit the most general non-SIB encoding: [REG+disp32] MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); - emitDisplacementField(DispForReloc, DispVal, PCAdj); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); } } @@ -427,13 +449,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, unsigned SS = SSTable[Scale.getImm()]; if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base. The - // displacement has already been output. + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. unsigned IndexRegNo; if (IndexReg.getReg()) IndexRegNo = getX86RegNum(IndexReg.getReg()); - else - IndexRegNo = 4; // For example [ESP+1*+4] + else // Examples: [ESP+1*+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; emitSIBByte(SS, IndexRegNo, 5); } else { unsigned BaseRegNo = getX86RegNum(BaseReg); @@ -449,21 +471,23 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, if (ForceDisp8) { emitConstant(DispVal, 1); } else if (DispVal != 0 || ForceDisp32) { - emitDisplacementField(DispForReloc, DispVal, PCAdj); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); } } } template -void Emitter::emitInstruction( - const MachineInstr &MI, - const TargetInstrDesc *Desc) { - DOUT << MI; +void Emitter::emitInstruction(const MachineInstr &MI, + const TargetInstrDesc *Desc) { + DEBUG(errs() << MI); + + MCE.processDebugLoc(MI.getDebugLoc(), true); unsigned Opcode = Desc->Opcode; // Emit the lock opcode prefix as needed. - if (Desc->TSFlags & X86II::LOCK) MCE.emitByte(0xF0); + if (Desc->TSFlags & X86II::LOCK) + MCE.emitByte(0xF0); // Emit segment override opcode prefix as needed. switch (Desc->TSFlags & X86II::SegOvrMask) { @@ -473,18 +497,21 @@ void Emitter::emitInstruction( case X86II::GS: MCE.emitByte(0x65); break; - default: assert(0 && "Invalid segment!"); + default: llvm_unreachable("Invalid segment!"); case 0: break; // No segment override! } // Emit the repeat opcode prefix as needed. - if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) MCE.emitByte(0xF3); + if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) + MCE.emitByte(0xF3); // Emit the operand size opcode prefix as needed. - if (Desc->TSFlags & X86II::OpSize) MCE.emitByte(0x66); + if (Desc->TSFlags & X86II::OpSize) + MCE.emitByte(0x66); // Emit the address size opcode prefix as needed. - if (Desc->TSFlags & X86II::AdSize) MCE.emitByte(0x67); + if (Desc->TSFlags & X86II::AdSize) + MCE.emitByte(0x67); bool Need0FPrefix = false; switch (Desc->TSFlags & X86II::Op0Mask) { @@ -493,6 +520,10 @@ void Emitter::emitInstruction( case X86II::TA: // 0F 3A Need0FPrefix = true; break; + case X86II::TF: // F2 0F 38 + MCE.emitByte(0xF2); + Need0FPrefix = true; + break; case X86II::REP: break; // already handled. 
case X86II::XS: // F3 0F MCE.emitByte(0xF3); @@ -508,14 +539,13 @@ void Emitter::emitInstruction( (((Desc->TSFlags & X86II::Op0Mask)-X86II::D8) >> X86II::Op0Shift)); break; // Two-byte opcode prefix - default: assert(0 && "Invalid prefix!"); + default: llvm_unreachable("Invalid prefix!"); case 0: break; // No prefix! } + // Handle REX prefix. if (Is64BitMode) { - // REX prefix - unsigned REX = X86InstrInfo::determineREX(MI); - if (REX) + if (unsigned REX = X86InstrInfo::determineREX(MI)) MCE.emitByte(0x40 | REX); } @@ -524,7 +554,8 @@ void Emitter::emitInstruction( MCE.emitByte(0x0F); switch (Desc->TSFlags & X86II::Op0Mask) { - case X86II::T8: // 0F 38 + case X86II::TF: // F2 0F 38 + case X86II::T8: // 0F 38 MCE.emitByte(0x38); break; case X86II::TA: // 0F 3A @@ -543,29 +574,29 @@ void Emitter::emitInstruction( unsigned char BaseOpcode = II->getBaseOpcodeFor(Desc); switch (Desc->TSFlags & X86II::FormMask) { - default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!"); + default: + llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); case X86II::Pseudo: // Remember the current PC offset, this is the PIC relocation // base address. switch (Opcode) { default: - assert(0 && "psuedo instructions should be removed before code emission"); + llvm_unreachable("pseudo instructions should be removed before code" + " emission"); break; - case TargetInstrInfo::INLINEASM: { + case TargetInstrInfo::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. - if (MI.getOperand(0).getSymbolName()[0]) { - assert(0 && "JIT does not support inline asm!\n"); - abort(); - } + if (MI.getOperand(0).getSymbolName()[0]) + llvm_report_error("JIT does not support inline asm!"); break; - } case TargetInstrInfo::DBG_LABEL: case TargetInstrInfo::EH_LABEL: + case TargetInstrInfo::GC_LABEL: MCE.emitLabel(MI.getOperand(0).getImm()); break; case TargetInstrInfo::IMPLICIT_DEF: - case TargetInstrInfo::DECLARE: + case TargetInstrInfo::KILL: case X86::DWARF_LOC: case X86::FP_REG_KILL: break; @@ -582,73 +613,86 @@ void Emitter::emitInstruction( } CurOp = NumOps; break; - case X86II::RawFrm: + case X86II::RawFrm: { MCE.emitByte(BaseOpcode); - if (CurOp != NumOps) { - const MachineOperand &MO = MI.getOperand(CurOp++); - - DOUT << "RawFrm CurOp " << CurOp << "\n"; - DOUT << "isMBB " << MO.isMBB() << "\n"; - DOUT << "isGlobal " << MO.isGlobal() << "\n"; - DOUT << "isSymbol " << MO.isSymbol() << "\n"; - DOUT << "isImm " << MO.isImm() << "\n"; - - if (MO.isMBB()) { - emitPCRelativeBlockAddress(MO.getMBB()); - } else if (MO.isGlobal()) { - // Assume undefined functions may be outside the Small codespace. - bool NeedStub = - (Is64BitMode && - (TM.getCodeModel() == CodeModel::Large || - TM.getSubtarget<X86Subtarget>().isTargetDarwin())) || - Opcode == X86::TAILJMPd; - emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, - MO.getOffset(), 0, NeedStub); - } else if (MO.isSymbol()) { - emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); - } else if (MO.isImm()) { - if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { - // Fix up immediate operand for pc relative calls.
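// The fixup below as plain arithmetic (illustration only): the CPU resolves
// a rel32 call as target = PC_after_instruction + disp32, and the PC after
// the instruction is the address of the 4-byte immediate plus 4, hence
//   disp32 = target - (getCurrentPCValue() + 4)
// which is the subtraction applied to Imm.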
- intptr_t Imm = (intptr_t)MO.getImm(); - Imm = Imm - MCE.getCurrentPCValue() - 4; - emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); - } else - emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); - } else { - assert(0 && "Unknown RawFrm operand!"); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + + DEBUG(errs() << "RawFrm CurOp " << CurOp << "\n"); + DEBUG(errs() << "isMBB " << MO.isMBB() << "\n"); + DEBUG(errs() << "isGlobal " << MO.isGlobal() << "\n"); + DEBUG(errs() << "isSymbol " << MO.isSymbol() << "\n"); + DEBUG(errs() << "isImm " << MO.isImm() << "\n"); + + if (MO.isMBB()) { + emitPCRelativeBlockAddress(MO.getMBB()); + break; } + + if (MO.isGlobal()) { + // Assume undefined functions may be outside the Small codespace. + bool NeedStub = + (Is64BitMode && + (TM.getCodeModel() == CodeModel::Large || + TM.getSubtarget().isTargetDarwin())) || + Opcode == X86::TAILJMPd; + emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, + MO.getOffset(), 0, NeedStub); + break; + } + + if (MO.isSymbol()) { + emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); + break; + } + + assert(MO.isImm() && "Unknown RawFrm operand!"); + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { + // Fix up immediate operand for pc relative calls. + intptr_t Imm = (intptr_t)MO.getImm(); + Imm = Imm - MCE.getCurrentPCValue() - 4; + emitConstant(Imm, X86InstrInfo::sizeOfImm(Desc)); + } else + emitConstant(MO.getImm(), X86InstrInfo::sizeOfImm(Desc)); break; - - case X86II::AddRegFrm: + } + + case X86II::AddRegFrm: { MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); - if (MO1.isImm()) - emitConstant(MO1.getImm(), Size); - else { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - // This should not occur on Darwin for relocatable objects. - if (Opcode == X86::MOV64ri) - rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? - if (MO1.isGlobal()) { - bool NeedStub = isa(MO1.getGlobal()); - bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal()); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri64i32) + rt = X86::reloc_absolute_word; // FIXME: add X86II flag? + // This should not occur on Darwin for relocatable objects. + if (Opcode == X86::MOV64ri) + rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? 
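// The relocation-type choice made just above, summarized (the same pattern
// recurs in the MRM*r and MRM*m cases further down): 64-bit mode uses
// X86::reloc_pcrel_word (RIP-relative), 32-bit PIC uses
// X86::reloc_picrel_word (PIC-base-relative), and 32-bit non-PIC uses
// X86::reloc_absolute_word; MOV64ri is widened to reloc_absolute_dword
// because its immediate is a full 64 bits.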
+ if (MO1.isGlobal()) { + bool NeedStub = isa(MO1.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); break; + } case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); @@ -656,7 +700,8 @@ void Emitter::emitInstruction( getX86RegNum(MI.getOperand(CurOp+1).getReg())); CurOp += 2; if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc)); + emitConstant(MI.getOperand(CurOp++).getImm(), + X86InstrInfo::sizeOfImm(Desc)); break; } case X86II::MRMDestMem: { @@ -666,7 +711,8 @@ void Emitter::emitInstruction( .getReg())); CurOp += X86AddrNumOperands + 1; if (CurOp != NumOps) - emitConstant(MI.getOperand(CurOp++).getImm(), X86InstrInfo::sizeOfImm(Desc)); + emitConstant(MI.getOperand(CurOp++).getImm(), + X86InstrInfo::sizeOfImm(Desc)); break; } @@ -729,29 +775,31 @@ void Emitter::emitInstruction( (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); } - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); - if (MO1.isImm()) - emitConstant(MO1.getImm(), Size); - else { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri32) - rt = X86::reloc_absolute_word; // FIXME: add X86II flag? - if (MO1.isGlobal()) { - bool NeedStub = isa(MO1.getGlobal()); - bool Indirect = gvNeedsNonLazyPtr(MO1.getGlobal()); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO1.isGlobal()) { + bool NeedStub = isa(MO1.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); break; } @@ -768,29 +816,31 @@ void Emitter::emitInstruction( PCAdj); CurOp += X86AddrNumOperands; - if (CurOp != NumOps) { - const MachineOperand &MO = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); - if (MO.isImm()) - emitConstant(MO.getImm(), Size); - else { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64mi32) - rt = X86::reloc_absolute_word; // FIXME: add X86II flag? 
- if (MO.isGlobal()) { - bool NeedStub = isa(MO.getGlobal()); - bool Indirect = gvNeedsNonLazyPtr(MO.getGlobal()); - emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - NeedStub, Indirect); - } else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), rt); - else if (MO.isCPI()) - emitConstPoolAddress(MO.getIndex(), rt); - else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), rt); - } + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO.isImm()) { + emitConstant(MO.getImm(), Size); + break; } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64mi32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO.isGlobal()) { + bool NeedStub = isa(MO.getGlobal()); + bool Indirect = gvNeedsNonLazyPtr(MO, TM); + emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, + NeedStub, Indirect); + } else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), rt); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), rt); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), rt); break; } @@ -804,10 +854,264 @@ void Emitter::emitInstruction( } if (!Desc->isVariadic() && CurOp != NumOps) { - cerr << "Cannot encode: "; - MI.dump(); - cerr << '\n'; - abort(); +#ifndef NDEBUG + errs() << "Cannot encode all operands of: " << MI << "\n"; +#endif + llvm_unreachable(0); + } + + MCE.processDebugLoc(MI.getDebugLoc(), false); +} + +// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. +// +// FIXME: This is a total hack designed to allow work on llvm-mc to proceed +// without being blocked on various cleanups needed to support a clean interface +// to instruction encoding. +// +// Look away! + +#include "llvm/DerivedTypes.h" + +namespace { +class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { + uint8_t Data[256]; + +public: + MCSingleInstructionCodeEmitter() { reset(); } + + void reset() { + BufferBegin = Data; + BufferEnd = array_endof(Data); + CurBufferPtr = Data; + } + + StringRef str() { + return StringRef(reinterpret_cast(BufferBegin), + CurBufferPtr - BufferBegin); + } + + virtual void startFunction(MachineFunction &F) {} + virtual bool finishFunction(MachineFunction &F) { return false; } + virtual void emitLabel(uint64_t LabelID) {} + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} + virtual bool earlyResolveAddresses() const { return false; } + virtual void addRelocation(const MachineRelocation &MR) { } + virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { + return 0; + } + virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { + return 0; + } + virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + return 0; + } + virtual uintptr_t getLabelAddress(uint64_t LabelID) const { + return 0; + } + virtual void setModuleInfo(MachineModuleInfo* Info) {} +}; + +class X86MCCodeEmitter : public MCCodeEmitter { + X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT + +private: + X86TargetMachine &TM; + llvm::Function *DummyF; + TargetData *DummyTD; + mutable llvm::MachineFunction *DummyMF; + llvm::MachineBasicBlock *DummyMBB; + + MCSingleInstructionCodeEmitter *InstrEmitter; + Emitter *Emit; + +public: + X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { + // Verily, thou shouldst avert thine eyes. 
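// (What follows fabricates just enough MachineFunction context for the old
// Emitter to run: a throwaway internal-linkage function, a TargetData built
// from the empty default layout string, and one dummy basic block to host
// the converted instructions. None of it is ever emitted as real code.)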
+ const llvm::FunctionType *FTy = + FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); + DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); + DummyTD = new TargetData(""); + DummyMF = new MachineFunction(DummyF, TM); + DummyMBB = DummyMF->CreateMachineBasicBlock(); + + InstrEmitter = new MCSingleInstructionCodeEmitter(); + Emit = new Emitter(TM, *InstrEmitter, + *TM.getInstrInfo(), + *DummyTD, false); + } + ~X86MCCodeEmitter() { + delete Emit; + delete InstrEmitter; + delete DummyMF; + delete DummyF; + } + + bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + if (Start + 1 > MI.getNumOperands()) + return false; + + const MCOperand &Op = MI.getOperand(Start); + if (!Op.isReg()) return false; + + Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); + return true; + } + + bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + if (Start + 1 > MI.getNumOperands()) + return false; + + const MCOperand &Op = MI.getOperand(Start); + if (Op.isImm()) { + Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); + return true; + } + if (!Op.isExpr()) + return false; + + const MCExpr *Expr = Op.getExpr(); + if (const MCConstantExpr *CE = dyn_cast(Expr)) { + Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); + return true; + } + + // FIXME: Relocation / fixup. + Instr->addOperand(MachineOperand::CreateImm(0)); + return true; + } + + bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + return (AddRegToInstr(MI, Instr, Start + 0) && + AddImmToInstr(MI, Instr, Start + 1) && + AddRegToInstr(MI, Instr, Start + 2) && + AddImmToInstr(MI, Instr, Start + 3)); + } + + bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, + unsigned Start) const { + return (AddRegToInstr(MI, Instr, Start + 0) && + AddImmToInstr(MI, Instr, Start + 1) && + AddRegToInstr(MI, Instr, Start + 2) && + AddImmToInstr(MI, Instr, Start + 3) && + AddRegToInstr(MI, Instr, Start + 4)); + } + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS) const { + // Don't look yet! + + // Convert the MCInst to a MachineInstr so we can (ab)use the regular + // emitter. + const X86InstrInfo &II = *TM.getInstrInfo(); + const TargetInstrDesc &Desc = II.get(MI.getOpcode()); + MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); + DummyMBB->push_back(Instr); + + unsigned Opcode = MI.getOpcode(); + unsigned NumOps = MI.getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) { + Instr->addOperand(MachineOperand::CreateReg(0, false)); + ++CurOp; + } else if (NumOps > 2 && + Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) + // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 + --NumOps; + + bool OK = true; + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMDestReg: + case X86II::MRMSrcReg: + // Matching doesn't fill this in completely, we have to choose operand 0 + // for a tied register. + OK &= AddRegToInstr(MI, Instr, 0); CurOp++; + OK &= AddRegToInstr(MI, Instr, CurOp++); + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::RawFrm: + if (CurOp < NumOps) { + // Hack to make branches work. 
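// (Branch targets arrive here as MCExpr symbol references, which this
// adapter has no way to relocate; substituting DummyMBB - whose address the
// single-instruction MCE reports as 0 - at least lets the opcode and a
// placeholder displacement be emitted.)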
+ if (!(Desc.TSFlags & X86II::ImmMask) && + MI.getOperand(0).isExpr() && + isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) + Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); + else + OK &= AddImmToInstr(MI, Instr, CurOp); + } + break; + + case X86II::AddRegFrm: + OK &= AddRegToInstr(MI, Instr, CurOp++); + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + // Matching doesn't fill this in completely, we have to choose operand 0 + // for a tied register. + OK &= AddRegToInstr(MI, Instr, 0); CurOp++; + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; + if (CurOp < NumOps) + OK &= AddImmToInstr(MI, Instr, CurOp); + break; + + case X86II::MRMSrcMem: + OK &= AddRegToInstr(MI, Instr, CurOp++); + if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + Opcode == X86::LEA16r || Opcode == X86::LEA32r) + OK &= AddLMemToInstr(MI, Instr, CurOp); + else + OK &= AddMemToInstr(MI, Instr, CurOp); + break; + + case X86II::MRMDestMem: + OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; + OK &= AddRegToInstr(MI, Instr, CurOp); + break; + + default: + case X86II::MRMInitReg: + case X86II::Pseudo: + OK = false; + break; + } + + if (!OK) { + errs() << "couldn't convert inst '"; + MI.dump(); + errs() << "' to machine instr:\n"; + Instr->dump(); + } + + InstrEmitter->reset(); + if (OK) + Emit->emitInstruction(*Instr, &Desc); + OS << InstrEmitter->str(); + + Instr->eraseFromParent(); } +}; } +// Ok, now you can look. +MCCodeEmitter *llvm::createX86MCCodeEmitter(const Target &, + TargetMachine &TM) { + return new X86MCCodeEmitter(static_cast<X86TargetMachine &>(TM)); +} diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm index 8002f98765f04..f321778db24bb 100644 --- a/lib/Target/X86/X86CompilationCallback_Win64.asm +++ b/lib/Target/X86/X86CompilationCallback_Win64.asm @@ -17,10 +17,11 @@ extrn X86CompilationCallback2: PROC X86CompilationCallback proc push rbp - ; Save RSP + ; Save RSP. mov rbp, rsp ; Save all int arg registers + ; WARNING: We cannot use register spill area - we're generating stubs by hand! push rcx push rdx push r8 @@ -29,27 +30,27 @@ X86CompilationCallback proc ; Align stack on 16-byte boundary. and rsp, -16 - ; Save all XMM arg registers - sub rsp, 64 - movaps [rsp], xmm0 - movaps [rsp+16], xmm1 - movaps [rsp+32], xmm2 - movaps [rsp+48], xmm3 + ; Save all XMM arg registers. Also allocate reg spill area. + sub rsp, 96 + movaps [rsp +32], xmm0 + movaps [rsp+16+32], xmm1 + movaps [rsp+32+32], xmm2 + movaps [rsp+48+32], xmm3 ; JIT callee - ; Pass prev frame and return address + ; Pass prev frame and return address. mov rcx, rbp mov rdx, qword ptr [rbp+8] call X86CompilationCallback2 - ; Restore all XMM arg registers - movaps xmm3, [rsp+48] - movaps xmm2, [rsp+32] - movaps xmm1, [rsp+16] - movaps xmm0, [rsp] + ; Restore all XMM arg registers. + movaps xmm3, [rsp+48+32] + movaps xmm2, [rsp+32+32] + movaps xmm1, [rsp+16+32] + movaps xmm0, [rsp +32] - ; Restore RSP + ; Restore RSP. mov rsp, rbp ; Restore all int arg registers @@ -59,7 +60,7 @@ X86CompilationCallback proc pop rdx pop rcx - ; Restore RBP + ; Restore RBP.
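; (At this point everything set up above - the XMM save area, the 32-byte
;  register spill/home area, and the pushed integer argument registers -
;  has already been released; only the saved rbp remains on the stack.)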
pop rbp ret X86CompilationCallback endp diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 912ab0e886f4d..1597d2b31d222 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -14,6 +14,7 @@ #include "X86ELFWriterInfo.h" #include "X86Relocations.h" #include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -38,11 +39,13 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { return R_X86_64_PC32; case X86::reloc_absolute_word: return R_X86_64_32; + case X86::reloc_absolute_word_sext: + return R_X86_64_32S; case X86::reloc_absolute_dword: return R_X86_64_64; case X86::reloc_picrel_word: default: - assert(0 && "unknown relocation type"); + llvm_unreachable("unknown x86_64 machine relocation type"); } } else { switch(MachineRelTy) { @@ -50,23 +53,101 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { return R_386_PC32; case X86::reloc_absolute_word: return R_386_32; + case X86::reloc_absolute_word_sext: case X86::reloc_absolute_dword: case X86::reloc_picrel_word: default: - assert(0 && "unknown relocation type"); + llvm_unreachable("unknown x86 machine relocation type"); } } return 0; } -long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const { +long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { if (is64Bit) { switch(RelTy) { - case R_X86_64_PC32: return -4; - break; + case R_X86_64_PC32: return Modifier - 4; + case R_X86_64_32: + case R_X86_64_32S: + case R_X86_64_64: + return Modifier; default: - assert(0 && "unknown x86 relocation type"); + llvm_unreachable("unknown x86_64 relocation type"); + } + } else { + switch(RelTy) { + case R_386_PC32: return Modifier - 4; + case R_386_32: return Modifier; + default: + llvm_unreachable("unknown x86 relocation type"); + } + } + return 0; +} + +unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + if (is64Bit) { + switch(RelTy) { + case R_X86_64_PC32: + case R_X86_64_32: + case R_X86_64_32S: + return 32; + case R_X86_64_64: + return 64; + default: + llvm_unreachable("unknown x86_64 relocation type"); + } + } else { + switch(RelTy) { + case R_386_PC32: + case R_386_32: + return 32; + default: + llvm_unreachable("unknown x86 relocation type"); } } return 0; } + +bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + if (is64Bit) { + switch(RelTy) { + case R_X86_64_PC32: + return true; + case R_X86_64_32: + case R_X86_64_32S: + case R_X86_64_64: + return false; + default: + llvm_unreachable("unknown x86_64 relocation type"); + } + } else { + switch(RelTy) { + case R_386_PC32: + return true; + case R_386_32: + return false; + default: + llvm_unreachable("unknown x86 relocation type"); + } + } + return 0; +} + +unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const { + return is64Bit ? 
+ X86::reloc_absolute_dword : X86::reloc_absolute_word; +} + +long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset, + unsigned RelOffset, + unsigned RelTy) const { + + if (RelTy == R_X86_64_PC32 || RelTy == R_386_PC32) + return SymOffset - (RelOffset + 4); + else + llvm_unreachable("computeRelocation unknown for this relocation type"); + + return 0; +} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 2ba1a0bd70a23..342e6e627d267 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -49,9 +49,26 @@ namespace llvm { /// ELF relocation entry. virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } - /// getAddendForRelTy - Gets the addend value for an ELF relocation entry - /// based on the target relocation type - virtual long int getAddendForRelTy(unsigned RelTy) const; + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const; + + /// getRelocationTySize - Returns the size of the relocatable field in bits. + virtual unsigned getRelocationTySize(unsigned RelTy) const; + + /// isPCRelativeRel - True if the relocation type is pc relative + virtual bool isPCRelativeRel(unsigned RelTy) const; + + /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used + /// to reference an absolute label, e.g. a jumptable entry. + virtual unsigned getAbsoluteLabelMachineRelTy() const; + + /// computeRelocation - Some relocatable fields can be relocated + /// directly, avoiding emission of a relocation symbol; compute the + /// final relocation value for such a field. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const; }; } // end llvm namespace diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b336d780c505d..3401df0c9092c 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -78,19 +79,20 @@ public: #include "X86GenFastISel.inc" private: - bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT); + bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT); - bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR); + bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(MVT VT, Value *Val, + bool X86FastEmitStore(EVT VT, Value *Val, const X86AddressMode &AM); - bool X86FastEmitStore(MVT VT, unsigned Val, + bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); - bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT, + bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); - bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall); + bool X86SelectAddress(Value *V, X86AddressMode &AM); + bool X86SelectCallAddress(Value *V, X86AddressMode &AM); bool X86SelectLoad(Instruction *I); @@ -116,7 +118,7 @@ private: bool X86VisitIntrinsicCall(IntrinsicInst &I); bool X86SelectCall(Instruction *I); - CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false); + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); const
X86InstrInfo *getInstrInfo() const { return getTargetMachine()->getInstrInfo(); @@ -131,17 +133,17 @@ private: /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. - bool isScalarFPTypeInSSEReg(MVT VT) const { + bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } - bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); + bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false); }; } // end anonymous namespace. -bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { +bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) { VT = TLI.getValueType(Ty, /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. Halt "fast" selection and bail. @@ -167,7 +169,8 @@ bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) { /// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling /// convention. -CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { +CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, + bool isTailCall) { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; @@ -186,13 +189,14 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. -bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM, +bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &ResultReg) { // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: return false; + case MVT::i1: case MVT::i8: Opc = X86::MOV8rm; RC = X86::GR8RegisterClass; @@ -243,13 +247,21 @@ bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM, /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. bool -X86FastISel::X86FastEmitStore(MVT VT, unsigned Val, +X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { case MVT::f80: // No f80 support yet. default: return false; + case MVT::i1: { + // Mask out all but the lowest bit. + unsigned AndResult = createResultReg(X86::GR8RegisterClass); + BuildMI(MBB, DL, + TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); + Val = AndResult; + } + // FALLTHROUGH, handling i1 as i8. case MVT::i8: Opc = X86::MOV8mr; break; case MVT::i16: Opc = X86::MOV16mr; break; case MVT::i32: Opc = X86::MOV32mr; break; @@ -266,17 +278,19 @@ X86FastISel::X86FastEmitStore(MVT VT, unsigned Val, return true; } -bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val, +bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, const X86AddressMode &AM) { // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) - Val = Constant::getNullValue(TD.getIntPtrType()); + Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); // If this is a store of a simple constant, fold the constant into the store.
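// (For example, "store i32 42, i32* %p" becomes a single MOV32mi; an i1
// constant is stored zero-extended as an i8, which is why Signed is
// cleared for MVT::i1 below.)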
if (ConstantInt *CI = dyn_cast(Val)) { unsigned Opc = 0; - switch (VT.getSimpleVT()) { + bool Signed = true; + switch (VT.getSimpleVT().SimpleTy) { default: break; + case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8. case MVT::i8: Opc = X86::MOV8mi; break; case MVT::i16: Opc = X86::MOV16mi; break; case MVT::i32: Opc = X86::MOV32mi; break; @@ -289,7 +303,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val, if (Opc) { addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) - .addImm(CI->getSExtValue()); + .addImm(Signed ? CI->getSExtValue() : + CI->getZExtValue()); return true; } } @@ -304,8 +319,8 @@ bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val, /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g. /// ISD::SIGN_EXTEND). -bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, - unsigned Src, MVT SrcVT, +bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, + unsigned Src, EVT SrcVT, unsigned &ResultReg) { unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src); @@ -318,7 +333,7 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, /// X86SelectAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { +bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { User *U = NULL; unsigned Opcode = Instruction::UserOp1; if (Instruction *I = dyn_cast(V)) { @@ -333,22 +348,21 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { default: break; case Instruction::BitCast: // Look past bitcasts. - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::Alloca: { - if (isCall) break; // Do static allocas. const AllocaInst *A = cast(V); DenseMap::iterator SI = StaticAllocaMap.find(A); @@ -361,21 +375,19 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { } case Instruction::Add: { - if (isCall) break; // Adds of constants are common and easy enough. if (ConstantInt *CI = dyn_cast(U->getOperand(1))) { uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); // They have to fit in the 32-bit signed displacement field though. if (isInt32(Disp)) { AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); } } break; } case Instruction::GetElementPtr: { - if (isCall) break; // Pattern-match simple GEPs. uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; @@ -416,7 +428,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM, isCall); + return X86SelectAddress(U->getOperand(0), AM); unsupported_gep: // Ok, the GEP indices weren't all covered. 
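// (Anything the loop above could not fold into the Base + Scale*Index + Disp
// form - say, a second variable index once IndexReg is taken, or a scale
// other than 1/2/4/8 - bails out here and is left to the normal selector.)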
break; @@ -426,8 +438,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { // Handle constant address. if (GlobalValue *GV = dyn_cast(V)) { // Can't handle alternate code models yet. - if (TM.getCodeModel() != CodeModel::Default && - TM.getCodeModel() != CodeModel::Small) + if (TM.getCodeModel() != CodeModel::Small) return false; // RIP-relative addresses can't have additional register operands. @@ -440,63 +451,149 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { if (GVar->isThreadLocal()) return false; - // Set up the basic address. + // Okay, we've committed to selecting this global. Set up the basic address. AM.GV = GV; - if (!isCall && - TM.getRelocationModel() == Reloc::PIC_ && - !Subtarget->is64Bit()) - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + // Allow the subtarget to classify the global. + unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); - // Emit an extra load if the ABI requires it. - if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) { - // Check to see if we've already materialized this - // value in a register in this block. - DenseMap::iterator I = LocalValueMap.find(V); - if (I != LocalValueMap.end() && I->second != 0) { - AM.Base.Reg = I->second; - AM.GV = 0; - return true; + // If this reference is relative to the pic base, set it now. + if (isGlobalRelativeToPICBase(GVFlags)) { + // FIXME: How do we know Base.Reg is free?? + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + } + + // Unless the ABI requires an extra load, return a direct reference to + // the global. + if (!isGlobalStubReference(GVFlags)) { + if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. Above we verified that the + // base and index registers are unused. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; } - + AM.GVOpFlags = GVFlags; + return true; + } + + // Ok, we need to do a load from a stub. If we've already loaded from this + // stub, reuse the loaded pointer, otherwise emit the load now. + DenseMap::iterator I = LocalValueMap.find(V); + unsigned LoadReg; + if (I != LocalValueMap.end() && I->second != 0) { + LoadReg = I->second; + } else { // Issue load from stub. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; X86AddressMode StubAM; StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = AM.GV; - - if (TLI.getPointerTy() == MVT::i32) { - Opc = X86::MOV32rm; - RC = X86::GR32RegisterClass; - - if (Subtarget->isPICStyleGOT() && - TM.getRelocationModel() == Reloc::PIC_) - StubAM.GVOpFlags = X86II::MO_GOT; - - } else { + StubAM.GV = GV; + StubAM.GVOpFlags = GVFlags; + + if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; - if (TM.getRelocationModel() != Reloc::Static) { - StubAM.GVOpFlags = X86II::MO_GOTPCREL; + if (Subtarget->isPICStyleRIPRel()) StubAM.Base.Reg = X86::RIP; - } + } else { + Opc = X86::MOV32rm; + RC = X86::GR32RegisterClass; } + + LoadReg = createResultReg(RC); + addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM); + + // Prevent loading GV stub multiple times in same MBB. + LocalValueMap[V] = LoadReg; + } + + // Now construct the final address. Note that the Disp, Scale, + // and Index values may already be set here. + AM.Base.Reg = LoadReg; + AM.GV = 0; + return true; + } - unsigned ResultReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM); + // If all else fails, try to materialize the value in a register. 
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { + if (AM.Base.Reg == 0) { + AM.Base.Reg = getRegForValue(V); + return AM.Base.Reg != 0; + } + if (AM.IndexReg == 0) { + assert(AM.Scale == 1 && "Scale with no index!"); + AM.IndexReg = getRegForValue(V); + return AM.IndexReg != 0; + } + } - // Now construct the final address. Note that the Disp, Scale, - // and Index values may already be set here. - AM.Base.Reg = ResultReg; - AM.GV = 0; + return false; +} - // Prevent loading GV stub multiple times in same MBB. - LocalValueMap[V] = AM.Base.Reg; - } else if (Subtarget->isPICStyleRIPRel()) { - // Use rip-relative addressing if we can. +/// X86SelectCallAddress - Attempt to fill in an address from the given value. +/// +bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { + User *U = NULL; + unsigned Opcode = Instruction::UserOp1; + if (Instruction *I = dyn_cast(V)) { + Opcode = I->getOpcode(); + U = I; + } else if (ConstantExpr *C = dyn_cast(V)) { + Opcode = C->getOpcode(); + U = C; + } + + switch (Opcode) { + default: break; + case Instruction::BitCast: + // Look past bitcasts. + return X86SelectCallAddress(U->getOperand(0), AM); + + case Instruction::IntToPtr: + // Look past no-op inttoptrs. + if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + return X86SelectCallAddress(U->getOperand(0), AM); + break; + + case Instruction::PtrToInt: + // Look past no-op ptrtoints. + if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + return X86SelectCallAddress(U->getOperand(0), AM); + break; + } + + // Handle constant address. + if (GlobalValue *GV = dyn_cast(V)) { + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return false; + + // RIP-relative addresses can't have additional register operands. + if (Subtarget->isPICStyleRIPRel() && + (AM.Base.Reg != 0 || AM.IndexReg != 0)) + return false; + + // Can't handle TLS or DLLImport. + if (GlobalVariable *GVar = dyn_cast(GV)) + if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) + return false; + + // Okay, we've committed to selecting this global. Set up the basic address. + AM.GV = GV; + + // No ABI requires an extra load for anything other than DLLImport, which + // we rejected above. Return a direct reference to the global. + if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. Above we verified that the + // base and index registers are unused. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); AM.Base.Reg = X86::RIP; + } else if (Subtarget->isPICStyleStubPIC()) { + AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET; + } else if (Subtarget->isPICStyleGOT()) { + AM.GVOpFlags = X86II::MO_GOTOFF; } return true; @@ -518,14 +615,15 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { return false; } + /// X86SelectStore - Select and emit code to implement store instructions. bool X86FastISel::X86SelectStore(Instruction* I) { - MVT VT; - if (!isTypeLegal(I->getOperand(0)->getType(), VT)) + EVT VT; + if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(1), AM, false)) + if (!X86SelectAddress(I->getOperand(1), AM)) return false; return X86FastEmitStore(VT, I->getOperand(0), AM); @@ -534,12 +632,12 @@ bool X86FastISel::X86SelectStore(Instruction* I) { /// X86SelectLoad - Select and emit code to implement load instructions. 
/// bool X86FastISel::X86SelectLoad(Instruction *I) { - MVT VT; - if (!isTypeLegal(I->getType(), VT)) + EVT VT; + if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(0), AM, false)) + if (!X86SelectAddress(I->getOperand(0), AM)) return false; unsigned ResultReg = 0; @@ -550,8 +648,8 @@ bool X86FastISel::X86SelectLoad(Instruction *I) { return false; } -static unsigned X86ChooseCmpOpcode(MVT VT) { - switch (VT.getSimpleVT()) { +static unsigned X86ChooseCmpOpcode(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; @@ -565,8 +663,8 @@ static unsigned X86ChooseCmpOpcode(MVT VT) { /// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS /// of the comparison, return an opcode that works for the compare (e.g. /// CMP32ri) otherwise return 0. -static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) { - switch (VT.getSimpleVT()) { +static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { + switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. default: return 0; case MVT::i8: return X86::CMP8ri; @@ -581,13 +679,13 @@ static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) { } } -bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) { +bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; // Handle 'null' like i32/i64 0. if (isa(Op1)) - Op1 = Constant::getNullValue(TD.getIntPtrType()); + Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext())); // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use @@ -613,7 +711,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) { bool X86FastISel::X86SelectCmp(Instruction *I) { CmpInst *CI = cast(I); - MVT VT; + EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; @@ -688,8 +786,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType() == Type::Int8Ty && - I->getOperand(0)->getType() == Type::Int1Ty) { + if (I->getType() == Type::getInt8Ty(I->getContext()) && + I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext())) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. @@ -713,7 +811,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { // Fold the common case of a conditional branch with a comparison. if (CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse()) { - MVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); // Try to take advantage of fallthrough opportunities. 
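// (Concretely: for "br i1 %c, label %t, label %f" where %f is the next
// block in layout, a single conditional jump to %t suffices; if %t is the
// fallthrough instead, the predicate can be inverted so only one jump is
// still needed.)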
CmpInst::Predicate Predicate = CI->getPredicate(); @@ -850,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType() == Type::Int8Ty) { + if (I->getType() == Type::getInt8Ty(I->getContext())) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -859,7 +957,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType() == Type::Int16Ty) { + } else if (I->getType() == Type::getInt16Ty(I->getContext())) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -868,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType() == Type::Int32Ty) { + } else if (I->getType() == Type::getInt32Ty(I->getContext())) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -877,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType() == Type::Int64Ty) { + } else if (I->getType() == Type::getInt64Ty(I->getContext())) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -890,7 +988,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { return false; } - MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); + EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) return false; @@ -924,7 +1022,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { } bool X86FastISel::X86SelectSelect(Instruction *I) { - MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); + EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) return false; @@ -959,9 +1057,10 @@ bool X86FastISel::X86SelectSelect(Instruction *I) { bool X86FastISel::X86SelectFPExt(Instruction *I) { // fpext from float to double. - if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) { + if (Subtarget->hasSSE2() && + I->getType()->isDoubleTy()) { Value *V = I->getOperand(0); - if (V->getType() == Type::FloatTy) { + if (V->getType()->isFloatTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR64RegisterClass); @@ -976,9 +1075,9 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) { bool X86FastISel::X86SelectFPTrunc(Instruction *I) { if (Subtarget->hasSSE2()) { - if (I->getType() == Type::FloatTy) { + if (I->getType()->isFloatTy()) { Value *V = I->getOperand(0); - if (V->getType() == Type::DoubleTy) { + if (V->getType()->isDoubleTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR32RegisterClass); @@ -996,8 +1095,8 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) { if (Subtarget->is64Bit()) // All other cases should be handled by the tblgen generated code. return false; - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); // This code only handles truncation to byte right now. 
if (DstVT != MVT::i8 && DstVT != MVT::i1) @@ -1065,7 +1164,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { const Type *RetTy = cast(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); - MVT VT; + EVT VT; if (!isTypeLegal(RetTy, VT)) return false; @@ -1125,7 +1224,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Handle only C and fastcc calling conventions for now. CallSite CS(CI); - unsigned CC = CS.getCallingConv(); + CallingConv::ID CC = CS.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && CC != CallingConv::X86_FastCall) @@ -1144,8 +1243,8 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Handle *simple* calls for now. const Type *RetTy = CS.getType(); - MVT RetVT; - if (RetTy == Type::VoidTy) + EVT RetVT; + if (RetTy->isVoidTy()) RetVT = MVT::isVoid; else if (!isTypeLegal(RetTy, RetVT, true)) return false; @@ -1153,7 +1252,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Materialize callee address in a register. FIXME: GV address can be // handled with a CALLpcrel32 instead. X86AddressMode CalleeAM; - if (!X86SelectAddress(Callee, CalleeAM, true)) + if (!X86SelectCallAddress(Callee, CalleeAM)) return false; unsigned CalleeOp = 0; GlobalValue *GV = 0; @@ -1174,7 +1273,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Deal with call operands first. SmallVector ArgVals; SmallVector Args; - SmallVector ArgVTs; + SmallVector ArgVTs; SmallVector ArgFlags; Args.reserve(CS.arg_size()); ArgVals.reserve(CS.arg_size()); @@ -1200,7 +1299,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { return false; const Type *ArgTy = (*i)->getType(); - MVT ArgVT; + EVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); @@ -1214,7 +1313,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, false, TM, ArgLocs); + CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. @@ -1230,11 +1329,11 @@ bool X86FastISel::X86SelectCall(Instruction *I) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; unsigned Arg = Args[VA.getValNo()]; - MVT ArgVT = ArgVTs[VA.getValNo()]; + EVT ArgVT = ArgVTs[VA.getValNo()]; // Promote the value if needed. switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: { bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), @@ -1266,6 +1365,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) { ArgVT = VA.getLocVT(); break; } + case CCValAssign::BCvt: { + unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), + ISD::BIT_CONVERT, Arg); + assert(BC != 0 && "Failed to emit a bitcast!"); + Arg = BC; + ArgVT = VA.getLocVT(); + break; + } } if (VA.isRegLoc()) { @@ -1294,28 +1401,53 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. 
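// (The IA-32 ELF ABI expects %ebx to hold the GOT address at a call through
// the PLT, because the PLT stub indirects through the GOT; hence the copy
// from the PIC base register below and the implicit EBX use added to the
// call instruction.)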
- if (!Subtarget->is64Bit() && - TM.getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) { + if (Subtarget->isPICStyleGOT()) { TargetRegisterClass *RC = X86::GR32RegisterClass; unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC); assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; Emitted = true; } - + // Issue the call. - unsigned CallOpc = CalleeOp - ? (Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r) - : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32); - MachineInstrBuilder MIB = CalleeOp - ? BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp) - : BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV); + MachineInstrBuilder MIB; + if (CalleeOp) { + // Register-indirect call. + unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; + MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp); + + } else { + // Direct call. + assert(GV && "Not a direct call"); + unsigned CallOpc = + Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32; + + // See if we need any target-specific flags on the GV operand. + unsigned char OpFlags = 0; + + // On ELF targets, in both X86-64 and X86-32 mode, direct calls to + // external symbols must go through the PLT in PIC mode. If the symbol + // has hidden or protected visibility, or if it is static or local, then + // we don't need to use the PLT - we can directly call it. + if (Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_ && + GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { + OpFlags = X86II::MO_PLT; + } else if (Subtarget->isPICStyleStubAny() && + (GV->isDeclaration() || GV->isWeakForLinker()) && + Subtarget->getDarwinVers() < 9) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the Leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = X86II::MO_DARWIN_STUB; + } + + + MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags); + } // Add an implicit use GOT pointer in EBX. - if (!Subtarget->is64Bit() && - TM.getRelocationModel() == Reloc::PIC_ && - Subtarget->isPICStyleGOT()) + if (Subtarget->isPICStyleGOT()) MIB.addReg(X86::EBX); // Add implicit physical register uses to the call. @@ -1327,14 +1459,14 @@ bool X86FastISel::X86SelectCall(Instruction *I) { BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0); // Now handle call return value (if any). - if (RetVT.getSimpleVT() != MVT::isVoid) { + if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) { SmallVector RVLocs; - CCState CCInfo(CC, false, TM, RVLocs); + CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext()); CCInfo.AnalyzeCallResult(RetVT, RetCC_X86); // Copy all of the result registers out of their specified physreg. assert(RVLocs.size() == 1 && "Can't handle multi-value calls!"); - MVT CopyVT = RVLocs[0].getValVT(); + EVT CopyVT = RVLocs[0].getValVT(); TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); TargetRegisterClass *SrcRC = DstRC; @@ -1358,7 +1490,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { // Round the F80 to the right size, which also moves it to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and // then loading it back. Ewww... - MVT ResVT = RVLocs[0].getValVT(); + EVT ResVT = RVLocs[0].getValVT(); unsigned Opc = ResVT == MVT::f32 ?
X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; int FI = MFI.CreateStackObject(MemSize, MemSize); @@ -1418,8 +1550,8 @@ X86FastISel::TargetSelectInstruction(Instruction *I) { return X86SelectExtractValue(I); case Instruction::IntToPtr: // Deliberate fall-through. case Instruction::PtrToInt: { - MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) return X86SelectZExt(I); if (DstVT.bitsLT(SrcVT)) @@ -1435,14 +1567,14 @@ X86FastISel::TargetSelectInstruction(Instruction *I) { } unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { - MVT VT; + EVT VT; if (!isTypeLegal(C->getType(), VT)) return false; // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i8: Opc = X86::MOV8rm; @@ -1487,7 +1619,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { // Materialize addresses with LEA instructions. if (isa(C)) { X86AddressMode AM; - if (X86SelectAddress(C, AM, false)) { + if (X86SelectAddress(C, AM)) { if (TLI.getPointerTy() == MVT::i32) Opc = X86::LEA32r; else @@ -1509,16 +1641,15 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; unsigned char OpFlag = 0; - if (TM.getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) { - OpFlag = X86II::MO_PIC_BASE_OFFSET; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); - } else if (Subtarget->isPICStyleGOT()) { - OpFlag = X86II::MO_GOTOFF; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); - } else if (Subtarget->isPICStyleRIPRel() && - TM.getCodeModel() == CodeModel::Small) - PICBase = X86::RIP; + if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic + OpFlag = X86II::MO_PIC_BASE_OFFSET; + PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + } else if (Subtarget->isPICStyleGOT()) { + OpFlag = X86II::MO_GOTOFF; + PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + } else if (Subtarget->isPICStyleRIPRel() && + TM.getCodeModel() == CodeModel::Small) { + PICBase = X86::RIP; } // Create the load from the constant pool. @@ -1542,7 +1673,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) { return 0; X86AddressMode AM; - if (!X86SelectAddress(C, AM, false)) + if (!X86SelectAddress(C, AM)) return 0; unsigned Opc = Subtarget->is64Bit() ? 
X86::LEA64r : X86::LEA32r; TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 37027ee8bebae..d9a05a83b9e56 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -31,19 +31,21 @@ #define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include using namespace llvm; @@ -56,6 +58,7 @@ namespace { FPS() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -73,12 +76,12 @@ namespace { unsigned StackTop; // The current top of the FP stack. void dumpStack() const { - cerr << "Stack contents:"; + errs() << "Stack contents:"; for (unsigned i = 0; i != StackTop; ++i) { - cerr << " FP" << Stack[i]; + errs() << " FP" << Stack[i]; assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); } - cerr << "\n"; + errs() << "\n"; } private: /// isStackEmpty - Return true if the FP stack is empty. @@ -210,6 +213,14 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { I != E; ++I) Changed |= processBasicBlock(MF, **I); + // Process any unreachable blocks in arbitrary order now. + if (MF.size() == Processed.size()) + return Changed; + + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + if (Processed.insert(BB)) + Changed |= processBasicBlock(MF, *BB); + return Changed; } @@ -236,7 +247,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { PrevMI = prior(I); ++NumFP; // Keep track of # of pseudo instrs - DOUT << "\nFPInst:\t" << *MI; + DEBUG(errs() << "\nFPInst:\t" << *MI); // Get dead variables list now because the MI pointer may be deleted as part // of processing! 
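// The invariant the stackifier keeps between Stack[] and RegMap[] (asserted
// piecewise in dumpStack above), written out as one predicate - a minimal
// sketch for illustration, not code from this patch:
static bool fpStackConsistent(const unsigned char *Stack,
                              const unsigned char *RegMap,
                              unsigned StackTop) {
  for (unsigned i = 0; i != StackTop; ++i)
    if (RegMap[Stack[i]] != i)  // every live FP register maps back to its slot
      return false;
  return true;
}
// e.g. assert(fpStackConsistent(Stack, RegMap, StackTop)); would hold before
// and after each FP pseudo instruction is rewritten.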
@@ -255,7 +266,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { case X86II::CompareFP: handleCompareFP(I); break; case X86II::CondMovFP: handleCondMovFP(I); break; case X86II::SpecialFP: handleSpecialFP(I); break; - default: assert(0 && "Unknown FP Type!"); + default: llvm_unreachable("Unknown FP Type!"); } // Check to see if any of the values defined by this instruction are dead @@ -263,7 +274,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { unsigned Reg = DeadRegs[i]; if (Reg >= X86::FP0 && Reg <= X86::FP6) { - DOUT << "Register FP#" << Reg-X86::FP0 << " is dead!\n"; + DEBUG(errs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); freeStackSlotAfter(I, Reg-X86::FP0); } } @@ -272,13 +283,13 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { DEBUG( MachineBasicBlock::iterator PrevI(PrevMI); if (I == PrevI) { - cerr << "Just deleted pseudo instruction\n"; + errs() << "Just deleted pseudo instruction\n"; } else { MachineBasicBlock::iterator Start = I; // Rewind to first instruction newly inserted. while (Start != BB.begin() && prior(Start) != PrevI) --Start; - cerr << "Inserted instructions:\n\t"; - Start->print(*cerr.stream(), &MF.getTarget()); + errs() << "Inserted instructions:\n\t"; + Start->print(errs(), &MF.getTarget()); while (++Start != next(I)) {} } dumpStack(); @@ -945,7 +956,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { - default: assert(0 && "Unknown SpecialFP instruction!"); + default: llvm_unreachable("Unknown SpecialFP instruction!"); case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 
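The X86FloatingPoint.cpp hunks above replace the old stream-style DOUT macro with DEBUG(errs() << ...), which takes an arbitrary statement and compiles away entirely in release builds. A stripped-down model of that idiom (the macro and stream below are stand-ins, not LLVM's actual Debug.h and raw_ostream):

    #include <iostream>

    #ifndef NDEBUG
    #define DEBUG(X) do { X; } while (false)   // run the statement in +Asserts builds
    #else
    #define DEBUG(X) do { } while (false)      // the whole argument compiles away
    #endif

    static std::ostream &errs() { return std::cerr; }  // stand-in for llvm::errs()

    int main() {
      int StackTop = 2;
      // Multi-statement debug output goes inside one DEBUG({ ... }) block,
      // which is why the patch wraps the old DOUT sequences in braces.
      DEBUG({
        errs() << "Stack contents:";
        for (int i = 0; i != StackTop; ++i) errs() << " FP" << i;
        errs() << "\n";
      });
      return 0;
    }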
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 009846e2e0b5c..3e0385c79c19e 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -35,6 +35,7 @@ namespace { FPRegKiller() : MachineFunctionPass(&ID) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -117,9 +118,10 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { !ContainsFPCode && SI != E; ++SI) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast(II)); ++II) { - if (PN->getType()==Type::X86_FP80Ty || + if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || - (!Subtarget.hasSSE2() && PN->getType()==Type::DoubleTy)) { + (!Subtarget.hasSSE2() && + PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; break; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 1336177de249f..5b678fb602dc9 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -35,8 +35,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -78,7 +79,8 @@ namespace { X86ISelAddressMode() : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), - Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) { + Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), + SymbolFlags(X86II::MO_NO_FLAG) { } bool hasSymbolicDisplacement() const { @@ -105,23 +107,37 @@ namespace { } void dump() { - cerr << "X86ISelAddressMode " << this << "\n"; - cerr << "Base.Reg "; - if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); - else cerr << "nul"; - cerr << " Base.FrameIndex " << Base.FrameIndex << "\n"; - cerr << " Scale" << Scale << "\n"; - cerr << "IndexReg "; - if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); - else cerr << "nul"; - cerr << " Disp " << Disp << "\n"; - cerr << "GV "; if (GV) GV->dump(); - else cerr << "nul"; - cerr << " CP "; if (CP) CP->dump(); - else cerr << "nul"; - cerr << "\n"; - cerr << "ES "; if (ES) cerr << ES; else cerr << "nul"; - cerr << " JT" << JT << " Align" << Align << "\n"; + errs() << "X86ISelAddressMode " << this << '\n'; + errs() << "Base.Reg "; + if (Base.Reg.getNode() != 0) + Base.Reg.getNode()->dump(); + else + errs() << "nul"; + errs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + << " Scale" << Scale << '\n' + << "IndexReg "; + if (IndexReg.getNode() != 0) + IndexReg.getNode()->dump(); + else + errs() << "nul"; + errs() << " Disp " << Disp << '\n' + << "GV "; + if (GV) + GV->dump(); + else + errs() << "nul"; + errs() << " CP "; + if (CP) + CP->dump(); + else + errs() << "nul"; + errs() << '\n' + << "ES "; + if (ES) + errs() << ES; + else + errs() << "nul"; + errs() << " JT" << JT << " Align" << Align << '\n'; } }; } @@ -140,10 +156,6 @@ namespace { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// CurBB - Current BB being isel'd. 
- /// - MachineBasicBlock *CurBB; - /// OptForSize - If true, selector should try to optimize for code size /// instead of performance. bool OptForSize; @@ -174,12 +186,14 @@ namespace { private: SDNode *Select(SDValue N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT); bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); bool MatchLoad(SDValue N, X86ISelAddressMode &AM); bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); - bool MatchAddress(SDValue N, X86ISelAddressMode &AM, - unsigned Depth = 0); + bool MatchAddress(SDValue N, X86ISelAddressMode &AM); + bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -342,13 +356,17 @@ static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, Ops.push_back(Load.getOperand(0)); else Ops.push_back(TF.getOperand(i)); - CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), + SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); + SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, + Load.getOperand(1), + Load.getOperand(2)); + CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), Store.getOperand(2), Store.getOperand(3)); } -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. +/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The +/// chain produced by the load must only be used by the store's chain operand, +/// otherwise this may produce a cycle in the DAG. /// static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, SDValue &Load) { @@ -366,8 +384,9 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, return false; if (N.hasOneUse() && + LD->hasNUsesOfValue(1, 1) && N.getOperand(1) == Address && - N.getNode()->isOperandOf(Chain.getNode())) { + LD->isOperandOf(Chain.getNode())) { Load = N; return true; } @@ -431,7 +450,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { if (Chain.getOperand(0).getNode() == Callee.getNode()) return true; if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && - Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode())) + Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && + Callee.getValue(1).hasOneUse()) return true; return false; } @@ -583,8 +603,8 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // If the source and destination are SSE registers, then this is a legal // conversion that should not be lowered. - MVT SrcVT = N->getOperand(0).getValueType(); - MVT DstVT = N->getValueType(0); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DstVT = N->getValueType(0); bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); if (SrcIsSSE && DstIsSSE) @@ -602,7 +622,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. - MVT MemVT; + EVT MemVT; if (N->getOpcode() == ISD::FP_ROUND) MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 
else @@ -635,8 +655,7 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel /// when it has created a SelectionDAG for us to codegen. void X86DAGToDAGISel::InstructionSelect() { - CurBB = BB; // BB can change as result of isel. - const Function *F = CurDAG->getMachineFunction().getFunction(); + const Function *F = MF->getFunction(); OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); DEBUG(BB->dump()); @@ -648,12 +667,12 @@ void X86DAGToDAGISel::InstructionSelect() { // Codegen the basic block. #ifndef NDEBUG - DOUT << "===== Instruction selection begins:\n"; + DEBUG(errs() << "===== Instruction selection begins:\n"); Indent = 0; #endif SelectRoot(*CurDAG); #ifndef NDEBUG - DOUT << "===== Instruction selection ends:\n"; + DEBUG(errs() << "===== Instruction selection ends:\n"); #endif CurDAG->RemoveDeadNodes(); @@ -706,7 +725,7 @@ bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { /// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes /// into an addressing mode. These wrap things that will resolve down into a /// symbol reference. If no match is possible, this returns true, otherwise it -/// returns false. +/// returns false. bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // If the addressing mode already has a symbol as the displacement, we can // never match another symbol. @@ -714,28 +733,27 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { return true; SDValue N0 = N.getOperand(0); - + CodeModel::Model M = TM.getCodeModel(); + // Handle X86-64 rip-relative addresses. We check this before checking direct // folding because RIP is preferable to non-RIP accesses. if (Subtarget->is64Bit() && // Under X86-64 non-small code model, GV (and friends) are 64-bits, so // they cannot be folded into immediate fields. // FIXME: This can be improved for kernel and other models? - TM.getCodeModel() == CodeModel::Small && - + (M == CodeModel::Small || M == CodeModel::Kernel) && // Base and index reg must be 0 in order to use %rip as base and lowering // must allow RIP. !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { - if (GlobalAddressSDNode *G = dyn_cast(N0)) { int64_t Offset = AM.Disp + G->getOffset(); - if (!isInt32(Offset)) return true; + if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; AM.GV = G->getGlobal(); AM.Disp = Offset; AM.SymbolFlags = G->getTargetFlags(); } else if (ConstantPoolSDNode *CP = dyn_cast(N0)) { int64_t Offset = AM.Disp + CP->getOffset(); - if (!isInt32(Offset)) return true; + if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); AM.Disp = Offset; @@ -748,7 +766,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { AM.JT = J->getIndex(); AM.SymbolFlags = J->getTargetFlags(); } - + if (N.getOpcode() == X86ISD::WrapperRIP) AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); return false; @@ -758,7 +776,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit // mode, this results in a non-RIP-relative computation. 
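The MatchWrapper hunks above route every displacement check through X86::isOffsetSuitableForCodeModel instead of the old isInt32 helper, so that kernel-model offsets are accepted alongside small-model ones. A simplified, self-contained approximation of the idea; the real predicate also takes a hasSymbolicDisplacement flag and differs in some cases, so treat this as directional, not exact:

    #include <cassert>
    #include <cstdint>

    enum CodeModel { Small, Kernel, Medium, Large };

    // Approximation: under the small and kernel code models, a symbol+offset
    // must still fit the sign-extended 32-bit displacement field.
    static bool offsetFitsCodeModel(int64_t Offset, CodeModel M) {
      if (M != Small && M != Kernel)
        return false;                              // be conservative elsewhere
      return Offset == (int64_t)(int32_t)Offset;   // representable as imm32
    }

    int main() {
      assert(offsetFitsCodeModel(INT32_MAX, Small));
      assert(offsetFitsCodeModel(-4096, Kernel));
      assert(!offsetFitsCodeModel(int64_t(1) << 32, Small));
      assert(!offsetFitsCodeModel(0, Large));
      return 0;
    }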
if (!Subtarget->is64Bit() || - (TM.getCodeModel() == CodeModel::Small && + ((M == CodeModel::Small || M == CodeModel::Kernel) && TM.getRelocationModel() == Reloc::Static)) { if (GlobalAddressSDNode *G = dyn_cast(N0)) { AM.GV = G->getGlobal(); @@ -786,15 +804,49 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// MatchAddress - Add the specified node to the specified addressing mode, /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. -bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, - unsigned Depth) { +bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { + if (MatchAddressRecursively(N, AM, 0)) + return true; + + // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has + // a smaller encoding and avoids a scaled-index. + if (AM.Scale == 2 && + AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base.Reg.getNode() == 0) { + AM.Base.Reg = AM.IndexReg; + AM.Scale = 1; + } + + // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, + // because it has a smaller encoding. + // TODO: Which other code models can use this? + if (TM.getCodeModel() == CodeModel::Small && + Subtarget->is64Bit() && + AM.Scale == 1 && + AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base.Reg.getNode() == 0 && + AM.IndexReg.getNode() == 0 && + AM.SymbolFlags == X86II::MO_NO_FLAG && + AM.hasSymbolicDisplacement()) + AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); + + return false; +} + +bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); - DOUT << "MatchAddress: "; DEBUG(AM.dump()); + DEBUG({ + errs() << "MatchAddress: "; + AM.dump(); + }); // Limit recursion. if (Depth > 5) return MatchAddressBase(N, AM); - + + CodeModel::Model M = TM.getCodeModel(); + // If this is already a %rip relative address, we can only merge immediates // into it. Instead of handling this in every case, we handle it here. // RIP relative addressing: %rip + 32-bit displacement! @@ -803,10 +855,11 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, // displacements. It isn't very important, but this should be fixed for // consistency. if (!AM.ES && AM.JT != -1) return true; - + if (ConstantSDNode *Cst = dyn_cast(N)) { int64_t Val = AM.Disp + Cst->getSExtValue(); - if (isInt32(Val)) { + if (X86::isOffsetSuitableForCodeModel(Val, M, + AM.hasSymbolicDisplacement())) { AM.Disp = Val; return false; } @@ -818,7 +871,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, default: break; case ISD::Constant: { uint64_t Val = cast(N)->getSExtValue(); - if (!is64Bit || isInt32(AM.Disp + Val)) { + if (!is64Bit || + X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, + AM.hasSymbolicDisplacement())) { AM.Disp += Val; return false; } @@ -857,6 +912,10 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, if (ConstantSDNode *CN = dyn_cast(N.getNode()->getOperand(1))) { unsigned Val = CN->getZExtValue(); + // Note that we handle x<<1 as (,x,2) rather than (x,x) here so + // that the base operand remains free for further matching. If + // the base doesn't end up getting used, a post-processing step + // in MatchAddress turns (,x,2) into (x,x), which is cheaper. 
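The comment above refers to the new post-processing step in MatchAddress: once matching finishes, an index-only scale-2 address is rewritten as base+index, so lea (,%eax,2) becomes lea (%eax,%eax), which computes the same address with a shorter encoding. The rewrite on a toy address struct (field names are illustrative, not the real X86ISelAddressMode):

    #include <cassert>

    struct ToyAddr {
      int BaseReg = 0;    // 0 means "no register"
      int IndexReg = 0;
      int Scale = 1;
    };

    // Mirror of the first post-processing step: (,x,2) -> (x,x).
    static void simplify(ToyAddr &AM) {
      if (AM.Scale == 2 && AM.BaseReg == 0) {
        AM.BaseReg = AM.IndexReg;
        AM.Scale = 1;
      }
    }

    int main() {
      ToyAddr AM;
      AM.IndexReg = 1;    // pretend 1 == %eax
      AM.Scale = 2;
      simplify(AM);
      // base + index*1 == index*2, so the address is unchanged.
      assert(AM.BaseReg == 1 && AM.IndexReg == 1 && AM.Scale == 1);
      return 0;
    }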
if (Val == 1 || Val == 2 || Val == 3) { AM.Scale = 1 << Val; SDValue ShVal = N.getNode()->getOperand(0); @@ -870,7 +929,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, ConstantSDNode *AddVal = cast(ShVal.getNode()->getOperand(1)); uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); - if (!is64Bit || isInt32(Disp)) + if (!is64Bit || + X86::isOffsetSuitableForCodeModel(Disp, M, + AM.hasSymbolicDisplacement())) AM.Disp = Disp; else AM.IndexReg = ShVal; @@ -912,7 +973,9 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, cast(MulVal.getNode()->getOperand(1)); uint64_t Disp = AM.Disp + AddVal->getSExtValue() * CN->getZExtValue(); - if (!is64Bit || isInt32(Disp)) + if (!is64Bit || + X86::isOffsetSuitableForCodeModel(Disp, M, + AM.hasSymbolicDisplacement())) AM.Disp = Disp; else Reg = N.getNode()->getOperand(0); @@ -936,7 +999,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { AM = Backup; break; } @@ -998,12 +1061,12 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, case ISD::ADD: { X86ISelAddressMode Backup = AM; - if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) && - !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1)) + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && + !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) return false; AM = Backup; - if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) && - !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && + !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) return false; AM = Backup; @@ -1027,11 +1090,13 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, X86ISelAddressMode Backup = AM; uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. - if (!MatchAddress(N.getOperand(0), AM, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. - (!is64Bit || isInt32(AM.Disp + Offset)) && + (!is64Bit || + X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, + AM.hasSymbolicDisplacement())) && // Check to see if the LHS & C is zero. 
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { AM.Disp += Offset; @@ -1219,7 +1284,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, if (!Done && MatchAddress(N, AM)) return false; - MVT VT = N.getValueType(); + EVT VT = N.getValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base.Reg.getNode()) AM.Base.Reg = CurDAG->getRegister(0, VT); @@ -1292,7 +1357,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, assert (T == AM.Segment); AM.Segment = Copy; - MVT VT = N.getValueType(); + EVT VT = N.getValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) if (AM.Base.Reg.getNode()) @@ -1329,12 +1394,13 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) Complexity++; - if (Complexity > 2) { - SDValue Segment; - getAddressOperands(AM, Base, Scale, Index, Disp, Segment); - return true; - } - return false; + // If it isn't worth using an LEA, reject it. + if (Complexity <= 2) + return false; + + SDValue Segment; + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + return true; } /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. @@ -1380,7 +1446,6 @@ bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, /// initialize the global base register, if necessary. /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = CurBB->getParent(); unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } @@ -1400,367 +1465,686 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) return NULL; - SDValue LSI = Node->getOperand(4); // MemOperand - const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain}; - return CurDAG->getTargetNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, Ops, - array_lengthof(Ops)); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(Node)->getMemOperand(); + const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; + SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), + MVT::i32, MVT::i32, MVT::Other, Ops, + array_lengthof(Ops)); + cast(ResNode)->setMemRefs(MemOp, MemOp + 1); + return ResNode; +} + +SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { + if (Node->hasAnyUseOfValue(0)) + return 0; + + // Optimize common patterns for __sync_add_and_fetch and + // __sync_sub_and_fetch where the result is not used. This allows us + // to use "lock" version of add, sub, inc, dec instructions. + // FIXME: Do not use special instructions but instead add the "lock" + // prefix to the target node somehow. The extra information will then be + // transferred to machine instruction and it denotes the prefix. 
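At the source level, the fast path below targets code like the following: when the value returned by an atomic read-modify-write is never used, the operation can be emitted as a single lock-prefixed memory instruction instead of a lock xadd or a cmpxchg loop. A small illustration using the GCC/Clang __sync builtins; the exact instruction chosen depends on the operand value, as the opcode switch below spells out:

    #include <cstdio>

    long counter = 0;

    int main() {
      __sync_fetch_and_add(&counter, 1);   // result unused -> can be "lock inc"
      __sync_fetch_and_add(&counter, -1);  // result unused -> can be "lock dec"
      __sync_fetch_and_sub(&counter, 8);   // result unused -> can be "lock sub $8"
      long v = __sync_fetch_and_add(&counter, 4);  // result used -> needs xadd
      std::printf("%ld\n", v);
      return 0;
    }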
+ SDValue Chain = Node->getOperand(0); + SDValue Ptr = Node->getOperand(1); + SDValue Val = Node->getOperand(2); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) + return 0; + + bool isInc = false, isDec = false, isSub = false, isCN = false; + ConstantSDNode *CN = dyn_cast(Val); + if (CN) { + isCN = true; + int64_t CNVal = CN->getSExtValue(); + if (CNVal == 1) + isInc = true; + else if (CNVal == -1) + isDec = true; + else if (CNVal >= 0) + Val = CurDAG->getTargetConstant(CNVal, NVT); + else { + isSub = true; + Val = CurDAG->getTargetConstant(-CNVal, NVT); + } + } else if (Val.hasOneUse() && + Val.getOpcode() == ISD::SUB && + X86::isZeroNode(Val.getOperand(0))) { + isSub = true; + Val = Val.getOperand(1); + } + + unsigned Opc = 0; + switch (NVT.getSimpleVT().SimpleTy) { + default: return 0; + case MVT::i8: + if (isInc) + Opc = X86::LOCK_INC8m; + else if (isDec) + Opc = X86::LOCK_DEC8m; + else if (isSub) { + if (isCN) + Opc = X86::LOCK_SUB8mi; + else + Opc = X86::LOCK_SUB8mr; + } else { + if (isCN) + Opc = X86::LOCK_ADD8mi; + else + Opc = X86::LOCK_ADD8mr; + } + break; + case MVT::i16: + if (isInc) + Opc = X86::LOCK_INC16m; + else if (isDec) + Opc = X86::LOCK_DEC16m; + else if (isSub) { + if (isCN) { + if (Predicate_i16immSExt8(Val.getNode())) + Opc = X86::LOCK_SUB16mi8; + else + Opc = X86::LOCK_SUB16mi; + } else + Opc = X86::LOCK_SUB16mr; + } else { + if (isCN) { + if (Predicate_i16immSExt8(Val.getNode())) + Opc = X86::LOCK_ADD16mi8; + else + Opc = X86::LOCK_ADD16mi; + } else + Opc = X86::LOCK_ADD16mr; + } + break; + case MVT::i32: + if (isInc) + Opc = X86::LOCK_INC32m; + else if (isDec) + Opc = X86::LOCK_DEC32m; + else if (isSub) { + if (isCN) { + if (Predicate_i32immSExt8(Val.getNode())) + Opc = X86::LOCK_SUB32mi8; + else + Opc = X86::LOCK_SUB32mi; + } else + Opc = X86::LOCK_SUB32mr; + } else { + if (isCN) { + if (Predicate_i32immSExt8(Val.getNode())) + Opc = X86::LOCK_ADD32mi8; + else + Opc = X86::LOCK_ADD32mi; + } else + Opc = X86::LOCK_ADD32mr; + } + break; + case MVT::i64: + if (isInc) + Opc = X86::LOCK_INC64m; + else if (isDec) + Opc = X86::LOCK_DEC64m; + else if (isSub) { + Opc = X86::LOCK_SUB64mr; + if (isCN) { + if (Predicate_i64immSExt8(Val.getNode())) + Opc = X86::LOCK_SUB64mi8; + else if (Predicate_i64immSExt32(Val.getNode())) + Opc = X86::LOCK_SUB64mi32; + } + } else { + Opc = X86::LOCK_ADD64mr; + if (isCN) { + if (Predicate_i64immSExt8(Val.getNode())) + Opc = X86::LOCK_ADD64mi8; + else if (Predicate_i64immSExt32(Val.getNode())) + Opc = X86::LOCK_ADD64mi32; + } + } + break; + } + + DebugLoc dl = Node->getDebugLoc(); + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + dl, NVT), 0); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(Node)->getMemOperand(); + if (isInc || isDec) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; + SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); + cast(Ret)->setMemRefs(MemOp, MemOp + 1); + SDValue RetVals[] = { Undef, Ret }; + return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); + } else { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; + SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); + cast(Ret)->setMemRefs(MemOp, MemOp + 1); + SDValue RetVals[] = { Undef, Ret }; + return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); + } +} + +/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has +/// any uses which require the SF or 
OF bits to be accurate. +static bool HasNoSignedComparisonUses(SDNode *N) { + // Examine each user of the node. + for (SDNode::use_iterator UI = N->use_begin(), + UE = N->use_end(); UI != UE; ++UI) { + // Only examine CopyToReg uses. + if (UI->getOpcode() != ISD::CopyToReg) + return false; + // Only examine CopyToReg uses that copy to EFLAGS. + if (cast(UI->getOperand(1))->getReg() != + X86::EFLAGS) + return false; + // Examine each user of the CopyToReg use. + for (SDNode::use_iterator FlagUI = UI->use_begin(), + FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { + // Only examine the Flag result. + if (FlagUI.getUse().getResNo() != 1) continue; + // Anything unusual: assume conservatively. + if (!FlagUI->isMachineOpcode()) return false; + // Examine the opcode of the user. + switch (FlagUI->getMachineOpcode()) { + // These comparisons don't treat the most significant bit specially. + case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: + case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: + case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: + case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: + case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: + case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::CMOVA16rr: case X86::CMOVA16rm: + case X86::CMOVA32rr: case X86::CMOVA32rm: + case X86::CMOVA64rr: case X86::CMOVA64rm: + case X86::CMOVAE16rr: case X86::CMOVAE16rm: + case X86::CMOVAE32rr: case X86::CMOVAE32rm: + case X86::CMOVAE64rr: case X86::CMOVAE64rm: + case X86::CMOVB16rr: case X86::CMOVB16rm: + case X86::CMOVB32rr: case X86::CMOVB32rm: + case X86::CMOVB64rr: case X86::CMOVB64rm: + case X86::CMOVBE16rr: case X86::CMOVBE16rm: + case X86::CMOVBE32rr: case X86::CMOVBE32rm: + case X86::CMOVBE64rr: case X86::CMOVBE64rm: + case X86::CMOVE16rr: case X86::CMOVE16rm: + case X86::CMOVE32rr: case X86::CMOVE32rm: + case X86::CMOVE64rr: case X86::CMOVE64rm: + case X86::CMOVNE16rr: case X86::CMOVNE16rm: + case X86::CMOVNE32rr: case X86::CMOVNE32rm: + case X86::CMOVNE64rr: case X86::CMOVNE64rm: + case X86::CMOVNP16rr: case X86::CMOVNP16rm: + case X86::CMOVNP32rr: case X86::CMOVNP32rm: + case X86::CMOVNP64rr: case X86::CMOVNP64rm: + case X86::CMOVP16rr: case X86::CMOVP16rm: + case X86::CMOVP32rr: case X86::CMOVP32rm: + case X86::CMOVP64rr: case X86::CMOVP64rm: + continue; + // Anything else: assume conservatively. + default: return false; + } + } + } + return true; } SDNode *X86DAGToDAGISel::Select(SDValue N) { SDNode *Node = N.getNode(); - MVT NVT = Node->getValueType(0); + EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); #ifndef NDEBUG - DOUT << std::string(Indent, ' ') << "Selecting: "; - DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent, ' ') << "Selecting: "; + Node->dump(CurDAG); + errs() << '\n'; + }); Indent += 2; #endif if (Node->isMachineOpcode()) { #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "== "; - DEBUG(Node->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "== "; + Node->dump(CurDAG); + errs() << '\n'; + }); Indent -= 2; #endif return NULL; // Already selected. 
} switch (Opcode) { - default: break; - case X86ISD::GlobalBaseReg: - return getGlobalBaseReg(); - - case X86ISD::ATOMOR64_DAG: - return SelectAtomic64(Node, X86::ATOMOR6432); - case X86ISD::ATOMXOR64_DAG: - return SelectAtomic64(Node, X86::ATOMXOR6432); - case X86ISD::ATOMADD64_DAG: - return SelectAtomic64(Node, X86::ATOMADD6432); - case X86ISD::ATOMSUB64_DAG: - return SelectAtomic64(Node, X86::ATOMSUB6432); - case X86ISD::ATOMNAND64_DAG: - return SelectAtomic64(Node, X86::ATOMNAND6432); - case X86ISD::ATOMAND64_DAG: - return SelectAtomic64(Node, X86::ATOMAND6432); - case X86ISD::ATOMSWAP64_DAG: - return SelectAtomic64(Node, X86::ATOMSWAP6432); - - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - - bool isSigned = Opcode == ISD::SMUL_LOHI; - if (!isSigned) - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; - case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; - case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; - case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; - } - else - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; - case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; - case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; - case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; - } + default: break; + case X86ISD::GlobalBaseReg: + return getGlobalBaseReg(); + + case X86ISD::ATOMOR64_DAG: + return SelectAtomic64(Node, X86::ATOMOR6432); + case X86ISD::ATOMXOR64_DAG: + return SelectAtomic64(Node, X86::ATOMXOR6432); + case X86ISD::ATOMADD64_DAG: + return SelectAtomic64(Node, X86::ATOMADD6432); + case X86ISD::ATOMSUB64_DAG: + return SelectAtomic64(Node, X86::ATOMSUB6432); + case X86ISD::ATOMNAND64_DAG: + return SelectAtomic64(Node, X86::ATOMNAND6432); + case X86ISD::ATOMAND64_DAG: + return SelectAtomic64(Node, X86::ATOMAND6432); + case X86ISD::ATOMSWAP64_DAG: + return SelectAtomic64(Node, X86::ATOMSWAP6432); + + case ISD::ATOMIC_LOAD_ADD: { + SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); + if (RetVal) + return RetVal; + break; + } - unsigned LoReg, HiReg; - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; - case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; - case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; - case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + bool isSigned = Opcode == ISD::SMUL_LOHI; + if (!isSigned) { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; + case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; + case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; + case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; } - - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); - // multiplty is commmutative - if (!foldedLoad) { - foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); - if (foldedLoad) - std::swap(N0, N1); + } else { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; 
break; + case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; + case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; + case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; } + } - SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, - N0, SDValue()).getValue(1); - - if (foldedLoad) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), - InFlag }; - SDNode *CNode = - CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); - // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); - } else { - InFlag = - SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0); - } + unsigned LoReg, HiReg; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; + case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; + case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; + case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; + } - // Copy the low half of the result, if it is needed. - if (!N.getValue(0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + // Multiply is commmutative. + if (!foldedLoad) { + foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + if (foldedLoad) + std::swap(N0, N1); + } + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + N0, SDValue()).getValue(1); + + if (foldedLoad) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), + InFlag }; + SDNode *CNode = + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 1); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + } else { + InFlag = + SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + } + + // Copy the low half of the result, if it is needed. + if (!N.getValue(0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + LoReg, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(N.getValue(0), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif + } + // Copy the high half of the result, if it is needed. + if (!N.getValue(1).use_empty()) { + SDValue Result; + if (HiReg == X86::AH && Subtarget->is64Bit()) { + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), 0); + // Then truncate it down to i8. + Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + MVT::i8, Result); + } else { + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); } - // Copy the high half of the result, if it is needed. 
- if (!N.getValue(1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), 0); - // Then truncate it down to i8. - SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32); - Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl, - MVT::i8, Result, SRIdx), 0); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(N.getValue(1), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif - } + } #ifndef NDEBUG - Indent -= 2; + Indent -= 2; #endif - return NULL; - } - - case ISD::SDIVREM: - case ISD::UDIVREM: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - - bool isSigned = Opcode == ISD::SDIVREM; - if (!isSigned) - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; - case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; - case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; - case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; - } - else - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; - case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; - case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; - case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; - } + return NULL; + } - unsigned LoReg, HiReg; - unsigned ClrOpcode, SExtOpcode; - switch (NVT.getSimpleVT()) { - default: assert(0 && "Unsupported VT!"); - case MVT::i8: - LoReg = X86::AL; HiReg = X86::AH; - ClrOpcode = 0; - SExtOpcode = X86::CBW; - break; - case MVT::i16: - LoReg = X86::AX; HiReg = X86::DX; - ClrOpcode = X86::MOV16r0; - SExtOpcode = X86::CWD; - break; - case MVT::i32: - LoReg = X86::EAX; HiReg = X86::EDX; - ClrOpcode = X86::MOV32r0; - SExtOpcode = X86::CDQ; - break; - case MVT::i64: - LoReg = X86::RAX; HiReg = X86::RDX; - ClrOpcode = X86::MOV64r0; - SExtOpcode = X86::CQO; - break; + case ISD::SDIVREM: + case ISD::UDIVREM: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + bool isSigned = Opcode == ISD::SDIVREM; + if (!isSigned) { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; + case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; + case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; + case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; } + } else { + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; + case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; + case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; + case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; + } + } - SDValue Tmp0, Tmp1, Tmp2, Tmp3, 
Tmp4; - bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); - bool signBitIsZero = CurDAG->SignBitIsZero(N0); - - SDValue InFlag; - if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { - // Special case for div8, just use a move with zero extension to AX to - // clear the upper 8 bits (AH). - SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; - if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; - Move = - SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16, - MVT::Other, Ops, - array_lengthof(Ops)), 0); - Chain = Move.getValue(1); - ReplaceUses(N0.getValue(1), Chain); - } else { - Move = - SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); - Chain = CurDAG->getEntryNode(); - } - Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); - InFlag = Chain.getValue(1); + unsigned LoReg, HiReg; + unsigned ClrOpcode, SExtOpcode; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: + LoReg = X86::AL; HiReg = X86::AH; + ClrOpcode = 0; + SExtOpcode = X86::CBW; + break; + case MVT::i16: + LoReg = X86::AX; HiReg = X86::DX; + ClrOpcode = X86::MOV16r0; + SExtOpcode = X86::CWD; + break; + case MVT::i32: + LoReg = X86::EAX; HiReg = X86::EDX; + ClrOpcode = X86::MOV32r0; + SExtOpcode = X86::CDQ; + break; + case MVT::i64: + LoReg = X86::RAX; HiReg = X86::RDX; + ClrOpcode = ~0U; // NOT USED. + SExtOpcode = X86::CQO; + break; + } + + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; + bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool signBitIsZero = CurDAG->SignBitIsZero(N0); + + SDValue InFlag; + if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { + // Special case for div8, just use a move with zero extension to AX to + // clear the upper 8 bits (AH). + SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; + if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; + Move = + SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, + MVT::Other, Ops, + array_lengthof(Ops)), 0); + Chain = Move.getValue(1); + ReplaceUses(N0.getValue(1), Chain); } else { + Move = + SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); + Chain = CurDAG->getEntryNode(); + } + Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); + InFlag = Chain.getValue(1); + } else { + InFlag = + CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, + LoReg, N0, SDValue()).getValue(1); + if (isSigned && !signBitIsZero) { + // Sign extend the low part into the high part. InFlag = - CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, - LoReg, N0, SDValue()).getValue(1); - if (isSigned && !signBitIsZero) { - // Sign extend the low part into the high part. - InFlag = - SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0); + SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); + } else { + // Zero out the high part, effectively zero extending the input. + SDValue ClrNode; + + if (NVT.getSimpleVT() == MVT::i64) { + ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, MVT::i32), + 0); + // We just did a 32-bit clear, insert it into a 64-bit register to + // clear the whole 64-bit reg. 
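The comment above relies on an x86-64 architectural rule: writing a 32-bit register implicitly zeroes bits 63:32 of the enclosing 64-bit register, so MOV32r0 plus an INSERT_SUBREG yields a full 64-bit zero without a REX-prefixed 64-bit instruction. The same fact expressed as C++ semantics; a compiler typically emits exactly the 32-bit move for the conversion below:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Wide = ~uint64_t(0);     // all 64 bits set
      uint32_t Narrow = 0;              // the "MOV32r0" of this sketch
      Wide = Narrow;                    // zero-extends: upper 32 bits become 0
      assert(Wide == 0);
      return 0;
    }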
+ SDValue Undef = + SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + dl, MVT::i64), 0); + SDValue SubRegNo = + CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); + ClrNode = + SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, + MVT::i64, Undef, ClrNode, SubRegNo), + 0); } else { - // Zero out the high part, effectively zero extending the input. - SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT), - 0); - InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg, - ClrNode, InFlag).getValue(1); + ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); } - } - if (foldedLoad) { - SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), - InFlag }; - SDNode *CNode = - CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, - array_lengthof(Ops)); - InFlag = SDValue(CNode, 1); - // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); - } else { - InFlag = - SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg, + ClrNode, InFlag).getValue(1); } + } - // Copy the division (low) result, if it is needed. - if (!N.getValue(0).use_empty()) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - LoReg, NVT, InFlag); - InFlag = Result.getValue(2); - ReplaceUses(N.getValue(0), Result); + if (foldedLoad) { + SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), + InFlag }; + SDNode *CNode = + CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, + array_lengthof(Ops)); + InFlag = SDValue(CNode, 1); + // Update the chain. + ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); + } else { + InFlag = + SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + } + + // Copy the division (low) result, if it is needed. + if (!N.getValue(0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + LoReg, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(N.getValue(0), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif + } + // Copy the remainder (high) result, if it is needed. + if (!N.getValue(1).use_empty()) { + SDValue Result; + if (HiReg == X86::AH && Subtarget->is64Bit()) { + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), + 0); + // Then truncate it down to i8. + Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + MVT::i8, Result); + } else { + Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); } - // Copy the remainder (high) result, if it is needed. - if (!N.getValue(1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. 
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), - 0); - // Then truncate it down to i8. - SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32); - Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl, - MVT::i8, Result, SRIdx), 0); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } - ReplaceUses(N.getValue(1), Result); + ReplaceUses(N.getValue(1), Result); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - DEBUG(Result.getNode()->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + Result.getNode()->dump(CurDAG); + errs() << '\n'; + }); #endif - } + } #ifndef NDEBUG - Indent -= 2; + Indent -= 2; #endif - return NULL; - } + return NULL; + } - case ISD::DECLARE: { - // Handle DECLARE nodes here because the second operand may have been - // wrapped in X86ISD::Wrapper. - SDValue Chain = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - SDValue N2 = Node->getOperand(2); - FrameIndexSDNode *FINode = dyn_cast(N1); - - // FIXME: We need to handle this for VLAs. - if (!FINode) { - ReplaceUses(N.getValue(0), Chain); - return NULL; + case X86ISD::CMP: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to + // use a smaller encoding. + if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + N0.getValueType() != MVT::i8 && + X86::isZeroNode(N1)) { + ConstantSDNode *C = dyn_cast(N0.getNode()->getOperand(1)); + if (!C) break; + + // For example, convert "testl %eax, $8" to "testb %al, $8" + if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && + (!(C->getZExtValue() & 0x80) || + HasNoSignedComparisonUses(Node))) { + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Reg = N0.getNode()->getOperand(0); + + // On x86-32, only the ABCD registers have 8-bit subregisters. + if (!Subtarget->is64Bit()) { + TargetRegisterClass *TRC = 0; + switch (N0.getValueType().getSimpleVT().SimpleTy) { + case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; + case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; + default: llvm_unreachable("Unsupported TEST operand type!"); + } + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, + Reg.getValueType(), Reg, RC), 0); + } + + // Extract the l-register. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, + MVT::i8, Reg); + + // Emit a testb. + return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); } - - if (N2.getOpcode() == ISD::ADD && - N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg) - N2 = N2.getOperand(1); - - // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled - // somehow, just ignore it. - if (N2.getOpcode() != X86ISD::Wrapper && - N2.getOpcode() != X86ISD::WrapperRIP) { - ReplaceUses(N.getValue(0), Chain); - return NULL; + + // For example, "testl %eax, $2048" to "testb %ah, $8". + if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && + (!(C->getZExtValue() & 0x8000) || + HasNoSignedComparisonUses(Node))) { + // Shift the immediate right by 8 bits. 
+ SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, + MVT::i8); + SDValue Reg = N0.getNode()->getOperand(0); + + // Put the value in an ABCD register. + TargetRegisterClass *TRC = 0; + switch (N0.getValueType().getSimpleVT().SimpleTy) { + case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; + case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; + case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; + default: llvm_unreachable("Unsupported TEST operand type!"); + } + SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); + Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, + Reg.getValueType(), Reg, RC), 0); + + // Extract the h-register. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl, + MVT::i8, Reg); + + // Emit a testb. No special NOREX tricks are needed since there's + // only one GPR operand! + return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, + Subreg, ShiftedImm); } - GlobalAddressSDNode *GVNode = - dyn_cast(N2.getOperand(0)); - if (GVNode == 0) { - ReplaceUses(N.getValue(0), Chain); - return NULL; + + // For example, "testl %eax, $32776" to "testw %ax, $32776". + if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && + N0.getValueType() != MVT::i16 && + (!(C->getZExtValue() & 0x8000) || + HasNoSignedComparisonUses(Node))) { + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16); + SDValue Reg = N0.getNode()->getOperand(0); + + // Extract the 16-bit subregister. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl, + MVT::i16, Reg); + + // Emit a testw. + return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); + } + + // For example, "testq %rax, $268468232" to "testl %eax, $268468232". + if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && + N0.getValueType() == MVT::i64 && + (!(C->getZExtValue() & 0x80000000) || + HasNoSignedComparisonUses(Node))) { + SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); + SDValue Reg = N0.getNode()->getOperand(0); + + // Extract the 32-bit subregister. + SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl, + MVT::i32, Reg); + + // Emit a testl. 
+ return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm); } - SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(), - TLI.getPointerTy()); - SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(), - TLI.getPointerTy()); - SDValue Ops[] = { Tmp1, Tmp2, Chain }; - return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl, - MVT::Other, Ops, - array_lengthof(Ops)); } + break; + } } SDNode *ResNode = SelectCode(N); #ifndef NDEBUG - DOUT << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == N.getNode()) - DEBUG(N.getNode()->dump(CurDAG)); - else - DEBUG(ResNode->dump(CurDAG)); - DOUT << "\n"; + DEBUG({ + errs() << std::string(Indent-2, ' ') << "=> "; + if (ResNode == NULL || ResNode == N.getNode()) + N.getNode()->dump(CurDAG); + else + ResNode->dump(CurDAG); + errs() << '\n'; + }); Indent -= 2; #endif diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5a6294a211c8a..fadc81839491c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16,13 +16,16 @@ #include "X86InstrBuilder.h" #include "X86ISelLowering.h" #include "X86TargetMachine.h" +#include "X86TargetObjectFile.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -33,21 +36,48 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); +// Disable16Bit - 16-bit operations typically have a larger encoding than +// corresponding 32-bit instructions, and 16-bit code is slow on some +// processors. This is an experimental flag to disable 16-bit operations +// (which forces them to be Legalized to 32-bit operations). +static cl::opt<bool> +Disable16Bit("disable-16bit", cl::Hidden, + cl::desc("Disable use of 16-bit instructions")); + // Forward declarations.
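The Disable16Bit flag added above follows the usual cl::opt pattern: a file-scope object whose constructor registers the option, so no central option table needs editing. A self-contained miniature of that self-registration mechanism (this sketches the idea only, not LLVM's CommandLine library):

    #include <map>
    #include <string>

    static std::map<std::string, bool *> &optionRegistry() {
      static std::map<std::string, bool *> R;  // built on first use
      return R;
    }

    struct BoolOpt {
      bool Value = false;
      BoolOpt(const char *Name) { optionRegistry()[Name] = &Value; }
      operator bool() const { return Value; }
    };

    static BoolOpt Disable16Bit("disable-16bit");  // registers itself, like cl::opt

    int main(int argc, char **argv) {
      for (int i = 1; i < argc; ++i)
        if (argv[i][0] == '-' && optionRegistry().count(argv[i] + 1))
          *optionRegistry()[argv[i] + 1] = true;
      return Disable16Bit ? 1 : 0;
    }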
-static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); +static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { + switch (TM.getSubtarget<X86Subtarget>().TargetType) { + default: llvm_unreachable("unknown subtarget type"); + case X86Subtarget::isDarwin: + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_MachoTargetObjectFile(); + return new X8632_MachoTargetObjectFile(); + case X86Subtarget::isELF: + return new TargetLoweringObjectFileELF(); + case X86Subtarget::isMingw: + case X86Subtarget::isCygwin: + case X86Subtarget::isWindows: + return new TargetLoweringObjectFileCOFF(); + } + +} + X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) - : TargetLowering(TM) { + : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); @@ -62,7 +92,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(SchedulingForRegPressure); - setShiftAmountFlavor(Mask); // shl X, 32 == shl X, 0 setStackPointerRegisterToSaveRestore(X86StackPtr); if (Subtarget->isTargetDarwin()) { @@ -80,7 +109,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Set up the register classes. addRegisterClass(MVT::i8, X86::GR8RegisterClass); - addRegisterClass(MVT::i16, X86::GR16RegisterClass); + if (!Disable16Bit) + addRegisterClass(MVT::i16, X86::GR16RegisterClass); addRegisterClass(MVT::i32, X86::GR32RegisterClass); if (Subtarget->is64Bit()) addRegisterClass(MVT::i64, X86::GR64RegisterClass); @@ -89,9 +119,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); - setTruncStoreAction(MVT::i64, MVT::i16, Expand); + if (!Disable16Bit) + setTruncStoreAction(MVT::i64, MVT::i16, Expand); setTruncStoreAction(MVT::i64, MVT::i8 , Expand); - setTruncStoreAction(MVT::i32, MVT::i16, Expand); + if (!Disable16Bit) + setTruncStoreAction(MVT::i32, MVT::i16, Expand); setTruncStoreAction(MVT::i32, MVT::i8 , Expand); setTruncStoreAction(MVT::i16, MVT::i8, Expand); @@ -242,8 +274,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); + if (Disable16Bit) { + setOperationAction(ISD::CTTZ , MVT::i16 , Expand); + setOperationAction(ISD::CTLZ , MVT::i16 , Expand); + } else { + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); + } setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); @@ -257,16 +294,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BSWAP , MVT::i16 , Expand); // These should be promoted to a larger select which is supported. - setOperationAction(ISD::SELECT , MVT::i1 , Promote); - setOperationAction(ISD::SELECT , MVT::i8 , Promote); + setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself.
- setOperationAction(ISD::SELECT , MVT::i16 , Custom); + setOperationAction(ISD::SELECT , MVT::i8 , Custom); + if (Disable16Bit) + setOperationAction(ISD::SELECT , MVT::i16 , Expand); + else + setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); - setOperationAction(ISD::SETCC , MVT::i16 , Custom); + if (Disable16Bit) + setOperationAction(ISD::SETCC , MVT::i16 , Expand); + else + setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); @@ -275,8 +318,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); } - // X86 ret instruction may pop stack. - setOperationAction(ISD::RET , MVT::Other, Custom); setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // Darwin ABI issue. @@ -330,7 +371,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); } - // Use the default ISD::DBG_STOPPOINT, ISD::DECLARE expansion. + // Use the default ISD::DBG_STOPPOINT. setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); // FIXME - use subtarget debug flags if (!Subtarget->isTargetDarwin() && @@ -637,6 +678,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i16, Promote); setOperationAction(ISD::SELECT, MVT::v2i32, Promote); setOperationAction(ISD::SELECT, MVT::v1i64, Custom); + setOperationAction(ISD::VSETCC, MVT::v8i8, Custom); + setOperationAction(ISD::VSETCC, MVT::v4i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v2i32, Custom); } if (!UseSoftFloat && Subtarget->hasSSE1()) { @@ -696,16 +740,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; + EVT VT = (MVT::SimpleValueType)i; // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; // Do not attempt to custom lower non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, + VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, + VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, + VT.getSimpleVT().SimpleTy, Custom); } setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); @@ -722,22 +769,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
     for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
-      MVT VT = (MVT::SimpleValueType)i;
+      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+      EVT VT = SVT;
 
       // Do not attempt to promote non-128-bit vectors
       if (!VT.is128BitVector()) {
         continue;
       }
-      setOperationAction(ISD::AND, VT, Promote);
-      AddPromotedToType (ISD::AND, VT, MVT::v2i64);
-      setOperationAction(ISD::OR, VT, Promote);
-      AddPromotedToType (ISD::OR, VT, MVT::v2i64);
-      setOperationAction(ISD::XOR, VT, Promote);
-      AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
-      setOperationAction(ISD::LOAD, VT, Promote);
-      AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
-      setOperationAction(ISD::SELECT, VT, Promote);
-      AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
+      setOperationAction(ISD::AND, SVT, Promote);
+      AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
+      setOperationAction(ISD::OR, SVT, Promote);
+      AddPromotedToType (ISD::OR, SVT, MVT::v2i64);
+      setOperationAction(ISD::XOR, SVT, Promote);
+      AddPromotedToType (ISD::XOR, SVT, MVT::v2i64);
+      setOperationAction(ISD::LOAD, SVT, Promote);
+      AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64);
+      setOperationAction(ISD::SELECT, SVT, Promote);
+      AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
     }
 
     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -847,7 +895,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
     // This includes 256-bit vectors
     for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
-      MVT VT = (MVT::SimpleValueType)i;
+      EVT VT = (MVT::SimpleValueType)i;
 
       // Do not attempt to custom lower non-power-of-2 vectors
       if (!isPowerOf2_32(VT.getVectorNumElements()))
@@ -861,7 +909,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     if (Subtarget->is64Bit()) {
       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
-    }
+    }
 #endif
 
 #if 0
@@ -871,7 +919,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
     // Including 256-bit vectors
     for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
-      MVT VT = (MVT::SimpleValueType)i;
+      EVT VT = (MVT::SimpleValueType)i;
 
       if (!VT.is256BitVector()) {
         continue;
@@ -933,13 +981,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
   maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
   maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
-  allowUnalignedMemoryAccesses = true; // x86 supports it!
   setPrefLoopAlignment(16);
   benefitFromCodePlacementOpt = true;
 }
 
-MVT X86TargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
   return MVT::i8;
 }
 
@@ -993,7 +1040,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
 /// and store operations as a result of memset, memcpy, and memmove
 /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
 /// determining it.
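The promotion loop above is legal because 128-bit bitwise operations are lane-agnostic: AND/OR/XOR on v16i8, v8i16, or v4i32 produce the same bits as the same operation on the value viewed as v2i64. A minimal standalone C++ check of that identity (ordinary ISO C++, not LLVM code):

    // Bitwise AND over sixteen i8 lanes equals bitwise AND over the same
    // 128 bits viewed as two i64 lanes, which is why v16i8/v8i16/v4i32
    // logic ops can be promoted to v2i64 without changing results.
    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint8_t a[16], b[16], lanewise[16];
      for (int i = 0; i < 16; ++i) { a[i] = (uint8_t)(i * 7); b[i] = (uint8_t)(0xF0 ^ i); }
      for (int i = 0; i < 16; ++i) lanewise[i] = a[i] & b[i];   // v16i8 AND

      uint64_t a64[2], b64[2], wide[2];                          // v2i64 view
      std::memcpy(a64, a, 16); std::memcpy(b64, b, 16);
      wide[0] = a64[0] & b64[0]; wide[1] = a64[1] & b64[1];

      assert(std::memcmp(lanewise, wide, 16) == 0);              // identical bits
      return 0;
    }

The same argument does not hold for arithmetic ops like ADD, where carries cross the narrower lane boundaries, so only the bitwise ops (plus LOAD and SELECT, which are also width-blind) are promoted here.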
-MVT
+EVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
                                        bool isSrcConst, bool isSrcStr,
                                        SelectionDAG &DAG) const {
@@ -1019,7 +1066,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                     SelectionDAG &DAG) const {
   if (usesGlobalOffsetTable())
     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
-  if (!Subtarget->isPICStyleRIPRel())
+  if (!Subtarget->is64Bit())
     // This doesn't have DebugLoc associated with it, but is not really the
     // same as a Register.
     return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
@@ -1029,7 +1076,7 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
 
 /// getFunctionAlignment - Return the Log2 alignment of this function.
 unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
-  return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+  return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1038,16 +1085,16 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
 
 #include "X86GenCallingConv.inc"
 
-/// LowerRET - Lower an ISD::RET node.
-SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
-  DebugLoc dl = Op.getDebugLoc();
-  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
+SDValue
+X86TargetLowering::LowerReturn(SDValue Chain,
+                               CallingConv::ID CallConv, bool isVarArg,
+                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               DebugLoc dl, SelectionDAG &DAG) {
 
   SmallVector<CCValAssign, 16> RVLocs;
-  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
-  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
-  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_X86);
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeReturn(Outs, RetCC_X86);
 
   // If this is the first return lowered for this function, add the regs to the
   // liveout set for the function.
@@ -1056,49 +1103,19 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
     if (RVLocs[i].isRegLoc())
       DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
   }
-  SDValue Chain = Op.getOperand(0);
-
-  // Handle tail call return.
-  Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
-  if (Chain.getOpcode() == X86ISD::TAILCALL) {
-    SDValue TailCall = Chain;
-    SDValue TargetAddress = TailCall.getOperand(1);
-    SDValue StackAdjustment = TailCall.getOperand(2);
-    assert(((TargetAddress.getOpcode() == ISD::Register &&
-             (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
-              cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
-            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
-            TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
-           "Expecting an global address, external symbol, or register");
-    assert(StackAdjustment.getOpcode() == ISD::Constant &&
-           "Expecting a const value");
-
-    SmallVector<SDValue, 8> Operands;
-    Operands.push_back(Chain.getOperand(0));
-    Operands.push_back(TargetAddress);
-    Operands.push_back(StackAdjustment);
-    // Copy registers used by the call. Last operand is a flag so it is not
-    // copied.
-    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
-      Operands.push_back(Chain.getOperand(i));
-    }
-    return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0],
-                       Operands.size());
-  }
-
-  // Regular return.
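The getFunctionAlignment fix above matters because the return value is a log2 alignment, not a byte count: the old OptimizeForSize value of 1 still forced 2-byte alignment, while the new value of 0 allows 1-byte (unpadded) placement. A quick standalone check of the conversion:

    // Converts a log2 alignment (what getFunctionAlignment returns) into
    // bytes, showing what the 1 -> 0 change buys for -Os functions.
    #include <cstdio>

    unsigned alignmentInBytes(unsigned log2Align) { return 1u << log2Align; }

    int main() {
      std::printf("log2=4 -> %u bytes (default x86 function alignment)\n",
                  alignmentInBytes(4));   // 16
      std::printf("log2=1 -> %u bytes (old OptimizeForSize value)\n",
                  alignmentInBytes(1));   // 2
      std::printf("log2=0 -> %u bytes (new OptimizeForSize value)\n",
                  alignmentInBytes(0));   // 1
      return 0;
    }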
   SDValue Flag;
 
   SmallVector<SDValue, 6> RetOps;
   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
   // Operand #1 = Bytes To Pop
-  RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
+  RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16));
 
   // Copy the result values into the output registers.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
-    SDValue ValToCopy = Op.getOperand(i*2+1);
+    SDValue ValToCopy = Outs[i].Val;
 
     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
     // the RET instruction and handled by the FP Stackifier.
@@ -1116,7 +1133,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
     // which is returned in RAX / RDX.
     if (Subtarget->is64Bit()) {
-      MVT ValVT = ValToCopy.getValueType();
+      EVT ValVT = ValToCopy.getValueType();
       if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
         ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
@@ -1145,6 +1162,9 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
 
     Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
     Flag = Chain.getValue(1);
+
+    // RAX now acts like a return value.
+    MF.getRegInfo().addLiveOut(X86::RAX);
   }
 
   RetOps[0] = Chain;  // Update chain.
@@ -1157,36 +1177,32 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
                      MVT::Other, &RetOps[0], RetOps.size());
 }
 
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue
+X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+                                   CallingConv::ID CallConv, bool isVarArg,
+                                   const SmallVectorImpl<ISD::InputArg> &Ins,
+                                   DebugLoc dl, SelectionDAG &DAG,
+                                   SmallVectorImpl<SDValue> &InVals) {
 
-/// LowerCallResult - Lower the result values of an ISD::CALL into the
-/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
-/// being lowered. The returns a SDNode with the same number of values as the
-/// ISD::CALL.
-SDNode *X86TargetLowering::
-LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
-                unsigned CallingConv, SelectionDAG &DAG) {
-
-  DebugLoc dl = TheCall->getDebugLoc();
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
-  bool isVarArg = TheCall->isVarArg();
   bool Is64Bit = Subtarget->is64Bit();
-  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
-  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);
-
-  SmallVector<SDValue, 8> ResultVals;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     CCValAssign &VA = RVLocs[i];
-    MVT CopyVT = VA.getValVT();
+    EVT CopyVT = VA.getValVT();
 
     // If this is x86-64, and we disabled SSE, we can't return FP values
     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
-        ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
-      cerr << "SSE register return with SSE disabled\n";
-      exit(1);
+        ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
+      llvm_report_error("SSE register return with SSE disabled");
     }
 
     // If this is a call to a function that returns an fp value on the floating
@@ -1206,7 +1222,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                                    MVT::v2i64, InFlag).getValue(1);
         Val = Chain.getValue(0);
         Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
-                          Val, DAG.getConstant(0, MVT::i64));
+                          Val, DAG.getConstant(0, MVT::i64));
       } else {
         Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i64,
                                    InFlag).getValue(1);
@@ -1228,13 +1244,10 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                         DAG.getIntPtrConstant(1));
     }
 
-    ResultVals.push_back(Val);
+    InVals.push_back(Val);
   }
 
-  // Merge everything together with a MERGE_VALUES node.
-  ResultVals.push_back(Chain);
-  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
-                     &ResultVals[0], ResultVals.size()).getNode();
+  return Chain;
 }
 
 
@@ -1248,30 +1261,28 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
 //  For info on fast calling convention see Fast Calling Convention (tail call)
 //  implementation LowerX86_32FastCCCallTo.
 
-/// CallIsStructReturn - Determines whether a CALL node uses struct return
+/// CallIsStructReturn - Determines whether a call uses struct return
 /// semantics.
-static bool CallIsStructReturn(CallSDNode *TheCall) {
-  unsigned NumOps = TheCall->getNumArgs();
-  if (!NumOps)
+static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+  if (Outs.empty())
     return false;
-  return TheCall->getArgFlags(0).isSRet();
+  return Outs[0].Flags.isSRet();
 }
 
-/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
+/// ArgsAreStructReturn - Determines whether a function uses struct
 /// return semantics.
-static bool ArgsAreStructReturn(SDValue Op) {
-  unsigned NumArgs = Op.getNode()->getNumValues() - 1;
-  if (!NumArgs)
+static bool
+ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+  if (Ins.empty())
    return false;
-  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
+  return Ins[0].Flags.isSRet();
 }
 
-/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
-/// the callee to pop its own arguments. Callee pop is necessary to support tail
-/// calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
   if (IsVarArg)
    return false;
 
@@ -1289,7 +1300,7 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
 
 /// CCAssignFnForNode - Selects the correct CCAssignFn for the
 /// given CallingConvention value.
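The callee-pop rules that IsCalleePop and the sret handling above encode can be stated compactly. A simplified standalone model (enum values and the helper name are illustrative, not the LLVM API; it omits the fastcc-with-tail-call-opt case):

    // Varargs are always caller-pop; stdcall/fastcall are callee-pop; a
    // 32-bit struct-return call pops the hidden sret pointer (4 bytes)
    // even under an otherwise caller-pop convention.
    #include <cstdio>

    enum CallConv { CDeclCC, StdCallCC, FastCallCC, FastCC };

    unsigned bytesCalleePops(CallConv cc, bool isVarArg, bool isStructRet,
                             bool is64Bit, unsigned stackBytes) {
      if (isVarArg) return 0;                          // caller cleans up varargs
      if (cc == StdCallCC || cc == FastCallCC) return stackBytes;
      if (!is64Bit && cc != FastCC && isStructRet) return 4; // hidden sret pointer
      return 0;
    }

    int main() {
      std::printf("%u\n", bytesCalleePops(StdCallCC, false, false, false, 12)); // 12
      std::printf("%u\n", bytesCalleePops(CDeclCC, false, true, false, 12));    // 4
      std::printf("%u\n", bytesCalleePops(CDeclCC, true, false, false, 12));    // 0
      return 0;
    }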
-CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
+CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
   if (Subtarget->is64Bit()) {
     if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
@@ -1305,36 +1316,18 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
   return CC_X86_32_C;
 }
 
-/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
-/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
+/// NameDecorationForCallConv - Selects the appropriate decoration to
+/// apply to a MachineFunction containing a given calling convention.
 NameDecorationStyle
-X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
-  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-  if (CC == CallingConv::X86_FastCall)
+X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) {
+  if (CallConv == CallingConv::X86_FastCall)
     return FastCall;
-  else if (CC == CallingConv::X86_StdCall)
+  else if (CallConv == CallingConv::X86_StdCall)
     return StdCall;
   return None;
 }
 
-/// CallRequiresGOTInRegister - Check whether the call requires the GOT pointer
-/// in a register before calling.
-bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
-  return !IsTailCall && !Is64Bit &&
-    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-    Subtarget->isPICStyleGOT();
-}
-
-/// CallRequiresFnAddressInReg - Check whether the call requires the function
-/// address to be loaded in a register.
-bool
-X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
-  return !Is64Bit && IsTailCall &&
-    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-    Subtarget->isPICStyleGOT();
-}
-
 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
 /// by "Src" to address "Dst" with size and alignment information specified by
 /// the specific parameter attribute. The copy will be passed as a byval
@@ -1348,35 +1341,52 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                        /*AlwaysInline=*/true, NULL, 0, NULL, 0);
 }
 
-SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
-                                            const CCValAssign &VA,
-                                            MachineFrameInfo *MFI,
-                                            unsigned CC,
-                                            SDValue Root, unsigned i) {
+SDValue
+X86TargetLowering::LowerMemArgument(SDValue Chain,
+                                    CallingConv::ID CallConv,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    const CCValAssign &VA,
+                                    MachineFrameInfo *MFI,
+                                    unsigned i) {
+
   // Create the nodes corresponding to a load from this parameter slot.
-  ISD::ArgFlagsTy Flags =
-    cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
-  bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
+  ISD::ArgFlagsTy Flags = Ins[i].Flags;
+  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && PerformTailCallOpt;
   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+  EVT ValVT;
+
+  // If value is passed by pointer we have address passed instead of the value
+  // itself.
+  if (VA.getLocInfo() == CCValAssign::Indirect)
+    ValVT = VA.getLocVT();
+  else
+    ValVT = VA.getValVT();
 
   // FIXME: For now, all byval parameter objects are marked mutable. This can be
   // changed with more analysis.
   // In case of tail call optimization mark all arguments mutable. Since they
   // could be overwritten by lowering of arguments in case of a tail call.
-  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+  int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
                                   VA.getLocMemOffset(), isImmutable);
   SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   if (Flags.isByVal())
     return FIN;
-  return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
+  return DAG.getLoad(ValVT, dl, Chain, FIN,
                      PseudoSourceValue::getFixedStack(FI), 0);
 }
 
 SDValue
-X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
+X86TargetLowering::LowerFormalArguments(SDValue Chain,
+                                        CallingConv::ID CallConv,
+                                        bool isVarArg,
+                                        const SmallVectorImpl<ISD::InputArg> &Ins,
+                                        DebugLoc dl,
+                                        SelectionDAG &DAG,
+                                        SmallVectorImpl<SDValue> &InVals) {
+
   MachineFunction &MF = DAG.getMachineFunction();
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
-  DebugLoc dl = Op.getDebugLoc();
 
   const Function* Fn = MF.getFunction();
   if (Fn->hasExternalLinkage() &&
@@ -1385,25 +1395,23 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
     FuncInfo->setForceFramePointer(true);
 
   // Decorate the function name.
-  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
+  FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv));
 
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  SDValue Root = Op.getOperand(0);
-  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
-  unsigned CC = MF.getFunction()->getCallingConv();
   bool Is64Bit = Subtarget->is64Bit();
   bool IsWin64 = Subtarget->isTargetWin64();
 
-  assert(!(isVarArg && CC == CallingConv::Fast) &&
+  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
-  CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
 
-  SmallVector<SDValue, 8> ArgValues;
   unsigned LastVal = ~0U;
+  SDValue ArgValue;
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
@@ -1413,7 +1421,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
     LastVal = VA.getValNo();
 
     if (VA.isRegLoc()) {
-      MVT RegVT = VA.getLocVT();
+      EVT RegVT = VA.getLocVT();
       TargetRegisterClass *RC = NULL;
       if (RegVT == MVT::i32)
         RC = X86::GR32RegisterClass;
@@ -1425,27 +1433,13 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
        RC = X86::FR64RegisterClass;
      else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
        RC = X86::VR128RegisterClass;
-      else if (RegVT.isVector()) {
-        assert(RegVT.getSizeInBits() == 64);
-        if (!Is64Bit)
-          RC = X86::VR64RegisterClass;     // MMX values are passed in MMXs.
-        else {
-          // Darwin calling convention passes MMX values in either GPRs or
-          // XMMs in x86-64. Other targets pass them in memory.
-          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
-            RC = X86::VR128RegisterClass;  // MMX values are passed in XMMs.
-            RegVT = MVT::v2i64;
-          } else {
-            RC = X86::GR64RegisterClass;   // v1i64 values are passed in GPRs.
-            RegVT = MVT::i64;
-          }
-        }
-      } else {
-        assert(0 && "Unknown argument type!");
-      }
+      else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
+        RC = X86::VR64RegisterClass;
+      else
+        llvm_unreachable("Unknown argument type!");
 
-      unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits. Insert an assert[sz]ext to capture this, then truncate to the
@@ -1456,52 +1450,53 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
       else if (VA.getLocInfo() == CCValAssign::ZExt)
         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                                DAG.getValueType(VA.getValVT()));
+      else if (VA.getLocInfo() == CCValAssign::BCvt)
+        ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
 
-      if (VA.getLocInfo() != CCValAssign::Full)
-        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
-      // Handle MMX values passed in GPRs.
-      if (Is64Bit && RegVT != VA.getLocVT()) {
-        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
-        else if (RC == X86::VR128RegisterClass) {
+      if (VA.isExtInLoc()) {
+        // Handle MMX values passed in XMM regs.
+        if (RegVT.isVector()) {
           ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
                                  ArgValue, DAG.getConstant(0, MVT::i64));
-          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
-        }
+          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
+        } else
+          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
       }
-
-      ArgValues.push_back(ArgValue);
     } else {
       assert(VA.isMemLoc());
-      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+      ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
     }
+
+    // If value is passed via pointer - do a load.
+    if (VA.getLocInfo() == CCValAssign::Indirect)
+      ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0);
+
+    InVals.push_back(ArgValue);
   }
 
   // The x86-64 ABI for returning structs by value requires that we copy
   // the sret argument into %rax for the return. Save the argument into
   // a virtual register so that we can access it from the return points.
-  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
-    MachineFunction &MF = DAG.getMachineFunction();
+  if (Is64Bit && MF.getFunction()->hasStructRetAttr()) {
     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
     unsigned Reg = FuncInfo->getSRetReturnReg();
     if (!Reg) {
       Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
       FuncInfo->setSRetReturnReg(Reg);
     }
-    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
-    Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
+    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
  }
 
   unsigned StackSize = CCInfo.getNextStackOffset();
   // align stack specially for tail calls
-  if (PerformTailCallOpt && CC == CallingConv::Fast)
+  if (PerformTailCallOpt && CallConv == CallingConv::Fast)
     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
 
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-    if (Is64Bit || CC != CallingConv::X86_FastCall) {
+    if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
       VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
     }
     if (Is64Bit) {
@@ -1558,75 +1553,81 @@
       // Store the integer parameter registers.
       SmallVector<SDValue, 8> MemOps;
       SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
-      SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
-                                DAG.getIntPtrConstant(VarArgsGPOffset));
+      unsigned Offset = VarArgsGPOffset;
       for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
+        SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
+                                  DAG.getIntPtrConstant(Offset));
         unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
                                      X86::GR64RegisterClass);
-        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
+        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
         SDValue Store =
           DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
+                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
+                       Offset);
         MemOps.push_back(Store);
-        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
-                          DAG.getIntPtrConstant(8));
+        Offset += 8;
       }
 
-      // Now store the XMM (fp + vector) parameter registers.
-      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
-                        DAG.getIntPtrConstant(VarArgsFPOffset));
-      for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
-        unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
-                                     X86::VR128RegisterClass);
-        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
-        SDValue Store =
-          DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
-        MemOps.push_back(Store);
-        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
-                          DAG.getIntPtrConstant(16));
+      if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
+        // Now store the XMM (fp + vector) parameter registers.
+        SmallVector<SDValue, 8> SaveXMMOps;
+        SaveXMMOps.push_back(Chain);
+
+        unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
+        SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
+        SaveXMMOps.push_back(ALVal);
+
+        SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex));
+        SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset));
+
+        for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
+          unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
+                                       X86::VR128RegisterClass);
+          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
+          SaveXMMOps.push_back(Val);
+        }
+        MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
+                                     MVT::Other,
+                                     &SaveXMMOps[0], SaveXMMOps.size()));
       }
+
       if (!MemOps.empty())
-          Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                             &MemOps[0], MemOps.size());
+          Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                              &MemOps[0], MemOps.size());
     }
   }
 
-  ArgValues.push_back(Root);
-
   // Some CCs need callee pop.
-  if (IsCalleePop(isVarArg, CC)) {
+  if (IsCalleePop(isVarArg, CallConv)) {
     BytesToPopOnReturn  = StackSize;  // Callee pops everything.
     BytesCallerReserves = 0;
   } else {
     BytesToPopOnReturn  = 0; // Callee pops nothing.
     // If this is an sret function, the return should pop the hidden pointer.
-    if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
+    if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
       BytesToPopOnReturn = 4;
     BytesCallerReserves = StackSize;
   }
 
   if (!Is64Bit) {
     RegSaveFrameIndex = 0xAAAAAAA;   // RegSaveFrameIndex is X86-64 only.
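The loops above materialize the x86-64 SysV register save area for varargs: six GPRs in 8-byte slots, then eight XMM registers in 16-byte slots, which is the layout llvm.va_start later indexes with gp_offset/fp_offset. A standalone sketch of those offsets (constants follow the SysV ABI; this is not the LLVM code itself):

    // Prints the register save area layout: GPR slots at 0..40,
    // XMM slots at 48..160, 176 bytes total (6*8 + 8*16).
    #include <cstdio>

    int main() {
      const unsigned NumGPRs = 6, NumXMMs = 8;   // rdi,rsi,rdx,rcx,r8,r9 + xmm0-7
      const unsigned FPAreaStart = NumGPRs * 8;  // XMM slots follow the GPR slots
      for (unsigned i = 0; i < NumGPRs; ++i)
        std::printf("GPR slot %u at offset %u\n", i, i * 8);
      for (unsigned i = 0; i < NumXMMs; ++i)
        std::printf("XMM slot %u at offset %u\n", i, FPAreaStart + i * 16);
      std::printf("total save area: %u bytes\n", FPAreaStart + NumXMMs * 16);
      return 0;
    }

The AL copy threaded into the new VASTART_SAVE_XMM_REGS node reflects the ABI rule that AL carries the number of vector registers actually used by a vararg call, so the XMM spills can be skipped when AL is zero.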
-    if (CC == CallingConv::X86_FastCall)
+    if (CallConv == CallingConv::X86_FastCall)
       VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
   }
 
   FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);
 
-  // Return the new list of results.
-  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
-                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+  return Chain;
 }
 
 SDValue
-X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
-                                    const SDValue &StackPtr,
+X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
+                                    SDValue StackPtr, SDValue Arg,
+                                    DebugLoc dl, SelectionDAG &DAG,
                                     const CCValAssign &VA,
-                                    SDValue Chain,
-                                    SDValue Arg, ISD::ArgFlagsTy Flags) {
-  DebugLoc dl = TheCall->getDebugLoc();
-  unsigned LocMemOffset = VA.getLocMemOffset();
+                                    ISD::ArgFlagsTy Flags) {
+  const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
+  unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   if (Flags.isByVal()) {
@@ -1649,7 +1650,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
   if (!IsTailCall || FPDiff==0) return Chain;
 
   // Adjust the Return address stack slot.
-  MVT VT = getPointerTy();
+  EVT VT = getPointerTy();
   OutRetAddr = getReturnAddressFrameIndex(DAG);
 
   // Load the "old" Return address.
@@ -1669,41 +1670,45 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
   int SlotSize = Is64Bit ? 8 : 4;
   int NewReturnAddrFI =
     MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
-  MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
+  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                        PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
   return Chain;
 }
 
-SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
+SDValue
+X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+                             CallingConv::ID CallConv, bool isVarArg,
+                             bool isTailCall,
+                             const SmallVectorImpl<ISD::OutputArg> &Outs,
+                             const SmallVectorImpl<ISD::InputArg> &Ins,
+                             DebugLoc dl, SelectionDAG &DAG,
+                             SmallVectorImpl<SDValue> &InVals) {
+
   MachineFunction &MF = DAG.getMachineFunction();
-  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
-  SDValue Chain       = TheCall->getChain();
-  unsigned CC         = TheCall->getCallingConv();
-  bool isVarArg       = TheCall->isVarArg();
-  bool IsTailCall     = TheCall->isTailCall() &&
-                        CC == CallingConv::Fast && PerformTailCallOpt;
-  SDValue Callee      = TheCall->getCallee();
   bool Is64Bit        = Subtarget->is64Bit();
-  bool IsStructRet    = CallIsStructReturn(TheCall);
-  DebugLoc dl         = TheCall->getDebugLoc();
+  bool IsStructRet    = CallIsStructReturn(Outs);
 
-  assert(!(isVarArg && CC == CallingConv::Fast) &&
+  assert((!isTailCall ||
+          (CallConv == CallingConv::Fast && PerformTailCallOpt)) &&
+         "IsEligibleForTailCallOptimization missed a case!");
+  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
        "Var args not supported with calling convention fastcc");
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
-  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
-  if (PerformTailCallOpt && CC == CallingConv::Fast)
+  if (PerformTailCallOpt && CallConv == CallingConv::Fast)
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
-  if (IsTailCall) {
+  if (isTailCall) {
     // Lower arguments at fp - stackoffset + fpdiff.
     unsigned NumBytesCallerPushed =
       MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
@@ -1719,7 +1724,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   SDValue RetAddrFrIdx;
   // Load return address for tail calls.
-  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit,
                                   FPDiff, dl);
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -1730,57 +1735,54 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   // of tail call optimization arguments are handle later.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = TheCall->getArg(i);
-    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+    EVT RegVT = VA.getLocVT();
+    SDValue Arg = Outs[i].Val;
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
     bool isByVal = Flags.isByVal();
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
-    default: assert(0 && "Unknown loc info!");
+    default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
       break;
     case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
       break;
     case CCValAssign::AExt:
-      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      if (RegVT.isVector() && RegVT.getSizeInBits() == 128) {
+        // Special case: passing MMX values in XMM registers.
+        Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
+        Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
+        Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
+      } else
+        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
       break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+      break;
+    case CCValAssign::Indirect: {
+      // Store the argument.
+      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+                           PseudoSourceValue::getFixedStack(FI), 0);
+      Arg = SpillSlot;
+      break;
+    }
     }
 
     if (VA.isRegLoc()) {
-      if (Is64Bit) {
-        MVT RegVT = VA.getLocVT();
-        if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
-          switch (VA.getLocReg()) {
-          default:
-            break;
-          case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
-          case X86::R8: {
-            // Special case: passing MMX values in GPR registers.
-            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
-            break;
-          }
-          case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
-          case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
-            // Special case: passing MMX values in XMM registers.
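The FPDiff value computed above is just the delta between the caller's incoming argument area and the outgoing area the tail callee needs; when they differ, the return-address slot is shifted by that delta before the jump. A worked example of the arithmetic (plain C++, the variable names mirror the hunk but the sign convention here is only illustrative):

    // If the caller was entered with 32 bytes of stack arguments and the
    // tail callee needs only 16, its frame sits 16 bytes "higher", so the
    // saved return address must be relocated by FPDiff.
    #include <cstdio>

    int main() {
      unsigned NumBytesCallerPushed = 32; // caller's incoming argument area
      unsigned NumBytes = 16;             // outgoing area the tail call needs
      int FPDiff = (int)NumBytesCallerPushed - (int)NumBytes;
      std::printf("FPDiff = %d\n", FPDiff); // 16
      return 0;
    }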
-            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
-            Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
-            Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
-            break;
-          }
-          }
-      }
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
     } else {
-      if (!IsTailCall || (IsTailCall && isByVal)) {
+      if (!isTailCall || (isTailCall && isByVal)) {
         assert(VA.isMemLoc());
         if (StackPtr.getNode() == 0)
           StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
 
-        MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
-                                               Chain, Arg, Flags));
+        MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                               dl, DAG, VA, Flags));
       }
     }
   }
@@ -1794,37 +1796,41 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   SDValue InFlag;
   // Tail call byval lowering might overwrite argument registers so in case of
   // tail call optimization the copies to registers are lowered later.
-  if (!IsTailCall)
+  if (!isTailCall)
     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                                RegsToPass[i].second, InFlag);
       InFlag = Chain.getValue(1);
     }
 
-  // ELF / PIC requires GOT in the EBX register before function calls via PLT
-  // GOT pointer.
-  if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
-    Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
-                             DAG.getNode(X86ISD::GlobalBaseReg,
-                                         DebugLoc::getUnknownLoc(),
-                                         getPointerTy()),
-                             InFlag);
-    InFlag = Chain.getValue(1);
-  }
-  // If we are tail calling and generating PIC/GOT style code load the address
-  // of the callee into ecx. The value in ecx is used as target of the tail
-  // jump. This is done to circumvent the ebx/callee-saved problem for tail
-  // calls on PIC/GOT architectures. Normally we would just put the address of
-  // GOT into ebx and then call target@PLT. But for tail callss ebx would be
-  // restored (since ebx is callee saved) before jumping to the target@PLT.
-  if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
-    // Note: The actual moving to ecx is done further down.
-    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
-    if (G && !G->getGlobal()->hasHiddenVisibility() &&
-        !G->getGlobal()->hasProtectedVisibility())
-      Callee = LowerGlobalAddress(Callee, DAG);
-    else if (isa<ExternalSymbolSDNode>(Callee))
-      Callee = LowerExternalSymbol(Callee,DAG);
+
+  if (Subtarget->isPICStyleGOT()) {
+    // ELF / PIC requires GOT in the EBX register before function calls via PLT
+    // GOT pointer.
+    if (!isTailCall) {
+      Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
+                               DAG.getNode(X86ISD::GlobalBaseReg,
+                                           DebugLoc::getUnknownLoc(),
+                                           getPointerTy()),
+                               InFlag);
+      InFlag = Chain.getValue(1);
+    } else {
+      // If we are tail calling and generating PIC/GOT style code load the
+      // address of the callee into ECX. The value in ecx is used as target of
+      // the tail jump. This is done to circumvent the ebx/callee-saved problem
+      // for tail calls on PIC/GOT architectures. Normally we would just put the
+      // address of GOT into ebx and then call target@PLT. But for tail calls
+      // ebx would be restored (since ebx is callee saved) before jumping to the
+      // target@PLT.
+
+      // Note: The actual moving to ECX is done further down.
+      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
+      if (G && !G->getGlobal()->hasHiddenVisibility() &&
+          !G->getGlobal()->hasProtectedVisibility())
+        Callee = LowerGlobalAddress(Callee, DAG);
+      else if (isa<ExternalSymbolSDNode>(Callee))
+        Callee = LowerExternalSymbol(Callee, DAG);
+    }
   }
 
   if (Is64Bit && isVarArg) {
@@ -1853,7 +1859,15 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   // For tail calls lower the arguments to the 'real' stack slot.
-  if (IsTailCall) {
+  if (isTailCall) {
+    // Force all the incoming stack arguments to be loaded from the stack
+    // before any new outgoing arguments are stored to the stack, because the
+    // outgoing stack slots may alias the incoming argument stack slots, and
+    // the alias isn't otherwise explicit. This is slightly more conservative
+    // than necessary, because it means that each store effectively depends
+    // on every argument instead of just those arguments it would clobber.
+    SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
+
     SmallVector<SDValue, 8> MemOpChains2;
     SDValue FIN;
     int FI = 0;
@@ -1863,8 +1877,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
       CCValAssign &VA = ArgLocs[i];
       if (!VA.isRegLoc()) {
         assert(VA.isMemLoc());
-        SDValue Arg = TheCall->getArg(i);
-        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+        SDValue Arg = Outs[i].Val;
+        ISD::ArgFlagsTy Flags = Outs[i].Flags;
         // Create frame index.
         int32_t Offset = VA.getLocMemOffset()+FPDiff;
         uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
@@ -1879,12 +1893,13 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
                                     getPointerTy());
           Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
 
-          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
+          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+                                                           ArgChain,
                                                            Flags, DAG, dl));
         } else {
           // Store relative to framepointer.
           MemOpChains2.push_back(
-            DAG.getStore(Chain, dl, Arg, FIN,
+            DAG.getStore(ArgChain, dl, Arg, FIN,
                          PseudoSourceValue::getFixedStack(FI), 0));
         }
       }
@@ -1912,13 +1927,49 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
     // We should use extra load for direct calls to dllimported functions in
     // non-JIT mode.
-    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
-                                        getTargetMachine(), true))
-      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
-                                          G->getOffset());
+    GlobalValue *GV = G->getGlobal();
+    if (!GV->hasDLLImportLinkage()) {
+      unsigned char OpFlags = 0;
+
+      // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
+      // external symbols must go through the PLT in PIC mode.  If the symbol
+      // has hidden or protected visibility, or if it is static or local, then
+      // we don't need to use the PLT - we can directly call it.
+      if (Subtarget->isTargetELF() &&
+          getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+          GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
+        OpFlags = X86II::MO_PLT;
+      } else if (Subtarget->isPICStyleStubAny() &&
+                 (GV->isDeclaration() || GV->isWeakForLinker()) &&
+                 Subtarget->getDarwinVers() < 9) {
+        // PC-relative references to external symbols should go through $stub,
+        // unless we're building with the leopard linker or later, which
+        // automatically synthesizes these stubs.
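The getStackArgumentTokenFactor comment above describes a classic overlapping-copy hazard: a tail call's outgoing argument slots can occupy the same addresses as the incoming argument slots still being read. A standalone demonstration of why the reads must be ordered before the first overlapping write (ordinary C++, independent of the DAG machinery):

    // Copying a "frame" onto itself with a shifted destination corrupts
    // data unless every read happens before the first overlapping write -
    // the same ordering the ArgChain token factor enforces at the DAG level.
    #include <cstdio>
    #include <cstring>

    int main() {
      char frame[8] = {'a','b','c','d','e','f','g','h'};

      // Unsafe: slot i+2 is overwritten while it still holds an unread input.
      char bad[8]; std::memcpy(bad, frame, 8);
      for (int i = 0; i < 6; ++i) bad[i + 2] = bad[i];       // clobbers inputs

      // Safe: snapshot all reads first, then write.
      char good[8]; std::memcpy(good, frame, 8);
      char snapshot[8]; std::memcpy(snapshot, good, 8);
      for (int i = 0; i < 6; ++i) good[i + 2] = snapshot[i];

      std::printf("bad:  %.8s\ngood: %.8s\n", bad, good);    // "ababab..", "ababcdef"
      return 0;
    }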
+        OpFlags = X86II::MO_DARWIN_STUB;
+      }
+
+      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
+                                          G->getOffset(), OpFlags);
+    }
   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-  } else if (IsTailCall) {
+    unsigned char OpFlags = 0;
+
+    // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
+    // symbols should go through the PLT.
+    if (Subtarget->isTargetELF() &&
+        getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+      OpFlags = X86II::MO_PLT;
+    } else if (Subtarget->isPICStyleStubAny() &&
+               Subtarget->getDarwinVers() < 9) {
+      // PC-relative references to external symbols should go through $stub,
+      // unless we're building with the leopard linker or later, which
+      // automatically synthesizes these stubs.
+      OpFlags = X86II::MO_DARWIN_STUB;
+    }
+
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+                                         OpFlags);
+  } else if (isTailCall) {
     unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
 
     Chain = DAG.getCopyToReg(Chain, dl,
                              DAG.getRegister(Opc, getPointerTy()),
                              Callee,InFlag);
     Callee = DAG.getRegister(Opc, getPointerTy());
 
     // Add register as live out.
-    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
+    MF.getRegInfo().addLiveOut(Opc);
   }
 
   // Returns a chain & a flag for retval copy to use.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   SmallVector<SDValue, 8> Ops;
 
-  if (IsTailCall) {
+  if (isTailCall) {
     Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                                DAG.getIntPtrConstant(0, true), InFlag);
     InFlag = Chain.getValue(1);
-
-    // Returns a chain & a flag for retval copy to use.
-    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
-    Ops.clear();
   }
 
   Ops.push_back(Chain);
   Ops.push_back(Callee);
 
-  if (IsTailCall)
+  if (isTailCall)
     Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));
 
   // Add argument registers to the end of the list so that they are known live
@@ -1956,9 +2003,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
                                   RegsToPass[i].second.getValueType()));
 
   // Add an implicit use GOT pointer in EBX.
-  if (!IsTailCall && !Is64Bit &&
-      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      Subtarget->isPICStyleGOT())
+  if (!isTailCall && Subtarget->isPICStyleGOT())
     Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
 
   // Add an implicit use of AL for x86 vararg functions.
@@ -1968,13 +2013,28 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   if (InFlag.getNode())
     Ops.push_back(InFlag);
 
-  if (IsTailCall) {
-    assert(InFlag.getNode() &&
-           "Flag must be set. Depend on flag being set in LowerRET");
-    Chain = DAG.getNode(X86ISD::TAILCALL, dl,
-                        TheCall->getVTList(), &Ops[0], Ops.size());
+  if (isTailCall) {
+    // If this is the first return lowered for this function, add the regs
+    // to the liveout set for the function.
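The two hunks above apply the same target-flag policy to global addresses and external symbols. A condensed standalone model of that decision (the enum and helper names here are illustrative, not LLVM API): PIC ELF calls to preemptible symbols go through the PLT; pre-Leopard Darwin reaches external symbols through a $stub; everything else is a direct call.

    #include <cstdio>

    enum TargetFlag { FlagNone, FlagPLT, FlagDarwinStub };

    TargetFlag classifyCallTarget(bool isELF, bool isPIC, bool defaultVisibility,
                                  bool isLocal, bool isDarwinStubABI) {
      if (isELF && isPIC && defaultVisibility && !isLocal)
        return FlagPLT;          // preemptible symbol under ELF PIC
      if (isDarwinStubABI)       // stands in for isPICStyleStubAny() &&
        return FlagDarwinStub;   // getDarwinVers() < 9 in the patch
      return FlagNone;
    }

    int main() {
      std::printf("%d\n", classifyCallTarget(true, true, true, false, false));  // 1
      std::printf("%d\n", classifyCallTarget(false, true, true, false, true));  // 2
      std::printf("%d\n", classifyCallTarget(true, false, true, false, false)); // 0
      return 0;
    }

Hidden, protected, or local symbols skip the PLT because they cannot be preempted at link time, which is exactly the visibility test the patch performs on GV.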
+    if (MF.getRegInfo().liveout_empty()) {
+      SmallVector<CCValAssign, 16> RVLocs;
+      CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+                     *DAG.getContext());
+      CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
+      for (unsigned i = 0; i != RVLocs.size(); ++i)
+        if (RVLocs[i].isRegLoc())
+          MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+    }
+
+    assert(((Callee.getOpcode() == ISD::Register &&
+             (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
+              cast<RegisterSDNode>(Callee)->getReg() == X86::R9)) ||
+            Callee.getOpcode() == ISD::TargetExternalSymbol ||
+            Callee.getOpcode() == ISD::TargetGlobalAddress) &&
+           "Expecting a global address, external symbol, or register");
 
-    return SDValue(Chain.getNode(), Op.getResNo());
+    return DAG.getNode(X86ISD::TC_RETURN, dl,
+                       NodeTys, &Ops[0], Ops.size());
   }
 
   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -1982,9 +2042,9 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   // Create the CALLSEQ_END node.
   unsigned NumBytesForCalleeToPush;
-  if (IsCalleePop(isVarArg, CC))
+  if (IsCalleePop(isVarArg, CallConv))
     NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
-  else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
+  else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
     // If this is a call to a struct-return function, the callee
     // pops the hidden struct pointer, so we have to push it back.
     // This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2002,8 +2062,8 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
 
   // Handle result values, copying them out of physregs into vregs that we
   // return.
-  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
-                 Op.getResNo());
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+                         Ins, dl, DAG, InVals);
 }
 
 
@@ -2060,36 +2120,18 @@ unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
   return Offset;
 }
 
-/// IsEligibleForTailCallElimination - Check to see whether the next instruction
-/// following the call is a return. A function is eligible if caller/callee
-/// calling conventions match, currently only fastcc supports tail calls, and
-/// the function CALL is immediatly followed by a RET.
-bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
-                                                          SDValue Ret,
-                                                          SelectionDAG& DAG) const {
-  if (!PerformTailCallOpt)
-    return false;
-
-  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
-    MachineFunction &MF = DAG.getMachineFunction();
-    unsigned CallerCC = MF.getFunction()->getCallingConv();
-    unsigned CalleeCC = TheCall->getCallingConv();
-    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
-      SDValue Callee = TheCall->getCallee();
-      // On x86/32Bit PIC/GOT tail calls are supported.
-      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
-          !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
-        return true;
-
-      // Can only do local tail calls (in same module, hidden or protected) on
-      // x86_64 PIC/GOT at the moment.
-      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-        return G->getGlobal()->hasHiddenVisibility()
-            || G->getGlobal()->hasProtectedVisibility();
-    }
-  }
-
-  return false;
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+                                                     CallingConv::ID CalleeCC,
+                                                     bool isVarArg,
+                                      const SmallVectorImpl<ISD::InputArg> &Ins,
+                                                     SelectionDAG& DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+  return CalleeCC == CallingConv::Fast && CallerCC == CalleeCC;
 }
 
 FastISel *
@@ -2133,6 +2175,36 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
 }
 
+bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
+                                       bool hasSymbolicDisplacement) {
+  // Offset should fit into 32 bit immediate field.
+  if (!isInt32(Offset))
+    return false;
+
+  // If we don't have a symbolic displacement - we don't have any extra
+  // restrictions.
+  if (!hasSymbolicDisplacement)
+    return true;
+
+  // FIXME: Some tweaks might be needed for medium code model.
+  if (M != CodeModel::Small && M != CodeModel::Kernel)
+    return false;
+
+  // For the small code model we assume the latest object is 16MB before the
+  // end of the 31-bit boundary. We may also accept pretty large negative
+  // constants knowing that all objects are in the positive half of the
+  // address space.
+  if (M == CodeModel::Small && Offset < 16*1024*1024)
+    return true;
+
+  // For the kernel code model we know that all objects reside in the negative
+  // half of the 32-bit address space. We may not accept negative offsets,
+  // since they may be just off, and we may accept pretty large positive ones.
+  if (M == CodeModel::Kernel && Offset > 0)
+    return true;
+
+  return false;
+}
+
 /// TranslateX86CC - do a one-to-one translation of an ISD::CondCode to the X86
 /// specific condition code, returning the condition code and the LHS/RHS of the
 /// comparison to make.
@@ -2155,7 +2227,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
   }
 
   switch (SetCCOpcode) {
-  default: assert(0 && "Invalid integer condition!");
+  default: llvm_unreachable("Invalid integer condition!");
   case ISD::SETEQ:  return X86::COND_E;
   case ISD::SETGT:  return X86::COND_G;
   case ISD::SETGE:  return X86::COND_GE;
@@ -2195,7 +2267,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
   //  1 | 0 | 0 | X == Y
   //  1 | 1 | 1 | unordered
   switch (SetCCOpcode) {
-  default: assert(0 && "Condcode should be pre-legalized away");
+  default: llvm_unreachable("Condcode should be pre-legalized away");
   case ISD::SETUEQ:
   case ISD::SETEQ:   return X86::COND_E;
   case ISD::SETOLT:              // flipped
@@ -2253,7 +2325,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
 
 /// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFD or PSHUFW.  That is, it doesn't reference
 /// the second operand.
-static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
     return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
   if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -2262,68 +2334,68 @@ static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, MVT VT) {
 }
 
 bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
-  SmallVector<int, 8> M;
+  SmallVector<int, 8> M;
   N->getMask(M);
   return ::isPSHUFDMask(M, N->getValueType(0));
 }
 
 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFHW.
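The new X86::isOffsetSuitableForCodeModel above is easy to sanity-check with concrete values. A standalone re-statement of its logic with a few test offsets (a sketch mirroring the hunk, not the LLVM entry point):

    #include <cstdint>
    #include <cstdio>

    enum Model { Small, Kernel, Large };

    bool offsetOK(int64_t Off, Model M, bool hasSymbolicDisplacement) {
      if (Off != (int64_t)(int32_t)Off) return false;  // must fit in imm32
      if (!hasSymbolicDisplacement) return true;       // no extra restrictions
      if (M != Small && M != Kernel) return false;
      if (M == Small) return Off < 16 * 1024 * 1024;   // below the 16MB guard band
      return Off > 0;                                  // Kernel: high half only
    }

    int main() {
      std::printf("%d\n", offsetOK(1 << 20, Small, true));    // 1: well under 16MB
      std::printf("%d\n", offsetOK(32 << 20, Small, true));   // 0: past the band
      std::printf("%d\n", offsetOK(-8, Kernel, true));        // 0: negative offset
      std::printf("%d\n", offsetOK(1LL << 40, Small, false)); // 0: not an imm32
      return 0;
    }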
-static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (VT != MVT::v8i16)
     return false;
-
+
   // Lower quadword copied in order or undef.
   for (int i = 0; i != 4; ++i)
     if (Mask[i] >= 0 && Mask[i] != i)
       return false;
-
+
   // Upper quadword shuffled.
   for (int i = 4; i != 8; ++i)
     if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
       return false;
-
+
   return true;
 }
 
 bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
-  SmallVector<int, 8> M;
+  SmallVector<int, 8> M;
   N->getMask(M);
   return ::isPSHUFHWMask(M, N->getValueType(0));
 }
 
 /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (VT != MVT::v8i16)
     return false;
-
+
   // Upper quadword copied in order.
   for (int i = 4; i != 8; ++i)
     if (Mask[i] >= 0 && Mask[i] != i)
       return false;
-
+
   // Lower quadword shuffled.
   for (int i = 0; i != 4; ++i)
     if (Mask[i] >= 4)
       return false;
-
+
   return true;
 }
 
 bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
-  SmallVector<int, 8> M;
+  SmallVector<int, 8> M;
   N->getMask(M);
   return ::isPSHUFLWMask(M, N->getValueType(0));
 }
 
 /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to SHUFP*.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   int NumElems = VT.getVectorNumElements();
   if (NumElems != 2 && NumElems != 4)
     return false;
-
+
   int Half = NumElems / 2;
   for (int i = 0; i < Half; ++i)
     if (!isUndefOrInRange(Mask[i], 0, NumElems))
@@ -2331,7 +2403,7 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   for (int i = Half; i < NumElems; ++i)
     if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
       return false;
-
+
   return true;
 }
 
@@ -2345,12 +2417,12 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
 /// the reverse of what x86 shuffles want. x86 shuffles requires the lower
 /// half elements to come from vector 1 (which would equal the dest.) and
 /// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   int NumElems = VT.getVectorNumElements();
-
-  if (NumElems != 2 && NumElems != 4)
+
+  if (NumElems != 2 && NumElems != 4)
     return false;
-
+
   int Half = NumElems / 2;
   for (int i = 0; i < Half; ++i)
     if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
@@ -2424,24 +2496,24 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
 /// <2, 3, 2, 3>
 bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
   unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
+
   if (NumElems != 4)
     return false;
-
-  return isUndefOrEqual(N->getMaskElt(0), 2) &&
+
+  return isUndefOrEqual(N->getMaskElt(0), 2) &&
          isUndefOrEqual(N->getMaskElt(1), 3) &&
-         isUndefOrEqual(N->getMaskElt(2), 2) &&
+         isUndefOrEqual(N->getMaskElt(2), 2) &&
          isUndefOrEqual(N->getMaskElt(3), 3);
 }
 
 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
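The PSHUFHW predicate above encodes "low quadword untouched, high quadword shuffled within itself" (PSHUFLW is the mirror image). A standalone version over a plain array, with one accepted and one rejected v8i16 mask (-1 meaning undef):

    #include <cstdio>

    bool isPSHUFHWMask(const int Mask[8]) {
      for (int i = 0; i != 4; ++i)          // low half: identity or undef
        if (Mask[i] >= 0 && Mask[i] != i) return false;
      for (int i = 4; i != 8; ++i)          // high half: must stay in 4..7
        if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7)) return false;
      return true;
    }

    int main() {
      const int ok[8]  = {0, 1, 2, 3, 7, 6, -1, 4};  // reorders only lanes 4-7
      const int bad[8] = {0, 1, 2, 3, 0, 5, 6, 7};   // lane 4 reads from lane 0
      std::printf("%d %d\n", isPSHUFHWMask(ok), isPSHUFHWMask(bad)); // 1 0
      return 0;
    }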
-static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
                          bool V2IsSplat = false) {
   int NumElts = VT.getVectorNumElements();
   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
     return false;
-
+
   for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
     int BitI  = Mask[i];
     int BitI1 = Mask[i+1];
@@ -2466,12 +2538,12 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
 
 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
                          bool V2IsSplat = false) {
   int NumElts = VT.getVectorNumElements();
   if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
     return false;
-
+
   for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
     int BitI  = Mask[i];
     int BitI1 = Mask[i+1];
@@ -2497,11 +2569,11 @@ bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
 /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
 /// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
   int NumElems = VT.getVectorNumElements();
   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
     return false;
-
+
   for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
     int BitI  = Mask[i];
     int BitI1 = Mask[i+1];
@@ -2522,11 +2594,11 @@ bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
 /// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
   int NumElems = VT.getVectorNumElements();
   if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
     return false;
-
+
   for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
     int BitI  = Mask[i];
     int BitI1 = Mask[i+1];
@@ -2547,19 +2619,19 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVSS,
 /// MOVSD, and MOVD, i.e. setting the lowest element.
-static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
+static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
   if (VT.getVectorElementType().getSizeInBits() < 32)
     return false;
 
   int NumElts = VT.getVectorNumElements();
-
+
   if (!isUndefOrEqual(Mask[0], NumElts))
     return false;
-
+
   for (int i = 1; i < NumElts; ++i)
     if (!isUndefOrEqual(Mask[i], i))
       return false;
-
+
   return true;
 }
 
@@ -2572,21 +2644,21 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
 /// isCommutedMOVL - Returns true if the shuffle mask is the reverse
 /// of what x86 movss wants. X86 movs requires the lowest element to be lowest
 /// element of vector 2 and the other elements to come from vector 1 in order.
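The UNPCKL predicate above accepts masks that interleave the low halves of the two inputs: for four elements that is <0, 4, 1, 5>. A standalone check mirroring its loop structure (without the V2IsSplat refinement), with one matching and one non-matching mask:

    #include <cstdio>

    bool isUnpackLowMask(const int *Mask, int NumElts) {
      for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
        if (Mask[i] >= 0 && Mask[i] != j) return false;               // from V1 low
        if (Mask[i + 1] >= 0 && Mask[i + 1] != j + NumElts) return false; // V2 low
      }
      return true;
    }

    int main() {
      const int unpcklps[4] = {0, 4, 1, 5};  // the unpcklps interleave pattern
      const int movlhps[4]  = {0, 1, 4, 5};  // a different two-input pattern
      std::printf("%d %d\n", isUnpackLowMask(unpcklps, 4),
                             isUnpackLowMask(movlhps, 4));  // 1 0
      return 0;
    }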
-static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT,
+static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
                                bool V2IsSplat = false, bool V2IsUndef = false) {
   int NumOps = VT.getVectorNumElements();
   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
     return false;
-
+
   if (!isUndefOrEqual(Mask[0], 0))
     return false;
-
+
   for (int i = 1; i < NumOps; ++i)
     if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
           (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
           (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
       return false;
-
+
   return true;
 }
 
@@ -2650,7 +2722,7 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
 /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
 bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
   int e = N->getValueType(0).getVectorNumElements() / 2;
-
+
   for (int i = 0; i < e; ++i)
     if (!isUndefOrEqual(N->getMaskElt(i), i))
       return false;
@@ -2714,14 +2786,23 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
   return Mask;
 }
 
+/// isZeroNode - Returns true if Elt is a constant zero or a floating point
+/// constant +0.0.
+bool X86::isZeroNode(SDValue Elt) {
+  return ((isa<ConstantSDNode>(Elt) &&
+           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+          (isa<ConstantFPSDNode>(Elt) &&
+           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
+}
+
 /// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
 /// their permute mask.
 static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
                                     SelectionDAG &DAG) {
-  MVT VT = SVOp->getValueType(0);
+  EVT VT = SVOp->getValueType(0);
   unsigned NumElems = VT.getVectorNumElements();
   SmallVector<int, 8> MaskVec;
-
+
   for (unsigned i = 0; i != NumElems; ++i) {
     int idx = SVOp->getMaskElt(i);
     if (idx < 0)
@@ -2737,7 +2818,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
 
 /// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
 /// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, MVT VT) {
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
   unsigned NumElems = VT.getVectorNumElements();
   for (unsigned i = 0; i != NumElems; ++i) {
     int idx = Mask[i];
@@ -2795,7 +2876,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
     return false;
 
   unsigned NumElems = Op->getValueType(0).getVectorNumElements();
-
+
   if (NumElems != 2 && NumElems != 4)
     return false;
   for (unsigned i = 0, e = NumElems/2; i != e; ++i)
@@ -2820,17 +2901,8 @@ static bool isSplatVector(SDNode *N) {
   return true;
 }
 
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
-static inline bool isZeroNode(SDValue Elt) {
-  return ((isa<ConstantSDNode>(Elt) &&
-           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
-          (isa<ConstantFPSDNode>(Elt) &&
-           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
-}
-
 /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
-/// to an zero vector.
+/// to a zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode static bool isZeroShuffle(ShuffleVectorSDNode *N) { SDValue V1 = N->getOperand(0); @@ -2842,13 +2914,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { unsigned Opc = V2.getOpcode(); if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V2.getOperand(Idx-NumElems))) + if (Opc != ISD::BUILD_VECTOR || + !X86::isZeroNode(V2.getOperand(Idx-NumElems))) return false; } else if (Idx >= 0) { unsigned Opc = V1.getOpcode(); if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode())) continue; - if (Opc != ISD::BUILD_VECTOR || !isZeroNode(V1.getOperand(Idx))) + if (Opc != ISD::BUILD_VECTOR || + !X86::isZeroNode(V1.getOperand(Idx))) return false; } } @@ -2857,7 +2931,7 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) { /// getZeroVector - Returns a vector of specified type with all zero elements. /// -static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG, +static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); @@ -2879,7 +2953,7 @@ static SDValue getZeroVector(MVT VT, bool HasSSE2, SelectionDAG &DAG, /// getOnesVector - Returns a vector of specified type with all bits set. /// -static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest @@ -2897,13 +2971,13 @@ static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { /// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements /// that point to V2 points to its first element. static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { - MVT VT = SVOp->getValueType(0); + EVT VT = SVOp->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); - + bool Changed = false; SmallVector MaskVec; SVOp->getMask(MaskVec); - + for (unsigned i = 0; i != NumElems; ++i) { if (MaskVec[i] > (int)NumElems) { MaskVec[i] = NumElems; @@ -2918,7 +2992,7 @@ static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { /// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. -static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; @@ -2929,7 +3003,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, } /// getUnpackl - Returns a vector_shuffle node for an unpackl operation. -static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; @@ -2941,7 +3015,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, } /// getUnpackhMask - Returns a vector_shuffle node for an unpackh operation. 
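getMOVL and getUnpackl/getUnpackh (retyped to EVT above) materialize the canonical index vectors that the predicates earlier in the file recognize. Their push_back loops are elided by the hunk context, but the documented shapes are easy to state; a sketch of the MOVL mask, with the unpack-low mask following the <0, N, 1, N+1, ...> pattern shown earlier:

    #include <vector>

    // MOVL: element 0 comes from V2 (index NumElems in two-input mask
    // numbering), the remaining elements come from V1 in order.
    static std::vector<int> movlMask(unsigned NumElems) {
      std::vector<int> M{static_cast<int>(NumElems)};  // first lane: V2[0]
      for (unsigned i = 1; i != NumElems; ++i)
        M.push_back(static_cast<int>(i));
      return M;                          // e.g. NumElems = 4: {4, 1, 2, 3}
    }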
-static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, +static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); unsigned Half = NumElems/2; @@ -2954,13 +3028,13 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, MVT VT, SDValue V1, } /// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. -static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, +static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, bool HasSSE2) { if (SV->getValueType(0).getVectorNumElements() <= 4) return SDValue(SV, 0); - - MVT PVT = MVT::v4f32; - MVT VT = SV->getValueType(0); + + EVT PVT = MVT::v4f32; + EVT VT = SV->getValueType(0); DebugLoc dl = SV->getDebugLoc(); SDValue V1 = SV->getOperand(0); int NumElems = VT.getVectorNumElements(); @@ -2976,7 +3050,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, } NumElems >>= 1; } - + // Perform the splat. int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo }; V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1); @@ -2991,7 +3065,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG, static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool isZero, bool HasSSE2, SelectionDAG &DAG) { - MVT VT = V2.getValueType(); + EVT VT = V2.getValueType(); SDValue V1 = isZero ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT); unsigned NumElems = VT.getVectorNumElements(); @@ -3016,7 +3090,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, continue; } SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index); - if (Elt.getNode() && isZeroNode(Elt)) + if (Elt.getNode() && X86::isZeroNode(Elt)) ++NumZeros; else break; @@ -3142,11 +3216,11 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, /// getVShift - Return a vector logical shift node. /// -static SDValue getVShift(bool isLeft, MVT VT, SDValue SrcOp, +static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { bool isMMX = VT.getSizeInBits() == 64; - MVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; + EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL; SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -3171,9 +3245,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl); } - MVT VT = Op.getValueType(); - MVT EVT = VT.getVectorElementType(); - unsigned EVTBits = EVT.getSizeInBits(); + EVT VT = Op.getValueType(); + EVT ExtVT = VT.getVectorElementType(); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumElems = Op.getNumOperands(); unsigned NumZero = 0; @@ -3189,7 +3263,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (Elt.getOpcode() != ISD::Constant && Elt.getOpcode() != ISD::ConstantFP) IsAllConstants = false; - if (isZeroNode(Elt)) + if (X86::isZeroNode(Elt)) NumZero++; else { NonZeros |= (1 << i); @@ -3212,11 +3286,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // insertion that way. Only do this if the value is non-constant or if the // value is a constant being inserted into element 0. It is cheaper to do // a constant pool load than it is to do a movd + shuffle. 
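getShuffleVectorZeroOrUndef, just above, wraps a value V2 in a shuffle whose other operand is a zero (or undef) vector. Its mask body is elided by the hunk, but the shape of the result can be sketched under the usual two-input numbering, where index NumElems means element 0 of the second operand:

    #include <vector>

    // Lane Idx takes V2's element 0; every other lane reads the
    // zero/undef vector V1, so one scalar lands in an otherwise zeroed
    // register.
    static std::vector<int> zeroOrUndefMask(unsigned NumElems, unsigned Idx) {
      std::vector<int> M(NumElems);
      for (unsigned i = 0; i != NumElems; ++i)
        M[i] = (i == Idx) ? static_cast<int>(NumElems)
                          : static_cast<int>(i);
      return M;                  // e.g. NumElems = 4, Idx = 2: {0, 1, 4, 3}
    }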
- if (EVT == MVT::i64 && !Subtarget->is64Bit() && + if (ExtVT == MVT::i64 && !Subtarget->is64Bit() && (!IsAllConstants || Idx == 0)) { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { // Handle MMX and SSE both. - MVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; + EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32; unsigned VecElts = VT == MVT::v2i64 ? 4 : 2; // Truncate the value (which may itself be a constant) to i32, and @@ -3234,7 +3308,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { for (unsigned i = 1; i != VecElts; ++i) Mask.push_back(i); Item = DAG.getVectorShuffle(VecVT, dl, Item, - DAG.getUNDEF(Item.getValueType()), + DAG.getUNDEF(Item.getValueType()), &Mask[0]); } return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item); @@ -3248,15 +3322,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { if (Idx == 0) { if (NumZero == 0) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 || - (EVT == MVT::i64 && Subtarget->is64Bit())) { + } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || + (ExtVT == MVT::i64 && Subtarget->is64Bit())) { Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); - } else if (EVT == MVT::i16 || EVT == MVT::i8) { + } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; + EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), DAG); @@ -3266,7 +3340,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // Is it a vector logical left shift? if (NumElems == 2 && Idx == 1 && - isZeroNode(Op.getOperand(0)) && !isZeroNode(Op.getOperand(1))) { + X86::isZeroNode(Op.getOperand(0)) && + !X86::isZeroNode(Op.getOperand(1))) { unsigned NumBits = VT.getSizeInBits(); return getVShift(true, VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, @@ -3374,9 +3449,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // If we have SSE 4.1, Expand into a number of inserts unless the number of // values to be inserted is equal to the number of elements, in which case // use the unpack code below in the hopes of matching the consecutive elts - // load merge pattern for shuffles. + // load merge pattern for shuffles. // FIXME: We could probably just check that here directly. - if (Values.size() < NumElems && VT.getSizeInBits() == 128 && + if (Values.size() < NumElems && VT.getSizeInBits() == 128 && getSubtarget()->hasSSE41()) { V[0] = DAG.getUNDEF(VT); for (unsigned i = 0; i < NumElems; ++i) @@ -3457,7 +3532,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, } // For SSSE3, If all 8 words of the result come from only 1 quadword of each - // of the two input vectors, shuffle them into one input vector so only a + // of the two input vectors, shuffle them into one input vector so only a // single pshufb instruction is necessary. If There are more than 2 input // quads, disable the next transformation since it does not help SSSE3. 
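The "vector logical left shift" test above leans on a lane-layout fact: with little-endian lane ordering, the two-lane build_vector <0, x> has the same bits as scalar_to_vector(x) shifted left by one lane width, so the whole build can be emitted as a single VSHL of the 64- or 128-bit register. The scalar version of the identity, for two 32-bit lanes packed into 64 bits:

    #include <cstdint>

    // v2i32 <0, X> viewed as one 64-bit value: lane 0 occupies the low
    // 32 bits, so a zero there is exactly a 32-bit logical left shift.
    static uint64_t buildVecZeroX(uint32_t X) {
      return static_cast<uint64_t>(X) << 32;
    }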
bool V1Used = InputQuads[0] || InputQuads[1]; @@ -3481,7 +3556,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, SmallVector MaskV; MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad); MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad); - NewV = DAG.getVectorShuffle(MVT::v2i64, dl, + NewV = DAG.getVectorShuffle(MVT::v2i64, dl, DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1), DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2), &MaskV[0]); NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV); @@ -3506,7 +3581,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, int idx = MaskVals[i]; if (idx < 0) continue; - idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4; + idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4; if ((idx != i) && idx < 4) pshufhw = false; if ((idx != i) && idx > 3) @@ -3521,19 +3596,19 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // If we've eliminated the use of V2, and the new mask is a pshuflw or // pshufhw, that's as cheap as it gets. Return the new shuffle. if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) { - return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, + return DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskVals[0]); } } - + // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. if (TLI.getSubtarget()->hasSSSE3()) { SmallVector pshufbMask; - + // If we have elements from both input vectors, set the high bit of the - // shuffle mask element to zero out elements that come from V2 in the V1 + // shuffle mask element to zero out elements that come from V2 in the V1 // mask, and elements that come from V1 in the V2 mask, so that the two // results can be OR'd together. bool TwoInputs = V1Used && V2Used; @@ -3548,12 +3623,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8)); } V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1); - V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, + V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); if (!TwoInputs) return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1); - + // Calculate the shuffle mask for the second input, shuffle it, and // OR it with the first shuffled input. pshufbMask.clear(); @@ -3568,7 +3643,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8)); } V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2); - V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, + V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &pshufbMask[0], 16)); V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2); @@ -3597,7 +3672,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); } - + // If BestHi >= 0, generate a pshufhw to put the high elements in order, // and update MaskVals with the new element order. 
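The loop above turns a v8i16 shuffle mask into the byte mask PSHUFB wants: each word index expands to two byte selectors, and lanes that should come from the other input get a byte with the high bit set, which PSHUFB turns into zero so the two half-results can simply be OR'd. A standalone sketch (undef lanes are zeroed explicitly here; the DAG code gets the same effect from the negative index's high bit):

    #include <array>
    #include <cstdint>

    // MaskVals holds word indices 0-15 across both inputs, -1 for undef.
    // Returns the 16 byte selectors for the V1 half of the shuffle.
    static std::array<uint8_t, 16>
    v1PshufbMask(const std::array<int, 8> &MaskVals, bool TwoInputs) {
      std::array<uint8_t, 16> Bytes{};
      for (int i = 0; i != 8; ++i) {
        int EltIdx = MaskVals[i] * 2;                // word -> first byte
        if (MaskVals[i] < 0 || (TwoInputs && EltIdx >= 16)) {
          Bytes[2 * i] = Bytes[2 * i + 1] = 0x80;    // zero this lane
        } else {
          Bytes[2 * i]     = static_cast<uint8_t>(EltIdx);     // low byte
          Bytes[2 * i + 1] = static_cast<uint8_t>(EltIdx + 1); // high byte
        }
      }
      return Bytes;
    }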
if (BestHiQuad >= 0) { @@ -3619,7 +3694,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); } - + // In case BestHi & BestLo were both -1, which means each quadword has a word // from each of the four input quadwords, calculate the InOrder bitvector now // before falling through to the insert/extract cleanup. @@ -3629,7 +3704,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, if (MaskVals[i] < 0 || MaskVals[i] == i) InOrder.set(i); } - + // The other elements are put in the right place using pextrw and pinsrw. for (unsigned i = 0; i != 8; ++i) { if (InOrder[i]) @@ -3660,9 +3735,9 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DebugLoc dl = SVOp->getDebugLoc(); SmallVector MaskVals; SVOp->getMask(MaskVals); - + // If we have SSSE3, case 1 is generated when all result bytes come from - // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is + // one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is // present, fall back to case 3. // FIXME: kill V2Only once shuffles are canonizalized by getNode. bool V1Only = true; @@ -3676,13 +3751,13 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, else V1Only = false; } - + // If SSSE3, use 1 pshufb instruction per vector with elements in the result. if (TLI.getSubtarget()->hasSSSE3()) { SmallVector pshufbMask; - + // If all result elements are from one input vector, then only translate - // undef mask values to 0x80 (zero out result) in the pshufb mask. + // undef mask values to 0x80 (zero out result) in the pshufb mask. // // Otherwise, we have elements from both input vectors, and must zero out // elements that come from V2 in the first mask, and V1 in the second mask @@ -3705,7 +3780,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, MVT::v16i8, &pshufbMask[0], 16)); if (!TwoInputs) return V1; - + // Calculate the shuffle mask for the second input, shuffle it, and // OR it with the first shuffled input. pshufbMask.clear(); @@ -3722,7 +3797,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, MVT::v16i8, &pshufbMask[0], 16)); return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2); } - + // No SSSE3 - Calculate in place words and then fix all out of place words // With 0-16 extracts & inserts. Worst case is 16 bytes out of order from // the 16 different words that comprise the two doublequadword input vectors. @@ -3732,17 +3807,17 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, for (int i = 0; i != 8; ++i) { int Elt0 = MaskVals[i*2]; int Elt1 = MaskVals[i*2+1]; - + // This word of the result is all undef, skip it. if (Elt0 < 0 && Elt1 < 0) continue; - + // This word of the result is already in the correct place, skip it. if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1)) continue; if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17)) continue; - + SDValue Elt0Src = Elt0 < 16 ? V1 : V2; SDValue Elt1Src = Elt1 < 16 ? V1 : V2; SDValue InsElt; @@ -3801,15 +3876,15 @@ static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, TargetLowering &TLI, DebugLoc dl) { - MVT VT = SVOp->getValueType(0); + EVT VT = SVOp->getValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 
2 : 4; - MVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); - MVT MaskEltVT = MaskVT.getVectorElementType(); - MVT NewVT = MaskVT; - switch (VT.getSimpleVT()) { + EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); + EVT MaskEltVT = MaskVT.getVectorElementType(); + EVT NewVT = MaskVT; + switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); case MVT::v4f32: NewVT = MVT::v2f64; break; case MVT::v4i32: NewVT = MVT::v2i64; break; @@ -3849,7 +3924,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, /// getVZextMovL - Return a zero-extending vector move low node. /// -static SDValue getVZextMovL(MVT VT, MVT OpVT, +static SDValue getVZextMovL(EVT VT, EVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, DebugLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { @@ -3859,11 +3934,11 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, if (!LD) { // movssrr and movsdrr do not clear top bits. Try to use movd, movq // instead. - MVT EVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; - if ((EVT != MVT::i64 || Subtarget->is64Bit()) && + MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32; + if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) && SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR && SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT && - SrcOp.getOperand(0).getOperand(0).getValueType() == EVT) { + SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) { // PR2108 OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32; return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -3889,8 +3964,8 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); - MVT VT = SVOp->getValueType(0); - + EVT VT = SVOp->getValueType(0); + SmallVector, 8> Locs; Locs.resize(4); SmallVector Mask1(4U, -1); @@ -3926,7 +4001,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]); SmallVector Mask2(4U, -1); - + for (unsigned i = 0; i != 4; ++i) { if (Locs[i].first == -1) continue; @@ -4036,7 +4111,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); bool isMMX = VT.getSizeInBits() == 64; @@ -4050,7 +4125,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // Promote splats to v4f32. if (SVOp->isSplat()) { - if (isMMX || NumElems < 4) + if (isMMX || NumElems < 4) return Op; return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2()); } @@ -4079,10 +4154,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG, Subtarget, dl); } } - + if (X86::isPSHUFDMask(SVOp)) return Op; - + // Check if this can be converted into a logical shift. bool isLeft = false; unsigned ShAmt = 0; @@ -4092,11 +4167,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. 
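RewriteAsNarrowerShuffle, above, tries to re-express a v8i16 or v16i8 shuffle in a wider element type so a cheaper v4i32/v2i64 pattern can match. A simplified sketch of the test for the pair case (the real routine also handles groups of four and tolerates partially-undef groups, which this version does not):

    #include <cstddef>
    #include <vector>

    // Each adjacent pair must move together: Mask[2i] even and
    // Mask[2i+1] == Mask[2i] + 1. On success, Wide holds the mask in the
    // doubled element type.
    static bool canWidenPairs(const std::vector<int> &Mask,
                              std::vector<int> &Wide) {
      for (std::size_t i = 0; i + 1 < Mask.size(); i += 2) {
        int Lo = Mask[i], Hi = Mask[i + 1];
        if (Lo < 0 && Hi < 0) { Wide.push_back(-1); continue; }
        if (Lo < 0 || (Lo & 1) || Hi != Lo + 1)
          return false;
        Wide.push_back(Lo / 2);
      }
      return true;
    }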
- MVT EVT = VT.getVectorElementType(); - ShAmt *= EVT.getSizeInBits(); + EVT EltVT = VT.getVectorElementType(); + ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + if (X86::isMOVLMask(SVOp)) { if (V1IsUndef) return V2; @@ -4105,7 +4180,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX) return Op; } - + // FIXME: fold these into legal mask. if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || X86::isMOVSLDUPMask(SVOp) || @@ -4120,11 +4195,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isShift) { // No better options. Use a vshl / vsrl. - MVT EVT = VT.getVectorElementType(); - ShAmt *= EVT.getSizeInBits(); + EVT EltVT = VT.getVectorElementType(); + ShAmt *= EltVT.getSizeInBits(); return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl); } - + bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. @@ -4144,7 +4219,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) { // Shuffling low element of v1 into undef, just return v1. - if (V2IsUndef) + if (V2IsUndef) return V1; // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which // the instruction selector will not match, so get a canonical MOVL with @@ -4196,7 +4271,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SVOp->getMask(PermMask); if (isShuffleMaskLegal(PermMask, VT)) return Op; - + // Handle v8i16 specifically since SSE can do byte extraction and insertion. if (VT == MVT::v8i16) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this); @@ -4209,7 +4284,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (NewOp.getNode()) return NewOp; } - + // Handle all 4 wide cases with a number of shuffles except for MMX. if (NumElems == 4 && !isMMX) return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG); @@ -4220,7 +4295,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, @@ -4283,7 +4358,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { return Res; } - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { @@ -4296,21 +4371,21 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { MVT::v4i32, Vec), Op.getOperand(1))); // Transform it so it match pextrw which produces a 32-bit result. - MVT EVT = (MVT::SimpleValueType)(VT.getSimpleVT()+1); - SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EVT, + EVT EltVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy+1); + SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Op.getOperand(0), Op.getOperand(1)); - SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EVT, Extract, + SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract, DAG.getValueType(VT)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); } else if (VT.getSizeInBits() == 32) { unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); if (Idx == 0) return Op; - + // SHUFPS the element to the lowest double word, then movss. 
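The commuted-MOVL path above relies on CommuteVectorShuffle swapping V1 and V2 and rewriting the mask (the helper retyped to EVT earlier in this patch). The rewrite itself is a fixed translation, sketched here standalone:

    #include <vector>

    // After swapping the operands, indices that pointed into V1 must
    // point into V2 and vice versa, so every defined index moves by
    // NumElems; undef (-1) stays undef.
    static void commuteMask(std::vector<int> &Mask, int NumElems) {
      for (int &Idx : Mask) {
        if (Idx < 0) continue;
        Idx = (Idx < NumElems) ? Idx + NumElems : Idx - NumElems;
      }
    }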
int Mask[4] = { Idx, -1, -1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + EVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); @@ -4326,8 +4401,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // Note if the lower 64 bits of the result of the UNPCKHPD is then stored // to a f64mem, the whole operation is folded into a single MOVHPDmr. int Mask[2] = { 1, -1 }; - MVT VVT = Op.getOperand(0).getValueType(); - SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), + EVT VVT = Op.getOperand(0).getValueType(); + SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, DAG.getIntPtrConstant(0)); @@ -4338,18 +4413,18 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ - MVT VT = Op.getValueType(); - MVT EVT = VT.getVectorElementType(); + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if ((EVT.getSizeInBits() == 8 || EVT.getSizeInBits() == 16) && + if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa(N2)) { - unsigned Opc = (EVT.getSizeInBits() == 8) ? X86ISD::PINSRB - : X86ISD::PINSRW; + unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB + : X86ISD::PINSRW; // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) @@ -4357,7 +4432,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast(N2)->getZExtValue()); return DAG.getNode(Opc, dl, VT, N0, N1, N2); - } else if (EVT == MVT::f32 && isa(N2)) { + } else if (EltVT == MVT::f32 && isa(N2)) { // Bits [7:6] of the constant are the source select. This will always be // zero here. The DAG Combiner may combine an extract_elt index into these // bits. For example (insert (extract, 3), 2) could be matched by putting @@ -4367,24 +4442,25 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. N2 = DAG.getIntPtrConstant(cast(N2)->getZExtValue() << 4); + // Create this as a scalar to vector.. + N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); - } else if (EVT == MVT::i32) { - // InsertPS works with constant index. - if (isa(N2)) - return Op; + } else if (EltVT == MVT::i32 && isa(N2)) { + // PINSR* works with constant index. 
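The INSERTPS immediate assembled above packs three fields into one byte, which is why the lowering shifts the destination index left by four. The layout, as a small helper (field names are mine; the encoding follows the instruction's documented immediate format):

    #include <cstdint>

    // Bits [7:6]: source lane select; bits [5:4]: destination lane;
    // bits [3:0]: per-lane zero mask.
    static uint8_t insertpsImm(unsigned SrcLane, unsigned DstLane,
                               unsigned ZeroMask) {
      return static_cast<uint8_t>(((SrcLane & 3) << 6) |
                                  ((DstLane & 3) << 4) |
                                  (ZeroMask & 0xF));
    }

    // e.g. insert source lane 0 into destination lane 2, no zeroing:
    // insertpsImm(0, 2, 0) == 0x20.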
+ return Op; } return SDValue(); } SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT EVT = VT.getVectorElementType(); + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); if (Subtarget->hasSSE41()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); - if (EVT == MVT::i8) + if (EltVT == MVT::i8) return SDValue(); DebugLoc dl = Op.getDebugLoc(); @@ -4392,7 +4468,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (EVT.getSizeInBits() == 16 && isa(N2)) { + if (EltVT.getSizeInBits() == 16 && isa(N2)) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. if (N1.getValueType() != MVT::i32) @@ -4413,9 +4489,12 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op.getOperand(0)))); + if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); + SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - MVT VT = MVT::v2i32; - switch (Op.getValueType().getSimpleVT()) { + EVT VT = MVT::v2i32; + switch (Op.getValueType().getSimpleVT().SimpleTy) { default: break; case MVT::v16i8: case MVT::v8i16: @@ -4435,21 +4514,21 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { ConstantPoolSDNode *CP = cast(Op); - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleRIPRel() && - getTargetMachine().getCodeModel() == CodeModel::Small) - WrapperKind = X86ISD::WrapperRIP; - } - + CodeModel::Model M = getTargetMachine().getCodeModel(); + + if (Subtarget->isPICStyleRIPRel() && + (M == CodeModel::Small || M == CodeModel::Kernel)) + WrapperKind = X86ISD::WrapperRIP; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleStubPIC()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(), CP->getAlignment(), CP->getOffset(), OpFlag); @@ -4468,25 +4547,26 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { JumpTableSDNode *JT = cast(Op); - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. 
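LowerConstantPool, LowerJumpTable, and LowerExternalSymbol now all pick their wrapper and operand flag the same way, keyed on PIC style and code model rather than on Reloc::PIC_ alone. Condensed into one function (the enums are illustrative stand-ins, not LLVM types):

    enum Wrapper { Plain, RIPRel };
    enum Flag    { None, GotOff, PicBaseOffset };

    struct PICStyle { bool RIPRelPIC, GOTPIC, StubPIC; };

    static void classifyRef(PICStyle S, bool SmallOrKernelCM,
                            Wrapper &W, Flag &F) {
      W = Plain; F = None;
      if (S.RIPRelPIC && SmallOrKernelCM)
        W = RIPRel;         // x86-64: fold into a RIP-relative address
      else if (S.GOTPIC)
        F = GotOff;         // 32-bit ELF PIC: @GOTOFF from the GOT base
      else if (S.StubPIC)
        F = PicBaseOffset;  // Darwin-style stub PIC: offset from the pic base
    }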
unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleRIPRel()) - WrapperKind = X86ISD::WrapperRIP; - } - + CodeModel::Model M = getTargetMachine().getCodeModel(); + + if (Subtarget->isPICStyleRIPRel() && + (M == CodeModel::Small || M == CodeModel::Kernel)) + WrapperKind = X86ISD::WrapperRIP; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleStubPIC()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), OpFlag); DebugLoc DL = JT->getDebugLoc(); Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - + // With PIC, the address is actually $g + Offset. if (OpFlag) { Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), @@ -4494,43 +4574,44 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { DebugLoc::getUnknownLoc(), getPointerTy()), Result); } - + return Result; } SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { const char *Sym = cast(Op)->getSymbol(); - + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (Subtarget->isPICStyleStub()) - OpFlag = X86II::MO_PIC_BASE_OFFSET; - else if (Subtarget->isPICStyleGOT()) - OpFlag = X86II::MO_GOTOFF; - else if (Subtarget->isPICStyleRIPRel()) - WrapperKind = X86ISD::WrapperRIP; - } - + CodeModel::Model M = getTargetMachine().getCodeModel(); + + if (Subtarget->isPICStyleRIPRel() && + (M == CodeModel::Small || M == CodeModel::Kernel)) + WrapperKind = X86ISD::WrapperRIP; + else if (Subtarget->isPICStyleGOT()) + OpFlag = X86II::MO_GOTOFF; + else if (Subtarget->isPICStyleStubPIC()) + OpFlag = X86II::MO_PIC_BASE_OFFSET; + SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); - + DebugLoc DL = Op.getDebugLoc(); Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); - - + + // With PIC, the address is actually $g + Offset. if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && - !Subtarget->isPICStyleRIPRel()) { + !Subtarget->is64Bit()) { Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(), getPointerTy()), Result); } - + return Result; } @@ -4538,53 +4619,37 @@ SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, int64_t Offset, SelectionDAG &DAG) const { - bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; - bool ExtraLoadRequired = - Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false); - // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. + unsigned char OpFlags = + Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); + CodeModel::Model M = getTargetMachine().getCodeModel(); SDValue Result; - if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) { + if (OpFlags == X86II::MO_NO_FLAG && + X86::isOffsetSuitableForCodeModel(Offset, M)) { + // A direct static reference to a global. 
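The core of X86::isOffsetSuitableForCodeModel, as used above, is that a folded global+offset must survive as a 32-bit signed displacement in the addressing mode; the real predicate also keys on the code model, which this sketch glosses over:

    #include <cstdint>

    // Rough shape of the test: the offset must be exactly representable
    // as a signed 32-bit displacement (disp32).
    static bool offsetFitsDisp32(int64_t Offset) {
      return Offset == static_cast<int64_t>(static_cast<int32_t>(Offset));
    }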
Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); Offset = 0; } else { - unsigned char OpFlags = 0; - - if (Subtarget->isPICStyleRIPRel() && - getTargetMachine().getRelocationModel() != Reloc::Static) { - if (ExtraLoadRequired) - OpFlags = X86II::MO_GOTPCREL; - } else if (Subtarget->isPICStyleGOT() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) { - if (ExtraLoadRequired) - OpFlags = X86II::MO_GOT; - else - OpFlags = X86II::MO_GOTOFF; - } - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); } - + if (Subtarget->isPICStyleRIPRel() && - getTargetMachine().getCodeModel() == CodeModel::Small) + (M == CodeModel::Small || M == CodeModel::Kernel)) Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); else Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. - if (IsPic && !Subtarget->isPICStyleRIPRel()) { + if (isGlobalRelativeToPICBase(OpFlags)) { Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), Result); } - // For Darwin & Mingw32, external and weak symbols are indirect, so we want to - // load the value at address GV, not the value of GV itself. This means that - // the GlobalAddress must be in the base or index register of the address, not - // the GV offset field. Platform check is inside GVRequiresExtraLoad() call - // The same applies for external symbols during PIC codegen - if (ExtraLoadRequired) + // For globals that require a load from a stub to get the address, emit the + // load. + if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, PseudoSourceValue::getGOT(), 0); @@ -4606,7 +4671,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, - SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg, + SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, unsigned char OperandFlags) { SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); @@ -4628,7 +4693,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const MVT PtrVT) { + const EVT PtrVT) { SDValue InFlag; DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, @@ -4643,7 +4708,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const MVT PtrVT) { + const EVT PtrVT) { return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX, X86II::MO_TLSGD); } @@ -4651,7 +4716,7 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or // "local exec" model. 
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, - const MVT PtrVT, TLSModel::Model model, + const EVT PtrVT, TLSModel::Model model, bool is64Bit) { DebugLoc dl = GA->getDebugLoc(); // Get the Thread Pointer @@ -4677,7 +4742,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, assert(model == TLSModel::InitialExec); OperandFlags = X86II::MO_INDNTPOFF; } - + // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), @@ -4701,29 +4766,29 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast(Op); const GlobalValue *GV = GA->getGlobal(); - + // If GV is an alias then use the aliasee for determining // thread-localness. if (const GlobalAlias *GA = dyn_cast(GV)) GV = GA->resolveAliasedGlobal(false); - + TLSModel::Model model = getTLSModel(GV, getTargetMachine().getRelocationModel()); - + switch (model) { case TLSModel::GeneralDynamic: case TLSModel::LocalDynamic: // not implemented if (Subtarget->is64Bit()) return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); - + case TLSModel::InitialExec: case TLSModel::LocalExec: return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit()); } - - assert(0 && "Unreachable"); + + llvm_unreachable("Unreachable"); return SDValue(); } @@ -4732,17 +4797,16 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { /// take a 2 x i32 value to shift plus a shift amount. SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); bool isSRA = Op.getOpcode() == ISD::SRA_PARTS; SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue Tmp1 = isSRA ? - DAG.getNode(ISD::SRA, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, MVT::i8)) : - DAG.getConstant(0, VT); + SDValue Tmp1 = isSRA ? 
DAG.getNode(ISD::SRA, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, MVT::i8)) + : DAG.getConstant(0, VT); SDValue Tmp2, Tmp3; if (Op.getOpcode() == ISD::SHL_PARTS) { @@ -4754,9 +4818,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { } SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, - DAG.getConstant(VTBits, MVT::i8)); + DAG.getConstant(VTBits, MVT::i8)); SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT, - AndNode, DAG.getConstant(0, MVT::i8)); + AndNode, DAG.getConstant(0, MVT::i8)); SDValue Hi, Lo; SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8); @@ -4776,7 +4840,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { - MVT SrcVT = Op.getOperand(0).getValueType(); + EVT SrcVT = Op.getOperand(0).getValueType(); if (SrcVT.isVector()) { if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) { @@ -4808,7 +4872,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } -SDValue X86TargetLowering::BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, +SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) { // Build the FILD @@ -4888,19 +4952,22 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { */ DebugLoc dl = Op.getDebugLoc(); + LLVMContext *Context = DAG.getContext(); // Build some magic constants. std::vector CV0; - CV0.push_back(ConstantInt::get(APInt(32, 0x45300000))); - CV0.push_back(ConstantInt::get(APInt(32, 0x43300000))); - CV0.push_back(ConstantInt::get(APInt(32, 0))); - CV0.push_back(ConstantInt::get(APInt(32, 0))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0))); + CV0.push_back(ConstantInt::get(*Context, APInt(32, 0))); Constant *C0 = ConstantVector::get(CV0); SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); std::vector CV1; - CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL)))); - CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL)))); + CV1.push_back( + ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL)))); + CV1.push_back( + ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); @@ -4965,7 +5032,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - MVT DestVT = Op.getValueType(); + EVT DestVT = Op.getValueType(); if (DestVT.bitsLT(MVT::f64)) { return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, @@ -4988,7 +5055,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); - MVT SrcVT = N0.getValueType(); + EVT SrcVT = N0.getValueType(); if (SrcVT == MVT::i64) { // We only handle SSE2 f64 target here; caller can expand the rest. 
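The CV0/CV1 magic constants above implement the classic unsigned 64-bit to double conversion: plant the two 32-bit halves in the mantissas of doubles with exponents 2^84 and 2^52, then subtract those exponent-only constants so the halves drop out exactly. A scalar rendering of the same trick (hex float literals used for the constants):

    #include <cstdint>
    #include <cstring>

    static double u64ToF64(uint64_t V) {
      uint64_t HiBits = 0x4530000000000000ULL | (V >> 32);    // 2^84 + hi*2^32
      uint64_t LoBits = 0x4330000000000000ULL | (V & 0xFFFFFFFFULL); // 2^52 + lo
      double Hi, Lo;
      std::memcpy(&Hi, &HiBits, sizeof(Hi));
      std::memcpy(&Lo, &LoBits, sizeof(Lo));
      // Both subtractions are exact; the only rounding is the final add,
      // matching a correctly rounded u64 -> f64 conversion.
      return (Hi - 0x1.0p84) + (Lo - 0x1.0p52);
    }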
if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) @@ -5017,7 +5084,7 @@ std::pair X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { DebugLoc dl = Op.getDebugLoc(); - MVT DstTy = Op.getValueType(); + EVT DstTy = Op.getValueType(); if (!IsSigned) { assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); @@ -5043,10 +5110,10 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); - + unsigned Opc; - switch (DstTy.getSimpleVT()) { - default: assert(0 && "Invalid FP_TO_SINT to lower!"); + switch (DstTy.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid FP_TO_SINT to lower!"); case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break; case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break; case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break; @@ -5105,18 +5172,19 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { + LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT EltVT = VT; + EVT VT = Op.getValueType(); + EVT EltVT = VT; if (VT.isVector()) EltVT = VT.getVectorElementType(); std::vector CV; if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63)))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))); CV.push_back(C); CV.push_back(C); } else { - Constant *C = ConstantFP::get(APFloat(APInt(32, ~(1U << 31)))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))); CV.push_back(C); CV.push_back(C); CV.push_back(C); @@ -5131,21 +5199,19 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { + LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT EltVT = VT; - unsigned EltNum = 1; - if (VT.isVector()) { + EVT VT = Op.getValueType(); + EVT EltVT = VT; + if (VT.isVector()) EltVT = VT.getVectorElementType(); - EltNum = VT.getVectorNumElements(); - } std::vector CV; if (EltVT == MVT::f64) { - Constant *C = ConstantFP::get(APFloat(APInt(64, 1ULL << 63))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))); CV.push_back(C); CV.push_back(C); } else { - Constant *C = ConstantFP::get(APFloat(APInt(32, 1U << 31))); + Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))); CV.push_back(C); CV.push_back(C); CV.push_back(C); @@ -5168,11 +5234,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { + LLVMContext *Context = DAG.getContext(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); - MVT VT = Op.getValueType(); - MVT SrcVT = Op1.getValueType(); + EVT VT = Op.getValueType(); + EVT SrcVT = Op1.getValueType(); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -5191,13 +5258,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // First get the sign bit of second operand. 
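LowerFABS and LowerFNEG above build constant-pool vectors of the same two masks: FABS ANDs the sign bit away with ~(1 << 63) (or ~(1 << 31) for f32), FNEG XORs the sign bit in. The f64 case in scalar form:

    #include <cstdint>
    #include <cstring>

    static double fabsViaMask(double X) {
      uint64_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits));
      Bits &= ~(1ULL << 63);            // ANDPD with the pool constant
      std::memcpy(&X, &Bits, sizeof(X));
      return X;
    }

FNEG is the same routine with Bits ^= (1ULL << 63) in place of the AND.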
std::vector CV; if (SrcVT == MVT::f64) { - CV.push_back(ConstantFP::get(APFloat(APInt(64, 1ULL << 63)))); - CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(APFloat(APInt(32, 1U << 31)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); } Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -5220,13 +5287,13 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // Clear first operand sign bit. CV.clear(); if (VT == MVT::f64) { - CV.push_back(ConstantFP::get(APFloat(APInt(64, ~(1ULL << 63))))); - CV.push_back(ConstantFP::get(APFloat(APInt(64, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0)))); } else { - CV.push_back(ConstantFP::get(APFloat(APInt(32, ~(1U << 31))))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); - CV.push_back(ConstantFP::get(APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); + CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0)))); } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); @@ -5299,21 +5366,48 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, Opcode = X86ISD::ADD; NumOperands = 2; break; + case ISD::AND: { + // If the primary and result isn't used, don't bother using X86ISD::AND, + // because a TEST instruction will be better. + bool NonFlagUse = false; + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::BRCOND && + UI->getOpcode() != ISD::SELECT && + UI->getOpcode() != ISD::SETCC) { + NonFlagUse = true; + break; + } + if (!NonFlagUse) + break; + } + // FALL THROUGH case ISD::SUB: - // Due to the ISEL shortcoming noted above, be conservative if this sub is + case ISD::OR: + case ISD::XOR: + // Due to the ISEL shortcoming noted above, be conservative if this op is // likely to be selected as part of a load-modify-store instruction. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) if (UI->getOpcode() == ISD::STORE) goto default_case; - // Otherwise use a regular EFLAGS-setting sub. - Opcode = X86ISD::SUB; + // Otherwise use a regular EFLAGS-setting instruction. 
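Rounding out the sign-bit family, the FCOPYSIGN hunk at the top of this chunk combines both masks: take the sign bit of the second operand, clear the first operand's own sign bit, and OR the two together. The scalar equivalent:

    #include <cstdint>
    #include <cstring>

    static double copysignViaMasks(double Mag, double Sgn) {
      uint64_t M, S, R;
      std::memcpy(&M, &Mag, sizeof(M));
      std::memcpy(&S, &Sgn, sizeof(S));
      R = (M & ~(1ULL << 63))           // magnitude, sign cleared
        | (S &  (1ULL << 63));          // sign of the second operand
      double Out;
      std::memcpy(&Out, &R, sizeof(Out));
      return Out;
    }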
+ switch (Op.getNode()->getOpcode()) { + case ISD::SUB: Opcode = X86ISD::SUB; break; + case ISD::OR: Opcode = X86ISD::OR; break; + case ISD::XOR: Opcode = X86ISD::XOR; break; + case ISD::AND: Opcode = X86ISD::AND; break; + default: llvm_unreachable("unexpected operator!"); + } NumOperands = 2; break; case X86ISD::ADD: case X86ISD::SUB: case X86ISD::INC: case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: return SDValue(Op.getNode(), 1); default: default_case: @@ -5419,14 +5513,14 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue CC = Op.getOperand(2); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); DebugLoc dl = Op.getDebugLoc(); if (isFP) { unsigned SSECC = 8; - MVT VT0 = Op0.getValueType(); + EVT VT0 = Op0.getValueType(); assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64); unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD; bool Swap = false; @@ -5469,7 +5563,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ); } - assert(0 && "Illegal FP comparison"); + llvm_unreachable("Illegal FP comparison"); } // Handle all other FP comparisons here. return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8)); @@ -5481,10 +5575,13 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { unsigned Opc = 0, EQOpc = 0, GTOpc = 0; bool Swap = false, Invert = false, FlipSigns = false; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: break; + case MVT::v8i8: case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; + case MVT::v4i16: case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; + case MVT::v2i32: case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } @@ -5508,7 +5605,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), EltVT); std::vector SignBits(VT.getVectorNumElements(), SignBit); @@ -5538,7 +5635,10 @@ static bool isX86LogicalCmp(SDValue Op) { Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || Opc == X86ISD::INC || - Opc == X86ISD::DEC)) + Opc == X86ISD::DEC || + Opc == X86ISD::OR || + Opc == X86ISD::XOR || + Opc == X86ISD::AND)) return true; return false; @@ -5560,7 +5660,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { SDValue Cmp = Cond.getOperand(1); unsigned Opc = Cmp.getOpcode(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); bool IllegalFPCMov = false; if (VT.isFloatingPoint() && !VT.isVector() && @@ -5751,8 +5851,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; - MVT IntPtr = getPointerTy(); - MVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + EVT IntPtr = getPointerTy(); + EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); @@ -5802,8 +5902,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { - MVT IntPtr = getPointerTy(); - const Type *IntPtrTy = TD->getIntPtrType(); + EVT IntPtr = getPointerTy(); + const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; @@ -5812,8 +5912,9 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Entry.Node = Size; Args.push_back(Entry); std::pair CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, false, - 0, CallingConv::C, false, + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, false, /*isReturnValueUsed=*/false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -5824,7 +5925,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, uint64_t SizeVal = ConstantSize->getZExtValue(); SDValue InFlag(0, 0); - MVT AVT; + EVT AVT; SDValue Count; ConstantSDNode *ValC = dyn_cast(Src); unsigned BytesLeft = 0; @@ -5893,7 +5994,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, if (TwoRepStos) { InFlag = Chain.getValue(1); Count = Size; - MVT CVT = Count.getValueType(); + EVT CVT = Count.getValueType(); SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : @@ -5909,8 +6010,8 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, } else if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; - MVT AddrVT = Dst.getValueType(); - MVT SizeVT = Size.getValueType(); + EVT AddrVT = Dst.getValueType(); + EVT SizeVT = Size.getValueType(); Chain = DAG.getMemset(Chain, dl, DAG.getNode(ISD::ADD, dl, AddrVT, Dst, @@ -5945,7 +6046,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, return SDValue(); // DWORD aligned - MVT AVT = MVT::i32; + EVT AVT = MVT::i32; if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned AVT = MVT::i64; @@ -5980,9 +6081,9 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; - MVT DstVT = Dst.getValueType(); - MVT SrcVT = Src.getValueType(); - MVT SizeVT = Size.getValueType(); + EVT DstVT = Dst.getValueType(); + EVT SrcVT = Src.getValueType(); + EVT SizeVT = Size.getValueType(); Results.push_back(DAG.getMemcpy(Chain, dl, DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, DstVT)), @@ -6054,8 +6155,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue SrcPtr = Op.getOperand(1); SDValue SrcSV = Op.getOperand(2); - assert(0 && "VAArgInst is not yet implemented for x86-64!"); - abort(); + llvm_report_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); } @@ -6179,6 +6279,36 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // ptest intrinsics. 
The intrinsic these come from are designed to return + // an integer value, not just an instruction so lower it to the ptest + // pattern and a setcc for the result. + case Intrinsic::x86_sse41_ptestz: + case Intrinsic::x86_sse41_ptestc: + case Intrinsic::x86_sse41_ptestnzc:{ + unsigned X86CC = 0; + switch (IntNo) { + default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); + case Intrinsic::x86_sse41_ptestz: + // ZF = 1 + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse41_ptestc: + // CF = 1 + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse41_ptestnzc: + // ZF and CF = 0 + X86CC = X86::COND_A; + break; + } + + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS); + SDValue CC = DAG.getConstant(X86CC, MVT::i8); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } // Fix vector shift instructions where the last operand is a non-immediate // i32 value. @@ -6203,7 +6333,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return SDValue(); unsigned NewIntNo = 0; - MVT ShAmtVT = MVT::v4i32; + EVT ShAmtVT = MVT::v4i32; switch (IntNo) { case Intrinsic::x86_sse2_pslli_w: NewIntNo = Intrinsic::x86_sse2_psll_w; @@ -6256,14 +6386,28 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case Intrinsic::x86_mmx_psrai_d: NewIntNo = Intrinsic::x86_mmx_psra_d; break; - default: abort(); // Can't reach here. + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } break; } } - MVT VT = Op.getValueType(); - ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShAmtVT, ShAmt)); + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + SDValue ShOps[4]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, MVT::i32); + if (ShAmtVT == MVT::v4i32) { + ShOps[2] = DAG.getUNDEF(MVT::i32); + ShOps[3] = DAG.getUNDEF(MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); + } else { + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + } + + EVT VT = Op.getValueType(); + ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); @@ -6295,7 +6439,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; @@ -6401,12 +6545,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, } else { const Function *Func = cast(cast(Op.getOperand(5))->getValue()); - unsigned CC = Func->getCallingConv(); + CallingConv::ID CC = Func->getCallingConv(); unsigned NestReg; switch (CC) { default: - assert(0 && "Unsupported calling convention"); + llvm_unreachable("Unsupported calling convention"); case CallingConv::C: case CallingConv::X86_StdCall: { // Pass 'nest' parameter in ECX. 
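The ptest lowering added above works because PTEST folds a pair of vector tests into EFLAGS: ZF reports whether (a AND b) is all zero and CF whether ((NOT a) AND b) is, so the three intrinsics reduce to setcc on the E, B, and A conditions respectively. A 64-bit scalar model of the flag semantics:

    #include <cstdint>

    struct PTestFlags { bool ZF, CF; };

    static PTestFlags ptest(uint64_t A, uint64_t B) {
      return { (A & B) == 0, (~A & B) == 0 };
    }

    static bool testz  (uint64_t A, uint64_t B) { return ptest(A, B).ZF; } // COND_E
    static bool testc  (uint64_t A, uint64_t B) { return ptest(A, B).CF; } // COND_B
    static bool testnzc(uint64_t A, uint64_t B) {                          // COND_A
      PTestFlags F = ptest(A, B);
      return !F.ZF && !F.CF;
    }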
@@ -6428,8 +6572,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - cerr << "Nest register in use - reduce number of inreg parameters!\n"; - abort(); + llvm_report_error("Nest register in use - reduce number of inreg parameters!"); } } break; @@ -6499,7 +6642,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { const TargetMachine &TM = MF.getTarget(); const TargetFrameInfo &TFI = *TM.getFrameInfo(); unsigned StackAlignment = TFI.getStackAlignment(); - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // Save FP Control Word to stack slot @@ -6537,8 +6680,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT OpVT = VT; + EVT VT = Op.getValueType(); + EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -6570,8 +6713,8 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); - MVT OpVT = VT; + EVT VT = Op.getValueType(); + EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -6599,7 +6742,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getValueType(); + EVT VT = Op.getValueType(); assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); @@ -6656,7 +6799,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); switch (Op.getOpcode()) { - default: assert(0 && "Unknown ovf instruction!"); + default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: // A subtract of one will be selected as a INC. Note that INC doesn't // set CF, so we can't do this for UADDO. 
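The UADDO caveat above is purely about flags: INC updates OF but leaves CF untouched, so a signed add of one may be selected as INC while the unsigned form must stay an ADD. A scalar sketch of the two overflow conditions the lowering reads (plain C++, not the DAG code):

    #include <cstdint>

    static bool saddOverflows(int32_t a, int32_t b) {
      int64_t wide = int64_t(a) + int64_t(b);
      return wide < INT32_MIN || wide > INT32_MAX;  // OF after a 32-bit add
    }

    static bool uaddOverflows(uint32_t a, uint32_t b) {
      return a + b < a;                             // CF after a 32-bit add
    }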
@@ -6712,11 +6855,11 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { - MVT T = Op.getValueType(); + EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned Reg = 0; unsigned size = 0; - switch(T.getSimpleVT()) { + switch(T.getSimpleVT().SimpleTy) { default: assert(false && "Invalid value type!"); case MVT::i8: Reg = X86::AL; size = 1; break; @@ -6763,7 +6906,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); - MVT T = Node->getValueType(0); + EVT T = Node->getValueType(0); SDValue negOp = DAG.getNode(ISD::SUB, dl, T, DAG.getConstant(0, T), Node->getOperand(2)); return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, @@ -6778,7 +6921,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { /// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - default: assert(0 && "Should not custom lower this!"); + default: llvm_unreachable("Should not custom lower this!"); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -6805,9 +6948,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::CALL: return LowerCALL(Op, DAG); - case ISD::RET: return LowerRET(Op, DAG); - case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); @@ -6836,7 +6976,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void X86TargetLowering:: ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl&Results, SelectionDAG &DAG, unsigned NewOp) { - MVT T = Node->getValueType(0); + EVT T = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); assert (T == MVT::i64 && "Only know how to expand i64 atomics"); @@ -6846,12 +6986,11 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl&Results, Node->getOperand(2), DAG.getIntPtrConstant(0)); SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Node->getOperand(2), DAG.getIntPtrConstant(1)); - // This is a generalized SDNode, not an AtomicSDNode, so it doesn't - // have a MemOperand. Pass the info through as a normal operand. - SDValue LSI = DAG.getMemOperand(cast(Node)->getMemOperand()); - SDValue Ops[] = { Chain, In1, In2L, In2H, LSI }; + SDValue Ops[] = { Chain, In1, In2L, In2H }; SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = DAG.getNode(NewOp, dl, Tys, Ops, 5); + SDValue Result = + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64, + cast(Node)->getMemOperand()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); Results.push_back(Result.getValue(2)); @@ -6872,7 +7011,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, FP_TO_INTHelper(SDValue(N, 0), DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; if (FIST.getNode() != 0) { - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); // Return a load from the stack slot. 
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); } @@ -6893,7 +7032,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ATOMIC_CMP_SWAP: { - MVT T = N->getValueType(0); + EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); SDValue cpInL, cpInH; cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2), @@ -6969,7 +7108,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FLD: return "X86ISD::FLD"; case X86ISD::FST: return "X86ISD::FST"; case X86ISD::CALL: return "X86ISD::CALL"; - case X86ISD::TAILCALL: return "X86ISD::TAILCALL"; case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; case X86ISD::BT: return "X86ISD::BT"; case X86ISD::CMP: return "X86ISD::CMP"; @@ -7027,7 +7165,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::INC: return "X86ISD::INC"; case X86ISD::DEC: return "X86ISD::DEC"; + case X86ISD::OR: return "X86ISD::OR"; + case X86ISD::XOR: return "X86ISD::XOR"; + case X86ISD::AND: return "X86ISD::AND"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; + case X86ISD::PTEST: return "X86ISD::PTEST"; + case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; } } @@ -7036,28 +7179,28 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { // X86 supports extremely general addressing modes. + CodeModel::Model M = getTargetMachine().getCodeModel(); // X86 allows a sign-extended 32-bit immediate field as a displacement. - if (AM.BaseOffs <= -(1LL << 32) || AM.BaseOffs >= (1LL << 32)-1) + if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL)) return false; if (AM.BaseGV) { - // We can only fold this if we don't need an extra load. - if (Subtarget->GVRequiresExtraLoad(AM.BaseGV, getTargetMachine(), false)) + unsigned GVFlags = + Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine()); + + // If a reference to this global requires an extra load, we can't fold it. + if (isGlobalStubReference(GVFlags)) return false; - // If BaseGV requires a register, we cannot also have a BaseReg. - if (Subtarget->GVRequiresRegister(AM.BaseGV, getTargetMachine(), false) && - AM.HasBaseReg) + + // If BaseGV requires a register for the PIC base, we cannot also have a + // BaseReg specified. + if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags)) return false; - // X86-64 only supports addr of globals in small code model. - if (Subtarget->is64Bit()) { - if (getTargetMachine().getCodeModel() != CodeModel::Small) - return false; - // If lower 4G is not available, then we must use rip-relative addressing. - if (AM.BaseOffs || AM.Scale > 1) - return false; - } + // If lower 4G is not available, then we must use rip-relative addressing. 
+ if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1)) + return false; } switch (AM.Scale) { @@ -7094,7 +7237,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { return Subtarget->is64Bit() || NumBits1 < 64; } -bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const { +bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); @@ -7106,15 +7249,16 @@ bool X86TargetLowering::isTruncateFree(MVT VT1, MVT VT2) const { bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1 == Type::Int32Ty && Ty2 == Type::Int64Ty && Subtarget->is64Bit(); + return Ty1 == Type::getInt32Ty(Ty1->getContext()) && + Ty2 == Type::getInt64Ty(Ty1->getContext()) && Subtarget->is64Bit(); } -bool X86TargetLowering::isZExtFree(MVT VT1, MVT VT2) const { +bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit(); } -bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const { +bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { // i16 instructions are longer (0x66 prefix) and potentially slower. return !(VT1 == MVT::i32 && VT2 == MVT::i16); } @@ -7124,8 +7268,8 @@ bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const { /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. bool -X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, - MVT VT) const { +X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + EVT VT) const { // Only do shuffles on 128-bit vector types for now. if (VT.getSizeInBits() == 64) return false; @@ -7146,7 +7290,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, bool X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl &Mask, - MVT VT) const { + EVT VT) const { unsigned NumElts = VT.getVectorNumElements(); // FIXME: This collection of masks seems suspect. 
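The rewritten isLegalAddressingMode boils down to three checks: the displacement must fit the code model's sign-extended imm32 field, a global base must neither need a stub load nor conflict with the PIC base register, and the scale must be encodable. A rough standalone model of the scale/displacement part; treating 3/5/9 as legal only when the index register can double as the base is an assumption here, since those case bodies fall outside this excerpt:

    #include <cstdint>

    static bool roughAddrModeOK(int64_t disp, unsigned scale, bool hasBaseReg) {
      if (disp < INT32_MIN || disp > INT32_MAX)   // sign-extended imm32 only
        return false;
      switch (scale) {
      case 0: case 1: case 2: case 4: case 8: return true;  // SIB-encodable
      case 3: case 5: case 9: return !hasBaseReg;  // reg + reg*2/4/8 trick
      default: return false;
      }
    }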
if (NumElts == 2) @@ -7254,7 +7398,8 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, (*MIB).addOperand(*argOpers[i]); MIB.addReg(t2); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + (*MIB).setMemRefs(bInstr->memoperands_begin(), + bInstr->memoperands_end()); MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg()); MIB.addReg(EAXreg); @@ -7406,7 +7551,8 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, (*MIB).addOperand(*argOpers[i]); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).addMemOperand(*F, *bInstr->memoperands_begin()); + (*MIB).setMemRefs(bInstr->memoperands_begin(), + bInstr->memoperands_end()); MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3); MIB.addReg(X86::EAX); @@ -7450,7 +7596,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB + // Move all successors of thisMBB to nextMBB nextMBB->transferSuccessors(thisMBB); // Update thisMBB to fall through to newMBB @@ -7510,7 +7656,8 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, (*MIB).addOperand(*argOpers[i]); MIB.addReg(t3); assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand"); - (*MIB).addMemOperand(*F, *mInstr->memoperands_begin()); + (*MIB).setMemRefs(mInstr->memoperands_begin(), + mInstr->memoperands_end()); MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg()); MIB.addReg(X86::EAX); @@ -7522,70 +7669,190 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, return nextMBB; } - +// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 +// all of this code can be replaced with that in the .td file. MachineBasicBlock * -X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { +X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, + unsigned numArgs, bool memArg) const { + + MachineFunction *F = BB->getParent(); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + unsigned Opc; + if (memArg) + Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm; + else + Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr; + + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); + + for (unsigned i = 0; i < numArgs; ++i) { + MachineOperand &Op = MI->getOperand(i+1); + + if (!(Op.isReg() && Op.isImplicit())) + MIB.addOperand(Op); + } + + BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + .addReg(X86::XMM0); + + F->DeleteMachineInstr(MI); + + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( + MachineInstr *MI, + MachineBasicBlock *MBB) const { + // Emit code to save XMM registers to the stack. The ABI says that the + // number of registers to save is given in %al, so it's theoretically + // possible to do an indirect jump trick to avoid saving all of them, + // however this code takes a simpler approach and just executes all + // of the stores if %al is non-zero. It's less code, and it's probably + // easier on the hardware branch predictor, and stores aren't all that + // expensive anyway. + + // Create the new basic blocks. 
One block contains all the XMM stores, + // and one block is the final destination regardless of whether any + // stores were performed. + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineFunction *F = MBB->getParent(); + MachineFunction::iterator MBBIter = MBB; + ++MBBIter; + MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(MBBIter, XMMSaveMBB); + F->insert(MBBIter, EndMBB); + + // Set up the CFG. + // Move any original successors of MBB to the end block. + EndMBB->transferSuccessors(MBB); + // The original block will now fall through to the XMM save block. + MBB->addSuccessor(XMMSaveMBB); + // The XMMSaveMBB will fall through to the end block. + XMMSaveMBB->addSuccessor(EndMBB); + + // Now add the instructions. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned CountReg = MI->getOperand(0).getReg(); + int64_t RegSaveFrameIndex = MI->getOperand(1).getImm(); + int64_t VarArgsFPOffset = MI->getOperand(2).getImm(); + + if (!Subtarget->isTargetWin64()) { + // If %al is 0, branch around the XMM save block. + BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); + BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + MBB->addSuccessor(EndMBB); + } + + // In the XMM save block, save all the XMM argument registers. + for (int i = 3, e = MI->getNumOperands(); i != e; ++i) { + int64_t Offset = (i - 3) * 16 + VarArgsFPOffset; + MachineMemOperand *MMO = + F->getMachineMemOperand( + PseudoSourceValue::getFixedStack(RegSaveFrameIndex), + MachineMemOperand::MOStore, Offset, + /*Size=*/16, /*Align=*/16); + BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr)) + .addFrameIndex(RegSaveFrameIndex) + .addImm(/*Scale=*/1) + .addReg(/*IndexReg=*/0) + .addImm(/*Disp=*/Offset) + .addReg(/*Segment=*/0) + .addReg(MI->getOperand(i).getReg()) + .addMemOperand(MMO); + } + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + + return EndMBB; +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + unsigned Opc = + X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); + BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. 
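In source-level terms the CFG built for VASTART_SAVE_XMM_REGS is a guarded copy: %al carries the number of vector registers the caller actually used, and the whole MOVAPS sequence is skipped when it is zero. A C-level sketch, assuming the usual eight XMM argument registers of the x86-64 SysV ABI (the intrinsic type only stands in for the 16-byte stores):

    #include <xmmintrin.h>

    static void vaStartSaveXMMModel(unsigned char alCount,
                                    const __m128 regs[8], __m128 saveArea[8]) {
      if (alCount == 0)             // test %al, %al ; je end
        return;
      for (int i = 0; i < 8; ++i)   // movaps %xmmN, disp(save area)
        saveArea[i] = regs[i];
    }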
+ for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while (!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + // Add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} + + +MachineBasicBlock * +X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap *EM) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); + case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: case X86::CMOV_FR64: case X86::CMOV_V4F32: case X86::CMOV_V2F64: - case X86::CMOV_V2I64: { - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // cmpTY ccX, r1, r2 - // bCC copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned Opc = - X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); - BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - // Update machine-CFG edges by transferring all successors of the current - // block to the new block which will contain the Phi node for the select. - sinkMBB->transferSuccessors(BB); - - // Add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] - // ... - BB = sinkMBB; - BuildMI(BB, dl, TII->get(X86::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 
- return BB; - } + case X86::CMOV_V2I64: + return EmitLoweredSelect(MI, BB, EM); case X86::FP32_TO_INT16_IN_MEM: case X86::FP32_TO_INT32_IN_MEM: @@ -7596,33 +7863,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::FP80_TO_INT16_IN_MEM: case X86::FP80_TO_INT32_IN_MEM: case X86::FP80_TO_INT64_IN_MEM: { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); - addFrameReference(BuildMI(BB, dl, TII->get(X86::FNSTCW16m)), CWFrameIdx); + addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); - addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16rm), OldCW), + addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... - addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mi)), CWFrameIdx) + addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx) .addImm(0xC7F); // Reload the modified control word now... - addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); // Restore the memory image of control word to original value - addFrameReference(BuildMI(BB, dl, TII->get(X86::MOV16mr)), CWFrameIdx) + addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx) .addReg(OldCW); // Get the X86 opcode to use. unsigned Opc; switch (MI->getOpcode()) { - default: assert(0 && "illegal opcode!"); + default: llvm_unreachable("illegal opcode!"); case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; @@ -7655,15 +7925,26 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } else { AM.Disp = Op.getImm(); } - addFullAddress(BuildMI(BB, dl, TII->get(Opc)), AM) + addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM) .addReg(MI->getOperand(X86AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(BB, dl, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } + // String/text processing lowering. + case X86::PCMPISTRM128REG: + return EmitPCMP(MI, BB, 3, false /* in-mem */); + case X86::PCMPISTRM128MEM: + return EmitPCMP(MI, BB, 3, true /* in-mem */); + case X86::PCMPESTRM128REG: + return EmitPCMP(MI, BB, 5, false /* in mem */); + case X86::PCMPESTRM128MEM: + return EmitPCMP(MI, BB, 5, true /* in mem */); + + // Atomic Lowering. 
case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, @@ -7825,6 +8106,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, X86::MOV32rr, X86::MOV32rr, X86::MOV32ri, X86::MOV32ri, false); + case X86::VASTART_SAVE_XMM_REGS: + return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); } } @@ -7855,6 +8138,9 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case X86ISD::UMUL: case X86ISD::INC: case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: // These nodes' second result is a boolean. if (Op.getResNo() == 0) break; @@ -7891,7 +8177,7 @@ static bool isBaseAlignmentOfN(unsigned N, SDNode *Base, } static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, - MVT EVT, LoadSDNode *&LDBase, + EVT EltVT, LoadSDNode *&LDBase, unsigned &LastLoadedElt, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { @@ -7919,7 +8205,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, continue; LoadSDNode *LD = cast<LoadSDNode>(Elt); - if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI)) + if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI)) return false; LastLoadedElt = i; } @@ -7935,8 +8221,8 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI) { DebugLoc dl = N->getDebugLoc(); - MVT VT = N->getValueType(0); - MVT EVT = VT.getVectorElementType(); + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); unsigned NumElems = VT.getVectorNumElements(); @@ -7947,7 +8233,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); LoadSDNode *LD = NULL; unsigned LastLoadedElt; - if (!EltsFromConsecutiveLoads(SVN, NumElems, EVT, LD, LastLoadedElt, DAG, + if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG, MFI, TLI)) return SDValue(); @@ -7976,57 +8262,159 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Get the LHS/RHS of the select. SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - - // If we have SSE[12] support, try to form min/max nodes. + + // If we have SSE[12] support, try to form min/max nodes. SSE min/max + // instructions have the peculiarity that if either operand is a NaN, + // they chose what we call the RHS operand (and as such are not symmetric). + // It happens that this matches the semantics of the common C idiom + // x<y?x:y and related forms. if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); unsigned Opcode = 0; + // Check for x CC y ? x : y. if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { switch (CC) { default: break; - case ISD::SETOLE: // (X <= Y) ? X : Y -> min + case ISD::SETULT: + // This can be a min if we can prove that at least one of the operands + // is not a nan.
+ if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMIN; + break; case ISD::SETULE: - case ISD::SETLE: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min + // This can be a min, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOLT: case ISD::SETLT: + case ISD::SETLE: Opcode = X86ISD::FMIN; break; - case ISD::SETOGT: // (X > Y) ? X : Y -> max + case ISD::SETOGE: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMAX; + break; case ISD::SETUGT: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMAX; + break; + case ISD::SETUGE: + // This can be a max, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOGT: case ISD::SETGT: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max case ISD::SETGE: Opcode = X86ISD::FMAX; break; } + // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { switch (CC) { default: break; - case ISD::SETOGT: // (X > Y) ? Y : X -> min + case ISD::SETOGE: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMIN; + break; case ISD::SETUGT: + // This can be a min if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMIN; + break; + case ISD::SETUGE: + // This can be a min, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOGT: case ISD::SETGT: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min case ISD::SETGE: Opcode = X86ISD::FMIN; break; - case ISD::SETOLE: // (X <= Y) ? Y : X -> max + case ISD::SETULT: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(LHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. + std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(RHS)) + break; + } + Opcode = X86ISD::FMAX; + break; + case ISD::SETOLE: + // This can be a max if we can prove that at least one of the operands + // is not a nan. + if (!FiniteOnlyFPMath()) { + if (DAG.isKnownNeverNaN(RHS)) { + // Put the potential NaN in the RHS so that SSE will preserve it. 
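All the swaps in this combine lean on one SSE rule: MINSS/MAXSS return their second (RHS) operand whenever the comparison is false or unordered, so a possibly-NaN value survives only if it sits in the RHS. A scalar model of that asymmetry:

    // minss: a NaN makes the compare false, so the RHS is what comes back.
    static float minssModel(float lhs, float rhs) {
      return lhs < rhs ? lhs : rhs;
    }
    static float maxssModel(float lhs, float rhs) {
      return lhs > rhs ? lhs : rhs;
    }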
+ std::swap(LHS, RHS); + } else if (!DAG.isKnownNeverNaN(LHS)) + break; + } + Opcode = X86ISD::FMAX; + break; case ISD::SETULE: - case ISD::SETLE: - if (!UnsafeFPMath) break; - // FALL THROUGH. - case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max + // This can be a max, but if either operand is a NaN we need it to + // preserve the original LHS. + std::swap(LHS, RHS); + case ISD::SETOLT: case ISD::SETLT: + case ISD::SETLE: Opcode = X86ISD::FMAX; break; } @@ -8035,7 +8423,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (Opcode) return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); } - + // If this is a select between two integer constants, try to do some // optimizations. if (ConstantSDNode *TrueC = dyn_cast(LHS)) { @@ -8045,7 +8433,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // If this is efficiently invertible, canonicalize the LHSC/RHSC values // so that TrueC (the true value) is larger than FalseC. bool NeedsCondInvert = false; - + if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) && // Efficiently invertible. (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible. @@ -8054,41 +8442,41 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, NeedsCondInvert = true; std::swap(TrueC, FalseC); } - + // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0. if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, Cond.getValueType())); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); - + unsigned ShAmt = TrueC->getAPIntValue().logBase2(); return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, DAG.getConstant(ShAmt, MVT::i8)); } - + // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, Cond.getValueType())); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); } - + // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; - + bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { @@ -8104,13 +8492,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; } } - + if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, DAG.getConstant(1, Cond.getValueType())); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); @@ -8118,17 +8506,17 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, DAG.getConstant(Diff, Cond.getValueType())); - + // Add the base if non-zero. 
if (FalseC->getAPIntValue() != 0) Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); return Cond; } - } + } } } - + return SDValue(); } @@ -8136,11 +8524,11 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { DebugLoc DL = N->getDebugLoc(); - + // If the flag operand isn't dead, don't touch this CMOV. if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty()) return SDValue(); - + // If this is a select between two integer constants, try to do some // optimizations. Note that the operands are ordered the opposite of SELECT // operands. @@ -8149,12 +8537,12 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is // larger than FalseC (the false value). X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); - + if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { CC = X86::GetOppositeBranchCondition(CC); std::swap(TrueC, FalseC); } - + // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. // This is efficient for any integer data type (including i8/i16) and // shift amount. @@ -8162,10 +8550,10 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, SDValue Cond = N->getOperand(3); Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, MVT::i8), Cond); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); - + unsigned ShAmt = TrueC->getAPIntValue().logBase2(); Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, DAG.getConstant(ShAmt, MVT::i8)); @@ -8173,31 +8561,31 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return DCI.CombineTo(N, Cond, SDValue()); return Cond; } - + // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. This is efficient // for any integer data type, including i8/i16. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { SDValue Cond = N->getOperand(3); Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, DAG.getConstant(CC, MVT::i8), Cond); - + // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); - + if (N->getNumValues() == 2) // Dead flag value? return DCI.CombineTo(N, Cond, SDValue()); return Cond; } - + // Optimize cases that will turn into an LEA instruction. This requires // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). 
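The three constant-select folds here (power-of-two difference to a shift, adjacent constants to an add, small differences to an LEA multiply) are all specializations of one identity, cond ? T : F == zext(cond)*(T-F) + F. A one-line check of the identity in plain C++; wrapping arithmetic makes it hold even when T < F:

    #include <cstdint>

    static uint32_t selectByArith(bool cond, uint32_t t, uint32_t f) {
      return uint32_t(cond) * (t - f) + f;  // e.g. (c, 8, 0) -> c << 3
    }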
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue(); if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff; - + bool isFastMultiplier = false; if (Diff < 10) { switch ((unsigned char)Diff) { @@ -8213,7 +8601,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, break; } } - + if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); SDValue Cond = N->getOperand(3); @@ -8235,7 +8623,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return DCI.CombineTo(N, Cond, SDValue()); return Cond; } - } + } } } return SDValue(); @@ -8254,7 +8642,7 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (VT != MVT::i64) return SDValue(); @@ -8289,17 +8677,17 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, std::swap(MulAmt1, MulAmt2); SDValue NewMul; - if (isPowerOf2_64(MulAmt1)) + if (isPowerOf2_64(MulAmt1)) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt1), MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), DAG.getConstant(MulAmt1, VT)); - if (isPowerOf2_64(MulAmt2)) + if (isPowerOf2_64(MulAmt2)) NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, DAG.getConstant(Log2_64(MulAmt2), MVT::i8)); - else + else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, VT)); @@ -8321,14 +8709,14 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (!Subtarget->hasSSE2()) return SDValue(); - MVT VT = N->getValueType(0); + EVT VT = N->getValueType(0); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); SDValue ShAmtOp = N->getOperand(1); - MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc DL = N->getDebugLoc(); - SDValue BaseShAmt; + SDValue BaseShAmt = SDValue(); if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { unsigned NumElts = VT.getVectorNumElements(); unsigned i = 0; @@ -8347,21 +8735,40 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && cast(ShAmtOp)->isSplat()) { - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); + SDValue InVec = ShAmtOp.getOperand(0); + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = InVec.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { + if (ConstantSDNode *C = dyn_cast(InVec.getOperand(2))) { + unsigned SplatIdx = cast(ShAmtOp)->getSplatIndex(); + if (C->getZExtValue() == SplatIdx) + BaseShAmt = InVec.getOperand(1); + } + } + if (BaseShAmt.getNode() == 0) + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, + DAG.getIntPtrConstant(0)); } else return SDValue(); + // The shift amount is an i32. if (EltVT.bitsGT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BaseShAmt); + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt); // The shift amount is identical so we can do a vector shift. 
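The switch from ANY_EXTEND to ZERO_EXTEND above is load-bearing: the x86 vector shifts consume the whole low quadword of the count operand rather than reducing it modulo the element width, so garbage above the real shift amount becomes a huge count that flushes every lane to zero. A per-lane model for a 16-bit element (sketch):

    #include <cstdint>

    static uint16_t psllwLaneModel(uint16_t lane, uint64_t count) {
      return count > 15 ? uint16_t(0) : uint16_t(lane << count);
    }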
SDValue ValOp = N->getOperand(0); switch (N->getOpcode()) { default: - assert(0 && "Unknown shift opcode!"); + llvm_unreachable("Unknown shift opcode!"); break; case ISD::SHL: if (VT == MVT::v2i64) @@ -8415,13 +8822,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. StoreSDNode *St = cast(N); - MVT VT = St->getValue().getValueType(); + EVT VT = St->getValue().getValueType(); if (VT.getSizeInBits() != 64) return SDValue(); const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); - bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps + bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && @@ -8464,7 +8871,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { - MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; + EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), @@ -8568,9 +8975,9 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); if (Op.getOpcode() == ISD::BIT_CONVERT) Op = Op.getOperand(0); - MVT VT = N->getValueType(0), OpVT = Op.getValueType(); + EVT VT = N->getValueType(0), OpVT = Op.getValueType(); if (Op.getOpcode() == X86ISD::VZEXT_LOAD && - VT.getVectorElementType().getSizeInBits() == + VT.getVectorElementType().getSizeInBits() == OpVT.getVectorElementType().getSizeInBits()) { return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); } @@ -8580,7 +8987,7 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { // On X86 and X86-64, atomic operations are lowered to locked instructions. // Locked instructions, in turn, have implicit fence semantics (all memory // operations are flushed before issuing the locked instruction, and the -// are not buffered), so we can fold away the common pattern of +// are not buffered), so we can fold away the common pattern of // fence-atomic-fence. static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { SDValue atomic = N->getOperand(0); @@ -8601,11 +9008,11 @@ static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { default: return SDValue(); } - + SDValue fence = atomic.getOperand(0); if (fence.getOpcode() != ISD::MEMBARRIER) return SDValue(); - + switch (atomic.getOpcode()) { case ISD::ATOMIC_CMP_SWAP: return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), @@ -8657,6 +9064,101 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, // X86 Inline Assembly Support //===----------------------------------------------------------------------===// +static bool LowerToBSwap(CallInst *CI) { + // FIXME: this should verify that we are targetting a 486 or better. If not, + // we will turn this bswap into something that will be lowered to logical ops + // instead of emitting the bswap asm. For now, we don't support 486 or lower + // so don't worry about this. + + // Verify this is a simple bswap. 
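What LowerToBSwap achieves, in source terms: an opaque asm("bswap $0") (or the rorw $8 form for i16) becomes a call to llvm.bswap.*, which the optimizer can reason about. A portable model of the two byte swaps involved (plain shifts, no builtins assumed):

    #include <cstdint>

    static uint32_t bswap32Model(uint32_t x) {
      return (x >> 24) | ((x >> 8) & 0x0000FF00u) |
             ((x << 8) & 0x00FF0000u) | (x << 24);
    }

    // "rorw $8" on an i16 is the same operation one size down.
    static uint16_t bswap16Model(uint16_t x) {
      return uint16_t((x >> 8) | (x << 8));
    }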
+ if (CI->getNumOperands() != 2 || + CI->getType() != CI->getOperand(1)->getType() || + !CI->getType()->isInteger()) + return false; + + const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty || Ty->getBitWidth() % 16 != 0) + return false; + + // Okay, we can do this xform, do so now. + const Type *Tys[] = { Ty }; + Module *M = CI->getParent()->getParent()->getParent(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + + Value *Op = CI->getOperand(1); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} + +bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { + InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); + std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints(); + + std::string AsmStr = IA->getAsmString(); + + // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a" + std::vector<std::string> AsmPieces; + SplitString(AsmStr, AsmPieces, "\n"); // ; as separator? + + switch (AsmPieces.size()) { + default: return false; + case 1: + AsmStr = AsmPieces[0]; + AsmPieces.clear(); + SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace. + + // bswap $0 + if (AsmPieces.size() == 2 && + (AsmPieces[0] == "bswap" || + AsmPieces[0] == "bswapq" || + AsmPieces[0] == "bswapl") && + (AsmPieces[1] == "$0" || + AsmPieces[1] == "${0:q}")) { + // No need to check constraints, nothing other than the equivalent of + // "=r,0" would be valid here. + return LowerToBSwap(CI); + } + // rorw $$8, ${0:w} --> llvm.bswap.i16 + if (CI->getType() == Type::getInt16Ty(CI->getContext()) && + AsmPieces.size() == 3 && + AsmPieces[0] == "rorw" && + AsmPieces[1] == "$$8," && + AsmPieces[2] == "${0:w}" && + IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { + return LowerToBSwap(CI); + } + break; + case 3: + if (CI->getType() == Type::getInt64Ty(CI->getContext()) && + Constraints.size() >= 2 && + Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && + Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { + // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64 + std::vector<std::string> Words; + SplitString(AsmPieces[0], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") { + Words.clear(); + SplitString(AsmPieces[1], Words, " \t"); + if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") { + Words.clear(); + SplitString(AsmPieces[2], Words, " \t,"); + if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" && + Words[2] == "%edx") { + return LowerToBSwap(CI); + } + } + } + } + break; + } + return false; +} + + + /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType @@ -8689,7 +9191,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const { /// with another that has more specific requirements based on the type of the /// corresponding operand. const char *X86TargetLowering:: -LowerXConstraint(MVT ConstraintVT) const { +LowerXConstraint(EVT ConstraintVT) const { // FP X constraints get lowered to SSE1/2 registers if available, otherwise // 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) { @@ -8749,7 +9251,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // 32-bit signed value if (ConstantSDNode *C = dyn_cast(Op)) { const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::Int32Ty, C->getSExtValue())) { + if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64); break; @@ -8763,7 +9266,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // 32-bit unsigned value if (ConstantSDNode *C = dyn_cast(Op)) { const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::Int32Ty, C->getZExtValue())) { + if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getZExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); break; } @@ -8803,16 +9307,22 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, continue; } } - + // Otherwise, this isn't something we can handle, reject it. return; } + GlobalValue *GV = GA->getGlobal(); + // If we require an extra load to get this address, as in PIC mode, we + // can't accept it. + if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV, + getTargetMachine()))) + return; + if (hasMemory) - Op = LowerGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), Offset, DAG); + Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG); else - Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), - Offset); + Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset); Result = Op; break; } @@ -8828,12 +9338,42 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::vector X86TargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { if (Constraint.size() == 1) { // FIXME: not handling fp-stack yet! switch (Constraint[0]) { // GCC X86 Constraint Letters default: break; // Unknown constraint letter - case 'q': // Q_REGS (GENERAL_REGS in 64-bit mode) + case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode. + if (Subtarget->is64Bit()) { + if (VT == MVT::i32) + return make_vector(X86::EAX, X86::EDX, X86::ECX, X86::EBX, + X86::ESI, X86::EDI, X86::R8D, X86::R9D, + X86::R10D,X86::R11D,X86::R12D, + X86::R13D,X86::R14D,X86::R15D, + X86::EBP, X86::ESP, 0); + else if (VT == MVT::i16) + return make_vector(X86::AX, X86::DX, X86::CX, X86::BX, + X86::SI, X86::DI, X86::R8W,X86::R9W, + X86::R10W,X86::R11W,X86::R12W, + X86::R13W,X86::R14W,X86::R15W, + X86::BP, X86::SP, 0); + else if (VT == MVT::i8) + return make_vector(X86::AL, X86::DL, X86::CL, X86::BL, + X86::SIL, X86::DIL, X86::R8B,X86::R9B, + X86::R10B,X86::R11B,X86::R12B, + X86::R13B,X86::R14B,X86::R15B, + X86::BPL, X86::SPL, 0); + + else if (VT == MVT::i64) + return make_vector(X86::RAX, X86::RDX, X86::RCX, X86::RBX, + X86::RSI, X86::RDI, X86::R8, X86::R9, + X86::R10, X86::R11, X86::R12, + X86::R13, X86::R14, X86::R15, + X86::RBP, X86::RSP, 0); + + break; + } + // 32-bit fallthrough case 'Q': // Q_REGS if (VT == MVT::i32) return make_vector(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0); @@ -8852,7 +9392,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, std::pair X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { + EVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. 
if (Constraint.size() == 1) { @@ -8860,7 +9400,6 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, switch (Constraint[0]) { default: break; case 'r': // GENERAL_REGS - case 'R': // LEGACY_REGS case 'l': // INDEX_REGS if (VT == MVT::i8) return std::make_pair(0U, X86::GR8RegisterClass); @@ -8869,6 +9408,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, if (VT == MVT::i32 || !Subtarget->is64Bit()) return std::make_pair(0U, X86::GR32RegisterClass); return std::make_pair(0U, X86::GR64RegisterClass); + case 'R': // LEGACY_REGS + if (VT == MVT::i8) + return std::make_pair(0U, X86::GR8_NOREXRegisterClass); + if (VT == MVT::i16) + return std::make_pair(0U, X86::GR16_NOREXRegisterClass); + if (VT == MVT::i32 || !Subtarget->is64Bit()) + return std::make_pair(0U, X86::GR32_NOREXRegisterClass); + return std::make_pair(0U, X86::GR64_NOREXRegisterClass); case 'f': // FP Stack registers. // If SSE is enabled for this VT, use f80 to ensure the isel moves the // value to the correct fpstack register class. @@ -8886,7 +9433,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'x': // SSE_REGS if SSE1 allowed if (!Subtarget->hasSSE1()) break; - switch (VT.getSimpleVT()) { + switch (VT.getSimpleVT().SimpleTy) { default: break; // Scalar SSE types. case MVT::f32: @@ -8915,15 +9462,39 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // Not found as a standard register? if (Res.second == 0) { - // GCC calls "st(0)" just plain "st". + // Map st(0) -> st(7) -> ST0 + if (Constraint.size() == 7 && Constraint[0] == '{' && + tolower(Constraint[1]) == 's' && + tolower(Constraint[2]) == 't' && + Constraint[3] == '(' && + (Constraint[4] >= '0' && Constraint[4] <= '7') && + Constraint[5] == ')' && + Constraint[6] == '}') { + + Res.first = X86::ST0+Constraint[4]-'0'; + Res.second = X86::RFP80RegisterClass; + return Res; + } + + // GCC allows "st(0)" to be called just plain "st". if (StringsEqualNoCase("{st}", Constraint)) { Res.first = X86::ST0; Res.second = X86::RFP80RegisterClass; + return Res; + } + + // flags -> EFLAGS + if (StringsEqualNoCase("{flags}", Constraint)) { + Res.first = X86::EFLAGS; + Res.second = X86::CCRRegisterClass; + return Res; } + // 'A' means EAX + EDX. if (Constraint == "A") { Res.first = X86::EAX; - Res.second = X86::GRADRegisterClass; + Res.second = X86::GR32_ADRegisterClass; + return Res; } return Res; } @@ -9015,7 +9586,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, /// When and where to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. -MVT X86TargetLowering::getWidenVectorType(MVT VT) const { +EVT X86TargetLowering::getWidenVectorType(EVT VT) const { assert(VT.isVector()); if (isTypeLegal(VT)) return VT; @@ -9024,7 +9595,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const { // type based on element type. This would speed up our search (though // it may not be worth it since the size of the list is relatively // small). 
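The new {st(N)} handling is a fixed-shape, case-insensitive parse: exactly seven characters, brace-wrapped, one digit in [0,7], mapping to ST0+N in the RFP80 class. The shape check in isolation (a sketch mirroring the conditions above):

    #include <cctype>
    #include <cstring>

    // Returns the offset from ST0, or -1 if the string is not "{st(N)}".
    static int parseSTConstraint(const char *c) {
      if (std::strlen(c) == 7 && c[0] == '{' &&
          std::tolower(c[1]) == 's' && std::tolower(c[2]) == 't' &&
          c[3] == '(' && c[4] >= '0' && c[4] <= '7' &&
          c[5] == ')' && c[6] == '}')
        return c[4] - '0';
      return -1;
    }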
- MVT EltVT = VT.getVectorElementType(); + EVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); // On X86, it make sense to widen any vector wider than 1 @@ -9033,7 +9604,7 @@ MVT X86TargetLowering::getWidenVectorType(MVT VT) const { for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + EVT SVT = (MVT::SimpleValueType)nVT; if (isTypeLegal(SVT) && SVT.getVectorElementType() == EltVT && diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ffed46c733aaf..2f7b8ba6e694a 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -85,7 +85,7 @@ namespace llvm { /// as. FST, - /// CALL/TAILCALL - These operations represent an abstract X86 call + /// CALL - These operations represent an abstract X86 call /// instruction, which includes a bunch of information. In particular the /// operands of these node are: /// @@ -102,12 +102,8 @@ namespace llvm { /// #1 - The first register result value (optional) /// #2 - The second register result value (optional) /// - /// The CALL vs TAILCALL distinction boils down to whether the callee is - /// known not to modify the caller's stack frame, as is standard with - /// LLVM. CALL, - TAILCALL, - + /// RDTSC_DAG - This operation implements the lowering for /// readcyclecounter RDTSC_DAG, @@ -208,17 +204,6 @@ namespace llvm { LCMPXCHG_DAG, LCMPXCHG8_DAG, - // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, - // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - - // Atomic 64-bit binary operations. - ATOMADD64_DAG, - ATOMSUB64_DAG, - ATOMOR64_DAG, - ATOMXOR64_DAG, - ATOMAND64_DAG, - ATOMNAND64_DAG, - ATOMSWAP64_DAG, - // FNSTCW16m - Store FP control world into i16 memory. FNSTCW16m, @@ -241,10 +226,29 @@ namespace llvm { // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, SMUL, UMUL, - INC, DEC, + INC, DEC, OR, XOR, AND, // MUL_IMM - X86 specific multiply by immediate. - MUL_IMM + MUL_IMM, + + // PTEST - Vector bitwise comparisons + PTEST, + + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, + // according to %al. An operator is needed so that this can be expanded + // with control flow. + VASTART_SAVE_XMM_REGS, + + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, + // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - + // Atomic 64-bit binary operations. + ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, + ATOMSUB64_DAG, + ATOMOR64_DAG, + ATOMXOR64_DAG, + ATOMAND64_DAG, + ATOMNAND64_DAG, + ATOMSWAP64_DAG }; } @@ -333,6 +337,15 @@ namespace llvm { /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW /// instructions. unsigned getShufflePSHUFLWImmediate(SDNode *N); + + /// isZeroNode - Returns true if Elt is a constant zero or a floating point + /// constant +0.0. + bool isZeroNode(SDValue Elt); + + /// isOffsetSuitableForCodeModel - Returns true of the given offset can be + /// fit into displacement field of the instruction. + bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, + bool hasSymbolicDisplacement = true); } //===--------------------------------------------------------------------===// @@ -374,12 +387,17 @@ namespace llvm { /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove - /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for + /// lowering. 
It returns EVT::iAny if SelectionDAG should be responsible for /// determining it. - virtual - MVT getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr, - SelectionDAG &DAG) const; + virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align, + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const; + + /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// unaligned memory accesses. of the specified type. + virtual bool allowsUnalignedMemoryAccesses(EVT VT) const { + return true; + } /// LowerOperation - Provide custom lowering hooks for some operations. /// @@ -395,7 +413,8 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap *EM) const; /// getTargetNodeName - This method returns the name of a target specific @@ -403,7 +422,7 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual MVT getSetCCResultType(MVT VT) const; + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the @@ -420,13 +439,15 @@ namespace llvm { SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + virtual bool ExpandInlineAsm(CallInst *CI) const; + ConstraintType getConstraintType(const std::string &Constraint) const; std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; - virtual const char *LowerXConstraint(MVT ConstraintVT) const; + virtual const char *LowerXConstraint(EVT ConstraintVT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -444,7 +465,7 @@ namespace llvm { /// error, this returns a register number of 0. std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. @@ -454,7 +475,7 @@ namespace llvm { /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in /// register EAX to i16 by referencing its sub-register AX. virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const; - virtual bool isTruncateFree(MVT VT1, MVT VT2) const; + virtual bool isTruncateFree(EVT VT1, EVT VT2) const; /// isZExtFree - Return true if any actual instruction that defines a /// value of type Ty1 implicit zero-extends the value to Ty2 in the result @@ -465,31 +486,31 @@ namespace llvm { /// all instructions that define 32-bit values implicit zero-extend the /// result out to 64 bits. virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const; - virtual bool isZExtFree(MVT VT1, MVT VT2) const; + virtual bool isZExtFree(EVT VT1, EVT VT2) const; /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. 
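The isTruncateFree / isZExtFree / isNarrowingProfitable hooks being migrated to EVT above encode sub-register facts: a 32-bit value can be read as i16 simply by naming AX, and on x86-64 every 32-bit def implicitly zeroes bits 63:32. A sketch of how a target might answer those queries, with a hypothetical value-type enum standing in for EVT:

enum class VT { i8, i16, i32, i64 };

// Truncation is free when the narrow type is just a sub-register view
// of the wide one (e.g. i32 -> i16 by referencing AX).
bool isTruncateFreeX86(VT from, VT to) {
  return static_cast<int>(to) < static_cast<int>(from);
}

// Any instruction defining a 32-bit value zero-extends it to 64 bits,
// so an i32 -> i64 zext costs nothing on x86-64.
bool isZExtFreeX86(VT from, VT to) {
  return from == VT::i32 && to == VT::i64;
}

// Per the comment above: narrowing i32 -> i8 pays off (smaller encodings),
// i32 -> i16 does not (the 16-bit operand-size prefix hurts decode).
bool isNarrowingProfitableX86(VT from, VT to) {
  return from == VT::i32 && to == VT::i8;
}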
- virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const; + virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const; /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask /// values are assumed to be legal. virtual bool isShuffleMaskLegal(const SmallVectorImpl &Mask, - MVT VT) const; + EVT VT) const; /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is /// used by Targets can use this to indicate if there is a suitable /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant /// pool entry. virtual bool isVectorClearMaskLegal(const SmallVectorImpl &Mask, - MVT VT) const; + EVT VT) const; /// ShouldShrinkFPConstant - If true, then instruction selection should /// seek to shrink the FP constant of the specified type to a smaller type /// in order to save space and / or reduce runtime. - virtual bool ShouldShrinkFPConstant(MVT VT) const { + virtual bool ShouldShrinkFPConstant(EVT VT) const { // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more // expensive than a straight movsd. On the other hand, it's important to // shrink long double fp constant since fldt is very slow. @@ -497,11 +518,14 @@ namespace llvm { } /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Target which want to do tail call + /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. - virtual bool IsEligibleForTailCallOptimization(CallSDNode *TheCall, - SDValue Ret, - SelectionDAG &DAG) const; + virtual bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl &Ins, + SelectionDAG& DAG) const; virtual const X86Subtarget* getSubtarget() { return Subtarget; @@ -509,17 +533,17 @@ namespace llvm { /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. - bool isScalarFPTypeInSSEReg(MVT VT) const { + bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } /// getWidenVectorType: given a vector type, returns the type to widen /// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself. - /// If there is no vector type that we want to widen to, returns MVT::Other + /// If there is no vector type that we want to widen to, returns EVT::Other /// When and were to widen is target dependent based on the cost of /// scalarizing vs using the wider vector type. - virtual MVT getWidenVectorType(MVT VT) const; + virtual EVT getWidenVectorType(EVT VT) const; /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
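getWidenVectorType, per its comment above, maps an illegal vector such as v7i8 onto the next legal vector with the same element type (v8i8), or reports failure. A minimal sketch of that search over a hypothetical legal-type list, standing in for the target's isTypeLegal query:

#include <optional>
#include <vector>

struct VecVT { int eltBits; int nElts; };

// Hypothetical legal vector types for the illustration.
static const std::vector<VecVT> kLegal = {
  {8, 8}, {8, 16}, {16, 4}, {16, 8}, {32, 2}, {32, 4}
};

// Return the narrowest legal vector with the same element type and at
// least as many elements; nullopt plays the role of returning MVT::Other.
std::optional<VecVT> widenVectorType(VecVT vt) {
  std::optional<VecVT> best;
  for (const VecVT &l : kLegal)
    if (l.eltBits == vt.eltBits && l.nElts >= vt.nElts &&
        (!best || l.nElts < best->nElts))
      best = l;
  return best;
}

Here widenVectorType({8, 7}) yields {8, 8} — the v7i8 -> v8i8 case the comment names.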
@@ -554,28 +578,30 @@ namespace llvm { bool X86ScalarSSEf32; bool X86ScalarSSEf64; - SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG); - - SDValue LowerMemArgument(SDValue Op, SelectionDAG &DAG, - const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned CC, SDValue Root, unsigned i); - - SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, - const SDValue &StackPtr, - const CCValAssign &VA, SDValue Chain, - SDValue Arg, ISD::ArgFlagsTy Flags); + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + SDValue LowerMemArgument(SDValue Chain, + CallingConv::ID CallConv, + const SmallVectorImpl &ArgInfo, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, MachineFrameInfo *MFI, + unsigned i); + SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags); // Call lowering helpers. - bool IsCalleePop(bool isVarArg, unsigned CallingConv); - bool CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall); - bool CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall); + bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, DebugLoc dl); - CCAssignFn *CCAssignFnForNode(unsigned CallingConv) const; - NameDecorationStyle NameDecorationForFORMAL_ARGUMENTS(SDValue Op); + CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; + NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -595,7 +621,7 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); SDValue LowerShift(SDValue Op, SelectionDAG &DAG); - SDValue BuildFILD(SDValue Op, MVT SrcVT, SDValue Chain, SDValue StackSlot, + SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG); SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG); @@ -612,10 +638,7 @@ namespace llvm { SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG); SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); - SDValue LowerRET(SDValue Op, SelectionDAG &DAG); SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG); @@ -635,6 +658,26 @@ namespace llvm { SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG); + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + 
SmallVectorImpl &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); + void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, unsigned NewOp); @@ -651,9 +694,17 @@ namespace llvm { const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); + /// Utility function to emit string processing sse4.2 instructions + /// that return in xmm0. + /// This takes the instruction to expand, the associated machine basic + /// block, the number of args, and whether or not the second arg is + /// in memory or not. + MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, + unsigned argNum, bool inMem) const; + /// Utility function to emit atomic bitwise operations (and, or, xor). - // It takes the bitwise instruction to expand, the associated machine basic - // block, and the associated X86 opcodes for reg/reg and reg/imm. + /// It takes the bitwise instruction to expand, the associated machine basic + /// block, and the associated X86 opcodes for reg/reg and reg/imm. MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter( MachineInstr *BInstr, MachineBasicBlock *BB, @@ -683,6 +734,15 @@ namespace llvm { MachineBasicBlock *BB, unsigned cmovOpc) const; + /// Utility function to emit the xmm reg save portion of va_start. + MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter( + MachineInstr *BInstr, + MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, + MachineBasicBlock *BB, + DenseMap *EM) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 472ba4c462854..ef19823a2831e 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -28,26 +28,29 @@ def i64i32imm_pcrel : Operand { // 64-bits but only 8 bits are significant. -def i64i8imm : Operand; +def i64i8imm : Operand { + let ParserMatchClass = ImmSExt8AsmOperand; +} def lea64mem : Operand { let PrintMethod = "printlea64mem"; - let MIOperandInfo = (ops GR64, i8imm, GR64, i32imm); + let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm); + let ParserMatchClass = X86MemAsmOperand; } def lea64_32mem : Operand { let PrintMethod = "printlea64_32mem"; let AsmOperandLowerMethod = "lower_lea64_32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); + let ParserMatchClass = X86MemAsmOperand; } //===----------------------------------------------------------------------===// // Complex Pattern Definitions. 
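The header changes above replace the single LowerCALL/LowerRET pair with LowerFormalArguments, LowerCall, and LowerReturn, each driven by an explicit CallingConv::ID, and CCAssignFnForNode picks a per-convention argument-assignment routine. A sketch of that dispatch shape with hypothetical conventions and assignment callbacks:

enum class CallingConv { C, Fast, X86_FastCall };

struct ArgLoc { int reg; int stackOff; }; // reg < 0 means "on the stack"
using CCAssignFn = ArgLoc (*)(unsigned argIdx);

static ArgLoc assignC(unsigned i)    { return {-1, int(i) * 8}; } // all stack
static ArgLoc assignFast(unsigned i) { return {int(i), 0}; }      // regs first
static ArgLoc assignFastCall(unsigned i) {
  return i < 2 ? ArgLoc{int(i), 0} : ArgLoc{-1, int(i - 2) * 4};
}

// One assignment routine per calling convention, in the spirit of
// CCAssignFnForNode above.
CCAssignFn ccAssignFnFor(CallingConv cc) {
  switch (cc) {
  case CallingConv::Fast:         return assignFast;
  case CallingConv::X86_FastCall: return assignFastCall;
  default:                        return assignC;
  }
}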
// def lea64addr : ComplexPattern; + [add, sub, mul, X86mul_imm, shl, or, frameindex, + X86WrapperRIP], []>; def tls64addr : ComplexPattern; @@ -129,13 +132,40 @@ let isCall = 1 in def CALL64pcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call\t$dst", []>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotWin64]>; def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), - "call\t{*}$dst", [(X86call GR64:$dst)]>; + "call\t{*}$dst", [(X86call GR64:$dst)]>, + Requires<[NotWin64]>; def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), - "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>; + "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>, + Requires<[NotWin64]>; + + def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), + "lcall{q}\t{*}$dst", []>; } + // FIXME: We need to teach codegen about single list of call-clobbered registers. +let isCall = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS], + Uses = [RSP] in { + def WINCALL64pcrel32 : I<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call\t$dst", []>, + Requires<[IsWin64]>; + def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), + "call\t{*}$dst", + [(X86call GR64:$dst)]>, Requires<[IsWin64]>; + def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), + "call\t{*}$dst", + [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>; + } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in @@ -162,6 +192,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { [(brind GR64:$dst)]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", [(brind (loadi64 addr:$dst))]>; + def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), + "ljmp{q}\t{*}$dst", []>; } //===----------------------------------------------------------------------===// @@ -182,12 +214,18 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", []>; let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in +let mayLoad = 1 in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; -let mayStore = 1 in +def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>; +def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", []>; +} +let mayStore = 1 in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>; +def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>; +} } let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { @@ -246,6 +284,14 @@ let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)]>, REP; +def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>; + +def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>; + +// Fast system-call instructions +def SYSEXIT64 : RI<0x35, RawFrm, + (outs), (ins), "sysexit", []>, TB; + 
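The WINCALL64* definitions above exist because Win64 and the SysV ABI disagree on which registers a call clobbers; notably, RSI and RDI are callee-saved on Win64, so the two call flavors carry different Defs lists and the IsWin64/NotWin64 predicates select between them. A sketch with simplified clobber sets (the real lists also cover x87, MMX, and XMM state):

#include <set>
#include <string>

static const std::set<std::string> kSysVClobbers = {
  "RAX", "RCX", "RDX", "RSI", "RDI", "R8", "R9", "R10", "R11"
};
static const std::set<std::string> kWin64Clobbers = {
  "RAX", "RCX", "RDX", "R8", "R9", "R10", "R11" // no RSI/RDI: callee-saved
};

// Pick the clobber set the way the IsWin64/NotWin64 predicates pick
// between CALL64* and WINCALL64*.
const std::set<std::string> &callClobbers(bool isWin64) {
  return isWin64 ? kWin64Clobbers : kSysVClobbers;
}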
//===----------------------------------------------------------------------===// // Move Instructions... // @@ -275,6 +321,25 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)]>; +def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins i8imm:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64o32a : RIi32<0xA1, RawFrm, (outs), (ins i32imm:$src), + "mov{q}\t{$src, %rax|%rax, $src}", []>; +def MOV64ao8 : RIi8<0xA2, RawFrm, (outs i8imm:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; +def MOV64ao32 : RIi32<0xA3, RawFrm, (outs i32imm:$dst), (ins), + "mov{q}\t{%rax, $dst|$dst, %rax}", []>; + +// Moves to and from segment registers +def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV64ms : RI<0x8C, MRMDestMem, (outs i64mem:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; + // Sign/Zero extenders // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -332,13 +397,15 @@ def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), [(set GR64:$dst, (zextloadi64i32 addr:$src))]>; // Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may -// be copying from a truncate, but any other 32-bit operation will zero-extend +// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may +// be copying from a truncate. And x86's cmov doesn't do anything if the +// condition is false. But any other 32-bit operation will zero-extend // up to 64 bits. def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && N->getOpcode() != TargetInstrInfo::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg; + N->getOpcode() != ISD::CopyFromReg && + N->getOpcode() != X86ISD::CMOV; }]>; // In the case of a 32-bit def that is known to implicitly zero-extend, @@ -361,6 +428,10 @@ let neverHasSideEffects = 1 in { // let Defs = [EFLAGS] in { + +def ADD64i32 : RI<0x05, RawFrm, (outs), (ins i32imm:$src), + "add{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isConvertibleToThreeAddress = 1 in { let isCommutable = 1 in @@ -386,6 +457,12 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem: "add{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; + +// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but +// differently encoded. 
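The def32 PatLeaf above encodes the x86-64 rule that writing a 32-bit register zeroes bits 63:32, minus the listed exceptions; the newly added exclusion is CMOV, which writes nothing at all when its condition is false and so can leave stale high bits. A sketch of that predicate over a hypothetical opcode enum:

enum class Op { Add32, Mov32, Truncate, ExtractSubreg, CopyFromReg, Cmov };

// True when a 32-bit def is known to implicitly zero-extend to 64 bits,
// mirroring the exclusions in the def32 PatLeaf.
bool def32ZeroExtends(Op op) {
  switch (op) {
  case Op::Truncate:
  case Op::ExtractSubreg:
  case Op::CopyFromReg:
  case Op::Cmov: // keeps the old register value when the condition is false
    return false;
  default:
    return true;
  }
}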
+def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", []>; + } // isTwoAddress // Memory-Register Addition @@ -403,6 +480,10 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2), (implicit EFLAGS)]>; let Uses = [EFLAGS] in { + +def ADC64i32 : RI<0x15, RawFrm, (outs), (ins i32imm:$src), + "adc{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isCommutable = 1 in def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -458,6 +539,9 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (implicit EFLAGS)]>; } // isTwoAddress +def SUB64i32 : RI<0x2D, RawFrm, (outs), (ins i32imm:$src), + "sub{q}\t{$src, %rax|%rax, $src}", []>; + // Memory-Register Subtraction def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", @@ -494,6 +578,9 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm: [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; } // isTwoAddress +def SBB64i32 : RI<0x1D, RawFrm, (outs), (ins i32imm:$src), + "sbb{q}\t{$src, %rax|%rax, $src}", []>; + def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), GR64:$src2), addr:$dst)]>; @@ -665,8 +752,10 @@ let isConvertibleToThreeAddress = 1 in // Can transform into LEA. def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>; -// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is -// cheaper. +// NOTE: We don't include patterns for shifts of a register by one, because +// 'add reg,reg' is cheaper. 
+def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), + "shr{q}\t$dst", []>; } // isTwoAddress let Uses = [CL] in @@ -729,6 +818,39 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; // Rotate instructions + +let isTwoAddress = 1 in { +def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +} +def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), + "rcr{q}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; +} + let isTwoAddress = 1 in { let Uses = [CL] in def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src), @@ -839,6 +961,9 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)]>; let Defs = [EFLAGS] in { +def AND64i32 : RI<0x25, RawFrm, (outs), (ins i32imm:$src), + "and{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isCommutable = 1 in def AND64rr : RI<0x21, MRMDestReg, @@ -912,6 +1037,9 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), [(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; +def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src), + "or{q}\t{$src, %rax|%rax, $src}", []>; + let isTwoAddress = 1 in { let isCommutable = 1 in def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -945,6 +1073,10 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), "xor{q}\t{$src, $dst|$dst, $src}", [(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; + +def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src), + "xor{q}\t{$src, %rax|%rax, $src}", []>; + } // Defs = [EFLAGS] //===----------------------------------------------------------------------===// @@ -953,6 +1085,8 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src), // Integer comparison let Defs = [EFLAGS] in { +def TEST64i32 : RI<0xa9, RawFrm, (outs), (ins i32imm:$src), + "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", @@ -973,10 +1107,15 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0), 
(implicit EFLAGS)]>; + +def CMP64i32 : RI<0x3D, RawFrm, (outs), (ins i32imm:$src), + "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", [(X86cmp GR64:$src1, GR64:$src2), (implicit EFLAGS)]>; +def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", [(X86cmp (loadi64 addr:$src1), GR64:$src2), @@ -1306,14 +1445,12 @@ def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src) // Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's // equivalent due to implicit zero-extending, and it sometimes has a smaller // encoding. -// FIXME: remove when we can teach regalloc that xor reg, reg is ok. -// FIXME: AddedComplexity gives MOV64r0 a higher priority than MOV64ri32. Remove +// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove // when we have a better way to specify isel priority. -let Defs = [EFLAGS], AddedComplexity = 1, - isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), - "xor{l}\t${dst:subreg32}, ${dst:subreg32}", - [(set GR64:$dst, 0)]>; +let AddedComplexity = 1 in +def : Pat<(i64 0), + (SUBREG_TO_REG (i64 0), (MOV32r0), x86_subreg_32bit)>; + // Materialize i64 constant where top 32-bits are zero. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in @@ -1343,12 +1480,12 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movq\t%gs:$src, $dst", [(set GR64:$dst, (gsload addr:$src))]>, SegGS; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movq\t%fs:$src, $dst", [(set GR64:$dst, (fsload addr:$src))]>, SegFS; @@ -1371,11 +1508,43 @@ def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), "xadd\t$val, $ptr", [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, TB, LOCK; + def XCHG64rm : RI<0x87, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$ptr,GR64:$val), "xchg\t$val, $ptr", [(set GR64:$dst, (atomic_swap_64 addr:$ptr, GR64:$val))]>; } +// Optimized codegen when the non-memory output is not used. +// FIXME: Use normal add / sub instructions and add lock prefix dynamically. 
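The LOCK_ADD64mr family that follows targets atomic read-modify-write operations whose register result is dead: instead of LOCK XADD, codegen can emit a plain memory-destination add under a lock prefix. The same distinction is visible from C++, where a compiler may lower an unused fetch_add to the cheaper form:

#include <atomic>

std::atomic<long> counter{0};

long useResult() {
  // The old value is needed, so x86-64 compilers emit LOCK XADD here.
  return counter.fetch_add(1, std::memory_order_relaxed);
}

void ignoreResult() {
  // The old value is dead; a compiler may emit LOCK ADD instead.
  counter.fetch_add(1, std::memory_order_relaxed);
}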
+def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD64mi32 : RIi32<0x81, MRM0m, (outs), + (ins i64mem:$dst, i64i32imm :$src2), + "lock\n\t" + "add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi8 : RIi8<0x83, MRM5m, (outs), + (ins i64mem:$dst, i64i8imm :$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB64mi32 : RIi32<0x81, MRM5m, (outs), + (ins i64mem:$dst, i64i32imm:$src2), + "lock\n\t" + "sub{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), + "lock\n\t" + "inc{q}\t$dst", []>, LOCK; +def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), + "lock\n\t" + "dec{q}\t$dst", []>, LOCK; + // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { @@ -1405,78 +1574,88 @@ def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; } +// Segmentation support instructions + +// i16mem operand in LAR64rm and GR32 operand in LAR32rr is not a typo. +def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; +def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), + "lar{q}\t{$src, $dst|$dst, $src}", []>, TB; + +// String manipulation instructions + +def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", []>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// -// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable +// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small +// code model mode, should use 'movabs'. FIXME: This is really a hack, the +// 'movabs' predicate should handle this sort of thing. +def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri tconstpool :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper tjumptable :$dst)), + (MOV64ri tjumptable :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), + (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>; +def : Pat<(i64 (X86Wrapper texternalsym:$dst)), + (MOV64ri texternalsym:$dst)>, Requires<[FarData]>; + +// In static codegen with small code model, we can get the address of a label +// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of +// the MOV64ri64i32 should accept these. +def : Pat<(i64 (X86Wrapper tconstpool :$dst)), + (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper tjumptable :$dst)), + (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), + (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>; +def : Pat<(i64 (X86Wrapper texternalsym:$dst)), + (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>; + +// In kernel code model, we can get the address of a label +// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of +// the MOV64ri32 should accept these. 
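The pattern rewrites above choose how a 64-bit symbol address is materialized by code model: a full 64-bit movabs (MOV64ri) for far data, a 32-bit zero-extended movl (MOV64ri64i32) under the small static model, and a 32-bit sign-extended movq (MOV64ri32) for the kernel model's upper 2 GB. A sketch of that three-way choice, with mnemonic strings standing in for the selected instructions:

enum class CodeModel { Small, Kernel, Large };

// Mnemonic used to put a symbol's address in a register, mirroring the
// SmallCode / KernelCode / FarData predicates above.
const char *movForSymbolAddress(CodeModel m) {
  switch (m) {
  case CodeModel::Small:  return "movl";    // imm32, zero-extended
  case CodeModel::Kernel: return "movq";    // imm32, sign-extended
  case CodeModel::Large:  return "movabsq"; // full imm64
  }
  return "movabsq";
}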
def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri tconstpool :$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 tconstpool :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri tjumptable :$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 tjumptable :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri tglobaladdr :$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>; def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>; + (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>; // If we have small model and -static mode, it is safe to store global addresses // directly as immediates. FIXME: This is really a hack, the 'imm' predicate -// should handle this sort of thing. +// for MOV64mi32 should handle this sort of thing. def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst), (MOV64mi32 addr:$dst, tconstpool:$src)>, - Requires<[SmallCode, IsStatic]>; + Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst), (MOV64mi32 addr:$dst, tjumptable:$src)>, - Requires<[SmallCode, IsStatic]>; + Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst), (MOV64mi32 addr:$dst, tglobaladdr:$src)>, - Requires<[SmallCode, IsStatic]>; + Requires<[NearData, IsStatic]>; def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst), (MOV64mi32 addr:$dst, texternalsym:$src)>, - Requires<[SmallCode, IsStatic]>; - -// If we have small model and -static mode, it is safe to store global addresses -// directly as immediates. FIXME: This is really a hack, the 'imm' predicate -// should handle this sort of thing. -def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tconstpool:$src)>, - Requires<[SmallCode, IsStatic]>; -def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tjumptable:$src)>, - Requires<[SmallCode, IsStatic]>; -def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst), - (MOV64mi32 addr:$dst, tglobaladdr:$src)>, - Requires<[SmallCode, IsStatic]>; -def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst), - (MOV64mi32 addr:$dst, texternalsym:$src)>, - Requires<[SmallCode, IsStatic]>; - + Requires<[NearData, IsStatic]>; // Calls // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. 
def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; + (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>; def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; - -def : Pat<(X86tailcall (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; -def : Pat<(X86tailcall (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; - -def : Pat<(X86tailcall GR64:$dst), - (CALL64r GR64:$dst)>; + (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>; +def : Pat<(X86call (i64 tglobaladdr:$dst)), + (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>; +def : Pat<(X86call (i64 texternalsym:$dst)), + (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>; // tailcall stuff -def : Pat<(X86tailcall GR32:$dst), - (TAILCALL)>; -def : Pat<(X86tailcall (i64 tglobaladdr:$dst)), - (TAILCALL)>; -def : Pat<(X86tailcall (i64 texternalsym:$dst)), - (TAILCALL)>; - def : Pat<(X86tcret GR64:$dst, imm:$off), (TCRETURNri64 GR64:$dst, imm:$off)>; @@ -1540,30 +1719,15 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; // For other extloads, use subregs, since the high contents of the register are // defined after an extload. def : Pat<(extloadi64i32 addr:$src), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src), + (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), x86_subreg_32bit)>; -def : Pat<(extloadi16i1 addr:$src), - (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src), - x86_subreg_8bit)>, - Requires<[In64BitMode]>; -def : Pat<(extloadi16i8 addr:$src), - (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src), - x86_subreg_8bit)>, - Requires<[In64BitMode]>; - -// anyext -def : Pat<(i64 (anyext GR8:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>; -def : Pat<(i64 (anyext GR16:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; -def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>; -def : Pat<(i16 (anyext GR8:$src)), - (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>, - Requires<[In64BitMode]>; -def : Pat<(i32 (anyext GR8:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>, - Requires<[In64BitMode]>; + +// anyext. Define these to do an explicit zero-extend to +// avoid partial-register updates. +def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; +def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; +def : Pat<(i64 (anyext GR32:$src)), + (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>; //===----------------------------------------------------------------------===// // Some peepholes @@ -1661,6 +1825,11 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), @@ -1668,6 +1837,13 @@ def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi)), x86_subreg_32bit)>; +def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))), + (SUBREG_TO_REG + (i64 0), + (MOVZX32_NOREXrr8 + (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi)), + x86_subreg_32bit)>; // h-register extract and store. 
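The anyext patterns above deliberately lower through MOVZX rather than inserting into an undefined register: writing only the low 8 or 16 bits leaves a dependency on the register's stale upper bits (a partial-register update), while a zero-extending move breaks it, and the i32 case is free because 32-bit defs already clear bits 63:32. A sketch of that lowering table, with mnemonic strings standing in for the selected instructions:

struct Lowering { const char *insn; bool zeroExtends; };

// anyext-to-i64 choices in the spirit of the patterns above: prefer an
// explicit zero-extend so the result has no stale-high-bit dependency.
Lowering lowerAnyExtTo64(int fromBits) {
  if (fromBits == 8)  return {"movzbq", true};
  if (fromBits == 16) return {"movzwq", true};
  // 32-bit defs implicitly zero-extend; only a sub-register copy is needed.
  return {"subreg_to_reg", true};
}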
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), @@ -1906,6 +2082,102 @@ def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst), (implicit EFLAGS)), (DEC64m addr:$dst)>; +// Register-Register Logical Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (OR64rr GR64:$src1, GR64:$src2)>; + +// Register-Integer Logical Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (OR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Register-Memory Logical Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (OR64rm GR64:$src1, addr:$src2)>; + +// Memory-Register Logical Or with EFLAGS result +def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR64mr addr:$dst, GR64:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR64mi32 addr:$dst, i64immSExt32:$src2)>; + +// Register-Register Logical XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (XOR64rr GR64:$src1, GR64:$src2)>; + +// Register-Integer Logical XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Register-Memory Logical XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (XOR64rm GR64:$src1, addr:$src2)>; + +// Memory-Register Logical XOr with EFLAGS result +def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR64mr addr:$dst, GR64:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR64mi32 addr:$dst, i64immSExt32:$src2)>; + +// Register-Register Logical And with EFLAGS result +def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2), + (implicit EFLAGS)), + (AND64rr GR64:$src1, GR64:$src2)>; + +// Register-Integer Logical And with EFLAGS result +def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2), + (implicit EFLAGS)), + (AND64ri8 GR64:$src1, i64immSExt8:$src2)>; +def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2), + (implicit EFLAGS)), + (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; + +// Register-Memory Logical And with EFLAGS result +def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)), + (implicit EFLAGS)), + (AND64rm GR64:$src1, addr:$src2)>; + +// Memory-Register Logical And with EFLAGS result +def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND64mr addr:$dst, GR64:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), 
i64immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND64mi8 addr:$dst, i64immSExt8:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt32:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND64mi32 addr:$dst, i64immSExt32:$src2)>; + //===----------------------------------------------------------------------===// // X86-64 SSE Instructions //===----------------------------------------------------------------------===// @@ -1977,3 +2249,15 @@ let isTwoAddress = 1 in { } defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; + +// -disable-16bit support. +def : Pat<(truncstorei16 (i64 imm:$src), addr:$dst), + (MOV16mi addr:$dst, imm:$src)>; +def : Pat<(truncstorei16 GR64:$src, addr:$dst), + (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; +def : Pat<(i64 (sextloadi16 addr:$dst)), + (MOVSX64rm16 addr:$dst)>; +def : Pat<(i64 (zextloadi16 addr:$dst)), + (MOVZX64rm16 addr:$dst)>; +def : Pat<(i64 (extloadi16 addr:$dst)), + (MOVZX64rm16 addr:$dst)>; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 6359542819f46..c475b56d12f45 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" namespace llvm { @@ -47,7 +48,7 @@ struct X86AddressMode { unsigned Scale; unsigned IndexReg; - unsigned Disp; + int Disp; GlobalValue *GV; unsigned GVOpFlags; @@ -61,20 +62,20 @@ struct X86AddressMode { /// current instruction -- that is, a dereference of an address in a register, /// with no scale, index or displacement. An example is: DWORD PTR [EAX]. /// -inline const MachineInstrBuilder &addDirectMem(const MachineInstrBuilder &MIB, - unsigned Reg) { +static inline const MachineInstrBuilder & +addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { // Because memory references are always represented with four // values, this adds: Reg, [1, NoReg, 0] to the instruction. return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0); } -inline const MachineInstrBuilder &addLeaOffset(const MachineInstrBuilder &MIB, - int Offset) { +static inline const MachineInstrBuilder & +addLeaOffset(const MachineInstrBuilder &MIB, int Offset) { return MIB.addImm(1).addReg(0).addImm(Offset); } -inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB, - int Offset) { +static inline const MachineInstrBuilder & +addOffset(const MachineInstrBuilder &MIB, int Offset) { return addLeaOffset(MIB, Offset).addReg(0); } @@ -82,29 +83,29 @@ inline const MachineInstrBuilder &addOffset(const MachineInstrBuilder &MIB, /// [Reg + Offset], i.e., one with no scale or index, but with a /// displacement. An example is: DWORD PTR [EAX + 4]. 
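addDirectMem above leans on the invariant that an x86 memory reference is always spelled with the same operand tuple — base, scale, index, displacement — so [EAX] becomes (EAX, 1, NoReg, 0). A standalone sketch of a builder enforcing that shape (segment operands, which some forms also append, are omitted):

#include <vector>

struct Operand { bool isReg; int value; };

struct MIBuilder {
  std::vector<Operand> ops;
  MIBuilder &addReg(int r) { ops.push_back({true, r}); return *this; }
  MIBuilder &addImm(int v) { ops.push_back({false, v}); return *this; }
};

constexpr int NoReg = 0;

// [Reg]: base=Reg, scale=1, index=NoReg, disp=0 — always four operands.
MIBuilder &addDirectMem(MIBuilder &mib, int reg) {
  return mib.addReg(reg).addImm(1).addReg(NoReg).addImm(0);
}

// [Reg + Off]: same shape, nonzero displacement.
MIBuilder &addRegOffset(MIBuilder &mib, int reg, int off) {
  return mib.addReg(reg).addImm(1).addReg(NoReg).addImm(off);
}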
/// -inline const MachineInstrBuilder &addRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, - int Offset) { +static inline const MachineInstrBuilder & +addRegOffset(const MachineInstrBuilder &MIB, + unsigned Reg, bool isKill, int Offset) { return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); } -inline const MachineInstrBuilder &addLeaRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, - int Offset) { +static inline const MachineInstrBuilder & +addLeaRegOffset(const MachineInstrBuilder &MIB, + unsigned Reg, bool isKill, int Offset) { return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); } /// addRegReg - This function is used to add a memory reference of the form: /// [Reg + Reg]. -inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, +static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) { return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1) .addReg(Reg2, getKillRegState(isKill2)).addImm(0); } -inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, - const X86AddressMode &AM) { +static inline const MachineInstrBuilder & +addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); if (AM.BaseType == X86AddressMode::RegBase) @@ -120,8 +121,9 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, return MIB.addImm(AM.Disp); } -inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, - const X86AddressMode &AM) { +static inline const MachineInstrBuilder & +addFullAddress(const MachineInstrBuilder &MIB, + const X86AddressMode &AM) { return addLeaAddress(MIB, AM).addReg(0); } @@ -130,7 +132,7 @@ inline const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB, /// reference has base register as the FrameIndex offset until it is resolved. /// This allows a constant offset to be specified as well... /// -inline const MachineInstrBuilder & +static inline const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { MachineInstr *MI = MIB; MachineFunction &MF = *MI->getParent()->getParent(); @@ -141,11 +143,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { Flags |= MachineMemOperand::MOLoad; if (TID.mayStore()) Flags |= MachineMemOperand::MOStore; - MachineMemOperand MMO(PseudoSourceValue::getFixedStack(FI), - Flags, - MFI.getObjectOffset(FI) + Offset, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + Flags, Offset, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); return addOffset(MIB.addFrameIndex(FI), Offset) .addMemOperand(MMO); } @@ -157,7 +159,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { /// the GlobalBaseReg parameter can be used to make this a /// GlobalBaseReg-relative reference. 
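The addFrameReference hunk above now allocates the MachineMemOperand through the MachineFunction and derives its load/store flags from the instruction description rather than hard-coding them. A sketch of just that flag derivation, with hypothetical flag and descriptor types:

enum MemFlags : unsigned { MONone = 0, MOLoad = 1u << 0, MOStore = 1u << 1 };

struct InstrDesc { bool mayLoad; bool mayStore; };

// Flags follow what the instruction can do, as in the updated
// addFrameReference above.
unsigned memFlagsFor(const InstrDesc &tid) {
  unsigned flags = MONone;
  if (tid.mayLoad)  flags |= MOLoad;
  if (tid.mayStore) flags |= MOStore;
  return flags;
}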
/// -inline const MachineInstrBuilder & +static inline const MachineInstrBuilder & addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI, unsigned GlobalBaseReg, unsigned char OpFlags) { //FIXME: factor this diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index bc7def457c0f1..7e373730b30ad 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -303,6 +303,31 @@ def TST_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>; } def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9; +// Versions of FP instructions that take a single memory operand. Added for the +// disassembler; remove as they are included with patterns elsewhere. +def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom\t$src">; +def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp\t$src">; + +def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; +def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fstenv\t$dst">; + +def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">; +def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; + +def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom\t$src">; +def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp\t$src">; + +def FISTTP32m: FPI<0xDD, MRM1m, (outs i32mem:$dst), (ins), "fisttp{l}\t$dst">; +def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; +def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fsave\t$dst">; +def FSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fstsw\t$dst">; + +def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">; +def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">; + +def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">; +def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">; + // Floating point cmovs. multiclass FPCMov { def _Fp32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2), diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index eeed5bd27ff3c..abdb3135c3ac2 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -79,6 +79,7 @@ class XD { bits<4> Prefix = 11; } class XS { bits<4> Prefix = 12; } class T8 { bits<4> Prefix = 13; } class TA { bits<4> Prefix = 14; } +class TF { bits<4> Prefix = 15; } class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr> @@ -142,6 +143,24 @@ class FpI_ pattern> let Pattern = pattern; } +// Templates for instructions that use a 16- or 32-bit segmented address as +// their only operand: lcall (FAR CALL) and ljmp (FAR JMP) +// +// Iseg16 - 16-bit segment selector, 16-bit offset +// Iseg32 - 16-bit segment selector, 32-bit offset + +class Iseg16 o, Format f, dag outs, dag ins, string asm, + list pattern> : X86Inst { + let Pattern = pattern; + let CodeSize = 3; +} + +class Iseg32 o, Format f, dag outs, dag ins, string asm, + list pattern> : X86Inst { + let Pattern = pattern; + let CodeSize = 3; +} + // SSE1 Instruction Templates: // // SSI - SSE1 instructions with XS prefix. @@ -229,6 +248,16 @@ class SS428I o, Format F, dag outs, dag ins, string asm, list pattern> : I, T8, Requires<[HasSSE42]>; +// SS42FI - SSE 4.2 instructions with TF prefix. 
+class SS42FI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, TF, Requires<[HasSSE42]>; + +// SS42AI = SSE 4.2 instructions with TA prefix +class SS42AI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, TA, Requires<[HasSSE42]>; + // X86-64 Instruction templates... // @@ -282,4 +311,3 @@ class MMXID o, Format F, dag outs, dag ins, string asm, list patter : Ii8, XD, Requires<[HasMMX]>; class MMXIS o, Format F, dag outs, dag ins, string asm, list pattern> : Ii8, XS, Requires<[HasMMX]>; - diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index e5d84c5077837..e8a39d11040af 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -18,8 +18,8 @@ #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -27,24 +27,24 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" using namespace llvm; -namespace { - cl::opt - NoFusing("disable-spill-fusing", - cl::desc("Disable fusing of spill code into instructions")); - cl::opt - PrintFailedFusing("print-failed-fuse-candidates", - cl::desc("Print instructions that the allocator wants to" - " fuse, but the X86 backend currently can't"), - cl::Hidden); - cl::opt - ReMatPICStubLoad("remat-pic-stub-load", - cl::desc("Re-materialize load from stub in PIC mode"), - cl::init(false), cl::Hidden); -} +static cl::opt +NoFusing("disable-spill-fusing", + cl::desc("Disable fusing of spill code into instructions")); +static cl::opt +PrintFailedFusing("print-failed-fuse-candidates", + cl::desc("Print instructions that the allocator wants to" + " fuse, but the X86 backend currently can't"), + cl::Hidden); +static cl::opt +ReMatPICStubLoad("remat-pic-stub-load", + cl::desc("Re-materialize load from stub in PIC mode"), + cl::init(false), cl::Hidden); X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)), @@ -212,9 +212,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) unsigned RegOp = OpTbl2Addr[i][0]; unsigned MemOp = OpTbl2Addr[i][1]; if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,0))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store + // Index 0, folded load and store, no alignment requirement. + unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) @@ -222,93 +223,94 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) } // If the third value is 1, then it's folding either a load or a store. 
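The table rework above widens each folding entry from RegOp -> MemOp to RegOp -> (MemOp, Align), so a fold can demand a minimum operand alignment (16 for the MOVAPS-class entries), and the AuxInfo word packs the operand index with folded-load/folded-store bits. A sketch of that packing plus a lookup that refuses under-aligned folds:

#include <map>
#include <optional>

struct FoldInfo { unsigned memOpcode; unsigned align; }; // align 0 = any

// AuxInfo layout as used above: bits 0-3 = operand index,
// bit 4 = folded load, bit 5 = folded store.
unsigned makeAuxInfo(unsigned opIdx, bool load, bool store) {
  return opIdx | (unsigned(load) << 4) | (unsigned(store) << 5);
}

std::map<unsigned, FoldInfo> regToMemFold; // RegOp -> (MemOp, min align)

std::optional<unsigned> foldToMem(unsigned regOpc, unsigned availAlign) {
  auto it = regToMemFold.find(regOpc);
  if (it == regToMemFold.end()) return std::nullopt;
  if (it->second.align && availAlign < it->second.align)
    return std::nullopt; // operand not sufficiently aligned to fold
  return it->second.memOpcode;
}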
- static const unsigned OpTbl0[][3] = { - { X86::BT16ri8, X86::BT16mi8, 1 }, - { X86::BT32ri8, X86::BT32mi8, 1 }, - { X86::BT64ri8, X86::BT64mi8, 1 }, - { X86::CALL32r, X86::CALL32m, 1 }, - { X86::CALL64r, X86::CALL64m, 1 }, - { X86::CMP16ri, X86::CMP16mi, 1 }, - { X86::CMP16ri8, X86::CMP16mi8, 1 }, - { X86::CMP16rr, X86::CMP16mr, 1 }, - { X86::CMP32ri, X86::CMP32mi, 1 }, - { X86::CMP32ri8, X86::CMP32mi8, 1 }, - { X86::CMP32rr, X86::CMP32mr, 1 }, - { X86::CMP64ri32, X86::CMP64mi32, 1 }, - { X86::CMP64ri8, X86::CMP64mi8, 1 }, - { X86::CMP64rr, X86::CMP64mr, 1 }, - { X86::CMP8ri, X86::CMP8mi, 1 }, - { X86::CMP8rr, X86::CMP8mr, 1 }, - { X86::DIV16r, X86::DIV16m, 1 }, - { X86::DIV32r, X86::DIV32m, 1 }, - { X86::DIV64r, X86::DIV64m, 1 }, - { X86::DIV8r, X86::DIV8m, 1 }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 }, - { X86::FsMOVAPDrr, X86::MOVSDmr, 0 }, - { X86::FsMOVAPSrr, X86::MOVSSmr, 0 }, - { X86::IDIV16r, X86::IDIV16m, 1 }, - { X86::IDIV32r, X86::IDIV32m, 1 }, - { X86::IDIV64r, X86::IDIV64m, 1 }, - { X86::IDIV8r, X86::IDIV8m, 1 }, - { X86::IMUL16r, X86::IMUL16m, 1 }, - { X86::IMUL32r, X86::IMUL32m, 1 }, - { X86::IMUL64r, X86::IMUL64m, 1 }, - { X86::IMUL8r, X86::IMUL8m, 1 }, - { X86::JMP32r, X86::JMP32m, 1 }, - { X86::JMP64r, X86::JMP64m, 1 }, - { X86::MOV16ri, X86::MOV16mi, 0 }, - { X86::MOV16rr, X86::MOV16mr, 0 }, - { X86::MOV32ri, X86::MOV32mi, 0 }, - { X86::MOV32rr, X86::MOV32mr, 0 }, - { X86::MOV64ri32, X86::MOV64mi32, 0 }, - { X86::MOV64rr, X86::MOV64mr, 0 }, - { X86::MOV8ri, X86::MOV8mi, 0 }, - { X86::MOV8rr, X86::MOV8mr, 0 }, - { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 }, - { X86::MOVAPDrr, X86::MOVAPDmr, 0 }, - { X86::MOVAPSrr, X86::MOVAPSmr, 0 }, - { X86::MOVDQArr, X86::MOVDQAmr, 0 }, - { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 }, - { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0 }, - { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 }, - { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0 }, - { X86::MOVUPDrr, X86::MOVUPDmr, 0 }, - { X86::MOVUPSrr, X86::MOVUPSmr, 0 }, - { X86::MUL16r, X86::MUL16m, 1 }, - { X86::MUL32r, X86::MUL32m, 1 }, - { X86::MUL64r, X86::MUL64m, 1 }, - { X86::MUL8r, X86::MUL8m, 1 }, - { X86::SETAEr, X86::SETAEm, 0 }, - { X86::SETAr, X86::SETAm, 0 }, - { X86::SETBEr, X86::SETBEm, 0 }, - { X86::SETBr, X86::SETBm, 0 }, - { X86::SETEr, X86::SETEm, 0 }, - { X86::SETGEr, X86::SETGEm, 0 }, - { X86::SETGr, X86::SETGm, 0 }, - { X86::SETLEr, X86::SETLEm, 0 }, - { X86::SETLr, X86::SETLm, 0 }, - { X86::SETNEr, X86::SETNEm, 0 }, - { X86::SETNOr, X86::SETNOm, 0 }, - { X86::SETNPr, X86::SETNPm, 0 }, - { X86::SETNSr, X86::SETNSm, 0 }, - { X86::SETOr, X86::SETOm, 0 }, - { X86::SETPr, X86::SETPm, 0 }, - { X86::SETSr, X86::SETSm, 0 }, - { X86::TAILJMPr, X86::TAILJMPm, 1 }, - { X86::TEST16ri, X86::TEST16mi, 1 }, - { X86::TEST32ri, X86::TEST32mi, 1 }, - { X86::TEST64ri32, X86::TEST64mi32, 1 }, - { X86::TEST8ri, X86::TEST8mi, 1 } + static const unsigned OpTbl0[][4] = { + { X86::BT16ri8, X86::BT16mi8, 1, 0 }, + { X86::BT32ri8, X86::BT32mi8, 1, 0 }, + { X86::BT64ri8, X86::BT64mi8, 1, 0 }, + { X86::CALL32r, X86::CALL32m, 1, 0 }, + { X86::CALL64r, X86::CALL64m, 1, 0 }, + { X86::CMP16ri, X86::CMP16mi, 1, 0 }, + { X86::CMP16ri8, X86::CMP16mi8, 1, 0 }, + { X86::CMP16rr, X86::CMP16mr, 1, 0 }, + { X86::CMP32ri, X86::CMP32mi, 1, 0 }, + { X86::CMP32ri8, X86::CMP32mi8, 1, 0 }, + { X86::CMP32rr, X86::CMP32mr, 1, 0 }, + { X86::CMP64ri32, X86::CMP64mi32, 1, 0 }, + { X86::CMP64ri8, X86::CMP64mi8, 1, 0 }, + { 
X86::CMP64rr, X86::CMP64mr, 1, 0 }, + { X86::CMP8ri, X86::CMP8mi, 1, 0 }, + { X86::CMP8rr, X86::CMP8mr, 1, 0 }, + { X86::DIV16r, X86::DIV16m, 1, 0 }, + { X86::DIV32r, X86::DIV32m, 1, 0 }, + { X86::DIV64r, X86::DIV64m, 1, 0 }, + { X86::DIV8r, X86::DIV8m, 1, 0 }, + { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 }, + { X86::FsMOVAPDrr, X86::MOVSDmr, 0, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSmr, 0, 0 }, + { X86::IDIV16r, X86::IDIV16m, 1, 0 }, + { X86::IDIV32r, X86::IDIV32m, 1, 0 }, + { X86::IDIV64r, X86::IDIV64m, 1, 0 }, + { X86::IDIV8r, X86::IDIV8m, 1, 0 }, + { X86::IMUL16r, X86::IMUL16m, 1, 0 }, + { X86::IMUL32r, X86::IMUL32m, 1, 0 }, + { X86::IMUL64r, X86::IMUL64m, 1, 0 }, + { X86::IMUL8r, X86::IMUL8m, 1, 0 }, + { X86::JMP32r, X86::JMP32m, 1, 0 }, + { X86::JMP64r, X86::JMP64m, 1, 0 }, + { X86::MOV16ri, X86::MOV16mi, 0, 0 }, + { X86::MOV16rr, X86::MOV16mr, 0, 0 }, + { X86::MOV32ri, X86::MOV32mi, 0, 0 }, + { X86::MOV32rr, X86::MOV32mr, 0, 0 }, + { X86::MOV64ri32, X86::MOV64mi32, 0, 0 }, + { X86::MOV64rr, X86::MOV64mr, 0, 0 }, + { X86::MOV8ri, X86::MOV8mi, 0, 0 }, + { X86::MOV8rr, X86::MOV8mr, 0, 0 }, + { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 }, + { X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 }, + { X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 }, + { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, + { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, + { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, + { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, + { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, + { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, + { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, + { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, + { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, + { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, + { X86::MUL16r, X86::MUL16m, 1, 0 }, + { X86::MUL32r, X86::MUL32m, 1, 0 }, + { X86::MUL64r, X86::MUL64m, 1, 0 }, + { X86::MUL8r, X86::MUL8m, 1, 0 }, + { X86::SETAEr, X86::SETAEm, 0, 0 }, + { X86::SETAr, X86::SETAm, 0, 0 }, + { X86::SETBEr, X86::SETBEm, 0, 0 }, + { X86::SETBr, X86::SETBm, 0, 0 }, + { X86::SETEr, X86::SETEm, 0, 0 }, + { X86::SETGEr, X86::SETGEm, 0, 0 }, + { X86::SETGr, X86::SETGm, 0, 0 }, + { X86::SETLEr, X86::SETLEm, 0, 0 }, + { X86::SETLr, X86::SETLm, 0, 0 }, + { X86::SETNEr, X86::SETNEm, 0, 0 }, + { X86::SETNOr, X86::SETNOm, 0, 0 }, + { X86::SETNPr, X86::SETNPm, 0, 0 }, + { X86::SETNSr, X86::SETNSm, 0, 0 }, + { X86::SETOr, X86::SETOm, 0, 0 }, + { X86::SETPr, X86::SETPm, 0, 0 }, + { X86::SETSr, X86::SETSm, 0, 0 }, + { X86::TAILJMPr, X86::TAILJMPm, 1, 0 }, + { X86::TEST16ri, X86::TEST16mi, 1, 0 }, + { X86::TEST32ri, X86::TEST32mi, 1, 0 }, + { X86::TEST64ri32, X86::TEST64mi32, 1, 0 }, + { X86::TEST8ri, X86::TEST8mi, 1, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { unsigned RegOp = OpTbl0[i][0]; unsigned MemOp = OpTbl0[i][1]; + unsigned Align = OpTbl0[i][3]; if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); unsigned FoldedLoad = OpTbl0[i][2]; // Index 0, folded load or store. 
@@ -319,338 +321,342 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) AmbEntries.push_back(MemOp); } - static const unsigned OpTbl1[][2] = { - { X86::CMP16rr, X86::CMP16rm }, - { X86::CMP32rr, X86::CMP32rm }, - { X86::CMP64rr, X86::CMP64rm }, - { X86::CMP8rr, X86::CMP8rm }, - { X86::CVTSD2SSrr, X86::CVTSD2SSrm }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm }, - { X86::CVTSI2SDrr, X86::CVTSI2SDrm }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm }, - { X86::CVTSI2SSrr, X86::CVTSI2SSrm }, - { X86::CVTSS2SDrr, X86::CVTSS2SDrm }, - { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm }, - { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm }, - { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm }, - { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm }, - { X86::FsMOVAPDrr, X86::MOVSDrm }, - { X86::FsMOVAPSrr, X86::MOVSSrm }, - { X86::IMUL16rri, X86::IMUL16rmi }, - { X86::IMUL16rri8, X86::IMUL16rmi8 }, - { X86::IMUL32rri, X86::IMUL32rmi }, - { X86::IMUL32rri8, X86::IMUL32rmi8 }, - { X86::IMUL64rri32, X86::IMUL64rmi32 }, - { X86::IMUL64rri8, X86::IMUL64rmi8 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm }, - { X86::Int_COMISDrr, X86::Int_COMISDrm }, - { X86::Int_COMISSrr, X86::Int_COMISSrm }, - { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm }, - { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm }, - { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm }, - { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm }, - { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm }, - { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm }, - { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm }, - { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm }, - { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm }, - { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm }, - { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm }, - { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm }, - { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm }, - { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm }, - { X86::MOV16rr, X86::MOV16rm }, - { X86::MOV32rr, X86::MOV32rm }, - { X86::MOV64rr, X86::MOV64rm }, - { X86::MOV64toPQIrr, X86::MOVQI2PQIrm }, - { X86::MOV64toSDrr, X86::MOV64toSDrm }, - { X86::MOV8rr, X86::MOV8rm }, - { X86::MOVAPDrr, X86::MOVAPDrm }, - { X86::MOVAPSrr, X86::MOVAPSrm }, - { X86::MOVDDUPrr, X86::MOVDDUPrm }, - { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm }, - { X86::MOVDI2SSrr, X86::MOVDI2SSrm }, - { X86::MOVDQArr, X86::MOVDQArm }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm }, - { X86::MOVSDrr, X86::MOVSDrm }, - { X86::MOVSHDUPrr, X86::MOVSHDUPrm }, - { X86::MOVSLDUPrr, X86::MOVSLDUPrm }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm }, - { X86::MOVSSrr, X86::MOVSSrm }, - { X86::MOVSX16rr8, X86::MOVSX16rm8 }, - { X86::MOVSX32rr16, X86::MOVSX32rm16 }, - { X86::MOVSX32rr8, X86::MOVSX32rm8 }, - { X86::MOVSX64rr16, X86::MOVSX64rm16 }, - { X86::MOVSX64rr32, X86::MOVSX64rm32 }, - { X86::MOVSX64rr8, X86::MOVSX64rm8 }, - { X86::MOVUPDrr, X86::MOVUPDrm }, - { X86::MOVUPSrr, X86::MOVUPSrm }, - { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm }, - { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm }, - { X86::MOVZX16rr8, X86::MOVZX16rm8 }, - { 
X86::MOVZX32rr16, X86::MOVZX32rm16 }, - { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 }, - { X86::MOVZX32rr8, X86::MOVZX32rm8 }, - { X86::MOVZX64rr16, X86::MOVZX64rm16 }, - { X86::MOVZX64rr32, X86::MOVZX64rm32 }, - { X86::MOVZX64rr8, X86::MOVZX64rm8 }, - { X86::PSHUFDri, X86::PSHUFDmi }, - { X86::PSHUFHWri, X86::PSHUFHWmi }, - { X86::PSHUFLWri, X86::PSHUFLWmi }, - { X86::RCPPSr, X86::RCPPSm }, - { X86::RCPPSr_Int, X86::RCPPSm_Int }, - { X86::RSQRTPSr, X86::RSQRTPSm }, - { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int }, - { X86::RSQRTSSr, X86::RSQRTSSm }, - { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int }, - { X86::SQRTPDr, X86::SQRTPDm }, - { X86::SQRTPDr_Int, X86::SQRTPDm_Int }, - { X86::SQRTPSr, X86::SQRTPSm }, - { X86::SQRTPSr_Int, X86::SQRTPSm_Int }, - { X86::SQRTSDr, X86::SQRTSDm }, - { X86::SQRTSDr_Int, X86::SQRTSDm_Int }, - { X86::SQRTSSr, X86::SQRTSSm }, - { X86::SQRTSSr_Int, X86::SQRTSSm_Int }, - { X86::TEST16rr, X86::TEST16rm }, - { X86::TEST32rr, X86::TEST32rm }, - { X86::TEST64rr, X86::TEST64rm }, - { X86::TEST8rr, X86::TEST8rm }, + static const unsigned OpTbl1[][3] = { + { X86::CMP16rr, X86::CMP16rm, 0 }, + { X86::CMP32rr, X86::CMP32rm, 0 }, + { X86::CMP64rr, X86::CMP64rm, 0 }, + { X86::CMP8rr, X86::CMP8rm, 0 }, + { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, + { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, + { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, + { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, + { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, + { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, + { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, + { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, + { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, + { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, + { X86::FsMOVAPDrr, X86::MOVSDrm, 0 }, + { X86::FsMOVAPSrr, X86::MOVSSrm, 0 }, + { X86::IMUL16rri, X86::IMUL16rmi, 0 }, + { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, + { X86::IMUL32rri, X86::IMUL32rmi, 0 }, + { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, + { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, + { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, + { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, + { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, + { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm, 16 }, + { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm, 16 }, + { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm, 16 }, + { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm, 16 }, + { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm, 0 }, + { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 }, + { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, + { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 }, + { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm, 0 }, + { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 }, + { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 }, + { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, + { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 }, + { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 }, + { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 }, + { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 }, + { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 }, + { X86::MOV16rr, X86::MOV16rm, 0 }, + { X86::MOV32rr, X86::MOV32rm, 0 }, + { X86::MOV64rr, 
X86::MOV64rm, 0 }, + { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, + { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, + { X86::MOV8rr, X86::MOV8rm, 0 }, + { X86::MOVAPDrr, X86::MOVAPDrm, 16 }, + { X86::MOVAPSrr, X86::MOVAPSrm, 16 }, + { X86::MOVDDUPrr, X86::MOVDDUPrm, 0 }, + { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, + { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, + { X86::MOVDQArr, X86::MOVDQArm, 16 }, + { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, + { X86::MOVSDrr, X86::MOVSDrm, 0 }, + { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, + { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, + { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, + { X86::MOVSSrr, X86::MOVSSrm, 0 }, + { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, + { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, + { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, + { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, + { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, + { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, + { X86::MOVUPDrr, X86::MOVUPDrm, 16 }, + { X86::MOVUPSrr, X86::MOVUPSrm, 16 }, + { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 }, + { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, + { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 }, + { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, + { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, + { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, + { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, + { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, + { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, + { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, + { X86::PSHUFDri, X86::PSHUFDmi, 16 }, + { X86::PSHUFHWri, X86::PSHUFHWmi, 16 }, + { X86::PSHUFLWri, X86::PSHUFLWmi, 16 }, + { X86::RCPPSr, X86::RCPPSm, 16 }, + { X86::RCPPSr_Int, X86::RCPPSm_Int, 16 }, + { X86::RSQRTPSr, X86::RSQRTPSm, 16 }, + { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int, 16 }, + { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, + { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 }, + { X86::SQRTPDr, X86::SQRTPDm, 16 }, + { X86::SQRTPDr_Int, X86::SQRTPDm_Int, 16 }, + { X86::SQRTPSr, X86::SQRTPSm, 16 }, + { X86::SQRTPSr_Int, X86::SQRTPSm_Int, 16 }, + { X86::SQRTSDr, X86::SQRTSDm, 0 }, + { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 }, + { X86::SQRTSSr, X86::SQRTSSm, 0 }, + { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 }, + { X86::TEST16rr, X86::TEST16rm, 0 }, + { X86::TEST32rr, X86::TEST32rm, 0 }, + { X86::TEST64rr, X86::TEST64rm, 0 }, + { X86::TEST8rr, X86::TEST8rm, 0 }, // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 - { X86::UCOMISDrr, X86::UCOMISDrm }, - { X86::UCOMISSrr, X86::UCOMISSrm } + { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, + { X86::UCOMISSrr, X86::UCOMISSrm, 0 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { unsigned RegOp = OpTbl1[i][0]; unsigned MemOp = OpTbl1[i][1]; + unsigned Align = OpTbl1[i][2]; if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load + // Index 1, folded load + unsigned AuxInfo = 1 | (1 << 4); if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr) if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) AmbEntries.push_back(MemOp); } - static const unsigned OpTbl2[][2] = { - { X86::ADC32rr, X86::ADC32rm }, - { X86::ADC64rr, X86::ADC64rm }, - { X86::ADD16rr, X86::ADD16rm }, - { X86::ADD32rr, X86::ADD32rm }, - { X86::ADD64rr, X86::ADD64rm }, - { X86::ADD8rr, X86::ADD8rm }, - { X86::ADDPDrr, X86::ADDPDrm }, - { X86::ADDPSrr, X86::ADDPSrm }, - { X86::ADDSDrr, X86::ADDSDrm }, - { X86::ADDSSrr, X86::ADDSSrm }, - { X86::ADDSUBPDrr, 
X86::ADDSUBPDrm }, - { X86::ADDSUBPSrr, X86::ADDSUBPSrm }, - { X86::AND16rr, X86::AND16rm }, - { X86::AND32rr, X86::AND32rm }, - { X86::AND64rr, X86::AND64rm }, - { X86::AND8rr, X86::AND8rm }, - { X86::ANDNPDrr, X86::ANDNPDrm }, - { X86::ANDNPSrr, X86::ANDNPSrm }, - { X86::ANDPDrr, X86::ANDPDrm }, - { X86::ANDPSrr, X86::ANDPSrm }, - { X86::CMOVA16rr, X86::CMOVA16rm }, - { X86::CMOVA32rr, X86::CMOVA32rm }, - { X86::CMOVA64rr, X86::CMOVA64rm }, - { X86::CMOVAE16rr, X86::CMOVAE16rm }, - { X86::CMOVAE32rr, X86::CMOVAE32rm }, - { X86::CMOVAE64rr, X86::CMOVAE64rm }, - { X86::CMOVB16rr, X86::CMOVB16rm }, - { X86::CMOVB32rr, X86::CMOVB32rm }, - { X86::CMOVB64rr, X86::CMOVB64rm }, - { X86::CMOVBE16rr, X86::CMOVBE16rm }, - { X86::CMOVBE32rr, X86::CMOVBE32rm }, - { X86::CMOVBE64rr, X86::CMOVBE64rm }, - { X86::CMOVE16rr, X86::CMOVE16rm }, - { X86::CMOVE32rr, X86::CMOVE32rm }, - { X86::CMOVE64rr, X86::CMOVE64rm }, - { X86::CMOVG16rr, X86::CMOVG16rm }, - { X86::CMOVG32rr, X86::CMOVG32rm }, - { X86::CMOVG64rr, X86::CMOVG64rm }, - { X86::CMOVGE16rr, X86::CMOVGE16rm }, - { X86::CMOVGE32rr, X86::CMOVGE32rm }, - { X86::CMOVGE64rr, X86::CMOVGE64rm }, - { X86::CMOVL16rr, X86::CMOVL16rm }, - { X86::CMOVL32rr, X86::CMOVL32rm }, - { X86::CMOVL64rr, X86::CMOVL64rm }, - { X86::CMOVLE16rr, X86::CMOVLE16rm }, - { X86::CMOVLE32rr, X86::CMOVLE32rm }, - { X86::CMOVLE64rr, X86::CMOVLE64rm }, - { X86::CMOVNE16rr, X86::CMOVNE16rm }, - { X86::CMOVNE32rr, X86::CMOVNE32rm }, - { X86::CMOVNE64rr, X86::CMOVNE64rm }, - { X86::CMOVNO16rr, X86::CMOVNO16rm }, - { X86::CMOVNO32rr, X86::CMOVNO32rm }, - { X86::CMOVNO64rr, X86::CMOVNO64rm }, - { X86::CMOVNP16rr, X86::CMOVNP16rm }, - { X86::CMOVNP32rr, X86::CMOVNP32rm }, - { X86::CMOVNP64rr, X86::CMOVNP64rm }, - { X86::CMOVNS16rr, X86::CMOVNS16rm }, - { X86::CMOVNS32rr, X86::CMOVNS32rm }, - { X86::CMOVNS64rr, X86::CMOVNS64rm }, - { X86::CMOVO16rr, X86::CMOVO16rm }, - { X86::CMOVO32rr, X86::CMOVO32rm }, - { X86::CMOVO64rr, X86::CMOVO64rm }, - { X86::CMOVP16rr, X86::CMOVP16rm }, - { X86::CMOVP32rr, X86::CMOVP32rm }, - { X86::CMOVP64rr, X86::CMOVP64rm }, - { X86::CMOVS16rr, X86::CMOVS16rm }, - { X86::CMOVS32rr, X86::CMOVS32rm }, - { X86::CMOVS64rr, X86::CMOVS64rm }, - { X86::CMPPDrri, X86::CMPPDrmi }, - { X86::CMPPSrri, X86::CMPPSrmi }, - { X86::CMPSDrr, X86::CMPSDrm }, - { X86::CMPSSrr, X86::CMPSSrm }, - { X86::DIVPDrr, X86::DIVPDrm }, - { X86::DIVPSrr, X86::DIVPSrm }, - { X86::DIVSDrr, X86::DIVSDrm }, - { X86::DIVSSrr, X86::DIVSSrm }, - { X86::FsANDNPDrr, X86::FsANDNPDrm }, - { X86::FsANDNPSrr, X86::FsANDNPSrm }, - { X86::FsANDPDrr, X86::FsANDPDrm }, - { X86::FsANDPSrr, X86::FsANDPSrm }, - { X86::FsORPDrr, X86::FsORPDrm }, - { X86::FsORPSrr, X86::FsORPSrm }, - { X86::FsXORPDrr, X86::FsXORPDrm }, - { X86::FsXORPSrr, X86::FsXORPSrm }, - { X86::HADDPDrr, X86::HADDPDrm }, - { X86::HADDPSrr, X86::HADDPSrm }, - { X86::HSUBPDrr, X86::HSUBPDrm }, - { X86::HSUBPSrr, X86::HSUBPSrm }, - { X86::IMUL16rr, X86::IMUL16rm }, - { X86::IMUL32rr, X86::IMUL32rm }, - { X86::IMUL64rr, X86::IMUL64rm }, - { X86::MAXPDrr, X86::MAXPDrm }, - { X86::MAXPDrr_Int, X86::MAXPDrm_Int }, - { X86::MAXPSrr, X86::MAXPSrm }, - { X86::MAXPSrr_Int, X86::MAXPSrm_Int }, - { X86::MAXSDrr, X86::MAXSDrm }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int }, - { X86::MAXSSrr, X86::MAXSSrm }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int }, - { X86::MINPDrr, X86::MINPDrm }, - { X86::MINPDrr_Int, X86::MINPDrm_Int }, - { X86::MINPSrr, X86::MINPSrm }, - { X86::MINPSrr_Int, X86::MINPSrm_Int }, - { X86::MINSDrr, X86::MINSDrm }, - { X86::MINSDrr_Int, 
X86::MINSDrm_Int }, - { X86::MINSSrr, X86::MINSSrm }, - { X86::MINSSrr_Int, X86::MINSSrm_Int }, - { X86::MULPDrr, X86::MULPDrm }, - { X86::MULPSrr, X86::MULPSrm }, - { X86::MULSDrr, X86::MULSDrm }, - { X86::MULSSrr, X86::MULSSrm }, - { X86::OR16rr, X86::OR16rm }, - { X86::OR32rr, X86::OR32rm }, - { X86::OR64rr, X86::OR64rm }, - { X86::OR8rr, X86::OR8rm }, - { X86::ORPDrr, X86::ORPDrm }, - { X86::ORPSrr, X86::ORPSrm }, - { X86::PACKSSDWrr, X86::PACKSSDWrm }, - { X86::PACKSSWBrr, X86::PACKSSWBrm }, - { X86::PACKUSWBrr, X86::PACKUSWBrm }, - { X86::PADDBrr, X86::PADDBrm }, - { X86::PADDDrr, X86::PADDDrm }, - { X86::PADDQrr, X86::PADDQrm }, - { X86::PADDSBrr, X86::PADDSBrm }, - { X86::PADDSWrr, X86::PADDSWrm }, - { X86::PADDWrr, X86::PADDWrm }, - { X86::PANDNrr, X86::PANDNrm }, - { X86::PANDrr, X86::PANDrm }, - { X86::PAVGBrr, X86::PAVGBrm }, - { X86::PAVGWrr, X86::PAVGWrm }, - { X86::PCMPEQBrr, X86::PCMPEQBrm }, - { X86::PCMPEQDrr, X86::PCMPEQDrm }, - { X86::PCMPEQWrr, X86::PCMPEQWrm }, - { X86::PCMPGTBrr, X86::PCMPGTBrm }, - { X86::PCMPGTDrr, X86::PCMPGTDrm }, - { X86::PCMPGTWrr, X86::PCMPGTWrm }, - { X86::PINSRWrri, X86::PINSRWrmi }, - { X86::PMADDWDrr, X86::PMADDWDrm }, - { X86::PMAXSWrr, X86::PMAXSWrm }, - { X86::PMAXUBrr, X86::PMAXUBrm }, - { X86::PMINSWrr, X86::PMINSWrm }, - { X86::PMINUBrr, X86::PMINUBrm }, - { X86::PMULDQrr, X86::PMULDQrm }, - { X86::PMULHUWrr, X86::PMULHUWrm }, - { X86::PMULHWrr, X86::PMULHWrm }, - { X86::PMULLDrr, X86::PMULLDrm }, - { X86::PMULLDrr_int, X86::PMULLDrm_int }, - { X86::PMULLWrr, X86::PMULLWrm }, - { X86::PMULUDQrr, X86::PMULUDQrm }, - { X86::PORrr, X86::PORrm }, - { X86::PSADBWrr, X86::PSADBWrm }, - { X86::PSLLDrr, X86::PSLLDrm }, - { X86::PSLLQrr, X86::PSLLQrm }, - { X86::PSLLWrr, X86::PSLLWrm }, - { X86::PSRADrr, X86::PSRADrm }, - { X86::PSRAWrr, X86::PSRAWrm }, - { X86::PSRLDrr, X86::PSRLDrm }, - { X86::PSRLQrr, X86::PSRLQrm }, - { X86::PSRLWrr, X86::PSRLWrm }, - { X86::PSUBBrr, X86::PSUBBrm }, - { X86::PSUBDrr, X86::PSUBDrm }, - { X86::PSUBSBrr, X86::PSUBSBrm }, - { X86::PSUBSWrr, X86::PSUBSWrm }, - { X86::PSUBWrr, X86::PSUBWrm }, - { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm }, - { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm }, - { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm }, - { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm }, - { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm }, - { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm }, - { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm }, - { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm }, - { X86::PXORrr, X86::PXORrm }, - { X86::SBB32rr, X86::SBB32rm }, - { X86::SBB64rr, X86::SBB64rm }, - { X86::SHUFPDrri, X86::SHUFPDrmi }, - { X86::SHUFPSrri, X86::SHUFPSrmi }, - { X86::SUB16rr, X86::SUB16rm }, - { X86::SUB32rr, X86::SUB32rm }, - { X86::SUB64rr, X86::SUB64rm }, - { X86::SUB8rr, X86::SUB8rm }, - { X86::SUBPDrr, X86::SUBPDrm }, - { X86::SUBPSrr, X86::SUBPSrm }, - { X86::SUBSDrr, X86::SUBSDrm }, - { X86::SUBSSrr, X86::SUBSSrm }, + static const unsigned OpTbl2[][3] = { + { X86::ADC32rr, X86::ADC32rm, 0 }, + { X86::ADC64rr, X86::ADC64rm, 0 }, + { X86::ADD16rr, X86::ADD16rm, 0 }, + { X86::ADD32rr, X86::ADD32rm, 0 }, + { X86::ADD64rr, X86::ADD64rm, 0 }, + { X86::ADD8rr, X86::ADD8rm, 0 }, + { X86::ADDPDrr, X86::ADDPDrm, 16 }, + { X86::ADDPSrr, X86::ADDPSrm, 16 }, + { X86::ADDSDrr, X86::ADDSDrm, 0 }, + { X86::ADDSSrr, X86::ADDSSrm, 0 }, + { X86::ADDSUBPDrr, X86::ADDSUBPDrm, 16 }, + { X86::ADDSUBPSrr, X86::ADDSUBPSrm, 16 }, + { X86::AND16rr, X86::AND16rm, 0 }, + { X86::AND32rr, X86::AND32rm, 0 }, + { X86::AND64rr, X86::AND64rm, 0 }, + { X86::AND8rr, X86::AND8rm, 0 }, + { 
X86::ANDNPDrr, X86::ANDNPDrm, 16 }, + { X86::ANDNPSrr, X86::ANDNPSrm, 16 }, + { X86::ANDPDrr, X86::ANDPDrm, 16 }, + { X86::ANDPSrr, X86::ANDPSrm, 16 }, + { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, + { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, + { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, + { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, + { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, + { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, + { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, + { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, + { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, + { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, + { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, + { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, + { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, + { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, + { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, + { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, + { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, + { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, + { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, + { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, + { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, + { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, + { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, + { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, + { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, + { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, + { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, + { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, + { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, + { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, + { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, + { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, + { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, + { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, + { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, + { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, + { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, + { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, + { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, + { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, + { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, + { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, + { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, + { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, + { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, + { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, + { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, + { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, + { X86::CMPPDrri, X86::CMPPDrmi, 16 }, + { X86::CMPPSrri, X86::CMPPSrmi, 16 }, + { X86::CMPSDrr, X86::CMPSDrm, 0 }, + { X86::CMPSSrr, X86::CMPSSrm, 0 }, + { X86::DIVPDrr, X86::DIVPDrm, 16 }, + { X86::DIVPSrr, X86::DIVPSrm, 16 }, + { X86::DIVSDrr, X86::DIVSDrm, 0 }, + { X86::DIVSSrr, X86::DIVSSrm, 0 }, + { X86::FsANDNPDrr, X86::FsANDNPDrm, 16 }, + { X86::FsANDNPSrr, X86::FsANDNPSrm, 16 }, + { X86::FsANDPDrr, X86::FsANDPDrm, 16 }, + { X86::FsANDPSrr, X86::FsANDPSrm, 16 }, + { X86::FsORPDrr, X86::FsORPDrm, 16 }, + { X86::FsORPSrr, X86::FsORPSrm, 16 }, + { X86::FsXORPDrr, X86::FsXORPDrm, 16 }, + { X86::FsXORPSrr, X86::FsXORPSrm, 16 }, + { X86::HADDPDrr, X86::HADDPDrm, 16 }, + { X86::HADDPSrr, X86::HADDPSrm, 16 }, + { X86::HSUBPDrr, X86::HSUBPDrm, 16 }, + { X86::HSUBPSrr, X86::HSUBPSrm, 16 }, + { X86::IMUL16rr, X86::IMUL16rm, 0 }, + { X86::IMUL32rr, X86::IMUL32rm, 0 }, + { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::MAXPDrr, X86::MAXPDrm, 16 }, + { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, + { X86::MAXPSrr, X86::MAXPSrm, 16 }, + { X86::MAXPSrr_Int, X86::MAXPSrm_Int, 16 }, + { X86::MAXSDrr, X86::MAXSDrm, 0 }, + { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 }, + { X86::MAXSSrr, X86::MAXSSrm, 0 }, + { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 }, + { X86::MINPDrr, X86::MINPDrm, 16 }, + { X86::MINPDrr_Int, X86::MINPDrm_Int, 16 }, + { X86::MINPSrr, X86::MINPSrm, 16 }, + { 
X86::MINPSrr_Int, X86::MINPSrm_Int, 16 }, + { X86::MINSDrr, X86::MINSDrm, 0 }, + { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, + { X86::MINSSrr, X86::MINSSrm, 0 }, + { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, + { X86::MULPDrr, X86::MULPDrm, 16 }, + { X86::MULPSrr, X86::MULPSrm, 16 }, + { X86::MULSDrr, X86::MULSDrm, 0 }, + { X86::MULSSrr, X86::MULSSrm, 0 }, + { X86::OR16rr, X86::OR16rm, 0 }, + { X86::OR32rr, X86::OR32rm, 0 }, + { X86::OR64rr, X86::OR64rm, 0 }, + { X86::OR8rr, X86::OR8rm, 0 }, + { X86::ORPDrr, X86::ORPDrm, 16 }, + { X86::ORPSrr, X86::ORPSrm, 16 }, + { X86::PACKSSDWrr, X86::PACKSSDWrm, 16 }, + { X86::PACKSSWBrr, X86::PACKSSWBrm, 16 }, + { X86::PACKUSWBrr, X86::PACKUSWBrm, 16 }, + { X86::PADDBrr, X86::PADDBrm, 16 }, + { X86::PADDDrr, X86::PADDDrm, 16 }, + { X86::PADDQrr, X86::PADDQrm, 16 }, + { X86::PADDSBrr, X86::PADDSBrm, 16 }, + { X86::PADDSWrr, X86::PADDSWrm, 16 }, + { X86::PADDWrr, X86::PADDWrm, 16 }, + { X86::PANDNrr, X86::PANDNrm, 16 }, + { X86::PANDrr, X86::PANDrm, 16 }, + { X86::PAVGBrr, X86::PAVGBrm, 16 }, + { X86::PAVGWrr, X86::PAVGWrm, 16 }, + { X86::PCMPEQBrr, X86::PCMPEQBrm, 16 }, + { X86::PCMPEQDrr, X86::PCMPEQDrm, 16 }, + { X86::PCMPEQWrr, X86::PCMPEQWrm, 16 }, + { X86::PCMPGTBrr, X86::PCMPGTBrm, 16 }, + { X86::PCMPGTDrr, X86::PCMPGTDrm, 16 }, + { X86::PCMPGTWrr, X86::PCMPGTWrm, 16 }, + { X86::PINSRWrri, X86::PINSRWrmi, 16 }, + { X86::PMADDWDrr, X86::PMADDWDrm, 16 }, + { X86::PMAXSWrr, X86::PMAXSWrm, 16 }, + { X86::PMAXUBrr, X86::PMAXUBrm, 16 }, + { X86::PMINSWrr, X86::PMINSWrm, 16 }, + { X86::PMINUBrr, X86::PMINUBrm, 16 }, + { X86::PMULDQrr, X86::PMULDQrm, 16 }, + { X86::PMULHUWrr, X86::PMULHUWrm, 16 }, + { X86::PMULHWrr, X86::PMULHWrm, 16 }, + { X86::PMULLDrr, X86::PMULLDrm, 16 }, + { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 }, + { X86::PMULLWrr, X86::PMULLWrm, 16 }, + { X86::PMULUDQrr, X86::PMULUDQrm, 16 }, + { X86::PORrr, X86::PORrm, 16 }, + { X86::PSADBWrr, X86::PSADBWrm, 16 }, + { X86::PSLLDrr, X86::PSLLDrm, 16 }, + { X86::PSLLQrr, X86::PSLLQrm, 16 }, + { X86::PSLLWrr, X86::PSLLWrm, 16 }, + { X86::PSRADrr, X86::PSRADrm, 16 }, + { X86::PSRAWrr, X86::PSRAWrm, 16 }, + { X86::PSRLDrr, X86::PSRLDrm, 16 }, + { X86::PSRLQrr, X86::PSRLQrm, 16 }, + { X86::PSRLWrr, X86::PSRLWrm, 16 }, + { X86::PSUBBrr, X86::PSUBBrm, 16 }, + { X86::PSUBDrr, X86::PSUBDrm, 16 }, + { X86::PSUBSBrr, X86::PSUBSBrm, 16 }, + { X86::PSUBSWrr, X86::PSUBSWrm, 16 }, + { X86::PSUBWrr, X86::PSUBWrm, 16 }, + { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, 16 }, + { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, 16 }, + { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, 16 }, + { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, 16 }, + { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, 16 }, + { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, 16 }, + { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, 16 }, + { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, 16 }, + { X86::PXORrr, X86::PXORrm, 16 }, + { X86::SBB32rr, X86::SBB32rm, 0 }, + { X86::SBB64rr, X86::SBB64rm, 0 }, + { X86::SHUFPDrri, X86::SHUFPDrmi, 16 }, + { X86::SHUFPSrri, X86::SHUFPSrmi, 16 }, + { X86::SUB16rr, X86::SUB16rm, 0 }, + { X86::SUB32rr, X86::SUB32rm, 0 }, + { X86::SUB64rr, X86::SUB64rm, 0 }, + { X86::SUB8rr, X86::SUB8rm, 0 }, + { X86::SUBPDrr, X86::SUBPDrm, 16 }, + { X86::SUBPSrr, X86::SUBPSrm, 16 }, + { X86::SUBSDrr, X86::SUBSDrm, 0 }, + { X86::SUBSSrr, X86::SUBSSrm, 0 }, // FIXME: TEST*rr -> swapped operand of TEST*mr. 
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm }, - { X86::UNPCKHPSrr, X86::UNPCKHPSrm }, - { X86::UNPCKLPDrr, X86::UNPCKLPDrm }, - { X86::UNPCKLPSrr, X86::UNPCKLPSrm }, - { X86::XOR16rr, X86::XOR16rm }, - { X86::XOR32rr, X86::XOR32rm }, - { X86::XOR64rr, X86::XOR64rm }, - { X86::XOR8rr, X86::XOR8rm }, - { X86::XORPDrr, X86::XORPDrm }, - { X86::XORPSrr, X86::XORPSrm } + { X86::UNPCKHPDrr, X86::UNPCKHPDrm, 16 }, + { X86::UNPCKHPSrr, X86::UNPCKHPSrm, 16 }, + { X86::UNPCKLPDrr, X86::UNPCKLPDrm, 16 }, + { X86::UNPCKLPSrr, X86::UNPCKLPSrm, 16 }, + { X86::XOR16rr, X86::XOR16rm, 0 }, + { X86::XOR32rr, X86::XOR32rm, 0 }, + { X86::XOR64rr, X86::XOR64rm, 0 }, + { X86::XOR8rr, X86::XOR8rm, 0 }, + { X86::XORPDrr, X86::XORPDrm, 16 }, + { X86::XORPSrr, X86::XORPSrm, 16 } }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { unsigned RegOp = OpTbl2[i][0]; unsigned MemOp = OpTbl2[i][1]; + unsigned Align = OpTbl2[i][2]; if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp, - MemOp)).second) + std::make_pair(MemOp,Align))).second) assert(false && "Duplicated entries?"); - unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load + // Index 2, folded load + unsigned AuxInfo = 2 | (1 << 4); if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp, std::make_pair(RegOp, AuxInfo))).second) AmbEntries.push_back(MemOp); @@ -760,7 +766,6 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } - /// regIsPICBase - Return true if register is a PIC base (i.e., defined by /// X86::MOVPC32r). static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { @@ -776,37 +781,9 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { return isPICBase; } -/// isGVStub - Return true if the GV requires an extra load to get the -/// real address. -static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) { - return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false); -} - -/// CanRematLoadWithDispOperand - Return true if a load with the specified -/// operand is a candidate for remat: for this to be true we need to know that -/// the load will always return the same value, even if moved. -static bool CanRematLoadWithDispOperand(const MachineOperand &MO, - X86TargetMachine &TM) { - // Loads from constant pool entries can be remat'd. - if (MO.isCPI()) return true; - - // We can remat globals in some cases. - if (MO.isGlobal()) { - // If this is a load of a stub, not of the global, we can remat it. This - // access will always return the address of the global. - if (isGVStub(MO.getGlobal(), TM)) - return true; - - // If the global itself is constant, we can remat the load. 
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) - if (GV->isConstant()) - return true; - } - return false; -} - bool -X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { +X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, + AliasAnalysis *AA) const { switch (MI->getOpcode()) { default: break; case X86::MOV8rm: @@ -825,7 +802,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - CanRematLoadWithDispOperand(MI->getOperand(4), TM)) { + MI->isInvariantLoad(AA)) { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; @@ -876,7 +853,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const { /// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that /// would clobber the EFLAGS condition register. Note the result may be /// conservative. If it cannot definitely determine the safety after visiting -/// two instructions it assumes it's not safe. +/// a few instructions in each direction it assumes it's not safe. static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // It's always safe to clobber EFLAGS at the end of a block. @@ -884,11 +861,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, return true; // For compile time consideration, if we are not able to determine the - // safety after visiting 2 instructions, we will assume it's not safe. - for (unsigned i = 0; i < 2; ++i) { + // safety after visiting 4 instructions in each direction, we will assume + // it's not safe. + MachineBasicBlock::iterator Iter = I; + for (unsigned i = 0; i < 4; ++i) { bool SeenDef = false; - for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) { - MachineOperand &MO = I->getOperand(j); + for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { + MachineOperand &MO = Iter->getOperand(j); if (!MO.isReg()) continue; if (MO.getReg() == X86::EFLAGS) { @@ -901,10 +880,33 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, if (SeenDef) // This instruction defines EFLAGS, no need to look any further. return true; - ++I; + ++Iter; // If we make it to the end of the block, it's safe to clobber EFLAGS. - if (I == MBB.end()) + if (Iter == MBB.end()) + return true; + } + + Iter = I; + for (unsigned i = 0; i < 4; ++i) { + // If we make it to the beginning of the block, it's safe to clobber + // EFLAGS iff EFLAGS is not live-in. + if (Iter == MBB.begin()) + return !MBB.isLiveIn(X86::EFLAGS); + + --Iter; + bool SawKill = false; + for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) { + MachineOperand &MO = Iter->getOperand(j); + if (MO.isReg() && MO.getReg() == X86::EFLAGS) { + if (MO.isDef()) return MO.isDead(); + if (MO.isKill()) SawKill = true; + } + } + + if (SawKill) + // This instruction kills EFLAGS and doesn't redefine it, so + // there's no need to look further. return true; } @@ -914,14 +916,11 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, + unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - unsigned SubIdx = Orig->getOperand(0).isReg() - ? 
Orig->getOperand(0).getSubReg() : 0; - bool ChangeSubIdx = SubIdx != 0; if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { DestReg = RI.getSubReg(DestReg, SubIdx); SubIdx = 0; @@ -929,76 +928,36 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, // MOV32r0 etc. are implemented with xor which clobbers condition code. // Re-materialize them as movri instructions to avoid side effects. - bool Emitted = false; - switch (Orig->getOpcode()) { + bool Clone = true; + unsigned Opc = Orig->getOpcode(); + switch (Opc) { default: break; case X86::MOV8r0: case X86::MOV16r0: - case X86::MOV32r0: - case X86::MOV64r0: { + case X86::MOV32r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { - unsigned Opc = 0; - switch (Orig->getOpcode()) { + switch (Opc) { default: break; case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri32; break; } - BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); - Emitted = true; + Clone = false; } break; } } - if (!Emitted) { + if (Clone) { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MI->getOperand(0).setReg(DestReg); MBB.insert(I, MI); + } else { + BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); } - if (ChangeSubIdx) { - MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); - } -} - -/// isInvariantLoad - Return true if the specified instruction (which is marked -/// mayLoad) is loading from a location whose value is invariant across the -/// function. For example, loading a value from the constant pool or from -/// from the argument area of a function if it does not change. This should -/// only return true of *all* loads the instruction does are invariant (if it -/// does multiple loads). -bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const { - // This code cares about loads from three cases: constant pool entries, - // invariant argument slots, and global stubs. In order to handle these cases - // for all of the myriad of X86 instructions, we just scan for a CP/FI/GV - // operand and base our analysis on it. This is safe because the address of - // none of these three cases is ever used as anything other than a load base - // and X86 doesn't have any instructions that load from multiple places. - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - // Loads from constant pools are trivially invariant. - if (MO.isCPI()) - return true; - - if (MO.isGlobal()) - return isGVStub(MO.getGlobal(), TM); - - // If this is a load from an invariant stack slot, the load is a constant. - if (MO.isFI()) { - const MachineFrameInfo &MFI = - *MI->getParent()->getParent()->getFrameInfo(); - int Idx = MO.getIndex(); - return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx); - } - } - - // All other instances of these instructions are presumed to have other - // issues. - return false; + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); } /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. 
EFLAGS, that @@ -1304,7 +1263,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { unsigned Opc; unsigned Size; switch (MI->getOpcode()) { - default: assert(0 && "Unreachable!"); + default: llvm_unreachable("Unreachable!"); case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; @@ -1459,7 +1418,7 @@ static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case X86::COND_E: return X86::JE; case X86::COND_NE: return X86::JNE; case X86::COND_L: return X86::JL; @@ -1483,7 +1442,7 @@ unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { /// e.g. turning COND_E to COND_NE. X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case X86::COND_E: return X86::COND_NE; case X86::COND_NE: return X86::COND_E; case X86::COND_L: return X86::COND_GE; @@ -1699,14 +1658,26 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, /* Source and destination have the same register class. */; else if (CommonRC->hasSuperClass(SrcRC)) CommonRC = SrcRC; - else if (!DestRC->hasSubClass(SrcRC)) - CommonRC = 0; + else if (!DestRC->hasSubClass(SrcRC)) { + // Neither GR64_NOREX nor GR64_NOSP is a superclass of the other, + // but we want to copy them as GR64. Similarly, for GR32_NOREX and + // GR32_NOSP, copy as GR32. + if (SrcRC->hasSuperClass(&X86::GR64RegClass) && + DestRC->hasSuperClass(&X86::GR64RegClass)) + CommonRC = &X86::GR64RegClass; + else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && + DestRC->hasSuperClass(&X86::GR32RegClass)) + CommonRC = &X86::GR32RegClass; + else + CommonRC = 0; + } if (CommonRC) { unsigned Opc; - if (CommonRC == &X86::GR64RegClass) { + if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32RegClass) { + } else if (CommonRC == &X86::GR32RegClass || + CommonRC == &X86::GR32_NOSPRegClass) { Opc = X86::MOV32rr; } else if (CommonRC == &X86::GR16RegClass) { Opc = X86::MOV16rr; @@ -1731,7 +1702,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, Opc = X86::MOV8rr_NOREX; else Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_NOREXRegClass) { + } else if (CommonRC == &X86::GR64_NOREXRegClass || + CommonRC == &X86::GR64_NOREX_NOSPRegClass) { Opc = X86::MOV64rr; } else if (CommonRC == &X86::GR32_NOREXRegClass) { Opc = X86::MOV32rr; @@ -1759,16 +1731,17 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); return true; } - + // Moving EFLAGS to / from another register requires a push and a pop. 
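For context: x86 has no direct register-to-register move involving EFLAGS, so the copies below must round-trip through the stack. A rough stand-alone illustration of the PUSHFQ/POP64r and PUSH64r/POPFQ pairings this code emits (GNU C++, x86-64 only, purely demonstrative and not LLVM API):

#include <cstdint>

// EFLAGS -> GR64: what the emitted PUSHFQ + POP64r pair does.
static inline uint64_t readEflags() {
  uint64_t Flags;
  __asm__ volatile("pushfq\n\tpopq %0" : "=r"(Flags));
  return Flags;
}

// GR64 -> EFLAGS: what the emitted PUSH64r + POPFQ pair does.
static inline void writeEflags(uint64_t Flags) {
  __asm__ volatile("pushq %0\n\tpopfq" : : "r"(Flags) : "cc");
}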
if (SrcRC == &X86::CCRRegClass) { if (SrcReg != X86::EFLAGS) return false; - if (DestRC == &X86::GR64RegClass) { + if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSHFQ)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return true; - } else if (DestRC == &X86::GR32RegClass) { + } else if (DestRC == &X86::GR32RegClass || + DestRC == &X86::GR32_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSHFD)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return true; @@ -1776,11 +1749,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, } else if (DestRC == &X86::CCRRegClass) { if (DestReg != X86::EFLAGS) return false; - if (SrcRC == &X86::GR64RegClass) { + if (SrcRC == &X86::GR64RegClass || SrcRC == &X86::GR64_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); BuildMI(MBB, MI, DL, get(X86::POPFQ)); return true; - } else if (SrcRC == &X86::GR32RegClass) { + } else if (SrcRC == &X86::GR32RegClass || + SrcRC == &X86::GR32_NOSPRegClass) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); BuildMI(MBB, MI, DL, get(X86::POPFD)); return true; @@ -1838,9 +1812,9 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, bool isStackAligned, TargetMachine &TM) { unsigned Opc = 0; - if (RC == &X86::GR64RegClass) { + if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { Opc = X86::MOV64mr; - } else if (RC == &X86::GR32RegClass) { + } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { Opc = X86::MOV32mr; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16mr; @@ -1865,7 +1839,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, Opc = X86::MOV8mr_NOREX; else Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_NOREXRegClass) { + } else if (RC == &X86::GR64_NOREXRegClass || + RC == &X86::GR64_NOREX_NOSPRegClass) { Opc = X86::MOV64mr; } else if (RC == &X86::GR32_NOREXRegClass) { Opc = X86::MOV32mr; @@ -1889,8 +1864,7 @@ static unsigned getStoreRegOpcode(unsigned SrcReg, } else if (RC == &X86::VR64RegClass) { Opc = X86::MMX_MOVQ64mr; } else { - assert(0 && "Unknown regclass"); - abort(); + llvm_unreachable("Unknown regclass"); } return Opc; @@ -1914,6 +1888,8 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, SmallVectorImpl<MachineOperand> &Addr, const TargetRegisterClass *RC, + MachineInstr::mmo_iterator MMOBegin, + MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); @@ -1923,6 +1899,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, for (unsigned i = 0, e = Addr.size(); i != e; ++i) MIB.addOperand(Addr[i]); MIB.addReg(SrcReg, getKillRegState(isKill)); + (*MIB).setMemRefs(MMOBegin, MMOEnd); NewMIs.push_back(MIB); } @@ -1931,9 +1908,9 @@ static unsigned getLoadRegOpcode(unsigned DestReg, bool isStackAligned, const TargetMachine &TM) { unsigned Opc = 0; - if (RC == &X86::GR64RegClass) { + if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { Opc = X86::MOV64rm; - } else if (RC == &X86::GR32RegClass) { + } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { Opc = X86::MOV32rm; } else if (RC == &X86::GR16RegClass) { Opc = X86::MOV16rm; @@ -1958,7 +1935,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg, Opc = X86::MOV8rm_NOREX; else Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_NOREXRegClass) { + } else if (RC == &X86::GR64_NOREXRegClass || + RC == &X86::GR64_NOREX_NOSPRegClass) { Opc = X86::MOV64rm; } else if (RC == 
&X86::GR32_NOREXRegClass) { Opc = X86::MOV32rm; @@ -1982,8 +1960,7 @@ static unsigned getLoadRegOpcode(unsigned DestReg, } else if (RC == &X86::VR64RegClass) { Opc = X86::MMX_MOVQ64rm; } else { - assert(0 && "Unknown regclass"); - abort(); + llvm_unreachable("Unknown regclass"); } return Opc; @@ -2005,6 +1982,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, SmallVectorImpl &Addr, const TargetRegisterClass *RC, + MachineInstr::mmo_iterator MMOBegin, + MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); @@ -2013,6 +1992,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) MIB.addOperand(Addr[i]); + (*MIB).setMemRefs(MMOBegin, MMOEnd); NewMIs.push_back(MIB); } @@ -2026,9 +2006,11 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, if (MI != MBB.end()) DL = MI->getDebugLoc(); bool is64Bit = TM.getSubtarget().is64Bit(); + bool isWin64 = TM.getSubtarget().isTargetWin64(); unsigned SlotSize = is64Bit ? 8 : 4; MachineFunction &MF = *MBB.getParent(); + unsigned FPReg = RI.getFrameRegister(MF); X86MachineFunctionInfo *X86FI = MF.getInfo(); unsigned CalleeFrameSize = 0; @@ -2038,10 +2020,12 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); - if (RegClass != &X86::VR128RegClass) { + if (Reg == FPReg) + // X86RegisterInfo::emitPrologue will handle spilling of frame register. + continue; + if (RegClass != &X86::VR128RegClass && !isWin64) { CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, get(Opc)) - .addReg(Reg, RegState::Kill); + BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); } @@ -2060,13 +2044,18 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + unsigned FPReg = RI.getFrameRegister(MF); bool is64Bit = TM.getSubtarget().is64Bit(); - + bool isWin64 = TM.getSubtarget().isTargetWin64(); unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + if (Reg == FPReg) + // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
+ continue; const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass != &X86::VR128RegClass) { + if (RegClass != &X86::VR128RegClass && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); @@ -2143,8 +2132,9 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, - const SmallVectorImpl &MOs) const{ - const DenseMap *OpcodeTablePtr = NULL; + const SmallVectorImpl &MOs, + unsigned Size, unsigned Align) const { + const DenseMap > *OpcodeTablePtr=NULL; bool isTwoAddrFold = false; unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && @@ -2165,8 +2155,6 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); else if (MI->getOpcode() == X86::MOV32r0) NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); - else if (MI->getOpcode() == X86::MOV64r0) - NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); else if (MI->getOpcode() == X86::MOV8r0) NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); if (NewMI) @@ -2182,60 +2170,82 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // If table selected... if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap::iterator I = + DenseMap >::iterator I = OpcodeTablePtr->find((unsigned*)MI->getOpcode()); if (I != OpcodeTablePtr->end()) { + unsigned Opcode = I->second.first; + unsigned MinAlign = I->second.second; + if (Align < MinAlign) + return NULL; + bool NarrowToMOV32rm = false; + if (Size) { + unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize(); + if (Size < RCSize) { + // Check if it's safe to fold the load. If the size of the object is + // narrower than the load width, then it's not. + if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) + return NULL; + // If this is a 64-bit load, but the spill slot is 32, then we can do + // a 32-bit load which is implicitly zero-extended. This likely is due + // to liveintervalanalysis remat'ing a load from stack slot. + if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) + return NULL; + Opcode = X86::MOV32rm; + NarrowToMOV32rm = true; + } + } + if (isTwoAddrFold) - NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this); + NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this); else - NewMI = FuseInst(MF, I->second, i, MOs, MI, *this); + NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this); + + if (NarrowToMOV32rm) { + // If this is the special case where we use a MOV32rm to load a 32-bit + // value and zero-extend the top bits. Change the destination register + // to a 32-bit one. 
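The MOV64rm-to-MOV32rm narrowing above is sound because of an architectural guarantee: on x86-64, any write to a 32-bit register zeroes bits 63:32 of the containing 64-bit register, so a 4-byte reload through MOV32rm yields the same full-width value a zero-extended 64-bit load would. A small stand-alone demonstration (plain C++, nothing LLVM-specific):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Slot[4] = {0x78, 0x56, 0x34, 0x12}; // a 4-byte spill slot
  uint32_t Lo;
  std::memcpy(&Lo, Slot, sizeof(Lo)); // the MOV32rm: a 32-bit load...
  uint64_t Full = Lo;                 // ...implicitly zero-extended to 64 bits
  assert(Full == UINT64_C(0x12345678));
  return 0;
}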
+ unsigned DstReg = NewMI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + NewMI->getOperand(0).setReg(RI.getSubReg(DstReg, + 4/*x86_subreg_32bit*/)); + else + NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/); + } return NewMI; } } // No fusion if (PrintFailedFusing) - cerr << "We failed to fuse operand " << i << " in " << *MI; + errs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, + const SmallVectorImpl &Ops, int FrameIndex) const { // Check switch flag if (NoFusing) return NULL; const MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Size = MFI->getObjectSize(FrameIndex); unsigned Alignment = MFI->getObjectAlignment(FrameIndex); - // FIXME: Move alignment requirement into tables? - if (Alignment < 16) { - switch (MI->getOpcode()) { - default: break; - // Not always safe to fold movsd into these instructions since their load - // folding variants expects the address to be 16 byte aligned. - case X86::FsANDNPDrr: - case X86::FsANDNPSrr: - case X86::FsANDPDrr: - case X86::FsANDPSrr: - case X86::FsORPDrr: - case X86::FsORPSrr: - case X86::FsXORPDrr: - case X86::FsXORPSrr: - return NULL; - } - } - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; + unsigned RCSize = 0; switch (MI->getOpcode()) { default: return NULL; - case X86::TEST8rr: NewOpc = X86::CMP8ri; break; - case X86::TEST16rr: NewOpc = X86::CMP16ri; break; - case X86::TEST32rr: NewOpc = X86::CMP32ri; break; - case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; + case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; + case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break; + case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break; + case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break; } + // Check if it's safe to fold the load. If the size of the object is + // narrower than the load width, then it's not. + if (Size < RCSize) + return NULL; // Change to CMPXXri r, 0 first. MI->setDesc(get(NewOpc)); MI->getOperand(1).ChangeToImmediate(0); @@ -2244,12 +2254,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, SmallVector MOs; MOs.push_back(MachineOperand::CreateFI(FrameIndex)); - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); } MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, + const SmallVectorImpl &Ops, MachineInstr *LoadMI) const { // Check switch flag if (NoFusing) return NULL; @@ -2257,26 +2267,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Determine the alignment of the load. unsigned Alignment = 0; if (LoadMI->hasOneMemOperand()) - Alignment = LoadMI->memoperands_begin()->getAlignment(); - - // FIXME: Move alignment requirement into tables? - if (Alignment < 16) { - switch (MI->getOpcode()) { - default: break; - // Not always safe to fold movsd into these instructions since their load - // folding variants expects the address to be 16 byte aligned. 
- case X86::FsANDNPDrr: - case X86::FsANDNPSrr: - case X86::FsANDPDrr: - case X86::FsANDPSrr: - case X86::FsORPDrr: - case X86::FsORPSrr: - case X86::FsXORPDrr: - case X86::FsXORPSrr: - return NULL; + Alignment = (*LoadMI->memoperands_begin())->getAlignment(); + else + switch (LoadMI->getOpcode()) { + case X86::V_SET0: + case X86::V_SETALLONES: + Alignment = 16; + break; + case X86::FsFLD0SD: + Alignment = 8; + break; + case X86::FsFLD0SS: + Alignment = 4; + break; + default: + llvm_unreachable("Don't know how to fold this instruction!"); } - } - if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; switch (MI->getOpcode()) { @@ -2293,28 +2299,40 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return NULL; SmallVector MOs; - if (LoadMI->getOpcode() == X86::V_SET0 || - LoadMI->getOpcode() == X86::V_SETALLONES) { + switch (LoadMI->getOpcode()) { + case X86::V_SET0: + case X86::V_SETALLONES: + case X86::FsFLD0SD: + case X86::FsFLD0SS: { // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. // Create a constant-pool entry and operands to load from it. // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; - if (TM.getRelocationModel() == Reloc::PIC_ && - !TM.getSubtarget().is64Bit()) - // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); - // This doesn't work for several reasons. - // 1. GlobalBaseReg may have been spilled. - // 2. It may not be live at MI. - return false; + if (TM.getRelocationModel() == Reloc::PIC_) { + if (TM.getSubtarget().is64Bit()) + PICBase = X86::RIP; + else + // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); + // This doesn't work for several reasons. + // 1. GlobalBaseReg may have been spilled. + // 2. It may not be live at MI. + return NULL; + } - // Create a v4i32 constant-pool entry. + // Create a constant-pool entry. MachineConstantPool &MCP = *MF.getConstantPool(); - const VectorType *Ty = VectorType::get(Type::Int32Ty, 4); - Constant *C = LoadMI->getOpcode() == X86::V_SET0 ? - ConstantVector::getNullValue(Ty) : - ConstantVector::getAllOnesValue(Ty); - unsigned CPI = MCP.getConstantPoolIndex(C, 16); + const Type *Ty; + if (LoadMI->getOpcode() == X86::FsFLD0SS) + Ty = Type::getFloatTy(MF.getFunction()->getContext()); + else if (LoadMI->getOpcode() == X86::FsFLD0SD) + Ty = Type::getDoubleTy(MF.getFunction()->getContext()); + else + Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); + Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? + Constant::getAllOnesValue(Ty) : + Constant::getNullValue(Ty); + unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); // Create operands to load from the constant pool entry. MOs.push_back(MachineOperand::CreateReg(PICBase, false)); @@ -2322,13 +2340,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MOs.push_back(MachineOperand::CreateReg(0, false)); MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); MOs.push_back(MachineOperand::CreateReg(0, false)); - } else { + break; + } + default: { // Folding a normal load. Just copy the load's address operands. 
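Copying "the load's address operands" below is a fixed-width slice because every x86 memory reference in this backend occupies the same X86AddrNumOperands-sized trailing operand group. A sketch of that layout (field names are illustrative, not LLVM's):

// Illustrative layout of an x86 memory reference: Base + Scale*Index + Disp,
// plus an optional segment override. The loop that follows copies exactly
// this many trailing operands (X86AddrNumOperands of them) from the load.
struct X86AddressOperands {
  unsigned BaseReg;    // base register (0 if none)
  unsigned Scale;      // 1, 2, 4 or 8
  unsigned IndexReg;   // index register (0 if none)
  int      Disp;       // displacement: immediate, global, CPI, FI, ...
  unsigned SegmentReg; // segment override (usually 0)
};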
unsigned NumOps = LoadMI->getDesc().getNumOperands(); for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) MOs.push_back(LoadMI->getOperand(i)); + break; + } } - return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); + return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); } @@ -2360,15 +2382,14 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. - const DenseMap *OpcodeTablePtr = NULL; + const DenseMap > *OpcodeTablePtr=NULL; if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 switch (Opc) { + case X86::MOV8r0: case X86::MOV16r0: case X86::MOV32r0: - case X86::MOV64r0: - case X86::MOV8r0: return true; default: break; } @@ -2381,7 +2402,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap::iterator I = + DenseMap >::iterator I = OpcodeTablePtr->find((unsigned*)Opc); if (I != OpcodeTablePtr->end()) return true; @@ -2410,8 +2431,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetInstrDesc &TID = get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() - ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); + const TargetRegisterClass *RC = TOI.getRegClass(&RI); SmallVector AddrOps; SmallVector BeforeOps; SmallVector AfterOps; @@ -2430,7 +2450,11 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the load instruction. if (UnfoldLoad) { - loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs); + std::pair MMOs = + MF.extractLoadMemRefs(MI->memoperands_begin(), + MI->memoperands_end()); + loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); if (UnfoldStore) { // Address operands cannot be marked isKill. for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { @@ -2489,10 +2513,12 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, // Emit the store instruction. if (UnfoldStore) { - const TargetOperandInfo &DstTOI = TID.OpInfo[0]; - const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass() - ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); - storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs); + const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); + std::pair MMOs = + MF.extractStoreMemRefs(MI->memoperands_begin(), + MI->memoperands_end()); + storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); } return true; @@ -2513,9 +2539,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, bool FoldedLoad = I->second.second & (1 << 4); bool FoldedStore = I->second.second & (1 << 5); const TargetInstrDesc &TID = get(Opc); - const TargetOperandInfo &TOI = TID.OpInfo[Index]; - const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() - ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); + const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); unsigned NumDefs = TID.NumDefs; std::vector AddrOps; std::vector BeforeOps; @@ -2536,35 +2560,40 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. 
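The (1 << 4) and (1 << 5) tests above decode the packed AuxInfo word that the constructor stored into MemOp2RegOpTable: the low four bits hold the index of the operand that was folded, bit 4 records that unfolding requires re-emitting a load, and bit 5 a store. A sketch of that packing (layout inferred from the reads in this file; helper names are illustrative):

// Sketch of the MemOp2RegOpTable AuxInfo encoding.
static unsigned makeAuxInfo(unsigned OpIndex, bool FoldedLoad,
                            bool FoldedStore) {
  return OpIndex | (unsigned(FoldedLoad) << 4) | (unsigned(FoldedStore) << 5);
}
static unsigned auxOpIndex(unsigned Aux)  { return Aux & 0xf; }
static bool auxFoldedLoad(unsigned Aux)   { return (Aux & (1 << 4)) != 0; }
static bool auxFoldedStore(unsigned Aux)  { return (Aux & (1 << 5)) != 0; }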
SDNode *Load = 0; - const MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction &MF = DAG.getMachineFunction(); if (FoldedLoad) { - MVT VT = *RC->vt_begin(); + EVT VT = *RC->vt_begin(); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, - VT, MVT::Other, &AddrOps[0], AddrOps.size()); + Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); + + // Preserve memory reference information. + std::pair MMOs = + MF.extractLoadMemRefs(cast(N)->memoperands_begin(), + cast(N)->memoperands_end()); + cast(Load)->setMemRefs(MMOs.first, MMOs.second); } // Emit the data processing instruction. - std::vector VTs; + std::vector VTs; const TargetRegisterClass *DstRC = 0; if (TID.getNumDefs() > 0) { - const TargetOperandInfo &DstTOI = TID.OpInfo[0]; - DstRC = DstTOI.isLookupPtrRegClass() - ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); + DstRC = TID.OpInfo[0].getRegClass(&RI); VTs.push_back(*DstRC->vt_begin()); } for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - MVT VT = N->getValueType(i); + EVT VT = N->getValueType(i); if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) VTs.push_back(VT); } if (Load) BeforeOps.push_back(SDValue(Load, 0)); std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); - SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0], - BeforeOps.size()); + SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], + BeforeOps.size()); NewNodes.push_back(NewNode); // Emit the store instruction. @@ -2574,11 +2603,18 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.push_back(Chain); bool isAligned = (RI.getStackAlignment() >= 16) || RI.needsStackRealignment(MF); - SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC, - isAligned, TM), - dl, MVT::Other, - &AddrOps[0], AddrOps.size()); + SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, + isAligned, TM), + dl, MVT::Other, + &AddrOps[0], AddrOps.size()); NewNodes.push_back(Store); + + // Preserve memory reference information. + std::pair MMOs = + MF.extractStoreMemRefs(cast(N)->memoperands_begin(), + cast(N)->memoperands_end()); + cast(Load)->setMemRefs(MMOs.first, MMOs.second); } return true; @@ -2644,7 +2680,7 @@ unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { case X86II::Imm16: return 2; case X86II::Imm32: return 4; case X86II::Imm64: return 8; - default: assert(0 && "Immediate size not set!"); + default: llvm_unreachable("Immediate size not set!"); return 0; } } @@ -2829,7 +2865,7 @@ static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { } else if (RelocOp->isJTI()) { FinalSize += sizeJumpTableAddress(false); } else { - assert(0 && "Unknown value to relocate!"); + llvm_unreachable("Unknown value to relocate!"); } return FinalSize; } @@ -2926,7 +2962,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::GS: ++FinalSize; break; - default: assert(0 && "Invalid segment!"); + default: llvm_unreachable("Invalid segment!"); case 0: break; // No segment override! } @@ -2946,6 +2982,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::TA: // 0F 3A Need0FPrefix = true; break; + case X86II::TF: // F2 0F 38 + ++FinalSize; + Need0FPrefix = true; + break; case X86II::REP: break; // already handled. 
   case X86II::XS:   // F3 0F
     ++FinalSize;
@@ -2959,7 +2999,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
     ++FinalSize;
     break; // Two-byte opcode prefix
-  default: assert(0 && "Invalid prefix!");
+  default: llvm_unreachable("Invalid prefix!");
   case 0: break;  // No prefix!
   }
 
@@ -2981,6 +3021,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::TA:  // 0F 3A
     ++FinalSize;
     break;
+  case X86II::TF: // F2 0F 38
+    ++FinalSize;
+    break;
   }
 
   // If this is a two-address instruction, skip one of the register operands.
@@ -2993,7 +3036,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     --NumOps;
 
   switch (Desc->TSFlags & X86II::FormMask) {
-  default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
+  default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
   case X86II::Pseudo:
     // Remember the current PC offset, this is the PIC relocation
    // base address.
@@ -3002,16 +3045,16 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     break;
   case TargetInstrInfo::INLINEASM: {
     const MachineFunction *MF = MI.getParent()->getParent();
-    const char *AsmStr = MI.getOperand(0).getSymbolName();
-    const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo();
-    FinalSize += AI->getInlineAsmLength(AsmStr);
+    const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+    FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
+                                        *MF->getTarget().getMCAsmInfo());
     break;
   }
   case TargetInstrInfo::DBG_LABEL:
   case TargetInstrInfo::EH_LABEL:
     break;
   case TargetInstrInfo::IMPLICIT_DEF:
-  case TargetInstrInfo::DECLARE:
+  case TargetInstrInfo::KILL:
   case X86::DWARF_LOC:
   case X86::FP_REG_KILL:
     break;
@@ -3038,7 +3081,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     } else if (MO.isImm()) {
       FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
     } else {
-      assert(0 && "Unknown RawFrm operand!");
+      llvm_unreachable("Unknown RawFrm operand!");
     }
   }
   break;
@@ -3196,10 +3239,10 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   }
 
   if (!Desc->isVariadic() && CurOp != NumOps) {
-    cerr << "Cannot determine size: ";
-    MI.dump();
-    cerr << '\n';
-    abort();
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Cannot determine size: " << MI;
+    llvm_report_error(Msg.str());
   }
 
 
@@ -3209,7 +3252,7 @@
 
 unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   const TargetInstrDesc &Desc = MI->getDesc();
-  bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
+  bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
   bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
   unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
   if (Desc.getOpcode() == X86::MOVPC32r)
@@ -3245,12 +3288,11 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
 
   // If we're using vanilla 'GOT' PIC style, we should use relative addressing
   // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
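The replacements above of assert(0) with llvm_unreachable, and of the cerr/dump/abort sequence with a raw_string_ostream handed to llvm_report_error, all follow one idiom: build the diagnostic, then let the error handler terminate. A small sketch of that idiom against the error-handling API as it existed at this revision (llvm_report_error was later renamed, so treat the entry-point name as era-specific; the helper function is hypothetical):

#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Sketch: accumulate a diagnostic into a string stream, then hand it to the
// error handler instead of printing to cerr and calling abort() directly.
static unsigned sizeOrDie(bool SizeKnown, unsigned Size, unsigned Opcode) {
  if (SizeKnown)
    return Size;
  std::string msg;
  llvm::raw_string_ostream Msg(msg);
  Msg << "Cannot determine size for opcode " << Opcode;
  llvm::llvm_report_error(Msg.str()); // reports and does not return
  return 0;                           // never reached
}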
- if (TM.getRelocationModel() == Reloc::PIC_ && - TM.getSubtarget().isPICStyleGOT()) { + if (TM.getSubtarget().isPICStyleGOT()) { GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) - .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0, + .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", X86II::MO_GOT_ABSOLUTE_ADDRESS); } else { GlobalBaseReg = PC; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 83f01945ea21b..2237c8be517af 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -69,35 +69,36 @@ namespace X86 { /// instruction info tracks. /// namespace X86II { - enum { + /// Target Operand Flag enum. + enum TOF { //===------------------------------------------------------------------===// // X86 Specific MachineOperand flags. - MO_NO_FLAG = 0, + MO_NO_FLAG, /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a /// relocation of: /// SYMBOL_LABEL + [. - PICBASELABEL] - MO_GOT_ABSOLUTE_ADDRESS = 1, + MO_GOT_ABSOLUTE_ADDRESS, /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the /// immediate should get the value of the symbol minus the PIC base label: /// SYMBOL_LABEL - PICBASELABEL - MO_PIC_BASE_OFFSET = 2, + MO_PIC_BASE_OFFSET, /// MO_GOT - On a symbol operand this indicates that the immediate is the /// offset to the GOT entry for the symbol name from the base of the GOT. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOT - MO_GOT = 3, + MO_GOT, /// MO_GOTOFF - On a symbol operand this indicates that the immediate is /// the offset to the location of the symbol name from the base of the GOT. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTOFF - MO_GOTOFF = 4, + MO_GOTOFF, /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is /// offset to the GOT entry for the symbol name from the current code @@ -105,50 +106,115 @@ namespace X86II { /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTPCREL - MO_GOTPCREL = 5, + MO_GOTPCREL, /// MO_PLT - On a symbol operand this indicates that the immediate is /// offset to the PLT entry of symbol name from the current code location. /// /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @PLT - MO_PLT = 6, + MO_PLT, /// MO_TLSGD - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSGD - MO_TLSGD = 7, + MO_TLSGD, /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @GOTTPOFF - MO_GOTTPOFF = 8, + MO_GOTTPOFF, /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @INDNTPOFF - MO_INDNTPOFF = 9, + MO_INDNTPOFF, /// MO_TPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TPOFF - MO_TPOFF = 10, + MO_TPOFF, /// MO_NTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// See 'ELF Handling for Thread-Local Storage' for more details. 
/// SYMBOL_LABEL @NTPOFF - MO_NTPOFF = 11, + MO_NTPOFF, + + /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "__imp_FOO" symbol. This is used for + /// dllimport linkage on windows. + MO_DLLIMPORT, + + /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$stub" symbol. This is used for calls + /// and jumps to external functions on Tiger and before. + MO_DARWIN_STUB, + /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the + /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a + /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. + MO_DARWIN_NONLAZY, + + /// MO_DARWIN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this indicates + /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is + /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. + MO_DARWIN_NONLAZY_PIC_BASE, + + /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this + /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", + /// which is a PIC-base-relative reference to a hidden dyld lazy pointer + /// stub. + MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE + }; +} + +/// isGlobalStubReference - Return true if the specified TargetFlag operand is +/// a reference to a stub for a global, not the global itself. +inline static bool isGlobalStubReference(unsigned char TargetFlag) { + switch (TargetFlag) { + case X86II::MO_DLLIMPORT: // dllimport stub. + case X86II::MO_GOTPCREL: // rip-relative GOT reference. + case X86II::MO_GOT: // normal GOT reference. + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref. + case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref. + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Hidden $non_lazy_ptr ref. + return true; + default: + return false; + } +} + +/// isGlobalRelativeToPICBase - Return true if the specified global value +/// reference is relative to a 32-bit PIC base (X86ISD::GlobalBaseReg). If this +/// is true, the addressing mode has the PIC base register added in (e.g. EBX). +inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { + switch (TargetFlag) { + case X86II::MO_GOTOFF: // isPICStyleGOT: local global. + case X86II::MO_GOT: // isPICStyleGOT: other global. + case X86II::MO_PIC_BASE_OFFSET: // Darwin local global. + case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global. + case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global. + return true; + default: + return false; + } +} + +/// X86II - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace X86II { + enum { //===------------------------------------------------------------------===// // Instruction encodings. These are the standard/most common forms for X86 // instructions. @@ -249,6 +315,9 @@ namespace X86II { // T8, TA - Prefix after the 0x0F prefix. T8 = 13 << Op0Shift, TA = 14 << Op0Shift, + + // TF - Prefix before and after 0x0F + TF = 15 << Op0Shift, //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. @@ -355,10 +424,10 @@ class X86InstrInfo : public TargetInstrInfoImpl { /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, /// RegOp2MemOpTable2 - Load / store folding opcode maps. 
   ///
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
 
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///
@@ -382,11 +451,11 @@ public:
   unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
   unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
 
-  bool isReallyTriviallyReMaterializable(const MachineInstr *MI) const;
+  bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+                                         AliasAnalysis *AA) const;
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                     unsigned DestReg, const MachineInstr *Orig) const;
-
-  bool isInvariantLoad(const MachineInstr *MI) const;
+                     unsigned DestReg, unsigned SubIdx,
+                     const MachineInstr *Orig) const;
 
   /// convertToThreeAddress - This method must be implemented by targets that
   /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
@@ -430,6 +499,8 @@ public:
   virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
                               SmallVectorImpl<MachineOperand> &Addr,
                               const TargetRegisterClass *RC,
+                              MachineInstr::mmo_iterator MMOBegin,
+                              MachineInstr::mmo_iterator MMOEnd,
                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
 
   virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -440,6 +511,8 @@ public:
   virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                               SmallVectorImpl<MachineOperand> &Addr,
                               const TargetRegisterClass *RC,
+                              MachineInstr::mmo_iterator MMOBegin,
+                              MachineInstr::mmo_iterator MMOEnd,
                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
 
   virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -530,9 +603,10 @@ public:
 private:
   MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                       MachineInstr* MI,
-                                       unsigned OpNum,
-                                       const SmallVectorImpl<MachineOperand> &MOs) const;
+                                      MachineInstr* MI,
+                                      unsigned OpNum,
+                                      const SmallVectorImpl<MachineOperand> &MOs,
+                                      unsigned Size, unsigned Alignment) const;
 };
 
 } // End llvm namespace
 
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 03df10db61c54..30b57d85d012a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -56,6 +56,10 @@ def SDT_X86CallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
 
 def SDT_X86Call   : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 
+def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
+                                                         SDTCisVT<1, iPTR>,
+                                                         SDTCisVT<2, iPTR>]>;
+
 def SDTX86RepStr  : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
 
 def SDTX86RdTsc   : SDTypeProfile<0, 0, []>;
@@ -114,6 +118,11 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
 def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
                         [SDNPHasChain, SDNPOptInFlag]>;
 
+def X86vastart_save_xmm_regs :
+                 SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
+                        SDT_X86VASTART_SAVE_XMM_REGS,
+                        [SDNPHasChain]>;
+
 def X86callseq_start :
                  SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart,
                         [SDNPHasChain, SDNPOutFlag]>;
@@ -124,9 +133,6 @@ def X86callseq_end :
 def X86call    : SDNode<"X86ISD::CALL",     SDT_X86Call,
                         [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
 
-def X86tailcall: SDNode<"X86ISD::TAILCALL",     SDT_X86Call,
-                        [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
-
 def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>;
 def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
@@ -156,6 +162,9 @@ def X86smul_flag  : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
 def X86umul_flag  : SDNode<"X86ISD::UMUL",
SDTUnaryArithWithFlags>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>; +def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags>; +def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags>; +def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -167,57 +176,80 @@ def i32imm_pcrel : Operand { let PrintMethod = "print_pcrel_imm"; } +// A version of ptr_rc which excludes SP, ESP, and RSP. This is used for +// the index operand of an address, to conform to x86 encoding restrictions. +def ptr_rc_nosp : PointerLikeRegClass<1>; // *mem - Operand definitions for the funky X86 addressing mode operands. // +def X86MemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let SuperClass = ?; +} class X86MemOperand : Operand { let PrintMethod = printMethod; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } +def opaque32mem : X86MemOperand<"printopaquemem">; +def opaque48mem : X86MemOperand<"printopaquemem">; +def opaque80mem : X86MemOperand<"printopaquemem">; + def i8mem : X86MemOperand<"printi8mem">; def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; def i128mem : X86MemOperand<"printi128mem">; -def i256mem : X86MemOperand<"printi256mem">; +//def i256mem : X86MemOperand<"printi256mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; -def f256mem : X86MemOperand<"printf256mem">; +//def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. def i8mem_NOREX : Operand { let PrintMethod = "printi8mem"; - let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm); + let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } def lea32mem : Operand { let PrintMethod = "printlea32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm); + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); + let ParserMatchClass = X86MemAsmOperand; } def SSECC : Operand { let PrintMethod = "printSSECC"; } -def piclabel: Operand { - let PrintMethod = "printPICLabel"; +def ImmSExt8AsmOperand : AsmOperandClass { + let Name = "ImmSExt8"; + let SuperClass = ImmAsmOperand; } // A couple of more descriptive operand definitions. // 16-bits but only 8 bits are significant. -def i16i8imm : Operand; +def i16i8imm : Operand { + let ParserMatchClass = ImmSExt8AsmOperand; +} // 32-bits but only 8 bits are significant. -def i32i8imm : Operand; +def i32i8imm : Operand { + let ParserMatchClass = ImmSExt8AsmOperand; +} // Branch targets have OtherVT type and print as pc-relative values. def brtarget : Operand { let PrintMethod = "print_pcrel_imm"; } +def brtarget8 : Operand { + let PrintMethod = "print_pcrel_imm"; +} + //===----------------------------------------------------------------------===// // X86 Complex Pattern Definitions. // @@ -225,7 +257,8 @@ def brtarget : Operand { // Define X86 specific addressing mode. 
def addr : ComplexPattern; def lea32addr : ComplexPattern; + [add, sub, mul, X86mul_imm, shl, or, frameindex], + []>; def tls32addr : ComplexPattern; @@ -246,8 +279,14 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def In32BitMode : Predicate<"!Subtarget->is64Bit()">; def In64BitMode : Predicate<"Subtarget->is64Bit()">; +def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; +def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; -def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">; +def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; +def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&" + "TM.getCodeModel() != CodeModel::Kernel">; +def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||" + "TM.getCodeModel() == CodeModel::Kernel">; def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; @@ -484,15 +523,35 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), Requires<[In32BitMode]>; } +// x86-64 va_start lowering magic. +let usesCustomDAGSchedInserter = 1 in +def VASTART_SAVE_XMM_REGS : I<0, Pseudo, + (outs), + (ins GR8:$al, + i64imm:$regsavefi, i64imm:$offset, + variable_ops), + "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", + [(X86vastart_save_xmm_regs GR8:$al, + imm:$regsavefi, + imm:$offset)]>; + // Nop -let neverHasSideEffects = 1 in +let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; + def NOOPL : I<0x1f, MRM0m, (outs), (ins i32mem:$zero), + "nopl\t$zero", []>, TB; +} -// PIC base +// Trap +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>; +def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; + +// PIC base construction. This expands to code that looks like this: +// call $next_inst +// popl %destreg" let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in - def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label), - "call\t$label\n\t" - "pop{l}\t$reg", []>; + def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label), + "", []>; //===----------------------------------------------------------------------===// // Control Flow Instructions... 
@@ -506,7 +565,11 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, [(X86retflag 0)]>; def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret\t$amt", - [(X86retflag imm:$amt)]>; + [(X86retflag timm:$amt)]>; + def LRET : I <0xCB, RawFrm, (outs), (ins), + "lret", []>; + def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "lret\t$amt", []>; } // All branches are RawFrm, Void, Branch, and Terminators @@ -514,8 +577,10 @@ let isBranch = 1, isTerminator = 1 in class IBr opcode, dag ins, string asm, list pattern> : I; -let isBranch = 1, isBarrier = 1 in +let isBranch = 1, isBarrier = 1 in { def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; + def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>; +} // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { @@ -523,10 +588,42 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { [(brind GR32:$dst)]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", [(brind (loadi32 addr:$dst))]>; + + def FARJMP16i : Iseg16<0xEA, RawFrm, (outs), + (ins i16imm:$seg, i16imm:$off), + "ljmp{w}\t$seg, $off", []>, OpSize; + def FARJMP32i : Iseg32<0xEA, RawFrm, (outs), + (ins i16imm:$seg, i32imm:$off), + "ljmp{l}\t$seg, $off", []>; + + def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), + "ljmp{w}\t{*}$dst", []>, OpSize; + def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), + "ljmp{l}\t{*}$dst", []>; } // Conditional branches let Uses = [EFLAGS] in { +// Short conditional jumps +def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; +def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; +def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; +def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; +def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; +def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; +def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; +def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; +def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; +def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; +def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; +def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; +def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; +def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; +def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; +def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; + +def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; + def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", @@ -563,6 +660,12 @@ def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; } // Uses = [EFLAGS] +// Loop instructions + +def LOOP : I<0xE2, RawFrm, (ins brtarget8:$dst), (outs), "loop\t$dst", []>; +def LOOPE : I<0xE1, RawFrm, (ins brtarget8:$dst), (outs), "loope\t$dst", []>; +def LOOPNE : I<0xE0, RawFrm, (ins brtarget8:$dst), (outs), "loopne\t$dst", []>; + //===----------------------------------------------------------------------===// // Call Instructions... 
// @@ -583,13 +686,26 @@ let isCall = 1 in "call\t{*}$dst", [(X86call GR32:$dst)]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>; + + def FARCALL16i : Iseg16<0x9A, RawFrm, (outs), + (ins i16imm:$seg, i16imm:$off), + "lcall{w}\t$seg, $off", []>, OpSize; + def FARCALL32i : Iseg32<0x9A, RawFrm, (outs), + (ins i16imm:$seg, i32imm:$off), + "lcall{l}\t$seg, $off", []>; + + def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), + "lcall{w}\t{*}$dst", []>, OpSize; + def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), + "lcall{l}\t{*}$dst", []>; } -// Tail call stuff. +// Constructing a stack frame. + +def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl), + "enter\t$len, $lvl", []>; -def TAILCALL : I<0, Pseudo, (outs), (ins), - "#TAILCALL", - []>; +// Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in def TCRETURNdi : I<0, Pseudo, (outs), (ins i32imm:$dst, i32imm:$offset, variable_ops), @@ -620,11 +736,29 @@ def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", []>; let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { -let mayLoad = 1 in -def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; +let mayLoad = 1 in { +def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, + OpSize; +def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; +def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, + OpSize; +def POP16rmm: I<0x8F, MRM0m, (outs i16mem:$dst), (ins), "pop{w}\t$dst", []>, + OpSize; +def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>; +def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", []>; +} -let mayStore = 1 in +let mayStore = 1 in { +def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>, + OpSize; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; +def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>, + OpSize; +def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[]>, + OpSize; +def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>; +def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; +} } let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { @@ -710,6 +844,14 @@ let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)]>, REP; +def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>; +def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize; +def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>; + +def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmps{b}", []>; +def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmps{w}", []>, OpSize; +def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l}", []>; + let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; @@ -718,6 +860,18 @@ let isBarrier = 1, hasCtrlDep = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } +def SYSCALL : I<0x05, RawFrm, + (outs), (ins), "syscall", []>, TB; +def SYSRET : I<0x07, RawFrm, + (outs), (ins), "sysret", []>, TB; +def SYSENTER : I<0x34, RawFrm, + (outs), (ins), "sysenter", []>, TB; +def SYSEXIT : I<0x35, RawFrm, + (outs), (ins), "sysexit", []>, TB; + +def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>; + + 
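It is worth spelling out what the memory-operand definitions earlier in this file (X86MemOperand and its *mem instances) actually carry, since the new ptr_rc_nosp index class reflects a hard encoding rule. Every x86 memory operand expands to the same five sub-operands; a schematic C++ view follows (the struct is purely illustrative, not an LLVM type):

#include <stdint.h>

// Sketch: the five sub-operands behind every x86 memory operand, in the
// order X86MemOperand lists them: (ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm).
struct X86AddressOperands {
  unsigned BaseReg;  // ptr_rc:      base register, or 0 for none
  uint8_t  Scale;    // i8imm:       1, 2, 4 or 8
  unsigned IndexReg; // ptr_rc_nosp: index register; never SP/ESP/RSP, because
                     //              the SIB encoding for "index = SP" already
                     //              means "no index"
  int32_t  Disp;     // i32imm:      signed displacement
  uint8_t  Segment;  // i8imm:       segment override, or 0 for the default
};

// Effective address computed: Base + Index * Scale + Disp (within Segment).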
//===----------------------------------------------------------------------===// // Input/Output Instructions... // @@ -793,6 +947,30 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; +def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins i8imm:$src), + "mov{b}\t{$src, %al|%al, $src}", []>; +def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins i16imm:$src), + "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; +def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins i32imm:$src), + "mov{l}\t{$src, %eax|%eax, $src}", []>; + +def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs i8imm:$dst), (ins), + "mov{b}\t{%al, $dst|$dst, %al}", []>; +def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs i16imm:$dst), (ins), + "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; +def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs i32imm:$dst), (ins), + "mov{l}\t{%eax, $dst|$dst, %eax}", []>; + +// Moves to and from segment registers +def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; +def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src), + "mov{w}\t{$src, $dst|$dst, $src}", []>; + let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -950,6 +1128,20 @@ let isTwoAddress = 1 in { // Conditional moves let Uses = [EFLAGS] in { + +// X86 doesn't have 8-bit conditional moves. Use a customDAGSchedInserter to +// emit control flow. An alternative to this is to mark i8 SELECT as Promote, +// however that requires promoting the operands, and can induce additional +// i8 register pressure. Note that CMOV_GR8 is conservatively considered to +// clobber EFLAGS, because if one of the operands is zero, the expansion +// could involve an xor. 
+let usesCustomDAGSchedInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in +def CMOV_GR8 : I<0, Pseudo, + (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), + "#CMOV_GR8 PSEUDO!", + [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2, + imm:$cond, EFLAGS))]>; + let isCommutable = 1 in { def CMOVB16rr : I<0x42, MRMSrcReg, // if ; + + def AND8i8 : Ii8<0x24, RawFrm, (outs), (ins i8imm:$src), + "and{b}\t{$src, %al|%al, $src}", []>; + def AND16i16 : Ii16<0x25, RawFrm, (outs), (ins i16imm:$src), + "and{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src), + "and{l}\t{$src, %eax|%eax, $src}", []>; + } @@ -1635,6 +1835,13 @@ let isTwoAddress = 0 in { "or{l}\t{$src, $dst|$dst, $src}", [(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst), (implicit EFLAGS)]>; + + def OR8i8 : Ii8 <0x0C, RawFrm, (outs), (ins i8imm:$src), + "or{b}\t{$src, %al|%al, $src}", []>; + def OR16i16 : Ii16 <0x0D, RawFrm, (outs), (ins i16imm:$src), + "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src), + "or{l}\t{$src, %eax|%eax, $src}", []>; } // isTwoAddress = 0 @@ -1744,6 +1951,13 @@ let isTwoAddress = 0 in { "xor{l}\t{$src, $dst|$dst, $src}", [(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst), (implicit EFLAGS)]>; + + def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src), + "xor{b}\t{$src, %al|%al, $src}", []>; + def XOR16i16 : Ii16 <0x35, RawFrm, (outs), (ins i16imm:$src), + "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def XOR32i32 : Ii32 <0x35, RawFrm, (outs), (ins i32imm:$src), + "xor{l}\t{$src, %eax|%eax, $src}", []>; } // isTwoAddress = 0 } // Defs = [EFLAGS] @@ -1771,8 +1985,17 @@ def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>; -// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is -// cheaper. + +// NOTE: We don't include patterns for shifts of a register by one, because +// 'add reg,reg' is cheaper. 
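The CMOV_GR8 pseudo defined above relies on the custom inserter to expand into real control flow, as the comment preceding it explains. Schematically the expansion computes the following; this is a plain C++ stand-in, and which operand is taken on a true condition depends on the condition encoding, so the orientation here is illustrative:

#include <stdint.h>

// Sketch: an 8-bit conditional move expanded as control flow. At the machine
// level this becomes a conditional branch plus a PHI in the join block:
//   thisMBB:  jCC copy1MBB
//   copy0MBB: (falls through; value is src2)
//   copy1MBB: (value is src1)
//   sinkMBB:  dst = phi [src2, copy0MBB], [src1, copy1MBB]
uint8_t cmov8(bool cond, uint8_t src1, uint8_t src2) {
  uint8_t dst;
  if (cond)
    dst = src1;   // copy1MBB
  else
    dst = src2;   // copy0MBB
  return dst;     // sinkMBB
}

Marking the pseudo as clobbering EFLAGS is conservative: if one operand is a zero that the expansion materializes with an xor, the flags are rewritten.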
+ +def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1), + "shl{b}\t$dst", []>; +def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1), + "shl{w}\t$dst", []>, OpSize; +def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), + "shl{l}\t$dst", []>; + } // isConvertibleToThreeAddress = 1 let isTwoAddress = 0 in { @@ -1951,6 +2174,97 @@ let isTwoAddress = 0 in { } // Rotate instructions + +def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +} +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +let Uses = [CL] in { +def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +} +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + +def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +} +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +} +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; + +def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +let Uses = [CL] in { +def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, 
OpSize; +def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +} +def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; + +def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +let Uses = [CL] in { +def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src, i8imm:$cnt), + "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -2228,6 +2542,15 @@ def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), "add{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))), (implicit EFLAGS)]>; + +// Register-Register Addition - Equivalent to the normal rr forms (ADD8rr, +// ADD16rr, and ADD32rr), but differently encoded. +def ADD8mrmrr: I<0x02, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "add{b}\t{$src2, $dst|$dst, $src2}", []>; +def ADD16mrmrr: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2), + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize; +def ADD32mrmrr: I<0x03, MRMSrcReg,(outs GR16:$dst),(ins GR16:$src1, GR16:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", []>; // Register-Integer Addition def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), @@ -2295,6 +2618,14 @@ let isTwoAddress = 0 in { [(store (add (load addr:$dst), i32immSExt8:$src2), addr:$dst), (implicit EFLAGS)]>; + + // addition to rAX + def ADD8i8 : Ii8<0x04, RawFrm, (outs), (ins i8imm:$src), + "add{b}\t{$src, %al|%al, $src}", []>; + def ADD16i16 : Ii16<0x05, RawFrm, (outs), (ins i16imm:$src), + "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src), + "add{l}\t{$src, %eax|%eax, $src}", []>; } let Uses = [EFLAGS] in { @@ -2373,6 +2704,13 @@ let isTwoAddress = 0 in { def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2), "adc{l}\t{$src2, $dst|$dst, $src2}", [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; + + def ADC8i8 : Ii8<0x14, RawFrm, (outs), (ins i8imm:$src), + "adc{b}\t{$src, %al|%al, $src}", []>; + def ADC16i16 : Ii16<0x15, RawFrm, (outs), (ins i16imm:$src), + "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src), + "adc{l}\t{$src, %eax|%eax, $src}", []>; } } // Uses = [EFLAGS] @@ -2472,6 +2810,13 @@ let isTwoAddress = 0 in { [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst), (implicit EFLAGS)]>; + + def SUB8i8 : Ii8<0x2C, RawFrm, (outs), (ins i8imm:$src), + "sub{b}\t{$src, %al|%al, $src}", []>; + def SUB16i16 : Ii16<0x2D, RawFrm, (outs), (ins i16imm:$src), + "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src), + "sub{l}\t{$src, %eax|%eax, $src}", []>; } let 
Uses = [EFLAGS] in { @@ -2516,6 +2861,13 @@ let isTwoAddress = 0 in { def SBB32mi8 : Ii8<0x83, MRM3m, (outs), (ins i32mem:$dst, i32i8imm :$src2), "sbb{l}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; + + def SBB8i8 : Ii8<0x1C, RawFrm, (outs), (ins i8imm:$src), + "sbb{b}\t{$src, %al|%al, $src}", []>; + def SBB16i16 : Ii16<0x1D, RawFrm, (outs), (ins i16imm:$src), + "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize; + def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src), + "sbb{l}\t{$src, %eax|%eax, $src}", []>; } def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", @@ -2647,6 +2999,13 @@ def TEST32rr : I<0x85, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), (implicit EFLAGS)]>; } +def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src), + "test{b}\t{$src, %al|%al, $src}", []>; +def TEST16i16 : Ii16<0xA9, RawFrm, (outs), (ins i16imm:$src), + "test{w}\t{$src, %ax|%ax, $src}", []>, OpSize; +def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src), + "test{l}\t{$src, %eax|%eax, $src}", []>; + def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "test{b}\t{$src2, $src1|$src1, $src2}", [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0), @@ -2878,6 +3237,13 @@ def SETNOm : I<0x91, MRM0m, // Integer comparisons let Defs = [EFLAGS] in { +def CMP8i8 : Ii8<0x3C, RawFrm, (outs), (ins i8imm:$src), + "cmp{b}\t{$src, %al|%al, $src}", []>; +def CMP16i16 : Ii16<0x3D, RawFrm, (outs), (ins i16imm:$src), + "cmp{w}\t{$src, %ax|%ax, $src}", []>, OpSize; +def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src), + "cmp{l}\t{$src, %eax|%eax, $src}", []>; + def CMP8rr : I<0x38, MRMDestReg, (outs), (ins GR8 :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", @@ -2920,6 +3286,12 @@ def CMP32rm : I<0x3B, MRMSrcMem, "cmp{l}\t{$src2, $src1|$src1, $src2}", [(X86cmp GR32:$src1, (loadi32 addr:$src2)), (implicit EFLAGS)]>; +def CMP8mrmrr : I<0x3A, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2), + "cmp{b}\t{$src2, $src1|$src1, $src2}", []>; +def CMP16mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), + "cmp{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize; +def CMP32mrmrr : I<0x3B, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), + "cmp{l}\t{$src2, $src1|$src1, $src2}", []>; def CMP8ri : Ii8<0x80, MRM7r, (outs), (ins GR8:$src1, i8imm:$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", @@ -3095,7 +3467,8 @@ let neverHasSideEffects = 1 in { // Alias instructions that map movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. 
-let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in { +let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, + isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "xor{b}\t$dst, $dst", [(set GR8:$dst, 0)]>; @@ -3127,12 +3500,12 @@ def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym), [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movl\t%gs:$src, $dst", [(set GR32:$dst, (gsload addr:$src))]>, SegGS; -let AddedComplexity = 5 in +let AddedComplexity = 5, isCodeGenOnly = 1 in def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movl\t%fs:$src, $dst", [(set GR32:$dst, (fsload addr:$src))]>, SegFS; @@ -3143,7 +3516,7 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def DWARF_LOC : I<0, Pseudo, (outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), - ".loc\t${file:debug} ${line:debug} ${col:debug}", + ".loc\t$file $line $col", [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; @@ -3151,7 +3524,7 @@ def DWARF_LOC : I<0, Pseudo, (outs), // EH Pseudo Instructions // let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1 in { + hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", [(X86ehret GR32:$addr)]>; @@ -3223,6 +3596,78 @@ def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val), TB, LOCK; } +// Optimized codegen when the non-memory output is not used. +// FIXME: Use normal add / sub instructions and add lock prefix dynamically. +def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2), + "lock\n\t" + "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "add{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "add{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), + "lock\n\t" + "inc{b}\t$dst", []>, LOCK; +def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), + "lock\n\t" + "inc{w}\t$dst", []>, OpSize, LOCK; +def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), + "lock\n\t" + "inc{l}\t$dst", []>, LOCK; + +def LOCK_SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mr : I<0x29, MRMDestMem, (outs), 
(ins i32mem:$dst, GR32:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), + "lock\n\t" + "sub{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; +def LOCK_SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), + "lock\n\t" + "sub{w}\t{$src2, $dst|$dst, $src2}", []>, OpSize, LOCK; +def LOCK_SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), + "lock\n\t" + "sub{l}\t{$src2, $dst|$dst, $src2}", []>, LOCK; + +def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), + "lock\n\t" + "dec{b}\t$dst", []>, LOCK; +def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), + "lock\n\t" + "dec{w}\t$dst", []>, OpSize, LOCK; +def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), + "lock\n\t" + "dec{l}\t$dst", []>, LOCK; + // Atomic exchange, and, or, xor let Constraints = "$val = $dst", Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { @@ -3318,6 +3763,25 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), "#ATOMSWAP6432 PSEUDO!", []>; } +// Segmentation support instructions. + +def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), + "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; +def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), + "lar{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; + +// i16mem operand in LAR32rm and GR32 operand in LAR32rr is not a typo. +def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), + "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; +def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), + "lar{l}\t{$src, $dst|$dst, $src}", []>, TB; + +// String manipulation instructions + +def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>; +def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", []>, OpSize; +def LODSD : I<0xAD, RawFrm, (outs), (ins), "lodsd", []>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -3345,14 +3809,6 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst), // Calls // tailcall stuff -def : Pat<(X86tailcall GR32:$dst), - (TAILCALL)>; - -def : Pat<(X86tailcall (i32 tglobaladdr:$dst)), - (TAILCALL)>; -def : Pat<(X86tailcall (i32 texternalsym:$dst)), - (TAILCALL)>; - def : Pat<(X86tcret GR32:$dst, imm:$off), (TCRETURNri GR32:$dst, imm:$off)>; @@ -3362,6 +3818,7 @@ def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>; +// Normal calls, with various flavors of addresses. 
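Stepping back to the LOCK_ADD/LOCK_SUB/LOCK_INC/LOCK_DEC definitions a few hunks above: they exist so that an atomic read-modify-write whose fetched value is never consumed can be selected as a single lock-prefixed memory instruction rather than an exchange-based loop. A sketch of the source-level pattern that benefits, using a GCC-style builtin that was current when this code was written:

#include <stdint.h>

// When the fetched value is discarded, the whole operation can become a
// single "lock add" / "lock inc" on the memory location; no register result
// is materialized.
void bump(volatile int32_t *counter) {
  __sync_fetch_and_add(counter, 1); // result unused
}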
def : Pat<(X86call (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>; def : Pat<(X86call (i32 texternalsym:$dst)), @@ -3472,21 +3929,17 @@ def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; // extload bool -> extload byte def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>; -def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>, - Requires<[In32BitMode]>; +def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; -def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>, - Requires<[In32BitMode]>; +def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; -// anyext -def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>, - Requires<[In32BitMode]>; -def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>, - Requires<[In32BitMode]>; -def : Pat<(i32 (anyext GR16:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; +// anyext. Define these to do an explicit zero-extend to +// avoid partial-register updates. +def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; +def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; +def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; // (and (i32 load), 255) -> (zextload i8) def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))), @@ -3567,6 +4020,10 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; +def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), + (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD), x86_subreg_8bit_hi))>, @@ -3961,6 +4418,243 @@ def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), (implicit EFLAGS)), (DEC32m addr:$dst)>, Requires<[In32BitMode]>; +// Register-Register Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (OR8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (OR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (OR32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (OR8rm GR8:$src1, addr:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (OR16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (OR32rm GR32:$src1, addr:$src2)>; + +// Register-Integer Or with EFLAGS result +def : Pat<(parallel (X86or_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (OR8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (OR16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (OR32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86or_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (OR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + 
(OR32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register Or with EFLAGS result +def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer Or with EFLAGS result +def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (OR32mi8 addr:$dst, i32immSExt8:$src2)>; + +// Register-Register XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (XOR8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (XOR16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (XOR32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (XOR8rm GR8:$src1, addr:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (XOR16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (XOR32rm GR32:$src1, addr:$src2)>; + +// Register-Integer XOr with EFLAGS result +def : Pat<(parallel (X86xor_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (XOR8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (XOR16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (XOR32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86xor_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register XOr with EFLAGS result +def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer XOr with EFLAGS result +def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit 
EFLAGS)), + (XOR16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (XOR32mi8 addr:$dst, i32immSExt8:$src2)>; + +// Register-Register And with EFLAGS result +def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2), + (implicit EFLAGS)), + (AND8rr GR8:$src1, GR8:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, GR16:$src2), + (implicit EFLAGS)), + (AND16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, GR32:$src2), + (implicit EFLAGS)), + (AND32rr GR32:$src1, GR32:$src2)>; + +// Register-Memory And with EFLAGS result +def : Pat<(parallel (X86and_flag GR8:$src1, (loadi8 addr:$src2)), + (implicit EFLAGS)), + (AND8rm GR8:$src1, addr:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, (loadi16 addr:$src2)), + (implicit EFLAGS)), + (AND16rm GR16:$src1, addr:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, (loadi32 addr:$src2)), + (implicit EFLAGS)), + (AND32rm GR32:$src1, addr:$src2)>; + +// Register-Integer And with EFLAGS result +def : Pat<(parallel (X86and_flag GR8:$src1, imm:$src2), + (implicit EFLAGS)), + (AND8ri GR8:$src1, imm:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, imm:$src2), + (implicit EFLAGS)), + (AND16ri GR16:$src1, imm:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, imm:$src2), + (implicit EFLAGS)), + (AND32ri GR32:$src1, imm:$src2)>; +def : Pat<(parallel (X86and_flag GR16:$src1, i16immSExt8:$src2), + (implicit EFLAGS)), + (AND16ri8 GR16:$src1, i16immSExt8:$src2)>; +def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2), + (implicit EFLAGS)), + (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; + +// Memory-Register And with EFLAGS result +def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND8mr addr:$dst, GR8:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND16mr addr:$dst, GR16:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND32mr addr:$dst, GR32:$src2)>; + +// Memory-Integer And with EFLAGS result +def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND8mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND16mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND32mi addr:$dst, imm:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND16mi8 addr:$dst, i16immSExt8:$src2)>; +def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2), + addr:$dst), + (implicit EFLAGS)), + (AND32mi8 addr:$dst, i32immSExt8:$src2)>; + +// -disable-16bit support. 
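The long run of X86or_flag/X86xor_flag/X86and_flag patterns above teaches the selector that x86's logical instructions already produce the flags a following conditional branch needs, so no separate test or compare is required. An illustrative C++ source shape these patterns target:

#include <stdint.h>

// With the X86or_flag pattern, "x |= y" and the zero test below can select
// to a single OR whose EFLAGS result feeds the branch; without it, a
// separate TEST of the OR result would be emitted.
bool orAndTest(uint32_t &x, uint32_t y) {
  x |= y;
  return x == 0;   // reuses ZF from the OR
}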
+def : Pat<(truncstorei16 (i32 imm:$src), addr:$dst), + (MOV16mi addr:$dst, imm:$src)>; +def : Pat<(truncstorei16 GR32:$src, addr:$dst), + (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; +def : Pat<(i32 (sextloadi16 addr:$dst)), + (MOVSX32rm16 addr:$dst)>; +def : Pat<(i32 (zextloadi16 addr:$dst)), + (MOVZX32rm16 addr:$dst)>; +def : Pat<(i32 (extloadi16 addr:$dst)), + (MOVZX32rm16 addr:$dst)>; + //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index b79a00643324a..ce76b4e8b11e9 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -1,10 +1,10 @@ //====- X86InstrMMX.td - Describe the X86 Instruction Set --*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file describes the X86 MMX instruction set, defining the instructions, @@ -67,16 +67,18 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), // MMX Multiclasses //===----------------------------------------------------------------------===// -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // MMXI_binop_rm - Simple MMX binary operator. multiclass MMXI_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : MMXI { let isCommutable = Commutable; } - def rm : MMXI opc, string OpcodeStr, Intrinsic IntId, bit Commutable = 0> { - def rr : MMXI { let isCommutable = Commutable; } - def rm : MMXI; @@ -139,8 +143,10 @@ let isTwoAddress = 1 in { // MMX EMMS & FEMMS Instructions //===----------------------------------------------------------------------===// -def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", [(int_x86_mmx_emms)]>; -def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; +def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms", + [(int_x86_mmx_emms)]>; +def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", + [(int_x86_mmx_femms)]>; //===----------------------------------------------------------------------===// // MMX Scalar Instructions @@ -149,12 +155,14 @@ def MMX_FEMMS : MMXI<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)] // Data Transfer Instructions def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (v2i32 (scalar_to_vector GR32:$src)))]>; + [(set VR64:$dst, + (v2i32 (scalar_to_vector GR32:$src)))]>; let canFoldAsLoad = 1, isReMaterializable = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; -let mayStore = 1 in + [(set VR64:$dst, + (v2i32 (scalar_to_vector (loadi32 addr:$src))))]>; +let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; @@ -164,9 +172,16 @@ def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), []>; let neverHasSideEffects = 1 in -def MMX_MOVD64from64rr : MMXRI<0x7E, MRMSrcReg, +// These are 64 bit moves, but since the OS X assembler doesn't +// recognize a register-register movq, we write them as 
+// movd. +def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR64:$dst, + (v1i64 (scalar_to_vector GR64:$src)))]>; let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), @@ -179,21 +194,21 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (v1i64 VR64:$src), addr:$dst)]>; -def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMDestMem, (outs VR64:$dst), (ins VR128:$src), +def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (v1i64 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))))]>; -def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMDestMem, (outs VR128:$dst), (ins VR64:$src), +def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (movl immAllZerosV, (v2i64 (scalar_to_vector (i64 (bitconvert VR64:$src))))))]>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMDestMem, (outs FR64:$dst), (ins VR64:$src), +def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), @@ -207,7 +222,8 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), [(set VR64:$dst, (v2i32 (X86vzmovl (v2i32 (scalar_to_vector GR32:$src)))))]>; let AddedComplexity = 20 in -def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), +def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), + (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (v2i32 (X86vzmovl (v2i32 @@ -265,7 +281,7 @@ defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>; defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>; defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "pandn\t{$src2, $dst|$dst, $src2}", @@ -316,33 +332,33 @@ defm MMX_PCMPGTD : MMXI_binop_rm_int<0x66, "pcmpgtd", int_x86_mmx_pcmpgt_d>; // Conversion Instructions // -- Unpack Instructions -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // Unpack High Packed Data Instructions - def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, + def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v8i8 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, + def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v8i8 (mmx_unpckh VR64:$src1, (bc_v8i8 (load_mmx addr:$src2)))))]>; - def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, + def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v4i16 (mmx_unpckh VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, + def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v4i16 
(mmx_unpckh VR64:$src1, (bc_v4i16 (load_mmx addr:$src2)))))]>; - def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, + def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, @@ -379,12 +395,12 @@ let isTwoAddress = 1 in { (v4i16 (mmx_unpckl VR64:$src1, (bc_v4i16 (load_mmx addr:$src2)))))]>; - def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, + def MMX_PUNPCKLDQrr : MMXI<0x62, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v2i32 (mmx_unpckl VR64:$src1, VR64:$src2)))]>; - def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, + def MMX_PUNPCKLDQrm : MMXI<0x62, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR64:$dst, @@ -415,19 +431,22 @@ let neverHasSideEffects = 1 in { def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), +def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst), + (ins f128mem:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), +def MMX_CVTPI2PDrm : MMX2I<0x2A, MRMSrcMem, (outs VR128:$dst), + (ins i64mem:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), +def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst), + (ins i64mem:$src), "cvtpi2ps\t{$src, $dst|$dst, $src}", []>; def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), @@ -439,7 +458,8 @@ def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in -def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), +def MMX_CVTTPD2PIrm : MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst), + (ins f128mem:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", []>; def MMX_CVTTPS2PIrr : MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), @@ -459,14 +479,16 @@ def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1), (iPTR imm:$src2)))]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, - (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, i16i8imm:$src3), + (outs VR64:$dst), (ins VR64:$src1, GR32:$src2, + i16i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), - GR32:$src2, (iPTR imm:$src3))))]>; + GR32:$src2,(iPTR imm:$src3))))]>; def MMX_PINSRWrmi : MMXIi8<0xC4, MRMSrcMem, - (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, i16i8imm:$src3), + (outs VR64:$dst), (ins VR64:$src1, i16mem:$src2, + i16i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (v4i16 (MMX_X86pinsrw (v4i16 VR64:$src1), @@ -494,7 +516,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), 
//===----------------------------------------------------------------------===// // Alias instructions that map zero vector to pxor. -let isReMaterializable = 1 in { +let isReMaterializable = 1, isCodeGenOnly = 1 in { def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (outs VR64:$dst), (ins), "pxor\t$dst, $dst", [(set VR64:$dst, (v2i32 immAllZerosV))]>; @@ -579,7 +601,7 @@ def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), let AddedComplexity = 20 in { def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; + (MMX_MOVZDI2PDIrm addr:$src)>; } // Clear top half. @@ -657,6 +679,33 @@ def : Pat<(v8i8 (bitconvert (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))))), (v8i8 (MMX_MOVDQ2Qrr VR128:$src))>; +// Patterns for vector comparisons +def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, VR64:$src2)), + (MMX_PCMPEQBrr VR64:$src1, VR64:$src2)>; +def : Pat<(v8i8 (X86pcmpeqb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPEQBrm VR64:$src1, addr:$src2)>; +def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, VR64:$src2)), + (MMX_PCMPEQWrr VR64:$src1, VR64:$src2)>; +def : Pat<(v4i16 (X86pcmpeqw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPEQWrm VR64:$src1, addr:$src2)>; +def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, VR64:$src2)), + (MMX_PCMPEQDrr VR64:$src1, VR64:$src2)>; +def : Pat<(v2i32 (X86pcmpeqd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPEQDrm VR64:$src1, addr:$src2)>; + +def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, VR64:$src2)), + (MMX_PCMPGTBrr VR64:$src1, VR64:$src2)>; +def : Pat<(v8i8 (X86pcmpgtb VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPGTBrm VR64:$src1, addr:$src2)>; +def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, VR64:$src2)), + (MMX_PCMPGTWrr VR64:$src1, VR64:$src2)>; +def : Pat<(v4i16 (X86pcmpgtw VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPGTWrm VR64:$src1, addr:$src2)>; +def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, VR64:$src2)), + (MMX_PCMPGTDrr VR64:$src1, VR64:$src2)>; +def : Pat<(v2i32 (X86pcmpgtd VR64:$src1, (bitconvert (load_mmx addr:$src2)))), + (MMX_PCMPGTDrm VR64:$src1, addr:$src2)>; + // CMOV* - Used to implement the SELECT DAG operation. Expanded by the // scheduler into a branch sequence. // These are expanded by the scheduler. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5d6ef36414a5d..96fc932fc88c0 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1,10 +1,10 @@ //====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
-// +// //===----------------------------------------------------------------------===// // // This file describes the X86 SSE instruction set, defining the instructions, @@ -36,22 +36,22 @@ def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; -def X86pshufb : SDNode<"X86ISD::PSHUFB", +def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; -def X86pinsrb : SDNode<"X86ISD::PINSRB", +def X86pinsrb : SDNode<"X86ISD::PINSRB", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def X86pinsrw : SDNode<"X86ISD::PINSRW", +def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def X86insrtps : SDNode<"X86ISD::INSERTPS", +def X86insrtps : SDNode<"X86ISD::INSERTPS", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, - SDTCisVT<2, f32>, SDTCisPtrTy<3>]>>; + SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, @@ -69,6 +69,10 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; +def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>, + SDTCisVT<1, v4f32>]>; +def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// @@ -83,11 +87,13 @@ def sse_load_f64 : ComplexPattern { let PrintMethod = "printf32mem"; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } def sdmem : Operand { let PrintMethod = "printf64mem"; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; } //===----------------------------------------------------------------------===// @@ -179,13 +185,13 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm; -// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to +// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to // PSHUFHW imm. def SHUFFLE_get_pshufhw_imm : SDNodeXForm; -// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to +// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to // PSHUFLW imm. 
def SHUFFLE_get_pshuflw_imm : SDNodeXForm; def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi + [(set VR64:$dst, (int_x86_sse_cvtps2pi (load addr:$src)))]>; def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvttps2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>; def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi + [(set VR64:$dst, (int_x86_sse_cvttps2pi (load addr:$src)))]>; let Constraints = "$src1 = $dst" in { - def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, + def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR64:$src2), "cvtpi2ps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, VR64:$src2))]>; - def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, + def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, + [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, (load addr:$src2)))]>; } @@ -407,11 +413,11 @@ let Constraints = "$src1 = $dst" in { // Comparison instructions let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSSrr : SSIi8<0xC2, MRMSrcReg, + def CMPSSrr : SSIi8<0xC2, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in - def CMPSSrm : SSIi8<0xC2, MRMSrcMem, + def CMPSSrm : SSIi8<0xC2, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; } @@ -428,13 +434,15 @@ def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), // Aliases to match intrinsics which expect XMM operand(s). let Constraints = "$src1 = $dst" in { - def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, + SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc), + VR128:$src, imm:$cc))]>; + def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src, + SSECC:$cc), "cmp${cc}ss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, (load addr:$src), imm:$cc))]>; @@ -460,18 +468,19 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), (implicit EFLAGS)]>; } // Defs = [EFLAGS] -// Aliases of packed SSE1 instructions for scalar use. These all have names that -// start with 'Fs'. +// Aliases of packed SSE1 instructions for scalar use. These all have names +// that start with 'Fs'. // Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, + canFoldAsLoad = 1 in def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "pxor\t$dst, $dst", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>, TB, OpSize; // Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are // disregarded. 
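The FsFLD0SS alias above relies on the standard x86 zeroing idiom: XORing a register with itself always yields zero, so the instruction needs no live input and is trivially rematerializable (hence isReMaterializable and isAsCheapAsAMove). A sketch of the code shape this selects, assuming typical codegen:

    // Usually compiles to "xorps %xmm0, %xmm0; ret" rather than a
    // constant-pool load; canFoldAsLoad lets the folder pick the load
    // form when that happens to be cheaper.
    float fp_zero() { return 0.0f; }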
-let neverHasSideEffects = 1 in +let neverHasSideEffects = 1 in def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", []>; @@ -552,7 +561,7 @@ multiclass basic_sse1_fp_binop_rm opc, string OpcodeStr, (ins FR32:$src1, f32mem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. def PSrr : PSI opc, string OpcodeStr, (ins FR32:$src1, f32mem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. def PSrr : PSI; let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in @@ -708,13 +717,13 @@ let Constraints = "$src1 = $dst" in { def MOVLPSrm : PSI<0x12, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (movlp VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; def MOVHPSrm : PSI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (movhp VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity @@ -789,7 +798,7 @@ multiclass sse1_fp_unop_rm opc, string OpcodeStr, def SSm : SSI; - + // Vector operation, reg. def PSr : PSI; - def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, + def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), "cmp${cc}ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, @@ -909,13 +918,13 @@ def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), // Shuffle and unpack instructions let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in // Convert to pshufd - def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, + def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, + def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -924,24 +933,24 @@ let Constraints = "$src1 = $dst" in { VR128:$src1, (memopv4f32 addr:$src2))))]>; let AddedComplexity = 10 in { - def UNPCKHPSrr : PSI<0x15, MRMSrcReg, + def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPSrm : PSI<0x15, MRMSrcMem, + def UNPCKHPSrm : PSI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (unpckh VR128:$src1, (memopv4f32 addr:$src2))))]>; - def UNPCKLPSrr : PSI<0x14, MRMSrcReg, + def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPSrm : PSI<0x14, MRMSrcMem, + def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -984,7 +993,8 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), // Alias instructions that map zero vector to pxor 
/ xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1 in def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "xorps\t$dst, $dst", [(set VR128:$dst, (v4i32 immAllZerosV))]>; @@ -1046,14 +1056,14 @@ let AddedComplexity = 20 in def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), "movss\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector - (loadf32 addr:$src))))))]>; + (loadf32 addr:$src))))))]>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), (MOVZSS2PSrm addr:$src)>; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE2 Instructions -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Instructions let neverHasSideEffects = 1 in @@ -1077,7 +1087,7 @@ def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src), def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))]>; -def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), +def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))]>; def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src), @@ -1087,6 +1097,27 @@ def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src), "cvtsi2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>; +def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>; +def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; +def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "comisd\t{$src2, $src1|$src1, $src2}", []>; +def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), + "comisd\t{$src2, $src1|$src1, $src2}", []>; + // SSE2 instructions with XS prefix def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1112,21 +1143,21 @@ def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins 
f128mem:$src), "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi + [(set VR64:$dst, (int_x86_sse_cvtpd2pi (memop addr:$src)))]>; def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>; def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi + [(set VR64:$dst, (int_x86_sse_cvttpd2pi (memop addr:$src)))]>; def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>; def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd + [(set VR128:$dst, (int_x86_sse_cvtpi2pd (load addr:$src)))]>; // Aliases for intrinsics @@ -1141,11 +1172,11 @@ def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), // Comparison instructions let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSDrr : SDIi8<0xC2, MRMSrcReg, + def CMPSDrr : SDIi8<0xC2, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; let mayLoad = 1 in - def CMPSDrm : SDIi8<0xC2, MRMSrcMem, + def CMPSDrm : SDIi8<0xC2, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; } @@ -1162,13 +1193,15 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), // Aliases to match intrinsics which expect XMM operand(s). let Constraints = "$src1 = $dst" in { - def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), + def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src, + SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, VR128:$src, imm:$cc))]>; - def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc), + def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src, + SSECC:$cc), "cmp${cc}sd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, (load addr:$src), imm:$cc))]>; @@ -1194,11 +1227,12 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), (implicit EFLAGS)]>; } // Defs = [EFLAGS] -// Aliases of packed SSE2 instructions for scalar use. These all have names that -// start with 'Fs'. +// Aliases of packed SSE2 instructions for scalar use. These all have names +// that start with 'Fs'. // Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, + canFoldAsLoad = 1 in def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "pxor\t$dst, $dst", [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>, TB, OpSize; @@ -1286,7 +1320,7 @@ multiclass basic_sse2_fp_binop_rm opc, string OpcodeStr, (ins FR64:$src1, f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. 
def PDrr : PDI opc, string OpcodeStr, (ins FR64:$src1, f64mem:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - + // Vector operation, reg+reg. def PDrr : PDI; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE packed FP Instructions // Move Instructions @@ -1442,13 +1476,13 @@ let Constraints = "$src1 = $dst" in { def MOVLPDrm : PDI<0x12, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movlpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (v2f64 (movlp VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; def MOVHPDrm : PDI<0x16, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, + [(set VR128:$dst, (v2f64 (movhp VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity @@ -1564,7 +1598,7 @@ def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg, [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>; def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, (load addr:$src2)))]>; @@ -1612,7 +1646,7 @@ multiclass sse2_fp_unop_rm opc, string OpcodeStr, def SDm : SDI; - + // Vector operation, reg. def PDr : PDI; - def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, + def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), "cmp${cc}pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, @@ -1730,12 +1764,12 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), // Shuffle and unpack instructions let Constraints = "$src1 = $dst" in { - def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, + def SHUFPDrri : PDIi8<0xC6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, + def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2, i8imm:$src3), "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -1744,24 +1778,24 @@ let Constraints = "$src1 = $dst" in { VR128:$src1, (memopv2f64 addr:$src2))))]>; let AddedComplexity = 10 in { - def UNPCKHPDrr : PDI<0x15, MRMSrcReg, + def UNPCKHPDrr : PDI<0x15, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPDrm : PDI<0x15, MRMSrcMem, + def UNPCKHPDrm : PDI<0x15, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpckhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2f64 (unpckh VR128:$src1, (memopv2f64 addr:$src2))))]>; - def UNPCKLPDrr : PDI<0x14, MRMSrcReg, + def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPDrm : PDI<0x14, MRMSrcMem, + def UNPCKLPDrm : PDI<0x14, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), "unpcklpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -1770,7 +1804,7 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" 
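A recurring mechanical change throughout this patch is replacing 'let isTwoAddress = 1' with the equivalent 'let Constraints = "$src1 = $dst"': both tie the destination register to the first source operand, matching x86's two-address encoding. An illustrative example, with the asm comment reflecting typical SSE2 output:

    // In two-address form the first source is overwritten:
    //   addsd %xmm1, %xmm0   computes   xmm0 = xmm0 + xmm1
    double sum(double a, double b) { return a + b; }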
-//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE integer instructions // Move Instructions @@ -1825,14 +1859,17 @@ multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, Intrinsic IntId2> { - def rr : PDI; - def rm : PDI; - def ri : PDIi8; + def ri : PDIi8; } @@ -1840,15 +1877,17 @@ multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, /// PDI_binop_rm - Simple SSE2 binary operator. multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, ValueType OpVT, bit Commutable = 0> { - def rr : PDI { let isCommutable = Commutable; } - def rm : PDI; + (bitconvert (memopv2i64 addr:$src2)))))]>; } /// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64. @@ -1858,14 +1897,17 @@ multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, /// multiclass PDI_binop_rm_v2i64 opc, string OpcodeStr, SDNode OpNode, bit Commutable = 0> { - def rr : PDI { let isCommutable = Commutable; } - def rm : PDI; + [(set VR128:$dst, (OpNode VR128:$src1, + (memopv2i64 addr:$src2)))]>; } } // Constraints = "$src1 = $dst" @@ -2029,8 +2071,8 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4i32 (pshufd:$src2 - (bc_v4i32(memopv2i64 addr:$src1)), - (undef))))]>; + (bc_v4i32(memopv2i64 addr:$src1)), + (undef))))]>; // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, @@ -2043,8 +2085,8 @@ def PSHUFHWmi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v8i16 (pshufhw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, + (bc_v8i16 (memopv2i64 addr:$src1)), + (undef))))]>, XS, Requires<[HasSSE2]>; // SSE2 with ImmT == Imm8 and XD prefix. 
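The PSHUFD/PSHUFHW/PSHUFLW definitions above consume the imm8 produced by the shuffle xforms sketched earlier. For reference, the same operation is reachable from C++ through the SSE2 intrinsic:

    #include <emmintrin.h>

    // pshufd $0x1b, %xmm0, %xmm0 -- picks source lanes 3,2,1,0 for
    // destination lanes 0,1,2,3, i.e. reverses the four i32 lanes.
    __m128i reverse_lanes(__m128i v) {
      return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));
    }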
@@ -2064,90 +2106,90 @@ def PSHUFLWmi : Ii8<0x70, MRMSrcMem, let Constraints = "$src1 = $dst" in { - def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, + def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, + def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckl VR128:$src1, (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, + def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, + def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, + def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, + def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckldq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2))))]>; - def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2i64 (unpckl VR128:$src1, (memopv2i64 addr:$src2))))]>; - - def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, + + def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, + def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, + [(set VR128:$dst, + (unpckh VR128:$src1, (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, + def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, + def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhwd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckh VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, + def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, + def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (unpckh VR128:$src1, 
(bc_v4i32 (memopv2i64 addr:$src2))))]>; - def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, + def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), "punpckhqdq\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, @@ -2172,7 +2214,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, + [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), imm:$src3))]>; } @@ -2202,7 +2244,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, + [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, TB, Requires<[HasSSE2]>; // Flush cache @@ -2217,17 +2259,18 @@ def MFENCE : I<0xAE, MRM6r, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), (i8 0)), (NOOP)>; def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), (i8 1)), (MFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. 
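The V_SETALLONES def that follows is the all-ones counterpart of V_SET0: 'pcmpeqd x, x' compares every lane with itself, which is always true, so the result is all ones regardless of the register's prior contents. A minimal sketch:

    #include <emmintrin.h>

    // Typically folds to a single "pcmpeqd %xmm0, %xmm0".
    __m128i all_ones() {
      __m128i z = _mm_setzero_si128();
      return _mm_cmpeq_epi32(z, z);
    }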
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1 in def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "pcmpeqd\t$dst, $dst", [(set VR128:$dst, (v4i32 immAllOnesV))]>; @@ -2240,7 +2283,7 @@ def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src), (v2f64 (scalar_to_vector FR64:$src)))]>; def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, + [(set VR128:$dst, (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), @@ -2399,9 +2442,9 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), (MOVZPQILo2PQIrm addr:$src)>; } -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSE3 Instructions -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Move Instructions def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2525,9 +2568,9 @@ let AddedComplexity = 20 in def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // SSSE3 Instructions -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// /// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. multiclass SS3I_unop_rm_int_8 opc, string OpcodeStr, @@ -2801,12 +2844,13 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask), def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// // Non-Instruction Patterns -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// -// extload f32 -> f64. This matches load+fextend because we have a hack in -// the isel (PreprocessForFPConvert) that can introduce loads after dag combine. +// extload f32 -> f64. This matches load+fextend because we have a hack in +// the isel (PreprocessForFPConvert) that can introduce loads after dag +// combine. // Since these loads aren't folded into the fextend, we have to match it // explicitly here. let Predicates = [HasSSE2] in @@ -2884,12 +2928,12 @@ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), Requires<[HasSSE2]>; // Special unary SHUFPDrri case. def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFPDrri VR128:$src1, VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Special unary SHUFPDrri case. def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))), - (SHUFPDrri VR128:$src1, VR128:$src1, + (SHUFPDrri VR128:$src1, VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Unary v4f32 shuffle with PSHUF* in order to fold a load. 
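The extload f32 -> f64 note above matches exactly this source shape: a float load immediately extended to double, selected as cvtss2sd with the load folded. Minimal C++ equivalent:

    // load f32 + fpext -> CVTSS2SDrm ("cvtss2sd (%rdi), %xmm0").
    double widen(const float *p) { return *p; }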
@@ -2899,16 +2943,16 @@ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)), // Special binary v4i32 shuffle cases with SHUFPS. def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))), - (SHUFPSrri VR128:$src1, VR128:$src2, + (SHUFPSrri VR128:$src1, VR128:$src2, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (SHUFPSrmi VR128:$src1, addr:$src2, + (SHUFPSrmi VR128:$src1, addr:$src2, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; // Special binary v2i64 shuffle cases using SHUFPDrri. def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)), - (SHUFPDrri VR128:$src1, VR128:$src2, + (SHUFPDrri VR128:$src1, VR128:$src2, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE2]>; @@ -3030,7 +3074,7 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but // fall back to this for SSE1) def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), - (SHUFPSrri VR128:$src2, VR128:$src1, + (SHUFPSrri VR128:$src2, VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; // Set lowest element and zero upper elements. @@ -3097,7 +3141,7 @@ def : Pat<(store (v8i16 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; - + //===----------------------------------------------------------------------===// // SSE4.1 Instructions //===----------------------------------------------------------------------===// @@ -3108,7 +3152,7 @@ multiclass sse41_fp_unop_rm opcps, bits<8> opcpd, Intrinsic V2F64Int> { // Intrinsic operation, reg. // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8 opcss, bits<8> opcsd, Intrinsic F64Int> { // Intrinsic operation, reg. def SSr_Int : SS4AIi8, OpSize; // Intrinsic operation, mem. - def SSm_Int : SS4AIi8, OpSize; // Intrinsic operation, reg. def SDr_Int : SS4AIi8, OpSize; // Intrinsic operation, mem. 
def SDm_Int : SS4AIi8, OpSize; } @@ -3302,9 +3346,9 @@ let Constraints = "$src1 = $dst" in { Intrinsic IntId128, bit Commutable = 0> { def rri : SS4AIi8, OpSize { let isCommutable = Commutable; @@ -3339,7 +3383,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { def rr0 : SS48I, OpSize; @@ -3471,13 +3515,13 @@ def : Pat<(int_x86_sse41_pmovzxbq multiclass SS41I_extract8 opc, string OpcodeStr> { def rr : SS4AIi8, OpSize; def mr : SS4AIi8, OpSize; // FIXME: @@ -3492,7 +3536,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">; multiclass SS41I_extract16 opc, string OpcodeStr> { def mr : SS4AIi8, OpSize; // FIXME: @@ -3507,13 +3551,13 @@ defm PEXTRW : SS41I_extract16<0x15, "pextrw">; multiclass SS41I_extract32 opc, string OpcodeStr> { def rr : SS4AIi8, OpSize; def mr : SS4AIi8, OpSize; @@ -3527,14 +3571,14 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">; multiclass SS41I_extractf32 opc, string OpcodeStr> { def rr : SS4AIi8, OpSize; - def mr : SS4AIi8, OpSize; @@ -3553,15 +3597,15 @@ let Constraints = "$src1 = $dst" in { multiclass SS41I_insert8 opc, string OpcodeStr> { def rr : SS4AIi8, OpSize; def rm : SS4AIi8, OpSize; } @@ -3573,16 +3617,16 @@ let Constraints = "$src1 = $dst" in { multiclass SS41I_insert32 opc, string OpcodeStr> { def rr : SS4AIi8, OpSize; def rm : SS4AIi8, OpSize; } @@ -3590,37 +3634,57 @@ let Constraints = "$src1 = $dst" in { defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +// insertps has a few different modes, there's the first two here below which +// are optimized inserts that won't zero arbitrary elements in the destination +// vector. The next one matches the intrinsic and could zero arbitrary elements +// in the target vector. let Constraints = "$src1 = $dst" in { multiclass SS41I_insertf32 opc, string OpcodeStr> { def rr : SS4AIi8, OpSize; + [(set VR128:$dst, + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; def rm : SS4AIi8, OpSize; } } defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), + (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; + +// ptest instruction we'll lower to this in X86ISelLowering primarily from +// the intel intrinsic that corresponds to this. 
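As the comment above notes, PTEST is produced by X86ISelLowering when lowering the corresponding Intel intrinsic; the PTEST defs follow below. From user code, that intrinsic is the SSE4.1 test family, for example:

    #include <smmintrin.h>

    // ptest sets ZF when (a & b) == 0; _mm_testz_si128 materializes that bit.
    int disjoint(__m128i a, __m128i b) { return _mm_testz_si128(a, b); }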
let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize; + "ptest \t{$src2, $src1|$src1, $src2}", + [(X86ptest VR128:$src1, VR128:$src2), + (implicit EFLAGS)]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", []>, OpSize; + "ptest \t{$src2, $src1|$src1, $src2}", + [(X86ptest VR128:$src1, (load addr:$src2)), + (implicit EFLAGS)]>, OpSize; } def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; + +//===----------------------------------------------------------------------===// +// SSE4.2 Instructions +//===----------------------------------------------------------------------===// + /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator let Constraints = "$src1 = $dst" in { multiclass SS42I_binop_rm_int opc, string OpcodeStr, @@ -3647,3 +3711,171 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), (PCMPGTQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; + +// crc intrinsic instruction +// This set of instructions are only rm, the only difference is the size +// of r and m. +let Constraints = "$src1 = $dst" in { + def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i8mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_8 GR32:$src1, + (load addr:$src2)))]>, OpSize; + def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR8:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>, + OpSize; + def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i16mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_16 GR32:$src1, + (load addr:$src2)))]>, + OpSize; + def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR16:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>, + OpSize; + def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), + (ins GR32:$src1, i32mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_32 GR32:$src1, + (load addr:$src2)))]>, OpSize; + def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR32:$dst, + (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>, + OpSize; + def CRC64m64 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i64mem:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc32_64 GR64:$src1, + (load addr:$src2)))]>, + OpSize, REX_W; + def CRC64r64 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "crc32 \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>, + OpSize, REX_W; +} + +// String/text processing instructions. 
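Referring back to the CRC32 defs just above (the string instructions follow below): they are reached through the SSE4.2 CRC intrinsics. A small usage sketch, threading the CRC-32C accumulator through one 32-bit step per word:

    #include <nmmintrin.h>
    #include <cstddef>
    #include <cstdint>

    uint32_t crc32c_words(const uint32_t *p, size_t n) {
      uint32_t crc = ~0u;                  // conventional initial value
      for (size_t i = 0; i < n; ++i)
        crc = _mm_crc32_u32(crc, p[i]);    // one crc32l instruction per word
      return ~crc;
    }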
+let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { +def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; +def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpistrm128 VR128:$src1, + (load addr:$src2), + imm:$src3))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS] in { +def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", + []>, OpSize; +def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", + []>, OpSize; +} + +let Defs = [EFLAGS], Uses = [EAX, EDX], + usesCustomDAGSchedInserter = 1 in { +def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + VR128:$src3, + EDX, imm:$src5))]>, OpSize; +def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + (load addr:$src3), + EDX, imm:$src5))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { +def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", + []>, OpSize; +def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", + []>, OpSize; +} + +let Defs = [ECX, EFLAGS] in { + multiclass SS42AI_pcmpistri { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, + (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, + OpSize; + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, + (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), + (implicit EFLAGS)]>, + OpSize; + } +} + +defm PCMPISTRI : SS42AI_pcmpistri; +defm PCMPISTRIA : SS42AI_pcmpistri; +defm PCMPISTRIC : SS42AI_pcmpistri; +defm PCMPISTRIO : SS42AI_pcmpistri; +defm PCMPISTRIS : SS42AI_pcmpistri; +defm PCMPISTRIZ : SS42AI_pcmpistri; + +let Defs = [ECX, EFLAGS] in { +let Uses = [EAX, EDX] in { + multiclass SS42AI_pcmpestri { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, + (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, + OpSize; + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), + EDX, imm:$src5)), + (implicit EFLAGS)]>, + OpSize; + } +} +} + +defm PCMPESTRI : SS42AI_pcmpestri; +defm PCMPESTRIA : SS42AI_pcmpestri; +defm PCMPESTRIC : SS42AI_pcmpestri; +defm PCMPESTRIO : SS42AI_pcmpestri; +defm PCMPESTRIS : SS42AI_pcmpestri; +defm PCMPESTRIZ : 
SS42AI_pcmpestri; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index f92310607a8e5..62ca47ff787a4 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -15,15 +15,16 @@ #include "X86JITInfo.h" #include "X86Relocations.h" #include "X86Subtarget.h" +#include "X86TargetMachine.h" #include "llvm/Function.h" -#include "llvm/Config/alloca.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include #include using namespace llvm; // Determine the platform we're running on -#if defined (__x86_64__) || defined (_M_AMD64) +#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64) # define X86_64_JIT #elif defined(__i386__) || defined(i386) || defined(_M_IX86) # define X86_32_JIT @@ -51,13 +52,6 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; #define GETASMPREFIX(X) GETASMPREFIX2(X) #define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) -// Check if building with -fPIC -#if defined(__PIC__) && __PIC__ && defined(__linux__) -#define ASMCALLSUFFIX "@PLT" -#else -#define ASMCALLSUFFIX -#endif - // For ELF targets, use a .size and .type directive, to let tools // know the extent of functions defined in assembler. #if defined(__ELF__) @@ -130,7 +124,7 @@ extern "C" { // JIT callee "movq %rbp, %rdi\n" // Pass prev frame and return address "movq 8(%rbp), %rsi\n" - "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "call " ASMPREFIX "X86CompilationCallback2\n" // Restore all XMM arg registers "movaps 112(%rsp), %xmm7\n" "movaps 96(%rsp), %xmm6\n" @@ -206,7 +200,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "call " ASMPREFIX "X86CompilationCallback2\n" "movl %ebp, %esp\n" // Restore ESP CFI(".cfi_def_cfa_register %esp\n") "subl $12, %esp\n" @@ -262,7 +256,7 @@ extern "C" { "movl 4(%ebp), %eax\n" // Pass prev frame and return address "movl %eax, 4(%esp)\n" "movl %ebp, (%esp)\n" - "call " ASMPREFIX "X86CompilationCallback2" ASMCALLSUFFIX "\n" + "call " ASMPREFIX "X86CompilationCallback2\n" "addl $16, %esp\n" "movaps 48(%esp), %xmm3\n" CFI(".cfi_restore %xmm3\n") @@ -321,8 +315,7 @@ extern "C" { #else // Not an i386 host void X86CompilationCallback() { - assert(0 && "Cannot call X86CompilationCallback() on a non-x86 arch!\n"); - abort(); + llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!"); } #endif } @@ -331,14 +324,21 @@ extern "C" { /// function stub when we did not know the real target of a call. This function /// must locate the start of the stub or call site and pass it into the JIT /// compiler function. -extern "C" void ATTRIBUTE_USED +extern "C" { +#if !(defined (X86_64_JIT) && defined(_MSC_VER)) + // the following function is called only from this translation unit, + // unless we are under 64bit Windows with MSC, where there is + // no support for inline assembly +static +#endif +void ATTRIBUTE_USED X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { intptr_t *RetAddrLoc = &StackPtr[1]; assert(*RetAddrLoc == RetAddr && "Could not find return address on the stack!"); // It's a stub if there is an interrupt marker after the call. - bool isStub = ((unsigned char*)RetAddr)[0] == 0xCD; + bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE; // The call instruction should have pushed the return value onto the stack... 
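// The 0xCD -> 0xCE change above is the point of this JIT fix (PR 4929):
// 0xCD is also the byte JITMemoryManager uses to fill fresh buffers, so a
// stale fill byte following a noreturn call could be mistaken for a stub
// marker. A hypothetical byte-level view of a stub as emitFunctionStub
// lays it out:
//   E8 xx xx xx xx    call <compilation callback>   (rel32)
//   CE                marker ("into" opcode) identifying the stub
// which is what the isStub test above inspects at the return address.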
#if defined (X86_64_JIT) @@ -348,10 +348,10 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { #endif #if 0 - DOUT << "In callback! Addr=" << (void*)RetAddr - << " ESP=" << (void*)StackPtr - << ": Resolving call to function: " - << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"; + DEBUG(errs() << "In callback! Addr=" << (void*)RetAddr + << " ESP=" << (void*)StackPtr + << ": Resolving call to function: " + << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"); #endif // Sanity check to make sure this really is a call instruction. @@ -377,7 +377,7 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { // If this is a stub, rewrite the call into an unconditional branch // instruction so that two return addresses are not pushed onto the stack // when the requested function finally gets called. This also makes the - // 0xCD byte (interrupt) dead, so the marker doesn't effect anything. + // 0xCE byte (interrupt) dead, so the marker doesn't effect anything. #if defined (X86_64_JIT) // If the target address is within 32-bit range of the stub, use a // PC-relative branch instead of loading the actual address. (This is @@ -403,31 +403,26 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { *RetAddrLoc -= 5; #endif } +} TargetJITInfo::LazyResolverFn X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { JITCompilerFunction = F; #if defined (X86_32_JIT) && !defined (_MSC_VER) - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - union { - unsigned u[3]; - char c[12]; - } text; - - if (!X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) { - // FIXME: support for AMD family of processors. - if (memcmp(text.c, "GenuineIntel", 12) == 0) { - X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 25) & 0x1) - return X86CompilationCallback_SSE; - } - } + if (Subtarget->hasSSE1()) + return X86CompilationCallback_SSE; #endif return X86CompilationCallback; } +X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) { + Subtarget = &TM.getSubtarget(); + useGOT = 0; + TLSOffset = 0; +} + void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE) { #if defined (X86_64_JIT) @@ -485,7 +480,10 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); #endif - JCE.emitByte(0xCD); // Interrupt - Just a marker identifying the stub! + // This used to use 0xCD, but that value is used by JITMemoryManager to + // initialize the buffer with garbage, which means it may follow a + // noreturn function call, confusing X86CompilationCallback2. PR 4929. + JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! return JCE.finishGVStub(F); } @@ -495,9 +493,11 @@ void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub, // complains about casting a function pointer to a normal pointer. JCE.startGVStub(F, Stub, 5); JCE.emitByte(0xE9); -#if defined (X86_64_JIT) - assert(((((intptr_t)Fn-JCE.getCurrentPCValue()-5) << 32) >> 32) == - ((intptr_t)Fn-JCE.getCurrentPCValue()-5) +#if defined (X86_64_JIT) && !defined (NDEBUG) + // Yes, we need both of these casts, or some broken versions of GCC (4.2.4) + // get the signed-ness of the expression wrong. Go figure. 
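// [Editor's note] Concretely, the assert introduced below verifies that the
// 64-bit displacement survives a sign-extending round trip through 32 bits,
// i.e. that it fits the signed rel32 field of the 5-byte 0xE9 jump emitted
// for the stub; the explicit casts keep the subtraction signed even on the
// miscompiling GCC releases mentioned above.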
+ intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5; + assert(((Displacement << 32) >> 32) == Displacement && "PIC displacement does not fit in displacement field!"); #endif JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); @@ -538,6 +538,7 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR, break; } case X86::reloc_absolute_word: + case X86::reloc_absolute_word_sext: // Absolute relocation, just add the relocated value to the value already // in memory. *((unsigned*)RelocPos) += (unsigned)ResultPtr; @@ -554,7 +555,7 @@ char* X86JITInfo::allocateThreadLocalMemory(size_t size) { TLSOffset -= size; return TLSOffset; #else - assert(0 && "Cannot allocate thread local storage on this arch!\n"); + llvm_unreachable("Cannot allocate thread local storage on this arch!"); return 0; #endif } diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index 6a4e2148a5aac..c381433bf3578 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -20,16 +20,15 @@ namespace llvm { class X86TargetMachine; + class X86Subtarget; class X86JITInfo : public TargetJITInfo { X86TargetMachine &TM; + const X86Subtarget *Subtarget; uintptr_t PICBase; char* TLSOffset; public: - explicit X86JITInfo(X86TargetMachine &tm) : TM(tm) { - useGOT = 0; - TLSOffset = 0; - } + explicit X86JITInfo(X86TargetMachine &tm); /// replaceMachineCodeForFunction - Make it so that calling the function /// whose machine code is at OLD turns into a call to NEW, perhaps by diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp new file mode 100644 index 0000000000000..9d7e66debb907 --- /dev/null +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -0,0 +1,123 @@ +//===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the X86MCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "X86MCAsmInfo.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +enum AsmWriterFlavorTy { + // Note: This numbering has to match the GCC assembler dialects for inline + // asm alternatives to work right. + ATT = 0, Intel = 1 +}; + +static cl::opt +AsmWriterFlavor("x86-asm-syntax", cl::init(ATT), + cl::desc("Choose style of code to emit from X86 backend:"), + cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"), + clEnumValN(Intel, "intel", "Emit Intel-style assembly"), + clEnumValEnd)); + + +static const char *const x86_asm_table[] = { + "{si}", "S", + "{di}", "D", + "{ax}", "a", + "{cx}", "c", + "{memory}", "memory", + "{flags}", "", + "{dirflag}", "", + "{fpsr}", "", + "{cc}", "cc", + 0,0}; + +X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + bool is64Bit = Triple.getArch() == Triple::x86_64; + + TextAlignFillValue = 0x90; + + if (!is64Bit) + Data64bitsDirective = 0; // we can't emit a 64-bit unit + + // Leopard and above support aligned common symbols. 
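// [Editor's note] Triple::getDarwinMajorNumber() parses the "9" out of a
// triple such as x86_64-apple-darwin9; Darwin 9 is Mac OS X 10.5 (Leopard),
// so the comparison below enables aligned .comm only on 10.5 and later.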
+ COMMDirectiveTakesAlignment = Triple.getDarwinMajorNumber() >= 9; + + CommentString = "##"; + PCSymbol = "."; + + SupportsDebugInformation = true; + DwarfUsesInlineInfoSection = true; + + // Exceptions handling + ExceptionsType = ExceptionHandling::Dwarf; + AbsoluteEHSectionOffsets = false; +} + +X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + PCSymbol = "."; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + + // Debug Information + AbsoluteDebugSectionOffsets = true; + SupportsDebugInformation = true; + + // Exceptions handling + ExceptionsType = ExceptionHandling::Dwarf; + AbsoluteEHSectionOffsets = false; + + // On Linux we must declare when we can use a non-executable stack. + if (Triple.getOS() == Triple::Linux) + NonexecutableStackDirective = "\t.section\t.note.GNU-stack,\"\",@progbits"; +} + +X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; +} + + +X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { + AsmTransCBE = x86_asm_table; + AssemblerDialect = AsmWriterFlavor; + + GlobalPrefix = "_"; + CommentString = ";"; + + PrivateGlobalPrefix = "$"; + AlignDirective = "\tALIGN\t"; + ZeroDirective = "\tdb\t"; + ZeroDirectiveSuffix = " dup(0)"; + AsciiDirective = "\tdb\t"; + AscizDirective = 0; + Data8bitsDirective = "\tdb\t"; + Data16bitsDirective = "\tdw\t"; + Data32bitsDirective = "\tdd\t"; + Data64bitsDirective = "\tdq\t"; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + + AlignmentIsInBytes = true; +} diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h new file mode 100644 index 0000000000000..18e2bdbcba916 --- /dev/null +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -0,0 +1,42 @@ +//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the X86MCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef X86TARGETASMINFO_H +#define X86TARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoCOFF.h" +#include "llvm/MC/MCAsmInfoDarwin.h" + +namespace llvm { + class Triple; + + struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin { + explicit X86MCAsmInfoDarwin(const Triple &Triple); + }; + + struct X86ELFMCAsmInfo : public MCAsmInfo { + explicit X86ELFMCAsmInfo(const Triple &Triple); + }; + + struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { + explicit X86MCAsmInfoCOFF(const Triple &Triple); + }; + + struct X86WinMCAsmInfo : public MCAsmInfo { + explicit X86WinMCAsmInfo(const Triple &Triple); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a2f319f9a7ab7..f03723ae30982 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -30,14 +30,16 @@ #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, @@ -54,6 +56,7 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, Is64Bit = Subtarget->is64Bit(); IsWin64 = Subtarget->isTargetWin64(); StackAlign = TM.getFrameInfo()->getStackAlignment(); + if (Is64Bit) { SlotSize = 8; StackPtr = X86::RSP; @@ -65,12 +68,12 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, } } -// getDwarfRegNum - This function maps LLVM register identifiers to the -// Dwarf specific numbering, used in debug info and exception tables. - +/// getDwarfRegNum - This function maps LLVM register identifiers to the DWARF +/// specific numbering, used in debug info and exception tables. int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { const X86Subtarget *Subtarget = &TM.getSubtarget(); unsigned Flavour = DWARFFlavour::X86_64; + if (!Subtarget->is64Bit()) { if (Subtarget->isTargetDarwin()) { if (isEH) @@ -88,9 +91,8 @@ int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const { return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour); } -// getX86RegNum - This function maps LLVM register identifiers to their X86 -// specific numbering, which is used in various places encoding instructions. -// +/// getX86RegNum - This function maps LLVM register identifiers to their X86 +/// specific numbering, which is used in various places encoding instructions. 
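// [Editor's note] The switch below maps each architectural register and all
// of its narrower aliases (e.g. RAX/EAX/AX/AL) onto the 3-bit code used in
// ModR/M and opcode encodings; N86::EAX through N86::EDI are simply those
// hardware numbers 0 through 7.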
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { switch(RegNo) { case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX; @@ -146,17 +148,131 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); - assert(0 && "Register allocator hasn't allocated reg correctly yet!"); + llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); return 0; } } -const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const { - const X86Subtarget *Subtarget = &TM.getSubtarget(); - if (Subtarget->is64Bit()) - return &X86::GR64RegClass; - else +const TargetRegisterClass * +X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + unsigned SubIdx) const { + switch (SubIdx) { + default: return 0; + case 1: + // 8-bit + if (B == &X86::GR8RegClass) { + if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8) + return A; + } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || + A == &X86::GR32_NOREXRegClass || + A == &X86::GR32_NOSPRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || + A == &X86::GR16_NOREXRegClass) + return &X86::GR16_ABCDRegClass; + } else if (B == &X86::GR8_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass || + A == &X86::GR32_NOSPRegClass) + return &X86::GR32_NOREXRegClass; + else if (A == &X86::GR32_ABCDRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass) + return &X86::GR16_NOREXRegClass; + else if (A == &X86::GR16_ABCDRegClass) + return &X86::GR16_ABCDRegClass; + } + break; + case 2: + // 8-bit hi + if (B == &X86::GR8_ABCD_HRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || + A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass) + return &X86::GR32_ABCDRegClass; + else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || + A == &X86::GR16_NOREXRegClass) + return &X86::GR16_ABCDRegClass; + } + break; + case 3: + // 16-bit + if (B == &X86::GR16RegClass) { + if (A->getSize() == 4 || A->getSize() == 8) + return A; + } else if (B == &X86::GR16_ABCDRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass || + A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass) + return &X86::GR32_ABCDRegClass; + } else if (B == &X86::GR16_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || A == 
&X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass || + A == &X86::GR32_NOSPRegClass) + return &X86::GR32_NOREXRegClass; + else if (A == &X86::GR32_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + } + break; + case 4: + // 32-bit + if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) { + if (A->getSize() == 8) + return A; + } else if (B == &X86::GR32_ABCDRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass || + A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || + A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_ABCDRegClass; + } else if (B == &X86::GR32_NOREXRegClass) { + if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass || + A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass) + return &X86::GR64_NOREXRegClass; + else if (A == &X86::GR64_ABCDRegClass) + return &X86::GR64_ABCDRegClass; + } + break; + } + return 0; +} + +const TargetRegisterClass * +X86RegisterInfo::getPointerRegClass(unsigned Kind) const { + switch (Kind) { + default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); + case 0: // Normal GPRs. + if (TM.getSubtarget().is64Bit()) + return &X86::GR64RegClass; return &X86::GR32RegClass; + case 1: // Normal GRPs except the stack pointer (for encoding reasons). + if (TM.getSubtarget().is64Bit()) + return &X86::GR64_NOSPRegClass; + return &X86::GR32_NOSPRegClass; + } } const TargetRegisterClass * @@ -276,6 +392,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::ESP); Reserved.set(X86::SP); Reserved.set(X86::SPL); + // Set the frame-pointer register and its aliases as reserved if needed. if (hasFP(MF)) { Reserved.set(X86::RBP); @@ -283,10 +400,10 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::BP); Reserved.set(X86::BPL); } - // Mark the x87 stack registers as reserved, since they don't - // behave normally with respect to liveness. We don't fully - // model the effects of x87 stack pushes and pops after - // stackification. + + // Mark the x87 stack registers as reserved, since they don't behave normally + // with respect to liveness. We don't fully model the effects of x87 stack + // pushes and pops after stackification. Reserved.set(X86::ST0); Reserved.set(X86::ST1); Reserved.set(X86::ST2); @@ -304,10 +421,12 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { unsigned MaxAlign = 0; + for (int i = FFI->getObjectIndexBegin(), e = FFI->getObjectIndexEnd(); i != e; ++i) { if (FFI->isDeadObjectIndex(i)) continue; + unsigned Align = FFI->getObjectAlignment(i); MaxAlign = std::max(MaxAlign, Align); } @@ -315,10 +434,9 @@ static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { return MaxAlign; } -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. -// +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. 
bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); @@ -335,7 +453,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // FIXME: Currently we don't support stack realignment for functions with - // variable-sized allocas + // variable-sized allocas return (RealignStack && (MFI->getMaxAlignment() > StackAlign && !MFI->hasVarSizedObjects())); @@ -345,34 +463,45 @@ bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } +bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, + int &FrameIdx) const { + if (Reg == FramePtr && hasFP(MF)) { + FrameIdx = MF.getFrameInfo()->getObjectIndexBegin(); + return true; + } + return false; +} + int X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { - int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize; - uint64_t StackSize = MF.getFrameInfo()->getStackSize(); + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea(); + uint64_t StackSize = MFI->getStackSize(); if (needsStackRealignment(MF)) { - if (FI < 0) - // Skip the saved EBP + if (FI < 0) { + // Skip the saved EBP. Offset += SlotSize; - else { - unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI); + } else { + unsigned Align = MFI->getObjectAlignment(FI); assert( (-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } - // FIXME: Support tail calls } else { if (!hasFP(MF)) return Offset + StackSize; - // Skip the saved EBP + // Skip the saved EBP. Offset += SlotSize; // Skip the RETADDR move area X86MachineFunctionInfo *X86FI = MF.getInfo(); int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); - if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta; + if (TailCallReturnAddrDelta < 0) + Offset -= TailCallReturnAddrDelta; } return Offset; @@ -392,24 +521,29 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; + Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; MachineInstr *New = 0; if (Old->getOpcode() == getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri), - StackPtr).addReg(StackPtr).addImm(Amount); + StackPtr) + .addReg(StackPtr) + .addImm(Amount); } else { assert(Old->getOpcode() == getCallFrameDestroyOpcode()); - // factor out the amount the callee already popped. + + // Factor out the amount the callee already popped. uint64_t CalleeAmt = Old->getOperand(1).getImm(); Amount -= CalleeAmt; - if (Amount) { + + if (Amount) { unsigned Opc = (Amount < 128) ? (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) : (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri); New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(Amount); + .addReg(StackPtr) + .addImm(Amount); } } @@ -417,7 +551,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); - // Replace the pseudo instruction with a new instruction... 
+ // Replace the pseudo instruction with a new instruction. MBB.insert(I, New); } } @@ -432,10 +566,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineInstr *Old = I; MachineInstr *New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), - StackPtr).addReg(StackPtr).addImm(CalleeAmt); + StackPtr) + .addReg(StackPtr) + .addImm(CalleeAmt); + // The EFLAGS implicit def is dead. New->getOperand(3).setIsDead(); - MBB.insert(I, New); } } @@ -443,21 +579,24 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const{ +unsigned +X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const{ assert(SPAdj == 0 && "Unexpected"); unsigned i = 0; MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); + while (!MI.getOperand(i).isFI()) { ++i; assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } int FrameIndex = MI.getOperand(i).getIndex(); - unsigned BasePtr; + if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); else @@ -471,34 +610,33 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. int Offset = getFrameIndexOffset(MF, FrameIndex) + - (int)(MI.getOperand(i+3).getImm()); + (int)(MI.getOperand(i + 3).getImm()); - MI.getOperand(i+3).ChangeToImmediate(Offset); + MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } + return 0; } void X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { - MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - unsigned MaxAlign = std::max(FFI->getMaxAlignment(), - calculateMaxStackAlignment(FFI)); + unsigned MaxAlign = std::max(MFI->getMaxAlignment(), + calculateMaxStackAlignment(MFI)); - FFI->setMaxAlignment(MaxAlign); -} + MFI->setMaxAlignment(MaxAlign); -void -X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{ X86MachineFunctionInfo *X86FI = MF.getInfo(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); + if (TailCallReturnAddrDelta < 0) { // create RETURNADDR area // arg @@ -509,18 +647,21 @@ X86RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const{ // ... // } // [EBP] - MF.getFrameInfo()-> - CreateFixedObject(-TailCallReturnAddrDelta, - (-1*SlotSize)+TailCallReturnAddrDelta); + MFI->CreateFixedObject(-TailCallReturnAddrDelta, + (-1U*SlotSize)+TailCallReturnAddrDelta); } + if (hasFP(MF)) { assert((TailCallReturnAddrDelta <= 0) && "The Delta should always be zero or negative"); + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + // Create a frame entry for the EBP register that must be saved. 
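// [Editor's note] On x86 the local area offset is -SlotSize, so the new
// expression -(int)SlotSize + TFI.getOffsetOfLocalArea() below evaluates to
// the old hard-coded (int)SlotSize * -2; the rewrite stops baking that
// target fact into this function and pairs with the getFrameIndexOffset
// change earlier in this patch.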
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, - (int)SlotSize * -2+ - TailCallReturnAddrDelta); - assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && + int FrameIdx = MFI->CreateFixedObject(SlotSize, + -(int)SlotSize + + TFI.getOffsetOfLocalArea() + + TailCallReturnAddrDelta); + assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; } @@ -549,14 +690,14 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) - .addReg(StackPtr).addImm(ThisVal); - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); + .addReg(StackPtr) + .addImm(ThisVal); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. Offset -= ThisVal; } } -// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. +/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. static void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, uint64_t *NumBytes = NULL) { @@ -579,11 +720,12 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } -// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator. +/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator. static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, uint64_t *NumBytes = NULL) { + // FIXME: THIS ISN'T RUN!!! return; if (MBBI == MBB.end()) return; @@ -610,23 +752,22 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, } /// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB instruction it is deleted -/// argument and the stack adjustment is returned as a positive value for ADD -/// and a negative for SUB. +/// instruction. If it is an ADD/SUB instruction it is deleted argument and the +/// stack adjustment is returned as a positive value for ADD and a negative for +/// SUB. static int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, unsigned StackPtr, bool doMergeWithPrevious) { - if ((doMergeWithPrevious && MBBI == MBB.begin()) || (!doMergeWithPrevious && MBBI == MBB.end())) return 0; - int Offset = 0; - MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI; MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI); unsigned Opc = PI->getOpcode(); + int Offset = 0; + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && PI->getOperand(0).getReg() == StackPtr){ @@ -644,122 +785,116 @@ static int mergeSPUpdates(MachineBasicBlock &MBB, return Offset; } -void X86RegisterInfo::emitFrameMoves(MachineFunction &MF, - unsigned FrameLabelId, - unsigned ReadyLabelId) const { +void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF, + unsigned LabelId, + unsigned FramePtr) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); - if (!MMI) - return; + if (!MMI) return; + + // Add callee saved registers to move list. 
+ const std::vector &CSI = MFI->getCalleeSavedInfo(); + if (CSI.empty()) return; - uint64_t StackSize = MFI->getStackSize(); std::vector &Moves = MMI->getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); + bool HasFP = hasFP(MF); - // Calculate amount of bytes used for return address storing + // Calculate amount of bytes used for return address storing. int stackGrowth = (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == TargetFrameInfo::StackGrowsUp ? TD->getPointerSize() : -TD->getPointerSize()); - MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); - - if (StackSize) { - // Show update of SP. - if (hasFP(MF)) { - // Adjust SP - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, 2*stackGrowth); - Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); - } else { - MachineLocation SPDst(MachineLocation::VirtualFP); - MachineLocation SPSrc(MachineLocation::VirtualFP, - -StackSize+stackGrowth); - Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); - } - } else { - // FIXME: Verify & implement for FP - MachineLocation SPDst(StackPtr); - MachineLocation SPSrc(StackPtr, stackGrowth); - Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); - } - - // Add callee saved registers to move list. - const std::vector &CSI = MFI->getCalleeSavedInfo(); - // FIXME: This is dirty hack. The code itself is pretty mess right now. // It should be rewritten from scratch and generalized sometimes. - // Determine maximum offset (minumum due to stack growth) + // Determine maximum offset (minumum due to stack growth). int64_t MaxOffset = 0; - for (unsigned I = 0, E = CSI.size(); I!=E; ++I) + for (std::vector::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(CSI[I].getFrameIdx())); - - // Calculate offsets - int64_t saveAreaOffset = (hasFP(MF) ? 3 : 2)*stackGrowth; - for (unsigned I = 0, E = CSI.size(); I!=E; ++I) { - int64_t Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - Offset = (MaxOffset-Offset+saveAreaOffset); + MFI->getObjectOffset(I->getFrameIdx())); + + // Calculate offsets. + int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; + for (std::vector::const_iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + Offset = MaxOffset - Offset + saveAreaOffset; + + // Don't output a new machine move if we're re-saving the frame + // pointer. This happens when the PrologEpilogInserter has inserted an extra + // "PUSH" of the frame pointer -- the "emitPrologue" method automatically + // generates one when frame pointers are used. If we generate a "machine + // move" for this extra "PUSH", the linker will lose track of the fact that + // the frame pointer should have the value of the first "PUSH" when it's + // trying to unwind. + // + // FIXME: This looks inelegant. It's possibly correct, but it's covering up + // another bug. I.e., one where we generate a prolog like this: + // + // pushl %ebp + // movl %esp, %ebp + // pushl %ebp + // pushl %esi + // ... + // + // The immediate re-push of EBP is unnecessary. At the least, it's an + // optimization bug. EBP can be used as a scratch register in certain + // cases, but probably not when we have a frame pointer. 
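// [Editor's worked example] With a frame pointer on x86-64, stackGrowth is
// -8 and saveAreaOffset = 3 * (-8) = -24: the return address sits at CFA-8
// and the pushed RBP at CFA-16, so the callee-saved area starts at CFA-24
// with each further slot 8 bytes deeper; the MaxOffset - Offset term merely
// rebases the MachineFrameInfo offsets onto that area. The skip below then
// drops the redundant move for the frame pointer itself: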
+ if (HasFP && FramePtr == Reg) + continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); - } - - if (hasFP(MF)) { - // Save FP - MachineLocation FPDst(MachineLocation::VirtualFP, 2*stackGrowth); - MachineLocation FPSrc(FramePtr); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); + Moves.push_back(MachineMove(LabelId, CSDst, CSSrc)); } } - +/// emitPrologue - Push callee-saved registers onto the stack, which +/// automatically adjust the stack pointer. Adjust the stack pointer to allocate +/// space for local variables. Also emit labels used by the exception handler to +/// generate the exception handling frames. void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. + MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function* Fn = MF.getFunction(); - const X86Subtarget* Subtarget = &MF.getTarget().getSubtarget(); + const Function *Fn = MF.getFunction(); + const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget(); MachineModuleInfo *MMI = MFI->getMachineModuleInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - MachineBasicBlock::iterator MBBI = MBB.begin(); bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) || - !Fn->doesNotThrow() || - UnwindTablesMandatory; + !Fn->doesNotThrow() || UnwindTablesMandatory; + uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. + uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool HasFP = hasFP(MF); DebugLoc DL; - // Prepare for frame info. - unsigned FrameLabelId = 0; - - // Get the number of bytes to allocate from the FrameInfo. - uint64_t StackSize = MFI->getStackSize(); - - // Get desired stack alignment - uint64_t MaxAlign = MFI->getMaxAlignment(); - // Add RETADDR move area to callee saved frame size. int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); if (TailCallReturnAddrDelta < 0) X86FI->setCalleeSavedFrameSize( - X86FI->getCalleeSavedFrameSize() +(-TailCallReturnAddrDelta)); + X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). - bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone); - if (Is64Bit && !DisableRedZone && + if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->hasCalls() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); - if (hasFP(MF)) MinSize += SlotSize; - StackSize = std::max(MinSize, - StackSize > 128 ? StackSize - 128 : 0); + if (HasFP) MinSize += SlotSize; + StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); + MFI->setStackSize(StackSize); + } else if (Subtarget->isTargetWin64()) { + // We need to always allocate 32 bytes as register spill area. + // FIXME: We might reuse these 32 bytes for leaf functions. 
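// [Editor's note] These 32 bytes are the Win64 ABI "home area" (shadow
// space): every caller must reserve four 8-byte slots into which the callee
// may spill its register parameters, hence the unconditional bump below.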
+ StackSize += 32; MFI->setStackSize(StackSize); } @@ -769,33 +904,73 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (TailCallReturnAddrDelta < 0) { MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri), - StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta); - // The EFLAGS implicit def is dead. - MI->getOperand(3).setIsDead(); + StackPtr) + .addReg(StackPtr) + .addImm(-TailCallReturnAddrDelta); + MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. } + // Mapping for machine moves: + // + // DST: VirtualFP AND + // SRC: VirtualFP => DW_CFA_def_cfa_offset + // ELSE => DW_CFA_def_cfa + // + // SRC: VirtualFP AND + // DST: Register => DW_CFA_def_cfa_register + // + // ELSE + // OFFSET < 0 => DW_CFA_offset_extended_sf + // REG < 64 => DW_CFA_offset + Reg + // ELSE => DW_CFA_offset_extended + + std::vector &Moves = MMI->getFrameMoves(); + const TargetData *TD = MF.getTarget().getTargetData(); uint64_t NumBytes = 0; - if (hasFP(MF)) { - // Calculate required stack adjustment + int stackGrowth = + (MF.getTarget().getFrameInfo()->getStackGrowthDirection() == + TargetFrameInfo::StackGrowsUp ? + TD->getPointerSize() : -TD->getPointerSize()); + + if (HasFP) { + // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; if (needsStackRealignment(MF)) - FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; + FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign; NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); - // Get the offset of the stack slot for the EBP register... which is + // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. MFI->setOffsetAdjustment(-NumBytes); - // Save EBP into the appropriate stack slot... + // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) .addReg(FramePtr, RegState::Kill); if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabelId = MMI->NextLabelID(); + // Mark the place where EBP/RBP was saved. + unsigned FrameLabelId = MMI->NextLabelID(); BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); + + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } else { + // FIXME: Verify & implement for FP + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); + } + + // Change the rule for the FramePtr to be an "offset" rule. + MachineLocation FPDst(MachineLocation::VirtualFP, + 2 * stackGrowth); + MachineLocation FPSrc(FramePtr); + Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); } // Update EBP with the new base value... @@ -803,6 +978,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr); + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer becomes valid. + unsigned FrameLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId); + + // Define the current CFA to use the EBP/RBP register. 
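// [Editor's note] Per the mapping table earlier in this function, a move
// with SRC == VirtualFP and a plain register DST encodes as
// DW_CFA_def_cfa_register: from this label on the unwinder computes the CFA
// from EBP/RBP, while the offset established at the previous label is kept.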
+ MachineLocation FPDst(FramePtr); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); + } + // Mark the FramePtr as live-in in every block except the entry. for (MachineFunction::iterator I = next(MF.begin()), E = MF.end(); I != E; ++I) @@ -814,6 +1000,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr).addReg(StackPtr).addImm(-MaxAlign); + // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); } @@ -822,11 +1009,30 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { } // Skip the callee-saved push instructions. + bool PushedRegs = false; + int StackOffset = 2 * stackGrowth; + while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || - MBBI->getOpcode() == X86::PUSH64r)) + MBBI->getOpcode() == X86::PUSH64r)) { + PushedRegs = true; ++MBBI; + if (!HasFP && needsFrameMoves) { + // Mark callee-saved push instruction. + unsigned LabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId); + + // Define the current CFA rule to use the provided offset. + unsigned Ptr = StackSize ? + MachineLocation::VirtualFP : StackPtr; + MachineLocation SPDst(Ptr); + MachineLocation SPSrc(Ptr, StackOffset); + Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); + StackOffset += stackGrowth; + } + } + if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -883,12 +1089,29 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } - if (needsFrameMoves) { - unsigned ReadyLabelId = 0; - // Mark effective beginning of when frame pointer is ready. - ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); - emitFrameMoves(MF, FrameLabelId, ReadyLabelId); + if ((NumBytes || PushedRegs) && needsFrameMoves) { + // Mark end of stack pointer adjustment. + unsigned LabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId); + + if (!HasFP && NumBytes) { + // Define the current CFA rule to use the provided offset. + if (StackSize) { + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, + -StackSize + stackGrowth); + Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); + } else { + // FIXME: Verify & implement for FP + MachineLocation SPDst(StackPtr); + MachineLocation SPSrc(StackPtr, stackGrowth); + Moves.push_back(MachineMove(LabelId, SPDst, SPSrc)); + } + } + + // Emit DWARF info specifying the offsets of the callee-saved registers. + if (PushedRegs) + emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr); } } @@ -901,6 +1124,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, DebugLoc DL = MBBI->getDebugLoc(); switch (RetOpcode) { + default: + llvm_unreachable("Can only insert epilog into returning blocks"); case X86::RET: case X86::RETI: case X86::TCRETURNdi: @@ -911,26 +1136,25 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, case X86::EH_RETURN64: case X86::TAILJMPd: case X86::TAILJMPr: - case X86::TAILJMPm: break; // These are ok - default: - assert(0 && "Can only insert epilog into returning blocks"); + case X86::TAILJMPm: + break; // These are ok } - // Get the number of bytes to allocate from the FrameInfo + // Get the number of bytes to allocate from the FrameInfo. 
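// [Editor's note] In the epilogue below, NumBytes is (roughly) the slice of
// the frame not covered by the callee-saved pushes, since the POPs restore
// those slots; only NumBytes has to be re-added to the stack pointer. The
// expression (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign is the usual
// integer round-up of FrameSize to the next multiple of MaxAlign.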
uint64_t StackSize = MFI->getStackSize(); uint64_t MaxAlign = MFI->getMaxAlignment(); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; if (hasFP(MF)) { - // Calculate required stack adjustment + // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; if (needsStackRealignment(MF)) FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign; NumBytes = FrameSize - CSSize; - // pop EBP. + // Pop EBP. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr); } else { @@ -942,9 +1166,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, while (MBBI != MBB.begin()) { MachineBasicBlock::iterator PI = prior(MBBI); unsigned Opc = PI->getOpcode(); + if (Opc != X86::POP32r && Opc != X86::POP64r && !PI->getDesc().isTerminator()) break; + --MBBI; } @@ -957,10 +1183,10 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was - // realigned + // realigned. if (needsStackRealignment(MF)) { // We cannot use LEA here, because stack pointer was realigned. We need to - // deallocate local frame back + // deallocate local frame back. if (CSSize) { emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); MBBI = prior(LastCSPop); @@ -972,17 +1198,18 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } else if (MFI->hasVarSizedObjects()) { if (CSSize) { unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; - MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); + MachineInstr *MI = + addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); MBB.insert(MBBI, MI); - } else - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), - StackPtr).addReg(FramePtr); - - } else { - // adjust stack pointer back: ESP += numbytes - if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); + } else { + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr) + .addReg(FramePtr); + } + } else if (NumBytes) { + // Adjust stack pointer back: ESP += numbytes. + emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII); } // We're returning from function via eh_return. @@ -993,9 +1220,9 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr).addReg(DestAddr.getReg()); - // Tail call return: adjust the stack pointer and jump to callee } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) { + // Tail call return: adjust the stack pointer and jump to callee. MBBI = prior(MBB.end()); MachineOperand &JumpTarget = MBBI->getOperand(0); MachineOperand &StackAdjust = MBBI->getOperand(1); @@ -1006,6 +1233,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, int MaxTCDelta = X86FI->getTCReturnAddrDelta(); int Offset = 0; assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); + // Incoporate the retaddr area. Offset = StackAdj-MaxTCDelta; assert(Offset >= 0 && "Offset should never be negative"); @@ -1032,6 +1260,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, // Add the return addr area delta back since we are not tail calling. int delta = -1*X86FI->getTCReturnAddrDelta(); MBBI = prior(MBB.end()); + // Check for possible merge with preceeding ADD instruction. 
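// [Editor's note] As its comment above documents, mergeSPUpdates deletes an
// adjacent ADD/SUB of the stack pointer and returns the folded adjustment
// (positive for ADD, negative for SUB), so the emitSPUpdate below issues
// one combined adjustment instead of two instructions.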
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII); @@ -1039,18 +1268,16 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, } unsigned X86RegisterInfo::getRARegister() const { - if (Is64Bit) - return X86::RIP; // Should have dwarf #16 - else - return X86::EIP; // Should have dwarf #8 + return Is64Bit ? X86::RIP // Should have dwarf #16. + : X86::EIP; // Should have dwarf #8. } unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const { return hasFP(MF) ? FramePtr : StackPtr; } -void X86RegisterInfo::getInitialFrameState(std::vector &Moves) - const { +void +X86RegisterInfo::getInitialFrameState(std::vector &Moves) const { // Calculate amount of bytes used for return address storing int stackGrowth = (Is64Bit ? -8 : -4); @@ -1066,18 +1293,18 @@ void X86RegisterInfo::getInitialFrameState(std::vector &Moves) } unsigned X86RegisterInfo::getEHExceptionRegister() const { - assert(0 && "What is the exception register"); + llvm_unreachable("What is the exception register"); return 0; } unsigned X86RegisterInfo::getEHHandlerRegister() const { - assert(0 && "What is the exception handler register"); + llvm_unreachable("What is the exception handler register"); return 0; } namespace llvm { -unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) { - switch (VT.getSimpleVT()) { +unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { + switch (VT.getSimpleVT().SimpleTy) { default: return Reg; case MVT::i8: if (High) { @@ -1264,14 +1491,21 @@ namespace { RegNum < RI.getLastVirtReg(); ++RegNum) MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); - FFI->setMaxAlignment(MaxAlign); + if (FFI->getMaxAlignment() == MaxAlign) + return false; - return false; + FFI->setMaxAlignment(MaxAlign); + return true; } virtual const char *getPassName() const { return "X86 Maximal Stack Alignment Calculator"; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; char MSAC::ID = 0; diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 33b9f5edc73a6..f63570706d23a 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -93,9 +93,16 @@ public: /// Code Generation virtual methods... /// + /// getMatchingSuperRegClass - Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. + virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const; + /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. - const TargetRegisterClass *getPointerRegClass() const; + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; /// getCrossCopyRegClass - Returns a legal register class to copy a register /// in the specified class to or from. 
Returns NULL if it is possible to copy @@ -125,23 +132,25 @@ public: bool hasReservedCallFrame(MachineFunction &MF) const; + bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg, + int &FrameIdx) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; - void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; + void emitCalleeSavedFrameMoves(MachineFunction &MF, unsigned LabelId, + unsigned FramePtr) const; void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - void emitFrameMoves(MachineFunction &MF, - unsigned FrameLabelId, unsigned ReadyLabelId) const; - // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(MachineFunction &MF) const; @@ -155,8 +164,8 @@ public: // getX86SubSuperRegister - X86 utility function. It returns the sub or super // register of a specific X86 register. -// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX -unsigned getX86SubSuperRegister(unsigned, MVT, bool High=false); +// e.g. getX86SubSuperRegister(X86::EAX, EVT::i16) return X86:AX +unsigned getX86SubSuperRegister(unsigned, EVT, bool High=false); } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 2e6f017e27047..7bf074d4991e6 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -270,42 +270,27 @@ def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, // require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d" // cannot be encoded. def GR8 : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL, + [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL, R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SPL or BPL. - static const unsigned X86_GR8_AO_64_fp[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, - X86::R8B, X86::R9B, X86::R10B, X86::R11B, - X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B - }; - // If not, just don't allocate SPL. static const unsigned X86_GR8_AO_64[] = { X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::R8B, X86::R9B, X86::R10B, X86::R11B, X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL }; - // In 32-mode, none of the 8-bit registers aliases EBP or ESP. 
- static const unsigned X86_GR8_AO_32[] = { - X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH - }; GR8Class::iterator GR8Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); - if (!Subtarget.is64Bit()) - return X86_GR8_AO_32; - else if (RI->hasFP(MF)) - return X86_GR8_AO_64_fp; - else + if (Subtarget.is64Bit()) return X86_GR8_AO_64; + else + return begin(); } GR8Class::iterator @@ -313,17 +298,20 @@ def GR8 : RegisterClass<"X86", [i8], 8, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) - return X86_GR8_AO_32 + (sizeof(X86_GR8_AO_32) / sizeof(unsigned)); + // In 32-mode, none of the 8-bit registers aliases EBP or ESP. + return begin() + 8; else if (RI->hasFP(MF)) - return X86_GR8_AO_64_fp + (sizeof(X86_GR8_AO_64_fp) / sizeof(unsigned)); + // If so, don't allocate SPL or BPL. + return array_endof(X86_GR8_AO_64) - 1; else - return X86_GR8_AO_64 + (sizeof(X86_GR8_AO_64) / sizeof(unsigned)); + // If not, just don't allocate SPL. + return array_endof(X86_GR8_AO_64); } }]; } - def GR16 : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> { @@ -333,42 +321,20 @@ def GR16 : RegisterClass<"X86", [i16], 16, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SP or BP. - static const unsigned X86_GR16_AO_64_fp[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, - X86::R8W, X86::R9W, X86::R10W, X86::R11W, - X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W - }; - static const unsigned X86_GR16_AO_32_fp[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX - }; - // If not, just don't allocate SP. static const unsigned X86_GR16_AO_64[] = { X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::R8W, X86::R9W, X86::R10W, X86::R11W, X86::BX, X86::R14W, X86::R15W, X86::R12W, X86::R13W, X86::BP }; - static const unsigned X86_GR16_AO_32[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP - }; GR16Class::iterator GR16Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); - if (Subtarget.is64Bit()) { - if (RI->hasFP(MF)) - return X86_GR16_AO_64_fp; - else - return X86_GR16_AO_64; - } else { - if (RI->hasFP(MF)) - return X86_GR16_AO_32_fp; - else - return X86_GR16_AO_32; - } + if (Subtarget.is64Bit()) + return X86_GR16_AO_64; + else + return begin(); } GR16Class::iterator @@ -377,21 +343,26 @@ def GR16 : RegisterClass<"X86", [i16], 16, const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); if (Subtarget.is64Bit()) { + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR16_AO_64_fp+(sizeof(X86_GR16_AO_64_fp)/sizeof(unsigned)); + // If so, don't allocate SP or BP. + return array_endof(X86_GR16_AO_64) - 1; else - return X86_GR16_AO_64 + (sizeof(X86_GR16_AO_64) / sizeof(unsigned)); + // If not, just don't allocate SP. 
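// [Editor's note] array_endof, from llvm/ADT/STLExtras.h, is the constant
// A + sizeof(A)/sizeof(A[0]); the hasFP branch above returns it minus one
// to additionally trim BP, so the old explicit *_fp tables collapse into
// slices of a single array.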
+ return array_endof(X86_GR16_AO_64); } else { + // Does the function dedicate EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR16_AO_32_fp+(sizeof(X86_GR16_AO_32_fp)/sizeof(unsigned)); + // If so, don't allocate SP or BP. + return begin() + 6; else - return X86_GR16_AO_32 + (sizeof(X86_GR16_AO_32) / sizeof(unsigned)); + // If not, just don't allocate SP. + return begin() + 7; } } }]; } - def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { @@ -401,42 +372,20 @@ def GR32 : RegisterClass<"X86", [i32], 32, iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate ESP or EBP. - static const unsigned X86_GR32_AO_64_fp[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, - X86::R8D, X86::R9D, X86::R10D, X86::R11D, - X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D - }; - static const unsigned X86_GR32_AO_32_fp[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX - }; - // If not, just don't allocate ESP. static const unsigned X86_GR32_AO_64[] = { X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::R8D, X86::R9D, X86::R10D, X86::R11D, X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP }; - static const unsigned X86_GR32_AO_32[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP - }; GR32Class::iterator GR32Class::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); - if (Subtarget.is64Bit()) { - if (RI->hasFP(MF)) - return X86_GR32_AO_64_fp; - else - return X86_GR32_AO_64; - } else { - if (RI->hasFP(MF)) - return X86_GR32_AO_32_fp; - else - return X86_GR32_AO_32; - } + if (Subtarget.is64Bit()) + return X86_GR32_AO_64; + else + return begin(); } GR32Class::iterator @@ -445,21 +394,29 @@ def GR32 : RegisterClass<"X86", [i32], 32, const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); if (Subtarget.is64Bit()) { + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_AO_64_fp+(sizeof(X86_GR32_AO_64_fp)/sizeof(unsigned)); + // If so, don't allocate ESP or EBP. + return array_endof(X86_GR32_AO_64) - 1; else - return X86_GR32_AO_64 + (sizeof(X86_GR32_AO_64) / sizeof(unsigned)); + // If not, just don't allocate ESP. + return array_endof(X86_GR32_AO_64); } else { + // Does the function dedicate EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_AO_32_fp+(sizeof(X86_GR32_AO_32_fp)/sizeof(unsigned)); + // If so, don't allocate ESP or EBP. + return begin() + 6; else - return X86_GR32_AO_32 + (sizeof(X86_GR32_AO_32) / sizeof(unsigned)); + // If not, just don't allocate ESP. + return begin() + 7; } } }]; } - +// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since +// RIP isn't really a register and it can't be used anywhere except in an +// address, but it doesn't cause trouble. def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { @@ -483,6 +440,11 @@ def GR64 : RegisterClass<"X86", [i64], 64, }]; } +// Segment registers for use by MOV instructions (and others) that have a +// segment register as one operand. Always contain a 16-bit segment +// descriptor. 
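// [Editor's note] Only the 16-bit visible selector is modelled by the class
// below; the hidden base/limit portion of a segment register has no
// representation, which suffices for the MOV-to/from-Sreg forms this class
// exists to encode.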
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { +} // GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of // GR8, GR16, GR32, and GR64 which contain just the "a" "b", "c", and "d" @@ -509,38 +471,25 @@ def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> { // On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes // of registers which do not by themselves require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, - [AL, CL, DL, BL, AH, CH, DH, BH, + [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL]> { let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SPL or BPL. - static const unsigned X86_GR8_NOREX_AO_64_fp[] = { - X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL - }; - // If not, just don't allocate SPL. static const unsigned X86_GR8_NOREX_AO_64[] = { X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL, X86::BL, X86::BPL }; - // In 32-mode, none of the 8-bit registers aliases EBP or ESP. - static const unsigned X86_GR8_NOREX_AO_32[] = { - X86::AL, X86::CL, X86::DL, X86::AH, X86::CH, X86::DH, X86::BL, X86::BH - }; GR8_NOREXClass::iterator GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); - if (!Subtarget.is64Bit()) - return X86_GR8_NOREX_AO_32; - else if (RI->hasFP(MF)) - return X86_GR8_NOREX_AO_64_fp; - else + if (Subtarget.is64Bit()) return X86_GR8_NOREX_AO_64; + else + return begin(); } GR8_NOREXClass::iterator @@ -548,15 +497,16 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) - return X86_GR8_NOREX_AO_32 + - (sizeof(X86_GR8_NOREX_AO_32) / sizeof(unsigned)); + // In 32-mode, none of the 8-bit registers aliases EBP or ESP. + return begin() + 8; else if (RI->hasFP(MF)) - return X86_GR8_NOREX_AO_64_fp + - (sizeof(X86_GR8_NOREX_AO_64_fp) / sizeof(unsigned)); + // If so, don't allocate SPL or BPL. + return array_endof(X86_GR8_NOREX_AO_64) - 1; else - return X86_GR8_NOREX_AO_64 + - (sizeof(X86_GR8_NOREX_AO_64) / sizeof(unsigned)); + // If not, just don't allocate SPL. + return array_endof(X86_GR8_NOREX_AO_64); } }]; } @@ -564,38 +514,20 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, [AX, CX, DX, SI, DI, BX, BP, SP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX]; let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate SP or BP. - static const unsigned X86_GR16_AO_fp[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX - }; - // If not, just don't allocate SP. 
- static const unsigned X86_GR16_AO[] = { - X86::AX, X86::CX, X86::DX, X86::SI, X86::DI, X86::BX, X86::BP - }; - - GR16_NOREXClass::iterator - GR16_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) - return X86_GR16_AO_fp; - else - return X86_GR16_AO; - } - GR16_NOREXClass::iterator GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR16_AO_fp+(sizeof(X86_GR16_AO_fp)/sizeof(unsigned)); + // If so, don't allocate SP or BP. + return end() - 2; else - return X86_GR16_AO + (sizeof(X86_GR16_AO) / sizeof(unsigned)); + // If not, just don't allocate SP. + return end() - 1; } }]; } @@ -604,89 +536,149 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> { let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX]; let MethodProtos = [{ - iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate ESP or EBP. - static const unsigned X86_GR32_NOREX_AO_fp[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX - }; - // If not, just don't allocate ESP. - static const unsigned X86_GR32_NOREX_AO[] = { - X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP - }; - GR32_NOREXClass::iterator - GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP / EBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_NOREX_AO_fp; + // If so, don't allocate ESP or EBP. + return end() - 2; else - return X86_GR32_NOREX_AO; + // If not, just don't allocate ESP. + return end() - 1; } - - GR32_NOREXClass::iterator - GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { + }]; +} +// GR64_NOREX - GR64 registers which do not require a REX prefix. +def GR64_NOREX : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64_NOREXClass::iterator + GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR32_NOREX_AO_fp + - (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned)); + // If so, don't allocate RIP, RSP or RBP. + return end() - 3; else - return X86_GR32_NOREX_AO + - (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned)); + // If not, just don't allocate RIP or RSP. + return end() - 2; } }]; } -// GR64_NOREX - GR64 registers which do not require a REX prefix. -def GR64_NOREX : RegisterClass<"X86", [i64], 64, - [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> { - let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; +// GR32_NOSP - GR32 registers except ESP. 
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, + [EAX, ECX, EDX, ESI, EDI, EBX, EBP, + R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { + let SubRegClassList = [GR8, GR8, GR16]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; let MethodBodies = [{ - // Does the function dedicate RBP / EBP to being a frame ptr? - // If so, don't allocate RSP or RBP. - static const unsigned X86_GR64_NOREX_AO_fp[] = { - X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX - }; - // If not, just don't allocate RSP. - static const unsigned X86_GR64_NOREX_AO[] = { - X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP + static const unsigned X86_GR32_NOSP_AO_64[] = { + X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, + X86::R8D, X86::R9D, X86::R10D, X86::R11D, + X86::EBX, X86::R14D, X86::R15D, X86::R12D, X86::R13D, X86::EBP }; - GR64_NOREXClass::iterator - GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const { + GR32_NOSPClass::iterator + GR32_NOSPClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.is64Bit()) + return X86_GR32_NOSP_AO_64; + else + return begin(); + } + + GR32_NOSPClass::iterator + GR32_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->hasFP(MF)) - return X86_GR64_NOREX_AO_fp; + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.is64Bit()) { + // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF)) + // If so, don't allocate EBP. + return array_endof(X86_GR32_NOSP_AO_64) - 1; + else + // If not, any reg in this class is ok. + return array_endof(X86_GR32_NOSP_AO_64); + } else { + // Does the function dedicate EBP to being a frame ptr? + if (RI->hasFP(MF)) + // If so, don't allocate EBP. + return begin() + 6; + else + // If not, any reg in this class is ok. + return begin() + 7; + } + } + }]; +} + +// GR64_NOSP - GR64 registers except RSP (and RIP). +def GR64_NOSP : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + RBX, R14, R15, R12, R13, RBP]> { + let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64_NOSPClass::iterator + GR64_NOSPClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return begin(); // None of these are allocatable in 32-bit. + if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + return end()-1; // If so, don't allocate RBP else - return X86_GR64_NOREX_AO; + return end(); // If not, any reg in this class is ok. } + }]; +} - GR64_NOREXClass::iterator - GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { +// GR64_NOREX_NOSP - GR64_NOREX registers except RSP. 
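The new NOSP classes use the same begin/end protocol as the classes above: the hooks expose only the allocatable prefix of the register list for the current function. A hedged sketch of the consumer side (iterator type per this revision's TargetRegisterClass; the callback is a hypothetical stand-in for the register allocator):

    // Visit the registers a class actually offers for allocation in MF.
    void visitAllocationOrder(const TargetRegisterClass *RC,
                              const MachineFunction &MF) {
      for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
                                         E = RC->allocation_order_end(MF);
           I != E; ++I)
        considerRegister(*I);  // hypothetical allocator callback
    }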
+def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, + [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> { + let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX]; + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + GR64_NOREX_NOSPClass::iterator + GR64_NOREX_NOSPClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + // Does the function dedicate RBP to being a frame ptr? if (RI->hasFP(MF)) - return X86_GR64_NOREX_AO_fp + - (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned)); + // If so, don't allocate RBP. + return end() - 1; else - return X86_GR64_NOREX_AO + - (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned)); + // If not, any reg in this class is ok. + return end(); } }]; } // A class to support the 'A' assembler constraint: EAX then EDX. -def GRAD : RegisterClass<"X86", [i32], 32, [EAX, EDX]>; +def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> { + let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD]; +} // Scalar SSE2 floating point registers. def FR32 : RegisterClass<"X86", [f32], 32, diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h index b225f480e4ed9..990962dc4173c 100644 --- a/lib/Target/X86/X86Relocations.h +++ b/lib/Target/X86/X86Relocations.h @@ -20,21 +20,31 @@ namespace llvm { namespace X86 { /// RelocationType - An enum for the x86 relocation codes. Note that /// the terminology here doesn't follow x86 convention - word means - /// 32-bit and dword means 64-bit. + /// 32-bit and dword means 64-bit. The relocations will be treated + /// by JIT or ObjectCode emitters, this is transparent to the x86 code + /// emitter but JIT and ObjectCode will treat them differently enum RelocationType { - // reloc_pcrel_word - PC relative relocation, add the relocated value to - // the value already in memory, after we adjust it for where the PC is. + /// reloc_pcrel_word - PC relative relocation, add the relocated value to + /// the value already in memory, after we adjust it for where the PC is. reloc_pcrel_word = 0, - // reloc_picrel_word - PIC base relative relocation, add the relocated - // value to the value already in memory, after we adjust it for where the - // PIC base is. + /// reloc_picrel_word - PIC base relative relocation, add the relocated + /// value to the value already in memory, after we adjust it for where the + /// PIC base is. reloc_picrel_word = 1, - - // reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just - // add the relocated value to the value already in memory. + + /// reloc_absolute_word - absolute relocation, just add the relocated + /// value to the value already in memory. reloc_absolute_word = 2, - reloc_absolute_dword = 3 + + /// reloc_absolute_word_sext - absolute relocation, just add the relocated + /// value to the value already in memory. In object files, it represents a + /// value which must be sign-extended when resolving the relocation. + reloc_absolute_word_sext = 3, + + /// reloc_absolute_dword - absolute relocation, just add the relocated + /// value to the value already in memory. 
+ reloc_absolute_dword = 4 }; } } diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 8506fa66a645a..fb76aeb05556a 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -13,80 +13,111 @@ #define DEBUG_TYPE "subtarget" #include "X86Subtarget.h" +#include "X86InstrInfo.h" #include "X86GenSubtarget.inc" -#include "llvm/Module.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; #if defined(_MSC_VER) - #include +#include #endif -static cl::opt -AsmWriterFlavor("x86-asm-syntax", cl::init(X86Subtarget::Unset), - cl::desc("Choose style of code to emit from X86 backend:"), - cl::values( - clEnumValN(X86Subtarget::ATT, "att", "Emit AT&T-style assembly"), - clEnumValN(X86Subtarget::Intel, "intel", "Emit Intel-style assembly"), - clEnumValEnd)); - - -/// True if accessing the GV requires an extra load. For Windows, dllimported -/// symbols are indirect, loading the value at address GV rather then the -/// value of GV itself. This means that the GlobalAddress must be in the base -/// or index register of the address, not the GV offset field. -bool X86Subtarget::GVRequiresExtraLoad(const GlobalValue* GV, - const TargetMachine& TM, - bool isDirectCall) const -{ - // FIXME: PIC - if (TM.getRelocationModel() != Reloc::Static && - TM.getCodeModel() != CodeModel::Large) { +/// ClassifyGlobalReference - Classify a global variable reference for the +/// current subtarget according to how we should reference it in a non-pcrel +/// context. +unsigned char X86Subtarget:: +ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { + // DLLImport only exists on windows, it is implemented as a load from a + // DLLIMPORT stub. + if (GV->hasDLLImportLinkage()) + return X86II::MO_DLLIMPORT; + + // GV with ghost linkage (in JIT lazy compilation mode) do not require an + // extra load from stub. + bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + + // X86-64 in PIC mode. + if (isPICStyleRIPRel()) { + // Large model never uses stubs. + if (TM.getCodeModel() == CodeModel::Large) + return X86II::MO_NO_FLAG; + if (isTargetDarwin()) { - if (isDirectCall) - return false; - bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); - if (GV->hasHiddenVisibility() && - (Is64Bit || (!isDecl && !GV->hasCommonLinkage()))) - // If symbol visibility is hidden, the extra load is not needed if - // target is x86-64 or the symbol is definitely defined in the current - // translation unit. - return false; - return !isDirectCall && (isDecl || GV->isWeakForLinker()); - } else if (isTargetELF()) { + // If symbol visibility is hidden, the extra load is not needed if + // target is x86-64 or the symbol is definitely defined in the current + // translation unit. + if (GV->hasDefaultVisibility() && + (isDecl || GV->isWeakForLinker())) + return X86II::MO_GOTPCREL; + } else { + assert(isTargetELF() && "Unknown rip-relative target"); + // Extra load is needed for all externally visible. 
- if (isDirectCall) - return false; - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - return false; - return true; - } else if (isTargetCygMing() || isTargetWindows()) { - return (GV->hasDLLImportLinkage()); + if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility()) + return X86II::MO_GOTPCREL; } + + return X86II::MO_NO_FLAG; } - return false; -} + + if (isPICStyleGOT()) { // 32-bit ELF targets. + // Extra load is needed for all externally visible. + if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) + return X86II::MO_GOTOFF; + return X86II::MO_GOT; + } + + if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. + // Determine whether we have a stub reference and/or whether the reference + // is relative to the PIC base or not. + + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (!isDecl && !GV->isWeakForLinker()) + return X86II::MO_PIC_BASE_OFFSET; -/// True if accessing the GV requires a register. This is a superset of the -/// cases where GVRequiresExtraLoad is true. Some variations of PIC require -/// a register, but not an extra load. -bool X86Subtarget::GVRequiresRegister(const GlobalValue *GV, - const TargetMachine& TM, - bool isDirectCall) const -{ - if (GVRequiresExtraLoad(GV, TM, isDirectCall)) - return true; - // Code below here need only consider cases where GVRequiresExtraLoad - // returns false. - if (TM.getRelocationModel() == Reloc::PIC_) - return !isDirectCall && - (GV->hasLocalLinkage() || GV->hasExternalLinkage()); - return false; + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_NONLAZY_PIC_BASE; + + // If symbol visibility is hidden, we have a stub for common symbol + // references and external declarations. + if (isDecl || GV->hasCommonLinkage()) { + // Hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; + } + + // Otherwise, no stub. + return X86II::MO_PIC_BASE_OFFSET; + } + + if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. + // Determine whether we have a stub reference. + + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (!isDecl && !GV->isWeakForLinker()) + return X86II::MO_NO_FLAG; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return X86II::MO_DARWIN_NONLAZY; + + // Otherwise, no stub. + return X86II::MO_NO_FLAG; + } + + // Direct static reference to global. + return X86II::MO_NO_FLAG; } + /// getBZeroEntry - This function returns the name of a function which has an /// interface like the non-standard bzero function, if such a function exists on /// the current subtarget and it is considered prefereable over memset with zero @@ -120,9 +151,9 @@ unsigned X86Subtarget::getSpecialAddressLatency() const { /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the /// specified arguments. If we can't run cpuid on the host, return true. 
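Unlike the old GVRequiresExtraLoad/GVRequiresRegister booleans, ClassifyGlobalReference hands back an X86II::MO_* operand flag that lowering can attach directly to the address node. A sketch of the intended call site, with the lowering context assumed and the load-wrapping step elided:

    // In global-address lowering: classify once, tag the operand, and
    // remember that stub-style flags imply one extra load.
    unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
    SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(),
                                                /*Offset=*/0, OpFlags);
    // Flags such as MO_GOTPCREL or MO_DARWIN_NONLAZY_PIC_BASE mean Result
    // addresses a stub slot; the value itself needs a following load.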
-bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, - unsigned *rECX, unsigned *rEDX) { -#if defined(__x86_64__) || defined(_M_AMD64) +static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) #if defined(__GNUC__) // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. asm ("movq\t%%rbx, %%rsi\n\t" @@ -192,18 +223,19 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { char c[12]; } text; - if (X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) + if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1)) return; - X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); + GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 23) & 0x1) X86SSELevel = MMX; - if ((EDX >> 25) & 0x1) X86SSELevel = SSE1; - if ((EDX >> 26) & 0x1) X86SSELevel = SSE2; - if (ECX & 0x1) X86SSELevel = SSE3; - if ((ECX >> 9) & 0x1) X86SSELevel = SSSE3; - if ((ECX >> 19) & 0x1) X86SSELevel = SSE41; - if ((ECX >> 20) & 0x1) X86SSELevel = SSE42; + if ((EDX >> 15) & 1) HasCMov = true; + if ((EDX >> 23) & 1) X86SSELevel = MMX; + if ((EDX >> 25) & 1) X86SSELevel = SSE1; + if ((EDX >> 26) & 1) X86SSELevel = SSE2; + if (ECX & 0x1) X86SSELevel = SSE3; + if ((ECX >> 9) & 1) X86SSELevel = SSSE3; + if ((ECX >> 19) & 1) X86SSELevel = SSE41; + if ((ECX >> 20) & 1) X86SSELevel = SSE42; bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; @@ -218,7 +250,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { DetectFamilyModel(EAX, Family, Model); IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13); - X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); HasX86_64 = (EDX >> 29) & 0x1; HasSSE4A = IsAMD && ((ECX >> 6) & 0x1); HasFMA4 = IsAMD && ((ECX >> 16) & 0x1); @@ -227,13 +259,13 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { static const char *GetCurrentX86CPU() { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) return "generic"; unsigned Family = 0; unsigned Model = 0; DetectFamilyModel(EAX, Family, Model); - X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; bool HasSSE3 = (ECX & 0x1); @@ -242,7 +274,7 @@ static const char *GetCurrentX86CPU() { char c[12]; } text; - X86::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); + GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); if (memcmp(text.c, "GenuineIntel", 12) == 0) { switch (Family) { case 3: @@ -319,9 +351,7 @@ static const char *GetCurrentX86CPU() { } case 15: if (HasSSE3) { - switch (Model) { - default: return "k8-sse3"; - } + return "k8-sse3"; } else { switch (Model) { case 1: return "opteron"; @@ -330,9 +360,7 @@ static const char *GetCurrentX86CPU() { } } case 16: - switch (Model) { - default: return "amdfam10"; - } + return "amdfam10"; default: return "generic"; } @@ -341,11 +369,12 @@ static const char *GetCurrentX86CPU() { } } -X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) - : AsmFlavor(AsmWriterFlavor) - , PICStyle(PICStyles::None) +X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, + bool is64Bit) + : PICStyle(PICStyles::None) , X86SSELevel(NoMMXSSE) , X863DNowLevel(NoThreeDNow) + , HasCMov(false) , 
HasX86_64(false) , HasSSE4A(false) , HasAVX(false) @@ -384,15 +413,14 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) if (Is64Bit) HasX86_64 = true; - DOUT << "Subtarget features: SSELevel " << X86SSELevel - << ", 3DNowLevel " << X863DNowLevel - << ", 64bit " << HasX86_64 << "\n"; + DEBUG(errs() << "Subtarget features: SSELevel " << X86SSELevel + << ", 3DNowLevel " << X863DNowLevel + << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. - const std::string& TT = M.getTargetTriple(); if (TT.length() > 5) { size_t Pos; if ((Pos = TT.find("-darwin")) != std::string::npos) { @@ -415,38 +443,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) TargetType = isWindows; } else if (TT.find("windows") != std::string::npos) { TargetType = isWindows; - } - else if (TT.find("-cl") != std::string::npos) { + } else if (TT.find("-cl") != std::string::npos) { TargetType = isDarwin; DarwinVers = 9; } - } else if (TT.empty()) { -#if defined(__CYGWIN__) - TargetType = isCygwin; -#elif defined(__MINGW32__) || defined(__MINGW64__) - TargetType = isMingw; -#elif defined(__APPLE__) - TargetType = isDarwin; -#if __APPLE_CC__ > 5400 - DarwinVers = 9; // GCC 5400+ is Leopard. -#else - DarwinVers = 8; // Minimum supported darwin is Tiger. -#endif - -#elif defined(_WIN32) || defined(_WIN64) - TargetType = isWindows; -#elif defined(__linux__) - // Linux doesn't imply ELF, but we don't currently support anything else. - TargetType = isELF; - IsLinux = true; -#endif - } - - // If the asm syntax hasn't been overridden on the command line, use whatever - // the target wants. - if (AsmFlavor == X86Subtarget::Unset) { - AsmFlavor = (TargetType == isWindows) - ? X86Subtarget::Intel : X86Subtarget::ATT; } // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64 diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 0d1434f8e9998..a2e368de6f0e8 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -18,23 +18,22 @@ #include namespace llvm { -class Module; class GlobalValue; class TargetMachine; +/// PICStyles - The X86 backend supports a number of different styles of PIC. +/// namespace PICStyles { enum Style { - Stub, GOT, RIPRel, WinPIC, None + StubPIC, // Used on i386-darwin in -fPIC mode. + StubDynamicNoPIC, // Used on i386-darwin in -mdynamic-no-pic mode. + GOT, // Used on many 32-bit unices in -fPIC mode. + RIPRel, // Used on X86-64 when not in -static mode. + None // Set when in -static mode (not PIC or DynamicNoPIC mode). }; } class X86Subtarget : public TargetSubtarget { -public: - enum AsmWriterFlavorTy { - // Note: This numbering has to match the GCC assembler dialects for inline - // asm alternatives to work right. - ATT = 0, Intel = 1, Unset - }; protected: enum X86SSEEnum { NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 @@ -44,10 +43,6 @@ protected: NoThreeDNow, ThreeDNow, ThreeDNowA }; - /// AsmFlavor - Which x86 asm dialect to use. - /// - AsmWriterFlavorTy AsmFlavor; - /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; @@ -60,6 +55,10 @@ protected: /// X863DNowEnum X863DNowLevel; + /// HasCMov - True if this processor has conditional move instructions + /// (generally pentium pro+). 
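The new HasCMov member is fed from CPUID leaf 1, EDX bit 15, next to the SSE bits AutoDetectSubtargetFeatures already decoded. A self-contained restatement of that decoding, with the bit positions exactly as in the hunk above:

    // GetCpuIDAndInfo returns true when cpuid cannot run on the host.
    unsigned EAX, EBX, ECX, EDX;
    if (!GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) {
      bool HasCMov  = (EDX >> 15) & 1;  // CMOV/FCMOV (PPro and later)
      bool HasMMX   = (EDX >> 23) & 1;
      bool HasSSE2  = (EDX >> 26) & 1;
      bool HasSSE42 = (ECX >> 20) & 1;
      (void)HasCMov; (void)HasMMX; (void)HasSSE2; (void)HasSSE42;
    }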
+ bool HasCMov; + /// HasX86_64 - True if the processor supports X86-64 instructions. /// bool HasX86_64; @@ -95,7 +94,7 @@ protected: unsigned MaxInlineSizeThreshold; private: - /// Is64Bit - True if the processor supports 64-bit instructions and module + /// Is64Bit - True if the processor supports 64-bit instructions and /// pointer size is 64 bit. bool Is64Bit; @@ -105,9 +104,9 @@ public: } TargetType; /// This constructor initializes the data members to match that - /// of the specified module. + /// of the specified triple. /// - X86Subtarget(const Module &M, const std::string &FS, bool is64Bit); + X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit); /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every @@ -145,66 +144,67 @@ public: bool hasAVX() const { return HasAVX; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } - bool isBTMemSlow() const { return IsBTMemSlow; } - unsigned getAsmFlavor() const { - return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0; - } - - bool isFlavorAtt() const { return AsmFlavor == ATT; } - bool isFlavorIntel() const { return AsmFlavor == Intel; } - bool isTargetDarwin() const { return TargetType == isDarwin; } - bool isTargetELF() const { - return TargetType == isELF; - } + bool isTargetELF() const { return TargetType == isELF; } + bool isTargetWindows() const { return TargetType == isWindows; } bool isTargetMingw() const { return TargetType == isMingw; } - bool isTargetCygMing() const { return (TargetType == isMingw || - TargetType == isCygwin); } bool isTargetCygwin() const { return TargetType == isCygwin; } + bool isTargetCygMing() const { + return TargetType == isMingw || TargetType == isCygwin; + } + + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. + bool isTargetCOFF() const { + return TargetType == isMingw || TargetType == isCygwin || + TargetType == isWindows; + } + bool isTargetWin64() const { - return (Is64Bit && (TargetType == isMingw || TargetType == isWindows)); + return Is64Bit && (TargetType == isMingw || TargetType == isWindows); } std::string getDataLayout() const { const char *p; if (is64Bit()) p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128"; - else { - if (isTargetDarwin()) - p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128"; - else - p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"; - } + else if (isTargetDarwin()) + p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128"; + else + p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"; return std::string(p); } bool isPICStyleSet() const { return PICStyle != PICStyles::None; } bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } - bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } - bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; } + + bool isPICStyleStubPIC() const { + return PICStyle == PICStyles::StubPIC; + } + + bool isPICStyleStubNoDynamic() const { + return PICStyle == PICStyles::StubDynamicNoPIC; + } + bool isPICStyleStubAny() const { + return PICStyle == PICStyles::StubDynamicNoPIC || + PICStyle == PICStyles::StubPIC; } - /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. + /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, + /// 10 = Snow Leopard, etc. 
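The single isPICStyleStub predicate is replaced in this header by isPICStyleStubPIC, isPICStyleStubNoDynamic, and the catch-all isPICStyleStubAny. A sketch of how a call site might branch on them; the emit helpers here are hypothetical:

    if (Subtarget.isPICStyleStubPIC())
      emitStubRefRelativeToPICBase();   // Darwin/32, -fPIC
    else if (Subtarget.isPICStyleStubNoDynamic())
      emitAbsoluteStubRef();            // Darwin/32, -mdynamic-no-pic
    else if (Subtarget.isPICStyleGOT())
      emitGOTRef();                     // 32-bit ELF PIC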
unsigned getDarwinVers() const { return DarwinVers; } /// isLinux - Return true if the target is "Linux". bool isLinux() const { return IsLinux; } - /// True if accessing the GV requires an extra load. For Windows, dllimported - /// symbols are indirect, loading the value at address GV rather then the - /// value of GV itself. This means that the GlobalAddress must be in the base - /// or index register of the address, not the GV offset field. - bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM, - bool isDirectCall) const; - - /// True if accessing the GV requires a register. This is a superset of the - /// cases where GVRequiresExtraLoad is true. Some variations of PIC require - /// a register, but not an extra load. - bool GVRequiresRegister(const GlobalValue* GV, const TargetMachine& TM, - bool isDirectCall) const; + + /// ClassifyGlobalReference - Classify a global variable reference for the + /// current subtarget according to how we should reference it in a non-pcrel + /// context. + unsigned char ClassifyGlobalReference(const GlobalValue *GV, + const TargetMachine &TM)const; /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. @@ -224,13 +224,6 @@ public: unsigned getSpecialAddressLatency() const; }; -namespace X86 { - /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in - /// the specified arguments. If we can't run cpuid on the host, return true. - bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, - unsigned *rECX, unsigned *rEDX); -} - } // End llvm namespace #endif diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index b000914c92039..a61de1cd182ac 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -11,172 +11,134 @@ // //===----------------------------------------------------------------------===// -#include "X86TargetAsmInfo.h" +#include "X86MCAsmInfo.h" #include "X86TargetMachine.h" #include "X86.h" -#include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -/// X86TargetMachineModule - Note that this is used on hosts that cannot link -/// in a library unless there are references into the library. In particular, -/// it seems that it is not possible to get things to work on Win32 without -/// this. Though it is unused, do not remove it. -extern "C" int X86TargetMachineModule; -int X86TargetMachineModule = 0; - -// Register the target. -static RegisterTarget -X("x86", "32-bit X86: Pentium-Pro and above"); -static RegisterTarget -Y("x86-64", "64-bit X86: EM64T and AMD64"); - -// Force static initialization. 
-extern "C" void LLVMInitializeX86Target() { } - -// No assembler printer by default -X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0; - -const TargetAsmInfo *X86TargetMachine::createTargetAsmInfo() const { - if (Subtarget.isFlavorIntel()) - return new X86WinTargetAsmInfo(*this); - else - switch (Subtarget.TargetType) { - case X86Subtarget::isDarwin: - return new X86DarwinTargetAsmInfo(*this); - case X86Subtarget::isELF: - return new X86ELFTargetAsmInfo(*this); - case X86Subtarget::isMingw: - case X86Subtarget::isCygwin: - return new X86COFFTargetAsmInfo(*this); - case X86Subtarget::isWindows: - return new X86WinTargetAsmInfo(*this); - default: - return new X86GenericTargetAsmInfo(*this); - } -} - -unsigned X86_32TargetMachine::getJITMatchQuality() { -#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) - return 10; -#endif - return 0; -} - -unsigned X86_64TargetMachine::getJITMatchQuality() { -#if defined(__x86_64__) || defined(_M_AMD64) - return 10; -#endif - return 0; +static const MCAsmInfo *createMCAsmInfo(const Target &T, + const StringRef &TT) { + Triple TheTriple(TT); + switch (TheTriple.getOS()) { + case Triple::Darwin: + return new X86MCAsmInfoDarwin(TheTriple); + case Triple::MinGW32: + case Triple::MinGW64: + case Triple::Cygwin: + return new X86MCAsmInfoCOFF(TheTriple); + case Triple::Win32: + return new X86WinMCAsmInfo(TheTriple); + default: + return new X86ELFMCAsmInfo(TheTriple); + } } -unsigned X86_32TargetMachine::getModuleMatchQuality(const Module &M) { - // We strongly match "i[3-9]86-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && - TT[4] == '-' && TT[1] - '3' < 6) - return 20; - // If the target triple is something non-X86, we don't match. - if (!TT.empty()) return 0; +extern "C" void LLVMInitializeX86Target() { + // Register the target. + RegisterTargetMachine X(TheX86_32Target); + RegisterTargetMachine Y(TheX86_64Target); - if (M.getEndianness() == Module::LittleEndian && - M.getPointerSize() == Module::Pointer32) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target + // Register the target asm info. + RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo); + RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); - return getJITMatchQuality()/2; + // Register the code emitter. + TargetRegistry::RegisterCodeEmitter(TheX86_32Target, createX86MCCodeEmitter); + TargetRegistry::RegisterCodeEmitter(TheX86_64Target, createX86MCCodeEmitter); } -unsigned X86_64TargetMachine::getModuleMatchQuality(const Module &M) { - // We strongly match "x86_64-*". - std::string TT = M.getTargetTriple(); - if (TT.size() >= 7 && TT[0] == 'x' && TT[1] == '8' && TT[2] == '6' && - TT[3] == '_' && TT[4] == '6' && TT[5] == '4' && TT[6] == '-') - return 20; - - // We strongly match "amd64-*". - if (TT.size() >= 6 && TT[0] == 'a' && TT[1] == 'm' && TT[2] == 'd' && - TT[3] == '6' && TT[4] == '4' && TT[5] == '-') - return 20; - - // If the target triple is something non-X86-64, we don't match. 
- if (!TT.empty()) return 0; - - if (M.getEndianness() == Module::LittleEndian && - M.getPointerSize() == Module::Pointer64) - return 10; // Weak match - else if (M.getEndianness() != Module::AnyEndianness || - M.getPointerSize() != Module::AnyPointerSize) - return 0; // Match for some other target - return getJITMatchQuality()/2; -} - -X86_32TargetMachine::X86_32TargetMachine(const Module &M, const std::string &FS) - : X86TargetMachine(M, FS, false) { +X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : X86TargetMachine(T, TT, FS, false) { } -X86_64TargetMachine::X86_64TargetMachine(const Module &M, const std::string &FS) - : X86TargetMachine(M, FS, true) { +X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : X86TargetMachine(T, TT, FS, true) { } -/// X86TargetMachine ctor - Create an ILP32 architecture model +/// X86TargetMachine ctor - Create an X86 target. /// -X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, - bool is64Bit) - : Subtarget(M, FS, is64Bit), +X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool is64Bit) + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getDataLayout()), FrameInfo(TargetFrameInfo::StackGrowsDown, - Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), + Subtarget.getStackAlignment(), + (Subtarget.isTargetWin64() ? -40 : + (Subtarget.is64Bit() ? -8 : -4))), InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); - // FIXME: Correctly select PIC model for Win64 stuff + + // If no relocation model was picked, default as appropriate for the target. if (getRelocationModel() == Reloc::Default) { - if (Subtarget.isTargetDarwin() || - (Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64())) - setRelocationModel(Reloc::DynamicNoPIC); - else + if (!Subtarget.isTargetDarwin()) setRelocationModel(Reloc::Static); + else if (Subtarget.is64Bit()) + setRelocationModel(Reloc::PIC_); + else + setRelocationModel(Reloc::DynamicNoPIC); } - // ELF doesn't have a distinct dynamic-no-PIC model. Dynamic-no-PIC - // is defined as a model for code which may be used in static or - // dynamic executables but not necessarily a shared library. On ELF - // implement this by using the Static model. - if (Subtarget.isTargetELF() && - getRelocationModel() == Reloc::DynamicNoPIC) - setRelocationModel(Reloc::Static); - - if (Subtarget.is64Bit()) { - // No DynamicNoPIC support under X86-64. - if (getRelocationModel() == Reloc::DynamicNoPIC) + assert(getRelocationModel() != Reloc::Default && + "Relocation mode not picked"); + + // If no code model is picked, default to small. + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); + + // ELF and X86-64 don't have a distinct DynamicNoPIC model. DynamicNoPIC + // is defined as a model for code which may be used in static or dynamic + // executables but not necessarily a shared library. On X86-32 we just + // compile in -static mode, in x86-64 we use PIC. + if (getRelocationModel() == Reloc::DynamicNoPIC) { + if (is64Bit) setRelocationModel(Reloc::PIC_); - // Default X86-64 code model is small. 
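The relocation-model defaulting this constructor now performs reduces to a three-way table. Restated as a standalone helper for clarity (a sketch only; the constructor inlines this logic):

    // Reloc::Default resolution:
    //   non-Darwin       -> Static
    //   Darwin, 64-bit   -> PIC_
    //   Darwin, 32-bit   -> DynamicNoPIC
    static Reloc::Model defaultX86RelocModel(bool IsTargetDarwin, bool Is64Bit) {
      if (!IsTargetDarwin)
        return Reloc::Static;
      return Is64Bit ? Reloc::PIC_ : Reloc::DynamicNoPIC;
    }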
- if (getCodeModel() == CodeModel::Default) - setCodeModel(CodeModel::Small); + else if (!Subtarget.isTargetDarwin()) + setRelocationModel(Reloc::Static); } - if (Subtarget.isTargetCygMing()) - Subtarget.setPICStyle(PICStyles::WinPIC); - else if (Subtarget.isTargetDarwin()) { + // If we are on Darwin, disallow static relocation model in X86-64 mode, since + // the Mach-O file format doesn't support it. + if (getRelocationModel() == Reloc::Static && + Subtarget.isTargetDarwin() && + is64Bit) + setRelocationModel(Reloc::PIC_); + + // Determine the PICStyle based on the target selected. + if (getRelocationModel() == Reloc::Static) { + // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. + Subtarget.setPICStyle(PICStyles::None); + } else if (Subtarget.isTargetCygMing()) { + Subtarget.setPICStyle(PICStyles::None); + } else if (Subtarget.isTargetDarwin()) { if (Subtarget.is64Bit()) Subtarget.setPICStyle(PICStyles::RIPRel); - else - Subtarget.setPICStyle(PICStyles::Stub); + else if (getRelocationModel() == Reloc::PIC_) + Subtarget.setPICStyle(PICStyles::StubPIC); + else { + assert(getRelocationModel() == Reloc::DynamicNoPIC); + Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC); + } } else if (Subtarget.isTargetELF()) { if (Subtarget.is64Bit()) Subtarget.setPICStyle(PICStyles::RIPRel); else Subtarget.setPICStyle(PICStyles::GOT); } + + // Finally, if we have "none" as our PIC style, force to static mode. + if (Subtarget.getPICStyle() == PICStyles::None) + setRelocationModel(Reloc::Static); } //===----------------------------------------------------------------------===// @@ -212,33 +174,16 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, return true; // -print-machineinstr should print after this. } -bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { - // FIXME: Move this somewhere else! - // On Darwin, override 64-bit static relocation to pic_ since the - // assembler doesn't support it. - if (DefRelocModel == Reloc::Static && - Subtarget.isTargetDarwin() && Subtarget.is64Bit() && - getCodeModel() == CodeModel::Small) - setRelocationModel(Reloc::PIC_); - - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, Verbose)); - return false; -} - bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE) { // FIXME: Move this to TargetJITInfo! // On Darwin, do not override 64-bit setting made in X86TargetMachine(). if (DefRelocModel == Reloc::Default && - (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) + (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { setRelocationModel(Reloc::Static); + Subtarget.setPICStyle(PICStyles::None); + } // 64-bit JIT places everything in the same buffer except external functions. // On Darwin, use small code model but hack the call instruction for @@ -251,24 +196,20 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } PM.add(createX86CodeEmitterPass(*this, MCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } return false; } bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE) { // FIXME: Move this to TargetJITInfo! // On Darwin, do not override 64-bit setting made in X86TargetMachine(). 
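The PIC-style selection at the end of the constructor condenses to one mapping from target flavor and relocation model. A sketch of that mapping (the constructor additionally forces a final PICStyles::None back to Reloc::Static, as the hunk above shows):

    static PICStyles::Style pickPICStyle(const X86Subtarget &ST,
                                         Reloc::Model RM) {
      if (RM == Reloc::Static || ST.isTargetCygMing())
        return PICStyles::None;
      if (ST.isTargetDarwin()) {
        if (ST.is64Bit())
          return PICStyles::RIPRel;
        return RM == Reloc::PIC_ ? PICStyles::StubPIC
                                 : PICStyles::StubDynamicNoPIC;
      }
      if (ST.isTargetELF())
        return ST.is64Bit() ? PICStyles::RIPRel : PICStyles::GOT;
      return PICStyles::None;
    }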
if (DefRelocModel == Reloc::Default && - (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) + (!Subtarget.isTargetDarwin() || !Subtarget.is64Bit())) { setRelocationModel(Reloc::Static); + Subtarget.setPICStyle(PICStyles::None); + } // 64-bit JIT places everything in the same buffer except external functions. // On Darwin, use small code model but hack the call instruction for @@ -281,40 +222,34 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } PM.add(createX86JITCodeEmitterPass(*this, JCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } return false; } +bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE) { + PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); + return false; +} + bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE) { PM.add(createX86CodeEmitterPass(*this, MCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } - return false; } bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE) { PM.add(createX86JITCodeEmitterPass(*this, JCE)); - if (DumpAsm) { - assert(AsmPrinterCtor && "AsmPrinter was not linked in"); - if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, true)); - } - return false; } +bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE) { + PM.add(createX86ObjectCodeEmitterPass(*this, OCE)); + return false; +} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 90a5cc243d26a..b538408e8a45f 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -26,7 +26,7 @@ namespace llvm { -class raw_ostream; +class formatted_raw_ostream; class X86TargetMachine : public LLVMTargetMachine { X86Subtarget Subtarget; @@ -38,18 +38,9 @@ class X86TargetMachine : public LLVMTargetMachine { X86ELFWriterInfo ELFWriterInfo; Reloc::Model DefRelocModel; // Reloc model before it's overridden. -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - - // To avoid having target depend on the asmprinter stuff libraries, asmprinter - // set this functions to ctor pointer at startup time if they are linked in. - typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, - X86TargetMachine &tm, - bool verbose); - static AsmPrinterCtorFn AsmPrinterCtor; - public: - X86TargetMachine(const Module &M, const std::string &FS, bool is64Bit); + X86TargetMachine(const Target &T, const std::string &TT, + const std::string &FS, bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } @@ -66,50 +57,41 @@ public: return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } - static unsigned getModuleMatchQuality(const Module &M); - static unsigned getJITMatchQuality(); - - static void registerAsmPrinter(AsmPrinterCtorFn F) { - AsmPrinterCtor = F; - } - // Set up the pass pipeline. 
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE); + MachineCodeEmitter &MCE); virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE); + JITCodeEmitter &JCE); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + ObjectCodeEmitter &OCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + MachineCodeEmitter &MCE); virtual bool addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, MachineCodeEmitter &MCE); + JITCodeEmitter &JCE); virtual bool addSimpleCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - bool DumpAsm, JITCodeEmitter &JCE); + ObjectCodeEmitter &OCE); }; /// X86_32TargetMachine - X86 32-bit target machine. /// class X86_32TargetMachine : public X86TargetMachine { public: - X86_32TargetMachine(const Module &M, const std::string &FS); - - static unsigned getJITMatchQuality(); - static unsigned getModuleMatchQuality(const Module &M); + X86_32TargetMachine(const Target &T, const std::string &M, + const std::string &FS); }; /// X86_64TargetMachine - X86 64-bit target machine. /// class X86_64TargetMachine : public X86TargetMachine { public: - X86_64TargetMachine(const Module &M, const std::string &FS); - - static unsigned getJITMatchQuality(); - static unsigned getModuleMatchQuality(const Module &M); + X86_64TargetMachine(const Target &T, const std::string &TT, + const std::string &FS); }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp new file mode 100644 index 0000000000000..d39b3c4324205 --- /dev/null +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -0,0 +1,65 @@ +//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86TargetObjectFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +using namespace llvm; + +const MCExpr *X8632_MachoTargetObjectFile:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const { + // The mach-o version of this method defaults to returning a stub reference. + IsIndirect = true; + IsPCRel = false; + + + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub gets + // emitted by the asmprinter. 
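The x86-32 override in the new X86TargetObjectFile.cpp answers DWARF global references with an indirect $non_lazy_ptr stub; its x86-64 sibling in the next hunk instead builds a pc-relative GOT form. A sketch of the expression that 64-bit path constructs (the context and symbol name here are placeholders):

    // Builds "foo@GOTPCREL+4": an indirect, pc-relative DWARF reference.
    const MCExpr *Sym  = MCSymbolRefExpr::Create("foo@GOTPCREL", Ctx);
    const MCExpr *Four = MCConstantExpr::Create(4, Ctx);
    const MCExpr *Ref  = MCBinaryExpr::CreateAdd(Sym, Four, Ctx);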
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + const MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return MCSymbolRefExpr::Create(Sym, getContext()); +} + +const MCExpr *X8664_MachoTargetObjectFile:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const { + + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which + // is an indirect pc-relative reference. + IsIndirect = true; + IsPCRel = true; + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, false); + Name += "@GOTPCREL"; + const MCExpr *Res = + MCSymbolRefExpr::Create(Name.str(), getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); +} + diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h new file mode 100644 index 0000000000000..377a93bb71529 --- /dev/null +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -0,0 +1,40 @@ +//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H +#define LLVM_TARGET_X86_TARGETOBJECTFILE_H + +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + + /// X8632_MachoTargetObjectFile - This TLOF implementation is used for + /// Darwin/x86-32. + class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { + public: + + virtual const MCExpr * + getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const; + }; + + /// X8664_MachoTargetObjectFile - This TLOF implementation is used for + /// Darwin/x86-64. + class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { + public: + + virtual const MCExpr * + getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, + bool &IsIndirect, bool &IsPCRel) const; + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/XCore/AsmPrinter/CMakeLists.txt b/lib/Target/XCore/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000000000..7c7c2f4ded045 --- /dev/null +++ b/lib/Target/XCore/AsmPrinter/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMXCoreAsmPrinter + XCoreAsmPrinter.cpp + ) +add_dependencies(LLVMXCoreAsmPrinter XCoreCodeGenTable_gen) diff --git a/lib/Target/XCore/AsmPrinter/Makefile b/lib/Target/XCore/AsmPrinter/Makefile new file mode 100644 index 0000000000000..82dc1df95d3be --- /dev/null +++ b/lib/Target/XCore/AsmPrinter/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/XCore/AsmPrinter/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMXCoreAsmPrinter + +# Hack: we need to include 'main' XCore target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp new file mode 100644 index 0000000000000..e58edda0c5dc6 --- /dev/null +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -0,0 +1,374 @@ +//===-- XCoreAsmPrinter.cpp - XCore LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the XAS-format XCore assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "XCore.h" +#include "XCoreInstrInfo.h" +#include "XCoreSubtarget.h" +#include "XCoreMCAsmInfo.h" +#include "XCoreTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include +#include +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +static cl::opt MaxThreads("xcore-max-threads", cl::Optional, + cl::desc("Maximum number of threads (for emulation thread-local storage)"), + cl::Hidden, + cl::value_desc("number"), + cl::init(8)); + +namespace { + class VISIBILITY_HIDDEN XCoreAsmPrinter : public AsmPrinter { + const XCoreSubtarget &Subtarget; + public: + explicit XCoreAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + const MCAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V), + Subtarget(TM.getSubtarget()) {} + + virtual const char *getPassName() const { + return "XCore Assembly Printer"; + } + + void printMemOperand(const MachineInstr *MI, int opNum); + void printOperand(const MachineInstr *MI, int opNum); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + + void emitGlobalDirective(const std::string &name); + void emitExternDirective(const std::string &name); + + void emitArrayBound(const std::string &name, const GlobalVariable *GV); + virtual void PrintGlobalVariable(const GlobalVariable *GV); + + void emitFunctionStart(MachineFunction &MF); + void emitFunctionEnd(MachineFunction &MF); + + void printInstruction(const MachineInstr *MI); // autogenerated. 
+ static const char *getRegisterName(unsigned RegNo); + + void printMachineInstruction(const MachineInstr *MI); + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + } + }; +} // end of anonymous namespace + +#include "XCoreGenAsmWriter.inc" + +void XCoreAsmPrinter:: +emitGlobalDirective(const std::string &name) +{ + O << MAI->getGlobalDirective() << name; + O << "\n"; +} + +void XCoreAsmPrinter:: +emitExternDirective(const std::string &name) +{ + O << "\t.extern\t" << name; + O << '\n'; +} + +void XCoreAsmPrinter:: +emitArrayBound(const std::string &name, const GlobalVariable *GV) +{ + assert(((GV->hasExternalLinkage() || + GV->hasWeakLinkage()) || + GV->hasLinkOnceLinkage()) && "Unexpected linkage"); + if (const ArrayType *ATy = dyn_cast( + cast(GV->getType())->getElementType())) + { + O << MAI->getGlobalDirective() << name << ".globound" << "\n"; + O << MAI->getSetDirective() << name << ".globound" << "," + << ATy->getNumElements() << "\n"; + if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) { + // TODO Use COMDAT groups for LinkOnceLinkage + O << MAI->getWeakDefDirective() << name << ".globound" << "\n"; + } + } +} + +void XCoreAsmPrinter::PrintGlobalVariable(const GlobalVariable *GV) { + // Check to see if this is a special global used by LLVM, if so, emit it. + if (!GV->hasInitializer() || + EmitSpecialLLVMGlobal(GV)) + return; + + const TargetData *TD = TM.getTargetData(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM)); + + std::string name = Mang->getMangledName(GV); + Constant *C = GV->getInitializer(); + unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType()); + + // Mark the start of the global + O << "\t.cc_top " << name << ".data," << name << "\n"; + + switch (GV->getLinkage()) { + case GlobalValue::AppendingLinkage: + llvm_report_error("AppendingLinkage is not supported by this target!"); + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::ExternalLinkage: + emitArrayBound(name, GV); + emitGlobalDirective(name); + // TODO Use COMDAT groups for LinkOnceLinkage + if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) { + O << MAI->getWeakDefDirective() << name << "\n"; + } + // FALL THROUGH + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: + break; + case GlobalValue::GhostLinkage: + llvm_unreachable("Should not have any unmaterialized functions!"); + case GlobalValue::DLLImportLinkage: + llvm_unreachable("DLLImport linkage is not supported by this target!"); + case GlobalValue::DLLExportLinkage: + llvm_unreachable("DLLExport linkage is not supported by this target!"); + default: + llvm_unreachable("Unknown linkage type!"); + } + + EmitAlignment(Align, GV, 2); + + unsigned Size = TD->getTypeAllocSize(C->getType()); + if (GV->isThreadLocal()) { + Size *= MaxThreads; + } + if (MAI->hasDotTypeDotSizeDirective()) { + O << "\t.type " << name << ",@object\n"; + O << "\t.size " << name << "," << Size << "\n"; + } + O << name << ":\n"; + + EmitGlobalConstant(C); + if (GV->isThreadLocal()) { + for (unsigned i = 1; i < MaxThreads; ++i) { + EmitGlobalConstant(C); + } + } + if (Size < 4) { + // The ABI requires that unsigned scalar types smaller than 32 bits + // are are padded to 32 bits. 
+ EmitZeros(4 - Size); + } + + // Mark the end of the global + O << "\t.cc_bottom " << name << ".data\n"; +} + +/// Emit the directives on the start of functions +void XCoreAsmPrinter::emitFunctionStart(MachineFunction &MF) { + // Print out the label for the function. + const Function *F = MF.getFunction(); + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); + + // Mark the start of the function + O << "\t.cc_top " << CurrentFnName << ".function," << CurrentFnName << "\n"; + + switch (F->getLinkage()) { + default: llvm_unreachable("Unknown linkage type!"); + case Function::InternalLinkage: // Symbols default to internal. + case Function::PrivateLinkage: + case Function::LinkerPrivateLinkage: + break; + case Function::ExternalLinkage: + emitGlobalDirective(CurrentFnName); + break; + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + // TODO Use COMDAT groups for LinkOnceLinkage + O << MAI->getGlobalDirective() << CurrentFnName << "\n"; + O << MAI->getWeakDefDirective() << CurrentFnName << "\n"; + break; + } + // (1 << 1) byte aligned + EmitAlignment(MF.getAlignment(), F, 1); + if (MAI->hasDotTypeDotSizeDirective()) { + O << "\t.type " << CurrentFnName << ",@function\n"; + } + O << CurrentFnName << ":\n"; +} + +/// Emit the directives on the end of functions +void XCoreAsmPrinter:: +emitFunctionEnd(MachineFunction &MF) +{ + // Mark the end of the function + O << "\t.cc_bottom " << CurrentFnName << ".function\n"; +} + +/// runOnMachineFunction - This uses the printMachineInstruction() +/// method to print assembly for each instruction. +/// +bool XCoreAsmPrinter::runOnMachineFunction(MachineFunction &MF) +{ + this->MF = &MF; + + SetupMachineFunction(MF); + + // Print out constants referenced by the function + EmitConstantPool(MF.getConstantPool()); + + // Print out jump tables referenced by the function + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + + // Emit the function start directives + emitFunctionStart(MF); + + // Emit pre-function debug information. + DW->BeginFunction(&MF); + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + + // Print a label for the basic block. + if (I != MF.begin()) { + EmitBasicBlockStart(I); + } + + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + printMachineInstruction(II); + } + + // Each Basic Block is separated by a newline + O << '\n'; + } + + // Emit function end directives + emitFunctionEnd(MF); + + // Emit post-function debug information. + DW->EndFunction(&MF); + + // We didn't modify anything. 
+  return false;
+}
+
+void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum)
+{
+  printOperand(MI, opNum);
+
+  if (MI->getOperand(opNum+1).isImm()
+      && MI->getOperand(opNum+1).getImm() == 0)
+    return;
+
+  O << "+";
+  printOperand(MI, opNum+1);
+}
+
+void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << getRegisterName(MO.getReg());
+    break;
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    GetMBBSymbol(MO.getMBB()->getNumber())->print(O, MAI);
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    O << Mang->getMangledName(MO.getGlobal());
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    O << MO.getSymbolName();
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+      << '_' << MO.getIndex();
+    break;
+  default:
+    llvm_unreachable("not implemented");
+  }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                      unsigned AsmVariant,
+                                      const char *ExtraCode) {
+  printOperand(MI, OpNo);
+  return false;
+}
+
+void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
+  ++EmittedInsts;
+
+  processDebugLoc(MI, true);
+
+  // Check for mov mnemonic
+  unsigned src, dst, srcSR, dstSR;
+  if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) {
+    O << "\tmov " << getRegisterName(dst) << ", ";
+    O << getRegisterName(src) << '\n';
+    return;
+  }
+  printInstruction(MI);
+  if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+    EmitComments(*MI);
+  O << '\n';
+
+  processDebugLoc(MI, false);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreAsmPrinter() {
+  RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+}
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index a7aba14a7a14c..0965323b998ad 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -11,13 +11,14 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv)
 tablegen(XCoreGenSubtarget.inc -gen-subtarget)
 
 add_llvm_target(XCore
-  XCoreAsmPrinter.cpp
+  MCSectionXCore.cpp
   XCoreFrameInfo.cpp
   XCoreInstrInfo.cpp
   XCoreISelDAGToDAG.cpp
   XCoreISelLowering.cpp
+  XCoreMCAsmInfo.cpp
   XCoreRegisterInfo.cpp
   XCoreSubtarget.cpp
-  XCoreTargetAsmInfo.cpp
   XCoreTargetMachine.cpp
+  XCoreTargetObjectFile.cpp
   )
diff --git a/lib/Target/XCore/MCSectionXCore.cpp b/lib/Target/XCore/MCSectionXCore.cpp
new file mode 100644
index 0000000000000..5acceafe9ea3b
--- /dev/null
+++ b/lib/Target/XCore/MCSectionXCore.cpp
@@ -0,0 +1,35 @@
+//===- MCSectionXCore.cpp - XCore-specific section representation ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MCSectionXCore class.
+// +//===----------------------------------------------------------------------===// + +#include "MCSectionXCore.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSectionXCore * +MCSectionXCore::Create(const StringRef &Section, unsigned Type, + unsigned Flags, SectionKind K, + bool isExplicit, MCContext &Ctx) { + return new (Ctx) MCSectionXCore(Section, Type, Flags, K, isExplicit); +} + + +/// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp +/// section flags. +void MCSectionXCore::PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, + raw_ostream &OS) const { + if (getFlags() & MCSectionXCore::SHF_CP_SECTION) + OS << 'c'; + if (getFlags() & MCSectionXCore::SHF_DP_SECTION) + OS << 'd'; +} diff --git a/lib/Target/XCore/MCSectionXCore.h b/lib/Target/XCore/MCSectionXCore.h new file mode 100644 index 0000000000000..02f8f95572c8b --- /dev/null +++ b/lib/Target/XCore/MCSectionXCore.h @@ -0,0 +1,54 @@ +//===- MCSectionXCore.h - XCore-specific section representation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MCSectionXCore class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCSECTION_XCORE_H +#define LLVM_MCSECTION_XCORE_H + +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + +class MCSectionXCore : public MCSectionELF { + MCSectionXCore(const StringRef &Section, unsigned Type, unsigned Flags, + SectionKind K, bool isExplicit) + : MCSectionELF(Section, Type, Flags, K, isExplicit) {} + +public: + + enum { + /// SHF_CP_SECTION - All sections with the "c" flag are grouped together + /// by the linker to form the constant pool and the cp register is set to + /// the start of the constant pool by the boot code. + SHF_CP_SECTION = FIRST_TARGET_DEP_FLAG, + + /// SHF_DP_SECTION - All sections with the "d" flag are grouped together + /// by the linker to form the data section and the dp register is set to + /// the start of the section by the boot code. + SHF_DP_SECTION = FIRST_TARGET_DEP_FLAG << 1 + }; + + static MCSectionXCore *Create(const StringRef &Section, unsigned Type, + unsigned Flags, SectionKind K, + bool isExplicit, MCContext &Ctx); + + + /// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp + /// section flags. + virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, + raw_ostream &OS) const; + +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile index 568df70ab63a9..bd3b52a7ac10f 100644 --- a/lib/Target/XCore/Makefile +++ b/lib/Target/XCore/Makefile @@ -7,7 +7,7 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../.. -LIBRARYNAME = LLVMXCore +LIBRARYNAME = LLVMXCoreCodeGen TARGET = XCore # Make sure that tblgen is run, first thing. 
@@ -17,5 +17,7 @@ BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \
                XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
                XCoreGenSubtarget.inc
 
+DIRS = AsmPrinter TargetInfo
+
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000000000..0a568de1624b9
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMXCoreInfo
+  XCoreTargetInfo.cpp
+  )
+
+add_dependencies(LLVMXCoreInfo XCoreTable_gen)
diff --git a/lib/Target/XCore/TargetInfo/Makefile b/lib/Target/XCore/TargetInfo/Makefile
new file mode 100644
index 0000000000000..07473d223f6bc
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/XCore/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
new file mode 100644
index 0000000000000..7aa8965c4ac68
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- XCoreTargetInfo.cpp - XCore Target Implementation -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheXCoreTarget;
+
+extern "C" void LLVMInitializeXCoreTargetInfo() {
+  RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
+}
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index d95aab3979a50..8937fbe123c64 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -21,12 +21,12 @@ namespace llvm {
   class FunctionPass;
   class TargetMachine;
   class XCoreTargetMachine;
-  class raw_ostream;
+  class formatted_raw_ostream;
 
   FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
-  FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS,
-                                           XCoreTargetMachine &TM,
-                                           bool Verbose);
+
+  extern Target TheXCoreTarget;
+
 } // end namespace llvm;
 
 // Defines symbolic names for XCore registers.  This defines a mapping from
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 7a2dcdbf9fe58..b07445dd386f2 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -29,18 +29,6 @@ def XCoreInstrInfo : InstrInfo {
   let TSFlagsShifts = [];
 }
 
-//===----------------------------------------------------------------------===//
-// XCore Subtarget features.
-//===----------------------------------------------------------------------===//
-
-def FeatureXS1A
-  : SubtargetFeature<"xs1a", "IsXS1A", "true",
-                     "Enable XS1A instructions">;
-
-def FeatureXS1B
-  : SubtargetFeature<"xs1b", "IsXS1B", "true",
-                     "Enable XS1B instructions">;
-
 //===----------------------------------------------------------------------===//
 // XCore processors supported.
 //===----------------------------------------------------------------------===//
 
@@ -48,9 +36,8 @@ def FeatureXS1B
 class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, NoItineraries, Features>;
 
-def : Proc<"generic", [FeatureXS1A]>;
-def : Proc<"xs1a-generic", [FeatureXS1A]>;
-def : Proc<"xs1b-generic", [FeatureXS1B]>;
+def : Proc<"generic", []>;
+def : Proc<"xs1b-generic", []>;
 
 //===----------------------------------------------------------------------===//
 // Declare the target which we are implementing
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index eed34a4b635b1..860b72f9402ea 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,8 @@
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include <queue>
 #include <set>
 using namespace llvm;
@@ -159,69 +162,62 @@ InstructionSelect() {
 SDNode *XCoreDAGToDAGISel::Select(SDValue Op) {
   SDNode *N = Op.getNode();
   DebugLoc dl = N->getDebugLoc();
-  MVT NVT = N->getValueType(0);
+  EVT NVT = N->getValueType(0);
   if (NVT == MVT::i32) {
     switch (N->getOpcode()) {
     default: break;
     case ISD::Constant: {
       if (Predicate_immMskBitp(N)) {
         SDValue MskSize = Transform_msksize_xform(N);
-        return CurDAG->getTargetNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize);
+        return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
+                                      MVT::i32, MskSize);
       }
       else if (!
Predicate_immU16(N)) { unsigned Val = cast(N)->getZExtValue(); SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val), + CurDAG->getTargetConstantPool(ConstantInt::get( + Type::getInt32Ty(*CurDAG->getContext()), Val), TLI.getPointerTy()); - return CurDAG->getTargetNode(XCore::LDWCP_lru6, dl, MVT::i32, - MVT::Other, CPIdx, - CurDAG->getEntryNode()); + return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, + MVT::Other, CPIdx, + CurDAG->getEntryNode()); } break; } case ISD::SMUL_LOHI: { // FIXME fold addition into the macc instruction - if (!Subtarget.isXS1A()) { - SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32, - CurDAG->getTargetConstant(0, MVT::i32)), 0); - SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) }; - SDNode *ResNode = CurDAG->getTargetNode(XCore::MACCS_l4r, dl, - MVT::i32, MVT::i32, Ops, 4); - ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1)); - ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0)); - return NULL; - } - break; + SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, + CurDAG->getTargetConstant(0, MVT::i32)), 0); + SDValue Ops[] = { Zero, Zero, Op.getOperand(0), Op.getOperand(1) }; + SDNode *ResNode = CurDAG->getMachineNode(XCore::MACCS_l4r, dl, + MVT::i32, MVT::i32, Ops, 4); + ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1)); + ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0)); + return NULL; } case ISD::UMUL_LOHI: { // FIXME fold addition into the macc / lmul instruction - SDValue Zero(CurDAG->getTargetNode(XCore::LDC_ru6, dl, MVT::i32, + SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32, CurDAG->getTargetConstant(0, MVT::i32)), 0); SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), Zero, Zero }; - SDNode *ResNode = CurDAG->getTargetNode(XCore::LMUL_l6r, dl, MVT::i32, - MVT::i32, Ops, 4); + SDNode *ResNode = CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, + MVT::i32, Ops, 4); ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1)); ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0)); return NULL; } case XCoreISD::LADD: { - if (!Subtarget.isXS1A()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; - return CurDAG->getTargetNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - break; + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + Op.getOperand(2) }; + return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); } case XCoreISD::LSUB: { - if (!Subtarget.isXS1A()) { - SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), - Op.getOperand(2) }; - return CurDAG->getTargetNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - break; + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + Op.getOperand(2) }; + return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); } // Other cases are autogenerated. 
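// The SMUL_LOHI/UMUL_LOHI selections above seed both halves of the long
// multiply-accumulate with a single LDC 0, computing in effect (a sketch
// of the math only, with signedness per the node):
//
//   uint64_t r = 0 + (uint64_t)a * (uint64_t)b;  // MACCS_l4r / LMUL_l6r
//
// The machine node's value 0 is the high word and value 1 the low word,
// hence the swapped ReplaceUses calls above.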
} diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index cc11d32481395..5ef56c9ff299f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -16,6 +16,7 @@ #include "XCoreISelLowering.h" #include "XCoreMachineFunctionInfo.h" #include "XCore.h" +#include "XCoreTargetObjectFile.h" #include "XCoreTargetMachine.h" #include "XCoreSubtarget.h" #include "llvm/DerivedTypes.h" @@ -32,6 +33,8 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" #include #include @@ -48,12 +51,14 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::CPRelativeWrapper : return "XCoreISD::CPRelativeWrapper"; case XCoreISD::STWSP : return "XCoreISD::STWSP"; case XCoreISD::RETSP : return "XCoreISD::RETSP"; + case XCoreISD::LADD : return "XCoreISD::LADD"; + case XCoreISD::LSUB : return "XCoreISD::LSUB"; default : return NULL; } } XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) - : TargetLowering(XTM), + : TargetLowering(XTM, new XCoreTargetObjectFile()), TM(XTM), Subtarget(*XTM.getSubtargetImpl()) { @@ -67,8 +72,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setIntDivIsCheap(false); setShiftAmountType(MVT::i32); - // shl X, 32 == 0 - setShiftAmountFlavor(Extend); setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(SchedulingForRegPressure); @@ -88,13 +91,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); // 64bit - if (!Subtarget.isXS1A()) { - setOperationAction(ISD::ADD, MVT::i64, Custom); - setOperationAction(ISD::SUB, MVT::i64, Custom); - } - if (Subtarget.isXS1A()) { - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - } + setOperationAction(ISD::ADD, MVT::i64, Custom); + setOperationAction(ISD::SUB, MVT::i64, Custom); setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -112,9 +110,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::JumpTable, MVT::i32, Custom); - // RET must be custom lowered, to meet ABI requirements - setOperationAction(ISD::RET, MVT::Other, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); // Thread Local Storage @@ -130,7 +125,11 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); - + + // Custom expand misaligned loads / stores. 
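// Marking i32 LOAD/STORE as Custom routes every such node through
// LowerOperation; LowerLOAD/LowerSTORE below return SDValue() for
// accesses that already meet the ABI alignment, so only genuinely
// misaligned ones are rewritten.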
+ setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + // Varargs setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand); @@ -145,19 +144,24 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Debug setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + + maxStoresPerMemset = 4; + maxStoresPerMemmove = maxStoresPerMemcpy = 2; + + // We have target-specific dag combine patterns for the following nodes: + setTargetDAGCombine(ISD::STORE); } SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - case ISD::CALL: return LowerCALL(Op, DAG); - case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); - case ISD::RET: return LowerRET(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); @@ -166,7 +170,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SUB: return ExpandADDSUB(Op.getNode(), DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); default: - assert(0 && "unimplemented operand"); + llvm_unreachable("unimplemented operand"); return SDValue(); } } @@ -178,7 +182,7 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) { switch (N->getOpcode()) { default: - assert(0 && "Don't know how to custom expand this!"); + llvm_unreachable("Don't know how to custom expand this!"); return; case ISD::ADD: case ISD::SUB: @@ -214,17 +218,16 @@ getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG) DebugLoc dl = GA.getDebugLoc(); if (isa(GV)) { return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA); - } else if (!Subtarget.isXS1A()) { - const GlobalVariable *GVar = dyn_cast(GV); - if (!GVar) { - // If GV is an alias then use the aliasee to determine constness - if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); - } - bool isConst = GVar && GVar->isConstant(); - if (isConst) { - return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); - } + } + const GlobalVariable *GVar = dyn_cast(GV); + if (!GVar) { + // If GV is an alias then use the aliasee to determine constness + if (const GlobalAlias *GA = dyn_cast(GV)) + GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); + } + bool isConst = GVar && GVar->isConstant(); + if (isConst) { + return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); } return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA); } @@ -265,14 +268,16 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) GVar = dyn_cast_or_null(GA->resolveAliasedGlobal()); } if (! 
GVar) { - assert(0 && "Thread local object not a GlobalVariable?"); + llvm_unreachable("Thread local object not a GlobalVariable?"); return SDValue(); } const Type *Ty = cast(GV->getType())->getElementType(); if (!Ty->isSized() || isZeroLengthArray(Ty)) { - cerr << "Size of thread local object " << GVar->getName() - << " is unknown\n"; - abort(); +#ifndef NDEBUG + errs() << "Size of thread local object " << GVar->getName() + << " is unknown\n"; +#endif + llvm_unreachable(0); } SDValue base = getGlobalAddressWrapper(GA, GV, DAG); const TargetData *TD = TM.getTargetData(); @@ -288,21 +293,16 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) ConstantPoolSDNode *CP = cast(Op); // FIXME there isn't really debug info here DebugLoc dl = CP->getDebugLoc(); - if (Subtarget.isXS1A()) { - assert(0 && "Lowering of constant pool unimplemented"); - return SDValue(); + EVT PtrVT = Op.getValueType(); + SDValue Res; + if (CP->isMachineConstantPoolEntry()) { + Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment()); } else { - MVT PtrVT = Op.getValueType(); - SDValue Res; - if (CP->isMachineConstantPoolEntry()) { - Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, - CP->getAlignment()); - } else { - Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlignment()); - } - return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res); + Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment()); } + return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res); } SDValue XCoreTargetLowering:: @@ -310,19 +310,211 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) { // FIXME there isn't really debug info here DebugLoc dl = Op.getDebugLoc(); - MVT PtrVT = Op.getValueType(); + EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI); } +static bool +IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, + int64_t &Offset) +{ + if (Addr.getOpcode() != ISD::ADD) { + return false; + } + ConstantSDNode *CN = 0; + if (!(CN = dyn_cast(Addr.getOperand(1)))) { + return false; + } + int64_t off = CN->getSExtValue(); + const SDValue &Base = Addr.getOperand(0); + const SDValue *Root = &Base; + if (Base.getOpcode() == ISD::ADD && + Base.getOperand(1).getOpcode() == ISD::SHL) { + ConstantSDNode *CN = dyn_cast(Base.getOperand(1) + .getOperand(1)); + if (CN && (CN->getSExtValue() >= 2)) { + Root = &Base.getOperand(0); + } + } + if (isa(*Root)) { + // All frame indicies are word aligned + AlignedBase = Base; + Offset = off; + return true; + } + if (Root->getOpcode() == XCoreISD::DPRelativeWrapper || + Root->getOpcode() == XCoreISD::CPRelativeWrapper) { + // All dp / cp relative addresses are word aligned + AlignedBase = Base; + Offset = off; + return true; + } + return false; +} + +SDValue XCoreTargetLowering:: +LowerLOAD(SDValue Op, SelectionDAG &DAG) +{ + LoadSDNode *LD = cast(Op); + assert(LD->getExtensionType() == ISD::NON_EXTLOAD && + "Unexpected extension type"); + assert(LD->getMemoryVT() == MVT::i32 && "Unexpected load EVT"); + if (allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { + return SDValue(); + } + unsigned ABIAlignment = getTargetData()-> + getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); + // Leave aligned load alone. 
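// For example (illustrative values): an i32 load with alignment 4 against
// an ABI alignment of 4 falls through to the generic path unchanged; only
// alignments 1 and 2 reach the expansions below.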
+ if (LD->getAlignment() >= ABIAlignment) { + return SDValue(); + } + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + DebugLoc dl = Op.getDebugLoc(); + + SDValue Base; + int64_t Offset; + if (!LD->isVolatile() && + IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) { + if (Offset % 4 == 0) { + // We've managed to infer better alignment information than the load + // already has. Use an aligned load. + return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4); + } + // Lower to + // ldw low, base[offset >> 2] + // ldw high, base[(offset >> 2) + 1] + // shr low_shifted, low, (offset & 0x3) * 8 + // shl high_shifted, high, 32 - (offset & 0x3) * 8 + // or result, low_shifted, high_shifted + SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32); + SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); + SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); + SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); + + SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset); + SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset); + + SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain, + LowAddr, NULL, 4); + SDValue High = DAG.getLoad(getPointerTy(), dl, Chain, + HighAddr, NULL, 4); + SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift); + SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift); + SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1), + High.getValue(1)); + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); + } + + if (LD->getAlignment() == 2) { + int SVOffset = LD->getSrcValueOffset(); + SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, + LD->isVolatile(), 2); + SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, + DAG.getConstant(2, MVT::i32)); + SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, + HighAddr, LD->getSrcValue(), SVOffset + 2, + MVT::i16, LD->isVolatile(), 2); + SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, + DAG.getConstant(16, MVT::i32)); + SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1), + High.getValue(1)); + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); + } + + // Lower to a call to __misaligned_load(BasePtr). 
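// __misaligned_load is supplied by the XCore runtime library, not by this
// file. A plausible C-level equivalent, as a sketch only (the real
// implementation and signature may differ):
//
//   unsigned __misaligned_load(const char *p) {
//     unsigned v;
//     memcpy(&v, p, sizeof v); // byte-wise copy, no alignment assumed
//     return v;
//   }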
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + Entry.Ty = IntPtrTy; + Entry.Node = BasePtr; + Args.push_back(Entry); + + std::pair CallResult = + LowerCallTo(Chain, IntPtrTy, false, false, + false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__misaligned_load", getPointerTy()), + Args, DAG, dl); + + SDValue Ops[] = + { CallResult.first, CallResult.second }; + + return DAG.getMergeValues(Ops, 2, dl); +} + +SDValue XCoreTargetLowering:: +LowerSTORE(SDValue Op, SelectionDAG &DAG) +{ + StoreSDNode *ST = cast(Op); + assert(!ST->isTruncatingStore() && "Unexpected store type"); + assert(ST->getMemoryVT() == MVT::i32 && "Unexpected store EVT"); + if (allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { + return SDValue(); + } + unsigned ABIAlignment = getTargetData()-> + getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext())); + // Leave aligned store alone. + if (ST->getAlignment() >= ABIAlignment) { + return SDValue(); + } + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + DebugLoc dl = Op.getDebugLoc(); + + if (ST->getAlignment() == 2) { + int SVOffset = ST->getSrcValueOffset(); + SDValue Low = Value; + SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, + DAG.getConstant(16, MVT::i32)); + SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, + ST->getSrcValue(), SVOffset, MVT::i16, + ST->isVolatile(), 2); + SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, + DAG.getConstant(2, MVT::i32)); + SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, + ST->getSrcValue(), SVOffset + 2, + MVT::i16, ST->isVolatile(), 2); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); + } + + // Lower to a call to __misaligned_store(BasePtr, Value). + const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + Entry.Ty = IntPtrTy; + Entry.Node = BasePtr; + Args.push_back(Entry); + + Entry.Node = Value; + Args.push_back(Entry); + + std::pair CallResult = + LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, + false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__misaligned_store", getPointerTy()), + Args, DAG, dl); + + return CallResult.second; +} + SDValue XCoreTargetLowering:: ExpandADDSUB(SDNode *N, SelectionDAG &DAG) { assert(N->getValueType(0) == MVT::i64 && (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Unknown operand to lower!"); - assert(!Subtarget.isXS1A() && "Cannot custom lower ADD/SUB on xs1a"); DebugLoc dl = N->getDebugLoc(); // Extract components @@ -353,12 +545,12 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) SDValue XCoreTargetLowering:: LowerVAARG(SDValue Op, SelectionDAG &DAG) { - assert(0 && "unimplemented"); + llvm_unreachable("unimplemented"); // FIX Arguments passed by reference need a extra dereference. 
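// The sequence below is the classic va_arg pointer bump; in C terms it is
// roughly this sketch (va_arg_T and T are illustrative, with T standing
// for the value type being loaded):
//
//   T va_arg_T(char **ap) {
//     T *p = (T *)*ap;
//     *ap += sizeof(T); // advance VAList to the next vararg
//     return *p;
//   }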
SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); const Value *V = cast(Node->getOperand(2))->getValue(); - MVT VT = Node->getValueType(0); + EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), Node->getOperand(1), V, 0); // Increment the pointer, VAList, to the next vararg @@ -398,35 +590,33 @@ SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { //===----------------------------------------------------------------------===// // Calling Convention Implementation -// -// The lower operations present on calling convention works on this order: -// LowerCALL (virt regs --> phys regs, virt regs --> stack) -// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) -// LowerRET (virt regs --> phys regs) -// LowerCALL (phys regs --> virt regs) -// //===----------------------------------------------------------------------===// #include "XCoreGenCallingConv.inc" //===----------------------------------------------------------------------===// -// CALL Calling Convention Implementation +// Call Calling Convention Implementation //===----------------------------------------------------------------------===// -/// XCore custom CALL implementation -SDValue XCoreTargetLowering:: -LowerCALL(SDValue Op, SelectionDAG &DAG) -{ - CallSDNode *TheCall = cast(Op.getNode()); - unsigned CallingConv = TheCall->getCallingConv(); +/// XCore call implementation +SDValue +XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { + // For now, only CallingConv::C implemented - switch (CallingConv) + switch (CallConv) { default: - assert(0 && "Unsupported calling convention"); + llvm_unreachable("Unsupported calling convention"); case CallingConv::Fast: case CallingConv::C: - return LowerCCCCallTo(Op, DAG, CallingConv); + return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, + Outs, Ins, dl, DAG, InVals); } } @@ -434,24 +624,25 @@ LowerCALL(SDValue Op, SelectionDAG &DAG) /// regs to (physical regs)/(stack frame), CALLSEQ_START and /// CALLSEQ_END are emitted. /// TODO: isTailCall, sret. -SDValue XCoreTargetLowering:: -LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC) -{ - CallSDNode *TheCall = cast(Op.getNode()); - SDValue Chain = TheCall->getChain(); - SDValue Callee = TheCall->getCallee(); - bool isVarArg = TheCall->isVarArg(); - DebugLoc dl = Op.getDebugLoc(); +SDValue +XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); // The ABI dictates there should be one stack slot available to the callee // on function entry (for saving lr). CCInfo.AllocateStack(4, 4); - CCInfo.AnalyzeCallOperands(TheCall, CC_XCore); + CCInfo.AnalyzeCallOperands(Outs, CC_XCore); // Get a count of how many bytes are to be pushed on the stack. 
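// NumBytes already includes the 4-byte slot reserved above for the callee
// to save lr; e.g. (illustrative figures) two word-sized stack arguments
// yield 4 + 2*4 = 12 bytes for the call frame.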
unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -465,13 +656,11 @@ LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC) // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - - // Arguments start after the 5 first operands of ISD::CALL - SDValue Arg = TheCall->getArg(i); + SDValue Arg = Outs[i].Val; // Promote the value if needed. switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); + default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); @@ -554,59 +743,58 @@ LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC) // Handle result values, copying them out of physregs into vregs that we // return. - return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), - Op.getResNo()); + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, + Ins, dl, DAG, InVals); } -/// LowerCallResult - Lower the result values of an ISD::CALL into the -/// appropriate copies out of appropriate physical registers. This assumes that -/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call -/// being lowered. Returns a SDNode with the same number of values as the -/// ISD::CALL. -SDNode *XCoreTargetLowering:: -LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, - unsigned CallingConv, SelectionDAG &DAG) { - bool isVarArg = TheCall->isVarArg(); - DebugLoc dl = TheCall->getDebugLoc(); +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue +XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) { // Assign locations to each value returned by this call. SmallVector RVLocs; - CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(TheCall, RetCC_XCore); - SmallVector ResultVals; + CCInfo.AnalyzeCallResult(Ins, RetCC_XCore); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), RVLocs[i].getValVT(), InFlag).getValue(1); InFlag = Chain.getValue(2); - ResultVals.push_back(Chain.getValue(0)); + InVals.push_back(Chain.getValue(0)); } - ResultVals.push_back(Chain); - - // Merge everything together with a MERGE_VALUES node. 
- return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), - &ResultVals[0], ResultVals.size()).getNode(); + return Chain; } //===----------------------------------------------------------------------===// -// FORMAL_ARGUMENTS Calling Convention Implementation +// Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// -/// XCore custom FORMAL_ARGUMENTS implementation -SDValue XCoreTargetLowering:: -LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) -{ - unsigned CC = cast(Op.getOperand(1))->getZExtValue(); - switch(CC) +/// XCore formal arguments implementation +SDValue +XCoreTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl &InVals) { + switch (CallConv) { default: - assert(0 && "Unsupported calling convention"); + llvm_unreachable("Unsupported calling convention"); case CallingConv::C: case CallingConv::Fast: - return LowerCCCArguments(Op, DAG); + return LowerCCCArguments(Chain, CallConv, isVarArg, + Ins, dl, DAG, InVals); } } @@ -614,27 +802,28 @@ LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) /// virtual registers and generate load operations for /// arguments places on the stack. /// TODO: sret -SDValue XCoreTargetLowering:: -LowerCCCArguments(SDValue Op, SelectionDAG &DAG) -{ +SDValue +XCoreTargetLowering::LowerCCCArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl &InVals) { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - SDValue Root = Op.getOperand(0); - bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; - unsigned CC = MF.getFunction()->getCallingConv(); - DebugLoc dl = Op.getDebugLoc(); // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; - CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_XCore); + CCInfo.AnalyzeFormalArguments(Ins, CC_XCore); unsigned StackSlotSize = XCoreFrameInfo::stackSlotSize(); - SmallVector ArgValues; - unsigned LRSaveSize = StackSlotSize; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -643,18 +832,21 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG) if (VA.isRegLoc()) { // Arguments passed in registers - MVT RegVT = VA.getLocVT(); - switch (RegVT.getSimpleVT()) { + EVT RegVT = VA.getLocVT(); + switch (RegVT.getSimpleVT().SimpleTy) { default: - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << RegVT.getSimpleVT() - << "\n"; - abort(); + { +#ifndef NDEBUG + errs() << "LowerFormalArguments Unhandled argument type: " + << RegVT.getSimpleVT().SimpleTy << "\n"; +#endif + llvm_unreachable(0); + } case MVT::i32: unsigned VReg = RegInfo.createVirtualRegister( XCore::GRRegsRegisterClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); - ArgValues.push_back(DAG.getCopyFromReg(Root, dl, VReg, RegVT)); + InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); } } else { // sanity check @@ -662,9 +854,9 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG) // Load the argument to a virtual register unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; if (ObjSize > StackSlotSize) { - cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " - << VA.getLocVT().getSimpleVT() - << "\n"; + errs() << "LowerFormalArguments Unhandled argument type: " + << (unsigned)VA.getLocVT().getSimpleVT().SimpleTy + << "\n"; } // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(ObjSize, @@ -673,7 +865,7 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG) // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - ArgValues.push_back(DAG.getLoad(VA.getLocVT(), dl, Root, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0)); } } @@ -702,14 +894,14 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG) unsigned VReg = RegInfo.createVirtualRegister( XCore::GRRegsRegisterClass); RegInfo.addLiveIn(ArgRegs[i], VReg); - SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); } else { // This will point to the next argument passed via stack. XFI->setVarArgsFrameIndex( @@ -717,34 +909,29 @@ LowerCCCArguments(SDValue Op, SelectionDAG &DAG) } } - ArgValues.push_back(Root); - - // Return the new list of results. 
- std::vector RetVT(Op.getNode()->value_begin(), - Op.getNode()->value_end()); - return DAG.getNode(ISD::MERGE_VALUES, dl, RetVT, - &ArgValues[0], ArgValues.size()); + return Chain; } //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// -SDValue XCoreTargetLowering:: -LowerRET(SDValue Op, SelectionDAG &DAG) -{ +SDValue +XCoreTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG) { + // CCValAssign - represent the assignment of // the return value to a location SmallVector RVLocs; - unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); - DebugLoc dl = Op.getDebugLoc(); // CCState - Info about the registers and stack slot. - CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); - // Analize return values of ISD::RET - CCInfo.AnalyzeReturn(Op.getNode(), RetCC_XCore); + // Analize return values. + CCInfo.AnalyzeReturn(Outs, RetCC_XCore); // If this is the first return lowered for this function, add // the regs to the liveout set for the function. @@ -754,8 +941,6 @@ LowerRET(SDValue Op, SelectionDAG &DAG) DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - // The chain is always operand #0 - SDValue Chain = Op.getOperand(0); SDValue Flag; // Copy the result values into the output registers. @@ -763,10 +948,8 @@ LowerRET(SDValue Op, SelectionDAG &DAG) CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - // ISD::RET => ret chain, (regnum1,val1), ... - // So i*2+1 index only the regnums Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Op.getOperand(i*2+1), Flag); + Outs[i].Val, Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad @@ -788,7 +971,8 @@ LowerRET(SDValue Op, SelectionDAG &DAG) MachineBasicBlock * XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { + MachineBasicBlock *BB, + DenseMap *EM) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && @@ -816,9 +1000,18 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by transferring all successors of the current + // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - sinkMBB->transferSuccessors(BB); + // Also inform sdisel of the edge changes. + for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), + E = BB->succ_end(); I != E; ++I) { + EM->insert(std::make_pair(*I, sinkMBB)); + sinkMBB->addSuccessor(*I); + } + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while (!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); // Next, add the true and fallthrough blocks as its successors. 
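// The resulting CFG is the standard select diamond (sketch):
//
//          BB ---- cond true ----> sinkMBB
//           \                        ^
//            `--> copy0MBB ----------'
//
// with the PHI in sinkMBB merging the value reaching it from BB (true
// side) and from copy0MBB (false side).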
BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -843,6 +1036,56 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return BB; } +//===----------------------------------------------------------------------===// +// Target Optimization Hooks +//===----------------------------------------------------------------------===// + +SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + DebugLoc dl = N->getDebugLoc(); + switch (N->getOpcode()) { + default: break; + case ISD::STORE: { + // Replace unaligned store of unaligned load with memmove. + StoreSDNode *ST = cast(N); + if (!DCI.isBeforeLegalize() || + allowsUnalignedMemoryAccesses(ST->getMemoryVT()) || + ST->isVolatile() || ST->isIndexed()) { + break; + } + SDValue Chain = ST->getChain(); + + unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits(); + if (StoreBits % 8) { + break; + } + unsigned ABIAlignment = getTargetData()->getABITypeAlignment( + ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext())); + unsigned Alignment = ST->getAlignment(); + if (Alignment >= ABIAlignment) { + break; + } + + if (LoadSDNode *LD = dyn_cast(ST->getValue())) { + if (LD->hasNUsesOfValue(1, 0) && ST->getMemoryVT() == LD->getMemoryVT() && + LD->getAlignment() == Alignment && + !LD->isVolatile() && !LD->isIndexed() && + Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { + return DAG.getMemmove(Chain, dl, ST->getBasePtr(), + LD->getBasePtr(), + DAG.getConstant(StoreBits/8, MVT::i32), + Alignment, ST->getSrcValue(), + ST->getSrcValueOffset(), LD->getSrcValue(), + LD->getSrcValueOffset()); + } + } + break; + } + } + return SDValue(); +} + //===----------------------------------------------------------------------===// // Addressing mode description hooks //===----------------------------------------------------------------------===// @@ -867,44 +1110,35 @@ static inline bool isImmUs4(int64_t val) bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { - MVT VT = getValueType(Ty, true); - // Get expected value type after legalization - switch (VT.getSimpleVT()) { - // Legal load / stores - case MVT::i8: - case MVT::i16: - case MVT::i32: - break; - // Expand i1 -> i8 - case MVT::i1: - VT = MVT::i8; - break; - // Everything else is lowered to words - default: - VT = MVT::i32; - break; - } + // Be conservative with void + // FIXME: Can we be more aggressive? 
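// Worked examples of what the size-based switch below accepts
// (illustrative, using the isImmUs* helpers defined earlier in this file):
//   1-byte access:  reg + small unscaled immediate, or reg + reg
//   2-byte access:  reg + immediate scaled by 2,    or reg + (reg << 1)
//   word or larger: reg + immediate scaled by 4,    or a scaled register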
+ if (Ty->getTypeID() == Type::VoidTyID) + return false; + + const TargetData *TD = TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(Ty); if (AM.BaseGV) { - return VT == MVT::i32 && !AM.HasBaseReg && AM.Scale == 0 && + return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs%4 == 0; } - switch (VT.getSimpleVT()) { - default: - return false; - case MVT::i8: + switch (Size) { + case 1: // reg + imm if (AM.Scale == 0) { return isImmUs(AM.BaseOffs); } + // reg + reg return AM.Scale == 1 && AM.BaseOffs == 0; - case MVT::i16: + case 2: + case 3: // reg + imm if (AM.Scale == 0) { return isImmUs2(AM.BaseOffs); } + // reg + reg<<1 return AM.Scale == 2 && AM.BaseOffs == 0; - case MVT::i32: + default: // reg + imm if (AM.Scale == 0) { return isImmUs4(AM.BaseOffs); @@ -922,7 +1156,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::vector XCoreTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const + EVT VT) const { if (Constraint.size() != 1) return std::vector(); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 753ea819c2bdb..ef8555e3da178 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -79,7 +79,8 @@ namespace llvm { virtual const char *getTargetNodeName(unsigned Opcode) const; virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB, + DenseMap *EM) const; virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; @@ -92,18 +93,31 @@ namespace llvm { const XCoreSubtarget &Subtarget; // Lower Operand helpers - SDValue LowerCCCArguments(SDValue Op, SelectionDAG &DAG); - SDValue LowerCCCCallTo(SDValue Op, SelectionDAG &DAG, unsigned CC); - SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode*TheCall, - unsigned CallingConv, SelectionDAG &DAG); + SDValue LowerCCCArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG); // Lower Operand specifics - SDValue LowerRET(SDValue Op, SelectionDAG &DAG); - SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); - SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG); + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); @@ -116,10 +130,35 @@ namespace llvm { // Inline asm support std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + EVT VT) const; // Expand specifics SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG); + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + 
CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + DebugLoc dl, SelectionDAG &DAG); }; } diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 504d2025edcff..e616fe68e2325 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "XCoreGenInstrInfo.inc" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { namespace XCore { @@ -36,7 +37,7 @@ namespace XCore { using namespace llvm; -XCoreInstrInfo::XCoreInstrInfo(void) +XCoreInstrInfo::XCoreInstrInfo() : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)), RI(*this) { } @@ -115,30 +116,6 @@ XCoreInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -/// isInvariantLoad - Return true if the specified instruction (which is marked -/// mayLoad) is loading from a location whose value is invariant across the -/// function. For example, loading a value from the constant pool or from -/// from the argument area of a function if it does not change. This should -/// only return true of *all* loads the instruction does are invariant (if it -/// does multiple loads). -bool -XCoreInstrInfo::isInvariantLoad(const MachineInstr *MI) const { - // Loads from constants pools and loads from invariant argument slots are - // invariant - int Opcode = MI->getOpcode(); - if (Opcode == XCore::LDWCP_ru6 || Opcode == XCore::LDWCP_lru6) { - return MI->getOperand(1).isCPI(); - } - int FrameIndex; - if (isLoadFromStackSlot(MI, FrameIndex)) { - const MachineFrameInfo &MFI = - *MI->getParent()->getParent()->getFrameInfo(); - return MFI.isFixedObjectIndex(FrameIndex) && - MFI.isImmutableObjectIndex(FrameIndex); - } - return false; -} - //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// @@ -186,7 +163,7 @@ static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc) static inline unsigned GetCondBranchFromCond(XCore::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case XCore::COND_TRUE : return XCore::BRFT_lru6; case XCore::COND_FALSE : return XCore::BRFF_lru6; } @@ -197,7 +174,7 @@ static inline unsigned GetCondBranchFromCond(XCore::CondCode CC) static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC) { switch (CC) { - default: assert(0 && "Illegal condition code!"); + default: llvm_unreachable("Illegal condition code!"); case XCore::COND_TRUE : return XCore::COND_FALSE; case XCore::COND_FALSE : return XCore::COND_TRUE; } @@ -402,14 +379,6 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addImm(0); } -void XCoreInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, - bool isKill, SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const -{ - assert(0 && "unimplemented\n"); -} - void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock 
&MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FrameIndex, @@ -422,14 +391,6 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addImm(0); } -void XCoreInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const -{ - assert(0 && "unimplemented\n"); -} - bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI) const diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 08708863ad575..24230ac46a138 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -22,7 +22,7 @@ namespace llvm { class XCoreInstrInfo : public TargetInstrInfoImpl { const XCoreRegisterInfo RI; public: - XCoreInstrInfo(void); + XCoreInstrInfo(); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should @@ -52,8 +52,6 @@ public: virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool isInvariantLoad(const MachineInstr *MI) const; - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, @@ -76,21 +74,11 @@ public: unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const; - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 65cd4fe955591..4b9ea7a491785 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -22,18 +22,6 @@ include "XCoreInstrFormats.td" -//===----------------------------------------------------------------------===// -// Feature predicates. -//===----------------------------------------------------------------------===// - -// HasXS1A - This predicate is true when the target processor supports XS1A -// instructions. -def HasXS1A : Predicate<"Subtarget.isXS1A()">; - -// HasXS1B - This predicate is true when the target processor supports XS1B -// instructions. -def HasXS1B : Predicate<"Subtarget.isXS1B()">; - //===----------------------------------------------------------------------===// // XCore specific DAG Nodes. // @@ -95,6 +83,12 @@ def neg_xform : SDNodeXForm; +def bpwsub_xform : SDNodeXFormgetZExtValue(); + return getI32Imm(32-value); +}]>; + def div4neg_xform : SDNodeXFormgetZExtValue(); @@ -136,9 +130,6 @@ def immU20 : PatLeaf<(imm), [{ return (uint32_t)N->getZExtValue() < (1 << 20); }]>; -// FIXME check subtarget. Currently we check if the immediate -// is in the common subset of legal immediate values for both -// XS1A and XS1B. 
def immMskBitp : PatLeaf<(imm), [{ uint32_t value = (uint32_t)N->getZExtValue(); if (!isMask_32(value)) { @@ -151,9 +142,6 @@ def immMskBitp : PatLeaf<(imm), [{ || msksize == 32; }]>; -// FIXME check subtarget. Currently we check if the immediate -// is in the common subset of legal immediate values for both -// XS1A and XS1B. def immBitp : PatLeaf<(imm), [{ uint32_t value = (uint32_t)N->getZExtValue(); return (value >= 1 && value <= 8) @@ -162,6 +150,14 @@ def immBitp : PatLeaf<(imm), [{ || value == 32; }]>; +def immBpwSubBitp : PatLeaf<(imm), [{ + uint32_t value = (uint32_t)N->getZExtValue(); + return (value >= 24 && value <= 31) + || value == 16 + || value == 8 + || value == 0; +}]>; + def lda16f : PatFrag<(ops node:$addr, node:$offset), (add node:$addr, (shl node:$offset, 1))>; def lda16b : PatFrag<(ops node:$addr, node:$offset), @@ -469,7 +465,7 @@ def ST8_l3r : _FL3R<(outs), (ins GRRegs:$val, GRRegs:$addr, GRRegs:$offset), } // Four operand long -let Predicates = [HasXS1B], Constraints = "$src1 = $dst1,$src2 = $dst2" in { +let Constraints = "$src1 = $dst1,$src2 = $dst2" in { def MACCU_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, GRRegs:$src4), @@ -485,7 +481,6 @@ def MACCS_l4r : _L4R<(outs GRRegs:$dst1, GRRegs:$dst2), // Five operand long -let Predicates = [HasXS1B] in { def LADD_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), "ladd $dst1, $dst2, $src1, $src2, $src3", @@ -500,7 +495,6 @@ def LDIV_l5r : _L5R<(outs GRRegs:$dst1, GRRegs:$dst2), (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3), "ldiv $dst1, $dst2, $src1, $src2, $src3", []>; -} // Six operand long @@ -510,13 +504,6 @@ def LMUL_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), "lmul $dst1, $dst2, $src1, $src2, $src3, $src4", []>; -let Predicates = [HasXS1A] in -def MACC_l6r : _L6R<(outs GRRegs:$dst1, GRRegs:$dst2), - (ins GRRegs:$src1, GRRegs:$src2, GRRegs:$src3, - GRRegs:$src4), - "macc $dst1, $dst2, $src1, $src2, $src3, $src4", - []>; - // Register - U6 //let Uses = [DP] in ... @@ -664,13 +651,12 @@ def BRFU_lu6 : _FLU6< } //let Uses = [CP] in ... 
-let Predicates = [HasXS1B], Defs = [R11], neverHasSideEffects = 1, - isReMaterializable = 1 in +let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in def LDAWCP_u6: _FRU6<(outs), (ins MEMii:$a), "ldaw r11, cp[$a]", []>; -let Predicates = [HasXS1B], Defs = [R11], isReMaterializable = 1 in +let Defs = [R11], isReMaterializable = 1 in def LDAWCP_lu6: _FLRU6< (outs), (ins MEMii:$a), "ldaw r11, cp[$a]", @@ -821,7 +807,7 @@ def : Pat<(zextloadi8 (add GRRegs:$addr, GRRegs:$offset)), (LD8U_3r GRRegs:$addr, GRRegs:$offset)>; def : Pat<(zextloadi8 GRRegs:$addr), (LD8U_3r GRRegs:$addr, (LDC_ru6 0))>; -def : Pat<(zextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)), +def : Pat<(sextloadi16 (lda16f GRRegs:$addr, GRRegs:$offset)), (LD16S_3r GRRegs:$addr, GRRegs:$offset)>; def : Pat<(sextloadi16 GRRegs:$addr), (LD16S_3r GRRegs:$addr, (LDC_ru6 0))>; @@ -989,3 +975,21 @@ def : Pat<(mul GRRegs:$src, -3), def : Pat<(sra GRRegs:$src, 31), (ASHR_l2rus GRRegs:$src, 32)>; +def : Pat<(brcond (setlt GRRegs:$lhs, 0), bb:$dst), + (BRFT_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>; + +// setge X, 0 is canonicalized to setgt X, -1 +def : Pat<(brcond (setgt GRRegs:$lhs, -1), bb:$dst), + (BRFF_lru6 (ASHR_l2rus GRRegs:$lhs, 32), bb:$dst)>; + +def : Pat<(select (setlt GRRegs:$lhs, 0), GRRegs:$T, GRRegs:$F), + (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$T, GRRegs:$F)>; + +def : Pat<(select (setgt GRRegs:$lhs, -1), GRRegs:$T, GRRegs:$F), + (SELECT_CC (ASHR_l2rus GRRegs:$lhs, 32), GRRegs:$F, GRRegs:$T)>; + +def : Pat<(setgt GRRegs:$lhs, -1), + (EQ_2rus (ASHR_l2rus GRRegs:$lhs, 32), 0)>; + +def : Pat<(sra (shl GRRegs:$src, immBpwSubBitp:$imm), immBpwSubBitp:$imm), + (SEXT_rus GRRegs:$src, (bpwsub_xform immBpwSubBitp:$imm))>; diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp new file mode 100644 index 0000000000000..dffdda9a1fd09 --- /dev/null +++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp @@ -0,0 +1,31 @@ +//===-- XCoreMCAsmInfo.cpp - XCore asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "XCoreMCAsmInfo.h" +using namespace llvm; + +XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) { + SupportsDebugInformation = true; + Data16bitsDirective = "\t.short\t"; + Data32bitsDirective = "\t.long\t"; + Data64bitsDirective = 0; + ZeroDirective = "\t.space\t"; + CommentString = "#"; + + PrivateGlobalPrefix = ".L"; + AscizDirective = ".asciiz"; + WeakDefDirective = "\t.weak\t"; + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + + // Debug + HasLEB128 = true; + AbsoluteDebugSectionOffsets = true; +} + diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/XCoreMCAsmInfo.h new file mode 100644 index 0000000000000..01f8e481a949a --- /dev/null +++ b/lib/Target/XCore/XCoreMCAsmInfo.h @@ -0,0 +1,29 @@ +//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the XCoreMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef XCORETARGETASMINFO_H +#define XCORETARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + class XCoreMCAsmInfo : public MCAsmInfo { + public: + explicit XCoreMCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 82cd92d5685ca..136a035cb1f2c 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -30,6 +30,8 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -142,9 +144,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, if (!isU6 && !isImmU16(Amount)) { // FIX could emit multiple instructions in this case. - cerr << "eliminateCallFramePseudoInstr size too big: " - << Amount << "\n"; - abort(); +#ifndef NDEBUG + errs() << "eliminateCallFramePseudoInstr size too big: " + << Amount << "\n"; +#endif + llvm_unreachable(0); } MachineInstr *New; @@ -167,8 +171,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +unsigned +XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); @@ -187,12 +193,13 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int StackSize = MF.getFrameInfo()->getStackSize(); #ifndef NDEBUG - DOUT << "\nFunction : " << MF.getFunction()->getName() << "\n"; - DOUT << "<--------->\n"; - MI.print(DOUT); - DOUT << "FrameIndex : " << FrameIndex << "\n"; - DOUT << "FrameOffset : " << Offset << "\n"; - DOUT << "StackSize : " << StackSize << "\n"; + DEBUG(errs() << "\nFunction : " + << MF.getFunction()->getName() << "\n"); + DEBUG(errs() << "<--------->\n"); + DEBUG(MI.print(errs())); + DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"); + DEBUG(errs() << "FrameOffset : " << Offset << "\n"); + DEBUG(errs() << "StackSize : " << StackSize << "\n"); #endif Offset += StackSize; @@ -203,10 +210,7 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(Offset%4 == 0 && "Misaligned stack offset"); - #ifndef NDEBUG - DOUT << "Offset : " << Offset << "\n"; - DOUT << "<--------->\n"; - #endif + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); Offset/=4; @@ -224,63 +228,65 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, bool isUs = isImmUs(Offset); unsigned FramePtr = XCore::R10; - MachineInstr *New = 0; if (!isUs) { if (!RS) { - cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "eliminateFrameIndex Frame size too big: " << Offset; + llvm_report_error(Msg.str()); } unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II, SPAdj); loadConstant(MBB, II, ScratchReg, Offset, dl); switch (MI.getOpcode()) { case XCore::LDWFI: - New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) + BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg) .addReg(FramePtr) .addReg(ScratchReg, 
RegState::Kill); break; case XCore::STWFI: - New = BuildMI(MBB, II, dl, TII.get(XCore::STW_3r)) + BuildMI(MBB, II, dl, TII.get(XCore::STW_3r)) .addReg(Reg, getKillRegState(isKill)) .addReg(FramePtr) .addReg(ScratchReg, RegState::Kill); break; case XCore::LDAWFI: - New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg) .addReg(FramePtr) .addReg(ScratchReg, RegState::Kill); break; default: - assert(0 && "Unexpected Opcode\n"); + llvm_unreachable("Unexpected Opcode"); } } else { switch (MI.getOpcode()) { case XCore::LDWFI: - New = BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg) + BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg) .addReg(FramePtr) .addImm(Offset); break; case XCore::STWFI: - New = BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) + BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus)) .addReg(Reg, getKillRegState(isKill)) .addReg(FramePtr) .addImm(Offset); break; case XCore::LDAWFI: - New = BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) + BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg) .addReg(FramePtr) .addImm(Offset); break; default: - assert(0 && "Unexpected Opcode\n"); + llvm_unreachable("Unexpected Opcode"); } } } else { bool isU6 = isImmU6(Offset); if (!isU6 && !isImmU16(Offset)) { - // FIXME could make this work for LDWSP, LDAWSP. - cerr << "eliminateFrameIndex Frame size too big: " << Offset << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "eliminateFrameIndex Frame size too big: " << Offset; + llvm_report_error(Msg.str()); } switch (MI.getOpcode()) { @@ -302,11 +308,12 @@ void XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, .addImm(Offset); break; default: - assert(0 && "Unexpected Opcode\n"); + llvm_unreachable("Unexpected Opcode"); } } // Erase old instruction. MBB.erase(II); + return 0; } void @@ -354,8 +361,10 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // TODO use mkmsk if possible. if (!isImmU16(Value)) { // TODO use constant pool. - cerr << "loadConstant value too big " << Value << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "loadConstant value too big " << Value; + llvm_report_error(Msg.str()); } int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); @@ -368,8 +377,10 @@ storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Offset/=4; bool isU6 = isImmU6(Offset); if (!isU6 && !isImmU16(Offset)) { - cerr << "storeToStack offset too big " << Offset << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "storeToStack offset too big " << Offset; + llvm_report_error(Msg.str()); } int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode)) @@ -384,8 +395,10 @@ loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Offset/=4; bool isU6 = isImmU6(Offset); if (!isU6 && !isImmU16(Offset)) { - cerr << "loadFromStack offset too big " << Offset << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "loadFromStack offset too big " << Offset; + llvm_report_error(Msg.str()); } int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg) @@ -414,8 +427,10 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. 
- cerr << "emitPrologue Frame size too big: " << FrameSize << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "emitPrologue Frame size too big: " << FrameSize; + llvm_report_error(Msg.str()); } bool emitFrameMoves = needsFrameMoves(MF); @@ -538,8 +553,10 @@ void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF, if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. - cerr << "emitEpilogue Frame size too big: " << FrameSize << "\n"; - abort(); + std::string msg; + raw_string_ostream Msg(msg); + Msg << "emitEpilogue Frame size too big: " << FrameSize; + llvm_report_error(Msg.str()); } if (FrameSize) { diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 00b7caa96bc6e..a7df5102f2017 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -57,8 +57,9 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp index dc53da4ddf0bb..78a6fa5b2edbe 100644 --- a/lib/Target/XCore/XCoreSubtarget.cpp +++ b/lib/Target/XCore/XCoreSubtarget.cpp @@ -13,16 +13,8 @@ #include "XCoreSubtarget.h" #include "XCore.h" -#include "XCoreGenSubtarget.inc" using namespace llvm; -XCoreSubtarget::XCoreSubtarget(const TargetMachine &TM, const Module &M, - const std::string &FS) - : IsXS1A(false), - IsXS1B(false) +XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS) { - std::string CPU = "xs1a-generic"; - - // Parse features string. - ParseSubtargetFeatures(FS, CPU); } diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h index ff6475baa8104..f8be3ec861891 100644 --- a/lib/Target/XCore/XCoreSubtarget.h +++ b/lib/Target/XCore/XCoreSubtarget.h @@ -20,21 +20,14 @@ #include namespace llvm { -class Module; class XCoreSubtarget : public TargetSubtarget { - bool IsXS1A; - bool IsXS1B; public: /// This constructor initializes the data members to match that - /// of the specified module. + /// of the specified triple. /// - XCoreSubtarget(const TargetMachine &TM, const Module &M, - const std::string &FS); - - bool isXS1A() const { return IsXS1A; } - bool isXS1B() const { return IsXS1B; } + XCoreSubtarget(const std::string &TT, const std::string &FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index b72225f23b752..75f2055ebf9f3 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -10,38 +10,20 @@ // //===----------------------------------------------------------------------===// -#include "XCoreTargetAsmInfo.h" +#include "XCoreMCAsmInfo.h" #include "XCoreTargetMachine.h" #include "XCore.h" #include "llvm/Module.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetRegistry.h" using namespace llvm; -/// XCoreTargetMachineModule - Note that this is used on hosts that -/// cannot link in a library unless there are references into the -/// library. In particular, it seems that it is not possible to get -/// things to work on Win32 without this. Though it is unused, do not -/// remove it. -extern "C" int XCoreTargetMachineModule; -int XCoreTargetMachineModule = 0; - -namespace { - // Register the target. - RegisterTarget<XCoreTargetMachine> X("xcore", "XCore"); -} - -// Force static initialization. -extern "C" void LLVMInitializeXCoreTarget() { } - -const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const { - return new XCoreTargetAsmInfo(*this); -} - /// XCoreTargetMachine ctor - Create an ILP32 architecture model /// -XCoreTargetMachine::XCoreTargetMachine(const Module &M, const std::string &FS) - : Subtarget(*this, M, FS), +XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, + const std::string &FS) + : LLVMTargetMachine(T, TT), + Subtarget(TT, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32"), InstrInfo(), @@ -49,26 +31,14 @@ XCoreTargetMachine::XCoreTargetMachine(const Module &M, const std::string &FS) TLInfo(*this) { } -unsigned XCoreTargetMachine::getModuleMatchQuality(const Module &M) { - std::string TT = M.getTargetTriple(); - if (TT.size() >= 6 && std::string(TT.begin(), TT.begin()+6) == "xcore-") - return 20; - - // Otherwise we don't match. - return 0; -} - bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { PM.add(createXCoreISelDag(*this)); return false; } -bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, - raw_ostream &Out) { - // Output assembly language. - PM.add(createXCoreCodePrinterPass(Out, *this, Verbose)); - return false; +// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() { + RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget); + RegisterAsmInfo<XCoreMCAsmInfo> Y(TheXCoreTarget); } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 2385aedc90790..b0b1464dbe0c8 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -23,20 +23,15 @@ namespace llvm { -class Module; - class XCoreTargetMachine : public LLVMTargetMachine { XCoreSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment XCoreInstrInfo InstrInfo; XCoreFrameInfo FrameInfo; XCoreTargetLowering TLInfo; - -protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; - public: - XCoreTargetMachine(const Module &M, const std::string &FS); + XCoreTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; } @@ -49,13 +44,9 @@ public: return &InstrInfo.getRegisterInfo(); } virtual const TargetData *getTargetData() const { return &DataLayout; } - static unsigned getModuleMatchQuality(const Module &M); // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addAssemblyEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - bool Verbose, raw_ostream &Out); }; } // end namespace llvm diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp new file mode 100644 index 0000000000000..7de3b55d38f64 --- /dev/null +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -0,0 +1,67 @@ +//===-- XCoreTargetObjectFile.cpp - XCore object files --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "XCoreTargetObjectFile.h" +#include "XCoreSubtarget.h" +#include "MCSectionXCore.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + + +void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + DataSection = + MCSectionXCore::Create(".dp.data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionXCore::SHF_DP_SECTION, + SectionKind::getDataRel(), false, getContext()); + BSSSection = + MCSectionXCore::Create(".dp.bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionXCore::SHF_DP_SECTION, + SectionKind::getBSS(), false, getContext()); + + MergeableConst4Section = + MCSectionXCore::Create(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionXCore::SHF_CP_SECTION, + SectionKind::getMergeableConst4(), false, + getContext()); + MergeableConst8Section = + MCSectionXCore::Create(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionXCore::SHF_CP_SECTION, + SectionKind::getMergeableConst8(), false, + getContext()); + MergeableConst16Section = + MCSectionXCore::Create(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionXCore::SHF_CP_SECTION, + SectionKind::getMergeableConst16(), false, + getContext()); + + // TLS globals are lowered in the backend to arrays indexed by the current + // thread id. After lowering they require no special handling by the linker + // and can be placed in the standard data / bss sections. + TLSDataSection = DataSection; + TLSBSSSection = BSSSection; + + ReadOnlySection = + MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionXCore::SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel(), false, + getContext()); + + // Dynamic linking is not supported. Data with relocations is placed in the + // same section as data without relocations. + DataRelSection = DataRelLocalSection = DataSection; + DataRelROSection = DataRelROLocalSection = ReadOnlySection; +} diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h new file mode 100644 index 0000000000000..7efb990b79cf6 --- /dev/null +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -0,0 +1,26 @@ +//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H +#define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H + +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { + + class XCoreTargetObjectFile : public TargetLoweringObjectFileELF { + public: + + void Initialize(MCContext &Ctx, const TargetMachine &TM); + + // TODO: Classify globals as xcore wishes. 
+ }; +} // end namespace llvm + +#endif diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index d07f6135257f1..8000d0d2ff4ae 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -16,7 +16,7 @@ #include "llvm/Pass.h" #include "llvm/Function.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -32,7 +32,7 @@ namespace { HelloCounter++; std::string fname = F.getName(); EscapeString(fname); - cerr << "Hello: " << fname << "\n"; + errs() << "Hello: " << fname << "\n"; return false; } }; @@ -51,7 +51,7 @@ namespace { HelloCounter++; std::string fname = F.getName(); EscapeString(fname); - cerr << "Hello: " << fname << "\n"; + errs() << "Hello: " << fname << "\n"; return false; } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index a61263401618f..5b91f3d20992a 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -36,16 +36,18 @@ #include "llvm/Module.h" #include "llvm/CallGraphSCCPass.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Compiler.h" #include <set> using namespace llvm; @@ -60,11 +62,10 @@ namespace { struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AliasAnalysis>(); - AU.addRequired<TargetData>(); CallGraphSCCPass::getAnalysisUsage(AU); } - virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC); + virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC); static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) : CallGraphSCCPass(&ID), maxElements(maxElements) {} @@ -73,11 +74,11 @@ namespace { typedef std::vector<uint64_t> IndicesVector; private: - bool PromoteArguments(CallGraphNode *CGN); + CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; - Function *DoPromotion(Function *F, - SmallPtrSet<Argument*, 8> &ArgsToPromote, - SmallPtrSet<Argument*, 8> &ByValArgsToTransform); + CallGraphNode *DoPromotion(Function *F, + SmallPtrSet<Argument*, 8> &ArgsToPromote, + SmallPtrSet<Argument*, 8> &ByValArgsToTransform); /// The maximum number of elements to expand, or 0 for unlimited. unsigned maxElements; }; @@ -91,14 +92,17 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { return new ArgPromotion(maxElements); } -bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) { bool Changed = false, LocalChange; do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. for (unsigned i = 0, e = SCC.size(); i != e; ++i) - LocalChange |= PromoteArguments(SCC[i]); + if (CallGraphNode *CGN = PromoteArguments(SCC[i])) { + LocalChange = true; + SCC[i] = CGN; + } Changed |= LocalChange; // Remember that we changed something. } while (LocalChange); @@ -110,11 +114,11 @@ bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { /// example, all callers are direct).
If safe to promote some arguments, it /// calls the DoPromotion method. /// -bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { +CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. - if (!F || !F->hasLocalLinkage()) return false; + if (!F || !F->hasLocalLinkage()) return 0; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs; unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); @@ -123,12 +127,12 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { I != E; ++I, ++ArgNo) if (isa<PointerType>(I->getType())) PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo)); - if (PointerArgs.empty()) return false; + if (PointerArgs.empty()) return 0; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. if (F->hasAddressTaken()) - return false; + return 0; // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. @@ -144,9 +148,9 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); if (const StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { - DOUT << "argpromotion disable promoting argument '" - << PtrArg->getName() << "' because it would require adding more " - << "than " << maxElements << " arguments to the function.\n"; + DEBUG(errs() << "argpromotion disable promoting argument '" + << PtrArg->getName() << "' because it would require adding more" + << " than " << maxElements << " arguments to the function.\n"); } else { // If all the elements are single-value types, we can promote it. bool AllSimple = true; @@ -173,13 +177,10 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // No promotable pointer arguments. - if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return false; - - Function *NewF = DoPromotion(F, ArgsToPromote, ByValArgsToTransform); + if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) + return 0; - // Update the call graph to know that the function has been transformed. - getAnalysis<CallGraph>().changeFunction(F, NewF); - return true; + return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } /// IsAlwaysValidPointer - Return true if the specified pointer is always legal @@ -409,9 +410,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // to do. if (ToPromote.find(Operands) == ToPromote.end()) { if (maxElements > 0 && ToPromote.size() == maxElements) { - DOUT << "argpromotion not promoting argument '" - << Arg->getName() << "' because it would require adding more " - << "than " << maxElements << " arguments to the function.\n"; + DEBUG(errs() << "argpromotion not promoting argument '" + << Arg->getName() << "' because it would require adding more " + << "than " << maxElements << " arguments to the function.\n"); // We limit aggregate promotion to only promoting up to a fixed number // of elements of the aggregate. return false; @@ -432,7 +433,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { SmallPtrSet<BasicBlock*, 16> TranspBlocks; AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - TargetData &TD = getAnalysis<TargetData>(); + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + if (!TD) return false; // Without TargetData, assume the worst.
for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to @@ -442,7 +444,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { const PointerType *LoadTy = cast<PointerType>(Load->getPointerOperand()->getType()); - unsigned LoadSize = (unsigned)TD.getTypeStoreSize(LoadTy->getElementType()); + unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType()); if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize)) return false; // Pointer is invalidated! @@ -467,8 +469,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. -Function *ArgPromotion::DoPromotion(Function *F, - SmallPtrSet<Argument*, 8> &ArgsToPromote, +CallGraphNode *ArgPromotion::DoPromotion(Function *F, + SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as @@ -581,19 +583,24 @@ Function *ArgPromotion::DoPromotion(Function *F, bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg()) { ExtraArgHack = true; - Params.push_back(Type::Int32Ty); + Params.push_back(Type::getInt32Ty(F->getContext())); } // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); - // Create the new function body and insert it into the module... + // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); + + DEBUG(errs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" + << "From: " << *F); + // Recompute the parameter attributes list based on the new arguments for // the function. - NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); + NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), + AttributesVec.end())); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); @@ -606,6 +613,10 @@ Function *ArgPromotion::DoPromotion(Function *F, // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraph>(); + + // Get a new callgraph node for NF. + CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. @@ -636,9 +647,10 @@ Function *ArgPromotion::DoPromotion(Function *F, // Emit a GEP and load for each element of the struct. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::Int32Ty, i); + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, (*AI)->getName()+"."+utostr(i), Call); @@ -662,7 +674,9 @@ Function *ArgPromotion::DoPromotion(Function *F, IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. - const Type *IdxTy = (isa<StructType>(ElTy) ?
+ Type::getInt32Ty(F->getContext()) : + Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); // Keep track of the type we're currently indexing ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); @@ -679,7 +693,7 @@ Function *ArgPromotion::DoPromotion(Function *F, } if (ExtraArgHack) - Args.push_back(Constant::getNullValue(Type::Int32Ty)); + Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { @@ -715,7 +729,8 @@ Function *ArgPromotion::DoPromotion(Function *F, AA.replaceWithNewValue(Call, New); // Update the callgraph to know that the callsite has been transformed. - CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); + CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->replaceCallEdge(Call, New, NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); @@ -756,14 +771,16 @@ Function *ArgPromotion::DoPromotion(Function *F, const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); const StructType *STy = cast<StructType>(AgTy); - Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; + Value *Idxs[2] = { + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Idxs[1] = ConstantInt::get(Type::Int32Ty, i); - std::string Name = TheAlloca->getName()+"."+utostr(i); - Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, - Name, InsertPt); - I2->setName(I->getName()+"."+utostr(i)); + Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); + Value *Idx = + GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, + TheAlloca->getName()+"."+Twine(i), + InsertPt); + I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } @@ -792,8 +809,8 @@ Function *ArgPromotion::DoPromotion(Function *F, LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); - DOUT << "*** Promoted load of argument '" << I->getName() - << "' in function '" << F->getName() << "'\n"; + DEBUG(errs() << "*** Promoted load of argument '" << I->getName() + << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); IndicesVector Operands; @@ -819,8 +836,8 @@ Function *ArgPromotion::DoPromotion(Function *F, NewName += ".val"; TheArg->setName(NewName); - DOUT << "*** Promoted agg argument '" << TheArg->getName() - << "' of function '" << NF->getName() << "'\n"; + DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName() + << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. @@ -842,13 +859,18 @@ Function *ArgPromotion::DoPromotion(Function *F, // Notify the alias analysis implementation that we inserted a new argument. if (ExtraArgHack) - AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin()); + AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), + NF->arg_begin()); // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); + + NF_CGN->stealCalledFunctionsFrom(CG[F]); + // Now that the old function is dead, delete it.
- F->eraseFromParent(); - return NF; + delete CG.removeFunctionFromModule(F); + + return NF_CGN; } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 1438b4879d2b3..ec0f1e193ad6f 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -1,18 +1,19 @@ add_llvm_library(LLVMipo - FunctionAttrs.cpp ArgumentPromotion.cpp ConstantMerge.cpp DeadArgumentElimination.cpp DeadTypeElimination.cpp ExtractGV.cpp + FunctionAttrs.cpp GlobalDCE.cpp GlobalOpt.cpp + IPConstantPropagation.cpp + IPO.cpp IndMemRemoval.cpp InlineAlways.cpp - Inliner.cpp InlineSimple.cpp + Inliner.cpp Internalize.cpp - IPConstantPropagation.cpp LoopExtractor.cpp LowerSetJmp.cpp MergeFunctions.cpp diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 237e6db1d335c..c1a1045005b75 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -78,7 +78,7 @@ bool ConstantMerge::runOnModule(Module &M) { } // Only process constants with initializers. - if (GV->isConstant() && GV->hasInitializer()) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index e480dadca8913..79a32f02aace8 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -24,10 +24,12 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" @@ -72,7 +74,7 @@ namespace { std::string getDescription() const { return std::string((IsArg ? "Argument #" : "Return value #")) - + utostr(Idx) + " of function " + F->getName(); + + utostr(Idx) + " of function " + F->getNameStr(); } }; @@ -195,8 +197,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Start by computing a new prototype for the function, which is the same as // the old function, but doesn't have isVarArg set. const FunctionType *FTy = Fn.getFunctionType(); + std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); - FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), + Params, false); unsigned NumArgs = Params.size(); // Create the new function body and insert it into the module... @@ -277,7 +281,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// for void functions and 1 for functions not returning a struct. It returns /// the number of struct elements for functions returning a struct. static unsigned NumRetVals(const Function *F) { - if (F->getReturnType() == Type::VoidTy) + if (F->getReturnType() == Type::getVoidTy(F->getContext())) return 0; else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType())) return STy->getNumElements(); @@ -422,7 +426,7 @@ void DAE::SurveyFunction(Function &F) { return; } - DOUT << "DAE - Inspecting callers for fn: " << F.getName() << "\n"; + DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n"); // Keep track of the number of live retvals, so we can skip checks once all // of them turn out to be live.
unsigned NumLiveRetVals = 0; @@ -485,7 +489,7 @@ void DAE::SurveyFunction(Function &F) { for (unsigned i = 0; i != RetCount; ++i) MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); - DOUT << "DAE - Inspecting args for fn: " << F.getName() << "\n"; + DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n"); // Now, check all of our arguments. unsigned i = 0; @@ -527,7 +531,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L, /// mark any values that are used as this function's parameters or by its return /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { - DOUT << "DAE - Intrinsically live fn: " << F.getName() << "\n"; + DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); // Mark the function as live. LiveFunctions.insert(&F); // Mark all arguments as live. @@ -548,7 +552,7 @@ void DAE::MarkLive(const RetOrArg &RA) { if (!LiveValues.insert(RA).second) return; // We were already marked Live. - DOUT << "DAE - Marking " << RA.getDescription() << " live\n"; + DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n"); PropagateLiveness(RA); } @@ -596,11 +600,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { const Type *RetTy = FTy->getReturnType(); const Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); + // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); std::vector<const Type*> RetTypes; - if (RetTy == Type::VoidTy) { - NRetTy = Type::VoidTy; + if (RetTy == Type::getVoidTy(F->getContext())) { + NRetTy = Type::getVoidTy(F->getContext()); } else { const StructType *STy = dyn_cast<StructType>(RetTy); if (STy) @@ -612,8 +617,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NewRetIdxs[i] = RetTypes.size() - 1; } else { ++NumRetValsEliminated; - DOUT << "DAE - Removing return value " << i << " from " - << F->getNameStart() << "\n"; + DEBUG(errs() << "DAE - Removing return value " << i << " from " + << F->getName() << "\n"); } } else @@ -622,8 +627,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { RetTypes.push_back(RetTy); NewRetIdxs[0] = 0; } else { - DOUT << "DAE - Removing return value from " << F->getNameStart() - << "\n"; + DEBUG(errs() << "DAE - Removing return value from " << F->getName() + << "\n"); ++NumRetValsEliminated; } if (RetTypes.size() > 1) @@ -633,14 +638,14 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // something and {} into void. // Make the new struct packed if we used to return a packed struct // already. - NRetTy = StructType::get(RetTypes, STy->isPacked()); + NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked()); else if (RetTypes.size() == 1) // One return type? Just a simple value then, but only if we didn't use to // return a struct with that simple value before. NRetTy = RetTypes.front(); else if (RetTypes.size() == 0) // No return types? Make it void, but only if we didn't use to return {}. - NRetTy = Type::VoidTy; + NRetTy = Type::getVoidTy(F->getContext()); } assert(NRetTy && "No new return type found?"); @@ -649,7 +654,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // values. Otherwise, ensure that we don't have any conflicting attributes // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added.
- if (NRetTy == Type::VoidTy) + if (NRetTy == Type::getVoidTy(F->getContext())) RAttrs &= ~Attribute::typeIncompatible(NRetTy); else assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 @@ -677,8 +682,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs)); } else { ++NumArgumentsEliminated; - DOUT << "DAE - Removing argument " << i << " (" << I->getNameStart() - << ") from " << F->getNameStart() << "\n"; + DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName() + << ") from " << F->getName() << "\n"); } } @@ -697,11 +702,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) { ExtraArgHack = true; - Params.push_back(Type::Int32Ty); + Params.push_back(Type::getInt32Ty(F->getContext())); } // Create the new function type based on the recomputed parameters. - FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); + FunctionType *NFTy = FunctionType::get(NRetTy, Params, + FTy->isVarArg()); // No change? if (NFTy == FTy) @@ -750,7 +756,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } if (ExtraArgHack) - Args.push_back(UndefValue::get(Type::Int32Ty)); + Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list. Don't forget their attributes. for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { @@ -786,7 +792,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Return type not changed? Just replace users then. Call->replaceAllUsesWith(New); New->takeName(Call); - } else if (New->getType() == Type::VoidTy) { + } else if (New->getType() == Type::getVoidTy(F->getContext())) { // Our return value has uses, but they will get removed later on. // Replace by null for now. Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); @@ -806,7 +812,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // extract/insertvalue chaining and let instcombine clean that up. // // Start out building up our return value from undef - Value *RetVal = llvm::UndefValue::get(RetTy); + Value *RetVal = UndefValue::get(RetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { Value *V; @@ -862,7 +868,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { Value *RetVal; - if (NFTy->getReturnType() == Type::VoidTy) { + if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { RetVal = 0; } else { assert (isa(RetTy)); @@ -873,7 +879,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // clean that up. Value *OldRet = RI->getOperand(0); // Start out building up our return value from undef - RetVal = llvm::UndefValue::get(NRetTy); + RetVal = UndefValue::get(NRetTy); for (unsigned i = 0; i != RetCount; ++i) if (NewRetIdxs[i] != -1) { ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, @@ -893,7 +899,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Replace the return instruction with one returning the new return // value (possibly 0 if we became void). - ReturnInst::Create(RetVal, RI); + ReturnInst::Create(F->getContext(), RetVal, RI); BB->getInstList().erase(RI); } @@ -910,7 +916,7 @@ bool DAE::runOnModule(Module &M) { // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates // information computed while surveying other functions. 
- DOUT << "DAE - Deleting dead varargs\n"; + DEBUG(errs() << "DAE - Deleting dead varargs\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { Function &F = *I++; if (F.getFunctionType()->isVarArg()) @@ -921,7 +927,7 @@ bool DAE::runOnModule(Module &M) { // We assume all arguments are dead unless proven otherwise (allowing us to // determine that dead arguments passed into recursive functions are dead). // - DOUT << "DAE - Determining liveness\n"; + DEBUG(errs() << "DAE - Determining liveness\n"); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) SurveyFunction(*I); diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 0c529d239d98c..191100c2e2417 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Constants.h" @@ -43,6 +44,7 @@ namespace { return false; // Nothing to extract } + if (deleteStuff) return deleteGV(); M.setModuleInlineAsm(""); @@ -99,7 +101,8 @@ namespace { // by putting them in the used array { std::vector AUGs; - const Type *SBP= PointerType::getUnqual(Type::Int8Ty); + const Type *SBP= + Type::getInt8PtrTy(M.getContext()); for (std::vector::iterator GI = Named.begin(), GE = Named.end(); GI != GE; ++GI) { (*GI)->setLinkage(GlobalValue::ExternalLinkage); @@ -107,9 +110,9 @@ namespace { } ArrayType *AT = ArrayType::get(SBP, AUGs.size()); Constant *Init = ConstantArray::get(AT, AUGs); - GlobalValue *gv = new GlobalVariable(AT, false, + GlobalValue *gv = new GlobalVariable(M, AT, false, GlobalValue::AppendingLinkage, - Init, "llvm.used", &M); + Init, "llvm.used"); gv->setSection("llvm.metadata"); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index e8315247b23cb..7edaa7fbef5e4 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" @@ -44,7 +45,7 @@ namespace { FunctionAttrs() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(const std::vector &SCC); + bool runOnSCC(std::vector &SCC); // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool AddReadAttrs(const std::vector &SCC); @@ -54,7 +55,7 @@ namespace { // IsFunctionMallocLike - Does this function allocate new memory? bool IsFunctionMallocLike(Function *F, - SmallPtrSet &) const; + SmallPtrSet &) const; // AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool AddNoAliasAttrs(const std::vector &SCC); @@ -93,13 +94,12 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) { /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. bool FunctionAttrs::AddReadAttrs(const std::vector &SCC) { - SmallPtrSet SCCNodes; - CallGraph &CG = getAnalysis(); + SmallPtrSet SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. 
for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]); + SCCNodes.insert(SCC[i]->getFunction()); // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. @@ -133,9 +133,9 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { // Some instructions can be ignored even if they read or write memory. // Detect these now, skipping to the next instruction if one is found. CallSite CS = CallSite::get(I); - if (CS.getInstruction()) { + if (CS.getInstruction() && CS.getCalledFunction()) { // Ignore calls to functions in the same SCC. - if (SCCNodes.count(CG[CS.getCalledFunction()])) + if (SCCNodes.count(CS.getCalledFunction())) continue; } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { // Ignore loads from local memory. @@ -154,7 +154,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { return false; if (isa<MallocInst>(I)) - // MallocInst claims not to write memory! PR3754. + // malloc claims not to write memory! PR3754. return false; // If this instruction may read memory, remember that. @@ -226,9 +226,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) { /// IsFunctionMallocLike - A function is malloc-like if it returns either null /// or a pointer that doesn't alias any other pointer visible to the caller. bool FunctionAttrs::IsFunctionMallocLike(Function *F, - SmallPtrSet<CallGraphNode*, 8> &SCCNodes) const { - CallGraph &CG = getAnalysis<CallGraph>(); - + SmallPtrSet<Function*, 8> &SCCNodes) const { UniqueVector<Value*> FlowsToReturn; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) @@ -250,32 +248,36 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) switch (RVI->getOpcode()) { // Extend the analysis by looking upwards. - case Instruction::GetElementPtr: case Instruction::BitCast: + case Instruction::GetElementPtr: FlowsToReturn.insert(RVI->getOperand(0)); continue; case Instruction::Select: { SelectInst *SI = cast<SelectInst>(RVI); FlowsToReturn.insert(SI->getTrueValue()); FlowsToReturn.insert(SI->getFalseValue()); - } continue; + continue; + } case Instruction::PHI: { PHINode *PN = cast<PHINode>(RVI); for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) FlowsToReturn.insert(PN->getIncomingValue(i)); - } continue; + continue; + } // Check whether the pointer came from an allocation. case Instruction::Alloca: case Instruction::Malloc: break; case Instruction::Call: + if (isMalloc(RVI)) + break; case Instruction::Invoke: { CallSite CS(RVI); if (CS.paramHasAttr(0, Attribute::NoAlias)) break; if (CS.getCalledFunction() && - SCCNodes.count(CG[CS.getCalledFunction()])) + SCCNodes.count(CS.getCalledFunction())) break; } // fall-through default: @@ -291,12 +293,12 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, /// AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { - SmallPtrSet<CallGraphNode*, 8> SCCNodes; + SmallPtrSet<Function*, 8> SCCNodes; // Fill SCCNodes with the elements of the SCC. Used for quickly // looking up whether a given CallGraphNode is in this SCC. for (unsigned i = 0, e = SCC.size(); i != e; ++i) - SCCNodes.insert(SCC[i]); + SCCNodes.insert(SCC[i]->getFunction()); // Check each function in turn, determining which functions return noalias // pointers.
@@ -339,7 +341,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { return MadeChange; } -bool FunctionAttrs::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) { bool Changed = AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 9c652b996aebb..09f9e7c4f68a6 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -58,6 +58,7 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } bool GlobalDCE::runOnModule(Module &M) { bool Changed = false; + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -147,6 +148,9 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); + + // Remove dead metadata. + Changed |= M.getContext().RemoveDeadMetadata(); return Changed; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 7fe097c7c5763..a44386e6c15f1 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -20,20 +20,23 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include <algorithm> using namespace llvm; @@ -56,7 +59,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); namespace { struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(&ID) {} @@ -244,7 +246,8 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, return false; } -static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { +static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx, + LLVMContext &Context) { ConstantInt *CI = dyn_cast<ConstantInt>(Idx); if (!CI) return 0; unsigned IdxV = CI->getZExtValue(); @@ -280,7 +283,8 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { /// users of the global, cleaning up the obvious ones. This is largely just a /// quick scan over the use list to clean up the easy and obvious cruft. This /// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, + LLVMContext &Context) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) { User *U = *UI++; @@ -301,11 +305,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { Constant *SubInit = 0; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); - Changed |= CleanupConstantGlobalUsers(CE, SubInit); + Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context); } else if (CE->getOpcode() == Instruction::BitCast && isa(CE->getType())) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, 0); + Changed |= CleanupConstantGlobalUsers(CE, 0, Context); } if (CE->use_empty()) { @@ -319,11 +323,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { Constant *SubInit = 0; if (!isa(GEP->getOperand(0))) { ConstantExpr *CE = - dyn_cast_or_null(ConstantFoldInstruction(GEP)); + dyn_cast_or_null(ConstantFoldInstruction(GEP, Context)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); } - Changed |= CleanupConstantGlobalUsers(GEP, SubInit); + Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context); if (GEP->use_empty()) { GEP->eraseFromParent(); @@ -341,7 +345,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { if (SafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. - CleanupConstantGlobalUsers(V, Init); + CleanupConstantGlobalUsers(V, Init, Context); return true; } } @@ -423,13 +427,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // Scalar replacing *just* the outer index of the array is probably not // going to be a win anyway, so just give up. for (++GEPI; // Skip array index. - GEPI != E && (isa(*GEPI) || isa(*GEPI)); + GEPI != E; ++GEPI) { uint64_t NumElements; if (const ArrayType *SubArrayTy = dyn_cast(*GEPI)) NumElements = SubArrayTy->getNumElements(); - else - NumElements = cast(*GEPI)->getNumElements(); + else if (const VectorType *SubVectorTy = dyn_cast(*GEPI)) + NumElements = SubVectorTy->getNumElements(); + else { + assert(isa(*GEPI) && + "Indexed GEP type is not array, vector, or struct!"); + continue; + } ConstantInt *IdxVal = dyn_cast(GEPI.getOperand()); if (!IdxVal || IdxVal->getZExtValue() >= NumElements) @@ -461,7 +470,8 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { /// behavior of the program in a more fine-grained way. We have determined that /// this transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. -static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, + LLVMContext &Context) { // Make sure this global only has simple uses that we can SRA. 
if (!GlobalUsersSafeToSRA(GV)) return 0; @@ -483,14 +493,15 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::Int32Ty, i)); + ConstantInt::get(Type::getInt32Ty(Context), i), + Context); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, + GlobalVariable *NGV = new GlobalVariable(Context, + STy->getElementType(i), false, GlobalVariable::InternalLinkage, - In, GV->getName()+"."+utostr(i), - (Module *)NULL, + In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), - GV->getType()->getAddressSpace()); + GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -517,15 +528,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = getAggregateConstantElement(Init, - ConstantInt::get(Type::Int32Ty, i)); + ConstantInt::get(Type::getInt32Ty(Context), i), + Context); assert(In && "Couldn't get element of initializer?"); - GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, + GlobalVariable *NGV = new GlobalVariable(Context, + STy->getElementType(), false, GlobalVariable::InternalLinkage, - In, GV->getName()+"."+utostr(i), - (Module *)NULL, + In, GV->getName()+"."+Twine(i), GV->isThreadLocal(), - GV->getType()->getAddressSpace()); + GV->getType()->getAddressSpace()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); @@ -541,9 +553,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - DOUT << "PERFORMING GLOBAL SRA ON: " << *GV; + DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); - Constant *NullInt = Constant::getNullValue(Type::Int32Ty); + Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context)); // Loop over all of the uses of the global, replacing the constantexpr geps, // with smaller constantexpr geps or direct references. 
@@ -577,7 +589,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i) Idxs.push_back(GEPI->getOperand(i)); NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), - GEPI->getName()+"."+utostr(Val), GEPI); + GEPI->getName()+"."+Twine(Val),GEPI); } } GEP->replaceAllUsesWith(NewPtr); @@ -667,7 +679,8 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) { return true; } -static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, + LLVMContext &Context) { bool Changed = false; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) { Instruction *I = cast(*UI++); @@ -700,7 +713,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { } else if (CastInst *CI = dyn_cast(I)) { Changed |= OptimizeAwayTrappingUsesOfValue(CI, ConstantExpr::getCast(CI->getOpcode(), - NewV, CI->getType())); + NewV, CI->getType()), Context); if (CI->use_empty()) { Changed = true; CI->eraseFromParent(); @@ -717,8 +730,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { break; if (Idxs.size() == GEPI->getNumOperands()-1) Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, - ConstantExpr::getGetElementPtr(NewV, &Idxs[0], - Idxs.size())); + ConstantExpr::getGetElementPtr(NewV, &Idxs[0], + Idxs.size()), Context); if (GEPI->use_empty()) { Changed = true; GEPI->eraseFromParent(); @@ -734,7 +747,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { /// value stored into it. If there are uses of the loaded value that would trap /// if the loaded value is dynamically null, then we know that they cannot be /// reachable with a null; optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, + LLVMContext &Context) { bool Changed = false; // Keep track of whether we are able to remove all the uses of the global @@ -745,7 +759,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){ User *GlobalUser = *GUI++; if (LoadInst *LI = dyn_cast(GlobalUser)) { - Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); + Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context); // If we were able to delete all uses of the loads if (LI->use_empty()) { LI->eraseFromParent(); @@ -768,15 +782,15 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { } if (Changed) { - DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV; + DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); ++NumGlobUses; } // If we nuked all of the loads, then none of the stores are needed either, // nor is the global. if (AllNonStoreUsesGone) { - DOUT << " *** GLOBAL NOW DEAD!\n"; - CleanupConstantGlobalUsers(GV, 0); + DEBUG(errs() << " *** GLOBAL NOW DEAD!\n"); + CleanupConstantGlobalUsers(GV, 0, Context); if (GV->use_empty()) { GV->eraseFromParent(); ++NumDeleted; @@ -788,10 +802,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the /// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V) { +static void ConstantPropUsersOf(Value *V, LLVMContext &Context) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) if (Instruction *I = dyn_cast(*UI++)) - if (Constant *NewC = ConstantFoldInstruction(I)) { + if (Constant *NewC = ConstantFoldInstruction(I, Context)) { I->replaceAllUsesWith(NewC); // Advance UI to the next non-I use to avoid invalidating it! @@ -808,8 +822,9 @@ static void ConstantPropUsersOf(Value *V) { /// malloc, there is no reason to actually DO the malloc. Instead, turn the /// malloc into a global, and any loads of GV as uses of the new global. static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, - MallocInst *MI) { - DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI; + MallocInst *MI, + LLVMContext &Context) { + DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *MI); ConstantInt *NElements = cast(MI->getArraySize()); if (NElements->getZExtValue() != 1) { @@ -818,10 +833,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, Type *NewTy = ArrayType::get(MI->getAllocatedType(), NElements->getZExtValue()); MallocInst *NewMI = - new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty), + new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)), MI->getAlignment(), MI->getName(), MI); Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty); + Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context)); Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, NewMI->getName()+".el0", MI); MI->replaceAllUsesWith(NewGEP); @@ -831,17 +846,17 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. + // FIXME: This new global should have the alignment returned by malloc. Code + // could depend on malloc returning large alignment (on the mac, 16 bytes) but + // this would only guarantee some lower alignment. Constant *Init = UndefValue::get(MI->getAllocatedType()); - GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false, + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + MI->getAllocatedType(), false, GlobalValue::InternalLinkage, Init, GV->getName()+".body", - (Module *)NULL, + GV, GV->isThreadLocal()); - // FIXME: This new global should have the alignment returned by malloc. Code - // could depend on malloc returning large alignment (on the mac, 16 bytes) but - // this would only guarantee some lower alignment. - GV->getParent()->getGlobalList().insert(GV, NewGV); - + // Anything that used the malloc now uses the global directly. MI->replaceAllUsesWith(NewGV); @@ -853,9 +868,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If there is a comparison against null, we will insert a global bool to // keep track of whether the global was initialized yet or not. GlobalVariable *InitBool = - new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage, - ConstantInt::getFalse(), GV->getName()+".init", - (Module *)NULL, GV->isThreadLocal()); + new GlobalVariable(Context, Type::getInt1Ty(Context), false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(Context), GV->getName()+".init", + GV->isThreadLocal()); bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. 
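// Editor's note: a standalone model, with hypothetical names, of the predicate rewrite in the switch that follows. After the malloc is promoted to a global, the loaded pointer is null exactly when the initializing store has not yet run, so each "pointer <pred> null" comparison folds to a function of the ".init" boolean; unsigned/signed less-than against null can never be true.
enum class Pred { ULT, SLT, ULE, SLE, EQ, NE, UGE, SGE, UGT, SGT };

static bool foldCmpAgainstNull(Pred P, bool Initialized) {
  switch (P) {
  case Pred::ULT:
  case Pred::SLT:
    return false;                // X < null -> always false
  case Pred::ULE:
  case Pred::SLE:
  case Pred::EQ:
    return !Initialized;         // X <= null, X == null -> "not yet stored"
  default:
    return Initialized;          // X != null, >=, > -> "already stored"
  }
}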
@@ -872,10 +888,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI); InitBoolUsed = true; switch (CI->getPredicate()) { - default: assert(0 && "Unknown ICmp Predicate!"); + default: llvm_unreachable("Unknown ICmp Predicate!"); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: - LV = ConstantInt::getFalse(); // X < null -> always false + LV = ConstantInt::getFalse(Context); // X < null -> always false break; case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_SLE: @@ -897,7 +913,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, } else { StoreInst *SI = cast(GV->use_back()); // The global is initialized when the store to it occurs. - new StoreInst(ConstantInt::getTrue(), InitBool, SI); + new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); SI->eraseFromParent(); } @@ -917,9 +933,141 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // To further other optimizations, loop over all users of NewGV and try to // constant prop them. This will promote GEP instructions with constant // indices into GEP constant-exprs, which will allow global-opt to hack on it. - ConstantPropUsersOf(NewGV); + ConstantPropUsersOf(NewGV, Context); if (RepValue != NewGV) - ConstantPropUsersOf(RepValue); + ConstantPropUsersOf(RepValue, Context); + + return NewGV; +} + +/// OptimizeGlobalAddressOfMalloc - This function takes the specified global +/// variable, and transforms the program as if it always contained the result of +/// the specified malloc. Because it is always the result of the specified +/// malloc, there is no reason to actually DO the malloc. Instead, turn the +/// malloc into a global, and any loads of GV as uses of the new global. +static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, + CallInst *CI, + BitCastInst *BCI, + LLVMContext &Context, + TargetData* TD) { + const Type *IntPtrTy = TD->getIntPtrType(Context); + + DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << " MALLOC = " << *CI); + + ConstantInt *NElements = cast(getMallocArraySize(CI, + Context, TD)); + if (NElements->getZExtValue() != 1) { + // If we have an array allocation, transform it to a single element + // allocation to make the code below simpler. + Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), + NElements->getZExtValue()); + Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); + Instruction* NewMI = cast(NewM); + Value* Indices[2]; + Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); + Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, + NewMI->getName()+".el0", CI); + BCI->replaceAllUsesWith(NewGEP); + BCI->eraseFromParent(); + CI->eraseFromParent(); + BCI = cast(NewMI); + CI = extractMallocCallFromBitCast(NewMI); + } + + // Create the new global variable. The contents of the malloc'd memory is + // undefined, so initialize with an undef value. + // FIXME: This new global should have the alignment returned by malloc. Code + // could depend on malloc returning large alignment (on the mac, 16 bytes) but + // this would only guarantee some lower alignment. + const Type *MAT = getMallocAllocatedType(CI); + Constant *Init = UndefValue::get(MAT); + GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), + MAT, false, + GlobalValue::InternalLinkage, Init, + GV->getName()+".body", + GV, + GV->isThreadLocal()); + + // Anything that used the malloc now uses the global directly. 
+ BCI->replaceAllUsesWith(NewGV); + + Constant *RepValue = NewGV; + if (NewGV->getType() != GV->getType()->getElementType()) + RepValue = ConstantExpr::getBitCast(RepValue, + GV->getType()->getElementType()); + + // If there is a comparison against null, we will insert a global bool to + // keep track of whether the global was initialized yet or not. + GlobalVariable *InitBool = + new GlobalVariable(Context, Type::getInt1Ty(Context), false, + GlobalValue::InternalLinkage, + ConstantInt::getFalse(Context), GV->getName()+".init", + GV->isThreadLocal()); + bool InitBoolUsed = false; + + // Loop over all uses of GV, processing them in turn. + std::vector Stores; + while (!GV->use_empty()) + if (LoadInst *LI = dyn_cast(GV->use_back())) { + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa(LoadUse.getUser())) + LoadUse = RepValue; + else { + ICmpInst *ICI = cast(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: + LV = ConstantInt::getFalse(Context); // X < null -> always false + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. + } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + } + LI->eraseFromParent(); + } else { + StoreInst *SI = cast(GV->use_back()); + // The global is initialized when the store to it occurs. + new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); + SI->eraseFromParent(); + } + + // If the initialization boolean was used, insert it, otherwise delete it. + if (!InitBoolUsed) { + while (!InitBool->use_empty()) // Delete initializations + cast(InitBool->use_back())->eraseFromParent(); + delete InitBool; + } else + GV->getParent()->getGlobalList().insert(GV, InitBool); + + + // Now the GV is dead, nuke it and the malloc. + GV->eraseFromParent(); + BCI->eraseFromParent(); + CI->eraseFromParent(); + + // To further other optimizations, loop over all users of NewGV and try to + // constant prop them. This will promote GEP instructions with constant + // indices into GEP constant-exprs, which will allow global-opt to hack on it. + ConstantPropUsersOf(NewGV, Context); + if (RepValue != NewGV) + ConstantPropUsersOf(RepValue, Context); return NewGV; } @@ -1071,7 +1219,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from /// GV are simple enough to perform HeapSRA, return true. static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, - MallocInst *MI) { + Instruction *StoredVal) { SmallPtrSet LoadUsingPHIs; SmallPtrSet LoadUsingPHIsPerLoad; for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; @@ -1095,7 +1243,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, Value *InVal = PN->getIncomingValue(op); // PHI of the stored value itself is ok. - if (InVal == MI) continue; + if (InVal == StoredVal) continue; if (PHINode *InPN = dyn_cast(InVal)) { // One of the PHIs in our set is (optimistically) ok. 
@@ -1121,7 +1269,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite) { + std::vector > &PHIsToRewrite, + LLVMContext &Context) { std::vector &FieldVals = InsertedScalarizedValues[V]; if (FieldNo >= FieldVals.size()) @@ -1139,19 +1288,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, // a new Load of the scalarized global. Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, InsertedScalarizedValues, - PHIsToRewrite), - LI->getName()+".f" + utostr(FieldNo), LI); + PHIsToRewrite, Context), + LI->getName()+".f"+Twine(FieldNo), LI); } else if (PHINode *PN = dyn_cast(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field. const StructType *ST = cast(cast(PN->getType())->getElementType()); - Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), - PN->getName()+".f"+utostr(FieldNo), PN); + Result = + PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getName()+".f"+Twine(FieldNo), PN); PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { - assert(0 && "Unknown usable value"); + llvm_unreachable("Unknown usable value"); Result = 0; } @@ -1162,18 +1312,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, /// the load, rewrite the derived value to use the HeapSRoA'd load. static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite) { + std::vector > &PHIsToRewrite, + LLVMContext &Context) { // If this is a comparison against null, handle it. if (ICmpInst *SCI = dyn_cast(LoadUser)) { assert(isa(SCI->getOperand(1))); // If we have a setcc of the loaded pointer, we can use a setcc of any // field. Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, - InsertedScalarizedValues, PHIsToRewrite); + InsertedScalarizedValues, PHIsToRewrite, + Context); - Value *New = new ICmpInst(SCI->getPredicate(), NPtr, - Constant::getNullValue(NPtr->getType()), - SCI->getName(), SCI); + Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, + Constant::getNullValue(NPtr->getType()), + SCI->getName()); SCI->replaceAllUsesWith(New); SCI->eraseFromParent(); return; @@ -1187,7 +1339,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // Load the pointer for this field. unsigned FieldNo = cast(GEPI->getOperand(2))->getZExtValue(); Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, - InsertedScalarizedValues, PHIsToRewrite); + InsertedScalarizedValues, PHIsToRewrite, + Context); // Create the new GEP idx vector. SmallVector GEPIdx; @@ -1219,7 +1372,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // users. for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) { Instruction *User = cast(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, + Context); } } @@ -1229,11 +1383,13 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// AllGlobalLoadUsesSimpleEnoughForHeapSRA. 
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite) { + std::vector > &PHIsToRewrite, + LLVMContext &Context) { for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end(); UI != E; ) { Instruction *User = cast(*UI++); - RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); + RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, + Context); } if (Load->use_empty()) { @@ -1244,8 +1400,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, /// PerformHeapAllocSRoA - MI is an allocation of an array of structures. Break /// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ - DOUT << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI; +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI, + LLVMContext &Context){ + DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *MI); const StructType *STy = cast(MI->getAllocatedType()); // There is guaranteed to be at least one use of the malloc (storing @@ -1264,14 +1421,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ const Type *PFieldTy = PointerType::getUnqual(FieldTy); GlobalVariable *NGV = - new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage, + new GlobalVariable(*GV->getParent(), + PFieldTy, false, GlobalValue::InternalLinkage, Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + utostr(FieldNo), GV, + GV->getName() + ".f" + Twine(FieldNo), GV, GV->isThreadLocal()); FieldGlobals.push_back(NGV); MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), - MI->getName() + ".f" + utostr(FieldNo),MI); + MI->getName() + ".f" + Twine(FieldNo), MI); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, MI); } @@ -1290,9 +1448,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ // } Value *RunningOr = 0; for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { - Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i], - Constant::getNullValue(FieldMallocs[i]->getType()), - "isnull", MI); + Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); if (!RunningOr) RunningOr = Cond; // First seteq else @@ -1305,7 +1463,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. - BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null", + BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", OrigBB->getParent()); // Remove the uncond branch from OrigBB to ContBB, turning it into a cond @@ -1317,11 +1475,13 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ // pointer, because some may be null while others are not. 
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); - Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal, + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, Constant::getNullValue(GVVal->getType()), - "tmp", NullPtrBlock); - BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent()); - BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent()); + "tmp"); + BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", + OrigBB->getParent()); + BasicBlock *NextBlock = BasicBlock::Create(Context, "next", + OrigBB->getParent()); BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); // Fill in FreeBlock. @@ -1353,7 +1513,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ Instruction *User = cast(*UI++); if (LoadInst *LI = dyn_cast(User)) { - RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, + Context); continue; } @@ -1384,7 +1545,192 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *InVal = PN->getIncomingValue(i); InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, - PHIsToRewrite); + PHIsToRewrite, Context); + FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); + } + } + + // Drop all inter-phi links and any loads that made it this far. + for (DenseMap >::iterator + I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast(I->first)) + PN->dropAllReferences(); + else if (LoadInst *LI = dyn_cast(I->first)) + LI->dropAllReferences(); + } + + // Delete all the phis and loads now that inter-references are dead. + for (DenseMap >::iterator + I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast(I->first)) + PN->eraseFromParent(); + else if (LoadInst *LI = dyn_cast(I->first)) + LI->eraseFromParent(); + } + + // The old global is now dead, remove it. + GV->eraseFromParent(); + + ++NumHeapSRA; + return cast(FieldGlobals[0]); +} + +/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break +/// it up into multiple allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, + CallInst *CI, BitCastInst* BCI, + LLVMContext &Context, + TargetData *TD){ + DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC CALL = " << *CI + << " BITCAST = " << *BCI << '\n'); + const Type* MAT = getMallocAllocatedType(CI); + const StructType *STy = cast(MAT); + + // There is guaranteed to be at least one use of the malloc (storing + // it into GV). If there are other uses, change them to be uses of + // the global to simplify later code. This also deletes the store + // into GV. + ReplaceUsesOfMallocWithGlobal(BCI, GV); + + // Okay, at this point, there are no users of the malloc. Insert N + // new mallocs at the same place as CI, and N globals. 
+ std::vector FieldGlobals; + std::vector FieldMallocs; + + for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ + const Type *FieldTy = STy->getElementType(FieldNo); + const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + + GlobalVariable *NGV = + new GlobalVariable(*GV->getParent(), + PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), + GV->getName() + ".f" + Twine(FieldNo), GV, + GV->isThreadLocal()); + FieldGlobals.push_back(NGV); + + Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy, + getMallocArraySize(CI, Context, TD), + BCI->getName() + ".f" + Twine(FieldNo)); + FieldMallocs.push_back(NMI); + new StoreInst(NMI, NGV, BCI); + } + + // The tricky aspect of this transformation is handling the case when malloc + // fails. In the original code, malloc failing would set the result pointer + // of malloc to null. In this case, some mallocs could succeed and others + // could fail. As such, we emit code that looks like this: + // F0 = malloc(field0) + // F1 = malloc(field1) + // F2 = malloc(field2) + // if (F0 == 0 || F1 == 0 || F2 == 0) { + // if (F0) { free(F0); F0 = 0; } + // if (F1) { free(F1); F1 = 0; } + // if (F2) { free(F2); F2 = 0; } + // } + Value *RunningOr = 0; + for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { + Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], + Constant::getNullValue(FieldMallocs[i]->getType()), + "isnull"); + if (!RunningOr) + RunningOr = Cond; // First seteq + else + RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); + } + + // Split the basic block at the old malloc. + BasicBlock *OrigBB = BCI->getParent(); + BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); + + // Create the block to check the first condition. Put all these blocks at the + // end of the function as they are unlikely to be executed. + BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", + OrigBB->getParent()); + + // Remove the uncond branch from OrigBB to ContBB, turning it into a cond + // branch on RunningOr. + OrigBB->getTerminator()->eraseFromParent(); + BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); + + // Within the NullPtrBlock, we need to emit a comparison and branch for each + // pointer, because some may be null while others are not. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); + Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, + Constant::getNullValue(GVVal->getType()), + "tmp"); + BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", + OrigBB->getParent()); + BasicBlock *NextBlock = BasicBlock::Create(Context, "next", + OrigBB->getParent()); + BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); + + // Fill in FreeBlock. + new FreeInst(GVVal, FreeBlock); + new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], + FreeBlock); + BranchInst::Create(NextBlock, FreeBlock); + + NullPtrBlock = NextBlock; + } + + BranchInst::Create(ContBB, NullPtrBlock); + + // CI and BCI are no longer needed, remove them. + BCI->eraseFromParent(); + CI->eraseFromParent(); + + /// InsertedScalarizedLoads - As we process loads, if we can't immediately + /// update all uses of the load, keep track of what scalarized loads are + /// inserted for a given load. 
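// Editor's note: a hedged, source-level picture of the per-field splitting and malloc-failure handling emitted above, written as plain C++ rather than the IR the pass actually builds; all names are illustrative only.
#include <cstdlib>

struct Pair { int A; double B; };   // hypothetical element type of the malloc

// Before: Pair *G = (Pair *)malloc(N * sizeof(Pair));
// After: one global and one malloc per field. If any field malloc fails,
// the survivors are freed and every field global is nulled out, preserving
// the all-or-nothing result of the original single allocation.
static int    *G_f0;   // stand-in for the ".f0" field global
static double *G_f1;   // stand-in for the ".f1" field global

static void heapSRoA(std::size_t N) {
  G_f0 = (int *)std::malloc(N * sizeof(int));         // F0 = malloc(field0)
  G_f1 = (double *)std::malloc(N * sizeof(double));   // F1 = malloc(field1)
  if (!G_f0 || !G_f1) {               // the RunningOr of the null checks
    if (G_f0) { std::free(G_f0); G_f0 = nullptr; }
    if (G_f1) { std::free(G_f1); G_f1 = nullptr; }
  }
}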
+ DenseMap > InsertedScalarizedValues; + InsertedScalarizedValues[GV] = FieldGlobals; + + std::vector > PHIsToRewrite; + + // Okay, the malloc site is completely handled. All of the uses of GV are now + // loads, and all uses of those loads are simple. Rewrite them to use loads + // of the per-field globals instead. + for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { + Instruction *User = cast(*UI++); + + if (LoadInst *LI = dyn_cast(User)) { + RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, + Context); + continue; + } + + // Must be a store of null. + StoreInst *SI = cast(User); + assert(isa(SI->getOperand(0)) && + "Unexpected heap-sra user!"); + + // Insert a store of null into each global. + for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { + const PointerType *PT = cast(FieldGlobals[i]->getType()); + Constant *Null = Constant::getNullValue(PT->getElementType()); + new StoreInst(Null, FieldGlobals[i], SI); + } + // Erase the original store. + SI->eraseFromParent(); + } + + // While we have PHIs that are interesting to rewrite, do it. + while (!PHIsToRewrite.empty()) { + PHINode *PN = PHIsToRewrite.back().first; + unsigned FieldNo = PHIsToRewrite.back().second; + PHIsToRewrite.pop_back(); + PHINode *FieldPN = cast(InsertedScalarizedValues[PN][FieldNo]); + assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); + + // Add all the incoming values. This can materialize more phis. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *InVal = PN->getIncomingValue(i); + InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, + PHIsToRewrite, Context); FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); } } @@ -1422,7 +1768,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, MallocInst *MI, Module::global_iterator &GVI, - TargetData &TD) { + TargetData *TD, + LLVMContext &Context) { // If this is a malloc of an abstract type, don't touch it. if (!MI->getAllocatedType()->isSized()) return false; @@ -1456,9 +1803,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // Restrict this transformation to only working on small allocations // (2048 bytes currently), as we don't want to introduce a 16M global or // something. - if (NElements->getZExtValue()* - TD.getTypeAllocSize(MI->getAllocatedType()) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, MI); + if (TD && + NElements->getZExtValue()* + TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context); return true; } } @@ -1485,7 +1833,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, if (const ArrayType *AT = dyn_cast(MI->getAllocatedType())) { MallocInst *NewMI = new MallocInst(AllocSTy, - ConstantInt::get(Type::Int32Ty, AT->getNumElements()), + ConstantInt::get(Type::getInt32Ty(Context), + AT->getNumElements()), "", MI); NewMI->takeName(MI); Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); @@ -1494,7 +1843,100 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, MI = NewMI; } - GVI = PerformHeapAllocSRoA(GV, MI); + GVI = PerformHeapAllocSRoA(GV, MI, Context); + return true; + } + } + + return false; +} + +/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a +/// pointer global variable with a single value stored into it that is a malloc +/// or cast of malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, + CallInst *CI, + BitCastInst *BCI, + Module::global_iterator &GVI, + TargetData *TD, + LLVMContext &Context) { + // If we can't figure out the type being malloced, then we can't optimize. + const Type *AllocTy = getMallocAllocatedType(CI); + assert(AllocTy); + + // If this is a malloc of an abstract type, don't touch it. + if (!AllocTy->isSized()) + return false; + + // We can't optimize this global unless all uses of it are *known* to be + // of the malloc value, not of the null initializer value (consider a use + // that compares the global's value against zero to see if the malloc has + // been reached). To do this, we check to see if all uses of the global + // would trap if the global were null: this proves that they must all + // happen after the malloc. + if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) + return false; + + // We can't optimize this if the malloc itself is used in a complex way, + // for example, being stored into multiple globals. This allows the + // malloc to be stored into the specified global, loaded, setcc'd, and + // GEP'd. These are all things we could transform to use the global + // for. + { + SmallPtrSet PHIs; + if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) + return false; + } + + // If we have a global that is only initialized with a fixed size malloc, + // transform the program to use global memory instead of malloc'd memory. + // This eliminates dynamic allocation, avoids an indirection accessing the + // data, and exposes the resultant global to further GlobalOpt. + if (ConstantInt *NElements = + dyn_cast(getMallocArraySize(CI, Context, TD))) { + // Restrict this transformation to only working on small allocations + // (2048 bytes currently), as we don't want to introduce a 16M global or + // something. + if (TD && + NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); + return true; + } + } + + // If the allocation is an array of structures, consider transforming this + // into multiple malloc'd arrays, one for each field. This is basically + // SRoA for malloc'd memory. + + // If this is an allocation of a fixed size array of structs, analyze as a + // variable size array. malloc [100 x struct],1 -> malloc struct, 100 + if (!isArrayMalloc(CI, Context, TD)) + if (const ArrayType *AT = dyn_cast(AllocTy)) + AllocTy = AT->getElementType(); + + if (const StructType *AllocSTy = dyn_cast(AllocTy)) { + // If the structure has an unreasonable number of fields, leave it + // alone. + if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && + AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + + // If this is a fixed size array, transform the Malloc to be an alloc of + // structs.
malloc [100 x struct],1 -> malloc struct, 100 + if (const ArrayType *AT = dyn_cast(getMallocAllocatedType(CI))) { + Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), + AT->getNumElements()); + Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), + AllocSTy, NumElements, + BCI->getName()); + Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); + BCI->replaceAllUsesWith(Cast); + BCI->eraseFromParent(); + CI->eraseFromParent(); + BCI = cast(NewMI); + CI = extractMallocCallFromBitCast(NewMI); + } + + GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD); return true; } } @@ -1506,7 +1948,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // that only one value (besides its initializer) is ever stored to the global. static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, Module::global_iterator &GVI, - TargetData &TD) { + TargetData *TD, LLVMContext &Context) { // Ignore no-op GEPs and bitcasts. StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1518,14 +1960,25 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) - SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); + SOVC = + ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); // Optimize away any trapping uses of the loaded value. - if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) + if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context)) return true; } else if (MallocInst *MI = dyn_cast(StoredOnceVal)) { - if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD)) + if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context)) return true; + } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { + if (getMallocAllocatedType(CI)) { + BitCastInst* BCI = NULL; + for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); + UI != E; ) + BCI = dyn_cast(cast(*UI++)); + if (BCI && + TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context)) + return true; + } } } @@ -1536,7 +1989,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, /// two values ever stored into GV are its initializer and OtherVal. See if we /// can shrink the global into a boolean and select between the two values /// whenever it is used. This exposes the values to other scalar optimizations. -static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, + LLVMContext &Context) { const Type *GVElType = GV->getType()->getElementType(); // If GVElType is already i1, it is already shrunk. If the type of the GV is @@ -1544,7 +1998,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // between them is very expensive and unlikely to lead to later // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. 
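// Editor's note: a simplified standalone model, not LLVM code, of the shrink-global-to-boolean transformation gated by the checks below. A global that only ever holds its initializer or one other constant can be replaced by a bool recording which of the two was stored; loads become selects between the two constants. The copy-of-a-previous-load case the real code also handles is omitted here for brevity.
static const int Init = 0, Other = 42;  // the only two values ever stored
static bool G_b = false;                // the new ".b" global

static void storeG(int V) { G_b = (V == Other); }  // V is Init or Other
static int loadG() { return G_b ? Other : Init; }  // select replaces the load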
- if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() || + if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() || isa(GVElType) || isa(GVElType)) return false; @@ -1554,18 +2008,19 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa(I) && !isa(I)) return false; - DOUT << " *** SHRINKING TO BOOL: " << *GV; + DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. - GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false, - GlobalValue::InternalLinkage, ConstantInt::getFalse(), + GlobalVariable *NewGV = new GlobalVariable(Context, + Type::getInt1Ty(Context), false, + GlobalValue::InternalLinkage, ConstantInt::getFalse(Context), GV->getName()+".b", - (Module *)NULL, GV->isThreadLocal()); GV->getParent()->getGlobalList().insert(GV, NewGV); Constant *InitVal = GV->getInitializer(); - assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!"); + assert(InitVal->getType() != Type::getInt1Ty(Context) && + "No reason to shrink to bool!"); // If initialized to zero and storing one into the global, we can use a cast // instead of a select to synthesize the desired value. @@ -1581,7 +2036,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // Only do this if we weren't storing a loaded value. Value *StoreVal; if (StoringOther || SI->getOperand(0) == InitVal) - StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther); + StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther); else { // Otherwise, we are storing a previously loaded copy. To do this, // change the copy from copying the original value to just copying the @@ -1632,7 +2087,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->removeDeadConstantUsers(); if (GV->use_empty()) { - DOUT << "GLOBAL DEAD: " << *GV; + DEBUG(errs() << "GLOBAL DEAD: " << *GV); GV->eraseFromParent(); ++NumDeleted; return true; @@ -1675,7 +2130,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GS.AccessingFunction->getName() == "main" && GS.AccessingFunction->hasExternalLinkage() && GV->getType()->getAddressSpace() == 0) { - DOUT << "LOCALIZING GLOBAL: " << *GV; + DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV); Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin(); const Type* ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment @@ -1692,11 +2147,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.isLoaded) { - DOUT << "GLOBAL NEVER LOADED: " << *GV; + DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV); // Delete any stores we can find to the global. We may not be able to // make it completely dead though. - bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); + bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), + GV->getContext()); // If the global is dead now, delete it. if (GV->use_empty()) { @@ -1707,16 +2163,16 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return Changed; } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { - DOUT << "MARKING CONSTANT: " << *GV; + DEBUG(errs() << "MARKING CONSTANT: " << *GV); GV->setConstant(true); // Clean up any obviously simplifiable users now. 
- CleanupConstantGlobalUsers(GV, GV->getInitializer()); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext()); // If the global is dead now, just nuke it. if (GV->use_empty()) { - DOUT << " *** Marking constant allowed us to simplify " - << "all users and delete global!\n"; + DEBUG(errs() << " *** Marking constant allowed us to simplify " + << "all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; } @@ -1724,11 +2180,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ++NumMarked; return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, - getAnalysis())) { - GVI = FirstNewGV; // Don't skip the newly produced globals! - return true; - } + if (TargetData *TD = getAnalysisIfAvailable()) + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD, + GV->getContext())) { + GVI = FirstNewGV; // Don't skip the newly produced globals! + return true; + } } else if (GS.StoredType == GlobalStatus::isStoredOnce) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the @@ -1740,11 +2197,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, GV->setInitializer(SOVConstant); // Clean up any obviously simplifiable users now. - CleanupConstantGlobalUsers(GV, GV->getInitializer()); + CleanupConstantGlobalUsers(GV, GV->getInitializer(), + GV->getContext()); if (GV->use_empty()) { - DOUT << " *** Substituting initializer allowed us to " - << "simplify all users and delete global!\n"; + DEBUG(errs() << " *** Substituting initializer allowed us to " + << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; } else { @@ -1757,13 +2215,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, - getAnalysis())) + getAnalysisIfAvailable(), + GV->getContext())) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a // boolean. if (Constant *SOVConstant = dyn_cast(GS.StoredOnceValue)) - if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { + if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) { ++NumShrunkToBool; return true; } @@ -1866,16 +2325,16 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - STy->getElementType(0) != Type::Int32Ty) return 0; + STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0; const PointerType *PFTy = dyn_cast(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast(PFTy->getElementType()); - if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() || - FTy->getNumParams() != 0) + if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) || + FTy->isVarArg() || FTy->getNumParams() != 0) return 0; // Verify that the initializer is simple enough for us to handle. 
- if (!I->hasInitializer()) return 0; + if (!I->hasDefinitiveInitializer()) return 0; ConstantArray *CA = dyn_cast(I->getInitializer()); if (!CA) return 0; for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) @@ -1916,10 +2375,11 @@ static std::vector ParseGlobalCtors(GlobalVariable *GV) { /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the /// specified array, returning the new global to use. static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, - const std::vector &Ctors) { + const std::vector &Ctors, + LLVMContext &Context) { // If we made a change, reassemble the initializer list. std::vector CSVals; - CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535)); + CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535)); CSVals.push_back(0); // Create the new init list. @@ -1928,19 +2388,19 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, if (Ctors[i]) { CSVals[1] = Ctors[i]; } else { - const Type *FTy = FunctionType::get(Type::VoidTy, false); + const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false); const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); - CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647); + CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647); } - CAList.push_back(ConstantStruct::get(CSVals)); + CAList.push_back(ConstantStruct::get(Context, CSVals, false)); } // Create the array initializer. const Type *StructTy = - cast(GCL->getType()->getElementType())->getElementType(); - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), - CAList); + cast(GCL->getType()->getElementType())->getElementType(); + Constant *CA = ConstantArray::get(ArrayType::get(StructTy, + CAList.size()), CAList); // If we didn't change the number of elements, don't create a new GV. if (CA->getType() == GCL->getInitializer()->getType()) { @@ -1949,9 +2409,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, } // Create the new global and insert it next to the existing list. - GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), + GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(), + GCL->isConstant(), GCL->getLinkage(), CA, "", - (Module *)NULL, GCL->isThreadLocal()); GCL->getParent()->getGlobalList().insert(GCL, NGV); NGV->takeName(GCL); @@ -1984,21 +2444,38 @@ static Constant *getVal(DenseMap &ComputedValues, /// enough for us to understand. In particular, if it is a cast of something, /// we punt. We basically just support direct accesses to globals and GEP's of /// globals. This should be kept up to date with CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C) { - if (GlobalVariable *GV = dyn_cast(C)) { - if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) - return false; // do not allow weak/linkonce/dllimport/dllexport linkage. - return !GV->isDeclaration(); // reject external globals. - } +static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { + // Conservatively, avoid aggregate types. This is because we don't + // want to worry about them partially overlapping other stores. + if (!cast(C->getType())->getElementType()->isSingleValueType()) + return false; + + if (GlobalVariable *GV = dyn_cast(C)) + // Do not allow weak/linkonce/dllimport/dllexport linkage or + // external globals. + return GV->hasDefinitiveInitializer(); + if (ConstantExpr *CE = dyn_cast(C)) // Handle a constantexpr gep. 
if (CE->getOpcode() == Instruction::GetElementPtr && - isa(CE->getOperand(0))) { + isa(CE->getOperand(0)) && + cast(CE)->isInBounds()) { GlobalVariable *GV = cast(CE->getOperand(0)); - if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) - return false; // do not allow weak/linkonce/dllimport/dllexport linkage. - return GV->hasInitializer() && - ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); + // Do not allow weak/linkonce/dllimport/dllexport linkage or + // external globals. + if (!GV->hasDefinitiveInitializer()) + return false; + + // The first index must be zero. + ConstantInt *CI = dyn_cast(*next(CE->op_begin())); + if (!CI || !CI->isZero()) return false; + + // The remaining indices must be compile-time known integers within the + // notional bounds of the corresponding static array types. + if (!CE->isGEPWithNoNotionalOverIndexing()) + return false; + + return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } return false; } @@ -2007,7 +2484,8 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { /// initializer. This returns 'Init' modified to reflect 'Val' stored into it. /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, - ConstantExpr *Addr, unsigned OpNo) { + ConstantExpr *Addr, unsigned OpNo, + LLVMContext &Context) { // Base case of the recursion. if (OpNo == Addr->getNumOperands()) { assert(Val->getType() == Init->getType() && "Type mismatch!"); @@ -2028,7 +2506,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Elts.push_back(UndefValue::get(STy->getElementType(i))); } else { - assert(0 && "This code is out of sync with " + llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } @@ -2036,10 +2514,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, ConstantInt *CU = cast(Addr->getOperand(OpNo)); unsigned Idx = CU->getZExtValue(); assert(Idx < STy->getNumElements() && "Struct index out of range!"); - Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); + Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context); // Return the modified struct. - return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked()); + return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked()); } else { ConstantInt *CI = cast(Addr->getOperand(OpNo)); const ArrayType *ATy = cast(Init->getType()); @@ -2056,20 +2534,21 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, Constant *Elt = UndefValue::get(ATy->getElementType()); Elts.assign(ATy->getNumElements(), Elt); } else { - assert(0 && "This code is out of sync with " + llvm_unreachable("This code is out of sync with " " ConstantFoldLoadThroughGEPConstantExpr"); } assert(CI->getZExtValue() < ATy->getNumElements()); Elts[CI->getZExtValue()] = - EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); + EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context); return ConstantArray::get(ATy, Elts); } } /// CommitValueTo - We have decided that Addr (which satisfies the predicate /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 
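// Editor's note: a minimal standalone sketch of the EvaluateStoreInto recursion defined nearby: walk the constant-GEP indices from OpNo onward into a nested aggregate, rebuilding each level with exactly one element replaced. Plain nested vectors stand in for ConstantStruct and ConstantArray; the Agg type is hypothetical.
#include <vector>

struct Agg {                 // a constant aggregate or a scalar leaf
  int Leaf = 0;              // meaningful only when Elts is empty
  std::vector<Agg> Elts;
};

static Agg storeInto(Agg Init, int Val,
                     const std::vector<unsigned> &Idx, unsigned OpNo) {
  // Base case of the recursion: the whole node is replaced by Val.
  if (OpNo == Idx.size()) {
    Agg A;
    A.Leaf = Val;
    return A;
  }
  // Recursive case: copy the aggregate, replacing only element Idx[OpNo].
  Init.Elts[Idx[OpNo]] = storeInto(Init.Elts[Idx[OpNo]], Val, Idx, OpNo + 1);
  return Init;
}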
-static void CommitValueTo(Constant *Val, Constant *Addr) { +static void CommitValueTo(Constant *Val, Constant *Addr, + LLVMContext &Context) { if (GlobalVariable *GV = dyn_cast(Addr)) { assert(GV->hasInitializer()); GV->setInitializer(Val); @@ -2080,7 +2559,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { GlobalVariable *GV = cast(CE->getOperand(0)); Constant *Init = GV->getInitializer(); - Init = EvaluateStoreInto(Init, Val, CE, 2); + Init = EvaluateStoreInto(Init, Val, CE, 2, Context); GV->setInitializer(Init); } @@ -2088,7 +2567,8 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { /// P after the stores reflected by 'memory' have been performed. If we can't /// decide, return null. static Constant *ComputeLoadResult(Constant *P, - const DenseMap &Memory) { + const DenseMap &Memory, + LLVMContext &Context) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. DenseMap::const_iterator I = Memory.find(P); @@ -2096,7 +2576,7 @@ static Constant *ComputeLoadResult(Constant *P, // Access it. if (GlobalVariable *GV = dyn_cast(P)) { - if (GV->hasInitializer()) + if (GV->hasDefinitiveInitializer()) return GV->getInitializer(); return 0; } @@ -2106,7 +2586,7 @@ static Constant *ComputeLoadResult(Constant *P, if (CE->getOpcode() == Instruction::GetElementPtr && isa(CE->getOperand(0))) { GlobalVariable *GV = cast(CE->getOperand(0)); - if (GV->hasInitializer()) + if (GV->hasDefinitiveInitializer()) return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); } @@ -2117,7 +2597,7 @@ static Constant *ComputeLoadResult(Constant *P, /// successful, false if we can't evaluate it. ActualArgs contains the formal /// arguments for the function. static bool EvaluateFunction(Function *F, Constant *&RetVal, - const std::vector &ActualArgs, + const SmallVectorImpl &ActualArgs, std::vector &CallStack, DenseMap &MutatedMemory, std::vector &AllocaTmps) { @@ -2126,6 +2606,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) return false; + LLVMContext &Context = F->getContext(); + CallStack.push_back(F); /// Values - As we compute SSA register values, we store their contents here. @@ -2152,7 +2634,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (StoreInst *SI = dyn_cast(CurInst)) { if (SI->isVolatile()) return false; // no volatile accesses. Constant *Ptr = getVal(Values, SI->getOperand(1)); - if (!isSimpleEnoughPointerToCommit(Ptr)) + if (!isSimpleEnoughPointerToCommit(Ptr, Context)) // If this is too complex for us to commit, reject it. return false; Constant *Val = getVal(Values, SI->getOperand(0)); @@ -2170,7 +2652,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, getVal(Values, CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast(CurInst)) { - InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + InstResult = + ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), getVal(Values, SI->getOperand(1)), getVal(Values, SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { @@ -2179,16 +2662,18 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) GEPOps.push_back(getVal(Values, *i)); - InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); + InstResult = cast(GEP)->isInBounds() ? 
+ ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : + ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); } else if (LoadInst *LI = dyn_cast(CurInst)) { if (LI->isVolatile()) return false; // no volatile accesses. InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), - MutatedMemory); + MutatedMemory, Context); if (InstResult == 0) return false; // Could not evaluate load. } else if (AllocaInst *AI = dyn_cast(CurInst)) { if (AI->isArrayAllocation()) return false; // Cannot handle array allocs. const Type *Ty = AI->getType()->getElementType(); - AllocaTmps.push_back(new GlobalVariable(Ty, false, + AllocaTmps.push_back(new GlobalVariable(Context, Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); @@ -2208,14 +2693,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, Function *Callee = dyn_cast(getVal(Values, CI->getOperand(0))); if (!Callee) return false; // Cannot resolve. - std::vector Formals; + SmallVector Formals; for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); i != e; ++i) Formals.push_back(getVal(Values, *i)); - + if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, &Formals[0], + if (Constant *C = ConstantFoldCall(Callee, Formals.data(), Formals.size())) { InstResult = C; } else { @@ -2310,16 +2795,17 @@ static bool EvaluateStaticConstructor(Function *F) { // Call the function. Constant *RetValDummy; - bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector(), - CallStack, MutatedMemory, AllocaTmps); + bool EvalSuccess = EvaluateFunction(F, RetValDummy, + SmallVector(), CallStack, + MutatedMemory, AllocaTmps); if (EvalSuccess) { // We succeeded at evaluation: commit the result. - DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" - << F->getName() << "' to " << MutatedMemory.size() - << " stores.\n"; + DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" + << F->getName() << "' to " << MutatedMemory.size() + << " stores.\n"); for (DenseMap::iterator I = MutatedMemory.begin(), E = MutatedMemory.end(); I != E; ++I) - CommitValueTo(I->second, I->first); + CommitValueTo(I->second, I->first, F->getContext()); } // At this point, we are done interpreting. If we created any 'alloca' @@ -2376,7 +2862,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (!MadeChange) return false; - GCL = InstallGlobalCtors(GCL, Ctors); + GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext()); return true; } diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index e4a9deadd971e..7b0e9c727cd45 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" @@ -129,7 +130,8 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { Function::arg_iterator AI = F.arg_begin(); for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) { // Do we have a constant argument? 
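
The (AI->hasByValAttr() && !F.onlyReadsMemory()) guard added just below is easy to motivate with a plain C++ analogy (hypothetical example, not from the patch): a byval argument is a fresh copy the callee may freely mutate, so it only keeps the caller's constant value if the function never writes memory.

    #include <cstdio>

    struct S { int X; };

    void takesCopy(S A) {   // plays the role of a byval argument
      A.X++;                // legal: mutates the callee's private copy
      std::printf("%d\n", A.X);
    }

    int main() {
      const S G{41};
      takesCopy(G);         // prints 42; G itself still holds 41
    }

Treating A as the constant {41} throughout takesCopy would be wrong after the first store, which is exactly the case the new check skips.
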
-    if (ArgumentConstants[i].second || AI->use_empty())
+    if (ArgumentConstants[i].second || AI->use_empty() ||
+        (AI->hasByValAttr() && !F.onlyReadsMemory()))
       continue;
 
     Value *V = ArgumentConstants[i].first;
@@ -151,13 +153,15 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
 // callers will be updated to use the value they pass in directly instead of
 // using the return value.
 bool IPCP::PropagateConstantReturn(Function &F) {
-  if (F.getReturnType() == Type::VoidTy)
+  if (F.getReturnType() == Type::getVoidTy(F.getContext()))
     return false; // No return value.
 
   // If this function could be overridden later in the link stage, we can't
   // propagate information about its results into callers.
   if (F.mayBeOverridden())
     return false;
+
+  LLVMContext &Context = F.getContext();
 
   // Check to see if this function returns a constant.
   SmallVector RetVals;
@@ -182,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
       if (!STy)
         V = RI->getOperand(i);
       else
-        V = FindInsertedValue(RI->getOperand(0), i);
+        V = FindInsertedValue(RI->getOperand(0), i, Context);
 
       if (V) {
         // Ignore undefs, we can change them into anything
diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp
index b55dea2c759c3..e7884ec634b68 100644
--- a/lib/Transforms/IPO/IndMemRemoval.cpp
+++ b/lib/Transforms/IPO/IndMemRemoval.cpp
@@ -1,4 +1,4 @@
-//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===//
+//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -10,8 +10,8 @@
 // This pass finds places where memory allocation functions may escape into
 // indirect land.  Some transforms are much easier (aka possible) only if free
 // or malloc are not called indirectly.
-// Thus find places where the address of memory functions are taken and construct
-// bounce functions with direct calls of those functions.
+// Thus find places where the address of memory functions are taken and
+// construct bounce functions with direct calls of those functions.
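
The bounce-function idea translates almost directly into C++ (hypothetical analogy; the real pass builds the wrapper in IR, as the hunks below show): when only the address of an allocator escapes, route the escape through a wrapper whose body is a direct call that analyses can still see.

    #include <cstdlib>

    // Wrapper named after the pass's own "malloc_llvm_bounce".
    extern "C" void *malloc_llvm_bounce(unsigned Size) {
      return std::malloc(Size);   // the direct call, mirroring the i32 cast
    }

    // The address-taken use now points at the wrapper, not at malloc.
    void *(*EscapedAlloc)(unsigned) = malloc_llvm_bounce;
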
// //===----------------------------------------------------------------------===// @@ -55,8 +55,8 @@ bool IndMemRemPass::runOnModule(Module &M) { Function* FN = Function::Create(F->getFunctionType(), GlobalValue::LinkOnceAnyLinkage, "free_llvm_bounce", &M); - BasicBlock* bb = BasicBlock::Create("entry",FN); - Instruction* R = ReturnInst::Create(bb); + BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN); + Instruction* R = ReturnInst::Create(M.getContext(), bb); new FreeInst(FN->arg_begin(), R); ++NumBounce; NumBounceSites += F->getNumUses(); @@ -70,11 +70,12 @@ bool IndMemRemPass::runOnModule(Module &M) { GlobalValue::LinkOnceAnyLinkage, "malloc_llvm_bounce", &M); FN->setDoesNotAlias(0); - BasicBlock* bb = BasicBlock::Create("entry",FN); + BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN); Instruction* c = CastInst::CreateIntegerCast( - FN->arg_begin(), Type::Int32Ty, false, "c", bb); - Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb); - ReturnInst::Create(a, bb); + FN->arg_begin(), Type::getInt32Ty(M.getContext()), false, "c", bb); + Instruction* a = new MallocInst(Type::getInt8Ty(M.getContext()), + c, "m", bb); + ReturnInst::Create(M.getContext(), a, bb); ++NumBounce; NumBounceSites += F->getNumUses(); F->replaceAllUsesWith(FN); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 5f9ea5453c1f6..2344403391cf2 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -19,11 +19,11 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/InlineCost.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index e107a0023ce6c..b1c643b558c57 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -18,11 +18,11 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/InlineCost.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -78,7 +78,7 @@ bool SimpleInliner::doInitialization(CallGraph &CG) { return false; // Don't crash on invalid code - if (!GV->hasInitializer()) + if (!GV->hasDefinitiveInitializer()) return false; const ConstantArray *InitList = dyn_cast(GV->getInitializer()); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index b382837289bda..ea47366f47eda 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -18,21 +18,25 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); 
STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); +STATISTIC(NumMergedAllocas, "Number of allocas merged together"); static cl::opt -InlineLimit("inline-threshold", cl::Hidden, cl::init(200), +InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 200)")); Inliner::Inliner(void *ID) @@ -45,19 +49,32 @@ Inliner::Inliner(void *ID, int Threshold) /// the call graph. If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { - Info.addRequired(); CallGraphSCCPass::getAnalysisUsage(Info); } -// InlineCallIfPossible - If it is possible to inline the specified call site, -// do so and update the CallGraph for this operation. -bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG, - const SmallPtrSet &SCCFunctions, - const TargetData &TD) { + +typedef DenseMap > +InlinedArrayAllocasTy; + +/// InlineCallIfPossible - If it is possible to inline the specified call site, +/// do so and update the CallGraph for this operation. +/// +/// This function also does some basic book-keeping to update the IR. The +/// InlinedArrayAllocas map keeps track of any allocas that are already +/// available from other functions inlined into the caller. If we are able to +/// inline this call site we attempt to reuse already available allocas or add +/// any new allocas to the set if not possible. +static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, + const TargetData *TD, + InlinedArrayAllocasTy &InlinedArrayAllocas) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); - if (!InlineFunction(CS, &CG, &TD)) return false; + // Try to inline the function. Get the list of static allocas that were + // inlined. + SmallVector StaticAllocas; + if (!InlineFunction(CS, &CG, TD, &StaticAllocas)) + return false; // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. @@ -67,23 +84,89 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG, !Caller->hasFnAttr(Attribute::StackProtectReq)) Caller->addFnAttr(Attribute::StackProtect); - // If we inlined the last possible call site to the function, delete the - // function body now. - if (Callee->use_empty() && (Callee->hasLocalLinkage() || - Callee->hasAvailableExternallyLinkage()) && - !SCCFunctions.count(Callee)) { - DOUT << " -> Deleting dead function: " << Callee->getName() << "\n"; - CallGraphNode *CalleeNode = CG[Callee]; - - // Remove any call graph edges from the callee to its callees. - CalleeNode->removeAllCalledFunctions(); - - resetCachedCostInfo(CalleeNode->getFunction()); + + // Look at all of the allocas that we inlined through this call site. If we + // have already inlined other allocas through other calls into this function, + // then we know that they have disjoint lifetimes and that we can merge them. + // + // There are many heuristics possible for merging these allocas, and the + // different options have different tradeoffs. One thing that we *really* + // don't want to hurt is SRoA: once inlining happens, often allocas are no + // longer address taken and so they can be promoted. + // + // Our "solution" for that is to only merge allocas whose outermost type is an + // array type. These are usually not promoted because someone is using a + // variable index into them. 
These are also often the most important ones to + // merge. + // + // A better solution would be to have real memory lifetime markers in the IR + // and not have the inliner do any merging of allocas at all. This would + // allow the backend to do proper stack slot coloring of all allocas that + // *actually make it to the backend*, which is really what we want. + // + // Because we don't have this information, we do this simple and useful hack. + // + SmallPtrSet UsedAllocas; + + // Loop over all the allocas we have so far and see if they can be merged with + // a previously inlined alloca. If not, remember that we had it. + for (unsigned AllocaNo = 0, e = StaticAllocas.size(); + AllocaNo != e; ++AllocaNo) { + AllocaInst *AI = StaticAllocas[AllocaNo]; + + // Don't bother trying to merge array allocations (they will usually be + // canonicalized to be an allocation *of* an array), or allocations whose + // type is not itself an array (because we're afraid of pessimizing SRoA). + const ArrayType *ATy = dyn_cast(AI->getAllocatedType()); + if (ATy == 0 || AI->isArrayAllocation()) + continue; + + // Get the list of all available allocas for this array type. + std::vector &AllocasForType = InlinedArrayAllocas[ATy]; + + // Loop over the allocas in AllocasForType to see if we can reuse one. Note + // that we have to be careful not to reuse the same "available" alloca for + // multiple different allocas that we just inlined, we use the 'UsedAllocas' + // set to keep track of which "available" allocas are being used by this + // function. Also, AllocasForType can be empty of course! + bool MergedAwayAlloca = false; + for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { + AllocaInst *AvailableAlloca = AllocasForType[i]; + + // The available alloca has to be in the right function, not in some other + // function in this SCC. + if (AvailableAlloca->getParent() != AI->getParent()) + continue; + + // If the inlined function already uses this alloca then we can't reuse + // it. + if (!UsedAllocas.insert(AvailableAlloca)) + continue; + + // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare + // success! + DEBUG(errs() << " ***MERGED ALLOCA: " << *AI); + + AI->replaceAllUsesWith(AvailableAlloca); + AI->eraseFromParent(); + MergedAwayAlloca = true; + ++NumMergedAllocas; + break; + } - // Removing the node for callee from the call graph and delete it. - delete CG.removeFunctionFromModule(CalleeNode); - ++NumDeleted; + // If we already nuked the alloca, we're done with it. + if (MergedAwayAlloca) + continue; + + // If we were unable to merge away the alloca either because there are no + // allocas of the right type available or because we reused them all + // already, remember that this alloca came from an inlined function and mark + // it used so we don't reuse it for other allocas from this inline + // operation. + AllocasForType.push_back(AI); + UsedAllocas.insert(AI); } + return true; } @@ -91,69 +174,145 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG, /// at the given CallSite. 
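
Before the cost logic below, the alloca-merging bookkeeping just described can be modeled in a few lines of stand-alone C++ (hypothetical types: a string stands in for const ArrayType*, an int for AllocaInst*). Allocas inlined through different call sites have disjoint lifetimes, so an array-typed alloca may reuse an available one, unless this same inline step already claimed it:

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using TypeKey  = std::string;
    using AllocaId = int;

    // Returns true if AI was merged into a previously inlined alloca.
    bool mergeOrRemember(const TypeKey &Ty, AllocaId AI,
                         std::map<TypeKey, std::vector<AllocaId>> &Available,
                         std::set<AllocaId> &UsedThisInline) {
      for (AllocaId Old : Available[Ty])
        if (UsedThisInline.insert(Old).second) // not yet reused here
          return true;                         // caller RAUWs AI with Old
      Available[Ty].push_back(AI);             // keep AI for later call sites
      UsedThisInline.insert(AI);
      return false;
    }

Available corresponds to InlinedArrayAllocas (it persists across call sites into the same caller), while UsedThisInline corresponds to the per-call-site UsedAllocas set.
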
bool Inliner::shouldInline(CallSite CS) { InlineCost IC = getInlineCost(CS); - float FudgeFactor = getInlineFudgeFactor(CS); if (IC.isAlways()) { - DOUT << " Inlining: cost=always" - << ", Call: " << *CS.getInstruction(); + DEBUG(errs() << " Inlining: cost=always" + << ", Call: " << *CS.getInstruction() << "\n"); return true; } if (IC.isNever()) { - DOUT << " NOT Inlining: cost=never" - << ", Call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost=never" + << ", Call: " << *CS.getInstruction() << "\n"); return false; } int Cost = IC.getValue(); int CurrentThreshold = InlineThreshold; - Function *Fn = CS.getCaller(); - if (Fn && !Fn->isDeclaration() - && Fn->hasFnAttr(Attribute::OptimizeForSize) - && InlineThreshold != 50) { + Function *Caller = CS.getCaller(); + if (Caller && !Caller->isDeclaration() && + Caller->hasFnAttr(Attribute::OptimizeForSize) && + InlineLimit.getNumOccurrences() == 0 && + InlineThreshold != 50) CurrentThreshold = 50; - } + float FudgeFactor = getInlineFudgeFactor(CS); if (Cost >= (int)(CurrentThreshold * FudgeFactor)) { - DOUT << " NOT Inlining: cost=" << Cost - << ", Call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost=" << Cost + << ", Call: " << *CS.getInstruction() << "\n"); return false; - } else { - DOUT << " Inlining: cost=" << Cost - << ", Call: " << *CS.getInstruction(); - return true; } + + // Try to detect the case where the current inlining candidate caller + // (call it B) is a static function and is an inlining candidate elsewhere, + // and the current candidate callee (call it C) is large enough that + // inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B + // into its callers. + if (Caller->hasLocalLinkage()) { + int TotalSecondaryCost = 0; + bool outerCallsFound = false; + bool allOuterCallsWillBeInlined = true; + bool someOuterCallWouldNotBeInlined = false; + for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); + I != E; ++I) { + CallSite CS2 = CallSite::get(*I); + + // If this isn't a call to Caller (it could be some other sort + // of reference) skip it. + if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller) + continue; + + InlineCost IC2 = getInlineCost(CS2); + if (IC2.isNever()) + allOuterCallsWillBeInlined = false; + if (IC2.isAlways() || IC2.isNever()) + continue; + + outerCallsFound = true; + int Cost2 = IC2.getValue(); + int CurrentThreshold2 = InlineThreshold; + Function *Caller2 = CS2.getCaller(); + if (Caller2 && !Caller2->isDeclaration() && + Caller2->hasFnAttr(Attribute::OptimizeForSize) && + InlineThreshold != 50) + CurrentThreshold2 = 50; + + float FudgeFactor2 = getInlineFudgeFactor(CS2); + + if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) + allOuterCallsWillBeInlined = false; + + // See if we have this case. We subtract off the penalty + // for the call instruction, which we would be deleting. + if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && + Cost2 + Cost - (InlineConstants::CallPenalty + 1) >= + (int)(CurrentThreshold2 * FudgeFactor2)) { + someOuterCallWouldNotBeInlined = true; + TotalSecondaryCost += Cost2; + } + } + // If all outer calls to Caller would get inlined, the cost for the last + // one is set very low by getInlineCost, in anticipation that Caller will + // be removed entirely. We did not account for this above unless there + // is only one caller of Caller. 
+ if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end()) + TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + + if (outerCallsFound && someOuterCallWouldNotBeInlined && + TotalSecondaryCost < Cost) { + DEBUG(errs() << " NOT Inlining: " << *CS.getInstruction() << + " Cost = " << Cost << + ", outer Cost = " << TotalSecondaryCost << '\n'); + return false; + } + } + + DEBUG(errs() << " Inlining: cost=" << Cost + << ", Call: " << *CS.getInstruction() << '\n'); + return true; } -bool Inliner::runOnSCC(const std::vector &SCC) { +bool Inliner::runOnSCC(std::vector &SCC) { CallGraph &CG = getAnalysis(); - TargetData &TD = getAnalysis(); + const TargetData *TD = getAnalysisIfAvailable(); SmallPtrSet SCCFunctions; - DOUT << "Inliner visiting SCC:"; + DEBUG(errs() << "Inliner visiting SCC:"); for (unsigned i = 0, e = SCC.size(); i != e; ++i) { Function *F = SCC[i]->getFunction(); if (F) SCCFunctions.insert(F); - DOUT << " " << (F ? F->getName() : "INDIRECTNODE"); + DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE")); } // Scan through and identify all call sites ahead of time so that we only // inline call sites in the original functions, not call sites that result // from inlining other functions. - std::vector CallSites; + SmallVector CallSites; - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - if (Function *F = SCC[i]->getFunction()) - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - CallSite CS = CallSite::get(I); - if (CS.getInstruction() && !isa(I) && - (!CS.getCalledFunction() || - !CS.getCalledFunction()->isDeclaration())) - CallSites.push_back(CS); - } + for (unsigned i = 0, e = SCC.size(); i != e; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) continue; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS = CallSite::get(I); + // If this isn't a call, or it is a call to an intrinsic, it can + // never be inlined. + if (CS.getInstruction() == 0 || isa(I)) + continue; + + // If this is a direct call to an external function, we can never inline + // it. If it is an indirect call, inlining may resolve it to be a + // direct call, so we keep it. + if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) + continue; + + CallSites.push_back(CS); + } + } - DOUT << ": " << CallSites.size() << " call sites.\n"; + DEBUG(errs() << ": " << CallSites.size() << " call sites.\n"); // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. @@ -163,6 +322,9 @@ bool Inliner::runOnSCC(const std::vector &SCC) { if (SCCFunctions.count(F)) std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); + + InlinedArrayAllocasTy InlinedArrayAllocas; + // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. bool Changed = false; @@ -171,51 +333,68 @@ bool Inliner::runOnSCC(const std::vector &SCC) { LocalChange = false; // Iterate over the outer loop because inlining functions can cause indirect // calls to become direct calls. - for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) - if (Function *Callee = CallSites[CSi].getCalledFunction()) { - // Calls to external functions are never inlinable. 
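
Condensing the shouldInline decision above into one schematic helper (hypothetical model; the 200/50 thresholds and the call-penalty adjustment come from the hunks, everything else is simplified):

    struct SiteCost { int Cost; float Fudge; int Threshold; };

    static bool passesThreshold(const SiteCost &S) {
      return S.Cost < static_cast<int>(S.Threshold * S.Fudge);
    }

    // For a static caller B and callee C: refuse to inline C into B when
    // doing so would block enough inlines of B itself that the lost
    // opportunities (TotalSecondaryCost) outweigh inlining C.
    static bool shouldInlineModel(const SiteCost &CIntoB, bool CallerIsStatic,
                                  bool SomeOuterCallBlocked,
                                  int TotalSecondaryCost) {
      if (!passesThreshold(CIntoB))
        return false;
      if (CallerIsStatic && SomeOuterCallBlocked &&
          TotalSecondaryCost < CIntoB.Cost)
        return false;   // better to inline B into its callers instead
      return true;
    }
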
- if (Callee->isDeclaration()) { - if (SCC.size() == 1) { - std::swap(CallSites[CSi], CallSites.back()); - CallSites.pop_back(); - } else { - // Keep the 'in SCC / not in SCC' boundary correct. - CallSites.erase(CallSites.begin()+CSi); - } - --CSi; - continue; - } - - // If the policy determines that we should inline this function, - // try to do so. - CallSite CS = CallSites[CSi]; - if (shouldInline(CS)) { - Function *Caller = CS.getCaller(); - // Attempt to inline the function... - if (InlineCallIfPossible(CS, CG, SCCFunctions, TD)) { - // Remove any cached cost info for this caller, as inlining the - // callee has increased the size of the caller (which may be the - // same as the callee). - resetCachedCostInfo(Caller); - - // Remove this call site from the list. If possible, use - // swap/pop_back for efficiency, but do not use it if doing so would - // move a call site to a function in this SCC before the - // 'FirstCallInSCC' barrier. - if (SCC.size() == 1) { - std::swap(CallSites[CSi], CallSites.back()); - CallSites.pop_back(); - } else { - CallSites.erase(CallSites.begin()+CSi); - } - --CSi; - - ++NumInlined; - Changed = true; - LocalChange = true; - } - } + for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { + CallSite CS = CallSites[CSi]; + + Function *Callee = CS.getCalledFunction(); + // We can only inline direct calls to non-declarations. + if (Callee == 0 || Callee->isDeclaration()) continue; + + // If the policy determines that we should inline this function, + // try to do so. + if (!shouldInline(CS)) + continue; + + Function *Caller = CS.getCaller(); + // Attempt to inline the function... + if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) + continue; + + // If we inlined the last possible call site to the function, delete the + // function body now. + if (Callee->use_empty() && Callee->hasLocalLinkage() && + // TODO: Can remove if in SCC now. + !SCCFunctions.count(Callee) && + + // The function may be apparently dead, but if there are indirect + // callgraph references to the node, we cannot delete it yet, this + // could invalidate the CGSCC iterator. + CG[Callee]->getNumReferences() == 0) { + DEBUG(errs() << " -> Deleting dead function: " + << Callee->getName() << "\n"); + CallGraphNode *CalleeNode = CG[Callee]; + + // Remove any call graph edges from the callee to its callees. + CalleeNode->removeAllCalledFunctions(); + + resetCachedCostInfo(Callee); + + // Removing the node for callee from the call graph and delete it. + delete CG.removeFunctionFromModule(CalleeNode); + ++NumDeleted; } + + // Remove any cached cost info for this caller, as inlining the + // callee has increased the size of the caller (which may be the + // same as the callee). + resetCachedCostInfo(Caller); + + // Remove this call site from the list. If possible, use + // swap/pop_back for efficiency, but do not use it if doing so would + // move a call site to a function in this SCC before the + // 'FirstCallInSCC' barrier. + if (SCC.size() == 1) { + std::swap(CallSites[CSi], CallSites.back()); + CallSites.pop_back(); + } else { + CallSites.erase(CallSites.begin()+CSi); + } + --CSi; + + ++NumInlined; + Changed = true; + LocalChange = true; + } } while (LocalChange); return Changed; @@ -227,47 +406,55 @@ bool Inliner::doFinalization(CallGraph &CG) { return removeDeadFunctions(CG); } - /// removeDeadFunctions - Remove dead functions that are not included in - /// DNR (Do Not Remove) list. 
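
The dead-callee cleanup that the rewritten loop above performs inline can be summarized as one predicate (hypothetical helper mirroring the checks in the hunk):

    // A callee may be deleted immediately after its last call site is
    // inlined only when all of these hold.
    static bool canDeleteCalleeNow(bool UseListEmpty, bool HasLocalLinkage,
                                   bool InCurrentSCC, unsigned CGRefs) {
      return UseListEmpty &&    // no remaining uses of the function
             HasLocalLinkage && // nothing outside the module can call it
             !InCurrentSCC &&   // SCC members stay put (TODO in the patch)
             CGRefs == 0;       // indirect callgraph refs would invalidate
                                // the CGSCC iterator
    }
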
+/// removeDeadFunctions - Remove dead functions that are not included in
+/// DNR (Do Not Remove) list.
 bool Inliner::removeDeadFunctions(CallGraph &CG,
-                                  SmallPtrSet *DNR) {
-  std::set FunctionsToRemove;
+                                  SmallPtrSet *DNR) {
+  SmallPtrSet FunctionsToRemove;
 
   // Scan for all of the functions, looking for ones that should now be removed
   // from the program.  Insert the dead ones in the FunctionsToRemove set.
   for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
     CallGraphNode *CGN = I->second;
-    if (Function *F = CGN ? CGN->getFunction() : 0) {
-      // If the only remaining users of the function are dead constants, remove
-      // them.
-      F->removeDeadConstantUsers();
-
-      if (DNR && DNR->count(F))
-        continue;
+    if (CGN->getFunction() == 0)
+      continue;
+
+    Function *F = CGN->getFunction();
+
+    // If the only remaining users of the function are dead constants, remove
+    // them.
+    F->removeDeadConstantUsers();
+
+    if (DNR && DNR->count(F))
+      continue;
+    if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
+        !F->hasAvailableExternallyLinkage())
+      continue;
+    if (!F->use_empty())
+      continue;
+
+    // Remove any call graph edges from the function to its callees.
+    CGN->removeAllCalledFunctions();
+
+    // Remove any edges from the external node to the function's call graph
+    // node.  These edges might have been made irrelevant due to
+    // optimization of the program.
+    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
 
-      if ((F->hasLinkOnceLinkage() || F->hasLocalLinkage()) &&
-          F->use_empty()) {
-
-        // Remove any call graph edges from the function to its callees.
-        CGN->removeAllCalledFunctions();
-
-        // Remove any edges from the external node to the function's call graph
-        // node.  These edges might have been made irrelegant due to
-        // optimization of the program.
-        CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
-
-        // Removing the node for callee from the call graph and delete it.
-        FunctionsToRemove.insert(CGN);
-      }
-    }
+    // Remember the node so the callee can be removed from the call graph
+    // and deleted below.
+    FunctionsToRemove.insert(CGN);
   }
 
   // Now that we know which functions to delete, do so.  We didn't want to do
   // this inline, because that would invalidate our CallGraph::iterator
   // objects. :(
+  //
+  // Note that it doesn't matter that we are iterating over a non-stable set
+  // here to do this, since it doesn't matter in which order the functions
+  // are deleted.
   bool Changed = false;
-  for (std::set::iterator I = FunctionsToRemove.begin(),
-         E = FunctionsToRemove.end(); I != E; ++I) {
+  for (SmallPtrSet::iterator I = FunctionsToRemove.begin(),
+       E = FunctionsToRemove.end(); I != E; ++I) {
     resetCachedCostInfo((*I)->getFunction());
     delete CG.removeFunctionFromModule(*I);
     ++NumDeleted;
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 5093ae90b5ba1..e3c3c672c590e 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
 #include <fstream>
 #include <set>
@@ -86,7 +87,7 @@ void InternalizePass::LoadFile(const char *Filename) {
   // Load the APIFile...
   std::ifstream In(Filename);
   if (!In.good()) {
-    cerr << "WARNING: Internalize couldn't load file '" << Filename
+    errs() << "WARNING: Internalize couldn't load file '" << Filename
Continuing as if it's empty.\n"; return; // Just continue as if the file were empty } @@ -101,7 +102,7 @@ void InternalizePass::LoadFile(const char *Filename) { bool InternalizePass::runOnModule(Module &M) { CallGraph *CG = getAnalysisIfAvailable(); CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; - + if (ExternalNames.empty()) { // Return if we're not in 'all but main' mode and have no external api if (!AllButMain) @@ -131,12 +132,14 @@ bool InternalizePass::runOnModule(Module &M) { if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); Changed = true; ++NumFunctions; - DOUT << "Internalizing func " << I->getName() << "\n"; + DEBUG(errs() << "Internalizing func " << I->getName() << "\n"); } // Never internalize the llvm.used symbol. It is used to implement // attribute((used)). + // FIXME: Shouldn't this just filter on llvm.metadata section?? ExternalNames.insert("llvm.used"); + ExternalNames.insert("llvm.compiler.used"); // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) @@ -158,7 +161,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumGlobals; - DOUT << "Internalized gvar " << I->getName() << "\n"; + DEBUG(errs() << "Internalized gvar " << I->getName() << "\n"); } // Mark all aliases that are not in the api as internal as well. @@ -169,7 +172,7 @@ bool InternalizePass::runOnModule(Module &M) { I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumAliases; - DOUT << "Internalized alias " << I->getName() << "\n"; + DEBUG(errs() << "Internalized alias " << I->getName() << "\n"); } return Changed; diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 0c654438d5086..02ac3bb903c75 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -20,7 +20,7 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/Scalar.h" @@ -33,23 +33,19 @@ using namespace llvm; STATISTIC(NumExtracted, "Number of loops extracted"); namespace { - // FIXME: This is not a function pass, but the PassManager doesn't allow - // Module passes to require FunctionPasses, so we can't get loop info if we're - // not a function pass. - struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass { + struct VISIBILITY_HIDDEN LoopExtractor : public LoopPass { static char ID; // Pass identification, replacement for typeid unsigned NumLoops; explicit LoopExtractor(unsigned numLoops = ~0) - : FunctionPass(&ID), NumLoops(numLoops) {} + : LoopPass(&ID), NumLoops(numLoops) {} - virtual bool runOnFunction(Function &F); + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(BreakCriticalEdgesID); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); - AU.addRequired(); } }; } @@ -73,68 +69,50 @@ Y("loop-extract-single", "Extract at most one loop into a new function"); // createLoopExtractorPass - This pass extracts all natural loops from the // program into a function if it can. 
// -FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } +Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } -bool LoopExtractor::runOnFunction(Function &F) { - LoopInfo &LI = getAnalysis(); - - // If this function has no loops, there is nothing to do. - if (LI.empty()) +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { + // Only visit top-level loops. + if (L->getParentLoop()) return false; DominatorTree &DT = getAnalysis(); - - // If there is more than one top-level loop in this function, extract all of - // the loops. bool Changed = false; - if (LI.end()-LI.begin() > 1) { - for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) { - if (NumLoops == 0) return Changed; - --NumLoops; - Changed |= ExtractLoop(DT, *i) != 0; - ++NumExtracted; - } - } else { - // Otherwise there is exactly one top-level loop. If this function is more - // than a minimal wrapper around the loop, extract the loop. - Loop *TLL = *LI.begin(); - bool ShouldExtractLoop = false; - - // Extract the loop if the entry block doesn't branch to the loop header. - TerminatorInst *EntryTI = F.getEntryBlock().getTerminator(); - if (!isa(EntryTI) || - !cast(EntryTI)->isUnconditional() || - EntryTI->getSuccessor(0) != TLL->getHeader()) - ShouldExtractLoop = true; - else { - // Check to see if any exits from the loop are more than just return - // blocks. - SmallVector ExitBlocks; - TLL->getExitBlocks(ExitBlocks); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!isa(ExitBlocks[i]->getTerminator())) { - ShouldExtractLoop = true; - break; - } - } - if (ShouldExtractLoop) { - if (NumLoops == 0) return Changed; - --NumLoops; - Changed |= ExtractLoop(DT, TLL) != 0; - ++NumExtracted; - } else { - // Okay, this function is a minimal container around the specified loop. - // If we extract the loop, we will continue to just keep extracting it - // infinitely... so don't extract it. However, if the loop contains any - // subloops, extract them. - for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) { - if (NumLoops == 0) return Changed; - --NumLoops; - Changed |= ExtractLoop(DT, *i) != 0; - ++NumExtracted; + // If there is more than one top-level loop in this function, extract all of + // the loops. Otherwise there is exactly one top-level loop; in this case if + // this function is more than a minimal wrapper around the loop, extract + // the loop. + bool ShouldExtractLoop = false; + + // Extract the loop if the entry block doesn't branch to the loop header. + TerminatorInst *EntryTI = + L->getHeader()->getParent()->getEntryBlock().getTerminator(); + if (!isa(EntryTI) || + !cast(EntryTI)->isUnconditional() || + EntryTI->getSuccessor(0) != L->getHeader()) + ShouldExtractLoop = true; + else { + // Check to see if any exits from the loop are more than just return + // blocks. + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + if (!isa(ExitBlocks[i]->getTerminator())) { + ShouldExtractLoop = true; + break; } + } + if (ShouldExtractLoop) { + if (NumLoops == 0) return Changed; + --NumLoops; + if (ExtractLoop(DT, L) != 0) { + Changed = true; + // After extraction, the loop is replaced by a function call, so + // we shouldn't try to run any more loop passes on it. 
+ LPM.deleteLoopFromQueue(L); } + ++NumExtracted; } return Changed; @@ -143,7 +121,7 @@ bool LoopExtractor::runOnFunction(Function &F) { // createSingleLoopExtractorPass - This pass extracts one natural loop from the // program into a function if it can. This is used by bugpoint. // -FunctionPass *llvm::createSingleLoopExtractorPass() { +Pass *llvm::createSingleLoopExtractorPass() { return new SingleLoopExtractor(); } @@ -193,8 +171,8 @@ void BlockExtractorPass::LoadFile(const char *Filename) { // Load the BlockFile... std::ifstream In(Filename); if (!In.good()) { - cerr << "WARNING: BlockExtractor couldn't load file '" << Filename - << "'!\n"; + errs() << "WARNING: BlockExtractor couldn't load file '" << Filename + << "'!\n"; return; } while (In) { diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index dfc040b833424..55194b34cf205 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -39,6 +39,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" @@ -200,7 +201,7 @@ bool LowerSetJmp::runOnModule(Module& M) { // This function is always successful, unless it isn't. bool LowerSetJmp::doInitialization(Module& M) { - const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type *SBPTy = Type::getInt8PtrTy(M.getContext()); const Type *SBPPTy = PointerType::getUnqual(SBPTy); // N.B. See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for @@ -208,33 +209,40 @@ bool LowerSetJmp::doInitialization(Module& M) // void __llvm_sjljeh_init_setjmpmap(void**) InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", - Type::VoidTy, SBPPTy, (Type *)0); + Type::getVoidTy(M.getContext()), + SBPPTy, (Type *)0); // void __llvm_sjljeh_destroy_setjmpmap(void**) DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", - Type::VoidTy, SBPPTy, (Type *)0); + Type::getVoidTy(M.getContext()), + SBPPTy, (Type *)0); // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned) AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", - Type::VoidTy, SBPPTy, SBPTy, - Type::Int32Ty, (Type *)0); + Type::getVoidTy(M.getContext()), + SBPPTy, SBPTy, + Type::getInt32Ty(M.getContext()), + (Type *)0); // void __llvm_sjljeh_throw_longjmp(int*, int) ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", - Type::VoidTy, SBPTy, Type::Int32Ty, + Type::getVoidTy(M.getContext()), SBPTy, + Type::getInt32Ty(M.getContext()), (Type *)0); // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **) TryCatchLJ = M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", - Type::Int32Ty, SBPPTy, (Type *)0); + Type::getInt32Ty(M.getContext()), SBPPTy, (Type *)0); // bool __llvm_sjljeh_is_longjmp_exception() IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", - Type::Int1Ty, (Type *)0); + Type::getInt1Ty(M.getContext()), + (Type *)0); // int __llvm_sjljeh_get_longjmp_value() GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", - Type::Int32Ty, (Type *)0); + Type::getInt32Ty(M.getContext()), + (Type *)0); return true; } @@ -257,7 +265,8 @@ bool LowerSetJmp::IsTransformableFunction(const std::string& Name) { // throwing the exception for us. 
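
Before the longjmp-site rewrite below, it helps to see the control-flow model the __llvm_sjljeh_* runtime implements, expressed as a C++ exception analogy (hypothetical example): each setjmp site registers an ID, longjmp becomes a throw, and a per-function switch on the ID resumes the matching continuation.

    #include <cstdio>

    struct LongJmpExc { int SetJmpId; int Value; };

    static void doLongJmp(int Id, int Val) { throw LongJmpExc{Id, Val}; }

    int main() {
      try {
        doLongJmp(/*SetJmpId=*/0, /*Value=*/7);
      } catch (const LongJmpExc &E) {
        switch (E.SetJmpId) {   // plays the role of the GetSJSwitch block
        case 0:
          std::printf("setjmp #0 resumed with %d\n", E.Value);
          break;
        default:
          throw;                // not ours: the RethrowExcept path
        }
      }
    }
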
void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) { - const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type* SBPTy = + Type::getInt8PtrTy(Inst->getContext()); // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the // same parameters as "longjmp", except that the buffer is cast to a @@ -278,7 +287,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) if (SVP.first) BranchInst::Create(SVP.first->getParent(), Inst); else - new UnwindInst(Inst); + new UnwindInst(Inst->getContext(), Inst); // Remove all insts after the branch/unwind inst. Go from back to front to // avoid replaceAllUsesWith if possible. @@ -309,7 +318,8 @@ AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func) assert(Inst && "Couldn't find even ONE instruction in entry block!"); // Fill in the alloca and call to initialize the SJ map. - const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type *SBPTy = + Type::getInt8PtrTy(Func->getContext()); AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst); CallInst::Create(InitSJMap, Map, "", Inst); return SJMap[Func] = Map; @@ -324,12 +334,13 @@ BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func) // The basic block we're going to jump to if we need to rethrow the // exception. - BasicBlock* Rethrow = BasicBlock::Create("RethrowExcept", Func); + BasicBlock* Rethrow = + BasicBlock::Create(Func->getContext(), "RethrowExcept", Func); // Fill in the "Rethrow" BB with a call to rethrow the exception. This // is the last instruction in the BB since at this point the runtime // should exit this function and go to the next function. - new UnwindInst(Rethrow); + new UnwindInst(Func->getContext(), Rethrow); return RethrowBBMap[Func] = Rethrow; } @@ -340,7 +351,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, { if (SwitchValMap[Func].first) return SwitchValMap[Func]; - BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func); + BasicBlock* LongJmpPre = + BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func); // Keep track of the preliminary basic block for some of the other // transformations. @@ -352,7 +364,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func, // The "decision basic block" gets the number associated with the // setjmp call returning to switch on and the value returned by // longjmp. - BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func); + BasicBlock* DecisionBB = + BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func); BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre); @@ -375,12 +388,13 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) Function* Func = ABlock->getParent(); // Add this setjmp to the setjmp map. - const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); + const Type* SBPTy = + Type::getInt8PtrTy(Inst->getContext()); CastInst* BufPtr = new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); std::vector Args = make_vector(GetSetJmpMap(Func), BufPtr, - ConstantInt::get(Type::Int32Ty, + ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++), 0); CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst); @@ -424,14 +438,17 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // This PHI node will be in the new block created from the // splitBasicBlock call. - PHINode* PHI = PHINode::Create(Type::Int32Ty, "SetJmpReturn", Inst); + PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), + "SetJmpReturn", Inst); // Coming from a call to setjmp, the return is 0. 
- PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock); + PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())), + ABlock); // Add the case for this setjmp's number... SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); - SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1), + SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()), + SetJmpIDMap[Func] - 1), SetJmpContBlock); // Value coming from the handling of the exception. @@ -503,7 +520,8 @@ void LowerSetJmp::visitInvokeInst(InvokeInst& II) BasicBlock* ExceptBB = II.getUnwindDest(); Function* Func = BB->getParent(); - BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func); + BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(), + "InvokeExcept", Func); // If this is a longjmp exception, then branch to the preliminary BB of // the longjmp exception handling. Otherwise, go to the old exception. diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 5693cc0fc3b47..13bbf9c682e4a 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -47,11 +47,14 @@ #include "llvm/Constants.h" #include "llvm/InlineAsm.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; @@ -61,7 +64,7 @@ STATISTIC(NumFunctionsMerged, "Number of functions merged"); namespace { struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass { static char ID; // Pass identification, replacement for typeid - MergeFunctions() : ModulePass((intptr_t)&ID) {} + MergeFunctions() : ModulePass(&ID) {} bool runOnModule(Module &M); }; @@ -127,7 +130,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { return false; default: - assert(0 && "Unknown type!"); + llvm_unreachable("Unknown type!"); return false; case Type::PointerTyID: { @@ -185,7 +188,8 @@ static bool isEquivalentOperation(const Instruction *I1, const Instruction *I2) { if (I1->getOpcode() != I2->getOpcode() || I1->getNumOperands() != I2->getNumOperands() || - !isEquivalentType(I1->getType(), I2->getType())) + !isEquivalentType(I1->getType(), I2->getType()) || + !I1->hasSameSubclassOptionalData(I2)) return false; // We have two instructions of identical opcode and #operands. 
Check to see @@ -449,6 +453,7 @@ static LinkageCategory categorize(const Function *F) { switch (F->getLinkage()) { case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: return Internal; case GlobalValue::WeakAnyLinkage: @@ -468,14 +473,14 @@ static LinkageCategory categorize(const Function *F) { return ExternalStrong; } - assert(0 && "Unknown LinkageType."); + llvm_unreachable("Unknown LinkageType."); return ExternalWeak; } static void ThunkGToF(Function *F, Function *G) { Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); - BasicBlock *BB = BasicBlock::Create("", NewG); + BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); std::vector Args; unsigned i = 0; @@ -494,13 +499,13 @@ static void ThunkGToF(Function *F, Function *G) { CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); - if (NewG->getReturnType() == Type::VoidTy) { - ReturnInst::Create(BB); + if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) { + ReturnInst::Create(F->getContext(), BB); } else if (CI->getType() != NewG->getReturnType()) { Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); - ReturnInst::Create(BCI, BB); + ReturnInst::Create(F->getContext(), BCI, BB); } else { - ReturnInst::Create(CI, BB); + ReturnInst::Create(F->getContext(), CI, BB); } NewG->copyAttributesFrom(G); @@ -574,22 +579,22 @@ static bool fold(std::vector &FnVec, unsigned i, unsigned j) { case Internal: switch (catG) { case ExternalStrong: - assert(0); + llvm_unreachable(0); // fall-through case ExternalWeak: - if (F->hasAddressTaken()) + if (F->hasAddressTaken()) ThunkGToF(F, G); else AliasGToF(F, G); - break; + break; case Internal: { bool addrTakenF = F->hasAddressTaken(); bool addrTakenG = G->hasAddressTaken(); if (!addrTakenF && addrTakenG) { std::swap(FnVec[i], FnVec[j]); std::swap(F, G); - std::swap(addrTakenF, addrTakenG); - } + std::swap(addrTakenF, addrTakenG); + } if (addrTakenF && addrTakenG) { ThunkGToF(F, G); @@ -597,7 +602,7 @@ static bool fold(std::vector &FnVec, unsigned i, unsigned j) { assert(!addrTakenG); AliasGToF(F, G); } - } break; + } break; } break; } @@ -629,19 +634,19 @@ bool MergeFunctions::runOnModule(Module &M) { bool LocalChanged; do { LocalChanged = false; - DOUT << "size: " << FnMap.size() << "\n"; + DEBUG(errs() << "size: " << FnMap.size() << "\n"); for (std::map >::iterator I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector &FnVec = I->second; - DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n"; + DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); for (int i = 0, e = FnVec.size(); i != e; ++i) { for (int j = i + 1; j != e; ++j) { bool isEqual = equals(FnVec[i], FnVec[j]); - DOUT << " " << FnVec[i]->getName() - << (isEqual ? " == " : " != ") - << FnVec[j]->getName() << "\n"; + DEBUG(errs() << " " << FnVec[i]->getName() + << (isEqual ? " == " : " != ") + << FnVec[j]->getName() << "\n"); if (isEqual) { if (fold(FnVec, i, j)) { diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 73ec9c1076375..8f858d35ea3f5 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -48,7 +48,8 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } Function* PartialInliner::unswitchFunction(Function* F) { // First, verify that this function is an unswitching candidate... 
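
The ThunkGToF rewrite above has a compact C++ counterpart (hypothetical example): once F and G are proven equivalent, G's body becomes a forwarding call to F, so G keeps its own address for address-taken uses while only one real body survives.

    extern "C" int F_merged(int X) { return X + 1; }

    extern "C" int G_thunk(int X) {
      return F_merged(X);   // emitted as a tail call in F's calling convention
    }
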
BasicBlock* entryBlock = F->begin(); - if (!isa(entryBlock->getTerminator())) + BranchInst *BR = dyn_cast(entryBlock->getTerminator()); + if (!BR || BR->isUnconditional()) return 0; BasicBlock* returnBlock = 0; diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 2b52f464b674e..daf81e9259da5 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -19,6 +19,7 @@ #include "llvm/CallGraphSCCPass.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/CallGraph.h" @@ -40,7 +41,7 @@ namespace { PruneEH() : CallGraphSCCPass(&ID) {} // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(const std::vector &SCC); + bool runOnSCC(std::vector &SCC); bool SimplifyFunction(Function *F); void DeleteBasicBlock(BasicBlock *BB); @@ -54,7 +55,7 @@ X("prune-eh", "Remove unused exception handling info"); Pass *llvm::createPruneEHPass() { return new PruneEH(); } -bool PruneEH::runOnSCC(const std::vector &SCC) { +bool PruneEH::runOnSCC(std::vector &SCC) { SmallPtrSet SCCNodes; CallGraph &CG = getAnalysis(); bool MadeChange = false; @@ -164,9 +165,6 @@ bool PruneEH::runOnSCC(const std::vector &SCC) { // function if we have invokes to non-unwinding functions or code after calls to // no-return functions. bool PruneEH::SimplifyFunction(Function *F) { - CallGraph &CG = getAnalysis(); - CallGraphNode *CGN = CG[F]; - bool MadeChange = false; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast(BB->getTerminator())) @@ -180,14 +178,13 @@ bool PruneEH::SimplifyFunction(Function *F) { Call->setAttributes(II->getAttributes()); // Anything that used the value produced by the invoke instruction - // now uses the value produced by the call instruction. + // now uses the value produced by the call instruction. Note that we + // do this even for void functions and calls with no uses so that the + // callgraph edge is updated. II->replaceAllUsesWith(Call); BasicBlock *UnwindBlock = II->getUnwindDest(); UnwindBlock->removePredecessor(II->getParent()); - // Fix up the call graph. - CGN->replaceCallSite(II, Call); - // Insert a branch to the normal destination right before the // invoke. BranchInst::Create(II->getNormalDest(), II); @@ -214,7 +211,7 @@ bool PruneEH::SimplifyFunction(Function *F) { // Remove the uncond branch and add an unreachable. BB->getInstList().pop_back(); - new UnreachableInst(BB); + new UnreachableInst(BB->getContext(), BB); DeleteBasicBlock(New); // Delete the new BB. MadeChange = true; diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index 99003689fb1f5..4c1f26d50d308 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" @@ -69,7 +70,6 @@ ModulePass *llvm::createRaiseAllocationsPass() { // function into the appropriate instruction. // void RaiseAllocations::doInitialization(Module &M) { - // Get Malloc and free prototypes if they exist! 
MallocFunc = M.getFunction("malloc"); if (MallocFunc) { @@ -77,22 +77,27 @@ void RaiseAllocations::doInitialization(Module &M) { // Get the expected prototype for malloc const FunctionType *Malloc1Type = - FunctionType::get(PointerType::getUnqual(Type::Int8Ty), - std::vector(1, Type::Int64Ty), false); + FunctionType::get(Type::getInt8PtrTy(M.getContext()), + std::vector(1, + Type::getInt64Ty(M.getContext())), false); // Chck to see if we got the expected malloc if (TyWeHave != Malloc1Type) { // Check to see if the prototype is wrong, giving us i8*(i32) * malloc // This handles the common declaration of: 'void *malloc(unsigned);' const FunctionType *Malloc2Type = - FunctionType::get(PointerType::getUnqual(Type::Int8Ty), - std::vector(1, Type::Int32Ty), false); + FunctionType::get(PointerType::getUnqual( + Type::getInt8Ty(M.getContext())), + std::vector(1, + Type::getInt32Ty(M.getContext())), false); if (TyWeHave != Malloc2Type) { // Check to see if the prototype is missing, giving us // i8*(...) * malloc // This handles the common declaration of: 'void *malloc();' const FunctionType *Malloc3Type = - FunctionType::get(PointerType::getUnqual(Type::Int8Ty), true); + FunctionType::get(PointerType::getUnqual( + Type::getInt8Ty(M.getContext())), + true); if (TyWeHave != Malloc3Type) // Give up MallocFunc = 0; @@ -105,19 +110,24 @@ void RaiseAllocations::doInitialization(Module &M) { const FunctionType* TyWeHave = FreeFunc->getFunctionType(); // Get the expected prototype for void free(i8*) - const FunctionType *Free1Type = FunctionType::get(Type::VoidTy, - std::vector(1, PointerType::getUnqual(Type::Int8Ty)), false); + const FunctionType *Free1Type = + FunctionType::get(Type::getVoidTy(M.getContext()), + std::vector(1, PointerType::getUnqual( + Type::getInt8Ty(M.getContext()))), + false); if (TyWeHave != Free1Type) { // Check to see if the prototype was forgotten, giving us // void (...) * free // This handles the common forward declaration of: 'void free();' - const FunctionType* Free2Type = FunctionType::get(Type::VoidTy, true); + const FunctionType* Free2Type = + FunctionType::get(Type::getVoidTy(M.getContext()), true); if (TyWeHave != Free2Type) { // One last try, check to see if we can find free as // int (...)* free. This handles the case where NOTHING was declared. - const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, true); + const FunctionType* Free3Type = + FunctionType::get(Type::getInt32Ty(M.getContext()), true); if (TyWeHave != Free3Type) { // Give up. @@ -137,7 +147,7 @@ void RaiseAllocations::doInitialization(Module &M) { bool RaiseAllocations::runOnModule(Module &M) { // Find the malloc/free prototypes... doInitialization(M); - + bool Changed = false; // First, process all of the malloc calls... @@ -159,12 +169,15 @@ bool RaiseAllocations::runOnModule(Module &M) { // If no prototype was provided for malloc, we may need to cast the // source size. 
- if (Source->getType() != Type::Int32Ty) + if (Source->getType() != Type::getInt32Ty(M.getContext())) Source = - CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/, + CastInst::CreateIntegerCast(Source, + Type::getInt32Ty(M.getContext()), + false/*ZExt*/, "MallocAmtCast", I); - MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I); + MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()), + Source, "", I); MI->takeName(I); I->replaceAllUsesWith(MI); @@ -216,7 +229,7 @@ bool RaiseAllocations::runOnModule(Module &M) { Value *Source = *CS.arg_begin(); if (!isa(Source->getType())) Source = new IntToPtrInst(Source, - PointerType::getUnqual(Type::Int8Ty), + Type::getInt8PtrTy(M.getContext()), "FreePtrCast", I); new FreeInst(Source, I); @@ -226,7 +239,7 @@ bool RaiseAllocations::runOnModule(Module &M) { BranchInst::Create(II->getNormalDest(), I); // Delete the old call site - if (I->getType() != Type::VoidTy) + if (I->getType() != Type::getVoidTy(M.getContext())) I->replaceAllUsesWith(UndefValue::get(I->getType())); I->eraseFromParent(); Changed = true; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 046e0441b1dc6..77d44b27e2082 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -24,18 +24,18 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ValueSymbolTable.h" #include "llvm/TypeSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; namespace { - class VISIBILITY_HIDDEN StripSymbols : public ModulePass { + class StripSymbols : public ModulePass { bool OnlyDebugInfo; public: static char ID; // Pass identification, replacement for typeid @@ -49,7 +49,7 @@ namespace { } }; - class VISIBILITY_HIDDEN StripNonDebugSymbols : public ModulePass { + class StripNonDebugSymbols : public ModulePass { public: static char ID; // Pass identification, replacement for typeid explicit StripNonDebugSymbols() @@ -62,7 +62,7 @@ namespace { } }; - class VISIBILITY_HIDDEN StripDebugDeclare : public ModulePass { + class StripDebugDeclare : public ModulePass { public: static char ID; // Pass identification, replacement for typeid explicit StripDebugDeclare() @@ -138,7 +138,7 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { Value *V = VI->getValue(); ++VI; if (!isa(V) || cast(V)->hasLocalLinkage()) { - if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8)) + if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg")) // Set name to "", removing from symbol table! V->setName(""); } @@ -156,43 +156,37 @@ static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) { } /// Find values that are marked as llvm.used. -void findUsedValues(Module &M, - SmallPtrSet& llvmUsedValues) { - if (GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used")) { - llvmUsedValues.insert(LLVMUsed); - // Collect values that are preserved as per explicit request. - // llvm.used is used to list these values. 
- if (ConstantArray *Inits = - dyn_cast(LLVMUsed->getInitializer())) { - for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { - if (GlobalValue *GV = dyn_cast(Inits->getOperand(i))) - llvmUsedValues.insert(GV); - else if (ConstantExpr *CE = - dyn_cast(Inits->getOperand(i))) - if (CE->getOpcode() == Instruction::BitCast) - if (GlobalValue *GV = dyn_cast(CE->getOperand(0))) - llvmUsedValues.insert(GV); - } - } - } +static void findUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSet &UsedValues) { + if (LLVMUsed == 0) return; + UsedValues.insert(LLVMUsed); + + ConstantArray *Inits = dyn_cast(LLVMUsed->getInitializer()); + if (Inits == 0) return; + + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); } /// StripSymbolNames - Strip symbol names. -bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { +static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { SmallPtrSet llvmUsedValues; - findUsedValues(M, llvmUsedValues); + findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues); + findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues); for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) - if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage } for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) - if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) + if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg")) I->setName(""); // Internal symbols can't participate in linkage StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo); } @@ -206,169 +200,58 @@ bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { // StripDebugInfo - Strip debug info in the module if it exists. // To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and // llvm.dbg.region.end calls, and any globals they point to if now dead. -bool StripDebugInfo(Module &M) { - - SmallPtrSet llvmUsedValues; - findUsedValues(M, llvmUsedValues); - - SmallVector CUs; - SmallVector GVs; - SmallVector SPs; - CollectDebugInfoAnchors(M, CUs, GVs, SPs); - // These anchors use LinkOnce linkage so that the optimizer does not - // remove them accidently. Set InternalLinkage for all these debug - // info anchors. - for (SmallVector::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) - (*I)->setLinkage(GlobalValue::InternalLinkage); - for (SmallVector::iterator I = GVs.begin(), - E = GVs.end(); I != E; ++I) - (*I)->setLinkage(GlobalValue::InternalLinkage); - for (SmallVector::iterator I = SPs.begin(), - E = SPs.end(); I != E; ++I) - (*I)->setLinkage(GlobalValue::InternalLinkage); - - - // Delete all dbg variables. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - GlobalVariable *GV = dyn_cast(I); - if (!GV) continue; - if (!GV->use_empty() && llvmUsedValues.count(I) == 0) { - if (strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0) { - GV->replaceAllUsesWith(UndefValue::get(GV->getType())); - } - } - } +static bool StripDebugInfo(Module &M) { + // Remove all of the calls to the debugger intrinsics, and remove them from + // the module. 
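Each of the five llvm.dbg.* intrinsics is dropped below with the same two-step pattern, which the new code repeats inline; it works because these intrinsics return void, so every remaining use is a standalone call. A hedged sketch of the shared shape (a hypothetical helper, not in the patch):

    // Sketch: the removal pattern repeated below for each llvm.dbg.*
    // intrinsic. Assumes every remaining use is a direct CallInst, which
    // holds for these void intrinsics.
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static void removeIntrinsicAndUses(Function *F) {
      if (!F) return;             // intrinsic not present in this module
      while (!F->use_empty()) {
        CallInst *CI = cast<CallInst>(F->use_back());
        CI->eraseFromParent();    // calls are void, so no users to patch up
      }
      F->eraseFromParent();
    }

Instead of hand-tracking dead operand constants as before, the patch then drops the llvm.dbg.gv named metadata and asks the context to sweep dead metadata.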
Function *FuncStart = M.getFunction("llvm.dbg.func.start"); Function *StopPoint = M.getFunction("llvm.dbg.stoppoint"); Function *RegionStart = M.getFunction("llvm.dbg.region.start"); Function *RegionEnd = M.getFunction("llvm.dbg.region.end"); Function *Declare = M.getFunction("llvm.dbg.declare"); - std::vector DeadConstants; - - // Remove all of the calls to the debugger intrinsics, and remove them from - // the module. if (FuncStart) { while (!FuncStart->use_empty()) { CallInst *CI = cast(FuncStart->use_back()); - Value *Arg = CI->getOperand(1); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast(Arg)) - DeadConstants.push_back(C); } FuncStart->eraseFromParent(); } if (StopPoint) { while (!StopPoint->use_empty()) { CallInst *CI = cast(StopPoint->use_back()); - Value *Arg = CI->getOperand(3); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast(Arg)) - DeadConstants.push_back(C); } StopPoint->eraseFromParent(); } if (RegionStart) { while (!RegionStart->use_empty()) { CallInst *CI = cast(RegionStart->use_back()); - Value *Arg = CI->getOperand(1); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast(Arg)) - DeadConstants.push_back(C); } RegionStart->eraseFromParent(); } if (RegionEnd) { while (!RegionEnd->use_empty()) { CallInst *CI = cast(RegionEnd->use_back()); - Value *Arg = CI->getOperand(1); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg->use_empty()) - if (Constant *C = dyn_cast(Arg)) - DeadConstants.push_back(C); } RegionEnd->eraseFromParent(); } if (Declare) { while (!Declare->use_empty()) { CallInst *CI = cast(Declare->use_back()); - Value *Arg1 = CI->getOperand(1); - Value *Arg2 = CI->getOperand(2); - assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); - if (Arg1->use_empty()) { - if (Constant *C = dyn_cast(Arg1)) - DeadConstants.push_back(C); - else - RecursivelyDeleteTriviallyDeadInstructions(Arg1); - } - if (Arg2->use_empty()) - if (Constant *C = dyn_cast(Arg2)) - DeadConstants.push_back(C); } Declare->eraseFromParent(); } - // llvm.dbg.compile_units and llvm.dbg.subprograms are marked as linkonce - // but since we are removing all debug information, make them internal now. - // FIXME: Use private linkage maybe? - if (Constant *C = M.getNamedGlobal("llvm.dbg.compile_units")) - if (GlobalVariable *GV = dyn_cast(C)) - GV->setLinkage(GlobalValue::InternalLinkage); - - if (Constant *C = M.getNamedGlobal("llvm.dbg.subprograms")) - if (GlobalVariable *GV = dyn_cast(C)) - GV->setLinkage(GlobalValue::InternalLinkage); - - if (Constant *C = M.getNamedGlobal("llvm.dbg.global_variables")) - if (GlobalVariable *GV = dyn_cast(C)) - GV->setLinkage(GlobalValue::InternalLinkage); - - // Delete all dbg variables. 
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I) {
-    GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
-    if (!GV) continue;
-    if (GV->use_empty() && llvmUsedValues.count(I) == 0
-        && (!GV->hasSection()
-            || strcmp(GV->getSection().c_str(), "llvm.metadata") == 0))
-      DeadConstants.push_back(GV);
-  }
-
-  if (DeadConstants.empty())
-    return false;
+  NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
+  if (NMD)
+    NMD->eraseFromParent();
 
-  // Delete any internal globals that were only used by the debugger intrinsics.
-  while (!DeadConstants.empty()) {
-    Constant *C = DeadConstants.back();
-    DeadConstants.pop_back();
-    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
-      if (GV->hasLocalLinkage())
-        RemoveDeadConstant(GV);
-    }
-    else
-      RemoveDeadConstant(C);
-  }
-
-  // Remove all llvm.dbg types.
-  TypeSymbolTable &ST = M.getTypeSymbolTable();
-  for (TypeSymbolTable::iterator TI = ST.begin(), TE = ST.end(); TI != TE; ) {
-    if (!strncmp(TI->first.c_str(), "llvm.dbg.", 9))
-      ST.remove(TI++);
-    else
-      ++TI;
-  }
-
+  // Remove dead metadata.
+  M.getContext().RemoveDeadMetadata();
   return true;
 }
 
@@ -414,8 +297,7 @@ bool StripDebugDeclare::runOnModule(Module &M) {
        I != E; ++I) {
     GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
     if (!GV) continue;
-    if (GV->use_empty() && GV->hasName()
-        && strncmp(GV->getNameStart(), "llvm.dbg.global_variable", 24) == 0)
+    if (GV->use_empty() && GV->getName().startswith("llvm.dbg.global_variable"))
       DeadConstants.push_back(GV);
   }
 
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index 9f54388aa45e2..4442820a284bf 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/Instructions.h"
@@ -34,6 +35,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses");
@@ -47,15 +49,15 @@ namespace {
       CallGraphSCCPass::getAnalysisUsage(AU);
     }
 
-    virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC);
+    virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);
     static char ID; // Pass identification, replacement for typeid
     SRETPromotion() : CallGraphSCCPass(&ID) {}
 
   private:
-    bool PromoteReturn(CallGraphNode *CGN);
+    CallGraphNode *PromoteReturn(CallGraphNode *CGN);
     bool isSafeToUpdateAllCallers(Function *F);
     Function *cloneFunctionBody(Function *F, const StructType *STy);
-    void updateCallSites(Function *F, Function *NF);
+    CallGraphNode *updateCallSites(Function *F, Function *NF);
     bool nestedStructType(const StructType *STy);
   };
 }
@@ -68,49 +70,54 @@ Pass *llvm::createStructRetPromotionPass() {
   return new SRETPromotion();
 }
 
-bool SRETPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {
+bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {
   bool Changed = false;
 
   for (unsigned i = 0, e = SCC.size(); i != e; ++i)
-    Changed |= PromoteReturn(SCC[i]);
+    if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) {
+      SCC[i] = NewNode;
+      Changed = true;
+    }
 
   return Changed;
 }
 
 /// PromoteReturn - This method promotes a function that uses a StructRet parameter
-/// into a function that uses mulitple return value.
+/// into a function that uses multiple return values.
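Concretely, the promotion turns the hidden out-parameter convention into a direct struct return; each caller then reads the pieces with extractvalue instead of loading through the sret pointer. A hypothetical source-level analogue, illustration only:

    // Hypothetical analogue of sret promotion (not from the patch).
    struct S { int a; int b; };

    // Before: 'void f(%S* sret)' -- the caller passes a hidden result
    // pointer and loads the fields back out afterwards.
    void f_before(S *sret_out);

    // After: '%S f()' -- f returns the struct directly; every former load
    // through sret_out becomes an extractvalue of the call's result.
    S f_after();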
+CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { Function *F = CGN->getFunction(); if (!F || F->isDeclaration() || !F->hasLocalLinkage()) - return false; + return 0; // Make sure that function returns struct. if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) - return false; + return 0; - DOUT << "SretPromotion: Looking at sret function " << F->getNameStart() << "\n"; + DEBUG(errs() << "SretPromotion: Looking at sret function " + << F->getName() << "\n"); - assert (F->getReturnType() == Type::VoidTy && "Invalid function return type"); + assert(F->getReturnType() == Type::getVoidTy(F->getContext()) && + "Invalid function return type"); Function::arg_iterator AI = F->arg_begin(); const llvm::PointerType *FArgType = dyn_cast(AI->getType()); - assert (FArgType && "Invalid sret parameter type"); + assert(FArgType && "Invalid sret parameter type"); const llvm::StructType *STy = dyn_cast(FArgType->getElementType()); - assert (STy && "Invalid sret parameter element type"); + assert(STy && "Invalid sret parameter element type"); // Check if it is ok to perform this promotion. if (isSafeToUpdateAllCallers(F) == false) { - DOUT << "SretPromotion: Not all callers can be updated\n"; + DEBUG(errs() << "SretPromotion: Not all callers can be updated\n"); NumRejectedSRETUses++; - return false; + return 0; } - DOUT << "SretPromotion: sret argument will be promoted\n"; + DEBUG(errs() << "SretPromotion: sret argument will be promoted\n"); NumSRET++; // [1] Replace use of sret parameter - AllocaInst *TheAlloca = new AllocaInst (STy, NULL, "mrv", - F->getEntryBlock().begin()); + AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", + F->getEntryBlock().begin()); Value *NFirstArg = F->arg_begin(); NFirstArg->replaceAllUsesWith(TheAlloca); @@ -121,7 +128,7 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) { ++BI; if (isa(I)) { Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); - ReturnInst *NR = ReturnInst::Create(NV, I); + ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I); I->replaceAllUsesWith(NR); I->eraseFromParent(); } @@ -131,11 +138,13 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) { Function *NF = cloneFunctionBody(F, STy); // [4] Update all call sites to use new function - updateCallSites(F, NF); + CallGraphNode *NF_CFN = updateCallSites(F, NF); - F->eraseFromParent(); - getAnalysis().changeFunction(F, NF); - return true; + CallGraph &CG = getAnalysis(); + NF_CFN->stealCalledFunctionsFrom(CG[F]); + + delete CG.removeFunctionFromModule(F); + return NF_CFN; } // Check if it is ok to perform this promotion. @@ -243,23 +252,26 @@ Function *SRETPromotion::cloneFunctionBody(Function *F, Function::arg_iterator NI = NF->arg_begin(); ++I; while (I != E) { - I->replaceAllUsesWith(NI); - NI->takeName(I); - ++I; - ++NI; + I->replaceAllUsesWith(NI); + NI->takeName(I); + ++I; + ++NI; } return NF; } /// updateCallSites - Update all sites that call F to use NF. -void SRETPromotion::updateCallSites(Function *F, Function *NF) { +CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { CallGraph &CG = getAnalysis(); SmallVector Args; // Attributes - Keep track of the parameter attributes for the arguments. SmallVector ArgAttrsVec; + // Get a new callgraph node for NF. 
+ CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); + while (!F->use_empty()) { CallSite CS = CallSite::get(*F->use_begin()); Instruction *Call = CS.getInstruction(); @@ -309,8 +321,10 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) { New->takeName(Call); // Update the callgraph to know that the callsite has been transformed. - CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); - + CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; + CalleeNode->removeCallEdgeFor(Call); + CalleeNode->addCalledFunction(New, NF_CGN); + // Update all users of sret parameter to extract value using extractvalue. for (Value::use_iterator UI = FirstCArg->use_begin(), UE = FirstCArg->use_end(); UI != UE; ) { @@ -318,24 +332,25 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) { CallInst *C2 = dyn_cast(U2); if (C2 && (C2 == Call)) continue; - else if (GetElementPtrInst *UGEP = dyn_cast(U2)) { - ConstantInt *Idx = dyn_cast(UGEP->getOperand(2)); - assert (Idx && "Unexpected getelementptr index!"); - Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), - "evi", UGEP); - while(!UGEP->use_empty()) { - // isSafeToUpdateAllCallers has checked that all GEP uses are - // LoadInsts - LoadInst *L = cast(*UGEP->use_begin()); - L->replaceAllUsesWith(GR); - L->eraseFromParent(); - } - UGEP->eraseFromParent(); + + GetElementPtrInst *UGEP = cast(U2); + ConstantInt *Idx = cast(UGEP->getOperand(2)); + Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), + "evi", UGEP); + while(!UGEP->use_empty()) { + // isSafeToUpdateAllCallers has checked that all GEP uses are + // LoadInsts + LoadInst *L = cast(*UGEP->use_begin()); + L->replaceAllUsesWith(GR); + L->eraseFromParent(); } - else assert( 0 && "Unexpected sret parameter use"); + UGEP->eraseFromParent(); + continue; } Call->eraseFromParent(); } + + return NF_CGN; } /// nestedStructType - Return true if STy includes any @@ -344,7 +359,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) { unsigned Num = STy->getNumElements(); for (unsigned i = 0; i < Num; i++) { const Type *Ty = STy->getElementType(i); - if (!Ty->isSingleValueType() && Ty != Type::VoidTy) + if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext())) return true; } return false; diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp index 2bd9809a39615..eb8f22585b621 100644 --- a/lib/Transforms/Instrumentation/BlockProfiling.cpp +++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp @@ -19,12 +19,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "RSProfiling.h" #include "ProfilingUtils.h" @@ -52,8 +51,8 @@ ModulePass *llvm::createFunctionProfilerPass() { bool FunctionProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - cerr << "WARNING: cannot insert function profiling into a module" - << " with no main function!\n"; + errs() << "WARNING: cannot insert function profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! 
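Past that early exit, every profiler touched by this patch shares one mechanism: allocate an [N x i32] counter array with internal linkage, then plant a load/add/store of one slot at each instrumentation point. The helper that does the planting appears in the ProfilingUtils.cpp hunk later in this patch; condensed here with the same calls but a slightly simplified insertion point (first non-PHI instruction rather than skipping allocas):

    // Condensed sketch of IncrementCounterInBlock (see ProfilingUtils.cpp
    // below): GEP into the counter array, then load / add 1 / store.
    #include "llvm/BasicBlock.h"
    #include "llvm/Constants.h"
    #include "llvm/GlobalVariable.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Type.h"
    #include <vector>
    using namespace llvm;

    static void bumpCounter(BasicBlock *BB, unsigned CounterNum,
                            GlobalVariable *CounterArray) {
      Instruction *InsertPos = BB->getFirstNonPHI();  // simplified choice
      LLVMContext &C = BB->getContext();
      std::vector<Constant*> Idx(2);
      Idx[0] = Constant::getNullValue(Type::getInt32Ty(C));
      Idx[1] = ConstantInt::get(Type::getInt32Ty(C), CounterNum);
      Constant *Slot = ConstantExpr::getGetElementPtr(CounterArray, &Idx[0], 2);
      Value *Old = new LoadInst(Slot, "OldCounter", InsertPos);
      Value *New = BinaryOperator::Create(Instruction::Add, Old,
                       ConstantInt::get(Type::getInt32Ty(C), 1),
                       "NewCounter", InsertPos);
      new StoreInst(New, Slot, InsertPos);
    }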
} @@ -62,10 +61,11 @@ bool FunctionProfiler::runOnModule(Module &M) { if (!I->isDeclaration()) ++NumFunctions; - const Type *ATy = ArrayType::get(Type::Int32Ty, NumFunctions); + const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), + NumFunctions); GlobalVariable *Counters = - new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "FuncProfCounters", &M); + new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "FuncProfCounters"); // Instrument all of the functions... unsigned i = 0; @@ -98,26 +98,29 @@ ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); } bool BlockProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - cerr << "WARNING: cannot insert block profiling into a module" - << " with no main function!\n"; + errs() << "WARNING: cannot insert block profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } unsigned NumBlocks = 0; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - NumBlocks += I->size(); + if (!I->isDeclaration()) + NumBlocks += I->size(); - const Type *ATy = ArrayType::get(Type::Int32Ty, NumBlocks); + const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks); GlobalVariable *Counters = - new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "BlockProfCounters", &M); + new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "BlockProfCounters"); // Instrument all of the blocks... unsigned i = 0; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->isDeclaration()) continue; for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB) // Insert counter at the start of the block IncrementCounterInBlock(BB, i++, Counters); + } // Add the initialization call to main. InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters); diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index d7c518d282f81..494928e438148 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMInstrumentation BlockProfiling.cpp EdgeProfiling.cpp + OptimalEdgeProfiling.cpp ProfilingUtils.cpp RSProfiling.cpp ) diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 0831f3b7a4800..b9cb275578e01 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -16,25 +16,30 @@ // number of counters inserted. 
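Both profilers above now skip declarations when sizing their arrays, and the edge profiler introduced below additionally reserves one slot per function for a virtual (0,entry) edge. The sizing rule, condensed into a sketch (sizing only; the real pass also records which blocks to instrument):

    // Sketch: how the patched edge profilers size the counter array. One
    // slot for the virtual (0,entry) edge per defined function, plus one
    // per CFG successor edge.
    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Module.h"
    using namespace llvm;

    static unsigned countProfiledEdges(Module &M) {
      unsigned NumEdges = 0;
      for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration()) continue;   // declarations contribute nothing
        ++NumEdges;                         // virtual (0,entry) edge
        for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
          NumEdges += BB->getTerminator()->getNumSuccessors();
      }
      return NumEdges;
    }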
// //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "insert-edge-profiling" #include "ProfilingUtils.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/Statistic.h" #include using namespace llvm; +STATISTIC(NumEdgesInserted, "The # of edges inserted."); + namespace { class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass { bool runOnModule(Module &M); public: static char ID; // Pass identification, replacement for typeid EdgeProfiler() : ModulePass(&ID) {} + + virtual const char *getPassName() const { + return "Edge Profiler"; + } }; } @@ -47,14 +52,17 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); } bool EdgeProfiler::runOnModule(Module &M) { Function *Main = M.getFunction("main"); if (Main == 0) { - cerr << "WARNING: cannot insert edge profiling into a module" - << " with no main function!\n"; + errs() << "WARNING: cannot insert edge profiling into a module" + << " with no main function!\n"; return false; // No main, no instrumentation! } std::set BlocksToInstrument; unsigned NumEdges = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + // Reserve space for (0,entry) edge. + ++NumEdges; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { // Keep track of which blocks need to be instrumented. We don't want to // instrument blocks that are added as the result of breaking critical @@ -62,15 +70,20 @@ bool EdgeProfiler::runOnModule(Module &M) { BlocksToInstrument.insert(BB); NumEdges += BB->getTerminator()->getNumSuccessors(); } + } - const Type *ATy = ArrayType::get(Type::Int32Ty, NumEdges); + const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges); GlobalVariable *Counters = - new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(ATy), "EdgeProfCounters", &M); + new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(ATy), "EdgeProfCounters"); + NumEdgesInserted = NumEdges; // Instrument all of the edges... unsigned i = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) + for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { + if (F->isDeclaration()) continue; + // Create counter for (0,entry) edge. + IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (BlocksToInstrument.count(BB)) { // Don't instrument inserted blocks // Okay, we have to add a counter of each outgoing edge. If the @@ -93,6 +106,7 @@ bool EdgeProfiler::runOnModule(Module &M) { } } } + } // Add the initialization call to main. 
   InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters);
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
new file mode 100644
index 0000000000000..2951dbcea9a18
--- /dev/null
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -0,0 +1,95 @@
+//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This module provides means for calculating a maximum spanning tree for a
+// given set of weighted edges.  The type parameter T is the type of a node.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+
+#include "llvm/ADT/EquivalenceClasses.h"
+#include <algorithm>
+#include <vector>
+
+namespace llvm {
+
+  /// MaximumSpanningTree - An MST implementation.
+  /// The type parameter T determines the type of the nodes of the graph.
+  template <typename T>
+  class MaximumSpanningTree {
+
+    // A comparing class for comparing weighted edges.
+    template <typename CT>
+    struct EdgeWeightCompare {
+      bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
+                      typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
+        if (X.second > Y.second) return true;
+        if (X.second < Y.second) return false;
+        return false;
+      }
+    };
+
+  public:
+    typedef std::pair<const T*, const T*> Edge;
+    typedef std::pair<Edge, double> EdgeWeight;
+    typedef std::vector<EdgeWeight> EdgeWeights;
+  protected:
+    typedef std::vector<Edge> MaxSpanTree;
+
+    MaxSpanTree MST;
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+
+    /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a
+    /// spanning tree.
+    MaximumSpanningTree(EdgeWeights &EdgeVector) {
+
+      std::stable_sort(EdgeVector.begin(), EdgeVector.end(),
+                       EdgeWeightCompare<T>());
+
+      // Create spanning tree.  Forest contains a special data structure
+      // that makes checking if two nodes are already in a common (sub-)tree
+      // fast and cheap.
+      EquivalenceClasses<const T*> Forest;
+      for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+           EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+        Edge e = (*EWi).first;
+
+        Forest.insert(e.first);
+        Forest.insert(e.second);
+      }
+
+      // Iterate over the sorted edges, biggest first.
+      for (typename EdgeWeights::iterator EWi = EdgeVector.begin(),
+           EWe = EdgeVector.end(); EWi != EWe; ++EWi) {
+        Edge e = (*EWi).first;
+
+        if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) {
+          Forest.unionSets(e.first, e.second);
+          // We now know that this edge is not already in a subtree, so push
+          // it into the MST.
+          MST.push_back(e);
+        }
+      }
+    }
+
+    typename MaxSpanTree::iterator begin() {
+      return MST.begin();
+    }
+
+    typename MaxSpanTree::iterator end() {
+      return MST.end();
+    }
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
new file mode 100644
index 0000000000000..b2e6747ca0e9e
--- /dev/null
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -0,0 +1,219 @@
+//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass instruments the specified program with counters for edge profiling.
+// Edge profiling can give a reasonable approximation of the hot paths through a
+// program, and is used for a wide variety of program transformations.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "ProfilingUtils.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "MaximumSpanningTree.h"
+#include
+using namespace llvm;
+
+STATISTIC(NumEdgesInserted, "The # of edges inserted.");
+
+namespace {
+  class VISIBILITY_HIDDEN OptimalEdgeProfiler : public ModulePass {
+    bool runOnModule(Module &M);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    OptimalEdgeProfiler() : ModulePass(&ID) {}
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredID(ProfileEstimatorPassID);
+      AU.addRequired<ProfileInfo>();
+    }
+
+    virtual const char *getPassName() const {
+      return "Optimal Edge Profiler";
+    }
+  };
+}
+
+char OptimalEdgeProfiler::ID = 0;
+static RegisterPass<OptimalEdgeProfiler>
+X("insert-optimal-edge-profiling",
+  "Insert optimal instrumentation for edge profiling");
+
+ModulePass *llvm::createOptimalEdgeProfilerPass() {
+  return new OptimalEdgeProfiler();
+}
+
+inline static void printEdgeCounter(ProfileInfo::Edge e,
+                                    BasicBlock* b,
+                                    unsigned i) {
+  DEBUG(errs() << "--Edge Counter for " << (e) << " in " \
+               << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+}
+
+bool OptimalEdgeProfiler::runOnModule(Module &M) {
+  Function *Main = M.getFunction("main");
+  if (Main == 0) {
+    errs() << "WARNING: cannot insert edge profiling into a module"
+           << " with no main function!\n";
+    return false; // No main, no instrumentation!
+  }
+
+  // NumEdges counts all the edges that may be instrumented.  Later on it is
+  // decided which edges to actually instrument, to achieve optimal profiling.
+  // For the entry block a virtual edge (0,entry) is reserved; for each block
+  // with no successors an edge (BB,0) is reserved.  These edges are necessary
+  // to calculate a truly optimal maximum spanning tree and thus an optimal
+  // instrumentation.
+  unsigned NumEdges = 0;
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    // Reserve space for (0,entry) edge.
+    ++NumEdges;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      // Keep track of which blocks need to be instrumented.  We don't want to
+      // instrument blocks that are added as the result of breaking critical
+      // edges!
+      if (BB->getTerminator()->getNumSuccessors() == 0) {
+        // Reserve space for (BB,0) edge.
+        ++NumEdges;
+      } else {
+        NumEdges += BB->getTerminator()->getNumSuccessors();
+      }
+    }
+  }
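Before the counter array is laid out, it helps to see the new MaximumSpanningTree class in isolation. A toy, self-contained usage with int nodes; it assumes the template parameters reconstructed in the header above, so treat it as a sketch rather than verbatim upstream code:

    // Toy usage of MaximumSpanningTree: three weighted edges on nodes
    // A, B, C; the constructor keeps the heaviest acyclic subset.
    #include <cstdio>
    #include <vector>
    #include "MaximumSpanningTree.h"
    using namespace llvm;

    int main() {
      int A = 0, B = 1, C = 2;
      typedef MaximumSpanningTree<int> MSTree;
      MSTree::EdgeWeights EW;
      EW.push_back(MSTree::EdgeWeight(MSTree::Edge(&A, &B), 10.0));
      EW.push_back(MSTree::EdgeWeight(MSTree::Edge(&B, &C), 5.0));
      EW.push_back(MSTree::EdgeWeight(MSTree::Edge(&A, &C), 1.0));
      MSTree MST(EW);  // keeps (A,B) and (B,C); (A,C) would close a cycle
      for (std::vector<MSTree::Edge>::iterator I = MST.begin(), E = MST.end();
           I != E; ++I)
        std::printf("edge (%d,%d) in MST\n", *I->first, *I->second);
      return 0;
    }

In the pass, the edge weights come from ProfileEstimator, and only edges outside the MST get real counters; by construction those are the least frequently executed ones.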
+  // In the profiling output a counter for each edge is reserved, but only a
+  // few are used.  This is done to be able to read back in the profile without
+  // calculating the maximum spanning tree again; instead, each edge counter
+  // that is not used is initialised with -1 to signal that this edge counter
+  // has to be calculated from other edge counters on reading the profile info
+  // back in.
+
+  const Type *Int32 = Type::getInt32Ty(M.getContext());
+  const ArrayType *ATy = ArrayType::get(Int32, NumEdges);
+  GlobalVariable *Counters =
+    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage,
+                       Constant::getNullValue(ATy), "OptEdgeProfCounters");
+  NumEdgesInserted = 0;
+
+  std::vector<Constant*> Initializer(NumEdges);
+  Constant* Zero = ConstantInt::get(Int32, 0);
+  Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted);
+
+  // Instrument all of the edges not in MST...
+  unsigned i = 0;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    DEBUG(errs() << "Working on " << F->getNameStr() << "\n");
+
+    // Calculate a Maximum Spanning Tree with the edge weights determined by
+    // ProfileEstimator.  ProfileEstimator also assigns weights to the virtual
+    // edges (0,entry) and (BB,0) (for blocks with no successors), and these
+    // edges also participate in the maximum spanning tree calculation.
+    // The third parameter of MaximumSpanningTree() has the effect that not the
+    // actual MST is returned but the edges _not_ in the MST.
+
+    ProfileInfo::EdgeWeights ECs =
+      getAnalysisID<ProfileInfo>(ProfileEstimatorPassID, *F).getEdgeWeights(F);
+    std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end());
+    MaximumSpanningTree<BasicBlock> MST(EdgeVector);
+    std::stable_sort(MST.begin(), MST.end());
+
+    // Check if (0,entry) is not in the MST.  If not, instrument the edge
+    // (IncrementCounterInBlock()) and set the counter initially to zero; if
+    // the edge is in the MST the counter is initialised to -1.
+
+    BasicBlock *entry = &(F->getEntryBlock());
+    ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
+    if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+      printEdgeCounter(edge,entry,i);
+      IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++;
+      Initializer[i++] = (Zero);
+    } else {
+      Initializer[i++] = (Uncounted);
+    }
+
+    // InsertedBlocks contains all blocks that were inserted for splitting an
+    // edge; these blocks do not have to be instrumented.
+    DenseSet<BasicBlock*> InsertedBlocks;
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      // Check if block was not inserted and thus does not have to be
+      // instrumented.
+      if (InsertedBlocks.count(BB)) continue;
+
+      // Okay, we have to add a counter for each outgoing edge not in the MST.
+      // If the outgoing edge is not critical don't split it, just insert the
+      // counter in the source or destination of the edge.  Also, if the block
+      // has no successors, the virtual edge (BB,0) is processed.
+      TerminatorInst *TI = BB->getTerminator();
+      if (TI->getNumSuccessors() == 0) {
+        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
+        if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+          printEdgeCounter(edge,BB,i);
+          IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+          Initializer[i++] = (Zero);
+        } else {
+          Initializer[i++] = (Uncounted);
+        }
+      }
+      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+        BasicBlock *Succ = TI->getSuccessor(s);
+        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ);
+        if (!std::binary_search(MST.begin(), MST.end(), edge)) {
+
+          // If the edge is critical, split it.
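A reminder of what "critical" means here: the edge's source has several successors and its destination several predecessors, so a counter placed in either block would also fire for other edges; only a freshly inserted block on the edge counts exactly this edge. A hedged sketch of the test (the pass itself calls SplitCriticalEdge from BasicBlockUtils, shown next):

    // Sketch: when an edge counter cannot live in source or destination.
    // Hypothetical helper, for illustration only.
    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    static bool edgeNeedsSplit(TerminatorInst *TI, unsigned SuccNum) {
      // More than one successor at the source...
      if (TI->getNumSuccessors() <= 1) return false;
      // ...and more than one predecessor at the destination.
      BasicBlock *Dest = TI->getSuccessor(SuccNum);
      pred_iterator PI = pred_begin(Dest), PE = pred_end(Dest);
      ++PI;               // Dest has at least one predecessor: this edge
      return PI != PE;    // a second one makes the edge critical
    }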
+          bool wasInserted = SplitCriticalEdge(TI, s, this);
+          Succ = TI->getSuccessor(s);
+          if (wasInserted)
+            InsertedBlocks.insert(Succ);
+
+          // Okay, we are guaranteed that the edge is no longer critical.  If
+          // we only have a single successor, insert the counter in this block,
+          // otherwise insert it in the successor block.
+          if (TI->getNumSuccessors() == 1) {
+            // Insert counter at the start of the block
+            printEdgeCounter(edge,BB,i);
+            IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+          } else {
+            // Insert counter at the start of the block
+            printEdgeCounter(edge,Succ,i);
+            IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++;
+          }
+          Initializer[i++] = (Zero);
+        } else {
+          Initializer[i++] = (Uncounted);
+        }
+      }
+    }
+  }
+
+  // Check if the number of edges counted at first was the number of edges we
+  // considered for instrumentation.
+  assert(i == NumEdges && "the number of edges in counting array is wrong");
+
+  // Assign the now completely defined initialiser to the array.
+  Constant *init = ConstantArray::get(ATy, Initializer);
+  Counters->setInitializer(init);
+
+  // Add the initialization call to main.
+  InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters);
+  return true;
+}
+
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 48071f1156925..1679bea08c195 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -18,22 +18,27 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 
 void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
                                    GlobalValue *Array) {
+  LLVMContext &Context = MainFn->getContext();
   const Type *ArgVTy =
-    PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty));
-  const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty);
+    PointerType::getUnqual(Type::getInt8PtrTy(Context));
+  const PointerType *UIntPtr =
+        Type::getInt32PtrTy(Context);
   Module &M = *MainFn->getParent();
-  Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty,
-                                           ArgVTy, UIntPtr, Type::Int32Ty,
+  Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context),
+                                           Type::getInt32Ty(Context),
+                                           ArgVTy, UIntPtr,
+                                           Type::getInt32Ty(Context),
                                            (Type *)0);
 
   // This could force argc and argv into programs that wouldn't otherwise have
   // them, but instead we just pass null values in.
   std::vector<Value*> Args(4);
-  Args[0] = Constant::getNullValue(Type::Int32Ty);
+  Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));
   Args[1] = Constant::getNullValue(ArgVTy);
 
   // Skip over any allocas in the entry block.
@@ -41,7 +46,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
   BasicBlock::iterator InsertPos = Entry->begin();
   while (isa<AllocaInst>(InsertPos)) ++InsertPos;
 
-  std::vector<Constant*> GEPIndices(2, Constant::getNullValue(Type::Int32Ty));
+  std::vector<Constant*> GEPIndices(2,
+                          Constant::getNullValue(Type::getInt32Ty(Context)));
   unsigned NumElements = 0;
   if (Array) {
     Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0],
@@ -53,7 +59,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
     // pass null.
Args[2] = ConstantPointerNull::get(UIntPtr); } - Args[3] = ConstantInt::get(Type::Int32Ty, NumElements); + Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), "newargc", InsertPos); @@ -78,16 +84,18 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. - if (AI->getType() != Type::Int32Ty) { + if (AI->getType() != Type::getInt32Ty(Context)) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); AI->replaceAllUsesWith( CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos)); } - opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true); + opcode = CastInst::getCastOpcode(AI, true, + Type::getInt32Ty(Context), true); InitCall->setOperand(1, - CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall)); + CastInst::Create(opcode, AI, Type::getInt32Ty(Context), + "argc.cast", InitCall)); } else { AI->replaceAllUsesWith(InitCall); InitCall->setOperand(1, AI); @@ -104,17 +112,20 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, while (isa(InsertPos)) ++InsertPos; + LLVMContext &Context = BB->getContext(); + // Create the getelementptr constant expression std::vector Indices(2); - Indices[0] = Constant::getNullValue(Type::Int32Ty); - Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); + Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); + ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], + Indices.size()); // Load, increment and store the value back. 
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal, - ConstantInt::get(Type::Int32Ty, 1), + ConstantInt::get(Type::getInt32Ty(Context), 1), "NewFuncCounter", InsertPos); new StoreInst(NewVal, ElementPtr, InsertPos); } diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp index b110f4eb368b6..3b72260db8458 100644 --- a/lib/Transforms/Instrumentation/RSProfiling.cpp +++ b/lib/Transforms/Instrumentation/RSProfiling.cpp @@ -33,6 +33,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Pass.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/Constants.h" @@ -43,6 +44,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "RSProfiling.h" #include @@ -197,8 +200,8 @@ GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval) : T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; - Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage, - Init, "RandomSteeringCounter", &M); + Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, + Init, "RandomSteeringCounter"); } GlobalRandomCounter::~GlobalRandomCounter() {} @@ -211,8 +214,9 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { //decrement counter LoadInst* l = new LoadInst(Counter, "counter", t); - ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), - "countercc", t); + ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, + ConstantInt::get(T, 0), + "countercc"); Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), "counternew", t); @@ -221,7 +225,8 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) { //reset counter BasicBlock* oldnext = t->getSuccessor(0); - BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), + BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), + "reset", oldnext->getParent(), oldnext); TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); t->setSuccessor(0, resetblock); @@ -234,8 +239,8 @@ GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t, : AI(0), T(t) { ConstantInt* Init = ConstantInt::get(T, resetval); ResetValue = Init; - Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage, - Init, "RandomSteeringCounter", &M); + Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, + Init, "RandomSteeringCounter"); } GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {} @@ -283,8 +288,9 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { //decrement counter LoadInst* l = new LoadInst(AI, "counter", t); - ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0), - "countercc", t); + ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, + ConstantInt::get(T, 0), + "countercc"); Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1), "counternew", t); @@ -293,7 +299,8 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) { //reset counter BasicBlock* oldnext = t->getSuccessor(0); - BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(), + BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), + 
"reset", oldnext->getParent(), oldnext); TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock); t->setSuccessor(0, resetblock); @@ -315,12 +322,13 @@ void CycleCounter::ProcessChoicePoint(BasicBlock* bb) { CallInst* c = CallInst::Create(F, "rdcc", t); BinaryOperator* b = - BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm), + BinaryOperator::CreateAnd(c, + ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm), "mrdcc", t); - ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b, - ConstantInt::get(Type::Int64Ty, 0), - "mrdccc", t); + ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b, + ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0), + "mrdccc"); t->setCondition(s); } @@ -345,16 +353,16 @@ void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNu // Create the getelementptr constant expression std::vector Indices(2); - Indices[0] = Constant::getNullValue(Type::Int32Ty); - Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); - Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray, + Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext())); + Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum); + Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], 2); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos); profcode.insert(OldVal); Value *NewVal = BinaryOperator::CreateAdd(OldVal, - ConstantInt::get(Type::Int32Ty, 1), + ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1), "NewCounter", InsertPos); profcode.insert(NewVal); profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos)); @@ -377,7 +385,8 @@ Value* ProfilerRS::Translate(Value* v) { if (bb == &bb->getParent()->getEntryBlock()) TransCache[bb] = bb; //don't translate entry block else - TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(), + TransCache[bb] = BasicBlock::Create(v->getContext(), + "dup_" + bb->getName(), bb->getParent(), NULL); return TransCache[bb]; } else if (Instruction* i = dyn_cast(v)) { @@ -401,7 +410,7 @@ Value* ProfilerRS::Translate(Value* v) { TransCache[v] = v; return v; } - assert(0 && "Value not handled"); + llvm_unreachable("Value not handled"); return 0; } @@ -466,16 +475,16 @@ void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) //a: Function::iterator BBN = src; ++BBN; - BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN); + BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN); //ChoicePoints.insert(bbC); BBN = cast(Translate(src)); - BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN); + BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN); ChoicePoints.insert(bbCp); //b: BranchInst::Create(cast(Translate(dst)), bbC); BranchInst::Create(dst, cast(Translate(dst)), - ConstantInt::get(Type::Int1Ty, true), bbCp); + ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp); //c: { TerminatorInst* iB = src->getTerminator(); @@ -531,9 +540,8 @@ bool ProfilerRS::runOnFunction(Function& F) { TerminatorInst* T = F.getEntryBlock().getTerminator(); ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0), cast( - Translate(T->getSuccessor(0))), - ConstantInt::get(Type::Int1Ty, - true))); + Translate(T->getSuccessor(0))), + ConstantInt::get(Type::getInt1Ty(F.getContext()), true))); //do whatever is needed now that the function is duplicated c->PrepFunction(&F); @@ -556,10 +564,12 @@ bool ProfilerRS::runOnFunction(Function& F) { 
bool ProfilerRS::doInitialization(Module &M) { switch (RandomMethod) { case GBV: - c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1); + c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()), + (1 << 14) - 1); break; case GBVO: - c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1); + c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()), + (1 << 14) - 1); break; case HOSTCC: c = new CycleCounter(M, (1 << 14) - 1); @@ -639,7 +649,7 @@ static void getBackEdges(Function& F, T& BackEdges) { std::map finish; int time = 0; recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time); - DOUT << F.getName() << " " << BackEdges.size() << "\n"; + DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n"); } diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 5fe1eeb5c7523..025d02ad30737 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -13,7 +13,7 @@ PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello include $(LEVEL)/Makefile.config # No support for plugins on windows targets -ifeq ($(OS), $(filter $(OS), Cygwin MingW)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW)) PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) endif diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 9c55f664ebbd0..37f383fb512ad 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -21,19 +21,17 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" - using namespace llvm; STATISTIC(NumRemoved, "Number of instructions removed"); namespace { - struct VISIBILITY_HIDDEN ADCE : public FunctionPass { + struct ADCE : public FunctionPass { static char ID; // Pass identification, replacement for typeid ADCE() : FunctionPass(&ID) {} diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index fb9b88005b6a8..54533f50405f3 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -31,7 +31,6 @@ #include "llvm/Function.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/ADT/Statistic.h" #include "llvm/Transforms/Scalar.h" #include @@ -40,7 +39,7 @@ using namespace llvm; STATISTIC(NumMoved, "Number of basic blocks moved"); namespace { - struct VISIBILITY_HIDDEN BlockPlacement : public FunctionPass { + struct BlockPlacement : public FunctionPass { static char ID; // Pass identification, replacement for typeid BlockPlacement() : FunctionPass(&ID) {} @@ -127,13 +126,13 @@ void BlockPlacement::PlaceBlocks(BasicBlock *BB) { /*empty*/; if (SI == E) return; // No more successors to place. - unsigned MaxExecutionCount = PI->getExecutionCount(*SI); + double MaxExecutionCount = PI->getExecutionCount(*SI); BasicBlock *MaxSuccessor = *SI; // Scan for more frequently executed successors for (; SI != E; ++SI) if (!PlacedBlocks.count(*SI)) { - unsigned Count = PI->getExecutionCount(*SI); + double Count = PI->getExecutionCount(*SI); if (Count > MaxExecutionCount || // Prefer to not disturb the code. 
(Count == MaxExecutionCount && *SI == &*InsertPos)) { diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 8a8f83fa311d8..cbeed4c6b55f4 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,13 +1,13 @@ add_llvm_library(LLVMScalarOpts ADCE.cpp BasicBlockPlacement.cpp + CodeGenLICM.cpp CodeGenPrepare.cpp CondPropagate.cpp ConstantProp.cpp DCE.cpp DeadStoreElimination.cpp GVN.cpp - GVNPRE.cpp IndVarSimplify.cpp InstructionCombining.cpp JumpThreading.cpp @@ -19,7 +19,6 @@ add_llvm_library(LLVMScalarOpts LoopUnroll.cpp LoopUnswitch.cpp MemCpyOptimizer.cpp - PredicateSimplifier.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/CodeGenLICM.cpp b/lib/Transforms/Scalar/CodeGenLICM.cpp new file mode 100644 index 0000000000000..10f950e135dad --- /dev/null +++ b/lib/Transforms/Scalar/CodeGenLICM.cpp @@ -0,0 +1,112 @@ +//===- CodeGenLICM.cpp - LICM a function for code generation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This function performs late LICM, hoisting constants out of loops that +// are not valid immediates. It should not be followed by instcombine, +// because instcombine would quickly stuff the constants back into the loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-licm" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/DenseMap.h" +using namespace llvm; + +namespace { + class CodeGenLICM : public LoopPass { + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + public: + static char ID; // Pass identification, replacement for typeid + explicit CodeGenLICM() : LoopPass(&ID) {} + }; +} + +char CodeGenLICM::ID = 0; +static RegisterPass X("codegen-licm", + "hoist constants out of loops"); + +Pass *llvm::createCodeGenLICMPass() { + return new CodeGenLICM(); +} + +bool CodeGenLICM::runOnLoop(Loop *L, LPPassManager &) { + bool Changed = false; + + // Only visit outermost loops. + if (L->getParentLoop()) return Changed; + + Instruction *PreheaderTerm = L->getLoopPreheader()->getTerminator(); + DenseMap HoistedConstants; + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); + BBI != BBE; ++BBI) { + Instruction *I = BBI; + // TODO: For now, skip all intrinsic instructions, because some of them + // can require their operands to be constants, and we don't want to + // break that. + if (isa(I)) + continue; + // LLVM represents fneg as -0.0-x; don't hoist the -0.0 out. + if (BinaryOperator::isFNeg(I) || + BinaryOperator::isNeg(I) || + BinaryOperator::isNot(I)) + continue; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + // Don't hoist out switch case constants. + if (isa(I) && i == 1) + break; + // Don't hoist out shuffle masks. 
+ if (isa(I) && i == 2) + break; + Value *Op = I->getOperand(i); + Constant *C = dyn_cast(Op); + if (!C) continue; + // TODO: Ask the target which constants are legal. This would allow + // us to add support for hoisting ConstantInts and GlobalValues too. + if (isa(C) || + isa(C) || + isa(C)) { + BitCastInst *&BC = HoistedConstants[C]; + if (!BC) + BC = new BitCastInst(C, C->getType(), "hoist", PreheaderTerm); + I->setOperand(i, BC); + Changed = true; + } + } + } + } + + return Changed; +} + +void CodeGenLICM::getAnalysisUsage(AnalysisUsage &AU) const { + // This pass preserves just about everything. List some popular things here. + AU.setPreservesCFG(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved("scalar-evolution"); + AU.addPreserved("iv-users"); + AU.addPreserved("lda"); + AU.addPreserved("live-values"); + + // Hoisting requires a loop preheader. + AU.addRequiredID(LoopSimplifyID); +} diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 85e9243e3ce8e..a3e3fea4da076 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -23,10 +23,9 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -35,10 +34,10 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -46,10 +45,11 @@ static cl::opt FactorCommonPreds("split-critical-paths-tweak", cl::init(false), cl::Hidden); namespace { - class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass { + class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; + ProfileInfo *PI; /// BackEdges - Keep a set of all the loop back edges. /// @@ -60,6 +60,10 @@ namespace { : FunctionPass(&ID), TLI(tli) {} bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + } + private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -95,6 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) { bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + PI = getAnalysisIfAvailable(); // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -232,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); - DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB; + DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); // If the destination block has a single pred, then this is a trivial edge, // just collapse it. 
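"Collapse it" here means the single-predecessor merge performed by MergeBasicBlockIntoOnlyPred in the hunk below. In spirit, and only in spirit, it does the following; this is a simplified stand-in, assuming DestBB's PHI nodes have already been folded away (with a single predecessor they are redundant):

    // Simplified stand-in for the trivial-edge collapse (not the real
    // MergeBasicBlockIntoOnlyPred; illustration under stated assumptions).
    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static void foldIntoSinglePred(BasicBlock *DestBB) {
      BasicBlock *Pred = DestBB->getSinglePredecessor();
      if (!Pred || Pred->getTerminator()->getNumSuccessors() != 1)
        return;                                   // not a trivial edge
      Pred->getTerminator()->eraseFromParent();   // drop the branch to DestBB
      Pred->getInstList().splice(Pred->end(), DestBB->getInstList());
      DestBB->replaceAllUsesWith(Pred);           // fix PHIs in successors
      DestBB->eraseFromParent();
    }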
@@ -241,12 +246,12 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // Remember if SinglePred was the entry block of the function. If so, we // will need to move BB back to the entry position. bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); - MergeBasicBlockIntoOnlyPred(DestBB); + MergeBasicBlockIntoOnlyPred(DestBB, this); if (isEntry && BB != &BB->getParent()->getEntryBlock()) BB->moveBefore(&BB->getParent()->getEntryBlock()); - DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; + DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n"); return; } } @@ -283,9 +288,13 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); + if (PI) { + PI->replaceAllUses(BB, DestBB); + PI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); + } BB->eraseFromParent(); - DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; + DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n"); } @@ -358,6 +367,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, // If we found a workable predecessor, change TI to branch to Succ. if (FoundMatch) { + ProfileInfo *PI = P->getAnalysisIfAvailable(); + if (PI) + PI->splitEdge(TIBB, Dest, Pred); Dest->removePredecessor(TIBB); TI->setSuccessor(SuccNum, Pred); return; @@ -410,8 +422,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, /// static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If this is a noop copy, - MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); - MVT DstVT = TLI.getValueType(CI->getType()); + EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(CI->getType()); // This is an fp<->int conversion? if (SrcVT.isInteger() != DstVT.isInteger()) @@ -424,10 +436,10 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ // If these values will be promoted, find out what they will be promoted // to. This helps us consider truncates on PPC as noop copies when they // are. - if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) - SrcVT = TLI.getTypeToTransformTo(SrcVT); - if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) - DstVT = TLI.getTypeToTransformTo(DstVT); + if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) + SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); + if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) + DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); // If, after promotion, these are the same types, this is a noop copy. if (SrcVT != DstVT) @@ -520,7 +532,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) { BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI(); InsertedCmp = - CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), + CmpInst::Create(CI->getOpcode(), + CI->getPredicate(), CI->getOperand(0), CI->getOperand(1), "", InsertPt); MadeChange = true; } @@ -577,7 +590,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // If all the instructions matched are already in this BB, don't do anything. if (!AnyNonLocal) { - DEBUG(cerr << "CGP: Found local addrmode: " << AddrMode << "\n"); + DEBUG(errs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -592,14 +605,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // computation. 
Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { - DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); if (SunkAddr->getType() != Addr->getType()) SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt); } else { - DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); - const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(); + DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst); + const Type *IntPtrTy = + TLI->getTargetData()->getIntPtrType(AccessTy->getContext()); Value *Result = 0; // Start with the scale value. @@ -616,7 +630,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt); } if (AddrMode.Scale != 1) - V = BinaryOperator::CreateMul(V, Context->getConstantInt(IntPtrTy, + V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), "sunkaddr", InsertPt); Result = V; @@ -648,7 +662,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Add in the Base Offset if present. if (AddrMode.BaseOffs) { - Value *V = Context->getConstantInt(IntPtrTy, AddrMode.BaseOffs); + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); if (Result) Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt); else @@ -656,7 +670,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } if (Result == 0) - SunkAddr = Context->getNullValue(Addr->getType()); + SunkAddr = Constant::getNullValue(Addr->getType()); else SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt); } @@ -858,18 +872,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { } else if (CallInst *CI = dyn_cast(I)) { // If we found an inline asm expession, and if the target knows how to // lower it to normal LLVM code, do so now. - if (TLI && isa(CI->getCalledValue())) - if (const TargetAsmInfo *TAI = - TLI->getTargetMachine().getTargetAsmInfo()) { - if (TAI->ExpandInlineAsm(CI)) { - BBI = BB.begin(); - // Avoid processing instructions out of order, which could cause - // reuse before a value is defined. - SunkAddrs.clear(); - } else - // Sink address computing for memory operands into the block. - MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); - } + if (TLI && isa(CI->getCalledValue())) { + if (TLI->ExpandInlineAsm(CI)) { + BBI = BB.begin(); + // Avoid processing instructions out of order, which could cause + // reuse before a value is defined. + SunkAddrs.clear(); + } else + // Sink address computing for memory operands into the block. 
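The sunken address built above is plain integer arithmetic once every operand is converted to the pointer-width integer type. A standalone sketch over uint64_t, with field names borrowed from the pass's ExtAddrMode; the returned integer stands in for the final IntToPtrInst, and 0 for the null-pointer case:

#include <cstdint>

// Rebuild Scale*ScaledReg + BaseReg + BaseGV + BaseOffs in the same order
// the code above emits the adds.
uint64_t rematerializeAddress(uint64_t scaledReg, uint64_t scale,
                              uint64_t baseReg, uint64_t baseGV,
                              int64_t baseOffs) {
  uint64_t result = 0;
  if (scaledReg)
    result += (scale != 1) ? scaledReg * scale : scaledReg; // Scale==1: no mul
  if (baseReg)
    result += baseReg;            // ptrtoint of the matched base register
  if (baseGV)
    result += baseGV;             // ptrtoint of the matched global
  result += (uint64_t)baseOffs;   // constant displacement, may be negative
  return result;
}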
+ MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); + } } } diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp index c85d0317d65f8..5b573f492cdc4 100644 --- a/lib/Transforms/Scalar/CondPropagate.cpp +++ b/lib/Transforms/Scalar/CondPropagate.cpp @@ -14,26 +14,21 @@ #define DEBUG_TYPE "condprop" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Type.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" using namespace llvm; STATISTIC(NumBrThread, "Number of CFG edges threaded through branches"); STATISTIC(NumSwThread, "Number of CFG edges threaded through switches"); namespace { - struct VISIBILITY_HIDDEN CondProp : public FunctionPass { + struct CondProp : public FunctionPass { static char ID; // Pass identification, replacement for typeid CondProp() : FunctionPass(&ID) {} @@ -124,7 +119,7 @@ void CondProp::SimplifyBlock(BasicBlock *BB) { // Succ is now dead, but we cannot delete it without potentially // invalidating iterators elsewhere. Just insert an unreachable // instruction in it and delete this block later on. - new UnreachableInst(Succ); + new UnreachableInst(BB->getContext(), Succ); DeadBlocks.push_back(Succ); MadeChange = true; } @@ -196,8 +191,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) { if (&*BBI != SI) return; - bool RemovedPreds = false; - // Ok, we have this really simple case, walk the PHI operands, looking for // constants. Walk from the end to remove operands from the end when // possible, and to avoid invalidating "i". @@ -209,7 +202,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) { RevectorBlockTo(PN->getIncomingBlock(i-1), SI->getSuccessor(DestCase)); ++NumSwThread; - RemovedPreds = true; // If there were two predecessors before this simplification, or if the // PHI node contained all the same value except for the one we just diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index b933488cf636f..4fee327ebec16 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -24,7 +24,6 @@ #include "llvm/Constant.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" #include @@ -33,7 +32,7 @@ using namespace llvm; STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { - struct VISIBILITY_HIDDEN ConstantPropagation : public FunctionPass { + struct ConstantPropagation : public FunctionPass { static char ID; // Pass identification, replacement for typeid ConstantPropagation() : FunctionPass(&ID) {} @@ -67,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.erase(WorkList.begin()); // Get an element from the worklist... if (!I->use_empty()) // Don't muck with dead instructions... - if (Constant *C = ConstantFoldInstruction(I)) { + if (Constant *C = ConstantFoldInstruction(I, F.getContext())) { // Add all of the users of this instruction to the worklist, they might // be constant propagatable now... 
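The worklist loop that the ConstantPropagation hunk ends on follows a classic fixpoint discipline: when an instruction folds to a constant, its users are re-enqueued because they may fold next. A toy rendering over an expression DAG; the single "add" fold rule is an invented stand-in for ConstantFoldInstruction:

#include <optional>
#include <set>
#include <vector>

struct ToyInst {
  std::vector<int> operands;    // indices of operand nodes
  std::vector<int> users;       // indices of user nodes
  std::optional<int> constVal;  // preset on leaves, set when a node folds
};

void propagateConstants(std::vector<ToyInst> &insts) {
  std::set<int> worklist;
  for (int i = 0, e = (int)insts.size(); i != e; ++i)
    worklist.insert(i);
  while (!worklist.empty()) {
    int i = *worklist.begin();
    worklist.erase(worklist.begin());
    ToyInst &I = insts[i];
    if (I.constVal || I.operands.empty())
      continue;                 // already folded, or a leaf
    int sum = 0;
    bool allConst = true;
    for (int op : I.operands) {
      if (!insts[op].constVal) { allConst = false; break; }
      sum += *insts[op].constVal;
    }
    if (!allConst)
      continue;
    I.constVal = sum;           // the fold succeeded...
    for (int u : I.users)
      worklist.insert(u);       // ...so every user might fold next
  }
}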
         for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8bb504c09c6e3..39940c35da5d5 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Instruction.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include <set>
@@ -34,7 +33,7 @@ namespace {
   //===--------------------------------------------------------------------===//
   // DeadInstElimination pass implementation
   //
-  struct VISIBILITY_HIDDEN DeadInstElimination : public BasicBlockPass {
+  struct DeadInstElimination : public BasicBlockPass {
     static char ID; // Pass identification, replacement for typeid
     DeadInstElimination() : BasicBlockPass(&ID) {}
     virtual bool runOnBasicBlock(BasicBlock &BB) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index b923c92bd3007..a7b3e7524fa2b 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -29,14 +29,15 @@
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Compiler.h"
 using namespace llvm;
 STATISTIC(NumFastStores, "Number of stores deleted");
 STATISTIC(NumFastOther , "Number of other instrs removed");
 namespace {
-  struct VISIBILITY_HIDDEN DSE : public FunctionPass {
+  struct DSE : public FunctionPass {
+    TargetData *TD;
+
     static char ID; // Pass identification, replacement for typeid
     DSE() : FunctionPass(&ID) {}
@@ -62,7 +63,6 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       AU.addRequired<DominatorTree>();
-      AU.addRequired<TargetData>();
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<MemoryDependenceAnalysis>();
       AU.addPreserved<DominatorTree>();
@@ -79,15 +79,15 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
 bool DSE::runOnBasicBlock(BasicBlock &BB) {
   MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  TargetData &TD = getAnalysis<TargetData>();
+  TD = getAnalysisIfAvailable<TargetData>();
   bool MadeChange = false;
-  // Do a top-down walk on the BB
+  // Do a top-down walk on the BB.
   for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
     Instruction *Inst = BBI++;
-    // If we find a store or a free, get it's memory dependence.
+    // If we find a store or a free, get its memory dependence.
     if (!isa<StoreInst>(Inst) && !isa<FreeInst>(Inst))
       continue;
@@ -117,13 +117,17 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
   // If this is a store-store dependence, then the previous store is dead so
   // long as this store is at least as big as it.
   if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
-    if (TD.getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
-        TD.getTypeStoreSize(SI->getOperand(0)->getType())) {
+    if (TD &&
+        TD->getTypeStoreSize(DepStore->getOperand(0)->getType()) <=
+        TD->getTypeStoreSize(SI->getOperand(0)->getType())) {
       // Delete the store and now-dead instructions that feed it.
       DeleteDeadInstruction(DepStore);
       NumFastStores++;
       MadeChange = true;
-
+
+      // DeleteDeadInstruction can delete the current instruction in loop
+      // cases, reset BBI.
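The new "reset BBI" comment exists because DeleteDeadInstruction can erase instructions the walk still points at. As a loose analogy, the same erase-and-revisit step over a std::list (LLVM's instruction ilist supports the same pattern):

#include <list>

// Delete the element at 'it' and step back so the outer loop's increment
// lands on the element after the deleted one, mirroring
// "BBI = Inst; if (BBI != BB.begin()) --BBI; continue;".
void eraseAndRevisit(std::list<int> &block, std::list<int>::iterator &it) {
  it = block.erase(it);     // erase invalidates 'it'; use the returned one
  if (it != block.begin())
    --it;                   // back up one so nothing gets skipped
}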
+ BBI = Inst; if (BBI != BB.begin()) --BBI; continue; @@ -134,8 +138,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (LoadInst *DepLoad = dyn_cast(InstDep.getInst())) { if (SI->getPointerOperand() == DepLoad->getPointerOperand() && SI->getOperand(0) == DepLoad) { + // DeleteDeadInstruction can delete the current instruction. Save BBI + // in case we need it. + WeakVH NextInst(BBI); + DeleteDeadInstruction(SI); - if (BBI != BB.begin()) + + if (NextInst == 0) // Next instruction deleted. + BBI = BB.begin(); + else if (BBI != BB.begin()) // Revisit this instruction if possible. --BBI; NumFastStores++; MadeChange = true; @@ -181,7 +192,6 @@ bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) { /// store i32 1, i32* %A /// ret void bool DSE::handleEndBlock(BasicBlock &BB) { - TargetData &TD = getAnalysis(); AliasAnalysis &AA = getAnalysis(); bool MadeChange = false; @@ -302,14 +312,16 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Get size information for the alloca unsigned pointerSize = ~0U; - if (AllocaInst* A = dyn_cast(*I)) { - if (ConstantInt* C = dyn_cast(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD.getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast( - cast(*I)->getType()); - pointerSize = TD.getTypeAllocSize(PT->getElementType()); + if (TD) { + if (AllocaInst* A = dyn_cast(*I)) { + if (ConstantInt* C = dyn_cast(A->getArraySize())) + pointerSize = C->getZExtValue() * + TD->getTypeAllocSize(A->getAllocatedType()); + } else { + const PointerType* PT = cast( + cast(*I)->getType()); + pointerSize = TD->getTypeAllocSize(PT->getElementType()); + } } // See if the call site touches it @@ -357,7 +369,6 @@ bool DSE::handleEndBlock(BasicBlock &BB) { bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, BasicBlock::iterator &BBI, SmallPtrSet& deadPointers) { - TargetData &TD = getAnalysis(); AliasAnalysis &AA = getAnalysis(); // If the kill pointer can be easily reduced to an alloca, @@ -379,13 +390,15 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize, E = deadPointers.end(); I != E; ++I) { // Get size information for the alloca. 
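WeakVH, used above, is LLVM's self-clearing value handle: it nulls itself when the tracked instruction is deleted, which is why comparing NextInst against 0 detects the deletion. A rough standard-library analogy using std::weak_ptr, not LLVM's actual mechanism:

#include <memory>

// Take a non-owning handle before a call that may delete the object,
// then test the handle instead of touching a possibly-dangling pointer,
// mirroring "WeakVH NextInst(BBI); ... if (NextInst == 0) ...".
bool deleteAndCheck(std::shared_ptr<int> &slot) {
  std::weak_ptr<int> next(slot);
  slot.reset();             // stands in for DeleteDeadInstruction(SI)
  return next.expired();    // true means: fall back to a safe position
}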
unsigned pointerSize = ~0U; - if (AllocaInst* A = dyn_cast(*I)) { - if (ConstantInt* C = dyn_cast(A->getArraySize())) - pointerSize = C->getZExtValue() * - TD.getTypeAllocSize(A->getAllocatedType()); - } else { - const PointerType* PT = cast(cast(*I)->getType()); - pointerSize = TD.getTypeAllocSize(PT->getElementType()); + if (TD) { + if (AllocaInst* A = dyn_cast(*I)) { + if (ConstantInt* C = dyn_cast(A->getArraySize())) + pointerSize = C->getZExtValue() * + TD->getTypeAllocSize(A->getAllocatedType()); + } else { + const PointerType* PT = cast(cast(*I)->getType()); + pointerSize = TD->getTypeAllocSize(PT->getElementType()); + } } // See if this pointer could alias it diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index f4fe15e0e525a..2ed4a638adf40 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -23,6 +23,7 @@ #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -32,13 +33,18 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include using namespace llvm; @@ -60,17 +66,17 @@ static cl::opt EnableLoadPRE("enable-load-pre", cl::init(true)); /// as an efficient mechanism to determine the expression-wise equivalence of /// two values. 
namespace { - struct VISIBILITY_HIDDEN Expression { + struct Expression { enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, UDIV, SDIV, FDIV, UREM, SREM, - FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, - ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, + FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, + ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, + ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, + FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, + FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, - FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, + FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, EMPTY, TOMBSTONE }; @@ -80,11 +86,11 @@ namespace { uint32_t secondVN; uint32_t thirdVN; SmallVector varargs; - Value* function; - + Value *function; + Expression() { } Expression(ExpressionOpcode o) : opcode(o) { } - + bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; @@ -103,30 +109,30 @@ namespace { else { if (varargs.size() != other.varargs.size()) return false; - + for (size_t i = 0; i < varargs.size(); ++i) if (varargs[i] != other.varargs[i]) return false; - + return true; } } - + bool operator!=(const Expression &other) const { return !(*this == other); } }; - - class VISIBILITY_HIDDEN ValueTable { + + class ValueTable { private: DenseMap valueNumbering; DenseMap expressionNumbering; AliasAnalysis* AA; MemoryDependenceAnalysis* MD; DominatorTree* DT; - + uint32_t nextValueNumber; - + Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); Expression::ExpressionOpcode getOpcode(CmpInst* C); Expression::ExpressionOpcode getOpcode(CastInst* C); @@ -142,11 +148,11 @@ namespace { Expression create_expression(Constant* C); public: ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value* V); - uint32_t lookup(Value* V) const; - void add(Value* V, uint32_t num); + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); void clear(); - void erase(Value* v); + void erase(Value *v); unsigned size(); void setAliasAnalysis(AliasAnalysis* A) { AA = A; } AliasAnalysis *getAliasAnalysis() const { return AA; } @@ -162,30 +168,30 @@ template <> struct DenseMapInfo { static inline Expression getEmptyKey() { return Expression(Expression::EMPTY); } - + static inline Expression getTombstoneKey() { return Expression(Expression::TOMBSTONE); } - + static unsigned getHashValue(const Expression e) { unsigned hash = e.opcode; - + hash = e.firstVN + hash * 37; hash = e.secondVN + hash * 37; hash = e.thirdVN + hash * 37; - + hash = ((unsigned)((uintptr_t)e.type >> 4) ^ (unsigned)((uintptr_t)e.type >> 9)) + hash * 37; - + for (SmallVector::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) hash = *I + hash * 37; - + hash = ((unsigned)((uintptr_t)e.function >> 4) ^ (unsigned)((uintptr_t)e.function >> 9)) + hash * 37; - + return hash; } static bool isEqual(const Expression &LHS, const Expression &RHS) { @@ -201,7 +207,7 @@ template <> struct DenseMapInfo { Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { switch(BO->getOpcode()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Binary operator with unknown opcode?"); + llvm_unreachable("Binary operator with unknown opcode?"); case Instruction::Add: return Expression::ADD; case 
Instruction::FAdd: return Expression::FADD; case Instruction::Sub: return Expression::SUB; @@ -224,10 +230,10 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { } Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa(C) || isa(C)) { + if (isa(C)) { switch (C->getPredicate()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Comparison with unknown predicate?"); + llvm_unreachable("Comparison with unknown predicate?"); case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; case ICmpInst::ICMP_NE: return Expression::ICMPNE; case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; @@ -239,32 +245,32 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; } - } - assert((isa(C) || isa(C)) && "Unknown compare"); - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Comparison with unknown predicate?"); - case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + } else { + switch (C->getPredicate()) { + default: // THIS SHOULD NEVER HAPPEN + llvm_unreachable("Comparison with unknown predicate?"); + case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; + case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; + case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; + case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; + case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; + case FCmpInst::FCMP_ONE: return Expression::FCMPONE; + case FCmpInst::FCMP_ORD: return Expression::FCMPORD; + case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; + case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; + case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; + case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; + case FCmpInst::FCMP_ULT: return Expression::FCMPULT; + case FCmpInst::FCMP_ULE: return Expression::FCMPULE; + case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + } } } Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { switch(C->getOpcode()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Cast operator with unknown opcode?"); + llvm_unreachable("Cast operator with unknown opcode?"); case Instruction::Trunc: return Expression::TRUNC; case Instruction::ZExt: return Expression::ZEXT; case Instruction::SExt: return Expression::SEXT; @@ -282,126 +288,126 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { Expression ValueTable::create_expression(CallInst* C) { Expression e; - + e.type = C->getType(); e.firstVN = 0; e.secondVN = 0; e.thirdVN = 0; e.function = C->getCalledFunction(); e.opcode = Expression::CALL; - + for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + return e; } Expression 
ValueTable::create_expression(BinaryOperator* BO) { Expression e; - + e.firstVN = lookup_or_add(BO->getOperand(0)); e.secondVN = lookup_or_add(BO->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = BO->getType(); e.opcode = getOpcode(BO); - + return e; } Expression ValueTable::create_expression(CmpInst* C) { Expression e; - + e.firstVN = lookup_or_add(C->getOperand(0)); e.secondVN = lookup_or_add(C->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); - + return e; } Expression ValueTable::create_expression(CastInst* C) { Expression e; - + e.firstVN = lookup_or_add(C->getOperand(0)); e.secondVN = 0; e.thirdVN = 0; e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); - + return e; } Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - + e.firstVN = lookup_or_add(S->getOperand(0)); e.secondVN = lookup_or_add(S->getOperand(1)); e.thirdVN = lookup_or_add(S->getOperand(2)); e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; - + return e; } Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - + e.firstVN = lookup_or_add(E->getOperand(0)); e.secondVN = lookup_or_add(E->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; - + return e; } Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - + e.firstVN = lookup_or_add(I->getOperand(0)); e.secondVN = lookup_or_add(I->getOperand(1)); e.thirdVN = lookup_or_add(I->getOperand(2)); e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; - + return e; } Expression ValueTable::create_expression(SelectInst* I) { Expression e; - + e.firstVN = lookup_or_add(I->getCondition()); e.secondVN = lookup_or_add(I->getTrueValue()); e.thirdVN = lookup_or_add(I->getFalseValue()); e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; - + return e; } Expression ValueTable::create_expression(GetElementPtrInst* G) { Expression e; - + e.firstVN = lookup_or_add(G->getPointerOperand()); e.secondVN = 0; e.thirdVN = 0; e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; - + for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + return e; } @@ -410,21 +416,21 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) { //===----------------------------------------------------------------------===// /// add - Insert a value into the table with a specified value number. -void ValueTable::add(Value* V, uint32_t num) { +void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } /// lookup_or_add - Returns the value number for the specified value, assigning /// it a new number if it did not have one before. 
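Each Expression built by the create_expression overloads above ends up as a DenseMap key through the getHashValue specialization shown earlier. Isolated from LLVM, that hash is a chain of multiply-by-37 combines, with pointers mixed by xoring two right-shifts so the always-zero alignment bits don't dominate:

#include <cstdint>

unsigned shredPointer(const void *p) {
  return (unsigned)((uintptr_t)p >> 4) ^ (unsigned)((uintptr_t)p >> 9);
}

unsigned combine(unsigned hash, unsigned v) { return v + hash * 37; }

// Same field order as DenseMapInfo<Expression>::getHashValue: opcode, the
// three value numbers, the type pointer, the varargs, then the function.
unsigned hashExpression(unsigned opcode, unsigned firstVN, unsigned secondVN,
                        unsigned thirdVN, const void *type,
                        const unsigned *varargs, unsigned numVarargs,
                        const void *function) {
  unsigned hash = opcode;
  hash = combine(hash, firstVN);
  hash = combine(hash, secondVN);
  hash = combine(hash, thirdVN);
  hash = combine(hash, shredPointer(type));
  for (unsigned i = 0; i != numVarargs; ++i)
    hash = combine(hash, varargs[i]);
  hash = combine(hash, shredPointer(function));
  return hash;
}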
-uint32_t ValueTable::lookup_or_add(Value* V) { +uint32_t ValueTable::lookup_or_add(Value *V) { DenseMap::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) return VI->second; - + if (CallInst* C = dyn_cast(V)) { if (AA->doesNotAccessMemory(C)) { Expression e = create_expression(C); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -432,20 +438,20 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (AA->onlyReadsMemory(C)) { Expression e = create_expression(C); - + if (expressionNumbering.find(e) == expressionNumbering.end()) { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + MemDepResult local_dep = MD->getDependency(C); - + if (!local_dep.isDef() && !local_dep.isNonLocal()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; @@ -453,12 +459,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { if (local_dep.isDef()) { CallInst* local_cdep = cast(local_dep.getInst()); - + if (local_cdep->getNumOperands() != C->getNumOperands()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + for (unsigned i = 1; i < C->getNumOperands(); ++i) { uint32_t c_vn = lookup_or_add(C->getOperand(i)); uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); @@ -467,19 +473,19 @@ uint32_t ValueTable::lookup_or_add(Value* V) { return nextValueNumber++; } } - + uint32_t v = lookup_or_add(local_cdep); valueNumbering.insert(std::make_pair(V, v)); return v; } // Non-local case. - const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); // FIXME: call/call dependencies for readonly calls should return def, not // clobber! Move the checking logic to MemDep! CallInst* cdep = 0; - + // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { @@ -494,23 +500,23 @@ uint32_t ValueTable::lookup_or_add(Value* V) { cdep = 0; break; } - + CallInst *NonLocalDepCall = dyn_cast(I->second.getInst()); // FIXME: All duplicated with non-local case. 
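The read-only call case above is the elaborate path through lookup_or_add; every other instruction kind reduces to the same memoized shape. A toy rendering, with a string standing in for the Expression key and void* for Value*:

#include <cstdint>
#include <map>
#include <string>

struct ToyValueTable {
  std::map<const void *, uint32_t> valueNumbering;      // value -> number
  std::map<std::string, uint32_t> expressionNumbering;  // expr key -> number
  uint32_t nextValueNumber = 1;

  uint32_t lookupOrAdd(const void *v, const std::string &exprKey) {
    auto vi = valueNumbering.find(v);
    if (vi != valueNumbering.end())
      return vi->second;                    // value already numbered
    auto ei = expressionNumbering.find(exprKey);
    uint32_t num = (ei != expressionNumbering.end())
                       ? ei->second         // structurally equal expr seen
                       : nextValueNumber++; // brand new equivalence class
    expressionNumbering.emplace(exprKey, num);
    valueNumbering.emplace(v, num);
    return num;
  }
};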
if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ cdep = NonLocalDepCall; continue; } - + cdep = 0; break; } - + if (!cdep) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + if (cdep->getNumOperands() != C->getNumOperands()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; @@ -523,18 +529,18 @@ uint32_t ValueTable::lookup_or_add(Value* V) { return nextValueNumber++; } } - + uint32_t v = lookup_or_add(cdep); valueNumbering.insert(std::make_pair(V, v)); return v; - + } else { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } } else if (BinaryOperator* BO = dyn_cast(V)) { Expression e = create_expression(BO); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -542,12 +548,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (CmpInst* C = dyn_cast(V)) { Expression e = create_expression(C); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -555,12 +561,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (ShuffleVectorInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -568,12 +574,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (ExtractElementInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -581,12 +587,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (InsertElementInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -594,12 +600,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (SelectInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -607,12 +613,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (CastInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + 
DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -620,12 +626,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (GetElementPtrInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -633,7 +639,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else { @@ -644,7 +650,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) { /// lookup - Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t ValueTable::lookup(Value* V) const { +uint32_t ValueTable::lookup(Value *V) const { DenseMap::iterator VI = valueNumbering.find(V); assert(VI != valueNumbering.end() && "Value not numbered?"); return VI->second; @@ -658,7 +664,7 @@ void ValueTable::clear() { } /// erase - Remove a value from the value numbering -void ValueTable::erase(Value* V) { +void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -676,17 +682,17 @@ void ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// namespace { - struct VISIBILITY_HIDDEN ValueNumberScope { + struct ValueNumberScope { ValueNumberScope* parent; DenseMap table; - + ValueNumberScope(ValueNumberScope* p) : parent(p) { } }; } namespace { - class VISIBILITY_HIDDEN GVN : public FunctionPass { + class GVN : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid @@ -698,45 +704,35 @@ namespace { ValueTable VN; DenseMap localAvail; - - typedef DenseMap > PhiMapType; - PhiMapType phiMap; - - + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); - + AU.addPreserved(); AU.addPreserved(); } - + // Helper fuctions // FIXME: eliminate or document these better bool processLoad(LoadInst* L, SmallVectorImpl &toErase); - bool processInstruction(Instruction* I, + bool processInstruction(Instruction *I, SmallVectorImpl &toErase); bool processNonLocalLoad(LoadInst* L, SmallVectorImpl &toErase); - bool processBlock(BasicBlock* BB); - Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, - DenseMap &Phis, - bool top_level = false); + bool processBlock(BasicBlock *BB); void dump(DenseMap& d); bool iterateOnFunction(Function &F); - Value* CollapsePhi(PHINode* p); - bool isSafeReplacement(PHINode* p, Instruction* inst); + Value *CollapsePhi(PHINode* p); bool performPRE(Function& F); - Value* lookupNumber(BasicBlock* BB, uint32_t num); - bool mergeBlockIntoPredecessor(BasicBlock* BB); - Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); + Value *lookupNumber(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; }; - + char GVN::ID = 0; } @@ -756,107 +752,31 @@ void GVN::dump(DenseMap& d) { printf("}\n"); } -Value* GVN::CollapsePhi(PHINode* p) { - Value* constVal = p->hasConstantValue(); - if 
(!constVal) return 0; - - Instruction* inst = dyn_cast(constVal); - if (!inst) - return constVal; - - if (DT->dominates(inst, p)) - if (isSafeReplacement(p, inst)) - return inst; - return 0; -} - -bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { +static bool isSafeReplacement(PHINode* p, Instruction *inst) { if (!isa(inst)) return true; - + for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) if (PHINode* use_phi = dyn_cast(UI)) if (use_phi->getParent() == inst->getParent()) return false; - + return true; } -/// GetValueForBlock - Get the value to use within the specified basic block. -/// available values are in Phis. -Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, - DenseMap &Phis, - bool top_level) { - - // If we have already computed this value, return the previously computed val. - DenseMap::iterator V = Phis.find(BB); - if (V != Phis.end() && !top_level) return V->second; - - // If the block is unreachable, just return undef, since this path - // can't actually occur at runtime. - if (!DT->isReachableFromEntry(BB)) - return Phis[BB] = Context->getUndef(orig->getType()); - - if (BasicBlock *Pred = BB->getSinglePredecessor()) { - Value *ret = GetValueForBlock(Pred, orig, Phis); - Phis[BB] = ret; - return ret; - } +Value *GVN::CollapsePhi(PHINode *PN) { + Value *ConstVal = PN->hasConstantValue(DT); + if (!ConstVal) return 0; - // Get the number of predecessors of this block so we can reserve space later. - // If there is already a PHI in it, use the #preds from it, otherwise count. - // Getting it from the PHI is constant time. - unsigned NumPreds; - if (PHINode *ExistingPN = dyn_cast(BB->begin())) - NumPreds = ExistingPN->getNumIncomingValues(); - else - NumPreds = std::distance(pred_begin(BB), pred_end(BB)); - - // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so - // now, then get values to fill in the incoming values for the PHI. - PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", - BB->begin()); - PN->reserveOperandSpace(NumPreds); - - Phis.insert(std::make_pair(BB, PN)); - - // Fill in the incoming values for the block. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - Value* val = GetValueForBlock(*PI, orig, Phis); - PN->addIncoming(val, *PI); - } - - VN.getAliasAnalysis()->copyValue(orig, PN); - - // Attempt to collapse PHI nodes that are trivially redundant - Value* v = CollapsePhi(PN); - if (!v) { - // Cache our phi construction results - if (LoadInst* L = dyn_cast(orig)) - phiMap[L->getPointerOperand()].insert(PN); - else - phiMap[orig].insert(PN); - - return PN; - } - - PN->replaceAllUsesWith(v); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); - - for (DenseMap::iterator I = Phis.begin(), - E = Phis.end(); I != E; ++I) - if (I->second == PN) - I->second = v; - - DEBUG(cerr << "GVN removed: " << *PN); - MD->removeInstruction(PN); - PN->eraseFromParent(); - DEBUG(verifyRemoved(PN)); - - Phis[BB] = v; - return v; + Instruction *Inst = dyn_cast(ConstVal); + if (!Inst) + return ConstVal; + + if (DT->dominates(Inst, PN)) + if (isSafeReplacement(PN, Inst)) + return Inst; + return 0; } /// IsValueFullyAvailableInBlock - Return true if we can prove that the value @@ -869,11 +789,11 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, /// currently speculating that it will be. /// 3) we are speculating for this block and have used that to speculate for /// other blocks. 
-static bool IsValueFullyAvailableInBlock(BasicBlock *BB, +static bool IsValueFullyAvailableInBlock(BasicBlock *BB, DenseMap &FullyAvailableBlocks) { // Optimistically assume that the block is fully available and check to see // if we already know about this block in one lookup. - std::pair::iterator, char> IV = + std::pair::iterator, char> IV = FullyAvailableBlocks.insert(std::make_pair(BB, 2)); // If the entry already existed for this block, return the precomputed value. @@ -884,29 +804,29 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, IV.first->second = 3; return IV.first->second != 0; } - + // Otherwise, see if it is fully available in all predecessors. pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - + // If this block has no predecessors, it isn't live-in here. if (PI == PE) goto SpeculationFailure; - + for (; PI != PE; ++PI) // If the value isn't fully available in one of our predecessors, then it // isn't fully available in this block either. Undo our previous // optimistic assumption and bail out. if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) goto SpeculationFailure; - + return true; - + // SpeculationFailure - If we get here, we found out that this is not, after // all, a fully-available block. We have a problem if we speculated on this and // used the speculation to mark other blocks as available. SpeculationFailure: char &BBVal = FullyAvailableBlocks[BB]; - + // If we didn't speculate on this, just return with it set to false. if (BBVal == 2) { BBVal = 0; @@ -918,7 +838,7 @@ SpeculationFailure: // 0 if set to one. SmallVector BBWorklist; BBWorklist.push_back(BB); - + while (!BBWorklist.empty()) { BasicBlock *Entry = BBWorklist.pop_back_val(); // Note that this sets blocks to 0 (unavailable) if they happen to not @@ -928,24 +848,372 @@ SpeculationFailure: // Mark as unavailable. EntryVal = 0; - + for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) BBWorklist.push_back(*I); } - + return false; } + +/// CanCoerceMustAliasedValueToLoad - Return true if +/// CoerceAvailableValueToLoadType will succeed. +static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, + const Type *LoadTy, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (isa(LoadTy) || isa(LoadTy) || + isa(StoredVal->getType()) || + isa(StoredVal->getType())) + return false; + + // The store has to be at least as big as the load. + if (TD.getTypeSizeInBits(StoredVal->getType()) < + TD.getTypeSizeInBits(LoadTy)) + return false; + + return true; +} + + +/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace and +/// InsertPt is the place to insert new instructions. +/// +/// If we can't do it, return null. +static Value *CoerceAvailableValueToLoadType(Value *StoredVal, + const Type *LoadedTy, + Instruction *InsertPt, + const TargetData &TD) { + if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) + return 0; + + const Type *StoredValTy = StoredVal->getType(); + + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + + // If the store and reload are the same size, we can always reuse it. 
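Boiled down, CanCoerceMustAliasedValueToLoad rejects first-class aggregates (no single bitcast can reinterpret them) and stores narrower than the load; everything else can be coerced. The same test on plain bit counts:

#include <cstdint>

// Mirror of CanCoerceMustAliasedValueToLoad, with the type queries
// flattened into two inputs.
bool canCoerce(uint64_t storeSizeInBits, uint64_t loadSizeInBits,
               bool eitherSideIsStructOrArray) {
  if (eitherSideIsStructOrArray)
    return false;                 // can't bitcast aggregates to integer
  return storeSizeInBits >= loadSizeInBits;
}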
+ if (StoreSize == LoadSize) { + if (isa(StoredValTy) && isa(LoadedTy)) { + // Pointer to Pointer -> use bitcast. + return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); + } + + // Convert source pointers to integers, which can be bitcast. + if (isa(StoredValTy)) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + const Type *TypeToCastTo = LoadedTy; + if (isa(TypeToCastTo)) + TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); + + if (StoredValTy != TypeToCastTo) + StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); + + // Cast to pointer if the load needs a pointer type. + if (isa(LoadedTy)) + StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); + + return StoredVal; + } + + // If the loaded value is smaller than the available value, then we can + // extract out a piece from it. If the available value is too small, then we + // can't do anything. + assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); + + // Convert source pointers to integers, which can be manipulated. + if (isa(StoredValTy)) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + // Convert vectors and fp to integer, which can be manipulated. + if (!isa(StoredValTy)) { + StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); + StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); + } + + // If this is a big-endian system, we need to shift the value down to the low + // bits so that a truncate will work. + if (TD.isBigEndian()) { + Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); + StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); + } + + // Truncate the integer to the right size now. + const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); + StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); + + if (LoadedTy == NewIntTy) + return StoredVal; + + // If the result is a pointer, inttoptr. + if (isa(LoadedTy)) + return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); + + // Otherwise, bitcast. + return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); +} + +/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can +/// be expressed as a base pointer plus a constant offset. Return the base and +/// offset to the caller. +static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const TargetData &TD) { + Operator *PtrOp = dyn_cast(Ptr); + if (PtrOp == 0) return Ptr; + + // Just look through bitcasts. + if (PtrOp->getOpcode() == Instruction::BitCast) + return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); + + // If this is a GEP with constant indices, we can look through it. + GEPOperator *GEP = dyn_cast(PtrOp); + if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = cast(*I); + if (OpC->isZero()) continue; + + // Handle a struct and array indices which add their offset to the pointer. 
+ if (const StructType *STy = dyn_cast(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += OpC->getSExtValue()*Size; + } + } + + // Re-sign extend from the pointer size if needed to get overflow edge cases + // right. + unsigned PtrSize = TD.getPointerSizeInBits(); + if (PtrSize < 64) + Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + + return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); +} + + +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. This returns -1 if we have to +/// give up, or a byte number in the stored value of the piece that feeds the +/// load. +static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (isa(L->getType()) || isa(L->getType()) || + isa(DepSI->getOperand(0)->getType()) || + isa(DepSI->getOperand(0)->getType())) + return -1; + + int64_t StoreOffset = 0, LoadOffset = 0; + Value *StoreBase = + GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); + Value *LoadBase = + GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); + if (StoreBase != LoadBase) + return -1; + + // If the load and store are to the exact same address, they should have been + // a must alias. AA must have gotten confused. + // FIXME: Study to see if/when this happens. + if (LoadOffset == StoreOffset) { +#if 0 + errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" + << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" + << "Load Ptr = " << *L->getPointerOperand() << "\n" + << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; + errs() << "'" << L->getParent()->getParent()->getName() << "'" + << *L->getParent(); +#endif + return -1; + } + + // If the load and store don't overlap at all, the store doesn't provide + // anything to the load. In this case, they really don't alias at all, AA + // must have gotten confused. + // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then + // remove this check, as it is duplicated with what we have below. + uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + uint64_t LoadSize = TD.getTypeSizeInBits(L->getType()); + + if ((StoreSize & 7) | (LoadSize & 7)) + return -1; + StoreSize >>= 3; // Convert to bytes. 
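Two steps of this clobber analysis read well in isolation: the shift pair that re-sign-extends an offset accumulated at pointer width, and the containment test (completed a few lines below) that decides whether the store supplies every byte of the load. On plain integers, keeping the same pre-C++20 signed-shift idiom the original uses:

#include <cstdint>

// Mirrors "(Offset << (64-PtrSize)) >> (64-PtrSize)": reinterpret the low
// ptrBits of 'offset' as a signed quantity of that width.
int64_t signExtendFromPointerWidth(int64_t offset, unsigned ptrBits) {
  if (ptrBits < 64)
    offset = (offset << (64 - ptrBits)) >> (64 - ptrBits);
  return offset;
}

// Returns the byte offset of the load inside the store, or -1 when the
// store cannot feed the load: the same contract as
// AnalyzeLoadFromClobberingStore once both accesses share a base.
int loadOffsetInStore(int64_t storeOff, uint64_t storeBytes,
                      int64_t loadOff, uint64_t loadBytes) {
  bool disjoint = storeOff + (int64_t)storeBytes <= loadOff ||
                  loadOff + (int64_t)loadBytes <= storeOff;
  if (disjoint)
    return -1;   // no overlap at all: alias analysis was too conservative
  if (storeOff > loadOff ||
      storeOff + (int64_t)storeBytes < loadOff + (int64_t)loadBytes)
    return -1;   // the load sticks out past the stored bytes
  return (int)(loadOff - storeOff);
}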
+ LoadSize >>= 3; + + + bool isAAFailure = false; + if (StoreOffset < LoadOffset) { + isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; + } else { + isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; + } + if (isAAFailure) { +#if 0 + errs() << "STORE LOAD DEP WITH COMMON BASE:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" + << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" + << "Load Ptr = " << *L->getPointerOperand() << "\n" + << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; + errs() << "'" << L->getParent()->getParent()->getName() << "'" + << *L->getParent(); +#endif + return -1; + } + + // If the Load isn't completely contained within the stored bits, we don't + // have all the bits to feed it. We could do something crazy in the future + // (issue a smaller load then merge the bits in) but this seems unlikely to be + // valuable. + if (StoreOffset > LoadOffset || + StoreOffset+StoreSize < LoadOffset+LoadSize) + return -1; + + // Okay, we can do this transformation. Return the number of bytes into the + // store that the load is. + return LoadOffset-StoreOffset; +} + + +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, + const Type *LoadTy, + Instruction *InsertPt, const TargetData &TD){ + LLVMContext &Ctx = SrcVal->getType()->getContext(); + + uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + + // Compute which bits of the stored value are being used by the load. Convert + // to an integer type to start with. + if (isa(SrcVal->getType())) + SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt); + if (!isa(SrcVal->getType())) + SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), + "tmp", InsertPt); + + // Shift the bits to the least significant depending on endianness. + unsigned ShiftAmt; + if (TD.isLittleEndian()) { + ShiftAmt = Offset*8; + } else { + ShiftAmt = (StoreSize-LoadSize-Offset)*8; + } + + if (ShiftAmt) + SrcVal = BinaryOperator::CreateLShr(SrcVal, + ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); + + if (LoadSize != StoreSize) + SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), + "tmp", InsertPt); + + return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); +} + +struct AvailableValueInBlock { + /// BB - The basic block in question. + BasicBlock *BB; + /// V - The value that is live out of the block. + Value *V; + /// Offset - The byte offset in V that is interesting for the load query. + unsigned Offset; + + static AvailableValueInBlock get(BasicBlock *BB, Value *V, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.V = V; + Res.Offset = Offset; + return Res; + } +}; + +/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, +/// construct SSA form, allowing us to eliminate LI. This returns the value +/// that should be used at LI's definition site. 
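The heart of GetStoreValueForLoad above is the endian-dependent shift selection before the truncate. A worked version on plain integers; the real code emits LShr and Trunc instructions rather than computing values directly:

#include <cassert>
#include <cstdint>

// Extract loadSize bytes at byte 'offset' of a storeSize-byte stored value.
uint64_t extractLoadedBits(uint64_t storedBits, unsigned storeSize,
                           unsigned loadSize, unsigned offset,
                           bool littleEndian) {
  unsigned shiftAmt = littleEndian
                          ? offset * 8                           // low bytes
                          : (storeSize - loadSize - offset) * 8; // high bytes
  uint64_t v = storedBits >> shiftAmt;
  if (loadSize < 8)
    v &= (1ULL << (loadSize * 8)) - 1;   // the TruncInst step
  return v;
}

int main() {
  // Byte 1 of the little-endian store 0x0123456789abcdef is 0xcd.
  assert(extractLoadedBits(0x0123456789abcdefULL, 8, 1, 1, true) == 0xcd);
}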
+static Value *ConstructSSAForLoadSet(LoadInst *LI, + SmallVectorImpl &ValuesPerBlock, + const TargetData *TD, + AliasAnalysis *AA) { + SmallVector NewPHIs; + SSAUpdater SSAUpdate(&NewPHIs); + SSAUpdate.Initialize(LI); + + const Type *LoadTy = LI->getType(); + + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + BasicBlock *BB = ValuesPerBlock[i].BB; + Value *AvailableVal = ValuesPerBlock[i].V; + unsigned Offset = ValuesPerBlock[i].Offset; + + if (SSAUpdate.HasValueForBlock(BB)) + continue; + + if (AvailableVal->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, + BB->getTerminator(), *TD); + + if (Offset) { + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" + << *ValuesPerBlock[i].V << '\n' + << *AvailableVal << '\n' << "\n\n\n"); + } + + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" + << *ValuesPerBlock[i].V << '\n' + << *AvailableVal << '\n' << "\n\n\n"); + } + + SSAUpdate.AddAvailableValue(BB, AvailableVal); + } + + // Perform PHI construction. + Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); + + // If new PHI nodes were created, notify alias analysis. + if (isa(V->getType())) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + AA->copyValue(LI, NewPHIs[i]); + + return V; +} + /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl &toErase) { // Find the non-local dependencies of the load. - SmallVector Deps; + SmallVector Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); - //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); - + //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " + // << Deps.size() << *LI << '\n'); + // If we had to process more than one hundred blocks to find the // dependencies, this load isn't worth worrying about. Optimizing // it will be too expensive. @@ -956,106 +1224,124 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // clobber in the current block. Reject this early. if (Deps.size() == 1 && Deps[0].second.isClobber()) { DEBUG( - DOUT << "GVN: non-local load "; - WriteAsOperand(*DOUT.stream(), LI); - DOUT << " is clobbered by " << *Deps[0].second.getInst(); + errs() << "GVN: non-local load "; + WriteAsOperand(errs(), LI); + errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n'; ); return false; } - + // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). - SmallVector, 16> ValuesPerBlock; + SmallVector ValuesPerBlock; SmallVector UnavailableBlocks; + + const TargetData *TD = 0; for (unsigned i = 0, e = Deps.size(); i != e; ++i) { BasicBlock *DepBB = Deps[i].first; MemDepResult DepInfo = Deps[i].second; - + if (DepInfo.isClobber()) { + // If the dependence is to a store that writes to a superset of the bits + // read by the load, we can extract the bits we need for the load from the + // stored value. 
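ConstructSSAForLoadSet above delegates the actual PHI placement to SSAUpdater. A toy model of the contract it leans on: record one available value per block, then ask for the merged value at the join. The toy only reports whether a PHI would be needed, where the real updater inserts one:

#include <map>
#include <optional>

struct ToySSAUpdater {
  std::map<int, long long> avail;   // block id -> available value

  bool hasValueForBlock(int bb) const { return avail.count(bb) != 0; }
  void addAvailableValue(int bb, long long v) { avail[bb] = v; }

  // If every recorded block agrees, no PHI is needed and the common value
  // can feed the load directly; otherwise a PHI must merge them.
  std::optional<long long> uniqueValue() const {
    if (avail.empty())
      return std::nullopt;
    long long first = avail.begin()->second;
    for (const auto &kv : avail)
      if (kv.second != first)
        return std::nullopt;        // mixed values: a PHI would be inserted
    return first;
  }
};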
+ if (StoreInst *DepSI = dyn_cast(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable(); + if (TD) { + int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + DepSI->getOperand(0), + Offset)); + continue; + } + } + } + + // FIXME: Handle memset/memcpy. UnavailableBlocks.push_back(DepBB); continue; } - + Instruction *DepInst = DepInfo.getInst(); - + // Loading the allocation -> undef. - if (isa(DepInst)) { - ValuesPerBlock.push_back(std::make_pair(DepBB, - Context->getUndef(LI->getType()))); + if (isa(DepInst) || isMalloc(DepInst)) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + UndefValue::get(LI->getType()))); continue; } - - if (StoreInst* S = dyn_cast(DepInst)) { - // Reject loads and stores that are to the same address but are of - // different types. - // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because - // of bitfield access, it would be interesting to optimize for it at some - // point. + + if (StoreInst *S = dyn_cast(DepInst)) { + // Reject loads and stores that are to the same address but are of + // different types if we have to. if (S->getOperand(0)->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), + LI->getType(), *TD)) { + UnavailableBlocks.push_back(DepBB); + continue; + } } - - ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); - - } else if (LoadInst* LD = dyn_cast(DepInst)) { + + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + S->getOperand(0))); + continue; + } + + if (LoadInst *LD = dyn_cast(DepInst)) { + // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ + UnavailableBlocks.push_back(DepBB); + continue; + } } - ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); - } else { - UnavailableBlocks.push_back(DepBB); + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); continue; } + + UnavailableBlocks.push_back(DepBB); + continue; } - + // If we have no predecessors that produce a known value for this load, exit // early. if (ValuesPerBlock.empty()) return false; - + // If all of the instructions we depend on produce a known value for this // load, then it is fully redundant and we can use PHI insertion to compute // its value. Insert PHIs and remove the fully redundant value now. if (UnavailableBlocks.empty()) { - // Use cached PHI construction information from previous runs - SmallPtrSet &p = phiMap[LI->getPointerOperand()]; - // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? 
- for (SmallPtrSet::iterator I = p.begin(), E = p.end(); - I != E; ++I) { - if ((*I)->getParent() == LI->getParent()) { - DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); - LI->replaceAllUsesWith(*I); - if (isa((*I)->getType())) - MD->invalidateCachedPointerInfo(*I); - toErase.push_back(LI); - NumGVNLoad++; - return true; - } - - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); - } - - DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); + DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - DenseMap BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); // Perform PHI construction. - Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); - - if (isa(v)) - v->takeName(LI); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, + VN.getAliasAnalysis()); + LI->replaceAllUsesWith(V); + + if (isa(V)) + V->takeName(LI); + if (isa(V->getType())) + MD->invalidateCachedPointerInfo(V); toErase.push_back(LI); NumGVNLoad++; return true; } - + if (!EnablePRE || !EnableLoadPRE) return false; @@ -1066,7 +1352,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // prefer to not increase code size. As such, we only do this when we know // that we only have to insert *one* load (which means we're basically moving // the load, not inserting a new one). - + SmallPtrSet Blockers; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) Blockers.insert(UnavailableBlocks[i]); @@ -1090,28 +1376,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (TmpBB->getTerminator()->getNumSuccessors() != 1) allSingleSucc = false; } - + assert(TmpBB); LoadBB = TmpBB; - + // If we have a repl set with LI itself in it, this means we have a loop where // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].second == LI) + if (ValuesPerBlock[i].V == LI) return false; - + if (isSinglePred) { bool isHot = false; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (Instruction *I = dyn_cast(ValuesPerBlock[i].second)) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (DT->dominates(LI, I)) { - isHot = true; - break; - } + if (Instruction *I = dyn_cast(ValuesPerBlock[i].V)) + // "Hot" Instruction is in some loop (because it dominates its dep. + // instruction). + if (DT->dominates(LI, I)) { + isHot = true; + break; + } // We are interested only in "hot" instructions. We don't want to do any // mis-optimizations here. @@ -1128,7 +1414,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DenseMap FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - FullyAvailableBlocks[ValuesPerBlock[i].first] = true; + FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; @@ -1136,33 +1422,33 @@ bool GVN::processNonLocalLoad(LoadInst *LI, PI != E; ++PI) { if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) continue; - + // If this load is not available in multiple predecessors, reject it. 
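This loop seeds IsValueFullyAvailableInBlock, whose four-state scheme (0 unavailable, 1 available, 2 optimistically assumed, 3 assumption relied upon) was described with the function itself. A simplified standalone walk over a toy predecessor map; it keeps the optimistic insert-then-verify structure but omits the SpeculationFailure pass that un-marks blocks which trusted a failed assumption:

#include <map>
#include <vector>

bool fullyAvailable(int bb, const std::map<int, std::vector<int>> &preds,
                    std::map<int, char> &state) {
  // Optimistically assume availability (state 2) in one map operation.
  auto ins = state.insert({bb, 2});
  if (!ins.second)
    return ins.first->second != 0;  // already decided or being speculated
  auto it = preds.find(bb);
  if (it == preds.end() || it->second.empty()) {
    state[bb] = 0;                  // no predecessors: value not live-in
    return false;
  }
  for (int p : it->second)
    if (!fullyAvailable(p, preds, state)) {
      state[bb] = 0;                // a predecessor lacks the value
      return false;
    }
  state[bb] = 1;                    // proven available
  return true;
}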
if (UnavailablePred && UnavailablePred != *PI) return false; UnavailablePred = *PI; } - + assert(UnavailablePred != 0 && "Fully available value should be eliminated above!"); - + // If the loaded pointer is PHI node defined in this block, do PHI translation // to get its value in the predecessor. Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); - + // Make sure the value is live in the predecessor. If it was defined by a // non-PHI instruction in this block, we don't know how to recompute it above. if (Instruction *LPInst = dyn_cast(LoadPtr)) if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { - DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " - << *LPInst << *LI << "\n"); + DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " + << *LPInst << '\n' << *LI << "\n"); return false; } - + // We don't currently handle critical edges :( if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << UnavailablePred->getName() << "': " << *LI); + DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + << UnavailablePred->getName() << "': " << *LI << '\n'); return false; } @@ -1182,28 +1468,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. - DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); - + DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, LI->getAlignment(), UnavailablePred->getTerminator()); - - SmallPtrSet &p = phiMap[LI->getPointerOperand()]; - for (SmallPtrSet::iterator I = p.begin(), E = p.end(); - I != E; ++I) - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); - - DenseMap BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); - BlockReplValues[UnavailablePred] = NewLoad; - + + // Add the newly created load. + ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad)); + // Perform PHI construction. - Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); - if (isa(v)) - v->takeName(LI); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, + VN.getAliasAnalysis()); + LI->replaceAllUsesWith(V); + if (isa(V)) + V->takeName(LI); + if (isa(V->getType())) + MD->invalidateCachedPointerInfo(V); toErase.push_back(LI); NumPRELoad++; return true; @@ -1214,64 +1495,119 @@ bool GVN::processNonLocalLoad(LoadInst *LI, bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { if (L->isVolatile()) return false; - - Value* pointer = L->getPointerOperand(); // ... to a pointer that has been loaded from before... - MemDepResult dep = MD->getDependency(L); - + MemDepResult Dep = MD->getDependency(L); + // If the value isn't available, don't do anything! - if (dep.isClobber()) { + if (Dep.isClobber()) { + // FIXME: We should handle memset/memcpy/memmove as dependent instructions + // to forward the value if available. 
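
// [Illustrative sketch -- editor's addition, not part of this patch.]  The
// core arithmetic behind AnalyzeLoadFromClobberingStore: once the load and
// the clobbering store are rewritten as a shared base plus constant byte
// offsets, the load is forwardable iff the store's range covers it, and the
// result is the load's byte offset into the stored bits (-1 means give up).
// The bitfield example spelled out in the comment block that follows is
// exactly this shape.  Standalone model with invented names:
#include <cstdio>

static int loadOffsetInStore(long StoreOff, long StoreSize,
                             long LoadOff, long LoadSize) {
  if (LoadOff < StoreOff) return -1;                        // starts too early
  if (LoadOff + LoadSize > StoreOff + StoreSize) return -1; // runs past the end
  return (int)(LoadOff - StoreOff);
}

int main() {
  std::printf("%d\n", loadOffsetInStore(0, 4, 1, 1)); // 1: i8 inside the i32
  std::printf("%d\n", loadOffsetInStore(0, 4, 3, 2)); // -1: partially outside
  return 0;
}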
+ //if (isa(Dep.getInst())) + //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n"; + + // Check to see if we have something like this: + // store i32 123, i32* %P + // %A = bitcast i32* %P to i8* + // %B = gep i8* %A, i32 1 + // %C = load i8* %B + // + // We could do that by recognizing if the clobber instructions are obviously + // a common base + constant offset, and if the previous store (or memset) + // completely covers this load. This sort of thing can happen in bitfield + // access code. + if (StoreInst *DepSI = dyn_cast(Dep.getInst())) + if (const TargetData *TD = getAnalysisIfAvailable()) { + int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD); + if (Offset != -1) { + Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + L->getType(), L, *TD); + DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' + << *AvailVal << '\n' << *L << "\n\n\n"); + + // Replace the load! + L->replaceAllUsesWith(AvailVal); + if (isa(AvailVal->getType())) + MD->invalidateCachedPointerInfo(AvailVal); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + } + DEBUG( // fast print dep, using operator<< on instruction would be too slow - DOUT << "GVN: load "; - WriteAsOperand(*DOUT.stream(), L); - Instruction *I = dep.getInst(); - DOUT << " is clobbered by " << *I; + errs() << "GVN: load "; + WriteAsOperand(errs(), L); + Instruction *I = Dep.getInst(); + errs() << " is clobbered by " << *I << '\n'; ); return false; } // If it is defined in another block, try harder. - if (dep.isNonLocal()) + if (Dep.isNonLocal()) return processNonLocalLoad(L, toErase); - Instruction *DepInst = dep.getInst(); + Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! - if (DepSI->getPointerOperand()->getType() != pointer->getType()) - return false; + Value *StoredVal = DepSI->getOperand(0); + // The store and load are to a must-aliased pointer, but they may not + // actually have the same type. See if we know how to reuse the stored + // value (depending on its type). + const TargetData *TD = 0; + if (StoredVal->getType() != L->getType() && + (TD = getAnalysisIfAvailable())) { + StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), + L, *TD); + if (StoredVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); + } + // Remove it! - L->replaceAllUsesWith(DepSI->getOperand(0)); - if (isa(DepSI->getOperand(0)->getType())) - MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); + L->replaceAllUsesWith(StoredVal); + if (isa(StoredVal->getType())) + MD->invalidateCachedPointerInfo(StoredVal); toErase.push_back(L); NumGVNLoad++; return true; } if (LoadInst *DepLI = dyn_cast(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. - if (DepLI->getType() != L->getType()) - return false; + Value *AvailableVal = DepLI; + + // The loads are of a must-aliased pointer, but they may not actually have + // the same type. See if we know how to reuse the previously loaded value + // (depending on its type). 
+ const TargetData *TD = 0; + if (DepLI->getType() != L->getType() && + (TD = getAnalysisIfAvailable())) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (AvailableVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); + } // Remove it! - L->replaceAllUsesWith(DepLI); + L->replaceAllUsesWith(AvailableVal); if (isa(DepLI->getType())) MD->invalidateCachedPointerInfo(DepLI); toErase.push_back(L); NumGVNLoad++; return true; } - + // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa(DepInst)) { - L->replaceAllUsesWith(Context->getUndef(L->getType())); + if (isa(DepInst) || isMalloc(DepInst)) { + L->replaceAllUsesWith(UndefValue::get(L->getType())); toErase.push_back(L); NumGVNLoad++; return true; @@ -1280,150 +1616,93 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { return false; } -Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { +Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { DenseMap::iterator I = localAvail.find(BB); if (I == localAvail.end()) return 0; - - ValueNumberScope* locals = I->second; - - while (locals) { - DenseMap::iterator I = locals->table.find(num); - if (I != locals->table.end()) + + ValueNumberScope *Locals = I->second; + while (Locals) { + DenseMap::iterator I = Locals->table.find(num); + if (I != Locals->table.end()) return I->second; - else - locals = locals->parent; + Locals = Locals->parent; } - + return 0; } -/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination -/// by inheritance from the dominator fails, see if we can perform phi -/// construction to eliminate the redundancy. -Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { - BasicBlock* BaseBlock = orig->getParent(); - - SmallPtrSet Visited; - SmallVector Stack; - Stack.push_back(BaseBlock); - - DenseMap Results; - - // Walk backwards through our predecessors, looking for instances of the - // value number we're looking for. Instances are recorded in the Results - // map, which is then used to perform phi construction. - while (!Stack.empty()) { - BasicBlock* Current = Stack.back(); - Stack.pop_back(); - - // If we've walked all the way to a proper dominator, then give up. Cases - // where the instance is in the dominator will have been caught by the fast - // path, and any cases that require phi construction further than this are - // probably not worth it anyways. Note that this is a SIGNIFICANT compile - // time improvement. - if (DT->properlyDominates(Current, orig->getParent())) return 0; - - DenseMap::iterator LA = - localAvail.find(Current); - if (LA == localAvail.end()) return 0; - DenseMap::iterator V = LA->second->table.find(valno); - - if (V != LA->second->table.end()) { - // Found an instance, record it. - Results.insert(std::make_pair(Current, V->second)); - continue; - } - - // If we reach the beginning of the function, then give up. - if (pred_begin(Current) == pred_end(Current)) - return 0; - - for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); - PI != PE; ++PI) - if (Visited.insert(*PI)) - Stack.push_back(*PI); - } - - // If we didn't find instances, give up. Otherwise, perform phi construction. 
- if (Results.size() == 0) - return 0; - else - return GetValueForBlock(BaseBlock, orig, Results, true); -} /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl &toErase) { - if (LoadInst* L = dyn_cast(I)) { - bool changed = processLoad(L, toErase); - - if (!changed) { - unsigned num = VN.lookup_or_add(L); - localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); + if (LoadInst *LI = dyn_cast(I)) { + bool Changed = processLoad(LI, toErase); + + if (!Changed) { + unsigned Num = VN.lookup_or_add(LI); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI)); } - - return changed; + + return Changed; } - - uint32_t nextNum = VN.getNextUnusedValueNumber(); - unsigned num = VN.lookup_or_add(I); - - if (BranchInst* BI = dyn_cast(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); + + if (BranchInst *BI = dyn_cast(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + if (!BI->isConditional() || isa(BI->getCondition())) return false; - - Value* branchCond = BI->getCondition(); - uint32_t condVN = VN.lookup_or_add(branchCond); - - BasicBlock* trueSucc = BI->getSuccessor(0); - BasicBlock* falseSucc = BI->getSuccessor(1); - - if (trueSucc->getSinglePredecessor()) - localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue(); - if (falseSucc->getSinglePredecessor()) - localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse(); + + Value *BranchCond = BI->getCondition(); + uint32_t CondVN = VN.lookup_or_add(BranchCond); + + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + + if (TrueSucc->getSinglePredecessor()) + localAvail[TrueSucc]->table[CondVN] = + ConstantInt::getTrue(TrueSucc->getContext()); + if (FalseSucc->getSinglePredecessor()) + localAvail[FalseSucc]->table[CondVN] = + ConstantInt::getFalse(TrueSucc->getContext()); return false; - + // Allocations are always uniquely numbered, so we can save time and memory - // by fast failing them. + // by fast failing them. } else if (isa(I) || isa(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); return false; } - + // Collapse PHI nodes if (PHINode* p = dyn_cast(I)) { - Value* constVal = CollapsePhi(p); - + Value *constVal = CollapsePhi(p); + if (constVal) { - for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); - PI != PE; ++PI) - PI->second.erase(p); - p->replaceAllUsesWith(constVal); if (isa(constVal->getType())) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); - + toErase.push_back(p); } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + // If the number we were assigned was a brand new VN, then we don't // need to do a lookup to see if the number already exists // somewhere in the domtree: it can't! - } else if (num == nextNum) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + } else if (Num == NextNum) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + // Perform fast-path value-number based elimination of values inherited from // dominators. 
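
// [Illustrative sketch -- editor's addition, not part of this patch.]  The
// lookupNumber fallback used just below resolves a value number by walking
// ValueNumberScope parent links: a scoped symbol table keyed by dominator
// nesting, so a value defined in a dominator is visible in everything it
// dominates.  Minimal standalone model (invented names, int as the "value"):
#include <cstdio>
#include <map>

struct Scope {
  std::map<unsigned, int> Table; // value number -> available value
  Scope *Parent;                 // enclosing (dominating) scope, or null
};

static const int *lookupNumber(const Scope *S, unsigned Num) {
  for (; S; S = S->Parent) {     // innermost scope wins; fall back outward
    std::map<unsigned, int>::const_iterator I = S->Table.find(Num);
    if (I != S->Table.end()) return &I->second;
  }
  return 0;                      // not available in any dominator
}

int main() {
  Scope Outer; Outer.Parent = 0;
  Outer.Table[7] = 42;           // VN 7 defined in a dominating block
  Scope Inner; Inner.Parent = &Outer;
  const int *V = lookupNumber(&Inner, 7);
  std::printf("%d\n", V ? *V : -1); // 42: inherited from the dominator
  return 0;
}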
- } else if (Value* repl = lookupNumber(I->getParent(), num)) { + } else if (Value *repl = lookupNumber(I->getParent(), Num)) { // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); @@ -1432,21 +1711,10 @@ bool GVN::processInstruction(Instruction *I, toErase.push_back(I); return true; -#if 0 - // Perform slow-pathvalue-number based elimination with phi construction. - } else if (Value* repl = AttemptRedundancyElimination(I, num)) { - // Remove it! - VN.erase(I); - I->replaceAllUsesWith(repl); - if (isa(repl->getType())) - MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); - return true; -#endif } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + return false; } @@ -1457,35 +1725,35 @@ bool GVN::runOnFunction(Function& F) { VN.setAliasAnalysis(&getAnalysis()); VN.setMemDep(MD); VN.setDomTree(DT); - - bool changed = false; - bool shouldContinue = true; - + + bool Changed = false; + bool ShouldContinue = true; + // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock* BB = FI; + BasicBlock *BB = FI; ++FI; bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) NumGVNBlocks++; - - changed |= removedBlock; + + Changed |= removedBlock; } - + unsigned Iteration = 0; - - while (shouldContinue) { - DEBUG(cerr << "GVN iteration: " << Iteration << "\n"); - shouldContinue = iterateOnFunction(F); - changed |= shouldContinue; + + while (ShouldContinue) { + DEBUG(errs() << "GVN iteration: " << Iteration << "\n"); + ShouldContinue = iterateOnFunction(F); + Changed |= ShouldContinue; ++Iteration; } - + if (EnablePRE) { bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); - changed |= PREChanged; + Changed |= PREChanged; } } // FIXME: Should perform GVN again after PRE does something. PRE can move @@ -1495,27 +1763,27 @@ bool GVN::runOnFunction(Function& F) { cleanupGlobalSets(); - return changed; + return Changed; } -bool GVN::processBlock(BasicBlock* BB) { +bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and // incrementing BI before processing an instruction). SmallVector toErase; - bool changed_function = false; - + bool ChangedFunction = false; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - changed_function |= processInstruction(BI, toErase); + ChangedFunction |= processInstruction(BI, toErase); if (toErase.empty()) { ++BI; continue; } - + // If we need some instructions deleted, do it now. NumGVNInstr += toErase.size(); - + // Avoid iterator invalidation. 
     bool AtStart = BI == BB->begin();
     if (!AtStart)
       --BI;
@@ -1523,7 +1791,7 @@ bool GVN::processBlock(BasicBlock* BB) {
 
     for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
          E = toErase.end(); I != E; ++I) {
-      DEBUG(cerr << "GVN removed: " << **I);
+      DEBUG(errs() << "GVN removed: " << **I << '\n');
       MD->removeInstruction(*I);
       (*I)->eraseFromParent();
       DEBUG(verifyRemoved(*I));
@@ -1535,8 +1803,8 @@ bool GVN::processBlock(BasicBlock* BB) {
     if (AtStart)
       BI = BB->begin();
     else
      ++BI;
  }
-  
-  return changed_function;
+
+  return ChangedFunction;
 }
 
 /// performPRE - Perform a purely local form of PRE that looks for diamond
@@ -1547,32 +1815,33 @@ bool GVN::performPRE(Function& F) {
   DenseMap<BasicBlock*, Value*> predMap;
   for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
        DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
-    BasicBlock* CurrentBlock = *DI;
+    BasicBlock *CurrentBlock = *DI;
+
     // Nothing to PRE in the entry block.
     if (CurrentBlock == &F.getEntryBlock()) continue;
+
     for (BasicBlock::iterator BI = CurrentBlock->begin(),
          BE = CurrentBlock->end(); BI != BE; ) {
       Instruction *CurInst = BI++;
 
-      if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
-          isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) ||
+      if (isa<AllocationInst>(CurInst) ||
+          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
+          CurInst->getType()->isVoidTy() ||
           CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
           isa<DbgInfoIntrinsic>(CurInst))
         continue;
 
-      uint32_t valno = VN.lookup(CurInst);
-      
+      uint32_t ValNo = VN.lookup(CurInst);
+
       // Look for the predecessors for PRE opportunities.  We're
       // only trying to solve the basic diamond case, where
       // a value is computed in the successor and one predecessor,
       // but not the other.  We also explicitly disallow cases
       // where the successor is its own predecessor, because they're
       // more complicated to get right.
-      unsigned numWith = 0;
-      unsigned numWithout = 0;
-      BasicBlock* PREPred = 0;
+      unsigned NumWith = 0;
+      unsigned NumWithout = 0;
+      BasicBlock *PREPred = 0;
       predMap.clear();
 
       for (pred_iterator PI = pred_begin(CurrentBlock),
@@ -1581,59 +1850,59 @@ bool GVN::performPRE(Function& F) {
         // own predecessor, or in blocks with predecessors
         // that are not reachable.
         if (*PI == CurrentBlock) {
-          numWithout = 2;
+          NumWithout = 2;
           break;
         } else if (!localAvail.count(*PI))  {
-          numWithout = 2;
+          NumWithout = 2;
           break;
         }
-        
-        DenseMap<uint32_t, Value*>::iterator predV = 
-                                            localAvail[*PI]->table.find(valno);
+
+        DenseMap<uint32_t, Value*>::iterator predV =
+                                            localAvail[*PI]->table.find(ValNo);
         if (predV == localAvail[*PI]->table.end()) {
           PREPred = *PI;
-          numWithout++;
+          NumWithout++;
         } else if (predV->second == CurInst) {
-          numWithout = 2;
+          NumWithout = 2;
         } else {
           predMap[*PI] = predV->second;
-          numWith++;
+          NumWith++;
         }
       }
-      
+
       // Don't do PRE when it might increase code size, i.e. when
       // we would need to insert instructions in more than one pred.
-      if (numWithout != 1 || numWith == 0)
+      if (NumWithout != 1 || NumWith == 0)
         continue;
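
// [Illustrative sketch -- editor's addition, not part of this patch.]  The
// NumWith/NumWithout census above reduces to this predicate: PRE fires only
// in the diamond case, where exactly one predecessor lacks the value (so
// exactly one new instruction is inserted) and at least one already has it.
// Standalone model with invented names:
#include <cstdio>
#include <vector>

struct PredState { bool HasValue; bool IsSelfOrUnreachable; };

static bool shouldPRE(const std::vector<PredState> &Preds) {
  unsigned NumWith = 0, NumWithout = 0;
  for (size_t i = 0, e = Preds.size(); i != e; ++i) {
    if (Preds[i].IsSelfOrUnreachable) return false; // the NumWithout = 2 bail
    if (Preds[i].HasValue) ++NumWith; else ++NumWithout;
  }
  return NumWithout == 1 && NumWith != 0;
}

int main() {
  std::vector<PredState> Diamond;
  PredState Has = {true, false}, Lacks = {false, false};
  Diamond.push_back(Has);
  Diamond.push_back(Lacks);
  std::printf("%d\n", shouldPRE(Diamond)); // 1: insert once, PHI the results
  Diamond[0] = Lacks;
  std::printf("%d\n", shouldPRE(Diamond)); // 0: would grow code on two paths
  return 0;
}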
-      
+
       // We can't do PRE safely on a critical edge, so instead we schedule
       // the edge to be split and perform the PRE the next time we iterate
       // on the function.
-      unsigned succNum = 0;
+      unsigned SuccNum = 0;
       for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
            i != e; ++i)
         if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
-          succNum = i;
+          SuccNum = i;
           break;
         }
-        
-      if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
-        toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
+
+      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
+        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
         continue;
       }
-      
+
       // Instantiate the expression in the predecessor that lacked it.
       // Because we are going top-down through the block, all value numbers
       // will be available in the predecessor by the time we need them.  Any
       // that weren't originally present will have been instantiated earlier
       // in this loop.
-      Instruction* PREInstr = CurInst->clone();
+      Instruction *PREInstr = CurInst->clone();
       bool success = true;
       for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
         Value *Op = PREInstr->getOperand(i);
         if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
           continue;
+
         if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
           PREInstr->setOperand(i, V);
         } else {
@@ -1641,25 +1910,25 @@ bool GVN::performPRE(Function& F) {
           success = false;
           break;
         }
       }
-      
+
       // Fail out if we encounter an operand that is not available in
-      // the PRE predecessor.  This is typically because of loads which 
+      // the PRE predecessor.  This is typically because of loads which
       // are not value numbered precisely.
       if (!success) {
         delete PREInstr;
         DEBUG(verifyRemoved(PREInstr));
         continue;
       }
-      
+
       PREInstr->insertBefore(PREPred->getTerminator());
       PREInstr->setName(CurInst->getName() + ".pre");
       predMap[PREPred] = PREInstr;
-      VN.add(PREInstr, valno);
+      VN.add(PREInstr, ValNo);
       NumGVNPRE++;
-      
+
       // Update the availability map to include the new instruction.
-      localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
-      
+      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+
       // Create a PHI to make the value available in this block.
       PHINode* Phi = PHINode::Create(CurInst->getType(),
                                      CurInst->getName() + ".pre-phi",
@@ -1667,27 +1936,27 @@ bool GVN::performPRE(Function& F) {
                                      CurrentBlock->begin());
       for (pred_iterator PI = pred_begin(CurrentBlock),
            PE = pred_end(CurrentBlock); PI != PE; ++PI)
         Phi->addIncoming(predMap[*PI], *PI);
-      
-      VN.add(Phi, valno);
-      localAvail[CurrentBlock]->table[valno] = Phi;
-      
+
+      VN.add(Phi, ValNo);
+      localAvail[CurrentBlock]->table[ValNo] = Phi;
+
       CurInst->replaceAllUsesWith(Phi);
       if (isa<PointerType>(Phi->getType()))
         MD->invalidateCachedPointerInfo(Phi);
       VN.erase(CurInst);
-      
-      DEBUG(cerr << "GVN PRE removed: " << *CurInst);
+
+      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
       MD->removeInstruction(CurInst);
       CurInst->eraseFromParent();
       DEBUG(verifyRemoved(CurInst));
       Changed = true;
     }
   }
-  
+
   for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
          I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
     SplitCriticalEdge(I->first, I->second, this);
-  
+
   return Changed || toSplit.size();
 }
 
@@ -1705,25 +1974,24 @@ bool GVN::iterateOnFunction(Function &F) {
   }
 
   // Top-down walk of the dominator tree
-  bool changed = false;
+  bool Changed = false;
 #if 0
   // Needed for value numbering with phi construction to work.
ReversePostOrderTraversal RPOT(&F); for (ReversePostOrderTraversal::rpo_iterator RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) - changed |= processBlock(*RI); + Changed |= processBlock(*RI); #else for (df_iterator DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) - changed |= processBlock(DI->getBlock()); + Changed |= processBlock(DI->getBlock()); #endif - return changed; + return Changed; } void GVN::cleanupGlobalSets() { VN.clear(); - phiMap.clear(); for (DenseMap::iterator I = localAvail.begin(), E = localAvail.end(); I != E; ++I) @@ -1736,18 +2004,6 @@ void GVN::cleanupGlobalSets() { void GVN::verifyRemoved(const Instruction *Inst) const { VN.verifyRemoved(Inst); - // Walk through the PHI map to make sure the instruction isn't hiding in there - // somewhere. - for (PhiMapType::iterator - I = phiMap.begin(), E = phiMap.end(); I != E; ++I) { - assert(I->first != Inst && "Inst is still a key in PHI map!"); - - for (SmallPtrSet::iterator - II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { - assert(*II != Inst && "Inst is still a value in PHI map!"); - } - } - // Walk through the value number scope to make sure the instruction isn't // ferreted away in it. for (DenseMap::iterator diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 88cf60ecbaa88..e2d9e0b9ec4ac 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -51,11 +51,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -67,7 +67,7 @@ STATISTIC(NumReplaced, "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); namespace { - class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass { + class IndVarSimplify : public LoopPass { IVUsers *IU; LoopInfo *LI; ScalarEvolution *SE; @@ -75,30 +75,30 @@ namespace { bool Changed; public: - static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(&ID) {} - - virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequired(); - AU.addRequired(); - AU.addRequiredID(LCSSAID); - AU.addPreserved(); - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved(); - AU.addPreservedID(LCSSAID); - AU.setPreservesCFG(); - } + static char ID; // Pass identification, replacement for typeid + IndVarSimplify() : LoopPass(&ID) {} + + virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addPreserved(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreservedID(LCSSAID); + AU.addPreserved(); + AU.setPreservesCFG(); + } private: void RewriteNonIntegerIVs(Loop *L); - ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV* BackedgeTakenCount, + ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, Value 
*IndVar, BasicBlock *ExitingBlock, BranchInst *BI, @@ -129,7 +129,7 @@ Pass *llvm::createIndVarSimplifyPass() { /// SCEV analysis can determine a loop-invariant trip count of the loop, which /// is actually a much broader range than just linear tests. ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, - const SCEV* BackedgeTakenCount, + const SCEV *BackedgeTakenCount, Value *IndVar, BasicBlock *ExitingBlock, BranchInst *BI, @@ -138,13 +138,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // against the preincremented value, otherwise we prefer to compare against // the post-incremented value. Value *CmpIndVar; - const SCEV* RHS = BackedgeTakenCount; + const SCEV *RHS = BackedgeTakenCount; if (ExitingBlock == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. - const SCEV* Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); - const SCEV* N = + const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); + const SCEV *N = SE->getAddExpr(BackedgeTakenCount, SE->getIntegerSCEV(1, BackedgeTakenCount->getType())); if ((isa(N) && !N->isZero()) || @@ -182,13 +182,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, else Opcode = ICmpInst::ICMP_EQ; - DOUT << "INDVARS: Rewriting loop exit condition to:\n" - << " LHS:" << *CmpIndVar // includes a newline - << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"; + DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n" + << " LHS:" << *CmpIndVar << '\n' + << " op:\t" + << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" + << " RHS:\t" << *RHS << "\n"); - ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); + ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); Instruction *OrigCond = cast(BI->getCondition()); // It's tempting to use replaceAllUsesWith here to fully replace the old @@ -264,7 +264,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, // Okay, this instruction has a user outside of the current loop // and varies predictably *inside* the loop. Evaluate the value it // contains when the loop exits, if possible. - const SCEV* ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); + const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); if (!ExitValue->isLoopInvariant(L)) continue; @@ -273,25 +273,23 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); - DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal - << " LoopVal = " << *Inst << "\n"; + DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' + << " LoopVal = " << *Inst << "\n"); PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. RecursivelyDeleteTriviallyDeadInstructions(Inst); - // If we're inserting code into the exit block rather than the - // preheader, we can (and have to) remove the PHI entirely. - // This is safe, because the NewVal won't be variant - // in the loop, so we don't need an LCSSA phi node anymore. - if (ExitBlocks.size() == 1) { + if (NumPreds == 1) { + // Completely replace a single-pred PHI. This is safe, because the + // NewVal won't be variant in the loop, so we don't need an LCSSA phi + // node anymore. 
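
// [Illustrative sketch -- editor's addition, not part of this patch.]  The
// trip-count arithmetic in LinearFunctionTestReplace above: the
// backedge-taken count is one less than the trip count, and the +1 can wrap
// in the narrow IV type, which is exactly why the pass turns pessimistic
// (casting before the add) when BE+1 may overflow.  Toy 8-bit arithmetic,
// invented function names, makes the hazard concrete:
#include <cstdint>
#include <cstdio>

static unsigned tripCountWidened(uint8_t BackedgeTaken) {
  return (unsigned)BackedgeTaken + 1;  // compute in a wider type: safe
}

static uint8_t tripCountNarrow(uint8_t BackedgeTaken) {
  return (uint8_t)(BackedgeTaken + 1); // wraps when BackedgeTaken == 255
}

int main() {
  std::printf("%u\n", tripCountWidened(255));           // 256: correct count
  std::printf("%u\n", (unsigned)tripCountNarrow(255));  // 0: wrapped, wrong
  return 0;
}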
PN->replaceAllUsesWith(ExitVal); RecursivelyDeleteTriviallyDeadInstructions(PN); - break; } } - if (ExitBlocks.size() != 1) { + if (NumPreds != 1) { // Clone the PHI and delete the original one. This lets IVUsers and // any other maps purge the original user from their records. PHINode *NewPN = PN->clone(); @@ -339,7 +337,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { RewriteNonIntegerIVs(L); BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null - const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); // Create a rewriter object which we'll use to transform the code with. SCEVExpander Rewriter(*SE); @@ -367,14 +365,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { NeedCannIV = true; } for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV* Stride = IU->StrideOrder[i]; + const SCEV *Stride = IU->StrideOrder[i]; const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; - std::map::iterator SI = + std::map::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); @@ -403,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumInserted; Changed = true; - DOUT << "INDVARS: New CanIV: " << *IndVar; + DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n'); // Now that the official induction variable is established, reinsert // the old canonical-looking variable after it so that the IR remains @@ -458,9 +456,9 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // the need for the code evaluation methods to insert induction variables // of different sizes. for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV* Stride = IU->StrideOrder[i]; + const SCEV *Stride = IU->StrideOrder[i]; - std::map::iterator SI = + std::map::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); ilist &List = SI->second->Users; @@ -471,7 +469,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, Instruction *User = UI->getUser(); // Compute the final addrec to expand into code. - const SCEV* AR = IU->getReplacementExpr(*UI); + const SCEV *AR = IU->getReplacementExpr(*UI); // FIXME: It is an extremely bad idea to indvar substitute anything more // complex than affine induction variables. Doing so will put expensive @@ -508,8 +506,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op - << " into = " << *NewVal << "\n"; + DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); ++NumRemoved; Changed = true; @@ -546,8 +544,19 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { // New instructions were inserted at the end of the preheader. if (isa(I)) break; - if (I->isTrapping()) + // Don't move instructions which might have side effects, since the side + // effects need to complete before instructions inside the loop. Also + // don't move instructions which might read memory, since the loop may + // modify memory. 
Note that it's okay if the instruction might have
+      // undefined behavior: LoopSimplify guarantees that the preheader
+      // dominates the exit block.
+      if (I->mayHaveSideEffects() || I->mayReadFromMemory())
         continue;
+      // Don't sink static AllocaInsts out of the entry block, which would
+      // turn them into dynamic allocas!
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+        if (AI->isStaticAlloca())
+          continue;
       // Determine if there is a use in or before the loop (direct or
       // otherwise).
       bool UsedInLoop = false;
@@ -630,7 +639,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
   // Check incoming value.
   ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));
   if (!InitValue) return;
-  uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits();
+  uint64_t newInitValue =
+    Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
   if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
     return;
@@ -646,7 +656,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
     IncrVIndex = 0;
   IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex));
   if (!IncrValue) return;
-  uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits();
+  uint64_t newIncrValue =
+    Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
   if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue))
     return;
@@ -677,7 +688,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
     EVIndex = 0;
   EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));
   if (!EV) return;
-  uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits();
+  uint64_t intEV = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();
   if (!convertToInt(EV->getValueAPF(), &intEV))
     return;
@@ -710,24 +721,26 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
   if (NewPred == CmpInst::BAD_ICMP_PREDICATE) return;
 
   // Insert new integer induction variable.
-  PHINode *NewPHI = PHINode::Create(Type::Int32Ty,
+  PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()),
                                     PH->getName()+".int", PH);
-  NewPHI->addIncoming(Context->getConstantInt(Type::Int32Ty, newInitValue),
+  NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()),
+                                       newInitValue),
                       PH->getIncomingBlock(IncomingEdge));
 
   Value *NewAdd = BinaryOperator::CreateAdd(NewPHI,
-                                Context->getConstantInt(Type::Int32Ty,
+                           ConstantInt::get(Type::getInt32Ty(PH->getContext()),
                                                         newIncrValue),
                                             Incr->getName()+".int", Incr);
   NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));
 
   // The back edge is edge 1 of newPHI, whatever it may have been in the
   // original PHI.
-  ConstantInt *NewEV = Context->getConstantInt(Type::Int32Ty, intEV);
+  ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()),
+                                        intEV);
   Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);
   Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1));
-  ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(),
-                                 EC->getParent()->getTerminator());
+  ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
+                                 NewPred, LHS, RHS, EC->getName());
 
   // In the following deletions, PH may become dead and may be deleted.
   // Use a WeakVH to observe whether this happens.
@@ -739,7 +752,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
   RecursivelyDeleteTriviallyDeadInstructions(EC);
 
   // Delete old, floating point, increment instruction.
- Incr->replaceAllUsesWith(Context->getUndef(Incr->getType())); + Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); RecursivelyDeleteTriviallyDeadInstructions(Incr); // Replace floating induction variable, if it isn't already deleted. diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 59fbd396a3a16..7c96c49a34b9b 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -40,7 +40,9 @@ #include "llvm/Pass.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/Operator.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -48,11 +50,13 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/InstVisitor.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -60,7 +64,6 @@ #include "llvm/ADT/STLExtras.h" #include #include -#include using namespace llvm; using namespace llvm::PatternMatch; @@ -71,29 +74,49 @@ STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumSunkInst , "Number of instructions sunk"); namespace { - class VISIBILITY_HIDDEN InstCombiner - : public FunctionPass, - public InstVisitor { - // Worklist of all of the instructions that need to be simplified. + /// InstCombineWorklist - This is the worklist management logic for + /// InstCombine. + class InstCombineWorklist { SmallVector Worklist; DenseMap WorklistMap; - TargetData *TD; - bool MustPreserveLCSSA; + + void operator=(const InstCombineWorklist&RHS); // DO NOT IMPLEMENT + InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT public: - static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(&ID) {} - - LLVMContext* getContext() { return Context; } - - /// AddToWorkList - Add the specified instruction to the worklist if it - /// isn't already in it. - void AddToWorkList(Instruction *I) { - if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) + InstCombineWorklist() {} + + bool isEmpty() const { return Worklist.empty(); } + + /// Add - Add the specified instruction to the worklist if it isn't already + /// in it. + void Add(Instruction *I) { + if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { + DEBUG(errs() << "IC: ADD: " << *I << '\n'); Worklist.push_back(I); + } + } + + void AddValue(Value *V) { + if (Instruction *I = dyn_cast(V)) + Add(I); } - // RemoveFromWorkList - remove I from the worklist if it exists. - void RemoveFromWorkList(Instruction *I) { + /// AddInitialGroup - Add the specified batch of stuff in reverse order. + /// which should only be done when the worklist is empty and when the group + /// has no duplicates. 
+ void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { + assert(Worklist.empty() && "Worklist must be empty to add initial group"); + Worklist.reserve(NumEntries+16); + DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); + for (; NumEntries; --NumEntries) { + Instruction *I = List[NumEntries-1]; + WorklistMap.insert(std::make_pair(I, Worklist.size())); + Worklist.push_back(I); + } + } + + // Remove - remove I from the worklist if it exists. + void Remove(Instruction *I) { DenseMap::iterator It = WorklistMap.find(I); if (It == WorklistMap.end()) return; // Not in worklist. @@ -103,51 +126,74 @@ namespace { WorklistMap.erase(It); } - Instruction *RemoveOneFromWorkList() { + Instruction *RemoveOne() { Instruction *I = Worklist.back(); Worklist.pop_back(); WorklistMap.erase(I); return I; } - /// AddUsersToWorkList - When an instruction is simplified, add all users of /// the instruction to the work lists because they might get more simplified /// now. /// - void AddUsersToWorkList(Value &I) { + void AddUsersToWorkList(Instruction &I) { for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ++UI) - AddToWorkList(cast(*UI)); - } - - /// AddUsesToWorkList - When an instruction is simplified, add operands to - /// the work lists because they might get more simplified now. - /// - void AddUsesToWorkList(Instruction &I) { - for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) - if (Instruction *Op = dyn_cast(*i)) - AddToWorkList(Op); + Add(cast(*UI)); } - /// AddSoonDeadInstToWorklist - The specified instruction is about to become - /// dead. Add all of its operands to the worklist, turning them into - /// undef's to reduce the number of uses of those instructions. - /// - /// Return the specified operand before it is turned into an undef. - /// - Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) { - Value *R = I.getOperand(op); - - for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) - if (Instruction *Op = dyn_cast(*i)) { - AddToWorkList(Op); - // Set the operand to undef to drop the use. - *i = Context->getUndef(Op->getType()); - } + + /// Zap - check that the worklist is empty and nuke the backing store for + /// the map if it is large. + void Zap() { + assert(WorklistMap.empty() && "Worklist empty, but map not?"); - return R; + // Do an explicit clear, this shrinks the map if needed. + WorklistMap.clear(); } + }; +} // end anonymous namespace. + + +namespace { + /// InstCombineIRInserter - This is an IRBuilder insertion helper that works + /// just like the normal insertion helper, but also adds any new instructions + /// to the instcombine worklist. + class InstCombineIRInserter : public IRBuilderDefaultInserter { + InstCombineWorklist &Worklist; + public: + InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} + + void InsertHelper(Instruction *I, const Twine &Name, + BasicBlock *BB, BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt); + Worklist.Add(I); + } + }; +} // end anonymous namespace + + +namespace { + class InstCombiner : public FunctionPass, + public InstVisitor { + TargetData *TD; + bool MustPreserveLCSSA; + bool MadeIRChange; + public: + /// Worklist - All of the instructions that need to be simplified. + InstCombineWorklist Worklist; + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. 
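
// [Illustrative sketch -- editor's addition, not part of this patch.]  The
// InstCombineWorklist above pairs a vector with a map: the vector gives LIFO
// order, the map both deduplicates Add() and lets Remove() drop an entry in
// O(1) by tombstoning its vector slot instead of shifting.  A generic,
// self-contained model of the same pattern (invented names):
#include <cstdio>
#include <map>
#include <vector>

template <typename T> class Worklist {
  std::vector<T*> List;            // insertion order; tombstones are null
  std::map<T*, size_t> Map;        // item -> index in List
public:
  void add(T *I) {                 // skip if already queued
    if (Map.insert(std::make_pair(I, List.size())).second)
      List.push_back(I);
  }
  void remove(T *I) {              // O(1): null the slot, erase the map entry
    typename std::map<T*, size_t>::iterator It = Map.find(I);
    if (It == Map.end()) return;
    List[It->second] = 0;
    Map.erase(It);
  }
  T *pop() {                       // LIFO, skipping tombstones
    while (!List.empty()) {
      T *I = List.back();
      List.pop_back();
      if (I) { Map.erase(I); return I; }
    }
    return 0;
  }
};

int main() {
  int a = 1, b = 2;
  Worklist<int> WL;
  WL.add(&a); WL.add(&b); WL.add(&a); // duplicate add is ignored
  WL.remove(&b);
  std::printf("%d\n", *WL.pop());     // prints 1
  return 0;
}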
+    typedef IRBuilder BuilderTy;
+    BuilderTy *Builder;
+
+    static char ID; // Pass identification, replacement for typeid
+    InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+
+    LLVMContext *Context;
+    LLVMContext *getContext() const { return Context; }
+
   public:
     virtual bool runOnFunction(Function &F);
@@ -155,12 +201,11 @@ namespace {
     bool DoOneIteration(Function &F, unsigned ItNum);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
       AU.addPreservedID(LCSSAID);
       AU.setPreservesCFG();
     }
 
-    TargetData &getTargetData() const { return *TD; }
+    TargetData *getTargetData() const { return TD; }
 
     // Visitation implementation - Implement instruction combining for different
     // instruction types.  The semantics are as follows:
@@ -187,8 +232,10 @@ namespace {
     Instruction *visitSDiv(BinaryOperator &I);
     Instruction *visitFDiv(BinaryOperator &I);
     Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+    Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
     Instruction *visitAnd(BinaryOperator &I);
     Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS);
+    Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);
     Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
                                      Value *A, Value *B, Value *C);
     Instruction *visitOr (BinaryOperator &I);
@@ -208,7 +255,7 @@ namespace {
 
     Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
                                 ConstantInt *DivRHS);
 
-    Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS,
+    Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                              ICmpInst::Predicate Cond, Instruction &I);
     Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                      BinaryOperator &I);
@@ -269,30 +316,10 @@ namespace {
              "New instruction already inserted into a basic block!");
       BasicBlock *BB = Old.getParent();
       BB->getInstList().insert(&Old, New);  // Insert inst
-      AddToWorkList(New);
+      Worklist.Add(New);
       return New;
     }
-
-    /// InsertCastBefore - Insert a cast of V to TY before the instruction POS.
-    /// This also adds the cast to the worklist.  Finally, this returns the
-    /// cast.
-    Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty,
-                            Instruction &Pos) {
-      if (V->getType() == Ty) return V;
-
-      if (Constant *CV = dyn_cast<Constant>(V))
-        return Context->getConstantExprCast(opc, CV, Ty);
-
-      Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos);
-      AddToWorkList(C);
-      return C;
-    }
-
-    Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) {
-      return InsertCastBefore(Instruction::BitCast, V, Ty, Pos);
-    }
-
     // ReplaceInstUsesWith - This method is to be used when an instruction is
     // found to be dead, replaceable with another preexisting expression.  Here
     // we add all uses of I to the worklist, replace all uses of I with the new
     // value, then return I, so that the inst combiner will know that I was
    // modified.
    //
     Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
-      AddUsersToWorkList(I);         // Add all modified instrs to worklist
-      if (&I != V) {
-        I.replaceAllUsesWith(V);
-        return &I;
-      } else {
-        // If we are replacing the instruction with itself, this must be in a
-        // segment of unreachable code, so just clobber the instruction.
-        I.replaceAllUsesWith(Context->getUndef(I.getType()));
-        return &I;
-      }
+      Worklist.AddUsersToWorkList(I);   // Add all modified instrs to worklist.
+
+      // If we are replacing the instruction with itself, this must be in a
+      // segment of unreachable code, so just clobber the instruction.
+ if (&I == V) + V = UndefValue::get(I.getType()); + + I.replaceAllUsesWith(V); + return &I; } // EraseInstFromFunction - When dealing with an instruction that has side @@ -317,10 +343,19 @@ namespace { // instruction. Instead, visit methods should return the value returned by // this function. Instruction *EraseInstFromFunction(Instruction &I) { + DEBUG(errs() << "IC: ERASE " << I << '\n'); + assert(I.use_empty() && "Cannot erase instruction that is used!"); - AddUsesToWorkList(I); - RemoveFromWorkList(&I); + // Make sure that we reprocess all operands now that we reduced their + // use counts. + if (I.getNumOperands() < 8) { + for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) + if (Instruction *Op = dyn_cast(*i)) + Worklist.Add(Op); + } + Worklist.Remove(&I); I.eraseFromParent(); + MadeIRChange = true; return 0; // Don't do anything with FI } @@ -364,10 +399,15 @@ namespace { Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt& UndefElts, unsigned Depth = 0); - // FoldOpIntoPhi - Given a binary operator or cast instruction which has a - // PHI node as operand #0, see if we can fold the instruction into the PHI - // (which is only possible if all operands to the PHI are constants). - Instruction *FoldOpIntoPhi(Instruction &I); + // FoldOpIntoPhi - Given a binary operator, cast instruction, or select + // which has a PHI node as operand #0, see if we can fold the instruction + // into the PHI (which is only possible if all operands to the PHI are + // constants). + // + // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms + // that would normally be unprofitable because they strongly encourage jump + // threading. + Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false); // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary" // operator and they all are only used by the PHI, PHI together their @@ -399,7 +439,7 @@ namespace { unsigned PrefAlign = 0); }; -} +} // end anonymous namespace char InstCombiner::ID = 0; static RegisterPass @@ -409,7 +449,8 @@ X("instcombine", "Combine redundant instructions"); // 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst static unsigned getComplexity(Value *V) { if (isa(V)) { - if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || + if (BinaryOperator::isNeg(V) || + BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V)) return 3; return 4; @@ -429,7 +470,7 @@ static bool isOnlyUse(Value *V) { static const Type *getPromotedType(const Type *Ty) { if (const IntegerType* ITy = dyn_cast(Ty)) { if (ITy->getBitWidth() < 32) - return Type::Int32Ty; + return Type::getInt32Ty(Ty->getContext()); } return Ty; } @@ -438,29 +479,12 @@ static const Type *getPromotedType(const Type *Ty) { /// expression bitcast, or a GetElementPtrInst with all zero indices, return the /// operand value, otherwise return null. static Value *getBitCastOperand(Value *V) { - if (BitCastInst *I = dyn_cast(V)) - // BitCastInst? - return I->getOperand(0); - else if (GetElementPtrInst *GEP = dyn_cast(V)) { - // GetElementPtrInst? - if (GEP->hasAllZeroIndices()) - return GEP->getOperand(0); - } else if (ConstantExpr *CE = dyn_cast(V)) { - if (CE->getOpcode() == Instruction::BitCast) - // BitCast ConstantExp? - return CE->getOperand(0); - else if (CE->getOpcode() == Instruction::GetElementPtr) { - // GetElementPtr ConstantExp? 
- for (User::op_iterator I = CE->op_begin() + 1, E = CE->op_end(); - I != E; ++I) { - ConstantInt *CI = dyn_cast(I); - if (!CI || !CI->isZero()) - // Any non-zero indices? Not cast-like. - return 0; - } - // All-zero indices? This is just like casting. - return CE->getOperand(0); - } + if (Operator *O = dyn_cast(V)) { + if (O->getOpcode() == Instruction::BitCast) + return O->getOperand(0); + if (GEPOperator *GEP = dyn_cast(V)) + if (GEP->hasAllZeroIndices()) + return GEP->getPointerOperand(); } return 0; } @@ -474,7 +498,7 @@ isEliminableCastPair( const Type *DstTy, ///< The target type for the second cast instruction TargetData *TD ///< The target data for pointer size ) { - + const Type *SrcTy = CI->getOperand(0)->getType(); // A from above const Type *MidTy = CI->getType(); // B from above @@ -483,12 +507,15 @@ isEliminableCastPair( Instruction::CastOps secondOp = Instruction::CastOps(opcode); unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, - DstTy, TD->getIntPtrType()); + DstTy, + TD ? TD->getIntPtrType(CI->getContext()) : 0); // We don't want to form an inttoptr or ptrtoint that converts to an integer // type that differs from the pointer size. - if ((Res == Instruction::IntToPtr && SrcTy != TD->getIntPtrType()) || - (Res == Instruction::PtrToInt && DstTy != TD->getIntPtrType())) + if ((Res == Instruction::IntToPtr && + (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || + (Res == Instruction::PtrToInt && + (!TD || DstTy != TD->getIntPtrType(CI->getContext())))) Res = 0; return Instruction::CastOps(Res); @@ -503,7 +530,7 @@ static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V, // If this is another cast that can be eliminated, it isn't codegen either. if (const CastInst *CI = dyn_cast(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) + if (isEliminableCastPair(CI, opcode, Ty, TD)) return false; return true; } @@ -528,7 +555,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { if (BinaryOperator *Op = dyn_cast(I.getOperand(0))) if (Op->getOpcode() == Opcode && isa(Op->getOperand(1))) { if (isa(I.getOperand(1))) { - Constant *Folded = Context->getConstantExpr(I.getOpcode(), + Constant *Folded = ConstantExpr::get(I.getOpcode(), cast(I.getOperand(1)), cast(Op->getOperand(1))); I.setOperand(0, Op->getOperand(0)); @@ -541,11 +568,11 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) { Constant *C2 = cast(Op1->getOperand(1)); // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) - Constant *Folded = Context->getConstantExpr(I.getOpcode(), C1, C2); + Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2); Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0), Op1->getOperand(0), Op1->getName(), &I); - AddToWorkList(New); + Worklist.Add(New); I.setOperand(0, New); I.setOperand(1, Folded); return true; @@ -568,17 +595,17 @@ bool InstCombiner::SimplifyCompare(CmpInst &I) { // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction // if the LHS is a constant zero (which is the 'negate' form). // -static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castNegVal(Value *V) { if (BinaryOperator::isNeg(V)) return BinaryOperator::getNegArgument(V); // Constants can be considered to be negated values if they can be folded. 
if (ConstantInt *C = dyn_cast(V)) - return Context->getConstantExprNeg(C); + return ConstantExpr::getNeg(C); if (ConstantVector *C = dyn_cast(V)) if (C->getType()->getElementType()->isInteger()) - return Context->getConstantExprNeg(C); + return ConstantExpr::getNeg(C); return 0; } @@ -587,28 +614,28 @@ static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) { // instruction if the LHS is a constant negative zero (which is the 'negate' // form). // -static inline Value *dyn_castFNegVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castFNegVal(Value *V) { if (BinaryOperator::isFNeg(V)) return BinaryOperator::getFNegArgument(V); // Constants can be considered to be negated values if they can be folded. if (ConstantFP *C = dyn_cast(V)) - return Context->getConstantExprFNeg(C); + return ConstantExpr::getFNeg(C); if (ConstantVector *C = dyn_cast(V)) if (C->getType()->getElementType()->isFloatingPoint()) - return Context->getConstantExprFNeg(C); + return ConstantExpr::getFNeg(C); return 0; } -static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castNotVal(Value *V) { if (BinaryOperator::isNot(V)) return BinaryOperator::getNotArgument(V); // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast(V)) - return Context->getConstantInt(~C->getValue()); + return ConstantInt::get(C->getType(), ~C->getValue()); return 0; } @@ -617,8 +644,7 @@ static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) { // non-constant operand of the multiply, and set CST to point to the multiplier. // Otherwise, return null. // -static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST, - LLVMContext* Context) { +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { if (V->hasOneUse() && V->getType()->isInteger()) if (Instruction *I = dyn_cast(V)) { if (I->getOpcode() == Instruction::Mul) @@ -629,48 +655,27 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST, // The multiplier is really 1 << CST. uint32_t BitWidth = cast(V->getType())->getBitWidth(); uint32_t CSTVal = CST->getLimitedValue(BitWidth); - CST = Context->getConstantInt(APInt(BitWidth, 1).shl(CSTVal)); + CST = ConstantInt::get(V->getType()->getContext(), + APInt(BitWidth, 1).shl(CSTVal)); return I->getOperand(0); } } return 0; } -/// dyn_castGetElementPtr - If this is a getelementptr instruction or constant -/// expression, return it. -static User *dyn_castGetElementPtr(Value *V) { - if (isa(V)) return cast(V); - if (ConstantExpr *CE = dyn_cast(V)) - if (CE->getOpcode() == Instruction::GetElementPtr) - return cast(V); - return false; -} - -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { - if (const Instruction *I = dyn_cast(V)) - return I->getOpcode(); - if (const ConstantExpr *CE = dyn_cast(V)) - return CE->getOpcode(); - // Use UserOp1 to mean there's no opcode. 
- return Instruction::UserOp1; -} - /// AddOne - Add one to a ConstantInt -static Constant *AddOne(Constant *C, LLVMContext* Context) { - return Context->getConstantExprAdd(C, - Context->getConstantInt(C->getType(), 1)); +static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, + ConstantInt::get(C->getType(), 1)); } /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C, LLVMContext* Context) { - return Context->getConstantExprSub(C, - Context->getConstantInt(C->getType(), 1)); +static Constant *SubOne(ConstantInt *C) { + return ConstantExpr::getSub(C, + ConstantInt::get(C->getType(), 1)); } /// MultiplyOverflows - True if the multiply can not be expressed in an int /// this size. -static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign, - LLVMContext* Context) { +static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) { uint32_t W = C1->getBitWidth(); APInt LHSExt = C1->getValue(), RHSExt = C2->getValue(); if (sign) { @@ -697,7 +702,7 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign, /// are any bits set in the constant that are not demanded. If so, shrink the /// constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded, LLVMContext* Context) { + APInt Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -712,7 +717,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, // This instruction is producing bits that are not demanded. Shrink the RHS. Demanded &= OpC->getValue(); - I->setOperand(OpNo, Context->getConstantInt(Demanded)); + I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded)); return true; } @@ -784,7 +789,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth); if (NewVal == 0) return false; - U.set(NewVal); + U = NewVal; return true; } @@ -844,7 +849,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (DemandedMask == 0) { // Not demanding any bits from V. if (isa(V)) return 0; - return Context->getUndef(VTy); + return UndefValue::get(VTy); } if (Depth == 6) // Limit search depth. @@ -886,7 +891,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Context->getNullValue(VTy); + return Constant::getNullValue(VTy); } else if (I->getOpcode() == Instruction::Or) { // We can simplify (X|Y) -> X or Y in the user's context if we know that @@ -955,10 +960,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If all of the demanded bits in the inputs are known zeros, return zero. if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) - return Context->getNullValue(VTy); + return Constant::getNullValue(VTy); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero)) return I; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -995,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. 
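
// [Illustrative sketch -- editor's addition, not part of this patch.]  What
// the ShrinkDemandedConstant calls in this region are doing, modeled on a
// plain 32-bit mask: if the constant operand has bits set that no user of
// the expression ever observes, clearing them is semantics-preserving and
// tends to expose further folds.  Invented function name:
#include <cstdint>
#include <cstdio>

static bool shrinkDemandedConstant(uint32_t &C, uint32_t DemandedMask) {
  if ((~DemandedMask & C) == 0) return false; // no non-demanded bits set
  C &= DemandedMask;                          // drop the unobservable bits
  return true;                                // caller revisits the instruction
}

int main() {
  uint32_t C = 0x00FF00FFu;        // constant operand, e.g. of an 'and'
  bool Changed = shrinkDemandedConstant(C, 0x000000FFu); // low byte demanded
  std::printf("changed=%d C=0x%X\n", (int)Changed, C);   // changed=1 C=0xFF
  return 0;
}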
- if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1030,7 +1035,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { - Instruction *Or = + Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); return InsertNewInstBefore(Or, *I); @@ -1043,7 +1048,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { // all known if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Context->getConstantInt(~RHSKnownOne & DemandedMask); + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); return InsertNewInstBefore(And, *I); @@ -1052,9 +1058,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the RHS is a constant, see if we can simplify it. // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask)) return I; + // If our LHS is an 'and' and if it has one use, and if any of the bits we + // are flipping are known to be set, then the xor is just resetting those + // bits to zero. We can just knock out bits from the 'and' and the 'xor', + // simplifying both of them. + if (Instruction *LHSInst = dyn_cast(I->getOperand(0))) + if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && + isa(I->getOperand(1)) && + isa(LHSInst->getOperand(1)) && + (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { + ConstantInt *AndRHS = cast(LHSInst->getOperand(1)); + ConstantInt *XorRHS = cast(I->getOperand(1)); + APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); + + Constant *AndC = + ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); + Instruction *NewAnd = + BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); + InsertNewInstBefore(NewAnd, *I); + + Constant *XorC = + ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); + Instruction *NewXor = + BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); + return InsertNewInstBefore(NewXor, *I); + } + + RHSKnownZero = KnownZeroOut; RHSKnownOne = KnownOneOut; break; @@ -1069,8 +1102,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(I, 1, DemandedMask, Context) || - ShrinkDemandedConstant(I, 2, DemandedMask, Context)) + if (ShrinkDemandedConstant(I, 1, DemandedMask) || + ShrinkDemandedConstant(I, 2, DemandedMask)) return I; // Only known if known in both the LHS and RHS. @@ -1194,7 +1227,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the RHS of the add has bits set that can't affect the input, reduce // the constant. - if (ShrinkDemandedConstant(I, 1, InDemandedBits, Context)) + if (ShrinkDemandedConstant(I, 1, InDemandedBits)) return I; // Avoid excess work. 
@@ -1415,10 +1448,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *NewVal; if (InputBit > ResultBit) NewVal = BinaryOperator::CreateLShr(I->getOperand(1), - Context->getConstantInt(I->getType(), InputBit-ResultBit)); + ConstantInt::get(I->getType(), InputBit-ResultBit)); else NewVal = BinaryOperator::CreateShl(I->getOperand(1), - Context->getConstantInt(I->getType(), ResultBit-InputBit)); + ConstantInt::get(I->getType(), ResultBit-InputBit)); NewVal->takeName(I); return InsertNewInstBefore(NewVal, *I); } @@ -1434,12 +1467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - Constant *C = Context->getConstantInt(RHSKnownOne); - if (isa(V->getType())) - C = Context->getConstantExprIntToPtr(C, V->getType()); - return C; - } + if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) + return Constant::getIntegerValue(VTy, RHSKnownOne); return false; } @@ -1465,13 +1494,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return 0; } else if (DemandedElts == 0) { // If nothing is demanded, provide undef. UndefElts = EltMask; - return Context->getUndef(V->getType()); + return UndefValue::get(V->getType()); } UndefElts = 0; if (ConstantVector *CP = dyn_cast(V)) { const Type *EltTy = cast(V->getType())->getElementType(); - Constant *Undef = Context->getUndef(EltTy); + Constant *Undef = UndefValue::get(EltTy); std::vector Elts; for (unsigned i = 0; i != VWidth; ++i) @@ -1486,7 +1515,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } // If we changed the constant, return it. - Constant *NewCP = Context->getConstantVector(Elts); + Constant *NewCP = ConstantVector::get(Elts); return NewCP != CP ? NewCP : 0; } else if (isa(V)) { // Simplify the CAZ to a ConstantVector where the non-demanded elements are @@ -1498,15 +1527,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, return 0; const Type *EltTy = cast(V->getType())->getElementType(); - Constant *Zero = Context->getNullValue(EltTy); - Constant *Undef = Context->getUndef(EltTy); + Constant *Zero = Constant::getNullValue(EltTy); + Constant *Undef = UndefValue::get(EltTy); std::vector Elts; for (unsigned i = 0; i != VWidth; ++i) { Constant *Elt = DemandedElts[i] ? Zero : Undef; Elts.push_back(Elt); } UndefElts = DemandedElts ^ EltMask; - return Context->getConstantVector(Elts); + return ConstantVector::get(Elts); } // Limit search depth. @@ -1553,8 +1582,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // If this is inserting an element that isn't demanded, remove this // insertelement. unsigned IdxNo = Idx->getZExtValue(); - if (IdxNo >= VWidth || !DemandedElts[IdxNo]) - return AddSoonDeadInstToWorklist(*I, 0); + if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { + Worklist.Add(I); + return I->getOperand(0); + } // Otherwise, the element inserted overwrites whatever was there, so the // input demanded set is simpler than the output set. 
@@ -1620,12 +1651,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, std::vector Elts; for (unsigned i = 0; i < VWidth; ++i) { if (UndefElts[i]) - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); else - Elts.push_back(Context->getConstantInt(Type::Int32Ty, + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Shuffle->getMaskValue(i))); } - I->setOperand(2, Context->getConstantVector(Elts)); + I->setOperand(2, ConstantVector::get(Elts)); MadeChange = true; } break; @@ -1678,7 +1709,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts = UndefElts2; if (VWidth > InVWidth) { - assert(0 && "Unimp"); + llvm_unreachable("Unimp"); // If there are more elements in the result than there are in the source, // then an output element is undef if the corresponding input element is // undef. @@ -1686,7 +1717,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (UndefElts2[OutIdx/Ratio]) UndefElts.set(OutIdx); } else if (VWidth < InVWidth) { - assert(0 && "Unimp"); + llvm_unreachable("Unimp"); // If there are more elements in the source than there are in the result, // then a result element is undef if all of the corresponding input // elements are undef. @@ -1752,11 +1783,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Value *LHS = II->getOperand(1); Value *RHS = II->getOperand(2); // Extract the element as scalars. - LHS = InsertNewInstBefore(new ExtractElementInst(LHS, 0U,"tmp"), *II); - RHS = InsertNewInstBefore(new ExtractElementInst(RHS, 0U,"tmp"), *II); + LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS, + ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); + RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, + ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); switch (II->getIntrinsicID()) { - default: assert(0 && "Case stmts out of sync!"); + default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse2_sub_sd: TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, @@ -1771,9 +1804,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Instruction *New = InsertElementInst::Create( - Context->getUndef(II->getType()), TmpV, 0U, II->getName()); + UndefValue::get(II->getType()), TmpV, + ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName()); InsertNewInstBefore(New, *II); - AddSoonDeadInstToWorklist(*II, 0); return New; } } @@ -1799,8 +1832,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, /// 'shouldApply' and 'apply' methods. /// template -static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F, - LLVMContext* Context) { +static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) { unsigned Opcode = Root.getOpcode(); Value *LHS = Root.getOperand(0); @@ -1833,7 +1865,7 @@ static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F, // Make what used to be the LHS of the root be the user of the root... 
Value *ExtraOperand = TmpLHSI->getOperand(1); if (&Root == TmpLHSI) { - Root.replaceAllUsesWith(Context->getNullValue(TmpLHSI->getType())); + Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType())); return 0; } Root.replaceAllUsesWith(TmpLHSI); // Users now use TmpLHSI @@ -1872,12 +1904,11 @@ namespace { // AddRHS - Implements: X + X --> X << 1 struct AddRHS { Value *RHS; - LLVMContext* Context; - AddRHS(Value *rhs, LLVMContext* C) : RHS(rhs), Context(C) {} + explicit AddRHS(Value *rhs) : RHS(rhs) {} bool shouldApply(Value *LHS) const { return LHS == RHS; } Instruction *apply(BinaryOperator &Add) const { return BinaryOperator::CreateShl(Add.getOperand(0), - Context->getConstantInt(Add.getType(), 1)); + ConstantInt::get(Add.getType(), 1)); } }; @@ -1885,12 +1916,11 @@ struct AddRHS { // iff C1&C2 == 0 struct AddMaskingAnd { Constant *C2; - LLVMContext* Context; - AddMaskingAnd(Constant *c, LLVMContext* C) : C2(c), Context(C) {} + explicit AddMaskingAnd(Constant *c) : C2(c) {} bool shouldApply(Value *LHS) const { ConstantInt *C1; return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && - Context->getConstantExprAnd(C1, C2)->isNullValue(); + ConstantExpr::getAnd(C1, C2)->isNullValue(); } Instruction *apply(BinaryOperator &Add) const { return BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); @@ -1901,11 +1931,8 @@ struct AddMaskingAnd { static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, InstCombiner *IC) { - LLVMContext* Context = IC->getContext(); - - if (CastInst *CI = dyn_cast(&I)) { - return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I); - } + if (CastInst *CI = dyn_cast(&I)) + return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType()); // Figure out if the constant is the left or the right argument. bool ConstIsRHS = isa(I.getOperand(1)); @@ -1913,24 +1940,24 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, if (Constant *SOC = dyn_cast(SO)) { if (ConstIsRHS) - return Context->getConstantExpr(I.getOpcode(), SOC, ConstOperand); - return Context->getConstantExpr(I.getOpcode(), ConstOperand, SOC); + return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); + return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC); } Value *Op0 = SO, *Op1 = ConstOperand; if (!ConstIsRHS) std::swap(Op0, Op1); - Instruction *New; + if (BinaryOperator *BO = dyn_cast(&I)) - New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op"); - else if (CmpInst *CI = dyn_cast(&I)) - New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1, - SO->getName()+".cmp"); - else { - assert(0 && "Unknown binary instruction type!"); - abort(); - } - return IC->InsertNewInstBefore(New, I); + return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName()+".op"); + if (ICmpInst *CI = dyn_cast(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + if (FCmpInst *CI = dyn_cast(&I)) + return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, + SO->getName()+".cmp"); + llvm_unreachable("Unknown binary instruction type!"); } // FoldOpIntoSelect - Given an instruction with a select as one operand and a @@ -1946,7 +1973,7 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (isa(TV) || isa(FV)) { // Bool selects with constant operands can be folded to logical ops. 
- if (SI->getType() == Type::Int1Ty) return 0; + if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0; Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC); @@ -1958,20 +1985,34 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI, } -/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI -/// node as operand #0, see if we can fold the instruction into the PHI (which -/// is only possible if all operands to the PHI are constants). -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { +/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which +/// has a PHI node as operand #0, see if we can fold the instruction into the +/// PHI (which is only possible if all operands to the PHI are constants). +/// +/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms +/// that would normally be unprofitable because they strongly encourage jump +/// threading. +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, + bool AllowAggressive) { + AllowAggressive = false; PHINode *PN = cast(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); - if (!PN->hasOneUse() || NumPHIValues == 0) return 0; - - // Check to see if all of the operands of the PHI are constants. If there is - // one non-constant value, remember the BB it is. If there is more than one - // or if *it* is a PHI, bail out. + if (NumPHIValues == 0 || + // We normally only transform phis with a single use, unless we're trying + // hard to make jump threading happen. + (!PN->hasOneUse() && !AllowAggressive)) + return 0; + + + // Check to see if all of the operands of the PHI are simple constants + // (constantint/constantfp/undef). If there is one non-constant value, + // remember the BB it is in. If there is more than one or if *it* is a PHI, + // bail out. We don't do arbitrary constant expressions here because moving + // their computation can be expensive without a cost model. BasicBlock *NonConstBB = 0; for (unsigned i = 0; i != NumPHIValues; ++i) - if (!isa(PN->getIncomingValue(i))) { + if (!isa(PN->getIncomingValue(i)) || + isa(PN->getIncomingValue(i))) { if (NonConstBB) return 0; // More than one non-const value. if (isa(PN->getIncomingValue(i))) return 0; // Itself a phi. NonConstBB = PN->getIncomingBlock(i); @@ -1986,7 +2027,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { // operation in that block. However, if this is a critical edge, we would be // inserting the computation one some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. - if (NonConstBB) { + if (NonConstBB != 0 && !AllowAggressive) { BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); if (!BI || !BI->isUnconditional()) return 0; } @@ -1998,15 +2039,37 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { NewPN->takeName(PN); // Next, add all of the operands to the PHI. - if (I.getNumOperands() == 2) { + if (SelectInst *SI = dyn_cast(&I)) { + // We only currently try to fold the condition of a select when it is a phi, + // not the true/false values. 
+ Value *TrueV = SI->getTrueValue(); + Value *FalseV = SI->getFalseValue(); + BasicBlock *PhiTransBB = PN->getParent(); + for (unsigned i = 0; i != NumPHIValues; ++i) { + BasicBlock *ThisBB = PN->getIncomingBlock(i); + Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); + Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); + Value *InV = 0; + if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { + InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; + } else { + assert(PN->getIncomingBlock(i) == NonConstBB); + InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, + "phitmp", NonConstBB->getTerminator()); + Worklist.Add(cast(InV)); + } + NewPN->addIncoming(InV, ThisBB); + } + } else if (I.getNumOperands() == 2) { Constant *C = cast(I.getOperand(1)); for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV = 0; if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { if (CmpInst *CI = dyn_cast(&I)) - InV = Context->getConstantExprCompare(CI->getPredicate(), InC, C); + InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else - InV = Context->getConstantExpr(I.getOpcode(), InC, C); + InV = ConstantExpr::get(I.getOpcode(), InC, C); } else { assert(PN->getIncomingBlock(i) == NonConstBB); if (BinaryOperator *BO = dyn_cast(&I)) @@ -2014,14 +2077,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PN->getIncomingValue(i), C, "phitmp", NonConstBB->getTerminator()); else if (CmpInst *CI = dyn_cast(&I)) - InV = CmpInst::Create(CI->getOpcode(), + InV = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), PN->getIncomingValue(i), C, "phitmp", NonConstBB->getTerminator()); else - assert(0 && "Unknown binop!"); + llvm_unreachable("Unknown binop!"); - AddToWorkList(cast(InV)); + Worklist.Add(cast(InV)); } NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } @@ -2031,13 +2094,13 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { for (unsigned i = 0; i != NumPHIValues; ++i) { Value *InV; if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) { - InV = Context->getConstantExprCast(CI->getOpcode(), InC, RetTy); + InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); } else { assert(PN->getIncomingBlock(i) == NonConstBB); InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i), I.getType(), "phitmp", NonConstBB->getTerminator()); - AddToWorkList(cast(InV)); + Worklist.Add(cast(InV)); } NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } @@ -2098,13 +2161,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - // zext(i1) - 1 -> select i1, 0, -1 + // zext(bool) + C -> bool ? 
C + 1 : C if (ZExtInst *ZI = dyn_cast(LHS)) - if (CI->isAllOnesValue() && - ZI->getOperand(0)->getType() == Type::Int1Ty) - return SelectInst::Create(ZI->getOperand(0), - Context->getNullValue(I.getType()), - Context->getConstantIntAllOnesValue(I.getType())); + if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) + return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); } if (isa(LHS)) @@ -2146,24 +2206,23 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { const Type *MiddleType = 0; switch (Size) { default: break; - case 32: MiddleType = Type::Int32Ty; break; - case 16: MiddleType = Type::Int16Ty; break; - case 8: MiddleType = Type::Int8Ty; break; + case 32: MiddleType = Type::getInt32Ty(*Context); break; + case 16: MiddleType = Type::getInt16Ty(*Context); break; + case 8: MiddleType = Type::getInt8Ty(*Context); break; } if (MiddleType) { - Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext"); - InsertNewInstBefore(NewTrunc, I); + Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext"); return new SExtInst(NewTrunc, I.getType(), I.getName()); } } } - if (I.getType() == Type::Int1Ty) + if (I.getType() == Type::getInt1Ty(*Context)) return BinaryOperator::CreateXor(LHS, RHS); // X + X --> X << 1 if (I.getType()->isInteger()) { - if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS, Context), Context)) + if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS))) return Result; if (Instruction *RHSI = dyn_cast(RHS)) { @@ -2180,11 +2239,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castNegVal(LHS, Context)) { + if (Value *LHSV = dyn_castNegVal(LHS)) { if (LHS->getType()->isIntOrIntVector()) { - if (Value *RHSV = dyn_castNegVal(RHS, Context)) { - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum"); - InsertNewInstBefore(NewAdd, I); + if (Value *RHSV = dyn_castNegVal(RHS)) { + Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } } @@ -2194,34 +2252,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // A + -B --> A - B if (!isa(RHS)) - if (Value *V = dyn_castNegVal(RHS, Context)) + if (Value *V = dyn_castNegVal(RHS)) return BinaryOperator::CreateSub(LHS, V); ConstantInt *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2, Context)) { + if (Value *X = dyn_castFoldableMul(LHS, C2)) { if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2, Context)); + return BinaryOperator::CreateMul(RHS, AddOne(C2)); // X*C1 + X*C2 --> X * (C1+C2) ConstantInt *C1; - if (X == dyn_castFoldableMul(RHS, C1, Context)) - return BinaryOperator::CreateMul(X, Context->getConstantExprAdd(C1, C2)); + if (X == dyn_castFoldableMul(RHS, C1)) + return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); } // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2, Context) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2, Context)); + if (dyn_castFoldableMul(RHS, C2) == LHS) + return BinaryOperator::CreateMul(LHS, AddOne(C2)); // X + ~X --> -1 since ~X = -X-1 - if (dyn_castNotVal(LHS, Context) == RHS || - dyn_castNotVal(RHS, Context) == LHS) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + if (dyn_castNotVal(LHS) == RHS || + dyn_castNotVal(RHS) == LHS) + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0 if (match(RHS, m_And(m_Value(), m_ConstantInt(C2)))) - if (Instruction *R = 
AssociativeOpt(I, AddMaskingAnd(C2, Context), Context)) + if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2))) return R; // A+B --> A|B iff A and B have no bits set in common. @@ -2258,8 +2316,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } if (W == Y) { - Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z, - LHS->getName()), I); + Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); return BinaryOperator::CreateMul(W, NewAdd); } } @@ -2268,11 +2325,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ConstantInt *CRHS = dyn_cast(RHS)) { Value *X = 0; if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X - return BinaryOperator::CreateSub(SubOne(CRHS, Context), X); + return BinaryOperator::CreateSub(SubOne(CRHS), X); // (X & FF00) + xx00 -> (X+xx00) & FF00 - if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { - Constant *Anded = Context->getConstantExprAnd(CRHS, C2); + if (LHS->hasOneUse() && + match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { + Constant *Anded = ConstantExpr::getAnd(CRHS, C2); if (Anded == CRHS) { // See if all bits from the first bit set in the Add RHS up are included // in the mask. First, get the rightmost bit. @@ -2286,8 +2344,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (AddRHSHighBits == AddRHSHighBitsAnd) { // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS, - LHS->getName()), I); + Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -2299,28 +2356,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return R; } - // add (cast *A to intptrtype) B -> - // cast (GEP (cast *A to i8*) B) --> intptrtype - { - CastInst *CI = dyn_cast(LHS); - Value *Other = RHS; - if (!CI) { - CI = dyn_cast(RHS); - Other = LHS; - } - if (CI && CI->getType()->isSized() && - (CI->getType()->getScalarSizeInBits() == - TD->getIntPtrType()->getPrimitiveSizeInBits()) - && isa(CI->getOperand(0)->getType())) { - unsigned AS = - cast(CI->getOperand(0)->getType())->getAddressSpace(); - Value *I2 = InsertBitCastBefore(CI->getOperand(0), - Context->getPointerType(Type::Int8Ty, AS), I); - I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I); - return new PtrToIntInst(I2, CI->getType()); - } - } - // add (select X 0 (sub n A)) A --> select X A n { SelectInst *SI = dyn_cast(LHS); @@ -2336,10 +2371,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Can we fold the add into the argument of the select? // We check both true and false select arguments for a matching subtract. - if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) + if (match(FV, m_Zero()) && + match(TV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the true select value. return SelectInst::Create(SI->getCondition(), N, A); - if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) + if (match(TV, m_Zero()) && + match(FV, m_Sub(m_Value(N), m_Specific(A)))) // Fold the add into the false select value. 
return SelectInst::Create(SI->getCondition(), A, N); } @@ -2351,14 +2388,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // (add (sext x), cst) --> (sext (add x, cst')) if (ConstantInt *RHSC = dyn_cast(RHS)) { Constant *CI = - Context->getConstantExprTrunc(RHSC, LHSConv->getOperand(0)->getType()); + ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && - Context->getConstantExprSExt(CI, I.getType()) == RHSC && + ConstantExpr::getSExt(CI, I.getType()) == RHSC && WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new, smaller add. - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - CI, "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -2373,10 +2409,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), - "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -2392,7 +2426,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (Constant *RHSC = dyn_cast(RHS)) { // X + 0 --> X if (ConstantFP *CFP = dyn_cast(RHSC)) { - if (CFP->isExactlyValue(Context->getConstantFPNegativeZero + if (CFP->isExactlyValue(ConstantFP::getNegativeZero (I.getType())->getValueAPF())) return ReplaceInstUsesWith(I, LHS); } @@ -2404,12 +2438,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castFNegVal(LHS, Context)) + if (Value *LHSV = dyn_castFNegVal(LHS)) return BinaryOperator::CreateFSub(RHS, LHSV); // A + -B --> A - B if (!isa(RHS)) - if (Value *V = dyn_castFNegVal(RHS, Context)) + if (Value *V = dyn_castFNegVal(RHS)) return BinaryOperator::CreateFSub(LHS, V); // Check for X+0.0. Simplify it to X if we know X is not -0.0. @@ -2427,14 +2461,13 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { // instcombined. if (ConstantFP *CFP = dyn_cast(RHS)) { Constant *CI = - Context->getConstantExprFPToSI(CFP, LHSConv->getOperand(0)->getType()); + ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); if (LHSConv->hasOneUse() && - Context->getConstantExprSIToFP(CI, I.getType()) == CFP && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { // Insert the new integer add. - Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - CI, "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + CI, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -2449,10 +2482,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { WillNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0))) { // Insert the new integer add. 
- Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0), - RHSConv->getOperand(0), - "addconv"); - InsertNewInstBefore(NewAdd, I); + Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), + RHSConv->getOperand(0), "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -2465,10 +2496,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (Op0 == Op1) // sub X, X -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // If this is a 'B = x-(-A)', change to B = x+A... - if (Value *V = dyn_castNegVal(Op1, Context)) + if (Value *V = dyn_castNegVal(Op1)) return BinaryOperator::CreateAdd(Op0, V); if (isa(Op0)) @@ -2484,7 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // C - ~X == X + (1+C) Value *X = 0; if (match(Op1, m_Not(m_Value(X)))) - return BinaryOperator::CreateAdd(X, AddOne(C, Context)); + return BinaryOperator::CreateAdd(X, AddOne(C)); // -(X >>u 31) -> (X >>s 31) // -(X >>s 31) -> (X >>u 31) @@ -2519,22 +2550,29 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (SelectInst *SI = dyn_cast(Op1)) if (Instruction *R = FoldOpIntoSelect(I, SI, this)) return R; + + // C - zext(bool) -> bool ? C - 1 : C + if (ZExtInst *ZI = dyn_cast(Op1)) + if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) + return SelectInst::Create(ZI->getOperand(0), SubOne(C), C); } - if (I.getType() == Type::Int1Ty) + if (I.getType() == Type::getInt1Ty(*Context)) return BinaryOperator::CreateXor(Op0, Op1); if (BinaryOperator *Op1I = dyn_cast(Op1)) { if (Op1I->getOpcode() == Instruction::Add) { if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName()); + return BinaryOperator::CreateNeg(Op1I->getOperand(1), + I.getName()); else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName()); + return BinaryOperator::CreateNeg(Op1I->getOperand(0), + I.getName()); else if (ConstantInt *CI1 = dyn_cast(I.getOperand(0))) { if (ConstantInt *CI2 = dyn_cast(Op1I->getOperand(1))) // C1-(X+C2) --> (C1-C2)-X return BinaryOperator::CreateSub( - Context->getConstantExprSub(CI1, CI2), Op1I->getOperand(0)); + ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0)); } } @@ -2558,8 +2596,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) { Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); - Value *NewNot = - InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I); + Value *NewNot = Builder->CreateNot(OtherOp, "B.not"); return BinaryOperator::CreateAnd(Op0, NewNot); } @@ -2569,13 +2606,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (CSI->isZero()) if (Constant *DivRHS = dyn_cast(Op1I->getOperand(1))) return BinaryOperator::CreateSDiv(Op1I->getOperand(0), - Context->getConstantExprNeg(DivRHS)); + ConstantExpr::getNeg(DivRHS)); // X - X*C --> X * (1-C) ConstantInt *C2 = 0; - if (dyn_castFoldableMul(Op1I, C2, Context) == Op0) { + if (dyn_castFoldableMul(Op1I, C2) == Op0) { Constant *CP1 = - Context->getConstantExprSub(Context->getConstantInt(I.getType(), 1), + ConstantExpr::getSub(ConstantInt::get(I.getType(), 1), C2); return BinaryOperator::CreateMul(Op0, CP1); } @@ -2590,18 +2627,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0I->getOperand(0)); } else if 
(Op0I->getOpcode() == Instruction::Sub) { if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName()); + return BinaryOperator::CreateNeg(Op0I->getOperand(1), + I.getName()); } } ConstantInt *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1, Context)) { + if (Value *X = dyn_castFoldableMul(Op0, C1)) { if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1, Context)); + return BinaryOperator::CreateMul(Op1, SubOne(C1)); ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2, Context)) - return BinaryOperator::CreateMul(X, Context->getConstantExprSub(C1, C2)); + if (X == dyn_castFoldableMul(Op1, C2)) + return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); } return 0; } @@ -2610,15 +2648,17 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // If this is a 'B = x-(-A)', change to B = x+A... - if (Value *V = dyn_castFNegVal(Op1, Context)) + if (Value *V = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFAdd(Op0, V); if (BinaryOperator *Op1I = dyn_cast(Op1)) { if (Op1I->getOpcode() == Instruction::FAdd) { if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName()); + return BinaryOperator::CreateFNeg(Op1I->getOperand(1), + I.getName()); else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y - return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName()); + return BinaryOperator::CreateFNeg(Op1I->getOperand(0), + I.getName()); } } @@ -2657,26 +2697,24 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - // TODO: If Op1 is undef and Op0 is finite, return zero. - if (!I.getType()->isFPOrFPVector() && - isa(I.getOperand(1))) // undef * X -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + if (isa(Op1)) // undef * X -> 0 + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - // Simplify mul instructions with a constant RHS... - if (Constant *Op1 = dyn_cast(I.getOperand(1))) { - if (ConstantInt *CI = dyn_cast(Op1)) { + // Simplify mul instructions with a constant RHS. + if (Constant *Op1C = dyn_cast(Op1)) { + if (ConstantInt *CI = dyn_cast(Op1C)) { // ((X << C1)*C2) == (X * (C2 << C1)) if (BinaryOperator *SI = dyn_cast(Op0)) if (SI->getOpcode() == Instruction::Shl) if (Constant *ShOp = dyn_cast(SI->getOperand(1))) return BinaryOperator::CreateMul(SI->getOperand(0), - Context->getConstantExprShl(CI, ShOp)); + ConstantExpr::getShl(CI, ShOp)); if (CI->isZero()) - return ReplaceInstUsesWith(I, Op1); // X * 0 == 0 + return ReplaceInstUsesWith(I, Op1C); // X * 0 == 0 if (CI->equalsInt(1)) // X * 1 == X return ReplaceInstUsesWith(I, Op0); if (CI->isAllOnesValue()) // X * -1 == 0 - X @@ -2685,12 +2723,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { const APInt& Val = cast(CI)->getValue(); if (Val.isPowerOf2()) { // Replace X*(2^C) with X << C return BinaryOperator::CreateShl(Op0, - Context->getConstantInt(Op0->getType(), Val.logBase2())); + ConstantInt::get(Op0->getType(), Val.logBase2())); } - } else if (isa(Op1->getType())) { - // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros. 
+ } else if (isa(Op1C->getType())) { + if (Op1C->isNullValue()) + return ReplaceInstUsesWith(I, Op1C); - if (ConstantVector *Op1V = dyn_cast(Op1)) { + if (ConstantVector *Op1V = dyn_cast(Op1C)) { if (Op1V->isAllOnesValue()) // X * -1 == 0 - X return BinaryOperator::CreateNeg(Op0, I.getName()); @@ -2705,13 +2744,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { if (BinaryOperator *Op0I = dyn_cast(Op0)) if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && - isa(Op0I->getOperand(1)) && isa(Op1)) { + isa(Op0I->getOperand(1)) && isa(Op1C)) { // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. - Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0), - Op1, "tmp"); - InsertNewInstBefore(Add, I); - Value *C1C2 = Context->getConstantExprMul(Op1, - cast(Op0I->getOperand(1))); + Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); + Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1)); return BinaryOperator::CreateAdd(Add, C1C2); } @@ -2726,93 +2762,80 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return NV; } - if (Value *Op0v = dyn_castNegVal(Op0, Context)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castNegVal(I.getOperand(1), Context)) + if (Value *Op0v = dyn_castNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castNegVal(Op1)) return BinaryOperator::CreateMul(Op0v, Op1v); // (X / Y) * Y = X - (X % Y) // (X / Y) * -Y = (X % Y) - X { - Value *Op1 = I.getOperand(1); + Value *Op1C = Op1; BinaryOperator *BO = dyn_cast(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { - Op1 = Op0; - BO = dyn_cast(I.getOperand(1)); + Op1C = Op0; + BO = dyn_cast(Op1); } - Value *Neg = dyn_castNegVal(Op1, Context); + Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && - (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) && + (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); - Instruction *Rem; + // If the division is exact, X % Y is zero. + if (SDivOperator *SDiv = dyn_cast(BO)) + if (SDiv->isExact()) { + if (Op1BO == Op1C) + return ReplaceInstUsesWith(I, Op0BO); + return BinaryOperator::CreateNeg(Op0BO); + } + + Value *Rem; if (BO->getOpcode() == Instruction::UDiv) - Rem = BinaryOperator::CreateURem(Op0BO, Op1BO); + Rem = Builder->CreateURem(Op0BO, Op1BO); else - Rem = BinaryOperator::CreateSRem(Op0BO, Op1BO); - - InsertNewInstBefore(Rem, I); + Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); - if (Op1BO == Op1) + if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); - else - return BinaryOperator::CreateSub(Rem, Op0BO); + return BinaryOperator::CreateSub(Rem, Op0BO); } } - if (I.getType() == Type::Int1Ty) - return BinaryOperator::CreateAnd(Op0, I.getOperand(1)); + /// i1 mul -> i1 and. + if (I.getType() == Type::getInt1Ty(*Context)) + return BinaryOperator::CreateAnd(Op0, Op1); + // X*(1 << Y) --> X << Y + // (1 << Y)*X --> X << Y + { + Value *Y; + if (match(Op0, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op1, Y); + if (match(Op1, m_Shl(m_One(), m_Value(Y)))) + return BinaryOperator::CreateShl(Op0, Y); + } + // If one of the operands of the multiply is a cast from a boolean value, then // we know the bool is either zero or one, so this is a 'masking' multiply. - // See if we can simplify things based on how the boolean was originally - // formed. 
- CastInst *BoolCast = 0; - if (ZExtInst *CI = dyn_cast(Op0)) - if (CI->getOperand(0)->getType() == Type::Int1Ty) - BoolCast = CI; - if (!BoolCast) - if (ZExtInst *CI = dyn_cast(I.getOperand(1))) - if (CI->getOperand(0)->getType() == Type::Int1Ty) - BoolCast = CI; - if (BoolCast) { - if (ICmpInst *SCI = dyn_cast(BoolCast->getOperand(0))) { - Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1); - const Type *SCOpTy = SCIOp0->getType(); - bool TIS = false; - - // If the icmp is true iff the sign bit of X is set, then convert this - // multiply into a shift/and combination. - if (isa(SCIOp1) && - isSignBitCheck(SCI->getPredicate(), cast(SCIOp1), TIS) && - TIS) { - // Shift the X value right to turn it into "all signbits". - Constant *Amt = Context->getConstantInt(SCIOp0->getType(), - SCOpTy->getPrimitiveSizeInBits()-1); - Value *V = - InsertNewInstBefore( - BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt, - BoolCast->getOperand(0)->getName()+ - ".mask"), I); - - // If the multiply type is not the same as the source type, sign extend - // or truncate to the multiply type. - if (I.getType() != V->getType()) { - uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits(); - uint32_t DstBits = I.getType()->getPrimitiveSizeInBits(); - Instruction::CastOps opcode = - (SrcBits == DstBits ? Instruction::BitCast : - (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc)); - V = InsertCastBefore(opcode, V, I.getType(), I); - } + // X * Y (where Y is 0 or 1) -> X & (0-Y) + if (!isa(I.getType())) { + // -2 is "-1 << 1" so it is all bits set except the low one. + APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); + + Value *BoolCast = 0, *OtherOp = 0; + if (MaskedValueIsZero(Op0, Negative2)) + BoolCast = Op0, OtherOp = Op1; + else if (MaskedValueIsZero(Op1, Negative2)) + BoolCast = Op1, OtherOp = Op0; - Value *OtherOp = Op0 == BoolCast ? I.getOperand(1) : Op0; - return BinaryOperator::CreateAnd(V, OtherOp); - } + if (BoolCast) { + Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + BoolCast, "tmp"); + return BinaryOperator::CreateAnd(V, OtherOp); } } @@ -2821,17 +2844,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); - Value *Op0 = I.getOperand(0); + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Simplify mul instructions with a constant RHS... - if (Constant *Op1 = dyn_cast(I.getOperand(1))) { - if (ConstantFP *Op1F = dyn_cast(Op1)) { + if (Constant *Op1C = dyn_cast(Op1)) { + if (ConstantFP *Op1F = dyn_cast(Op1C)) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) if (Op1F->isExactlyValue(1.0)) return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' - } else if (isa(Op1->getType())) { - if (ConstantVector *Op1V = dyn_cast(Op1)) { + } else if (isa(Op1C->getType())) { + if (ConstantVector *Op1V = dyn_cast(Op1C)) { // As above, vector X*splat(1.0) -> X in all defined cases. 
if (Constant *Splat = Op1V->getSplatValue()) { if (ConstantFP *F = dyn_cast(Splat)) @@ -2851,8 +2874,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return NV; } - if (Value *Op0v = dyn_castFNegVal(Op0, Context)) // -X * -Y = X*Y - if (Value *Op1v = dyn_castFNegVal(I.getOperand(1), Context)) + if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castFNegVal(Op1)) return BinaryOperator::CreateFMul(Op0v, Op1v); return Changed ? &I : 0; @@ -2907,11 +2930,11 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { I != E; ++I) { if (*I == SI) { *I = SI->getOperand(NonNullOperand); - AddToWorkList(BBI); + Worklist.Add(BBI); } else if (*I == SelectCond) { - *I = NonNullOperand == 1 ? Context->getConstantIntTrue() : - Context->getConstantIntFalse(); - AddToWorkList(BBI); + *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) : + ConstantInt::getFalse(*Context); + Worklist.Add(BBI); } } @@ -2942,7 +2965,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { if (isa(Op0)) { if (Op0->getType()->isFPOrFPVector()) return ReplaceInstUsesWith(I, Op0); - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } // X / undef -> undef @@ -2962,12 +2985,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // (sdiv X, X) --> 1 (udiv X, X) --> 1 if (Op0 == Op1) { if (const VectorType *Ty = dyn_cast(I.getType())) { - Constant *CI = Context->getConstantInt(Ty->getElementType(), 1); + Constant *CI = ConstantInt::get(Ty->getElementType(), 1); std::vector Elts(Ty->getNumElements(), CI); - return ReplaceInstUsesWith(I, Context->getConstantVector(Elts)); + return ReplaceInstUsesWith(I, ConstantVector::get(Elts)); } - Constant *CI = Context->getConstantInt(I.getType(), 1); + Constant *CI = ConstantInt::get(I.getType(), 1); return ReplaceInstUsesWith(I, CI); } @@ -2989,11 +3012,11 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) if (ConstantInt *LHSRHS = dyn_cast(LHS->getOperand(1))) { if (MultiplyOverflows(RHS, LHSRHS, - I.getOpcode()==Instruction::SDiv, Context)) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + I.getOpcode()==Instruction::SDiv)) + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); else return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), - Context->getConstantExprMul(RHS, LHSRHS)); + ConstantExpr::getMul(RHS, LHSRHS)); } if (!RHS->isZero()) { // avoid X udiv 0 @@ -3009,10 +3032,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // 0 / X == 0, we don't need to preserve faults! if (ConstantInt *LHS = dyn_cast(Op0)) if (LHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // It can't be division by zero, hence it must be division by one. - if (I.getType() == Type::Int1Ty) + if (I.getType() == Type::getInt1Ty(*Context)) return ReplaceInstUsesWith(I, Op0); if (ConstantVector *Op1V = dyn_cast(Op1)) { @@ -3038,14 +3061,13 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // if so, convert to a right shift. 
if (C->getValue().isPowerOf2()) // 0 not included in isPowerOf2 return BinaryOperator::CreateLShr(Op0, - Context->getConstantInt(Op0->getType(), C->getValue().logBase2())); + ConstantInt::get(Op0->getType(), C->getValue().logBase2())); // X udiv C, where C >= signbit if (C->getValue().isNegative()) { - Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C), - I); - return SelectInst::Create(IC, Context->getNullValue(I.getType()), - Context->getConstantInt(I.getType(), 1)); + Value *IC = Builder->CreateICmpULT( Op0, C); + return SelectInst::Create(IC, Constant::getNullValue(I.getType()), + ConstantInt::get(I.getType(), 1)); } } @@ -3057,10 +3079,8 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (C1.isPowerOf2()) { Value *N = RHSI->getOperand(1); const Type *NTy = N->getType(); - if (uint32_t C2 = C1.logBase2()) { - Constant *C2V = Context->getConstantInt(NTy, C2); - N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I); - } + if (uint32_t C2 = C1.logBase2()) + N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp"); return BinaryOperator::CreateLShr(Op0, N); } } @@ -3076,16 +3096,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Compute the shift amounts uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2(); // Construct the "on true" case of the select - Constant *TC = Context->getConstantInt(Op0->getType(), TSA); - Instruction *TSI = BinaryOperator::CreateLShr( - Op0, TC, SI->getName()+".t"); - TSI = InsertNewInstBefore(TSI, I); + Constant *TC = ConstantInt::get(Op0->getType(), TSA); + Value *TSI = Builder->CreateLShr(Op0, TC, SI->getName()+".t"); // Construct the "on false" case of the select - Constant *FC = Context->getConstantInt(Op0->getType(), FSA); - Instruction *FSI = BinaryOperator::CreateLShr( - Op0, FC, SI->getName()+".f"); - FSI = InsertNewInstBefore(FSI, I); + Constant *FC = ConstantInt::get(Op0->getType(), FSA); + Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f"); // construct the select instruction and return it. return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); @@ -3105,17 +3121,45 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // sdiv X, -1 == -X if (RHS->isAllOnesValue()) return BinaryOperator::CreateNeg(Op0); + + // sdiv X, C --> ashr X, log2(C) + if (cast(&I)->isExact() && + RHS->getValue().isNonNegative() && + RHS->getValue().isPowerOf2()) { + Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), + RHS->getValue().exactLogBase2()); + return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); + } + + // -X/C --> X/-C provided the negation doesn't overflow. + if (SubOperator *Sub = dyn_cast(Op0)) + if (isa(Sub->getOperand(0)) && + cast(Sub->getOperand(0))->isNullValue() && + Sub->hasNoSignedWrap()) + return BinaryOperator::CreateSDiv(Sub->getOperand(1), + ConstantExpr::getNeg(RHS)); } // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. 
if (I.getType()->isInteger()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); - if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { - // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set - return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + if (MaskedValueIsZero(Op0, Mask)) { + if (MaskedValueIsZero(Op1, Mask)) { + // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } + ConstantInt *ShiftedInt; + if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && + ShiftedInt->getValue().isPowerOf2()) { + // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) + // Safe because the only negative value (1 << Y) can take on is + // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have + // the sign bit set. + return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); + } } - } + } return 0; } @@ -3134,7 +3178,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { if (isa(Op0)) { // undef % X -> 0 if (I.getType()->isFPOrFPVector()) return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } if (isa(Op1)) return ReplaceInstUsesWith(I, Op1); // X % undef -> undef @@ -3159,15 +3203,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { // 0 % X == 0 for integer, we don't need to preserve faults! if (Constant *LHS = dyn_cast(Op0)) if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); if (ConstantInt *RHS = dyn_cast(Op1)) { // X % 0 == undef, we don't need to preserve faults! if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, Context->getUndef(I.getType())); + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); if (Instruction *Op0I = dyn_cast(Op0)) { if (SelectInst *SI = dyn_cast(Op0I)) { @@ -3199,7 +3243,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // if so, convert to a bitwise and. if (ConstantInt *C = dyn_cast(RHS)) if (C->getValue().isPowerOf2()) - return BinaryOperator::CreateAnd(Op0, SubOne(C, Context)); + return BinaryOperator::CreateAnd(Op0, SubOne(C)); } if (Instruction *RHSI = dyn_cast(I.getOperand(1))) { @@ -3207,9 +3251,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { if (RHSI->getOpcode() == Instruction::Shl && isa(RHSI->getOperand(0))) { if (cast(RHSI->getOperand(0))->getValue().isPowerOf2()) { - Constant *N1 = Context->getConstantIntAllOnesValue(I.getType()); - Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1, - "tmp"), I); + Constant *N1 = Constant::getAllOnesValue(I.getType()); + Value *Add = Builder->CreateAdd(RHSI, N1, "tmp"); return BinaryOperator::CreateAnd(Op0, Add); } } @@ -3223,12 +3266,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // STO == 0 and SFO == 0 handled above. 
if ((STO->getValue().isPowerOf2()) && (SFO->getValue().isPowerOf2())) { - Value *TrueAnd = InsertNewInstBefore( - BinaryOperator::CreateAnd(Op0, SubOne(STO, Context), - SI->getName()+".t"), I); - Value *FalseAnd = InsertNewInstBefore( - BinaryOperator::CreateAnd(Op0, SubOne(SFO, Context), - SI->getName()+".f"), I); + Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), + SI->getName()+".t"); + Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), + SI->getName()+".f"); return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd); } } @@ -3241,15 +3282,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); // Handle the integer rem common cases - if (Instruction *common = commonIRemTransforms(I)) - return common; + if (Instruction *Common = commonIRemTransforms(I)) + return Common; - if (Value *RHSNeg = dyn_castNegVal(Op1, Context)) + if (Value *RHSNeg = dyn_castNegVal(Op1)) if (!isa(RHSNeg) || (isa(RHSNeg) && cast(RHSNeg)->getValue().isStrictlyPositive())) { // X % -Y -> X % Y - AddUsesToWorkList(I); + Worklist.AddValue(I.getOperand(1)); I.setOperand(1, RHSNeg); return &I; } @@ -3279,15 +3320,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { for (unsigned i = 0; i != VWidth; ++i) { if (ConstantInt *RHS = dyn_cast(RHSV->getOperand(i))) { if (RHS->getValue().isNegative()) - Elts[i] = cast(Context->getConstantExprNeg(RHS)); + Elts[i] = cast(ConstantExpr::getNeg(RHS)); else Elts[i] = RHS; } } - Constant *NewRHSV = Context->getConstantVector(Elts); + Constant *NewRHSV = ConstantVector::get(Elts); if (NewRHSV != RHSV) { - AddUsesToWorkList(I); + Worklist.AddValue(I.getOperand(1)); I.setOperand(1, NewRHSV); return &I; } @@ -3351,7 +3392,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) { case ICmpInst::ICMP_SLE: return 6; // 110 // True -> 7 default: - assert(0 && "Invalid ICmp predicate!"); + llvm_unreachable("Invalid ICmp predicate!"); return 0; } } @@ -3379,7 +3420,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { // True -> 7 default: // Not expecting FCMP_FALSE and FCMP_TRUE; - assert(0 && "Unexpected FCmp predicate!"); + llvm_unreachable("Unexpected FCmp predicate!"); return 0; } } @@ -3389,10 +3430,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, - LLVMContext* Context) { + LLVMContext *Context) { switch (code) { - default: assert(0 && "Illegal ICmp code!"); - case 0: return Context->getConstantIntFalse(); + default: llvm_unreachable("Illegal ICmp code!"); + case 0: return ConstantInt::getFalse(*Context); case 1: if (sign) return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); @@ -3415,7 +3456,7 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); else return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); - case 7: return Context->getConstantIntTrue(); + case 7: return ConstantInt::getTrue(*Context); } } @@ -3423,9 +3464,9 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, /// opcode and two operands into either a FCmp instruction. isordered is passed /// in to determine which kind of predicate to use in the new fcmp instruction. 
static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS, LLVMContext* Context) { + Value *LHS, Value *RHS, LLVMContext *Context) { switch (code) { - default: assert(0 && "Illegal FCmp code!"); + default: llvm_unreachable("Illegal FCmp code!"); case 0: if (isordered) return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); @@ -3461,7 +3502,7 @@ static Value *getFCmpValue(bool isordered, unsigned code, return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); else return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return Context->getConstantIntTrue(); + case 7: return ConstantInt::getTrue(*Context); } } @@ -3504,7 +3545,7 @@ struct FoldICmpLogical { case Instruction::And: Code = LHSCode & RHSCode; break; case Instruction::Or: Code = LHSCode | RHSCode; break; case Instruction::Xor: Code = LHSCode ^ RHSCode; break; - default: assert(0 && "Illegal logical opcode!"); return 0; + default: llvm_unreachable("Illegal logical opcode!"); return 0; } bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) || @@ -3529,14 +3570,13 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Value *X = Op->getOperand(0); Constant *Together = 0; if (!Op->isShift()) - Together = Context->getConstantExprAnd(AndRHS, OpRHS); + Together = ConstantExpr::getAnd(AndRHS, OpRHS); switch (Op->getOpcode()) { case Instruction::Xor: if (Op->hasOneUse()) { // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Instruction *And = BinaryOperator::CreateAnd(X, AndRHS); - InsertNewInstBefore(And, TheAnd); + Value *And = Builder->CreateAnd(X, AndRHS); And->takeName(Op); return BinaryOperator::CreateXor(And, Together); } @@ -3547,8 +3587,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, if (Op->hasOneUse() && Together != OpRHS) { // (X | C1) & C2 --> (X | (C1&C2)) & C2 - Instruction *Or = BinaryOperator::CreateOr(X, Together); - InsertNewInstBefore(Or, TheAnd); + Value *Or = Builder->CreateOr(X, Together); Or->takeName(Op); return BinaryOperator::CreateAnd(Or, AndRHS); } @@ -3578,8 +3617,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, return &TheAnd; } else { // Pull the XOR out of the AND. - Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS); - InsertNewInstBefore(NewAnd, TheAnd); + Value *NewAnd = Builder->CreateAnd(X, AndRHS); NewAnd->takeName(Op); return BinaryOperator::CreateXor(NewAnd, AndRHS); } @@ -3595,7 +3633,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); - ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShlMask); + ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask); if (CI->getValue() == ShlMask) { // Masking out bits that the shift already masks @@ -3615,7 +3653,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShrMask); + ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask); if (CI->getValue() == ShrMask) { // Masking out bits that the shift already masks. 
@@ -3634,14 +3672,12 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - Constant *C = Context->getConstantInt(AndRHS->getValue() & ShrMask); + Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask); if (C == AndRHS) { // Masking out bits shifted in. // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. Value *ShVal = Op->getOperand(0); - ShVal = InsertNewInstBefore( - BinaryOperator::CreateLShr(ShVal, OpRHS, - Op->getName()), TheAnd); + ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } } @@ -3659,7 +3695,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, bool Inside, Instruction &IB) { - assert(cast<ConstantInt>(Context->getConstantExprICmp((isSigned ? + assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && "Lo is not <= Hi in range emission code!"); @@ -3675,10 +3711,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, } // Emit V-Lo <u Hi-Lo - Constant *NegLo = Context->getConstantExprNeg(Lo); - Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off"); - InsertNewInstBefore(Add, IB); - Constant *UpperBound = Context->getConstantExprAdd(NegLo, Hi); + Constant *NegLo = ConstantExpr::getNeg(Lo); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); } @@ -3686,7 +3721,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, return new ICmpInst(ICmpInst::ICMP_EQ, V, V); // V < Min || V >= Hi -> V > Hi-1 - Hi = SubOne(cast<ConstantInt>(Hi), Context); + Hi = SubOne(cast<ConstantInt>(Hi)); if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); @@ -3695,10 +3730,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, // Emit V-Lo >u Hi-1-Lo // Note that Hi has already had one subtracted from it, above. - ConstantInt *NegLo = cast<ConstantInt>(Context->getConstantExprNeg(Lo)); - Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off"); - InsertNewInstBefore(Add, IB); - Constant *LowerBound = Context->getConstantExprAdd(NegLo, Hi); + ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); + Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); + Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); } @@ -3740,7 +3774,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, switch (LHSI->getOpcode()) { default: return 0; case Instruction::And: - if (Context->getConstantExprAnd(N, Mask) == Mask) { + if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple.
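InsertRangeTest hinges on one unsigned trick: the two-compare range check "Lo <= V && V < Hi" equals the single compare "(V - Lo) <u (Hi - Lo)", because values below Lo wrap to large unsigned numbers. A standalone sketch over uint8_t:

  // Standalone check of the range-test identity InsertRangeTest emits.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint8_t Lo = 13, Hi = 15;   // half-open range [13, 15)
    for (unsigned v = 0; v < 256; ++v) {
      uint8_t V = uint8_t(v);
      bool twoCmps = (Lo <= V) && (V < Hi);
      bool oneCmp  = uint8_t(V - Lo) < uint8_t(Hi - Lo);  // wraps below Lo
      assert(twoCmps == oneCmp);
    }
  }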
if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == @@ -3764,17 +3798,14 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 if ((Mask->getValue().countLeadingZeros() + Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() - && Context->getConstantExprAnd(N, Mask)->isNullValue()) + && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; return 0; } - Instruction *New; if (isSub) - New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold"); - else - New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold"); - return InsertNewInstBefore(New, I); + return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); + return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold"); } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. @@ -3785,16 +3816,17 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, ICmpInst::Predicate LHSCC, RHSCC; // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), + m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), + m_ConstantInt(RHSCst)))) return 0; // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) // where C is a power of 2 if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT && LHSCst->getValue().isPowerOf2()) { - Instruction *NewOr = BinaryOperator::CreateOr(Val, Val2); - InsertNewInstBefore(NewOr, I); + Value *NewOr = Builder->CreateOr(Val, Val2); return new ICmpInst(LHSCC, NewOr, LHSCst); } @@ -3837,14 +3869,14 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, assert(LHSCst != RHSCst && "Compares not folded above?"); switch (LHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 @@ -3852,13 +3884,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, } case ICmpInst::ICMP_NE: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: - if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X u< 14) -> X < 13 + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: - if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X s< 14) -> X < 13 + if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 @@ -3866,23 +3898,21 @@ Instruction 
*InstCombiner::FoldAndOfICmps(Instruction &I, case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 return ReplaceInstUsesWith(I, RHS); case ICmpInst::ICMP_NE: - if (LHSCst == SubOne(RHSCst, Context)){// (X != 13 & X != 14) -> X-13 >u 1 - Constant *AddCST = Context->getConstantExprNeg(LHSCst); - Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, - Val->getName()+".off"); - InsertNewInstBefore(Add, I); + if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); return new ICmpInst(ICmpInst::ICMP_UGT, Add, - Context->getConstantInt(Add->getType(), 1)); + ConstantInt::get(Add->getType(), 1)); } break; // (X != 13 & X != 15) -> no change } break; case ICmpInst::ICMP_ULT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 @@ -3894,10 +3924,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, break; case ICmpInst::ICMP_SLT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 @@ -3909,18 +3939,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, break; case ICmpInst::ICMP_UGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 return ReplaceInstUsesWith(I, RHS); case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst, Context)) // (X u> 13 & X != 14) -> X u> 14 + if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 return new ICmpInst(LHSCC, Val, RHSCst); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 - return InsertRangeTest(Val, AddOne(LHSCst, Context), - SubOne(RHSCst, Context), false, true, I); + return InsertRangeTest(Val, AddOne(LHSCst), + SubOne(RHSCst), false, true, I); case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change break; @@ -3928,18 +3958,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, break; case ICmpInst::ICMP_SGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 return ReplaceInstUsesWith(I, RHS); case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: - if (RHSCst == AddOne(LHSCst, Context)) // (X s> 13 & X != 14) -> X s> 14 + if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 return new ICmpInst(LHSCC, Val, RHSCst); break; // (X s> 13 & X != 15) -> no change case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
- return InsertRangeTest(Val, AddOne(LHSCst, Context), + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true, I); case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change break; @@ -3950,13 +3980,89 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, return 0; } +Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + + if (LHS->getPredicate() == FCmpInst::FCMP_ORD && + RHS->getPredicate() == FCmpInst::FCMP_ORD) { + // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // false. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp ord x,x" is "fcmp ord x, 0". + if (isa(LHS->getOperand(1)) && + isa(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_ORD, + LHS->getOperand(0), RHS->getOperand(0)); + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); + + if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); + if (Op0CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, LHS); + + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op1Pred == 0) { + std::swap(LHS, RHS); + std::swap(Op0Pred, Op1Pred); + std::swap(Op0Ordered, Op1Ordered); + } + if (Op0Pred == 0) { + // uno && ueq -> uno && (uno || eq) -> ueq + // ord && olt -> ord && (ord && lt) -> olt + if (Op0Ordered == Op1Ordered) + return ReplaceInstUsesWith(I, RHS); + + // uno && oeq -> uno && (ord && eq) -> false + // uno && ord -> false + if (!Op0Ordered) + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); + // ord && ueq -> ord && (uno || eq) -> oeq + return cast(getFCmpValue(true, Op1Pred, + Op0LHS, Op0RHS, Context)); + } + } + + return 0; +} + Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa(Op1)) // X & undef -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // and X, X = X if (Op0 == Op1) @@ -3976,36 +4082,32 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } if (ConstantInt *AndRHS = dyn_cast(Op1)) { - const APInt& AndRHSMask = AndRHS->getValue(); + const APInt &AndRHSMask = AndRHS->getValue(); APInt NotAndRHS(~AndRHSMask); // Optimize a variety of ((val OP C1) & C2) combinations... 
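The mask-pushing cases that follow rest on AND distributing over XOR and OR: when one arm has no bits outside the mask (the MaskedValueIsZero tests), "(X ^ Y) & M" can become "X ^ (Y & M)". A standalone exhaustive check at 8 bits, assuming nothing beyond ordinary C++ integers:

  // Standalone check of the mask-pushing identities used in visitAnd.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint8_t M = 0x0F;
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y) {
        uint8_t X = uint8_t(x) & M;               // X & ~M == 0 by construction
        uint8_t Y = uint8_t(y);
        assert(uint8_t((X ^ Y) & M) == uint8_t(X ^ (Y & M)));
        assert(uint8_t((X | Y) & M) == uint8_t(X | (Y & M)));
      }
  }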
- if (isa(Op0)) { - Instruction *Op0I = cast(Op0); + if (BinaryOperator *Op0I = dyn_cast(Op0)) { Value *Op0LHS = Op0I->getOperand(0); Value *Op0RHS = Op0I->getOperand(1); switch (Op0I->getOpcode()) { + default: break; case Instruction::Xor: case Instruction::Or: // If the mask is only needed on one incoming arm, push it up. - if (Op0I->hasOneUse()) { - if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { - // Not masking anything out for the LHS, move to RHS. - Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); - InsertNewInstBefore(NewRHS, I); - return BinaryOperator::Create( - cast(Op0I)->getOpcode(), Op0LHS, NewRHS); - } - if (!isa(Op0RHS) && - MaskedValueIsZero(Op0RHS, NotAndRHS)) { - // Not masking anything out for the RHS, move to LHS. - Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); - InsertNewInstBefore(NewLHS, I); - return BinaryOperator::Create( - cast(Op0I)->getOpcode(), NewLHS, Op0RHS); - } + if (!Op0I->hasOneUse()) break; + + if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { + // Not masking anything out for the LHS, move to RHS. + Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); + } + if (!isa(Op0RHS) && + MaskedValueIsZero(Op0RHS, NotAndRHS)) { + // Not masking anything out for the RHS, move to LHS. + Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); + return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } break; @@ -4036,8 +4138,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { ConstantInt *A = dyn_cast(Op0LHS); if (!(A && A->isZero()) && // avoid infinite recursion. MaskedValueIsZero(Op0LHS, Mask)) { - Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS); - InsertNewInstBefore(NewNeg, I); + Value *NewNeg = Builder->CreateNeg(Op0RHS); return BinaryOperator::CreateAnd(NewNeg, AndRHS); } } @@ -4048,9 +4149,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (1 << x) & 1 --> zext(x == 0) // (1 >> x) & 1 --> zext(x == 0) if (AndRHSMask == 1 && Op0LHS == AndRHS) { - Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS, - Context->getNullValue(I.getType())); - InsertNewInstBefore(NewICmp, I); + Value *NewICmp = + Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); return new ZExtInst(NewICmp, I.getType()); } break; @@ -4072,21 +4172,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // into : and (cast X to T), trunc_or_bitcast(C1)&C2 // This will fold the two constants together, which may allow // other simplifications. 
- Instruction *NewCast = CastInst::CreateTruncOrBitCast( + Value *NewCast = Builder->CreateTruncOrBitCast( CastOp->getOperand(0), I.getType(), CastOp->getName()+".shrunk"); - NewCast = InsertNewInstBefore(NewCast, I); // trunc_or_bitcast(C1)&C2 - Constant *C3 = - Context->getConstantExprTruncOrBitCast(AndCI,I.getType()); - C3 = Context->getConstantExprAnd(C3, AndRHS); + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + C3 = ConstantExpr::getAnd(C3, AndRHS); return BinaryOperator::CreateAnd(NewCast, C3); } else if (CastOp->getOpcode() == Instruction::Or) { // Change: and (cast (or X, C1) to T), C2 // into : trunc(C1)&C2 iff trunc(C1)&C2 == C2 - Constant *C3 = - Context->getConstantExprTruncOrBitCast(AndCI,I.getType()); - if (Context->getConstantExprAnd(C3, AndRHS) == AndRHS) + Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); + if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS) // trunc(C1)&C2 return ReplaceInstUsesWith(I, AndRHS); } @@ -4103,17 +4200,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return NV; } - Value *Op0NotVal = dyn_castNotVal(Op0, Context); - Value *Op1NotVal = dyn_castNotVal(Op1, Context); + Value *Op0NotVal = dyn_castNotVal(Op0); + Value *Op1NotVal = dyn_castNotVal(Op1); if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // (~A & ~B) == (~(A | B)) - De Morgan's Law if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Instruction *Or = BinaryOperator::CreateOr(Op0NotVal, Op1NotVal, - I.getName()+".demorgan"); - InsertNewInstBefore(Or, I); + Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, + I.getName()+".demorgan"); return BinaryOperator::CreateNot(Or); } @@ -4159,11 +4255,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { cast(Op1)->swapOperands(); std::swap(A, B); } - if (A == Op0) { // A&(A^B) -> A & ~B - Instruction *NotB = BinaryOperator::CreateNot(B, "tmp"); - InsertNewInstBefore(NotB, I); - return BinaryOperator::CreateAnd(A, NotB); - } + if (A == Op0) // A&(A^B) -> A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp")); } // (A&((~A)|B)) -> A&B @@ -4177,7 +4270,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast(Op1)) { // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) return R; if (ICmpInst *LHS = dyn_cast(Op0)) @@ -4190,16 +4283,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (CastInst *Op1C = dyn_cast(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && // Only do this if the casts both really cause code to be generated. 
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), I.getType(), TD) && ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType(), TD)) { - Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), - I.getName()); - InsertNewInstBefore(NewOp, I); + Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } } @@ -4210,10 +4302,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0), - SI1->getOperand(0), - SI0->getName()), I); + Value *NewOp = + Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -4221,66 +4312,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // If and'ing two fcmp, try combine them into one. if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) { - if (LHS->getPredicate() == FCmpInst::FCMP_ORD && - RHS->getPredicate() == FCmpInst::FCMP_ORD) { - // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) - if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // false. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); - return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0), - RHS->getOperand(0)); - } - } else { - Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS; - FCmpInst::Predicate Op0CC, Op1CC; - if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) && - match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) { - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). - if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - else if (Op0CC == FCmpInst::FCMP_FALSE || - Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); - else if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Op1); - else if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Op0); - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op1Pred == 0) { - std::swap(Op0, Op1); - std::swap(Op0Pred, Op1Pred); - std::swap(Op0Ordered, Op1Ordered); - } - if (Op0Pred == 0) { - // uno && ueq -> uno && (uno || eq) -> ueq - // ord && olt -> ord && (ord && lt) -> olt - if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, Op1); - // uno && oeq -> uno && (ord && eq) -> false - // uno && ord -> false - if (!Op0Ordered) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); - // ord && ueq -> ord && (uno || eq) -> oeq - return cast(getFCmpValue(true, Op1Pred, - Op0LHS, Op0RHS, Context)); - } - } - } - } - } + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) + return Res; } return Changed ? 
&I : 0; @@ -4450,7 +4484,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { /// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then /// we can simplify this expression to "cond ? C : D or B". static Instruction *MatchSelectFromAndOr(Value *A, Value *B, - Value *C, Value *D) { + Value *C, Value *D, + LLVMContext *Context) { // If A is not a select of -1/0, this cannot match. Value *Cond = 0; if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) @@ -4477,8 +4512,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, ICmpInst::Predicate LHSCC, RHSCC; // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), + m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), + m_ConstantInt(RHSCst)))) return 0; // From here on, we only handle: @@ -4520,18 +4557,16 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, assert(LHSCst != RHSCst && "Compares not folded above?"); switch (LHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: - if (LHSCst == SubOne(RHSCst, Context)) { + if (LHSCst == SubOne(RHSCst)) { // (X == 13 | X == 14) -> X-13 <u 2 - Constant *AddCST = Context->getConstantExprNeg(LHSCst); - Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, - Val->getName()+".off"); - InsertNewInstBefore(Add, I); - AddCST = Context->getConstantExprSub(AddOne(RHSCst, Context), LHSCst); + Constant *AddCST = ConstantExpr::getNeg(LHSCst); + Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); + AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); } break; // (X == 13 | X == 15) -> no change @@ -4546,7 +4581,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_NE: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 @@ -4554,12 +4589,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); } break; case ICmpInst::ICMP_ULT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change break; case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2 @@ -4567,7 +4602,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // this can cause overflow.
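The "(X == 13 | X == 14) -> X-13 <u 2" fold restored above is safe precisely because the subtraction is unsigned: any X below 13 wraps to a huge value and fails the compare. A standalone 8-bit check:

  // Standalone check of the adjacent-equality fold in FoldOrOfICmps.
  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned x = 0; x < 256; ++x) {
      uint8_t X = uint8_t(x);
      assert(((X == 13) || (X == 14)) == (uint8_t(X - 13) < 2));
    }
  }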
if (RHSCst->isMaxValue(false)) return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context), + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false, I); case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change break; @@ -4580,7 +4615,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_SLT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change break; case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2 @@ -4588,7 +4623,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // this can cause overflow. if (RHSCst->isMaxValue(true)) return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context), + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false, I); case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change break; @@ -4601,7 +4636,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_UGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 return ReplaceInstUsesWith(I, LHS); @@ -4609,14 +4644,14 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change break; } break; case ICmpInst::ICMP_SGT: switch (RHSCC) { - default: assert(0 && "Unknown integer condition code!"); + default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 return ReplaceInstUsesWith(I, LHS); @@ -4624,7 +4659,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, break; case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change break; } @@ -4633,6 +4668,72 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, return 0; } +Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, + FCmpInst *RHS) { + if (LHS->getPredicate() == FCmpInst::FCMP_UNO && + RHS->getPredicate() == FCmpInst::FCMP_UNO && + LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { + if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) + if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { + // If either of the constants are nans, then the whole thing returns + // true. + if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + + // Otherwise, no need to compare the two constants, compare the + // rest. + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); + } + + // Handle vector zeros. This occurs because the canonical form of + // "fcmp uno x,x" is "fcmp uno x, 0". 
+ if (isa(LHS->getOperand(1)) && + isa(RHS->getOperand(1))) + return new FCmpInst(FCmpInst::FCMP_UNO, + LHS->getOperand(0), RHS->getOperand(0)); + + return 0; + } + + Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); + Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); + FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); + + if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { + // Swap RHS operands to match LHS. + Op1CC = FCmpInst::getSwappedPredicate(Op1CC); + std::swap(Op1LHS, Op1RHS); + } + if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { + // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). + if (Op0CC == Op1CC) + return new FCmpInst((FCmpInst::Predicate)Op0CC, + Op0LHS, Op0RHS); + if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + if (Op0CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, RHS); + if (Op1CC == FCmpInst::FCMP_FALSE) + return ReplaceInstUsesWith(I, LHS); + bool Op0Ordered; + bool Op1Ordered; + unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); + unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); + if (Op0Ordered == Op1Ordered) { + // If both are ordered or unordered, return a new fcmp with + // or'ed predicates. + Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, + Op0LHS, Op0RHS, Context); + if (Instruction *I = dyn_cast(RV)) + return I; + // Otherwise, it's a constant boolean value... + return ReplaceInstUsesWith(I, RV); + } + } + return 0; +} + /// FoldOrWithConstants - This helper function folds: /// /// ((A | B) & C1) | (B & C2) @@ -4655,8 +4756,7 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, if (!Xor.isAllOnesValue()) return 0; if (V1 == A || V1 == B) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I); + Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } @@ -4668,7 +4768,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa(Op1)) // X | undef -> -1 - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // or X, X = X if (Op0 == Op1) @@ -4691,21 +4791,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast(Op1)) { ConstantInt *C1 = 0; Value *X = 0; // (X & C1) | C2 --> (X | C2) & (C1|C2) - if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { - Instruction *Or = BinaryOperator::CreateOr(X, RHS); - InsertNewInstBefore(Or, I); + if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + isOnlyUse(Op0)) { + Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); return BinaryOperator::CreateAnd(Or, - Context->getConstantInt(RHS->getValue() | C1->getValue())); + ConstantInt::get(*Context, RHS->getValue() | C1->getValue())); } // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) - if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { - Instruction *Or = BinaryOperator::CreateOr(X, RHS); - InsertNewInstBefore(Or, I); + if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && + isOnlyUse(Op0)) { + Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); return BinaryOperator::CreateXor(Or, - Context->getConstantInt(C1->getValue() & ~RHS->getValue())); + ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue())); } // Try to fold constant and into select arguments. 
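The FoldOrOfFCmps hunk above leans on "fcmp uno x, C" with a non-NaN constant C being exactly an isnan(x) test, so the OR of two such tests collapses to one unordered compare. A standalone sketch using std::isnan rather than the LLVM predicate machinery:

  // Standalone check: (fcmp uno x, 0.0) | (fcmp uno y, 0.0) == fcmp uno x, y.
  #include <cassert>
  #include <cmath>
  #include <limits>

  static bool uno(double a, double b) { return std::isnan(a) || std::isnan(b); }

  int main() {
    double vals[] = { 0.0, -1.5, std::numeric_limits<double>::quiet_NaN() };
    for (double x : vals)
      for (double y : vals)
        assert((uno(x, 0.0) || uno(y, 0.0)) == uno(x, y));
  }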
@@ -4738,19 +4838,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (X^C)|Y -> (X|Y)^C iff Y&C == 0 - if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && + if (Op0->hasOneUse() && + match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && MaskedValueIsZero(Op1, C1->getValue())) { - Instruction *NOr = BinaryOperator::CreateOr(A, Op1); - InsertNewInstBefore(NOr, I); + Value *NOr = Builder->CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, C1); } // Y|(X^C) -> (X|Y)^C iff Y&C == 0 - if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && + if (Op1->hasOneUse() && + match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && MaskedValueIsZero(Op0, C1->getValue())) { - Instruction *NOr = BinaryOperator::CreateOr(A, Op0); - InsertNewInstBefore(NOr, I); + Value *NOr = Builder->CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, C1); } @@ -4801,20 +4901,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { V1 = C, V2 = A, V3 = B; if (V1) { - Value *Or = - InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I); + Value *Or = Builder->CreateOr(V2, V3, "tmp"); return BinaryOperator::CreateAnd(V1, Or); } } // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants - if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) + if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context)) return Match; - if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) + if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context)) return Match; - if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) + if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context)) return Match; - if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) + if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context)) return Match; // ((A&~B)|(~A&B)) -> A^B @@ -4841,10 +4940,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() && SI0->getOperand(1) == SI1->getOperand(1) && (SI0->hasOneUse() || SI1->hasOneUse())) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0), - SI1->getOperand(0), - SI0->getName()), I); + Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), + SI0->getName()); return BinaryOperator::Create(SI1->getOpcode(), NewOp, SI1->getOperand(1)); } @@ -4865,26 +4962,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (match(Op0, m_Not(m_Value(A)))) { // ~A | Op1 if (A == Op1) // ~A | A == -1 - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); } else { A = 0; } // Note, A is still live here! 
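The "(X^C)|Y -> (X|Y)^C iff Y&C == 0" rewrite above holds because Y, having no bits in common with C, cannot disturb the bits the XOR flips. A standalone exhaustive check at 8 bits:

  // Standalone check of the xor-hoisting rewrite in visitOr.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint8_t C = 0xF0;
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y) {
        uint8_t X = uint8_t(x);
        uint8_t Y = uint8_t(y) & uint8_t(~C);     // enforce Y & C == 0
        assert(uint8_t((X ^ C) | Y) == uint8_t((X | Y) ^ C));
      }
  }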
if (match(Op1, m_Not(m_Value(B)))) { // Op0 | ~B if (Op0 == B) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); // (~A | ~B) == (~(A & B)) - De Morgan's Law if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) { - Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B, - I.getName()+".demorgan"), I); + Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan"); return BinaryOperator::CreateNot(And); } } // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) { - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) return R; if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) @@ -4899,17 +4995,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (!isa(Op0C->getOperand(0)) || !isa(Op1C->getOperand(0))) { const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVector() && // Only do this if the casts both really cause code to be // generated. ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), I.getType(), TD) && ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType(), TD)) { - Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), - I.getName()); - InsertNewInstBefore(NewOp, I); + Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } } @@ -4919,61 +5014,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) if (FCmpInst *LHS = dyn_cast(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) { - if (LHS->getPredicate() == FCmpInst::FCMP_UNO && - RHS->getPredicate() == FCmpInst::FCMP_UNO && - LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { - if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) - if (ConstantFP *RHSC = dyn_cast(RHS->getOperand(1))) { - // If either of the constants are nans, then the whole thing returns - // true. - if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - - // Otherwise, no need to compare the two constants, compare the - // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0), - RHS->getOperand(0)); - } - } else { - Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS; - FCmpInst::Predicate Op0CC, Op1CC; - if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) && - match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) { - if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { - // Swap RHS operands to match LHS. - Op1CC = FCmpInst::getSwappedPredicate(Op1CC); - std::swap(Op1LHS, Op1RHS); - } - if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { - // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). 
- if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - else if (Op0CC == FCmpInst::FCMP_TRUE || - Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - else if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Op1); - else if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Op0); - bool Op0Ordered; - bool Op1Ordered; - unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); - unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); - if (Op0Ordered == Op1Ordered) { - // If both are ordered or unordered, return a new fcmp with - // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, - Op0LHS, Op0RHS, Context); - if (Instruction *I = dyn_cast(RV)) - return I; - // Otherwise, it's a constant boolean value... - return ReplaceInstUsesWith(I, RV); - } - } - } - } - } + if (FCmpInst *RHS = dyn_cast(I.getOperand(1))) + if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) + return Res; } return Changed ? &I : 0; @@ -5001,14 +5044,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (isa(Op0)) // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); return ReplaceInstUsesWith(I, Op1); // X ^ undef -> undef } // xor X, X = 0, even if X is nested in a sequence of Xor's. - if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1), Context)) { + if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) { assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } // See if we can simplify any instructions used by the instruction whose sole @@ -5020,22 +5063,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X // Is this a ~ operation? 
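The not-handling below rewrites via De Morgan's laws, ~(~X & Y) == X | ~Y and ~(~X | Y) == X & ~Y. A standalone 8-bit check:

  // Standalone check of the De Morgan rewrites performed for ~ operations.
  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned x = 0; x < 256; ++x)
      for (unsigned y = 0; y < 256; ++y) {
        uint8_t X = uint8_t(x), Y = uint8_t(y);
        assert(uint8_t(~(~X & Y)) == uint8_t(X | ~Y));
        assert(uint8_t(~(~X | Y)) == uint8_t(X & ~Y));
      }
  }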
- if (Value *NotOp = dyn_castNotVal(&I, Context)) { + if (Value *NotOp = dyn_castNotVal(&I)) { // ~(~X & Y) --> (X | ~Y) - De Morgan's Law // ~(~X | Y) === (X & ~Y) - De Morgan's Law if (BinaryOperator *Op0I = dyn_cast(NotOp)) { if (Op0I->getOpcode() == Instruction::And || Op0I->getOpcode() == Instruction::Or) { - if (dyn_castNotVal(Op0I->getOperand(1), Context)) Op0I->swapOperands(); - if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0), Context)) { - Instruction *NotY = - BinaryOperator::CreateNot(Op0I->getOperand(1), - Op0I->getOperand(1)->getName()+".not"); - InsertNewInstBefore(NotY, I); + if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands(); + if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { + Value *NotY = + Builder->CreateNot(Op0I->getOperand(1), + Op0I->getOperand(1)->getName()+".not"); if (Op0I->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(Op0NotVal, NotY); - else - return BinaryOperator::CreateAnd(Op0NotVal, NotY); + return BinaryOperator::CreateAnd(Op0NotVal, NotY); } } } @@ -5043,7 +5084,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast(Op1)) { - if (RHS == Context->getConstantIntTrue() && Op0->hasOneUse()) { + if (RHS->isOne() && Op0->hasOneUse()) { // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B if (ICmpInst *ICI = dyn_cast(Op0)) return new ICmpInst(ICI->getInversePredicate(), @@ -5059,16 +5100,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CmpInst *CI = dyn_cast(Op0C->getOperand(0))) { if (CI->hasOneUse() && Op0C->hasOneUse()) { Instruction::CastOps Opcode = Op0C->getOpcode(); - if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) { - if (RHS == Context->getConstantExprCast(Opcode, - Context->getConstantIntTrue(), - Op0C->getDestTy())) { - Instruction *NewCI = InsertNewInstBefore(CmpInst::Create( - CI->getOpcode(), CI->getInversePredicate(), - CI->getOperand(0), CI->getOperand(1)), I); - NewCI->takeName(CI); - return CastInst::Create(Opcode, NewCI, Op0C->getType()); - } + if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && + (RHS == ConstantExpr::getCast(Opcode, + ConstantInt::getTrue(*Context), + Op0C->getDestTy()))) { + CI->setPredicate(CI->getInversePredicate()); + return CastInst::Create(Opcode, CI, Op0C->getType()); } } } @@ -5078,9 +5115,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // ~(c-X) == X-c-1 == X+(-c-1) if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue()) if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { - Constant *NegOp0I0C = Context->getConstantExprNeg(Op0I0C); - Constant *ConstantRHS = Context->getConstantExprSub(NegOp0I0C, - Context->getConstantInt(I.getType(), 1)); + Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); + Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, + ConstantInt::get(I.getType(), 1)); return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS); } @@ -5088,28 +5125,28 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X if (RHS->isAllOnesValue()) { - Constant *NegOp0CI = Context->getConstantExprNeg(Op0CI); + Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub( - Context->getConstantExprSub(NegOp0CI, - Context->getConstantInt(I.getType(), 1)), + ConstantExpr::getSub(NegOp0CI, + ConstantInt::get(I.getType(), 1)), Op0I->getOperand(0)); } else if (RHS->getValue().isSignBit()) { // (X + C) ^ signbit -> (X + C + signbit) - Constant *C = - 
Context->getConstantInt(RHS->getValue() + Op0CI->getValue()); + Constant *C = ConstantInt::get(*Context, + RHS->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); } } else if (Op0I->getOpcode() == Instruction::Or) { // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0 if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { - Constant *NewRHS = Context->getConstantExprOr(Op0CI, RHS); + Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS); // Anything in both C1 and C2 is known to be zero, remove it from // NewRHS. - Constant *CommonBits = Context->getConstantExprAnd(Op0CI, RHS); - NewRHS = Context->getConstantExprAnd(NewRHS, - Context->getConstantExprNot(CommonBits)); - AddToWorkList(Op0I); + Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); + NewRHS = ConstantExpr::getAnd(NewRHS, + ConstantExpr::getNot(CommonBits)); + Worklist.Add(Op0I); I.setOperand(0, Op0I->getOperand(0)); I.setOperand(1, NewRHS); return &I; @@ -5127,13 +5164,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return NV; } - if (Value *X = dyn_castNotVal(Op0, Context)) // ~A ^ A == -1 + if (Value *X = dyn_castNotVal(Op0)) // ~A ^ A == -1 if (X == Op1) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); - if (Value *X = dyn_castNotVal(Op1, Context)) // A ^ ~A == -1 + if (Value *X = dyn_castNotVal(Op1)) // A ^ ~A == -1 if (X == Op0) - return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); BinaryOperator *Op1I = dyn_cast(Op1); @@ -5152,7 +5189,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { return ReplaceInstUsesWith(I, B); // A^(A^B) == B } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) { return ReplaceInstUsesWith(I, A); // A^(B^A) == B - } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ + } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && + Op1I->hasOneUse()){ if (A == Op0) { // A^(A&B) -> A^(B&A) Op1I->swapOperands(); std::swap(A, B); @@ -5167,26 +5205,23 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { BinaryOperator *Op0I = dyn_cast(Op0); if (Op0I) { Value *A, *B; - if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) { + if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); - if (B == Op1) { // (A|B)^B == A & ~B - Instruction *NotB = - InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I); - return BinaryOperator::CreateAnd(A, NotB); - } + if (B == Op1) // (A|B)^B == A & ~B + return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp")); } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) { return ReplaceInstUsesWith(I, B); // (A^B)^A == B } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) { return ReplaceInstUsesWith(I, A); // (B^A)^A == B - } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ + } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && + Op0I->hasOneUse()){ if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); if (B == Op1 && // (B&A)^A == ~B & A !isa(Op1)) { // Canonical form is (B&C)^C - Instruction *N = - InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I); - return BinaryOperator::CreateAnd(N, Op1); + return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1); } } } @@ -5196,10 +5231,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { 
Op0I->getOpcode() == Op1I->getOpcode() && Op0I->getOperand(1) == Op1I->getOperand(1) && (Op1I->hasOneUse() || Op1I->hasOneUse())) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0), - Op1I->getOperand(0), - Op0I->getName()), I); + Value *NewOp = + Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), + Op0I->getName()); return BinaryOperator::Create(Op1I->getOpcode(), NewOp, Op1I->getOperand(1)); } @@ -5235,8 +5269,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { X = B, Y = A, Z = C; if (X) { - Instruction *NewOp = - InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I); + Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName()); return BinaryOperator::CreateAnd(NewOp, X); } } @@ -5244,7 +5277,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B) if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) - if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) + if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS))) return R; // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) @@ -5258,10 +5291,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { I.getType(), TD) && ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), I.getType(), TD)) { - Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0), - Op1C->getOperand(0), - I.getName()); - InsertNewInstBefore(NewOp, I); + Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), + Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } } @@ -5271,8 +5302,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } static ConstantInt *ExtractElement(Constant *V, Constant *Idx, - LLVMContext* Context) { - return cast(Context->getConstantExprExtractElement(V, Idx)); + LLVMContext *Context) { + return cast(ConstantExpr::getExtractElement(V, Idx)); } static bool HasAddOverflow(ConstantInt *Result, @@ -5290,13 +5321,13 @@ static bool HasAddOverflow(ConstantInt *Result, /// AddWithOverflow - Compute Result = In1+In2, returning true if the result /// overflowed for this type. static bool AddWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext* Context, + Constant *In2, LLVMContext *Context, bool IsSigned = false) { - Result = Context->getConstantExprAdd(In1, In2); + Result = ConstantExpr::getAdd(In1, In2); if (const VectorType *VTy = dyn_cast(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = Context->getConstantInt(Type::Int32Ty, i); + Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); if (HasAddOverflow(ExtractElement(Result, Idx, Context), ExtractElement(In1, Idx, Context), ExtractElement(In2, Idx, Context), @@ -5326,13 +5357,13 @@ static bool HasSubOverflow(ConstantInt *Result, /// SubWithOverflow - Compute Result = In1-In2, returning true if the result /// overflowed for this type. 
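HasAddOverflow and HasSubOverflow reduce to the usual two's-complement sign test: a signed addition overflows exactly when both operands share a sign and the result's sign differs. A standalone sketch of that condition (one standard formulation, computed on uint32_t so the wrap is well defined; this is not the LLVM helper itself):

  // Standalone sketch of a signed-add overflow check like HasAddOverflow's.
  #include <cassert>
  #include <cstdint>

  static bool hasAddOverflow(uint32_t a, uint32_t b) {
    uint32_t r = a + b;                      // two's-complement wrap
    bool aNeg = a >> 31, bNeg = b >> 31, rNeg = r >> 31;
    return aNeg == bNeg && rNeg != aNeg;
  }

  int main() {
    assert(hasAddOverflow(0x7FFFFFFF, 1));            // INT_MAX + 1 overflows
    assert(!hasAddOverflow(0x7FFFFFFF, 0xFFFFFFFF));  // INT_MAX + (-1) is fine
    assert(hasAddOverflow(0x80000000, 0xFFFFFFFF));   // INT_MIN + (-1) overflows
  }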
static bool SubWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, LLVMContext* Context, + Constant *In2, LLVMContext *Context, bool IsSigned = false) { - Result = Context->getConstantExprSub(In1, In2); + Result = ConstantExpr::getSub(In1, In2); if (const VectorType *VTy = dyn_cast(In1->getType())) { for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = Context->getConstantInt(Type::Int32Ty, i); + Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i); if (HasSubOverflow(ExtractElement(Result, Idx, Context), ExtractElement(In1, Idx, Context), ExtractElement(In2, Idx, Context), @@ -5351,11 +5382,10 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1, /// code necessary to compute the offset from the base pointer (without adding /// in the base pointer). Return the result as a signed integer of intptr size. static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { - TargetData &TD = IC.getTargetData(); + TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); - const Type *IntPtrTy = TD.getIntPtrType(); - LLVMContext* Context = IC.getContext(); - Value *Result = Context->getNullValue(IntPtrTy); + const Type *IntPtrTy = TD.getIntPtrType(I.getContext()); + Value *Result = Constant::getNullValue(IntPtrTy); // Build a mask for high order bits. unsigned IntPtrWidth = TD.getPointerSizeInBits(); @@ -5372,74 +5402,49 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { if (const StructType *STy = dyn_cast(*GTI)) { Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); - if (ConstantInt *RC = dyn_cast(Result)) - Result = - Context->getConstantInt(RC->getValue() + APInt(IntPtrWidth, Size)); - else - Result = IC.InsertNewInstBefore( - BinaryOperator::CreateAdd(Result, - Context->getConstantInt(IntPtrTy, Size), - GEP->getName()+".offs"), I); + Result = IC.Builder->CreateAdd(Result, + ConstantInt::get(IntPtrTy, Size), + GEP->getName()+".offs"); continue; } - Constant *Scale = Context->getConstantInt(IntPtrTy, Size); + Constant *Scale = ConstantInt::get(IntPtrTy, Size); Constant *OC = - Context->getConstantExprIntegerCast(OpC, IntPtrTy, true /*SExt*/); - Scale = Context->getConstantExprMul(OC, Scale); - if (Constant *RC = dyn_cast(Result)) - Result = Context->getConstantExprAdd(RC, Scale); - else { - // Emit an add instruction. - Result = IC.InsertNewInstBefore( - BinaryOperator::CreateAdd(Result, Scale, - GEP->getName()+".offs"), I); - } + ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); + Scale = ConstantExpr::getMul(OC, Scale); + // Emit an add instruction. + Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs"); continue; } // Convert to correct type. - if (Op->getType() != IntPtrTy) { - if (Constant *OpC = dyn_cast(Op)) - Op = Context->getConstantExprIntegerCast(OpC, IntPtrTy, true); - else - Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy, - true, - Op->getName()+".c"), I); - } + if (Op->getType() != IntPtrTy) + Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c"); if (Size != 1) { - Constant *Scale = Context->getConstantInt(IntPtrTy, Size); - if (Constant *OpC = dyn_cast(Op)) - Op = Context->getConstantExprMul(OpC, Scale); - else // We'll let instcombine(mul) convert this to a shl if possible. 
- Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale, - GEP->getName()+".idx"), I); + Constant *Scale = ConstantInt::get(IntPtrTy, Size); + // We'll let instcombine(mul) convert this to a shl if possible. + Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx"); } // Emit an add instruction. - if (isa(Op) && isa(Result)) - Result = Context->getConstantExprAdd(cast(Op), - cast(Result)); - else - Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result, - GEP->getName()+".offs"), I); + Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs"); } return Result; } -/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of -/// the *offset* implied by GEP to zero. For example, if we have &A[i], we want -/// to return 'i' for "icmp ne i, 0". Note that, in general, indices can be -/// complex, and scales are involved. The above expression would also be legal -/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). This -/// later form is less amenable to optimization though, and we are allowed to -/// generate the first by knowing that pointer arithmetic doesn't overflow. +/// EvaluateGEPOffsetExpression - Return a value that can be used to compare +/// the *offset* implied by a GEP to zero. For example, if we have &A[i], we +/// want to return 'i' for "icmp ne i, 0". Note that, in general, indices can +/// be complex, and scales are involved. The above expression would also be +/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). +/// This later form is less amenable to optimization though, and we are allowed +/// to generate the first by knowing that pointer arithmetic doesn't overflow. /// /// If we can't emit an optimized form for this expression, this returns null. /// static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, InstCombiner &IC) { - TargetData &TD = IC.getTargetData(); + TargetData &TD = *IC.getTargetData(); gep_type_iterator GTI = gep_type_begin(GEP); // Check to see if this gep only has a single variable index. If so, and if @@ -5502,8 +5507,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) - VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(), - VariableIdx->getNameStart(), &I); + VariableIdx = new TruncInst(VariableIdx, + TD.getIntPtrType(VariableIdx->getContext()), + VariableIdx->getName(), &I); return VariableIdx; } @@ -5523,40 +5529,39 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I, return 0; // Okay, we can do this evaluation. Start by converting the index to intptr. - const Type *IntPtrTy = TD.getIntPtrType(); + const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext()); if (VariableIdx->getType() != IntPtrTy) VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy, true /*SExt*/, - VariableIdx->getNameStart(), &I); - Constant *OffsetVal = IC.getContext()->getConstantInt(IntPtrTy, NewOffs); + VariableIdx->getName(), &I); + Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I); } /// FoldGEPICmp - Fold comparisons between a GEP instruction and something /// else. At this point we know that the GEP is on the LHS of the comparison. 
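EmitGEPOffset linearizes a GEP into a sum of index-times-element-size terms plus struct field offsets, and FoldGEPICmp below compares such offsets directly; the "(gep Ptr, i) == Ptr" case is then just "i == 0", because inbounds arithmetic cannot wrap. A standalone sketch for the one-index, 4-byte-element case (the element size is assumed for illustration):

  // Standalone sketch of the offset comparison FoldGEPICmp reduces to,
  // for &A[i] with A a pointer to 4-byte elements.
  #include <cassert>
  #include <cstdint>

  int main() {
    const uint64_t ElemSize = 4;                    // sizeof(i32)
    for (int64_t i = -4; i <= 4; ++i) {
      uint64_t Offset = uint64_t(i) * ElemSize;     // the ".idx" mul + ".offs" add
      // "(gep Ptr, i) == Ptr" becomes "Offset == 0", i.e. "i == 0".
      assert((Offset == 0) == (i == 0));
    }
  }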
-Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, +Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I) { - assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!"); - // Look through bitcasts. if (BitCastInst *BCI = dyn_cast(RHS)) RHS = BCI->getOperand(0); Value *PtrBase = GEPLHS->getOperand(0); - if (PtrBase == RHS) { + if (TD && PtrBase == RHS && GEPLHS->isInBounds()) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). // This transformation (ignoring the base and scales) is valid because we - // know pointers can't overflow. See if we can output an optimized form. + // know pointers can't overflow since the gep is inbounds. See if we can + // output an optimized form. Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this); // If not, synthesize the offset the hard way. if (Offset == 0) Offset = EmitGEPOffset(GEPLHS, I, *this); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, - Context->getNullValue(Offset->getType())); - } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) { + Constant::getNullValue(Offset->getType())); + } else if (GEPOperator *GEPRHS = dyn_cast(RHS)) { // If the base pointers are different, but the indices are the same, just // compare the base pointer. if (PtrBase != GEPRHS->getOperand(0)) { @@ -5572,7 +5577,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, // If all indices are the same, just compare the base pointers. if (IndicesTheSame) - return new ICmpInst(ICmpInst::getSignedPredicate(Cond), + return new ICmpInst(ICmpInst::getSignedPredicate(Cond), GEPLHS->getOperand(0), GEPRHS->getOperand(0)); // Otherwise, the base pointers are different and the indices are @@ -5622,7 +5627,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return ReplaceInstUsesWith(I, // No comparison is needed here. - Context->getConstantInt(Type::Int1Ty, + ConstantInt::get(Type::getInt1Ty(*Context), ICmpInst::isTrueWhenEqual(Cond))); else if (NumDifferences == 1) { @@ -5635,7 +5640,8 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! 
- if ((isa(GEPLHS) || GEPLHS->hasOneUse()) && + if (TD && + (isa(GEPLHS) || GEPLHS->hasOneUse()) && (isa(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS, I, *this); @@ -5680,7 +5686,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, ICmpInst::Predicate Pred; switch (I.getPredicate()) { - default: assert(0 && "Unexpected predicate!"); + default: llvm_unreachable("Unexpected predicate!"); case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_OEQ: Pred = ICmpInst::ICMP_EQ; @@ -5706,9 +5712,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Pred = ICmpInst::ICMP_NE; break; case FCmpInst::FCMP_ORD: - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case FCmpInst::FCMP_UNO: - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } const IntegerType *IntTy = cast(LHSI->getOperand(0)->getType()); @@ -5728,8 +5734,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } } else { // If the RHS value is > UnsignedMax, fold the comparison. This handles @@ -5740,8 +5746,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } } @@ -5753,8 +5759,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); } } @@ -5763,27 +5769,27 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // casting the FP value to the integer value and back, checking for equality. // Don't do this for zero, because -0.0 is not fractional. Constant *RHSInt = LHSUnsigned - ? Context->getConstantExprFPToUI(RHSC, IntTy) - : Context->getConstantExprFPToSI(RHSC, IntTy); + ? ConstantExpr::getFPToUI(RHSC, IntTy) + : ConstantExpr::getFPToSI(RHSC, IntTy); if (!RHS.isZero()) { bool Equal = LHSUnsigned - ? Context->getConstantExprUIToFP(RHSInt, RHSC->getType()) == RHSC - : Context->getConstantExprSIToFP(RHSInt, RHSC->getType()) == RHSC; + ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC + : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC; if (!Equal) { // If we had a comparison against a fractional value, we have to adjust // the compare predicate and sometimes the value. 
RHSC is rounded towards // zero at this point. switch (Pred) { - default: assert(0 && "Unexpected integer comparison!"); + default: llvm_unreachable("Unexpected integer comparison!"); case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); case ICmpInst::ICMP_ULE: // (float)int <= 4.4 --> int <= 4 // (float)int <= -4.4 --> false if (RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_SLE: // (float)int <= 4.4 --> int <= 4 @@ -5795,7 +5801,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // (float)int < -4.4 --> false // (float)int < 4.4 --> int <= 4 if (RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); Pred = ICmpInst::ICMP_ULE; break; case ICmpInst::ICMP_SLT: @@ -5808,7 +5814,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // (float)int > 4.4 --> int > 4 // (float)int > -4.4 --> true if (RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); break; case ICmpInst::ICMP_SGT: // (float)int > 4.4 --> int > 4 @@ -5820,7 +5826,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 if (!RHS.isNegative()) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); Pred = ICmpInst::ICMP_UGT; break; case ICmpInst::ICMP_SGE: @@ -5844,22 +5850,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { // Fold trivial predicates. if (I.getPredicate() == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); if (I.getPredicate() == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); // Simplify 'fcmp pred X, X' if (Op0 == Op1) { switch (I.getPredicate()) { - default: assert(0 && "Unknown predicate!"); + default: llvm_unreachable("Unknown predicate!"); case FCmpInst::FCMP_UEQ: // True if unordered or equal case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1)); case FCmpInst::FCMP_OGT: // True if ordered and greater than case FCmpInst::FCMP_OLT: // True if ordered and less than case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0)); case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y) case FCmpInst::FCMP_ULT: // True if unordered or less than @@ -5867,7 +5873,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_UNE: // True if unordered or not equal // Canonicalize these to be 'fcmp uno %X, 0.0'. 
I.setPredicate(FCmpInst::FCMP_UNO); - I.setOperand(1, Context->getNullValue(Op0->getType())); + I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; case FCmpInst::FCMP_ORD: // True if ordered (no nans) @@ -5876,13 +5882,13 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_OLE: // True if ordered and less than or equal // Canonicalize these to be 'fcmp ord %X, 0.0'. I.setPredicate(FCmpInst::FCMP_ORD); - I.setOperand(1, Context->getNullValue(Op0->getType())); + I.setOperand(1, Constant::getNullValue(Op0->getType())); return &I; } } if (isa(Op1)) // fcmp pred X, undef -> undef - return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty)); + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); // Handle fcmp with constant RHS if (Constant *RHSC = dyn_cast(Op1)) { @@ -5890,11 +5896,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (ConstantFP *CFP = dyn_cast(RHSC)) { if (CFP->getValueAPF().isNaN()) { if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and... - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); assert(FCmpInst::isUnordered(I.getPredicate()) && "Comparison must be either ordered or unordered!"); // True if unordered. - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); } } @@ -5905,7 +5911,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I)) + if (Instruction *NV = FoldOpIntoPhi(I, true)) return NV; break; case Instruction::SIToFP: @@ -5921,18 +5927,16 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (LHSI->hasOneUse()) { if (Constant *C = dyn_cast(LHSI->getOperand(1))) { // Fold the known value into the constant operand. - Op1 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC); + Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. - Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(), - LHSI->getOperand(2), RHSC, - I.getName()), I); + Op2 = Builder->CreateFCmp(I.getPredicate(), + LHSI->getOperand(2), RHSC, I.getName()); } else if (Constant *C = dyn_cast(LHSI->getOperand(2))) { // Fold the known value into the constant operand. - Op2 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC); + Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC); // Insert a new FCmp of the other select operand. - Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(), - LHSI->getOperand(1), RHSC, - I.getName()), I); + Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); } } @@ -5952,28 +5956,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // icmp X, X if (Op0 == Op1) - return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, + return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), I.isTrueWhenEqual())); if (isa(Op1)) // X icmp undef -> undef - return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty)); + return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); // icmp , - Global/Stack value // addresses never equal each other! We already know that Op0 != Op1. 
- if ((isa(Op0) || isa(Op0) || + if ((isa(Op0) || isa(Op0) || isa(Op0)) && - (isa(Op1) || isa(Op1) || + (isa(Op1) || isa(Op1) || isa(Op1))) - return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, + return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context), !I.isTrueWhenEqual())); // icmp's with boolean values can always be turned into bitwise operations - if (Ty == Type::Int1Ty) { + if (Ty == Type::getInt1Ty(*Context)) { switch (I.getPredicate()) { - default: assert(0 && "Invalid icmp instruction!"); + default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) - Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp"); - InsertNewInstBefore(Xor, I); + Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp"); return BinaryOperator::CreateNot(Xor); } case ICmpInst::ICMP_NE: // icmp eq i1 A, B -> A^B @@ -5983,32 +5986,28 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { std::swap(Op0, Op1); // Change icmp ugt -> icmp ult // FALL THROUGH case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B - Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op1); } case ICmpInst::ICMP_SGT: std::swap(Op0, Op1); // Change icmp sgt -> icmp slt // FALL THROUGH case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B - Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateAnd(Not, Op0); } case ICmpInst::ICMP_UGE: std::swap(Op0, Op1); // Change icmp uge -> icmp ule // FALL THROUGH case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B - Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op1); } case ICmpInst::ICMP_SGE: std::swap(Op0, Op1); // Change icmp sge -> icmp sle // FALL THROUGH case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B - Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp"); - InsertNewInstBefore(Not, I); + Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp"); return BinaryOperator::CreateOr(Not, Op0); } } @@ -6040,20 +6039,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { default: break; case ICmpInst::ICMP_ULE: if (CI->isMaxValue(false)) // A <=u MAX -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_ULT, Op0, + AddOne(CI)); case ICmpInst::ICMP_SLE: if (CI->isMaxValue(true)) // A <=s MAX -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_SLT, Op0, + AddOne(CI)); case ICmpInst::ICMP_UGE: if (CI->isMinValue(false)) // A >=u MIN -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_UGT, Op0, + SubOne(CI)); case ICmpInst::ICMP_SGE: if (CI->isMinValue(true)) // A >=s 
MIN -> TRUE - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); - return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI, Context)); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); + return new ICmpInst(ICmpInst::ICMP_SGT, Op0, + SubOne(CI)); } // If this comparison is a normal comparison, it demands all @@ -6100,110 +6103,114 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // that code below can assume that Min != Max. if (!isa(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - Context->getConstantInt(Op0Min), Op1); + ConstantInt::get(*Context, Op0Min), Op1); if (!isa(Op1) && Op1Min == Op1Max) - return new ICmpInst(I.getPredicate(), Op0, - Context->getConstantInt(Op1Min)); + return new ICmpInst(I.getPredicate(), Op0, + ConstantInt::get(*Context, Op1Min)); // Based on the range information we know about the LHS, see if we can // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { - default: assert(0 && "Unknown icmp opcode!"); + default: llvm_unreachable("Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_NE: if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); break; case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A true if max(A) < min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.uge(Op1Max)) // A false if min(A) >= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Min == Op0Max) // A A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { if (Op1Max == Op0Min+1) // A A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + SubOne(CI)); // (x (x >s -1) -> true if sign bit clear if (CI->isMinValue(true)) return new ICmpInst(ICmpInst::ICMP_SGT, Op0, - Context->getConstantIntAllOnesValue(Op0->getType())); + Constant::getAllOnesValue(Op0->getType())); } break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + AddOne(CI)); // (x >u 2147483647) -> (x true if sign bit set if (CI->isMaxValue(true)) return new ICmpInst(ICmpInst::ICMP_SLT, Op0, - Context->getNullValue(Op0->getType())); + Constant::getNullValue(Op0->getType())); } break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A true if max(A) < min(C) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); 
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.sge(Op1Max)) // A false if min(A) >= max(C) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Min == Op0Max) // A A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { if (Op1Max == Op0Min+1) // A A == C-1 if min(A)+1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + SubOne(CI)); } break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C - return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context)); + return new ICmpInst(ICmpInst::ICMP_EQ, Op0, + AddOne(CI)); } break; case ICmpInst::ICMP_SGE: assert(!isa(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_SLE: assert(!isa(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_UGE: assert(!isa(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; case ICmpInst::ICMP_ULE: assert(!isa(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); break; } @@ -6255,16 +6262,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } if (isAllZeros) return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), - Context->getNullValue(LHSI->getOperand(0)->getType())); + Constant::getNullValue(LHSI->getOperand(0)->getType())); } break; 
case Instruction::PHI: - // Only fold icmp into the PHI if the phi and fcmp are in the same + // Only fold icmp into the PHI if the phi and icmp are in the same // block. If in the same block, we're encouraging jump threading. If // not, we are just pessimizing the code by making an i1 phi. if (LHSI->getParent() == I.getParent()) - if (Instruction *NV = FoldOpIntoPhi(I)) + if (Instruction *NV = FoldOpIntoPhi(I, true)) return NV; break; case Instruction::Select: { @@ -6275,18 +6282,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (LHSI->hasOneUse()) { if (Constant *C = dyn_cast(LHSI->getOperand(1))) { // Fold the known value into the constant operand. - Op1 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC); + Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); // Insert a new ICmp of the other select operand. - Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), - LHSI->getOperand(2), RHSC, - I.getName()), I); + Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), + RHSC, I.getName()); } else if (Constant *C = dyn_cast(LHSI->getOperand(2))) { // Fold the known value into the constant operand. - Op2 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC); + Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); // Insert a new ICmp of the other select operand. - Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), - LHSI->getOperand(1), RHSC, - I.getName()), I); + Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), + RHSC, I.getName()); } } @@ -6298,19 +6303,31 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If we have (malloc != null), and if the malloc has a single use, we // can assume it is successful and remove the malloc. if (LHSI->hasOneUse() && isa(RHSC)) { - AddToWorkList(LHSI); - return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, - !I.isTrueWhenEqual())); + Worklist.Add(LHSI); + return ReplaceInstUsesWith(I, + ConstantInt::get(Type::getInt1Ty(*Context), + !I.isTrueWhenEqual())); + } + break; + case Instruction::Call: + // If we have (malloc != null), and if the malloc has a single use, we + // can assume it is successful and remove the malloc. + if (isMalloc(LHSI) && LHSI->hasOneUse() && + isa(RHSC)) { + Worklist.Add(LHSI); + return ReplaceInstUsesWith(I, + ConstantInt::get(Type::getInt1Ty(*Context), + !I.isTrueWhenEqual())); } break; } } // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. - if (User *GEP = dyn_castGetElementPtr(Op0)) + if (GEPOperator *GEP = dyn_cast(Op0)) if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I)) return NI; - if (User *GEP = dyn_castGetElementPtr(Op1)) + if (GEPOperator *GEP = dyn_cast(Op1)) if (Instruction *NI = FoldGEPICmp(GEP, Op0, ICmpInst::getSwappedPredicate(I.getPredicate()), I)) return NI; @@ -6333,10 +6350,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If Op1 is a constant, we can fold the cast into the constant. if (Op0->getType() != Op1->getType()) { if (Constant *Op1C = dyn_cast(Op1)) { - Op1 = Context->getConstantExprBitCast(Op1C, Op0->getType()); + Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); } else { // Otherwise, cast the RHS right before the icmp - Op1 = InsertBitCastBefore(Op1, Op0->getType(), I); + Op1 = Builder->CreateBitCast(Op1, Op0->getType()); } } return new ICmpInst(I.getPredicate(), Op0, Op1); @@ -6397,16 +6414,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Mask = -1 >> count-trailing-zeros(Cst). 
if (!CI->isZero() && !CI->isOne()) { const APInt &AP = CI->getValue(); - ConstantInt *Mask = Context->getConstantInt( + ConstantInt *Mask = ConstantInt::get(*Context, APInt::getLowBitsSet(AP.getBitWidth(), AP.getBitWidth() - AP.countTrailingZeros())); - Instruction *And1 = BinaryOperator::CreateAnd(Op0I->getOperand(0), - Mask); - Instruction *And2 = BinaryOperator::CreateAnd(Op1I->getOperand(0), - Mask); - InsertNewInstBefore(And1, I); - InsertNewInstBefore(And2, I); + Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); + Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask); return new ICmpInst(I.getPredicate(), And1, And2); } } @@ -6435,7 +6448,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0 Value *OtherVal = A == Op1 ? B : A; return new ICmpInst(I.getPredicate(), OtherVal, - Context->getNullValue(A->getType())); + Constant::getNullValue(A->getType())); } if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { @@ -6444,10 +6457,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { Constant *NC = - Context->getConstantInt(C1->getValue() ^ C2->getValue()); - Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp"); - return new ICmpInst(I.getPredicate(), A, - InsertNewInstBefore(Xor, I)); + ConstantInt::get(*Context, C1->getValue() ^ C2->getValue()); + Value *Xor = Builder->CreateXor(C, NC, "tmp"); + return new ICmpInst(I.getPredicate(), A, Xor); } // A^B == A^D -> B == D @@ -6463,18 +6475,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // A == (A^B) -> B == 0 Value *OtherVal = A == Op0 ? B : A; return new ICmpInst(I.getPredicate(), OtherVal, - Context->getNullValue(A->getType())); + Constant::getNullValue(A->getType())); } // (A-B) == A -> B == 0 if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B)))) return new ICmpInst(I.getPredicate(), B, - Context->getNullValue(B->getType())); + Constant::getNullValue(B->getType())); // A == (A-B) -> B == 0 if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B)))) return new ICmpInst(I.getPredicate(), B, - Context->getNullValue(B->getType())); + Constant::getNullValue(B->getType())); // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 if (Op0->hasOneUse() && Op1->hasOneUse() && @@ -6493,10 +6505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } if (X) { // Build (X^Y) & Z - Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I); - Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I); + Op1 = Builder->CreateXor(X, Y, "tmp"); + Op1 = Builder->CreateAnd(Op1, Z, "tmp"); I.setOperand(0, Op1); - I.setOperand(1, Context->getNullValue(Op1->getType())); + I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; } } @@ -6535,13 +6547,13 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and // C2 (CI). By solving for X we can turn this into a range check // instead of computing a divide. - Constant *Prod = Context->getConstantExprMul(CmpRHS, DivRHS); + Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); // Determine if the product overflows by seeing if the product is // not equal to the divide. Make sure we do the same kind of divide // as in the LHS instruction that we're folding. - bool ProdOV = (DivIsSigned ? Context->getConstantExprSDiv(Prod, DivRHS) : - Context->getConstantExprUDiv(Prod, DivRHS)) != CmpRHS; + bool ProdOV = (DivIsSigned ? 
ConstantExpr::getSDiv(Prod, DivRHS) : + ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; // Get the ICmp opcode ICmpInst::Predicate Pred = ICI.getPredicate(); @@ -6565,8 +6577,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = cast(Context->getConstantExprNeg(SubOne(DivRHS, - Context))); + LoBound = cast(ConstantExpr::getNeg(SubOne(DivRHS))); HiBound = DivRHS; } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) @@ -6575,11 +6586,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true); } else { // (X / pos) op neg // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) - HiBound = AddOne(Prod, Context); + HiBound = AddOne(Prod); LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { ConstantInt* DivNeg = - cast(Context->getConstantExprNeg(DivRHS)); + cast(ConstantExpr::getNeg(DivRHS)); LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context, true) ? -1 : 0; } @@ -6587,15 +6598,15 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. if (CmpRHSV == 0) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(DivRHS, Context); - HiBound = cast(Context->getConstantExprNeg(DivRHS)); + LoBound = AddOne(DivRHS); + HiBound = cast(ConstantExpr::getNeg(DivRHS)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN } } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos // e.g. X/-5 op 3 --> [-19, -14) - HiBound = AddOne(Prod, Context); + HiBound = AddOne(Prod); HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) LoOverflow = AddWithOverflow(LoBound, HiBound, @@ -6613,42 +6624,42 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, Value *X = DivI->getOperand(0); switch (Pred) { - default: assert(0 && "Unhandled icmp opcode!"); + default: llvm_unreachable("Unhandled icmp opcode!"); case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); else return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); else if (HiOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : + return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); else if (LoOverflow) - return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : + return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, HiBound); else return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); if (LoOverflow == -1) // Low bound is less than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); return new ICmpInst(Pred, X, LoBound); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); else if (HiOverflow == -1) // High bound less than input range. - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); else @@ -6682,7 +6693,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, NewRHS.zext(SrcBits); NewRHS |= KnownOne; return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - Context->getConstantInt(NewRHS)); + ConstantInt::get(*Context, NewRHS)); } } break; @@ -6699,7 +6710,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // the operation, just stop using the Xor. if (!XorCST->getValue().isNegative()) { ICI.setOperand(0, CompareVal); - AddToWorkList(LHSI); + Worklist.Add(LHSI); return &ICI; } @@ -6711,10 +6722,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (isTrueIfPositive) return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, - SubOne(RHS, Context)); + SubOne(RHS)); else return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, - AddOne(RHS, Context)); + AddOne(RHS)); } if (LHSI->hasOneUse()) { @@ -6725,7 +6736,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ? 
ICI.getUnsignedPredicate() : ICI.getSignedPredicate(); return new ICmpInst(Pred, LHSI->getOperand(0), - Context->getConstantInt(RHSV ^ SignBit)); + ConstantInt::get(*Context, RHSV ^ SignBit)); } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) @@ -6736,7 +6747,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, : ICI.getSignedPredicate(); Pred = ICI.getSwappedPredicate(Pred); return new ICmpInst(Pred, LHSI->getOperand(0), - Context->getConstantInt(RHSV ^ NotSignBit)); + ConstantInt::get(*Context, RHSV ^ NotSignBit)); } } } @@ -6763,12 +6774,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, NewCST.zext(BitWidth); APInt NewCI = RHSV; NewCI.zext(BitWidth); - Instruction *NewAnd = - BinaryOperator::CreateAnd(Cast->getOperand(0), - Context->getConstantInt(NewCST),LHSI->getName()); - InsertNewInstBefore(NewAnd, ICI); + Value *NewAnd = + Builder->CreateAnd(Cast->getOperand(0), + ConstantInt::get(*Context, NewCST), LHSI->getName()); return new ICmpInst(ICI.getPredicate(), NewAnd, - Context->getConstantInt(NewCI)); + ConstantInt::get(*Context, NewCI)); } } @@ -6805,32 +6815,31 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (CanFold) { Constant *NewCst; if (Shift->getOpcode() == Instruction::Shl) - NewCst = Context->getConstantExprLShr(RHS, ShAmt); + NewCst = ConstantExpr::getLShr(RHS, ShAmt); else - NewCst = Context->getConstantExprShl(RHS, ShAmt); + NewCst = ConstantExpr::getShl(RHS, ShAmt); // Check to see if we are shifting out any of the bits being // compared. - if (Context->getConstantExpr(Shift->getOpcode(), + if (ConstantExpr::get(Shift->getOpcode(), NewCst, ShAmt) != RHS) { // If we shifted bits out, the fold is not going to work out. // As a special case, check to see if this means that the // result is always true or false now. if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); } else { ICI.setOperand(1, NewCst); Constant *NewAndCST; if (Shift->getOpcode() == Instruction::Shl) - NewAndCST = Context->getConstantExprLShr(AndCST, ShAmt); + NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt); else - NewAndCST = Context->getConstantExprShl(AndCST, ShAmt); + NewAndCST = ConstantExpr::getShl(AndCST, ShAmt); LHSI->setOperand(1, NewAndCST); LHSI->setOperand(0, Shift->getOperand(0)); - AddToWorkList(Shift); // Shift is dead. - AddUsesToWorkList(ICI); + Worklist.Add(Shift); // Shift is dead. return &ICI; } } @@ -6845,19 +6854,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Compute C << Y. Value *NS; if (Shift->getOpcode() == Instruction::LShr) { - NS = BinaryOperator::CreateShl(AndCST, - Shift->getOperand(1), "tmp"); + NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp"); } else { // Insert a logical shift. - NS = BinaryOperator::CreateLShr(AndCST, - Shift->getOperand(1), "tmp"); + NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp"); } - InsertNewInstBefore(cast(NS), ICI); // Compute X & (C << Y). 
- Instruction *NewAnd = - BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); - InsertNewInstBefore(NewAnd, ICI); + Value *NewAnd = + Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); ICI.setOperand(0, NewAnd); return &ICI; @@ -6881,11 +6886,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If we are comparing against bits always shifted out, the // comparison cannot succeed. Constant *Comp = - Context->getConstantExprShl(Context->getConstantExprLShr(RHS, ShAmt), + ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt), ShAmt); if (Comp != RHS) {// Comparing against a bit that we know is zero. bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE); + Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } @@ -6893,15 +6898,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); Constant *Mask = - Context->getConstantInt(APInt::getLowBitsSet(TypeBits, + ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits, TypeBits-ShAmtVal)); - Instruction *AndI = - BinaryOperator::CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); + Value *And = + Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - Context->getConstantInt(RHSV.lshr(ShAmtVal))); + ConstantInt::get(*Context, RHSV.lshr(ShAmtVal))); } } @@ -6910,15 +6913,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (LHSI->hasOneUse() && isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) { // (X << 31) (X&1) != 0 - Constant *Mask = Context->getConstantInt(APInt(TypeBits, 1) << + Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) << (TypeBits-ShAmt->getZExtValue()-1)); - Instruction *AndI = - BinaryOperator::CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); - + Value *And = + Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, - And, Context->getNullValue(And->getType())); + And, Constant::getNullValue(And->getType())); } break; } @@ -6948,7 +6948,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (Comp != RHSV) { // Comparing against a bit that we know is zero. bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; - Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE); + Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE); return ReplaceInstUsesWith(ICI, Cst); } @@ -6959,20 +6959,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, MaskedValueIsZero(LHSI->getOperand(0), APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) { return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), - Context->getConstantExprShl(RHS, ShAmt)); + ConstantExpr::getShl(RHS, ShAmt)); } if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. 
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = Context->getConstantInt(Val); + Constant *Mask = ConstantInt::get(*Context, Val); - Instruction *AndI = - BinaryOperator::CreateAnd(LHSI->getOperand(0), - Mask, LHSI->getName()+".mask"); - Value *And = InsertNewInstBefore(AndI, ICI); + Value *And = Builder->CreateAnd(LHSI->getOperand(0), + Mask, LHSI->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, - Context->getConstantExprShl(RHS, ShAmt)); + ConstantExpr::getShl(RHS, ShAmt)); } break; } @@ -7005,18 +7003,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (ICI.isSignedPredicate()) { if (CR.getLower().isSignBit()) { return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), - Context->getConstantInt(CR.getUpper())); + ConstantInt::get(*Context, CR.getUpper())); } else if (CR.getUpper().isSignBit()) { return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), - Context->getConstantInt(CR.getLower())); + ConstantInt::get(*Context, CR.getLower())); } } else { if (CR.getLower().isMinValue()) { return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), - Context->getConstantInt(CR.getUpper())); + ConstantInt::get(*Context, CR.getUpper())); } else if (CR.getUpper().isMinValue()) { return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), - Context->getConstantInt(CR.getLower())); + ConstantInt::get(*Context, CR.getLower())); } } } @@ -7036,12 +7034,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (RHSV == 0 && isa(BO->getOperand(1)) &&BO->hasOneUse()){ const APInt &V = cast(BO->getOperand(1))->getValue(); if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { - Instruction *NewRem = - BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1), - BO->getName()); - InsertNewInstBefore(NewRem, ICI); - return new ICmpInst(ICI.getPredicate(), NewRem, - Context->getNullValue(BO->getType())); + Value *NewRem = + Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), + BO->getName()); + return new ICmpInst(ICI.getPredicate(), NewRem, + Constant::getNullValue(BO->getType())); } } break; @@ -7050,19 +7047,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (ConstantInt *BOp1C = dyn_cast(BO->getOperand(1))) { if (BO->hasOneUse()) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - Context->getConstantExprSub(RHS, BOp1C)); + ConstantExpr::getSub(RHS, BOp1C)); } else if (RHSV == 0) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); - if (Value *NegVal = dyn_castNegVal(BOp1, Context)) + if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0, Context)) + else if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); else if (BO->hasOneUse()) { - Instruction *Neg = BinaryOperator::CreateNeg(BOp1); - InsertNewInstBefore(Neg, ICI); + Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); } @@ -7073,7 +7069,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // the explicit xor. 
if (Constant *BOC = dyn_cast(BO->getOperand(1))) return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), - Context->getConstantExprXor(RHS, BOC)); + ConstantExpr::getXor(RHS, BOC)); // FALLTHROUGH case Instruction::Sub: @@ -7087,10 +7083,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If bits are being or'd in that are not present in the constant we // are comparing against, then the comparison could never succeed! if (Constant *BOC = dyn_cast(BO->getOperand(1))) { - Constant *NotCI = Context->getConstantExprNot(RHS); - if (!Context->getConstantExprAnd(BOC, NotCI)->isNullValue()) + Constant *NotCI = ConstantExpr::getNot(RHS); + if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue()) return ReplaceInstUsesWith(ICI, - Context->getConstantInt(Type::Int1Ty, + ConstantInt::get(Type::getInt1Ty(*Context), isICMP_NE)); } break; @@ -7101,19 +7097,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // comparison can never succeed! if ((RHSV & ~BOC->getValue()) != 0) return ReplaceInstUsesWith(ICI, - Context->getConstantInt(Type::Int1Ty, + ConstantInt::get(Type::getInt1Ty(*Context), isICMP_NE)); // If we have ((X & C) == C), turn it into ((X & C) != 0). if (RHS == BOC && RHSV.isPowerOf2()) return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, LHSI, - Context->getNullValue(RHS->getType())); + Constant::getNullValue(RHS->getType())); // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); - Constant *Zero = Context->getNullValue(X->getType()); + Constant *Zero = Constant::getNullValue(X->getType()); ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(pred, X, Zero); @@ -7122,7 +7118,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // ((X & ~7) == 0) --> X < 8 if (RHSV == 0 && isHighOnes(BOC)) { Value *X = BO->getOperand(0); - Constant *NegX = Context->getConstantExprNeg(BOC); + Constant *NegX = ConstantExpr::getNeg(BOC); ICmpInst::Predicate pred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(pred, X, NegX); @@ -7133,9 +7129,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } else if (IntrinsicInst *II = dyn_cast(LHSI)) { // Handle icmp {eq|ne} , intcst. if (II->getIntrinsicID() == Intrinsic::bswap) { - AddToWorkList(II); + Worklist.Add(II); ICI.setOperand(0, II->getOperand(1)); - ICI.setOperand(1, Context->getConstantInt(RHSV.byteSwap())); + ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap())); return &ICI; } } @@ -7155,17 +7151,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the // integer type is the same size as the pointer type. - if (LHSCI->getOpcode() == Instruction::PtrToInt && - getTargetData().getPointerSizeInBits() == + if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && + TD->getPointerSizeInBits() == cast(DestTy)->getBitWidth()) { Value *RHSOp = 0; if (Constant *RHSC = dyn_cast(ICI.getOperand(1))) { - RHSOp = Context->getConstantExprIntToPtr(RHSC, SrcTy); + RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); } else if (PtrToIntInst *RHSC = dyn_cast(ICI.getOperand(1))) { RHSOp = RHSC->getOperand(0); // If the pointer types don't match, insert a bitcast. 
if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI); + RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); } if (RHSOp) @@ -7212,8 +7208,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Compute the constant that would happen if we truncated to SrcTy then // reextended to DestTy. - Constant *Res1 = Context->getConstantExprTrunc(CI, SrcTy); - Constant *Res2 = Context->getConstantExprCast(LHSCI->getOpcode(), + Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); + Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy); // If the re-extended constant didn't change... @@ -7239,9 +7235,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // First, handle some easy cases. We know the result cannot be equal at this // point so handle the ICI.isEquality() cases if (ICI.getPredicate() == ICmpInst::ICMP_EQ) - return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); + return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context)); if (ICI.getPredicate() == ICmpInst::ICMP_NE) - return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); + return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context)); // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. @@ -7249,20 +7245,19 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { if (isSignedCmp) { // We're performing a signed comparison. if (cast(CI)->getValue().isNegative()) - Result = Context->getConstantIntFalse(); // X < (small) --> false + Result = ConstantInt::getFalse(*Context); // X < (small) --> false else - Result = Context->getConstantIntTrue(); // X < (large) --> true + Result = ConstantInt::getTrue(*Context); // X < (large) --> true } else { // We're performing an unsigned comparison. if (isSignedExt) { // We're performing an unsigned comp with a sign extended value. // This is true if the input is >= 0. [aka >s -1] - Constant *NegOne = Context->getConstantIntAllOnesValue(SrcTy); - Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp, - NegOne, ICI.getName()), ICI); + Constant *NegOne = Constant::getAllOnesValue(SrcTy); + Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName()); } else { // Unsigned extend & unsigned compare -> always true. 
- Result = Context->getConstantIntTrue(); + Result = ConstantInt::getTrue(*Context); } } @@ -7275,7 +7270,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { ICI.getPredicate()==ICmpInst::ICMP_SGT) && "ICmp should be folded!"); if (Constant *CI = dyn_cast(Result)) - return ReplaceInstUsesWith(ICI, Context->getConstantExprNot(CI)); + return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI)); return BinaryOperator::CreateNot(Result); } @@ -7317,21 +7312,21 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { // shl X, 0 == X and shr X, 0 == X // shl 0, X == 0 and shr 0, X == 0 - if (Op1 == Context->getNullValue(Op1->getType()) || - Op0 == Context->getNullValue(Op0->getType())) + if (Op1 == Constant::getNullValue(Op1->getType()) || + Op0 == Constant::getNullValue(Op0->getType())) return ReplaceInstUsesWith(I, Op0); if (isa(Op0)) { if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef return ReplaceInstUsesWith(I, Op0); else // undef << X -> 0, undef >>u X -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } if (isa(Op1)) { if (I.getOpcode() == Instruction::AShr) // X >>s undef -> X return ReplaceInstUsesWith(I, Op0); else // X << undef, X >>u undef -> 0 - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } // See if we can fold away this shift. @@ -7363,9 +7358,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // if (Op1->uge(TypeBits)) { if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Context->getNullValue(Op0->getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); else { - I.setOperand(1, Context->getConstantInt(I.getType(), TypeBits-1)); + I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); return &I; } } @@ -7375,7 +7370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (BO->getOpcode() == Instruction::Mul && isLeftShift) if (Constant *BOOp = dyn_cast(BO->getOperand(1))) return BinaryOperator::CreateMul(BO->getOperand(0), - Context->getConstantExprShl(BOOp, Op1)); + ConstantExpr::getShl(BOOp, Op1)); // Try to fold constant and into select arguments. if (SelectInst *SI = dyn_cast(Op0)) @@ -7396,10 +7391,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = Context->getConstantExprZExt(Op1, TrOp->getType()); - Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt, - I.getName()); - InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2) + Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + // (shift2 (shift1 & 0x00FF), c2) + Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. 
Emulate this by inserting an AND to @@ -7420,10 +7414,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, MaskV = MaskV.lshr(Op1->getZExtValue()); } - Instruction *And = - BinaryOperator::CreateAnd(NSh, Context->getConstantInt(MaskV), - TI->getName()); - InsertNewInstBefore(And, I); // shift1 & 0x00FF + // shift1 & 0x00FF + Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV), + TI->getName()); // Return the value truncated to the interesting size. return new TruncInst(And, I.getType()); @@ -7444,17 +7437,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // These operators commute. // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C) if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && - match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))){ - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(0), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *X = - BinaryOperator::Create(Op0BO->getOpcode(), YS, V1, - Op0BO->getOperand(1)->getName()); - InsertNewInstBefore(X, I); // (X + (Y << C)) + match(Op0BO->getOperand(1), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, Context->getConstantInt( + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); } @@ -7465,16 +7456,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, m_And(m_Shr(m_Value(V1), m_Specific(Op1)), m_ConstantInt(CC))) && cast(Op0BOOp1)->getOperand(0)->hasOneUse()) { - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(0), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *XM = - BinaryOperator::CreateAnd(V1, - Context->getConstantExprShl(CC, Op1), - V1->getName()+".mask"); - InsertNewInstBefore(XM, I); // X & (CC << C) - + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(0), Op1, + Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } } @@ -7483,17 +7470,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, case Instruction::Sub: { // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C) if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && - match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))){ - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(1), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *X = - BinaryOperator::Create(Op0BO->getOpcode(), V1, YS, - Op0BO->getOperand(0)->getName()); - InsertNewInstBefore(X, I); // (X + (Y << C)) + match(Op0BO->getOperand(0), m_Shr(m_Value(V1), + m_Specific(Op1)))) { + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // (X + (Y << C)) + Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, Context->getConstantInt( + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, APInt::getHighBitsSet(TypeBits, 
TypeBits-Op1Val))); } @@ -7504,15 +7489,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, m_ConstantInt(CC))) && V2 == Op1 && cast(Op0BO->getOperand(0)) ->getOperand(0)->hasOneUse()) { - Instruction *YS = BinaryOperator::CreateShl( - Op0BO->getOperand(1), Op1, - Op0BO->getName()); - InsertNewInstBefore(YS, I); // (Y << C) - Instruction *XM = - BinaryOperator::CreateAnd(V1, - Context->getConstantExprShl(CC, Op1), - V1->getName()+".mask"); - InsertNewInstBefore(XM, I); // X & (CC << C) + Value *YS = // (Y << C) + Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + // X & (CC << C) + Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -7552,11 +7533,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, isValid = Op0C->getValue()[TypeBits-1] == highBitSet; if (isValid) { - Constant *NewRHS = Context->getConstantExpr(I.getOpcode(), Op0C, Op1); + Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); - Instruction *NewShift = - BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1); - InsertNewInstBefore(NewShift, I); + Value *NewShift = + Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, @@ -7589,31 +7569,33 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // saturates. if (AmtSum >= TypeBits) { if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr. } return BinaryOperator::Create(I.getOpcode(), X, - Context->getConstantInt(Ty, AmtSum)); - } else if (ShiftOp->getOpcode() == Instruction::LShr && - I.getOpcode() == Instruction::AShr) { + ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::LShr && + I.getOpcode() == Instruction::AShr) { if (AmtSum >= TypeBits) - return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); + return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0. - return BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, AmtSum)); - } else if (ShiftOp->getOpcode() == Instruction::AShr && - I.getOpcode() == Instruction::LShr) { + return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); + } + + if (ShiftOp->getOpcode() == Instruction::AShr && + I.getOpcode() == Instruction::LShr) { // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0. if (AmtSum >= TypeBits) AmtSum = TypeBits-1; - Instruction *Shift = - BinaryOperator::CreateAShr(X, Context->getConstantInt(Ty, AmtSum)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask)); } // Okay, if we get here, one shift must be left, and the other shift must be @@ -7622,12 +7604,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // If we have ((X >>? C) << C), turn this into X & (-1 << C). 
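// Aside: the two masking identities these shift-pair folds rely on, as a
// minimal standalone C++ check. Illustrative only; the helper name and the
// fixed i32 width are our own assumptions, not code from this pass.
#include <cassert>
#include <cstdint>
void checkShiftMaskIdentities(uint32_t x, unsigned c) {
  assert(c < 32 && "shift amount must be in range");
  // ((X >>u C) << C) keeps only the high bits: X & (-1 << C).
  assert(((x >> c) << c) == (x & (~0u << c)));
  // ((X << C) >>u C) keeps only the low bits: X & (-1 >>u C).
  assert(((x << c) >> c) == (x & (~0u >> c)));
}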
if (I.getOpcode() == Instruction::Shl) { APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask)); } // If we have ((X << C) >>u C), turn this into X & (-1 >>u C). if (I.getOpcode() == Instruction::LShr) { APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); - return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask)); } // We can simplify ((X << C) >>s C) into a trunc + sext. // NOTE: we could do this for any C, but that would make 'unusual' integer @@ -7641,15 +7623,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, case 32 : case 64 : case 128: - SExtType = Context->getIntegerType(Ty->getBitWidth() - ShiftAmt1); + SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1); break; default: break; } - if (SExtType) { - Instruction *NewTrunc = new TruncInst(X, SExtType, "sext"); - InsertNewInstBefore(NewTrunc, I); - return new SExtInst(NewTrunc, Ty); - } + if (SExtType) + return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty); // Otherwise, we can't handle it yet. } else if (ShiftAmt1 < ShiftAmt2) { uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; @@ -7658,23 +7637,21 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (I.getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); - Instruction *Shift = - BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr) { assert(ShiftOp->getOpcode() == Instruction::Shl); - Instruction *Shift = - BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. 
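// Aside: the trunc+sext equivalence used above, sketched for i32 with a
// 24-bit shift pair. Illustrative only; it assumes two's-complement
// narrowing conversions (guaranteed since C++20, and exactly what LLVM's
// ashr and sext model).
#include <cassert>
#include <cstdint>
void checkShlAshrIsSextTrunc(int32_t x) {
  int32_t viaShifts = (int32_t)((uint32_t)x << 24) >> 24; // (X << 24) >>s 24
  int32_t viaCasts  = (int32_t)(int8_t)x;                 // sext(trunc X to i8)
  assert(viaShifts == viaCasts);
}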
@@ -7686,24 +7663,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (I.getOpcode() == Instruction::Shl) { assert(ShiftOp->getOpcode() == Instruction::LShr || ShiftOp->getOpcode() == Instruction::AShr); - Instruction *Shift = - BinaryOperator::Create(ShiftOp->getOpcode(), X, - Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, + ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2) if (I.getOpcode() == Instruction::LShr) { assert(ShiftOp->getOpcode() == Instruction::Shl); - Instruction *Shift = - BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff)); - InsertNewInstBefore(Shift, I); + Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff)); APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); - return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); + return BinaryOperator::CreateAnd(Shift, + ConstantInt::get(*Context, Mask)); } // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. @@ -7718,12 +7693,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, /// X*Scale+Offset. /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, - int &Offset, LLVMContext* Context) { - assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!"); + int &Offset, LLVMContext *Context) { + assert(Val->getType() == Type::getInt32Ty(*Context) && + "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast(Val)) { Offset = CI->getZExtValue(); Scale = 0; - return Context->getConstantInt(Type::Int32Ty, 0); + return ConstantInt::get(Type::getInt32Ty(*Context), 0); } else if (BinaryOperator *I = dyn_cast(Val)) { if (ConstantInt *RHS = dyn_cast(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { @@ -7763,6 +7739,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocationInst &AI) { const PointerType *PTy = cast(CI.getType()); + BuilderTy AllocaBuilder(*Builder); + AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + // Remove any uses of AI that are dead. assert(!CI.use_empty() && "Dead instructions should be removed earlier!"); @@ -7773,11 +7752,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, ++UI; // If this instruction uses AI more than once, don't break UI. ++NumDeadInst; - DOUT << "IC: DCE: " << *User; + DEBUG(errs() << "IC: DCE: " << *User << '\n'); EraseInstFromFunction(*User); } } - + + // This requires TargetData to get the alloca alignment and size information. + if (!TD) return 0; + // Get the type really allocated and the type casted to. 
const Type *AllocElTy = AI.getAllocatedType(); const Type *CastElTy = PTy->getElementType(); @@ -7816,30 +7798,22 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, if (Scale == 1) { Amt = NumElements; } else { - // If the allocation size is constant, form a constant mul expression - Amt = Context->getConstantInt(Type::Int32Ty, Scale); - if (isa(NumElements)) - Amt = Context->getConstantExprMul(cast(NumElements), - cast(Amt)); - // otherwise multiply the amount and the number of elements - else { - Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp"); - Amt = InsertNewInstBefore(Tmp, AI); - } + Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale); + // Insert before the alloca, not before the cast. + Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp"); } if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { - Value *Off = Context->getConstantInt(Type::Int32Ty, Offset, true); - Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp"); - Amt = InsertNewInstBefore(Tmp, AI); + Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true); + Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp"); } AllocationInst *New; if (isa(AI)) - New = new MallocInst(CastElTy, Amt, AI.getAlignment()); + New = AllocaBuilder.CreateMalloc(CastElTy, Amt); else - New = new AllocaInst(CastElTy, Amt, AI.getAlignment()); - InsertNewInstBefore(New, AI); + New = AllocaBuilder.CreateAlloca(CastElTy, Amt); + New->setAlignment(AI.getAlignment()); New->takeName(&AI); // If the allocation has one real use plus a dbg.declare, just remove the @@ -7851,11 +7825,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // things that used it to use the new cast. This will also hack on CI, but it // will die soon. else if (!AI.hasOneUse()) { - AddUsesToWorkList(AI); // New is the allocation instruction, pointer typed. AI is the original // allocation instruction, also pointer typed. Thus, cast to use is BitCast. - CastInst *NewCast = new BitCastInst(New, AI.getType(), "tmpcast"); - InsertNewInstBefore(NewCast, AI); + Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); AI.replaceAllUsesWith(NewCast); } return ReplaceInstUsesWith(CI, New); @@ -7923,6 +7895,23 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, NumCastsRemoved); + case Instruction::UDiv: + case Instruction::URem: { + // UDiv and URem can be truncated if all the truncated bits are zero. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + if (BitWidth < OrigBitWidth) { + APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); + if (MaskedValueIsZero(I->getOperand(0), Mask) && + MaskedValueIsZero(I->getOperand(1), Mask)) { + return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, + NumCastsRemoved) && + CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, + NumCastsRemoved); + } + } + break; + } case Instruction::Shl: // If we are truncating the result of this SHL, and if it's a shift of a // constant amount, we can always perform a SHL in a smaller type. 
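// Aside: why the UDiv/URem case above is safe, as a standalone C++ check.
// When the truncated-away bits of both operands are zero, the narrow and
// wide divisions agree bit-for-bit. The widths and names here are our own
// illustrative choices.
#include <cassert>
#include <cstdint>
void checkNarrowUDivURem(uint16_t a, uint16_t b) {
  if (b == 0) return;       // division by zero is undefined at either width
  uint64_t wa = a, wb = b;  // zero-extended: the high 48 bits are known zero
  assert((uint16_t)(wa / wb) == (uint16_t)(a / b)); // udiv shrinks cleanly
  assert((uint16_t)(wa % wb) == (uint16_t)(a % b)); // and so does urem
}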
@@ -7993,7 +7982,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty, Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned) { if (Constant *C = dyn_cast(V)) - return Context->getConstantExprIntegerCast(C, Ty, + return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // Otherwise, it must be an instruction. @@ -8009,7 +7998,9 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, case Instruction::Xor: case Instruction::AShr: case Instruction::LShr: - case Instruction::Shl: { + case Instruction::Shl: + case Instruction::UDiv: + case Instruction::URem: { Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned); Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned); Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); @@ -8046,7 +8037,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, } default: // TODO: Can handle more cases here. - assert(0 && "Unreachable!"); + llvm_unreachable("Unreachable!"); break; } @@ -8089,13 +8080,14 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, SmallVectorImpl &NewIndices, const TargetData *TD, - LLVMContext* Context) { + LLVMContext *Context) { + if (!TD) return 0; if (!Ty->isSized()) return 0; // Start with the index over the outer type. Note that the type size // might be zero (even if the offset isn't zero) if the indexed type // is something like [0 x {int, int}] - const Type *IntPtrTy = TD->getIntPtrType(); + const Type *IntPtrTy = TD->getIntPtrType(*Context); int64_t FirstIdx = 0; if (int64_t TySize = TD->getTypeAllocSize(Ty)) { FirstIdx = Offset/TySize; @@ -8110,7 +8102,7 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset"); } - NewIndices.push_back(Context->getConstantInt(IntPtrTy, FirstIdx)); + NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx)); // Index into the types. If we fail, set OrigBase to null. while (Offset) { @@ -8124,14 +8116,14 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset, "Offset must stay within the indexed type"); unsigned Elt = SL->getElementContainingOffset(Offset); - NewIndices.push_back(Context->getConstantInt(Type::Int32Ty, Elt)); + NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt)); Offset -= SL->getElementOffset(Elt); Ty = STy->getElementType(Elt); } else if (const ArrayType *AT = dyn_cast(Ty)) { uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType()); assert(EltSize && "Cannot index into a zero-sized array"); - NewIndices.push_back(Context->getConstantInt(IntPtrTy,Offset/EltSize)); + NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize)); Offset %= EltSize; Ty = AT->getElementType(); } else { @@ -8154,7 +8146,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. - AddToWorkList(GEP); + Worklist.Add(GEP); CI.setOperand(0, GEP->getOperand(0)); return &CI; } @@ -8163,7 +8155,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // GEP computes a constant offset, see if we can convert these three // instructions into fewer. This typically happens with unions and other // non-type-safe code. 
- if (GEP->hasOneUse() && isa(GEP->getOperand(0))) { + if (TD && GEP->hasOneUse() && isa(GEP->getOperand(0))) { if (GEP->hasAllConstantIndices()) { // We are guaranteed to get a constant from EmitGEPOffset. ConstantInt *OffsetV = @@ -8179,10 +8171,10 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { // If we were able to index down into an element, create the GEP // and bitcast the result. This eliminates one bitcast, potentially // two. - Instruction *NGEP = GetElementPtrInst::Create(OrigBase, - NewIndices.begin(), - NewIndices.end(), ""); - InsertNewInstBefore(NGEP, CI); + Value *NGEP = cast(GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(OrigBase, + NewIndices.begin(), NewIndices.end()) : + Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end()); NGEP->takeName(GEP); if (isa(CI)) @@ -8214,10 +8206,8 @@ static bool isSafeIntegerType(const Type *Ty) { } } -/// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as -/// integer types. This function implements the common transforms for all those -/// cases. -/// @brief Implement the transforms common to CastInst with integer operands +/// commonIntCastTransforms - This function implements the common transforms +/// for trunc, zext, and sext. Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { if (Instruction *Result = commonCastTransforms(CI)) return Result; @@ -8241,11 +8231,10 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // Attempt to propagate the cast into the instruction for int->int casts. int NumCastsRemoved = 0; - if (!isa(CI) && - // Only do this if the dest type is a simple type, don't convert the - // expression tree to something weird like i93 unless the source is also - // strange. - (isSafeIntegerType(DestTy->getScalarType()) || + // Only do this if the dest type is a simple type, don't convert the + // expression tree to something weird like i93 unless the source is also + // strange. + if ((isSafeIntegerType(DestTy->getScalarType()) || !isSafeIntegerType(SrcI->getType()->getScalarType())) && CanEvaluateInDifferentType(SrcI, DestTy, CI.getOpcode(), NumCastsRemoved)) { @@ -8261,7 +8250,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { default: // All the others use floating point so we shouldn't actually // get here because of the check above. - assert(0 && "Unknown cast type"); + llvm_unreachable("Unknown cast type"); case Instruction::Trunc: DoXForm = true; break; @@ -8307,8 +8296,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { } if (DoXForm) { - DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid" - << " cast: " << CI; + DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type" + " to avoid cast: " << CI); Value *Res = EvaluateInDifferentType(SrcI, DestTy, CI.getOpcode() == Instruction::SExt); if (JustReplace) @@ -8317,9 +8306,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { assert(Res->getType() == DestTy); switch (CI.getOpcode()) { - default: assert(0 && "Unknown cast type!"); + default: llvm_unreachable("Unknown cast type!"); case Instruction::Trunc: - case Instruction::BitCast: // Just replace this cast with the result. return ReplaceInstUsesWith(CI, Res); case Instruction::ZExt: { @@ -8332,8 +8320,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { return ReplaceInstUsesWith(CI, Res); // We need to emit an AND to clear the high bits. 
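// Aside: a concrete instance of that AND, with the narrow expression
// re-done in the wide type for a zext from i8 to i32. Sketch only; the
// function name and widths are illustrative assumptions.
#include <cassert>
#include <cstdint>
uint32_t zextOfNarrowAdd(uint32_t a, uint32_t b) {
  uint32_t wide = a + b; // the i8 add, evaluated directly in i32
  return wide & 0xFFu;   // AND with getLowBitsSet(32, 8) clears bits 8..31
}
void checkZextOfNarrowAdd(uint32_t a, uint32_t b) {
  assert(zextOfNarrowAdd(a, b) == (uint32_t)(uint8_t)(a + b));
}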
- Constant *C = Context->getConstantInt(APInt::getLowBitsSet(DestBitSize, - SrcBitSize)); + Constant *C = ConstantInt::get(*Context, + APInt::getLowBitsSet(DestBitSize, SrcBitSize)); return BinaryOperator::CreateAnd(Res, C); } case Instruction::SExt: { @@ -8344,9 +8332,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { return ReplaceInstUsesWith(CI, Res); // We need to emit a cast to truncate, then a cast to sext. - return CastInst::Create(Instruction::SExt, - InsertCastBefore(Instruction::Trunc, Res, Src->getType(), - CI), DestTy); + return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy); } } } @@ -8362,16 +8348,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { case Instruction::Or: case Instruction::Xor: // If we are discarding information, rewrite. - if (DestBitSize <= SrcBitSize && DestBitSize != 1) { - // Don't insert two casts if they cannot be eliminated. We allow - // two casts to be inserted if the sizes are the same. This could - // only be converting signedness, which is a noop. - if (DestBitSize == SrcBitSize || - !ValueRequiresCast(CI.getOpcode(), Op1, DestTy,TD) || + if (DestBitSize < SrcBitSize && DestBitSize != 1) { + // Don't insert two casts unless at least one can be eliminated. + if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { - Instruction::CastOps opcode = CI.getOpcode(); - Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI); - Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI); + Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); + Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); return BinaryOperator::Create( cast(SrcI)->getOpcode(), Op0c, Op1c); } @@ -8380,62 +8362,25 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) { // cast (xor bool X, true) to int --> xor (cast bool X to int), 1 if (isa(CI) && SrcBitSize == 1 && SrcI->getOpcode() == Instruction::Xor && - Op1 == Context->getConstantIntTrue() && + Op1 == ConstantInt::getTrue(*Context) && (!Op0->hasOneUse() || !isa(Op0))) { - Value *New = InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI); + Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName()); return BinaryOperator::CreateXor(New, - Context->getConstantInt(CI.getType(), 1)); - } - break; - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::SRem: - case Instruction::URem: - // If we are just changing the sign, rewrite. - if (DestBitSize == SrcBitSize) { - // Don't insert two casts if they cannot be eliminated. We allow - // two casts to be inserted if the sizes are the same. This could - // only be converting signedness, which is a noop. - if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) || - !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { - Value *Op0c = InsertCastBefore(Instruction::BitCast, - Op0, DestTy, *SrcI); - Value *Op1c = InsertCastBefore(Instruction::BitCast, - Op1, DestTy, *SrcI); - return BinaryOperator::Create( - cast(SrcI)->getOpcode(), Op0c, Op1c); - } + ConstantInt::get(CI.getType(), 1)); } break; - case Instruction::Shl: - // Allow changing the sign of the source operand. Do not allow - // changing the size of the shift, UNLESS the shift amount is a - // constant. We must not change variable sized shifts to a smaller - // size, because it is undefined to shift more bits out than exist - // in the value. 
- if (DestBitSize == SrcBitSize || - (DestBitSize < SrcBitSize && isa(Op1))) { - Instruction::CastOps opcode = (DestBitSize == SrcBitSize ? - Instruction::BitCast : Instruction::Trunc); - Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI); - Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI); + case Instruction::Shl: { + // Canonicalize trunc inside shl, if we can. + ConstantInt *CI = dyn_cast(Op1); + if (CI && DestBitSize < SrcBitSize && + CI->getLimitedValue(DestBitSize) < DestBitSize) { + Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); + Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName()); return BinaryOperator::CreateShl(Op0c, Op1c); } break; - case Instruction::AShr: - // If this is a signed shr, and if all bits shifted in are about to be - // truncated off, turn it into an unsigned shr to allow greater - // simplifications. - if (DestBitSize < SrcBitSize && - isa(Op1)) { - uint32_t ShiftAmt = cast(Op1)->getLimitedValue(SrcBitSize); - if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) { - // Insert the new logical shift right. - return BinaryOperator::CreateLShr(Op0, Op1); - } - } - break; + } } return 0; } @@ -8450,11 +8395,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits(); // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) - if (DestBitWidth == 1 && - isa(Ty) == isa(Src->getType())) { - Constant *One = Context->getConstantInt(Src->getType(), 1); - Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI); - Value *Zero = Context->getNullValue(Src->getType()); + if (DestBitWidth == 1) { + Constant *One = ConstantInt::get(Src->getType(), 1); + Src = Builder->CreateAnd(Src, One, "tmp"); + Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -8469,12 +8413,12 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth)); if (MaskedValueIsZero(ShiftOp, Mask)) { if (ShAmt >= DestBitWidth) // All zeros. - return ReplaceInstUsesWith(CI, Context->getNullValue(Ty)); + return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty)); // Okay, we can shrink this. Truncate the input, then return a new // shift. 
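// Aside: the shrink is legal precisely because the bits that would cross
// the narrow boundary are known zero. Standalone sketch for i32 -> i8 with
// a 4-bit shift; the 0xF0 mask is an illustrative assumption.
#include <cassert>
#include <cstdint>
void checkShrinkTruncOfLShr(uint32_t x) {
  uint32_t v = x & 0xF0u;                    // MaskedValueIsZero above bit 7
  uint8_t wideThenTrunc   = (uint8_t)(v >> 4); // lshr in i32, then trunc
  uint8_t truncThenNarrow = (uint8_t)v >> 4;   // trunc first, shift narrow
  assert(wideThenTrunc == truncThenNarrow);
}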
- Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI); - Value *V2 = Context->getConstantExprTrunc(ShAmtV, Ty); + Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName()); + Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty); return BinaryOperator::CreateLShr(V1, V2); } } @@ -8499,20 +8443,15 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (!DoXform) return ICI; Value *In = ICI->getOperand(0); - Value *Sh = Context->getConstantInt(In->getType(), + Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits()-1); - In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh, - In->getName()+".lobit"), - CI); + In = Builder->CreateLShr(In, Sh, In->getName()+".lobit"); if (In->getType() != CI.getType()) - In = CastInst::CreateIntegerCast(In, CI.getType(), - false/*ZExt*/, "tmp", &CI); + In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp"); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { - Constant *One = Context->getConstantInt(In->getType(), 1); - In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One, - In->getName()+".not"), - CI); + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One, In->getName()+".not"); } return ReplaceInstUsesWith(CI, In); @@ -8545,8 +8484,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (Op1CV != 0 && (Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true - Constant *Res = Context->getConstantInt(Type::Int1Ty, isNE); - Res = Context->getConstantExprZExt(Res, CI.getType()); + Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE); + Res = ConstantExpr::getZExt(Res, CI.getType()); return ReplaceInstUsesWith(CI, Res); } @@ -8555,15 +8494,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, if (ShiftAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, - Context->getConstantInt(In->getType(), ShiftAmt), - In->getName()+".lobit"), CI); + In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), + In->getName()+".lobit"); } if ((Op1CV != 0) == isNE) { // Toggle the low bit. 
- Constant *One = Context->getConstantInt(In->getType(), 1); - In = BinaryOperator::CreateXor(In, One, "tmp"); - InsertNewInstBefore(cast(In), CI); + Constant *One = ConstantInt::get(In->getType(), 1); + In = Builder->CreateXor(In, One, "tmp"); } if (CI.getType() == In->getType()) @@ -8600,21 +8537,21 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // SrcSize > DstSize: trunc(a) & mask if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - Constant *AndConst = Context->getConstantInt(A->getType(), AndValue); - Instruction *And = - BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask"); - InsertNewInstBefore(And, CI); + Constant *AndConst = ConstantInt::get(A->getType(), AndValue); + Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); return new ZExtInst(And, CI.getType()); - } else if (SrcSize == DstSize) { + } + + if (SrcSize == DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); - return BinaryOperator::CreateAnd(A, Context->getConstantInt(A->getType(), + return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(), AndValue)); - } else if (SrcSize > DstSize) { - Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp"); - InsertNewInstBefore(Trunc, CI); + } + if (SrcSize > DstSize) { + Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp"); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, - Context->getConstantInt(Trunc->getType(), + ConstantInt::get(Trunc->getType(), AndValue)); } } @@ -8631,8 +8568,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() && (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { - Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI); - Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI); + Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); return BinaryOperator::Create(Instruction::Or, LCast, RCast); } } @@ -8645,7 +8582,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (TI0->getType() == CI.getType()) return BinaryOperator::CreateAnd(TI0, - Context->getConstantExprZExt(C, CI.getType())); + ConstantExpr::getZExt(C, CI.getType())); } // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). @@ -8657,9 +8594,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (TruncInst *TI = dyn_cast(And->getOperand(0))) { Value *TI0 = TI->getOperand(0); if (TI0->getType() == CI.getType()) { - Constant *ZC = Context->getConstantExprZExt(C, CI.getType()); - Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp"); - InsertNewInstBefore(NewAnd, *And); + Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); + Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp"); return BinaryOperator::CreateXor(NewAnd, ZC); } } @@ -8674,14 +8610,14 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Src = CI.getOperand(0); // Canonicalize sign-extend from i1 to a select. - if (Src->getType() == Type::Int1Ty) + if (Src->getType() == Type::getInt1Ty(*Context)) return SelectInst::Create(Src, - Context->getConstantIntAllOnesValue(CI.getType()), - Context->getNullValue(CI.getType())); + Constant::getAllOnesValue(CI.getType()), + Constant::getNullValue(CI.getType())); // See if the value being truncated is already sign extended. If so, just // eliminate the trunc/sext pair. 
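// Aside: "already sign extended" means the top bits are all copies of the
// sign bit, so the trunc/sext pair round-trips the value. Standalone
// sketch for i32 -> i16 -> i32; assumes two's-complement narrowing.
#include <cassert>
#include <cstdint>
void checkSextTruncRoundTrip(int16_t small) {
  int32_t x = small;                // x arrives with at least 17 sign bits
  assert((int32_t)(int16_t)x == x); // sext(trunc x to i16) == x
}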
- if (getOpcode(Src) == Instruction::Trunc) { + if (Operator::getOpcode(Src) == Instruction::Trunc) { Value *Op = cast(Src)->getOperand(0); unsigned OpBits = Op->getType()->getScalarSizeInBits(); unsigned MidBits = Src->getType()->getScalarSizeInBits(); @@ -8729,9 +8665,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned MidSize = Src->getType()->getScalarSizeInBits(); unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; - Constant *ShAmtV = Context->getConstantInt(CI.getType(), ShAmt); - I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV, - CI.getName()), CI); + Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); + I = Builder->CreateShl(I, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(I, ShAmtV); } } @@ -8742,18 +8677,18 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { /// FitsInFPType - Return a Constant* for the specified FP constant if it fits /// in the specified FP type without changing its value. static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem, - LLVMContext* Context) { + LLVMContext *Context) { bool losesInfo; APFloat F = CFP->getValueAPF(); (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); if (!losesInfo) - return Context->getConstantFP(F); + return ConstantFP::get(*Context, F); return 0; } /// LookThroughFPExtensions - If this is an fp extension instruction, look /// through it until we get the source value. -static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) { +static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) { if (Instruction *I = dyn_cast(V)) if (I->getOpcode() == Instruction::FPExt) return LookThroughFPExtensions(I->getOperand(0), Context); @@ -8762,12 +8697,12 @@ static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) { // that can accurately represent it. This allows us to turn // (float)((double)X+2.0) into x+2.0f. if (ConstantFP *CFP = dyn_cast(V)) { - if (CFP->getType() == Type::PPC_FP128Ty) + if (CFP->getType() == Type::getPPC_FP128Ty(*Context)) return V; // No constant folding of this. // See if the value can be truncated to float and then reextended. if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context)) return V; - if (CFP->getType() == Type::DoubleTy) + if (CFP->getType() == Type::getDoubleTy(*Context)) return V; // Won't shrink. if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context)) return V; @@ -8804,10 +8739,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // the cast, do this xform. if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize && RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { - LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc, - CI.getType(), CI); - RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc, - CI.getType(), CI); + LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); + RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType()); return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc); } } @@ -8875,10 +8808,11 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { // trunc to be exposed to other transforms. Don't do this for extending // ptrtoint's, because we don't know if the target sign or zero extends its // pointers. 
- if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { - Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0), - TD->getIntPtrType(), - "tmp"), CI); + if (TD && + CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { + Value *P = Builder->CreatePtrToInt(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), + "tmp"); return new TruncInst(P, CI.getType()); } @@ -8891,65 +8825,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { // allows the trunc to be exposed to other transforms. Don't do this for // extending inttoptr's, because we don't know if the target sign or zero // extends to pointers. - if (CI.getOperand(0)->getType()->getScalarSizeInBits() > + if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) { - Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0), - TD->getIntPtrType(), - "tmp"), CI); + Value *P = Builder->CreateTrunc(CI.getOperand(0), + TD->getIntPtrType(CI.getContext()), "tmp"); return new IntToPtrInst(P, CI.getType()); } if (Instruction *I = commonCastTransforms(CI)) return I; - - const Type *DestPointee = cast(CI.getType())->getElementType(); - if (!DestPointee->isSized()) return 0; - - // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP. - ConstantInt *Cst; - Value *X; - if (match(CI.getOperand(0), m_Add(m_Cast(m_Value(X)), - m_ConstantInt(Cst)))) { - // If the source and destination operands have the same type, see if this - // is a single-index GEP. - if (X->getType() == CI.getType()) { - // Get the size of the pointee type. - uint64_t Size = TD->getTypeAllocSize(DestPointee); - - // Convert the constant to intptr type. - APInt Offset = Cst->getValue(); - Offset.sextOrTrunc(TD->getPointerSizeInBits()); - - // If Offset is evenly divisible by Size, we can do this xform. - if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ - Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); - return GetElementPtrInst::Create(X, Context->getConstantInt(Offset)); - } - } - // TODO: Could handle other cases, e.g. where add is indexing into field of - // struct etc. - } else if (CI.getOperand(0)->hasOneUse() && - match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) { - // Otherwise, if this is inttoptr(add x, cst), try to turn this into an - // "inttoptr+GEP" instead of "add+intptr". - - // Get the size of the pointee type. - uint64_t Size = TD->getTypeAllocSize(DestPointee); - - // Convert the constant to intptr type. - APInt Offset = Cst->getValue(); - Offset.sextOrTrunc(TD->getPointerSizeInBits()); - - // If Offset is evenly divisible by Size, we can do this xform. 
- if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ - Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); - - Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(), - "tmp"), CI); - return GetElementPtrInst::Create(P, - Context->getConstantInt(Offset), "tmp"); - } - } + return 0; } @@ -8960,10 +8845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { const Type *SrcTy = Src->getType(); const Type *DestTy = CI.getType(); - if (SrcTy->isInteger() && DestTy->isInteger()) { - if (Instruction *Result = commonIntCastTransforms(CI)) - return Result; - } else if (isa(SrcTy)) { + if (isa(SrcTy)) { if (Instruction *I = commonPointerCastTransforms(CI)) return I; } else { @@ -8987,8 +8869,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace()) return 0; - // If we are casting a malloc or alloca to a pointer to a type of the same + // If we are casting an alloca to a pointer to a type of the same // size, rewrite the allocation instruction to allocate the "right" type. + // There is no need to modify malloc calls because it is their bitcast that + // needs to be cleaned up. if (AllocationInst *AI = dyn_cast(Src)) if (Instruction *V = PromoteCastOfAllocation(CI, *AI)) return V; @@ -8996,7 +8880,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the source and destination are pointers, and this cast is equivalent // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep. // This can enhance SROA and other transforms that want type-safe pointers. - Constant *ZeroUInt = Context->getNullValue(Type::Int32Ty); + Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context)); unsigned NumZeros = 0; while (SrcElTy != DstElTy && isa(SrcElTy) && !isa(SrcElTy) && @@ -9008,8 +8892,30 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { SmallVector Idxs(NumZeros+1, ZeroUInt); - return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "", - ((Instruction*) NULL)); + return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "", + ((Instruction*) NULL)); + } + } + + if (const VectorType *DestVTy = dyn_cast(DestTy)) { + if (DestVTy->getNumElements() == 1) { + if (!isa(SrcTy)) { + Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); + return InsertElementInst::Create(UndefValue::get(DestTy), Elem, + Constant::getNullValue(Type::getInt32Ty(*Context))); + } + // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) + } + } + + if (const VectorType *SrcVTy = dyn_cast(SrcTy)) { + if (SrcVTy->getNumElements() == 1) { + if (!isa(DestTy)) { + Value *Elem = + Builder->CreateExtractElement(Src, + Constant::getNullValue(Type::getInt32Ty(*Context))); + return CastInst::Create(Instruction::BitCast, Elem, DestTy); + } } } @@ -9030,10 +8936,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Tmp->getOperand(0)->getType() == DestTy) || ((Tmp = dyn_cast(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = InsertCastBefore(Instruction::BitCast, - SVI->getOperand(0), DestTy, CI); - Value *RHS = InsertCastBefore(Instruction::BitCast, - SVI->getOperand(1), DestTy, CI); + Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); + Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); // Return a new shuffle vector.
Use the same element ID's, as we // know the vector types match #elts. return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); @@ -9076,9 +8980,9 @@ static unsigned GetSelectFoldableOperands(Instruction *I) { /// GetSelectFoldableConstant - For the same transformation as the previous /// function, return the identity constant that goes into the select. static Constant *GetSelectFoldableConstant(Instruction *I, - LLVMContext* Context) { + LLVMContext *Context) { switch (I->getOpcode()) { - default: assert(0 && "This cannot happen!"); abort(); + default: llvm_unreachable("This cannot happen!"); case Instruction::Add: case Instruction::Sub: case Instruction::Or: @@ -9086,11 +8990,11 @@ static Constant *GetSelectFoldableConstant(Instruction *I, case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - return Context->getNullValue(I->getType()); + return Constant::getNullValue(I->getType()); case Instruction::And: - return Context->getAllOnesValue(I->getType()); + return Constant::getAllOnesValue(I->getType()); case Instruction::Mul: - return Context->getConstantInt(I->getType(), 1); + return ConstantInt::get(I->getType(), 1); } } @@ -9110,7 +9014,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // Fold this by inserting a select from the input values. SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0), SI.getName()+".v"); + FI->getOperand(0), SI.getName()+".v"); InsertNewInstBefore(NewSI, SI); return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); @@ -9160,7 +9064,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, else return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp); } - assert(0 && "Shouldn't get here"); + llvm_unreachable("Shouldn't get here"); return 0; } @@ -9202,7 +9106,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, NewSel->takeName(TVI); if (BinaryOperator *BO = dyn_cast(TVI)) return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - assert(0 && "Unknown instruction!!"); + llvm_unreachable("Unknown instruction!!"); } } } @@ -9231,7 +9135,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, NewSel->takeName(FVI); if (BinaryOperator *BO = dyn_cast(FVI)) return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - assert(0 && "Unknown instruction!!"); + llvm_unreachable("Unknown instruction!!"); } } } @@ -9266,7 +9170,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT)) return ReplaceInstUsesWith(SI, FalseVal); // X < C ? X : C-1 --> X > C-1 ? C-1 : X - Constant *AdjustedRHS = SubOne(CI, Context); + Constant *AdjustedRHS = SubOne(CI); if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9286,7 +9190,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT)) return ReplaceInstUsesWith(SI, FalseVal); // X > C ? X : C+1 --> X < C+1 ? 
C+1 : X - Constant *AdjustedRHS = AddOne(CI, Context); + Constant *AdjustedRHS = AddOne(CI); if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) || (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) { Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9323,10 +9227,10 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) || (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { Value *In = ICI->getOperand(0); - Value *Sh = Context->getConstantInt(In->getType(), + Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits()-1); In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, - In->getName()+".lobit"), + In->getName()+".lobit"), *ICI); if (In->getType() != SI.getType()) In = CastInst::CreateIntegerCast(In, SI.getType(), @@ -9365,6 +9269,14 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, return Changed ? &SI : 0; } +/// isDefinedInBB - Return true if the value is an instruction defined in the +/// specified basicblock. +static bool isDefinedInBB(const Value *V, const BasicBlock *BB) { + const Instruction *I = dyn_cast(V); + return I != 0 && I->getParent() == BB; +} + + Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -9390,7 +9302,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, FalseVal); } - if (SI.getType() == Type::Int1Ty) { + if (SI.getType() == Type::getInt1Ty(*Context)) { if (ConstantInt *C = dyn_cast(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C @@ -9438,26 +9350,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (ICmpInst *IC = dyn_cast(SI.getCondition())) { - - // (x ashr x, 31 - if (TrueValC->isAllOnesValue() && FalseValC->isZero()) - if (ConstantInt *CmpCst = dyn_cast(IC->getOperand(1))) { - if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) { - // The comparison constant and the result are not neccessarily the - // same width. Make an all-ones value by inserting a AShr. - Value *X = IC->getOperand(0); - uint32_t Bits = X->getType()->getScalarSizeInBits(); - Constant *ShAmt = Context->getConstantInt(X->getType(), Bits-1); - Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X, - ShAmt, "ones"); - InsertNewInstBefore(SRA, SI); - - // Then cast to the appropriate width. - return CastInst::CreateIntegerCast(SRA, SI.getType(), true); - } - } - - // If one of the constants is zero (we know they can't both be) and we // have an icmp instruction with zero, and we have an 'and' with the // non-constant value, eliminate this whole mess. This corresponds to @@ -9568,10 +9460,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select C, (add X, Y), (sub X, Z) Value *NegVal; // Compute -Z if (Constant *C = dyn_cast(SubOp->getOperand(1))) { - NegVal = Context->getConstantExprNeg(C); + NegVal = ConstantExpr::getNeg(C); } else { NegVal = InsertNewInstBefore( - BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI); + BinaryOperator::CreateNeg(SubOp->getOperand(1), + "tmp"), SI); } Value *NewTrueOp = OtherAddOp; @@ -9595,6 +9488,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return FoldI; } + // See if we can fold the select into a phi node. The true/false values have + // to be live in the predecessor blocks. If they are instructions in SI's + // block, we can't map to the predecessor. 
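// For example (an IR sketch under the constraint just stated): with the
// condition arriving as a phi of constants,
//   %c = phi i1 [ true, %bb1 ], [ false, %bb2 ]
//   %s = select i1 %c, i32 %a, i32 %b
// FoldOpIntoPhi can rewrite the select as a phi of its operands,
//   %s = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
// which is only sound if %a and %b are usable in those predecessors.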
+ if (isa(SI.getCondition()) && + (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) || + isa(SI.getTrueValue())) && + (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) || + isa(SI.getFalseValue()))) + if (Instruction *NV = FoldOpIntoPhi(SI)) + return NV; + if (BinaryOperator::isNot(CondVal)) { SI.setOperand(0, BinaryOperator::getNotArgument(CondVal)); SI.setOperand(1, FalseVal); @@ -9617,7 +9521,7 @@ static unsigned EnforceKnownAlignment(Value *V, User *U = dyn_cast(V); if (!U) return Align; - switch (getOpcode(U)) { + switch (Operator::getOpcode(U)) { default: break; case Instruction::BitCast: return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); @@ -9650,16 +9554,13 @@ static unsigned EnforceKnownAlignment(Value *V, Align = PrefAlign; } } - } else if (AllocationInst *AI = dyn_cast(V)) { - // If there is a requested alignment and if this is an alloca, round up. We - // don't do this for malloc, because some systems can't respect the request. - if (isa(AI)) { - if (AI->getAlignment() >= PrefAlign) - Align = AI->getAlignment(); - else { - AI->setAlignment(PrefAlign); - Align = PrefAlign; - } + } else if (AllocaInst *AI = dyn_cast(V)) { + // If there is a requested alignment and if this is an alloca, round up. + if (AI->getAlignment() >= PrefAlign) + Align = AI->getAlignment(); + else { + AI->setAlignment(PrefAlign); + Align = PrefAlign; } } @@ -9694,7 +9595,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned CopyAlign = MI->getAlignment(); if (CopyAlign < MinAlign) { - MI->setAlignment(MinAlign); + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + MinAlign, false)); return MI; } @@ -9715,7 +9617,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // Use an integer load+store unless we can find something better. Type *NewPtrTy = - Context->getPointerTypeUnqual(Context->getIntegerType(Size<<3)); + PointerType::getUnqual(IntegerType::get(*Context, Size<<3)); // Memcpy forces the use of i8* for the source and destination. That means // that if you're using memcpy to move one double around, you'll get a cast @@ -9725,7 +9627,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // integer datatype. if (Value *Op = getBitCastOperand(MI->getOperand(1))) { const Type *SrcETy = cast(Op->getType())->getElementType(); - if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { + if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { // The SrcETy might be something like {{{double}}} or [1 x double]. Rip // down through these levels if so. while (!SrcETy->isSingleValueType()) { @@ -9744,7 +9646,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { } if (SrcETy->isSingleValueType()) - NewPtrTy = Context->getPointerTypeUnqual(SrcETy); + NewPtrTy = PointerType::getUnqual(SrcETy); } } @@ -9754,28 +9656,29 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI); - Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI); + Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); InsertNewInstBefore(L, *MI); InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. 
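// For example (an IR-level sketch of the whole rewrite, assuming an
// 8-byte constant-size copy; the exact memcpy intrinsic signature of this
// era is elided):
//   call void @llvm.memcpy(i8* %d, i8* %s, i32 8, i32 4)
// becomes a single integer load/store pair through bitcast pointers,
//   %v = load i64* %sc, align 4
//   store i64 %v, i64* %dc, align 4
// after which the length is zeroed so the dead memcpy gets erased: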
- MI->setOperand(3, Context->getNullValue(MemOpLength->getType())); + MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); return MI; } Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest()); if (MI->getAlignment() < Alignment) { - MI->setAlignment(Alignment); + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), + Alignment, false)); return MI; } // Extract the length and alignment and fill if they are constant. ConstantInt *LenC = dyn_cast(MI->getLength()); ConstantInt *FillC = dyn_cast(MI->getValue()); - if (!LenC || !FillC || FillC->getType() != Type::Int8Ty) + if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context)) return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); @@ -9785,21 +9688,21 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { - const Type *ITy = Context->getIntegerType(Len*8); // n=1 -> i8. + const Type *ITy = IntegerType::get(*Context, Len*8); // n=1 -> i8. Value *Dest = MI->getDest(); - Dest = InsertBitCastBefore(Dest, Context->getPointerTypeUnqual(ITy), *MI); + Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy)); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - InsertNewInstBefore(new StoreInst(Context->getConstantInt(ITy, Fill), + InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill), Dest, false, Alignment), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setLength(Context->getNullValue(LenC->getType())); + MI->setLength(Constant::getNullValue(LenC->getType())); return MI; } @@ -9820,8 +9723,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return &CI; } - - IntrinsicInst *II = dyn_cast(&CI); if (!II) return visitCallSite(&CI); @@ -9891,9 +9792,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC lvx -> load if the pointer is known aligned. // Turn X86 loadups -> load if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { - Value *Ptr = InsertBitCastBefore(II->getOperand(1), - Context->getPointerTypeUnqual(II->getType()), - CI); + Value *Ptr = Builder->CreateBitCast(II->getOperand(1), + PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } break; @@ -9902,8 +9802,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn stvx -> store if the pointer is known aligned. if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) { const Type *OpPtrTy = - Context->getPointerTypeUnqual(II->getOperand(1)->getType()); - Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI); + PointerType::getUnqual(II->getOperand(1)->getType()); + Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy); return new StoreInst(II->getOperand(1), Ptr); } break; @@ -9913,8 +9813,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn X86 storeu -> store if the pointer is known aligned. 
if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { const Type *OpPtrTy = - Context->getPointerTypeUnqual(II->getOperand(2)->getType()); + PointerType::getUnqual(II->getOperand(2)->getType()); - Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI); + Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy); return new StoreInst(II->getOperand(2), Ptr); } break; @@ -9951,9 +9851,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI); - Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI); - Value *Result = Context->getUndef(Op0->getType()); + Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); + Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); + Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. Value *ExtractedElts[32]; @@ -9966,16 +9866,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Idx &= 31; // Match the hardware behavior. if (ExtractedElts[Idx] == 0) { - Instruction *Elt = - new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp"); - InsertNewInstBefore(Elt, CI); - ExtractedElts[Idx] = Elt; + ExtractedElts[Idx] = + Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false), + "tmp"); } // Insert this value into the result vector. - Result = InsertElementInst::Create(Result, ExtractedElts[Idx], - i, "tmp"); - InsertNewInstBefore(cast(Result), CI); + Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], + ConstantInt::get(Type::getInt32Ty(*Context), i, false), + "tmp"); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -9999,7 +9899,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { - if (isa(BI)) { + if (isa(BI) || isMalloc(BI)) { CannotRemove = true; break; } @@ -10055,7 +9955,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, const Type* DstTy = cast(CI->getType())->getElementType(); if (!SrcTy->isSized() || !DstTy->isSized()) return false; - if (TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) + if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) return false; return true; } @@ -10076,11 +9976,13 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Instruction *OldCall = CS.getInstruction(); // If the call and callee calling conventions don't match, this call must // be unreachable, as the call is undefined. - new StoreInst(Context->getConstantIntTrue(), - Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), + new StoreInst(ConstantInt::getTrue(*Context), + UndefValue::get(Type::getInt1PtrTy(*Context)), OldCall); - if (!OldCall->use_empty()) - OldCall->replaceAllUsesWith(Context->getUndef(OldCall->getType())); + // If OldCall does not return void then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust themselves. + if (!OldCall->getType()->isVoidTy()) + OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType())); if (isa(OldCall)) // Not worth removing an invoke here. return EraseInstFromFunction(*OldCall); return 0; } if (isa(Callee) || isa(Callee)) { // This instruction is not reachable, just remove it.
We insert a store to // undef so that we know that this code is not reachable, despite the fact // that we can't modify the CFG here. - new StoreInst(Context->getConstantIntTrue(), - Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), + new StoreInst(ConstantInt::getTrue(*Context), + UndefValue::get(Type::getInt1PtrTy(*Context)), CS.getInstruction()); - if (!CS.getInstruction()->use_empty()) + // If CS dues not return void then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!CS.getInstruction()->getType()->isVoidTy()) CS.getInstruction()-> - replaceAllUsesWith(Context->getUndef(CS.getInstruction()->getType())); + replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType())); if (InvokeInst *II = dyn_cast(CS.getInstruction())) { // Don't break the CFG, insert a dummy cond branch. BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), - Context->getConstantIntTrue(), II); + ConstantInt::getTrue(*Context), II); } return EraseInstFromFunction(*CS.getInstruction()); } @@ -10165,13 +10069,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee->isDeclaration() && // Conversion is ok if changing from one pointer type to another or from // a pointer to an integer of the same size. - !((isa(OldRetTy) || OldRetTy == TD->getIntPtrType()) && - (isa(NewRetTy) || NewRetTy == TD->getIntPtrType()))) + !((isa(OldRetTy) || !TD || + OldRetTy == TD->getIntPtrType(Caller->getContext())) && + (isa(NewRetTy) || !TD || + NewRetTy == TD->getIntPtrType(Caller->getContext())))) return false; // Cannot transform this return value. if (!Caller->use_empty() && // void -> non-void is handled specially - NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy)) + !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy)) return false; // Cannot transform this return value. if (!CallerPAL.isEmpty() && !Caller->use_empty()) { @@ -10212,8 +10118,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. bool isConvertible = ActTy == ParamTy || - ((isa(ParamTy) || ParamTy == TD->getIntPtrType()) && - (isa(ActTy) || ActTy == TD->getIntPtrType())); + (TD && ((isa(ParamTy) || + ParamTy == TD->getIntPtrType(Caller->getContext())) && + (isa(ActTy) || + ActTy == TD->getIntPtrType(Caller->getContext())))); if (Callee->isDeclaration() && !isConvertible) return false; } @@ -10260,8 +10168,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } else { Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, ParamTy, false); - CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp"); - Args.push_back(InsertNewInstBefore(NewCast, *Caller)); + Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp")); } // Add any parameter attributes. @@ -10270,26 +10177,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { } // If the function takes more arguments than the call was taking, add them - // now... + // now. for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) - Args.push_back(Context->getNullValue(FT->getParamType(i))); + Args.push_back(Constant::getNullValue(FT->getParamType(i))); - // If we are removing arguments to the function, emit an obnoxious warning... + // If we are removing arguments to the function, emit an obnoxious warning. 
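Both visitCallSite hunks above rely on the same idiom for code that is provably unreachable: InstCombine may not edit the CFG, so it records the fact by storing true through an undef i1* and replacing any non-void result with undef. In isolation (OldCall standing for the offending call):

    new StoreInst(ConstantInt::getTrue(Ctx),
                  UndefValue::get(Type::getInt1PtrTy(Ctx)),
                  OldCall);                    // inserted just before OldCall
    if (!OldCall->getType()->isVoidTy())       // non-void: RAUW with undef so
      OldCall->replaceAllUsesWith(             // ValueHandles can adjust
          UndefValue::get(OldCall->getType()));

The bogus store is the marker that SimplifyCFG later turns into an unreachable terminator.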
if (FT->getNumParams() < NumActualArgs) { if (!FT->isVarArg()) { - cerr << "WARNING: While resolving call to function '" - << Callee->getName() << "' arguments were dropped!\n"; + errs() << "WARNING: While resolving call to function '" + << Callee->getName() << "' arguments were dropped!\n"; } else { - // Add all of the arguments in their promoted form to the arg list... + // Add all of the arguments in their promoted form to the arg list. for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { const Type *PTy = getPromotedType((*AI)->getType()); if (PTy != (*AI)->getType()) { // Must promote to pass through va_arg area! - Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, - PTy, false); - Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp"); - InsertNewInstBefore(Cast, *Caller); - Args.push_back(Cast); + Instruction::CastOps opcode = + CastInst::getCastOpcode(*AI, false, PTy, false); + Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp")); } else { Args.push_back(*AI); } @@ -10304,10 +10209,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Attributes FnAttrs = CallerPAL.getFnAttributes()) attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); - if (NewRetTy == Type::VoidTy) + if (NewRetTy->isVoidTy()) Caller->setName(""); // Void type should not have a name. - const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),attrVec.end()); + const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), + attrVec.end()); Instruction *NC; if (InvokeInst *II = dyn_cast(Caller)) { @@ -10329,7 +10235,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Insert a cast of the return type as necessary. Value *NV = NC; if (OldRetTy != NV->getType() && !Caller->use_empty()) { - if (NV->getType() != Type::VoidTy) { + if (!NV->getType()->isVoidTy()) { Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false, OldRetTy, false); NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); @@ -10343,16 +10249,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Otherwise, it's a call, just insert cast right after the call instr InsertNewInstBefore(NC, *Caller); } - AddUsersToWorkList(*Caller); + Worklist.AddUsersToWorkList(*Caller); } else { - NV = Context->getUndef(Caller->getType()); + NV = UndefValue::get(Caller->getType()); } } - if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) + + if (!Caller->use_empty()) Caller->replaceAllUsesWith(NV); - Caller->eraseFromParent(); - RemoveFromWorkList(Caller); + + EraseInstFromFunction(*Caller); return true; } @@ -10469,14 +10376,14 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // Replace the trampoline call with a direct call. Let the generic // code sort out any function type mismatches. - FunctionType *NewFTy = - Context->getFunctionType(FTy->getReturnType(), NewTypes, + FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg()); Constant *NewCallee = - NestF->getType() == Context->getPointerTypeUnqual(NewFTy) ? - NestF : Context->getConstantExprBitCast(NestF, - Context->getPointerTypeUnqual(NewFTy)); - const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),NewAttrs.end()); + NestF->getType() == PointerType::getUnqual(NewFTy) ? 
+ NestF : ConstantExpr::getBitCast(NestF, + PointerType::getUnqual(NewFTy)); + const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), + NewAttrs.end()); Instruction *NewCaller; if (InvokeInst *II = dyn_cast(Caller)) { @@ -10495,10 +10402,10 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { setCallingConv(cast(Caller)->getCallingConv()); cast(NewCaller)->setAttributes(NewPAL); } - if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) + if (!Caller->getType()->isVoidTy()) Caller->replaceAllUsesWith(NewCaller); Caller->eraseFromParent(); - RemoveFromWorkList(Caller); + Worklist.Remove(Caller); return 0; } } @@ -10508,13 +10415,13 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // code sort out any function type mismatches. Constant *NewCallee = NestF->getType() == PTy ? NestF : - Context->getConstantExprBitCast(NestF, PTy); + ConstantExpr::getBitCast(NestF, PTy); CS.setCalledFunction(NewCallee); return CS.getInstruction(); } -/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)] -/// and if a/b/c/d and the add's all have a single use, turn this into two phi's +/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] +/// and if a/b/c and the add's all have a single use, turn this into a phi /// and a single binop. Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Instruction *FirstInst = cast(PN.getIncomingValue(0)); @@ -10526,8 +10433,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { const Type *LHSType = LHSVal->getType(); const Type *RHSType = RHSVal->getType(); - // Scan to see if all operands are the same opcode, all have one use, and all - // kill their operands (i.e. the operands have one use). + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { Instruction *I = dyn_cast(PN.getIncomingValue(i)); if (!I || I->getOpcode() != Opc || !I->hasOneUse() || @@ -10547,6 +10453,13 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (I->getOperand(0) != LHSVal) LHSVal = 0; if (I->getOperand(1) != RHSVal) RHSVal = 0; } + + // If both LHS and RHS would need a PHI, don't do this transformation, + // because it would increase the number of PHIs entering the block, + // which leads to higher register pressure. This is especially + // bad when the PHIs are in the header of a loop. + if (!LHSVal && !RHSVal) + return 0; // Otherwise, this is safe to transform! @@ -10589,8 +10502,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (BinaryOperator *BinOp = dyn_cast(FirstInst)) return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal); CmpInst *CIOp = cast(FirstInst); - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, - RHSVal); + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + LHSVal, RHSVal); } Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { @@ -10601,9 +10514,13 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // This is true if all GEP bases are allocas and if all indices into them are // constants. bool AllBasePointersAreAllocas = true; + + // We don't want to replace this phi if the replacement would require + // more than one phi, which leads to higher register pressure. This is + // especially bad when the PHIs are in the header of a loop. 
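The FoldPHIArgBinOpIntoPHI hunk above adds a profitability guard: the fold is only a win when at most one operand varies across the incoming binops, so at most one new PHI replaces the old one. A sketch of the surviving case, IR shown in comments:

    // bb1:  %s1 = add i32 %a, %b
    // bb2:  %s2 = add i32 %a, %c
    //       %p  = phi i32 [ %s1, %bb1 ], [ %s2, %bb2 ]
    // becomes
    //       %x  = phi i32 [ %b, %bb1 ], [ %c, %bb2 ]
    //       %p  = add i32 %a, %x
    if (!LHSVal && !RHSVal)   // both operands vary: would need two PHIs,
      return 0;               // raising register pressure, so bail out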
+ bool NeededPhi = false; - // Scan to see if all operands are the same opcode, all have one use, and all - // kill their operands (i.e. the operands have one use). + // Scan to see if all operands are the same opcode, and all have one use. for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { GetElementPtrInst *GEP= dyn_cast(PN.getIncomingValue(i)); if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || @@ -10632,7 +10549,16 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) return 0; + + // If we already needed a PHI for an earlier operand, and another operand + // also requires a PHI, we'd be introducing more PHIs than we're + // eliminating, which increases register pressure on entry to the PHI's + // block. + if (NeededPhi) + return 0; + FixedOperands[op] = 0; // Needs a PHI. + NeededPhi = true; } } @@ -10678,8 +10604,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { } Value *Base = FixedOperands[0]; - return GetElementPtrInst::Create(Base, FixedOperands.begin()+1, - FixedOperands.end()); + return cast(FirstInst)->isInBounds() ? + GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, + FixedOperands.end()) : + GetElementPtrInst::Create(Base, FixedOperands.begin()+1, + FixedOperands.end()); } @@ -10836,7 +10765,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { if (BinaryOperator *BinOp = dyn_cast(FirstInst)) return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp); if (CmpInst *CIOp = dyn_cast(FirstInst)) - return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), + return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), PhiVal, ConstantOp); assert(isa(FirstInst) && "Unknown operation"); @@ -10929,7 +10858,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { SmallPtrSet PotentiallyDeadPHIs; PotentiallyDeadPHIs.insert(&PN); if (DeadPHICycle(PU, PotentiallyDeadPHIs)) - return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } // If this phi has a single use, and if that use just computes a value for @@ -10941,7 +10870,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (PHIUser->hasOneUse() && (isa(PHIUser) || isa(PHIUser)) && PHIUser->use_back() == &PN) { - return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); + return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType())); } } @@ -10982,30 +10911,14 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { return 0; } -static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy, - Instruction *InsertPoint, - InstCombiner *IC) { - unsigned PtrSize = DTy->getScalarSizeInBits(); - unsigned VTySize = V->getType()->getScalarSizeInBits(); - // We must cast correctly to the pointer type. Ensure that we - // sign extend the integer value if it is smaller as this is - // used for address computation. - Instruction::CastOps opcode = - (VTySize < PtrSize ? Instruction::SExt : - (VTySize == PtrSize ? Instruction::BitCast : Instruction::Trunc)); - return IC->InsertCastBefore(opcode, V, DTy, *InsertPoint); -} - - Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *PtrOp = GEP.getOperand(0); - // Is it 'getelementptr %P, i32 0' or 'getelementptr %P' - // If so, eliminate the noop. + // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P', they are noops. 
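A pattern that recurs in the GEP rewrites from here on: whenever a combined GEP is rebuilt, the inbounds flag of the source is preserved by picking the matching constructor. Schematically, with Base and FixedOperands as in the hunk above:

    return cast<GEPOperator>(FirstInst)->isInBounds()
      ? GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin() + 1,
                                          FixedOperands.end())
      : GetElementPtrInst::Create(Base, FixedOperands.begin() + 1,
                                  FixedOperands.end());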
if (GEP.getNumOperands() == 1) return ReplaceInstUsesWith(GEP, PtrOp); if (isa(GEP.getOperand(0))) - return ReplaceInstUsesWith(GEP, Context->getUndef(GEP.getType())); + return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); bool HasZeroPointerIndex = false; if (Constant *C = dyn_cast(GEP.getOperand(1))) @@ -11015,78 +10928,48 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return ReplaceInstUsesWith(GEP, PtrOp); // Eliminate unneeded casts for indices. - bool MadeChange = false; - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end(); - i != e; ++i, ++GTI) { - if (isa(*GTI)) { - if (CastInst *CI = dyn_cast(*i)) { - if (CI->getOpcode() == Instruction::ZExt || - CI->getOpcode() == Instruction::SExt) { - const Type *SrcTy = CI->getOperand(0)->getType(); - // We can eliminate a cast from i32 to i64 iff the target - // is a 32-bit pointer target. - if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) { - MadeChange = true; - *i = CI->getOperand(0); - } - } - } + if (TD) { + bool MadeChange = false; + unsigned PtrSize = TD->getPointerSizeInBits(); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); + I != E; ++I, ++GTI) { + if (!isa(*GTI)) continue; + // If we are using a wider index than needed for this platform, shrink it - // to what we need. If narrower, sign-extend it to what we need. - // If the incoming value needs a cast instruction, - // insert it. This explicit cast can make subsequent optimizations more - // obvious. - Value *Op = *i; - if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) { - if (Constant *C = dyn_cast(Op)) { - *i = Context->getConstantExprTrunc(C, TD->getIntPtrType()); - MadeChange = true; - } else { - Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(), - GEP); - *i = Op; - MadeChange = true; - } - } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) { - if (Constant *C = dyn_cast(Op)) { - *i = Context->getConstantExprSExt(C, TD->getIntPtrType()); - MadeChange = true; - } else { - Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(), - GEP); - *i = Op; - MadeChange = true; - } - } + // to what we need. If narrower, sign-extend it to what we need. This + // explicit cast can make subsequent optimizations more obvious. + unsigned OpBits = cast((*I)->getType())->getBitWidth(); + if (OpBits == PtrSize) + continue; + + *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); + MadeChange = true; } + if (MadeChange) return &GEP; } - if (MadeChange) return &GEP; // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. // - SmallVector SrcGEPOperands; - if (User *Src = dyn_castGetElementPtr(PtrOp)) - SrcGEPOperands.append(Src->op_begin(), Src->op_end()); - - if (!SrcGEPOperands.empty()) { + if (GEPOperator *Src = dyn_cast(PtrOp)) { // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. // - if (isa(SrcGEPOperands[0]) && - cast(SrcGEPOperands[0])->getNumOperands() == 2) - return 0; // Wait until our source is folded to completion. 
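The rewritten index loop above replaces the hand-rolled Trunc/SExt cases with a single signed CreateIntCast per mis-sized index, and runs only when TargetData is known. A standalone sketch (B, Idx32, BasePtr are hypothetical) of widening an i32 index on a 64-bit target:

    const Type *IntPtrTy = TD->getIntPtrType(Ctx);           // i64 on this target
    // GEP indices are sign-extended, hence the 'true' (signed) cast.
    Value *Idx = B.CreateIntCast(Idx32, IntPtrTy, /*isSigned=*/true);
    Value *Elt = B.CreateInBoundsGEP(BasePtr, Idx, "elt");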
+ if (GetElementPtrInst *SrcGEP = + dyn_cast(Src->getOperand(0))) + if (SrcGEP->getNumOperands() == 2) + return 0; // Wait until our source is folded to completion. SmallVector Indices; // Find out whether the last index in the source GEP is a sequential idx. bool EndsWithSequential = false; - for (gep_type_iterator I = gep_type_begin(*cast(PtrOp)), - E = gep_type_end(*cast(PtrOp)); I != E; ++I) + for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); + I != E; ++I) EndsWithSequential = !isa(*I); // Can we combine the two pointer arithmetics offsets? @@ -11094,98 +10977,68 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... // - Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1); - if (SO1 == Context->getNullValue(SO1->getType())) { + Value *Sum; + Value *SO1 = Src->getOperand(Src->getNumOperands()-1); + Value *GO1 = GEP.getOperand(1); + if (SO1 == Constant::getNullValue(SO1->getType())) { Sum = GO1; - } else if (GO1 == Context->getNullValue(GO1->getType())) { + } else if (GO1 == Constant::getNullValue(GO1->getType())) { Sum = SO1; } else { - // If they aren't the same type, convert both to an integer of the - // target's pointer size. - if (SO1->getType() != GO1->getType()) { - if (Constant *SO1C = dyn_cast(SO1)) { - SO1 = - Context->getConstantExprIntegerCast(SO1C, GO1->getType(), true); - } else if (Constant *GO1C = dyn_cast(GO1)) { - GO1 = - Context->getConstantExprIntegerCast(GO1C, SO1->getType(), true); - } else { - unsigned PS = TD->getPointerSizeInBits(); - if (TD->getTypeSizeInBits(SO1->getType()) == PS) { - // Convert GO1 to SO1's type. - GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this); - - } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) { - // Convert SO1 to GO1's type. - SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this); - } else { - const Type *PT = TD->getIntPtrType(); - SO1 = InsertCastToIntPtrTy(SO1, PT, &GEP, this); - GO1 = InsertCastToIntPtrTy(GO1, PT, &GEP, this); - } - } - } - if (isa(SO1) && isa(GO1)) - Sum = Context->getConstantExprAdd(cast(SO1), - cast(GO1)); - else { - Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); - InsertNewInstBefore(cast(Sum), GEP); - } + // If they aren't the same type, then the input hasn't been processed + // by the loop above yet (which canonicalizes sequential index types to + // intptr_t). Just avoid transforming this until the input has been + // normalized. + if (SO1->getType() != GO1->getType()) + return 0; + Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); } - // Recycle the GEP we already have if possible. - if (SrcGEPOperands.size() == 2) { - GEP.setOperand(0, SrcGEPOperands[0]); + // Update the GEP in place if possible. 
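The gep(gep) combine above now folds the two offsets with one builder add, and only once both indices have already been canonicalized to the same intptr_t type by the loop before it:

    // %t1 = getelementptr i8* %P, i64 %B
    // %t2 = getelementptr i8* %t1, i64 %A
    //   becomes
    // %P.sum = add i64 %B, %A
    // %t2    = getelementptr i8* %P, i64 %P.sum
    Value *Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName() + ".sum");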
+ if (Src->getNumOperands() == 2) { + GEP.setOperand(0, Src->getOperand(0)); GEP.setOperand(1, Sum); return &GEP; - } else { - Indices.insert(Indices.end(), SrcGEPOperands.begin()+1, - SrcGEPOperands.end()-1); - Indices.push_back(Sum); - Indices.insert(Indices.end(), GEP.op_begin()+2, GEP.op_end()); } + Indices.append(Src->op_begin()+1, Src->op_end()-1); + Indices.push_back(Sum); + Indices.append(GEP.op_begin()+2, GEP.op_end()); } else if (isa(*GEP.idx_begin()) && cast(*GEP.idx_begin())->isNullValue() && - SrcGEPOperands.size() != 1) { + Src->getNumOperands() != 1) { // Otherwise we can do the fold if the first index of the GEP is a zero - Indices.insert(Indices.end(), SrcGEPOperands.begin()+1, - SrcGEPOperands.end()); - Indices.insert(Indices.end(), GEP.idx_begin()+1, GEP.idx_end()); + Indices.append(Src->op_begin()+1, Src->op_end()); + Indices.append(GEP.idx_begin()+1, GEP.idx_end()); } if (!Indices.empty()) - return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(), - Indices.end(), GEP.getName()); - - } else if (GlobalValue *GV = dyn_cast(PtrOp)) { - // GEP of global variable. If all of the indices for this GEP are - // constants, we can promote this to a constexpr instead of an instruction. - - // Scan for nonconstants... - SmallVector Indices; - User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); - for (; I != E && isa(*I); ++I) - Indices.push_back(cast(*I)); - - if (I == E) { // If they are all constants... - Constant *CE = Context->getConstantExprGetElementPtr(GV, - &Indices[0],Indices.size()); - - // Replace all uses of the GEP with the new constexpr... - return ReplaceInstUsesWith(GEP, CE); - } - } else if (Value *X = getBitCastOperand(PtrOp)) { // Is the operand a cast? - if (!isa(X->getType())) { - // Not interesting. Source pointer must be a cast from pointer. - } else if (HasZeroPointerIndex) { - // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... - // into : GEP [10 x i8]* X, i32 0, ... - // - // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... - // into : GEP i8* X, ... - // - // This occurs when the program declares an array extern like "int X[];" + return (cast(&GEP)->isInBounds() && + Src->isInBounds()) ? + GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), + Indices.end(), GEP.getName()) : + GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), + Indices.end(), GEP.getName()); + } + + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). + if (Value *X = getBitCastOperand(PtrOp)) { + assert(isa(X->getType()) && "Must be cast from pointer"); + + // If the input bitcast is actually "bitcast(bitcast(x))", then we don't + // want to change the gep until the bitcasts are eliminated. + if (getBitCastOperand(X)) { + Worklist.AddValue(PtrOp); + return 0; + } + + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... + // into : GEP [10 x i8]* X, i32 0, ... + // + // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... + // into : GEP i8* X, ... + // + // This occurs when the program declares an array extern like "int X[];" + if (HasZeroPointerIndex) { const PointerType *CPTy = cast(PtrOp->getType()); const PointerType *XTy = cast(X->getType()); if (const ArrayType *CATy = @@ -11194,10 +11047,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (CATy->getElementType() == XTy->getElementType()) { // -> GEP i8* X, ... 
SmallVector Indices(GEP.idx_begin()+1, GEP.idx_end()); - return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), - GEP.getName()); - } else if (const ArrayType *XATy = - dyn_cast(XTy->getElementType())) { + return cast(&GEP)->isInBounds() ? + GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(), + GEP.getName()) : + GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), + GEP.getName()); + } + + if (const ArrayType *XATy = dyn_cast(XTy->getElementType())){ // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ? if (CATy->getElementType() == XATy->getElementType()) { // -> GEP [10 x i8]* X, i32 0, ... @@ -11216,16 +11073,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast const Type *SrcElTy = cast(X->getType())->getElementType(); const Type *ResElTy=cast(PtrOp->getType())->getElementType(); - if (isa(SrcElTy) && + if (TD && isa(SrcElTy) && TD->getTypeAllocSize(cast(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; - Idx[0] = Context->getNullValue(Type::Int32Ty); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); Idx[1] = GEP.getOperand(1); - Value *V = InsertNewInstBefore( - GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP); + Value *NewGEP = cast(&GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : + Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); // V and GEP are both pointer types --> BitCast - return new BitCastInst(V, GEP.getType()); + return new BitCastInst(NewGEP, GEP.getType()); } // Transform things like: @@ -11233,7 +11091,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (isa(SrcElTy) && ResElTy == Type::Int8Ty) { + if (TD && isa(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast(SrcElTy)->getElementType()); @@ -11243,17 +11101,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { ConstantInt *Scale = 0; if (ArrayEltSize == 1) { NewIdx = GEP.getOperand(1); - Scale = - Context->getConstantInt(cast(NewIdx->getType()), 1); + Scale = ConstantInt::get(cast(NewIdx->getType()), 1); } else if (ConstantInt *CI = dyn_cast(GEP.getOperand(1))) { - NewIdx = Context->getConstantInt(CI->getType(), 1); + NewIdx = ConstantInt::get(CI->getType(), 1); Scale = CI; } else if (Instruction *Inst =dyn_cast(GEP.getOperand(1))){ if (Inst->getOpcode() == Instruction::Shl && isa(Inst->getOperand(1))) { ConstantInt *ShAmt = cast(Inst->getOperand(1)); uint32_t ShAmtVal = ShAmt->getLimitedValue(64); - Scale = Context->getConstantInt(cast(Inst->getType()), + Scale = ConstantInt::get(cast(Inst->getType()), 1ULL << ShAmtVal); NewIdx = Inst->getOperand(0); } else if (Inst->getOpcode() == Instruction::Mul && @@ -11269,23 +11126,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // operation after making sure Scale doesn't have the sign bit set. 
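A worked instance of the scale check above, with hypothetical values: for gep i8* (bitcast [100 x double]* %arr), i32 %t where %t = shl i32 %t2, 3, the shift gives Scale = 8 and ArrayEltSize = 8, so Scale % ArrayEltSize == 0 with quotient 1:

    // %c = bitcast [100 x double]* %arr to i8*
    // %p = getelementptr i8* %c, i32 %t          ; %t = 8 * %t2
    //   becomes
    // %p2 = getelementptr [100 x double]* %arr, i32 0, i32 %t2
    // %p  = bitcast double* %p2 to i8*
    // Had the quotient been 2 instead of 1, the index is first rescaled:
    Scale = ConstantInt::get(Scale->getType(),
                             Scale->getZExtValue() / ArrayEltSize);
    if (Scale->getZExtValue() != 1)
      NewIdx = Builder->CreateMul(NewIdx,
                 ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), false),
                 "idxscale");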
if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL && Scale->getZExtValue() % ArrayEltSize == 0) { - Scale = Context->getConstantInt(Scale->getType(), + Scale = ConstantInt::get(Scale->getType(), Scale->getZExtValue() / ArrayEltSize); if (Scale->getZExtValue() != 1) { - Constant *C = - Context->getConstantExprIntegerCast(Scale, NewIdx->getType(), + Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(), false /*ZExt*/); - Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale"); - NewIdx = InsertNewInstBefore(Sc, GEP); + NewIdx = Builder->CreateMul(NewIdx, C, "idxscale"); } // Insert the new GEP instruction. Value *Idx[2]; - Idx[0] = Context->getNullValue(Type::Int32Ty); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context)); Idx[1] = NewIdx; - Instruction *NewGEP = - GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()); - NewGEP = InsertNewInstBefore(NewGEP, GEP); + Value *NewGEP = cast(&GEP)->isInBounds() ? + Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : + Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return new BitCastInst(NewGEP, GEP.getType()); } @@ -11294,12 +11149,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } /// See if we can simplify: - /// X = bitcast A to B* + /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> /// into a gep of the original struct. This is important for SROA and alias /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast(PtrOp)) { - if (!isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { + if (TD && + !isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. ConstantInt *OffsetV = @@ -11311,7 +11167,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (Offset == 0) { // If the bitcast is of an allocation, and the allocation will be // converted to match the type of the cast, don't touch this. - if (isa(BCI->getOperand(0))) { + if (isa(BCI->getOperand(0)) || + isMalloc(BCI->getOperand(0))) { // See if the bitcast simplifies, if so, don't nuke this GEP yet. if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { @@ -11332,11 +11189,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { const Type *InTy = cast(BCI->getOperand(0)->getType())->getElementType(); if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) { - Instruction *NGEP = - GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(), - NewIndices.end()); - if (NGEP->getType() == GEP.getType()) return NGEP; - InsertNewInstBefore(NGEP, GEP); + Value *NGEP = cast(&GEP)->isInBounds() ? 
+ Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), + NewIndices.end()) : + Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), + NewIndices.end()); + + if (NGEP->getType() == GEP.getType()) + return ReplaceInstUsesWith(GEP, NGEP); NGEP->takeName(&GEP); return new BitCastInst(NGEP, GEP.getType()); } @@ -11351,18 +11211,17 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { if (AI.isArrayAllocation()) { // Check C != 1 if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { const Type *NewTy = - Context->getArrayType(AI.getAllocatedType(), C->getZExtValue()); + ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); AllocationInst *New = 0; // Create and insert the replacement instruction... if (isa(AI)) - New = new MallocInst(NewTy, 0, AI.getAlignment(), AI.getName()); + New = Builder->CreateMalloc(NewTy, 0, AI.getName()); else { assert(isa(AI) && "Unknown type of allocation inst!"); - New = new AllocaInst(NewTy, 0, AI.getAlignment(), AI.getName()); + New = Builder->CreateAlloca(NewTy, 0, AI.getName()); } - - InsertNewInstBefore(New, AI); + New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info @@ -11373,27 +11232,27 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) { // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Value *NullIdx = Context->getNullValue(Type::Int32Ty); + Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context)); Value *Idx[2]; Idx[0] = NullIdx; Idx[1] = NullIdx; - Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2, - New->getName()+".sub", It); + Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, + New->getName()+".sub", It); // Now make everything use the getelementptr instead of the original // allocation. return ReplaceInstUsesWith(AI, V); } else if (isa(AI.getArraySize())) { - return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); + return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); } } - if (isa(AI) && AI.getAllocatedType()->isSized()) { + if (TD && isa(AI) && AI.getAllocatedType()->isSized()) { // If alloca'ing a zero byte object, replace the alloca with a null pointer. // Note that we only do this for alloca's, because malloc should allocate // and return a unique pointer, even for a zero byte allocation. if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) - return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); + return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); // If the alignment is 0 (unspecified), assign it the preferred alignment. if (AI.getAlignment() == 0) @@ -11409,8 +11268,8 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { // free undef -> unreachable. if (isa(Op)) { // Insert a new store to null because we cannot modify the CFG here. 
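visitAllocationInst above rewrites a constant-count array allocation into an allocation of an array type plus an inbounds gep to element zero, setting alignment explicitly since the builder helpers do not take one. With New and the insertion point It as in the hunk:

    // %a = alloca i32, i32 4, align 4
    //   becomes
    // %a2    = alloca [4 x i32], align 4
    // %a.sub = getelementptr inbounds [4 x i32]* %a2, i32 0, i32 0
    Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(Ctx));
    Value *Idx[] = { NullIdx, NullIdx };
    Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2,
                                                 New->getName() + ".sub", It);

Every use of the original alloca is then replaced with %a.sub.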
- new StoreInst(Context->getConstantIntTrue(), - Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), &FI); + new StoreInst(ConstantInt::getTrue(*Context), + UndefValue::get(Type::getInt1PtrTy(*Context)), &FI); return EraseInstFromFunction(FI); } @@ -11428,7 +11287,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { // Change free (gep X, 0,0,0,0) into free(X) if (GetElementPtrInst *GEPI = dyn_cast(Op)) { if (GEPI->hasAllZeroIndices()) { - AddToWorkList(GEPI); + Worklist.Add(GEPI); FI.setOperand(0, GEPI->getOperand(0)); return &FI; } @@ -11440,6 +11299,21 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) { EraseInstFromFunction(FI); return EraseInstFromFunction(*MI); } + if (isMalloc(Op)) { + if (CallInst* CI = extractMallocCallFromBitCast(Op)) { + if (Op->hasOneUse() && CI->hasOneUse()) { + EraseInstFromFunction(FI); + EraseInstFromFunction(*CI); + return EraseInstFromFunction(*cast(Op)); + } + } else { + // Op is a call to malloc + if (Op->hasOneUse()) { + EraseInstFromFunction(FI); + return EraseInstFromFunction(*cast(Op)); + } + } + } return 0; } @@ -11450,7 +11324,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const TargetData *TD) { User *CI = cast(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); - LLVMContext* Context = IC.getContext(); + LLVMContext *Context = IC.getContext(); if (TD) { if (ConstantExpr *CE = dyn_cast(CI)) { @@ -11479,7 +11353,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, SingleChar = 0; StrVal = (StrVal << 8) | SingleChar; } - Value *NL = Context->getConstantInt(StrVal); + Value *NL = ConstantInt::get(*Context, StrVal); return IC.ReplaceInstUsesWith(LI, NL); } } @@ -11505,26 +11379,26 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, if (Constant *CSrc = dyn_cast(CastOp)) if (ASrcTy->getNumElements() != 0) { Value *Idxs[2]; - Idxs[0] = Idxs[1] = Context->getNullValue(Type::Int32Ty); - CastOp = Context->getConstantExprGetElementPtr(CSrc, Idxs, 2); + Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context)); + CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2); SrcTy = cast(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } - if ((SrcPTy->isInteger() || isa(SrcPTy) || + if (IC.getTargetData() && + (SrcPTy->isInteger() || isa(SrcPTy) || isa(SrcPTy)) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. (isa(SrcPTy) == isa(LI.getType())) && - IC.getTargetData().getTypeSizeInBits(SrcPTy) == - IC.getTargetData().getTypeSizeInBits(DestPTy)) { + IC.getTargetData()->getTypeSizeInBits(SrcPTy) == + IC.getTargetData()->getTypeSizeInBits(DestPTy)) { // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. - Value *NewLoad = IC.InsertNewInstBefore(new LoadInst(CastOp, - CI->getName(), - LI.isVolatile()),LI); + Value *NewLoad = + IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } @@ -11537,14 +11411,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); // Attempt to improve the alignment. - unsigned KnownAlign = - GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); - if (KnownAlign > - (LI.getAlignment() == 0 ? 
TD->getABITypeAlignment(LI.getType()) : - LI.getAlignment())) - LI.setAlignment(KnownAlign); - - // load (cast X) --> cast (load X) iff safe + if (TD) { + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); + if (KnownAlign > + (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : + LI.getAlignment())) + LI.setAlignment(KnownAlign); + } + + // load (cast X) --> cast (load X) iff safe. if (isa(Op)) if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) return Res; @@ -11562,29 +11438,28 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (GetElementPtrInst *GEPI = dyn_cast(Op)) { const Value *GEPI0 = GEPI->getOperand(0); // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(GEPI0) && - cast(GEPI0->getType())->getAddressSpace() == 0) { + if (isa(GEPI0) && GEPI->getPointerAddressSpace() == 0){ // Insert a new store to null instruction before the load to indicate // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. - new StoreInst(Context->getUndef(LI.getType()), - Context->getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } } if (Constant *C = dyn_cast(Op)) { // load null/undef -> undef // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(C) || (C->isNullValue() && - cast(Op->getType())->getAddressSpace() == 0)) { + if (isa(C) || + (C->isNullValue() && LI.getPointerAddressSpace() == 0)) { // Insert a new store to null instruction before the load to indicate that // this code is not reachable. We do this instead of inserting an // unreachable instruction directly because we cannot modify the CFG. - new StoreInst(Context->getUndef(LI.getType()), - Context->getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } // Instcombine load (constant global) into the value loaded. @@ -11605,9 +11480,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // that this code is not reachable. We do this instead of inserting // an unreachable instruction directly because we cannot modify the // CFG. 
- new StoreInst(Context->getUndef(LI.getType()), - Context->getNullValue(Op->getType()), &LI); - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + new StoreInst(UndefValue::get(LI.getType()), + Constant::getNullValue(Op->getType()), &LI); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } } else if (CE->isCast()) { @@ -11622,9 +11497,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { if (GlobalVariable *GV = dyn_cast(Op->getUnderlyingObject())){ if (GV->isConstant() && GV->hasDefinitiveInitializer()) { if (GV->getInitializer()->isNullValue()) - return ReplaceInstUsesWith(LI, Context->getNullValue(LI.getType())); + return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType())); else if (isa(GV->getInitializer())) - return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); + return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); } } @@ -11643,10 +11518,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) && isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { - Value *V1 = InsertNewInstBefore(new LoadInst(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"), LI); - Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"), LI); + Value *V1 = Builder->CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + Value *V2 = Builder->CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -11674,7 +11549,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { User *CI = cast(SI.getOperand(1)); Value *CastOp = CI->getOperand(0); - LLVMContext* Context = IC.getContext(); const Type *DestPTy = cast(CI->getType())->getElementType(); const PointerType *SrcTy = dyn_cast(CastOp->getType()); @@ -11696,7 +11570,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // constants. if (isa(SrcPTy) || isa(SrcPTy)) { // Index through pointer. - Constant *Zero = Context->getNullValue(Type::Int32Ty); + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext())); NewGEPIndices.push_back(Zero); while (1) { @@ -11713,7 +11587,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { } } - SrcTy = Context->getPointerType(SrcPTy, SrcTy->getAddressSpace()); + SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } if (!SrcPTy->isInteger() && !isa(SrcPTy)) @@ -11721,10 +11595,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // If the pointers point into different address spaces or if they point to // values with different sizes, we can't do the transformation. - if (SrcTy->getAddressSpace() != + if (!IC.getTargetData() || + SrcTy->getAddressSpace() != cast(CI->getType())->getAddressSpace() || - IC.getTargetData().getTypeSizeInBits(SrcPTy) != - IC.getTargetData().getTypeSizeInBits(DestPTy)) + IC.getTargetData()->getTypeSizeInBits(SrcPTy) != + IC.getTargetData()->getTypeSizeInBits(DestPTy)) return 0; // Okay, we are casting from one integer or pointer type to another of @@ -11745,22 +11620,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // SIOp0 is a pointer to aggregate and this is a store to the first field, // emit a GEP to index into its first field. 
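One of the load hunks above turns a load of a select into two loads feeding a select; this is legal only because both addresses were shown safe to load unconditionally. Minimal builder form (B, C, P, Q are hypothetical):

    // load (select %c, i32* %p, i32* %q)
    //   becomes
    // %p.val = load %p ; %q.val = load %q ; select %c, %p.val, %q.val
    Value *V1 = B.CreateLoad(P, "p.val");
    Value *V2 = B.CreateLoad(Q, "q.val");
    Value *R  = B.CreateSelect(C, V1, V2);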
- if (!NewGEPIndices.empty()) { - if (Constant *C = dyn_cast(CastOp)) - CastOp = Context->getConstantExprGetElementPtr(C, &NewGEPIndices[0], - NewGEPIndices.size()); - else - CastOp = IC.InsertNewInstBefore( - GetElementPtrInst::Create(CastOp, NewGEPIndices.begin(), - NewGEPIndices.end()), SI); - } + if (!NewGEPIndices.empty()) + CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), + NewGEPIndices.end()); - if (Constant *C = dyn_cast(SIOp0)) - NewCast = Context->getConstantExprCast(opcode, C, CastDstTy); - else - NewCast = IC.InsertNewInstBefore( - CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"), - SI); + NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, + SIOp0->getName()+".c"); return new StoreInst(NewCast, CastOp); } @@ -11777,12 +11642,16 @@ static bool equivalentAddressValues(Value *A, Value *B) { if (A == B) return true; // Test if the values come form identical arithmetic instructions. + // This uses isIdenticalToWhenDefined instead of isIdenticalTo because + // its only used to compare two uses within the same basic block, which + // means that they'll always either have the same value or one of them + // will have an undefined value. if (isa(A) || isa(A) || isa(A) || isa(A)) if (Instruction *BI = dyn_cast(B)) - if (cast(A)->isIdenticalTo(BI)) + if (cast(A)->isIdenticalToWhenDefined(BI)) return true; // Otherwise they may not be equivalent. @@ -11854,12 +11723,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { } // Attempt to improve the alignment. - unsigned KnownAlign = - GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); - if (KnownAlign > - (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : - SI.getAlignment())) - SI.setAlignment(KnownAlign); + if (TD) { + unsigned KnownAlign = + GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); + if (KnownAlign > + (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : + SI.getAlignment())) + SI.setAlignment(KnownAlign); + } // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This @@ -11914,12 +11785,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG - if (isa(Ptr) && - cast(Ptr->getType())->getAddressSpace() == 0) { + if (isa(Ptr) && SI.getPointerAddressSpace() == 0) { if (!isa(Val)) { - SI.setOperand(0, Context->getUndef(Val->getType())); + SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast(Val)) - AddToWorkList(U); // Dropped a use. + Worklist.Add(U); // Dropped a use. ++NumCombined; } return 0; // Do not modify these! @@ -12096,41 +11966,34 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) { // Cannonicalize fcmp_one -> fcmp_oeq FCmpInst::Predicate FPred; Value *Y; if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest))) - if ((FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || - FPred == FCmpInst::FCMP_OGE) && BI.getCondition()->hasOneUse()) { - FCmpInst *I = cast(BI.getCondition()); - FCmpInst::Predicate NewPred = FCmpInst::getInversePredicate(FPred); - Instruction *NewSCC = new FCmpInst(NewPred, X, Y, "", I); - NewSCC->takeName(I); - // Swap Destinations and condition... 
- BI.setCondition(NewSCC); + TrueDest, FalseDest)) && + BI.getCondition()->hasOneUse()) + if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || + FPred == FCmpInst::FCMP_OGE) { + FCmpInst *Cond = cast(BI.getCondition()); + Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); + + // Swap Destinations and condition. BI.setSuccessor(0, FalseDest); BI.setSuccessor(1, TrueDest); - RemoveFromWorkList(I); - I->eraseFromParent(); - AddToWorkList(NewSCC); + Worklist.Add(Cond); return &BI; } // Cannonicalize icmp_ne -> icmp_eq ICmpInst::Predicate IPred; if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), - TrueDest, FalseDest))) - if ((IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || - IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || - IPred == ICmpInst::ICMP_SGE) && BI.getCondition()->hasOneUse()) { - ICmpInst *I = cast(BI.getCondition()); - ICmpInst::Predicate NewPred = ICmpInst::getInversePredicate(IPred); - Instruction *NewSCC = new ICmpInst(NewPred, X, Y, "", I); - NewSCC->takeName(I); - // Swap Destinations and condition... - BI.setCondition(NewSCC); + TrueDest, FalseDest)) && + BI.getCondition()->hasOneUse()) + if (IPred == ICmpInst::ICMP_NE || IPred == ICmpInst::ICMP_ULE || + IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || + IPred == ICmpInst::ICMP_SGE) { + ICmpInst *Cond = cast(BI.getCondition()); + Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); + // Swap Destinations and condition. BI.setSuccessor(0, FalseDest); BI.setSuccessor(1, TrueDest); - RemoveFromWorkList(I); - I->eraseFromParent();; - AddToWorkList(NewSCC); + Worklist.Add(Cond); return &BI; } @@ -12145,10 +12008,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2) SI.setOperand(i, - Context->getConstantExprSub(cast(SI.getOperand(i)), + ConstantExpr::getSub(cast(SI.getOperand(i)), AddRHS)); SI.setOperand(0, I->getOperand(0)); - AddToWorkList(I); + Worklist.Add(I); return &SI; } } @@ -12163,10 +12026,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (Constant *C = dyn_cast(Agg)) { if (isa(C)) - return ReplaceInstUsesWith(EV, Context->getUndef(EV.getType())); + return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType())); if (isa(C)) - return ReplaceInstUsesWith(EV, Context->getNullValue(EV.getType())); + return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType())); if (isa(C) || isa(C)) { // Extract the element indexed by the first index out of the constant @@ -12214,10 +12077,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). - Value *NewEV = InsertNewInstBefore( - ExtractValueInst::Create(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()), - EV); + Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), + EV.idx_begin(), EV.idx_end()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), insi, inse); } @@ -12303,17 +12164,17 @@ static std::vector getShuffleMask(const ShuffleVectorInst *SVI) { /// value is already around as a register, for example if it were inserted then /// extracted from the vector. 
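The two visitBranchInst hunks above stop building a fresh inverse compare; they mutate the predicate in place, swap the branch successors, and requeue the compare, avoiding an allocation and an erase:

    // %c = icmp ne i32 %x, %y     br i1 %c, label %T, label %F
    //   becomes
    // %c = icmp eq i32 %x, %y     br i1 %c, label %F, label %T
    Cond->setPredicate(ICmpInst::getInversePredicate(IPred));
    BI.setSuccessor(0, FalseDest);
    BI.setSuccessor(1, TrueDest);
    Worklist.Add(Cond);   // revisit the mutated compare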
static Value *FindScalarElement(Value *V, unsigned EltNo, - LLVMContext* Context) { + LLVMContext *Context) { assert(isa(V->getType()) && "Not looking at a vector?"); const VectorType *PTy = cast(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. - return Context->getUndef(PTy->getElementType()); + return UndefValue::get(PTy->getElementType()); if (isa(V)) - return Context->getUndef(PTy->getElementType()); + return UndefValue::get(PTy->getElementType()); else if (isa(V)) - return Context->getNullValue(PTy->getElementType()); + return Constant::getNullValue(PTy->getElementType()); else if (ConstantVector *CP = dyn_cast(V)) return CP->getOperand(EltNo); else if (InsertElementInst *III = dyn_cast(V)) { @@ -12339,7 +12200,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo, else if (InEl < LHSWidth*2) return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context); else - return Context->getUndef(PTy->getElementType()); + return UndefValue::get(PTy->getElementType()); } // Otherwise, we don't know. @@ -12349,18 +12210,18 @@ static Value *FindScalarElement(Value *V, unsigned EltNo, Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is undef, replace extract with scalar undef. if (isa(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); // If vector val is constant 0, replace extract with scalar 0. if (isa(EI.getOperand(0))) - return ReplaceInstUsesWith(EI, Context->getNullValue(EI.getType())); + return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType())); if (ConstantVector *C = dyn_cast(EI.getOperand(0))) { // If vector val is constant with all elements the same, replace EI with // that element. When the elements are not identical, we cannot replace yet // (we do that below, but only when the index is constant). Constant *op0 = C->getOperand(0); - for (unsigned i = 1; i < C->getNumOperands(); ++i) + for (unsigned i = 1; i != C->getNumOperands(); ++i) if (C->getOperand(i) != op0) { op0 = 0; break; @@ -12373,13 +12234,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // find a previously computed scalar that was inserted into the vector. if (ConstantInt *IdxC = dyn_cast(EI.getOperand(1))) { unsigned IndexVal = IdxC->getZExtValue(); - unsigned VectorWidth = - cast(EI.getOperand(0)->getType())->getNumElements(); + unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); // If this is extracting an invalid index, turn this into undef, to avoid // crashing the code below. if (IndexVal >= VectorWidth) - return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); // This instruction only demands the single element from the input vector. 
// If the input vector has a single use, simplify it based on this use @@ -12411,42 +12271,27 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } if (Instruction *I = dyn_cast(EI.getOperand(0))) { - if (I->hasOneUse()) { - // Push extractelement into predecessor operation if legal and - // profitable to do so - if (BinaryOperator *BO = dyn_cast(I)) { - bool isConstantElt = isa(EI.getOperand(1)); - if (CheapToScalarize(BO, isConstantElt)) { - ExtractElementInst *newEI0 = - new ExtractElementInst(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); - ExtractElementInst *newEI1 = - new ExtractElementInst(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); - InsertNewInstBefore(newEI0, EI); - InsertNewInstBefore(newEI1, EI); - return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); - } - } else if (isa(I)) { - unsigned AS = - cast(I->getOperand(0)->getType())->getAddressSpace(); - Value *Ptr = InsertBitCastBefore(I->getOperand(0), - Context->getPointerType(EI.getType(), AS),EI); - GetElementPtrInst *GEP = - GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep"); - InsertNewInstBefore(GEP, EI); - return new LoadInst(GEP); - } - } - if (InsertElementInst *IE = dyn_cast(I)) { + // Push extractelement into predecessor operation if legal and + // profitable to do so + if (BinaryOperator *BO = dyn_cast(I)) { + if (I->hasOneUse() && + CheapToScalarize(BO, isa(EI.getOperand(1)))) { + Value *newEI0 = + Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); + Value *newEI1 = + Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); + return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); + } + } else if (InsertElementInst *IE = dyn_cast(I)) { // Extracting the inserted element? if (IE->getOperand(2) == EI.getOperand(1)) return ReplaceInstUsesWith(EI, IE->getOperand(1)); // If the inserted and extracted elements are constants, they must not // be the same value, extract from the pre-inserted value instead. - if (isa(IE->getOperand(2)) && - isa(EI.getOperand(1))) { - AddUsesToWorkList(EI); + if (isa(IE->getOperand(2)) && isa(EI.getOperand(1))) { + Worklist.AddValue(EI.getOperand(0)); EI.setOperand(0, IE->getOperand(0)); return &EI; } @@ -12465,11 +12310,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { SrcIdx -= LHSWidth; Src = SVI->getOperand(1); } else { - return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); + return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); } - return new ExtractElementInst(Src, SrcIdx); + return ExtractElementInst::Create(Src, + ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx, + false)); } } + // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } @@ -12479,21 +12327,21 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { /// Otherwise, return false. 
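The extractelement hunk above scalarizes a single-use vector binop, when CheapToScalarize agrees, by extracting both operands first:

    // extractelement (add <4 x i32> %v, %w), i32 %i
    //   becomes
    // add i32 (extractelement %v, %i), (extractelement %w, %i)
    Value *L = Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
                                             EI.getName() + ".lhs");
    Value *R = Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
                                             EI.getName() + ".rhs");
    return BinaryOperator::Create(BO->getOpcode(), L, R);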
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, std::vector &Mask, - LLVMContext* Context) { + LLVMContext *Context) { assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = cast(V->getType())->getNumElements(); if (isa(V)) { - Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context))); return true; } else if (V == LHS) { for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); + Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i)); return true; } else if (V == RHS) { for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(Context->getConstantInt(Type::Int32Ty, i+NumElts)); + Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts)); return true; } else if (InsertElementInst *IEI = dyn_cast(V)) { // If this is an insert of an extract from some other vector, include it. @@ -12510,7 +12358,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, // transitively ok. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) { // If so, update the mask to reflect the inserted undef. - Mask[InsertedIdx] = Context->getUndef(Type::Int32Ty); + Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context)); return true; } } else if (ExtractElementInst *EI = dyn_cast(ScalarOp)){ @@ -12527,11 +12375,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, // If so, update the mask to reflect the inserted value. if (EI->getOperand(0) == LHS) { Mask[InsertedIdx % NumElts] = - Context->getConstantInt(Type::Int32Ty, ExtractedIdx); + ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx); } else { assert(EI->getOperand(0) == RHS); Mask[InsertedIdx % NumElts] = - Context->getConstantInt(Type::Int32Ty, ExtractedIdx+NumElts); + ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts); } return true; @@ -12549,17 +12397,17 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector &Mask, - Value *&RHS, LLVMContext* Context) { + Value *&RHS, LLVMContext *Context) { assert(isa(V->getType()) && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast(V->getType())->getNumElements(); if (isa(V)) { - Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); + Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context))); return V; } else if (isa(V)) { - Mask.assign(NumElts, Context->getConstantInt(Type::Int32Ty, 0)); + Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0)); return V; } else if (InsertElementInst *IEI = dyn_cast(V)) { // If this is an insert of an extract from some other vector, include it. @@ -12580,7 +12428,7 @@ static Value *CollectShuffleElements(Value *V, std::vector &Mask, RHS = EI->getOperand(0); Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context); Mask[InsertedIdx % NumElts] = - Context->getConstantInt(Type::Int32Ty, NumElts+ExtractedIdx); + ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx); return V; } @@ -12590,7 +12438,7 @@ static Value *CollectShuffleElements(Value *V, std::vector &Mask, // Everything but the extracted element is replaced with the RHS. 
for (unsigned i = 0; i != NumElts; ++i) { if (i != InsertedIdx) - Mask[i] = Context->getConstantInt(Type::Int32Ty, NumElts+i); + Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i); } return V; } @@ -12608,7 +12456,7 @@ static Value *CollectShuffleElements(Value *V, std::vector &Mask, // Otherwise, can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); + Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i)); return V; } @@ -12635,45 +12483,23 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { return ReplaceInstUsesWith(IE, VecOp); if (InsertedIdx >= NumVectorElts) // Out of range insert. - return ReplaceInstUsesWith(IE, Context->getUndef(IE.getType())); + return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); // If we are extracting a value from a vector, then inserting it right // back into the same place, just use the input vector. if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) return ReplaceInstUsesWith(IE, VecOp); - // We could theoretically do this for ANY input. However, doing so could - // turn chains of insertelement instructions into a chain of shufflevector - // instructions, and right now we do not merge shufflevectors. As such, - // only do this in a situation where it is clear that there is benefit. - if (isa(VecOp) || isa(VecOp)) { - // Turn this into shuffle(EIOp0, VecOp, Mask). The result has all of - // the values of VecOp, except then one read from EIOp0. - // Build a new shuffle mask. - std::vector Mask; - if (isa(VecOp)) - Mask.assign(NumVectorElts, Context->getUndef(Type::Int32Ty)); - else { - assert(isa(VecOp) && "Unknown thing"); - Mask.assign(NumVectorElts, Context->getConstantInt(Type::Int32Ty, - NumVectorElts)); - } - Mask[InsertedIdx] = - Context->getConstantInt(Type::Int32Ty, ExtractedIdx); - return new ShuffleVectorInst(EI->getOperand(0), VecOp, - Context->getConstantVector(Mask)); - } - // If this insertelement isn't used by some other insertelement, turn it // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa(IE.use_back())) { std::vector Mask; Value *RHS = 0; Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context); - if (RHS == 0) RHS = Context->getUndef(LHS->getType()); + if (RHS == 0) RHS = UndefValue::get(LHS->getType()); // We now have a shuffle of LHS, RHS, Mask. return new ShuffleVectorInst(LHS, RHS, - Context->getConstantVector(Mask)); + ConstantVector::get(Mask)); } } } @@ -12697,7 +12523,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // Undefined shuffle mask -> undefined value. if (isa(SVI.getOperand(2))) - return ReplaceInstUsesWith(SVI, Context->getUndef(SVI.getType())); + return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); unsigned VWidth = cast(SVI.getType())->getNumElements(); @@ -12724,21 +12550,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { std::vector Elts; for (unsigned i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] >= 2*e) - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); else { if ((Mask[i] >= e && isa(RHS)) || (Mask[i] < e && isa(LHS))) { Mask[i] = 2*e; // Turn into undef. - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); } else { Mask[i] = Mask[i] % e; // Force to LHS. 
- Elts.push_back(Context->getConstantInt(Type::Int32Ty, Mask[i])); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i])); } } } SVI.setOperand(0, SVI.getOperand(1)); - SVI.setOperand(1, Context->getUndef(RHS->getType())); - SVI.setOperand(2, Context->getConstantVector(Elts)); + SVI.setOperand(1, UndefValue::get(RHS->getType())); + SVI.setOperand(2, ConstantVector::get(Elts)); LHS = SVI.getOperand(0); RHS = SVI.getOperand(1); MadeChange = true; @@ -12788,14 +12614,14 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { std::vector Elts; for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { if (NewMask[i] >= LHSInNElts*2) { - Elts.push_back(Context->getUndef(Type::Int32Ty)); + Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context))); } else { - Elts.push_back(Context->getConstantInt(Type::Int32Ty, NewMask[i])); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i])); } } return new ShuffleVectorInst(LHSSVI->getOperand(0), LHSSVI->getOperand(1), - Context->getConstantVector(Elts)); + ConstantVector::get(Elts)); } } } @@ -12855,6 +12681,9 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, const TargetData *TD) { SmallVector Worklist; Worklist.push_back(BB); + + std::vector InstrsForInstCombineWorklist; + InstrsForInstCombineWorklist.reserve(128); while (!Worklist.empty()) { BB = Worklist.back(); @@ -12863,44 +12692,28 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, // We have now visited this block! If we've already been here, ignore it. if (!Visited.insert(BB)) continue; - DbgInfoIntrinsic *DBI_Prev = NULL; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *Inst = BBI++; // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst)) { ++NumDeadInst; - DOUT << "IC: DCE: " << *Inst; + DEBUG(errs() << "IC: DCE: " << *Inst << '\n'); Inst->eraseFromParent(); continue; } // ConstantProp instruction if trivially constant. - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { - DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst; + if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { + DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " + << *Inst << '\n'); Inst->replaceAllUsesWith(C); ++NumConstProp; Inst->eraseFromParent(); continue; } - - // If there are two consecutive llvm.dbg.stoppoint calls then - // it is likely that the optimizer deleted code in between these - // two intrinsics. - DbgInfoIntrinsic *DBI_Next = dyn_cast(Inst); - if (DBI_Next) { - if (DBI_Prev - && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint - && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) { - IC.RemoveFromWorkList(DBI_Prev); - DBI_Prev->eraseFromParent(); - } - DBI_Prev = DBI_Next; - } else { - DBI_Prev = 0; - } - IC.AddToWorkList(Inst); + InstrsForInstCombineWorklist.push_back(Inst); } // Recursively visit successors. If this is a branch or switch on a @@ -12932,14 +12745,22 @@ static void AddReachableCodeToWorklist(BasicBlock *BB, for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) Worklist.push_back(TI->getSuccessor(i)); } + + // Once we've found all of the instructions to add to instcombine's worklist, + // add them in reverse order. This way instcombine will visit from the top + // of the function down. This jives well with the way that it adds all uses + // of instructions to the worklist after doing a transformation, thus avoiding + // some N^2 behavior in pathological cases. 
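The AddInitialGroup call below depends on the worklist popping from the back of its stack. A minimal sketch of that contract (a stand-in, not the real InstCombineWorklist, and with a forward-declared Instruction in place of llvm::Instruction):

    #include <cassert>
    #include <vector>

    struct Instruction;  // stand-in for llvm::Instruction

    class WorklistSketch {
      std::vector<Instruction*> Stack;  // RemoveOne() pops from the back
    public:
      // Seed with List[0..NumEntries): push in reverse so that List[0],
      // the instruction at the top of the function, is popped first.
      void AddInitialGroup(Instruction *const *List, unsigned NumEntries) {
        assert(Stack.empty() && "seed an empty worklist only");
        Stack.reserve(NumEntries);
        for (unsigned i = NumEntries; i != 0; --i)
          Stack.push_back(List[i - 1]);
      }
      Instruction *RemoveOne() {
        Instruction *I = Stack.back();
        Stack.pop_back();
        return I;
      }
      bool isEmpty() const { return Stack.empty(); }
    };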
+ IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], + InstrsForInstCombineWorklist.size()); } bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { - bool Changed = false; - TD = &getAnalysis(); + MadeIRChange = false; + TD = getAnalysisIfAvailable(); - DEBUG(DOUT << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " - << F.getNameStr() << "\n"); + DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " + << F.getNameStr() << "\n"); { // Do a depth-first traversal of the function, populate the worklist with @@ -12957,71 +12778,73 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { while (Term != BB->begin()) { // Remove instrs bottom-up BasicBlock::iterator I = Term; --I; - DOUT << "IC: DCE: " << *I; + DEBUG(errs() << "IC: DCE: " << *I << '\n'); // A debug intrinsic shouldn't force another iteration if we weren't // going to do one without it. if (!isa(I)) { ++NumDeadInst; - Changed = true; + MadeIRChange = true; } - if (!I->use_empty()) - I->replaceAllUsesWith(Context->getUndef(I->getType())); + + + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I->getType()->isVoidTy()) + I->replaceAllUsesWith(UndefValue::get(I->getType())); I->eraseFromParent(); } } } - while (!Worklist.empty()) { - Instruction *I = RemoveOneFromWorkList(); + while (!Worklist.isEmpty()) { + Instruction *I = Worklist.RemoveOne(); if (I == 0) continue; // skip null values. // Check to see if we can DCE the instruction. if (isInstructionTriviallyDead(I)) { - // Add operands to the worklist. - if (I->getNumOperands() < 4) - AddUsesToWorkList(*I); + DEBUG(errs() << "IC: DCE: " << *I << '\n'); + EraseInstFromFunction(*I); ++NumDeadInst; - - DOUT << "IC: DCE: " << *I; - - I->eraseFromParent(); - RemoveFromWorkList(I); - Changed = true; + MadeIRChange = true; continue; } // Instruction isn't dead, see if we can constant propagate it. - if (Constant *C = ConstantFoldInstruction(I, TD)) { - DOUT << "IC: ConstFold to: " << *C << " from: " << *I; + if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) { + DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. - AddUsesToWorkList(*I); ReplaceInstUsesWith(*I, C); - ++NumConstProp; - I->eraseFromParent(); - RemoveFromWorkList(I); - Changed = true; + EraseInstFromFunction(*I); + MadeIRChange = true; continue; } - if (TD && - (I->getType()->getTypeID() == Type::VoidTyID || - I->isTrapping())) { + if (TD) { // See if we can constant fold its operands. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) if (ConstantExpr *CE = dyn_cast(i)) - if (Constant *NewC = ConstantFoldConstantExpression(CE, TD)) + if (Constant *NewC = ConstantFoldConstantExpression(CE, + F.getContext(), TD)) if (NewC != CE) { - i->set(NewC); - Changed = true; + *i = NewC; + MadeIRChange = true; } } // See if we can trivially sink this instruction to a successor basic block. if (I->hasOneUse()) { BasicBlock *BB = I->getParent(); - BasicBlock *UserParent = cast(I->use_back())->getParent(); + Instruction *UserInst = cast(I->use_back()); + BasicBlock *UserParent; + + // Get the block the use occurs in. + if (PHINode *PN = dyn_cast(UserInst)) + UserParent = PN->getIncomingBlock(I->use_begin().getUse()); + else + UserParent = UserInst->getParent(); + if (UserParent != BB) { bool UserIsSuccessor = false; // See if the user is one of our successors. 
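The sinking logic above special-cases PHI users because a value consumed by a PHI node is really used at the end of the corresponding incoming block, not in the PHI's own block. Restated as a standalone helper (a sketch built from the calls the hunk itself uses, not a function the patch defines):

    static BasicBlock *getBlockWhereUsed(Instruction *I) {
      // Assumes I has exactly one use, as checked before sinking.
      Instruction *UserInst = cast<Instruction>(I->use_back());
      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
        // The use is "executed" on the edge from the incoming block.
        return PN->getIncomingBlock(I->use_begin().getUse());
      return UserInst->getParent();
    }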
@@ -13034,31 +12857,34 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // If the user is one of our immediate successors, and if that successor // only has us as a predecessors (we'd have to split the critical edge // otherwise), we can keep going. - if (UserIsSuccessor && !isa(I->use_back()) && - next(pred_begin(UserParent)) == pred_end(UserParent)) + if (UserIsSuccessor && UserParent->getSinglePredecessor()) // Okay, the CFG is simple enough, try to sink this instruction. - Changed |= TryToSinkInstruction(I, UserParent); + MadeIRChange |= TryToSinkInstruction(I, UserParent); } } - // Now that we have an instruction, try combining it to simplify it... + // Now that we have an instruction, try combining it to simplify it. + Builder->SetInsertPoint(I->getParent(), I); + #ifndef NDEBUG std::string OrigI; #endif - DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str();); + DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); + DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); + if (Instruction *Result = visit(*I)) { ++NumCombined; // Should we replace the old instruction with a new one? if (Result != I) { - DOUT << "IC: Old = " << *I - << " New = " << *Result; + DEBUG(errs() << "IC: Old = " << *I << '\n' + << " New = " << *Result << '\n'); // Everything uses the new instruction now. I->replaceAllUsesWith(Result); // Push the new instruction and any users onto the worklist. - AddToWorkList(Result); - AddUsersToWorkList(*Result); + Worklist.Add(Result); + Worklist.AddUsersToWorkList(*Result); // Move the name to the new instruction first. Result->takeName(I); @@ -13073,52 +12899,42 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { InstParent->getInstList().insert(InsertPos, Result); - // Make sure that we reprocess all operands now that we reduced their - // use counts. - AddUsesToWorkList(*I); - - // Instructions can end up on the worklist more than once. Make sure - // we do not process an instruction that has been deleted. - RemoveFromWorkList(I); - - // Erase the old instruction. - InstParent->getInstList().erase(I); + EraseInstFromFunction(*I); } else { #ifndef NDEBUG - DOUT << "IC: Mod = " << OrigI - << " New = " << *I; + DEBUG(errs() << "IC: Mod = " << OrigI << '\n' + << " New = " << *I << '\n'); #endif // If the instruction was modified, it's possible that it is now dead. // if so, remove it. if (isInstructionTriviallyDead(I)) { - // Make sure we process all operands now that we are reducing their - // use counts. - AddUsesToWorkList(*I); - - // Instructions may end up in the worklist more than once. Erase all - // occurrences of this instruction. - RemoveFromWorkList(I); - I->eraseFromParent(); + EraseInstFromFunction(*I); } else { - AddToWorkList(I); - AddUsersToWorkList(*I); + Worklist.Add(I); + Worklist.AddUsersToWorkList(*I); } } - Changed = true; + MadeIRChange = true; } } - assert(WorklistMap.empty() && "Worklist empty, but map not?"); - - // Do an explicit clear, this shrinks the map if needed. - WorklistMap.clear(); - return Changed; + Worklist.Zap(); + return MadeIRChange; } bool InstCombiner::runOnFunction(Function &F) { MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + Context = &F.getContext(); + + + /// Builder - This is an IRBuilder that automatically inserts new + /// instructions into the worklist when they are created. 
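The declaration that follows wires a custom insertion policy into IRBuilder, which is what the comment above promises. As a sketch of the shape of such a policy (illustrative only; the real InstCombineIRInserter is presumably defined near the top of this file, outside this excerpt, and details of its base class are assumptions):

    class WorklistInserter {  // cf. InstCombineIRInserter
      InstCombineWorklist &Worklist;
    public:
      explicit WorklistInserter(InstCombineWorklist &WL) : Worklist(WL) {}
      // IRBuilder calls InsertHelper for every instruction it creates, so
      // the hook can both place the instruction and queue it for revisiting.
      void InsertHelper(Instruction *I, const Twine &Name,
                        BasicBlock *BB, BasicBlock::iterator InsertPt) const {
        if (BB) BB->getInstList().insert(InsertPt, I);
        I->setName(Name);
        Worklist.Add(I);  // every builder-created instruction gets revisited
      }
    };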
+  IRBuilder<true, ConstantFolder, InstCombineIRInserter>
+    TheBuilder(F.getContext(), ConstantFolder(F.getContext()),
+               InstCombineIRInserter(Worklist));
+  Builder = &TheBuilder;
 
   bool EverMadeChange = false;
 
@@ -13126,6 +12942,8 @@ bool InstCombiner::runOnFunction(Function &F) {
   unsigned Iteration = 0;
   while (DoOneIteration(F, Iteration++))
     EverMadeChange = true;
+
+  Builder = 0;
   return EverMadeChange;
 }
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dee7bfba21dd8..8b11edd891fd5 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
@@ -26,13 +27,13 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(NumThreads, "Number of jumps threaded");
 STATISTIC(NumFolds,   "Number of terminators folded");
+STATISTIC(NumDupes,   "Number of branch blocks duplicated to eliminate phi");
 
 static cl::opt<unsigned>
 Threshold("jump-threading-threshold",
@@ -56,7 +57,7 @@ namespace {
   /// In this case, the unconditional branch at the end of the first if can be
   /// revectored to the false side of the second if.
   ///
-  class VISIBILITY_HIDDEN JumpThreading : public FunctionPass {
+  class JumpThreading : public FunctionPass {
     TargetData *TD;
 #ifdef NDEBUG
     SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -68,15 +69,16 @@ namespace {
     JumpThreading() : FunctionPass(&ID) {}
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
     }
 
     bool runOnFunction(Function &F);
     void FindLoopHeaders(Function &F);
 
     bool ProcessBlock(BasicBlock *BB);
-    bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
-                    unsigned JumpThreadCost);
+    bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+    bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
+                                          BasicBlock *PredBB);
+
     BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
     bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
     bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -99,8 +101,8 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
 /// runOnFunction - Top level algorithm.
 ///
 bool JumpThreading::runOnFunction(Function &F) {
-  DOUT << "Jump threading on function '" << F.getNameStart() << "'\n";
-  TD = &getAnalysis<TargetData>();
+  DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
+  TD = getAnalysisIfAvailable<TargetData>();
 
   FindLoopHeaders(F);
 
@@ -119,8 +121,8 @@ bool JumpThreading::runOnFunction(Function &F) {
       // edges which simplifies the CFG.
       if (pred_begin(BB) == pred_end(BB) &&
           BB != &BB->getParent()->getEntryBlock()) {
-        DOUT << "  JT: Deleting dead block '" << BB->getNameStart()
-             << "' with terminator: " << *BB->getTerminator();
+        DEBUG(errs() << "  JT: Deleting dead block '" << BB->getName()
+              << "' with terminator: " << *BB->getTerminator() << '\n');
         LoopHeaders.erase(BB);
         DeleteDeadBlock(BB);
         Changed = true;
@@ -134,6 +136,48 @@ bool JumpThreading::runOnFunction(Function &F) {
   return EverChanged;
 }
 
+/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
+/// thread across it.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+  /// Ignore PHI nodes, these will be flattened when duplication happens.
+  BasicBlock::const_iterator I = BB->getFirstNonPHI();
+
+  // Sum up the cost of each instruction until we get to the terminator.  Don't
+  // include the terminator because the copy won't include it.
+  unsigned Size = 0;
+  for (; !isa<TerminatorInst>(I); ++I) {
+    // Debugger intrinsics don't incur code size.
+    if (isa<DbgInfoIntrinsic>(I)) continue;
+
+    // If this is a pointer->pointer bitcast, it is free.
+    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+      continue;
+
+    // All other instructions count for at least one unit.
+    ++Size;
+
+    // Calls are more expensive.  If they are non-intrinsic calls, we model them
+    // as having cost of 4.  If they are a non-vector intrinsic, we model them
+    // as having cost of 2 total, and if they are a vector intrinsic, we model
+    // them as having cost 1.
+    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+      if (!isa<IntrinsicInst>(CI))
+        Size += 3;
+      else if (!isa<VectorType>(CI->getType()))
+        Size += 1;
+    }
+  }
+
+  // Threading through a switch statement is particularly profitable.  If this
+  // block ends in a switch, decrease its cost to make it more likely to happen.
+  if (isa<SwitchInst>(I))
+    Size = Size > 6 ? Size-6 : 0;
+
+  return Size;
+}
+
+
+
 /// FindLoopHeaders - We do not want jump threading to turn proper loop
 /// structures into irreducible loops.  Doing this breaks up the loop nesting
 /// hierarchy and pessimizes later transformations.  To prevent this from
@@ -173,52 +217,34 @@ BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
   if (CommonPreds.size() == 1)
     return CommonPreds[0];
 
-  DOUT << "  Factoring out " << CommonPreds.size()
-       << " common predecessors.\n";
+  DEBUG(errs() << "  Factoring out " << CommonPreds.size()
+        << " common predecessors.\n");
   return SplitBlockPredecessors(PN->getParent(), &CommonPreds[0],
                                 CommonPreds.size(), ".thr_comm", this);
 }
 
-/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
-  /// Ignore PHI nodes, these will be flattened when duplication happens.
-  BasicBlock::const_iterator I = BB->getFirstNonPHI();
-
-  // Sum up the cost of each instruction until we get to the terminator.  Don't
-  // include the terminator because the copy won't include it.
-  unsigned Size = 0;
-  for (; !isa<TerminatorInst>(I); ++I) {
-    // Debugger intrinsics don't incur code size.
-    if (isa<DbgInfoIntrinsic>(I)) continue;
-
-    // If this is a pointer->pointer bitcast, it is free.
-    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
-      continue;
-
-    // All other instructions count for at least one unit.
-    ++Size;
-
-    // Calls are more expensive.  If they are non-intrinsic calls, we model them
-    // as having cost of 4.  If they are a non-vector intrinsic, we model them
-    // as having cost of 2 total, and if they are a vector intrinsic, we model
-    // them as having cost 1.
-    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
-      if (!isa<IntrinsicInst>(CI))
-        Size += 3;
-      else if (!isa<VectorType>(CI->getType()))
-        Size += 1;
-    }
+/// GetBestDestForJumpOnUndef - If we determine that the specified block ends
+/// in an undefined jump, decide which block is best to revector to.
+///
+/// Since we can pick an arbitrary destination, we pick the successor with the
+/// fewest predecessors.  This should reduce the in-degree of the others.
+/// +static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { + TerminatorInst *BBTerm = BB->getTerminator(); + unsigned MinSucc = 0; + BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); + // Compute the successor with the minimum number of predecessors. + unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { + TestBB = BBTerm->getSuccessor(i); + unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); + if (NumPreds < MinNumPreds) + MinSucc = i; } - // Threading through a switch statement is particularly profitable. If this - // block ends in a switch, decrease its cost to make it more likely to happen. - if (isa(I)) - Size = Size > 6 ? Size-6 : 0; - - return Size; + return MinSucc; } /// ProcessBlock - If there are any predecessors whose control can be threaded @@ -262,39 +288,28 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // terminator to an unconditional branch. This can occur due to threading in // other blocks. if (isa(Condition)) { - DOUT << " In block '" << BB->getNameStart() - << "' folding terminator: " << *BB->getTerminator(); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding terminator: " << *BB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(BB); return true; } // If the terminator is branching on an undef, we can pick any of the - // successors to branch to. Since this is arbitrary, we pick the successor - // with the fewest predecessors. This should reduce the in-degree of the - // others. + // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa(Condition)) { - TerminatorInst *BBTerm = BB->getTerminator(); - unsigned MinSucc = 0; - BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); - // Compute the successor with the minimum number of predecessors. - unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { - TestBB = BBTerm->getSuccessor(i); - unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); - if (NumPreds < MinNumPreds) - MinSucc = i; - } + unsigned BestSucc = GetBestDestForJumpOnUndef(BB); // Fold the branch/switch. + TerminatorInst *BBTerm = BB->getTerminator(); for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { - if (i == MinSucc) continue; + if (i == BestSucc) continue; BBTerm->getSuccessor(i)->removePredecessor(BB); } - DOUT << " In block '" << BB->getNameStart() - << "' folding undef terminator: " << *BBTerm; - BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding undef terminator: " << *BBTerm << '\n'); + BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm); BBTerm->eraseFromParent(); return true; } @@ -419,8 +434,8 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, else if (PredBI->getSuccessor(0) != BB) BranchDir = false; else { - DOUT << " In block '" << PredBB->getNameStart() - << "' folding terminator: " << *PredBB->getTerminator(); + DEBUG(errs() << " In block '" << PredBB->getName() + << "' folding terminator: " << *PredBB->getTerminator() << '\n'); ++NumFolds; ConstantFoldTerminator(PredBB); return true; @@ -431,29 +446,24 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB, // If the dest block has one predecessor, just fix the branch condition to a // constant and fold it. 
if (BB->getSinglePredecessor()) { - DOUT << " In block '" << BB->getNameStart() - << "' folding condition to '" << BranchDir << "': " - << *BB->getTerminator(); + DEBUG(errs() << " In block '" << BB->getName() + << "' folding condition to '" << BranchDir << "': " + << *BB->getTerminator() << '\n'); ++NumFolds; - DestBI->setCondition(Context->getConstantInt(Type::Int1Ty, BranchDir)); + Value *OldCond = DestBI->getCondition(); + DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BranchDir)); ConstantFoldTerminator(BB); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); return true; } - - // Otherwise we need to thread from PredBB to DestBB's successor which - // involves code duplication. Check to see if it is worth it. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } + // Next, figure out which successor we are threading to. BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } /// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that @@ -472,7 +482,6 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, if (PredBB == DestBB) return false; - SwitchInst *PredSI = cast(PredBB->getTerminator()); SwitchInst *DestSI = cast(DestBB->getTerminator()); @@ -508,8 +517,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, // Otherwise, we're safe to make the change. Make sure that the edge from // DestSI to DestSucc is not critical and has no PHI nodes. - DOUT << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI; - DOUT << "THROUGH: " << *DestSI; + DEBUG(errs() << "FORWARDING EDGE " << *DestVal << " FROM: " << *PredSI); + DEBUG(errs() << "THROUGH: " << *DestSI); // If the destination has PHI nodes, just split the edge for updating // simplicity. @@ -564,7 +573,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // If the returned value is the load itself, replace with an undef. This can // only happen in dead loops. - if (AvailableVal == LI) AvailableVal = Context->getUndef(LI->getType()); + if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(AvailableVal); LI->eraseFromParent(); return true; @@ -685,49 +694,74 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { } -/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in +/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in /// the current block. See if there are any simplifications we can do based on /// inputs to the phi node. /// bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { - // See if the phi node has any constant values. If so, we can determine where - // the corresponding predecessor will branch. - ConstantInt *PredCst = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if ((PredCst = dyn_cast(PN->getIncomingValue(i)))) - break; - - // If no incoming value has a constant, we don't know the destination of any - // predecessors. - if (PredCst == 0) - return false; - - // See if the cost of duplicating this block is low enough. 
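The cost checks deleted from the call sites in this region were not dropped; they moved into ThreadEdge further below, and the model in getJumpThreadDuplicationCost is unchanged. For concreteness, a worked example (the block and its values are hypothetical, and the default of 6 for -jump-threading-threshold is an assumption, since the option's init value is not visible in this excerpt):

    //   %p = bitcast i8* %q to i32*   ; pointer->pointer bitcast: free
    //   %v = call i32 @f()            ; 1 base unit + 3 for a real call = 4
    //   %c = icmp eq i32 %v, 0        ; 1 unit
    //   br i1 %c, label %t, label %f  ; terminator: not counted
    //
    // Total cost 5: at or below the assumed default threshold of 6, so the
    // block would be considered cheap enough to duplicate. Had it ended in
    // a switch instead, 6 would have been subtracted from the total.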
BasicBlock *BB = PN->getParent(); - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; + + // See if the phi node has any constant integer or undef values. If so, we + // can determine where the corresponding predecessor will branch. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PredVal = PN->getIncomingValue(i); + + // Check to see if this input is a constant integer. If so, the direction + // of the branch is predictable. + if (ConstantInt *CI = dyn_cast(PredVal)) { + // Merge any common predecessors that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI); + + BasicBlock *SuccBB; + if (BranchInst *BI = dyn_cast(BB->getTerminator())) + SuccBB = BI->getSuccessor(CI->isZero()); + else { + SwitchInst *SI = cast(BB->getTerminator()); + SuccBB = SI->getSuccessor(SI->findCaseValue(CI)); + } + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB); + } + + // If the input is an undef, then it doesn't matter which way it will go. + // Pick an arbitrary dest and thread the edge. + if (UndefValue *UV = dyn_cast(PredVal)) { + // Merge any common predecessors that will act the same. + BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV); + BasicBlock *SuccBB = + BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB)); + + // Ok, try to thread it! + return ThreadEdge(BB, PredBB, SuccBB); + } } - // If so, we can actually do this threading. Merge any common predecessors - // that will act the same. - BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); + // If the incoming values are all variables, we don't know the destination of + // any predecessors. However, if any of the predecessor blocks end in an + // unconditional branch, we can *duplicate* the jump into that block in order + // to further encourage jump threading and to eliminate cases where we have + // branch on a phi of an icmp (branch on icmp is much better). + + // We don't want to do this tranformation for switches, because we don't + // really want to duplicate a switch. + if (isa(BB->getTerminator())) + return false; - // Next, figure out which successor we are threading to. - BasicBlock *SuccBB; - if (BranchInst *BI = dyn_cast(BB->getTerminator())) - SuccBB = BI->getSuccessor(PredCst == Context->getConstantIntFalse()); - else { - SwitchInst *SI = cast(BB->getTerminator()); - SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); + // Look for unconditional branch predecessors. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = PN->getIncomingBlock(i); + if (BranchInst *PredBr = dyn_cast(PredBB->getTerminator())) + if (PredBr->isUnconditional() && + // Try to duplicate BB into PredBB. + DuplicateCondBranchOnPHIIntoPred(BB, PredBB)) + return true; } - - // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + + return false; } + /// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch /// whose condition is an AND/OR where one side is PN. If PN has constant /// operands that permit us to evaluate the condition for some operand, thread @@ -756,7 +790,8 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, // We can only do the simplification for phi nodes of 'false' with AND or // 'true' with OR. See if we have any entries in the phi for this. 
unsigned PredNo = ~0U; - ConstantInt *PredCst = Context->getConstantInt(Type::Int1Ty, !isAnd); + ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()), + !isAnd); for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) == PredCst) { PredNo = i; @@ -768,14 +803,6 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, if (PredNo == ~0U) return false; - // See if the cost of duplicating this block is low enough. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } - // If so, we can actually do this threading. Merge any common predecessors // that will act the same. BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); @@ -787,7 +814,7 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } /// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right @@ -795,15 +822,15 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB, /// result can not be determined, a null pointer is returned. static Constant *GetResultOfComparison(CmpInst::Predicate pred, Value *LHS, Value *RHS, - LLVMContext* Context) { + LLVMContext &Context) { if (Constant *CLHS = dyn_cast(LHS)) if (Constant *CRHS = dyn_cast(RHS)) - return Context->getConstantExprCompare(pred, CLHS, CRHS); + return ConstantExpr::getCompare(pred, CLHS, CRHS); if (LHS == RHS) if (isa(LHS->getType()) || isa(LHS->getType())) return ICmpInst::isTrueWhenEqual(pred) ? - Context->getConstantIntTrue() : Context->getConstantIntFalse(); + ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context); return 0; } @@ -829,7 +856,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { PredVal = PN->getIncomingValue(i); Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, - RHS, Context); + RHS, Cmp->getContext()); if (!Res) { PredVal = 0; continue; @@ -854,14 +881,6 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { if (PredVal == 0) return false; - // See if the cost of duplicating this block is low enough. - unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); - if (JumpThreadCost > Threshold) { - DOUT << " Not threading BB '" << BB->getNameStart() - << "' - Cost is too high: " << JumpThreadCost << "\n"; - return false; - } - // If so, we can actually do this threading. Merge any common predecessors // that will act the same. BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); @@ -870,58 +889,77 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) { BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection); // Ok, try to thread it! - return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + return ThreadEdge(BB, PredBB, SuccBB); } +/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new +/// predecessor to the PHIBB block. If it has PHI nodes, add entries for +/// NewPred using the entries from OldPred (suitably mapped). 
+static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, + BasicBlock *OldPred, + BasicBlock *NewPred, + DenseMap &ValueMap) { + for (BasicBlock::iterator PNI = PHIBB->begin(); + PHINode *PN = dyn_cast(PNI); ++PNI) { + // Ok, we have a PHI node. Figure out what the incoming value was for the + // DestBlock. + Value *IV = PN->getIncomingValueForBlock(OldPred); + + // Remap the value if necessary. + if (Instruction *Inst = dyn_cast(IV)) { + DenseMap::iterator I = ValueMap.find(Inst); + if (I != ValueMap.end()) + IV = I->second; + } + + PN->addIncoming(IV, NewPred); + } +} + /// ThreadEdge - We have decided that it is safe and profitable to thread an /// edge from PredBB to SuccBB across BB. Transform the IR to reflect this /// change. bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, - BasicBlock *SuccBB, unsigned JumpThreadCost) { - + BasicBlock *SuccBB) { // If threading to the same block as we come from, we would infinite loop. if (SuccBB == BB) { - DOUT << " Not threading across BB '" << BB->getNameStart() - << "' - would thread to self!\n"; + DEBUG(errs() << " Not threading across BB '" << BB->getName() + << "' - would thread to self!\n"); return false; } // If threading this would thread across a loop header, don't thread the edge. // See the comments above FindLoopHeaders for justifications and caveats. if (LoopHeaders.count(BB)) { - DOUT << " Not threading from '" << PredBB->getNameStart() - << "' across loop header BB '" << BB->getNameStart() - << "' to dest BB '" << SuccBB->getNameStart() - << "' - it might create an irreducible loop!\n"; + DEBUG(errs() << " Not threading from '" << PredBB->getName() + << "' across loop header BB '" << BB->getName() + << "' to dest BB '" << SuccBB->getName() + << "' - it might create an irreducible loop!\n"); return false; } - // And finally, do it! - DOUT << " Threading edge from '" << PredBB->getNameStart() << "' to '" - << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost - << ", across block:\n " - << *BB << "\n"; - - // Jump Threading can not update SSA properties correctly if the values - // defined in the duplicated block are used outside of the block itself. For - // this reason, we spill all values that are used outside of BB to the stack. - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - if (!I->isUsedOutsideOfBlock(BB)) - continue; - - // We found a use of I outside of BB. Create a new stack slot to - // break this inter-block usage pattern. - DemoteRegToStack(*I); + unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); + if (JumpThreadCost > Threshold) { + DEBUG(errs() << " Not threading BB '" << BB->getName() + << "' - Cost is too high: " << JumpThreadCost << "\n"); + return false; } - + + // And finally, do it! + DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '" + << SuccBB->getName() << "' with cost: " << JumpThreadCost + << ", across block:\n " + << *BB << "\n"); + // We are going to have to map operands from the original BB block to the new // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to // account for entry from PredBB. 
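Both AddPHINodeEntriesForMappedBlock above and the cloning loops in ThreadEdge apply the same remap rule to the value map; the ValueMapping declared next is consulted with exactly this logic. Isolated as a helper it reads (a sketch, not a function the patch defines):

    static Value *remapValue(Value *V,
                             DenseMap<Instruction*, Value*> &ValueMapping) {
      if (Instruction *Inst = dyn_cast<Instruction>(V)) {
        DenseMap<Instruction*, Value*>::iterator It = ValueMapping.find(Inst);
        if (It != ValueMapping.end())
          return It->second;  // value was cloned; use the clone
      }
      return V;  // defined outside the duplicated block; keep as-is
    }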
DenseMap ValueMapping; - BasicBlock *NewBB = - BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), + BB->getName()+".thread", + BB->getParent(), BB); NewBB->moveAfter(PredBB); BasicBlock::iterator BI = BB->begin(); @@ -932,7 +970,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // mapping and using it to remap operands in the cloned instructions. for (; !isa(BI); ++BI) { Instruction *New = BI->clone(); - New->setName(BI->getNameStart()); + New->setName(BI->getName()); NewBB->getInstList().push_back(New); ValueMapping[BI] = New; @@ -951,21 +989,48 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the // PHI nodes for NewBB now. - for (BasicBlock::iterator PNI = SuccBB->begin(); isa(PNI); ++PNI) { - PHINode *PN = cast(PNI); - // Ok, we have a PHI node. Figure out what the incoming value was for the - // DestBlock. - Value *IV = PN->getIncomingValueForBlock(BB); - - // Remap the value if necessary. - if (Instruction *Inst = dyn_cast(IV)) { - DenseMap::iterator I = ValueMapping.find(Inst); - if (I != ValueMapping.end()) - IV = I->second; + AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. This can require arbitrary + // PHI insertion, of which we are prepared to do, clean these up now. + SSAUpdater SSAUpdate; + SmallVector UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast(*UI); + if (PHINode *UserPN = dyn_cast(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); } - PN->addIncoming(IV, NewBB); + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(errs() << "\n"); } + // Ok, NewBB is good to go. Update the terminator of PredBB to jump to // NewBB instead of BB. This eliminates predecessors from BB, which requires // us to simplify any PHI nodes in BB. 
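The renaming loop above is the SSAUpdater pattern this patch introduces in place of the old DemoteRegToStack spilling. Boiled down to its three-step contract (a condensed restatement of the code above, with UsesToRename assumed already populated):

    SSAUpdater SSAUpdate;
    SSAUpdate.Initialize(I);                             // I: the original instruction
    SSAUpdate.AddAvailableValue(BB, I);                  // value live-out of BB
    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); // value live-out of the clone
    while (!UsesToRename.empty())                        // rewrite each non-local use,
      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); // inserting PHIs as needed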
@@ -982,7 +1047,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BI = NewBB->begin(); for (BasicBlock::iterator E = NewBB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { Inst->replaceAllUsesWith(C); Inst->eraseFromParent(); continue; @@ -995,3 +1060,120 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, ++NumThreads; return true; } + +/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch +/// to BB which contains an i1 PHI node and a conditional branch on that PHI. +/// If we can duplicate the contents of BB up into PredBB do so now, this +/// improves the odds that the branch will be on an analyzable instruction like +/// a compare. +bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, + BasicBlock *PredBB) { + // If BB is a loop header, then duplicating this block outside the loop would + // cause us to transform this into an irreducible loop, don't do this. + // See the comments above FindLoopHeaders for justifications and caveats. + if (LoopHeaders.count(BB)) { + DEBUG(errs() << " Not duplicating loop header '" << BB->getName() + << "' into predecessor block '" << PredBB->getName() + << "' - it might create an irreducible loop!\n"); + return false; + } + + unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); + if (DuplicationCost > Threshold) { + DEBUG(errs() << " Not duplicating BB '" << BB->getName() + << "' - Cost is too high: " << DuplicationCost << "\n"); + return false; + } + + // Okay, we decided to do this! Clone all the instructions in BB onto the end + // of PredBB. + DEBUG(errs() << " Duplicating block '" << BB->getName() << "' into end of '" + << PredBB->getName() << "' to eliminate branch on phi. Cost: " + << DuplicationCost << " block is:" << *BB << "\n"); + + // We are going to have to map operands from the original BB block into the + // PredBB block. Evaluate PHI nodes in BB. + DenseMap ValueMapping; + + BasicBlock::iterator BI = BB->begin(); + for (; PHINode *PN = dyn_cast(BI); ++BI) + ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); + + BranchInst *OldPredBranch = cast(PredBB->getTerminator()); + + // Clone the non-phi instructions of BB into PredBB, keeping track of the + // mapping and using it to remap operands in the cloned instructions. + for (; BI != BB->end(); ++BI) { + Instruction *New = BI->clone(); + New->setName(BI->getName()); + PredBB->getInstList().insert(OldPredBranch, New); + ValueMapping[BI] = New; + + // Remap operands to patch up intra-block references. + for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) + if (Instruction *Inst = dyn_cast(New->getOperand(i))) { + DenseMap::iterator I = ValueMapping.find(Inst); + if (I != ValueMapping.end()) + New->setOperand(i, I->second); + } + } + + // Check to see if the targets of the branch had PHI nodes. If so, we need to + // add entries to the PHI nodes for branch from PredBB now. + BranchInst *BBBranch = cast(BB->getTerminator()); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB, + ValueMapping); + AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, + ValueMapping); + + // If there were values defined in BB that are used outside the block, then we + // now have to update all uses of the value to use either the original value, + // the cloned value, or some PHI derived value. 
This can require arbitrary + // PHI insertion, of which we are prepared to do, clean these up now. + SSAUpdater SSAUpdate; + SmallVector UsesToRename; + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + // Scan all uses of this instruction to see if it is used outside of its + // block, and if so, record them in UsesToRename. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; + ++UI) { + Instruction *User = cast(*UI); + if (PHINode *UserPN = dyn_cast(User)) { + if (UserPN->getIncomingBlock(UI) == BB) + continue; + } else if (User->getParent() == BB) + continue; + + UsesToRename.push_back(&UI.getUse()); + } + + // If there are no uses outside the block, we're done with this instruction. + if (UsesToRename.empty()) + continue; + + DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + + // We found a use of I outside of BB. Rename all uses of I that are outside + // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks + // with the two values we know. + SSAUpdate.Initialize(I); + SSAUpdate.AddAvailableValue(BB, I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); + DEBUG(errs() << "\n"); + } + + // PredBB no longer jumps to BB, remove entries in the PHI node for the edge + // that we nuked. + BB->removePredecessor(PredBB); + + // Remove the unconditional branch at the end of the PredBB block. + OldPredBranch->eraseFromParent(); + + ++NumDupes; + return true; +} + + diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index d6daeca1128c9..756fbf3e7bd52 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -35,8 +35,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -46,8 +46,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" #include @@ -73,7 +73,7 @@ EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden, "global variables")); namespace { - struct VISIBILITY_HIDDEN LICM : public LoopPass { + struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid LICM() : LoopPass(&ID) {} @@ -91,6 +91,7 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreservedID(LoopSimplifyID); } bool doFinalization() { @@ -338,7 +339,6 @@ void LICM::SinkRegion(DomTreeNode *N) { } } - /// HoistRegion - Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions @@ -389,9 +389,13 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // Don't hoist loads which have may-aliased stores in loop. 
unsigned Size = 0; if (LI->getType()->isSized()) - Size = AA->getTargetData().getTypeStoreSize(LI->getType()); + Size = AA->getTypeStoreSize(LI->getType()); return !pointerInvalidatedByLoop(LI->getOperand(0), Size); } else if (CallInst *CI = dyn_cast(&I)) { + if (isa(CI)) { + // Don't hoist/sink dbgstoppoints, we handle them separately + return false; + } // Handle obvious cases efficiently. AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == AliasAnalysis::DoesNotAccessMemory) @@ -465,7 +469,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) { /// position, and may either delete it or move it to outside of the loop. /// void LICM::sink(Instruction &I) { - DOUT << "LICM sinking instruction: " << I; + DEBUG(errs() << "LICM sinking instruction: " << I); SmallVector ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); @@ -482,22 +486,27 @@ void LICM::sink(Instruction &I) { if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); - if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. - I.replaceAllUsesWith(Context->getUndef(I.getType())); + // If I has users in unreachable blocks, eliminate. + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I.getType()->isVoidTy()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Move the instruction to the start of the exit block, after any PHI // nodes in it. I.removeFromParent(); - BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI(); ExitBlocks[0]->getInstList().insert(InsertPt, &I); } } else if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); - if (!I.use_empty()) // If I has users in unreachable blocks, eliminate. - I.replaceAllUsesWith(Context->getUndef(I.getType())); + // If I has users in unreachable blocks, eliminate. + // If I is not void type then replaceAllUsesWith undef. + // This allows ValueHandlers and custom metadata to adjust itself. + if (!I.getType()->isVoidTy()) + I.replaceAllUsesWith(UndefValue::get(I.getType())); I.eraseFromParent(); } else { // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to @@ -507,7 +516,7 @@ void LICM::sink(Instruction &I) { // Firstly, we create a stack object to hold the value... AllocaInst *AI = 0; - if (I.getType() != Type::VoidTy) { + if (!I.getType()->isVoidTy()) { AI = new AllocaInst(I.getType(), 0, I.getName(), I.getParent()->getParent()->getEntryBlock().begin()); CurAST->add(AI); @@ -593,7 +602,7 @@ void LICM::sink(Instruction &I) { if (AI) { std::vector Allocas; Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, CurAST); + PromoteMemToReg(Allocas, *DT, *DF, AI->getContext(), CurAST); } } } @@ -602,7 +611,8 @@ void LICM::sink(Instruction &I) { /// that is safe to hoist, this instruction is called to do the dirty work. /// void LICM::hoist(Instruction &I) { - DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I; + DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": " + << I << "\n"); // Remove the instruction from its current basic block... but don't delete the // instruction. @@ -623,7 +633,8 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. 
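That comment survives from the old code; the hunk below replaces the opcode-only isTrapping() test with the operand-aware Instruction::isSafeToSpeculativelyExecute(). A hypothetical illustration of the difference (not patch code):

    // The opcode-level test rejects every division, while the operand-aware
    // test accepts one whose divisor is a known non-zero constant:
    //   %a = udiv i32 %x, 7    ; isTrapping() -> true: old LICM gave up here
    //                          ; isSafeToSpeculativelyExecute() -> true: hoistable
    //   %b = udiv i32 %x, %y   ; both tests refuse: %y may be zero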
- if (!Inst.isTrapping()) return true; + if (Inst.isSafeToSpeculativelyExecute()) + return true; // Otherwise we have to check to make sure that the instruction dominates all // of the exit blocks. If it doesn't, then there is a path out of the loop @@ -635,12 +646,6 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { if (Inst.getParent() == CurLoop->getHeader()) return true; - // It's always safe to load from a global or alloca. - if (isa(Inst)) - if (isa(Inst.getOperand(0)) || - isa(Inst.getOperand(0))) - return true; - // Get the exit blocks for the current loop. SmallVector ExitBlocks; CurLoop->getExitBlocks(ExitBlocks); @@ -773,7 +778,7 @@ void LICM::PromoteValuesInLoop() { PromotedAllocas.reserve(PromotedValues.size()); for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); + PromoteMemToReg(PromotedAllocas, *DT, *DF, Preheader->getContext(), CurAST); } /// FindPromotableValuesInLoop - Check the current loop for stores to definite @@ -862,7 +867,7 @@ void LICM::FindPromotableValuesInLoop( for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); - DOUT << "LICM: Promoting value: " << *V << "\n"; + DEBUG(errs() << "LICM: Promoting value: " << *V << "\n"); } } diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 302cdec2ba4a7..5f93756a05c07 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -15,19 +15,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-delete" - #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallVector.h" - using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); namespace { - class VISIBILITY_HIDDEN LoopDeletion : public LoopPass { + class LoopDeletion : public LoopPass { public: static char ID; // Pass ID, replacement for typeid LoopDeletion() : LoopPass(&ID) {} @@ -38,9 +36,9 @@ namespace { bool SingleDominatingExit(Loop* L, SmallVector& exitingBlocks); bool IsLoopDead(Loop* L, SmallVector& exitingBlocks, - SmallVector& exitBlocks); - bool IsLoopInvariantInst(Instruction *I, Loop* L); - + SmallVector& exitBlocks, + bool &Changed, BasicBlock *Preheader); + virtual void getAnalysisUsage(AnalysisUsage& AU) const { AU.addRequired(); AU.addRequired(); @@ -84,32 +82,13 @@ bool LoopDeletion::SingleDominatingExit(Loop* L, return DT.dominates(exitingBlocks[0], latch); } -/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to -/// a loop, which is defined as being true if all of its operands are defined -/// outside of the loop. These instructions can be hoisted out of the loop -/// if their results are needed. This could be made more aggressive by -/// recursively checking the operands for invariance, but it's not clear that -/// it's worth it. -bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L) { - // PHI nodes are not loop invariant if defined in the loop. 
- if (isa(I) && L->contains(I->getParent())) - return false; - - // The instruction is loop invariant if all of its operands are loop-invariant - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (!L->isLoopInvariant(I->getOperand(i))) - return false; - - // If we got this far, the instruction is loop invariant! - return true; -} - /// IsLoopDead - Determined if a loop is dead. This assumes that we've already /// checked for unique exit and exiting blocks, and that the code is in LCSSA /// form. bool LoopDeletion::IsLoopDead(Loop* L, SmallVector& exitingBlocks, - SmallVector& exitBlocks) { + SmallVector& exitBlocks, + bool &Changed, BasicBlock *Preheader) { BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock* exitBlock = exitBlocks[0]; @@ -122,7 +101,7 @@ bool LoopDeletion::IsLoopDead(Loop* L, while (PHINode* P = dyn_cast(BI)) { Value* incoming = P->getIncomingValueForBlock(exitingBlock); if (Instruction* I = dyn_cast(incoming)) - if (!IsLoopInvariantInst(I, L)) + if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; BI++; @@ -181,15 +160,16 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { return false; // Finally, we have to check that the loop really is dead. - if (!IsLoopDead(L, exitingBlocks, exitBlocks)) - return false; + bool Changed = false; + if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) + return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. ScalarEvolution& SE = getAnalysis(); - const SCEV* S = SE.getBackedgeTakenCount(L); + const SCEV *S = SE.getBackedgeTakenCount(L); if (isa(S)) - return false; + return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. @@ -199,18 +179,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't // mess with this unless you have good reason and know what you're doing. - - // Move simple loop-invariant expressions out of the loop, since they - // might be needed by the exit phis. - for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); - LI != LE; ++LI) - for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); - BI != BE; ) { - Instruction* I = BI++; - if (!I->use_empty() && IsLoopInvariantInst(I, L)) - I->moveBefore(preheader->getTerminator()); - } - + + // Tell ScalarEvolution that the loop is deleted. Do this before + // deleting the loop so that ScalarEvolution can look at the loop + // to determine what it needs to clean up. + SE.forgetLoopBackedgeTakenCount(L); + // Connect the preheader directly to the exit block. TerminatorInst* TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); @@ -248,11 +222,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { (*LI)->dropAllReferences(); } - // Tell ScalarEvolution that the loop is deleted. Do this before - // deleting the loop so that ScalarEvolution can look at the loop - // to determine what it needs to clean up. - SE.forgetLoopBackedgeTakenCount(L); - // Erase the instructions and the blocks without having to worry // about ordering because we already dropped the references. 
// NOTE: This iteration is safe because erasing the block does not remove its @@ -273,8 +242,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // The last step is to inform the loop pass manager that we've // eliminated this loop. LPM.deleteLoopFromQueue(L); + Changed = true; NumDeleted++; - return true; + return Changed; } diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 38e3a8b7af709..5f9d3703da99d 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -51,7 +51,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-index-split" - #include "llvm/Transforms/Scalar.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" @@ -61,7 +60,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" @@ -73,8 +71,7 @@ STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted"); namespace { - class VISIBILITY_HIDDEN LoopIndexSplit : public LoopPass { - + class LoopIndexSplit : public LoopPass { public: static char ID; // Pass ID, replacement for typeid LoopIndexSplit() : LoopPass(&ID) {} @@ -294,31 +291,33 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) { // Return V+1 static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt, - LLVMContext* Context) { - Constant *One = Context->getConstantInt(V->getType(), 1, Sign); + LLVMContext &Context) { + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt); } // Return V-1 static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt, - LLVMContext* Context) { - Constant *One = Context->getConstantInt(V->getType(), 1, Sign); + LLVMContext &Context) { + Constant *One = ConstantInt::get(V->getType(), 1, Sign); return BinaryOperator::CreateSub(V, One, "lsp", InsertPt); } // Return min(V1, V1) static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { - Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - V1, V2, "lsp", InsertPt); + Value *C = new ICmpInst(InsertPt, + Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + V1, V2, "lsp"); return SelectInst::Create(C, V1, V2, "lsp", InsertPt); } // Return max(V1, V2) static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { - Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - V1, V2, "lsp", InsertPt); + Value *C = new ICmpInst(InsertPt, + Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + V1, V2, "lsp"); return SelectInst::Create(C, V2, V1, "lsp", InsertPt); } @@ -427,15 +426,15 @@ bool LoopIndexSplit::processOneIterationLoop() { // c1 = icmp uge i32 SplitValue, StartValue // c2 = icmp ult i32 SplitValue, ExitValue // and i32 c1, c2 - Instruction *C1 = new ICmpInst(ExitCondition->isSignedPredicate() ? + Instruction *C1 = new ICmpInst(BR, ExitCondition->isSignedPredicate() ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, - SplitValue, StartValue, "lisplit", BR); + SplitValue, StartValue, "lisplit"); CmpInst::Predicate C2P = ExitCondition->getPredicate(); BranchInst *LatchBR = cast(Latch->getTerminator()); - if (LatchBR->getOperand(0) != Header) + if (LatchBR->getOperand(1) != Header) C2P = CmpInst::getInversePredicate(C2P); - Instruction *C2 = new ICmpInst(C2P, SplitValue, ExitValue, "lisplit", BR); + Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit"); Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR); SplitCondition->replaceAllUsesWith(NSplitCond); @@ -491,6 +490,8 @@ bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) { EBR->setSuccessor(1, T); } + LLVMContext &Context = Op.getContext(); + // New upper and lower bounds. Value *NLB = NULL; Value *NUB = NULL; @@ -698,7 +699,8 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP, E = df_end(DN); DI != E; ++DI) { BasicBlock *BB = DI->getBlock(); WorkList.push_back(BB); - BB->replaceAllUsesWith(UndefValue::get(Type::LabelTy)); + BB->replaceAllUsesWith(UndefValue::get( + Type::getLabelTy(DeadBB->getContext()))); } while (!WorkList.empty()) { @@ -877,6 +879,8 @@ bool LoopIndexSplit::splitLoop() { BasicBlock *ExitingBlock = ExitCondition->getParent(); if (!cleanBlock(ExitingBlock)) return false; + LLVMContext &Context = Header->getContext(); + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BranchInst *BR = dyn_cast((*I)->getTerminator()); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 1f7892ad10159..70c69bb1dae00 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -32,7 +32,7 @@ using namespace llvm; STATISTIC(NumRotated, "Number of loops rotated"); namespace { - class VISIBILITY_HIDDEN RenameData { + class RenameData { public: RenameData(Instruction *O, Value *P, Instruction *H) : Original(O), PreHeader(P), Header(H) { } @@ -42,8 +42,7 @@ namespace { Instruction *Header; // New header replacement }; - class VISIBILITY_HIDDEN LoopRotate : public LoopPass { - + class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid LoopRotate() : LoopPass(&ID) {} @@ -178,6 +177,11 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { // Now, this loop is suitable for rotation. + // Anything ScalarEvolution may know about this loop or the PHI nodes + // in its header will soon be invalidated. + if (ScalarEvolution *SE = getAnalysisIfAvailable()) + SE->forgetLoopBackedgeTakenCount(L); + // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. @@ -435,7 +439,8 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { // Right now original pre-header has two successors, new header and // exit block. Insert new block between original pre-header and // new header such that loop's new pre-header has only one successor. - BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph", + BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(), + "bb.nph", OrigHeader->getParent(), NewHeader); LoopInfo &LI = LPM.getAnalysis(); @@ -511,26 +516,30 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { DF->addBasicBlock(L->getHeader(), LatchSet); } - // If a loop block dominates new loop latch then its frontier is - // new header and Exit. 
+ // If a loop block dominates new loop latch then add to its frontiers + // new header and Exit and remove new latch (which is equal to original + // header). BasicBlock *NewLatch = L->getLoopLatch(); - DominatorTree *DT = getAnalysisIfAvailable(); - for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - BasicBlock *B = *BI; - if (DT->dominates(B, NewLatch)) { - DominanceFrontier::iterator BDFI = DF->find(B); - if (BDFI != DF->end()) { - DominanceFrontier::DomSetType &BSet = BDFI->second; - BSet = BDFI->second; - BSet.clear(); - BSet.insert(L->getHeader()); - BSet.insert(Exit); - } else { - DominanceFrontier::DomSetType BSet; - BSet.insert(L->getHeader()); - BSet.insert(Exit); - DF->addBasicBlock(B, BSet); + + assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader"); + + if (DominatorTree *DT = getAnalysisIfAvailable()) { + for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); + BI != BE; ++BI) { + BasicBlock *B = *BI; + if (DT->dominates(B, NewLatch)) { + DominanceFrontier::iterator BDFI = DF->find(B); + if (BDFI != DF->end()) { + DominanceFrontier::DomSetType &BSet = BDFI->second; + BSet.erase(NewLatch); + BSet.insert(L->getHeader()); + BSet.insert(Exit); + } else { + DominanceFrontier::DomSetType BSet; + BSet.insert(L->getHeader()); + BSet.insert(Exit); + DF->addBasicBlock(B, BSet); + } } } } @@ -538,22 +547,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { // Preserve canonical loop form, which means Exit block should // have only one predecessor. - BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this); - - // Preserve LCSSA. - for (BasicBlock::iterator I = Exit->begin(); - (PN = dyn_cast(I)); ++I) { - unsigned N = PN->getNumIncomingValues(); - for (unsigned index = 0; index != N; ++index) - if (PN->getIncomingBlock(index) == NExit) { - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(), - NExit->begin()); - NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch()); - PN->setIncomingValue(index, NewPN); - PN->setIncomingBlock(index, NExit); - break; - } - } + SplitEdge(L->getLoopLatch(), Exit, this); assert(NewHeader && L->getHeader() == NewHeader && "Invalid loop header after loop rotation"); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 046fed3d71575..d8f6cc18a1e94 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,7 +24,6 @@ #include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/Dominators.h" @@ -38,9 +37,9 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include using namespace llvm; @@ -64,26 +63,26 @@ namespace { /// IVInfo - This structure keeps track of one IV expression inserted during /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as /// well as the PHI node and increment value created for rewrite. 
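The struct that follows lost its template parameters along with the rest of the angle brackets in this copy of the patch; judging from addIV, which pushes IVExpr values, the container is almost certainly std::vector<IVExpr>. A reconstruction of the two bookkeeping types:

    struct IVExpr {
      const SCEV *Stride;
      const SCEV *Base;
      PHINode *PHI;
      IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)
        : Stride(stride), Base(base), PHI(phi) {}
    };

    struct IVsOfOneStride {
      std::vector<IVExpr> IVs;   // one entry per IV inserted for this stride
      void addIV(const SCEV *const Stride, const SCEV *const Base,
                 PHINode *PHI) {
        IVs.push_back(IVExpr(Stride, Base, PHI));
      }
    };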
- struct VISIBILITY_HIDDEN IVExpr { - const SCEV* Stride; - const SCEV* Base; + struct IVExpr { + const SCEV *Stride; + const SCEV *Base; PHINode *PHI; - IVExpr(const SCEV* const stride, const SCEV* const base, PHINode *phi) + IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) : Stride(stride), Base(base), PHI(phi) {} }; /// IVsOfOneStride - This structure keeps track of all IV expression inserted /// during StrengthReduceStridedIVUsers for a particular stride of the IV. - struct VISIBILITY_HIDDEN IVsOfOneStride { + struct IVsOfOneStride { std::vector IVs; - void addIV(const SCEV* const Stride, const SCEV* const Base, PHINode *PHI) { + void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { IVs.push_back(IVExpr(Stride, Base, PHI)); } }; - class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { + class LoopStrengthReduce : public LoopPass { IVUsers *IU; LoopInfo *LI; DominatorTree *DT; @@ -92,11 +91,11 @@ namespace { /// IVsByStride - Keep track of all IVs that have been inserted for a /// particular stride. - std::map IVsByStride; + std::map IVsByStride; /// StrideNoReuse - Keep track of all the strides whose ivs cannot be /// reused (nor should they be rewritten to reuse other strides). - SmallSet StrideNoReuse; + SmallSet StrideNoReuse; /// DeadInsts - Keep track of instructions we may have made dead, so that /// we can remove them after we are done working. @@ -134,7 +133,7 @@ namespace { private: ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, - const SCEV* const * &CondStride); + const SCEV *const * &CondStride); void OptimizeIndvars(Loop *L); void OptimizeLoopCountIV(Loop *L); @@ -150,16 +149,16 @@ namespace { IVStrideUse* &CondUse); bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* const * &CondStride); + const SCEV *const * &CondStride); bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV* CheckForIVReuse(bool, bool, bool, const SCEV* const&, + const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&, IVExpr&, const Type*, const std::vector& UsersToProcess); bool ValidScale(bool, int64_t, const std::vector& UsersToProcess); bool ValidOffset(bool, int64_t, int64_t, const std::vector& UsersToProcess); - const SCEV* CollectIVUsers(const SCEV* const &Stride, + const SCEV *CollectIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -169,11 +168,11 @@ namespace { const std::vector &UsersToProcess, const Loop *L, bool AllUsesAreAddresses, - const SCEV* Stride); + const SCEV *Stride); void PrepareToStrengthReduceFully( std::vector &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, const Loop *L, SCEVExpander &PreheaderRewriter); void PrepareToStrengthReduceFromSmallerStride( @@ -183,13 +182,13 @@ namespace { Instruction *PreInsertPt); void PrepareToStrengthReduceWithNewPhi( std::vector &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, Value *CommonBaseV, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter); - void StrengthReduceStridedIVUsers(const SCEV* const &Stride, + void StrengthReduceStridedIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L); void DeleteTriviallyDeadInstructions(); @@ -233,7 +232,7 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { /// containsAddRecFromDifferentLoop - Determine whether expression S 
involves a /// subexpression that is an AddRec from a loop other than L. An outer loop /// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { +static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { // This is very common, put it first. if (isa(S)) return false; @@ -248,7 +247,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { if (newLoop == L) return false; // if newLoop is an outer loop of L, this is OK. - if (!LoopInfoBase::isNotAlreadyContainedIn(L, newLoop)) + if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop)) return false; } return true; @@ -328,7 +327,7 @@ namespace { /// this use. As the use is processed, information gets moved from this /// field to the Imm field (below). BasedUser values are sorted by this /// field. - const SCEV* Base; + const SCEV *Base; /// Inst - The instruction using the induction variable. Instruction *Inst; @@ -341,7 +340,7 @@ namespace { /// before Inst, because it will be folded into the imm field of the /// instruction. This is also sometimes used for loop-variant values that /// must be added inside the loop. - const SCEV* Imm; + const SCEV *Imm; /// Phi - The induction variable that performs the striding that /// should be used for this user. @@ -363,13 +362,13 @@ namespace { // Once we rewrite the code to insert the new IVs we want, update the // operands of Inst to use the new expression 'NewBase', with 'Imm' added // to it. - void RewriteInstructionToUseNewBase(const SCEV* const &NewBase, + void RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *InsertPt, SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, SmallVectorImpl &DeadInsts); - Value *InsertCodeForBaseAtPosition(const SCEV* const &NewBase, + Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -379,12 +378,12 @@ namespace { } void BasedUser::dump() const { - cerr << " Base=" << *Base; - cerr << " Imm=" << *Imm; - cerr << " Inst: " << *Inst; + errs() << " Base=" << *Base; + errs() << " Imm=" << *Imm; + errs() << " Inst: " << *Inst; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase, const Type *Ty, SCEVExpander &Rewriter, Instruction *IP, Loop *L, @@ -408,7 +407,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); - const SCEV* NewValSCEV = SE->getUnknown(Base); + const SCEV *NewValSCEV = SE->getUnknown(Base); // Always emit the immediate into the same block as the user. NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); @@ -423,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase, // value of NewBase in the case that it's a diffferent instruction from // the PHI that NewBase is computed from, or null otherwise. // -void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, +void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase, Instruction *NewBasePt, SCEVExpander &Rewriter, Loop *L, Pass *P, LoopInfo &LI, @@ -460,9 +459,10 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, // Replace the use of the operand Value with the new Phi we just created. 
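The dump() hunk above is one instance of a blanket migration in this patch from cerr and the DOUT macro to the raw_ostream-based errs() and the DEBUG macro, which compiles away outside +Asserts builds. A minimal sketch of the new pattern; the DEBUG_TYPE name is made up for the example:

    #define DEBUG_TYPE "lsr-sketch"   // hypothetical, enables -debug-only=
    #include "llvm/Value.h"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void report(const Value &V) {
      errs() << "  value: " << V << "\n";        // always printed
      DEBUG(errs() << "  debug-only detail\n");  // stripped in release builds
    }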
Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - DOUT << " Replacing with "; - DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false)); - DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; + DEBUG(errs() << " Replacing with "); + DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false)); + DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " + << *Imm << "\n"); return; } @@ -483,43 +483,45 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, // loop because multiple copies sometimes do useful sinking of code in // that case(?). Instruction *OldLoc = dyn_cast(OperandValToReplace); + BasicBlock *PHIPred = PN->getIncomingBlock(i); if (L->contains(OldLoc->getParent())) { // If this is a critical edge, split the edge so that we do not insert // the code on all predecessor/successor paths. We do this unless this // is the canonical backedge for this loop, as this can make some // inserted code be in an illegal position. - BasicBlock *PHIPred = PN->getIncomingBlock(i); if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { // First step, split the critical edge. - SplitCriticalEdge(PHIPred, PN->getParent(), P, false); + BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), + P, false); // Next step: move the basic block. In particular, if the PHI node // is outside of the loop, and PredTI is in the loop, we want to // move the block to be immediately before the PHI block, not // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN->getParent())) { - BasicBlock *NewBB = PN->getIncomingBlock(i); + if (L->contains(PHIPred) && !L->contains(PN->getParent())) NewBB->moveBefore(PN->getParent()); - } // Splitting the edge can reduce the number of PHI entries we have. e = PN->getNumIncomingValues(); + PHIPred = NewBB; + i = PN->getBasicBlockIndex(PHIPred); } } - Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; + Value *&Code = InsertedCode[PHIPred]; if (!Code) { // Insert the code into the end of the predecessor block. Instruction *InsertPt = (L->contains(OldLoc->getParent())) ? - PN->getIncomingBlock(i)->getTerminator() : + PHIPred->getTerminator() : OldLoc->getParent()->getTerminator(); Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), Rewriter, InsertPt, L, LI); - DOUT << " Changing PHI use to "; - DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false)); - DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; + DEBUG(errs() << " Changing PHI use to "); + DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); + DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " + << *Imm << "\n"); } // Replace the use of the operand Value with the new Phi we just created. @@ -535,7 +537,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, /// fitsInAddressMode - Return true if V can be subsumed within an addressing /// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy, +static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy, const TargetLowering *TLI, bool HasBaseReg) { if (const SCEVConstant *SC = dyn_cast(V)) { int64_t VC = SC->getValue()->getSExtValue(); @@ -567,12 +569,12 @@ static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy, /// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are /// loop varying to the Imm operand. 
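Besides the debug-stream changes, the critical-edge hunk above fixes stale bookkeeping: once SplitCriticalEdge runs, the PHI's predecessor on that edge is the new block, so the cached block pointer and the operand index must both be refreshed before InsertedCode is keyed on them. With its stripped angle brackets restored, the heart of the fix reads roughly:

    BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), P, false);
    if (L->contains(PHIPred) && !L->contains(PN->getParent()))
      NewBB->moveBefore(PN->getParent());   // keep the block near the PHI
    e = PN->getNumIncomingValues();         // splitting can drop PHI entries
    PHIPred = NewBB;                        // the edge now enters via NewBB,
    i = PN->getBasicBlockIndex(PHIPred);    // so re-derive the operand index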
-static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, +static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, Loop *L, ScalarEvolution *SE) { if (Val->isLoopInvariant(L)) return; // Nothing to do. if (const SCEVAddExpr *SAE = dyn_cast(Val)) { - SmallVector NewOps; + SmallVector NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -590,10 +592,10 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, Val = SE->getAddExpr(NewOps); } else if (const SCEVAddRecExpr *SARE = dyn_cast(Val)) { // Try to pull immediates out of the start value of nested addrec's. - const SCEV* Start = SARE->getStart(); + const SCEV *Start = SARE->getStart(); MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - SmallVector Ops(SARE->op_begin(), SARE->op_end()); + SmallVector Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } else { @@ -609,15 +611,15 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, /// Accumulate these immediate values into the Imm value. static void MoveImmediateValues(const TargetLowering *TLI, const Type *AccessTy, - const SCEV* &Val, const SCEV* &Imm, + const SCEV *&Val, const SCEV *&Imm, bool isAddress, Loop *L, ScalarEvolution *SE) { if (const SCEVAddExpr *SAE = dyn_cast(Val)) { - SmallVector NewOps; + SmallVector NewOps; NewOps.reserve(SAE->getNumOperands()); for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { - const SCEV* NewOp = SAE->getOperand(i); + const SCEV *NewOp = SAE->getOperand(i); MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); if (!NewOp->isLoopInvariant(L)) { @@ -636,11 +638,11 @@ static void MoveImmediateValues(const TargetLowering *TLI, return; } else if (const SCEVAddRecExpr *SARE = dyn_cast(Val)) { // Try to pull immediates out of the start value of nested addrec's. - const SCEV* Start = SARE->getStart(); + const SCEV *Start = SARE->getStart(); MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); if (Start != SARE->getStart()) { - SmallVector Ops(SARE->op_begin(), SARE->op_end()); + SmallVector Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Start; Val = SE->getAddRecExpr(Ops, SARE->getLoop()); } @@ -651,8 +653,8 @@ static void MoveImmediateValues(const TargetLowering *TLI, fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) && SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { - const SCEV* SubImm = SE->getIntegerSCEV(0, Val->getType()); - const SCEV* NewOp = SME->getOperand(1); + const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType()); + const SCEV *NewOp = SME->getOperand(1); MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); // If we extracted something out of the subexpressions, see if we can @@ -687,7 +689,7 @@ static void MoveImmediateValues(const TargetLowering *TLI, static void MoveImmediateValues(const TargetLowering *TLI, Instruction *User, - const SCEV* &Val, const SCEV* &Imm, + const SCEV *&Val, const SCEV *&Imm, bool isAddress, Loop *L, ScalarEvolution *SE) { const Type *AccessTy = getAccessType(User); @@ -697,19 +699,19 @@ static void MoveImmediateValues(const TargetLowering *TLI, /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are /// added together. This is used to reassociate common addition subexprs /// together for maximal sharing when rewriting bases. 
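The operand vectors in these two routines are among the many declarations whose angle brackets were eaten in transit; from their use with ScalarEvolution they are SmallVectors of const SCEV*. A plausible reconstruction (the inline element counts are guesses, not recovered values):

    SmallVector<const SCEV*, 8> NewOps;            // rebuilt add operands
    NewOps.reserve(SAE->getNumOperands());

    SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end());
    Ops[0] = Start;                                // immediate-stripped start
    Val = SE->getAddRecExpr(Ops, SARE->getLoop());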
-static void SeparateSubExprs(SmallVector &SubExprs, - const SCEV* Expr, +static void SeparateSubExprs(SmallVector &SubExprs, + const SCEV *Expr, ScalarEvolution *SE) { if (const SCEVAddExpr *AE = dyn_cast(Expr)) { for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) SeparateSubExprs(SubExprs, AE->getOperand(j), SE); } else if (const SCEVAddRecExpr *SARE = dyn_cast(Expr)) { - const SCEV* Zero = SE->getIntegerSCEV(0, Expr->getType()); + const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType()); if (SARE->getOperand(0) == Zero) { SubExprs.push_back(Expr); } else { // Compute the addrec with zero as its base. - SmallVector Ops(SARE->op_begin(), SARE->op_end()); + SmallVector Ops(SARE->op_begin(), SARE->op_end()); Ops[0] = Zero; // Start with zero base. SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -733,7 +735,7 @@ struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; /// not remove anything. This looks for things like (a+b+c) and /// (a+c+d) and computes the common (a+c) subexpression. The common expression /// is *removed* from the Bases and returned. -static const SCEV* +static const SCEV * RemoveCommonExpressionsFromUseBases(std::vector &Uses, ScalarEvolution *SE, Loop *L, const TargetLowering *TLI) { @@ -741,9 +743,9 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses, // Only one use? This is a very common case, so we handle it specially and // cheaply. - const SCEV* Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); - const SCEV* Result = Zero; - const SCEV* FreeResult = Zero; + const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); + const SCEV *Result = Zero; + const SCEV *FreeResult = Zero; if (NumUses == 1) { // If the use is inside the loop, use its base, regardless of what it is: // it is clearly shared across all the IV's. If the use is outside the loop @@ -759,13 +761,13 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses, // Also track whether all uses of each expression can be moved into an // an addressing mode "for free"; such expressions are left within the loop. // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; - std::map SubExpressionUseData; + std::map SubExpressionUseData; // UniqueSubExprs - Keep track of all of the subexpressions we see in the // order we see them. - SmallVector UniqueSubExprs; + SmallVector UniqueSubExprs; - SmallVector SubExprs; + SmallVector SubExprs; unsigned NumUsesInsideLoop = 0; for (unsigned i = 0; i != NumUses; ++i) { // If the user is outside the loop, just ignore it for base computation. @@ -809,7 +811,7 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses, // Now that we know how many times each is used, build Result. Iterate over // UniqueSubexprs so that we have a stable ordering. for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map::iterator I = + std::map::iterator I = SubExpressionUseData.find(UniqueSubExprs[i]); assert(I != SubExpressionUseData.end() && "Entry not found?"); if (I->second.Count == NumUsesInsideLoop) { // Found CSE! 
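The Count bookkeeping above amounts to a simple multiset intersection: a subexpression belongs to the common base only when its use count equals the number of uses inside the loop. A self-contained toy with ints standing in for const SCEV* terms:

    #include <cstdio>
    #include <map>
    #include <vector>

    int main() {
      // Term lists for two in-loop uses: (a+b+c) and (a+c+d),
      // with ints standing in for const SCEV* subexpressions.
      int use0[] = {1, 2, 3};                  // a, b, c
      int use1[] = {1, 3, 4};                  // a, c, d
      std::vector<int> uses[2];
      uses[0].assign(use0, use0 + 3);
      uses[1].assign(use1, use1 + 3);

      std::map<int, unsigned> count;           // SubExprUseData::Count analogue
      for (unsigned i = 0; i != 2; ++i)
        for (unsigned j = 0; j != uses[i].size(); ++j)
          ++count[uses[i][j]];

      // A term used by every use is part of the common base (here: a and c).
      for (std::map<int, unsigned>::iterator I = count.begin();
           I != count.end(); ++I)
        if (I->second == 2)
          std::printf("common subexpression: %d\n", I->first);
      return 0;
    }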
@@ -853,7 +855,7 @@ RemoveCommonExpressionsFromUseBases(std::vector &Uses, if (FreeResult != Zero) { SeparateSubExprs(SubExprs, FreeResult, SE); for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map::iterator I = + std::map::iterator I = SubExpressionUseData.find(SubExprs[j]); SubExpressionUseData.erase(I); } @@ -902,7 +904,8 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) { // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = Type::VoidTy; + const Type *AccessTy = + Type::getVoidTy(UsersToProcess[i].Inst->getContext()); if (isAddressUse(UsersToProcess[i].Inst, UsersToProcess[i].OperandValToReplace)) AccessTy = getAccessType(UsersToProcess[i].Inst); @@ -934,7 +937,8 @@ bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = Type::VoidTy; + const Type *AccessTy = + Type::getVoidTy(UsersToProcess[i].Inst->getContext()); if (isAddressUse(UsersToProcess[i].Inst, UsersToProcess[i].OperandValToReplace)) AccessTy = getAccessType(UsersToProcess[i].Inst); @@ -982,10 +986,10 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, /// be folded into the addressing mode, nor even that the factor be constant; /// a multiply (executed once) outside the loop is better than another IV /// within. Well, usually. -const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, +const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, bool AllUsesAreAddresses, bool AllUsesAreOutsideLoop, - const SCEV* const &Stride, + const SCEV *const &Stride, IVExpr &IV, const Type *Ty, const std::vector& UsersToProcess) { if (StrideNoReuse.count(Stride)) @@ -995,7 +999,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map::iterator SI = + std::map::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa(SI->first) || StrideNoReuse.count(SI->first)) @@ -1048,7 +1052,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // an existing IV if we can. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map::iterator SI = + std::map::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end() || !isa(SI->first)) continue; @@ -1068,7 +1072,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, // -1*old. for (unsigned NewStride = 0, e = IU->StrideOrder.size(); NewStride != e; ++NewStride) { - std::map::iterator SI = + std::map::iterator SI = IVsByStride.find(IU->StrideOrder[NewStride]); if (SI == IVsByStride.end()) continue; @@ -1097,7 +1101,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { /// isNonConstantNegative - Return true if the specified scev is negated, but /// not a constant. -static bool isNonConstantNegative(const SCEV* const &Expr) { +static bool isNonConstantNegative(const SCEV *const &Expr) { const SCEVMulExpr *Mul = dyn_cast(Expr); if (!Mul) return false; @@ -1114,7 +1118,7 @@ static bool isNonConstantNegative(const SCEV* const &Expr) { /// of the strided accesses, as well as the old information from Uses. 
We /// progressively move information from the Base field to the Imm field, until /// we eventually have the full access expression to rewrite the use. -const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride, +const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L, bool &AllUsesAreAddresses, @@ -1145,7 +1149,7 @@ const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride, // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find // "A+B"), emit it to the preheader, then remove the expression from the // UsersToProcess base values. - const SCEV* CommonExprs = + const SCEV *CommonExprs = RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI); // Next, figure out what we can represent in the immediate fields of @@ -1211,7 +1215,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( const std::vector &UsersToProcess, const Loop *L, bool AllUsesAreAddresses, - const SCEV* Stride) { + const SCEV *Stride) { if (!EnableFullLSRMode) return false; @@ -1248,7 +1252,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType()); const Instruction *Inst = UsersToProcess[i].Inst; const Type *AccessTy = getAccessType(Inst); - const SCEV* Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); + const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); if (!Diff->isZero() && (!AllUsesAreAddresses || !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true))) @@ -1282,7 +1286,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( /// /// Return the created phi node. /// -static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step, +static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &Rewriter) { @@ -1302,7 +1306,7 @@ static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step, // If the stride is negative, insert a sub instead of an add for the // increment. bool isNegative = isNonConstantNegative(Step); - const SCEV* IncAmount = Step; + const SCEV *IncAmount = Step; if (isNegative) IncAmount = Rewriter.SE.getNegativeSCEV(Step); @@ -1341,13 +1345,13 @@ static void SortUsersToProcess(std::vector &UsersToProcess) { // loop before users outside of the loop with a particular base. // // We would like to use stable_sort here, but we can't. The problem is that - // const SCEV*'s don't have a deterministic ordering w.r.t to each other, so + // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so // we don't have anything to do a '<' comparison on. Because we think the // number of uses is small, do a horrible bubble sort which just relies on // ==. for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { // Get a base value. - const SCEV* Base = UsersToProcess[i].Base; + const SCEV *Base = UsersToProcess[i].Base; // Compact everything with this base to be consecutive with this one. 
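The compaction loop that completes just below this comment needs no '<' on const SCEV*: it groups uses with equal bases by swapping matches forward, relying on pointer equality alone. A self-contained illustration of that equality-only pass:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      const char *A = "A", *B = "B";       // stand-ins for const SCEV* bases
      std::vector<const char*> bases;
      bases.push_back(A); bases.push_back(B);
      bases.push_back(A); bases.push_back(B);

      for (unsigned i = 0, e = bases.size(); i != e; ++i)
        for (unsigned j = i + 1; j != e; ++j)
          if (bases[j] == bases[i])          // equality only, no ordering
            std::swap(bases[++i], bases[j]); // pull into the current group

      for (unsigned i = 0, e = bases.size(); i != e; ++i)
        std::printf("%s ", bases[i]);        // prints: A A B B
      std::printf("\n");
      return 0;
    }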
for (unsigned j = i+1; j != e; ++j) { @@ -1366,11 +1370,11 @@ static void SortUsersToProcess(std::vector &UsersToProcess) { void LoopStrengthReduce::PrepareToStrengthReduceFully( std::vector &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, const Loop *L, SCEVExpander &PreheaderRewriter) { - DOUT << " Fully reducing all users\n"; + DEBUG(errs() << " Fully reducing all users\n"); // Rewrite the UsersToProcess records, creating a separate PHI for each // unique Base value. @@ -1379,9 +1383,9 @@ LoopStrengthReduce::PrepareToStrengthReduceFully( // TODO: The uses are grouped by base, but not sorted. We arbitrarily // pick the first Imm value here to start with, and adjust it for the // other uses. - const SCEV* Imm = UsersToProcess[i].Imm; - const SCEV* Base = UsersToProcess[i].Base; - const SCEV* Start = SE->getAddExpr(CommonExprs, Base, Imm); + const SCEV *Imm = UsersToProcess[i].Imm; + const SCEV *Base = UsersToProcess[i].Base; + const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm); PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L, PreheaderRewriter); // Loop over all the users with the same base. @@ -1413,13 +1417,13 @@ static Instruction *FindIVIncInsertPt(std::vector &UsersToProcess, void LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( std::vector &UsersToProcess, - const SCEV* Stride, - const SCEV* CommonExprs, + const SCEV *Stride, + const SCEV *CommonExprs, Value *CommonBaseV, Instruction *IVIncInsertPt, const Loop *L, SCEVExpander &PreheaderRewriter) { - DOUT << " Inserting new PHI:\n"; + DEBUG(errs() << " Inserting new PHI:\n"); PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV), Stride, IVIncInsertPt, L, @@ -1432,9 +1436,9 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) UsersToProcess[i].Phi = Phi; - DOUT << " IV="; - DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false)); - DOUT << "\n"; + DEBUG(errs() << " IV="); + DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false)); + DEBUG(errs() << "\n"); } /// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to @@ -1447,8 +1451,8 @@ LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride( Value *CommonBaseV, const IVExpr &ReuseIV, Instruction *PreInsertPt) { - DOUT << " Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride - << " and BASE " << *ReuseIV.Base << "\n"; + DEBUG(errs() << " Rewriting in terms of existing IV of STRIDE " + << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n"); // All the users will share the reused IV. for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) @@ -1490,7 +1494,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset, /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single /// stride of IV. All of the users may have different starting values, and this /// may not be the only stride. -void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, +void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride, IVUsersOfOneStride &Uses, Loop *L) { // If all the users are moved to another stride, then there is nothing to do. @@ -1513,7 +1517,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // move information from the Base field to the Imm field, until we eventually // have the full access expression to rewrite the use. 
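For readers new to the pass, the effect being prepared here is the classic strength reduction of addressing arithmetic: the per-iteration multiply in base + i*stride is replaced by a pointer that the newly inserted PHI advances by the stride. A self-contained before/after picture:

    #include <cstdio>

    int main() {
      int A[30] = {0};

      // Before: the address A + i*3 costs a multiply every iteration.
      for (int i = 0; i != 10; ++i)
        A[i * 3] += 1;

      // After, in source terms: a strided pointer plays the new PHI's role.
      for (int *p = A, *e = A + 10 * 3; p != e; p += 3)
        *p += 1;

      std::printf("%d %d\n", A[0], A[27]);   // prints: 2 2
      return 0;
    }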
std::vector UsersToProcess; - const SCEV* CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, + const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -1531,9 +1535,11 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // If all uses are addresses, consider sinking the immediate part of the // common expression back into uses if they can fit in the immediate fields. if (TLI && HaveCommonExprs && AllUsesAreAddresses) { - const SCEV* NewCommon = CommonExprs; - const SCEV* Imm = SE->getIntegerSCEV(0, ReplacedTy); - MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE); + const SCEV *NewCommon = CommonExprs; + const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy); + MoveImmediateValues(TLI, Type::getVoidTy( + L->getLoopPreheader()->getContext()), + NewCommon, Imm, true, L, SE); if (!Imm->isZero()) { bool DoSink = true; @@ -1548,11 +1554,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, if (GV || Offset) // Pass VoidTy as the AccessTy to be conservative, because // there could be multiple access types among all the uses. - DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy, + DoSink = IsImmFoldedIntoAddrMode(GV, Offset, + Type::getVoidTy(L->getLoopPreheader()->getContext()), UsersToProcess, TLI); if (DoSink) { - DOUT << " Sinking " << *Imm << " back down into uses\n"; + DEBUG(errs() << " Sinking " << *Imm << " back down into uses\n"); for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); CommonExprs = NewCommon; @@ -1564,9 +1571,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // Now that we know what we need to do, insert the PHI node itself. // - DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " - << *Stride << ":\n" - << " Common base: " << *CommonExprs << "\n"; + DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " + << *Stride << ":\n" + << " Common base: " << *CommonExprs << "\n"); SCEVExpander Rewriter(*SE); SCEVExpander PreheaderRewriter(*SE); @@ -1576,11 +1583,13 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, BasicBlock *LatchBlock = L->getLoopLatch(); Instruction *IVIncInsertPt = LatchBlock->getTerminator(); - Value *CommonBaseV = Context->getNullValue(ReplacedTy); + Value *CommonBaseV = Constant::getNullValue(ReplacedTy); - const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); - IVExpr ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty), - SE->getIntegerSCEV(0, Type::Int32Ty), + const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); + IVExpr ReuseIV(SE->getIntegerSCEV(0, + Type::getInt32Ty(Preheader->getContext())), + SE->getIntegerSCEV(0, + Type::getInt32Ty(Preheader->getContext())), 0); /// Choose a strength-reduction strategy and prepare for it by creating @@ -1618,7 +1627,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // strength-reduced forms. This outer loop handles all bases, the inner // loop handles all users of a particular base. while (!UsersToProcess.empty()) { - const SCEV* Base = UsersToProcess.back().Base; + const SCEV *Base = UsersToProcess.back().Base; Instruction *Inst = UsersToProcess.back().Inst; // Emit the code for Base into the preheader. 
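The Type::getVoidTy calls above are instances of a migration applied throughout this patch: the global sentinel types became per-LLVMContext accessors, so code now reaches a context through a nearby IR object. A hedged sketch, assuming an Instruction *I in scope:

    LLVMContext &Ctx = I->getContext();
    const Type *VoidTy  = Type::getVoidTy(Ctx);    // was Type::VoidTy
    const Type *Int32Ty = Type::getInt32Ty(Ctx);   // was Type::Int32Ty
    Constant *Zero = Constant::getNullValue(Int32Ty);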
@@ -1626,17 +1635,17 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, if (!Base->isZero()) { BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); - DOUT << " INSERTING code for BASE = " << *Base << ":"; + DEBUG(errs() << " INSERTING code for BASE = " << *Base << ":"); if (BaseV->hasName()) - DOUT << " Result value name = %" << BaseV->getNameStr(); - DOUT << "\n"; + DEBUG(errs() << " Result value name = %" << BaseV->getName()); + DEBUG(errs() << "\n"); // If BaseV is a non-zero constant, make sure that it gets inserted into // the preheader, instead of being forward substituted into the uses. We // do this by forcing a BitCast (noop cast) to be inserted into the // preheader in this case. if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) && - !isa(BaseV)) { + isa(BaseV)) { // We want this constant emitted into the preheader! This is just // using cast as a copy so BitCast (no-op cast) is appropriate BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", @@ -1650,15 +1659,15 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // FIXME: Use emitted users to emit other users. BasedUser &User = UsersToProcess.back(); - DOUT << " Examining "; + DEBUG(errs() << " Examining "); if (User.isUseOfPostIncrementedValue) - DOUT << "postinc"; + DEBUG(errs() << "postinc"); else - DOUT << "preinc"; - DOUT << " use "; - DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace, + DEBUG(errs() << "preinc"); + DEBUG(errs() << " use "); + DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace, /*PrintType=*/false)); - DOUT << " in Inst: " << *(User.Inst); + DEBUG(errs() << " in Inst: " << *User.Inst); // If this instruction wants to use the post-incremented value, move it // after the post-inc and use its value instead of the PHI. @@ -1673,7 +1682,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, User.Inst->moveBefore(IVIncInsertPt); } - const SCEV* RewriteExpr = SE->getUnknown(RewriteOp); + const SCEV *RewriteExpr = SE->getUnknown(RewriteOp); if (SE->getEffectiveSCEVType(RewriteOp->getType()) != SE->getEffectiveSCEVType(ReplacedTy)) { @@ -1705,7 +1714,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, // The base has been used to initialize the PHI node but we don't want // it here. if (!ReuseIV.Base->isZero()) { - const SCEV* typedBase = ReuseIV.Base; + const SCEV *typedBase = ReuseIV.Base; if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { // It's possible the original IV is a larger type than the new IV, @@ -1770,10 +1779,10 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, /// set the IV user and stride information and return true, otherwise return /// false. 
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* const * &CondStride) { + const SCEV *const * &CondStride) { for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e && !CondUse; ++Stride) { - std::map::iterator SI = + std::map::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[Stride]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); @@ -1800,7 +1809,7 @@ namespace { const ScalarEvolution *SE; explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - bool operator()(const SCEV* const &LHS, const SCEV* const &RHS) { + bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) { const SCEVConstant *LHSC = dyn_cast(LHS); const SCEVConstant *RHSC = dyn_cast(RHS); if (LHSC && RHSC) { @@ -1843,14 +1852,14 @@ namespace { /// if (v1 < 30) goto loop ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, IVStrideUse* &CondUse, - const SCEV* const* &CondStride) { + const SCEV *const* &CondStride) { // If there's only one stride in the loop, there's nothing to do here. if (IU->StrideOrder.size() < 2) return Cond; // If there are other users of the condition's stride, don't bother // trying to change the condition because the stride will still // remain. - std::map::iterator I = + std::map::iterator I = IU->IVUsesByStride.find(*CondStride); if (I == IU->IVUsesByStride.end() || I->second->Users.size() != 1) @@ -1867,11 +1876,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, const Type *NewCmpTy = NULL; unsigned TyBits = SE->getTypeSizeInBits(CmpTy); unsigned NewTyBits = 0; - const SCEV* *NewStride = NULL; + const SCEV **NewStride = NULL; Value *NewCmpLHS = NULL; Value *NewCmpRHS = NULL; int64_t Scale = 1; - const SCEV* NewOffset = SE->getIntegerSCEV(0, CmpTy); + const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy); if (ConstantInt *C = dyn_cast(Cond->getOperand(1))) { int64_t CmpVal = C->getValue().getSExtValue(); @@ -1883,7 +1892,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Look for a suitable stride / iv as replacement. for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map::iterator SI = + std::map::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[i]); if (!isa(SI->first)) continue; @@ -1942,7 +1951,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewCmpTy = NewCmpLHS->getType(); NewTyBits = SE->getTypeSizeInBits(NewCmpTy); - const Type *NewCmpIntTy = Context->getIntegerType(NewTyBits); + const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits); if (RequiresTypeConversion(NewCmpTy, CmpTy)) { // Check if it is possible to rewrite it using // an iv / stride of a smaller integer type. 
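IntegerType::get above belongs to the same API shuffle as the constant factories a few lines further on: creation moves off LLVMContext member functions onto static methods of the classes themselves. A sketch, assuming Ctx, NewTyBits, and a pointer type PtrTy in scope:

    const Type *IntTy = IntegerType::get(Ctx, NewTyBits);
    Constant *CI = ConstantInt::get(IntTy, 42);     // was Context->getConstantInt
    Constant *CP = ConstantExpr::getIntToPtr(CI, PtrTy);
                                                    // was Context->getConstantExprIntToPtr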
@@ -1963,7 +1972,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector UsersToProcess; - const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -1987,10 +1996,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, NewStride = &IU->StrideOrder[i]; if (!isa(NewCmpTy)) - NewCmpRHS = Context->getConstantInt(NewCmpTy, NewCmpVal); + NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); else { - Constant *CI = Context->getConstantInt(NewCmpIntTy, NewCmpVal); - NewCmpRHS = Context->getConstantExprIntToPtr(CI, NewCmpTy); + Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); + NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); } NewOffset = TyBits == NewTyBits ? SE->getMulExpr(CondUse->getOffset(), @@ -2019,9 +2028,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, // Create a new compare instruction using new stride / iv. ICmpInst *OldCond = Cond; // Insert new compare instruction. - Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS, - L->getHeader()->getName() + ".termcond", - OldCond); + Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, + L->getHeader()->getName() + ".termcond"); // Remove the old compare instruction. The old indvar is probably dead too. DeadInsts.push_back(CondUse->getOperandValToReplace()); @@ -2098,13 +2106,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, SelectInst *Sel = dyn_cast(Cond->getOperand(1)); if (!Sel || !Sel->hasOneUse()) return Cond; - const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa(BackedgeTakenCount)) return Cond; - const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); // Add one to the backedge-taken count to get the trip count. - const SCEV* IterationCount = SE->getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. if (!isa(IterationCount) && !isa(IterationCount)) @@ -2117,13 +2125,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, if (Max->getNumOperands() != 2) return Cond; - const SCEV* MaxLHS = Max->getOperand(0); - const SCEV* MaxRHS = Max->getOperand(1); + const SCEV *MaxLHS = Max->getOperand(0); + const SCEV *MaxRHS = Max->getOperand(1); if (!MaxLHS || MaxLHS != One) return Cond; // Check the relevant induction variable for conformance to // the pattern. - const SCEV* IV = SE->getSCEV(Cond->getOperand(0)); + const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); const SCEVAddRecExpr *AR = dyn_cast(IV); if (!AR || !AR->isAffine() || AR->getStart() != One || @@ -2152,7 +2160,7 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, // Ok, everything looks ok to change the condition into an SLT or SGE and // delete the max calculation. ICmpInst *NewCond = - new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond); + new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp"); // Delete the max calculation instructions. 
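Both comparison rewrites in this stretch use the new ICmpInst constructor, whose insert-before point moved from the trailing parameter to the leading one. Before/after, assuming Value *LHS, *RHS and Instruction *Before in scope:

    // old: Cmp = new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS, "tmp", Before);
    ICmpInst *Cmp = new ICmpInst(Before, ICmpInst::ICMP_EQ, LHS, RHS, "tmp");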
Cond->replaceAllUsesWith(NewCond); @@ -2169,13 +2177,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, /// inside the loop then try to eliminate the cast opeation. void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { - const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa(BackedgeTakenCount)) return; - + for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map::iterator SI = + std::map::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[Stride]); assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); if (!isa(SI->first)) @@ -2209,7 +2217,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { if (TLI) { // If target does not support DestTy natively then do not apply // this transformation. - MVT DVT = TLI->getValueType(DestTy); + EVT DVT = TLI->getValueType(DestTy); if (!TLI->isTypeLegal(DVT)) continue; } @@ -2234,7 +2242,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { ConstantInt *Init = dyn_cast(PH->getIncomingValue(Entry)); if (!Init) continue; - Constant *NewInit = Context->getConstantFP(DestTy, Init->getZExtValue()); + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); BinaryOperator *Incr = dyn_cast(PH->getIncomingValue(Latch)); @@ -2258,7 +2266,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); /* create new increment. '++d' in above example. */ - Constant *CFP = Context->getConstantFP(DestTy, C->getZExtValue()); + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? Instruction::FAdd : Instruction::FSub, @@ -2294,6 +2302,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // one register value. BasicBlock *LatchBlock = L->getLoopLatch(); BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) // Multiple exits, just look at the exit in the latch block if there is one. ExitingBlock = LatchBlock; @@ -2305,7 +2314,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { // Search IVUsesByStride to find Cond's IVUse if there is one. IVStrideUse *CondUse = 0; - const SCEV* const *CondStride = 0; + const SCEV *const *CondStride = 0; ICmpInst *Cond = cast(TermBr->getCondition()); if (!FindIVUserForCond(Cond, CondUse, CondStride)) return; // setcc doesn't use the IV. @@ -2335,7 +2344,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { int64_t SInt = SC->getValue()->getSExtValue(); for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee; ++NewStride) { - std::map::iterator SI = + std::map::iterator SI = IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); if (!isa(SI->first) || SI->first == *CondStride) continue; @@ -2349,7 +2358,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { bool AllUsesAreAddresses = true; bool AllUsesAreOutsideLoop = true; std::vector UsersToProcess; - const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L, + const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, AllUsesAreAddresses, AllUsesAreOutsideLoop, UsersToProcess); @@ -2410,7 +2419,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // If the number of times the loop is executed isn't computable, give up. 
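OptimizeShadowIV, whose FP-constant hunks appear above, targets integer IVs that are only observed through int-to-float casts; it manufactures a parallel FP induction variable (the "IV.S." PHI) so the cast leaves the loop body. A self-contained source-level picture:

    #include <cstdio>

    int main() {
      double sum = 0.0;
      // Before: sum += (double)i re-casts the IV every iteration.
      // After, in source terms: a shadow FP IV tracks i alongside it.
      double d = 0.0;
      for (int i = 0; i != 10; ++i) {
        sum += d;     // use of the FP value, no cast inside the loop
        d += 1.0;     // the FAdd increment mirroring i's step
      }
      std::printf("%f\n", sum);   // prints: 45.000000
      return 0;
    }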
-  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
     return;
@@ -2439,9 +2448,9 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
   // Handle only tests for equality for the moment, and only stride 1.
   if (Cond->getPredicate() != CmpInst::ICMP_EQ)
     return;
-  const SCEV* IV = SE->getSCEV(Cond->getOperand(0));
+  const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
-  const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+  const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
   if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
     return;
 
   // If the RHS of the comparison is defined inside the loop, the rewrite
@@ -2497,7 +2506,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
   Value *startVal = phi->getIncomingValue(inBlock);
   Value *endVal = Cond->getOperand(1);
   // FIXME check for case where both are constant
-  Constant* Zero = Context->getConstantInt(Cond->getOperand(1)->getType(), 0);
+  Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
   BinaryOperator *NewStartVal =
     BinaryOperator::Create(Instruction::Sub, endVal, startVal,
                            "tmp", PreInsertPt);
@@ -2516,11 +2525,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
   Changed = false;
 
   if (!IU->IVUsesByStride.empty()) {
-#ifndef NDEBUG
-    DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart()
-         << "\" ";
-    DEBUG(L->dump());
-#endif
+    DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
+                 << "\" ";
+          L->dump());
 
     // Sort the StrideOrder so we process larger strides first.
     std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(),
@@ -2557,7 +2564,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
     // strides deterministic - not dependent on map order.
     for (unsigned Stride = 0, e = IU->StrideOrder.size();
          Stride != e; ++Stride) {
-      std::map<const SCEV*, IVUsersOfOneStride*>::iterator SI =
+      std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
         IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
       assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
       // FIXME: Generalize to non-affine IV's.
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
index 23757cdb2d29c..837ec59dbbce7 100644
--- a/lib/Transforms/Scalar/LoopUnroll.cpp
+++ b/lib/Transforms/Scalar/LoopUnroll.cpp
@@ -17,9 +17,9 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include <climits>
 
@@ -39,7 +39,7 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
                    "-unroll-threshold loop size is reached."));
 
 namespace {
-  class VISIBILITY_HIDDEN LoopUnroll : public LoopPass {
+  class LoopUnroll : public LoopPass {
   public:
     static char ID; // Pass ID, replacement for typeid
     LoopUnroll() : LoopPass(&ID) {}
@@ -96,10 +96,7 @@ static unsigned ApproximateLoopSize(const Loop *L) {
       // is higher than other instructions. Here 3 and 10 are magic
       // numbers that help one isolated test case from PR2067 without
       // negatively impacting measured benchmarks.
-      if (isa<IntrinsicInst>(I))
-        Size = Size + 3;
-      else
-        Size = Size + 10;
+      Size += isa<IntrinsicInst>(I) ? 3 : 10;
     } else {
       ++Size;
     }
@@ -118,51 +115,48 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   LoopInfo *LI = &getAnalysis<LoopInfo>();
 
   BasicBlock *Header = L->getHeader();
-  DOUT << "Loop Unroll: F[" << Header->getParent()->getName()
-       << "] Loop %" << Header->getName() << "\n";
+  DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
+        << "] Loop %" << Header->getName() << "\n");
+  (void)Header;
 
   // Find trip count
   unsigned TripCount = L->getSmallConstantTripCount();
   unsigned Count = UnrollCount;
-  
+
   // Automatically select an unroll count.
   if (Count == 0) {
     // Conservative heuristic: if we know the trip count, see if we can
     // completely unroll (subject to the threshold, checked below); otherwise
-    // try to find greatest modulo of the trip count which is still under 
+    // try to find greatest modulo of the trip count which is still under
     // threshold value.
-    if (TripCount != 0) {
-      Count = TripCount;
-    } else {
+    if (TripCount == 0)
       return false;
-    }
+    Count = TripCount;
   }
 
   // Enforce the threshold.
   if (UnrollThreshold != NoThreshold) {
     unsigned LoopSize = ApproximateLoopSize(L);
-    DOUT << "  Loop Size = " << LoopSize << "\n";
+    DEBUG(errs() << "  Loop Size = " << LoopSize << "\n");
     uint64_t Size = (uint64_t)LoopSize*Count;
     if (TripCount != 1 && Size > UnrollThreshold) {
-      DOUT << "  Too large to fully unroll with count: " << Count
-           << " because size: " << Size << ">" << UnrollThreshold << "\n";
-      if (UnrollAllowPartial) {
-        // Reduce unroll count to be modulo of TripCount for partial unrolling
-        Count = UnrollThreshold / LoopSize;
-        while (Count != 0 && TripCount%Count != 0) {
-          Count--;
-        }
-        if (Count < 2) {
-          DOUT << "  could not unroll partially\n";
-          return false;
-        } else {
-          DOUT << "  partially unrolling with count: " << Count << "\n";
-        }
-      } else {
-        DOUT << "  will not try to unroll partially because "
-             << "-unroll-allow-partial not given\n";
+      DEBUG(errs() << "  Too large to fully unroll with count: " << Count
+            << " because size: " << Size << ">" << UnrollThreshold << "\n");
+      if (!UnrollAllowPartial) {
+        DEBUG(errs() << "  will not try to unroll partially because "
+              << "-unroll-allow-partial not given\n");
+        return false;
+      }
+      // Reduce unroll count to be modulo of TripCount for partial unrolling
+      Count = UnrollThreshold / LoopSize;
+      while (Count != 0 && TripCount%Count != 0) {
+        Count--;
+      }
+      if (Count < 2) {
+        DEBUG(errs() << "  could not unroll partially\n");
         return false;
       }
+      DEBUG(errs() << "  partially unrolling with count: " << Count << "\n");
     }
   }
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index de5eedf1e84cf..f6de362926032 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/Dominators.h"
@@ -44,8 +45,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <set>
 using namespace llvm;
@@ -56,12 +57,14 @@ STATISTIC(NumSelects , "Number of selects unswitched");
 STATISTIC(NumTrivial , "Number of unswitches that are trivial");
 STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
 
+// The specific value of 50 here was chosen based only on intuition and a
+// few specific examples.
 static cl::opt<unsigned>
 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
-          cl::init(10), cl::Hidden);
+          cl::init(50), cl::Hidden);
 
 namespace {
-  class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass {
+  class LoopUnswitch : public LoopPass {
     LoopInfo *LI;  // Loop information
     LPPassManager *LPM;
 
@@ -112,6 +115,10 @@ namespace {
 
   private:
 
+    virtual void releaseMemory() {
+      UnswitchedVals.clear();
+    }
+
     /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
     /// remove it.
     void RemoveLoopFromWorklist(Loop *L) {
@@ -168,8 +175,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
   if (isa<Constant>(Cond)) return 0;
 
   // TODO: Handle: br (VARIANT|INVARIANT).
-  // TODO: Hoist simple expressions out of loops.
-  if (L->isLoopInvariant(Cond)) return Cond;
+
+  // Hoist simple values out.
+  if (L->makeLoopInvariant(Cond, Changed))
+    return Cond;
 
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
     if (BO->getOpcode() == Instruction::And ||
@@ -214,6 +223,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
 /// and profitable.
 bool LoopUnswitch::processCurrentLoop() {
   bool Changed = false;
+  LLVMContext &Context = currentLoop->getHeader()->getContext();
 
   // Loop over all of the basic blocks in the loop.  If we find an interior
   // block that is branching on a loop-invariant condition, we can unswitch this
@@ -231,7 +241,7 @@ bool LoopUnswitch::processCurrentLoop() {
       Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
                                              currentLoop, Changed);
       if (LoopCond && UnswitchIfProfitable(LoopCond,
-                                           Context->getConstantIntTrue())) {
+                                           ConstantInt::getTrue(Context))) {
         ++NumBranches;
         return true;
       }
@@ -261,7 +271,7 @@ bool LoopUnswitch::processCurrentLoop() {
       Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
                                              currentLoop, Changed);
       if (LoopCond && UnswitchIfProfitable(LoopCond,
-                                           Context->getConstantIntTrue())) {
+                                           ConstantInt::getTrue(Context))) {
         ++NumSelects;
         return true;
       }
@@ -335,6 +345,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
                                               BasicBlock **LoopExit) {
   BasicBlock *Header = currentLoop->getHeader();
   TerminatorInst *HeaderTerm = Header->getTerminator();
+  LLVMContext &Context = Header->getContext();
 
   BasicBlock *LoopExitBB = 0;
   if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
@@ -349,10 +360,10 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
     // this.
     if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
                                              BI->getSuccessor(0)))) {
-      if (Val) *Val = Context->getConstantIntTrue();
+      if (Val) *Val = ConstantInt::getTrue(Context);
     } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
                                                     BI->getSuccessor(1)))) {
-      if (Val) *Val = Context->getConstantIntFalse();
+      if (Val) *Val = ConstantInt::getFalse(Context);
     }
   } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
     // If this isn't a switch on Cond, we can't handle it.
@@ -398,29 +409,14 @@ unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {
   if (IsTrivialUnswitchCondition(LIC))
     return 0;
 
-  // FIXME: This is really overly conservative.  However, more liberal
-  // estimations have thus far resulted in excessive unswitching, which is bad
-  // both in compile time and in code size.  This should be replaced once
-  // someone figures out how to do a good estimation.
-  return currentLoop->getBlocks().size();
-
-  unsigned Cost = 0;
-  // FIXME: this is brain dead.  It should take into consideration code
-  // shrinkage.
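// Illustrative aside (not part of the patch): the partial-unroll selection in
// the LoopUnroll hunk above reduces to "largest count within the size budget
// that evenly divides the trip count, giving up below 2". A minimal sketch,
// with a hypothetical helper name:
static unsigned SelectPartialUnrollCount(unsigned TripCount, unsigned LoopSize,
                                         unsigned Threshold) {
  unsigned Count = Threshold / LoopSize;  // Size budget per unrolled body.
  while (Count != 0 && TripCount % Count != 0)
    --Count;                              // Avoid leaving a remainder loop.
  return Count < 2 ? 0 : Count;           // 0 or 1 means "do not unroll".
}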
+  // FIXME: This is overly conservative because it does not take into
+  // consideration code simplification opportunities.
+  CodeMetrics Metrics;
   for (Loop::block_iterator I = currentLoop->block_begin(),
          E = currentLoop->block_end();
-       I != E; ++I) {
-    BasicBlock *BB = *I;
-    // Do not include empty blocks in the cost calculation.  This happens due
-    // to loop canonicalization and will be removed.
-    if (BB->begin() == BasicBlock::iterator(BB->getTerminator()))
-      continue;
-
-    // Count basic blocks.
-    ++Cost;
-  }
-
-  return Cost;
+       I != E; ++I)
+    Metrics.analyzeBasicBlock(*I);
+  return Metrics.NumInsts;
 }
 
 /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when
@@ -445,9 +441,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
     // FIXME: this should estimate growth by the amount of code shared by the
     // resultant unswitched loops.
     //
-    DOUT << "NOT unswitching loop %"
-         << currentLoop->getHeader()->getName() << ", cost too high: "
-         << currentLoop->getBlocks().size() << "\n";
+    DEBUG(errs() << "NOT unswitching loop %"
+          << currentLoop->getHeader()->getName() << ", cost too high: "
+          << currentLoop->getBlocks().size() << "\n");
     return false;
   }
@@ -506,14 +502,20 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
   // Insert a conditional branch on LIC to the two preheaders.  The original
   // code is the true version and the new code is the false version.
   Value *BranchVal = LIC;
-  if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty)
-    BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt);
-  else if (Val != Context->getConstantIntTrue())
+  if (!isa<ConstantInt>(Val) ||
+      Val->getType() != Type::getInt1Ty(LIC->getContext()))
+    BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp");
+  else if (Val != ConstantInt::getTrue(Val->getContext()))
     // We want to enter the new loop when the condition is true.
     std::swap(TrueDest, FalseDest);
 
   // Insert the new branch.
-  BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+  BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
+
+  // If either edge is critical, split it. This helps preserve LoopSimplify
+  // form for enclosing loops.
+  SplitCriticalEdge(BI, 0, this);
+  SplitCriticalEdge(BI, 1, this);
 }
 
 /// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
@@ -524,10 +526,10 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
 void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
                                             Constant *Val,
                                             BasicBlock *ExitBlock) {
-  DOUT << "loop-unswitch: Trivial-Unswitch loop %"
-       << loopHeader->getName() << " [" << L->getBlocks().size()
-       << " blocks] in Function " << L->getHeader()->getParent()->getName()
-       << " on cond: " << *Val << " == " << *Cond << "\n";
+  DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %"
+        << loopHeader->getName() << " [" << L->getBlocks().size()
+        << " blocks] in Function " << L->getHeader()->getParent()->getName()
+        << " on cond: " << *Val << " == " << *Cond << "\n");
 
   // First step, split the preheader, so that we know that there is a safe place
   // to insert the conditional branch.  We will change loopPreheader to have a
@@ -570,47 +572,11 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
     BasicBlock *ExitBlock = ExitBlocks[i];
-    std::vector<BasicBlock*> Preds(pred_begin(ExitBlock),
-                                   pred_end(ExitBlock));
-
-    for (unsigned j = 0, e = Preds.size(); j != e; ++j) {
-      BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this);
-      BasicBlock* StartBlock = Preds[j];
-      BasicBlock* EndBlock;
-      if (NewExitBlock->getSinglePredecessor() == ExitBlock) {
-        EndBlock = NewExitBlock;
-        NewExitBlock = EndBlock->getSinglePredecessor();
-      } else {
-        EndBlock = ExitBlock;
-      }
-
-      std::set<PHINode*> InsertedPHIs;
-      PHINode* OldLCSSA = 0;
-      for (BasicBlock::iterator I = EndBlock->begin();
-           (OldLCSSA = dyn_cast<PHINode>(I)); ++I) {
-        Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock);
-        PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(),
-                                            OldLCSSA->getName() + ".us-lcssa",
-                                            NewExitBlock->getTerminator());
-        NewLCSSA->addIncoming(OldValue, StartBlock);
-        OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock),
-                                   NewLCSSA);
-        InsertedPHIs.insert(NewLCSSA);
-      }
-
-      BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI();
-      for (BasicBlock::iterator I = NewExitBlock->begin();
-           (OldLCSSA = dyn_cast<PHINode>(I)) &&
-           InsertedPHIs.count(OldLCSSA) == 0;
-           ++I) {
-        PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(),
-                                            OldLCSSA->getName() + ".us-lcssa",
-                                            InsertPt);
-        OldLCSSA->replaceAllUsesWith(NewLCSSA);
-        NewLCSSA->addIncoming(OldLCSSA, NewExitBlock);
-      }
-
-    }
+    SmallVector<BasicBlock*, 8> Preds(pred_begin(ExitBlock),
+                                      pred_end(ExitBlock));
+    SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
+                           ".us-lcssa", this);
   }
-
 }
 
 /// UnswitchNontrivialCondition - We determined that the loop is profitable
@@ -619,10 +585,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
 void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
                                                Loop *L) {
   Function *F = loopHeader->getParent();
-  DOUT << "loop-unswitch: Unswitching loop %"
-       << loopHeader->getName() << " [" << L->getBlocks().size()
-       << " blocks] in Function " << F->getName()
-       << " when '" << *Val << "' == " << *LIC << "\n";
+  DEBUG(errs() << "loop-unswitch: Unswitching loop %"
+        << loopHeader->getName() << " [" << L->getBlocks().size()
+        << " blocks] in Function " << F->getName()
+        << " when '" << *Val << "' == " << *LIC << "\n");
 
   LoopBlocks.clear();
   NewBlocks.clear();
@@ -745,7 +711,7 @@ static void RemoveFromWorklist(Instruction *I,
 static void ReplaceUsesOfWith(Instruction *I, Value *V,
                               std::vector<Instruction*> &Worklist, Loop *L,
                               LPPassManager *LPM) {
-  DOUT << "Replace with '" << *V << "': " << *I;
+  DEBUG(errs() << "Replace with '" << *V << "': " << *I);
 
   // Add uses to the worklist, which may be dead now.
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -788,7 +754,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
     // dominates the latch).
     LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
     Pred->getTerminator()->eraseFromParent();
-    new UnreachableInst(Pred);
+    new UnreachableInst(BB->getContext(), Pred);
 
     // The loop is now broken, remove it from LI.
     RemoveLoopFromHierarchy(L);
@@ -807,7 +773,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
     return;
   }
 
-  DOUT << "Nuking dead block: " << *BB;
+  DEBUG(errs() << "Nuking dead block: " << *BB);
 
   // Remove the instructions in the basic block from the worklist.
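// Illustrative aside (not part of the patch): the SplitExitEdges rewrite above
// replaces the hand-rolled ".us-lcssa" PHI surgery with a single utility call.
// A minimal sketch of the usage pattern, assuming the BasicBlockUtils.h
// signature used at this revision:
static void SplitOneExit(BasicBlock *ExitBlock, Pass *P) {
  SmallVector<BasicBlock*, 8> Preds(pred_begin(ExitBlock),
                                    pred_end(ExitBlock));
  // One new block now merges all predecessors; the utility creates and
  // renames the merge PHIs with the ".us-lcssa" suffix itself.
  SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
                         ".us-lcssa", P);
}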
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
@@ -815,8 +781,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
 
     // Anything that uses the instructions in this basic block should have their
     // uses replaced with undefs.
-    if (!I->use_empty())
-      I->replaceAllUsesWith(Context->getUndef(I->getType()));
+    // If I is not void type then replaceAllUsesWith undef.
+    // This allows ValueHandlers and custom metadata to adjust themselves.
+    if (!I->getType()->isVoidTy())
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
   }
 
   // If this is the edge to the header block for a loop, remove the loop and
@@ -897,15 +865,18 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
   // selects, switches.
   std::vector<User*> Users(LIC->use_begin(), LIC->use_end());
   std::vector<Instruction*> Worklist;
+  LLVMContext &Context = Val->getContext();
+
   // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC
   // in the loop with the appropriate one directly.
-  if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) {
+  if (IsEqual || (isa<ConstantInt>(Val) &&
+      Val->getType() == Type::getInt1Ty(Val->getContext()))) {
     Value *Replacement;
     if (IsEqual)
       Replacement = Val;
     else
-      Replacement = Context->getConstantInt(Type::Int1Ty,
+      Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
                                      !cast<ConstantInt>(Val)->getZExtValue());
 
     for (unsigned i = 0, e = Users.size(); i != e; ++i)
@@ -937,27 +908,35 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
         // FIXME: This is a hack.  We need to keep the successor around
         // and hooked up so as to preserve the loop structure, because
         // trying to update it is complicated.  So instead we preserve the
-        // loop structure and put the block on an dead code path.
-
-        BasicBlock *SISucc = SI->getSuccessor(i);
-        BasicBlock* Old = SI->getParent();
-        BasicBlock* Split = SplitBlock(Old, SI, this);
-
-        Instruction* OldTerm = Old->getTerminator();
-        BranchInst::Create(Split, SISucc,
-                           Context->getConstantIntTrue(), OldTerm);
-
-        LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L);
-        Old->getTerminator()->eraseFromParent();
-
-        PHINode *PN;
-        for (BasicBlock::iterator II = SISucc->begin();
-             (PN = dyn_cast<PHINode>(II)); ++II) {
-          Value *InVal = PN->removeIncomingValue(Split, false);
-          PN->addIncoming(InVal, Old);
-        }
-
-        SI->removeCase(i);
+        // loop structure and put the block on a dead code path.
+        BasicBlock *Switch = SI->getParent();
+        SplitEdge(Switch, SI->getSuccessor(i), this);
+        // Compute the successors instead of relying on the return value
+        // of SplitEdge, since it may have split the switch successor
+        // after PHI nodes.
+        BasicBlock *NewSISucc = SI->getSuccessor(i);
+        BasicBlock *OldSISucc = *succ_begin(NewSISucc);
+        // Create an "unreachable" destination.
+        BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
+                                               Switch->getParent(),
+                                               OldSISucc);
+        new UnreachableInst(Context, Abort);
+        // Force the new case destination to branch to the "unreachable"
+        // block while maintaining a (dead) CFG edge to the old block.
+        NewSISucc->getTerminator()->eraseFromParent();
+        BranchInst::Create(Abort, OldSISucc,
+                           ConstantInt::getTrue(Context), NewSISucc);
+        // Release the PHI operands for this edge.
+        for (BasicBlock::iterator II = NewSISucc->begin();
+             PHINode *PN = dyn_cast<PHINode>(II); ++II)
+          PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
+                               UndefValue::get(PN->getType()));
+        // Tell the domtree about the new block. We don't fully update the
+        // domtree here -- instead we force it to do a full recomputation
+        // after the pass is complete -- but we do need to inform it of
+        // new blocks.
+        if (DT)
+          DT->addNewBlock(Abort, NewSISucc);
 
         break;
       }
     }
@@ -971,7 +950,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
   SimplifyCode(Worklist, L);
 }
 
-/// SimplifyCode - Okay, now that we have simplified some instructions in the 
+/// SimplifyCode - Okay, now that we have simplified some instructions in the
 /// loop, walk over it and constant prop, dce, and fold control flow where
 /// possible. Note that this is effectively a very simple loop-structure-aware
 /// optimizer. During processing of this loop, L could very well be deleted, so
@@ -986,14 +965,14 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
     Worklist.pop_back();
 
     // Simple constant folding.
-    if (Constant *C = ConstantFoldInstruction(I)) {
+    if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {
      ReplaceUsesOfWith(I, C, Worklist, L, LPM);
      continue;
    }
 
    // Simple DCE.
    if (isInstructionTriviallyDead(I)) {
-      DOUT << "Remove dead instruction '" << *I;
+      DEBUG(errs() << "Remove dead instruction '" << *I);
 
      // Add uses to the worklist, which may be dead now.
      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -1017,10 +996,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
      break;
    case Instruction::And:
      if (isa<ConstantInt>(I->getOperand(0)) &&
-          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
+          // constant -> RHS
+          I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))
        cast<BinaryOperator>(I)->swapOperands();
      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
-        if (CB->getType() == Type::Int1Ty) {
+        if (CB->getType() == Type::getInt1Ty(I->getContext())) {
          if (CB->isOne())      // X & 1 -> X
            ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);
          else                  // X & 0 -> 0
@@ -1030,10 +1010,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
      break;
    case Instruction::Or:
      if (isa<ConstantInt>(I->getOperand(0)) &&
-          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS
+          // constant -> RHS
+          I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))
        cast<BinaryOperator>(I)->swapOperands();
      if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))
-        if (CB->getType() == Type::Int1Ty) {
+        if (CB->getType() == Type::getInt1Ty(I->getContext())) {
          if (CB->isOne())       // X | 1 -> 1
            ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);
          else                   // X | 0 -> X
@@ -1052,8 +1033,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
      if (!SinglePred) continue;  // Nothing to do.
      assert(SinglePred == Pred && "CFG broken");
 
-      DOUT << "Merging blocks: " << Pred->getName() << " <- "
-           << Succ->getName() << "\n";
+      DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- "
+            << Succ->getName() << "\n");
 
      // Resolve any single entry PHI nodes in Succ.
      while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
@@ -1080,7 +1061,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
      // remove dead blocks.
      break;  // FIXME: Enable.
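// Illustrative aside (not part of the patch): the switch-case handling above
// parks a dead case on an "us-unreachable" block while keeping a never-taken
// CFG edge to the old successor, so loop structure stays intact. A minimal
// sketch of that shape, with a hypothetical helper name:
static void RedirectCaseToAbort(BasicBlock *NewSucc, BasicBlock *OldSucc,
                                LLVMContext &Ctx, Function *F) {
  BasicBlock *Abort = BasicBlock::Create(Ctx, "us-unreachable", F, OldSucc);
  new UnreachableInst(Ctx, Abort);
  NewSucc->getTerminator()->eraseFromParent();
  // Branch on constant true: Abort is always taken, OldSucc is a dead edge.
  BranchInst::Create(Abort, OldSucc, ConstantInt::getTrue(Ctx), NewSucc);
}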
- DOUT << "Folded branch: " << *BI; + DEBUG(errs() << "Folded branch: " << *BI); BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue()); BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue()); DeadSucc->removePredecessor(BI->getParent(), true); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3c7a5ab8f4d38..c922814833c59 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -24,29 +24,33 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include using namespace llvm; STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); +STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); /// isBytewiseValue - If the specified value can be set by repeating the same /// byte in memory, return the i8 value that it is represented with. This is /// true for all i8 values obviously, but is also true for i32 0, i32 -1, /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated /// byte store (e.g. i16 0x1234), return null. -static Value *isBytewiseValue(Value *V, LLVMContext* Context) { +static Value *isBytewiseValue(Value *V) { + LLVMContext &Context = V->getContext(); + // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType() == Type::Int8Ty) return V; + if (V->getType() == Type::getInt8Ty(Context)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. if (ConstantFP *CFP = dyn_cast(V)) { - if (CFP->getType() == Type::FloatTy) - V = Context->getConstantExprBitCast(CFP, Type::Int32Ty); - if (CFP->getType() == Type::DoubleTy) - V = Context->getConstantExprBitCast(CFP, Type::Int64Ty); + if (CFP->getType()->isFloatTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context)); + if (CFP->getType()->isDoubleTy()) + V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context)); // Don't handle long double formats, which have strange constraints. 
   }
 
@@ -69,7 +73,7 @@ static Value *isBytewiseValue(Value *V, LLVMContext* Context) {
       if (Val != Val2)
         return 0;
     }
-    return Context->getConstantInt(Val);
+    return ConstantInt::get(Context, Val);
   }
 }
 
@@ -271,6 +275,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
   if (Start < I->Start) {
     I->Start = Start;
     I->StartPtr = SI->getPointerOperand();
+    I->Alignment = SI->getAlignment();
   }
 
   // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
@@ -295,8 +300,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {
 //===----------------------------------------------------------------------===//
 
 namespace {
-
-  class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass {
+  class MemCpyOpt : public FunctionPass {
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -309,16 +313,15 @@ namespace {
       AU.addRequired<DominatorTree>();
       AU.addRequired<MemoryDependenceAnalysis>();
       AU.addRequired<AliasAnalysis>();
-      AU.addRequired<TargetData>();
       AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<MemoryDependenceAnalysis>();
-      AU.addPreserved<TargetData>();
     }
 
     // Helper functions
-    bool processStore(StoreInst *SI, BasicBlock::iterator& BBI);
-    bool processMemCpy(MemCpyInst* M);
-    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C);
+    bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
+    bool processMemCpy(MemCpyInst *M);
+    bool processMemMove(MemMoveInst *M);
+    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);
     bool iterateOnFunction(Function &F);
   };
 
@@ -337,27 +340,31 @@ static RegisterPass<MemCpyOpt> X("memcpyopt",
 /// some other patterns to fold away.  In particular, this looks for stores to
 /// neighboring locations of memory.  If it sees enough consecutive ones
 /// (currently 4) it attempts to merge them together into a memcpy/memset.
-bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {
+bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (SI->isVolatile()) return false;
 
+  LLVMContext &Context = SI->getContext();
+
   // There are two cases that are interesting for this code to handle: memcpy
   // and memset.  Right now we only handle memset.
 
   // Ensure that the value being stored is something that can be memset'able a
   // byte at a time like "0" or "-1" or any width, as well as things like
   // 0xA0A0A0A0 and 0.0.
-  Value *ByteVal = isBytewiseValue(SI->getOperand(0), Context);
+  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
   if (!ByteVal)
     return false;
 
-  TargetData &TD = getAnalysis<TargetData>();
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false;
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  Module *M = SI->getParent()->getParent()->getParent();
 
   // Okay, so we now have a single store that can be splatable.  Scan to find
   // all subsequent stores of the same value to offset from the same pointer.
   // Join these together into ranges, so we can decide whether contiguous blocks
   // are stored.
-  MemsetRanges Ranges(TD);
+  MemsetRanges Ranges(*TD);
 
   Value *StartPtr = SI->getPointerOperand();
 
@@ -385,12 +392,12 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     if (NextStore->isVolatile()) break;
 
     // Check to see if this stored value is of the same byte-splattable value.
-    if (ByteVal != isBytewiseValue(NextStore->getOperand(0), Context))
+    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
       break;
 
     // Check to see if this store is to a constant offset from the start ptr.
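// Illustrative aside (not part of the patch): MemsetRanges::addStore grows a
// half-open [Start, End) interval per run of stores; the fix above also makes
// the range adopt the new leftmost store's alignment. A minimal sketch of the
// interval growth, with hypothetical names:
struct RangeSketch { int64_t Start, End; unsigned Alignment; };
static void GrowRange(RangeSketch &R, int64_t Start, int64_t End,
                      unsigned Align) {
  if (Start < R.Start) {
    R.Start = Start;      // New lowest-addressed store: its pointer and
    R.Alignment = Align;  // alignment now describe the whole range.
  }
  if (End > R.End)
    R.End = End;
}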
     int64_t Offset;
-    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD))
+    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
       break;
 
     Ranges.addStore(Offset, NextStore);
@@ -405,7 +412,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   // store as well.  We try to avoid this unless there is at least something
   // interesting as a small compile-time optimization.
   Ranges.addStore(0, SI);
-
 
   Function *MemSetF = 0;
 
@@ -419,7 +425,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     if (Range.TheStores.size() == 1) continue;
 
     // If it is profitable to lower this range to memset, do so now.
-    if (!Range.isProfitableToUseMemset(TD))
+    if (!Range.isProfitableToUseMemset(*TD))
       continue;
 
     // Otherwise, we do want to transform this!  Create a new memset.  We put
@@ -429,37 +435,38 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     BasicBlock::iterator InsertPt = BI;
 
     if (MemSetF == 0) {
-      const Type *Tys[] = {Type::Int64Ty};
-      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent()
-                                          ->getParent(), Intrinsic::memset,
-                                          Tys, 1);
-   }
+      const Type *Ty = Type::getInt64Ty(Context);
+      MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1);
+    }
 
     // Get the starting pointer of the block.
     StartPtr = Range.StartPtr;
 
     // Cast the start ptr to be i8* as memset requires.
-    const Type *i8Ptr = Context->getPointerTypeUnqual(Type::Int8Ty);
+    const Type *i8Ptr = Type::getInt8PtrTy(Context);
     if (StartPtr->getType() != i8Ptr)
-      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(),
+      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                  InsertPt);
 
     Value *Ops[] = {
       StartPtr, ByteVal,   // Start, value
-      Context->getConstantInt(Type::Int64Ty, Range.End-Range.Start),  // size
-      Context->getConstantInt(Type::Int32Ty, Range.Alignment)   // align
+      // size
+      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
+      // align
+      ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)
     };
     Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt);
-    DEBUG(cerr << "Replace stores:\n";
+    DEBUG(errs() << "Replace stores:\n";
           for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
-            cerr << *Range.TheStores[i];
-          cerr << "With: " << *C); C=C;
+            errs() << *Range.TheStores[i];
+          errs() << "With: " << *C); C=C;
 
     // Don't invalidate the iterator
     BBI = BI;
 
     // Zap all the stores.
-    for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(),
+    for (SmallVector<StoreInst*, 16>::const_iterator
+         SI = Range.TheStores.begin(),
          SE = Range.TheStores.end(); SI != SE; ++SI)
       (*SI)->eraseFromParent();
     ++NumMemSetInfer;
@@ -490,29 +497,30 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Deliberately get the source and destination with bitcasts stripped away,
   // because we'll need to do type comparisons based on the underlying type.
-  Value* cpyDest = cpy->getDest();
-  Value* cpySrc = cpy->getSource();
+  Value *cpyDest = cpy->getDest();
+  Value *cpySrc = cpy->getSource();
   CallSite CS = CallSite::get(C);
 
   // We need to be able to reason about the size of the memcpy, so we require
   // that it be a constant.
-  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
+  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
   if (!cpyLength)
     return false;
 
   // Require that src be an alloca.  This simplifies the reasoning considerably.
-  AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc);
+  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
   if (!srcAlloca)
     return false;
 
   // Check that all of src is copied to dest.
-  TargetData& TD = getAnalysis<TargetData>();
+  TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD) return false;
 
-  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
+  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
   if (!srcArraySize)
     return false;
 
-  uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) *
+  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
     srcArraySize->getZExtValue();
 
   if (cpyLength->getZExtValue() < srcSize)
@@ -521,25 +529,25 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // Check that accessing the first srcSize bytes of dest will not cause a
   // trap.  Otherwise the transform is invalid since it might cause a trap
   // to occur earlier than it otherwise would.
-  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) {
+  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
     // The destination is an alloca.  Check it is larger than srcSize.
-    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
+    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
     if (!destArraySize)
       return false;
 
-    uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) *
+    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
       destArraySize->getZExtValue();
 
     if (destSize < srcSize)
       return false;
-  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) {
+  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
     // If the destination is an sret parameter then only accesses that are
     // outside of the returned struct type can trap.
     if (!A->hasStructRetAttr())
       return false;
 
-    const Type* StructTy = cast<PointerType>(A->getType())->getElementType();
-    uint64_t destSize = TD.getTypeAllocSize(StructTy);
+    const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+    uint64_t destSize = TD->getTypeAllocSize(StructTy);
 
     if (destSize < srcSize)
       return false;
@@ -554,14 +562,14 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                    srcAlloca->use_end());
   while (!srcUseList.empty()) {
-    User* UI = srcUseList.back();
+    User *UI = srcUseList.back();
     srcUseList.pop_back();
 
     if (isa<BitCastInst>(UI)) {
       for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
-    } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) {
+    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
      if (G->hasAllZeroIndices())
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
            I != E; ++I)
@@ -575,8 +583,8 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Since we're changing the parameter to the callsite, we need to make sure
   // that what would be the new parameter dominates the callsite.
-  DominatorTree& DT = getAnalysis<DominatorTree>();
-  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest))
+  DominatorTree &DT = getAnalysis<DominatorTree>();
+  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
     if (!DT.dominates(cpyDestInst, C))
       return false;
 
@@ -584,7 +592,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
   // unexpected manner, for example via a global, which we deduce from
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
-  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
       AliasAnalysis::NoModRef)
     return false;
@@ -597,11 +605,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
       cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                             cpyDest->getName(), C);
       changedArgument = true;
-      if (CS.getArgument(i)->getType() != cpyDest->getType())
-        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
-                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
-      else
+      if (CS.getArgument(i)->getType() == cpyDest->getType())
         CS.setArgument(i, cpyDest);
+      else
+        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
+                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
     }
 
   if (!changedArgument)
@@ -609,7 +617,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 
   // Drop any cached information about the call, because we may have changed
   // its dependence information by changing its parameter.
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
   MD.removeInstruction(C);
 
   // Remove the memcpy
@@ -624,22 +632,22 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
 /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
 /// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
 ///  This allows later passes to remove the first memcpy altogether.
-bool MemCpyOpt::processMemCpy(MemCpyInst* M) {
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
+bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
+  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
 
   // There are two possible optimizations we can do for memcpy:
-  //   a) memcpy-memcpy xform which exposes redundancy for DSE
-  //   b) call-memcpy xform for return slot optimization
+  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
+  //   b) call-memcpy xform for return slot optimization.
   MemDepResult dep = MD.getDependency(M);
   if (!dep.isClobber())
     return false;
   if (!isa<MemCpyInst>(dep.getInst())) {
-    if (CallInst* C = dyn_cast<CallInst>(dep.getInst()))
+    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
       return performCallSlotOptzn(M, C);
     return false;
   }
 
-  MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst());
+  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
 
   // We can only transform memcpys where the dest of one is the source of the
   // other.
@@ -648,8 +656,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
 
   // Second, the length of the memcpys must be the same, or the preceding one
   // must be larger than the following one.
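// Illustrative aside (not part of the patch): what the memcpy-memcpy xform
// buys, in C terms. Rewriting the dependent copy to read the original source
// exposes the first copy to dead-store elimination:
#include <cstring>
void ForwardCopies(char *A, char *B, char *C) {
  std::memcpy(B, A, 64);
  std::memcpy(C, A, 64);  // was: memcpy(C, B, 64); B may now be dead.
}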
-  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength());
-  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength());
+  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
   if (!C1 || !C2)
     return false;
 
@@ -661,7 +669,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
 
   // Finally, we have to make sure that the dest of the second does not
   // alias the source of the first
-  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
       AliasAnalysis::NoAlias)
     return false;
@@ -673,17 +681,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
     return false;
 
   // If all checks passed, then we can transform these memcpys.
-  const Type *Tys[1];
-  Tys[0] = M->getLength()->getType();
-  Function* MemCpyFun = Intrinsic::getDeclaration(
+  const Type *Ty = M->getLength()->getType();
+  Function *MemCpyFun = Intrinsic::getDeclaration(
                                  M->getParent()->getParent()->getParent(),
-                                 M->getIntrinsicID(), Tys, 1);
+                                 M->getIntrinsicID(), &Ty, 1);
 
   Value *Args[4] = {
    M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()
  };
 
-  CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
+  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);
 
   // If C and M don't interfere, then this is a valid transformation.  If they
@@ -702,41 +709,78 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   return false;
 }
 
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
-bool MemCpyOpt::runOnFunction(Function& F) {
+/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
+/// are guaranteed not to alias.
+bool MemCpyOpt::processMemMove(MemMoveInst *M) {
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  // If the memmove is a constant size, use it for the alias query, this allows
+  // us to optimize things like: memmove(P, P+64, 64);
+  uint64_t MemMoveSize = ~0ULL;
+  if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()))
+    MemMoveSize = Len->getZExtValue();
 
-  bool changed = false;
-  bool shouldContinue = true;
+  // See if the pointers alias.
+  if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) !=
+      AliasAnalysis::NoAlias)
+    return false;
 
-  while (shouldContinue) {
-    shouldContinue = iterateOnFunction(F);
-    changed |= shouldContinue;
-  }
+  DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
 
-  return changed;
-}
+  // If not, then we know we can transform this.
+  Module *Mod = M->getParent()->getParent()->getParent();
+  const Type *Ty = M->getLength()->getType();
+  M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1));
 
+  // MemDep may have overly conservative information about this instruction;
+  // just conservatively flush it from the cache.
+  getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M);
 
-// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN
+  ++NumMoveToCpy;
+  return true;
+}
+
+
+// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.
 bool MemCpyOpt::iterateOnFunction(Function &F) {
-  bool changed_function = false;
+  bool MadeChange = false;
 
-  // Walk all instruction in the function
+  // Walk all instructions in the function.
   for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
-      // Avoid invalidating the iterator
-      Instruction* I = BI++;
+      // Avoid invalidating the iterator.
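// Illustrative aside (not part of the patch): processMemMove in C terms. With
// a constant length the alias query can prove the ranges are disjoint, so the
// overlap tolerance of memmove is unnecessary:
#include <cstring>
void ShiftWindow(char *P) {
  std::memmove(P, P + 64, 64);  // [P,P+64) and [P+64,P+128) never overlap,
                                // so this becomes memcpy(P, P + 64, 64).
}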
+      Instruction *I = BI++;
 
       if (StoreInst *SI = dyn_cast<StoreInst>(I))
-        changed_function |= processStore(SI, BI);
-      else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) {
-        changed_function |= processMemCpy(M);
+        MadeChange |= processStore(SI, BI);
+      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
+        MadeChange |= processMemCpy(M);
+      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) {
+        if (processMemMove(M)) {
+          --BI;  // Reprocess the new memcpy.
+          MadeChange = true;
+        }
       }
     }
   }
 
-  return changed_function;
+  return MadeChange;
+}
+
+// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
+// function.
+//
+bool MemCpyOpt::runOnFunction(Function &F) {
+  bool MadeChange = false;
+  while (1) {
+    if (!iterateOnFunction(F))
+      break;
+    MadeChange = true;
+  }
+
+  return MadeChange;
 }
+
+
+
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index fa60a9dba3b55..e6ffac251b7bb 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -31,9 +31,9 @@
 #include "llvm/Pass.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include <algorithm>
@@ -46,7 +46,7 @@ STATISTIC(NumAnnihil, "Number of expr tree annihilated");
 STATISTIC(NumFactor , "Number of multiplies factored");
 
 namespace {
-  struct VISIBILITY_HIDDEN ValueEntry {
+  struct ValueEntry {
     unsigned Rank;
     Value *Op;
     ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {}
@@ -61,17 +61,17 @@ namespace {
 ///
 static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) {
   Module *M = I->getParent()->getParent()->getParent();
-  cerr << Instruction::getOpcodeName(I->getOpcode()) << " "
+  errs() << Instruction::getOpcodeName(I->getOpcode()) << " "
        << *Ops[0].Op->getType();
   for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-    WriteAsOperand(*cerr.stream() << " ", Ops[i].Op, false, M);
-    cerr << "," << Ops[i].Rank;
+    WriteAsOperand(errs() << " ", Ops[i].Op, false, M);
+    errs() << "," << Ops[i].Rank;
   }
 }
 #endif
 
 namespace {
-  class VISIBILITY_HIDDEN Reassociate : public FunctionPass {
+  class Reassociate : public FunctionPass {
     std::map<BasicBlock*, unsigned> RankMap;
     std::map<AssertingVH<>, unsigned> ValueRankMap;
     bool MadeChange;
@@ -181,8 +181,8 @@ unsigned Reassociate::getRank(Value *V) {
       (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))
     ++Rank;
 
-  //DOUT << "Calculated Rank[" << V->getName() << "] = "
-  //     << Rank << "\n";
+  //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = "
+  //             << Rank << "\n");
 
   return CachedRank = Rank;
 }
@@ -200,8 +200,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
 ///
 static Instruction *LowerNegateToMultiply(Instruction *Neg,
                          std::map<AssertingVH<>, unsigned> &ValueRankMap,
-                         LLVMContext* Context) {
-  Constant *Cst = Context->getConstantIntAllOnesValue(Neg->getType());
+                         LLVMContext &Context) {
+  Constant *Cst = Constant::getAllOnesValue(Neg->getType());
 
   Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
   ValueRankMap.erase(Neg);
@@ -222,7 +222,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
          isReassociableOp(RHS, I->getOpcode()) &&
          "Not an expression that needs linearization?");
 
-  DOUT << "Linear" << *LHS << *RHS << *I;
+  DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');
 
   // Move the RHS instruction to live immediately before I, avoiding breaking
   // dominator properties.
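// Illustrative aside (not part of the patch): LinearizeExpr establishes the
// shape ((A+B)+(C+D)) -> (((A+B)+C)+D), so only left operands may be interior
// tree nodes. A hypothetical leaf-collection walk over that shape, mirroring
// what LinearizeExprTree relies on:
static void CollectLeaves(Value *V, unsigned Opcode,
                          SmallVectorImpl<Value*> &Leaves) {
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
    if (BO->getOpcode() == Opcode) {
      CollectLeaves(BO->getOperand(0), Opcode, Leaves);  // Interior side.
      Leaves.push_back(BO->getOperand(1));               // Leaf side.
      return;
    }
  Leaves.push_back(V);
}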
@@ -235,7 +235,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {
   ++NumLinear;
   MadeChange = true;
 
-  DOUT << "Linearized: " << *I;
+  DEBUG(errs() << "Linearized: " << *I << '\n');
 
   // If D is part of this expression tree, tail recurse.
   if (isReassociableOp(I->getOperand(1), I->getOpcode()))
@@ -256,6 +256,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
                                     std::vector<ValueEntry> &Ops) {
   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
   unsigned Opcode = I->getOpcode();
+  LLVMContext &Context = I->getContext();
 
   // First step, linearize the expression if it is in ((A+B)+(C+D)) form.
   BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
@@ -284,8 +285,8 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
       Ops.push_back(ValueEntry(getRank(RHS), RHS));
 
       // Clear the leaves out.
-      I->setOperand(0, Context->getUndef(I->getType()));
-      I->setOperand(1, Context->getUndef(I->getType()));
+      I->setOperand(0, UndefValue::get(I->getType()));
+      I->setOperand(1, UndefValue::get(I->getType()));
       return;
     } else {
       // Turn X+(Y+Z) -> (Y+Z)+X
@@ -320,7 +321,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
   Ops.push_back(ValueEntry(getRank(RHS), RHS));
 
   // Clear the RHS leaf out.
-  I->setOperand(1, Context->getUndef(I->getType()));
+  I->setOperand(1, UndefValue::get(I->getType()));
 }
 
 // RewriteExprTree - Now that the operands for this expression tree are
@@ -333,10 +334,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
     if (I->getOperand(0) != Ops[i].Op ||
         I->getOperand(1) != Ops[i+1].Op) {
       Value *OldLHS = I->getOperand(0);
-      DOUT << "RA: " << *I;
+      DEBUG(errs() << "RA: " << *I << '\n');
       I->setOperand(0, Ops[i].Op);
       I->setOperand(1, Ops[i+1].Op);
-      DOUT << "TO: " << *I;
+      DEBUG(errs() << "TO: " << *I << '\n');
       MadeChange = true;
       ++NumChanged;
 
@@ -349,9 +350,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
   assert(i+2 < Ops.size() && "Ops index out of range!");
 
   if (I->getOperand(1) != Ops[i].Op) {
-    DOUT << "RA: " << *I;
+    DEBUG(errs() << "RA: " << *I << '\n');
     I->setOperand(1, Ops[i].Op);
-    DOUT << "TO: " << *I;
+    DEBUG(errs() << "TO: " << *I << '\n');
     MadeChange = true;
     ++NumChanged;
   }
@@ -373,7 +374,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 // version of the value is returned, and BI is left pointing at the instruction
 // that should be processed next by the reassociation pass.
 //
-static Value *NegateValue(Value *V, Instruction *BI) {
+static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
   // We are trying to expose opportunity for reassociation.  One of the things
   // that we want to do to achieve this is to push a negation as deep into an
   // expression chain as possible, to expose the add instructions.  In practice,
@@ -386,8 +387,8 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
       // Push the negates through the add.
-      I->setOperand(0, NegateValue(I->getOperand(0), BI));
-      I->setOperand(1, NegateValue(I->getOperand(1), BI));
+      I->setOperand(0, NegateValue(Context, I->getOperand(0), BI));
+      I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));
 
       // We must move the add instruction here, because the neg instructions do
       // not dominate the old add instruction in general.  By moving it, we are
@@ -407,7 +408,7 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
 
 /// ShouldBreakUpSubtract - Return true if we should break up this subtract of
 /// X-Y into (X + -Y).
-static bool ShouldBreakUpSubtract(Instruction *Sub) {
+static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
   // If this is a negation, we can't split it up!
   if (BinaryOperator::isNeg(Sub))
     return false;
@@ -431,7 +432,7 @@ static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
 /// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
 /// only used by an add, transform this into (X+(0-Y)) to promote better
 /// reassociation.
-static Instruction *BreakUpSubtract(Instruction *Sub,
+static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
                          std::map<AssertingVH<>, unsigned> &ValueRankMap) {
   // Convert a subtract into an add and a neg instruction... so that sub
   // instructions can be commuted with other add instructions...
@@ -439,7 +440,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
   // Calculate the negative value of Operand 1 of the sub instruction...
   // and set it as the RHS of the add instruction we just made...
   //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);
   Instruction *New =
     BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
   New->takeName(Sub);
@@ -449,7 +450,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
   Sub->replaceAllUsesWith(New);
   Sub->eraseFromParent();
 
-  DOUT << "Negated: " << *New;
+  DEBUG(errs() << "Negated: " << *New << '\n');
   return New;
 }
 
@@ -458,16 +459,16 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
 /// reassociation.
 static Instruction *ConvertShiftToMul(Instruction *Shl,
                          std::map<AssertingVH<>, unsigned> &ValueRankMap,
-                         LLVMContext* Context) {
+                         LLVMContext &Context) {
   // If an operand of this shift is a reassociable multiply, or if the shift
   // is used by a reassociable multiply or add, turn into a multiply.
   if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
       (Shl->hasOneUse() &&
       (isReassociableOp(Shl->use_back(), Instruction::Mul) ||
        isReassociableOp(Shl->use_back(), Instruction::Add)))) {
-    Constant *MulCst = Context->getConstantInt(Shl->getType(), 1);
+    Constant *MulCst = ConstantInt::get(Shl->getType(), 1);
     MulCst =
-      Context->getConstantExprShl(MulCst, cast<Constant>(Shl->getOperand(1)));
+      ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));
 
     Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,
                                                  "", Shl);
@@ -567,7 +568,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
   if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))
     if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {
       Ops.pop_back();
-      Ops.back().Op = Context->getConstantExpr(Opcode, V1, V2);
+      Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);
       return OptimizeExpression(I, Ops);
     }
 
@@ -623,10 +624,10 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
       if (FoundX != i) {
         if (Opcode == Instruction::And) {   // ...&X&~X = 0
           ++NumAnnihil;
-          return Context->getNullValue(X->getType());
+          return Constant::getNullValue(X->getType());
         } else if (Opcode == Instruction::Or) {   // ...|X|~X = -1
           ++NumAnnihil;
-          return Context->getConstantIntAllOnesValue(X->getType());
+          return Constant::getAllOnesValue(X->getType());
         }
       }
     }
@@ -645,7 +646,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
         assert(Opcode == Instruction::Xor);
         if (e == 2) {
           ++NumAnnihil;
-          return Context->getNullValue(Ops[0].Op->getType());
+          return Constant::getNullValue(Ops[0].Op->getType());
         }
         // ... X^X -> ...
         Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
@@ -670,7 +671,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
         // Remove X and -X from the operand list.
         if (Ops.size() == 2) {
           ++NumAnnihil;
-          return Context->getNullValue(X->getType());
+          return Constant::getNullValue(X->getType());
         } else {
           Ops.erase(Ops.begin()+i);
           if (i < FoundX)
@@ -727,7 +728,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
 
   // If any factor occurred more than one time, we can pull it out.
   if (MaxOcc > 1) {
-    DOUT << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n";
+    DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n");
 
     // Create a new instruction that uses the MaxOccVal twice.  If we don't do
     // this, we could otherwise run into situations where removing a factor
@@ -781,6 +782,8 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
 /// ReassociateBB - Inspect all of the instructions in this basic block,
 /// reassociating them as we go.
 void Reassociate::ReassociateBB(BasicBlock *BB) {
+  LLVMContext &Context = BB->getContext();
+
   for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
     Instruction *BI = BBI++;
     if (BI->getOpcode() == Instruction::Shl &&
@@ -798,8 +801,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
     // If this is a subtract instruction which is not already in negate form,
     // see if we can convert it to X+-Y.
     if (BI->getOpcode() == Instruction::Sub) {
-      if (ShouldBreakUpSubtract(BI)) {
-        BI = BreakUpSubtract(BI, ValueRankMap);
+      if (ShouldBreakUpSubtract(Context, BI)) {
+        BI = BreakUpSubtract(Context, BI, ValueRankMap);
         MadeChange = true;
       } else if (BinaryOperator::isNeg(BI)) {
         // Otherwise, this is a negation.  See if the operand is a multiply tree
@@ -838,7 +841,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
   std::vector<ValueEntry> Ops;
   LinearizeExprTree(I, Ops);
 
-  DOUT << "RAIn:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n";
+  DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << "\n");
 
   // Now that we have linearized the tree to a list and have gathered all of
   // the operands and their ranks, sort the operands by their rank.  Use a
@@ -853,7 +856,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
   if (Value *V = OptimizeExpression(I, Ops)) {
     // This expression tree simplified to something that isn't a tree,
     // eliminate it.
- DOUT << "Reassoc to scalar: " << *V << "\n"; + DEBUG(errs() << "Reassoc to scalar: " << *V << "\n"); I->replaceAllUsesWith(V); RemoveDeadBinaryOp(I); return; @@ -871,7 +874,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) { Ops.pop_back(); } - DOUT << "RAOut:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n"; + DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << "\n"); if (Ops.size() == 1) { // This expression tree simplified to something that isn't a tree, diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index ac95d25b7f7fb..99e12522ce0c7 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -26,7 +26,6 @@ #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CFG.h" #include using namespace llvm; @@ -35,7 +34,7 @@ STATISTIC(NumRegsDemoted, "Number of registers demoted"); STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); namespace { - struct VISIBILITY_HIDDEN RegToMem : public FunctionPass { + struct RegToMem : public FunctionPass { static char ID; // Pass identification, replacement for typeid RegToMem() : FunctionPass(&ID) {} @@ -44,73 +43,17 @@ namespace { AU.addPreservedID(BreakCriticalEdgesID); } - bool valueEscapes(Instruction* i) { - BasicBlock* bb = i->getParent(); - for (Value::use_iterator ii = i->use_begin(), ie = i->use_end(); - ii != ie; ++ii) - if (cast(*ii)->getParent() != bb || - isa(*ii)) + bool valueEscapes(const Instruction *Inst) const { + const BasicBlock *BB = Inst->getParent(); + for (Value::use_const_iterator UI = Inst->use_begin(),E = Inst->use_end(); + UI != E; ++UI) + if (cast(*UI)->getParent() != BB || + isa(*UI)) return true; return false; } - virtual bool runOnFunction(Function &F) { - if (!F.isDeclaration()) { - // Insert all new allocas into entry block. - BasicBlock* BBEntry = &F.getEntryBlock(); - assert(pred_begin(BBEntry) == pred_end(BBEntry) && - "Entry block to function must not have predecessors!"); - - // Find first non-alloca instruction and create insertion point. This is - // safe if block is well-formed: it always have terminator, otherwise - // we'll get and assertion. - BasicBlock::iterator I = BBEntry->begin(); - while (isa(I)) ++I; - - CastInst *AllocaInsertionPoint = - CastInst::Create(Instruction::BitCast, - Context->getNullValue(Type::Int32Ty), Type::Int32Ty, - "reg2mem alloca point", I); - - // Find the escaped instructions. But don't create stack slots for - // allocas in entry block. 
-        std::list<Instruction*> worklist;
-        for (Function::iterator ibb = F.begin(), ibe = F.end();
-             ibb != ibe; ++ibb)
-          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
-               iib != iie; ++iib) {
-            if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
-                valueEscapes(iib)) {
-              worklist.push_front(&*iib);
-            }
-          }
-
-        // Demote escaped instructions
-        NumRegsDemoted += worklist.size();
-        for (std::list<Instruction*>::iterator ilb = worklist.begin(),
-             ile = worklist.end(); ilb != ile; ++ilb)
-          DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
-
-        worklist.clear();
-
-        // Find all phi's
-        for (Function::iterator ibb = F.begin(), ibe = F.end();
-             ibb != ibe; ++ibb)
-          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
-               iib != iie; ++iib)
-            if (isa<PHINode>(iib))
-              worklist.push_front(&*iib);
-
-        // Demote phi nodes
-        NumPhisDemoted += worklist.size();
-        for (std::list<Instruction*>::iterator ilb = worklist.begin(),
-             ile = worklist.end(); ilb != ile; ++ilb)
-          DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
-
-        return true;
-      }
-      return false;
-    }
+    virtual bool runOnFunction(Function &F);
   };
 }
 
@@ -118,6 +61,66 @@ char RegToMem::ID = 0;
 static RegisterPass<RegToMem>
 X("reg2mem", "Demote all values to stack slots");
 
+
+bool RegToMem::runOnFunction(Function &F) {
+  if (F.isDeclaration())
+    return false;
+
+  // Insert all new allocas into entry block.
+  BasicBlock *BBEntry = &F.getEntryBlock();
+  assert(pred_begin(BBEntry) == pred_end(BBEntry) &&
+         "Entry block to function must not have predecessors!");
+
+  // Find first non-alloca instruction and create insertion point. This is
+  // safe if block is well-formed: it always has a terminator; otherwise
+  // we'll get an assertion.
+  BasicBlock::iterator I = BBEntry->begin();
+  while (isa<AllocaInst>(I)) ++I;
+
+  CastInst *AllocaInsertionPoint =
+    new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+                    Type::getInt32Ty(F.getContext()),
+                    "reg2mem alloca point", I);
+
+  // Find the escaped instructions. But don't create stack slots for
+  // allocas in entry block.
+  std::list<Instruction*> WorkList;
+  for (Function::iterator ibb = F.begin(), ibe = F.end();
+       ibb != ibe; ++ibb)
+    for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+         iib != iie; ++iib) {
+      if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
+          valueEscapes(iib)) {
+        WorkList.push_front(&*iib);
+      }
+    }
+
+  // Demote escaped instructions
+  NumRegsDemoted += WorkList.size();
+  for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+       ile = WorkList.end(); ilb != ile; ++ilb)
+    DemoteRegToStack(**ilb, false, AllocaInsertionPoint);
+
+  WorkList.clear();
+
+  // Find all phi's
+  for (Function::iterator ibb = F.begin(), ibe = F.end();
+       ibb != ibe; ++ibb)
+    for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end();
+         iib != iie; ++iib)
+      if (isa<PHINode>(iib))
+        WorkList.push_front(&*iib);
+
+  // Demote phi nodes
+  NumPhisDemoted += WorkList.size();
+  for (std::list<Instruction*>::iterator ilb = WorkList.begin(),
+       ile = WorkList.end(); ilb != ile; ++ilb)
+    DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint);
+
+  return true;
+}
+
+
 // createDemoteRegisterToMemory - Provide an entry point to create this pass.
 //
 const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index f0bc12734734a..b5edf4e058214 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -33,9 +33,10 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -58,7 +59,7 @@ namespace {
 /// LatticeVal class - This class represents the different lattice values that
 /// an LLVM value may occupy.  It is a simple class with value semantics.
 ///
-class VISIBILITY_HIDDEN LatticeVal {
+class LatticeVal {
   enum {
     /// undefined - This LLVM Value has no known value yet.
     undefined,
@@ -139,7 +140,7 @@ public:
 /// Constant Propagation.
 ///
 class SCCPSolver : public InstVisitor<SCCPSolver> {
-  LLVMContext* Context;
+  LLVMContext *Context;
   DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable
   std::map<Value*, LatticeVal> ValueState;  // The state each value is in.
 
@@ -179,12 +180,12 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
   typedef std::pair<BasicBlock*, BasicBlock*> Edge;
   DenseSet<Edge> KnownFeasibleEdges;
 public:
-  void setContext(LLVMContext* C) { Context = C; }
+  void setContext(LLVMContext *C) { Context = C; }
 
   /// MarkBlockExecutable - This method can be used by clients to mark all of
   /// the blocks that are known to be intrinsically live in the processed unit.
   void MarkBlockExecutable(BasicBlock *BB) {
-    DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n";
+    DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");
     BBExecutable.insert(BB);   // Basic block is executable!
     BBWorkList.push_back(BB);  // Add the block to the work list!
   }
@@ -260,14 +261,14 @@ private:
   //
   inline void markConstant(LatticeVal &IV, Value *V, Constant *C) {
     if (IV.markConstant(C)) {
-      DOUT << "markConstant: " << *C << ": " << *V;
+      DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');
       InstWorkList.push_back(V);
     }
   }
 
   inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) {
     IV.markForcedConstant(C);
-    DOUT << "markForcedConstant: " << *C << ": " << *V;
+    DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');
     InstWorkList.push_back(V);
   }
 
@@ -280,11 +281,11 @@ private:
   // work list so that the users of the instruction are updated later.
   inline void markOverdefined(LatticeVal &IV, Value *V) {
     if (IV.markOverdefined()) {
-      DEBUG(DOUT << "markOverdefined: ";
+      DEBUG(errs() << "markOverdefined: ";
             if (Function *F = dyn_cast<Function>(V))
-              DOUT << "Function '" << F->getName() << "'\n";
+              errs() << "Function '" << F->getName() << "'\n";
             else
-              DOUT << *V);
+              errs() << *V << '\n');
       // Only instructions go on the work list
       OverdefinedInstWorkList.push_back(V);
     }
@@ -337,8 +338,8 @@ private:
       return;  // This edge is already known to be executable!
 
     if (BBExecutable.count(Dest)) {
-      DOUT << "Marking Edge Executable: " << Source->getNameStart()
-           << " -> " << Dest->getNameStart() << "\n";
+      DEBUG(errs() << "Marking Edge Executable: " << Source->getName()
+            << " -> " << Dest->getName() << "\n");
 
       // The destination is already executable, but we just made an edge
      // feasible that wasn't before.  Revisit the PHI nodes in the block
@@ -399,7 +400,9 @@ private:
  void visitStoreInst     (Instruction &I);
  void visitLoadInst      (LoadInst &I);
  void visitGetElementPtrInst(GetElementPtrInst &I);
-  void visitCallInst      (CallInst &I) { visitCallSite(CallSite::get(&I)); }
+  void visitCallInst      (CallInst &I) {
+    visitCallSite(CallSite::get(&I));
+  }
  void visitInvokeInst    (InvokeInst &II) {
    visitCallSite(CallSite::get(&II));
    visitTerminatorInst(II);
@@ -414,7 +417,7 @@ private:
 
  void visitInstruction(Instruction &I) {
    // If a new instruction is added to LLVM that we don't handle...
-    cerr << "SCCP: Don't know how to handle: " << I;
+    errs() << "SCCP: Don't know how to handle: " << I;
    markOverdefined(&I);   // Just in case
  }
};
@@ -440,7 +443,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
      Succs[0] = Succs[1] = true;
    } else if (BCValue.isConstant()) {
      // Constant condition variables mean the branch can only go a single way
-      Succs[BCValue.getConstant() == Context->getConstantIntFalse()] = true;
+      Succs[BCValue.getConstant() == ConstantInt::getFalse(*Context)] = true;
    }
  }
} else if (isa<InvokeInst>(&TI)) {
@@ -455,7 +458,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
    } else if (SCValue.isConstant())
      Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;
  } else {
-    assert(0 && "SCCP: Don't know how to handle this terminator!");
+    llvm_unreachable("SCCP: Don't know how to handle this terminator!");
  }
}
@@ -485,7 +488,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
 
      // Constant condition variables mean the branch can only go a single way
      return BI->getSuccessor(BCValue.getConstant() ==
-                              Context->getConstantIntFalse()) == To;
+                              ConstantInt::getFalse(*Context)) == To;
    }
    return false;
  }
@@ -513,8 +516,10 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
    }
    return false;
  } else {
-    cerr << "Unknown terminator instruction: " << *TI;
-    abort();
+#ifndef NDEBUG
+    errs() << "Unknown terminator instruction: " << *TI << '\n';
+#endif
+    llvm_unreachable(0);
  }
}
@@ -642,7 +647,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
      DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator It =
        TrackedMultipleRetVals.find(std::make_pair(F, i));
      if (It == TrackedMultipleRetVals.end()) break;
-      if (Value *Val = FindInsertedValue(I.getOperand(0), i))
+      if (Value *Val = FindInsertedValue(I.getOperand(0), i, I.getContext()))
        mergeInValue(It->second, F, getValueState(Val));
    }
  }
@@ -666,7 +671,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {
  if (VState.isOverdefined())          // Inherit overdefinedness of operand
    markOverdefined(&I);
  else if (VState.isConstant())        // Propagate constant value
-    markConstant(&I, Context->getConstantExprCast(I.getOpcode(),
+    markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
                                        VState.getConstant(), I.getType()));
}
@@ -809,12 +814,12 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
    if (NonOverdefVal->isUndefined()) {
      // Could annihilate value.
if (I.getOpcode() == Instruction::And) - markConstant(IV, &I, Context->getNullValue(I.getType())); + markConstant(IV, &I, Constant::getNullValue(I.getType())); else if (const VectorType *PT = dyn_cast(I.getType())) - markConstant(IV, &I, Context->getConstantVectorAllOnesValue(PT)); + markConstant(IV, &I, Constant::getAllOnesValue(PT)); else markConstant(IV, &I, - Context->getConstantIntAllOnesValue(I.getType())); + Constant::getAllOnesValue(I.getType())); return; } else { if (I.getOpcode() == Instruction::And) { @@ -859,7 +864,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { break; // Cannot fold this operation over the PHI nodes! } else if (In1.isConstant() && In2.isConstant()) { Constant *V = - Context->getConstantExpr(I.getOpcode(), In1.getConstant(), + ConstantExpr::get(I.getOpcode(), In1.getConstant(), In2.getConstant()); if (Result.isUndefined()) Result.markConstant(V); @@ -908,7 +913,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { markOverdefined(IV, &I); } else if (V1State.isConstant() && V2State.isConstant()) { markConstant(IV, &I, - Context->getConstantExpr(I.getOpcode(), V1State.getConstant(), + ConstantExpr::get(I.getOpcode(), V1State.getConstant(), V2State.getConstant())); } } @@ -945,7 +950,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { Result.markOverdefined(); break; // Cannot fold this operation over the PHI nodes! } else if (In1.isConstant() && In2.isConstant()) { - Constant *V = Context->getConstantExprCompare(I.getPredicate(), + Constant *V = ConstantExpr::getCompare(I.getPredicate(), In1.getConstant(), In2.getConstant()); if (Result.isUndefined()) @@ -994,7 +999,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { markOverdefined(IV, &I); } else if (V1State.isConstant() && V2State.isConstant()) { - markConstant(IV, &I, Context->getConstantExprCompare(I.getPredicate(), + markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(), V1State.getConstant(), V2State.getConstant())); } @@ -1096,7 +1101,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { Constant *Ptr = Operands[0]; Operands.erase(Operands.begin()); // Erase the pointer from idx list... - markConstant(IV, &I, Context->getConstantExprGetElementPtr(Ptr, &Operands[0], + markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0], Operands.size())); } @@ -1127,10 +1132,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) { if (PtrVal.isConstant() && !I.isVolatile()) { Value *Ptr = PtrVal.getConstant(); // TODO: Consider a target hook for valid address spaces for this xform. - if (isa(Ptr) && - cast(Ptr->getType())->getAddressSpace() == 0) { + if (isa(Ptr) && I.getPointerAddressSpace() == 0) { // load null -> null - markConstant(IV, &I, Context->getNullValue(I.getType())); + markConstant(IV, &I, Constant::getNullValue(I.getType())); return; } @@ -1179,7 +1183,7 @@ void SCCPSolver::visitCallSite(CallSite CS) { if (F == 0 || !F->hasLocalLinkage()) { CallOverdefined: // Void return and not tracking callee, just bail. - if (I->getType() == Type::VoidTy) return; + if (I->getType()->isVoidTy()) return; // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. 
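
An aside for readers following the SCCP hunks above: every visit* method funnels into the same three-point lattice per SSA value (undefined, constant, overdefined), and the mark* helpers record whether the state actually moved, because only a state change requires revisiting users via the work lists. A minimal standalone sketch of that discipline, with illustrative names and a plain int standing in for llvm::Constant*, might look like:

    #include <cassert>

    // Sketch of SCCP's per-value lattice: undefined -> constant -> overdefined.
    // The mark* helpers return true exactly when the state changed, which is
    // the signal to push the value back onto a work list.
    class LatticeCell {
      enum State { Undefined, Const, Overdefined } S;
      int Val; // stand-in for llvm::Constant*
    public:
      LatticeCell() : S(Undefined), Val(0) {}

      bool markConstant(int C) {
        if (S == Const) { assert(Val == C && "conflicting constants"); return false; }
        assert(S == Undefined && "cannot lower overdefined back to constant");
        S = Const; Val = C;
        return true; // changed: users must be revisited
      }

      bool markOverdefined() {
        if (S == Overdefined) return false;
        S = Overdefined;
        return true; // changed: users must be revisited
      }

      bool isUndefined()   const { return S == Undefined; }
      bool isConstant()    const { return S == Const; }
      bool isOverdefined() const { return S == Overdefined; }
    };

Under that reading, mergeInValue is simply the lattice join: two distinct constants, or anything joined with overdefined, collapse to overdefined.
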
@@ -1258,6 +1262,10 @@ CallOverdefined: for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++CAI) { LatticeVal &IV = ValueState[AI]; + if (AI->hasByValAttr() && !F->onlyReadsMemory()) { + IV.markOverdefined(); + continue; + } if (!IV.isOverdefined()) mergeInValue(IV, AI, getValueState(*CAI)); } @@ -1273,7 +1281,7 @@ void SCCPSolver::Solve() { Value *I = OverdefinedInstWorkList.back(); OverdefinedInstWorkList.pop_back(); - DOUT << "\nPopped off OI-WL: " << *I; + DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from // bottom to constant @@ -1291,7 +1299,7 @@ void SCCPSolver::Solve() { Value *I = InstWorkList.back(); InstWorkList.pop_back(); - DOUT << "\nPopped off I-WL: " << *I; + DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from // bottom to constant @@ -1311,7 +1319,7 @@ void SCCPSolver::Solve() { BasicBlock *BB = BBWorkList.back(); BBWorkList.pop_back(); - DOUT << "\nPopped off BBWL: " << *BB; + DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n'); // Notify all instructions in this basic block that they are newly // executable. @@ -1345,7 +1353,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { // Look for instructions which produce undef values. - if (I->getType() == Type::VoidTy) continue; + if (I->getType()->isVoidTy()) continue; LatticeVal &LV = getValueState(I); if (!LV.isUndefined()) continue; @@ -1371,22 +1379,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // to be handled here, because we don't know whether the top part is 1's // or 0's. assert(Op0LV.isUndefined()); - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: // undef * X -> 0. X could be zero. // undef & X -> 0. X could be zero. - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::Or: // undef | X -> -1. X could be -1. if (const VectorType *PTy = dyn_cast(ITy)) markForcedConstant(LV, I, - Context->getConstantVectorAllOnesValue(PTy)); + Constant::getAllOnesValue(PTy)); else - markForcedConstant(LV, I, Context->getConstantIntAllOnesValue(ITy)); + markForcedConstant(LV, I, Constant::getAllOnesValue(ITy)); return true; case Instruction::SDiv: @@ -1399,7 +1407,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::AShr: @@ -1420,7 +1428,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // X >> undef -> 0. X could be 0. // X << undef -> 0. X could be 0. - markForcedConstant(LV, I, Context->getNullValue(ITy)); + markForcedConstant(LV, I, Constant::getNullValue(ITy)); return true; case Instruction::Select: // undef ? X : Y -> X or Y. There could be commonality between X/Y. @@ -1483,7 +1491,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // as undef, then further analysis could think the undef went another way // leading to an inconsistent set of conclusions. 
if (BranchInst *BI = dyn_cast(TI)) { - BI->setCondition(Context->getConstantIntFalse()); + BI->setCondition(ConstantInt::getFalse(*Context)); } else { SwitchInst *SI = cast(TI); SI->setCondition(SI->getCaseValue(1)); @@ -1502,7 +1510,7 @@ namespace { /// SCCP Class - This class uses the SCCPSolver to implement a per-function /// Sparse Conditional Constant Propagator. /// - struct VISIBILITY_HIDDEN SCCP : public FunctionPass { + struct SCCP : public FunctionPass { static char ID; // Pass identification, replacement for typeid SCCP() : FunctionPass(&ID) {} @@ -1531,9 +1539,9 @@ FunctionPass *llvm::createSCCPPass() { // and return true if the function was modified. // bool SCCP::runOnFunction(Function &F) { - DOUT << "SCCP on function '" << F.getNameStart() << "'\n"; + DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n"); SCCPSolver Solver; - Solver.setContext(Context); + Solver.setContext(&F.getContext()); // Mark the first block of the function as being executable. Solver.MarkBlockExecutable(F.begin()); @@ -1546,7 +1554,7 @@ bool SCCP::runOnFunction(Function &F) { bool ResolvedUndefs = true; while (ResolvedUndefs) { Solver.Solve(); - DOUT << "RESOLVING UNDEFs\n"; + DEBUG(errs() << "RESOLVING UNDEFs\n"); ResolvedUndefs = Solver.ResolvedUndefsIn(F); } @@ -1561,7 +1569,7 @@ bool SCCP::runOnFunction(Function &F) { for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (!Solver.isBlockExecutable(BB)) { - DOUT << " BasicBlock Dead:" << *BB; + DEBUG(errs() << " BasicBlock Dead:" << *BB); ++NumDeadBlocks; // Delete the instructions backwards, as it has a reduced likelihood of @@ -1573,7 +1581,7 @@ bool SCCP::runOnFunction(Function &F) { Instruction *I = Insts.back(); Insts.pop_back(); if (!I->use_empty()) - I->replaceAllUsesWith(Context->getUndef(I->getType())); + I->replaceAllUsesWith(UndefValue::get(I->getType())); BB->getInstList().erase(I); MadeChanges = true; ++NumInstRemoved; @@ -1584,8 +1592,7 @@ bool SCCP::runOnFunction(Function &F) { // for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType() == Type::VoidTy || - isa(Inst)) + if (Inst->getType()->isVoidTy() || isa(Inst)) continue; LatticeVal &IV = Values[Inst]; @@ -1593,8 +1600,8 @@ bool SCCP::runOnFunction(Function &F) { continue; Constant *Const = IV.isConstant() - ? IV.getConstant() : Context->getUndef(Inst->getType()); - DOUT << " Constant: " << *Const << " = " << *Inst; + ? IV.getConstant() : UndefValue::get(Inst->getType()); + DEBUG(errs() << " Constant: " << *Const << " = " << *Inst); // Replaces all of the uses of a variable with uses of the constant. Inst->replaceAllUsesWith(Const); @@ -1617,7 +1624,7 @@ namespace { /// IPSCCP Class - This class implements interprocedural Sparse Conditional /// Constant Propagation. /// - struct VISIBILITY_HIDDEN IPSCCP : public ModulePass { + struct IPSCCP : public ModulePass { static char ID; IPSCCP() : ModulePass(&ID) {} bool runOnModule(Module &M); @@ -1658,7 +1665,10 @@ static bool AddressIsTaken(GlobalValue *GV) { } bool IPSCCP::runOnModule(Module &M) { + LLVMContext *Context = &M.getContext(); + SCCPSolver Solver; + Solver.setContext(Context); // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. 
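
The mechanical change threaded through this whole series of hunks is the LLVMContext API migration: constants are no longer minted through member functions on the context (Context->getConstantInt(...), Context->getNullValue(...), Context->getUndef(...)) but through static factories keyed off a Type or an explicit LLVMContext pulled from nearby IR via F.getContext(), M.getContext(), or I.getContext(). A compiling sketch of the post-r84119 shape, using the 2009 header layout; makeConstantsFor is a hypothetical helper, not part of the patch:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Function.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    static Value *makeConstantsFor(Function &F) {
      LLVMContext &Ctx = F.getContext();   // context now comes from the IR
      Constant *Zero  = Constant::getNullValue(Type::getInt32Ty(Ctx));
      Constant *One   = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
      Constant *False = ConstantInt::getFalse(Ctx);
      (void)Zero; (void)One; (void)False;
      return UndefValue::get(Type::getInt32Ty(Ctx)); // was Context->getUndef(...)
    }
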
@@ -1687,7 +1697,7 @@ bool IPSCCP::runOnModule(Module &M) { while (ResolvedUndefs) { Solver.Solve(); - DOUT << "RESOLVING UNDEFS\n"; + DEBUG(errs() << "RESOLVING UNDEFS\n"); ResolvedUndefs = false; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) ResolvedUndefs |= Solver.ResolvedUndefsIn(*F); @@ -1709,8 +1719,8 @@ bool IPSCCP::runOnModule(Module &M) { LatticeVal &IV = Values[AI]; if (IV.isConstant() || IV.isUndefined()) { Constant *CST = IV.isConstant() ? - IV.getConstant() : Context->getUndef(AI->getType()); - DOUT << "*** Arg " << *AI << " = " << *CST <<"\n"; + IV.getConstant() : UndefValue::get(AI->getType()); + DEBUG(errs() << "*** Arg " << *AI << " = " << *CST <<"\n"); // Replaces all of the uses of a variable with uses of the // constant. @@ -1721,7 +1731,7 @@ bool IPSCCP::runOnModule(Module &M) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (!Solver.isBlockExecutable(BB)) { - DOUT << " BasicBlock Dead:" << *BB; + DEBUG(errs() << " BasicBlock Dead:" << *BB); ++IPNumDeadBlocks; // Delete the instructions backwards, as it has a reduced likelihood of @@ -1734,7 +1744,7 @@ bool IPSCCP::runOnModule(Module &M) { Instruction *I = Insts.back(); Insts.pop_back(); if (!I->use_empty()) - I->replaceAllUsesWith(Context->getUndef(I->getType())); + I->replaceAllUsesWith(UndefValue::get(I->getType())); BB->getInstList().erase(I); MadeChanges = true; ++IPNumInstRemoved; @@ -1746,18 +1756,18 @@ bool IPSCCP::runOnModule(Module &M) { TI->getSuccessor(i)->removePredecessor(BB); } if (!TI->use_empty()) - TI->replaceAllUsesWith(Context->getUndef(TI->getType())); + TI->replaceAllUsesWith(UndefValue::get(TI->getType())); BB->getInstList().erase(TI); if (&*BB != &F->front()) BlocksToErase.push_back(BB); else - new UnreachableInst(BB); + new UnreachableInst(M.getContext(), BB); } else { for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType() == Type::VoidTy) + if (Inst->getType()->isVoidTy()) continue; LatticeVal &IV = Values[Inst]; @@ -1765,8 +1775,8 @@ bool IPSCCP::runOnModule(Module &M) { continue; Constant *Const = IV.isConstant() - ? IV.getConstant() : Context->getUndef(Inst->getType()); - DOUT << " Constant: " << *Const << " = " << *Inst; + ? IV.getConstant() : UndefValue::get(Inst->getType()); + DEBUG(errs() << " Constant: " << *Const << " = " << *Inst); // Replaces all of the uses of a variable with uses of the // constant. 
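
Both the SCCP and IPSCCP clean-up loops above share one idiom: instructions in a dead block are deleted from the back of the block forward, so that most already have no uses when erased, and any remaining uses are redirected to undef first. A hedged sketch of just that idiom, as a hypothetical free function (the real loops additionally detach the block from its predecessors and erase or re-terminate it):

    #include "llvm/BasicBlock.h"
    #include "llvm/Constants.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    static void dropDeadBlockBody(BasicBlock *BB) {
      while (!BB->empty()) {
        Instruction *I = &BB->back();        // backwards: cheaper cleanup
        if (!I->use_empty())                 // stray uses sit in dead code too
          I->replaceAllUsesWith(UndefValue::get(I->getType()));
        I->eraseFromParent();
      }
    }
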
@@ -1802,7 +1812,7 @@ bool IPSCCP::runOnModule(Module &M) { } else if (SwitchInst *SI = dyn_cast(I)) { assert(isa(SI->getCondition()) && "Switch should fold"); } else { - assert(0 && "Didn't fold away reference to block!"); + llvm_unreachable("Didn't fold away reference to block!"); } #endif @@ -1834,12 +1844,12 @@ bool IPSCCP::runOnModule(Module &M) { for (DenseMap::const_iterator I = RV.begin(), E = RV.end(); I != E; ++I) if (!I->second.isOverdefined() && - I->first->getReturnType() != Type::VoidTy) { + !I->first->getReturnType()->isVoidTy()) { Function *F = I->first; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast(BB->getTerminator())) if (!isa(RI->getOperand(0))) - RI->setOperand(0, Context->getUndef(F->getReturnType())); + RI->setOperand(0, UndefValue::get(F->getReturnType())); } // If we infered constant or undef values for globals variables, we can delete @@ -1850,7 +1860,7 @@ bool IPSCCP::runOnModule(Module &M) { GlobalVariable *GV = I->first; assert(!I->second.isOverdefined() && "Overdefined values should have been taken out of the map!"); - DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n"; + DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n"); while (!GV->use_empty()) { StoreInst *SI = cast(GV->use_back()); SI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 109fb90d52f3f..610d874b3684e 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -34,13 +34,13 @@ #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); @@ -49,7 +49,7 @@ STATISTIC(NumConverted, "Number of aggregates converted to scalar"); STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { - struct VISIBILITY_HIDDEN SROA : public FunctionPass { + struct SROA : public FunctionPass { static char ID; // Pass identification, replacement for typeid explicit SROA(signed T = -1) : FunctionPass(&ID) { if (T == -1) @@ -68,7 +68,6 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.setPreservesCFG(); } @@ -150,9 +149,16 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { bool SROA::runOnFunction(Function &F) { - TD = &getAnalysis(); - + TD = getAnalysisIfAvailable(); + bool Changed = performPromotion(F); + + // FIXME: ScalarRepl currently depends on TargetData more than it + // theoretically needs to. It should be refactored in order to support + // target-independent IR. Until this is done, just skip the actual + // scalar-replacement portion of this pass. 
+ if (!TD) return Changed; + while (1) { bool LocalChange = performScalarRepl(F); if (!LocalChange) break; // No need to repromote if no scalarrepl @@ -186,7 +192,7 @@ bool SROA::performPromotion(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF); + PromoteMemToReg(Allocas, DT, DF, F.getContext()); NumPromoted += Allocas.size(); Changed = true; } @@ -238,11 +244,10 @@ bool SROA::performScalarRepl(Function &F) { // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { - DOUT << "Found alloca equal to global: " << *AI; - DOUT << " memcpy = " << *TheCopy; + DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n'); + DEBUG(errs() << " memcpy = " << *TheCopy << '\n'); Constant *TheSrc = cast(TheCopy->getOperand(2)); - AI->replaceAllUsesWith( - Context->getConstantExprBitCast(TheSrc, AI->getType())); + AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. AI->eraseFromParent(); ++NumGlobals; @@ -256,9 +261,12 @@ bool SROA::performScalarRepl(Function &F) { // value cannot be decomposed at all. uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + // Do not promote [0 x %struct]. + if (AllocaSize == 0) continue; + // Do not promote any struct whose size is too big. if (AllocaSize > SRThreshold) continue; - + if ((isa(AI->getAllocatedType()) || isa(AI->getAllocatedType())) && // Do not promote any struct into more than "32" separate vars. @@ -266,7 +274,7 @@ bool SROA::performScalarRepl(Function &F) { // Check that all of the users of the allocation are capable of being // transformed. switch (isSafeAllocaToScalarRepl(AI)) { - default: assert(0 && "Unexpected value!"); + default: llvm_unreachable("Unexpected value!"); case 0: // Not safe to scalar replace. break; case 1: // Safe, but requires cleanup/canonicalizations first @@ -298,16 +306,17 @@ bool SROA::performScalarRepl(Function &F) { // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. if (VectorTy && isa(VectorTy) && HadAVector) { - DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n"; + DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " + << *VectorTy << '\n'); // Create and insert the vector alloca. - NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); + NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } else { - DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; + DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - const Type *NewTy = Context->getIntegerType(AllocaSize*8); + const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } @@ -328,14 +337,14 @@ bool SROA::performScalarRepl(Function &F) { /// predicate, do SROA now. 
void SROA::DoScalarReplacement(AllocationInst *AI, std::vector &WorkList) { - DOUT << "Found inst to SROA: " << *AI; + DEBUG(errs() << "Found inst to SROA: " << *AI << '\n'); SmallVector ElementAllocas; if (const StructType *ST = dyn_cast(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } @@ -345,7 +354,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI, const Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } @@ -371,7 +380,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI, // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 // (Also works for arrays instead of structs) if (LoadInst *LI = dyn_cast(User)) { - Value *Insert = Context->getUndef(LI->getType()); + Value *Insert = UndefValue::get(LI->getType()); for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { Value *Load = new LoadInst(ElementAllocas[i], "load", LI); Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); @@ -418,7 +427,8 @@ void SROA::DoScalarReplacement(AllocationInst *AI, // expanded itself once the worklist is rerun. // SmallVector NewArgs; - NewArgs.push_back(Context->getNullValue(Type::Int32Ty)); + NewArgs.push_back(Constant::getNullValue( + Type::getInt32Ty(AI->getContext()))); NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), NewArgs.end(), "", GEPI); @@ -478,7 +488,7 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, if (Info.isUnsafe) return; break; } - DOUT << " Transformation preventing inst: " << *User; + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); return MarkUnsafe(Info); case Instruction::Call: if (MemIntrinsic *MI = dyn_cast(User)) { @@ -488,10 +498,10 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, break; } } - DOUT << " Transformation preventing inst: " << *User; + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); return MarkUnsafe(Info); default: - DOUT << " Transformation preventing inst: " << *User; + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); return MarkUnsafe(Info); } } @@ -531,7 +541,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, // The GEP is not safe to transform if not of the form "GEP , 0, ". if (I == E || - I.getOperand() != Context->getNullValue(I.getOperand()->getType())) { + I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { return MarkUnsafe(Info); } @@ -727,6 +737,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. 
Value *OtherPtr = 0; + LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy if (BCInst == MTI->getRawDest()) @@ -764,7 +775,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, const Type *BytePtrTy = MI->getRawDest()->getType(); bool SROADest = MI->getRawDest() == BCInst; - Constant *Zero = Context->getNullValue(Type::Int32Ty); + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. @@ -772,9 +783,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, unsigned OtherEltAlign = MemAlignment; if (OtherPtr) { - Value *Idx[2] = { Zero, Context->getConstantInt(Type::Int32Ty, i) }; + Value *Idx[2] = { Zero, + ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, - OtherPtr->getNameStr()+"."+utostr(i), + OtherPtr->getNameStr()+"."+Twine(i), MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast(OtherPtr->getType()); @@ -819,7 +831,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, Constant *StoreVal; if (ConstantInt *CI = dyn_cast(MI->getOperand(2))) { if (CI->isZero()) { - StoreVal = Context->getNullValue(EltTy); // 0.0, null, 0, <0,0> + StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. const Type *ValTy = EltTy->getScalarType(); @@ -835,18 +847,18 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } // Convert the integer value to the appropriate type. - StoreVal = Context->getConstantInt(TotalVal); + StoreVal = ConstantInt::get(Context, TotalVal); if (isa(ValTy)) - StoreVal = Context->getConstantExprIntToPtr(StoreVal, ValTy); + StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); else if (ValTy->isFloatingPoint()) - StoreVal = Context->getConstantExprBitCast(StoreVal, ValTy); + StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); // If the requested value was a vector constant, create it. if (EltTy != ValTy) { unsigned NumElts = cast(ValTy)->getNumElements(); SmallVector Elts(NumElts, StoreVal); - StoreVal = Context->getConstantVector(&Elts[0], NumElts); + StoreVal = ConstantVector::get(&Elts[0], NumElts); } } new StoreInst(StoreVal, EltPtr, MI); @@ -872,15 +884,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? 
OtherElt : EltPtr, // Src ptr - Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size - Context->getConstantInt(Type::Int32Ty, OtherEltAlign) // Align + ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + // Align + ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign) }; CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } else { assert(isa(MI)); Value *Ops[] = { EltPtr, MI->getOperand(2), // Dest, Value, - Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size + ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size Zero // Align }; CallInst::Create(TheFn, Ops, Ops + 4, "", MI); @@ -910,9 +923,11 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, - Context->getIntegerType(AllocaSizeBits), "", SI); + IntegerType::get(SI->getContext(), AllocaSizeBits), + "", SI); - DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; + DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. @@ -929,7 +944,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, Value *EltVal = SrcVal; if (Shift) { - Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, "sroa.store.elt", SI); } @@ -942,7 +957,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, if (FieldSizeBits != AllocaSizeBits) EltVal = new TruncInst(EltVal, - Context->getIntegerType(FieldSizeBits), "", SI); + IntegerType::get(SI->getContext(), FieldSizeBits), + "", SI); Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. @@ -952,7 +968,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - Context->getPointerTypeUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -977,7 +993,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, Value *EltVal = SrcVal; if (Shift) { - Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); + Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, "sroa.store.elt", SI); } @@ -985,7 +1001,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, // Truncate down to an integer of the right size. if (ElementSizeBits != AllocaSizeBits) EltVal = new TruncInst(EltVal, - Context->getIntegerType(ElementSizeBits),"",SI); + IntegerType::get(SI->getContext(), + ElementSizeBits),"",SI); Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. @@ -995,7 +1012,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { // Otherwise, bitcast the dest pointer (for aggregates). 
DestField = new BitCastInst(DestField, - Context->getPointerTypeUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -1026,7 +1043,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) return; - DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; + DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. @@ -1038,9 +1056,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, const Type *ArrayEltTy = cast(AllocaEltTy)->getElementType(); ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } - - Value *ResultVal = - Context->getNullValue(Context->getIntegerType(AllocaSizeBits)); + + Value *ResultVal = + Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Load the value from the alloca. If the NewElt is an aggregate, cast @@ -1053,11 +1071,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = Context->getIntegerType(FieldSizeBits); + const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + FieldSizeBits); if (!isa(FieldTy) && !FieldTy->isFloatingPoint() && !isa(FieldTy)) SrcField = new BitCastInst(SrcField, - Context->getPointerTypeUnqual(FieldIntTy), + PointerType::getUnqual(FieldIntTy), "", LI); SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); @@ -1082,7 +1101,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); if (Shift) { - Value *ShiftVal = Context->getConstantInt(SrcField->getType(), Shift); + Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); } @@ -1152,7 +1171,8 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { I != E; ++I) { isSafeUseOfAllocation(cast(*I), AI, Info); if (Info.isUnsafe) { - DOUT << "Cannot transform: " << *AI << " due to user: " << **I; + DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " + << **I << '\n'); return 0; } } @@ -1186,24 +1206,25 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) { return; if (NumElements == 1) { - GEPI->setOperand(2, Context->getNullValue(Type::Int32Ty)); + GEPI->setOperand(2, + Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()))); return; } assert(NumElements == 2 && "Unhandled case!"); // All users of the GEP must be loads. At each use of the GEP, insert // two loads of the appropriate indexed GEP and select between them. - Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), - Context->getNullValue(I.getOperand()->getType()), - "isone", GEPI); + Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(), + Constant::getNullValue(I.getOperand()->getType()), + "isone"); // Insert the new GEP instructions, which are properly indexed. 
SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); - Indices[1] = Context->getNullValue(Type::Int32Ty); + Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), Indices.begin(), Indices.end(), GEPI->getName()+".0", GEPI); - Indices[1] = Context->getConstantInt(Type::Int32Ty, 1); + Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), Indices.begin(), Indices.end(), @@ -1261,9 +1282,9 @@ void SROA::CleanupAllocaUsers(AllocationInst *AI) { /// and stores would mutate the memory. static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, unsigned AllocaSize, const TargetData &TD, - LLVMContext* Context) { + LLVMContext &Context) { // If this could be contributing to a vector, analyze it. - if (VecTy != Type::VoidTy) { // either null or a vector type. + if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type. // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. @@ -1276,7 +1297,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, VecTy = VInTy; return; } - } else if (In == Type::FloatTy || In == Type::DoubleTy || + } else if (In->isFloatTy() || In->isDoubleTy() || (isa(In) && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { // If we're accessing something that could be an element of a vector, see @@ -1289,7 +1310,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, cast(VecTy)->getElementType() ->getPrimitiveSizeInBits()/8 == EltSize)) { if (VecTy == 0) - VecTy = Context->getVectorType(In, AllocaSize/EltSize); + VecTy = VectorType::get(In, AllocaSize/EltSize); return; } } @@ -1297,7 +1318,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, // Otherwise, we have a case that we can't handle with an optimized vector // form. We can still turn this into a large integer. - VecTy = Type::VoidTy; + VecTy = Type::getVoidTy(Context); } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all @@ -1320,7 +1341,8 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Don't break volatile loads. if (LI->isVolatile()) return false; - MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, Context); + MergeInType(LI->getType(), Offset, VecTy, + AllocaSize, *TD, V->getContext()); SawVec |= isa(LI->getType()); continue; } @@ -1329,7 +1351,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Storing the pointer, not into the value? if (SI->getOperand(0) == V || SI->isVolatile()) return 0; MergeInType(SI->getOperand(0)->getType(), Offset, - VecTy, AllocaSize, *TD, Context); + VecTy, AllocaSize, *TD, V->getContext()); SawVec |= isa(SI->getOperand(0)->getType()); continue; } @@ -1433,7 +1455,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (StoreInst *SI = dyn_cast(User)) { assert(SI->getOperand(0) != Ptr && "Consistency error!"); - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); + // FIXME: Remove once builder has Twine API. 
+ Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, Builder); Builder.CreateStore(New, NewAI); @@ -1457,8 +1480,10 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { for (unsigned i = 1; i != NumBytes; ++i) APVal |= APVal << 8; - Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); - Value *New = ConvertScalar_InsertValue(Context->getConstantInt(APVal), + // FIXME: Remove once builder has Twine API. + Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); + Value *New = ConvertScalar_InsertValue( + ConstantInt::get(User->getContext(), APVal), Old, Offset, Builder); Builder.CreateStore(New, NewAI); } @@ -1510,8 +1535,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { continue; } - assert(0 && "Unsupported operation!"); - abort(); + llvm_unreachable("Unsupported operation!"); } } @@ -1545,9 +1569,8 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = Builder.CreateExtractElement(FromVal, - Context->getConstantInt(Type::Int32Ty,Elt), - "tmp"); + Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( + Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); if (V->getType() != ToType) V = Builder.CreateBitCast(V, ToType, "tmp"); return V; @@ -1557,7 +1580,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // use insertvalue's to form the FCA. if (const StructType *ST = dyn_cast(ToType)) { const StructLayout &Layout = *TD->getStructLayout(ST); - Value *Res = Context->getUndef(ST); + Value *Res = UndefValue::get(ST); for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), @@ -1569,7 +1592,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, if (const ArrayType *AT = dyn_cast(ToType)) { uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); - Value *Res = Context->getUndef(AT); + Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), Offset+i*EltSize, Builder); @@ -1599,21 +1622,23 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // only some bits are used. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateLShr(FromVal, - Context->getConstantInt(FromVal->getType(), + ConstantInt::get(FromVal->getType(), ShAmt), "tmp"); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) FromVal = Builder.CreateShl(FromVal, - Context->getConstantInt(FromVal->getType(), + ConstantInt::get(FromVal->getType(), -ShAmt), "tmp"); // Finally, unconditionally truncate the integer to the right width. 
unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) FromVal = - Builder.CreateTrunc(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); else if (LIBitWidth > NTy->getBitWidth()) FromVal = - Builder.CreateZExt(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. if (isa(ToType)) { @@ -1645,6 +1670,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. const Type *AllocaType = Old->getType(); + LLVMContext &Context = Old->getContext(); if (const VectorType *VTy = dyn_cast(AllocaType)) { uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); @@ -1664,7 +1690,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); SV = Builder.CreateInsertElement(Old, SV, - Context->getConstantInt(Type::Int32Ty, Elt), + ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); return SV; } @@ -1697,9 +1723,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); if (SV->getType()->isFloatingPoint() || isa(SV->getType())) - SV = Builder.CreateBitCast(SV, Context->getIntegerType(SrcWidth), "tmp"); + SV = Builder.CreateBitCast(SV, + IntegerType::get(SV->getContext(),SrcWidth), "tmp"); else if (isa(SV->getType())) - SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp"); + SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { @@ -1732,11 +1759,11 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // only some bits in the structure are set. APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth)); if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { - SV = Builder.CreateShl(SV, Context->getConstantInt(SV->getType(), + SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt), "tmp"); Mask <<= ShAmt; } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { - SV = Builder.CreateLShr(SV, Context->getConstantInt(SV->getType(), + SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt), "tmp"); Mask = Mask.lshr(-ShAmt); } @@ -1745,7 +1772,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, // in the new bits. 
if (SrcWidth != DestWidth) { assert(DestWidth > SrcWidth); - Old = Builder.CreateAnd(Old, Context->getConstantInt(~Mask), "mask"); + Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask"); SV = Builder.CreateOr(Old, SV, "ins"); } return SV; diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b8bce801a1fb2..29712b3c13de8 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -30,7 +30,6 @@ #include "llvm/Module.h" #include "llvm/Attributes.h" #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" #include "llvm/Pass.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -40,7 +39,7 @@ using namespace llvm; STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { - struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass { + struct CFGSimplifyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid CFGSimplifyPass() : FunctionPass(&ID) {} @@ -58,20 +57,20 @@ FunctionPass *llvm::createCFGSimplificationPass() { /// ChangeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, LLVMContext* Context) { +static void ChangeToUnreachable(Instruction *I, LLVMContext &Context) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) (*SI)->removePredecessor(BB); - new UnreachableInst(I); + new UnreachableInst(I->getContext(), I); // All instructions after this are dead. BasicBlock::iterator BBI = I, BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) - BBI->replaceAllUsesWith(Context->getUndef(BBI->getType())); + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); BB->getInstList().erase(BBI++); } } @@ -97,7 +96,7 @@ static void ChangeToCall(InvokeInst *II) { static bool MarkAliveBlocks(BasicBlock *BB, SmallPtrSet &Reachable, - LLVMContext* Context) { + LLVMContext &Context) { SmallVector Worklist; Worklist.push_back(BB); @@ -132,7 +131,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa(Ptr) || (isa(Ptr) && - cast(Ptr->getType())->getAddressSpace() == 0)) { + SI->getPointerAddressSpace() == 0)) { ChangeToUnreachable(SI, Context); Changed = true; break; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 4aad17d7236d4..13077fe642a7f 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -22,15 +22,13 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Config/config.h" using namespace llvm; namespace { /// This pass optimizes well half_powr function calls. 
/// - class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass { + class SimplifyHalfPowrLibCalls : public FunctionPass { const TargetData *TD; public: static char ID; // Pass identification @@ -39,7 +37,6 @@ namespace { bool runOnFunction(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); } Instruction * @@ -60,8 +57,9 @@ FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { /// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging /// their control flow to better facilitate subsequent optimization. Instruction * -SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector &HalfPowrs, - Instruction *InsertPt) { +SimplifyHalfPowrLibCalls:: +InlineHalfPowrs(const std::vector &HalfPowrs, + Instruction *InsertPt) { std::vector Bodies; BasicBlock *NewBlock = 0; @@ -123,7 +121,7 @@ SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector &Half /// runOnFunction - Top level algorithm. /// bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { - TD = &getAnalysis(); + TD = getAnalysisIfAvailable(); bool Changed = false; std::vector HalfPowrs; @@ -136,8 +134,7 @@ bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { Function *Callee = CI->getCalledFunction(); if (Callee && Callee->hasExternalLinkage()) { // Look for calls with well-known names. - const char *CalleeName = Callee->getNameStart(); - if (strcmp(CalleeName, "__half_powrf4") == 0) + if (Callee->getName() == "__half_powrf4") IsHalfPowr = true; } } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index ec48469f536ef..e186601505c21 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -9,11 +9,9 @@ // // This file implements a simple pass that applies a variety of small // optimizations for calls to specific well-known function calls (e.g. runtime -// library functions). For example, a call to the function "exit(3)" that -// occurs within the main() function can be transformed into a simple "return 3" -// instruction. Any optimization that takes this form (replace call to library -// function with simpler code that provides the same result) belongs in this -// file. +// library functions). Any optimization that takes the very simple form +// "replace call to library function with simpler code that provides the same +// result" belongs in this file. // //===----------------------------------------------------------------------===// @@ -29,8 +27,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Config/config.h" using namespace llvm; @@ -44,7 +43,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions"); /// This class is the abstract base class for the set of optimizations that /// corresponds to one library call. namespace { -class VISIBILITY_HIDDEN LibCallOptimization { +class LibCallOptimization { protected: Function *Caller; const TargetData *TD; @@ -58,14 +57,14 @@ public: /// performed. If it returns CI, then it transformed the call and CI is to be /// deleted. If it returns something else, replace CI with the new value and /// delete CI. 
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - - Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) { + + Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); - this->TD = &TD; + this->TD = TD; if (CI->getCalledFunction()) - Context = CI->getCalledFunction()->getContext(); + Context = &CI->getCalledFunction()->getContext(); return CallOptimizer(CI->getCalledFunction(), CI, B); } @@ -76,12 +75,12 @@ public: /// specified pointer. Ptr is required to be some pointer type, and the /// return value has 'intptr_t' type. Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); - + /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, + Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B); - + /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); @@ -96,35 +95,36 @@ public: /// 'floor'). This function is known to take a single of type matching 'Op' /// and returns one value with the same type. If 'Op' is a long double, 'l' /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. - Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B); - + Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, + const AttrListPtr &Attrs); + /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. void EmitPutChar(Value *Char, IRBuilder<> &B); - + /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. void EmitPutS(Value *Str, IRBuilder<> &B); - + /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an i32, and File is a pointer to FILE. void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); - + /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); - + /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); - + }; } // End anonymous namespace. /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. 
Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { return - B.CreateBitCast(V, Context->getPointerTypeUnqual(Type::Int8Ty), "cstr"); + B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -137,8 +137,8 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { Attribute::NoUnwind); Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), - TD->getIntPtrType(), - Context->getPointerTypeUnqual(Type::Int8Ty), + TD->getIntPtrType(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); if (const Function *F = dyn_cast(StrLen->stripPointerCasts())) @@ -157,7 +157,7 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, Tys[0] = Len->getType(); Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1); return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, - Context->getConstantInt(Type::Int32Ty, Align)); + ConstantInt::get(Type::getInt32Ty(*Context), Align)); } /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is @@ -169,9 +169,10 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Context->getPointerTypeUnqual(Type::Int8Ty), - Context->getPointerTypeUnqual(Type::Int8Ty), - Type::Int32Ty, TD->getIntPtrType(), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context), + TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -192,10 +193,10 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, Attribute::NoUnwind); Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), - Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), - Context->getPointerTypeUnqual(Type::Int8Ty), - TD->getIntPtrType(), NULL); + Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + TD->getIntPtrType(*Context), NULL); CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), Len, "memcmp"); @@ -213,7 +214,7 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, const Type *Tys[1]; Tys[0] = Len->getType(); Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Align = Context->getConstantInt(Type::Int32Ty, 1); + Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1); return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); } @@ -222,14 +223,15 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B) { + IRBuilder<> &B, + const AttrListPtr &Attrs) { char NameBuffer[20]; - if (Op->getType() != Type::DoubleTy) { + if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. 
unsigned NameLen = strlen(Name); assert(NameLen < sizeof(NameBuffer)-2); memcpy(NameBuffer, Name, NameLen); - if (Op->getType() == Type::FloatTy) + if (Op->getType()->isFloatTy()) NameBuffer[NameLen] = 'f'; // floorf else NameBuffer[NameLen] = 'l'; // floorl @@ -241,7 +243,7 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, Value *Callee = M->getOrInsertFunction(Name, Op->getType(), Op->getType(), NULL); CallInst *CI = B.CreateCall(Callee, Op, Name); - + CI->setAttributes(Attrs); if (const Function *F = dyn_cast(Callee->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); @@ -252,10 +254,12 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, /// is an integer. void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { Module *M = Caller->getParent(); - Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty, - Type::Int32Ty, NULL); + Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), NULL); CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, Type::Int32Ty, "chari"), + B.CreateIntCast(Char, + Type::getInt32Ty(*Context), + "chari"), "putchar"); if (const Function *F = dyn_cast(PutChar->stripPointerCasts())) @@ -271,8 +275,8 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), + Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), NULL); CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast(PutS->stripPointerCasts())) @@ -289,12 +293,16 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Constant *F; if (isa(File->getType())) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty, - Type::Int32Ty, File->getType(), NULL); + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), + NULL); else - F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty, + F = M->getOrInsertFunction("fputc", + Type::getInt32Ty(*Context), + Type::getInt32Ty(*Context), File->getType(), NULL); - Char = B.CreateIntCast(Char, Type::Int32Ty, "chari"); + Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari"); CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); if (const Function *Fn = dyn_cast(F->stripPointerCasts())) @@ -311,12 +319,13 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); Constant *F; if (isa(File->getType())) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), File->getType(), NULL); else - F = M->getOrInsertFunction("fputs", Type::Int32Ty, - Context->getPointerTypeUnqual(Type::Int8Ty), + F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context), + Type::getInt8PtrTy(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -336,17 +345,19 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, Constant *F; if 
(isa(File->getType())) F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), - TD->getIntPtrType(), - Context->getPointerTypeUnqual(Type::Int8Ty), - TD->getIntPtrType(), TD->getIntPtrType(), + TD->getIntPtrType(*Context), + Type::getInt8PtrTy(*Context), + TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(), - Context->getPointerTypeUnqual(Type::Int8Ty), - TD->getIntPtrType(), TD->getIntPtrType(), + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), + Type::getInt8PtrTy(*Context), + TD->getIntPtrType(*Context), + TD->getIntPtrType(*Context), File->getType(), NULL); CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - Context->getConstantInt(TD->getIntPtrType(), 1), File); + ConstantInt::get(TD->getIntPtrType(*Context), 1), File); if (const Function *Fn = dyn_cast(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); @@ -362,30 +373,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { // Look through noop bitcast instructions. if (BitCastInst *BCI = dyn_cast(V)) return GetStringLengthH(BCI->getOperand(0), PHIs); - + // If this is a PHI node, there are two cases: either we have already seen it // or we haven't. if (PHINode *PN = dyn_cast(V)) { if (!PHIs.insert(PN)) return ~0ULL; // already in the set. - + // If it was new, see if all the input strings are the same length. uint64_t LenSoFar = ~0ULL; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); if (Len == 0) return 0; // Unknown length -> unknown. - + if (Len == ~0ULL) continue; - + if (Len != LenSoFar && LenSoFar != ~0ULL) return 0; // Disagree -> unknown. LenSoFar = Len; } - + // Success, all agree. return LenSoFar; } - + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) if (SelectInst *SI = dyn_cast(V)) { uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); @@ -397,7 +408,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { if (Len1 != Len2) return 0; return Len1; } - + // If the value is not a GEP instruction nor a constant expression with a // GEP instruction, then return unknown. User *GEP = 0; @@ -410,11 +421,11 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { } else { return 0; } - + // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return 0; - + // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. if (ConstantInt *Idx = dyn_cast(GEP->getOperand(1))) { @@ -422,7 +433,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { return 0; } else return 0; - + // If the second index isn't a ConstantInt, then this is a variable index // into the array. If this occurs, we can't say anything meaningful about // the string. @@ -431,28 +442,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { StartIdx = CI->getZExtValue(); else return 0; - + // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. 
   GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
-  if (!GV || !GV->isConstant() || !GV->hasInitializer())
+  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+      GV->mayBeOverridden())
     return 0;
   Constant *GlobalInit = GV->getInitializer();
-
+
   // Handle the ConstantAggregateZero case, which is a degenerate case. The
   // initializer is constant zero so the length of the string must be zero.
   if (isa<ConstantAggregateZero>(GlobalInit))
     return 1;  // Len = 0 offset by 1.
-
+
   // Must be a Constant Array
   ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
-  if (!Array || Array->getType()->getElementType() != Type::Int8Ty)
+  if (!Array ||
+      Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))
     return false;
-
+
   // Get the number of elements in the array
   uint64_t NumElts = Array->getType()->getNumElements();
-
+
   // Traverse the constant array from StartIdx (derived above) which is
   // the place the GEP refers to in the array.
   for (unsigned i = StartIdx; i != NumElts; ++i) {
@@ -463,7 +476,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
     if (CI->isZero())
       return i-StartIdx+1; // We found end of string, success!
   }
-
+
   return 0; // The array isn't null terminated, conservatively return 'unknown'.
 }
 
@@ -471,7 +484,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
 /// the specified pointer, return 'len+1'.  If we can't, return 0.
 static uint64_t GetStringLength(Value *V) {
   if (!isa<PointerType>(V->getType())) return 0;
-
+
   SmallPtrSet<PHINode*, 32> PHIs;
   uint64_t Len = GetStringLengthH(V, PHIs);
   // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
@@ -480,7 +493,7 @@ static uint64_t GetStringLength(Value *V) {
 }
 
 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// value is equal or not-equal to zero.
 static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
        UI != E; ++UI) {
@@ -495,74 +508,39 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
   return true;
 }
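GetStringLength deliberately reports the length biased by one (len+1), reserving 0 for "unknown" and ~0 for a degenerate phi cycle. A host-side sketch of the terminating-NUL scan it performs over a constant byte array (plain C++ under the same conventions; not the pass's code):

    #include <cstdint>
    #include <cstddef>

    // Returns strlen(Buf+StartIdx)+1 if a NUL is found, 0 for "unknown",
    // mirroring the len+1 / 0 convention used by GetStringLength.
    uint64_t ScanForNul(const char *Buf, std::size_t NumElts,
                        std::size_t StartIdx) {
      for (std::size_t i = StartIdx; i != NumElts; ++i)
        if (Buf[i] == '\0')
          return i - StartIdx + 1;   // length of "abc" reported as 4
      return 0;  // not NUL-terminated: conservatively unknown
    }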
-//===----------------------------------------------------------------------===//
-// Miscellaneous LibCall Optimizations
-//===----------------------------------------------------------------------===//
-
-namespace {
-//===---------------------------------------===//
-// 'exit' Optimizations
-
-/// ExitOpt - int main() { exit(4); } --> int main() { return 4; }
-struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify we have a reasonable prototype for exit.
-    if (Callee->arg_size() == 0 || !CI->use_empty())
-      return 0;
-
-    // Verify the caller is main, and that the result type of main matches the
-    // argument type of exit.
-    if (!Caller->isName("main") || !Caller->hasExternalLinkage() ||
-        Caller->getReturnType() != CI->getOperand(1)->getType())
-      return 0;
-
-    TerminatorInst *OldTI = CI->getParent()->getTerminator();
-
-    // Create the return after the call.
-    ReturnInst *RI = B.CreateRet(CI->getOperand(1));
-
-    // Drop all successor phi node entries.
-    for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i)
-      OldTI->getSuccessor(i)->removePredecessor(CI->getParent());
-
-    // Erase all instructions from after our return instruction until the end of
-    // the block.
-    BasicBlock::iterator FirstDead = RI; ++FirstDead;
-    CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end());
-    return CI;
-  }
-};
-
 //===----------------------------------------------------------------------===//
 // String and Memory LibCall Optimizations
 //===----------------------------------------------------------------------===//
 
 //===---------------------------------------===//
 // 'strcat' Optimizations
-
-struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
+namespace {
+struct StrCatOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strcat" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
         FT->getParamType(0) != FT->getReturnType() ||
         FT->getParamType(1) != FT->getReturnType())
       return 0;
-
+
     // Extract some information from the instruction
     Value *Dst = CI->getOperand(1);
     Value *Src = CI->getOperand(2);
-
+
     // See if we can get the length of the input string.
     uint64_t Len = GetStringLength(Src);
    if (Len == 0) return 0;
    --Len;  // Unbias length.
-
+
    // Handle the simple, do-nothing case: strcat(x, "") -> x
    if (Len == 0)
      return Dst;
-
+
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
    EmitStrLenMemCpy(Src, Dst, Len, B);
    return Dst;
  }
@@ -571,28 +549,28 @@ struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {
     // We need to find the end of the destination string.  That's where the
     // memory is to be moved to.  We just generate a call to strlen.
     Value *DstLen = EmitStrLen(Dst, B);
-
+
     // Now that we have the destination's length, we must index into the
     // destination's pointer to get the actual memcpy destination (end of
     // the string .. we're concatenating).
     Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
-
+
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     EmitMemCpy(CpyDst, Src,
-               Context->getConstantInt(TD->getIntPtrType(), Len+1), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B);
   }
 };
 
 //===---------------------------------------===//
 // 'strncat' Optimizations
 
-struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
+struct StrNCatOpt : public StrCatOpt {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strncat" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 ||
-        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
         FT->getParamType(0) != FT->getReturnType() ||
         FT->getParamType(1) != FT->getReturnType() ||
         !isa<IntegerType>(FT->getParamType(2)))
@@ -619,6 +597,9 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
     // strncat(x, c, 0) -> x
     if (SrcLen == 0 || Len == 0) return Dst;
 
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // We don't optimize this case
     if (Len < SrcLen) return 0;
 
@@ -632,27 +613,31 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {
 //===---------------------------------------===//
 // 'strchr' Optimizations
 
-struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
+struct StrChrOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strchr" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
         FT->getParamType(0) != FT->getReturnType())
       return 0;
-
+
     Value *SrcStr = CI->getOperand(1);
-
+
     // If the second operand is non-constant, see if we can compute the length
     // of the input string and turn this into memchr.
     ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
     if (CharC == 0) {
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       uint64_t Len = GetStringLength(SrcStr);
-      if (Len == 0 || FT->getParamType(1) != Type::Int32Ty) // memchr needs i32.
+      if (Len == 0 ||
+          FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32.
         return 0;
-
+
       return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
-                        Context->getConstantInt(TD->getIntPtrType(), Len), B);
+                        ConstantInt::get(TD->getIntPtrType(*Context), Len), B);
     }
 
     // Otherwise, the character is a constant, see if the first argument is
@@ -660,24 +645,24 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {
     std::string Str;
     if (!GetConstantStringInfo(SrcStr, Str))
       return 0;
-
+
     // strchr can find the nul character.
     Str += '\0';
     char CharValue = CharC->getSExtValue();
-
+
     // Compute the offset.
     uint64_t i = 0;
     while (1) {
       if (i == Str.size())  // Didn't find the char.  strchr returns null.
-        return Context->getNullValue(CI->getType());
+        return Constant::getNullValue(CI->getType());
       // Did we find our match?
       if (Str[i] == CharValue)
         break;
       ++i;
     }
-
+
     // strchr(s+n,c)  -> gep(s+n+i,c)
-    Value *Idx = Context->getConstantInt(Type::Int64Ty, i);
+    Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);
     return B.CreateGEP(SrcStr, Idx, "strchr");
   }
 };
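The fold appends a '\0' before scanning because ISO C defines strchr to find the terminator: strchr(s, 0) must return a pointer to s's NUL, not a null pointer. A host-side sketch of the same fold for a known constant string (plain C++; not the pass's code):

    #include <cstddef>

    // strchr over a constant string, folded the way the pass does it: the
    // scan includes the terminating NUL, a hit becomes pointer arithmetic
    // (a GEP in IR), a miss becomes a null constant.
    const char *FoldStrChr(const char *Str, std::size_t Len, char C) {
      for (std::size_t i = 0; i <= Len; ++i)   // <=: the NUL is searchable
        if (Str[i] == C)
          return Str + i;
      return nullptr;
    }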
//===---------------------------------------===//
// 'strcmp' Optimizations
 
-struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
+struct StrCmpOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strcmp" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty ||
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != Type::getInt32Ty(*Context) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty))
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context))
       return 0;
-
+
     Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
     if (Str1P == Str2P)      // strcmp(x,x)  -> 0
-      return Context->getConstantInt(CI->getType(), 0);
-
+      return ConstantInt::get(CI->getType(), 0);
+
     std::string Str1, Str2;
     bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
     bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
-
+
     if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x
       return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-
+
     if (HasStr2 && Str2.empty())  // strcmp(x,"") -> *x
       return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
+
     // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
     if (HasStr1 && HasStr2)
-      return Context->getConstantInt(CI->getType(),
+      return ConstantInt::get(CI->getType(),
                                      strcmp(Str1.c_str(),Str2.c_str()));
 
     // strcmp(P, "x") -> memcmp(P, "x", 2)
     uint64_t Len1 = GetStringLength(Str1P);
     uint64_t Len2 = GetStringLength(Str2P);
     if (Len1 && Len2) {
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       return EmitMemCmp(Str1P, Str2P,
-                        Context->getConstantInt(TD->getIntPtrType(),
+                        ConstantInt::get(TD->getIntPtrType(*Context),
                                         std::min(Len1, Len2)), B);
     }
 
@@ -729,43 +718,44 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strncmp' Optimizations
 
-struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
+struct StrNCmpOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strncmp" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty ||
+    if (FT->getNumParams() != 3 ||
+        FT->getReturnType() != Type::getInt32Ty(*Context) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
         !isa<IntegerType>(FT->getParamType(2)))
       return 0;
-
+
     Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
     if (Str1P == Str2P)      // strncmp(x,x,n)  -> 0
-      return Context->getConstantInt(CI->getType(), 0);
-
+      return ConstantInt::get(CI->getType(), 0);
+
     // Get the length argument if it is constant.
     uint64_t Length;
     if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
       Length = LengthArg->getZExtValue();
     else
       return 0;
-
+
     if (Length == 0) // strncmp(x,y,0)   -> 0
-      return Context->getConstantInt(CI->getType(), 0);
-
+      return ConstantInt::get(CI->getType(), 0);
+
     std::string Str1, Str2;
     bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
     bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
-
+
     if (HasStr1 && Str1.empty())  // strncmp("", x, n) -> *x
       return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType());
-
+
     if (HasStr2 && Str2.empty())  // strncmp(x, "", n) -> *x
       return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
+
     // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
     if (HasStr1 && HasStr2)
-      return Context->getConstantInt(CI->getType(),
+      return ConstantInt::get(CI->getType(),
                               strncmp(Str1.c_str(), Str2.c_str(), Length));
     return 0;
   }
@@ -775,27 +765,30 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strcpy' Optimizations
 
-struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {
+struct StrCpyOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Verify the "strcpy" function prototype.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty))
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context))
       return 0;
-
+
     Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
     if (Dst == Src)      // strcpy(x,x)  -> x
       return Src;
-
+
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // See if we can get the length of the input string.
     uint64_t Len = GetStringLength(Src);
     if (Len == 0) return 0;
-
+
     // We have enough information to now generate the memcpy call to do the
     // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
     EmitMemCpy(Dst, Src,
-               Context->getConstantInt(TD->getIntPtrType(), Len), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
     return Dst;
   }
 };
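When the source length is known at compile time, strcpy degenerates into a fixed-size copy that also moves the terminating NUL (GetStringLength's len+1 bias supplies exactly that byte count). The source-level equivalence, as a plain C++ sketch:

    #include <cstring>

    // strcpy(Dst, "hello") with a constant source is just a 6-byte memcpy:
    // five characters plus the terminating NUL.
    void CopyHello(char *Dst) {
      std::memcpy(Dst, "hello", 6);
    }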
 //===---------------------------------------===//
 // 'strncpy' Optimizations
 
-struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
+struct StrNCpyOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
         !isa<IntegerType>(FT->getParamType(2)))
       return 0;
 
@@ -823,7 +816,8 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
 
     if (SrcLen == 0) {
       // strncpy(x, "", y) -> memset(x, '\0', y, 1)
-      EmitMemSet(Dst, Context->getConstantInt(Type::Int8Ty, '\0'), LenOp, B);
+      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp,
+                 B);
       return Dst;
     }
 
@@ -835,12 +829,15 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
 
     if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
 
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // Let strncpy handle the zero padding
     if (Len > SrcLen+1) return 0;
 
     // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
     EmitMemCpy(Dst, Src,
-               Context->getConstantInt(TD->getIntPtrType(), Len), 1, B);
+               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);
 
     return Dst;
   }
@@ -849,19 +846,19 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strlen' Optimizations
 
-struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
+struct StrLenOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 1 ||
-        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
         !isa<IntegerType>(FT->getReturnType()))
       return 0;
-
+
     Value *Src = CI->getOperand(1);
 
     // Constant folding: strlen("xyz") -> 3
     if (uint64_t Len = GetStringLength(Src))
-      return Context->getConstantInt(CI->getType(), Len-1);
+      return ConstantInt::get(CI->getType(), Len-1);
 
     // Handle strlen(p) != 0.
     if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0;
@@ -875,7 +872,7 @@ struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'strto*' Optimizations
 
-struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
+struct StrToOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
@@ -897,18 +894,18 @@ struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memcmp' Optimizations
 
-struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
+struct MemCmpOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
-        FT->getReturnType() != Type::Int32Ty)
+        FT->getReturnType() != Type::getInt32Ty(*Context))
       return 0;
 
     Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
 
     if (LHS == RHS)  // memcmp(s,s,x) -> 0
-      return Context->getNullValue(CI->getType());
+      return Constant::getNullValue(CI->getType());
 
     // Make sure we have a constant length.
     ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
@@ -916,7 +913,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
     uint64_t Len = LenC->getZExtValue();
 
     if (Len == 0) // memcmp(s1,s2,0) -> 0
-      return Context->getNullValue(CI->getType());
+      return Constant::getNullValue(CI->getType());
 
     if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS
       Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
@@ -927,8 +924,8 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
     // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS)  != 0
     // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS)  != 0
     if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
-      const Type *PTy = Context->getPointerTypeUnqual(Len == 2 ?
-                                                      Type::Int16Ty : Type::Int32Ty);
+      const Type *PTy = PointerType::getUnqual(Len == 2 ?
+                Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context));
       LHS = B.CreateBitCast(LHS, PTy, "tmp");
       RHS = B.CreateBitCast(RHS, PTy, "tmp");
       LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
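The two- and four-byte memcmp case works because equality-only callers do not need memcmp's sign, just zero/non-zero: the XOR of two same-width loads is zero exactly when every byte matches. A host-side sketch of the equivalence (plain C++; memcpy keeps the wide loads well-defined regardless of alignment):

    #include <cstdint>
    #include <cstring>

    // Equality-only memcmp(A, B, 4): a 32-bit XOR is zero iff all four
    // bytes are equal.
    bool Equal4(const unsigned char *A, const unsigned char *B) {
      std::uint32_t LHS, RHS;
      std::memcpy(&LHS, A, 4);
      std::memcpy(&RHS, B, 4);
      return (LHS ^ RHS) == 0;   // same truth value as memcmp(A, B, 4) == 0
    }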
@@ -944,13 +941,16 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memcpy' Optimizations
 
-struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
+struct MemCpyOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
-        FT->getParamType(2) != TD->getIntPtrType())
+        FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
     // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
@@ -962,25 +962,28 @@ struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memmove' Optimizations
 
-struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
+struct MemMoveOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
-        FT->getParamType(2) != TD->getIntPtrType())
+        FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
     // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
     Module *M = Caller->getParent();
     Intrinsic::ID IID = Intrinsic::memmove;
     const Type *Tys[1];
-    Tys[0] = TD->getIntPtrType();
+    Tys[0] = TD->getIntPtrType(*Context);
     Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
     Value *Dst = CastToCStr(CI->getOperand(1), B);
     Value *Src = CastToCStr(CI->getOperand(2), B);
     Value *Size = CI->getOperand(3);
-    Value *Align = Context->getConstantInt(Type::Int32Ty, 1);
+    Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);
     B.CreateCall4(MemMove, Dst, Src, Size, Align);
     return CI->getOperand(1);
   }
@@ -989,17 +992,21 @@ struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'memset' Optimizations
 
-struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
+struct MemSetOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
         !isa<PointerType>(FT->getParamType(0)) ||
-        FT->getParamType(1) != TD->getIntPtrType() ||
-        FT->getParamType(2) != TD->getIntPtrType())
+        !isa<IntegerType>(FT->getParamType(1)) ||
+        FT->getParamType(2) != TD->getIntPtrType(*Context))
       return 0;
 
     // memset(p, v, n) -> llvm.memset(p, v, n, 1)
-    Value *Val = B.CreateTrunc(CI->getOperand(2), Type::Int8Ty);
+    Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
+                                 false);
     EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B);
     return CI->getOperand(1);
   }
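The switch from CreateTrunc to CreateIntCast matters because the value parameter is now only required to be *some* integer type: it may be narrower than i8 as well as wider, so the cast must be able to extend, not just truncate. The underlying C semantics, sketched:

    #include <cstring>

    // memset's value parameter is an int, but what is stored is that value
    // converted to unsigned char -- so lowering casts the argument to i8
    // whatever its original width.
    void FillByte(void *P, int V, std::size_t N) {
      std::memset(P, V, N);   // stores (unsigned char)V into each byte
    }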
@@ -1012,7 +1019,7 @@ struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'pow*' Optimizations
 
-struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
+struct PowOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 2 arguments of the same FP type, which match the
@@ -1021,40 +1028,44 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
         FT->getParamType(0) != FT->getParamType(1) ||
         !FT->getParamType(0)->isFloatingPoint())
       return 0;
-
+
     Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
     if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
       if (Op1C->isExactlyValue(1.0))  // pow(1.0, x) -> 1.0
         return Op1C;
       if (Op1C->isExactlyValue(2.0))  // pow(2.0, x) -> exp2(x)
-        return EmitUnaryFloatFnCall(Op2, "exp2", B);
+        return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());
     }
-
+
     ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
     if (Op2C == 0) return 0;
-
+
     if (Op2C->getValueAPF().isZero())  // pow(x, 0.0) -> 1.0
-      return Context->getConstantFP(CI->getType(), 1.0);
-
+      return ConstantFP::get(CI->getType(), 1.0);
+
     if (Op2C->isExactlyValue(0.5)) {
-      // FIXME: This is not safe for -0.0 and -inf.  This can only be done when
-      // 'unsafe' math optimizations are allowed.
-      //   x    pow(x, 0.5)  sqrt(x)
-      // ---------------------------------------------
-      //   -0.0    +0.0       -0.0
-      //   -inf    +inf       NaN
-#if 0
-      // pow(x, 0.5) -> sqrt(x)
-      return B.CreateCall(get_sqrt(), Op1, "sqrt");
-#endif
+      // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
+      // This is faster than calling pow, and still handles negative zero
+      // and negative infinity correctly.
+      // TODO: In fast-math mode, this could be just sqrt(x).
+      // TODO: In finite-only mode, this could be just fabs(sqrt(x)).
+      Value *Inf = ConstantFP::getInfinity(CI->getType());
+      Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+      Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B,
+                                         Callee->getAttributes());
+      Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B,
+                                         Callee->getAttributes());
+      Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp");
+      Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp");
+      return Sel;
     }
-
+
     if (Op2C->isExactlyValue(1.0))  // pow(x, 1.0) -> x
       return Op1;
     if (Op2C->isExactlyValue(2.0))  // pow(x, 2.0) -> x*x
       return B.CreateFMul(Op1, Op1, "pow2");
     if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
-      return B.CreateFDiv(Context->getConstantFP(CI->getType(), 1.0),
+      return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
                           Op1, "powrecip");
     return 0;
   }
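The pow(x, 0.5) expansion is subtle: IEEE/C99 pow maps -0.0 to +0.0 and -inf to +inf, neither of which plain sqrt delivers (sqrt(-0.0) is -0.0, sqrt(-inf) is NaN). The select-plus-fabs sequence reproduces both cases; a host-side sketch of the same computation (plain C++, not the pass's code):

    #include <cmath>
    #include <limits>

    double PowHalf(double X) {
      // pow(-inf, 0.5) must be +inf; sqrt would give NaN.
      if (X == -std::numeric_limits<double>::infinity())
        return std::numeric_limits<double>::infinity();
      // fabs repairs the -0.0 case: sqrt(-0.0) is -0.0, pow wants +0.0.
      return std::fabs(std::sqrt(X));
    }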
@@ -1063,7 +1074,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'exp2' Optimizations
 
-struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
+struct Exp2Opt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 1 argument of FP type, which matches the
@@ -1071,35 +1082,38 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
     if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
         !FT->getParamType(0)->isFloatingPoint())
       return 0;
-
+
     Value *Op = CI->getOperand(1);
     // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32
     // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32
     Value *LdExpArg = 0;
     if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
-        LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+        LdExpArg = B.CreateSExt(OpC->getOperand(0),
+                                Type::getInt32Ty(*Context), "tmp");
     } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
       if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
-        LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
+        LdExpArg = B.CreateZExt(OpC->getOperand(0),
+                                Type::getInt32Ty(*Context), "tmp");
     }
 
     if (LdExpArg) {
       const char *Name;
-      if (Op->getType() == Type::FloatTy)
+      if (Op->getType()->isFloatTy())
         Name = "ldexpf";
-      else if (Op->getType() == Type::DoubleTy)
+      else if (Op->getType()->isDoubleTy())
         Name = "ldexp";
       else
         Name = "ldexpl";
 
-      Constant *One = Context->getConstantFP(APFloat(1.0f));
-      if (Op->getType() != Type::FloatTy)
-        One = Context->getConstantExprFPExtend(One, Op->getType());
+      Constant *One = ConstantFP::get(*Context, APFloat(1.0f));
+      if (!Op->getType()->isFloatTy())
+        One = ConstantExpr::getFPExtend(One, Op->getType());
 
       Module *M = Caller->getParent();
       Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
-                                             Op->getType(), Type::Int32Ty,NULL);
+                                             Op->getType(),
+                                             Type::getInt32Ty(*Context),NULL);
       CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
       if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
         CI->setCallingConv(F->getCallingConv());
@@ -1113,22 +1127,23 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
 //===---------------------------------------===//
 // Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
 
-struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
+struct UnaryDoubleFPOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
-        FT->getParamType(0) != Type::DoubleTy)
+    if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
+        !FT->getParamType(0)->isDoubleTy())
       return 0;
 
     // If this is something like 'floor((double)floatval)', convert to floorf.
     FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
-    if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
+    if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
      return 0;
 
    // floor((double)floatval) -> (double)floorf(floatval)
    Value *V = Cast->getOperand(0);
-    V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B);
-    return B.CreateFPExt(V, Type::DoubleTy);
+    V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
+                             Callee->getAttributes());
+    return B.CreateFPExt(V, Type::getDoubleTy(*Context));
  }
};
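exp2 of an integer-valued operand is an exact power of two, which ldexp(1.0, n) constructs directly from the exponent field instead of evaluating a transcendental; the sext/zext width limits above keep the exponent inside ldexp's int parameter. The equivalence at the C level, sketched:

    #include <cmath>

    // exp2((double)N) for integer N is exactly ldexp(1.0, N): scale 1.0
    // by 2^N with pure exponent arithmetic, no polynomial evaluation.
    double Exp2FromInt(int N) {
      return std::ldexp(1.0, N);
    }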
//===----------------------------------------------------------------------===//
// Integer Optimizations
//===----------------------------------------------------------------------===//
 
 //===---------------------------------------===//
 // 'ffs*' Optimizations
 
-struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization {
+struct FFSOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // Just make sure this has 1 integer argument and an i32 result.
-    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty ||
+    if (FT->getNumParams() != 1 ||
+        FT->getReturnType() != Type::getInt32Ty(*Context) ||
         !isa<IntegerType>(FT->getParamType(0)))
       return 0;
-
+
     Value *Op = CI->getOperand(1);
-
+
     // Constant fold.
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
       if (CI->getValue() == 0)  // ffs(0) -> 0.
-        return Context->getNullValue(CI->getType());
-      return Context->getConstantInt(Type::Int32Ty, // ffs(c) -> cttz(c)+1
+        return Constant::getNullValue(CI->getType());
+      return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1
                                      CI->getValue().countTrailingZeros()+1);
     }
-
+
     // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
     const Type *ArgType = Op->getType();
     Value *F = Intrinsic::getDeclaration(Callee->getParent(),
                                          Intrinsic::cttz, &ArgType, 1);
     Value *V = B.CreateCall(F, Op, "cttz");
-    V = B.CreateAdd(V, Context->getConstantInt(V->getType(), 1), "tmp");
-    V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp");
-
-    Value *Cond = B.CreateICmpNE(Op, Context->getNullValue(ArgType), "tmp");
-    return B.CreateSelect(Cond, V, Context->getConstantInt(Type::Int32Ty, 0));
+    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
+    V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp");
+
+    Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
+    return B.CreateSelect(Cond, V,
+                          ConstantInt::get(Type::getInt32Ty(*Context), 0));
   }
 };
 
 //===---------------------------------------===//
 // 'isdigit' Optimizations
 
-struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
+struct IsDigitOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(i32)
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
-        FT->getParamType(0) != Type::Int32Ty)
+        FT->getParamType(0) != Type::getInt32Ty(*Context))
       return 0;
-
+
     // isdigit(c) -> (c-'0') <u 10
     Value *Op = CI->getOperand(1);
-    Op = B.CreateSub(Op, Context->getConstantInt(Type::Int32Ty, '0'),
+    Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
                      "isdigittmp");
-    Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 10),
+    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
                          "isdigit");
     return B.CreateZExt(Op, CI->getType());
   }
@@ -1195,58 +1212,58 @@ struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'isascii' Optimizations
 
-struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization {
+struct IsAsciiOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(i32)
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
-        FT->getParamType(0) != Type::Int32Ty)
+        FT->getParamType(0) != Type::getInt32Ty(*Context))
       return 0;
-
+
     // isascii(c) -> c <u 128
     Value *Op = CI->getOperand(1);
-    Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 128),
+    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
                          "isascii");
     return B.CreateZExt(Op, CI->getType());
   }
 };
-
+
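These integer folds rest on two classic identities. ffs returns the 1-based index of the least significant set bit (0 for zero input), which is cttz+1 behind a zero guard; isdigit and isascii each collapse to a single unsigned comparison because unsigned subtraction wraps values below '0' to huge numbers. A portable sketch (plain C++, not the pass's code):

    // ffs(x): 1-based index of the least significant set bit, 0 if x == 0.
    // The loop is a portable cttz(x) + 1.
    int Ffs(unsigned X) {
      if (X == 0) return 0;
      int N = 1;
      while (!(X & 1u)) { X >>= 1; ++N; }
      return N;
    }

    // One unsigned bound check covers both ends of the digit range.
    bool IsDigit(int C) { return static_cast<unsigned>(C - '0') < 10u; }
    bool IsAscii(int C) { return static_cast<unsigned>(C) < 128u; }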
 //===---------------------------------------===//
 // 'abs', 'labs', 'llabs' Optimizations
 
-struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization {
+struct AbsOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require integer(integer) where the types agree.
     if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
         FT->getParamType(0) != FT->getReturnType())
       return 0;
-
+
     // abs(x) -> x >s -1 ? x : -x
     Value *Op = CI->getOperand(1);
-    Value *Pos = B.CreateICmpSGT(Op,
-                                 Context->getConstantIntAllOnesValue(Op->getType()),
+    Value *Pos = B.CreateICmpSGT(Op,
+                                 Constant::getAllOnesValue(Op->getType()),
                                  "ispos");
     Value *Neg = B.CreateNeg(Op, "neg");
     return B.CreateSelect(Pos, Op, Neg);
   }
 };
-
+
 //===---------------------------------------===//
 // 'toascii' Optimizations
 
-struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
+struct ToAsciiOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     const FunctionType *FT = Callee->getFunctionType();
     // We require i32(i32)
     if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
-        FT->getParamType(0) != Type::Int32Ty)
+        FT->getParamType(0) != Type::getInt32Ty(*Context))
      return 0;
-
+
    // toascii(c) -> c & 0x7f
    return B.CreateAnd(CI->getOperand(1),
-                       Context->getConstantInt(CI->getType(),0x7F));
+                       ConstantInt::get(CI->getType(),0x7F));
  }
};
@@ -1257,15 +1274,15 @@ struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'printf' Optimizations
 
-struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
+struct PrintFOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require one fixed pointer argument and an integer/void result.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
         !(isa<IntegerType>(FT->getReturnType()) ||
-          FT->getReturnType() == Type::VoidTy))
+          FT->getReturnType()->isVoidTy()))
       return 0;
-
+
     // Check for a fixed format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
@@ -1273,39 +1290,39 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
 
     // Empty format string -> noop.
     if (FormatStr.empty())  // Tolerate printf's declared void.
-      return CI->use_empty() ? (Value*)CI :
-                               Context->getConstantInt(CI->getType(), 0);
-
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 0);
+
     // printf("x") -> putchar('x'), even for '%'.
     if (FormatStr.size() == 1) {
-      EmitPutChar(Context->getConstantInt(Type::Int32Ty, FormatStr[0]), B);
-      return CI->use_empty() ? (Value*)CI :
-                               Context->getConstantInt(CI->getType(), 1);
+      EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B);
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 1);
    }
-
+
    // printf("foo\n") --> puts("foo")
    if (FormatStr[FormatStr.size()-1] == '\n' &&
        FormatStr.find('%') == std::string::npos) {  // no format characters.
      // Create a string literal with no \n on it.  We expect the constant merge
      // pass to be run after this pass, to merge duplicate strings.
      FormatStr.erase(FormatStr.end()-1);
-      Constant *C = Context->getConstantArray(FormatStr, true);
-      C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage,
-                             C, "str", Callee->getParent());
+      Constant *C = ConstantArray::get(*Context, FormatStr, true);
+      C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
+                             GlobalVariable::InternalLinkage, C, "str");
      EmitPutS(C, B);
-      return CI->use_empty() ? (Value*)CI :
-                               Context->getConstantInt(CI->getType(), FormatStr.size()+1);
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), FormatStr.size()+1);
    }
-
+
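The puts transform drops the trailing '\n' from the literal because puts appends a newline itself; both calls produce identical output, and puts skips format-string interpretation entirely. The source-level equivalence, sketched:

    #include <cstdio>

    // A constant, specifier-free, newline-terminated printf is just puts
    // on the literal with the '\n' removed.
    void Hello() {
      std::puts("hello");     // same output as std::printf("hello\n")
    }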
     // Optimize specific format strings.
     // printf("%c", chr) --> putchar(*(i8*)dst)
     if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
         isa<IntegerType>(CI->getOperand(2)->getType())) {
       EmitPutChar(CI->getOperand(2), B);
-      return CI->use_empty() ? (Value*)CI :
-                               Context->getConstantInt(CI->getType(), 1);
+      return CI->use_empty() ? (Value*)CI :
+                               ConstantInt::get(CI->getType(), 1);
     }
-
+
     // printf("%s\n", str) --> puts(str)
     if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
         isa<PointerType>(CI->getOperand(2)->getType()) &&
@@ -1320,7 +1337,7 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'sprintf' Optimizations
 
-struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
+struct SPrintFOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require two fixed pointer arguments and an integer result.
     const FunctionType *FT = Callee->getFunctionType();
@@ -1333,7 +1350,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
       return 0;
-
+
     // If we just have a format string (nothing else crazy) transform it.
     if (CI->getNumOperands() == 3) {
       // Make sure there's no % in the constant array.  We could try to handle
@@ -1341,41 +1358,49 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
         if (FormatStr[i] == '%')
           return 0; // we found a format specifier, bail out.
-
+
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
       EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
-          Context->getConstantInt(TD->getIntPtrType(), FormatStr.size()+1),1,B);
-      return Context->getConstantInt(CI->getType(), FormatStr.size());
+          ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B);
+      return ConstantInt::get(CI->getType(), FormatStr.size());
     }
-
+
     // The remaining optimizations require the format string to be "%s" or "%c"
     // and have an extra operand.
     if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
       return 0;
-
+
     // Decode the second character of the format string.
     if (FormatStr[1] == 'c') {
       // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
       if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
-      Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char");
+      Value *V = B.CreateTrunc(CI->getOperand(3),
+                               Type::getInt8Ty(*Context), "char");
       Value *Ptr = CastToCStr(CI->getOperand(1), B);
       B.CreateStore(V, Ptr);
-      Ptr = B.CreateGEP(Ptr, Context->getConstantInt(Type::Int32Ty, 1), "nul");
-      B.CreateStore(Context->getNullValue(Type::Int8Ty), Ptr);
-
-      return Context->getConstantInt(CI->getType(), 1);
+      Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
+                        "nul");
+      B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr);
+
+      return ConstantInt::get(CI->getType(), 1);
    }
-
+
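The "%c" case emits exactly the two stores sprintf is specified to make: the character byte and a terminating NUL, with the return value fixed at one character written. The same operation in plain C++ (not the pass's code):

    // sprintf(Dst, "%c", C) writes the character and a NUL and returns 1.
    int PutCharAndNul(char *Dst, char C) {
      Dst[0] = C;
      Dst[1] = '\0';
      return 1;
    }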
     if (FormatStr[1] == 's') {
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
       // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
       if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
 
       Value *Len = EmitStrLen(CI->getOperand(3), B);
       Value *IncLen = B.CreateAdd(Len,
-                                  Context->getConstantInt(Len->getType(), 1),
+                                  ConstantInt::get(Len->getType(), 1),
                                   "leninc");
       EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B);
-
+
       // The sprintf result is the unincremented number of bytes in the string.
       return B.CreateIntCast(Len, CI->getType(), false);
     }
@@ -1386,7 +1411,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'fwrite' Optimizations
 
-struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
+struct FWriteOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require a pointer, an integer, an integer, a pointer, returning integer.
     const FunctionType *FT = Callee->getFunctionType();
@@ -1396,22 +1421,22 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
         !isa<PointerType>(FT->getParamType(3)) ||
         !isa<IntegerType>(FT->getReturnType()))
       return 0;
-
+
     // Get the element size and count.
     ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
     ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
     if (!SizeC || !CountC) return 0;
     uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
-
+
     // If this is writing zero records, remove the call (it's a noop).
     if (Bytes == 0)
-      return Context->getConstantInt(CI->getType(), 0);
-
+      return ConstantInt::get(CI->getType(), 0);
+
     // If this is writing one byte, turn it into fputc.
     if (Bytes == 1) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
       Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
       EmitFPutC(Char, CI->getOperand(4), B);
-      return Context->getConstantInt(CI->getType(), 1);
+      return ConstantInt::get(CI->getType(), 1);
     }
 
     return 0;
@@ -1421,20 +1446,23 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'fputs' Optimizations
 
-struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
+struct FPutsOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require TargetData.
+    if (!TD) return 0;
+
     // Require two pointers.  Also, we can't optimize if return value is used.
     const FunctionType *FT = Callee->getFunctionType();
     if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
         !isa<PointerType>(FT->getParamType(1)) ||
         !CI->use_empty())
       return 0;
-
+
     // fputs(s,F) --> fwrite(s,1,strlen(s),F)
     uint64_t Len = GetStringLength(CI->getOperand(1));
     if (!Len) return 0;
     EmitFWrite(CI->getOperand(1),
-               Context->getConstantInt(TD->getIntPtrType(), Len-1),
+               ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
               CI->getOperand(2), B);
    return CI;  // Known to have no uses (see above).
  }
@@ -1443,7 +1471,7 @@ struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {
 //===---------------------------------------===//
 // 'fprintf' Optimizations
 
-struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
+struct FPrintFOpt : public LibCallOptimization {
   virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
     // Require two fixed parameters as pointers and an integer result.
     const FunctionType *FT = Callee->getFunctionType();
@@ -1451,7 +1479,7 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
         !isa<PointerType>(FT->getParamType(1)) ||
         !isa<IntegerType>(FT->getReturnType()))
       return 0;
-
+
     // All the optimizations depend on the format string.
     std::string FormatStr;
     if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
@@ -1462,26 +1490,29 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
         if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.
          return 0; // We found a format specifier.
-
-      EmitFWrite(CI->getOperand(2), Context->getConstantInt(TD->getIntPtrType(),
+
+      // These optimizations require TargetData.
+      if (!TD) return 0;
+
+      EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context),
                                                      FormatStr.size()),
                 CI->getOperand(1), B);
-      return Context->getConstantInt(CI->getType(), FormatStr.size());
+      return ConstantInt::get(CI->getType(), FormatStr.size());
    }
-
+
    // The remaining optimizations require the format string to be "%s" or "%c"
    // and have an extra operand.
    if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
      return 0;
-
+
    // Decode the second character of the format string.
    if (FormatStr[1] == 'c') {
      // fprintf(F, "%c", chr) --> fputc(chr, F)
      if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
      EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
-      return Context->getConstantInt(CI->getType(), 1);
+      return ConstantInt::get(CI->getType(), 1);
    }
-
+
    if (FormatStr[1] == 's') {
      // fprintf(F, "%s", str) -> fputs(str, F)
      if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
@@ -1502,10 +1533,8 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {
 namespace {
   /// This pass optimizes well known library functions from libc and libm.
   ///
-  class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass {
+  class SimplifyLibCalls : public FunctionPass {
     StringMap<LibCallOptimization*> Optimizations;
-    // Miscellaneous LibCall Optimizations
-    ExitOpt Exit;
     // String and Memory LibCall Optimizations
     StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
     StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen;
@@ -1536,7 +1565,6 @@ namespace {
     bool doInitialization(Module &M);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<TargetData>();
     }
   };
   char SimplifyLibCalls::ID = 0;
@@ -1547,15 +1575,12 @@ X("simplify-libcalls", "Simplify well-known library calls");
 
 // Public interface to the Simplify LibCalls pass.
 FunctionPass *llvm::createSimplifyLibCallsPass() {
-  return new SimplifyLibCalls();
+  return new SimplifyLibCalls();
 }
 
 /// Optimizations - Populate the Optimizations map with all the optimizations
 /// we know.
 void SimplifyLibCalls::InitOptimizations() {
-  // Miscellaneous LibCall Optimizations
-  Optimizations["exit"] = &Exit;
-
   // String and Memory LibCall Optimizations
   Optimizations["strcat"] = &StrCat;
   Optimizations["strncat"] = &StrNCat;
@@ -1576,7 +1601,7 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["memcpy"] = &MemCpy;
   Optimizations["memmove"] = &MemMove;
   Optimizations["memset"] = &MemSet;
-
+
   // Math Library Optimizations
   Optimizations["powf"] = &Pow;
   Optimizations["pow"] = &Pow;
@@ -1594,7 +1619,7 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["llvm.exp2.f80"] = &Exp2;
   Optimizations["llvm.exp2.f64"] = &Exp2;
   Optimizations["llvm.exp2.f32"] = &Exp2;
-
+
 #ifdef HAVE_FLOORF
   Optimizations["floor"] = &UnaryDoubleFP;
 #endif
@@ -1610,7 +1635,7 @@ void SimplifyLibCalls::InitOptimizations() {
 #ifdef HAVE_NEARBYINTF
   Optimizations["nearbyint"] = &UnaryDoubleFP;
 #endif
-
+
   // Integer Optimizations
   Optimizations["ffs"] = &FFS;
   Optimizations["ffsl"] = &FFS;
@@ -1621,7 +1646,7 @@ void SimplifyLibCalls::InitOptimizations() {
   Optimizations["isdigit"] = &IsDigit;
   Optimizations["isascii"] = &IsAscii;
   Optimizations["toascii"] = &ToAscii;
-
+
   // Formatting and IO Optimizations
   Optimizations["sprintf"] = &SPrintF;
   Optimizations["printf"] = &PrintF;
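The pass is organized as a name-keyed dispatch table: one statically allocated optimizer object per libcall, registered once here and looked up per call site in runOnFunction below. The same shape in portable C++ (std::map standing in for LLVM's StringMap; all names illustrative, not the pass's API):

    #include <map>
    #include <string>

    struct LibCallOpt {                    // stand-in for LibCallOptimization
      virtual ~LibCallOpt() {}
    };
    struct StrLenOpt : LibCallOpt {};
    struct MemCpyOpt : LibCallOpt {};

    std::map<std::string, LibCallOpt*> BuildTable() {
      static StrLenOpt StrLen;             // one object per optimization...
      static MemCpyOpt MemCpy;
      std::map<std::string, LibCallOpt*> T;
      T["strlen"] = &StrLen;               // ...registered once by name...
      T["memcpy"] = &MemCpy;
      return T;                            // ...then looked up per call site
    }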
@@ -1636,10 +1661,10 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
   if (Optimizations.empty())
     InitOptimizations();
-
-  const TargetData &TD = getAnalysis<TargetData>();
-
-  IRBuilder<> Builder;
+
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+
+  IRBuilder<> Builder(F.getContext());
 
   bool Changed = false;
   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -1647,37 +1672,35 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
       // Ignore non-calls.
       CallInst *CI = dyn_cast<CallInst>(I++);
       if (!CI) continue;
-
+
       // Ignore indirect calls and calls to non-external functions.
       Function *Callee = CI->getCalledFunction();
       if (Callee == 0 || !Callee->isDeclaration() ||
           !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))
         continue;
-
+
       // Ignore unknown calls.
-      const char *CalleeName = Callee->getNameStart();
-      StringMap<LibCallOptimization*>::iterator OMI =
-        Optimizations.find(CalleeName, CalleeName+Callee->getNameLen());
-      if (OMI == Optimizations.end()) continue;
-
+      LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+      if (!LCO) continue;
+
       // Set the builder to the instruction after the call.
       Builder.SetInsertPoint(BB, I);
-
+
       // Try to optimize this call.
-      Value *Result = OMI->second->OptimizeCall(CI, TD, Builder);
+      Value *Result = LCO->OptimizeCall(CI, TD, Builder);
       if (Result == 0) continue;
 
-      DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI;
-            DOUT << "  into: " << *Result << "\n");
+      DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI;
            errs() << "  into: " << *Result << "\n");
-
+
       // Something changed!
       Changed = true;
       ++NumSimplified;
-
+
       // Inspect the instruction after the call (which was potentially just
       // added) next.
       I = CI; ++I;
-
+
       if (CI != Result && !CI->use_empty()) {
         CI->replaceAllUsesWith(Result);
         if (!Result->hasName())
@@ -1736,40 +1759,39 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
     if (!F.isDeclaration())
       continue;
 
-    unsigned NameLen = F.getNameLen();
-    if (!NameLen)
+    if (!F.hasName())
       continue;
 
     const FunctionType *FTy = F.getFunctionType();
 
-    const char *NameStr = F.getNameStart();
-    switch (NameStr[0]) {
+    StringRef Name = F.getName();
+    switch (Name[0]) {
     case 's':
-      if (NameLen == 6 && !strcmp(NameStr, "strlen")) {
+      if (Name == "strlen") {
        if (FTy->getNumParams() != 1 ||
            !isa<PointerType>(FTy->getParamType(0)))
          continue;
        setOnlyReadsMemory(F);
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
-      } else if ((NameLen == 6 && !strcmp(NameStr, "strcpy")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "stpcpy")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "strcat")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "strtol")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "strtod")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "strtof")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strtoul")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strtoll")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strtold")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strncat")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strncpy")) ||
-                 (NameLen == 8 && !strcmp(NameStr, "strtoull"))) {
+      } else if (Name == "strcpy" ||
+                 Name == "stpcpy" ||
+                 Name == "strcat" ||
+                 Name == "strtol" ||
+                 Name == "strtod" ||
+                 Name == "strtof" ||
+                 Name == "strtoul" ||
+                 Name == "strtoll" ||
+                 Name == "strtold" ||
+                 Name == "strncat" ||
+                 Name == "strncpy" ||
+                 Name == "strtoull") {
        if (FTy->getNumParams() < 2 ||
            !isa<PointerType>(FTy->getParamType(1)))
          continue;
        setDoesNotThrow(F);
        setDoesNotCapture(F, 2);
-      } else if (NameLen == 7 && !strcmp(NameStr, "strxfrm")) {
+      } else if (Name == "strxfrm") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(1)))
@@ -1777,13 +1799,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
        setDoesNotCapture(F, 2);
-      } else if ((NameLen == 6 && !strcmp(NameStr, "strcmp")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "strspn")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strncmp")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strcspn")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strcoll")) ||
-                 (NameLen == 10 && !strcmp(NameStr, "strcasecmp")) ||
-                 (NameLen == 11 && !strcmp(NameStr, "strncasecmp"))) {
+      } else if (Name == "strcmp" ||
+                 Name == "strspn" ||
+                 Name == "strncmp" ||
+                 Name == "strcspn" ||
+                 Name == "strcoll" ||
+                 Name == "strcasecmp" ||
+                 Name == "strncasecmp") {
        if (FTy->getNumParams() < 2 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(1)))
@@ -1792,31 +1814,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
        setDoesNotCapture(F, 2);
-      } else if ((NameLen == 6 && !strcmp(NameStr, "strstr")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strpbrk"))) {
+      } else if (Name == "strstr" ||
+                 Name == "strpbrk") {
        if (FTy->getNumParams() != 2 ||
            !isa<PointerType>(FTy->getParamType(1)))
          continue;
        setOnlyReadsMemory(F);
        setDoesNotThrow(F);
        setDoesNotCapture(F, 2);
-      } else if ((NameLen == 6 && !strcmp(NameStr, "strtok")) ||
-                 (NameLen == 8 && !strcmp(NameStr, "strtok_r"))) {
+      } else if (Name == "strtok" ||
+                 Name == "strtok_r") {
        if (FTy->getNumParams() < 2 ||
            !isa<PointerType>(FTy->getParamType(1)))
          continue;
        setDoesNotThrow(F);
        setDoesNotCapture(F, 2);
-      } else if ((NameLen == 5 && !strcmp(NameStr, "scanf")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "setbuf")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "setvbuf"))) {
+      } else if (Name == "scanf" ||
+                 Name == "setbuf" ||
+                 Name == "setvbuf") {
        if (FTy->getNumParams() < 1 ||
            !isa<PointerType>(FTy->getParamType(0)))
          continue;
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
-      } else if ((NameLen == 6 && !strcmp(NameStr, "strdup")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "strndup"))) {
+      } else if (Name == "strdup" ||
+                 Name == "strndup") {
        if (FTy->getNumParams() < 1 ||
            !isa<PointerType>(FTy->getReturnType()) ||
            !isa<PointerType>(FTy->getParamType(0)))
@@ -1824,10 +1846,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotAlias(F, 0);
        setDoesNotCapture(F, 1);
-      } else if ((NameLen == 4 && !strcmp(NameStr, "stat")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "sscanf")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "sprintf")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "statvfs"))) {
+      } else if (Name == "stat" ||
+                 Name == "sscanf" ||
+                 Name == "sprintf" ||
+                 Name == "statvfs") {
        if (FTy->getNumParams() < 2 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(1)))
@@ -1835,7 +1857,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
        setDoesNotCapture(F, 2);
-      } else if (NameLen == 8 && !strcmp(NameStr, "snprintf")) {
+      } else if (Name == "snprintf") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(2)))
@@ -1843,7 +1865,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
        setDoesNotCapture(F, 3);
-      } else if (NameLen == 9 && !strcmp(NameStr, "setitimer")) {
+      } else if (Name == "setitimer") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(1)) ||
            !isa<PointerType>(FTy->getParamType(2)))
@@ -1851,7 +1873,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 2);
        setDoesNotCapture(F, 3);
-      } else if (NameLen == 6 && !strcmp(NameStr, "system")) {
+      } else if (Name == "system") {
        if (FTy->getNumParams() != 1 ||
            !isa<PointerType>(FTy->getParamType(0)))
          continue;
@@ -1860,7 +1882,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
      }
      break;
    case 'm':
-      if (NameLen == 6 && !strcmp(NameStr, "memcmp")) {
+      if (Name == "malloc") {
+        if (FTy->getNumParams() != 1 ||
+            !isa<PointerType>(FTy->getReturnType()))
+          continue;
+        setDoesNotThrow(F);
+        setDoesNotAlias(F, 0);
+      } else if (Name == "memcmp") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(1)))
@@ -1869,29 +1897,29 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setOnlyReadsMemory(F);
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
        setDoesNotCapture(F, 2);
-      } else if ((NameLen == 6 && !strcmp(NameStr, "memchr")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "memrchr"))) {
+      } else if (Name == "memchr" ||
+                 Name == "memrchr") {
        if (FTy->getNumParams() != 3)
          continue;
        setOnlyReadsMemory(F);
        setDoesNotThrow(F);
-      } else if ((NameLen == 4 && !strcmp(NameStr, "modf")) ||
-                 (NameLen == 5 && !strcmp(NameStr, "modff")) ||
-                 (NameLen == 5 && !strcmp(NameStr, "modfl")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "memcpy")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "memccpy")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "memmove"))) {
+      } else if (Name == "modf" ||
+                 Name == "modff" ||
+                 Name == "modfl" ||
+                 Name == "memcpy" ||
+                 Name == "memccpy" ||
+                 Name == "memmove") {
        if (FTy->getNumParams() < 2 ||
            !isa<PointerType>(FTy->getParamType(1)))
          continue;
        setDoesNotThrow(F);
        setDoesNotCapture(F, 2);
-      } else if (NameLen == 8 && !strcmp(NameStr, "memalign")) {
+      } else if (Name == "memalign") {
        if (!isa<PointerType>(FTy->getReturnType()))
          continue;
        setDoesNotAlias(F, 0);
-      } else if
-                ((NameLen == 5 && !strcmp(NameStr, "mkdir")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "mktime"))) {
+      } else if (Name == "mkdir" ||
+                 Name == "mktime") {
        if (FTy->getNumParams() == 0 ||
            !isa<PointerType>(FTy->getParamType(0)))
          continue;
@@ -1900,7 +1928,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
      }
      break;
    case 'r':
-      if (NameLen == 7 && !strcmp(NameStr, "realloc")) {
+      if (Name == "realloc") {
        if (FTy->getNumParams() != 2 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getReturnType()))
@@ -1908,23 +1936,23 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotAlias(F, 0);
        setDoesNotCapture(F, 1);
-      } else if (NameLen == 4 && !strcmp(NameStr, "read")) {
+      } else if (Name == "read") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(1)))
          continue;
        // May throw; "read" is a valid pthread cancellation point.
        setDoesNotCapture(F, 2);
-      } else if ((NameLen == 5 && !strcmp(NameStr, "rmdir")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "rewind")) ||
-                 (NameLen == 6 && !strcmp(NameStr, "remove")) ||
-                 (NameLen == 8 && !strcmp(NameStr, "realpath"))) {
+      } else if (Name == "rmdir" ||
+                 Name == "rewind" ||
+                 Name == "remove" ||
+                 Name == "realpath") {
        if (FTy->getNumParams() < 1 ||
            !isa<PointerType>(FTy->getParamType(0)))
          continue;
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
-      } else if ((NameLen == 6 && !strcmp(NameStr, "rename")) ||
-                 (NameLen == 8 && !strcmp(NameStr, "readlink"))) {
+      } else if (Name == "rename" ||
+                 Name == "readlink") {
        if (FTy->getNumParams() < 2 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(1)))
@@ -1935,7 +1963,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
      }
      break;
    case 'w':
-      if (NameLen == 5 && !strcmp(NameStr, "write")) {
+      if (Name == "write") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(1)))
          continue;
@@ -1944,7 +1972,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
      }
      break;
    case 'b':
-      if (NameLen == 5 && !strcmp(NameStr, "bcopy")) {
+      if (Name == "bcopy") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(1)))
@@ -1952,7 +1980,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
        setDoesNotCapture(F, 2);
-      } else if (NameLen == 4 && !strcmp(NameStr, "bcmp")) {
+      } else if (Name == "bcmp") {
        if (FTy->getNumParams() != 3 ||
            !isa<PointerType>(FTy->getParamType(0)) ||
            !isa<PointerType>(FTy->getParamType(1)))
@@ -1961,7 +1989,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setOnlyReadsMemory(F);
        setDoesNotCapture(F, 1);
        setDoesNotCapture(F, 2);
-      } else if (NameLen == 5 && !strcmp(NameStr, "bzero")) {
+      } else if (Name == "bzero") {
        if (FTy->getNumParams() != 2 ||
            !isa<PointerType>(FTy->getParamType(0)))
          continue;
@@ -1970,17 +1998,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
      }
      break;
    case 'c':
-      if (NameLen == 6 && !strcmp(NameStr, "calloc")) {
+      if (Name == "calloc") {
        if (FTy->getNumParams() != 2 ||
            !isa<PointerType>(FTy->getReturnType()))
          continue;
        setDoesNotThrow(F);
        setDoesNotAlias(F, 0);
-      } else if ((NameLen == 5 && !strcmp(NameStr, "chmod")) ||
-                 (NameLen == 5 && !strcmp(NameStr, "chown")) ||
-                 (NameLen == 7 && !strcmp(NameStr, "ctermid")) ||
-                 (NameLen == 8 && !strcmp(NameStr, "clearerr")) ||
-                 (NameLen == 8 && !strcmp(NameStr, "closedir"))) {
+      } else if (Name == "chmod" ||
+                 Name == "chown" ||
+                 Name == "ctermid" ||
+                 Name == "clearerr" ||
+                 Name == "closedir") {
        if (FTy->getNumParams() == 0 ||
            !isa<PointerType>(FTy->getParamType(0)))
          continue;
@@ -1989,17 +2017,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
        setDoesNotThrow(F);
        setDoesNotCapture(F, 1);
      }
      break;
    case 'a':
-      if ((NameLen == 4 &&
!strcmp(NameStr, "atoi")) || - (NameLen == 4 && !strcmp(NameStr, "atol")) || - (NameLen == 4 && !strcmp(NameStr, "atof")) || - (NameLen == 5 && !strcmp(NameStr, "atoll"))) { + if (Name == "atoi" || + Name == "atol" || + Name == "atof" || + Name == "atoll") { if (FTy->getNumParams() != 1 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); setDoesNotCapture(F, 1); - } else if (NameLen == 6 && !strcmp(NameStr, "access")) { + } else if (Name == "access") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(0))) continue; @@ -2008,7 +2036,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'f': - if (NameLen == 5 && !strcmp(NameStr, "fopen")) { + if (Name == "fopen") { if (FTy->getNumParams() != 2 || !isa(FTy->getReturnType()) || !isa(FTy->getParamType(0)) || @@ -2018,7 +2046,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "fdopen")) { + } else if (Name == "fdopen") { if (FTy->getNumParams() != 2 || !isa(FTy->getReturnType()) || !isa(FTy->getParamType(1))) @@ -2026,52 +2054,52 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 2); - } else if ((NameLen == 4 && !strcmp(NameStr, "feof")) || - (NameLen == 4 && !strcmp(NameStr, "free")) || - (NameLen == 5 && !strcmp(NameStr, "fseek")) || - (NameLen == 5 && !strcmp(NameStr, "ftell")) || - (NameLen == 5 && !strcmp(NameStr, "fgetc")) || - (NameLen == 6 && !strcmp(NameStr, "fseeko")) || - (NameLen == 6 && !strcmp(NameStr, "ftello")) || - (NameLen == 6 && !strcmp(NameStr, "fileno")) || - (NameLen == 6 && !strcmp(NameStr, "fflush")) || - (NameLen == 6 && !strcmp(NameStr, "fclose")) || - (NameLen == 7 && !strcmp(NameStr, "fsetpos")) || - (NameLen == 9 && !strcmp(NameStr, "flockfile")) || - (NameLen == 11 && !strcmp(NameStr, "funlockfile")) || - (NameLen == 12 && !strcmp(NameStr, "ftrylockfile"))) { + } else if (Name == "feof" || + Name == "free" || + Name == "fseek" || + Name == "ftell" || + Name == "fgetc" || + Name == "fseeko" || + Name == "ftello" || + Name == "fileno" || + Name == "fflush" || + Name == "fclose" || + Name == "fsetpos" || + Name == "flockfile" || + Name == "funlockfile" || + Name == "ftrylockfile") { if (FTy->getNumParams() == 0 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 6 && !strcmp(NameStr, "ferror")) { + } else if (Name == "ferror") { if (FTy->getNumParams() != 1 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setOnlyReadsMemory(F); - } else if ((NameLen == 5 && !strcmp(NameStr, "fputc")) || - (NameLen == 5 && !strcmp(NameStr, "fstat")) || - (NameLen == 5 && !strcmp(NameStr, "frexp")) || - (NameLen == 6 && !strcmp(NameStr, "frexpf")) || - (NameLen == 6 && !strcmp(NameStr, "frexpl")) || - (NameLen == 8 && !strcmp(NameStr, "fstatvfs"))) { + } else if (Name == "fputc" || + Name == "fstat" || + Name == "frexp" || + Name == "frexpf" || + Name == "frexpl" || + Name == "fstatvfs") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 5 && !strcmp(NameStr, "fgets")) { + } else if (Name == "fgets") { if (FTy->getNumParams() != 3 || !isa(FTy->getParamType(0)) || !isa(FTy->getParamType(2))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 3); - } else if ((NameLen == 5 && !strcmp(NameStr, "fread")) || - 
(NameLen == 6 && !strcmp(NameStr, "fwrite"))) { + } else if (Name == "fread" || + Name == "fwrite") { if (FTy->getNumParams() != 4 || !isa(FTy->getParamType(0)) || !isa(FTy->getParamType(3))) @@ -2079,10 +2107,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 4); - } else if ((NameLen == 5 && !strcmp(NameStr, "fputs")) || - (NameLen == 6 && !strcmp(NameStr, "fscanf")) || - (NameLen == 7 && !strcmp(NameStr, "fprintf")) || - (NameLen == 7 && !strcmp(NameStr, "fgetpos"))) { + } else if (Name == "fputs" || + Name == "fscanf" || + Name == "fprintf" || + Name == "fgetpos") { if (FTy->getNumParams() < 2 || !isa(FTy->getParamType(0)) || !isa(FTy->getParamType(1))) @@ -2093,31 +2121,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'g': - if ((NameLen == 4 && !strcmp(NameStr, "getc")) || - (NameLen == 10 && !strcmp(NameStr, "getlogin_r")) || - (NameLen == 13 && !strcmp(NameStr, "getc_unlocked"))) { + if (Name == "getc" || + Name == "getlogin_r" || + Name == "getc_unlocked") { if (FTy->getNumParams() == 0 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 6 && !strcmp(NameStr, "getenv")) { + } else if (Name == "getenv") { if (FTy->getNumParams() != 1 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 4 && !strcmp(NameStr, "gets")) || - (NameLen == 7 && !strcmp(NameStr, "getchar"))) { + } else if (Name == "gets" || + Name == "getchar") { setDoesNotThrow(F); - } else if (NameLen == 9 && !strcmp(NameStr, "getitimer")) { + } else if (Name == "getitimer") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "getpwnam")) { + } else if (Name == "getpwnam") { if (FTy->getNumParams() != 1 || !isa(FTy->getParamType(0))) continue; @@ -2126,22 +2154,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'u': - if (NameLen == 6 && !strcmp(NameStr, "ungetc")) { + if (Name == "ungetc") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if ((NameLen == 5 && !strcmp(NameStr, "uname")) || - (NameLen == 6 && !strcmp(NameStr, "unlink")) || - (NameLen == 8 && !strcmp(NameStr, "unsetenv"))) { + } else if (Name == "uname" || + Name == "unlink" || + Name == "unsetenv") { if (FTy->getNumParams() != 1 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 5 && !strcmp(NameStr, "utime")) || - (NameLen == 6 && !strcmp(NameStr, "utimes"))) { + } else if (Name == "utime" || + Name == "utimes") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(0)) || !isa(FTy->getParamType(1))) @@ -2152,30 +2180,30 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'p': - if (NameLen == 4 && !strcmp(NameStr, "putc")) { + if (Name == "putc") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if ((NameLen == 4 && !strcmp(NameStr, "puts")) || - (NameLen == 6 && !strcmp(NameStr, "printf")) || - (NameLen == 6 && !strcmp(NameStr, "perror"))) { + } else if (Name == "puts" || + Name == "printf" || + Name == "perror") { if (FTy->getNumParams() != 1 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 5 && 
!strcmp(NameStr, "pread")) || - (NameLen == 6 && !strcmp(NameStr, "pwrite"))) { + } else if (Name == "pread" || + Name == "pwrite") { if (FTy->getNumParams() != 4 || !isa(FTy->getParamType(1))) continue; // May throw; these are valid pthread cancellation points. setDoesNotCapture(F, 2); - } else if (NameLen == 7 && !strcmp(NameStr, "putchar")) { + } else if (Name == "putchar") { setDoesNotThrow(F); - } else if (NameLen == 5 && !strcmp(NameStr, "popen")) { + } else if (Name == "popen") { if (FTy->getNumParams() != 2 || !isa(FTy->getReturnType()) || !isa(FTy->getParamType(0)) || @@ -2185,7 +2213,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "pclose")) { + } else if (Name == "pclose") { if (FTy->getNumParams() != 1 || !isa(FTy->getParamType(0))) continue; @@ -2194,14 +2222,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'v': - if (NameLen == 6 && !strcmp(NameStr, "vscanf")) { + if (Name == "vscanf") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 7 && !strcmp(NameStr, "vsscanf")) || - (NameLen == 7 && !strcmp(NameStr, "vfscanf"))) { + } else if (Name == "vsscanf" || + Name == "vfscanf") { if (FTy->getNumParams() != 3 || !isa(FTy->getParamType(1)) || !isa(FTy->getParamType(2))) @@ -2209,19 +2237,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "valloc")) { + } else if (Name == "valloc") { if (!isa(FTy->getReturnType())) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); - } else if (NameLen == 7 && !strcmp(NameStr, "vprintf")) { + } else if (Name == "vprintf") { if (FTy->getNumParams() != 2 || !isa(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 8 && !strcmp(NameStr, "vfprintf")) || - (NameLen == 8 && !strcmp(NameStr, "vsprintf"))) { + } else if (Name == "vfprintf" || + Name == "vsprintf") { if (FTy->getNumParams() != 3 || !isa(FTy->getParamType(0)) || !isa(FTy->getParamType(1))) @@ -2229,7 +2257,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 9 && !strcmp(NameStr, "vsnprintf")) { + } else if (Name == "vsnprintf") { if (FTy->getNumParams() != 4 || !isa(FTy->getParamType(0)) || !isa(FTy->getParamType(2))) @@ -2240,13 +2268,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'o': - if (NameLen == 4 && !strcmp(NameStr, "open")) { + if (Name == "open") { if (FTy->getNumParams() < 2 || !isa(FTy->getParamType(0))) continue; // May throw; "open" is a valid pthread cancellation point. 
case 'o': - if (NameLen == 4 && !strcmp(NameStr, "open")) { + if (Name == "open") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0))) continue; // May throw; "open" is a valid pthread cancellation point. setDoesNotCapture(F, 1); - } else if (NameLen == 7 && !strcmp(NameStr, "opendir")) { + } else if (Name == "opendir") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0))) @@ -2257,12 +2285,12 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 't': - if (NameLen == 7 && !strcmp(NameStr, "tmpfile")) { + if (Name == "tmpfile") { if (!isa<PointerType>(FTy->getReturnType())) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); - } else if (NameLen == 5 && !strcmp(NameStr, "times")) { + } else if (Name == "times") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -2271,21 +2299,21 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'h': - if ((NameLen == 5 && !strcmp(NameStr, "htonl")) || - (NameLen == 5 && !strcmp(NameStr, "htons"))) { + if (Name == "htonl" || + Name == "htons") { setDoesNotThrow(F); setDoesNotAccessMemory(F); } break; case 'n': - if ((NameLen == 5 && !strcmp(NameStr, "ntohl")) || - (NameLen == 5 && !strcmp(NameStr, "ntohs"))) { + if (Name == "ntohl" || + Name == "ntohs") { setDoesNotThrow(F); setDoesNotAccessMemory(F); } break; case 'l': - if (NameLen == 5 && !strcmp(NameStr, "lstat")) { + if (Name == "lstat") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -2293,7 +2321,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 6 && !strcmp(NameStr, "lchown")) { + } else if (Name == "lchown") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(0))) continue; @@ -2302,7 +2330,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 'q': - if (NameLen == 5 && !strcmp(NameStr, "qsort")) { + if (Name == "qsort") { if (FTy->getNumParams() != 4 || !isa<PointerType>(FTy->getParamType(3))) continue; @@ -2311,8 +2339,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case '_': - if ((NameLen == 8 && !strcmp(NameStr, "__strdup")) || - (NameLen == 9 && !strcmp(NameStr, "__strndup"))) { + if (Name == "__strdup" || + Name == "__strndup") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0))) @@ -2320,19 +2348,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); - } else if (NameLen == 10 && !strcmp(NameStr, "__strtok_r")) { + } else if (Name == "__strtok_r") { if (FTy->getNumParams() != 3 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "_IO_getc")) { + } else if (Name == "_IO_getc") { if (FTy->getNumParams() != 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 8 && !strcmp(NameStr, "_IO_putc")) { + } else if (Name == "_IO_putc") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; @@ -2341,16 +2369,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } break; case 1: - if (NameLen == 15 && !strcmp(NameStr, "\1__isoc99_scanf")) { + if (Name == "\1__isoc99_scanf") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if ((NameLen == 7 && !strcmp(NameStr, "\1stat64")) || - (NameLen == 8 && !strcmp(NameStr, "\1lstat64")) || - (NameLen == 10 && !strcmp(NameStr, "\1statvfs64")) || - (NameLen == 16 && !strcmp(NameStr, "\1__isoc99_sscanf"))) { + } else if (Name == "\1stat64" || + Name == "\1lstat64" || + 
Name == "\1statvfs64" || + Name == "\1__isoc99_sscanf") { if (FTy->getNumParams() < 1 || !isa<PointerType>(FTy->getParamType(0)) || !isa<PointerType>(FTy->getParamType(1))) @@ -2358,7 +2386,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if (NameLen == 8 && !strcmp(NameStr, "\1fopen64")) { + } else if (Name == "\1fopen64") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getReturnType()) || !isa<PointerType>(FTy->getParamType(0)) || @@ -2368,26 +2396,26 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); - } else if ((NameLen == 9 && !strcmp(NameStr, "\1fseeko64")) || - (NameLen == 9 && !strcmp(NameStr, "\1ftello64"))) { + } else if (Name == "\1fseeko64" || + Name == "\1ftello64") { if (FTy->getNumParams() == 0 || !isa<PointerType>(FTy->getParamType(0))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); - } else if (NameLen == 10 && !strcmp(NameStr, "\1tmpfile64")) { + } else if (Name == "\1tmpfile64") { if (!isa<PointerType>(FTy->getReturnType())) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); - } else if ((NameLen == 8 && !strcmp(NameStr, "\1fstat64")) || - (NameLen == 11 && !strcmp(NameStr, "\1fstatvfs64"))) { + } else if (Name == "\1fstat64" || + Name == "\1fstatvfs64") { if (FTy->getNumParams() != 2 || !isa<PointerType>(FTy->getParamType(1))) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); - } else if (NameLen == 7 && !strcmp(NameStr, "\1open64")) { + } else if (Name == "\1open64") { if (FTy->getNumParams() < 2 || !isa<PointerType>(FTy->getParamType(0))) continue; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index c037ee9603177..68689d6f13b7d 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -30,8 +30,8 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/SmallPtrSet.h" #include @@ -45,7 +45,7 @@ TailDupThreshold("taildup-threshold", cl::init(1), cl::Hidden); namespace { - class VISIBILITY_HIDDEN TailDup : public FunctionPass { + class TailDup : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid @@ -128,7 +128,7 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI, // other instructions. if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false; - // Allso alloca and malloc. + // Also alloca and malloc. if (isa<AllocationInst>(I)) return false; // Some vector instructions can expand into a number of instructions. @@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { BasicBlock *DestBlock = Branch->getSuccessor(0); assert(SourceBlock != DestBlock && "Our predicate is broken!"); - DOUT << "TailDuplication[" << SourceBlock->getParent()->getName() - << "]: Eliminating branch: " << *Branch; + DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName() + << "]: Eliminating branch: " << *Branch);
// See if we can avoid duplicating code by moving it up to a dominator of both // blocks. if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { - DOUT << "Found shared dominator: " << DomBlock->getName() << "\n"; + DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n"); // If there are non-phi instructions in DestBlock that have no operands // defined in DestBlock, and if the instruction has no side effects, we can @@ -258,7 +258,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { while (!isa<TerminatorInst>(BBI)) { Instruction *I = BBI++; - bool CanHoist = !I->isTrapping() && !I->mayHaveSideEffects(); + bool CanHoist = I->isSafeToSpeculativelyExecute() && + !I->mayReadFromMemory(); if (CanHoist) { for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) @@ -271,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { // Remove from DestBlock, move right before the term in DomBlock. DestBlock->getInstList().remove(I); DomBlock->getInstList().insert(DomBlock->getTerminator(), I); - DOUT << "Hoisted: " << *I; + DEBUG(errs() << "Hoisted: " << *I); } } } @@ -358,7 +359,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) { Instruction *Inst = BI++; if (isInstructionTriviallyDead(Inst)) Inst->eraseFromParent(); - else if (Constant *C = ConstantFoldInstruction(Inst)) { + else if (Constant *C = ConstantFoldInstruction(Inst, + Inst->getContext())) { Inst->replaceAllUsesWith(C); Inst->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 34ee57c9b9dca..b56e17040db27 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -60,14 +60,13 @@ #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" using namespace llvm; STATISTIC(NumEliminated, "Number of tail calls removed"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { - struct VISIBILITY_HIDDEN TailCallElim : public FunctionPass { + struct TailCallElim : public FunctionPass { static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(&ID) {} @@ -394,7 +393,7 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // create the new entry block, allowing us to branch back to the old entry. if (OldEntry == 0) { OldEntry = &F->getEntryBlock(); - BasicBlock *NewEntry = BasicBlock::Create("", F, OldEntry); + BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); OldEntry->setName("tailrecurse"); BranchInst::Create(OldEntry, NewEntry); diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 71049fa212d31..135a621f5d96a 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -19,17 +19,18 @@ #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::PatternMatch; -void ExtAddrMode::print(OStream &OS) const { +void ExtAddrMode::print(raw_ostream &OS) const { bool NeedPlus = false; OS << "["; if (BaseGV) { OS << (NeedPlus ? 
" + " : "") << "GV:"; - WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false); + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); NeedPlus = true; } @@ -39,13 +40,13 @@ void ExtAddrMode::print(OStream &OS) const { if (BaseReg) { OS << (NeedPlus ? " + " : "") << "Base:"; - WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false); + WriteAsOperand(OS, BaseReg, /*PrintType=*/false); NeedPlus = true; } if (Scale) { OS << (NeedPlus ? " + " : "") << Scale << "*"; - WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false); + WriteAsOperand(OS, ScaledReg, /*PrintType=*/false); NeedPlus = true; } @@ -53,8 +54,8 @@ void ExtAddrMode::print(OStream &OS) const { } void ExtAddrMode::dump() const { - print(cerr); - cerr << '\n'; + print(errs()); + errs() << '\n'; } @@ -205,7 +206,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (!RHS) return false; int64_t Scale = RHS->getSExtValue(); if (Opcode == Instruction::Shl) - Scale = 1 << Scale; + Scale = 1LL << Scale; return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 6d1180d0dd9a4..4931ab3f7fadc 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -16,6 +16,7 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Constant.h" #include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -23,6 +24,8 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" #include using namespace llvm; @@ -249,11 +252,11 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { Value *RetVal = 0; // Create a value to return... if the function doesn't return null... - if (BB->getParent()->getReturnType() != Type::VoidTy) + if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext())) RetVal = Constant::getNullValue(BB->getParent()->getReturnType()); // Create the return... - NewTI = ReturnInst::Create(RetVal); + NewTI = ReturnInst::Create(TI->getContext(), RetVal); } break; @@ -261,8 +264,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { case Instruction::Switch: // Should remove entry default: case Instruction::Ret: // Cannot happen, has no successors! - assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!"); - abort(); + llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!"); } if (NewTI) // If it's a different instruction, replace. @@ -318,7 +320,8 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { ++SplitIt; BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); - // The new block lives in whichever loop the old one did. + // The new block lives in whichever loop the old one did. This preserves + // LCSSA as well, because we force the split point to be after any PHI nodes. if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>()) if (Loop *L = LI->getLoopFor(Old)) L->addBasicBlockToLoop(New, LI->getBase());
@@ -352,32 +355,61 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) { /// Preds array, which has NumPreds elements in it. The new block is given a /// suffix of 'Suffix'. /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and -/// DominanceFrontier, but no other analyses. +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses. +/// In particular, it does not preserve LoopSimplify (because it's +/// complicated to handle the case where one of the edges being split +/// is an exit of a loop with other exits). +/// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, unsigned NumPreds, const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. - BasicBlock *NewBB = - BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, + BB->getParent(), BB); // The new block unconditionally branches to the old block. BranchInst *BI = BranchInst::Create(BB, NewBB); + LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; + Loop *L = LI ? LI->getLoopFor(BB) : 0; + bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); + // Move the edges from Preds to point to NewBB instead of BB. - for (unsigned i = 0; i != NumPreds; ++i) + // While here, if we need to preserve loop analyses, collect + // some information about how this split will affect loops. + bool HasLoopExit = false; + bool IsLoopEntry = !!L; + bool SplitMakesNewLoopHeader = false; + for (unsigned i = 0; i != NumPreds; ++i) { Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); - + + if (LI) { + // If we need to preserve LCSSA, determine if any of + // the preds is a loop exit. + if (PreserveLCSSA) + if (Loop *PL = LI->getLoopFor(Preds[i])) + if (!PL->contains(BB)) + HasLoopExit = true; + // If we need to preserve LoopInfo, note whether any of the + // preds crosses an interesting loop boundary. + if (L) { + if (L->contains(Preds[i])) + IsLoopEntry = false; + else + SplitMakesNewLoopHeader = true; + } + } + } + // Update dominator tree and dominator frontier if available. DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0; if (DT) DT->splitBlock(NewBB); if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0) DF->splitBlock(NewBB); - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; - - + // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to @@ -388,20 +420,42 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); return NewBB; } + + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + + if (L) { + if (IsLoopEntry) { + if (Loop *PredLoop = LI->getLoopFor(Preds[0])) { + // Add the new block to the nearest enclosing loop (and not an + // adjacent loop). + while (PredLoop && !PredLoop->contains(BB)) + PredLoop = PredLoop->getParentLoop(); + if (PredLoop) + PredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); + } + } else { + L->addBasicBlockToLoop(NewBB, LI->getBase()); + if (SplitMakesNewLoopHeader) + L->moveToHeader(NewBB); + } + } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we - // don't need to create a new PHI node. 
- Value *InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1; i != NumPreds; ++i) - if (InVal != PN->getIncomingValueForBlock(Preds[i])) { - InVal = 0; - break; - } - + // don't need to create a new PHI node, unless it's needed for LCSSA. + Value *InVal = 0; + if (!HasLoopExit) { + InVal = PN->getIncomingValueForBlock(Preds[0]); + for (unsigned i = 1; i != NumPreds; ++i) + if (InVal != PN->getIncomingValueForBlock(Preds[i])) { + InVal = 0; + break; + } + } + if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old @@ -426,16 +480,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); - - // Check to see if we can eliminate this phi node. - if (Value *V = PN->hasConstantValue(DT != 0)) { - Instruction *I = dyn_cast<Instruction>(V); - if (!I || DT == 0 || DT->dominates(I, PN)) { - PN->replaceAllUsesWith(V); - if (AA) AA->deleteValue(PN); - PN->eraseFromParent(); - } - } } return NewBB; @@ -503,11 +547,15 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - // Test if the values come form identical arithmetic instructions. + // Test if the values come from identical arithmetic instructions. + // Use isIdenticalToWhenDefined instead of isIdenticalTo because + // this function is only used when one address use dominates the + // other, which means that they'll always either have the same + // value or one of them will have an undefined value. if (isa<BinaryOperator>(A) || isa<CastInst>(A) || isa<PHINode>(A) || isa<GetElementPtrInst>(A)) if (const Instruction *BI = dyn_cast<Instruction>(B)) - if (cast<Instruction>(A)->isIdenticalTo(BI)) + if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; // Otherwise they may not be equivalent. @@ -537,7 +585,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, unsigned AccessSize = 0; if (AA) { const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); - AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy); + AccessSize = AA->getTypeStoreSize(AccessTy); } while (ScanFrom != ScanBB->begin()) { diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index 1650cfa306533..4b720b1e323cb 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "basicinliner" - #include "llvm/Module.h" #include "llvm/Function.h" #include "llvm/Transforms/Utils/BasicInliner.h" @@ -21,6 +20,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include @@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() { } } - DOUT << ": " << CallSites.size() << " call sites.\n"; + DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");
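The BasicInliner hunk below gates each call site on a three-way cost verdict: inline unconditionally for "always", refuse for "never", and otherwise compare a numeric cost against BasicInlineThreshold. Schematically, with a hypothetical type standing in for LLVM's InlineCost:

#include <cstdio>

// Hypothetical, simplified model of the decision in inlineFunctions():
// a cost is "always", "never", or a number checked against a threshold.
struct Cost {
  enum Kind { Always, Never, Value } K;
  int V;
};

static bool shouldInline(Cost C, int Threshold) {
  if (C.K == Cost::Always) return true;
  if (C.K == Cost::Never)  return false;
  return C.V < Threshold;  // numeric costs must beat the threshold
}

int main() {
  std::printf("%d\n", shouldInline({Cost::Value, 120}, 200)); // 1: inline
  std::printf("%d\n", shouldInline({Cost::Value, 250}, 200)); // 0: too costly
}
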
// Inline call sites. bool Changed = false; @@ -109,22 +109,22 @@ void BasicInlinerImpl::inlineFunctions() { } InlineCost IC = CA.getInlineCost(CS, NeverInline); if (IC.isAlways()) { - DOUT << " Inlining: cost=always" - <<", call: " << *CS.getInstruction(); + DEBUG(errs() << " Inlining: cost=always" + <<", call: " << *CS.getInstruction()); } else if (IC.isNever()) { - DOUT << " NOT Inlining: cost=never" - <<", call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost=never" + <<", call: " << *CS.getInstruction()); continue; } else { int Cost = IC.getValue(); if (Cost >= (int) BasicInlineThreshold) { - DOUT << " NOT Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction(); + DEBUG(errs() << " NOT Inlining: cost = " << Cost + << ", call: " << *CS.getInstruction()); continue; } else { - DOUT << " Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction(); + DEBUG(errs() << " Inlining: cost = " << Cost + << ", call: " << *CS.getInstruction()); } } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index c4fd1eae43cd9..849b2b5d5cd62 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -21,11 +21,13 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -43,6 +45,7 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addPreserved<DominanceFrontier>(); AU.addPreserved<LoopInfo>(); + AU.addPreserved<ProfileInfo>(); // No loop canonicalization guarantees are broken by this pass. AU.addPreservedID(LoopSimplifyID); @@ -114,6 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, return false; } +/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form +/// may require new PHIs in the new exit block. This function inserts the +/// new PHIs, as needed. Preds is a list of preds inside the loop, SplitBB +/// is the new loop exit block, and DestBB is the old loop exit, now the +/// successor of SplitBB. +static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock*> &Preds, + BasicBlock *SplitBB, + BasicBlock *DestBB) { + // SplitBB shouldn't have anything non-trivial in it yet. + assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() && + "SplitBB has non-PHI nodes!"); + + // For each PHI in the destination block... + for (BasicBlock::iterator I = DestBB->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + unsigned Idx = PN->getBasicBlockIndex(SplitBB); + Value *V = PN->getIncomingValue(Idx); + // If the input is a PHI which already satisfies LCSSA, don't create + // a new one. + if (const PHINode *VP = dyn_cast<PHINode>(V)) + if (VP->getParent() == SplitBB) + continue; + // Otherwise a new PHI is needed. Create one and populate it. + PHINode *NewPN = PHINode::Create(PN->getType(), "split", + SplitBB->getTerminator()); + for (unsigned i = 0, e = Preds.size(); i != e; ++i) + NewPN->addIncoming(V, Preds[i]); + // Update the original PHI. + PN->setIncomingValue(Idx, NewPN); + } +} + /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to /// split the critical edge. 
This will update DominatorTree and /// DominatorFrontier information if it is available, thus calling this pass @@ -121,15 +156,15 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, /// false otherwise. This ensures that all edges to that dest go to one block /// instead of each going to a different block. // -bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, - bool MergeIdenticalEdges) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false; +BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, + Pass *P, bool MergeIdenticalEdges) { + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); // Create a new basic block, linking it into the CFG. - BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." + - DestBB->getName() + "_crit_edge"); + BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), + TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); // Create our unconditional branch... BranchInst::Create(DestBB, NewBB); @@ -171,7 +206,7 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, // If we don't have a pass object, we can't update anything... - if (P == 0) return true; + if (P == 0) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate @@ -222,8 +257,8 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! - assert(0 && "Requiring domfrontiers but not idom/domtree/domset." - " not implemented yet!"); + llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset." + " not implemented yet!"); } // Since the new block is dominated by its only predecessor TIBB, @@ -253,9 +288,9 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, // Update LoopInfo if it is around. if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) { - // If one or the other blocks were not in a loop, the new block is not - // either, and thus LI doesn't need to be updated. - if (Loop *TIL = LI->getLoopFor(TIBB)) + if (Loop *TIL = LI->getLoopFor(TIBB)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. if (Loop *DestLoop = LI->getLoopFor(DestBB)) { if (TIL == DestLoop) { // Both in the same loop, the NewBB joins loop. @@ -277,6 +312,65 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, P->addBasicBlockToLoop(NewBB, LI->getBase()); } } + // If TIBB is in a loop and DestBB is outside of that loop, split the + // other exit blocks of the loop that also have predecessors outside + // the loop, to maintain a LoopSimplify guarantee. + if (!TIL->contains(DestBB) && + P->mustPreserveAnalysisID(LoopSimplifyID)) { + assert(!TIL->contains(NewBB) && + "Split point for loop exit is contained in loop!"); + + // Update LCSSA form in the newly created exit block. + if (P->mustPreserveAnalysisID(LCSSAID)) { + SmallVector<BasicBlock*, 1> OrigPred; + OrigPred.push_back(TIBB); + CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); + } +
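As background for the SplitCriticalEdge changes above (which now return the new block instead of a bool): an edge is critical when its source has multiple successors and its destination has multiple predecessors, so code can only be placed "on the edge" by inserting a block in the middle, exactly what the BasicBlock::Create plus BranchInst::Create pair in the hunk does. A freestanding sketch of the predicate over a toy CFG:

#include <cassert>
#include <vector>

// Toy CFG: Succs[b] lists the successors of block b.
// An edge A->B is critical iff A has >1 successor and B has >1 predecessor.
static bool isCriticalEdge(const std::vector<std::vector<int>> &Succs,
                           int A, int B) {
  if (Succs[A].size() < 2) return false;
  int Preds = 0;
  for (const std::vector<int> &S : Succs)
    for (int T : S)
      if (T == B) ++Preds;
  return Preds > 1;
}

int main() {
  // Block 0 branches to 1 and 3; blocks 1 and 2 both branch to 3.
  std::vector<std::vector<int>> Succs = {{1, 3}, {3}, {3}, {}};
  assert(!isCriticalEdge(Succs, 0, 1)); // dest 1 has a single predecessor
  assert(isCriticalEdge(Succs, 0, 3));  // 0 has two succs, 3 has many preds
}
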
+ // For each unique exit block... + SmallVector<BasicBlock*, 4> ExitBlocks; + TIL->getExitBlocks(ExitBlocks); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { + // Collect all the preds that are inside the loop, and note + // whether there are any preds outside the loop. + SmallVector<BasicBlock*, 4> Preds; + bool HasPredOutsideOfLoop = false; + BasicBlock *Exit = ExitBlocks[i]; + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); + I != E; ++I) + if (TIL->contains(*I)) + Preds.push_back(*I); + else + HasPredOutsideOfLoop = true; + // If there are any preds not in the loop, we'll need to split + // the edges. The Preds.empty() check is needed because a block + // may appear multiple times in the list. We can't use + // getUniqueExitBlocks above because that depends on LoopSimplify + // form, which we're in the process of restoring! + if (!Preds.empty() && HasPredOutsideOfLoop) { + BasicBlock *NewExitBB = + SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), + "split", P); + if (P->mustPreserveAnalysisID(LCSSAID)) + CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); + } + } + } + // LCSSA form was updated above for the case where LoopSimplify is + // available, which means that all predecessors of loop exit blocks + // are within the loop. Without LoopSimplify form, it would be + // necessary to insert a new phi. + assert((!P->mustPreserveAnalysisID(LCSSAID) || + P->mustPreserveAnalysisID(LoopSimplifyID)) && + "SplitCriticalEdge doesn't know how to update LCSSA form " + "without LoopSimplify!"); + } } - return true; + + // Update ProfileInfo if it is around. + if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) { + PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges); + } + + return NewBB; } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 10cae5ca70872..f4394ea64d6ea 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -6,11 +6,10 @@ add_llvm_library(LLVMTransformUtils CloneFunction.cpp CloneLoop.cpp CloneModule.cpp - CloneTrace.cpp CodeExtractor.cpp DemoteRegToStack.cpp - InlineCost.cpp InlineFunction.cpp + InstructionNamer.cpp LCSSA.cpp Local.cpp LoopSimplify.cpp @@ -19,12 +18,12 @@ add_llvm_library(LLVMTransformUtils LowerSwitch.cpp Mem2Reg.cpp PromoteMemoryToRegister.cpp - SimplifyCFG.cpp + SSAUpdater.cpp SSI.cpp + SimplifyCFG.cpp UnifyFunctionExitNodes.cpp UnrollLoop.cpp ValueMapper.cpp - InstructionNamer.cpp ) target_link_libraries (LLVMTransformUtils LLVMSupport) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index d0fdefa3f6894..30130fa0a1265 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -20,6 +20,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/GlobalVariable.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -34,7 +35,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, DenseMap<const Value*, Value*> &ValueMap, const char *NameSuffix, Function *F, ClonedCodeInfo *CodeInfo) { - BasicBlock *NewBB = BasicBlock::Create("", F); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; @@ -72,7 +73,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap<const Value*, Value*> &ValueMap, - std::vector<ReturnInst*> &Returns, + 
SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -165,7 +166,7 @@ Function *llvm::CloneFunction(const Function *F, ValueMap[I] = DestI++; // Add mapping to ValueMap } - std::vector<ReturnInst*> Returns; // Ignore returns cloned... + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo); return NewF; } @@ -179,7 +180,7 @@ namespace { Function *NewFunc; const Function *OldFunc; DenseMap<const Value*, Value*> &ValueMap; - std::vector<ReturnInst*> &Returns; + SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; @@ -187,7 +188,7 @@ public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, DenseMap<const Value*, Value*> &valueMap, - std::vector<ReturnInst*> &returns, + SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) @@ -218,7 +219,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Nope, clone it now. BasicBlock *NewBB; - BBEntry = NewBB = BasicBlock::Create(); + BBEntry = NewBB = BasicBlock::Create(BB->getContext()); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; @@ -237,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner. if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) { if (DbgFnStart == NULL) { - DISubprogram SP(cast<GlobalVariable>(DFSI->getSubprogram())); + DISubprogram SP(DFSI->getSubprogram()); if (SP.describes(BB->getParent())) DbgFnStart = DFSI->getSubprogram(); } @@ -323,17 +324,21 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, /// mapping its operands through ValueMap if they are available. Constant *PruningFunctionCloner:: ConstantFoldMappedInstruction(const Instruction *I) { + LLVMContext &Context = I->getContext(); + SmallVector<Constant*, 8> Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - ValueMap))) + ValueMap, + Context))) Ops.push_back(Op); else return 0; // All operands not constant! if (const CmpInst *CI = dyn_cast<CmpInst>(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), - &Ops[0], Ops.size(), TD); + &Ops[0], Ops.size(), + Context, TD); if (const LoadInst *LI = dyn_cast<LoadInst>(I)) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) @@ -344,7 +349,7 @@ ConstantFoldMappedInstruction(const Instruction *I) { CE); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], - Ops.size(), TD); + Ops.size(), Context, TD); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -356,11 +361,12 @@ ConstantFoldMappedInstruction(const Instruction *I) { /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap<const Value*, Value*> &ValueMap, - std::vector<ReturnInst*> &Returns, + SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD) { assert(NameSuffix && "NameSuffix cannot be null!"); + LLVMContext &Context = OldFunc->getContext(); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(),
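The signature change above, from std::vector<ReturnInst*>& to SmallVectorImpl<ReturnInst*>&, follows a general LLVM pattern: callers declare SmallVector<T, N> with whatever inline capacity N suits them, and callees accept the size-erased base class, so one function serves every N and small result sets never touch the heap. The shape of that idiom, reduced to a toy (this is not LLVM's actual implementation):

#include <cassert>
#include <cstddef>

// Size-erased base: knows the buffer, not the inline capacity N.
template <typename T> class SmallVectorImpl {
protected:
  T *Data; size_t Size, Cap;
  SmallVectorImpl(T *Buf, size_t Cap) : Data(Buf), Size(0), Cap(Cap) {}
public:
  void push_back(const T &V) {
    assert(Size < Cap && "toy version: no heap growth");
    Data[Size++] = V;
  }
  size_t size() const { return Size; }
  T &operator[](size_t I) { return Data[I]; }
};

// Concrete type: carries N inline elements, converts to SmallVectorImpl<T>&.
template <typename T, size_t N> class SmallVector : public SmallVectorImpl<T> {
  T Inline[N];
public:
  SmallVector() : SmallVectorImpl<T>(Inline, N) {}
};

// A callee like CloneFunctionInto can accept any caller's capacity choice.
static void produce(SmallVectorImpl<int> &Out) { Out.push_back(42); }

int main() {
  SmallVector<int, 8> V;  // the caller picks the inline capacity
  produce(V);             // the callee is oblivious to the 8
  assert(V.size() == 1 && V[0] == 42);
}
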
@@ -385,7 +391,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. - std::vector<const PHINode*> PHIToResolve; + SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); @@ -430,7 +436,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { - Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap); + Value *InVal = MapValue(PN->getIncomingValue(pred), + ValueMap, Context); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 82f5b93a9544d..0285f8c8d107b 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -56,10 +56,11 @@ Module *llvm::CloneModule(const Module *M, // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(), + GlobalVariable *GV = new GlobalVariable(*New, + I->getType()->getElementType(), false, GlobalValue::ExternalLinkage, 0, - I->getName(), New); + I->getName()); GV->setAlignment(I->getAlignment()); ValueMap[I] = GV; } @@ -88,7 +89,8 @@ Module *llvm::CloneModule(const Module *M, GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]); if (I->hasInitializer()) GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), - ValueMap))); + ValueMap, + M->getContext()))); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M, ValueMap[J] = DestI++; } - std::vector<ReturnInst*> Returns; // Ignore returns cloned... + SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned. CloneFunctionInto(F, I, ValueMap, Returns); } @@ -119,7 +121,7 @@ Module *llvm::CloneModule(const Module *M, GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]); GA->setLinkage(I->getLinkage()); if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast<Constant>(MapValue(C, ValueMap))); + GA->setAliasee(cast<Constant>(MapValue(C, ValueMap, M->getContext()))); } return New; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 6d5904e308867..c39ccf7d3f457 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -18,6 +18,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" @@ -27,6 +28,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" #include #include @@ -180,8 +183,24 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { void CodeExtractor::splitReturnBlocks() { for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(), E = BlocksToExtract.end(); I != E; ++I) - if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) - (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { + BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + if (DT) { + // Old dominates New. New node dominates all other nodes dominated + // by Old. 
+ DomTreeNode *OldNode = DT->getNode(*I); + SmallVector<DomTreeNode*, 8> Children; + for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end(); + DI != DE; ++DI) + Children.push_back(*DI); + + DomTreeNode *NewNode = DT->addNewBlock(New, *I); + + for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(), + E = Children.end(); I != E; ++I) + DT->changeImmediateDominator(*I, NewNode); + } + } } // findInputsOutputs - Find inputs to, outputs from the code region. @@ -234,15 +253,15 @@ Function *CodeExtractor::constructFunction(const Values &inputs, BasicBlock *newHeader, Function *oldFunction, Module *M) { - DOUT << "inputs: " << inputs.size() << "\n"; - DOUT << "outputs: " << outputs.size() << "\n"; + DEBUG(errs() << "inputs: " << inputs.size() << "\n"); + DEBUG(errs() << "outputs: " << outputs.size() << "\n"); // This function returns unsigned, outputs will go back by reference. switch (NumExitBlocks) { case 0: - case 1: RetTy = Type::VoidTy; break; - case 2: RetTy = Type::Int1Ty; break; - default: RetTy = Type::Int16Ty; break; + case 1: RetTy = Type::getVoidTy(header->getContext()); break; + case 2: RetTy = Type::getInt1Ty(header->getContext()); break; + default: RetTy = Type::getInt16Ty(header->getContext()); break; } std::vector<const Type*> paramTy; @@ -251,32 +270,34 @@ Function *CodeExtractor::constructFunction(const Values &inputs, for (Values::const_iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) { const Value *value = *i; - DOUT << "value used in func: " << *value << "\n"; + DEBUG(errs() << "value used in func: " << *value << "\n"); paramTy.push_back(value->getType()); } // Add the types of the output values to the function's argument list. for (Values::const_iterator I = outputs.begin(), E = outputs.end(); I != E; ++I) { - DOUT << "instr used in func: " << **I << "\n"; + DEBUG(errs() << "instr used in func: " << **I << "\n"); if (AggregateArgs) paramTy.push_back((*I)->getType()); else paramTy.push_back(PointerType::getUnqual((*I)->getType())); } - DOUT << "Function type: " << *RetTy << " f("; + DEBUG(errs() << "Function type: " << *RetTy << " f("); for (std::vector<const Type*>::iterator i = paramTy.begin(), e = paramTy.end(); i != e; ++i) - DOUT << **i << ", "; - DOUT << ")\n"; + DEBUG(errs() << **i << ", "); + DEBUG(errs() << ")\n"); if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy)); + PointerType *StructPtr = + PointerType::getUnqual(StructType::get(M->getContext(), paramTy)); paramTy.clear(); paramTy.push_back(StructPtr); } - const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); + const FunctionType *funcType = + FunctionType::get(RetTy, paramTy, false); // Create the new function Function *newFunction = Function::Create(funcType, @@ -298,13 +319,13 @@ Function *CodeExtractor::constructFunction(const Values &inputs, Value *RewriteVal; if (AggregateArgs) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty, i); - std::string GEPname = "gep_" + inputs[i]->getName(); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); + Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); - GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2, - GEPname, TI); - RewriteVal = new LoadInst(GEP, "load" + GEPname, TI); + GetElementPtrInst *GEP = + GetElementPtrInst::Create(AI, Idx, Idx+2, + "gep_" + inputs[i]->getName(), TI); + RewriteVal = new LoadInst(GEP, 
"loadgep_" + inputs[i]->getName(), TI); } else RewriteVal = AI++; @@ -340,6 +361,20 @@ Function *CodeExtractor::constructFunction(const Values &inputs, return newFunction; } +/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI +/// that uses the value within the basic block, and return the predecessor +/// block associated with that use, or return 0 if none is found. +static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { + for (Value::use_iterator UI = Used->use_begin(), + UE = Used->use_end(); UI != UE; ++UI) { + PHINode *P = dyn_cast<PHINode>(*UI); + if (P && P->getParent() == BB) + return P->getIncomingBlock(UI); + } + + return 0; +} + /// emitCallAndSwitchStatement - This method sets up the caller side by adding /// the call instruction, splitting any PHI nodes in the header block as /// necessary. @@ -348,7 +383,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Values &inputs, Values &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plain inputs and allocated memory for outputs - std::vector<Value*> params, StructValues, ReloadOutputs; + std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; + + LLVMContext &Context = newFunction->getContext(); // Add inputs as params, or to be filled into the struct for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) @@ -378,7 +415,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ArgTypes.push_back((*v)->getType()); // Allocate a struct at the beginning of this function - Type *StructArgTy = StructType::get(ArgTypes); + Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = new AllocaInst(StructArgTy, 0, "structArg", codeReplacer->getParent()->begin()->begin()); @@ -386,8 +423,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, for (unsigned i = 0, e = inputs.size(); i != e; ++i) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty, i); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); GetElementPtrInst *GEP = GetElementPtrInst::Create(Struct, Idx, Idx + 2, "gep_" + StructValues[i]->getName()); @@ -412,8 +449,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Value *Output = 0; if (AggregateArgs) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); GetElementPtrInst *GEP = GetElementPtrInst::Create(Struct, Idx, Idx + 2, "gep_reload_" + outputs[i]->getName()); @@ -423,6 +460,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Output = ReloadOutputs[i]; } LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); + Reloads.push_back(load); codeReplacer->getInstList().push_back(load); std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) {
SwitchInst *TheSwitch = - SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty), + SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), codeReplacer, 0, codeReplacer); // Since there may be multiple exits from the original region, make the new @@ -456,7 +494,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, if (!NewTarget) { // If we don't already have an exit stub for this non-extracted // destination, create one now! - NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub", + NewTarget = BasicBlock::Create(Context, + OldTarget->getName() + ".exitStub", newFunction); unsigned SuccNum = switchVal++; @@ -465,17 +504,18 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, case 0: case 1: break; // No value needed. case 2: // Conditional branch, return a bool - brVal = ConstantInt::get(Type::Int1Ty, !SuccNum); + brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); break; default: - brVal = ConstantInt::get(Type::Int16Ty, SuccNum); + brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); break; } - ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget); + ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget); // Update the switch instruction. - TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum), + TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), + SuccNum), OldTarget); // Restore values just before we exit @@ -507,14 +547,25 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, DominatesDef = false; } - if (DT) + if (DT) { DominatesDef = DT->dominates(DefBlock, OldTarget); + + // If the output value is used by a phi in the target block, + // then we need to test for dominance of the phi's predecessor + // instead. Unfortunately, this a little complicated since we + // have already rewritten uses of the value to uses of the reload. + BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], + OldTarget); + if (pred && DT && DT->dominates(DefBlock, pred)) + DominatesDef = true; + } if (DominatesDef) { if (AggregateArgs) { Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::Int32Ty); - Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out); + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), + FirstOut+out); GetElementPtrInst *GEP = GetElementPtrInst::Create(OAI, Idx, Idx + 2, "gep_" + outputs[out]->getName(), @@ -543,15 +594,16 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // this should be rewritten as a `ret' // Check if the function should return a value - if (OldFnRetTy == Type::VoidTy) { - ReturnInst::Create(0, TheSwitch); // Return void + if (OldFnRetTy == Type::getVoidTy(Context)) { + ReturnInst::Create(Context, 0, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have - ReturnInst::Create(TheSwitch->getCondition(), TheSwitch); + ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); } else { // Otherwise we must have code extracted an unwind or something, just // return whatever we want. 
-    ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch);
+    ReturnInst::Create(Context,
+                       Constant::getNullValue(OldFnRetTy), TheSwitch);
   }
 
   TheSwitch->eraseFromParent();
@@ -644,12 +696,14 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
   Function *oldFunction = header->getParent();
 
   // This takes the place of the original loop
-  BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction,
+  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),
+                                                "codeRepl", oldFunction,
                                                 header);
 
   // The new function needs a root node because other nodes can branch to the
   // head of the region, but the entry node of a function cannot have preds.
-  BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot");
+  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),
+                                               "newFuncRoot");
   newFuncRoot->getInstList().push_back(BranchInst::Create(header));
 
   // Find inputs to, outputs from the code region.
@@ -702,7 +756,8 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {
   //  cerr << "OLD FUNCTION: " << *oldFunction;
   //  verifyFunction(*oldFunction);
 
-  DEBUG(if (verifyFunction(*newFunction)) abort());
+  DEBUG(if (verifyFunction(*newFunction))
+        llvm_report_error("verifyFunction failed!"));
   return newFunction;
 }
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index b8dd75413342f..c908b4a559142 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -39,7 +39,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
   // Create a stack slot to hold the value.
   AllocaInst *Slot;
   if (AllocaPoint) {
-    Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint);
+    Slot = new AllocaInst(I.getType(), 0,
+                          I.getName()+".reg2mem", AllocaPoint);
   } else {
     Function *F = I.getParent()->getParent();
     Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
@@ -116,7 +117,8 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
   // Create a stack slot to hold the value.
   AllocaInst *Slot;
   if (AllocaPoint) {
-    Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint);
+    Slot = new AllocaInst(P->getType(), 0,
+                          P->getName()+".reg2mem", AllocaPoint);
   } else {
     Function *F = P->getParent()->getParent();
     Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 4989c00ceb814..0d00d69c8cb99 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
@@ -28,13 +29,73 @@
 #include "llvm/Support/CallSite.h"
 using namespace llvm;
 
-bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) {
-  return InlineFunction(CallSite(CI), CG, TD);
+bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+  return InlineFunction(CallSite(CI), CG, TD, StaticAllocas);
 }
-bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
-  return InlineFunction(CallSite(II), CG, TD);
+bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
+  return InlineFunction(CallSite(II), CG, TD, StaticAllocas);
 }
+
+/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
+/// an invoke, we have to turn all of the calls that can throw into
+/// invokes.  This function analyzes BB to see if there are any calls, and if so,
+/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
+/// nodes in that block with the values specified in InvokeDestPHIValues.
+///
+static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
+                                                   BasicBlock *InvokeDest,
+                          const SmallVectorImpl<Value*> &InvokeDestPHIValues) {
+  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
+    Instruction *I = BBI++;
+
+    // We only need to check for function calls: inlined invoke
+    // instructions require no special handling.
+    CallInst *CI = dyn_cast<CallInst>(I);
+    if (CI == 0) continue;
+
+    // If this call cannot unwind, don't convert it to an invoke.
+    if (CI->doesNotThrow())
+      continue;
+
+    // Convert this function call into an invoke instruction.
+    // First, split the basic block.
+    BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
+
+    // Next, create the new invoke instruction, inserting it at the end
+    // of the old basic block.
+    SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+    InvokeInst *II =
+      InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
+                         InvokeArgs.begin(), InvokeArgs.end(),
+                         CI->getName(), BB->getTerminator());
+    II->setCallingConv(CI->getCallingConv());
+    II->setAttributes(CI->getAttributes());
+
+    // Make sure that anything using the call now uses the invoke!  This also
+    // updates the CallGraph if present.
+    CI->replaceAllUsesWith(II);
+
+    // Delete the unconditional branch inserted by splitBasicBlock
+    BB->getInstList().pop_back();
+    Split->getInstList().pop_front();  // Delete the original call
+
+    // Update any PHI nodes in the exceptional block to indicate that
+    // there is now a new entry in them.
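The new out-parameter threaded through both InlineFunction overloads above gives callers a way to observe the static allocas that inlining moves into the caller. A hedged usage sketch (hypothetical driver code; only the InlineFunction signature itself comes from the patch):

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Inline CI and collect the fixed-size allocas that were spliced into
    // the caller's entry block.  CG and TD are optional and omitted here.
    static bool InlineAndCollectAllocas(CallInst *CI) {
      SmallVector<AllocaInst*, 4> StaticAllocas;
      if (!InlineFunction(CI, /*CG=*/0, /*TD=*/0, &StaticAllocas))
        return false;
      // StaticAllocas now lists the inlined frame's fixed-size allocas;
      // a client could, e.g., consider them for stack slot sharing.
      return true;
    }

The PHI bookkeeping for the exceptional edge continues in the patch below.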
+    unsigned i = 0;
+    for (BasicBlock::iterator I = InvokeDest->begin();
+         isa<PHINode>(I); ++I, ++i)
+      cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB);
+
+    // This basic block is now complete, the caller will continue scanning the
+    // next one.
+    return;
+  }
+}
+
+
 /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
 /// in the body of the inlined function into invokes and turn unwind
 /// instructions into branches to the invoke unwind dest.
@@ -43,10 +104,9 @@ bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {
 /// block of the inlined code (the last block is the end of the function),
 /// and InlineCodeInfo is information about the code that got inlined.
 static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
-                                ClonedCodeInfo &InlinedCodeInfo,
-                                CallGraph *CG) {
+                                ClonedCodeInfo &InlinedCodeInfo) {
   BasicBlock *InvokeDest = II->getUnwindDest();
-  std::vector<Value*> InvokeDestPHIValues;
+  SmallVector<Value*, 8> InvokeDestPHIValues;
 
   // If there are PHI nodes in the unwind destination block, we need to
   // keep track of which values came into them from this invoke, then remove
@@ -62,92 +122,39 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
 
   // The inlined code is currently at the end of the function, scan from the
   // start of the inlined code to its end, checking for stuff we need to
-  // rewrite.
-  if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) {
-    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
-         BB != E; ++BB) {
-      if (InlinedCodeInfo.ContainsCalls) {
-        for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){
-          Instruction *I = BBI++;
-
-          // We only need to check for function calls: inlined invoke
-          // instructions require no special handling.
-          if (!isa<CallInst>(I)) continue;
-          CallInst *CI = cast<CallInst>(I);
-
-          // If this call cannot unwind, don't convert it to an invoke.
-          if (CI->doesNotThrow())
-            continue;
-
-          // Convert this function call into an invoke instruction.
-          // First, split the basic block.
-          BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
-
-          // Next, create the new invoke instruction, inserting it at the end
-          // of the old basic block.
-          SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
-          InvokeInst *II =
-            InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
-                               InvokeArgs.begin(), InvokeArgs.end(),
-                               CI->getName(), BB->getTerminator());
-          II->setCallingConv(CI->getCallingConv());
-          II->setAttributes(CI->getAttributes());
-
-          // Make sure that anything using the call now uses the invoke!
-          CI->replaceAllUsesWith(II);
-
-          // Update the callgraph.
-          if (CG) {
-            // We should be able to do this:
-            //   (*CG)[Caller]->replaceCallSite(CI, II);
-            // but that fails if the old call site isn't in the call graph,
-            // which, because of LLVM bug 3601, it sometimes isn't.
-            CallGraphNode *CGN = (*CG)[Caller];
-            for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end();
-                 NI != NE; ++NI) {
-              if (NI->first == CI) {
-                NI->first = II;
-                break;
-              }
-            }
-          }
-
-          // Delete the unconditional branch inserted by splitBasicBlock
-          BB->getInstList().pop_back();
-          Split->getInstList().pop_front();  // Delete the original call
-
-          // Update any PHI nodes in the exceptional block to indicate that
-          // there is now a new entry in them.
-          unsigned i = 0;
-          for (BasicBlock::iterator I = InvokeDest->begin();
-               isa<PHINode>(I); ++I, ++i) {
-            PHINode *PN = cast<PHINode>(I);
-            PN->addIncoming(InvokeDestPHIValues[i], BB);
-          }
-
-          // This basic block is now complete, start scanning the next one.
-          break;
-        }
-      }
-
-      if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
-        // An UnwindInst requires special handling when it gets inlined into an
-        // invoke site.  Once this happens, we know that the unwind would cause
-        // a control transfer to the invoke exception destination, so we can
-        // transform it into a direct branch to the exception destination.
-        BranchInst::Create(InvokeDest, UI);
-
-        // Delete the unwind instruction!
-        UI->eraseFromParent();
-
-        // Update any PHI nodes in the exceptional block to indicate that
-        // there is now a new entry in them.
-        unsigned i = 0;
-        for (BasicBlock::iterator I = InvokeDest->begin();
-             isa<PHINode>(I); ++I, ++i) {
-          PHINode *PN = cast<PHINode>(I);
-          PN->addIncoming(InvokeDestPHIValues[i], BB);
-        }
+  // rewrite.  If the code doesn't have calls or unwinds, we know there is
+  // nothing to rewrite.
+  if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+    // Now that everything is happy, we have one final detail.  The PHI nodes in
+    // the exception destination block still have entries due to the original
+    // invoke instruction.  Eliminate these entries (which might even delete the
+    // PHI node) now.
+    InvokeDest->removePredecessor(II->getParent());
+    return;
+  }
+
+  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
+    if (InlinedCodeInfo.ContainsCalls)
+      HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest,
+                                             InvokeDestPHIValues);
+
+    if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+      // An UnwindInst requires special handling when it gets inlined into an
+      // invoke site.  Once this happens, we know that the unwind would cause
+      // a control transfer to the invoke exception destination, so we can
+      // transform it into a direct branch to the exception destination.
+      BranchInst::Create(InvokeDest, UI);
+
+      // Delete the unwind instruction!
+      UI->eraseFromParent();
+
+      // Update any PHI nodes in the exceptional block to indicate that
+      // there is now a new entry in them.
+      unsigned i = 0;
+      for (BasicBlock::iterator I = InvokeDest->begin();
+           isa<PHINode>(I); ++I, ++i) {
+        PHINode *PN = cast<PHINode>(I);
+        PN->addIncoming(InvokeDestPHIValues[i], BB);
      }
    }
  }
@@ -185,17 +192,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   }
 
   for (; I != E; ++I) {
-    const Instruction *OrigCall = I->first.getInstruction();
+    const Value *OrigCall = I->first;
 
     DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
     // Only copy the edge if the call was inlined!
-    if (VMI != ValueMap.end() && VMI->second) {
-      // If the call was inlined, but then constant folded, there is no edge to
-      // add.  Check for this case.
-      if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
-        CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
-    }
+    if (VMI == ValueMap.end() || VMI->second == 0)
+      continue;
+
+    // If the call was inlined, but then constant folded, there is no edge to
+    // add.  Check for this case.
+    if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second))
+      CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
   }
+
   // Update the call graph by deleting the edge from Callee to Caller.  We must
   // do this after the loop above in case Caller and Callee are the same.
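The edge-copying loop above leans on one pattern that deserves a note: a cloned instruction is found by mapping the original through the clone map, and a null or constant result means there is nothing to update. A minimal sketch of that lookup, with assumed types matching the surrounding code (not part of the patch):

    #include "llvm/Instructions.h"
    #include "llvm/ADT/DenseMap.h"
    using namespace llvm;

    // Return the cloned call for OrigCall, or 0 if it was dropped or
    // constant-folded away during inlining.
    static Instruction *LookupClonedCall(const Value *OrigCall,
                                         DenseMap<const Value*, Value*> &ValueMap) {
      DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
      if (VMI == ValueMap.end() || VMI->second == 0)
        return 0;                                  // never cloned
      return dyn_cast<Instruction>(VMI->second);   // 0 if folded to a constant
    }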
   CallerNode->removeCallEdgeFor(CS);
@@ -204,25 +213,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
 /// findFnRegionEndMarker - This is a utility routine that is used by
 /// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds
 /// to the llvm.dbg.func.start of the function F. Otherwise return NULL.
+///
 static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
 
-  GlobalVariable *FnStart = NULL;
+  MDNode *FnStart = NULL;
   const DbgRegionEndInst *FnEnd = NULL;
   for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
     for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE;
          ++BI) {
       if (FnStart == NULL)  {
         if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) {
-          DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram()));
+          DISubprogram SP(FSI->getSubprogram());
           assert (SP.isNull() == false && "Invalid llvm.dbg.func.start");
           if (SP.describes(F))
-            FnStart = SP.getGV();
+            FnStart = SP.getNode();
         }
-      } else {
-        if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
-          if (REI->getContext() == FnStart)
-            FnEnd = REI;
+        continue;
       }
+
+      if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI))
+        if (REI->getContext() == FnStart)
+          FnEnd = REI;
     }
   return FnEnd;
 }
@@ -236,8 +247,10 @@ static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {
 // exists in the instruction stream.  Similarly this will inline a recursive
 // function by one level.
 //
-bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
+bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
+                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {
   Instruction *TheCall = CS.getInstruction();
+  LLVMContext &Context = TheCall->getContext();
   assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
          "Instruction not in function!");
 
@@ -277,7 +290,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
 
     // Make sure to capture all of the return instructions from the cloned
     // function.
-    std::vector<ReturnInst*> Returns;
+    SmallVector<ReturnInst*, 8> Returns;
     ClonedCodeInfo InlinedFunctionInfo;
     Function::iterator FirstNewBlock;
 
@@ -302,15 +315,17 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
       if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
          !CalledFunc->onlyReadsMemory()) {
        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
-        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);
+        const Type *VoidPtrTy =
+            Type::getInt8PtrTy(Context);
 
        // Create the alloca.  If we have TargetData, use nice alignment.
        unsigned Align = 1;
        if (TD) Align = TD->getPrefTypeAlignment(AggTy);
-        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
-                                          Caller->begin()->begin());
+        Value *NewAlloca = new AllocaInst(AggTy, 0, Align,
+                                          I->getName(),
+                                          &*Caller->begin()->begin());
        // Emit a memcpy.
-        const Type *Tys[] = { Type::Int64Ty };
+        const Type *Tys[] = { Type::getInt64Ty(Context) };
        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                       Intrinsic::memcpy,
                                                       Tys, 1);
@@ -321,13 +336,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
        if (TD == 0)
          Size = ConstantExpr::getSizeOf(AggTy);
        else
-          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));
+          Size = ConstantInt::get(Type::getInt64Ty(Context),
+                                  TD->getTypeStoreSize(AggTy));
 
        // Always generate a memcpy of alignment 1 here because we don't know
        // the alignment of the src pointer.  Other optimizations can infer
        // better alignment.
        Value *CallArgs[] = {
-          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
+          DestCast, SrcCast, Size,
+          ConstantInt::get(Type::getInt32Ty(Context), 1)
        };
        CallInst *TheMemCpy =
          CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);
@@ -352,13 +369,12 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
     // call site.  The function body cloner does not clone original
     // region end marker from the CalledFunc.  This will ensure that
     // inlined function's scope ends at the right place.
-    const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc);
-    if (DREI) {
-      for (BasicBlock::iterator BI = TheCall,
-             BE = TheCall->getParent()->end(); BI != BE; ++BI) {
+    if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) {
+      for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end();
+           BI != BE; ++BI) {
        if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) {
          if (DbgRegionEndInst *NewDREI =
-                dyn_cast<DbgRegionEndInst>(DREI->clone()))
+              dyn_cast<DbgRegionEndInst>(DREI->clone()))
            NewDREI->insertAfter(DSPI);
          break;
        }
@@ -388,31 +404,39 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
    {
      BasicBlock::iterator InsertPoint = Caller->begin()->begin();
      for (BasicBlock::iterator I = FirstNewBlock->begin(),
-             E = FirstNewBlock->end(); I != E; )
-        if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
-          // If the alloca is now dead, remove it.  This often occurs due to code
-          // specialization.
-          if (AI->use_empty()) {
-            AI->eraseFromParent();
-            continue;
-          }
+           E = FirstNewBlock->end(); I != E; ) {
+      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
+      if (AI == 0) continue;
+
+      // If the alloca is now dead, remove it.  This often occurs due to code
+      // specialization.
+      if (AI->use_empty()) {
+        AI->eraseFromParent();
+        continue;
+      }
 
-          if (isa<ConstantInt>(AI->getArraySize())) {
-            // Scan for the block of allocas that we can move over, and move them
-            // all at once.
-            while (isa<AllocaInst>(I) &&
-                   isa<ConstantInt>(cast<AllocaInst>(I)->getArraySize()))
-              ++I;
-
-            // Transfer all of the allocas over in a block.  Using splice means
-            // that the instructions aren't removed from the symbol table, then
-            // reinserted.
-            Caller->getEntryBlock().getInstList().splice(
-                InsertPoint,
-                FirstNewBlock->getInstList(),
-                AI, I);
-          }
+      if (!isa<ConstantInt>(AI->getArraySize()))
+        continue;
+
+      // Keep track of the static allocas that we inline into the caller if the
+      // StaticAllocas pointer is non-null.
+      if (StaticAllocas) StaticAllocas->push_back(AI);
+
+      // Scan for the block of allocas that we can move over, and move them
+      // all at once.
+      while (isa<AllocaInst>(I) &&
+             isa<ConstantInt>(cast<AllocaInst>(I)->getArraySize())) {
+        if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I));
+        ++I;
      }
+
+      // Transfer all of the allocas over in a block.  Using splice means
+      // that the instructions aren't removed from the symbol table, then
+      // reinserted.
+      Caller->getEntryBlock().getInstList().splice(InsertPoint,
+                                                   FirstNewBlock->getInstList(),
+                                                   AI, I);
+    }
   }
 
   // If the inlined code contained dynamic alloca instructions, wrap the inlined
@@ -486,7 +510,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
        BB != E; ++BB) {
     TerminatorInst *Term = BB->getTerminator();
     if (isa<UnwindInst>(Term)) {
-      new UnreachableInst(Term);
+      new UnreachableInst(Context, Term);
       BB->getInstList().erase(Term);
     }
   }
@@ -495,7 +519,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
   // any inlined 'unwind' instructions into branches to the invoke exception
   // destination, and call instructions into invoke instructions.
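Since the alloca-hoisting hunk above is the part of the patch that motivates the new StaticAllocas list, here is the underlying splice idiom in isolation — a sketch under the same assumptions as the surrounding code (function and variable names are illustrative):

    #include "llvm/Function.h"
    #include "llvm/BasicBlock.h"
    using namespace llvm;

    // Move the instructions in [First, Last) to the front of the caller's
    // entry block.  splice re-links the instructions in place, so names and
    // symbol table entries survive the move.
    static void SpliceIntoEntryBlock(Function *Caller, BasicBlock *From,
                                     BasicBlock::iterator First,
                                     BasicBlock::iterator Last) {
      BasicBlock::iterator InsertPoint = Caller->begin()->begin();
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   From->getInstList(),
                                                   First, Last);
    }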
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
-    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG);
+    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
 
   // If we cloned in _exactly one_ basic block, and if that block ends in a
   // return instruction, we splice the body of the inlined callee directly into
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 4f8a1603948a6..1fa51a3b6a713 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -32,7 +32,7 @@ namespace {
     bool runOnFunction(Function &F) {
       for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
            AI != AE; ++AI)
-        if (!AI->hasName() && AI->getType() != Type::VoidTy)
+        if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))
           AI->setName("tmp");
 
       for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -40,7 +40,7 @@ namespace {
           BB->setName("BB");
 
         for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-          if (!I->hasName() && I->getType() != Type::VoidTy)
+          if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))
             I->setName("tmp");
       }
       return true;
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index d5e7303a50700..56e662e9dac19 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -33,22 +33,19 @@
 #include "llvm/Pass.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/PredIteratorCache.h"
-#include <algorithm>
-#include <map>
 using namespace llvm;
 
 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
 
 namespace {
-  struct VISIBILITY_HIDDEN LCSSA : public LoopPass {
+  struct LCSSA : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
     LCSSA() : LoopPass(&ID) {}
 
@@ -57,12 +54,10 @@ namespace {
     DominatorTree *DT;
     std::vector<BasicBlock*> LoopBlocks;
     PredIteratorCache PredCache;
+    Loop *L;
 
     virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
-    void ProcessInstruction(Instruction* Instr,
-                            const SmallVector<BasicBlock*, 8>& exitBlocks);
-
     /// This transformation requires natural loop information & requires that
     /// loop preheaders be inserted into the CFG.  It maintains both of these,
     /// as well as the CFG.  It also requires dominator information.
@@ -71,9 +66,9 @@ namespace {
       AU.setPreservesCFG();
       AU.addRequiredID(LoopSimplifyID);
       AU.addPreservedID(LoopSimplifyID);
-      AU.addRequired<LoopInfo>();
+      AU.addRequiredTransitive<LoopInfo>();
       AU.addPreserved<LoopInfo>();
-      AU.addRequired<DominatorTree>();
+      AU.addRequiredTransitive<DominatorTree>();
       AU.addPreserved<ScalarEvolution>();
       AU.addPreserved<DominatorTree>();
@@ -85,15 +80,17 @@ namespace {
       AU.addPreserved<DominanceFrontier>();
     }
   private:
-    void getLoopValuesUsedOutsideLoop(Loop *L,
-                                      SetVector<Instruction*> &AffectedValues,
-                                const SmallVector<BasicBlock*, 8>& exitBlocks);
-
-    Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
-                            DenseMap<DomTreeNode*, Value*> &Phis);
+    bool ProcessInstruction(Instruction *Inst,
+                            const SmallVectorImpl<BasicBlock*> &ExitBlocks);
+
+    /// verifyAnalysis() - Verify loop nest.
+    virtual void verifyAnalysis() const {
+      // Check the special guarantees that LCSSA makes.
+      assert(L->isLCSSAForm() && "LCSSA form not preserved!");
+    }
 
     /// inLoop - returns true if the given block is within the current loop
-    bool inLoop(BasicBlock* B) {
+    bool inLoop(BasicBlock *B) const {
       return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);
     }
   };
 }
@@ -105,181 +102,163 @@
static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
const PassInfo *const llvm::LCSSAID = &X;
+
+/// BlockDominatesAnExit - Return true if the specified block dominates at least
+/// one of the blocks in the specified list.
+static bool BlockDominatesAnExit(BasicBlock *BB,
+                                 const SmallVectorImpl<BasicBlock*> &ExitBlocks,
+                                 DominatorTree *DT) {
+  DomTreeNode *DomNode = DT->getNode(BB);
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i])))
+      return true;
+
+  return false;
+}
+
+
 /// runOnFunction - Process all loops in the function, inner-most out.
-bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) {
-  PredCache.clear();
+bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
+  L = TheLoop;
 
   LI = &LPM.getAnalysis<LoopInfo>();
   DT = &getAnalysis<DominatorTree>();
 
-  // Speed up queries by creating a sorted list of blocks
+  // Get the set of exiting blocks.
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+
+  if (ExitBlocks.empty())
+    return false;
+
+  // Speed up queries by creating a sorted vector of blocks.
   LoopBlocks.clear();
   LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());
-  std::sort(LoopBlocks.begin(), LoopBlocks.end());
+  array_pod_sort(LoopBlocks.begin(), LoopBlocks.end());
 
-  SmallVector<BasicBlock*, 8> exitBlocks;
-  L->getExitBlocks(exitBlocks);
+  // Look at all the instructions in the loop, checking to see if they have uses
+  // outside the loop.  If so, rewrite those uses.
+  bool MadeChange = false;
 
-  SetVector<Instruction*> AffectedValues;
-  getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks);
+  for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end();
+       BBI != E; ++BBI) {
+    BasicBlock *BB = *BBI;
+
+    // For large loops, avoid use-scanning by using dominance information:  In
+    // particular, if a block does not dominate any of the loop exits, then none
+    // of the values defined in the block could be used outside the loop.
+    if (!BlockDominatesAnExit(BB, ExitBlocks, DT))
+      continue;
+
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      // Reject two common cases fast: instructions with no uses (like stores)
+      // and instructions with one use that is in the same block as this.
+      if (I->use_empty() ||
+          (I->hasOneUse() && I->use_back()->getParent() == BB &&
+           !isa<PHINode>(I->use_back())))
+        continue;
+
+      MadeChange |= ProcessInstruction(I, ExitBlocks);
+    }
+  }
 
-  // If no values are affected, we can save a lot of work, since we know that
-  // nothing will be changed.
-  if (AffectedValues.empty())
-    return false;
+  assert(L->isLCSSAForm());
+  PredCache.clear();
+
+  return MadeChange;
+}
+
+/// isExitBlock - Return true if the specified block is in the list.
+static bool isExitBlock(BasicBlock *BB,
+                        const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    if (ExitBlocks[i] == BB)
+      return true;
+  return false;
+}
+
+/// ProcessInstruction - Given an instruction in the loop, check to see if it
+/// has any uses that are outside the current loop.  If so, insert LCSSA PHI
+/// nodes and rewrite the uses.
+bool LCSSA::ProcessInstruction(Instruction *Inst,
+                               const SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+  SmallVector<Use*, 16> UsesToRewrite;
 
-  // Iterate over all affected values for this loop and insert Phi nodes
-  // for them in the appropriate exit blocks
+  BasicBlock *InstBB = Inst->getParent();
 
-  for (SetVector<Instruction*>::iterator I = AffectedValues.begin(),
-       E = AffectedValues.end(); I != E; ++I)
-    ProcessInstruction(*I, exitBlocks);
+  for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+       UI != E; ++UI) {
+    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(*UI))
+      UserBB = PN->getIncomingBlock(UI);
+
+    if (InstBB != UserBB && !inLoop(UserBB))
+      UsesToRewrite.push_back(&UI.getUse());
+  }
 
-  assert(L->isLCSSAForm());
+  // If there are no uses outside the loop, exit with no change.
+  if (UsesToRewrite.empty()) return false;
 
-  return true;
-}
-
-/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes,
-/// eliminate all out-of-loop uses.
-void LCSSA::ProcessInstruction(Instruction *Instr,
-                               const SmallVector<BasicBlock*, 8>& exitBlocks) {
   ++NumLCSSA; // We are applying the transformation
 
-  // Keep track of the blocks that have the value available already.
-  DenseMap<DomTreeNode*, Value*> Phis;
-
-  BasicBlock *DomBB = Instr->getParent();
-
   // Invoke instructions are special in that their result value is not available
   // along their unwind edge. The code below tests to see whether DomBB dominates
   // the value, so adjust DomBB to the normal destination block, which is
   // effectively where the value is first usable.
-  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr))
+  BasicBlock *DomBB = Inst->getParent();
+  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))
     DomBB = Inv->getNormalDest();
 
   DomTreeNode *DomNode = DT->getNode(DomBB);
 
-  // Insert the LCSSA phi's into the exit blocks (dominated by the value), and
-  // add them to the Phi's map.
-  for (SmallVector<BasicBlock*, 8>::const_iterator BBI = exitBlocks.begin(),
-       BBE = exitBlocks.end(); BBI != BBE; ++BBI) {
-    BasicBlock *BB = *BBI;
-    DomTreeNode *ExitBBNode = DT->getNode(BB);
-    Value *&Phi = Phis[ExitBBNode];
-    if (!Phi && DT->dominates(DomNode, ExitBBNode)) {
-      PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa",
-                                    BB->begin());
-      PN->reserveOperandSpace(PredCache.GetNumPreds(BB));
-
-      // Remember that this phi makes the value alive in this block.
-      Phi = PN;
-
-      // Add inputs from inside the loop for this PHI.
-      for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI)
-        PN->addIncoming(Instr, *PI);
-    }
-  }
+  SSAUpdater SSAUpdate;
+  SSAUpdate.Initialize(Inst);
 
-
-  // Record all uses of Instr outside the loop.  We need to rewrite these.  The
-  // LCSSA phis won't be included because they use the value in the loop.
-  for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end();
-       UI != E;) {
-    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
-    if (PHINode *P = dyn_cast<PHINode>(*UI)) {
-      UserBB = P->getIncomingBlock(UI);
-    }
+  // Insert the LCSSA phi's into all of the exit blocks dominated by the
+  // value, and add them to the Phi's map.
+  for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
+       BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
+    BasicBlock *ExitBB = *BBI;
+    if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue;
 
-    // If the user is in the loop, don't rewrite it!
-    if (UserBB == Instr->getParent() || inLoop(UserBB)) {
-      ++UI;
-      continue;
-    }
+    // If we already inserted something for this BB, don't reprocess it.
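The rewritten LCSSA pass drives SSAUpdater with a small, fixed protocol: seed it with the PHIs that make the value available in each exit block, then ask it to rewrite each stray use. A condensed sketch of that protocol, assuming the circa-r84119 SSAUpdater interface shown in this patch (the helper itself is hypothetical):

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    // Make Inst available in ExitBB via LCSSAPhi, then rewrite use U, letting
    // SSAUpdater insert any additional PHIs the CFG requires.
    static void RewriteLiveOutUse(Instruction *Inst, BasicBlock *ExitBB,
                                  PHINode *LCSSAPhi, Use &U) {
      SSAUpdater SSAUpdate;
      SSAUpdate.Initialize(Inst);            // prototype value being rewritten
      if (!SSAUpdate.HasValueForBlock(ExitBB))
        SSAUpdate.AddAvailableValue(ExitBB, LCSSAPhi);
      SSAUpdate.RewriteUse(U);
    }

The per-exit-block loop of the patch resumes below.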
+    if (SSAUpdate.HasValueForBlock(ExitBB)) continue;
 
-    // Otherwise, patch up uses of the value with the appropriate LCSSA Phi,
-    // inserting PHI nodes into join points where needed.
-    Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis);
-
-    // Preincrement the iterator to avoid invalidating it when we change the
-    // value.
-    Use &U = UI.getUse();
-    ++UI;
-    U.set(Val);
-  }
-}
+    PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa",
+                                  ExitBB->begin());
+    PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
 
-/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that
-/// are used by instructions outside of it.
-void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L,
-                                      SetVector<Instruction*> &AffectedValues,
-                                const SmallVector<BasicBlock*, 8>& exitBlocks) {
-  // FIXME: For large loops, we may be able to avoid a lot of use-scanning
-  // by using dominance information.  In particular, if a block does not
-  // dominate any of the loop exits, then none of the values defined in the
-  // block could be used outside the loop.
-  for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
-       BB != BE; ++BB) {
-    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I)
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
-           ++UI) {
-        BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
-        if (PHINode* p = dyn_cast<PHINode>(*UI)) {
-          UserBB = p->getIncomingBlock(UI);
-        }
-
-        if (*BB != UserBB && !inLoop(UserBB)) {
-          AffectedValues.insert(I);
-          break;
-        }
-      }
+    // Add inputs from inside the loop for this PHI.
+    for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+      PN->addIncoming(Inst, *PI);
+
+    // Remember that this phi makes the value alive in this block.
+    SSAUpdate.AddAvailableValue(ExitBB, PN);
   }
-}
-
-/// GetValueForBlock - Get the value to use within the specified basic block.
-/// available values are in Phis.
-Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst,
-                               DenseMap<DomTreeNode*, Value*> &Phis) {
-  // If there is no dominator info for this BB, it is unreachable.
-  if (BB == 0)
-    return UndefValue::get(OrigInst->getType());
-
-  // If we have already computed this value, return the previously computed val.
-  if (Phis.count(BB)) return Phis[BB];
-
-  DomTreeNode *IDom = BB->getIDom();
+
+  // Rewrite all uses outside the loop in terms of the new PHIs we just
+  // inserted.
+  for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) {
+    // If this use is in an exit block, rewrite to use the newly inserted PHI.
+    // This is required for correctness because SSAUpdate doesn't handle uses in
+    // the same block.  It assumes the PHI we inserted is at the end of the
+    // block.
+    Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser());
+    BasicBlock *UserBB = User->getParent();
+    if (PHINode *PN = dyn_cast<PHINode>(User))
+      UserBB = PN->getIncomingBlock(*UsesToRewrite[i]);
 
-  // Otherwise, there are two cases: we either have to insert a PHI node or we
-  // don't.  We need to insert a PHI node if this block is not dominated by one
-  // of the exit nodes from the loop (the loop could have multiple exits, and
-  // though the value defined *inside* the loop dominated all its uses, each
-  // exit by itself may not dominate all the uses).
-  //
-  // The simplest way to check for this condition is by checking to see if the
-  // idom is in the loop.  If so, we *know* that none of the exit blocks
-  // dominate this block.  Note that we *know* that the block defining the
-  // original instruction is in the idom chain, because if it weren't, then the
-  // original value didn't dominate this use.
-  if (!inLoop(IDom->getBlock())) {
-    // Idom is not in the loop, we must still be "below" the exit block and must
-    // be fully dominated by the value live in the idom.
-    Value* val = GetValueForBlock(IDom, OrigInst, Phis);
-    Phis.insert(std::make_pair(BB, val));
-    return val;
+    if (isa<PHINode>(UserBB->begin()) &&
+        isExitBlock(UserBB, ExitBlocks)) {
+      UsesToRewrite[i]->set(UserBB->begin());
+      continue;
+    }
+
+    // Otherwise, do full PHI insertion.
+    SSAUpdate.RewriteUse(*UsesToRewrite[i]);
   }
 
-  BasicBlock *BBN = BB->getBlock();
-
-  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
-  // now, then get values to fill in the incoming values for the PHI.
-  PHINode *PN = PHINode::Create(OrigInst->getType(),
-                                OrigInst->getName() + ".lcssa", BBN->begin());
-  PN->reserveOperandSpace(PredCache.GetNumPreds(BBN));
-  Phis.insert(std::make_pair(BB, PN));
-
-  // Fill in the incoming values for the block.
-  for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI)
-    PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI);
-  return PN;
+  return true;
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8c08638c4c3d7..b62261119c75c 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -20,9 +20,11 @@
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
@@ -183,8 +185,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
     } else if (SI->getNumSuccessors() == 2) {
       // Otherwise, we can fold this switch into a conditional branch
       // instruction if it has only one non-default destination.
-      Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(),
-                                 SI->getSuccessorValue(1), "cond", SI);
+      Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(),
+                                 SI->getSuccessorValue(1), "cond");
       // Insert the new branch...
       BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI);
@@ -262,7 +264,6 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {
 /// too, recursively.
 void
 llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
-
   // We can remove a PHI if it is on a cycle in the def-use graph
   // where each node in the cycle has degree one, i.e. only one use,
   // and is an instruction with no side effects.
@@ -294,7 +295,7 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
 /// between them, moving the instructions in the predecessor into DestBB and
 /// deleting the predecessor block.
 ///
-void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
+void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
   // If BB has single-entry PHI nodes, fold them.
   while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {
     Value *NewVal = PN->getIncomingValue(0);
@@ -314,6 +315,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {
   // Anything that branched to PredBB now branches to DestBB.
   PredBB->replaceAllUsesWith(DestBB);
+
+  if (P) {
+    ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>();
+    if (PI) {
+      PI->replaceAllUses(PredBB, DestBB);
+      PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB));
+    }
+  }
   // Nuke BB.
   PredBB->eraseFromParent();
 }
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index d6b167f8b848b..c22708a92b7a6 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,10 +37,12 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CFG.h"
@@ -55,44 +57,42 @@ STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
 STATISTIC(NumNested  , "Number of nested loops split out");
 
 namespace {
-  struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass {
+  struct VISIBILITY_HIDDEN LoopSimplify : public LoopPass {
     static char ID; // Pass identification, replacement for typeid
-    LoopSimplify() : FunctionPass(&ID) {}
+    LoopSimplify() : LoopPass(&ID) {}
 
     // AA - If we have an alias analysis object to update, this is it, otherwise
     // this is null.
     AliasAnalysis *AA;
     LoopInfo *LI;
     DominatorTree *DT;
-    virtual bool runOnFunction(Function &F);
+    Loop *L;
+    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       // We need loop information to identify the loops...
-      AU.addRequired<LoopInfo>();
-      AU.addRequired<DominatorTree>();
+      AU.addRequiredTransitive<LoopInfo>();
+      AU.addRequiredTransitive<DominatorTree>();
 
       AU.addPreserved<LoopInfo>();
       AU.addPreserved<DominatorTree>();
       AU.addPreserved<AliasAnalysis>();
       AU.addPreserved<DominanceFrontier>();
+      AU.addPreserved<ScalarEvolution>();
       AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
     }
 
     /// verifyAnalysis() - Verify loop nest.
     void verifyAnalysis() const {
-#ifndef NDEBUG
-      LoopInfo *NLI = &getAnalysis<LoopInfo>();
-      for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I)
-        (*I)->verifyLoop();
-#endif
+      assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");
     }
 
   private:
-    bool ProcessLoop(Loop *L);
+    bool ProcessLoop(Loop *L, LPPassManager &LPM);
     BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
-    void InsertPreheaderForLoop(Loop *L);
-    Loop *SeparateNestedLoop(Loop *L);
-    void InsertUniqueBackedgeBlock(Loop *L);
+    BasicBlock *InsertPreheaderForLoop(Loop *L);
+    Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+    void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
     void PlaceSplitBlockCarefully(BasicBlock *NewBB,
                                   SmallVectorImpl<BasicBlock*> &SplitPreds,
                                   Loop *L);
@@ -105,73 +105,19 @@
X("loopsimplify", "Canonicalize natural loops", true);
 
// Publicly exposed interface to pass...
const PassInfo *const llvm::LoopSimplifyID = &X;
-FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
 
/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
/// it in any convenient order) inserting preheaders...
///
-bool LoopSimplify::runOnFunction(Function &F) {
+bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
+  L = l;
   bool Changed = false;
   LI = &getAnalysis<LoopInfo>();
   AA = getAnalysisIfAvailable<AliasAnalysis>();
   DT = &getAnalysis<DominatorTree>();
 
-  // Check to see that no blocks (other than the header) in loops have
-  // predecessors that are not in loops.  This is not valid for natural loops,
-  // but can occur if the blocks are unreachable.  Since they are unreachable we
-  // can just shamelessly destroy their terminators to make them not branch into
-  // the loop!
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-    // This case can only occur for unreachable blocks.  Blocks that are
-    // unreachable can't be in loops, so filter those blocks out.
-    if (LI->getLoopFor(BB)) continue;
-
-    bool BlockUnreachable = false;
-    TerminatorInst *TI = BB->getTerminator();
-
-    // Check to see if any successors of this block are non-loop-header loops
-    // that are not the header.
-    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
-      // If this successor is not in a loop, BB is clearly ok.
-      Loop *L = LI->getLoopFor(TI->getSuccessor(i));
-      if (!L) continue;
-
-      // If the succ is the loop header, and if L is a top-level loop, then this
-      // is an entrance into a loop through the header, which is also ok.
-      if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0)
-        continue;
-
-      // Otherwise, this is an entrance into a loop from some place invalid.
-      // Either the loop structure is invalid and this is not a natural loop (in
-      // which case the compiler is buggy somewhere else) or BB is unreachable.
-      BlockUnreachable = true;
-      break;
-    }
-
-    // If this block is ok, check the next one.
-    if (!BlockUnreachable) continue;
-
-    // Otherwise, this block is dead.  To clean up the CFG and to allow later
-    // loop transformations to ignore this case, we delete the edges into the
-    // loop by replacing the terminator.
-
-    // Remove PHI entries from the successors.
-    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
-      TI->getSuccessor(i)->removePredecessor(BB);
-
-    // Add a new unreachable instruction before the old terminator.
-    new UnreachableInst(TI);
-
-    // Delete the dead terminator.
-    if (AA) AA->deleteValue(TI);
-    if (!TI->use_empty())
-      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
-    TI->eraseFromParent();
-    Changed |= true;
-  }
-
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
-    Changed |= ProcessLoop(*I);
+  Changed |= ProcessLoop(L, LPM);
 
   return Changed;
 }
 
@@ -179,21 +125,42 @@ bool LoopSimplify::runOnFunction(Function &F) {
 /// ProcessLoop - Walk the loop structure in depth first order, ensuring that
 /// all loops have preheaders.
 ///
-bool LoopSimplify::ProcessLoop(Loop *L) {
+bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {
   bool Changed = false;
 ReprocessLoop:
-
-  // Canonicalize inner loops before outer loops.  Inner loop canonicalization
-  // can provide work for the outer loop to canonicalize.
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-    Changed |= ProcessLoop(*I);
-
-  assert(L->getBlocks()[0] == L->getHeader() &&
-         "Header isn't first block in loop?");
+
+  // Check to see that no blocks (other than the header) in this loop have
+  // predecessors that are not in the loop.  This is not valid for natural
+  // loops, but can occur if the blocks are unreachable.  Since they are
+  // unreachable we can just shamelessly delete those CFG edges!
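Before the new cleanup loop begins below, the edge-deletion idiom it uses can be summarized in isolation. A hedged sketch assuming the same circa-r84119 API as the patch (helper name illustrative):

    #include "llvm/BasicBlock.h"
    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    // Sever every CFG edge leaving DeadPred: fix up successor PHIs first,
    // then replace the terminator with an unreachable instruction.
    static void ZapDeadPredecessor(BasicBlock *DeadPred) {
      for (succ_iterator SI = succ_begin(DeadPred), SE = succ_end(DeadPred);
           SI != SE; ++SI)
        (*SI)->removePredecessor(DeadPred);
      TerminatorInst *TI = DeadPred->getTerminator();
      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
      TI->eraseFromParent();
      new UnreachableInst(DeadPred->getContext(), DeadPred); // new terminator
    }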
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+       BB != E; ++BB) {
+    if (*BB == L->getHeader()) continue;
+
+    SmallPtrSet<BasicBlock*, 4> BadPreds;
+    for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI)
+      if (!L->contains(*PI))
+        BadPreds.insert(*PI);
+
+    // Delete each unique out-of-loop (and thus dead) predecessor.
+    for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
+         E = BadPreds.end(); I != E; ++I) {
+      // Inform each successor of each dead pred.
+      for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
+        (*SI)->removePredecessor(*I);
+      // Zap the dead pred's terminator and replace it with unreachable.
+      TerminatorInst *TI = (*I)->getTerminator();
+      TI->replaceAllUsesWith(UndefValue::get(TI->getType()));
+      (*I)->getTerminator()->eraseFromParent();
+      new UnreachableInst((*I)->getContext(), *I);
+      Changed = true;
+    }
+  }
 
   // Does the loop already have a preheader?  If so, don't insert one.
-  if (L->getLoopPreheader() == 0) {
-    InsertPreheaderForLoop(L);
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    Preheader = InsertPreheaderForLoop(L);
     NumInserted++;
     Changed = true;
   }
@@ -229,10 +196,9 @@ ReprocessLoop:
   // this for loops with a giant number of backedges, just factor them into a
   // common backedge instead.
   if (NumBackedges < 8) {
-    if (Loop *NL = SeparateNestedLoop(L)) {
+    if (SeparateNestedLoop(L, LPM)) {
       ++NumNested;
       // This is a big restructuring change, reprocess the whole loop.
-      ProcessLoop(NL);
       Changed = true;
       // GCC doesn't tail recursion eliminate this.
       goto ReprocessLoop;
@@ -242,7 +208,7 @@ ReprocessLoop:
   // If we either couldn't, or didn't want to, identify nesting of the loops,
   // insert a new block that all backedges target, then make it jump to the
   // loop header.
-  InsertUniqueBackedgeBlock(L);
+  InsertUniqueBackedgeBlock(L, Preheader);
   NumInserted++;
   Changed = true;
 }
@@ -253,7 +219,7 @@ ReprocessLoop:
   PHINode *PN;
   for (BasicBlock::iterator I = L->getHeader()->begin();
        (PN = dyn_cast<PHINode>(I++)); )
-    if (Value *V = PN->hasConstantValue()) {
+    if (Value *V = PN->hasConstantValue(DT)) {
       if (AA) AA->deleteValue(PN);
       PN->replaceAllUsesWith(V);
       PN->eraseFromParent();
@@ -286,19 +252,10 @@ ReprocessLoop:
       Instruction *Inst = I++;
       if (Inst == CI)
        continue;
-      if (Inst->isTrapping()) {
+      if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {
         AllInvariant = false;
         break;
       }
-      for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
-        if (!L->isLoopInvariant(Inst->getOperand(j))) {
-          AllInvariant = false;
-          break;
-        }
-      if (!AllInvariant)
-        break;
-      // Hoist.
-      Inst->moveBefore(L->getLoopPreheader()->getTerminator());
     }
     if (!AllInvariant) continue;
@@ -317,9 +274,10 @@ ReprocessLoop:
     DomTreeNode *Node = DT->getNode(ExitingBlock);
     const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
       Node->getChildren();
-    for (unsigned k = 0, g = Children.size(); k != g; ++k) {
-      DT->changeImmediateDominator(Children[k], Node->getIDom());
-      if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
+    while (!Children.empty()) {
+      DomTreeNode *Child = Children.front();
+      DT->changeImmediateDominator(Child, Node->getIDom());
+      if (DF) DF->changeImmediateDominator(Child->getBlock(),
                                            Node->getIDom()->getBlock(), DT);
     }
 
@@ -339,7 +297,7 @@ ReprocessLoop:
 /// preheader, this method is called to insert one.  This method has two phases:
 /// preheader insertion and analysis updating.
/// -void LoopSimplify::InsertPreheaderForLoop(Loop *L) { +BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { BasicBlock *Header = L->getHeader(); // Compute the set of predecessors of the loop that are not in the loop. @@ -353,19 +311,12 @@ void LoopSimplify::InsertPreheaderForLoop(Loop *L) { BasicBlock *NewBB = SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), ".preheader", this); - - - //===--------------------------------------------------------------------===// - // Update analysis results now that we have performed the transformation - // - - // We know that we have loop information to update... update it now. - if (Loop *Parent = L->getParentLoop()) - Parent->addBasicBlockToLoop(NewBB, LI->getBase()); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); + + return NewBB; } /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit @@ -382,17 +333,6 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { LoopBlocks.size(), ".loopexit", this); - // Update Loop Information - we know that the new block will be in whichever - // loop the Exit block is in. Note that it may not be in that immediate loop, - // if the successor is some other loop header. In that case, we continue - // walking up the loop tree to find a loop that contains both the successor - // block and the predecessor block. - Loop *SuccLoop = LI->getLoopFor(Exit); - while (SuccLoop && !SuccLoop->contains(L->getHeader())) - SuccLoop = SuccLoop->getParentLoop(); - if (SuccLoop) - SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase()); - return NewBB; } @@ -422,14 +362,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { PHINode *PN = cast(I); ++I; - if (Value *V = PN->hasConstantValue()) - if (!isa(V) || DT->dominates(cast(V), PN)) { - // This is a degenerate PHI already, don't modify it! - PN->replaceAllUsesWith(V); - if (AA) AA->deleteValue(PN); - PN->eraseFromParent(); - continue; - } + if (Value *V = PN->hasConstantValue(DT)) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + if (AA) AA->deleteValue(PN); + PN->eraseFromParent(); + continue; + } // Scan this PHI node looking for a use of the PHI node by itself. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) @@ -496,7 +435,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. /// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { PHINode *PN = FindPHIToPartitionLoops(L, DT, AA); if (PN == 0) return 0; // No known way to partition. @@ -527,17 +466,20 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { else LI->changeTopLevelLoop(L, NewOuter); - // This block is going to be our new header block: add it to this loop and all - // parent loops. - NewOuter->addBasicBlockToLoop(NewBB, LI->getBase()); - // L is now a subloop of our outer loop. NewOuter->addChildLoop(L); + // Add the new loop to the pass manager queue. + LPM.insertLoopIntoQueue(NewOuter); + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) NewOuter->addBlockEntry(*I); + // Now reset the header in L, which had been moved by + // SplitBlockPredecessors for the outer loop. 
+ L->moveToHeader(Header); + // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. std::set BlocksInL; @@ -578,11 +520,10 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { /// backedges to target a new basic block and have that block branch to the loop /// header. This ensures that loops have exactly one backedge. /// -void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) { +void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop - BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); Function *F = Header->getParent(); @@ -592,7 +533,8 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) { if (*I != Preheader) BackedgeBlocks.push_back(*I); // Create and insert the new backedge block... - BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F); + BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), + Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); // Move the new backedge block to right after the last backedge block. diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp index 74e7028d127cb..f26d7c146ee3d 100644 --- a/lib/Transforms/Utils/LowerAllocations.cpp +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -19,6 +19,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Constants.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/ADT/Statistic.h" #include "llvm/Target/TargetData.h" @@ -28,17 +29,17 @@ using namespace llvm; STATISTIC(NumLowered, "Number of allocations lowered"); namespace { - /// LowerAllocations - Turn malloc and free instructions into %malloc and - /// %free calls. + /// LowerAllocations - Turn malloc and free instructions into @malloc and + /// @free calls. /// class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass { - Constant *MallocFunc; // Functions in the module we are processing - Constant *FreeFunc; // Initialized by doInitialization + Constant *FreeFunc; // Functions in the module we are processing + // Initialized by doInitialization bool LowerMallocArgToInteger; public: static char ID; // Pass ID, replacement for typeid explicit LowerAllocations(bool LowerToInt = false) - : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0), + : BasicBlockPass(&ID), FreeFunc(0), LowerMallocArgToInteger(LowerToInt) {} virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -86,12 +87,9 @@ Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) { // This function is always successful. // bool LowerAllocations::doInitialization(Module &M) { - const Type *BPTy = PointerType::getUnqual(Type::Int8Ty); - // Prototype malloc as "char* malloc(...)", because we don't know in - // doInitialization whether size_t is int or long. 
-  FunctionType *FT = FunctionType::get(BPTy, true);
-  MallocFunc = M.getOrInsertFunction("malloc", FT);
-  FreeFunc = M.getOrInsertFunction("free"  , Type::VoidTy, BPTy, (Type *)0);
+  const Type *BPTy = Type::getInt8PtrTy(M.getContext());
+  FreeFunc = M.getOrInsertFunction("free"  , Type::getVoidTy(M.getContext()),
+                                   BPTy, (Type *)0);
   return true;
 }
 
@@ -100,57 +98,22 @@ bool LowerAllocations::doInitialization(Module &M) {
 //
 bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
   bool Changed = false;
-  assert(MallocFunc && FreeFunc && "Pass not initialized!");
+  assert(FreeFunc && "Pass not initialized!");
 
   BasicBlock::InstListType &BBIL = BB.getInstList();
 
   const TargetData &TD = getAnalysis<TargetData>();
-  const Type *IntPtrTy = TD.getIntPtrType();
+  const Type *IntPtrTy = TD.getIntPtrType(BB.getContext());
 
   // Loop over all of the instructions, looking for malloc or free instructions
   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {
     if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
-      const Type *AllocTy = MI->getType()->getElementType();
-
-      // malloc(type) becomes i8 *malloc(size)
-      Value *MallocArg;
-      if (LowerMallocArgToInteger)
-        MallocArg = ConstantInt::get(Type::Int64Ty,
-                                     TD.getTypeAllocSize(AllocTy));
-      else
-        MallocArg = ConstantExpr::getSizeOf(AllocTy);
-      MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg),
-                                                  IntPtrTy);
-
-      if (MI->isArrayAllocation()) {
-        if (isa<ConstantInt>(MallocArg) &&
-            cast<ConstantInt>(MallocArg)->isOne()) {
-          MallocArg = MI->getOperand(0);         // Operand * 1 = Operand
-        } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) {
-          CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/);
-          MallocArg = ConstantExpr::getMul(CO, cast<Constant>(MallocArg));
-        } else {
-          Value *Scale = MI->getOperand(0);
-          if (Scale->getType() != IntPtrTy)
-            Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/,
-                                                "", I);
-
-          // Multiply it by the array size if necessary...
-          MallocArg = BinaryOperator::Create(Instruction::Mul, Scale,
-                                             MallocArg, "", I);
-        }
-      }
-
-      // Create the call to Malloc.
-      CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I);
-      MCall->setTailCall();
-
-      // Create a cast instruction to convert to the right type...
-      Value *MCast;
-      if (MCall->getType() != Type::VoidTy)
-        MCast = new BitCastInst(MCall, MI->getType(), "", I);
-      else
-        MCast = Constant::getNullValue(MI->getType());
+      Value *ArraySize = MI->getOperand(0);
+      if (ArraySize->getType() != IntPtrTy)
+        ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy,
+                                                false /*ZExt*/, "", I);
+      Value *MCast = CallInst::CreateMalloc(I, IntPtrTy,
+                                            MI->getAllocatedType(), ArraySize);
 
       // Replace all uses of the old malloc inst with the cast inst
       MI->replaceAllUsesWith(MCast);
@@ -160,7 +123,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
     } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {
       Value *PtrCast = new BitCastInst(FI->getOperand(0),
-                        PointerType::getUnqual(Type::Int8Ty), "", I);
+                        Type::getInt8PtrTy(BB.getContext()), "", I);
 
       // Insert a call to the free function...
CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall(); diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 1f6b1a2a68466..9a3de2649244e 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -40,6 +40,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -114,7 +115,8 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) { // doInitialization - Make sure that there is a prototype for abort in the // current module. bool LowerInvoke::doInitialization(Module &M) { - const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); + const Type *VoidPtrTy = + Type::getInt8PtrTy(M.getContext()); AbortMessage = 0; if (ExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. @@ -125,9 +127,9 @@ bool LowerInvoke::doInitialization(Module &M) { { // The type is recursive, so use a type holder. std::vector Elements; Elements.push_back(JmpBufTy); - OpaqueType *OT = OpaqueType::get(); + OpaqueType *OT = OpaqueType::get(M.getContext()); Elements.push_back(PointerType::getUnqual(OT)); - PATypeHolder JBLType(StructType::get(Elements)); + PATypeHolder JBLType(StructType::get(M.getContext(), Elements)); OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle. JBLinkTy = JBLType.get(); M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy); @@ -138,10 +140,10 @@ bool LowerInvoke::doInitialization(Module &M) { // Now that we've done that, insert the jmpbuf list head global, unless it // already exists. if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { - JBListHead = new GlobalVariable(PtrJBList, false, + JBListHead = new GlobalVariable(M, PtrJBList, false, GlobalValue::LinkOnceAnyLinkage, Constant::getNullValue(PtrJBList), - "llvm.sjljeh.jblist", &M); + "llvm.sjljeh.jblist"); } // VisualStudio defines setjmp as _setjmp via #include / , @@ -163,7 +165,8 @@ bool LowerInvoke::doInitialization(Module &M) { } // We need the 'write' and 'abort' functions for both models. - AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0); + AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), + (Type *)0); #if 0 // "write" is Unix-specific.. code is going away soon anyway. WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty, VoidPtrTy, Type::Int32Ty, (Type *)0); @@ -178,26 +181,30 @@ void LowerInvoke::createAbortMessage(Module *M) { // The abort message for expensive EH support tells the user that the // program 'unwound' without an 'invoke' instruction. Constant *Msg = - ConstantArray::get("ERROR: Exception thrown, but not caught!\n"); + ConstantArray::get(M->getContext(), + "ERROR: Exception thrown, but not caught!\n"); AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, GlobalValue::InternalLinkage, - Msg, "abortmsg", M); - std::vector GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + Msg, "abortmsg"); + std::vector GEPIdx(2, + Constant::getNullValue(Type::getInt32Ty(M->getContext()))); AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); } else { // The abort message for cheap EH support tells the user that EH is not // enabled. 
Constant *Msg = - ConstantArray::get("Exception handler needed, but not enabled. Recompile" - " program with -enable-correct-eh-support.\n"); + ConstantArray::get(M->getContext(), + "Exception handler needed, but not enabled." + "Recompile program with -enable-correct-eh-support.\n"); AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, + GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, GlobalValue::InternalLinkage, - Msg, "abortmsg", M); - std::vector GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); + Msg, "abortmsg"); + std::vector GEPIdx(2, Constant::getNullValue( + Type::getInt32Ty(M->getContext()))); AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); } } @@ -249,8 +256,9 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { // Insert a return instruction. This really should be a "barrier", as it // is unreachable. - ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 : - Constant::getNullValue(F.getReturnType()), UI); + ReturnInst::Create(F.getContext(), + F.getReturnType() == Type::getVoidTy(F.getContext()) ? + 0 : Constant::getNullValue(F.getReturnType()), UI); // Remove the unwind instruction now. BB->getInstList().erase(UI); @@ -265,7 +273,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, AllocaInst *InvokeNum, SwitchInst *CatchSwitch) { - ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo); + ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), + InvokeNo); // If the unwind edge has phi nodes, split the edge. if (isa(II->getUnwindDest()->begin())) { @@ -284,7 +293,8 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI(); // nonvolatile. - new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI); + new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())), + InvokeNum, false, NI); // Add a switch case to our unwind block. CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); @@ -469,13 +479,15 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // alloca because the value needs to be live across invokes. unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0; AllocaInst *JmpBuf = - new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); + new AllocaInst(JBLinkTy, 0, Align, + "jblink", F.begin()->begin()); std::vector Idx; - Idx.push_back(Constant::getNullValue(Type::Int32Ty)); - Idx.push_back(ConstantInt::get(Type::Int32Ty, 1)); + Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); + Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1)); OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), - "OldBuf", EntryBB->getTerminator()); + "OldBuf", + EntryBB->getTerminator()); // Copy the JBListHead to the alloca. Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, @@ -487,20 +499,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create the catch block. The catch block is basically a big switch // statement that goes to all of the invoke catch blocks. - BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F); + BasicBlock *CatchBB = + BasicBlock::Create(F.getContext(), "setjmp.catch", &F); // Create an alloca which keeps track of which invoke is currently // executing. For normal calls it contains zero. 
- AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum", - EntryBB->begin()); - new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true, - EntryBB->getTerminator()); + AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, + "invokenum",EntryBB->begin()); + new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + InvokeNum, true, EntryBB->getTerminator()); // Insert a load in the Catch block, and a switch on its value. By default, // we go to a block that just does an unwind (which is the correct action // for a standard call). - BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F); - Unwinds.push_back(new UnwindInst(UnwindBB)); + BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); + Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB)); Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB); SwitchInst *CatchSwitch = @@ -512,19 +525,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), "setjmp.cont"); - Idx[1] = ConstantInt::get(Type::Int32Ty, 0); + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0); Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), "TheJmpBuf", EntryBB->getTerminator()); - JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty), + JmpBufPtr = new BitCastInst(JmpBufPtr, + Type::getInt8PtrTy(F.getContext()), "tmp", EntryBB->getTerminator()); Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret", EntryBB->getTerminator()); // Compare the return value to zero. - Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet, + Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), + ICmpInst::ICMP_EQ, SJRet, Constant::getNullValue(SJRet->getType()), - "notunwind", EntryBB->getTerminator()); + "notunwind"); // Nuke the uncond branch. EntryBB->getTerminator()->eraseFromParent(); @@ -541,9 +556,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { // Create three new blocks, the block to load the jmpbuf ptr and compare // against null, the block to do the longjmp, and the error block for if it // is null. Add them at the end of the function because they are not hot. - BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F); - BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F); - BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F); + BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), + "dounwind", &F); + BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); + BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F); // If this function contains an invoke, restore the old jumpbuf ptr. Value *BufPtr; @@ -556,26 +572,27 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { } // Load the JBList, if it's null, then there was no catch! - Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr, + Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr, Constant::getNullValue(BufPtr->getType()), - "notnull", UnwindHandler); + "notnull"); BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler); // Create the block to do the longjmp. // Get a pointer to the jmpbuf and longjmp. 
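// [Sketch] At run time the blocks built above behave like the classic
// setjmp/longjmp EH model.  A simplified C++ rendering of the control flow
// for one rewritten invoke; jbnode, invokenum and the callees are
// hypothetical stand-ins for the IR objects created above, and the real
// pass handles list popping on the unwind path separately:
#include <csetjmp>
struct JBLink { std::jmp_buf buf; JBLink *next; };
static JBLink *jblist;               // models @llvm.sjljeh.jblist
static volatile int invokenum;       // models the "invokenum" alloca
extern void callee();                // the function being invoked
extern void unwind_dest();           // the invoke's landing block
void invokeModel() {
  JBLink jbnode;                     // the "jblink" alloca
  jbnode.next = jblist;              // push onto the jmpbuf list
  jblist = &jbnode;
  if (setjmp(jbnode.buf) == 0) {     // the "sjret" compare
    invokenum = 1; callee(); invokenum = 0;
    jblist = jbnode.next;            // normal path: pop and continue
  } else if (invokenum == 1) {       // the "setjmp.catch" switch
    unwind_dest();
  }                                  // default case: keep unwinding
}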
std::vector Idx; - Idx.push_back(Constant::getNullValue(Type::Int32Ty)); - Idx.push_back(ConstantInt::get(Type::Int32Ty, 0)); + Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); + Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)); Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf", UnwindBlock); - Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty), + Idx[0] = new BitCastInst(Idx[0], + Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); - Idx[1] = ConstantInt::get(Type::Int32Ty, 1); + Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock); - new UnreachableInst(UnwindBlock); + new UnreachableInst(F.getContext(), UnwindBlock); // Set up the term block ("throw without a catch"). - new UnreachableInst(TermBlock); + new UnreachableInst(F.getContext(), TermBlock); // Insert a new call to write(2, AbortMessage, AbortMessageLength); writeAbortMessage(TermBlock->getTerminator()); diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 1da59360fc2be..764f0980cd2d0 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -18,6 +18,7 @@ #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" @@ -108,8 +109,10 @@ bool LowerSwitch::runOnFunction(Function &F) { // operator<< - Used for debugging purposes. // -static std::ostream& operator<<(std::ostream &O, - const LowerSwitch::CaseVector &C) { +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) ATTRIBUTE_USED; +static raw_ostream& operator<<(raw_ostream &O, + const LowerSwitch::CaseVector &C) { O << "["; for (LowerSwitch::CaseVector::const_iterator B = C.begin(), @@ -121,11 +124,6 @@ static std::ostream& operator<<(std::ostream &O, return O << "]"; } -static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) { - if (O.stream()) *O.stream() << C; - return O; -} - // switchConvert - Convert the switch statement into a binary lookup of // the case values. The function recursively builds this tree. // @@ -140,9 +138,9 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, unsigned Mid = Size / 2; std::vector LHS(Begin, Begin + Mid); - DOUT << "LHS: " << LHS << "\n"; + DEBUG(errs() << "LHS: " << LHS << "\n"); std::vector RHS(Begin + Mid, End); - DOUT << "RHS: " << RHS << "\n"; + DEBUG(errs() << "RHS: " << RHS << "\n"); CaseRange& Pivot = *(Begin + Mid); DEBUG(errs() << "Pivot ==> " @@ -157,11 +155,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, // Create a new node that checks if the value is < pivot. Go to the // left branch if it is and right branch if not. 
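// [Sketch] switchConvert bisects the sorted case list into a tree of
// NodeBlocks whose leaves use the unsigned-wraparound range test that
// newLeafBlock emits below ("Emit V-Lo <=u Hi-Lo").  The same logic in
// plain C++, for hypothetical case clusters {1..3}, {10} and {42}:
#include <cstdint>
static bool inRange(uint32_t V, uint32_t Lo, uint32_t Hi) {
  // Lo <= V && V <= Hi in one compare: V below Lo wraps to a huge value.
  return V - Lo <= Hi - Lo;
}
static int loweredSwitch(uint32_t v) {
  if ((int32_t)v < 10) {             // NodeBlock: signed "< pivot" test
    if (inRange(v, 1, 3)) return 1;  // LeafBlock for cases 1..3
  } else if (v == 10) {              // LeafBlock for case 10
    return 2;
  } else if (v == 42) {              // LeafBlock for case 42
    return 3;
  }
  return 0;                          // NewDefault
}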
Function* F = OrigBlock->getParent(); - BasicBlock* NewNode = BasicBlock::Create("NodeBlock"); + BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewNode); - ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); + ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, + Val, Pivot.Low, "Pivot"); NewNode->getInstList().push_back(Comp); BranchInst::Create(LBranch, RBranch, Comp, NewNode); return NewNode; @@ -178,7 +177,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* Default) { Function* F = OrigBlock->getParent(); - BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock"); + BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); Function::iterator FI = OrigBlock; F->getBasicBlockList().insert(++FI, NewLeaf); @@ -186,18 +185,18 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, ICmpInst* Comp = NULL; if (Leaf.Low == Leaf.High) { // Make the seteq instruction... - Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, + Leaf.Low, "SwitchLeaf"); } else { // Make range comparison if (cast(Leaf.Low)->isMinValue(true /*isSigned*/)) { // Val >= Min && Val <= Hi --> Val <= Hi - Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, + "SwitchLeaf"); } else if (cast(Leaf.Low)->isZero()) { // Val >= 0 && Val <= Hi --> Val <=u Hi - Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, + "SwitchLeaf"); } else { // Emit V-Lo <=u Hi-Lo Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); @@ -205,8 +204,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, Val->getName()+".off", NewLeaf); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); - Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound, - "SwitchLeaf", NewLeaf); + Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, + "SwitchLeaf"); } } @@ -290,7 +289,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { // Create a new, empty default block so that the new hierarchy of // if-then statements go to this and the PHI nodes are happy. - BasicBlock* NewDefault = BasicBlock::Create("NewDefault"); + BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); F->getBasicBlockList().insert(Default, NewDefault); BranchInst::Create(Default, NewDefault); @@ -308,9 +307,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) { CaseVector Cases; unsigned numCmps = Clusterify(Cases, SI); - DOUT << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << "\n"; - DOUT << "Cases: " << Cases << "\n"; + DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". 
Total compares: " << numCmps << "\n"); + DEBUG(errs() << "Cases: " << Cases << "\n"); + (void)numCmps; BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val, OrigBlock, NewDefault); diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 2b06d778e1452..5df08326d8bb1 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -75,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF); + PromoteMemToReg(Allocas, DT, DF, F.getContext()); NumPromoted += Allocas.size(); Changed = true; } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index b717699b7e055..9ca06bd180a16 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -23,13 +23,13 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Compiler.h" @@ -41,7 +41,6 @@ STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -// Provide DenseMapInfo for all pointers. namespace llvm { template<> struct DenseMapInfo > { @@ -181,6 +180,8 @@ namespace { /// AST - An AliasSetTracker object to update. If null, don't update it. /// AliasSetTracker *AST; + + LLVMContext &Context; /// AllocaLookup - Reverse mapping of Allocas. /// @@ -212,8 +213,9 @@ namespace { DenseMap BBNumPreds; public: PromoteMem2Reg(const std::vector &A, DominatorTree &dt, - DominanceFrontier &df, AliasSetTracker *ast) - : Allocas(A), DT(dt), DF(df), AST(ast) {} + DominanceFrontier &df, AliasSetTracker *ast, + LLVMContext &C) + : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {} void run(); @@ -291,10 +293,9 @@ namespace { // As we scan the uses of the alloca instruction, keep track of stores, // and decide whether all of the loads and stores to the alloca are within // the same basic block. - for (Value::use_iterator U = AI->use_begin(), E = AI->use_end(); - U != E;) { - Instruction *User = cast(*U); - ++U; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E;) { + Instruction *User = cast(*UI++); if (BitCastInst *BC = dyn_cast(User)) { // Remove any uses of this alloca in DbgInfoInstrinsics. assert(BC->hasOneUse() && "Unexpected alloca uses!"); @@ -303,7 +304,8 @@ namespace { BC->eraseFromParent(); continue; } - else if (StoreInst *SI = dyn_cast(User)) { + + if (StoreInst *SI = dyn_cast(User)) { // Remember the basic blocks which define new values for the alloca DefiningBlocks.push_back(SI->getParent()); AllocaPointerVal = SI->getOperand(0); @@ -491,17 +493,14 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. 
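// [Sketch] The use scan above may assume the alloca is promotable: apart
// from the tolerated DbgInfo bitcast, every user must be a direct,
// non-volatile load or store.  A simplified form of that precondition,
// assuming the era's Value::use_const_iterator (the real
// isAllocaPromotable is the authority):
static bool looksPromotable(const AllocaInst *AI) {
  for (Value::use_const_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ++UI) {
    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
      if (LI->isVolatile()) return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
      if (SI->getOperand(0) == AI) return false;   // address is stored away
      if (SI->isVolatile()) return false;
    } else {
      return false;                  // GEP, call, phi: not promotable
    }
  }
  return true;
}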
- if (Value *V = PN->hasConstantValue(true)) { - if (!isa(V) || - properlyDominates(cast(V), PN)) { - if (AST && isa(PN->getType())) - AST->deleteValue(PN); - PN->replaceAllUsesWith(V); - PN->eraseFromParent(); - NewPhiNodes.erase(I++); - EliminatedAPHI = true; - continue; - } + if (Value *V = PN->hasConstantValue(&DT)) { + if (AST && isa(PN->getType())) + AST->deleteValue(PN); + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + NewPhiNodes.erase(I++); + EliminatedAPHI = true; + continue; } ++I; } @@ -603,7 +602,9 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, LiveInBlockWorklist.pop_back(); --i, --e; break; - } else if (LoadInst *LI = dyn_cast(I)) { + } + + if (LoadInst *LI = dyn_cast(I)) { if (LI->getOperand(0) != AI) continue; // Okay, we found a load before a store to the alloca. It is actually @@ -757,6 +758,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, } } +namespace { /// StoreIndexSearchPredicate - This is a helper predicate used to search by the /// first element of a pair. @@ -767,6 +769,8 @@ struct StoreIndexSearchPredicate { } }; +} + /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic /// block. If this is the case, avoid traversing the CFG and inserting a lot of /// potentially useless PHI nodes by just performing a single linear pass over @@ -864,8 +868,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), - Allocas[AllocaNo]->getName() + "." + - utostr(Version++), BB->begin()); + Allocas[AllocaNo]->getName() + "." + Twine(Version++), + BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; PN->reserveOperandSpace(getNumPreds(BB)); @@ -995,9 +999,9 @@ NextIteration: /// void llvm::PromoteMemToReg(const std::vector &Allocas, DominatorTree &DT, DominanceFrontier &DF, - AliasSetTracker *AST) { + LLVMContext &Context, AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DF, AST).run(); + PromoteMem2Reg(Allocas, DT, DF, AST, Context).run(); } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp new file mode 100644 index 0000000000000..780ee26389429 --- /dev/null +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -0,0 +1,335 @@ +//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdater class. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
+typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > >
+                IncomingPredInfoTy;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+  return *static_cast<AvailableValsTy*>(AV);
+}
+
+static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
+  return *static_cast<IncomingPredInfoTy*>(IPI);
+}
+
+
+SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
+  : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {}
+
+SSAUpdater::~SSAUpdater() {
+  delete &getAvailableVals(AV);
+  delete &getIncomingPredInfo(IPI);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates.  ProtoValue is the value used to name PHI nodes.
+void SSAUpdater::Initialize(Value *ProtoValue) {
+  if (AV == 0)
+    AV = new AvailableValsTy();
+  else
+    getAvailableVals(AV).clear();
+
+  if (IPI == 0)
+    IPI = new IncomingPredInfoTy();
+  else
+    getIncomingPredInfo(IPI).clear();
+  PrototypeValue = ProtoValue;
+}
+
+/// HasValueForBlock - Return true if the SSAUpdater already has a value for
+/// the specified block.
+bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
+  return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
+  assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
+  assert(PrototypeValue->getType() == V->getType() &&
+         "All rewritten values must have the same type");
+  getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
+  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+  Value *Res = GetValueAtEndOfBlockInternal(BB);
+  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+  return Res;
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB.  Consider code like this:
+///
+///      X1 = ...
+///   SomeBB:
+///      use(X)
+///      X2 = ...
+///      br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks.  However, the use of X happens in the *middle* of
+/// a block.  Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
+  // If there is no definition of the renamed variable in this block, just use
+  // GetValueAtEndOfBlock to do our work.
+  if (!getAvailableVals(AV).count(BB))
+    return GetValueAtEndOfBlock(BB);
+
+  // Otherwise, we have the hard case.  Get the live-in values for each
+  // predecessor.
+ SmallVector, 8> PredValues; + Value *SingularValue = 0; + + // We can get our predecessor info by walking the pred_iterator list, but it + // is relatively slow. If we already have PHI nodes in this block, walk one + // of them to get the predecessor list instead. + if (PHINode *SomePhi = dyn_cast(BB->begin())) { + for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { + BasicBlock *PredBB = SomePhi->getIncomingBlock(i); + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (i == 0) + SingularValue = PredVal; + else if (PredVal != SingularValue) + SingularValue = 0; + } + } else { + bool isFirstPred = true; + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *PredBB = *PI; + Value *PredVal = GetValueAtEndOfBlock(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + } + + // If there are no predecessors, just return undef. + if (PredValues.empty()) + return UndefValue::get(PrototypeValue->getType()); + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // Otherwise, we do need a PHI: insert one now. + PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), + PrototypeValue->getName(), + &BB->front()); + InsertedPHI->reserveOperandSpace(PredValues.size()); + + // Fill in all the predecessors of the PHI. + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *ConstVal = InsertedPHI->hasConstantValue()) { + InsertedPHI->eraseFromParent(); + return ConstVal; + } + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI; +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void SSAUpdater::RewriteUse(Use &U) { + Instruction *User = cast(U.getUser()); + BasicBlock *UseBB = User->getParent(); + if (PHINode *UserPN = dyn_cast(User)) + UseBB = UserPN->getIncomingBlock(U); + + U.set(GetValueInMiddleOfBlock(UseBB)); +} + + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. If not, construct SSA form by +/// walking predecessors inserting PHI nodes as needed until we get to a block +/// where the value is available. +/// +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { + AvailableValsTy &AvailableVals = getAvailableVals(AV); + + // Query AvailableVals by doing an insertion of null. + std::pair InsertRes = + AvailableVals.insert(std::make_pair(BB, WeakVH())); + + // Handle the case when the insertion fails because we have already seen BB. + if (!InsertRes.second) { + // If the insertion failed, there are two cases. The first case is that the + // value is already available for the specified block. If we get this, just + // return the value. 
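// [Sketch] A typical client of the new SSAUpdater.  BB1/BB2 and V1/V2 are
// hypothetical: the pass has produced one replacement value per block and
// wants every use of OldInst rewired, with PHIs built only at joins.
// Assumes use_iterator::getUse() to reach the Use being rewritten:
static void replaceWithSSA(Instruction *OldInst,
                           BasicBlock *BB1, Value *V1,
                           BasicBlock *BB2, Value *V2) {
  SSAUpdater SSA(0);                  // not collecting the inserted PHIs
  SSA.Initialize(OldInst);            // PHIs take OldInst's type and name
  SSA.AddAvailableValue(BB1, V1);
  SSA.AddAvailableValue(BB2, V2);
  while (!OldInst->use_empty())       // each rewrite may insert PHI nodes
    SSA.RewriteUse(OldInst->use_begin().getUse());
  OldInst->eraseFromParent();
}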
+    if (InsertRes.first->second != 0)
+      return InsertRes.first->second;
+
+    // Otherwise, if the value we find is null, then the value is not known
+    // but it is being computed elsewhere in our recursion.  This means that
+    // we have a cycle.  Handle this by inserting a PHI node and returning it.
+    // When we get back to the first instance of the recursion we will fill
+    // in the PHI node.
+    return InsertRes.first->second =
+      PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
+                      &BB->front());
+  }
+
+  // Okay, the value isn't in the map and we just inserted a null in the entry
+  // to indicate that we're processing the block.  Since we have no idea what
+  // value is in this block, we have to recurse through our predecessors.
+  //
+  // While we're walking our predecessors, we keep track of them in a vector,
+  // then insert a PHI node in the end if we actually need one.  We could use a
+  // smallvector here, but that would take a lot of stack space for every level
+  // of the recursion, just use IncomingPredInfo as an explicit stack.
+  IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
+  unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+
+  // As we're walking the predecessors, keep track of whether they are all
+  // producing the same value.  If so, this value will capture it, if not, it
+  // will get reset to null.  We distinguish the no-predecessor case explicitly
+  // below.
+  TrackingVH<Value> SingularValue;
+
+  // We can get our predecessor info by walking the pred_iterator list, but it
+  // is relatively slow.  If we already have PHI nodes in this block, walk one
+  // of them to get the predecessor list instead.
+  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
+      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+
+      // Compute SingularValue.
+      if (i == 0)
+        SingularValue = PredVal;
+      else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  } else {
+    bool isFirstPred = true;
+    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+      BasicBlock *PredBB = *PI;
+      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
+      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+
+      // Compute SingularValue.
+      if (isFirstPred) {
+        SingularValue = PredVal;
+        isFirstPred = false;
+      } else if (PredVal != SingularValue)
+        SingularValue = 0;
+    }
+  }
+
+  // If there are no predecessors, then we must have found an unreachable
+  // block; just return 'undef'.  Since there are no predecessors, InsertRes
+  // must not be invalidated.
+  if (IncomingPredInfo.size() == FirstPredInfoEntry)
+    return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
+
+  /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
+  /// this block is involved in a loop, a no-entry PHI node will have been
+  /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
+  /// above.
+  TrackingVH<Value> &InsertedVal = AvailableVals[BB];
+
+  // If all the predecessor values are the same then we don't need to insert a
+  // PHI.  This is the simple and common case.
+  if (SingularValue) {
+    // If a PHI node got inserted, replace it with the singular value and
+    // delete it.
+    if (InsertedVal) {
+      PHINode *OldVal = cast<PHINode>(InsertedVal);
+      // Be careful about dead loops.  These RAUW's also update InsertedVal.
+ if (InsertedVal != SingularValue) + OldVal->replaceAllUsesWith(SingularValue); + else + OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); + OldVal->eraseFromParent(); + } else { + InsertedVal = SingularValue; + } + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + return InsertedVal; + } + + // Otherwise, we do need a PHI: insert one now if we don't already have one. + if (InsertedVal == 0) + InsertedVal = PHINode::Create(PrototypeValue->getType(), + PrototypeValue->getName(), &BB->front()); + + PHINode *InsertedPHI = cast(InsertedVal); + InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); + + // Fill in all the predecessors of the PHI. + for (IncomingPredInfoTy::iterator I = + IncomingPredInfo.begin()+FirstPredInfoEntry, + E = IncomingPredInfo.end(); I != E; ++I) + InsertedPHI->addIncoming(I->second, I->first); + + // Drop the entries we added in IncomingPredInfo to restore the stack. + IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, + IncomingPredInfo.end()); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (Value *ConstVal = InsertedPHI->hasConstantValue()) { + InsertedPHI->replaceAllUsesWith(ConstVal); + InsertedPHI->eraseFromParent(); + InsertedVal = ConstVal; + } else { + DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n"); + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + } + + return InsertedVal; +} + + diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp index 4c4dd37ddf754..3bb2e8ee69114 100644 --- a/lib/Transforms/Utils/SSI.cpp +++ b/lib/Transforms/Utils/SSI.cpp @@ -23,6 +23,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SSI.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" using namespace llvm; @@ -30,11 +31,12 @@ using namespace llvm; static const std::string SSI_PHI = "SSI_phi"; static const std::string SSI_SIG = "SSI_sigma"; -static const unsigned UNSIGNED_INFINITE = ~0U; +STATISTIC(NumSigmaInserted, "Number of sigma functions inserted"); +STATISTIC(NumPhiInserted, "Number of phi functions inserted"); void SSI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); AU.setPreservesAll(); } @@ -45,22 +47,23 @@ bool SSI::runOnFunction(Function &F) { /// This methods creates the SSI representation for the list of values /// received. It will only create SSI representation if a value is used -/// in a to decide a branch. Repeated values are created only once. +/// to decide a branch. Repeated values are created only once. 
/// void SSI::createSSI(SmallVectorImpl &value) { init(value); - for (unsigned i = 0; i < num_values; ++i) { - if (created.insert(value[i])) { - needConstruction[i] = true; - } - } - insertSigmaFunctions(value); + SmallPtrSet needConstruction; + for (SmallVectorImpl::iterator I = value.begin(), + E = value.end(); I != E; ++I) + if (created.insert(*I)) + needConstruction.insert(*I); + + insertSigmaFunctions(needConstruction); // Test if there is a need to transform to SSI - if (needConstruction.any()) { - insertPhiFunctions(value); - renameInit(value); + if (!needConstruction.empty()) { + insertPhiFunctions(needConstruction); + renameInit(needConstruction); rename(DT_->getRoot()); fixPhis(); } @@ -71,100 +74,107 @@ void SSI::createSSI(SmallVectorImpl &value) { /// Insert sigma functions (a sigma function is a phi function with one /// operator) /// -void SSI::insertSigmaFunctions(SmallVectorImpl &value) { - for (unsigned i = 0; i < num_values; ++i) { - if (!needConstruction[i]) - continue; - - bool need = false; - for (Value::use_iterator begin = value[i]->use_begin(), end = - value[i]->use_end(); begin != end; ++begin) { +void SSI::insertSigmaFunctions(SmallPtrSet &value) { + for (SmallPtrSet::iterator I = value.begin(), + E = value.end(); I != E; ++I) { + for (Value::use_iterator begin = (*I)->use_begin(), + end = (*I)->use_end(); begin != end; ++begin) { // Test if the Use of the Value is in a comparator - CmpInst *CI = dyn_cast(begin); - if (CI && isUsedInTerminator(CI)) { - // Basic Block of the Instruction - BasicBlock *BB = CI->getParent(); - // Last Instruction of the Basic Block - const TerminatorInst *TI = BB->getTerminator(); - - for (unsigned j = 0, e = TI->getNumSuccessors(); j < e; ++j) { - // Next Basic Block - BasicBlock *BB_next = TI->getSuccessor(j); - if (BB_next != BB && - BB_next->getUniquePredecessor() != NULL && - dominateAny(BB_next, value[i])) { - PHINode *PN = PHINode::Create( - value[i]->getType(), SSI_SIG, BB_next->begin()); - PN->addIncoming(value[i], BB); - sigmas.insert(std::make_pair(PN, i)); - created.insert(PN); - need = true; - defsites[i].push_back(BB_next); + if (CmpInst *CI = dyn_cast(begin)) { + // Iterates through all uses of CmpInst + for (Value::use_iterator begin_ci = CI->use_begin(), + end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) { + // Test if any use of CmpInst is in a Terminator + if (TerminatorInst *TI = dyn_cast(begin_ci)) { + insertSigma(TI, *I); } } } } - needConstruction[i] = need; + } +} + +/// Inserts Sigma Functions in every BasicBlock successor to Terminator +/// Instruction TI. All inserted Sigma Function are related to Instruction I. 
+/// +void SSI::insertSigma(TerminatorInst *TI, Instruction *I) { + // Basic Block of the Terminator Instruction + BasicBlock *BB = TI->getParent(); + for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { + // Next Basic Block + BasicBlock *BB_next = TI->getSuccessor(i); + if (BB_next != BB && + BB_next->getSinglePredecessor() != NULL && + dominateAny(BB_next, I)) { + PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin()); + PN->addIncoming(I, BB); + sigmas[PN] = I; + created.insert(PN); + defsites[I].push_back(BB_next); + ++NumSigmaInserted; + } } } /// Insert phi functions when necessary /// -void SSI::insertPhiFunctions(SmallVectorImpl &value) { +void SSI::insertPhiFunctions(SmallPtrSet &value) { DominanceFrontier *DF = &getAnalysis(); - for (unsigned i = 0; i < num_values; ++i) { + for (SmallPtrSet::iterator I = value.begin(), + E = value.end(); I != E; ++I) { // Test if there were any sigmas for this variable - if (needConstruction[i]) { - - SmallPtrSet BB_visited; - - // Insert phi functions if there is any sigma function - while (!defsites[i].empty()) { - - BasicBlock *BB = defsites[i].back(); - - defsites[i].pop_back(); - DominanceFrontier::iterator DF_BB = DF->find(BB); - - // Iterates through all the dominance frontier of BB - for (std::set::iterator DF_BB_begin = - DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); - DF_BB_begin != DF_BB_end; ++DF_BB_begin) { - BasicBlock *BB_dominated = *DF_BB_begin; - - // Test if has not yet visited this node and if the - // original definition dominates this node - if (BB_visited.insert(BB_dominated) && - DT_->properlyDominates(value_original[i], BB_dominated) && - dominateAny(BB_dominated, value[i])) { - PHINode *PN = PHINode::Create( - value[i]->getType(), SSI_PHI, BB_dominated->begin()); - phis.insert(std::make_pair(PN, i)); - created.insert(PN); - - defsites[i].push_back(BB_dominated); - } + SmallPtrSet BB_visited; + + // Insert phi functions if there is any sigma function + while (!defsites[*I].empty()) { + + BasicBlock *BB = defsites[*I].back(); + + defsites[*I].pop_back(); + DominanceFrontier::iterator DF_BB = DF->find(BB); + + // The BB is unreachable. Skip it. + if (DF_BB == DF->end()) + continue; + + // Iterates through all the dominance frontier of BB + for (std::set::iterator DF_BB_begin = + DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); + DF_BB_begin != DF_BB_end; ++DF_BB_begin) { + BasicBlock *BB_dominated = *DF_BB_begin; + + // Test if has not yet visited this node and if the + // original definition dominates this node + if (BB_visited.insert(BB_dominated) && + DT_->properlyDominates(value_original[*I], BB_dominated) && + dominateAny(BB_dominated, *I)) { + PHINode *PN = PHINode::Create( + (*I)->getType(), SSI_PHI, BB_dominated->begin()); + phis.insert(std::make_pair(PN, *I)); + created.insert(PN); + + defsites[*I].push_back(BB_dominated); + ++NumPhiInserted; } } - BB_visited.clear(); } + BB_visited.clear(); } } /// Some initialization for the rename part /// -void SSI::renameInit(SmallVectorImpl &value) { - value_stack.resize(num_values); - for (unsigned i = 0; i < num_values; ++i) { - value_stack[i].push_back(value[i]); - } +void SSI::renameInit(SmallPtrSet &value) { + for (SmallPtrSet::iterator I = value.begin(), + E = value.end(); I != E; ++I) + value_stack[*I].push_back(*I); } /// Renames all variables in the specified BasicBlock. /// Only variables that need to be rename will be. 
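// [Sketch] What insertSigma produces: for a compare feeding a branch, each
// single-predecessor successor that the value dominates gets a one-operand
// phi.  For example (IR shape only):
//   entry:
//     %cmp = icmp slt i32 %v, 10
//     br i1 %cmp, label %then, label %else
//   then:                                  ; pred: %entry only
//     %SSI_sigma = phi i32 [ %v, %entry ]  ; sigma = one-entry phi of %v
//   else:                                  ; pred: %entry only
//     %SSI_sigma1 = phi i32 [ %v, %entry ]
// Range-style clients can then attach "%v < 10" to %SSI_sigma and
// "%v >= 10" to %SSI_sigma1 without the facts bleeding across paths.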
/// void SSI::rename(BasicBlock *BB) { - BitVector *defined = new BitVector(num_values, false); + SmallPtrSet defined; // Iterate through instructions and make appropriate renaming. // For SSI_PHI (b = PHI()), store b at value_stack as a new @@ -178,19 +188,17 @@ void SSI::rename(BasicBlock *BB) { begin != end; ++begin) { Instruction *I = begin; if (PHINode *PN = dyn_cast(I)) { // Treat PHI functions - int position; + Instruction* position; // Treat SSI_PHI - if ((position = getPositionPhi(PN)) != -1) { + if ((position = getPositionPhi(PN))) { value_stack[position].push_back(PN); - (*defined)[position] = true; - } - + defined.insert(position); // Treat SSI_SIG - else if ((position = getPositionSigma(PN)) != -1) { + } else if ((position = getPositionSigma(PN))) { substituteUse(I); value_stack[position].push_back(PN); - (*defined)[position] = true; + defined.insert(position); } // Treat all other PHI functions @@ -216,10 +224,9 @@ void SSI::rename(BasicBlock *BB) { for (BasicBlock::iterator begin = BB_succ->begin(), notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) { Instruction *I = begin; - PHINode *PN; - int position; - if ((PN = dyn_cast(I)) && ((position - = getPositionPhi(PN)) != -1)) { + PHINode *PN = dyn_cast(I); + Instruction* position; + if (PN && ((position = getPositionPhi(PN)))) { PN->addIncoming(value_stack[position].back(), BB); } } @@ -237,13 +244,9 @@ void SSI::rename(BasicBlock *BB) { // Now we remove all inserted definitions of a variable from the top of // the stack leaving the previous one as the top. - if (defined->any()) { - for (unsigned i = 0; i < num_values; ++i) { - if ((*defined)[i]) { - value_stack[i].pop_back(); - } - } - } + for (SmallPtrSet::iterator DI = defined.begin(), + DE = defined.end(); DI != DE; ++DI) + value_stack[*DI].pop_back(); } /// Substitute any use in this instruction for the last definition of @@ -252,23 +255,24 @@ void SSI::rename(BasicBlock *BB) { void SSI::substituteUse(Instruction *I) { for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { Value *operand = I->getOperand(i); - for (unsigned j = 0; j < num_values; ++j) { - if (operand == value_stack[j].front() && - I != value_stack[j].back()) { + for (DenseMap >::iterator + VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) { + if (operand == VI->second.front() && + I != VI->second.back()) { PHINode *PN_I = dyn_cast(I); - PHINode *PN_vs = dyn_cast(value_stack[j].back()); + PHINode *PN_vs = dyn_cast(VI->second.back()); // If a phi created in a BasicBlock is used as an operand of another // created in the same BasicBlock, this step marks this second phi, // to fix this issue later. It cannot be fixed now, because the // operands of the first phi are not final yet. if (PN_I && PN_vs && - value_stack[j].back()->getParent() == I->getParent()) { + VI->second.back()->getParent() == I->getParent()) { phisToFix.insert(PN_I); } - I->setOperand(i, value_stack[j].back()); + I->setOperand(i, VI->second.back()); break; } } @@ -276,12 +280,16 @@ void SSI::substituteUse(Instruction *I) { } /// Test if the BasicBlock BB dominates any use or definition of value. +/// If it dominates a phi instruction that is on the same BasicBlock, +/// that does not count. 
/// bool SSI::dominateAny(BasicBlock *BB, Instruction *value) { for (Value::use_iterator begin = value->use_begin(), end = value->use_end(); begin != end; ++begin) { Instruction *I = cast(*begin); BasicBlock *BB_father = I->getParent(); + if (BB == BB_father && isa(I)) + continue; if (DT_->dominates(BB, BB_father)) { return true; } @@ -293,31 +301,54 @@ bool SSI::dominateAny(BasicBlock *BB, Instruction *value) { /// as an operand of another phi function used in the same BasicBlock, /// LLVM looks this as an error. So on the second phi, the first phi is called /// P and the BasicBlock it incomes is B. This P will be replaced by the value -/// it has for BasicBlock B. +/// it has for BasicBlock B. It also includes undef values for predecessors +/// that were not included in the phi. /// void SSI::fixPhis() { for (SmallPtrSet::iterator begin = phisToFix.begin(), end = phisToFix.end(); begin != end; ++begin) { PHINode *PN = *begin; for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { - PHINode *PN_father; - if ((PN_father = dyn_cast(PN->getIncomingValue(i))) && - PN->getParent() == PN_father->getParent()) { + PHINode *PN_father = dyn_cast(PN->getIncomingValue(i)); + if (PN_father && PN->getParent() == PN_father->getParent() && + !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) { BasicBlock *BB = PN->getIncomingBlock(i); int pos = PN_father->getBasicBlockIndex(BB); PN->setIncomingValue(i, PN_father->getIncomingValue(pos)); } } } + + for (DenseMapIterator begin = phis.begin(), + end = phis.end(); begin != end; ++begin) { + PHINode *PN = begin->first; + BasicBlock *BB = PN->getParent(); + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + SmallVector Preds(PI, PE); + for (unsigned size = Preds.size(); + PI != PE && PN->getNumIncomingValues() != size; ++PI) { + bool found = false; + for (unsigned i = 0, pn_end = PN->getNumIncomingValues(); + i < pn_end; ++i) { + if (PN->getIncomingBlock(i) == *PI) { + found = true; + break; + } + } + if (!found) { + PN->addIncoming(UndefValue::get(PN->getType()), *PI); + } + } + } } /// Return which variable (position on the vector of variables) this phi /// represents on the phis list. /// -unsigned SSI::getPositionPhi(PHINode *PN) { - DenseMap::iterator val = phis.find(PN); +Instruction* SSI::getPositionPhi(PHINode *PN) { + DenseMap::iterator val = phis.find(PN); if (val == phis.end()) - return UNSIGNED_INFINITE; + return 0; else return val->second; } @@ -325,52 +356,27 @@ unsigned SSI::getPositionPhi(PHINode *PN) { /// Return which variable (position on the vector of variables) this phi /// represents on the sigmas list. /// -unsigned SSI::getPositionSigma(PHINode *PN) { - DenseMap::iterator val = sigmas.find(PN); +Instruction* SSI::getPositionSigma(PHINode *PN) { + DenseMap::iterator val = sigmas.find(PN); if (val == sigmas.end()) - return UNSIGNED_INFINITE; + return 0; else return val->second; } -/// Return true if the the Comparison Instruction is an operator -/// of the Terminator instruction of its Basic Block. 
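// [Sketch] The undef-completion added to fixPhis() above: an SSI_phi is
// created with entries only for the predecessors seen during renaming, so
// any remaining predecessor edge gets an explicit undef.  Shape:
//   join:                          ; preds = %p1, %p2
//     %SSI_phi = phi i32 [ %v, %p1 ]                  ; before fixPhis
//     %SSI_phi = phi i32 [ %v, %p1 ], [ undef, %p2 ]  ; after fixPhis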
-/// -unsigned SSI::isUsedInTerminator(CmpInst *CI) { - TerminatorInst *TI = CI->getParent()->getTerminator(); - if (TI->getNumOperands() == 0) { - return false; - } else if (CI == TI->getOperand(0)) { - return true; - } else { - return false; - } -} - /// Initializes /// void SSI::init(SmallVectorImpl &value) { - num_values = value.size(); - needConstruction.resize(num_values, false); - - value_original.resize(num_values); - defsites.resize(num_values); - - for (unsigned i = 0; i < num_values; ++i) { - value_original[i] = value[i]->getParent(); - defsites[i].push_back(value_original[i]); + for (SmallVectorImpl::iterator I = value.begin(), + E = value.end(); I != E; ++I) { + value_original[*I] = (*I)->getParent(); + defsites[*I].push_back((*I)->getParent()); } } /// Clean all used resources in this creation of SSI /// void SSI::clean() { - for (unsigned i = 0; i < num_values; ++i) { - defsites[i].clear(); - if (i < value_stack.size()) - value_stack[i].clear(); - } - phis.clear(); sigmas.clear(); phisToFix.clear(); @@ -378,7 +384,6 @@ void SSI::clean() { defsites.clear(); value_stack.clear(); value_original.clear(); - needConstruction.clear(); } /// createSSIPass - The public interface to this file... @@ -388,3 +393,40 @@ FunctionPass *llvm::createSSIPass() { return new SSI(); } char SSI::ID = 0; static RegisterPass X("ssi", "Static Single Information Construction"); +/// SSIEverything - A pass that runs createSSI on every non-void variable, +/// intended for debugging. +namespace { + struct VISIBILITY_HIDDEN SSIEverything : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + SSIEverything() : FunctionPass(&ID) {} + + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + } + }; +} + +bool SSIEverything::runOnFunction(Function &F) { + SmallVector Insts; + SSI &ssi = getAnalysis(); + + if (F.isDeclaration() || F.isIntrinsic()) return false; + + for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) + for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) + if (I->getType() != Type::getVoidTy(F.getContext())) + Insts.push_back(I); + + ssi.createSSI(Insts); + return true; +} + +/// createSSIEverythingPass - The public interface to this file... 
+/// +FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); } + +char SSIEverything::ID = 0; +static RegisterPass +Y("ssi-everything", "Static Single Information Construction"); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 58d4d5a344c1c..6fd7d7bf9aeae 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -21,6 +21,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/ADT/SmallVector.h" @@ -84,19 +85,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); - DOUT << "Looking to fold " << BB->getNameStart() << " into " - << Succ->getNameStart() << "\n"; + DEBUG(errs() << "Looking to fold " << BB->getName() << " into " + << Succ->getName() << "\n"); // Shortcut, if there is only a single predecessor it must be BB and merging // is always safe if (Succ->getSinglePredecessor()) return true; - typedef SmallPtrSet InstrSet; - InstrSet BBPHIs; - - // Make a list of all phi nodes in BB - BasicBlock::iterator BBI = BB->begin(); - while (isa(*BBI)) BBPHIs.insert(BBI++); - // Make a list of the predecessors of BB typedef SmallPtrSet BlockSet; BlockSet BBPreds(pred_begin(BB), pred_end(BB)); @@ -126,16 +120,13 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { PI != PE; PI++) { if (BBPN->getIncomingValueForBlock(*PI) != PN->getIncomingValueForBlock(*PI)) { - DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " - << Succ->getNameStart() << " is conflicting with " - << BBPN->getNameStart() << " with regard to common predecessor " - << (*PI)->getNameStart() << "\n"; + DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with " + << BBPN->getName() << " with regard to common predecessor " + << (*PI)->getName() << "\n"); return false; } } - // Remove this phinode from the list of phis in BB, since it has been - // handled. - BBPHIs.erase(BBPN); } else { Value* Val = PN->getIncomingValueForBlock(BB); for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); @@ -144,33 +135,15 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // one for BB, in which case this phi node will not prevent the merging // of the block. if (Val != PN->getIncomingValueForBlock(*PI)) { - DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in " - << Succ->getNameStart() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getNameStart() << "\n"; + DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in " + << Succ->getName() << " is conflicting with regard to common " + << "predecessor " << (*PI)->getName() << "\n"); return false; } } } } - // If there are any other phi nodes in BB that don't have a phi node in Succ - // to merge with, they must be moved to Succ completely. However, for any - // predecessors of Succ, branches will be added to the phi node that just - // point to itself. So, for any common predecessors, this must not cause - // conflicts. 
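// [Sketch] The conflict CanPropagatePredecessorsForPHIs rejects, as IR:
//   a:    br i1 %c0, label %bb, label %succ
//   b:    br i1 %c1, label %bb, label %succ
//   bb:   %x = phi i32 [ 0, %a ], [ 1, %b ]
//         br label %succ
//   succ: %y = phi i32 [ %x, %bb ], [ 2, %a ], [ 2, %b ]
// %a and %b are common predecessors of bb and succ.  Folding bb into succ
// would force %y's entries for %a and %b to carry both the value 2 and the
// value %x holds on that edge, so the fold is refused.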
-  for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end();
-       I != E; I++) {
-    PHINode *PN = cast<PHINode>(*I);
-    for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
-         PI != PE; PI++)
-      if (PN->getIncomingValueForBlock(*PI) != PN) {
-        DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "
-             << BB->getNameStart() << " is conflicting with regard to common "
-             << "predecessor " << (*PI)->getNameStart() << "\n";
-        return false;
-      }
-  }
-
   return true;
 }
 
@@ -182,8 +155,36 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
   // Check to see if merging these blocks would cause conflicts for any of the
   // phi nodes in BB or Succ. If not, we can safely merge.
   if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-
-  DOUT << "Killing Trivial BB: \n" << *BB;
+
+  // Check for cases where Succ has multiple predecessors and a PHI node in BB
+  // has uses which will not disappear when the PHI nodes are merged.  It is
+  // possible to handle such cases, but difficult: it requires checking whether
+  // BB dominates Succ, which is non-trivial to calculate in the case where
+  // Succ has multiple predecessors.  Also, it requires checking whether
+  // constructing the necessary self-referential PHI node doesn't introduce any
+  // conflicts; this isn't too difficult, but the previous code for doing this
+  // was incorrect.
+  //
+  // Note that if this check finds a live use, BB dominates Succ, so BB is
+  // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+  // folding the branch isn't profitable in that case anyway.
+  if (!Succ->getSinglePredecessor()) {
+    BasicBlock::iterator BBI = BB->begin();
+    while (isa<PHINode>(*BBI)) {
+      for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+           UI != E; ++UI) {
+        if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+          if (PN->getIncomingBlock(UI) != BB)
+            return false;
+        } else {
+          return false;
+        }
+      }
+      ++BBI;
+    }
+  }
+
+  DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
 
   if (isa<PHINode>(Succ->begin())) {
     // If there is more than one pred of succ, and there are PHI nodes in
@@ -217,38 +218,16 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
     }
   }
 
-  if (isa<PHINode>(&BB->front())) {
-    SmallVector<BasicBlock*, 16>
-    OldSuccPreds(pred_begin(Succ), pred_end(Succ));
-
-    // Move all PHI nodes in BB to Succ if they are alive, otherwise
-    // delete them.
-    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
-      if (PN->use_empty()) {
-        // Just remove the dead phi.  This happens if Succ's PHIs were the only
-        // users of the PHI nodes.
-        PN->eraseFromParent();
-        continue;
-      }
-
-      // The instruction is alive, so this means that BB must dominate all
-      // predecessors of Succ (Since all uses of the PN are after its
-      // definition, so in Succ or a block dominated by Succ. If a predecessor
-      // of Succ would not be dominated by BB, PN would violate the def before
-      // use SSA demand). Therefore, we can simply move the phi node to the
-      // next block.
+  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+    if (Succ->getSinglePredecessor()) {
+      // BB is the only predecessor of Succ, so Succ will end up with exactly
+      // the same predecessors BB had.
       Succ->getInstList().splice(Succ->begin(), BB->getInstList(),
                                  BB->begin());
-
-      // We need to add new entries for the PHI node to account for
-      // predecessors of Succ that the PHI node does not take into
-      // account.
At this point, since we know that BB dominated succ and all - // of its predecessors, this means that we should any newly added - // incoming edges should use the PHI node itself as the value for these - // edges, because they are loop back edges. - for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i) - if (OldSuccPreds[i] != BB) - PN->addIncoming(PN, OldSuccPreds[i]); + } else { + // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. + assert(PN->use_empty() && "There shouldn't be any uses here!"); + PN->eraseFromParent(); } } @@ -383,26 +362,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if its a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. + if (!I->isSafeToSpeculativelyExecute()) + return false; + switch (I->getOpcode()) { default: return false; // Cannot hoist this out safely. case Instruction::Load: { - // We can hoist loads that are non-volatile and obviously cannot trap. - if (cast(I)->isVolatile()) - return false; - // FIXME: A computation of a constant can trap! - if (!isa(I->getOperand(0)) && - !isa(I->getOperand(0))) - return false; - // External weak globals may have address 0, so we can't load them. - Value *V2 = I->getOperand(0)->getUnderlyingObject(); - if (V2) { - GlobalVariable* GV = dyn_cast(V2); - if (GV && GV->hasExternalWeakLinkage()) - return false; - } - // Finally, we have to check to make sure there are no instructions - // before the load in its basic block, as we are going to hoist the loop - // out to its predecessor. + // We have to check to make sure there are no instructions before the + // load in its basic block, as we are going to hoist the loop out to + // its predecessor. BasicBlock::iterator IP = PBB->begin(); while (isa(IP)) IP++; @@ -645,12 +613,13 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisCases.size() == 1 && "Branch can only have one case!"); // Insert the new branch. Instruction *NI = BranchInst::Create(ThisDef, TI); + (void) NI; // Remove PHI node entries for the dead edge. ThisCases[0].second->removePredecessor(TI->getParent()); - DOUT << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); return true; @@ -662,8 +631,8 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) DeadCases.insert(PredCases[i].first); - DOUT << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI; + DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI); for (unsigned i = SI->getNumCases()-1; i != 0; --i) if (DeadCases.count(SI->getCaseValue(i))) { @@ -671,7 +640,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, SI->removeCase(i); } - DOUT << "Leaving: " << *TI << "\n"; + DEBUG(errs() << "Leaving: " << *TI << "\n"); return true; } } @@ -712,9 +681,10 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Insert the new branch. 
Instruction *NI = BranchInst::Create(TheRealDest, TI); + (void) NI; - DOUT << "Threading pred instr: " << *Pred->getTerminator() - << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; + DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() + << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorInstAndDCECond(TI); return true; @@ -847,7 +817,8 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { if (InfLoopBlock == 0) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) - InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); + InfLoopBlock = BasicBlock::Create(BB->getContext(), + "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); } NewSI->setSuccessor(i, InfLoopBlock); @@ -900,7 +871,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { while (isa<DbgInfoIntrinsic>(I2)) I2 = BB2_Itr++; if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || - !I1->isIdenticalTo(I2) || + !I1->isIdenticalToWhenDefined(I2) || (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; @@ -919,6 +890,7 @@ BIParent->getInstList().splice(BI, BB1->getInstList(), I1); if (!I2->use_empty()) I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); BB2->getInstList().erase(I2); I1 = BB1_Itr++; @@ -927,7 +899,8 @@ I2 = BB2_Itr++; while (isa<DbgInfoIntrinsic>(I2)) I2 = BB2_Itr++; - } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2)); + } while (I1->getOpcode() == I2->getOpcode() && + I1->isIdenticalToWhenDefined(I2)); return true; @@ -939,7 +912,7 @@ HoistTerminator: // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); BIParent->getInstList().insert(BI, NT); - if (NT->getType() != Type::VoidTy) { + if (NT->getType() != Type::getVoidTy(BB1->getContext())) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); NT->takeName(I1); @@ -1197,7 +1170,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType() == Type::Int1Ty) { + CB->getType() == Type::getInt1Ty(BB->getContext())) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -1209,7 +1182,8 @@ // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting // the edge we are about to create. - BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge", + BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), + RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); PHINode *PN; @@ -1242,7 +1216,7 @@ } // Check for trivial simplification. - if (Constant *C = ConstantFoldInstruction(N)) { + if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) { TranslateMap[BBI] = C; delete N; // Constant folded away, don't need actual inst } else { @@ -1296,8 +1270,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN) { if (NumPhis > 2) return false; - DOUT << "FOUND IF CONDITION! " << *IfCond << " T: " - << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"; + DEBUG(errs() << "FOUND IF CONDITION!
" << *IfCond << " T: " + << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions @@ -1427,7 +1401,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { if (FalseRet->getNumOperands() == 0) { TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - ReturnInst::Create(0, BI); + ReturnInst::Create(BI->getContext(), 0, BI); EraseTerminatorInstAndDCECond(BI); return true; } @@ -1476,12 +1450,13 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { } Value *RI = !TrueValue ? - ReturnInst::Create(BI) : - ReturnInst::Create(TrueValue, BI); + ReturnInst::Create(BI->getContext(), BI) : + ReturnInst::Create(BI->getContext(), TrueValue, BI); + (void) RI; - DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" - << "\n " << *BI << "NewRet = " << *RI - << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc; + DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" + << "\n " << *BI << "NewRet = " << *RI + << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); EraseTerminatorInstAndDCECond(BI); @@ -1561,7 +1536,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { else continue; - DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB; + DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { @@ -1605,7 +1580,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); - + // If this block ends with a branch instruction, and if there is a // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. @@ -1616,7 +1591,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue)); + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + CondIsTrue)); return true; // Nuke the branch on constant. } @@ -1624,7 +1600,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { - PHINode *NewPN = PHINode::Create(Type::Int1Ty, + PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), BI->getCondition()->getName() + ".pr", BB->begin()); // Okay, we're going to insert the PHI node. Since PBI is not the only @@ -1636,7 +1612,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::Int1Ty, + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), *PI); } else { NewPN->addIncoming(BI->getCondition(), *PI); @@ -1694,8 +1670,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Finally, if everything is ok, fold the branches to logical ops. 
BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - DOUT << "FOLDING BRs:" << *PBI->getParent() - << "AND: " << *BI->getParent(); + DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent() + << "AND: " << *BI->getParent()); // If OtherDest *is* BB, then BB is a basic block with a single conditional @@ -1708,12 +1684,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (OtherDest == BB) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) - BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); + BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), + "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; } - DOUT << *PBI->getParent()->getParent(); + DEBUG(errs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. @@ -1763,9 +1740,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } } - DOUT << "INTO: " << *PBI->getParent(); - - DOUT << *PBI->getParent()->getParent(); + DEBUG(errs() << "INTO: " << *PBI->getParent()); + DEBUG(errs() << *PBI->getParent()->getParent()); // This basic block is probably dead. We know it has at least // one fewer predecessor. @@ -1792,7 +1768,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { // Remove basic blocks that have no predecessors... or that just have themselves // as a predecessor. These are unreachable. if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { - DOUT << "Removing BB: \n" << *BB; + DEBUG(errs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB); return true; } @@ -1832,8 +1808,8 @@ if (!UncondBranchPreds.empty()) { while (!UncondBranchPreds.empty()) { BasicBlock *Pred = UncondBranchPreds.pop_back_val(); - DOUT << "FOLDING: " << *BB - << "INTO UNCOND BRANCH PRED: " << *Pred; + DEBUG(errs() << "FOLDING: " << *BB + << "INTO UNCOND BRANCH PRED: " << *Pred); Instruction *UncondBranch = Pred->getTerminator(); // Clone the return and add it to the end of the predecessor. Instruction *NewRet = RI->clone(); @@ -1884,33 +1860,26 @@ } else if (isa<UnwindInst>(BB->begin())) { // Check to see if the first instruction in this block is just an unwind. // If so, replace any invoke instructions which use this as an exception - // destination with call instructions, and any unconditional branch - // predecessor with an unwind. + // destination with call instructions. // SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); while (!Preds.empty()) { BasicBlock *Pred = Preds.back(); - if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) { - if (BI->isUnconditional()) { - Pred->getInstList().pop_back(); // nuke uncond branch - new UnwindInst(Pred); // Use unwind. - Changed = true; - } - } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator())) + if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator())) if (II->getUnwindDest() == BB) { // Insert a new branch instruction before the invoke, because this - // is now a fall through... + // is now a fall through. BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); Pred->getInstList().remove(II); // Take out of symbol table - // Insert the call now... + // Insert the call now.
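For orientation, here is the invoke-to-call rewrite that this hunk performs, gathered into one self-contained sketch. The helper name is hypothetical, and it assumes this era's operand layout, where an invoke's callee and its two destination blocks occupy operands 0-2 and the call arguments follow:

    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Replace an invoke whose exception edge is known dead with a plain call
    // followed by a fall-through branch to the normal destination.
    static void invokeToCall(InvokeInst *II) {
      // The branch to the normal destination becomes the new terminator.
      BranchInst *Fall = BranchInst::Create(II->getNormalDest(), II);
      II->removeFromParent();  // take the invoke out of the block

      // Skip callee + normal dest + unwind dest; the rest are arguments.
      SmallVector<Value*, 8> Args(II->op_begin() + 3, II->op_end());
      CallInst *CI = CallInst::Create(II->getCalledValue(),
                                      Args.begin(), Args.end(),
                                      II->getName(), Fall);
      CI->setCallingConv(II->getCallingConv());
      CI->setAttributes(II->getAttributes());
      II->replaceAllUsesWith(CI);  // the call now produces the value
      delete II;
    }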
SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end()); CallInst *CI = CallInst::Create(II->getCalledValue(), Args.begin(), Args.end(), II->getName(), BI); CI->setCallingConv(II->getCallingConv()); CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the Call now does instead + // If the invoke produced a value, the Call now does instead. II->replaceAllUsesWith(CI); delete II; Changed = true; @@ -2042,7 +2011,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) { if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isUnconditional()) { if (BI->getSuccessor(0) == BB) { - new UnreachableInst(TI); + new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 848f2b87c4eed..30cb94d903857 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -66,8 +66,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { } else if (UnwindingBlocks.size() == 1) { UnwindBlock = UnwindingBlocks.front(); } else { - UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F); - new UnwindInst(UnwindBlock); + UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F); + new UnwindInst(F.getContext(), UnwindBlock); for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(), E = UnwindingBlocks.end(); I != E; ++I) { @@ -83,8 +83,9 @@ } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { - UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F); - new UnreachableInst(UnreachableBlock); + UnreachableBlock = BasicBlock::Create(F.getContext(), + "UnifiedUnreachableBlock", &F); + new UnreachableInst(F.getContext(), UnreachableBlock); for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(), E = UnreachableBlocks.end(); I != E; ++I) { @@ -107,16 +108,17 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // nodes (if the function returns values), and convert all of the return // instructions into unconditional branches. // - BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F); + BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), + "UnifiedReturnBlock", &F); PHINode *PN = 0; - if (F.getReturnType() == Type::VoidTy) { - ReturnInst::Create(NULL, NewRetBlock); + if (F.getReturnType() == Type::getVoidTy(F.getContext())) { + ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block...
PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); - ReturnInst::Create(PN, NewRetBlock); + ReturnInst::Create(F.getContext(), PN, NewRetBlock); } // Loop over all of the blocks, replacing the return instruction with an diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp index caef7ec5c45f9..4d838b50e3451 100644 --- a/lib/Transforms/Utils/UnrollLoop.cpp +++ b/lib/Transforms/Utils/UnrollLoop.cpp @@ -25,6 +25,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" @@ -62,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) { if (OnlyPred->getTerminator()->getNumSuccessors() != 1) return 0; - DOUT << "Merging: " << *BB << "into: " << *OnlyPred; + DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred); // Resolve any PHI nodes at the start of the block. They are all // guaranteed to have exactly one entry if they exist, unless there are @@ -113,7 +114,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (!BI || BI->isUnconditional()) { // The loop-rotate pass can be helpful to avoid this in many cases. - DOUT << " Can't unroll; loop not terminated by a conditional branch.\n"; + DEBUG(errs() << + " Can't unroll; loop not terminated by a conditional branch.\n"); return false; } @@ -125,9 +127,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) TripMultiple = L->getSmallConstantTripMultiple(); if (TripCount != 0) - DOUT << " Trip Count = " << TripCount << "\n"; + DEBUG(errs() << " Trip Count = " << TripCount << "\n"); if (TripMultiple != 1) - DOUT << " Trip Multiple = " << TripMultiple << "\n"; + DEBUG(errs() << " Trip Multiple = " << TripMultiple << "\n"); // Effectively "DCE" unrolled iterations that are beyond the tripcount // and will never be executed. 
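Most of the mechanical churn in these hunks is the retirement of the old DOUT stream in favor of DEBUG(errs() << ...). A minimal sketch of the new idiom follows (hypothetical function; the (void) cast mirrors the "(void) NI;" additions earlier in this patch, which keep values referenced only inside DEBUG from triggering unused-variable warnings in builds where the macro compiles away):

    #define DEBUG_TYPE "demo"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void reportTripCount(unsigned TripCount) {
      // Printed only when the tool runs with -debug in an asserts build;
      // otherwise the DEBUG(...) body is compiled out entirely.
      DEBUG(errs() << "  Trip Count = " << TripCount << "\n");

      unsigned Doubled = TripCount * 2;  // used only in the DEBUG output
      (void) Doubled;                    // silence unused-variable warnings
      DEBUG(errs() << "  Doubled = " << Doubled << "\n");
    }

Note that errs() is the raw_ostream replacement for the old std::ostream-based cerr/DOUT machinery, which is why several of these files also gain an include of llvm/Support/raw_ostream.h.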
@@ -153,17 +155,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) } if (CompletelyUnroll) { - DOUT << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << TripCount << "!\n"; + DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() + << " with trip count " << TripCount << "!\n"); } else { - DOUT << "UNROLLING loop %" << Header->getName() - << " by " << Count; + DEBUG(errs() << "UNROLLING loop %" << Header->getName() + << " by " << Count); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { - DOUT << " with a breakout at trip " << BreakoutTrip; + DEBUG(errs() << " with a breakout at trip " << BreakoutTrip); } else if (TripMultiple != 1) { - DOUT << " with " << TripMultiple << " trips per branch"; + DEBUG(errs() << " with " << TripMultiple << " trips per branch"); } - DOUT << "!\n"; + DEBUG(errs() << "!\n"); } std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); @@ -349,7 +351,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) if (isInstructionTriviallyDead(Inst)) (*BB)->getInstList().erase(Inst); - else if (Constant *C = ConstantFoldInstruction(Inst)) { + else if (Constant *C = ConstantFoldInstruction(Inst, + Header->getContext())) { Inst->replaceAllUsesWith(C); (*BB)->getInstList().erase(Inst); } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 20b676d0fb8de..2d8332f5252ab 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -13,23 +13,27 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" // For getNullValue(Type::Int32Ty) #include "llvm/Constants.h" #include "llvm/GlobalValue.h" #include "llvm/Instruction.h" -#include "llvm/MDNode.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { +Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) { Value *&VMSlot = VM[V]; if (VMSlot) return VMSlot; // Does it exist in the map yet? // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the // DenseMap. This includes any recursive calls to MapValue. - // Global values do not need to be seeded into the ValueMap if they are using - // the identity mapping. - if (isa<GlobalValue>(V) || isa<InlineAsm>(V)) + // Global values and metadata do not need to be seeded into the ValueMap if + // they are using the identity mapping. + if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MetadataBase>(V)) return VMSlot = const_cast<Value*>(V); if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { @@ -40,7 +44,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) { for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, Context); if (MV != *i) { // This array must contain a reference to a global, make a new array // and return it.
@@ -51,7 +55,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = ConstantArray::get(CA->getType(), Values); } } @@ -60,7 +64,7 @@ } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, Context); if (MV != *i) { // This struct must contain a reference to a global, make a new struct // and return it. @@ -71,7 +75,7 @@ Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = ConstantStruct::get(CS->getType(), Values); } } @@ -80,12 +84,12 @@ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { std::vector<Constant*> Ops; for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) - Ops.push_back(cast<Constant>(MapValue(*i, VM))); + Ops.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = CE->getWithOperands(Ops); } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) { for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end(); i != e; ++i) { - Value *MV = MapValue(*i, VM); + Value *MV = MapValue(*i, VM, Context); if (MV != *i) { // This vector value must contain a reference to a global, make a new // vector constant and return it. @@ -96,38 +100,16 @@ Values.push_back(cast<Constant>(*j)); Values.push_back(cast<Constant>(MV)); for (++i; i != e; ++i) - Values.push_back(cast<Constant>(MapValue(*i, VM))); + Values.push_back(cast<Constant>(MapValue(*i, VM, Context))); return VM[V] = ConstantVector::get(Values); } } return VM[V] = C; - } else if (MDNode *N = dyn_cast<MDNode>(C)) { - for (MDNode::const_elem_iterator b = N->elem_begin(), i = b, - e = N->elem_end(); i != e; ++i) { - if (!*i) continue; - - Value *MV = MapValue(*i, VM); - if (MV != *i) { - // This MDNode must contain a reference to a global, make a new MDNode - // and return it.
- SmallVector<Value*, 4> Values; - Values.reserve(N->getNumElements()); - for (MDNode::const_elem_iterator j = b; j != i; ++j) - Values.push_back(*j); - Values.push_back(MV); - for (++i; i != e; ++i) - Values.push_back(MapValue(*i, VM)); - return VM[V] = MDNode::get(Values.data(), Values.size()); - } - } - return VM[V] = C; - } else { - assert(0 && "Unknown type of constant!"); + llvm_unreachable("Unknown type of constant!"); } } - return 0; } @@ -136,7 +118,7 @@ void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) { for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, ValueMap); + Value *V = MapValue(*op, ValueMap, I->getParent()->getContext()); assert(V && "Referenced value not in value map!"); *op = V; } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index cbf7070d17eda..b5ae81b50f979 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -23,7 +23,8 @@ #include "llvm/InlineAsm.h" #include "llvm/Instruction.h" #include "llvm/Instructions.h" -#include "llvm/MDNode.h" +#include "llvm/Operator.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/ValueSymbolTable.h" #include "llvm/TypeSymbolTable.h" @@ -31,8 +32,10 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/FormattedStream.h" #include <algorithm> #include <cctype> #include <map> @@ -48,15 +51,15 @@ AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {} static const Module *getModuleFromVal(const Value *V) { if (const Argument *MA = dyn_cast<Argument>(V)) return MA->getParent() ? MA->getParent()->getParent() : 0; - + if (const BasicBlock *BB = dyn_cast<BasicBlock>(V)) return BB->getParent() ? BB->getParent()->getParent() : 0; - + if (const Instruction *I = dyn_cast<Instruction>(V)) { const Function *M = I->getParent() ? I->getParent()->getParent() : 0; return M ? M->getParent() : 0; } - + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV->getParent(); return 0; @@ -64,10 +67,10 @@ static const Module *getModuleFromVal(const Value *V) { // PrintEscapedString - Print each character of the specified string, escaping // it if it is not printable or if it is an escape char. -static void PrintEscapedString(const char *Str, unsigned Length, +static void PrintEscapedString(const StringRef &Name, raw_ostream &Out) { - for (unsigned i = 0; i != Length; ++i) { - unsigned char C = Str[i]; + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + unsigned char C = Name[i]; if (isprint(C) && C != '\\' && C != '"') Out << C; else @@ -75,12 +78,6 @@ } } -// PrintEscapedString - Print each character of the specified string, escaping -// it if it is not printable or if it is an escape char. -static void PrintEscapedString(const std::string &Str, raw_ostream &Out) { - PrintEscapedString(Str.c_str(), Str.size(), Out); -} - enum PrefixType { GlobalPrefix, LabelPrefix, @@ -91,39 +88,39 @@ enum PrefixType { /// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either /// prefixed with % (if the string only contains simple characters) or is /// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, const char *NameStr, - unsigned NameLen, PrefixType Prefix) { - assert(NameStr && "Cannot get empty name!"); +static void PrintLLVMName(raw_ostream &OS, const StringRef &Name, + PrefixType Prefix) { + assert(Name.data() && "Cannot get empty name!"); switch (Prefix) { - default: assert(0 && "Bad prefix!"); + default: llvm_unreachable("Bad prefix!"); case NoPrefix: break; case GlobalPrefix: OS << '@'; break; case LabelPrefix: break; case LocalPrefix: OS << '%'; break; } - + // Scan the name to see if it needs quotes first. - bool NeedsQuotes = isdigit(NameStr[0]); + bool NeedsQuotes = isdigit(Name[0]); if (!NeedsQuotes) { - for (unsigned i = 0; i != NameLen; ++i) { - char C = NameStr[i]; + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + char C = Name[i]; if (!isalnum(C) && C != '-' && C != '.' && C != '_') { NeedsQuotes = true; break; } } } - + // If we didn't need any quotes, just write out the name in one blast. if (!NeedsQuotes) { - OS.write(NameStr, NameLen); + OS << Name; return; } - + // Okay, we need quotes. Output the quotes and escape any scary characters as // needed. OS << '"'; - PrintEscapedString(NameStr, NameLen, OS); + PrintEscapedString(Name, OS); OS << '"'; } @@ -131,7 +128,7 @@ static void PrintLLVMName(raw_ostream &OS, const char *NameStr, /// prefixed with % (if the string only contains simple characters) or is /// surrounded with ""'s (if it has special chars in it). Print it out. static void PrintLLVMName(raw_ostream &OS, const Value *V) { - PrintLLVMName(OS, V->getNameStart(), V->getNameLen(), + PrintLLVMName(OS, V->getName(), isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix); } @@ -178,11 +175,11 @@ void TypePrinting::CalcTypeName(const Type *Ty, return; } } - + // Check to see if the Type is already on the stack... unsigned Slot = 0, CurSize = TypeStack.size(); while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type - + // This is another base case for the recursion. In this case, we know // that we have looped back to a type that we have previously visited. // Generate the appropriate upreference to handle this. if (Slot < CurSize) { OS << '\\' << unsigned(CurSize-Slot); // Here's the upreference return; } - + TypeStack.push_back(Ty); // Recursive case: Add us to the stack.. - + switch (Ty->getTypeID()) { case Type::VoidTyID: OS << "void"; break; case Type::FloatTyID: OS << "float"; break; @@ -205,7 +202,7 @@ case Type::IntegerTyID: OS << 'i' << cast<IntegerType>(Ty)->getBitWidth(); break; - + case Type::FunctionTyID: { const FunctionType *FTy = cast<FunctionType>(Ty); CalcTypeName(FTy->getReturnType(), TypeStack, OS); @@ -269,7 +266,7 @@ OS << "<unrecognized-type>"; break; } - + TypeStack.pop_back(); // Remove self from stack. } @@ -287,13 +284,13 @@ void TypePrinting::print(const Type *Ty, raw_ostream &OS, return; } } - + // Otherwise we have a type that has not been named but is a derived type. // Carefully recurse the type hierarchy to print out any contained symbolic // names. SmallVector<const Type *, 16> TypeStack; std::string TypeName; - + raw_string_ostream TypeOS(TypeName); CalcTypeName(Ty, TypeStack, TypeOS, IgnoreTopLevelName); OS << TypeOS.str(); @@ -309,13 +306,13 @@ namespace { // objects, we keep several helper maps.
DenseSet<const Value*> VisitedConstants; DenseSet<const Type*> VisitedTypes; - + TypePrinting &TP; std::vector<const Type*> &NumberedTypes; public: TypeFinder(TypePrinting &tp, std::vector<const Type*> &numberedTypes) : TP(tp), NumberedTypes(numberedTypes) {} - + void Run(const Module &M) { // Get types from the type symbol table. This gets opaque types referenced // only through derived named types. const TypeSymbolTable &ST = M.getTypeSymbolTable(); for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end(); TI != E; ++TI) IncorporateType(TI->second); - + // Get types from global variables. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { IncorporateType(I->getType()); if (I->hasInitializer()) IncorporateValue(I->getInitializer()); } - + // Get types from aliases. for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { IncorporateType(I->getType()); IncorporateValue(I->getAliasee()); } - + // Get types from functions. for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { IncorporateType(FI->getType()); - + for (Function::const_iterator BB = FI->begin(), E = FI->end(); BB != E;++BB) for (BasicBlock::const_iterator II = BB->begin(), @@ -356,40 +353,40 @@ } } } - + private: void IncorporateType(const Type *Ty) { // Check to see if we've already visited this type. if (!VisitedTypes.insert(Ty).second) return; - + // If this is a structure or opaque type, add a name for the type. if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements()) || isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) { TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size()))); NumberedTypes.push_back(Ty); } - + // Recursively walk all contained types. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) - IncorporateType(*I); + IncorporateType(*I); } - + /// IncorporateValue - This method is used to walk operand lists finding /// types hiding in constant expressions and other operands that won't be /// walked in other ways. GlobalValues, basic blocks, instructions, and /// inst operands are all explicitly enumerated. void IncorporateValue(const Value *V) { if (V == 0 || !isa<Constant>(V) || isa<GlobalValue>(V)) return; - + // Already visited? if (!VisitedConstants.insert(V).second) return; - + // Check this type. IncorporateType(V->getType()); - + // Look in operands for types. const Constant *C = cast<Constant>(V); for (Constant::const_op_iterator I = C->op_begin(), @@ -403,18 +400,18 @@ /// AddModuleTypesToPrinter - Add all of the symbolic type names for types in /// the specified module to the TypePrinter and all numbered types to it and the /// NumberedTypes table. -static void AddModuleTypesToPrinter(TypePrinting &TP, +static void AddModuleTypesToPrinter(TypePrinting &TP, std::vector<const Type*> &NumberedTypes, const Module *M) { if (M == 0) return; - + // If the module has a symbol table, take all global types and stuff their // names into the TypeNames map. const TypeSymbolTable &ST = M->getTypeSymbolTable(); for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end(); TI != E; ++TI) { const Type *Ty = cast<Type>(TI->second); - + // As a heuristic, don't insert pointer to primitive types, because // they are used too often to have a single useful name. if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { @@ -423,18 +420,20 @@ static void AddModuleTypesToPrinter(TypePrinting &TP, !isa<OpaqueType>(PETy)) continue; } - + // Likewise don't insert primitives either. if (Ty->isInteger() || Ty->isPrimitiveType()) continue; - + // Get the name as a string and insert it into TypeNames.
std::string NameStr; - raw_string_ostream NameOS(NameStr); - PrintLLVMName(NameOS, TI->first.c_str(), TI->first.length(), LocalPrefix); - TP.addTypeName(Ty, NameOS.str()); + raw_string_ostream NameROS(NameStr); + formatted_raw_ostream NameOS(NameROS); + PrintLLVMName(NameOS, TI->first, LocalPrefix); + NameOS.flush(); + TP.addTypeName(Ty, NameStr); } - + // Walk the entire module to find references to unnamed structure and opaque // types. This is required for correctness by opaque types (because multiple // uses of an unnamed opaque type need to be referred to by the same ID) and @@ -464,35 +463,49 @@ namespace { /// class SlotTracker { public: - /// ValueMap - A mapping of Values to slot numbers + /// ValueMap - A mapping of Values to slot numbers. typedef DenseMap<const Value*, unsigned> ValueMap; - -private: - /// TheModule - The module for which we are holding slot numbers + +private: + /// TheModule - The module for which we are holding slot numbers. const Module* TheModule; - - /// TheFunction - The function for which we are holding slot numbers + + /// TheFunction - The function for which we are holding slot numbers. const Function* TheFunction; bool FunctionProcessed; - - /// mMap - The TypePlanes map for the module level data + + /// TheMDNode - The MDNode for which we are holding slot numbers. + const MDNode *TheMDNode; + + /// TheNamedMDNode - The NamedMDNode for which we are holding slot numbers. + const NamedMDNode *TheNamedMDNode; + + /// mMap - The TypePlanes map for the module level data. ValueMap mMap; unsigned mNext; - + + /// fMap - The TypePlanes map for the function level data. ValueMap fMap; unsigned fNext; - + + /// mdnMap - Map for MDNodes. + ValueMap mdnMap; + unsigned mdnNext; public: /// Construct from a module explicit SlotTracker(const Module *M); /// Construct from a function, starting out in incorp state. explicit SlotTracker(const Function *F); + /// Construct from an mdnode. + explicit SlotTracker(const MDNode *N); + /// Construct from a named mdnode. + explicit SlotTracker(const NamedMDNode *N); /// Return the slot number of the specified value in its type /// plane. If something is not in the SlotTracker, return -1. int getLocalSlot(const Value *V); int getGlobalSlot(const GlobalValue *V); + int getMetadataSlot(const MDNode *N); /// If you'd like to deal with a function instead of just a module, use /// this method to get its data into the SlotTracker. @@ -506,14 +519,23 @@ public: /// will reset the state of the machine back to just the module contents. void purgeFunction(); - // Implementation Details -private: + /// MDNode map iterators. + ValueMap::iterator mdnBegin() { return mdnMap.begin(); } + ValueMap::iterator mdnEnd() { return mdnMap.end(); } + unsigned mdnSize() const { return mdnMap.size(); } + bool mdnEmpty() const { return mdnMap.empty(); } + /// This function does the actual initialization. inline void initialize(); + // Implementation Details +private: /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table. void CreateModuleSlot(const GlobalValue *V); - + + /// CreateMetadataSlot - Insert the specified MDNode* into the slot table. + void CreateMetadataSlot(const MDNode *N); + /// CreateFunctionSlot - Insert the specified Value* into the slot table. void CreateFunctionSlot(const Value *V); @@ -521,9 +543,15 @@ private: /// and function declarations, but not the contents of those functions.
void processModule(); - /// Add all of the functions arguments, basic blocks, and instructions + /// Add all of the function's arguments, basic blocks, and instructions. void processFunction(); + /// Add all MDNode operands. + void processMDNode(); + + /// Add all NamedMDNode operands. + void processNamedMDNode(); + SlotTracker(const SlotTracker &); // DO NOT IMPLEMENT void operator=(const SlotTracker &); // DO NOT IMPLEMENT }; @@ -534,27 +562,27 @@ private: static SlotTracker *createSlotTracker(const Value *V) { if (const Argument *FA = dyn_cast<Argument>(V)) return new SlotTracker(FA->getParent()); - + if (const Instruction *I = dyn_cast<Instruction>(V)) return new SlotTracker(I->getParent()->getParent()); - + if (const BasicBlock *BB = dyn_cast<BasicBlock>(V)) return new SlotTracker(BB->getParent()); - + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return new SlotTracker(GV->getParent()); - + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) - return new SlotTracker(GA->getParent()); - + return new SlotTracker(GA->getParent()); + if (const Function *Func = dyn_cast<Function>(V)) return new SlotTracker(Func); - + return 0; } #if 0 -#define ST_DEBUG(X) cerr << X +#define ST_DEBUG(X) errs() << X #else #define ST_DEBUG(X) #endif @@ -562,14 +590,27 @@ static SlotTracker *createSlotTracker(const Value *V) { // Module level constructor. Causes the contents of the Module (sans functions) // to be added to the slot table. SlotTracker::SlotTracker(const Module *M) - : TheModule(M), TheFunction(0), FunctionProcessed(false), mNext(0), fNext(0) { + : TheModule(M), TheFunction(0), FunctionProcessed(false), TheMDNode(0), + TheNamedMDNode(0), mNext(0), fNext(0), mdnNext(0) { } // Function level constructor. Causes the contents of the Module and the one // function provided to be added to the slot table. SlotTracker::SlotTracker(const Function *F) : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false), - mNext(0), fNext(0) { + TheMDNode(0), TheNamedMDNode(0), mNext(0), fNext(0), mdnNext(0) { +} + +// Constructor to handle single MDNode. +SlotTracker::SlotTracker(const MDNode *C) + : TheModule(0), TheFunction(0), FunctionProcessed(false), TheMDNode(C), + TheNamedMDNode(0), mNext(0), fNext(0), mdnNext(0) { +} + +// Constructor to handle single NamedMDNode. +SlotTracker::SlotTracker(const NamedMDNode *N) + : TheModule(0), TheFunction(0), FunctionProcessed(false), TheMDNode(0), + TheNamedMDNode(N), mNext(0), fNext(0), mdnNext(0) { } inline void SlotTracker::initialize() { @@ -577,60 +618,120 @@ inline void SlotTracker::initialize() { processModule(); TheModule = 0; ///< Prevent re-processing next time we're called. } - + if (TheFunction && !FunctionProcessed) processFunction(); + + if (TheMDNode) + processMDNode(); + + if (TheNamedMDNode) + processNamedMDNode(); } // Iterate through all the global variables, functions, and global // variable initializers and create slots for them. void SlotTracker::processModule() { ST_DEBUG("begin processModule!\n"); - + // Add all of the unnamed global variables to the value table. for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) - if (!I->hasName()) + E = TheModule->global_end(); I != E; ++I) { + if (!I->hasName()) CreateModuleSlot(I); - + if (I->hasInitializer()) { + if (MDNode *N = dyn_cast<MDNode>(I->getInitializer())) + CreateMetadataSlot(N); + } + } + + // Add metadata used by named metadata.
+ for (Module::const_named_metadata_iterator + I = TheModule->named_metadata_begin(), + E = TheModule->named_metadata_end(); I != E; ++I) { + const NamedMDNode *NMD = I; + for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) { + MDNode *MD = dyn_cast_or_null<MDNode>(NMD->getElement(i)); + if (MD) + CreateMetadataSlot(MD); + } + } + // Add all the unnamed functions to the table. for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); I != E; ++I) if (!I->hasName()) CreateModuleSlot(I); - + ST_DEBUG("end processModule!\n"); } - // Process the arguments, basic blocks, and instructions of a function. void SlotTracker::processFunction() { ST_DEBUG("begin processFunction!\n"); fNext = 0; - + // Add all the function arguments with no names. for (Function::const_arg_iterator AI = TheFunction->arg_begin(), AE = TheFunction->arg_end(); AI != AE; ++AI) if (!AI->hasName()) CreateFunctionSlot(AI); - + ST_DEBUG("Inserting Instructions:\n"); - + + MetadataContext &TheMetadata = TheFunction->getContext().getMetadata(); + // Add all of the basic blocks and instructions with no names. for (Function::const_iterator BB = TheFunction->begin(), E = TheFunction->end(); BB != E; ++BB) { if (!BB->hasName()) CreateFunctionSlot(BB); - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (I->getType() != Type::VoidTy && !I->hasName()) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; + ++I) { + if (I->getType() != Type::getVoidTy(TheFunction->getContext()) && + !I->hasName()) CreateFunctionSlot(I); + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i))) + CreateMetadataSlot(N); + + // Process metadata attached to this instruction. + const MetadataContext::MDMapTy *MDs = TheMetadata.getMDs(I); + if (MDs) + for (MetadataContext::MDMapTy::const_iterator MI = MDs->begin(), + ME = MDs->end(); MI != ME; ++MI) + if (MDNode *MDN = dyn_cast_or_null<MDNode>(MI->second)) + CreateMetadataSlot(MDN); + } } - + FunctionProcessed = true; - + ST_DEBUG("end processFunction!\n"); } +/// processMDNode - Process TheMDNode. +void SlotTracker::processMDNode() { + ST_DEBUG("begin processMDNode!\n"); + mdnNext = 0; + CreateMetadataSlot(TheMDNode); + TheMDNode = 0; + ST_DEBUG("end processMDNode!\n"); +} + +/// processNamedMDNode - Process TheNamedMDNode. +void SlotTracker::processNamedMDNode() { + ST_DEBUG("begin processNamedMDNode!\n"); + mdnNext = 0; + for (unsigned i = 0, e = TheNamedMDNode->getNumElements(); i != e; ++i) { + MDNode *MD = dyn_cast_or_null<MDNode>(TheNamedMDNode->getElement(i)); + if (MD) + CreateMetadataSlot(MD); + } + TheNamedMDNode = 0; + ST_DEBUG("end processNamedMDNode!\n"); +} /// Clean up after incorporating a function. This is the only way to get out of /// the function incorporation state that affects get*Slot/Create*Slot. Function /// incorporation state is indicated by TheFunction != 0. @@ -646,20 +747,30 @@ int SlotTracker::getGlobalSlot(const GlobalValue *V) { // Check for uninitialized state and do lazy initialization. initialize(); - + // Find the type plane in the module map ValueMap::iterator MI = mMap.find(V); return MI == mMap.end() ? -1 : (int)MI->second; } +/// getMetadataSlot - Get the slot number of a MDNode. +int SlotTracker::getMetadataSlot(const MDNode *N) { + // Check for uninitialized state and do lazy initialization. + initialize(); + + // Find the type plane in the module map + ValueMap::iterator MI = mdnMap.find(N); + return MI == mdnMap.end() ?
-1 : (int)MI->second; +} + /// getLocalSlot - Get the slot number for a value that is local to a function. int SlotTracker::getLocalSlot(const Value *V) { assert(!isa<Constant>(V) && "Can't get a constant or global slot with this!"); - + // Check for uninitialized state and do lazy initialization. initialize(); - + ValueMap::iterator FI = fMap.find(V); return FI == fMap.end() ? -1 : (int)FI->second; } @@ -668,12 +779,13 @@ int SlotTracker::getLocalSlot(const Value *V) { /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table. void SlotTracker::CreateModuleSlot(const GlobalValue *V) { assert(V && "Can't insert a null Value into SlotTracker!"); - assert(V->getType() != Type::VoidTy && "Doesn't need a slot!"); + assert(V->getType() != Type::getVoidTy(V->getContext()) && + "Doesn't need a slot!"); assert(!V->hasName() && "Doesn't need a slot!"); - + unsigned DestSlot = mNext++; mMap[V] = DestSlot; - + ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" << DestSlot << " ["); // G = Global, F = Function, A = Alias, o = other @@ -682,28 +794,45 @@ void SlotTracker::CreateModuleSlot(const GlobalValue *V) { (isa<GlobalVariable>(V) ? 'G' : (isa<Function>(V) ? 'F' : (isa<GlobalAlias>(V) ? 'A' : 'o'))) << "]\n"); } - /// CreateFunctionSlot - Create a new slot for the specified value if it has no name. void SlotTracker::CreateFunctionSlot(const Value *V) { - assert(V->getType() != Type::VoidTy && !V->hasName() && - "Doesn't need a slot!"); - + assert(V->getType() != Type::getVoidTy(TheFunction->getContext()) && + !V->hasName() && "Doesn't need a slot!"); + unsigned DestSlot = fNext++; fMap[V] = DestSlot; - + // G = Global, F = Function, o = other ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" << DestSlot << " [o]\n"); -} +} +/// CreateMetadataSlot - Insert the specified MDNode* into the slot table.
+void SlotTracker::CreateMetadataSlot(const MDNode *N) { + assert(N && "Can't insert a null Value into SlotTracker!"); + + ValueMap::iterator I = mdnMap.find(N); + if (I != mdnMap.end()) + return; + unsigned DestSlot = mdnNext++; + mdnMap[N] = DestSlot; + + for (MDNode::const_elem_iterator MDI = N->elem_begin(), + MDE = N->elem_end(); MDI != MDE; ++MDI) { + const Value *TV = *MDI; + if (TV) + if (const MDNode *N2 = dyn_cast<MDNode>(TV)) + CreateMetadataSlot(N2); + } +} //===----------------------------------------------------------------------===// // AsmWriter Implementation //===----------------------------------------------------------------------===// static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, - TypePrinting &TypePrinter, + TypePrinting *TypePrinter, SlotTracker *Machine); @@ -741,17 +870,93 @@ static const char *getPredicateText(unsigned predicate) { return pred; } +static void WriteMDNodeComment(const MDNode *Node, + formatted_raw_ostream &Out) { + if (Node->getNumElements() < 1) + return; + ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getElement(0)); + if (!CI) return; + unsigned Val = CI->getZExtValue(); + unsigned Tag = Val & ~LLVMDebugVersionMask; + if (Val >= LLVMDebugVersion) { + if (Tag == dwarf::DW_TAG_auto_variable) + Out << "; [ DW_TAG_auto_variable ]"; + else if (Tag == dwarf::DW_TAG_arg_variable) + Out << "; [ DW_TAG_arg_variable ]"; + else if (Tag == dwarf::DW_TAG_return_variable) + Out << "; [ DW_TAG_return_variable ]"; + else if (Tag == dwarf::DW_TAG_vector_type) + Out << "; [ DW_TAG_vector_type ]"; + else if (Tag == dwarf::DW_TAG_user_base) + Out << "; [ DW_TAG_user_base ]"; + else + Out << "; [" << dwarf::TagString(Tag) << " ]"; + } +} + +static void WriteMDNodes(formatted_raw_ostream &Out, TypePrinting &TypePrinter, + SlotTracker &Machine) { + SmallVector<const MDNode *, 16> Nodes; + Nodes.resize(Machine.mdnSize()); + for (SlotTracker::ValueMap::iterator I = + Machine.mdnBegin(), E = Machine.mdnEnd(); I != E; ++I) + Nodes[I->second] = cast<MDNode>(I->first); + + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) { + Out << '!' << i << " = metadata "; + const MDNode *Node = Nodes[i]; + Out << "!{"; + for (MDNode::const_elem_iterator NI = Node->elem_begin(), + NE = Node->elem_end(); NI != NE;) { + const Value *V = *NI; + if (!V) + Out << "null"; + else if (const MDNode *N = dyn_cast<MDNode>(V)) { + Out << "metadata "; + Out << '!' << Machine.getMetadataSlot(N); + } + else { + TypePrinter.print((*NI)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, *NI, &TypePrinter, &Machine); + } + if (++NI != NE) + Out << ", "; + } + + Out << "}"; + WriteMDNodeComment(Node, Out); + Out << "\n"; + } +} + +static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { + if (const OverflowingBinaryOperator *OBO = + dyn_cast<OverflowingBinaryOperator>(U)) { + if (OBO->hasNoUnsignedWrap()) + Out << " nuw"; + if (OBO->hasNoSignedWrap()) + Out << " nsw"; + } else if (const SDivOperator *Div = dyn_cast<SDivOperator>(U)) { + if (Div->isExact()) + Out << " exact"; + } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { + if (GEP->isInBounds()) + Out << " inbounds"; + } +} + static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinting &TypePrinter, SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (CI->getType() == Type::Int1Ty) { + if (CI->getType() == Type::getInt1Ty(CV->getContext())) { Out << (CI->getZExtValue() ?
"true" : "false"); return; } Out << CI->getValue(); return; } - + if (const ConstantFP *CFP = dyn_cast(CV)) { if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble || &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) { @@ -789,14 +994,14 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, APFloat apf = CFP->getValueAPF(); // Floats are represented in ASCII IR as double, convert. if (!isDouble) - apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "0x" << - utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()), + Out << "0x" << + utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()), Buffer+40); return; } - + // Some form of long double. These appear as a magic letter identifying // the type, then a fixed number of hex digits. Out << "0x"; @@ -827,7 +1032,7 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) Out << 'M'; else - assert(0 && "Unsupported floating point type"); + llvm_unreachable("Unsupported floating point type"); // api needed to prevent premature destruction APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t* p = api.getRawData(); @@ -849,12 +1054,12 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, } return; } - + if (isa(CV)) { Out << "zeroinitializer"; return; } - + if (const ConstantArray *CA = dyn_cast(CV)) { // As a special case, print the array as a string if it is an array of // i8 with ConstantInt values. @@ -870,19 +1075,19 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinter.print(ETy, Out); Out << ' '; WriteAsOperandInternal(Out, CA->getOperand(0), - TypePrinter, Machine); + &TypePrinter, Machine); for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) { Out << ", "; TypePrinter.print(ETy, Out); Out << ' '; - WriteAsOperandInternal(Out, CA->getOperand(i), TypePrinter, Machine); + WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine); } } Out << ']'; } return; } - + if (const ConstantStruct *CS = dyn_cast(CV)) { if (CS->getType()->isPacked()) Out << '<'; @@ -893,24 +1098,24 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinter.print(CS->getOperand(0)->getType(), Out); Out << ' '; - WriteAsOperandInternal(Out, CS->getOperand(0), TypePrinter, Machine); + WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine); for (unsigned i = 1; i < N; i++) { Out << ", "; TypePrinter.print(CS->getOperand(i)->getType(), Out); Out << ' '; - WriteAsOperandInternal(Out, CS->getOperand(i), TypePrinter, Machine); + WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine); } Out << ' '; } - + Out << '}'; if (CS->getType()->isPacked()) Out << '>'; return; } - + if (const ConstantVector *CP = dyn_cast(CV)) { const Type *ETy = CP->getType()->getElementType(); assert(CP->getNumOperands() > 0 && @@ -918,36 +1123,35 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, Out << '<'; TypePrinter.print(ETy, Out); Out << ' '; - WriteAsOperandInternal(Out, CP->getOperand(0), TypePrinter, Machine); + WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine); for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { Out << ", "; TypePrinter.print(ETy, Out); Out << ' '; - WriteAsOperandInternal(Out, CP->getOperand(i), TypePrinter, Machine); + WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, 
Machine); } Out << '>'; return; } - + if (isa<ConstantPointerNull>(CV)) { Out << "null"; return; } - + if (isa<UndefValue>(CV)) { Out << "undef"; return; } - - if (const MDString *S = dyn_cast<MDString>(CV)) { - Out << "!\""; - PrintEscapedString(S->begin(), S->size(), Out); - Out << '"'; + + if (const MDNode *Node = dyn_cast<MDNode>(CV)) { + Out << "!" << Machine->getMetadataSlot(Node); return; } if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { Out << CE->getOpcodeName(); + WriteOptimizationInfo(Out, CE); if (CE->isCompare()) Out << ' ' << getPredicateText(CE->getPredicate()); Out << " ("; @@ -955,7 +1159,7 @@ for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) { TypePrinter.print((*OI)->getType(), Out); Out << ' '; - WriteAsOperandInternal(Out, *OI, TypePrinter, Machine); + WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine); if (OI+1 != CE->op_end()) Out << ", "; } @@ -974,7 +1178,7 @@ Out << ')'; return; } - + Out << "<placeholder or erroneous Constant>"; } @@ -984,23 +1188,26 @@ /// WriteAsOperandInternal - Write the name of the specified value out to the /// specified ostream. This can be useful when you just want to print int %reg126, not /// the whole instruction that generated it. /// static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, - TypePrinting &TypePrinter, + TypePrinting *TypePrinter, SlotTracker *Machine) { if (V->hasName()) { PrintLLVMName(Out, V); return; } - + const Constant *CV = dyn_cast<Constant>(V); if (CV && !isa<GlobalValue>(CV)) { - WriteConstantInt(Out, CV, TypePrinter, Machine); + assert(TypePrinter && "Constants require TypePrinting!"); + WriteConstantInt(Out, CV, *TypePrinter, Machine); return; } - + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { Out << "asm "; if (IA->hasSideEffects()) Out << "sideeffect "; + if (IA->isMsAsm()) + Out << "msasm "; Out << '"'; PrintEscapedString(IA->getAsmString(), Out); Out << "\", \""; @@ -1008,7 +1215,24 @@ Out << '"'; return; } - + + if (const MDNode *N = dyn_cast<MDNode>(V)) { + Out << '!' << Machine->getMetadataSlot(N); + return; + } + + if (const MDString *MDS = dyn_cast<MDString>(V)) { + Out << "!\""; + PrintEscapedString(MDS->getString(), Out); + Out << '"'; + return; + } + + if (V->getValueID() == Value::PseudoSourceValueVal) { + V->print(Out); + return; + } + char Prefix = '%'; int Slot; if (Machine) { @@ -1027,30 +1251,29 @@ } else { Slot = Machine->getLocalSlot(V); } + delete Machine; } else { Slot = -1; } - delete Machine; } - + if (Slot != -1) Out << Prefix << Slot; else Out << "<badref>"; } -/// WriteAsOperand - Write the name of the specified value out to the specified -/// ostream. This can be useful when you just want to print int %reg126, not -/// the whole instruction that generated it. -/// -void llvm::WriteAsOperand(std::ostream &Out, const Value *V, bool PrintType, - const Module *Context) { - raw_os_ostream OS(Out); - WriteAsOperand(OS, V, PrintType, Context); -} +void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, + bool PrintType, const Module *Context) { + + // Fast path: Don't construct and populate a TypePrinting object if we + // won't be needing any types printed.
+ if (!PrintType && + (!isa<Constant>(V) || V->hasName() || isa<GlobalValue>(V))) { + WriteAsOperandInternal(Out, V, 0, 0); + return; + } -void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, bool PrintType, - const Module *Context) { if (Context == 0) Context = getModuleFromVal(V); TypePrinting TypePrinter; @@ -1061,32 +1284,40 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, bool PrintType, Out << ' '; } - WriteAsOperandInternal(Out, V, TypePrinter, 0); + WriteAsOperandInternal(Out, V, &TypePrinter, 0); } - namespace { class AssemblyWriter { - raw_ostream &Out; + formatted_raw_ostream &Out; SlotTracker &Machine; const Module *TheModule; TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; std::vector<const Type*> NumberedTypes; + DenseMap<unsigned, const char *> MDNames; - // Each MDNode is assigned unique MetadataIDNo. - std::map<const MDNode *, unsigned> MDNodes; - unsigned MetadataIDNo; public: - inline AssemblyWriter(raw_ostream &o, SlotTracker &Mac, const Module *M, + inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, + const Module *M, AssemblyAnnotationWriter *AAW) - : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW), MetadataIDNo(0) { + : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); + // FIXME: Provide MDPrinter + if (M) { + MetadataContext &TheMetadata = M->getContext().getMetadata(); + const StringMap<unsigned> *Names = TheMetadata.getHandlerNames(); + for (StringMapConstIterator<unsigned> I = Names->begin(), + E = Names->end(); I != E; ++I) { + const StringMapEntry<unsigned> &Entry = *I; + MDNames[I->second] = Entry.getKeyData(); + } + } } void write(const Module *M) { printModule(M); } - + void write(const GlobalValue *G) { if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) printGlobal(GV); @@ -1095,17 +1326,14 @@ public: else if (const Function *F = dyn_cast<Function>(G)) printFunction(F); else - assert(0 && "Unknown global"); + llvm_unreachable("Unknown global"); } - + void write(const BasicBlock *BB) { printBasicBlock(BB); } void write(const Instruction *I) { printInstruction(*I); } void writeOperand(const Value *Op, bool PrintType); void writeParamOperand(const Value *Operand, Attributes Attrs); - void printMDNode(const MDNode *Node, bool StandAlone); - - const Module* getModule() { return TheModule; } private: void printModule(const Module *M); @@ -1132,11 +1360,11 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { TypePrinter.print(Operand->getType(), Out); Out << ' '; } - WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine); + WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine); } } -void AssemblyWriter::writeParamOperand(const Value *Operand, +void AssemblyWriter::writeParamOperand(const Value *Operand, Attributes Attrs) { if (Operand == 0) { Out << "<null operand!>"; @@ -1148,7 +1376,7 @@ Out << ' ' << Attribute::getAsString(Attrs); Out << ' '; // Print the operand - WriteAsOperandInternal(Out, Operand, TypePrinter, &Machine); + WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine); } } @@ -1169,6 +1397,7 @@ void AssemblyWriter::printModule(const Module *M) { std::string Asm = M->getModuleInlineAsm(); size_t CurPos = 0; size_t NewLine = Asm.find_first_of('\n', CurPos); + Out << '\n'; while (NewLine != std::string::npos) { // We found a newline, print the portion of the asm string from the // last newline up to this newline.
@@ -1183,11 +1412,12 @@ void AssemblyWriter::printModule(const Module *M) { PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); Out << "\"\n"; } - + // Loop over the dependent libraries and emit them. Module::lib_iterator LI = M->lib_begin(); Module::lib_iterator LE = M->lib_end(); if (LI != LE) { + Out << '\n'; Out << "deplibs = [ "; while (LI != LE) { Out << '"' << *LI << '"'; @@ -1195,16 +1425,19 @@ if (LI != LE) Out << ", "; } - Out << " ]\n"; + Out << " ]"; } // Loop over the symbol table, emitting all id'd types. + if (!M->getTypeSymbolTable().empty() || !NumberedTypes.empty()) Out << '\n'; printTypeSymbolTable(M->getTypeSymbolTable()); + // Output all globals. + if (!M->global_empty()) Out << '\n'; for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) printGlobal(I); - + // Output all aliases. if (!M->alias_empty()) Out << "\n"; for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); @@ -1214,36 +1447,55 @@ // Output all of the functions. for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) printFunction(I); + + // Output named metadata. + if (!M->named_metadata_empty()) Out << '\n'; + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode *NMD = I; + Out << "!" << NMD->getName() << " = !{"; + for (unsigned i = 0, e = NMD->getNumElements(); i != e; ++i) { + if (i) Out << ", "; + MDNode *MD = dyn_cast_or_null<MDNode>(NMD->getElement(i)); + Out << '!' << Machine.getMetadataSlot(MD); + } + Out << "}\n"; + } + + // Output metadata. + if (!Machine.mdnEmpty()) Out << '\n'; + WriteMDNodes(Out, TypePrinter, Machine); } -static void PrintLinkage(GlobalValue::LinkageTypes LT, raw_ostream &Out) { +static void PrintLinkage(GlobalValue::LinkageTypes LT, + formatted_raw_ostream &Out) { switch (LT) { - case GlobalValue::PrivateLinkage: Out << "private "; break; - case GlobalValue::InternalLinkage: Out << "internal "; break; + case GlobalValue::ExternalLinkage: break; + case GlobalValue::PrivateLinkage: Out << "private "; break; + case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break; + case GlobalValue::InternalLinkage: Out << "internal "; break; + case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; + case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; + case GlobalValue::WeakAnyLinkage: Out << "weak "; break; + case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break; + case GlobalValue::CommonLinkage: Out << "common "; break; + case GlobalValue::AppendingLinkage: Out << "appending "; break; + case GlobalValue::DLLImportLinkage: Out << "dllimport "; break; + case GlobalValue::DLLExportLinkage: Out << "dllexport "; break; + case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break; case GlobalValue::AvailableExternallyLinkage: Out << "available_externally "; break; - case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; - case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; - case GlobalValue::WeakAnyLinkage: Out << "weak "; break; - case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break; - case GlobalValue::CommonLinkage: Out << "common "; break; - case GlobalValue::AppendingLinkage: Out << "appending "; break; - case GlobalValue::DLLImportLinkage: Out << "dllimport "; break; - case GlobalValue::DLLExportLinkage: Out << "dllexport ";
break; - case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break; - case GlobalValue::ExternalLinkage: break; case GlobalValue::GhostLinkage: - Out << "GhostLinkage not allowed in AsmWriter!\n"; - abort(); + llvm_unreachable("GhostLinkage not allowed in AsmWriter!"); } } static void PrintVisibility(GlobalValue::VisibilityTypes Vis, - raw_ostream &Out) { + formatted_raw_ostream &Out) { switch (Vis) { - default: assert(0 && "Invalid visibility style!"); + default: llvm_unreachable("Invalid visibility style!"); case GlobalValue::DefaultVisibility: break; case GlobalValue::HiddenVisibility: Out << "hidden "; break; case GlobalValue::ProtectedVisibility: Out << "protected "; break; @@ -1251,36 +1503,12 @@ static void PrintVisibility(GlobalValue::VisibilityTypes Vis, } void AssemblyWriter::printGlobal(const GlobalVariable *GV) { - if (GV->hasInitializer()) - // If GV is initialized using Metadata then separate out metadata - // operands used by the initializer. Note, MDNodes are not cyclic. - if (MDNode *N = dyn_cast(GV->getInitializer())) { - SmallVector WorkList; - // Collect MDNodes used by the initializer. - for (MDNode::const_elem_iterator I = N->elem_begin(), E = N->elem_end(); - I != E; ++I) { - const Value *TV = *I; - if (TV) - if (const MDNode *NN = dyn_cast(TV)) - WorkList.push_back(NN); - } - - // Print MDNodes used by the initializer. - while (!WorkList.empty()) { - const MDNode *N = WorkList.back(); WorkList.pop_back(); - printMDNode(N, true); - Out << '\n'; - } - } - - if (GV->hasName()) { - PrintLLVMName(Out, GV); - Out << " = "; - } + WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine); + Out << " = "; if (!GV->hasInitializer() && GV->hasExternalLinkage()) Out << "external "; - + PrintLinkage(GV->getLinkage(), Out); PrintVisibility(GV->getVisibility(), Out); @@ -1292,12 +1520,9 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { if (GV->hasInitializer()) { Out << ' '; - if (MDNode *N = dyn_cast(GV->getInitializer())) - printMDNode(N, false); - else - writeOperand(GV->getInitializer(), false); + writeOperand(GV->getInitializer(), false); } - + if (GV->hasSection()) Out << ", section \"" << GV->getSection() << '"'; if (GV->getAlignment()) @@ -1307,47 +1532,6 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { Out << '\n'; } -void AssemblyWriter::printMDNode(const MDNode *Node, - bool StandAlone) { - std::map::iterator MI = MDNodes.find(Node); - // If this node is already printed then just refer it using its Metadata - // id number. - if (MI != MDNodes.end()) { - if (!StandAlone) - Out << "!" << MI->second; - return; - } - - if (StandAlone) { - // Print standalone MDNode. - // !42 = !{ ... } - Out << "!" 
<< MetadataIDNo << " = "; - Out << "constant metadata "; - } - - Out << "!{"; - for (MDNode::const_elem_iterator I = Node->elem_begin(), E = Node->elem_end(); - I != E;) { - const Value *TV = *I; - if (!TV) - Out << "null"; - else if (const MDNode *N = dyn_cast(TV)) { - TypePrinter.print(N->getType(), Out); - Out << ' '; - printMDNode(N, StandAlone); - } - else if (!*I) - Out << "null"; - else - writeOperand(*I, true); - if (++I != E) - Out << ", "; - } - Out << "}"; - - MDNodes[Node] = MetadataIDNo++; -} - void AssemblyWriter::printAlias(const GlobalAlias *GA) { // Don't crash when dumping partially built GA if (!GA->hasName()) @@ -1361,9 +1545,9 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { Out << "alias "; PrintLinkage(GA->getLinkage(), Out); - + const Constant *Aliasee = GA->getAliasee(); - + if (const GlobalVariable *GV = dyn_cast(Aliasee)) { TypePrinter.print(GV->getType(), Out); Out << ' '; @@ -1372,7 +1556,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { TypePrinter.print(F->getFunctionType(), Out); Out << "* "; - WriteAsOperandInternal(Out, F, TypePrinter, &Machine); + WriteAsOperandInternal(Out, F, &TypePrinter, &Machine); } else if (const GlobalAlias *GA = dyn_cast(Aliasee)) { TypePrinter.print(GA->getType(), Out); Out << ' '; @@ -1385,7 +1569,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { "Unsupported aliasee"); writeOperand(CE, false); } - + printInfoComment(*GA); Out << '\n'; } @@ -1393,19 +1577,18 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { void AssemblyWriter::printTypeSymbolTable(const TypeSymbolTable &ST) { // Emit all numbered types. for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) { - Out << "\ttype "; - + Out << '%' << i << " = type "; + // Make sure we print out at least one level of the type structure, so // that we do not get %2 = type %2 TypePrinter.printAtLeastOneLevel(NumberedTypes[i], Out); - Out << "\t\t; type %" << i << '\n'; + Out << '\n'; } - + // Print the named types. for (TypeSymbolTable::const_iterator TI = ST.begin(), TE = ST.end(); TI != TE; ++TI) { - Out << '\t'; - PrintLLVMName(Out, &TI->first[0], TI->first.size(), LocalPrefix); + PrintLLVMName(Out, TI->first, LocalPrefix); Out << " = type "; // Make sure we print out at least one level of the type structure, so @@ -1427,7 +1610,7 @@ void AssemblyWriter::printFunction(const Function *F) { Out << "declare "; else Out << "define "; - + PrintLinkage(F->getLinkage(), Out); PrintVisibility(F->getVisibility(), Out); @@ -1451,7 +1634,7 @@ void AssemblyWriter::printFunction(const Function *F) { Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' '; TypePrinter.print(F->getReturnType(), Out); Out << ' '; - WriteAsOperandInternal(Out, F, TypePrinter, &Machine); + WriteAsOperandInternal(Out, F, &TypePrinter, &Machine); Out << '('; Machine.incorporateFunction(F); @@ -1472,10 +1655,10 @@ void AssemblyWriter::printFunction(const Function *F) { for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { // Insert commas as we go... the first arg doesn't get a comma if (i) Out << ", "; - + // Output type... TypePrinter.print(FT->getParamType(i), Out); - + Attributes ArgAttrs = Attrs.getParamAttributes(i+1); if (ArgAttrs != Attribute::None) Out << ' ' << Attribute::getAsString(ArgAttrs); @@ -1515,7 +1698,7 @@ void AssemblyWriter::printFunction(const Function *F) { /// printArgument - This member is called for every argument that is passed into /// the function. 
Simply print it out /// -void AssemblyWriter::printArgument(const Argument *Arg, +void AssemblyWriter::printArgument(const Argument *Arg, Attributes Attrs) { // Output type... TypePrinter.print(Arg->getType(), Out); @@ -1536,7 +1719,7 @@ void AssemblyWriter::printArgument(const Argument *Arg, void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (BB->hasName()) { // Print out the label if it exists... Out << "\n"; - PrintLLVMName(Out, BB->getNameStart(), BB->getNameLen(), LabelPrefix); + PrintLLVMName(Out, BB->getName(), LabelPrefix); Out << ':'; } else if (!BB->use_empty()) { // Don't print block # of no uses... Out << "\n;

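The PointerTracking diffs below add a DejaGNU driver (dg.exp) and FileCheck tests for the analysis, whose -analyze output prints an element count and a byte size for each allocation it can reason about. A minimal sketch in the same idiom (hypothetical function @tiny; the expected "==> 1 elements, 16 bytes allocated" line is inferred by analogy with the [10 x i8] and [10 x i32] cases in sizes.ll below, not taken from the pass):

    ; RUN: opt < %s -pointertracking -analyze | FileCheck %s
    define i32 @tiny() nounwind {
    entry:
    ; CHECK: 'tiny':
      %buf = alloca [8 x i16], align 2 ; <[8 x i16]*> [#uses=1]
    ; CHECK: %buf =
    ; CHECK: ==> 1 elements, 16 bytes allocated
      %p = bitcast [8 x i16]* %buf to i8* ; <i8*> [#uses=0]
      ret i32 0
    }
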
    } define void @getAndMoveToFrontDecode() { diff --git a/test/Analysis/PointerTracking/dg.exp b/test/Analysis/PointerTracking/dg.exp new file mode 100644 index 0000000000000..f2005891a59a8 --- /dev/null +++ b/test/Analysis/PointerTracking/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/test/Analysis/PointerTracking/sizes.ll b/test/Analysis/PointerTracking/sizes.ll new file mode 100644 index 0000000000000..c0b0606af0b52 --- /dev/null +++ b/test/Analysis/PointerTracking/sizes.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -pointertracking -analyze | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" +@.str = internal constant [5 x i8] c"1234\00" ; <[5 x i8]*> [#uses=1] +@test1p = global i8* getelementptr ([5 x i8]* @.str, i32 0, i32 0), align 8 ; [#uses=1] +@test1a = global [5 x i8] c"1234\00", align 1 ; <[5 x i8]*> [#uses=1] +@test2a = global [5 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5], align 4 ; <[5 x i32]*> [#uses=2] +@test2p = global i32* getelementptr ([5 x i32]* @test2a, i32 0, i32 0), align 8 ; [#uses=1] +@test0p = common global i32* null, align 8 ; [#uses=1] +@test0i = common global i32 0, align 4 ; [#uses=1] + +define i32 @foo0() nounwind { +entry: + %tmp = load i32** @test0p ; [#uses=1] + %conv = bitcast i32* %tmp to i8* ; [#uses=1] + %call = tail call i32 @bar(i8* %conv) nounwind ; [#uses=1] + %tmp1 = load i8** @test1p ; [#uses=1] + %call2 = tail call i32 @bar(i8* %tmp1) nounwind ; [#uses=1] + %call3 = tail call i32 @bar(i8* getelementptr ([5 x i8]* @test1a, i32 0, i32 0)) nounwind ; [#uses=1] + %call5 = tail call i32 @bar(i8* bitcast ([5 x i32]* @test2a to i8*)) nounwind ; [#uses=1] + %tmp7 = load i32** @test2p ; [#uses=1] + %conv8 = bitcast i32* %tmp7 to i8* ; [#uses=1] + %call9 = tail call i32 @bar(i8* %conv8) nounwind ; [#uses=1] + %call11 = tail call i32 @bar(i8* bitcast (i32* @test0i to i8*)) nounwind ; [#uses=1] + %add = add i32 %call2, %call ; [#uses=1] + %add4 = add i32 %add, %call3 ; [#uses=1] + %add6 = add i32 %add4, %call5 ; [#uses=1] + %add10 = add i32 %add6, %call9 ; [#uses=1] + %add12 = add i32 %add10, %call11 ; [#uses=1] + ret i32 %add12 +} + +declare i32 @bar(i8*) + +define i32 @foo1(i32 %n) nounwind { +entry: +; CHECK: 'foo1': + %test4a = alloca [10 x i8], align 1 ; <[10 x i8]*> [#uses=1] +; CHECK: %test4a = +; CHECK: ==> 1 elements, 10 bytes allocated + %test6a = alloca [10 x i32], align 4 ; <[10 x i32]*> [#uses=1] +; CHECK: %test6a = +; CHECK: ==> 1 elements, 40 bytes allocated + %vla = alloca i8, i32 %n, align 1 ; [#uses=1] +; CHECK: %vla = +; CHECK: ==> %n elements, %n bytes allocated + %0 = shl i32 %n, 2 ; [#uses=1] + %vla7 = alloca i8, i32 %0, align 1 ; [#uses=1] +; CHECK: %vla7 = +; CHECK: ==> (4 * %n) elements, (4 * %n) bytes allocated + %call = call i32 @bar(i8* %vla) nounwind ; [#uses=1] + %arraydecay = getelementptr [10 x i8]* %test4a, i64 0, i64 0 ; [#uses=1] + %call10 = call i32 @bar(i8* %arraydecay) nounwind ; [#uses=1] + %call11 = call i32 @bar(i8* %vla7) nounwind ; [#uses=1] + %ptrconv14 = bitcast [10 x i32]* %test6a to i8* ; [#uses=1] + %call15 = call i32 @bar(i8* %ptrconv14) nounwind ; [#uses=1] + %add = add i32 %call10, %call ; [#uses=1] + %add12 = add i32 %add, %call11 ; [#uses=1] + %add16 = add i32 %add12, %call15 ; [#uses=1] + ret i32 %add16 +} + +define i32 @foo2(i32 %n) nounwind { +entry: + %call = malloc i8, 
i32 %n ; [#uses=1] +; CHECK: %call = +; CHECK: ==> %n elements, %n bytes allocated + %call2 = tail call i8* @calloc(i64 2, i64 4) nounwind ; [#uses=1] +; CHECK: %call2 = +; CHECK: ==> 8 elements, 8 bytes allocated + %call4 = tail call i8* @realloc(i8* null, i64 16) nounwind ; [#uses=1] +; CHECK: %call4 = +; CHECK: ==> 16 elements, 16 bytes allocated + %call6 = tail call i32 @bar(i8* %call) nounwind ; [#uses=1] + %call8 = tail call i32 @bar(i8* %call2) nounwind ; [#uses=1] + %call10 = tail call i32 @bar(i8* %call4) nounwind ; [#uses=1] + %add = add i32 %call8, %call6 ; [#uses=1] + %add11 = add i32 %add, %call10 ; [#uses=1] + ret i32 %add11 +} + +declare noalias i8* @calloc(i64, i64) nounwind + +declare noalias i8* @realloc(i8* nocapture, i64) nounwind diff --git a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll index b272f92499e5c..b73b7f03f7e7a 100644 --- a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll +++ b/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -postdomfrontier \ +; RUN: opt < %s -analyze -postdomfrontier \ ; RUN: -disable-verify ; ModuleID = '2006-09-26-PostDominanceFrontier.bc' target datalayout = "e-p:64:64" diff --git a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll index 51e4c2aeb5534..1ec056bc34e0e 100644 --- a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll +++ b/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -postdomfrontier -disable-output +; RUN: opt < %s -postdomfrontier -disable-output define void @SManager() { entry: diff --git a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll b/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll index 4deec98a9ee8b..767e5db94ce86 100644 --- a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll +++ b/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -postdomfrontier -disable-output +; RUN: opt < %s -postdomfrontier -disable-output define void @args_out_of_range() { entry: diff --git a/test/Analysis/PostDominators/pr1098.ll b/test/Analysis/PostDominators/pr1098.ll index b54a9fe1c75b9..afb47769ee498 100644 --- a/test/Analysis/PostDominators/pr1098.ll +++ b/test/Analysis/PostDominators/pr1098.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -postdomtree -analyze | grep entry +; RUN: opt < %s -postdomtree -analyze | grep entry ; PR932 define void @foo(i1 %x) { diff --git a/test/Analysis/Profiling/dg.exp b/test/Analysis/Profiling/dg.exp new file mode 100644 index 0000000000000..1eb4755c41028 --- /dev/null +++ b/test/Analysis/Profiling/dg.exp @@ -0,0 +1,4 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] + diff --git a/test/Analysis/Profiling/edge-profiling.ll b/test/Analysis/Profiling/edge-profiling.ll new file mode 100644 index 0000000000000..cbaf47617fb6c --- /dev/null +++ b/test/Analysis/Profiling/edge-profiling.ll @@ -0,0 +1,139 @@ +; Test the edge profiling instrumentation. 
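+; The pass gives each instrumented edge a slot in @EdgeProfCounters and
+; bumps it with a load/add/store sequence; the CHECK lines in @oneblock
+; below spell that pattern out. Schematically, for slot 0 (a sketch of
+; what the CHECKs match, not part of the test input):
+;   %OldFuncCounter = load i32* getelementptr ([19 x i32]* @EdgeProfCounters, i32 0, i32 0)
+;   %NewFuncCounter = add i32 %OldFuncCounter, 1
+;   store i32 %NewFuncCounter, i32* getelementptr ([19 x i32]* @EdgeProfCounters, i32 0, i32 0)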
+; RUN: opt < %s -insert-edge-profiling -S | FileCheck %s + +; ModuleID = '' + +@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] +@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1] +; CHECK:@EdgeProfCounters +; CHECK:[19 x i32] +; CHECK:zeroinitializer + +define void @oneblock() nounwind { +entry: +; CHECK:entry: +; CHECK:%OldFuncCounter +; CHECK:load +; CHECK:getelementptr +; CHECK:@EdgeProfCounters +; CHECK:i32 0 +; CHECK:i32 0 +; CHECK:%NewFuncCounter +; CHECK:add +; CHECK:%OldFuncCounter +; CHECK:store +; CHECK:%NewFuncCounter +; CHECK:getelementptr +; CHECK:@EdgeProfCounters + %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; [#uses=0] + ret void +} + +declare i32 @puts(i8*) + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: +; CHECK:entry: + %argc_addr = alloca i32 ; [#uses=4] + %argv_addr = alloca i8** ; [#uses=1] + %retval = alloca i32 ; [#uses=2] + %j = alloca i32 ; [#uses=4] + %i = alloca i32 ; [#uses=4] + %0 = alloca i32 ; [#uses=2] +; CHECK:call +; CHECK:@llvm_start_edge_profiling +; CHECK:@EdgeProfCounters + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + store i32 0, i32* %i, align 4 + br label %bb10 + +bb: ; preds = %bb10 +; CHECK:bb: + %1 = load i32* %argc_addr, align 4 ; [#uses=1] + %2 = icmp sgt i32 %1, 1 ; [#uses=1] + br i1 %2, label %bb1, label %bb8 + +bb1: ; preds = %bb +; CHECK:bb1: + store i32 0, i32* %j, align 4 + br label %bb6 + +bb2: ; preds = %bb6 +; CHECK:bb2: + %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; [#uses=0] + %4 = load i32* %argc_addr, align 4 ; [#uses=1] + %5 = icmp sgt i32 %4, 2 ; [#uses=1] + br i1 %5, label %bb3, label %bb4 + +bb3: ; preds = %bb2 +; CHECK:bb3: + %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb5 + +bb4: ; preds = %bb2 +; CHECK:bb4: + %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb11 + +bb5: ; preds = %bb3 +; CHECK:bb5: + %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; [#uses=0] + %9 = load i32* %j, align 4 ; [#uses=1] + %10 = add nsw i32 %9, 1 ; [#uses=1] + store i32 %10, i32* %j, align 4 + br label %bb6 + +bb6: ; preds = %bb5, %bb1 +; CHECK:bb6: + %11 = load i32* %j, align 4 ; [#uses=1] + %12 = load i32* %argc_addr, align 4 ; [#uses=1] + %13 = icmp slt i32 %11, %12 ; [#uses=1] + br i1 %13, label %bb2, label %bb7 + +bb7: ; preds = %bb6 +; CHECK:bb7: + br label %bb9 + +bb8: ; preds = %bb +; CHECK:bb8: + %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb9 + +bb9: ; preds = %bb8, %bb7 +; CHECK:bb9: + %15 = load i32* %i, align 4 ; [#uses=1] + %16 = add nsw i32 %15, 1 ; [#uses=1] + store i32 %16, i32* %i, align 4 + br label %bb10 + +bb10: ; preds = %bb9, %entry +; CHECK:bb10: + %17 = load i32* %i, align 4 ; [#uses=1] + %18 = icmp ne i32 %17, 3 ; [#uses=1] + br i1 %18, label %bb, label %bb11 +; CHECK:br +; CHECK:label 
%bb10.bb11_crit_edge + +; CHECK:bb10.bb11_crit_edge: +; CHECK:br +; CHECK:label %bb11 + +bb11: ; preds = %bb10, %bb4 +; CHECK:bb11: + call void @oneblock() nounwind + store i32 0, i32* %0, align 4 + %19 = load i32* %0, align 4 ; [#uses=1] + store i32 %19, i32* %retval, align 4 + br label %return + +return: ; preds = %bb11 +; CHECK:return: + %retval12 = load i32* %retval ; [#uses=1] + ret i32 %retval12 +} diff --git a/test/Analysis/Profiling/profiling-tool-chain.ll b/test/Analysis/Profiling/profiling-tool-chain.ll new file mode 100644 index 0000000000000..5ac31b59bdcbb --- /dev/null +++ b/test/Analysis/Profiling/profiling-tool-chain.ll @@ -0,0 +1,212 @@ +; RUN: llvm-as %s -o %t1 + +; FIXME: The RUX parts of the test are disabled for now, they aren't working on +; llvm-gcc-x86_64-darwin10-selfhost. + +; Test the edge optimal profiling instrumentation. +; RUN: opt %t1 -insert-optimal-edge-profiling -o %t2 +; RUX: llvm-dis < %t2 | FileCheck --check-prefix=INST %s + +; Test the creation, reading and displaying of profile +; RUX: rm -f llvmprof.out +; RUX: lli -load %llvmlibsdir/profile_rt%shlibext %t2 +; RUX: lli -load %llvmlibsdir/profile_rt%shlibext %t2 1 2 +; RUX: llvm-prof -print-all-code %t1 | FileCheck --check-prefix=PROF %s + +; Test the loaded profile also with verifier. +; RUX opt %t1 -profile-loader -profile-verifier -o %t3 + +; Test profile estimator. +; RUN: opt %t1 -profile-estimator -profile-verifier -o %t3 + +; PROF: 1. 2/4 oneblock +; PROF: 2. 2/4 main +; PROF: 1. 15.7895% 12/76 main() - bb6 +; PROF: 2. 11.8421% 9/76 main() - bb2 +; PROF: 3. 11.8421% 9/76 main() - bb3 +; PROF: 4. 11.8421% 9/76 main() - bb5 +; PROF: 5. 10.5263% 8/76 main() - bb10 +; PROF: 6. 7.89474% 6/76 main() - bb +; PROF: 7. 7.89474% 6/76 main() - bb9 +; PROF: 8. 3.94737% 3/76 main() - bb1 +; PROF: 9. 3.94737% 3/76 main() - bb7 +; PROF: 10. 3.94737% 3/76 main() - bb8 +; PROF: 11. 2.63158% 2/76 oneblock() - entry +; PROF: 12. 2.63158% 2/76 main() - entry +; PROF: 13. 2.63158% 2/76 main() - bb11 +; PROF: 14. 2.63158% 2/76 main() - return + +; ModuleID = '' + +@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] +@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1] +; INST:@OptEdgeProfCounters +; INST:[21 x i32] +; INST:[i32 0, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 0, +; INST:i32 0, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 0, +; INST:i32 0, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 0, +; INST:i32 -1, +; INST:i32 -1] + +; PROF:;;; %oneblock called 2 times. +; PROF:;;; +define void @oneblock() nounwind { +entry: +; PROF:entry: +; PROF: ;;; Basic block executed 2 times. + %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; [#uses=0] + ret void +} + +declare i32 @puts(i8*) + +; PROF:;;; %main called 2 times. +; PROF:;;; +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: +; PROF:entry: +; PROF: ;;; Basic block executed 2 times. 
+ %argc_addr = alloca i32 ; [#uses=4] + %argv_addr = alloca i8** ; [#uses=1] + %retval = alloca i32 ; [#uses=2] + %j = alloca i32 ; [#uses=4] + %i = alloca i32 ; [#uses=4] + %0 = alloca i32 ; [#uses=2] +; INST:call +; INST:@llvm_start_opt_edge_profiling +; INST:@OptEdgeProfCounters + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + store i32 0, i32* %i, align 4 + br label %bb10 +; PROF: ;;; Out-edge counts: [2.000000e+00 -> bb10] + +bb: ; preds = %bb10 +; PROF:bb: +; PROF: ;;; Basic block executed 6 times. + %1 = load i32* %argc_addr, align 4 ; [#uses=1] + %2 = icmp sgt i32 %1, 1 ; [#uses=1] + br i1 %2, label %bb1, label %bb8 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb1] [3.000000e+00 -> bb8] + +bb1: ; preds = %bb +; PROF:bb1: +; PROF: ;;; Basic block executed 3 times. + store i32 0, i32* %j, align 4 + br label %bb6 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb6] + +bb2: ; preds = %bb6 +; PROF:bb2: +; PROF: ;;; Basic block executed 9 times. + %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; [#uses=0] + %4 = load i32* %argc_addr, align 4 ; [#uses=1] + %5 = icmp sgt i32 %4, 2 ; [#uses=1] + br i1 %5, label %bb3, label %bb4 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb3] + +bb3: ; preds = %bb2 +; PROF:bb3: +; PROF: ;;; Basic block executed 9 times. + %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb5 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb5] + +bb4: ; preds = %bb2 +; PROF:bb4: +; PROF: ;;; Never executed! + %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb11 + +bb5: ; preds = %bb3 +; PROF:bb5: +; PROF: ;;; Basic block executed 9 times. + %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; [#uses=0] + %9 = load i32* %j, align 4 ; [#uses=1] + %10 = add nsw i32 %9, 1 ; [#uses=1] + store i32 %10, i32* %j, align 4 + br label %bb6 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb6] + +bb6: ; preds = %bb5, %bb1 +; PROF:bb6: +; PROF: ;;; Basic block executed 12 times. + %11 = load i32* %j, align 4 ; [#uses=1] + %12 = load i32* %argc_addr, align 4 ; [#uses=1] + %13 = icmp slt i32 %11, %12 ; [#uses=1] + br i1 %13, label %bb2, label %bb7 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb2] [3.000000e+00 -> bb7] + +bb7: ; preds = %bb6 +; PROF:bb7: +; PROF: ;;; Basic block executed 3 times. + br label %bb9 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9] + +bb8: ; preds = %bb +; PROF:bb8: +; PROF: ;;; Basic block executed 3 times. + %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb9 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9] + +bb9: ; preds = %bb8, %bb7 +; PROF:bb9: +; PROF: ;;; Basic block executed 6 times. + %15 = load i32* %i, align 4 ; [#uses=1] + %16 = add nsw i32 %15, 1 ; [#uses=1] + store i32 %16, i32* %i, align 4 + br label %bb10 +; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb10] + +bb10: ; preds = %bb9, %entry +; PROF:bb10: +; PROF: ;;; Basic block executed 8 times. 
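+; bb10 -> bb11 is a critical edge (bb10 also branches to %bb, and bb11 is
+; also reached from %bb4), so the instrumentation splits it so that the
+; edge can carry its own counter; the INST lines below expect the split
+; block, roughly (a sketch):
+;   bb10.bb11_crit_edge:
+;     br label %bb11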
+ %17 = load i32* %i, align 4 ; [#uses=1] + %18 = icmp ne i32 %17, 3 ; [#uses=1] + br i1 %18, label %bb, label %bb11 +; INST:br +; INST:label %bb10.bb11_crit_edge +; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb] [2.000000e+00 -> bb11] + +; INST:bb10.bb11_crit_edge: +; INST:br +; INST:label %bb11 + +bb11: ; preds = %bb10, %bb4 +; PROF:bb11: +; PROF: ;;; Basic block executed 2 times. + call void @oneblock() nounwind + store i32 0, i32* %0, align 4 + %19 = load i32* %0, align 4 ; [#uses=1] + store i32 %19, i32* %retval, align 4 + br label %return +; PROF: ;;; Out-edge counts: [2.000000e+00 -> return] + +return: ; preds = %bb11 +; PROF:return: +; PROF: ;;; Basic block executed 2 times. + %retval12 = load i32* %retval ; [#uses=1] + ret i32 %retval12 +} diff --git a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll index bf27e7753538c..7f82ea4357911 100644 --- a/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll +++ b/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep {Loop bb: backedge-taken count is 100} ; PR1533 diff --git a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll index e3393d5eed622..e67e4d00d6258 100644 --- a/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll +++ b/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -indvars -adce -simplifycfg | llvm-dis | grep "icmp s" +; RUN: opt < %s -indvars -adce -simplifycfg -S | grep "icmp s" ; PR1598 define i32 @f(i32 %a, i32 %b, i32 %x, i32 %y) { diff --git a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll index 95f932a9a581c..f623da1b27575 100644 --- a/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll +++ b/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)} ; PR1597 define i32 @f(i32 %x, i32 %y) { diff --git a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll index e5e47d549f53b..817090ffef650 100644 --- a/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll +++ b/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 13} ; PR1706 diff --git a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll index 66ca7551c240e..514920f0f6fad 100644 --- a/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll +++ b/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep printd | grep 1206807378 +; RUN: opt < %s -indvars -S | grep printd | grep 1206807378 ; PR1798 declare void @printd(i32) diff --git a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll 
b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll index 01f338a29c273..2b3c982d6b127 100644 --- a/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll +++ b/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep -e {--> %b} +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep -e {--> %b} ; PR1810 define void @fun() { diff --git a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll index b9a53b318bf5b..c8e483e7d50f9 100644 --- a/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll +++ b/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop header: backedge-taken count is (0 smax %n)} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop header: backedge-taken count is (0 smax %n)} define void @foo(i32 %n) { entry: diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll index b943bc7d4c61c..cb9a1829eb7c0 100644 --- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll +++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop loop: backedge-taken count is (100 + (-100 smax %n))} ; PR2002 define void @foo(i8 %n) { diff --git a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll index 59b51093f40a9..bf9f4a9e8607e 100644 --- a/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll +++ b/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep umax +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep umax ; PR2003 define i32 @foo(i32 %n) { diff --git a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll index 5453ae3ae80ba..8d15b772f1fe0 100644 --- a/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll +++ b/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 61} ; PR2364 diff --git a/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll b/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll index cbe5c97905cee..d503329292c7d 100644 --- a/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll +++ b/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution 2>/dev/null +; RUN: opt < %s -analyze -scalar-evolution 2>/dev/null ; PR2433 define i32 @main1(i32 %argc, i8** %argv) nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll index 6ba0f25eb0612..850b6708f4fe8 100644 --- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll +++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt 
-analyze -scalar-evolution -disable-output |& not grep smax +; RUN: opt < %s -analyze -scalar-evolution -disable-output |& not grep smax ; PR2261 @lut = common global [256 x i8] zeroinitializer, align 32 ; <[256 x i8]*> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll index 3c022e7181bdd..59e9fda41e646 100644 --- a/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll +++ b/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& not grep smax +; RUN: opt < %s -analyze -scalar-evolution -disable-output |& not grep smax ; PR2070 define i32 @a(i32 %x) nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll index 5dcad53f6a60a..989ac51226dce 100644 --- a/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll +++ b/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep Unpredictable ; PR2088 diff --git a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll index 54c929dcdaf63..803c7d110e72b 100644 --- a/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll +++ b/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 113} ; PR2088 diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll b/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll index 527b8b0add23e..97d0640c6c585 100644 --- a/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll +++ b/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | \ ; RUN: grep -F "backedge-taken count is (-1 + (-1 * %j))" ; PR2607 diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll index 9051dc7ec515d..7f4de91733365 100644 --- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | \ ; RUN: grep -F "backedge-taken count is (-2147483632 + ((-1 + (-1 * %x)) smax (-1 + (-1 * %y))))" ; PR2607 diff --git a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll index f8e1cfcd7fbe4..fa09895eac326 100644 --- a/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll +++ b/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: 20028" ; PR2621 diff --git a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll 
b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll index fbd249fbc0406..5a28117eb60b1 100644 --- a/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll +++ b/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep -F "Exits: -19168" ; PR2621 diff --git a/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll b/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll index 1e9d0bfc9c231..9daff991aee92 100644 --- a/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll +++ b/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output +; RUN: opt < %s -analyze -scalar-evolution -disable-output ; PR1827 declare void @use(i32) diff --git a/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll b/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll index c0b3a1fe01b2a..5a2c36659c72e 100644 --- a/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll +++ b/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output +; RUN: opt < %s -analyze -scalar-evolution -disable-output ; PR2602 define i32 @a() nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll index 56d1fe7b541dd..daeb26a202e3c 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output |& \ ; RUN: grep {Loop bb: backedge-taken count is (7 + (-1 \\* %argc))} ; XFAIL: * diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll index 8fb1604fd190f..9dda78b21f7d1 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: | grep {Loop bb: Unpredictable backedge-taken count\\.} ; ScalarEvolution can't compute a trip count because it doesn't know if diff --git a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll index d506f9c3f82f5..bcbe92f509ae1 100644 --- a/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll +++ b/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& grep {/u 3} +; RUN: opt < %s -analyze -scalar-evolution -disable-output |& grep {/u 3} ; XFAIL: * define i32 @f(i32 %x) nounwind readnone { diff --git a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll index 643d2f835b8c0..2ee107a4a43a9 100644 --- a/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll +++ b/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {backedge-taken count is 255} +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {backedge-taken count is 255} ; XFAIL: * define i32 @foo(i32 %x, i32 %y, 
i32* %lam, i32* %alp) nounwind { diff --git a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll index 995a1d95a8a89..0cfd84c997b40 100644 --- a/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll +++ b/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {0 smax} +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {0 smax} ; XFAIL: * define i32 @f(i32 %c.idx.val) { diff --git a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll index 8e064c70da9a0..4ec358c8a4dc6 100644 --- a/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll +++ b/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output |& \ ; RUN: grep {(((-1 \\* %i0) + (100005 smax %i0)) /u 5)} ; XFAIL: * diff --git a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll index 950c1d21d910a..1fe10689f3fe7 100644 --- a/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll +++ b/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output |& grep {/u 5} +; RUN: opt < %s -analyze -scalar-evolution -disable-output |& grep {/u 5} ; XFAIL: * define i8 @foo0(i8 %i0) nounwind { diff --git a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll index 65c4cdbb1362d..9d13695c3e47c 100644 --- a/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll +++ b/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | not grep {/u -1} +; RUN: opt < %s -analyze -scalar-evolution -disable-output | not grep {/u -1} ; PR3275 @g_16 = external global i16 ; [#uses=3] diff --git a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll index 6aced23cf9500..78a7fd016716c 100644 --- a/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll +++ b/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {(trunc i} | not grep ext +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {(trunc i} | not grep ext define i16 @test1(i8 %x) { %A = sext i8 %x to i32 diff --git a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll index 5e5128bd577a6..6ed261481e2d2 100644 --- a/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll +++ b/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep {count is 2} +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep {count is 2} ; PR3171 %struct.Foo = type { i32 } diff --git a/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll b/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll new file mode 100644 index 0000000000000..a4358aa632152 --- /dev/null +++ b/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll @@ 
-0,0 +1,16 @@ +; RUN: opt < %s -analyze -scalar-evolution +; PR4501 + +define void @test() { +entry: + %0 = load i16* undef, align 1 + %1 = lshr i16 %0, 8 + %2 = and i16 %1, 3 + %3 = zext i16 %2 to i32 + %4 = load i8* undef, align 1 + %5 = lshr i8 %4, 4 + %6 = and i8 %5, 1 + %7 = zext i8 %6 to i32 + %t1 = add i32 %3, %7 + ret void +} diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll index 27a546f32e827..fcc6fc3297c0d 100644 --- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll +++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 100} ; PR1101 diff --git a/test/Analysis/ScalarEvolution/and-xor.ll b/test/Analysis/ScalarEvolution/and-xor.ll index 94cca83ea74db..90d947f15bba3 100644 --- a/test/Analysis/ScalarEvolution/and-xor.ll +++ b/test/Analysis/ScalarEvolution/and-xor.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \ +; RUN: opt < %s -scalar-evolution -analyze -disable-output \ ; RUN: | grep {\\--> (zext} | count 2 define i32 @foo(i32 %x) { diff --git a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll new file mode 100644 index 0000000000000..f638eb340140c --- /dev/null +++ b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -analyze -scalar-evolution -disable-output +; PR4537 + +; ModuleID = 'b.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @test() { +entry: + %0 = load i32** undef, align 8 ; [#uses=1] + %1 = ptrtoint i32* %0 to i64 ; [#uses=1] + %2 = sub i64 undef, %1 ; [#uses=1] + %3 = lshr i64 %2, 3 ; [#uses=1] + %4 = trunc i64 %3 to i32 ; [#uses=2] + br i1 undef, label %bb10, label %bb4.i + +bb4.i: ; preds = %bb4.i, %entry + %i.0.i6 = phi i32 [ %8, %bb4.i ], [ 0, %entry ] ; [#uses=2] + %5 = sub i32 %4, %i.0.i6 ; [#uses=1] + %6 = sext i32 %5 to i64 ; [#uses=1] + %7 = udiv i64 undef, %6 ; [#uses=1] + %8 = add i32 %i.0.i6, 1 ; [#uses=2] + %phitmp = icmp eq i64 %7, 0 ; [#uses=1] + %.not.i = icmp sge i32 %8, %4 ; [#uses=1] + %or.cond.i = or i1 %phitmp, %.not.i ; [#uses=1] + br i1 %or.cond.i, label %bb10, label %bb4.i + +bb10: ; preds = %bb4.i, %entry + unreachable +} diff --git a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll new file mode 100644 index 0000000000000..31b95e1470b23 --- /dev/null +++ b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll @@ -0,0 +1,354 @@ +; RUN: opt < %s -iv-users +; PR4538 + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-freebsd8.0" +module asm ".ident\09\22$FreeBSD: head/sys/kern/vfs_subr.c 195285 2009-07-02 14:19:33Z jamie $\22" +module asm ".section set_pcpu, \22aw\22, @progbits" +module asm ".previous" + type <{ [40 x i8] }> ; type %0 + type <{ %struct.vm_object*, %struct.vm_object** }> ; type %1 + type <{ %struct.vm_object* }> ; type %2 + type <{ 
%struct.vm_page*, %struct.vm_page** }> ; type %3 + type <{ %struct.pv_entry*, %struct.pv_entry** }> ; type %4 + type <{ %struct.vm_reserv* }> ; type %5 + type <{ %struct.bufobj*, %struct.bufobj** }> ; type %6 + type <{ %struct.proc*, %struct.proc** }> ; type %7 + type <{ %struct.thread*, %struct.thread** }> ; type %8 + type <{ %struct.prison*, %struct.prison** }> ; type %9 + type <{ %struct.prison* }> ; type %10 + type <{ %struct.task* }> ; type %11 + type <{ %struct.osd*, %struct.osd** }> ; type %12 + type <{ %struct.proc* }> ; type %13 + type <{ %struct.ksiginfo*, %struct.ksiginfo** }> ; type %14 + type <{ %struct.pv_chunk*, %struct.pv_chunk** }> ; type %15 + type <{ %struct.pgrp*, %struct.pgrp** }> ; type %16 + type <{ %struct.knote*, %struct.knote** }> ; type %17 + type <{ %struct.ktr_request*, %struct.ktr_request** }> ; type %18 + type <{ %struct.mqueue_notifier* }> ; type %19 + type <{ %struct.turnstile* }> ; type %20 + type <{ %struct.namecache* }> ; type %21 + type <{ %struct.namecache*, %struct.namecache** }> ; type %22 + type <{ %struct.lockf*, %struct.lockf** }> ; type %23 + type <{ %struct.lockf_entry*, %struct.lockf_entry** }> ; type %24 + type <{ %struct.lockf_edge*, %struct.lockf_edge** }> ; type %25 + %struct.__siginfo = type <{ i32, i32, i32, i32, i32, i32, i8*, %union.sigval, %0 }> + %struct.__sigset = type <{ [4 x i32] }> + %struct.acl = type <{ i32, i32, [4 x i32], [254 x %struct.acl_entry] }> + %struct.acl_entry = type <{ i32, i32, i32, i16, i16 }> + %struct.au_mask = type <{ i32, i32 }> + %struct.au_tid_addr = type <{ i32, i32, [4 x i32] }> + %struct.auditinfo_addr = type <{ i32, %struct.au_mask, %struct.au_tid_addr, i32, i64 }> + %struct.bintime = type <{ i64, i64 }> + %struct.buf = type <{ %struct.bufobj*, i64, i8*, i8*, i32, i8, i8, i8, i8, i64, i64, void (%struct.buf*)*, i64, i64, %struct.buflists, %struct.buf*, %struct.buf*, i32, i8, i8, i8, i8, %struct.buflists, i16, i8, i8, i32, i8, i8, i8, i8, i8, i8, i8, i8, %struct.lock, i64, i64, i8*, i32, i8, i8, i8, i8, i64, %struct.vnode*, i32, i32, %struct.ucred*, %struct.ucred*, i8*, %union.pager_info, i8, i8, i8, i8, %union.anon, [32 x %struct.vm_page*], i32, i8, i8, i8, i8, %struct.workhead, i8*, i8*, i8*, i32, i8, i8, i8, i8 }> + %struct.buf_ops = type <{ i8*, i32 (%struct.buf*)*, void (%struct.bufobj*, %struct.buf*)*, i32 (%struct.bufobj*, i32)*, void (%struct.bufobj*, %struct.buf*)* }> + %struct.buflists = type <{ %struct.buf*, %struct.buf** }> + %struct.bufobj = type <{ %struct.mtx, %struct.bufv, %struct.bufv, i64, i32, i8, i8, i8, i8, %struct.buf_ops*, i32, i8, i8, i8, i8, %struct.vm_object*, %6, i8*, %struct.vnode* }> + %struct.bufv = type <{ %struct.buflists, %struct.buf*, i32, i8, i8, i8, i8 }> + %struct.callout = type <{ %union.anon, i32, i8, i8, i8, i8, i8*, void (i8*)*, %struct.lock_object*, i32, i32 }> + %struct.cdev_privdata = type opaque + %struct.cluster_save = type <{ i64, i64, i8*, i32, i8, i8, i8, i8, %struct.buf** }> + %struct.componentname = type <{ i64, i64, %struct.thread*, %struct.ucred*, i32, i8, i8, i8, i8, i8*, i8*, i64, i64 }> + %struct.cpuset = type opaque + %struct.cv = type <{ i8*, i32, i8, i8, i8, i8 }> + %struct.fid = type <{ i16, i16, [16 x i8] }> + %struct.file = type <{ i8*, %struct.fileops*, %struct.ucred*, %struct.vnode*, i16, i16, i32, i32, i32, i64, %struct.cdev_privdata*, i64, i8* }> + %struct.filedesc = type opaque + %struct.filedesc_to_leader = type opaque + %struct.fileops = type <{ i32 (%struct.file*, %struct.uio*, %struct.ucred*, i32, %struct.thread*)*, i32 (%struct.file*, 
%struct.uio*, %struct.ucred*, i32, %struct.thread*)*, i32 (%struct.file*, i64, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, i64, i8*, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, i32, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, %struct.knote*)*, i32 (%struct.file*, %struct.stat*, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, %struct.thread*)*, i32, i8, i8, i8, i8 }> + %struct.filterops = type <{ i32, i8, i8, i8, i8, i32 (%struct.knote*)*, void (%struct.knote*)*, i32 (%struct.knote*, i64)* }> + %struct.flock = type <{ i64, i64, i32, i16, i16, i32, i8, i8, i8, i8 }> + %struct.freelst = type <{ %struct.vnode*, %struct.vnode** }> + %struct.fsid = type <{ [2 x i32] }> + %struct.in6_addr = type opaque + %struct.in_addr = type opaque + %struct.inode = type opaque + %struct.iovec = type <{ i8*, i64 }> + %struct.itimers = type opaque + %struct.itimerval = type <{ %struct.bintime, %struct.bintime }> + %struct.kaioinfo = type opaque + %struct.kaudit_record = type opaque + %struct.kdtrace_proc = type opaque + %struct.kdtrace_thread = type opaque + %struct.kevent = type <{ i64, i16, i16, i32, i64, i8* }> + %struct.klist = type <{ %struct.knote* }> + %struct.knlist = type <{ %struct.klist, void (i8*)*, void (i8*)*, void (i8*)*, void (i8*)*, i8* }> + %struct.knote = type <{ %struct.klist, %struct.klist, %struct.knlist*, %17, %struct.kqueue*, %struct.kevent, i32, i32, i64, %union.sigval, %struct.filterops*, i8* }> + %struct.kqueue = type opaque + %struct.ksiginfo = type <{ %14, %struct.__siginfo, i32, i8, i8, i8, i8, %struct.sigqueue* }> + %struct.ktr_request = type opaque + %struct.label = type opaque + %struct.lock = type <{ %struct.lock_object, i64, i32, i32 }> + %struct.lock_list_entry = type opaque + %struct.lock_object = type <{ i8*, i32, i32, %struct.witness* }> + %struct.lock_owner = type opaque + %struct.lock_profile_object = type opaque + %struct.lockf = type <{ %23, %struct.mtx, %struct.lockf_entry_list, %struct.lockf_entry_list, i32, i8, i8, i8, i8 }> + %struct.lockf_edge = type <{ %25, %25, %struct.lockf_entry*, %struct.lockf_entry* }> + %struct.lockf_edge_list = type <{ %struct.lockf_edge* }> + %struct.lockf_entry = type <{ i16, i16, i8, i8, i8, i8, i64, i64, %struct.lock_owner*, %struct.vnode*, %struct.inode*, %struct.task*, %24, %struct.lockf_edge_list, %struct.lockf_edge_list, i32, i8, i8, i8, i8 }> + %struct.lockf_entry_list = type <{ %struct.lockf_entry* }> + %struct.lpohead = type <{ %struct.lock_profile_object* }> + %struct.md_page = type <{ %4 }> + %struct.mdproc = type <{ %struct.cv*, %struct.system_segment_descriptor }> + %struct.mdthread = type <{ i32, i8, i8, i8, i8, i64 }> + %struct.mntarg = type opaque + %struct.mntlist = type <{ %struct.mount*, %struct.mount** }> + %struct.mount = type <{ %struct.mtx, i32, i8, i8, i8, i8, %struct.mntlist, %struct.vfsops*, %struct.vfsconf*, %struct.vnode*, %struct.vnode*, i32, i8, i8, i8, i8, %struct.freelst, i32, i32, i32, i32, i32, i32, %struct.vfsoptlist*, %struct.vfsoptlist*, i32, i8, i8, i8, i8, %struct.statfs, %struct.ucred*, i8*, i64, i32, i8, i8, i8, i8, %struct.netexport*, %struct.label*, i32, i32, i32, i32, %struct.thread*, i8*, %struct.lock }> + %struct.mqueue_notifier = type opaque + %struct.mtx = type <{ %struct.lock_object, i64 }> + %struct.namecache = type opaque + %struct.netexport = type opaque + %struct.nlminfo = type opaque + %struct.osd = type <{ i32, i8, i8, i8, i8, i8**, %12 }> + %struct.p_sched = type opaque + %struct.pargs = type <{ i32, i32, [1 x i8], i8, i8, i8 }> + %struct.pcb = 
type opaque + %struct.pgrp = type <{ %16, %13, %struct.session*, %struct.sigiolst, i32, i32, %struct.mtx }> + %struct.plimit = type opaque + %struct.pmap = type <{ %struct.mtx, i64*, %15, i32, i8, i8, i8, i8, %struct.bintime, %struct.vm_page* }> + %struct.prison = type <{ %9, i32, i32, i32, i32, %10, %9, %struct.prison*, %struct.mtx, %struct.task, %struct.osd, %struct.cpuset*, %struct.vnet*, %struct.vnode*, i32, i32, %struct.in_addr*, %struct.in6_addr*, [4 x i8*], i32, i32, i32, i32, i32, [5 x i32], i64, [256 x i8], [1024 x i8], [256 x i8], [256 x i8], [64 x i8] }> + %struct.proc = type <{ %7, %8, %struct.mtx, %struct.ucred*, %struct.filedesc*, %struct.filedesc_to_leader*, %struct.pstats*, %struct.plimit*, %struct.callout, %struct.sigacts*, i32, i32, i32, i8, i8, i8, i8, %7, %7, %struct.proc*, %7, %13, %struct.mtx, %struct.ksiginfo*, %struct.sigqueue, i32, i8, i8, i8, i8, %struct.vmspace*, i32, i8, i8, i8, i8, %struct.itimerval, %struct.rusage, %struct.rusage_ext, %struct.rusage_ext, i32, i32, i32, i8, i8, i8, i8, %struct.vnode*, %struct.ucred*, %struct.vnode*, i32, i8, i8, i8, i8, %struct.sigiolst, i32, i32, i64, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, %struct.nlminfo*, %struct.kaioinfo*, %struct.thread*, i32, i8, i8, i8, i8, %struct.thread*, i32, i32, %struct.itimers*, i32, i32, [20 x i8], i8, i8, i8, i8, %struct.pgrp*, %struct.sysentvec*, %struct.pargs*, i64, i8, i8, i8, i8, i32, i16, i8, i8, i8, i8, i8, i8, %struct.knlist, i32, i8, i8, i8, i8, %struct.mdproc, %struct.callout, i16, i8, i8, i8, i8, i8, i8, %struct.proc*, %struct.proc*, i8*, %struct.label*, %struct.p_sched*, %18, %19, %struct.kdtrace_proc*, %struct.cv }> + %struct.pstats = type opaque + %struct.pv_chunk = type <{ %struct.pmap*, %15, [3 x i64], [2 x i64], [168 x %struct.pv_entry] }> + %struct.pv_entry = type <{ i64, %4 }> + %struct.rusage = type <{ %struct.bintime, %struct.bintime, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 }> + %struct.rusage_ext = type <{ i64, i64, i64, i64, i64, i64, i64 }> + %struct.selfd = type opaque + %struct.selfdlist = type <{ %struct.selfd*, %struct.selfd** }> + %struct.selinfo = type <{ %struct.selfdlist, %struct.knlist, %struct.mtx* }> + %struct.seltd = type opaque + %struct.session = type <{ i32, i8, i8, i8, i8, %struct.proc*, %struct.vnode*, %struct.tty*, i32, [24 x i8], i8, i8, i8, i8, %struct.mtx }> + %struct.shmmap_state = type opaque + %struct.sigacts = type <{ [128 x void (i32)*], [128 x %struct.__sigset], %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, i32, i32, %struct.mtx }> + %struct.sigaltstack = type <{ i8*, i64, i32, i8, i8, i8, i8 }> + %struct.sigio = type <{ %union.sigval, %struct.sigiolst, %struct.sigio**, %struct.ucred*, i32, i8, i8, i8, i8 }> + %struct.sigiolst = type <{ %struct.sigio* }> + %struct.sigqueue = type <{ %struct.__sigset, %struct.__sigset, %14, %struct.proc*, i32, i8, i8, i8, i8 }> + %struct.sleepqueue = type opaque + %struct.sockaddr = type opaque + %struct.stat = type <{ i32, i32, i16, i16, i32, i32, i32, %struct.bintime, %struct.bintime, %struct.bintime, i64, i64, i32, i32, i32, i32, %struct.bintime }> + %struct.statfs = type <{ i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, [10 x i64], i32, i32, %struct.fsid, [80 x i8], [16 x i8], [88 x i8], [88 x i8] }> + %struct.sysctl_req = type <{ %struct.thread*, i32, i8, i8, i8, i8, i8*, i64, i64, i32 (%struct.sysctl_req*, i8*, i64)*, i8*, 
i64, i64, i32 (%struct.sysctl_req*, i8*, i64)*, i64, i32, i8, i8, i8, i8 }> + %struct.sysentvec = type opaque + %struct.system_segment_descriptor = type <{ i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }> + %struct.task = type <{ %11, i16, i16, i8, i8, i8, i8, void (i8*, i32)*, i8* }> + %struct.td_sched = type opaque + %struct.thread = type <{ %struct.mtx*, %struct.proc*, %8, %8, %8, %8, %struct.cpuset*, %struct.seltd*, %struct.sleepqueue*, %struct.turnstile*, %struct.umtx_q*, i32, i8, i8, i8, i8, %struct.sigqueue, i32, i32, i32, i32, i32, i8, i8, i8, i8, i8*, i8*, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, %struct.turnstile*, i8*, %20, %struct.lock_list_entry*, i32, i32, %struct.ucred*, i32, i32, %struct.rusage, i64, i64, i32, i32, i32, i32, i32, %struct.__sigset, %struct.__sigset, i32, %struct.sigaltstack, i32, i8, i8, i8, i8, i64, i32, [20 x i8], %struct.file*, i32, i32, %struct.osd, i8, i8, i8, i8, i8, i8, i8, i8, %struct.pcb*, i32, i8, i8, i8, i8, [2 x i64], %struct.callout, %struct.trapframe*, %struct.vm_object*, i64, i32, i8, i8, i8, i8, %struct.vm_object*, i64, i32, i32, %struct.mdthread, %struct.td_sched*, %struct.kaudit_record*, i32, i8, i8, i8, i8, [2 x %struct.lpohead], %struct.kdtrace_thread*, i32, i8, i8, i8, i8, %struct.vnet*, i8* }> + %struct.trapframe = type <{ i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i16, i16, i64, i32, i16, i16, i64, i64, i64, i64, i64, i64 }> + %struct.tty = type opaque + %struct.turnstile = type opaque + %struct.ucred = type <{ i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, %struct.uidinfo*, %struct.uidinfo*, %struct.prison*, %struct.vimage*, i32, i8, i8, i8, i8, [2 x i8*], %struct.label*, %struct.auditinfo_addr, i32*, i32, i8, i8, i8, i8 }> + %struct.uidinfo = type opaque + %struct.uio = type <{ %struct.iovec*, i32, i8, i8, i8, i8, i64, i64, i32, i32, %struct.thread* }> + %struct.umtx_q = type opaque + %struct.vattr = type <{ i32, i16, i16, i32, i32, i32, i8, i8, i8, i8, i64, i64, i64, %struct.bintime, %struct.bintime, %struct.bintime, %struct.bintime, i64, i64, i32, i8, i8, i8, i8, i64, i64, i32, i8, i8, i8, i8, i64 }> + %struct.vfsconf = type <{ i32, [16 x i8], i8, i8, i8, i8, %struct.vfsops*, i32, i32, i32, i8, i8, i8, i8, %struct.vfsoptdecl*, %struct.vfsconfhead }> + %struct.vfsconfhead = type <{ %struct.vfsconf*, %struct.vfsconf** }> + %struct.vfsops = type <{ i32 (%struct.mount*)*, i32 (%struct.mntarg*, i8*, i32)*, i32 (%struct.mount*, i32)*, i32 (%struct.mount*, i32, %struct.vnode**)*, i32 (%struct.mount*, i32, i32, i8*)*, i32 (%struct.mount*, %struct.statfs*)*, i32 (%struct.mount*, i32)*, i32 (%struct.mount*, i32, i32, %struct.vnode**)*, i32 (%struct.mount*, %struct.fid*, %struct.vnode**)*, i32 (%struct.mount*, %struct.sockaddr*, i32*, %struct.ucred**, i32*, i32**)*, i32 (%struct.vfsconf*)*, i32 (%struct.vfsconf*)*, i32 (%struct.mount*, i32, %struct.vnode*, i32, i8*)*, i32 (%struct.mount*, i32, %struct.sysctl_req*)*, void (%struct.mount*)* }> + %struct.vfsopt = type <{ %struct.vfsoptlist, i8*, i8*, i32, i32, i32, i8, i8, i8, i8 }> + %struct.vfsoptdecl = type opaque + %struct.vfsoptlist = type <{ %struct.vfsopt*, %struct.vfsopt** }> + %struct.vimage = type opaque + %struct.vm_map = type <{ %struct.vm_map_entry, %struct.mtx, %struct.mtx, i32, i8, i8, i8, i8, i64, i32, i8, i8, i8, i8, %struct.vm_map_entry*, %struct.pmap*, %struct.vm_map_entry* }> + %struct.vm_map_entry = type <{ %struct.vm_map_entry*, %struct.vm_map_entry*, %struct.vm_map_entry*, %struct.vm_map_entry*, i64, 
i64, i64, i64, i64, %union.sigval, i64, i32, i8, i8, i8, i8, i32, i8, i8, i8, i8, i64, %struct.uidinfo* }> + %struct.vm_object = type <{ %struct.mtx, %1, %2, %1, %3, %struct.vm_page*, i64, i32, i32, i32, i8, i8, i16, i16, i16, i32, %struct.vm_object*, i64, %1, %5, %struct.vm_page*, i8*, %union.anon, %struct.uidinfo*, i64 }> + %struct.vm_page = type <{ %3, %3, %struct.vm_page*, %struct.vm_page*, %struct.vm_object*, i64, i64, %struct.md_page, i8, i8, i16, i8, i8, i16, i32, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }> + %struct.vm_reserv = type opaque + %struct.vmspace = type <{ %struct.vm_map, %struct.shmmap_state*, i64, i64, i64, i64, i8*, i8*, i8*, i32, i8, i8, i8, i8, %struct.pmap }> + %struct.vnet = type opaque + %struct.vnode = type <{ i32, i8, i8, i8, i8, i8*, %struct.vop_vector*, i8*, %struct.mount*, %struct.freelst, %union.sigval, %struct.freelst, i32, i8, i8, i8, i8, %21, %22, %struct.namecache*, i64, i64, i64, i32, i8, i8, i8, i8, %struct.lock, %struct.mtx, %struct.lock*, i32, i32, i64, i64, i32, i8, i8, i8, i8, %struct.freelst, %struct.bufobj, %struct.vpollinfo*, %struct.label*, %struct.lockf* }> + %struct.vnodeop_desc = type <{ i8*, i32, i8, i8, i8, i8, i32 (%struct.vop_generic_args*)*, i32*, i32, i32, i32, i32 }> + %struct.vop_access_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread* }> + %struct.vop_aclcheck_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.acl*, %struct.ucred*, %struct.thread* }> + %struct.vop_advlock_args = type <{ %struct.vop_generic_args, %struct.vnode*, i8*, i32, i8, i8, i8, i8, %struct.flock*, i32, i8, i8, i8, i8 }> + %struct.vop_advlockasync_args = type <{ %struct.vop_generic_args, %struct.vnode*, i8*, i32, i8, i8, i8, i8, %struct.flock*, i32, i8, i8, i8, i8, %struct.task*, i8** }> + %struct.vop_bmap_args = type <{ %struct.vop_generic_args, %struct.vnode*, i64, %struct.bufobj**, i64*, i32*, i32* }> + %struct.vop_cachedlookup_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname* }> + %struct.vop_create_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname*, %struct.vattr* }> + %struct.vop_deleteextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.ucred*, %struct.thread* }> + %struct.vop_fsync_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.thread* }> + %struct.vop_generic_args = type <{ %struct.vnodeop_desc* }> + %struct.vop_getattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vattr*, %struct.ucred* }> + %struct.vop_getextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.uio*, i64*, %struct.ucred*, %struct.thread* }> + %struct.vop_getpages_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vm_page**, i32, i32, i64 }> + %struct.vop_getwritemount_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.mount** }> + %struct.vop_inactive_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.thread* }> + %struct.vop_ioctl_args = type <{ %struct.vop_generic_args, %struct.vnode*, i64, i8*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread* }> + %struct.vop_islocked_args = type <{ %struct.vop_generic_args, %struct.vnode* }> + %struct.vop_kqfilter_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.knote* }> + %struct.vop_link_args = type <{ %struct.vop_generic_args, %struct.vnode*, 
%struct.vnode*, %struct.componentname* }> + %struct.vop_listextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.uio*, i64*, %struct.ucred*, %struct.thread* }> + %struct.vop_lock1_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8 }> + %struct.vop_open_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread*, %struct.file* }> + %struct.vop_openextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.ucred*, %struct.thread* }> + %struct.vop_pathconf_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i64* }> + %struct.vop_putpages_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vm_page**, i32, i32, i32*, i64 }> + %struct.vop_read_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, i32, i8, i8, i8, i8, %struct.ucred* }> + %struct.vop_readdir_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, %struct.ucred*, i32*, i32*, i64** }> + %struct.vop_readlink_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, %struct.ucred* }> + %struct.vop_reallocblks_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.cluster_save* }> + %struct.vop_rename_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode*, %struct.componentname*, %struct.vnode*, %struct.vnode*, %struct.componentname* }> + %struct.vop_revoke_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8 }> + %struct.vop_setextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.uio*, %struct.ucred*, %struct.thread* }> + %struct.vop_setlabel_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.label*, %struct.ucred*, %struct.thread* }> + %struct.vop_strategy_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.buf* }> + %struct.vop_symlink_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname*, %struct.vattr*, i8* }> + %struct.vop_vector = type <{ %struct.vop_vector*, i32 (%struct.vop_generic_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_cachedlookup_args*)*, i32 (%struct.vop_cachedlookup_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_whiteout_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_open_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_getattr_args*)*, i32 (%struct.vop_getattr_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_read_args*)*, i32 (%struct.vop_read_args*)*, i32 (%struct.vop_ioctl_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_kqfilter_args*)*, i32 (%struct.vop_revoke_args*)*, i32 (%struct.vop_fsync_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_rename_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_symlink_args*)*, i32 (%struct.vop_readdir_args*)*, i32 (%struct.vop_readlink_args*)*, i32 (%struct.vop_inactive_args*)*, i32 (%struct.vop_inactive_args*)*, i32 (%struct.vop_lock1_args*)*, i32 (%struct.vop_revoke_args*)*, i32 (%struct.vop_bmap_args*)*, i32 (%struct.vop_strategy_args*)*, i32 (%struct.vop_getwritemount_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_pathconf_args*)*, i32 (%struct.vop_advlock_args*)*, i32 (%struct.vop_advlockasync_args*)*, i32 (%struct.vop_reallocblks_args*)*, i32 
(%struct.vop_getpages_args*)*, i32 (%struct.vop_putpages_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_getextattr_args*)*, i32 (%struct.vop_listextattr_args*)*, i32 (%struct.vop_openextattr_args*)*, i32 (%struct.vop_deleteextattr_args*)*, i32 (%struct.vop_setextattr_args*)*, i32 (%struct.vop_setlabel_args*)*, i32 (%struct.vop_vptofh_args*)*, i32 (%struct.vop_vptocnp_args*)* }> + %struct.vop_vptocnp_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.ucred*, i8*, i32* }> + %struct.vop_vptofh_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.fid* }> + %struct.vop_whiteout_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.componentname*, i32, i8, i8, i8, i8 }> + %struct.vpollinfo = type <{ %struct.mtx, %struct.selinfo, i16, i16, i8, i8, i8, i8 }> + %struct.witness = type opaque + %struct.workhead = type <{ %struct.worklist* }> + %struct.worklist = type opaque + %union.anon = type <{ [16 x i8] }> + %union.pager_info = type <{ [4 x i8] }> + %union.sigval = type <{ [8 x i8] }> + +define i32 @vlrureclaim(%struct.mount* %mp) nounwind { +entry: + br i1 undef, label %if.then11, label %do.end + +if.then11: ; preds = %entry + br label %do.end + +do.end: ; preds = %if.then11, %entry + br label %while.cond.outer + +while.cond.outer: ; preds = %while.cond.outer.backedge, %do.end + %count.0.ph = phi i32 [ undef, %do.end ], [ undef, %while.cond.outer.backedge ] ; [#uses=1] + br label %while.cond + +while.cond: ; preds = %next_iter, %while.cond.outer + %count.0 = phi i32 [ %dec, %next_iter ], [ %count.0.ph, %while.cond.outer ] ; [#uses=2] + %cmp21 = icmp eq i32 %count.0, 0 ; [#uses=1] + br i1 %cmp21, label %do.body288.loopexit4, label %while.body + +while.body: ; preds = %while.cond + br label %while.cond27 + +while.cond27: ; preds = %while.body36, %while.body + br i1 undef, label %do.body288.loopexit, label %land.rhs + +land.rhs: ; preds = %while.cond27 + br i1 undef, label %while.body36, label %while.end + +while.body36: ; preds = %land.rhs + br label %while.cond27 + +while.end: ; preds = %land.rhs + br i1 undef, label %do.body288.loopexit4, label %do.body46 + +do.body46: ; preds = %while.end + br i1 undef, label %if.else64, label %if.then53 + +if.then53: ; preds = %do.body46 + br label %if.end72 + +if.else64: ; preds = %do.body46 + br label %if.end72 + +if.end72: ; preds = %if.else64, %if.then53 + %dec = add i32 %count.0, -1 ; [#uses=2] + br i1 undef, label %next_iter, label %if.end111 + +if.end111: ; preds = %if.end72 + br i1 undef, label %lor.lhs.false, label %do.body145 + +lor.lhs.false: ; preds = %if.end111 + br i1 undef, label %lor.lhs.false122, label %do.body145 + +lor.lhs.false122: ; preds = %lor.lhs.false + br i1 undef, label %lor.lhs.false128, label %do.body145 + +lor.lhs.false128: ; preds = %lor.lhs.false122 + br i1 undef, label %do.body162, label %land.lhs.true + +land.lhs.true: ; preds = %lor.lhs.false128 + br i1 undef, label %do.body145, label %do.body162 + +do.body145: ; preds = %land.lhs.true, %lor.lhs.false122, %lor.lhs.false, %if.end111 + br i1 undef, label %if.then156, label %next_iter + +if.then156: ; preds = %do.body145 + br label %next_iter + +do.body162: ; preds = %land.lhs.true, %lor.lhs.false128 + br i1 undef, label %if.then173, label %do.end177 + +if.then173: ; preds = %do.body162 + br label %do.end177 + +do.end177: ; preds = %if.then173, %do.body162 + br i1 undef, label %do.body185, label %if.then182 + 
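+; (All of the branch conditions in this reduced function are undef; the test
+; exercises only the control-flow structure, not the values computed.)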
+if.then182: ; preds = %do.end177 + br label %next_iter_mntunlocked + +do.body185: ; preds = %do.end177 + br i1 undef, label %if.then196, label %do.end202 + +if.then196: ; preds = %do.body185 + br label %do.end202 + +do.end202: ; preds = %if.then196, %do.body185 + br i1 undef, label %lor.lhs.false207, label %if.then231 + +lor.lhs.false207: ; preds = %do.end202 + br i1 undef, label %lor.lhs.false214, label %if.then231 + +lor.lhs.false214: ; preds = %lor.lhs.false207 + br i1 undef, label %do.end236, label %land.lhs.true221 + +land.lhs.true221: ; preds = %lor.lhs.false214 + br i1 undef, label %if.then231, label %do.end236 + +if.then231: ; preds = %land.lhs.true221, %lor.lhs.false207, %do.end202 + br label %next_iter_mntunlocked + +do.end236: ; preds = %land.lhs.true221, %lor.lhs.false214 + br label %next_iter_mntunlocked + +next_iter_mntunlocked: ; preds = %do.end236, %if.then231, %if.then182 + br i1 undef, label %yield, label %do.body269 + +next_iter: ; preds = %if.then156, %do.body145, %if.end72 + %rem2482 = and i32 %dec, 255 ; [#uses=1] + %cmp249 = icmp eq i32 %rem2482, 0 ; [#uses=1] + br i1 %cmp249, label %do.body253, label %while.cond + +do.body253: ; preds = %next_iter + br i1 undef, label %if.then264, label %yield + +if.then264: ; preds = %do.body253 + br label %yield + +yield: ; preds = %if.then264, %do.body253, %next_iter_mntunlocked + br label %do.body269 + +do.body269: ; preds = %yield, %next_iter_mntunlocked + br i1 undef, label %if.then280, label %while.cond.outer.backedge + +if.then280: ; preds = %do.body269 + br label %while.cond.outer.backedge + +while.cond.outer.backedge: ; preds = %if.then280, %do.body269 + br label %while.cond.outer + +do.body288.loopexit: ; preds = %while.cond27 + br label %do.body288 + +do.body288.loopexit4: ; preds = %while.end, %while.cond + br label %do.body288 + +do.body288: ; preds = %do.body288.loopexit4, %do.body288.loopexit + br i1 undef, label %if.then299, label %do.end303 + +if.then299: ; preds = %do.body288 + br label %do.end303 + +do.end303: ; preds = %if.then299, %do.body288 + ret i32 undef +} diff --git a/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/test/Analysis/ScalarEvolution/avoid-smax-0.ll index ce7ee7791d581..b733d6acb5040 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-0.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-0.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output | grep {Loop bb3: backedge-taken count is (-1 + %n)} +; RUN: opt < %s -scalar-evolution -analyze -disable-output | grep {Loop bb3: backedge-taken count is (-1 + %n)} ; We don't want to use a max in the trip count expression in ; this testcase. 
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll index 9270b6e6c8901..0bc9ce8241a88 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | opt -indvars | llvm-dis > %t +; RUN: opt < %s -indvars -S > %t ; RUN: grep select %t | count 2 -; RUN: grep {icmp ne i32.\* %w } %t +; RUN: grep {icmp ne i32.\* %w } %t ; Indvars should be able to insert a canonical induction variable ; for the bb6 loop without using a maximum calculation (icmp, select) diff --git a/test/Analysis/ScalarEvolution/div-overflow.ll b/test/Analysis/ScalarEvolution/div-overflow.ll index cb64b856a7779..0c01044b977f8 100644 --- a/test/Analysis/ScalarEvolution/div-overflow.ll +++ b/test/Analysis/ScalarEvolution/div-overflow.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \ +; RUN: opt < %s -scalar-evolution -analyze -disable-output \ ; RUN: | grep {\\--> ((-128 \\* %a) /u -128)} ; Don't let ScalarEvolution fold this div away. diff --git a/test/Analysis/ScalarEvolution/do-loop.ll b/test/Analysis/ScalarEvolution/do-loop.ll index 85c38e4f1c5ab..f8d7da7c9a0a9 100644 --- a/test/Analysis/ScalarEvolution/do-loop.ll +++ b/test/Analysis/ScalarEvolution/do-loop.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep smax +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep smax ; PR1614 define i32 @f(i32 %x, i32 %y) { diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index 05674149da825..506401dafea55 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -1,5 +1,7 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ -; RUN: | grep {\{%d,+,4\}} +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ +; RUN: | grep {\{%d,+,\[^\{\}\]\*\}} + +; ScalarEvolution should be able to understand the loop and eliminate the casts. define void @foo(i32* nocapture %d, i32 %n) nounwind { entry: diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll new file mode 100644 index 0000000000000..1e165bf622262 --- /dev/null +++ b/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -S -analyze -scalar-evolution -disable-output | FileCheck %s + +; ScalarEvolution should be able to fold away the sign-extensions +; on this loop with a primary induction variable incremented with +; a nsw add of 2. 
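+; (Sketch of the reasoning: the increment %16 = add nsw i32 %i.01, 2 promises
+; no signed overflow, so the 32-bit addrec {0,+,2} cannot wrap, and its
+; sign-extension folds to the 64-bit addrec {0,+,2} expected by the CHECK
+; lines below.)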
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define void @foo(i32 %n, double* nocapture %d, double* nocapture %q) nounwind { +entry: + %0 = icmp sgt i32 %n, 0 ; [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb.nph, %bb1 + %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; [#uses=5] + +; CHECK: %1 = sext i32 %i.01 to i64 +; CHECK: --> {0,+,2} + %1 = sext i32 %i.01 to i64 ; [#uses=1] + +; CHECK: %2 = getelementptr inbounds double* %d, i64 %1 +; CHECK: --> {%d,+,16} + %2 = getelementptr inbounds double* %d, i64 %1 ; [#uses=1] + + %3 = load double* %2, align 8 ; [#uses=1] + %4 = sext i32 %i.01 to i64 ; [#uses=1] + %5 = getelementptr inbounds double* %q, i64 %4 ; [#uses=1] + %6 = load double* %5, align 8 ; [#uses=1] + %7 = or i32 %i.01, 1 ; [#uses=1] + +; CHECK: %8 = sext i32 %7 to i64 +; CHECK: --> {1,+,2} + %8 = sext i32 %7 to i64 ; [#uses=1] + +; CHECK: %9 = getelementptr inbounds double* %q, i64 %8 +; CHECK: {(8 + %q),+,16} + %9 = getelementptr inbounds double* %q, i64 %8 ; [#uses=1] + +; Artificially repeat the above three instructions, this time using +; add nsw instead of or. + %t7 = add nsw i32 %i.01, 1 ; [#uses=1] + +; CHECK: %t8 = sext i32 %t7 to i64 +; CHECK: --> {1,+,2} + %t8 = sext i32 %t7 to i64 ; [#uses=1] + +; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8 +; CHECK: {(8 + %q),+,16} + %t9 = getelementptr inbounds double* %q, i64 %t8 ; [#uses=1] + + %10 = load double* %9, align 8 ; [#uses=1] + %11 = fadd double %6, %10 ; [#uses=1] + %12 = fadd double %11, 3.200000e+00 ; [#uses=1] + %13 = fmul double %3, %12 ; [#uses=1] + %14 = sext i32 %i.01 to i64 ; [#uses=1] + %15 = getelementptr inbounds double* %d, i64 %14 ; [#uses=1] + store double %13, double* %15, align 8 + %16 = add nsw i32 %i.01, 2 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %17 = icmp slt i32 %16, %n ; [#uses=1] + br i1 %17, label %bb, label %bb1.return_crit_edge + +bb1.return_crit_edge: ; preds = %bb1 + br label %return + +return: ; preds = %bb1.return_crit_edge, %entry + ret void +} + +; CHECK: Loop bb: backedge-taken count is ((-1 + %n) /u 2) +; CHECK: Loop bb: max backedge-taken count is 1073741823 diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll new file mode 100644 index 0000000000000..c31edabf38eee --- /dev/null +++ b/test/Analysis/ScalarEvolution/nsw.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep { --> {.*,+,.*}} | count 8 + +; The addrecs in this loop are analyzable only by using nsw information. 
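+; (The loop below exits on a load-dependent fcmp, so the trip count is
+; unknown; without the nsw flag on %tmp8 = add nsw i32 %i.01, 1,
+; ScalarEvolution could not rule out signed wraparound and the sexts of the
+; induction variable would not fold into addrecs. The RUN line counts the
+; resulting addrec-valued expressions; there should be 8 of them.)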
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + +define void @foo(double* %p) nounwind { +entry: + %tmp = load double* %p, align 8 ; [#uses=1] + %tmp1 = fcmp ogt double %tmp, 2.000000e+00 ; [#uses=1] + br i1 %tmp1, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %i.01 = phi i32 [ %tmp8, %bb1 ], [ 0, %bb.nph ] ; [#uses=3] + %tmp2 = sext i32 %i.01 to i64 ; [#uses=1] + %tmp3 = getelementptr double* %p, i64 %tmp2 ; [#uses=1] + %tmp4 = load double* %tmp3, align 8 ; [#uses=1] + %tmp5 = fmul double %tmp4, 9.200000e+00 ; [#uses=1] + %tmp6 = sext i32 %i.01 to i64 ; [#uses=1] + %tmp7 = getelementptr double* %p, i64 %tmp6 ; [#uses=1] + store double %tmp5, double* %tmp7, align 8 + %tmp8 = add nsw i32 %i.01, 1 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %phitmp = sext i32 %tmp8 to i64 ; [#uses=1] + %tmp9 = getelementptr double* %p, i64 %phitmp ; [#uses=1] + %tmp10 = load double* %tmp9, align 8 ; [#uses=1] + %tmp11 = fcmp ogt double %tmp10, 2.000000e+00 ; [#uses=1] + br i1 %tmp11, label %bb, label %bb1.return_crit_edge + +bb1.return_crit_edge: ; preds = %bb1 + br label %return + +return: ; preds = %bb1.return_crit_edge, %entry + ret void +} diff --git a/test/Analysis/ScalarEvolution/pointer-sign-bits.ll b/test/Analysis/ScalarEvolution/pointer-sign-bits.ll index 05cb81b3ba6bf..4de006c4ed182 100644 --- a/test/Analysis/ScalarEvolution/pointer-sign-bits.ll +++ b/test/Analysis/ScalarEvolution/pointer-sign-bits.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output +; RUN: opt < %s -analyze -scalar-evolution -disable-output target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" %JavaObject = type { [0 x i32 (...)*]*, i8* } diff --git a/test/Analysis/ScalarEvolution/pr3909.ll b/test/Analysis/ScalarEvolution/pr3909.ll index 80720c724afb8..10e328ddf7e74 100644 --- a/test/Analysis/ScalarEvolution/pr3909.ll +++ b/test/Analysis/ScalarEvolution/pr3909.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -indvars -disable-output +; RUN: opt < %s -indvars -disable-output ; PR 3909 diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll new file mode 100644 index 0000000000000..0dcf52977a0a1 --- /dev/null +++ b/test/Analysis/ScalarEvolution/scev-aa.ll @@ -0,0 +1,194 @@ +; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \ +; RUN: |& FileCheck %s + +; At the time of this writing, all of these CHECK lines are cases that +; plain -basicaa misses. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + +; p[i] and p[i+1] don't alias. + +; CHECK: Function: loop: 3 pointers, 0 call sites +; CHECK: NoAlias: double* %pi, double* %pi.next + +define void @loop(double* nocapture %p, i64 %n) nounwind { +entry: + %j = icmp sgt i64 %n, 0 + br i1 %j, label %bb, label %return + +bb: + %i = phi i64 [ 0, %entry ], [ %i.next, %bb ] + %pi = getelementptr double* %p, i64 %i + %i.next = add i64 %i, 1 + %pi.next = getelementptr double* %p, i64 %i.next + %x = load double* %pi + %y = load double* %pi.next + %z = fmul double %x, %y + store double %z, double* %pi + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %bb + +return: + ret void +} + +; Slightly more involved: p[j][i], p[j][i+1], and p[j+1][i] don't alias. 
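+; (The row stride is the constant 91, so the three flattened offsets are
+; i+j, i+j+1, and i+j+91, which can never be equal to one another; that is
+; enough for ScalarEvolution to prove the NoAlias results checked below.)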
+ +; CHECK: Function: nestedloop: 4 pointers, 0 call sites +; CHECK: NoAlias: double* %pi.j, double* %pi.next.j +; CHECK: NoAlias: double* %pi.j, double* %pi.j.next +; CHECK: NoAlias: double* %pi.j.next, double* %pi.next.j + +define void @nestedloop(double* nocapture %p, i64 %m) nounwind { +entry: + %k = icmp sgt i64 %m, 0 + br i1 %k, label %guard, label %return + +guard: + %l = icmp sgt i64 91, 0 + br i1 %l, label %outer.loop, label %return + +outer.loop: + %j = phi i64 [ 0, %guard ], [ %j.next, %outer.latch ] + br label %bb + +bb: + %i = phi i64 [ 0, %outer.loop ], [ %i.next, %bb ] + %i.next = add i64 %i, 1 + + %e = add i64 %i, %j + %pi.j = getelementptr double* %p, i64 %e + %f = add i64 %i.next, %j + %pi.next.j = getelementptr double* %p, i64 %f + %x = load double* %pi.j + %y = load double* %pi.next.j + %z = fmul double %x, %y + store double %z, double* %pi.j + + %o = add i64 %j, 91 + %g = add i64 %i, %o + %pi.j.next = getelementptr double* %p, i64 %g + %a = load double* %pi.j.next + %b = fmul double %x, %a + store double %b, double* %pi.j.next + + %exitcond = icmp eq i64 %i.next, 91 + br i1 %exitcond, label %outer.latch, label %bb + +outer.latch: + %j.next = add i64 %j, 91 + %h = icmp eq i64 %j.next, %m + br i1 %h, label %return, label %outer.loop + +return: + ret void +} + +; Even more involved: same as nestedloop, but with a variable extent. +; When n is 1, p[j+1][i] does alias p[j][i+1], and there's no way to +; prove whether n will be greater than 1, so that relation will always +; be MayAlias. The loop is guarded by an n > 0 test though, so +; p[j+1][i] and p[j][i] can theoretically be determined to be NoAlias; +; however, the analysis currently doesn't do that. +; TODO: Make the analysis smarter and turn that MayAlias into a NoAlias. + +; CHECK: Function: nestedloop_more: 4 pointers, 0 call sites +; CHECK: NoAlias: double* %pi.j, double* %pi.next.j +; CHECK: MayAlias: double* %pi.j, double* %pi.j.next + +define void @nestedloop_more(double* nocapture %p, i64 %n, i64 %m) nounwind { +entry: + %k = icmp sgt i64 %m, 0 + br i1 %k, label %guard, label %return + +guard: + %l = icmp sgt i64 %n, 0 + br i1 %l, label %outer.loop, label %return + +outer.loop: + %j = phi i64 [ 0, %guard ], [ %j.next, %outer.latch ] + br label %bb + +bb: + %i = phi i64 [ 0, %outer.loop ], [ %i.next, %bb ] + %i.next = add i64 %i, 1 + + %e = add i64 %i, %j + %pi.j = getelementptr double* %p, i64 %e + %f = add i64 %i.next, %j + %pi.next.j = getelementptr double* %p, i64 %f + %x = load double* %pi.j + %y = load double* %pi.next.j + %z = fmul double %x, %y + store double %z, double* %pi.j + + %o = add i64 %j, %n + %g = add i64 %i, %o + %pi.j.next = getelementptr double* %p, i64 %g + %a = load double* %pi.j.next + %b = fmul double %x, %a + store double %b, double* %pi.j.next + + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %outer.latch, label %bb + +outer.latch: + %j.next = add i64 %j, %n + %h = icmp eq i64 %j.next, %m + br i1 %h, label %return, label %outer.loop + +return: + ret void +} + +; ScalarEvolution expands field offsets into constants, which allows it to +; do aggressive analysis. Contrast this with BasicAA, which works by +; recognizing GEP idioms. 
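+; (In @foo and @bar below, the pointers %Z, %C, %X, and %Y all resolve to
+; byte offset 8 of the alloca, the i32 just past the nested %struct.B, even
+; though they are built through different bitcast and GEP chains; reducing
+; each pointer to base plus constant offset is what yields the MustAlias
+; results.)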
+ +%struct.A = type { %struct.B, i32, i32 } +%struct.B = type { double } + +; CHECK: Function: foo: 7 pointers, 0 call sites +; CHECK: NoAlias: %struct.B* %B, i32* %Z +; CHECK: NoAlias: %struct.B* %B, %struct.B* %C +; CHECK: MustAlias: %struct.B* %C, i32* %Z +; CHECK: NoAlias: %struct.B* %B, i32* %X +; CHECK: MustAlias: i32* %X, i32* %Z +; CHECK: MustAlias: %struct.B* %C, i32* %Y +; CHECK: MustAlias: i32* %X, i32* %Y + +define void @foo() { +entry: + %A = alloca %struct.A + %B = getelementptr %struct.A* %A, i32 0, i32 0 + %Q = bitcast %struct.B* %B to %struct.A* + %Z = getelementptr %struct.A* %Q, i32 0, i32 1 + %C = getelementptr %struct.B* %B, i32 1 + %X = bitcast %struct.B* %C to i32* + %Y = getelementptr %struct.A* %A, i32 0, i32 1 + ret void +} + +; CHECK: Function: bar: 7 pointers, 0 call sites +; CHECK: NoAlias: %struct.B* %N, i32* %P +; CHECK: NoAlias: %struct.B* %N, %struct.B* %R +; CHECK: MustAlias: %struct.B* %R, i32* %P +; CHECK: NoAlias: %struct.B* %N, i32* %W +; CHECK: MustAlias: i32* %P, i32* %W +; CHECK: MustAlias: %struct.B* %R, i32* %V +; CHECK: MustAlias: i32* %V, i32* %W + +define void @bar() { + %M = alloca %struct.A + %N = getelementptr %struct.A* %M, i32 0, i32 0 + %O = bitcast %struct.B* %N to %struct.A* + %P = getelementptr %struct.A* %O, i32 0, i32 1 + %R = getelementptr %struct.B* %N, i32 1 + %W = bitcast %struct.B* %R to i32* + %V = getelementptr %struct.A* %M, i32 0, i32 1 + ret void +} + +; CHECK: 13 no alias responses +; CHECK: 26 may alias responses +; CHECK: 18 must alias responses diff --git a/test/Analysis/ScalarEvolution/sext-inreg.ll b/test/Analysis/ScalarEvolution/sext-inreg.ll index 8a88f0f7d9678..16128354aeb41 100644 --- a/test/Analysis/ScalarEvolution/sext-inreg.ll +++ b/test/Analysis/ScalarEvolution/sext-inreg.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output > %t +; RUN: opt < %s -analyze -scalar-evolution -disable-output > %t ; RUN: grep {sext i57 \{0,+,199\} to i64} %t | count 1 ; RUN: grep {sext i59 \{0,+,199\} to i64} %t | count 1 diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll index 17f2dffdbfcf6..8f887c4a57eba 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-0.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \ +; RUN: opt < %s -disable-output -scalar-evolution -analyze \ ; RUN: | grep { --> \{-128,+,1\} Exits: 127} | count 5 ; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll index ca6ad0aaba121..02c3206c6fe76 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-1.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -disable-output -scalar-evolution -analyze \ +; RUN: opt < %s -disable-output -scalar-evolution -analyze \ ; RUN: | grep { --> (sext i. 
\{.\*,+,.\*\} to i64)} | count 5 ; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases diff --git a/test/Analysis/ScalarEvolution/sext-iv-2.ll b/test/Analysis/ScalarEvolution/sext-iv-2.ll new file mode 100644 index 0000000000000..b25c237958c03 --- /dev/null +++ b/test/Analysis/ScalarEvolution/sext-iv-2.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -analyze -scalar-evolution -disable-output | FileCheck %s + +; CHECK: %tmp3 = sext i8 %tmp2 to i32 +; CHECK: --> (sext i8 {0,+,1} to i32) Exits: -1 +; CHECK: %tmp4 = mul i32 %tmp3, %i.02 +; CHECK: --> ((sext i8 {0,+,1} to i32) * {0,+,1}) Exits: {0,+,-1} + +; These sexts are not foldable. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + +@table = common global [32 x [256 x i32]] zeroinitializer, align 32 ; <[32 x [256 x i32]]*> [#uses=2] + +define i32 @main() nounwind { +entry: + br i1 false, label %bb5, label %bb.nph3 + +bb.nph3: ; preds = %entry + br label %bb + +bb: ; preds = %bb4, %bb.nph3 + %i.02 = phi i32 [ %tmp10, %bb4 ], [ 0, %bb.nph3 ] ; [#uses=3] + br i1 false, label %bb3, label %bb.nph + +bb.nph: ; preds = %bb + br label %bb1 + +bb1: ; preds = %bb2, %bb.nph + %j.01 = phi i32 [ %tmp8, %bb2 ], [ 0, %bb.nph ] ; [#uses=3] + %tmp2 = trunc i32 %j.01 to i8 ; [#uses=1] + %tmp3 = sext i8 %tmp2 to i32 ; [#uses=1] + %tmp4 = mul i32 %tmp3, %i.02 ; [#uses=1] + %tmp5 = sext i32 %i.02 to i64 ; [#uses=1] + %tmp6 = sext i32 %j.01 to i64 ; [#uses=1] + %tmp7 = getelementptr [32 x [256 x i32]]* @table, i64 0, i64 %tmp5, i64 %tmp6 ; [#uses=1] + store i32 %tmp4, i32* %tmp7, align 4 + %tmp8 = add i32 %j.01, 1 ; [#uses=2] + br label %bb2 + +bb2: ; preds = %bb1 + %phitmp1 = icmp sgt i32 %tmp8, 255 ; [#uses=1] + br i1 %phitmp1, label %bb2.bb3_crit_edge, label %bb1 + +bb2.bb3_crit_edge: ; preds = %bb2 + br label %bb3 + +bb3: ; preds = %bb2.bb3_crit_edge, %bb + %tmp10 = add i32 %i.02, 1 ; [#uses=2] + br label %bb4 + +bb4: ; preds = %bb3 + %phitmp = icmp sgt i32 %tmp10, 31 ; [#uses=1] + br i1 %phitmp, label %bb4.bb5_crit_edge, label %bb + +bb4.bb5_crit_edge: ; preds = %bb4 + br label %bb5 + +bb5: ; preds = %bb4.bb5_crit_edge, %entry + %tmp12 = load i32* getelementptr ([32 x [256 x i32]]* @table, i64 0, i64 9, i64 132), align 16 ; [#uses=1] + %tmp13 = icmp eq i32 %tmp12, -1116 ; [#uses=1] + br i1 %tmp13, label %bb7, label %bb6 + +bb6: ; preds = %bb5 + call void @abort() noreturn nounwind + unreachable + +bb7: ; preds = %bb5 + br label %return + +return: ; preds = %bb7 + ret i32 0 +} + +declare void @abort() noreturn nounwind diff --git a/test/Analysis/ScalarEvolution/smax.ll b/test/Analysis/ScalarEvolution/smax.ll index 366dfdee71460..39de8d6c5a791 100644 --- a/test/Analysis/ScalarEvolution/smax.ll +++ b/test/Analysis/ScalarEvolution/smax.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep smax | count 2 -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep smax | count 2 +; RUN: opt < %s -analyze -scalar-evolution -disable-output | grep \ ; RUN: {%. smax %. 
smax %.} ; PR1614 diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll index c5be858d1ee91..66cc304918ae5 100644 --- a/test/Analysis/ScalarEvolution/trip-count.ll +++ b/test/Analysis/ScalarEvolution/trip-count.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 10000} ; PR1101 diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll index 374a5621cebfd..bbe64358d4e54 100644 --- a/test/Analysis/ScalarEvolution/trip-count2.ll +++ b/test/Analysis/ScalarEvolution/trip-count2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output | \ ; RUN: grep {backedge-taken count is 4} ; PR1101 diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll index 35c86835f3ac4..240983178b403 100644 --- a/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/test/Analysis/ScalarEvolution/trip-count3.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -analyze -disable-output \ +; RUN: opt < %s -scalar-evolution -analyze -disable-output \ ; RUN: | grep {Loop bb3\\.i: Unpredictable backedge-taken count\\.} ; ScalarEvolution can't compute a trip count because it doesn't know if diff --git a/test/Analysis/ScalarEvolution/trip-count4.ll b/test/Analysis/ScalarEvolution/trip-count4.ll index 49c4e133b4679..e8d59cf550a83 100644 --- a/test/Analysis/ScalarEvolution/trip-count4.ll +++ b/test/Analysis/ScalarEvolution/trip-count4.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: | grep {sext.*trunc.*Exits: 11} ; ScalarEvolution should be able to compute a loop exit value for %indvar.i8. 
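; (That is, the sext-of-trunc of the induction variable should have a
; computable value on loop exit, namely the constant 11 that the grep
; pattern above looks for.)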
diff --git a/test/Analysis/ScalarEvolution/trip-count5.ll b/test/Analysis/ScalarEvolution/trip-count5.ll index 822dc2638f5a0..2512a966ed206 100644 --- a/test/Analysis/ScalarEvolution/trip-count5.ll +++ b/test/Analysis/ScalarEvolution/trip-count5.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output > %t +; RUN: opt < %s -analyze -scalar-evolution -disable-output > %t ; RUN: grep sext %t | count 2 ; RUN: not grep {(sext} %t diff --git a/test/Analysis/ScalarEvolution/trip-count6.ll b/test/Analysis/ScalarEvolution/trip-count6.ll index a6674092e7995..5833286317cea 100644 --- a/test/Analysis/ScalarEvolution/trip-count6.ll +++ b/test/Analysis/ScalarEvolution/trip-count6.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -disable-output -scalar-evolution \ +; RUN: opt < %s -analyze -disable-output -scalar-evolution \ ; RUN: | grep {max backedge-taken count is 1\$} @mode_table = global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1] diff --git a/test/Analysis/ScalarEvolution/trip-count7.ll b/test/Analysis/ScalarEvolution/trip-count7.ll index cea826ef1d303..0cd8d7c4a9a3a 100644 --- a/test/Analysis/ScalarEvolution/trip-count7.ll +++ b/test/Analysis/ScalarEvolution/trip-count7.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output \ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ ; RUN: | grep {Loop bb7.i: Unpredictable backedge-taken count\\.} target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/test/Analysis/ScalarEvolution/trip-count8.ll b/test/Analysis/ScalarEvolution/trip-count8.ll new file mode 100644 index 0000000000000..c49f5ceea7042 --- /dev/null +++ b/test/Analysis/ScalarEvolution/trip-count8.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ +; RUN: | grep {Loop for\\.body: backedge-taken count is (-1 + \[%\]ecx)} +; PR4599 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define i32 @foo(i32 %ecx) nounwind { +entry: + %cmp2 = icmp eq i32 %ecx, 0 ; [#uses=1] + br i1 %cmp2, label %for.end, label %bb.nph + +for.cond: ; preds = %for.inc + %cmp = icmp ult i32 %inc, %ecx ; [#uses=1] + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.cond + %phitmp = add i32 %i.01, 2 ; [#uses=1] + br label %for.end + +bb.nph: ; preds = %entry + br label %for.body + +for.body: ; preds = %bb.nph, %for.cond + %i.01 = phi i32 [ %inc, %for.cond ], [ 0, %bb.nph ] ; [#uses=3] + %call = call i32 @bar(i32 %i.01) nounwind ; [#uses=0] + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %i.01, 1 ; [#uses=2] + br label %for.cond + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %i.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 1, %entry ] ; [#uses=1] + ret i32 %i.0.lcssa +} + +declare i32 @bar(i32) diff --git a/test/Analysis/ScalarEvolution/xor-and.ll b/test/Analysis/ScalarEvolution/xor-and.ll index 843052456a800..c8339d7138d9d 100644 --- a/test/Analysis/ScalarEvolution/xor-and.ll +++ b/test/Analysis/ScalarEvolution/xor-and.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -scalar-evolution -disable-output -analyze \ +; RUN: opt < %s -scalar-evolution -disable-output -analyze \ ; RUN: | grep {\\--> (zext i4 (-8 + (trunc i64 (8 \\* %x) to i4)) to i64)} ; ScalarEvolution shouldn't try to analyze %z 
into something like diff --git a/test/Analysis/ScalarEvolution/zext-wrap.ll b/test/Analysis/ScalarEvolution/zext-wrap.ll new file mode 100644 index 0000000000000..9ff99be736a04 --- /dev/null +++ b/test/Analysis/ScalarEvolution/zext-wrap.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -analyze -scalar-evolution -disable-output \ +; RUN: | FileCheck %s +; PR4569 + +define i16 @main() nounwind { +entry: + br label %bb.i + +bb.i: ; preds = %bb.i, %entry + %l_95.0.i1 = phi i8 [ %tmp1, %bb.i ], [ 0, %entry ] + +; This cast shouldn't be folded into the addrec. +; CHECK: %tmp = zext i8 %l_95.0.i1 to i16 +; CHECK: --> (zext i8 {0,+,-1} to i16) Exits: 2 + + %tmp = zext i8 %l_95.0.i1 to i16 + + %tmp1 = add i8 %l_95.0.i1, -1 + %phitmp = icmp eq i8 %tmp1, 1 + br i1 %phitmp, label %bb1.i.func_36.exit_crit_edge, label %bb.i + +bb1.i.func_36.exit_crit_edge: + ret i16 %tmp +} diff --git a/test/Archive/extract.ll b/test/Archive/extract.ll new file mode 100644 index 0000000000000..3649714259b8c --- /dev/null +++ b/test/Archive/extract.ll @@ -0,0 +1,16 @@ +; This isn't really an assembly file, it's just here to run the test. + +; This test just makes sure that llvm-ar can extract bytecode members +; from archives in various styles. + +; RUN: llvm-ar x %p/GNU.a very_long_bytecode_file_name.bc +; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null + +; RUN: llvm-ar x %p/MacOSX.a very_long_bytecode_file_name.bc +; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc > /dev/null 2>/dev/null + +; RUN: llvm-ar x %p/SVR4.a very_long_bytecode_file_name.bc +; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null + +; RUN: llvm-ar x %p/xpg4.a very_long_bytecode_file_name.bc +; RUN: diff %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc >/dev/null 2>/dev/null diff --git a/test/Assembler/2002-01-24-BadSymbolTableAssert.ll b/test/Assembler/2002-01-24-BadSymbolTableAssert.ll index b2a48f5568932..7c49e2bd99358 100644 --- a/test/Assembler/2002-01-24-BadSymbolTableAssert.ll +++ b/test/Assembler/2002-01-24-BadSymbolTableAssert.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; This testcase failed due to a bad assertion in SymbolTable.cpp, removed in ; the 1.20 revision. 
Basically the symbol table assumed that if there was an diff --git a/test/Assembler/2002-01-24-ValueRefineAbsType.ll b/test/Assembler/2002-01-24-ValueRefineAbsType.ll index fb7c4fbf587f1..6e49674a32fb1 100644 --- a/test/Assembler/2002-01-24-ValueRefineAbsType.ll +++ b/test/Assembler/2002-01-24-ValueRefineAbsType.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; This testcase used to fail due to a lack of this diff in Value.cpp: ; diff -r1.16 Value.cpp diff --git a/test/Assembler/2002-02-19-TypeParsing.ll b/test/Assembler/2002-02-19-TypeParsing.ll index b7cadbdb05684..0df6784978413 100644 --- a/test/Assembler/2002-02-19-TypeParsing.ll +++ b/test/Assembler/2002-02-19-TypeParsing.ll @@ -1,3 +1,3 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null %Hosp = type { i32, i32, i32, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* } } diff --git a/test/Assembler/2002-03-08-NameCollision.ll b/test/Assembler/2002-03-08-NameCollision.ll index 539dfd6719887..b49789b2902d1 100644 --- a/test/Assembler/2002-03-08-NameCollision.ll +++ b/test/Assembler/2002-03-08-NameCollision.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; Method arguments were being checked for collisions at the global scope before ; the method object was created by the parser. Because of this, false diff --git a/test/Assembler/2002-03-08-NameCollision2.ll b/test/Assembler/2002-03-08-NameCollision2.ll index 57dc517d3bb03..1f7a4e16f8b3f 100644 --- a/test/Assembler/2002-03-08-NameCollision2.ll +++ b/test/Assembler/2002-03-08-NameCollision2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; Another name collision problem. 
Here the problem was that if a forward ; declaration for a method was found, that this would cause spurious conflicts diff --git a/test/Assembler/2002-04-04-PureVirtMethCall.ll b/test/Assembler/2002-04-04-PureVirtMethCall.ll index 4c63e44e9d210..29aed55a3a9f2 100644 --- a/test/Assembler/2002-04-04-PureVirtMethCall.ll +++ b/test/Assembler/2002-04-04-PureVirtMethCall.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null type { { \2 *, \4 ** }, { \2 *, \4 ** } diff --git a/test/Assembler/2002-04-04-PureVirtMethCall2.ll b/test/Assembler/2002-04-04-PureVirtMethCall2.ll index 553401f563672..a0968999a92dc 100644 --- a/test/Assembler/2002-04-04-PureVirtMethCall2.ll +++ b/test/Assembler/2002-04-04-PureVirtMethCall2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null %t = type { { \2*, \2 }, { \2*, \2 } diff --git a/test/Assembler/2002-04-05-TypeParsing.ll b/test/Assembler/2002-04-05-TypeParsing.ll index 82db75f1ce978..f725944b92107 100644 --- a/test/Assembler/2002-04-05-TypeParsing.ll +++ b/test/Assembler/2002-04-05-TypeParsing.ll @@ -1,3 +1,3 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null %Hosp = type { { \2*, { \2, %Hosp }* }, { \2*, { \2, %Hosp }* } } diff --git a/test/Assembler/2002-04-07-HexFloatConstants.ll b/test/Assembler/2002-04-07-HexFloatConstants.ll index 5c54b39b80815..b0d7cc0e43a4a 100644 --- a/test/Assembler/2002-04-07-HexFloatConstants.ll +++ b/test/Assembler/2002-04-07-HexFloatConstants.ll @@ -5,7 +5,7 @@ ; of the bug that was causing the Olden Health benchmark to output incorrect ; results! ; -; RUN: llvm-as < %s | opt -constprop | llvm-dis > %t.1 +; RUN: opt -constprop -S > %t.1 < %s ; RUN: llvm-as < %s | llvm-dis | llvm-as | opt -constprop | \ ; RUN: llvm-dis > %t.2 ; RUN: diff %t.1 %t.2 diff --git a/test/Assembler/2002-04-29-NameBinding.ll b/test/Assembler/2002-04-29-NameBinding.ll index 9665aef14323d..7960c20ddcea9 100644 --- a/test/Assembler/2002-04-29-NameBinding.ll +++ b/test/Assembler/2002-04-29-NameBinding.ll @@ -4,7 +4,7 @@ ; Check by running globaldce, which will remove the constant if there are ; no references to it! ; -; RUN: llvm-as < %s | opt -globaldce | llvm-dis | \ +; RUN: opt < %s -globaldce -S | \ ; RUN: not grep constant ; diff --git a/test/Assembler/2002-05-02-InvalidForwardRef.ll b/test/Assembler/2002-05-02-InvalidForwardRef.ll index 00a0a01537767..234545c2936fb 100644 --- a/test/Assembler/2002-05-02-InvalidForwardRef.ll +++ b/test/Assembler/2002-05-02-InvalidForwardRef.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; It looks like the assembler is not forward resolving the function declaraion ; correctly. diff --git a/test/Assembler/2002-05-02-ParseError.ll b/test/Assembler/2002-05-02-ParseError.ll index b198edfd31f5c..5a9817c1eaa80 100644 --- a/test/Assembler/2002-05-02-ParseError.ll +++ b/test/Assembler/2002-05-02-ParseError.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null %T = type i32 * diff --git a/test/Assembler/2002-07-08-HugePerformanceProblem.ll b/test/Assembler/2002-07-08-HugePerformanceProblem.ll index b9ebfbe2ad619..52c90af18c8db 100644 --- a/test/Assembler/2002-07-08-HugePerformanceProblem.ll +++ b/test/Assembler/2002-07-08-HugePerformanceProblem.ll @@ -1,6 +1,6 @@ ; This file takes about 48 __MINUTES__ to assemble using as. This is WAY too ; long. The type resolution code needs to be sped up a lot. 
-; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null %ALL_INTERSECTIONS_METHOD = type i32 (%OBJECT*, %RAY*, %ISTACK*)* %BBOX = type { %BBOX_VECT, %BBOX_VECT } %BBOX_TREE = type { i16, i16, %BBOX, %BBOX_TREE** } diff --git a/test/Assembler/2002-07-25-ParserAssertionFailure.ll b/test/Assembler/2002-07-25-ParserAssertionFailure.ll index 29c7c02ff8505..3c5c5546b4315 100644 --- a/test/Assembler/2002-07-25-ParserAssertionFailure.ll +++ b/test/Assembler/2002-07-25-ParserAssertionFailure.ll @@ -1,6 +1,6 @@ ; Make sure we don't get an assertion failure, even though this is a parse ; error -; RUN: not llvm-as %s -o /dev/null -f |& grep {'@foo' defined with} +; RUN: not llvm-as %s -o /dev/null |& grep {'@foo' defined with} %ty = type void (i32) diff --git a/test/Assembler/2002-08-15-CastAmbiguity.ll b/test/Assembler/2002-08-15-CastAmbiguity.ll index c10f91103fdc5..c71652446d6f0 100644 --- a/test/Assembler/2002-08-15-CastAmbiguity.ll +++ b/test/Assembler/2002-08-15-CastAmbiguity.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null define void @test(i32 %X) { call void @test( i32 6 ) diff --git a/test/Assembler/2002-08-15-ConstantExprProblem.ll b/test/Assembler/2002-08-15-ConstantExprProblem.ll index d02c26a80cdb1..02b9ea9adb874 100644 --- a/test/Assembler/2002-08-15-ConstantExprProblem.ll +++ b/test/Assembler/2002-08-15-ConstantExprProblem.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null @.LC0 = internal global [12 x i8] c"hello world\00" ; <[12 x i8]*> [#uses=1] diff --git a/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll b/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll index 5252be266a167..2ba3f14a48e58 100644 --- a/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll +++ b/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null @.LC0 = internal global [12 x i8] c"hello world\00" ; <[12 x i8]*> [#uses=1] diff --git a/test/Assembler/2002-08-19-BytecodeReader.ll b/test/Assembler/2002-08-19-BytecodeReader.ll index e42cda0baa07a..e211014eb0e8c 100644 --- a/test/Assembler/2002-08-19-BytecodeReader.ll +++ b/test/Assembler/2002-08-19-BytecodeReader.ll @@ -1,7 +1,7 @@ ; Testcase that seems to break the bytecode reader. This comes from the ; "crafty" spec benchmark. ; -; RUN: llvm-as < %s | opt -instcombine | llvm-dis | llvm-as +; RUN: opt < %s -instcombine | llvm-dis %CHESS_POSITION = type { i32, i32 } @pawn_probes = external global i32 ; [#uses=0] diff --git a/test/Assembler/2002-08-22-DominanceProblem.ll b/test/Assembler/2002-08-22-DominanceProblem.ll index a841dfa838551..0dc192df23563 100644 --- a/test/Assembler/2002-08-22-DominanceProblem.ll +++ b/test/Assembler/2002-08-22-DominanceProblem.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; Dominance relationships is not calculated correctly for unreachable blocks, ; which causes the verifier to barf on this input. diff --git a/test/Assembler/2002-10-08-LargeArrayPerformance.ll b/test/Assembler/2002-10-08-LargeArrayPerformance.ll index 2c4cba412b1ee..34a993214e92c 100644 --- a/test/Assembler/2002-10-08-LargeArrayPerformance.ll +++ b/test/Assembler/2002-10-08-LargeArrayPerformance.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; This testcase comes from the following really simple c file: ;; int foo[30000] ;;; We should not be soo slow for such a simple case! 
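; (The IR equivalent is a single zero-initialized global, roughly
; @foo = global [30000 x i32] zeroinitializer, so parsing it should be cheap.)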
diff --git a/test/Assembler/2002-10-15-NameClash.ll b/test/Assembler/2002-10-15-NameClash.ll index 8ba5ed2507c19..89346cba9be38 100644 --- a/test/Assembler/2002-10-15-NameClash.ll +++ b/test/Assembler/2002-10-15-NameClash.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null declare i32 @"ArrayRef"([100 x i32] * %Array) diff --git a/test/Assembler/2002-12-15-GlobalResolve.ll b/test/Assembler/2002-12-15-GlobalResolve.ll index da049c4f4320d..f9ad12e5478f8 100644 --- a/test/Assembler/2002-12-15-GlobalResolve.ll +++ b/test/Assembler/2002-12-15-GlobalResolve.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null @X = external global i32* @X1 = external global %T* diff --git a/test/Assembler/2003-01-30-UnsignedString.ll b/test/Assembler/2003-01-30-UnsignedString.ll index 5eaa9c2c83893..3c14d71621c71 100644 --- a/test/Assembler/2003-01-30-UnsignedString.ll +++ b/test/Assembler/2003-01-30-UnsignedString.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null @spell_order = global [4 x i8] c"\FF\00\F7\00" diff --git a/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll b/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll index 608eb6ae343c7..f1a5ed7b56b2a 100644 --- a/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll +++ b/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; There should be absolutely no problem with this testcase. define i32 @test(i32 %arg1, i32 %arg2) { diff --git a/test/Assembler/2003-05-15-AssemblerProblem.ll b/test/Assembler/2003-05-15-AssemblerProblem.ll index 17967a936506b..146ce6534d709 100644 --- a/test/Assembler/2003-05-15-AssemblerProblem.ll +++ b/test/Assembler/2003-05-15-AssemblerProblem.ll @@ -1,6 +1,6 @@ ; This bug was caused by two CPR's existing for the same global variable, ; colliding in the Module level CPR map. -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null define void @test() { call void (...)* bitcast (void (i16*, i32)* @AddString to void (...)*)( i16* null, i32 0 ) diff --git a/test/Assembler/2003-05-15-SwitchBug.ll b/test/Assembler/2003-05-15-SwitchBug.ll index af42020ca167e..3768d9c9a677c 100644 --- a/test/Assembler/2003-05-15-SwitchBug.ll +++ b/test/Assembler/2003-05-15-SwitchBug.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; Check minimal switch statement diff --git a/test/Assembler/2003-05-21-ConstantShiftExpr.ll b/test/Assembler/2003-05-21-ConstantShiftExpr.ll index 667bc9b933206..40b96514e045e 100644 --- a/test/Assembler/2003-05-21-ConstantShiftExpr.ll +++ b/test/Assembler/2003-05-21-ConstantShiftExpr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; Test that shift instructions can be used in constant expressions. global i32 3670016 diff --git a/test/Assembler/2003-05-21-EmptyStructTest.ll b/test/Assembler/2003-05-21-EmptyStructTest.ll index 6925d2693250a..26e83d931c4db 100644 --- a/test/Assembler/2003-05-21-EmptyStructTest.ll +++ b/test/Assembler/2003-05-21-EmptyStructTest.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; The old C front-end never generated empty structures, now the new one ; can. For some reason we never handled them in the parser. Weird. 
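; (That is, the parser needs to accept an empty structure type along the
; lines of %empty = type { }.)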
diff --git a/test/Assembler/2003-06-30-RecursiveTypeProblem.ll b/test/Assembler/2003-06-30-RecursiveTypeProblem.ll index 33f63a621711c..5db31140a7416 100644 --- a/test/Assembler/2003-06-30-RecursiveTypeProblem.ll +++ b/test/Assembler/2003-06-30-RecursiveTypeProblem.ll @@ -1,3 +1,3 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null %MidFnTy = type void (%MidFnTy*) diff --git a/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll b/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll index 6f31f16778f6d..50cdeedd695e2 100644 --- a/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll +++ b/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -instcombine -simplifycfg | llvm-dis | not grep br +; RUN: opt < %s -instcombine -simplifycfg -S | not grep br @.str_1 = internal constant [6 x i8] c"_Bool\00" ; <[6 x i8]*> [#uses=2] diff --git a/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll b/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll index 10a0280e90609..5fec05d8cbf13 100644 --- a/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll +++ b/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null %T = type i32 @X = global i32* null ; [#uses=0] diff --git a/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll b/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll index bdb4d546854c9..93f9a70814777 100644 --- a/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll +++ b/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null -f |& grep {use of undefined type named 'struct.D_Scope'} +; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'struct.D_Scope'} ; END. @d_reduction_0_dparser_gram = global { diff --git a/test/Assembler/2004-02-27-SelfUseAssertError.ll b/test/Assembler/2004-02-27-SelfUseAssertError.ll index ff4c0b43e48a6..7052eac5cbd43 100644 --- a/test/Assembler/2004-02-27-SelfUseAssertError.ll +++ b/test/Assembler/2004-02-27-SelfUseAssertError.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null ; %inc2 uses it's own value, but that's ok, as it's unreachable! diff --git a/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll b/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll index c9363dbcdca90..ab46f887be074 100644 --- a/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll +++ b/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null define i32* @t1({ float, i32 }* %X) { %W = getelementptr { float, i32 }* %X, i32 20, i32 1 ; [#uses=0] diff --git a/test/Assembler/2004-10-22-BCWriterUndefBug.ll b/test/Assembler/2004-10-22-BCWriterUndefBug.ll index 4b2ebeee93420..694b80b78c167 100644 --- a/test/Assembler/2004-10-22-BCWriterUndefBug.ll +++ b/test/Assembler/2004-10-22-BCWriterUndefBug.ll @@ -1,5 +1,5 @@ ;; The bytecode writer was trying to treat undef values as ConstantArray's when ;; they looked like strings. -;; RUN: llvm-as %s -o /dev/null -f +;; RUN: llvm-as %s -o /dev/null @G = internal global [8 x i8] undef diff --git a/test/Assembler/2004-11-28-InvalidTypeCrash.ll b/test/Assembler/2004-11-28-InvalidTypeCrash.ll index 6f264393a5981..f9b453b574624 100644 --- a/test/Assembler/2004-11-28-InvalidTypeCrash.ll +++ b/test/Assembler/2004-11-28-InvalidTypeCrash.ll @@ -1,4 +1,4 @@ ; Test for PR463. This program is erroneous, but should not crash llvm-as. 
-; RUN: not llvm-as %s -o /dev/null -f |& grep {invalid type for null constant} +; RUN: not llvm-as %s -o /dev/null |& grep {invalid type for null constant} @.FOO = internal global %struct.none zeroinitializer diff --git a/test/Assembler/2005-01-31-CallingAggregateFunction.ll b/test/Assembler/2005-01-31-CallingAggregateFunction.ll index 14045138f811e..ce769a2e9d7bf 100644 --- a/test/Assembler/2005-01-31-CallingAggregateFunction.ll +++ b/test/Assembler/2005-01-31-CallingAggregateFunction.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null define void @test() { call {i32} @foo() diff --git a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll index 1962ae70c03d8..a39de1cb6cba4 100644 --- a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll +++ b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll @@ -1,5 +1,5 @@ ; The assembler should catch an undefined argument type . -; RUN: not llvm-as %s -o /dev/null -f |& grep {use of undefined type named 'typedef.bc_struct'} +; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'typedef.bc_struct'} ; %typedef.bc_struct = type opaque diff --git a/test/Assembler/2007-01-05-Cmp-ConstExpr.ll b/test/Assembler/2007-01-05-Cmp-ConstExpr.ll index 8c25989d21a3a..e3f67ba13afca 100644 --- a/test/Assembler/2007-01-05-Cmp-ConstExpr.ll +++ b/test/Assembler/2007-01-05-Cmp-ConstExpr.ll @@ -1,5 +1,5 @@ ; Test Case for PR1080 -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null @str = internal constant [4 x i8] c"-ga\00" ; <[4 x i8]*> [#uses=2] diff --git a/test/Assembler/2007-01-16-CrashOnBadCast.ll b/test/Assembler/2007-01-16-CrashOnBadCast.ll index 33666b8cb2ce5..81f5458b2ebec 100644 --- a/test/Assembler/2007-01-16-CrashOnBadCast.ll +++ b/test/Assembler/2007-01-16-CrashOnBadCast.ll @@ -1,5 +1,5 @@ ; PR1117 -; RUN: not llvm-as %s -o /dev/null -f |& grep {invalid cast opcode for cast from} +; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from} define i8* @nada(i64 %X) { %result = trunc i64 %X to i8* diff --git a/test/Assembler/2007-01-16-CrashOnBadCast2.ll b/test/Assembler/2007-01-16-CrashOnBadCast2.ll index 49c539f5024b5..c05c60952c599 100644 --- a/test/Assembler/2007-01-16-CrashOnBadCast2.ll +++ b/test/Assembler/2007-01-16-CrashOnBadCast2.ll @@ -1,4 +1,4 @@ ; PR1117 -; RUN: not llvm-as %s -o /dev/null -f |& grep {invalid cast opcode for cast from} +; RUN: not llvm-as %s -o /dev/null |& grep {invalid cast opcode for cast from} @X = constant i8* trunc (i64 0 to i8*) diff --git a/test/Assembler/2007-03-18-InvalidNumberedVar.ll b/test/Assembler/2007-03-18-InvalidNumberedVar.ll index 12bac61124e7d..b2193b1701304 100644 --- a/test/Assembler/2007-03-18-InvalidNumberedVar.ll +++ b/test/Assembler/2007-03-18-InvalidNumberedVar.ll @@ -1,5 +1,5 @@ ; PR 1258 -; RUN: not llvm-as < %s >/dev/null -f |& grep {'%0' defined with type 'i1'} +; RUN: not llvm-as < %s >/dev/null |& grep {'%0' defined with type 'i1'} define i32 @test1(i32 %a, i32 %b) { entry: diff --git a/test/Assembler/2008-02-20-MultipleReturnValue.ll b/test/Assembler/2008-02-20-MultipleReturnValue.ll index 5b2ed7e8f4806..32c893a9f5f19 100644 --- a/test/Assembler/2008-02-20-MultipleReturnValue.ll +++ b/test/Assembler/2008-02-20-MultipleReturnValue.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -verify | llvm-dis | llvm-as -disable-output +; RUN: opt < %s -verify -S | llvm-as -disable-output define {i32, i8} @foo(i32 %p) { ret i32 1, i8 2 diff --git a/test/Assembler/2008-09-02-FunctionNotes2.ll 
b/test/Assembler/2008-09-02-FunctionNotes2.ll index dbe75be700b7e..8a49e89902802 100644 --- a/test/Assembler/2008-09-02-FunctionNotes2.ll +++ b/test/Assembler/2008-09-02-FunctionNotes2.ll @@ -1,5 +1,5 @@ ; Test function notes -; RUN: not llvm-as %s -o /dev/null -f |& grep "Attributes noinline alwaysinline are incompatible" +; RUN: not llvm-as %s -o /dev/null |& grep "Attributes noinline alwaysinline are incompatible" define void @fn1() alwaysinline noinline { ret void } diff --git a/test/Assembler/2009-02-28-StripOpaqueName.ll b/test/Assembler/2009-02-28-StripOpaqueName.ll index eef5d3614a818..f61a44cbd15d9 100644 --- a/test/Assembler/2009-02-28-StripOpaqueName.ll +++ b/test/Assembler/2009-02-28-StripOpaqueName.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -strip | llvm-dis | llvm-as | llvm-dis +; RUN: opt < %s -strip -S | llvm-as | llvm-dis ; Stripping the name from A should not break references to it. %A = type opaque diff --git a/test/Assembler/2009-07-24-ZeroArgGEP.ll b/test/Assembler/2009-07-24-ZeroArgGEP.ll new file mode 100644 index 0000000000000..2a3d11477cb17 --- /dev/null +++ b/test/Assembler/2009-07-24-ZeroArgGEP.ll @@ -0,0 +1,5 @@ +; RUN: llvm-as %s -o /dev/null + +@foo = global i32 0 +@bar = constant i32* getelementptr(i32* @foo) + diff --git a/test/Assembler/ConstantExprFold.ll b/test/Assembler/ConstantExprFold.ll index 89edc24b37ec7..d3d374a07cf1f 100644 --- a/test/Assembler/ConstantExprFold.ll +++ b/test/Assembler/ConstantExprFold.ll @@ -19,6 +19,7 @@ global i64* inttoptr (i64 xor (i64 ptrtoint (i64* @A to i64), i64 0) to i64*) ; @B = external global %Ty global i1 icmp slt (i64* @A, i64* getelementptr (i64* @A, i64 1)) ; true +global i1 icmp ult (i64* @A, i64* getelementptr (i64* @A, i64 1)) ; true global i1 icmp slt (i64* @A, i64* getelementptr (i64* @A, i64 0)) ; false global i1 icmp slt (i32* getelementptr (%Ty* @B, i64 0, i32 0), i32* getelementptr (%Ty* @B, i64 0, i32 1)) ; true diff --git a/test/Assembler/anon-functions.ll b/test/Assembler/anon-functions.ll index e08063e655d62..ac06e8ce30555 100644 --- a/test/Assembler/anon-functions.ll +++ b/test/Assembler/anon-functions.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s |llvm-dis | llvm-as | llvm-dis +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis ; PR3611 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" diff --git a/test/Assembler/flags.ll b/test/Assembler/flags.ll new file mode 100644 index 0000000000000..3241909059758 --- /dev/null +++ b/test/Assembler/flags.ll @@ -0,0 +1,212 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +@addr = external global i64 + +define i64 @add_unsigned(i64 %x, i64 %y) { +; CHECK: %z = add nuw i64 %x, %y + %z = add nuw i64 %x, %y + ret i64 %z +} + +define i64 @sub_unsigned(i64 %x, i64 %y) { +; CHECK: %z = sub nuw i64 %x, %y + %z = sub nuw i64 %x, %y + ret i64 %z +} + +define i64 @mul_unsigned(i64 %x, i64 %y) { +; CHECK: %z = mul nuw i64 %x, %y + %z = mul nuw i64 %x, %y + ret i64 %z +} + +define i64 @add_signed(i64 %x, i64 %y) { +; CHECK: %z = add nsw i64 %x, %y + %z = add nsw i64 %x, %y + ret i64 %z +} + +define i64 @sub_signed(i64 %x, i64 %y) { +; CHECK: %z = sub nsw i64 %x, %y + %z = sub nsw i64 %x, %y + ret i64 %z +} + +define i64 @mul_signed(i64 %x, i64 %y) { +; CHECK: %z = mul nsw i64 %x, %y + %z = mul nsw i64 %x, %y + ret i64 %z +} + +define i64 @add_plain(i64 %x, i64 %y) { +; CHECK: %z = add i64 %x, %y + %z = add i64 %x, %y + ret i64 %z +} + +define i64 @sub_plain(i64 %x, i64 %y) { 
+; CHECK: %z = sub i64 %x, %y + %z = sub i64 %x, %y + ret i64 %z +} + +define i64 @mul_plain(i64 %x, i64 %y) { +; CHECK: %z = mul i64 %x, %y + %z = mul i64 %x, %y + ret i64 %z +} + +define i64 @add_both(i64 %x, i64 %y) { +; CHECK: %z = add nuw nsw i64 %x, %y + %z = add nuw nsw i64 %x, %y + ret i64 %z +} + +define i64 @sub_both(i64 %x, i64 %y) { +; CHECK: %z = sub nuw nsw i64 %x, %y + %z = sub nuw nsw i64 %x, %y + ret i64 %z +} + +define i64 @mul_both(i64 %x, i64 %y) { +; CHECK: %z = mul nuw nsw i64 %x, %y + %z = mul nuw nsw i64 %x, %y + ret i64 %z +} + +define i64 @add_both_reversed(i64 %x, i64 %y) { +; CHECK: %z = add nuw nsw i64 %x, %y + %z = add nsw nuw i64 %x, %y + ret i64 %z +} + +define i64 @sub_both_reversed(i64 %x, i64 %y) { +; CHECK: %z = sub nuw nsw i64 %x, %y + %z = sub nsw nuw i64 %x, %y + ret i64 %z +} + +define i64 @mul_both_reversed(i64 %x, i64 %y) { +; CHECK: %z = mul nuw nsw i64 %x, %y + %z = mul nsw nuw i64 %x, %y + ret i64 %z +} + +define i64 @sdiv_exact(i64 %x, i64 %y) { +; CHECK: %z = sdiv exact i64 %x, %y + %z = sdiv exact i64 %x, %y + ret i64 %z +} + +define i64 @sdiv_plain(i64 %x, i64 %y) { +; CHECK: %z = sdiv i64 %x, %y + %z = sdiv i64 %x, %y + ret i64 %z +} + +define i64* @gep_nw(i64* %p, i64 %x) { +; CHECK: %z = getelementptr inbounds i64* %p, i64 %x + %z = getelementptr inbounds i64* %p, i64 %x + ret i64* %z +} + +define i64* @gep_plain(i64* %p, i64 %x) { +; CHECK: %z = getelementptr i64* %p, i64 %x + %z = getelementptr i64* %p, i64 %x + ret i64* %z +} + +define i64 @add_both_ce() { +; CHECK: ret i64 add nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 add nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @sub_both_ce() { +; CHECK: ret i64 sub nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 sub nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @mul_both_ce() { +; CHECK: ret i64 mul nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 mul nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @sdiv_exact_ce() { +; CHECK: ret i64 sdiv exact (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 sdiv exact (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64* @gep_nw_ce() { +; CHECK: ret i64* getelementptr inbounds (i64* @addr, i64 171) + ret i64* getelementptr inbounds (i64* @addr, i64 171) +} + +define i64 @add_plain_ce() { +; CHECK: ret i64 add (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 add (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @sub_plain_ce() { +; CHECK: ret i64 sub (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 sub (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @mul_plain_ce() { +; CHECK: ret i64 mul (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 mul (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @sdiv_plain_ce() { +; CHECK: ret i64 sdiv (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 sdiv (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64* @gep_plain_ce() { +; CHECK: ret i64* getelementptr (i64* @addr, i64 171) + ret i64* getelementptr (i64* @addr, i64 171) +} + +define i64 @add_both_reversed_ce() { +; CHECK: ret i64 add nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 add nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @sub_both_reversed_ce() { +; CHECK: ret i64 sub nuw nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 sub nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @mul_both_reversed_ce() { +; CHECK: ret i64 mul nuw nsw (i64 
ptrtoint (i64* @addr to i64), i64 91) + ret i64 mul nsw nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @add_signed_ce() { +; CHECK: ret i64 add nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 add nsw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @sub_signed_ce() { +; CHECK: ret i64 sub nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 sub nsw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @mul_signed_ce() { +; CHECK: ret i64 mul nsw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 mul nsw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @add_unsigned_ce() { +; CHECK: ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 add nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @sub_unsigned_ce() { +; CHECK: ret i64 sub nuw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 sub nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} + +define i64 @mul_unsigned_ce() { +; CHECK: ret i64 mul nuw (i64 ptrtoint (i64* @addr to i64), i64 91) + ret i64 mul nuw (i64 ptrtoint (i64* @addr to i64), i64 91) +} diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll index 10e5011397da0..803d6d3430631 100644 --- a/test/Assembler/getelementptr.ll +++ b/test/Assembler/getelementptr.ll @@ -1,11 +1,21 @@ -; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s + +; Verify that over-indexed getelementptrs are folded. +@A = external global [2 x [3 x [5 x [7 x i32]]]] +@B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 0, i64 0, i64 2, i64 1, i64 7523) +; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) ; [#uses=0] +@C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523) +; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ; [#uses=0] ;; Verify that i16 indices work. @x = external global {i32, i32} @y = global i32* getelementptr ({i32, i32}* @x, i16 42, i32 0) +; CHECK: @y = global i32* getelementptr (%0* @x, i16 42, i32 0) ; see if i92 indices work too. 
define i32 *@test({i32, i32}* %t, i92 %n) { +; CHECK: @test +; CHECK: %B = getelementptr %0* %t, i92 %n, i32 0 %B = getelementptr {i32, i32}* %t, i92 %n, i32 0 ret i32* %B } diff --git a/test/Assembler/insertextractvalue.ll b/test/Assembler/insertextractvalue.ll index 3581238aa4c1c..2f5521fba8725 100644 --- a/test/Assembler/insertextractvalue.ll +++ b/test/Assembler/insertextractvalue.ll @@ -21,3 +21,9 @@ define float @dar({{i32},{float, double}}* %p) nounwind { store {{i32},{float, double}} insertvalue ({{i32},{float, double}} zeroinitializer, double 20.0, 1, 1), {{i32},{float, double}}* %p ret float extractvalue ({{i32},{float, double}} zeroinitializer, 1, 0) } + + +; PR4963 +define <{ i32, i32 }> @test57() { + ret <{ i32, i32 }> insertvalue (<{ i32, i32 }> zeroinitializer, i32 4, 1) +} diff --git a/test/Assembler/msasm.ll b/test/Assembler/msasm.ll new file mode 100644 index 0000000000000..5e32963abd8eb --- /dev/null +++ b/test/Assembler/msasm.ll @@ -0,0 +1,36 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin10.0" + +define void @test1() nounwind { +; CHECK: test1 +; CHECK: sideeffect +; CHECK-NOT: msasm + tail call void asm sideeffect "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind + ret void +; CHECK: ret +} +define void @test2() nounwind { +; CHECK: test2 +; CHECK: sideeffect +; CHECK: msasm + tail call void asm sideeffect msasm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind + ret void +; CHECK: ret +} +define void @test3() nounwind { +; CHECK: test3 +; CHECK-NOT: sideeffect +; CHECK: msasm + tail call void asm msasm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind + ret void +; CHECK: ret +} +define void @test4() nounwind { +; CHECK: test4 +; CHECK-NOT: sideeffect +; CHECK-NOT: msasm + tail call void asm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind + ret void +; CHECK: ret +} diff --git a/test/Assembler/select.ll b/test/Assembler/select.ll index b018fbe9c4160..2d3f412d256d9 100644 --- a/test/Assembler/select.ll +++ b/test/Assembler/select.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null define i32 @test(i1 %C, i32 %V1, i32 %V2) { diff --git a/test/Assembler/unnamed.ll b/test/Assembler/unnamed.ll new file mode 100644 index 0000000000000..fb4fa6244e5af --- /dev/null +++ b/test/Assembler/unnamed.ll @@ -0,0 +1,51 @@ +; RUN: llvm-as < %s | llvm-dis + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +module asm "this is an inline asm block" +module asm "this is another inline asm block" + +%0 = type { %1, %2 } +%1 = type { i32 } +%2 = type { float, double } + +@0 = global i32 0 +@1 = global float 3.0 +@2 = global i8* null +@3 = global x86_fp80 0xK4001E000000000000000 + +define float @foo(%0* %p) nounwind { + %t = load %0* %p ; <%0> [#uses=2] + %s = extractvalue %0 %t, 1, 0 ; [#uses=1] + %r = insertvalue %0 %t, double 2.000000e+00, 1, 1; <%0> [#uses=1] + store %0 %r, %0* %p + ret float %s +} + +define float @bar(%0* %p) nounwind { + store %0 { %1 { i32 4 }, %2 { float 4.000000e+00, double 2.000000e+01 } }, %0* %p + ret float 7.000000e+00 +} + +define float @car(%0* %p) nounwind { + store %0 { %1 undef, %2 { float undef, double 2.000000e+01 } }, %0* %p + ret float undef +} + +define float @dar(%0* %p) nounwind { + store %0 { %1 zeroinitializer, %2 { float 0.000000e+00, 
double 2.000000e+01 } }, %0* %p + ret float 0.000000e+00 +} + +define i32* @qqq() { + ret i32* @0 +} +define float* @rrr() { + ret float* @1 +} +define i8** @sss() { + ret i8** @2 +} +define x86_fp80* @nnn() { + ret x86_fp80* @3 +} diff --git a/test/Assembler/vector-cmp.ll b/test/Assembler/vector-cmp.ll index 383c0faf6206b..e4d35d9c98280 100644 --- a/test/Assembler/vector-cmp.ll +++ b/test/Assembler/vector-cmp.ll @@ -1,16 +1,16 @@ -; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {global.*vicmp slt} +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {global.*icmp slt} ; PR2317 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9.2.2" -define <4 x i32> @foo(<4 x float> %a, <4 x float> %b) nounwind { +define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind { entry: - %cmp = vfcmp olt <4 x float> %a, %b ; <4 x i32> [#uses=1] - ret <4 x i32> %cmp + %cmp = fcmp olt <4 x float> %a, %b ; <4 x i32> [#uses=1] + ret <4 x i1> %cmp } -global <4 x i32> vicmp slt ( <4 x i32> , <4 x i32> ) ; +global <4 x i1> icmp slt ( <4 x i32> , <4 x i32> ) ; @B = external global i32; -global <4 x i32> vicmp slt ( <4 x i32> , <4 x i32> ) ; +global <4 x i1> icmp slt ( <4 x i32> , <4 x i32> ) ; diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml index 5a6fde8da24d4..e830106c11ffd 100644 --- a/test/Bindings/Ocaml/analysis.ml +++ b/test/Bindings/Ocaml/analysis.ml @@ -1,4 +1,4 @@ -(* RUN: %ocamlc -warn-error A llvm.cma llvm_analysis.cma %s -o %t 2> /dev/null +(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t * RUN: ./%t %t.bc *) @@ -8,6 +8,8 @@ open Llvm_analysis (* Note that this takes a moment to link, so it's best to keep the number of individual tests low. *) +let context = global_context () + let test x = if not x then exit 1 else () let bomb msg = @@ -15,10 +17,10 @@ let bomb msg = exit 2 let _ = - let fty = function_type void_type [| |] in - let m = create_module "valid_m" in + let fty = function_type (void_type context) [| |] in + let m = create_module context "valid_m" in let fn = define_function "valid_fn" fty m in - let at_entry = builder_at_end (entry_block fn) in + let at_entry = builder_at_end context (entry_block fn) in ignore (build_ret_void at_entry); diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml index 776228fc16488..5c23041c80d35 100644 --- a/test/Bindings/Ocaml/bitreader.ml +++ b/test/Bindings/Ocaml/bitreader.ml @@ -1,4 +1,4 @@ -(* RUN: %ocamlc -warn-error A llvm.cma llvm_bitreader.cma llvm_bitwriter.cma %s -o %t 2> /dev/null +(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t * RUN: ./%t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) @@ -6,13 +6,15 @@ (* Note that this takes a moment to link, so it's best to keep the number of individual tests low. 
*) +let context = Llvm.global_context () + let test x = if not x then exit 1 else () let _ = let fn = Sys.argv.(1) in - let m = Llvm.create_module "ocaml_test_module" in + let m = Llvm.create_module context "ocaml_test_module" in - ignore (Llvm.define_type_name "caml_int_ty" Llvm.i32_type m); + ignore (Llvm.define_type_name "caml_int_ty" (Llvm.i32_type context) m); test (Llvm_bitwriter.write_bitcode_file m fn); @@ -22,7 +24,7 @@ let _ = begin let mb = Llvm.MemoryBuffer.of_file fn in begin try - let m = Llvm_bitreader.parse_bitcode mb in + let m = Llvm_bitreader.parse_bitcode context mb in Llvm.dispose_module m with x -> Llvm.MemoryBuffer.dispose mb; @@ -43,7 +45,7 @@ let _ = begin let mb = Llvm.MemoryBuffer.of_file fn in let mp = begin try - Llvm_bitreader.get_module_provider mb + Llvm_bitreader.get_module_provider context mb with x -> Llvm.MemoryBuffer.dispose mb; raise x @@ -63,7 +65,7 @@ let _ = try let mb = Llvm.MemoryBuffer.of_file fn in let mp = begin try - Llvm_bitreader.get_module_provider mb + Llvm_bitreader.get_module_provider context mb with x -> Llvm.MemoryBuffer.dispose mb; raise x diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml index ec9dbc8329973..57caac7cb97d3 100644 --- a/test/Bindings/Ocaml/bitwriter.ml +++ b/test/Bindings/Ocaml/bitwriter.ml @@ -1,4 +1,4 @@ -(* RUN: %ocamlc -warn-error A llvm.cma llvm_bitwriter.cma %s -o %t 2> /dev/null +(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitwriter.cmxa %s -o %t * RUN: ./%t %t.bc * RUN: llvm-dis < %t.bc | grep caml_int_ty *) @@ -6,11 +6,13 @@ (* Note that this takes a moment to link, so it's best to keep the number of individual tests low. *) +let context = Llvm.global_context () + let test x = if not x then exit 1 else () let _ = - let m = Llvm.create_module "ocaml_test_module" in + let m = Llvm.create_module context "ocaml_test_module" in - ignore (Llvm.define_type_name "caml_int_ty" Llvm.i32_type m); + ignore (Llvm.define_type_name "caml_int_ty" (Llvm.i32_type context) m); test (Llvm_bitwriter.write_bitcode_file m Sys.argv.(1)) diff --git a/test/Bindings/Ocaml/dg.exp b/test/Bindings/Ocaml/dg.exp new file mode 100644 index 0000000000000..fb4bd078e37fd --- /dev/null +++ b/test/Bindings/Ocaml/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if [ llvm_supports_binding ocaml ] then { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,ml}]] +} diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml index 726a700f0ce32..ce56c50dcb618 100644 --- a/test/Bindings/Ocaml/executionengine.ml +++ b/test/Bindings/Ocaml/executionengine.ml @@ -1,4 +1,4 @@ -(* RUN: %ocamlc -warn-error A llvm.cma llvm_target.cma llvm_executionengine.cma %s -o %t 2> /dev/null +(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t * RUN: ./%t %t.bc *) @@ -9,6 +9,12 @@ open Llvm_target (* Note that this takes a moment to link, so it's best to keep the number of individual tests low. 
*) +let context = global_context () +let i8_type = Llvm.i8_type context +let i32_type = Llvm.i32_type context +let i64_type = Llvm.i64_type context +let double_type = Llvm.double_type context + let bomb msg = prerr_endline msg; exit 2 @@ -19,14 +25,14 @@ let define_main_fn m retval = define_function "main" (function_type i32_type [| i32_type; str_arr_type; str_arr_type |]) m in - let b = builder_at_end (entry_block fn) in + let b = builder_at_end (global_context ()) (entry_block fn) in ignore (build_ret (const_int i32_type retval) b); fn let define_plus m = let fn = define_function "plus" (function_type i32_type [| i32_type; i32_type |]) m in - let b = builder_at_end (entry_block fn) in + let b = builder_at_end (global_context ()) (entry_block fn) in let add = build_add (param fn 0) (param fn 1) "sum" b in ignore (build_ret add b) @@ -52,10 +58,10 @@ let test_genericvalue () = let test_executionengine () = (* create *) - let m = create_module "test_module" in + let m = create_module (global_context ()) "test_module" in let main = define_main_fn m 42 in - let m2 = create_module "test_module2" in + let m2 = create_module (global_context ()) "test_module2" in define_plus m2; let ee = ExecutionEngine.create (ModuleProvider.create m) in diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml index 0a65db996bb44..0a65810105b07 100644 --- a/test/Bindings/Ocaml/scalar_opts.ml +++ b/test/Bindings/Ocaml/scalar_opts.ml @@ -1,7 +1,7 @@ -(* RUN: %ocamlc -warn-error A llvm.cma llvm_scalar_opts.cma llvm_target.cma %s -o %t 2> /dev/null +(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t *) -(* Note: It takes several seconds for ocamlc to link an executable with +(* Note: It takes several seconds for ocamlopt to link an executable with libLLVMCore.a, so it's better to write a big test than a bunch of little ones. *) @@ -9,6 +9,8 @@ open Llvm open Llvm_scalar_opts open Llvm_target +let context = global_context () +let void_type = Llvm.void_type context (* Tiny unit test framework - really just to help find which line is busted *) let suite name f = @@ -19,7 +21,7 @@ let suite name f = (*===-- Fixture -----------------------------------------------------------===*) let filename = Sys.argv.(1) -let m = create_module filename +let m = create_module context filename let mp = ModuleProvider.create m @@ -30,7 +32,7 @@ let test_transforms () = let fty = function_type void_type [| |] in let fn = define_function "fn" fty m in - ignore (build_ret_void (builder_at_end (entry_block fn))); + ignore (build_ret_void (builder_at_end context (entry_block fn))); let td = TargetData.create (target_triple m) in diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml index e6d08ed6db754..3c3b7339fef87 100644 --- a/test/Bindings/Ocaml/target.ml +++ b/test/Bindings/Ocaml/target.ml @@ -1,13 +1,16 @@ -(* RUN: %ocamlc -warn-error A llvm.cma llvm_target.cma %s -o %t 2> /dev/null +(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t *) -(* Note: It takes several seconds for ocamlc to link an executable with +(* Note: It takes several seconds for ocamlopt to link an executable with libLLVMCore.a, so it's better to write a big test than a bunch of little ones. 
*) open Llvm open Llvm_target +let context = global_context () +let i32_type = Llvm.i32_type context +let i64_type = Llvm.i64_type context (* Tiny unit test framework - really just to help find which line is busted *) let suite name f = @@ -18,14 +21,14 @@ let suite name f = (*===-- Fixture -----------------------------------------------------------===*) let filename = Sys.argv.(1) -let m = create_module filename +let m = create_module context filename (*===-- Target Data -------------------------------------------------------===*) let test_target_data () = let td = TargetData.create (target_triple m) in - let sty = struct_type [| i32_type; i64_type |] in + let sty = struct_type context [| i32_type; i64_type |] in ignore (TargetData.as_string td); ignore (TargetData.invalidate_struct_layout td sty); diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml index 9016d3927f59e..9e976d34aa4bc 100644 --- a/test/Bindings/Ocaml/vmcore.ml +++ b/test/Bindings/Ocaml/vmcore.ml @@ -1,9 +1,9 @@ -(* RUN: %ocamlc -warn-error A llvm.cma llvm_analysis.cma llvm_bitwriter.cma %s -o %t 2> /dev/null +(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t * RUN: ./%t %t.bc * RUN: llvm-dis < %t.bc > %t.ll *) -(* Note: It takes several seconds for ocamlc to link an executable with +(* Note: It takes several seconds for ocamlopt to link an executable with libLLVMCore.a, so it's better to write a big test than a bunch of little ones. *) @@ -17,6 +17,16 @@ let suite_name = ref "" let group_name = ref "" let case_num = ref 0 let print_checkpoints = false +let context = global_context () +let i1_type = Llvm.i1_type context +let i8_type = Llvm.i8_type context +let i16_type = Llvm.i16_type context +let i32_type = Llvm.i32_type context +let i64_type = Llvm.i64_type context +let void_type = Llvm.void_type context +let float_type = Llvm.float_type context +let double_type = Llvm.double_type context +let fp128_type = Llvm.fp128_type context let group name = group_name := !suite_name ^ "/" ^ name; @@ -47,7 +57,7 @@ let suite name f = (*===-- Fixture -----------------------------------------------------------===*) let filename = Sys.argv.(1) -let m = create_module filename +let m = create_module context filename let mp = ModuleProvider.create m @@ -93,7 +103,7 @@ let test_types () = (* RUN: grep {Ty04.*i42} < %t.ll *) group "i42"; - let ty = integer_type 42 in + let ty = integer_type context 42 in insist (define_type_name "Ty04" ty m); (* RUN: grep {Ty05.*float} < %t.ll @@ -164,22 +174,22 @@ let test_types () = (* RUN: grep {Ty12.*opaque} < %t.ll *) group "opaque"; - let ty = opaque_type () in + let ty = opaque_type context in insist (define_type_name "Ty12" ty m); insist (ty == ty); - insist (ty <> opaque_type ()); + insist (ty <> opaque_type context); (* RUN: grep -v {Ty13} < %t.ll *) group "delete"; - let ty = opaque_type () in + let ty = opaque_type context in insist (define_type_name "Ty13" ty m); delete_type_name "Ty13" m; (* RUN: grep -v {RecursiveTy.*RecursiveTy} < %t.ll *) group "recursive"; - let ty = opaque_type () in + let ty = opaque_type context in let th = handle_to_type ty in refine_type ty (pointer_type ty); let ty = type_of_handle th in @@ -212,22 +222,30 @@ let test_constants () = ignore (define_global "Const03" c m); insist (i64_type = type_of c); + (* RUN: grep {ConstIntString.*i32.*-1} < %t.ll + *) + group "int string"; + let c = const_int_of_string i32_type "-1" 10 in + ignore (define_global "ConstIntString" c m); + insist (i32_type = type_of c); + 
(* RUN: grep {Const04.*"cruel\\\\00world"} < %t.ll *) group "string"; - let c = const_string "cruel\000world" in + let c = const_string context "cruel\000world" in ignore (define_global "Const04" c m); insist ((array_type i8_type 11) = type_of c); (* RUN: grep {Const05.*"hi\\\\00again\\\\00"} < %t.ll *) group "stringz"; - let c = const_stringz "hi\000again" in + let c = const_stringz context "hi\000again" in ignore (define_global "Const05" c m); insist ((array_type i8_type 9) = type_of c); (* RUN: grep {ConstSingle.*2.75} < %t.ll * RUN: grep {ConstDouble.*3.1459} < %t.ll + * RUN: grep {ConstDoubleString.*1.25} < %t.ll *) begin group "real"; let cs = const_float float_type 2.75 in @@ -236,6 +254,10 @@ let test_constants () = let cd = const_float double_type 3.1459 in ignore (define_global "ConstDouble" cd m); + insist (double_type = type_of cd); + + let cd = const_float_of_string double_type "1.25" in + ignore (define_global "ConstDoubleString" cd m); insist (double_type = type_of cd) end; @@ -258,20 +280,20 @@ let test_constants () = one; two; one; two |] in ignore (define_global "Const08" c m); insist ((vector_type i16_type 8) = (type_of c)); - + (* RUN: grep {Const09.*.i16 1, i16 2, i32 3, i32 4} < %t.ll *) group "structure"; - let c = const_struct [| one; two; three; four |] in + let c = const_struct context [| one; two; three; four |] in ignore (define_global "Const09" c m); - insist ((struct_type [| i16_type; i16_type; i32_type; i32_type |]) + insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |]) = (type_of c)); (* RUN: grep {Const10.*zeroinit} < %t.ll *) group "null"; - let c = const_null (packed_struct_type [| i1_type; i8_type; - i64_type; double_type |]) in + let c = const_null (packed_struct_type context [| i1_type; i8_type; i64_type; + double_type |]) in ignore (define_global "Const10" c m); (* RUN: grep {Const11.*-1} < %t.ll @@ -343,7 +365,7 @@ let test_constants () = * RUN: grep {ConstIntToPtr.*inttoptr} < %t.ll * RUN: grep {ConstBitCast.*bitcast} < %t.ll *) - let i128_type = integer_type 128 in + let i128_type = integer_type context 128 in ignore (define_global "ConstTrunc" (const_trunc (const_add foldbomb five) i8_type) m); ignore (define_global "ConstSExt" (const_sext foldbomb i128_type) m); @@ -484,7 +506,7 @@ let test_global_variables () = insist (is_global_constant g); begin group "iteration"; - let m = create_module "temp" in + let m = create_module context "temp" in insist (At_end m = global_begin m); insist (At_start m = global_end m); @@ -544,7 +566,7 @@ let test_functions () = let fn = define_function "Fn3" ty m in insist (not (is_declaration fn)); insist (1 = Array.length (basic_blocks fn)); - ignore (build_unreachable (builder_at_end (entry_block fn))); + ignore (build_unreachable (builder_at_end context (entry_block fn))); (* RUN: grep {define.*Fn4.*Param1.*Param2} < %t.ll *) @@ -558,7 +580,7 @@ let test_functions () = insist (i64_type = type_of params.(1)); set_value_name "Param1" params.(0); set_value_name "Param2" params.(1); - ignore (build_unreachable (builder_at_end (entry_block fn))); + ignore (build_unreachable (builder_at_end context (entry_block fn))); (* RUN: grep {fastcc.*Fn5} < %t.ll *) @@ -567,7 +589,7 @@ let test_functions () = insist (CallConv.c = function_call_conv fn); set_function_call_conv CallConv.fast fn; insist (CallConv.fast = function_call_conv fn); - ignore (build_unreachable (builder_at_end (entry_block fn))); + ignore (build_unreachable (builder_at_end context (entry_block fn))); begin group "gc"; (* RUN: grep 
{Fn6.*gc.*shadowstack} < %t.ll @@ -579,11 +601,11 @@ let test_functions () = set_gc None fn; insist (None = gc fn); set_gc (Some "shadowstack") fn; - ignore (build_unreachable (builder_at_end (entry_block fn))); + ignore (build_unreachable (builder_at_end context (entry_block fn))); end; begin group "iteration"; - let m = create_module "temp" in + let m = create_module context "temp" in insist (At_end m = function_begin m); insist (At_start m = function_end m); @@ -613,7 +635,7 @@ let test_functions () = let test_params () = begin group "iteration"; - let m = create_module "temp" in + let m = create_module context "temp" in let vf = define_function "void" (function_type void_type [| |]) m in @@ -660,31 +682,31 @@ let test_basic_blocks () = *) group "entry"; let fn = declare_function "X" ty m in - let bb = append_block "Bb1" fn in + let bb = append_block context "Bb1" fn in insist (bb = entry_block fn); - ignore (build_unreachable (builder_at_end bb)); + ignore (build_unreachable (builder_at_end context bb)); (* RUN: grep -v Bb2 < %t.ll *) group "delete"; let fn = declare_function "X2" ty m in - let bb = append_block "Bb2" fn in + let bb = append_block context "Bb2" fn in delete_block bb; group "insert"; let fn = declare_function "X3" ty m in - let bbb = append_block "b" fn in - let bba = insert_block "a" bbb in + let bbb = append_block context "b" fn in + let bba = insert_block context "a" bbb in insist ([| bba; bbb |] = basic_blocks fn); - ignore (build_unreachable (builder_at_end bba)); - ignore (build_unreachable (builder_at_end bbb)); + ignore (build_unreachable (builder_at_end context bba)); + ignore (build_unreachable (builder_at_end context bbb)); (* RUN: grep Bb3 < %t.ll *) group "name/value"; let fn = define_function "X4" ty m in let bb = entry_block fn in - ignore (build_unreachable (builder_at_end bb)); + ignore (build_unreachable (builder_at_end context bb)); let bbv = value_of_block bb in set_value_name "Bb3" bbv; insist ("Bb3" = value_name bbv); @@ -692,20 +714,20 @@ let test_basic_blocks () = group "casts"; let fn = define_function "X5" ty m in let bb = entry_block fn in - ignore (build_unreachable (builder_at_end bb)); + ignore (build_unreachable (builder_at_end context bb)); insist (bb = block_of_value (value_of_block bb)); insist (value_is_block (value_of_block bb)); insist (not (value_is_block (const_null i32_type))); begin group "iteration"; - let m = create_module "temp" in + let m = create_module context "temp" in let f = declare_function "Temp" (function_type i32_type [| |]) m in insist (At_end f = block_begin f); insist (At_start f = block_end f); - let b1 = append_block "One" f in - let b2 = append_block "Two" f in + let b1 = append_block context "One" f in + let b2 = append_block context "Two" f in insist (Before b1 = block_begin f); insist (Before b2 = block_succ b1); @@ -729,11 +751,11 @@ let test_basic_blocks () = let test_instructions () = begin group "iteration"; - let m = create_module "temp" in + let m = create_module context "temp" in let fty = function_type void_type [| i32_type; i32_type |] in let f = define_function "f" fty m in let bb = entry_block f in - let b = builder_at (At_end bb) in + let b = builder_at context (At_end bb) in insist (At_end bb = instr_begin bb); insist (At_start bb = instr_end bb); @@ -766,7 +788,7 @@ let test_builder () = begin group "parent"; insist (try - ignore (insertion_block (builder ())); + ignore (insertion_block (builder context)); false with Not_found -> true); @@ -774,7 +796,7 @@ let test_builder () = let fty = 
function_type void_type [| i32_type |] in let fn = define_function "BuilderParent" fty m in let bb = entry_block fn in - let b = builder_at_end bb in + let b = builder_at_end context bb in let p = param fn 0 in let sum = build_add p p "sum" b in ignore (build_ret_void b); @@ -791,21 +813,21 @@ let test_builder () = *) let fty = function_type void_type [| |] in let fn = declare_function "X6" fty m in - let b = builder_at_end (append_block "Bb01" fn) in + let b = builder_at_end context (append_block context "Bb01" fn) in ignore (build_ret_void b) end; (* The rest of the tests will use one big function. *) let fty = function_type i32_type [| i32_type; i32_type |] in let fn = define_function "X7" fty m in - let atentry = builder_at_end (entry_block fn) in + let atentry = builder_at_end context (entry_block fn) in let p1 = param fn 0 ++ set_value_name "P1" in let p2 = param fn 1 ++ set_value_name "P2" in let f1 = build_uitofp p1 float_type "F1" atentry in let f2 = build_uitofp p2 float_type "F2" atentry in - let bb00 = append_block "Bb00" fn in - ignore (build_unreachable (builder_at_end bb00)); + let bb00 = append_block context "Bb00" fn in + ignore (build_unreachable (builder_at_end context bb00)); group "ret"; begin (* RUN: grep {ret.*P1} < %t.ll @@ -817,16 +839,16 @@ let test_builder () = group "br"; begin (* RUN: grep {br.*Bb02} < %t.ll *) - let bb02 = append_block "Bb02" fn in - let b = builder_at_end bb02 in + let bb02 = append_block context "Bb02" fn in + let b = builder_at_end context bb02 in ignore (build_br bb02 b) end; group "cond_br"; begin (* RUN: grep {br.*Inst01.*Bb03.*Bb00} < %t.ll *) - let bb03 = append_block "Bb03" fn in - let b = builder_at_end bb03 in + let bb03 = append_block context "Bb03" fn in + let b = builder_at_end context bb03 in let cond = build_trunc p1 i1_type "Inst01" b in ignore (build_cond_br cond bb03 bb00 b) end; @@ -835,12 +857,12 @@ let test_builder () = (* RUN: grep {switch.*P1.*SwiBlock3} < %t.ll * RUN: grep {2,.*SwiBlock2} < %t.ll *) - let bb1 = append_block "SwiBlock1" fn in - let bb2 = append_block "SwiBlock2" fn in - ignore (build_unreachable (builder_at_end bb2)); - let bb3 = append_block "SwiBlock3" fn in - ignore (build_unreachable (builder_at_end bb3)); - let si = build_switch p1 bb3 1 (builder_at_end bb1) in + let bb1 = append_block context "SwiBlock1" fn in + let bb2 = append_block context "SwiBlock2" fn in + ignore (build_unreachable (builder_at_end context bb2)); + let bb3 = append_block context "SwiBlock3" fn in + ignore (build_unreachable (builder_at_end context bb3)); + let si = build_switch p1 bb3 1 (builder_at_end context bb1) in ignore (add_case si (const_int i32_type 2) bb2) end; @@ -848,30 +870,30 @@ let test_builder () = (* RUN: grep {Inst02.*invoke.*P1.*P2} < %t.ll * RUN: grep {to.*Bb04.*unwind.*Bb00} < %t.ll *) - let bb04 = append_block "Bb04" fn in - let b = builder_at_end bb04 in + let bb04 = append_block context "Bb04" fn in + let b = builder_at_end context bb04 in ignore (build_invoke fn [| p1; p2 |] bb04 bb00 "Inst02" b) end; group "unwind"; begin (* RUN: grep {unwind} < %t.ll *) - let bb05 = append_block "Bb05" fn in - let b = builder_at_end bb05 in + let bb05 = append_block context "Bb05" fn in + let b = builder_at_end context bb05 in ignore (build_unwind b) end; group "unreachable"; begin (* RUN: grep {unreachable} < %t.ll *) - let bb06 = append_block "Bb06" fn in - let b = builder_at_end bb06 in + let bb06 = append_block context "Bb06" fn in + let b = builder_at_end context bb06 in ignore (build_unreachable b) end; group 
"arithmetic"; begin - let bb07 = append_block "Bb07" fn in - let b = builder_at_end bb07 in + let bb07 = append_block context "Bb07" fn in + let b = builder_at_end context bb07 in (* RUN: grep {Inst03.*add.*P1.*P2} < %t.ll * RUN: grep {Inst04.*sub.*P1.*Inst03} < %t.ll @@ -912,12 +934,12 @@ let test_builder () = end; group "memory"; begin - let bb08 = append_block "Bb08" fn in - let b = builder_at_end bb08 in + let bb08 = append_block context "Bb08" fn in + let b = builder_at_end context bb08 in - (* RUN: grep {Inst20.*malloc.*i8 } < %t.ll + (* RUN: grep {Inst20.*malloc.*i8 } < %t.ll * RUN: grep {Inst21.*malloc.*i8.*P1} < %t.ll - * RUN: grep {Inst22.*alloca.*i32 } < %t.ll + * RUN: grep {Inst22.*alloca.*i32 } < %t.ll * RUN: grep {Inst23.*alloca.*i32.*P2} < %t.ll * RUN: grep {free.*Inst20} < %t.ll * RUN: grep {Inst25.*load.*Inst21} < %t.ll @@ -1021,13 +1043,13 @@ let test_builder () = group "phi"; begin (* RUN: grep {PhiNode.*P1.*PhiBlock1.*P2.*PhiBlock2} < %t.ll *) - let b1 = append_block "PhiBlock1" fn in - let b2 = append_block "PhiBlock2" fn in + let b1 = append_block context "PhiBlock1" fn in + let b2 = append_block context "PhiBlock2" fn in - let jb = append_block "PhiJoinBlock" fn in - ignore (build_br jb (builder_at_end b1)); - ignore (build_br jb (builder_at_end b2)); - let at_jb = builder_at_end jb in + let jb = append_block context "PhiJoinBlock" fn in + ignore (build_br jb (builder_at_end context b1)); + ignore (build_br jb (builder_at_end context b2)); + let at_jb = builder_at_end context jb in let phi = build_phi [(p1, b1)] "PhiNode" at_jb in insist ([(p1, b1)] = incoming phi); @@ -1042,7 +1064,7 @@ let test_builder () = (*===-- Module Provider ---------------------------------------------------===*) let test_module_provider () = - let m = create_module "test" in + let m = create_module context "test" in let mp = ModuleProvider.create m in ModuleProvider.dispose mp @@ -1061,7 +1083,7 @@ let test_pass_manager () = begin group "function pass manager"; let fty = function_type void_type [| |] in let fn = define_function "FunctionPassManager" fty m in - ignore (build_ret_void (builder_at_end (entry_block fn))); + ignore (build_ret_void (builder_at_end context (entry_block fn))); ignore (PassManager.create_function mp ++ PassManager.initialize diff --git a/test/Bitcode/extractelement.ll b/test/Bitcode/extractelement.ll index 04cb131f6e7f7..d88f811e8e9cc 100644 --- a/test/Bitcode/extractelement.ll +++ b/test/Bitcode/extractelement.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | opt -constprop | llvm-dis +; RUN: opt < %s -constprop | llvm-dis ; PR3465 define double @test() { diff --git a/test/Bitcode/memcpy.ll b/test/Bitcode/memcpy.ll index fb509b8e8eeee..85b95fe572631 100644 --- a/test/Bitcode/memcpy.ll +++ b/test/Bitcode/memcpy.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o /dev/null -f +; RUN: llvm-as %s -o /dev/null define void @test(i32* %P, i32* %Q) { entry: diff --git a/test/Bitcode/metadata-2.ll b/test/Bitcode/metadata-2.ll new file mode 100644 index 0000000000000..1a59ce6f9dfbc --- /dev/null +++ b/test/Bitcode/metadata-2.ll @@ -0,0 +1,87 @@ +; RUN: llvm-as < %s | llvm-dis -o /dev/null + type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* } ; type %0 + type { i64, %object.ModuleInfo* } ; type %1 + type { i32, void ()* } ; type %2 + %"ClassInfo[]" = type { i64, %object.ClassInfo** } + %"Interface[]" = type { i64, %object.Interface* } + %"ModuleInfo[]" = type { i64, %object.ModuleInfo** } + %ModuleReference = type { 
%ModuleReference*, %object.ModuleInfo* } + %"OffsetTypeInfo[]" = type { i64, %object.OffsetTypeInfo* } + %"byte[]" = type { i64, i8* } + %object.ClassInfo = type { %object.ClassInfo.__vtbl*, i8*, %"byte[]", %"byte[]", %"void*[]", %"Interface[]", %object.ClassInfo*, i8*, i8*, i32, i8*, %"OffsetTypeInfo[]", i8*, %object.TypeInfo* } + %object.ClassInfo.__vtbl = type { %object.ClassInfo*, %"byte[]" (%object.Object*)*, i64 (%object.Object*)*, i32 (%object.Object*, %object.Object*)*, i32 (%object.Object*, %object.Object*)*, %object.Object* (%object.ClassInfo*)* } + %object.Interface = type { %object.ClassInfo*, %"void*[]", i64 } + %object.ModuleInfo = type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %"ModuleInfo[]", %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* } + %object.ModuleInfo.__vtbl = type { %object.ClassInfo*, %"byte[]" (%object.Object*)*, i64 (%object.Object*)*, i32 (%object.Object*, %object.Object*)*, i32 (%object.Object*, %object.Object*)* } + %object.Object = type { %object.ModuleInfo.__vtbl*, i8* } + %object.OffsetTypeInfo = type { i64, %object.TypeInfo* } + %object.TypeInfo = type { %object.TypeInfo.__vtbl*, i8* } + %object.TypeInfo.__vtbl = type { %object.ClassInfo*, %"byte[]" (%object.Object*)*, i64 (%object.Object*)*, i32 (%object.Object*, %object.Object*)*, i32 (%object.Object*, %object.Object*)*, i64 (%object.TypeInfo*, i8*)*, i32 (%object.TypeInfo*, i8*, i8*)*, i32 (%object.TypeInfo*, i8*, i8*)*, i64 (%object.TypeInfo*)*, void (%object.TypeInfo*, i8*, i8*)*, %object.TypeInfo* (%object.TypeInfo*)*, %"byte[]" (%object.TypeInfo*)*, i32 (%object.TypeInfo*)*, %"OffsetTypeInfo[]" (%object.TypeInfo*)* } + %"void*[]" = type { i64, i8** } +@_D10ModuleInfo6__vtblZ = external constant %object.ModuleInfo.__vtbl ; <%object.ModuleInfo.__vtbl*> [#uses=1] +@.str = internal constant [20 x i8] c"tango.core.BitManip\00" ; <[20 x i8]*> [#uses=1] +@_D5tango4core8BitManip8__ModuleZ = global %0 { %object.ModuleInfo.__vtbl* @_D10ModuleInfo6__vtblZ, i8* null, %"byte[]" { i64 19, i8* getelementptr ([20 x i8]* @.str, i32 0, i32 0) }, %1 zeroinitializer, %"ClassInfo[]" zeroinitializer, i32 4, void ()* null, void ()* null, void ()* null, i8* null, void ()* null } ; <%0*> [#uses=1] +@_D5tango4core8BitManip11__moduleRefZ = internal global %ModuleReference { %ModuleReference* null, %object.ModuleInfo* bitcast (%0* @_D5tango4core8BitManip8__ModuleZ to %object.ModuleInfo*) } ; <%ModuleReference*> [#uses=2] +@_Dmodule_ref = external global %ModuleReference* ; <%ModuleReference**> [#uses=2] +@llvm.global_ctors = appending constant [1 x %2] [%2 { i32 65535, void ()* @_D5tango4core8BitManip16__moduleinfoCtorZ }] ; <[1 x %2]*> [#uses=0] + +define fastcc i32 @_D5tango4core8BitManip6popcntFkZi(i32 %x_arg) nounwind readnone { +entry: + %tmp1 = lshr i32 %x_arg, 1 ; [#uses=1] + %tmp2 = and i32 %tmp1, 1431655765 ; [#uses=1] + %tmp4 = sub i32 %x_arg, %tmp2 ; [#uses=2] + %tmp6 = lshr i32 %tmp4, 2 ; [#uses=1] + %tmp7 = and i32 %tmp6, 858993459 ; [#uses=1] + %tmp9 = and i32 %tmp4, 858993459 ; [#uses=1] + %tmp10 = add i32 %tmp7, %tmp9 ; [#uses=2] + %tmp12 = lshr i32 %tmp10, 4 ; [#uses=1] + %tmp14 = add i32 %tmp12, %tmp10 ; [#uses=1] + %tmp16 = and i32 %tmp14, 252645135 ; [#uses=2] + %tmp18 = lshr i32 %tmp16, 8 ; [#uses=1] + %tmp20 = add i32 %tmp18, %tmp16 ; [#uses=1] + %tmp22 = and i32 %tmp20, 16711935 ; [#uses=2] + %tmp24 = lshr i32 %tmp22, 16 ; [#uses=1] + %tmp26 = add i32 %tmp24, %tmp22 ; [#uses=1] + %tmp28 = and i32 %tmp26, 65535 ; [#uses=1] + ret i32 %tmp28 +} + +define fastcc i32 
@_D5tango4core8BitManip7bitswapFkZk(i32 %x_arg) nounwind readnone { +entry: + %tmp1 = lshr i32 %x_arg, 1 ; [#uses=1] + %tmp2 = and i32 %tmp1, 1431655765 ; [#uses=1] + %tmp4 = shl i32 %x_arg, 1 ; [#uses=1] + %tmp5 = and i32 %tmp4, -1431655766 ; [#uses=1] + %tmp6 = or i32 %tmp2, %tmp5 ; [#uses=2] + %tmp8 = lshr i32 %tmp6, 2 ; [#uses=1] + %tmp9 = and i32 %tmp8, 858993459 ; [#uses=1] + %tmp11 = shl i32 %tmp6, 2 ; [#uses=1] + %tmp12 = and i32 %tmp11, -858993460 ; [#uses=1] + %tmp13 = or i32 %tmp9, %tmp12 ; [#uses=2] + %tmp15 = lshr i32 %tmp13, 4 ; [#uses=1] + %tmp16 = and i32 %tmp15, 252645135 ; [#uses=1] + %tmp18 = shl i32 %tmp13, 4 ; [#uses=1] + %tmp19 = and i32 %tmp18, -252645136 ; [#uses=1] + %tmp20 = or i32 %tmp16, %tmp19 ; [#uses=2] + %tmp22 = lshr i32 %tmp20, 8 ; [#uses=1] + %tmp23 = and i32 %tmp22, 16711935 ; [#uses=1] + %tmp25 = shl i32 %tmp20, 8 ; [#uses=1] + %tmp26 = and i32 %tmp25, -16711936 ; [#uses=1] + %tmp27 = or i32 %tmp23, %tmp26 ; [#uses=2] + %tmp29 = lshr i32 %tmp27, 16 ; [#uses=1] + %tmp31 = shl i32 %tmp27, 16 ; [#uses=1] + %tmp32 = or i32 %tmp29, %tmp31 ; [#uses=1] + ret i32 %tmp32 +} + +define internal void @_D5tango4core8BitManip16__moduleinfoCtorZ() nounwind { +moduleinfoCtorEntry: + %current = load %ModuleReference** @_Dmodule_ref ; <%ModuleReference*> [#uses=1] + store %ModuleReference* %current, %ModuleReference** getelementptr (%ModuleReference* @_D5tango4core8BitManip11__moduleRefZ, i32 0, i32 0) + store %ModuleReference* @_D5tango4core8BitManip11__moduleRefZ, %ModuleReference** @_Dmodule_ref + ret void +} +!llvm.ldc.classinfo._D6Object7__ClassZ = !{!0} +!llvm.ldc.classinfo._D10ModuleInfo7__ClassZ = !{!1} +!0 = metadata !{%object.Object undef, i1 false, i1 false} +!1 = metadata !{%object.ModuleInfo undef, i1 false, i1 false} diff --git a/test/Bitcode/metadata.ll b/test/Bitcode/metadata.ll new file mode 100644 index 0000000000000..19db3eac21602 --- /dev/null +++ b/test/Bitcode/metadata.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as < %s | llvm-dis -o /dev/null + +!llvm.foo = !{!0} +!0 = metadata !{i32 42} +@my.str = internal constant [4 x i8] c"foo\00" + diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll index 3ee9f8aa0ebb1..6ad09d2e25cdf 100644 --- a/test/BugPoint/crash-narrowfunctiontest.ll +++ b/test/BugPoint/crash-narrowfunctiontest.ll @@ -1,6 +1,6 @@ ; Test that bugpoint can narrow down the testcase to the important function ; -; RUN: bugpoint %s -bugpoint-crashcalls -silence-passes > /dev/null +; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null define i32 @foo() { ret i32 1 } diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index e5655974f7e5c..fb17c78a140bc 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -1,4 +1,4 @@ -; RUN: bugpoint %s -bugpoint-crashcalls -silence-passes +; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes ; Test to make sure that arguments are removed from the function if they are ; unnecessary. 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000000000..627b57d856348 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,31 @@ +include(GetTargetTriple) +get_target_triple(target) + +foreach(c ${LLVM_TARGETS_TO_BUILD}) + set(TARGETS_BUILT "${TARGETS_BUILT} ${c}") +endforeach(c) +set(TARGETS_TO_BUILD ${TARGETS_BUILT}) + +include(FindPythonInterp) +if(PYTHONINTERP_FOUND) + get_target_property(LLVM_TOOLS_PATH llvm-config RUNTIME_OUTPUT_DIRECTORY) + + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in + ${CMAKE_CURRENT_BINARY_DIR}/site.exp) + + add_custom_target(llvm-test + COMMAND sed -e "s#\@LLVM_SOURCE_DIR\@#${LLVM_MAIN_SRC_DIR}#" + -e "s#\@LLVM_BINARY_DIR\@#${LLVM_BINARY_DIR}#" + -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_PATH}/${CMAKE_CFG_INTDIR}#" + -e "s#\@LLVMGCC_DIR\@##" + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in > + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg + COMMAND ${PYTHON_EXECUTABLE} + ${LLVM_SOURCE_DIR}/utils/lit/lit.py + -sv + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS + COMMENT "Running LLVM regression tests") + +endif() diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll index caa9a981fc6a2..a0235f787061d 100644 --- a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll +++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 +; RUN: llc < %s -march=arm -mattr=+v6 %struct.layer_data = type { i32, [2048 x i8], i8*, [16 x i8], i32, i8*, i32, i32, [64 x i32], [64 x i32], [64 x i32], [64 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [12 x [64 x i16]] } @ld = external global %struct.layer_data* ; <%struct.layer_data**> [#uses=1] diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll index 6e11b16910184..81483cb4e7c55 100644 --- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll +++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 @quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] @dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1] diff --git a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll index 7317e62e31824..83b26d340062f 100644 --- a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll +++ b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 define fastcc i8* @read_sleb128(i8* %p, i32* %val) { br label %bb diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll index 07390add55381..33f935e960b1a 100644 --- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll +++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 | grep r9 -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic \ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer ; | grep 35 diff --git a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll index 32daf839f0fc9..b0953dc8b61f8 100644 --- 
a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll +++ b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-linux-gnueabi ; PR1257 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll index 6d3f6404af840..d741112e2886e 100644 --- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm ; PR1266 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" diff --git a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll index f927ef43ca194..e4635f50279d2 100644 --- a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi ; PR1279 %struct.rtx_def = type { i16, i8, i8, %struct.u } diff --git a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll index 55d29933a55c9..ea27676a9f0f5 100644 --- a/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi ; PR1279 %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll index ef5a1ae404598..f24def31f97aa 100644 --- a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-apple-darwin +; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin %struct.H_TBL = type { [17 x i8], [256 x i8], i32 } %struct.Q_TBL = type { [64 x i16], i32 } diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll index e412127eae7b5..b543c57e1a85e 100644 --- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll +++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | not grep {add.*#0} +; RUN: llc < %s -march=arm | not grep {add.*#0} define i32 @foo() { entry: diff --git a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll index 42f5034c70a76..e001cde8351bf 100644 --- a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll +++ b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic | \ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \ ; RUN: not grep LPC9 %struct.B = type { i32 } diff --git a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll index ec70a596bc3ac..a89e937d3e106 100644 --- a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll +++ b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" target triple = "arm-apple-darwin8" diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll index f3f82bc4846f0..c73b6793da0ff 100644 --- a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll +++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-apple-darwin %struct.Connection = type { i32, [10 x i8], i32 } %struct.IntChunk = type { %struct.cppobjtype, i32, i32*, i32 } diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll index 11431be9c28c8..26864f18a69c0 100644 --- a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll +++ b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc | not grep 1_0 +; RUN: llc < %s | not grep 1_0 ; This used to create an extra branch to 'entry', LBB1_0. ; ModuleID = 'bug.bc' diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll index c3596e7c7b4f3..f2a8ee1a14240 100644 --- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll +++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll @@ -1,7 +1,7 @@ -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1 ; Check that calls to baz and quux are tail-merged. ; PR1628 diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll index 41ab1e52f674b..2758505811549 100644 --- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll +++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll @@ -1,7 +1,7 @@ -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*baz | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge | grep bl.*quux | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge -enable-eh | grep bl.*quux | count 1 ; Check that calls to baz and quux are tail-merged. 
; PR1628 diff --git a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll index 58c5f89c619d5..b3b0769347f16 100644 --- a/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll +++ b/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 +; RUN: llc < %s -march=arm -mattr=+v6 define i32 @test3() { tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 ) diff --git a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll index 430b3689c0b41..7b15ded44799c 100644 --- a/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi ; PR1406 %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* } diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll index 4c4a9336fd91b..061bf5e851b02 100644 --- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll +++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll @@ -1,11 +1,11 @@ -; RUN: llvm-as < %s | llc -march=arm | grep bl.*baz | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep bl.*quux | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2 -; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*baz | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-eh | grep bl.*quux | count 1 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2 -; RUN: llvm-as < %s | llc -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2 +; RUN: llc < %s -march=arm | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm | grep bl.*quux | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2 +; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2 +; RUN: llc < %s -march=arm -enable-eh | grep bl.*baz | count 1 +; RUN: llc < %s -march=arm -enable-eh | grep bl.*quux | count 1 +; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*baz | count 2 +; RUN: llc < %s -march=arm -enable-tail-merge=0 -enable-eh | grep bl.*quux | count 2 ; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works. 
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index de32a26ae9cfb..d2eb85d356c53 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep {str.*\\!}
+; RUN: llc < %s -march=arm | not grep {str.*\\!}

 %struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
 %struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
index d21a8f209e960..030486a7c9832 100644
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR1424

 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
index 3cfcdef48f4b4..30b72e09a1145 100644
--- a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
+++ b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6
 ; PR1609

 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index ec170f8eac5be..ff015065ef01b 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -regalloc=local
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local
 ; PR1925

 %struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index b81d5759b6cf6..06bc98746076f 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -regalloc=local
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local
 ; PR1925

 %"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index ca34275f79f49..a604c5cd574e2 100644
--- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | not grep 255
+; RUN: llc < %s -march=arm -mattr=+v6 | not grep 255

 define i32 @main(i32 %argc, i8** %argv) {
 entry:
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 70f1774b4c521..78c6222375632 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2

 @accum = external global { double, double } ; <{ double, double }*> [#uses=1]
 @.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
index 610f5ea7cd05d..234c7b69e3e7b 100644
--- a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin

 @numBinsY = external global i32 ; [#uses=1]
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index 80ccddfcd735d..77418be380843 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin

 %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
index 3cd757fa62ad2..33bd4def5b495 100644
--- a/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
+++ b/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin

 declare void @foo(i8*, i8*, i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
index 035af08cd40ac..71aa6037a1374 100644
--- a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
+++ b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin

 %struct.BiContextType = type { i16, i8, i32 }
 %struct.Bitstream = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 }
diff --git a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
index e98126bf87aa9..aa61d86e1389e 100644
--- a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin

 %struct.Decoders = type { i32**, i16***, i16****, i16***, i16**, i8**, i8** }
 @decoders = external global %struct.Decoders ; <%struct.Decoders*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-07-17-Fdiv.ll b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
index aa75970418a60..4cb768ef5b6d3 100644
--- a/test/CodeGen/ARM/2008-07-17-Fdiv.ll
+++ b/test/CodeGen/ARM/2008-07-17-Fdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 define float @f(float %a, float %b) nounwind {
 	%tmp = fdiv float %a, %b
diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
index 6ea75eb5c79c5..83fde07779bc0 100644
--- a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
+++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 ; PR2589

 define void @main({ i32 }*) {
diff --git a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
index 0a79e8665a75e..adb011277604d 100644
--- a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
+++ b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 -relocation-model=pic | grep comm

 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 %struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
diff --git a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
index c601b90e0710c..5f9d9aea58dd1 100644
--- a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin

 @"\01LC1" = external constant [288 x i8] ; <[288 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
index b3ea6fc5945fc..d3bc3e1663bcf 100644
--- a/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin
+; RUN: llc < %s -mtriple=arm-apple-darwin

 define void @gcov_exit() nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
index 164e9643f170a..601a516eb09a9 100644
--- a/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2

 define hidden i64 @__muldi3(i64 %u, i64 %v) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
index 3f17a5150fbee..35ca7b4c9af26 100644
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 154

 %"struct.Adv5::Ekin<3>" = type <{ i8 }>
 %"struct.Adv5::X::Energyflux<3>" = type { double }
diff --git a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
index 48e663dd80675..4c0c59ccfbc6c 100644
--- a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
 target triple = "arm-apple-darwin9"

 %struct.FILE_POS = type { i8, i8, i16, i32 }
diff --git a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
index d7befa0987488..a48f0033acc8c 100644
--- a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
+++ b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc
+; RUN: llc < %s
 ; PR3610
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-s0:0:64-f80:32:32"
 target triple = "arm-elf"
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index bd5b719594428..bc5e6023409f3 100644
--- a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
 target triple = "arm-apple-darwin9"

 @a = external global double ; [#uses=1]
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 399ed3081f20c..0ec17ae23d694 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin9 -mattr=+vfp2
+; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
 ; rdar://6653182

 %struct.ggBRDF = type { i32 (...)** }
diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
index 0ec6d7d4ff735..a1ce384b53452 100644
--- a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
+++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 %struct.hit_t = type { %struct.v_t, double }
 %struct.node_t = type { %struct.hit_t, %struct.hit_t, i32 }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 11c05c6ea7b3d..352672274d20a 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {swi 107}
+; RUN: llc < %s -march=arm | grep {swi 107}

 define i32 @_swilseek(i32) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
index c00b1fb986069..f6b3d2c0147b6 100644
--- a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
+++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 ; PR3795

 define fastcc void @_D3foo3fooFAriZv({ i32, { double, double }* } %d_arg, i32 %x_arg) {
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
index c7e343c89203c..99907fc697bd0 100644
--- a/test/CodeGen/ARM/2009-04-08-FREM.ll
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index f394847362f9d..05d2f26be0b7a 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
 entry:
diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
index 223fa0f435c9e..deb092bbf86e2 100644
--- a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
+++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 ; PR3954

 define void @foo(...) nounwind {
diff --git a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
index 2bca6e62fc301..670d2045f8ecf 100644
--- a/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
+; RUN: llc < %s -mtriple=arm-linuxeabi-unknown-gnu -mattr=+v6
 ; PR4166

 %"byte[]" = type { i32, i8* }
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index d03b7ce87539c..75610ffecec27 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
+; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
 ; PR4100

 @.str = external constant [30 x i8] ; <[30 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index 35d4306e9d14e..7046fccb5ee9b 100644
--- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
 %struct.List = type { %struct.List*, i32 }
 @Node5 = external constant %struct.List ; <%struct.List*> [#uses=1]
 @"\01LC" = external constant [7 x i8] ; <[7 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
index f942c9fc22168..1e2707f7b5bba 100644
--- a/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
+++ b/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
@@ -1,7 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep swp
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=thumb | FileCheck %s
 ; PR4091

 define void @foo(i32 %i, i32* %p) nounwind {
+;CHECK: swp r2, r0, [r1]
 	%asmtmp = call i32 asm sideeffect "swp $0, $2, $3", "=&r,=*m,r,*m,~{memory}"(i32* %p, i32 %i, i32* %p) nounwind
 	ret void
 }
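The 2009-05-18-InlineAsmMem.ll hunk above is the one test in this batch that also moves from grep to FileCheck. A hedged sketch of what its new RUN line amounts to when lit executes it (the expanded paths are illustrative):

    llc < test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll -march=arm \
      | FileCheck test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll

Both occurrences of %s name the test file itself: llc compiles it, and FileCheck re-reads it to collect the embedded CHECK: patterns, matching them in order against llc's assembly output. Unlike the old grep pipeline, the expected text now lives next to the IR that produces it.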
diff --git a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
index 7cd35b9557d01..403e3f6509f3c 100644
--- a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
+++ b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic -mattr=+v6,+vfp2

 @"\01LC" = external constant [15 x i8] ; <[15 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
index 5eaae7aa9b46d..98e002302558b 100644
--- a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6

 %struct.anon = type { i16, i16 }
 %struct.cab_archive = type { i32, i16, i16, i16, i16, i8, %struct.cab_folder*, %struct.cab_file* }
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
index 45b4bd48f5161..27888d75f67a5 100644
--- a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin

 type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] } ; type %0
 type { i32, %struct.D_Reduction** } ; type %1
diff --git a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
index c715a189287a3..a0f903b0bdf56 100644
--- a/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin

 %struct.term = type { i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
index cbe2385ab27af..b56b684473609 100644
--- a/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
+; RUN: llc < %s -mtriple=armv6-eabi -mattr=+vfp2 -float-abi=hard
 ; PR4419

 define float @__ieee754_acosf(float %x) nounwind {
diff --git a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
index 5c8d7b0f62208..e068be74bae46 100644
--- a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin
+; RUN: llc < %s -mtriple=armv6-apple-darwin

 %struct.rtunion = type { i64 }
 %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
index 27cad7ccf6b7c..17efe0035419a 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9

 @nn = external global i32 ; [#uses=1]
 @al_len = external global i32 ; [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
index 3a14d67247b9d..f520be3946ae7 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9

 @no_mat = external global i32 ; [#uses=1]
 @no_mis = external global i32 ; [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
index f94b59dc91bdd..eee6ff98c6109 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9

 @JJ = external global i32* ; [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
index bca7f793eef4a..93c92b1c93f41 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9

 @r = external global i32 ; [#uses=1]
 @qr = external global i32 ; [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
index 0c90592f1d211..277283dc08890 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9

 @XX = external global i32* ; [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
index dfccefcac7a0d..5c0e5fa57b9fd 100644
--- a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
+++ b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=armv6-apple-darwin9
+; RUN: llc < %s -march=arm -mtriple=armv6-apple-darwin9

 @qr = external global i32 ; [#uses=1]
 @II = external global i32* ; [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
new file mode 100644
index 0000000000000..e1e94b6412149
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=arm -mattr=+v6
+
+define void @test(i8* %x) nounwind {
+entry:
+	call void asm sideeffect "pld\09${0:a}", "r,~{cc}"(i8* %x) nounwind
+	ret void
+}
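The new test below is machine-generated IR from a Delaunay-triangulation program built on a quad-edge data structure, which is why it is dense with pointer bit twiddling. A hedged reading, inferred from the IR rather than stated anywhere in the patch: %struct.edge_rec is 16 bytes on this 32-bit target and edges appear to be allocated in 64-byte-aligned groups of four, so the low six bits of an edge pointer select a slot within the group. The recurring instruction patterns then decode as in this illustrative sketch (the function names are mine, not the test's):

    define i32 @rot(i32 %e) {
    ; next slot in the quad: (e & ~63) | ((e + 16) & 63); the add-48 variant
    ; seen in the test is the same walk taken three slots, i.e. the inverse.
      %hi = and i32 %e, -64
      %t = add i32 %e, 16
      %lo = and i32 %t, 63
      %r = or i32 %hi, %lo
      ret i32 %r
    }
    define i32 @sym(i32 %e) {
    ; the xor-32 pattern: jump two slots to the oppositely directed edge
      %r = xor i32 %e, 32
      ret i32 %r
    }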
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
new file mode 100644
index 0000000000000..ee93fde998c10
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -0,0 +1,1323 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10 -mattr=+vfp2 | grep fcmpezd | count 13
+
+ %struct.EDGE_PAIR = type { %struct.edge_rec*, %struct.edge_rec* }
+ %struct.VEC2 = type { double, double, double }
+ %struct.VERTEX = type { %struct.VEC2, %struct.VERTEX*, %struct.VERTEX* }
+ %struct.edge_rec = type { %struct.VERTEX*, %struct.edge_rec*, i32, i8* }
+@avail_edge = internal global %struct.edge_rec* null ; <%struct.edge_rec**> [#uses=6]
+@_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[21 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind {
+entry:
+ %delright = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %delleft = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3]
+ %0 = icmp eq %struct.VERTEX* %tree, null ; [#uses=1]
+ br i1 %0, label %bb8, label %bb
+
+bb: ; preds = %entry
+ %1 = getelementptr %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1]
+ %2 = load %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %3 = icmp eq %struct.VERTEX* %2, null ; [#uses=1]
+ br i1 %3, label %bb7, label %bb1.i
+
+bb1.i: ; preds = %bb1.i, %bb
+ %tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] ; <%struct.VERTEX*> [#uses=3]
+ %4 = getelementptr %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %5 = load %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %6 = icmp eq %struct.VERTEX* %5, null ; [#uses=1]
+ br i1 %6, label %get_low.exit, label %bb1.i
+
+get_low.exit: ; preds = %bb1.i
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
+ %7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1]
+ call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
+ %9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2]
+ %11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %12 = load %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %13 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %14 = load %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1]
+ %15 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %16 = load %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2]
+ br label %bb.i
+
+bb.i: ; preds = %bb4.i, %get_low.exit
+ %rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] ; <%struct.edge_rec*> [#uses=2]
+ %ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] ; <%struct.edge_rec*> [#uses=3]
+ %17 = getelementptr %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %18 = load %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 ; [#uses=1]
+ %20 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; [#uses=1]
+ %21 = load double* %20, align 4 ; [#uses=3]
+ %22 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; [#uses=1]
+ %23 = load double* %22, align 4 ; [#uses=3]
+ br label %bb2.i
+
+bb1.i1: ; preds = %bb2.i
+ %24 = ptrtoint %struct.edge_rec* %ldi_addr.0.i to i32 ; [#uses=2]
+ %25 = add i32 %24, 48 ; [#uses=1]
+ %26 = and i32 %25, 63 ; [#uses=1]
+ %27 = and i32 %24, -64 ; [#uses=1]
+ %28 = or i32 %26, %27 ; [#uses=1]
+ %29 = inttoptr i32 %28 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %30 = getelementptr %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %31 = load %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %32 = ptrtoint %struct.edge_rec* %31 to i32 ; [#uses=2]
+ %33 = add i32 %32, 16 ; [#uses=1]
+ %34 = and i32 %33, 63 ; [#uses=1]
+ %35 = and i32 %32, -64 ; [#uses=1]
+ %36 = or i32 %34, %35 ; [#uses=2]
+ %37 = inttoptr i32 %36 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ br label %bb2.i
+
+bb2.i: ; preds = %bb1.i1, %bb.i
+ %ldi_addr.1.pn.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn6.in.in.i = phi i32 [ %19, %bb.i ], [ %36, %bb1.i1 ] ; [#uses=1]
+ %ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn6.in.i = xor i32 %.pn6.in.in.i, 32 ; [#uses=1]
+ %.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %t1.0.in.i = getelementptr %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t2.0.in.i = getelementptr %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t1.0.i = load %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %t2.0.i = load %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %38 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %39 = load double* %38, align 4 ; [#uses=3]
+ %40 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %41 = load double* %40, align 4 ; [#uses=3]
+ %42 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %43 = load double* %42, align 4 ; [#uses=1]
+ %44 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %45 = load double* %44, align 4 ; [#uses=1]
+ %46 = fsub double %39, %21 ; [#uses=1]
+ %47 = fsub double %45, %23 ; [#uses=1]
+ %48 = fmul double %46, %47 ; [#uses=1]
+ %49 = fsub double %43, %21 ; [#uses=1]
+ %50 = fsub double %41, %23 ; [#uses=1]
+ %51 = fmul double %49, %50 ; [#uses=1]
+ %52 = fsub double %48, %51 ; [#uses=1]
+ %53 = fcmp ogt double %52, 0.000000e+00 ; [#uses=1]
+ br i1 %53, label %bb1.i1, label %bb3.i
+
+bb3.i: ; preds = %bb2.i
+ %54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32 ; [#uses=1]
+ %55 = xor i32 %54, 32 ; [#uses=3]
+ %56 = inttoptr i32 %55 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %57 = getelementptr %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %58 = load %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %59 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; [#uses=1]
+ %60 = load double* %59, align 4 ; [#uses=1]
+ %61 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; [#uses=1]
+ %62 = load double* %61, align 4 ; [#uses=1]
+ %63 = fsub double %60, %39 ; [#uses=1]
+ %64 = fsub double %23, %41 ; [#uses=1]
+ %65 = fmul double %63, %64 ; [#uses=1]
+ %66 = fsub double %21, %39 ; [#uses=1]
+ %67 = fsub double %62, %41 ; [#uses=1]
+ %68 = fmul double %66, %67 ; [#uses=1]
+ %69 = fsub double %65, %68 ; [#uses=1]
+ %70 = fcmp ogt double %69, 0.000000e+00 ; [#uses=1]
+ br i1 %70, label %bb4.i, label %bb5.i
+
+bb4.i: ; preds = %bb3.i
+ %71 = getelementptr %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %72 = load %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1]
+ br label %bb.i
+
+bb5.i: ; preds = %bb3.i
+ %73 = add i32 %55, 48 ; [#uses=1]
+ %74 = and i32 %73, 63 ; [#uses=1]
+ %75 = and i32 %55, -64 ; [#uses=1]
+ %76 = or i32 %74, %75 ; [#uses=1]
+ %77 = inttoptr i32 %76 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %78 = getelementptr %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %79 = load %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %80 = ptrtoint %struct.edge_rec* %79 to i32 ; [#uses=2]
+ %81 = add i32 %80, 16 ; [#uses=1]
+ %82 = and i32 %81, 63 ; [#uses=1]
+ %83 = and i32 %80, -64 ; [#uses=1]
+ %84 = or i32 %82, %83 ; [#uses=1]
+ %85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %87 = load %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %88 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6]
+ %89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
+ %90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2]
+ store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4
+ %91 = ptrtoint %struct.edge_rec* %88 to i32 ; [#uses=5]
+ %92 = add i32 %91, 16 ; [#uses=2]
+ %93 = inttoptr i32 %92 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %94 = add i32 %91, 48 ; [#uses=1]
+ %95 = inttoptr i32 %94 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %96 = getelementptr %struct.edge_rec* %93, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4
+ %97 = add i32 %91, 32 ; [#uses=1]
+ %98 = inttoptr i32 %97 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %99 = getelementptr %struct.edge_rec* %98, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4
+ %100 = getelementptr %struct.edge_rec* %98, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
+ %101 = getelementptr %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
+ %102 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %103 = ptrtoint %struct.edge_rec* %102 to i32 ; [#uses=2]
+ %104 = add i32 %103, 16 ; [#uses=1]
+ %105 = and i32 %104, 63 ; [#uses=1]
+ %106 = and i32 %103, -64 ; [#uses=1]
+ %107 = or i32 %105, %106 ; [#uses=1]
+ %108 = inttoptr i32 %107 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %109 = getelementptr %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %110 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %111 = ptrtoint %struct.edge_rec* %110 to i32 ; [#uses=2]
+ %112 = add i32 %111, 16 ; [#uses=1]
+ %113 = and i32 %112, 63 ; [#uses=1]
+ %114 = and i32 %111, -64 ; [#uses=1]
+ %115 = or i32 %113, %114 ; [#uses=1]
+ %116 = inttoptr i32 %115 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %117 = getelementptr %struct.edge_rec* %116, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %118 = load %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %119 = getelementptr %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %120 = load %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
+ store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
+ %121 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %122 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
+ store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
+ %123 = xor i32 %91, 32 ; [#uses=1]
+ %124 = inttoptr i32 %123 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %125 = getelementptr %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %126 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %127 = ptrtoint %struct.edge_rec* %126 to i32 ; [#uses=2]
+ %128 = add i32 %127, 16 ; [#uses=1]
+ %129 = and i32 %128, 63 ; [#uses=1]
+ %130 = and i32 %127, -64 ; [#uses=1]
+ %131 = or i32 %129, %130 ; [#uses=1]
+ %132 = inttoptr i32 %131 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %133 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %134 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %135 = ptrtoint %struct.edge_rec* %134 to i32 ; [#uses=2]
+ %136 = add i32 %135, 16 ; [#uses=1]
+ %137 = and i32 %136, 63 ; [#uses=1]
+ %138 = and i32 %135, -64 ; [#uses=1]
+ %139 = or i32 %137, %138 ; [#uses=1]
+ %140 = inttoptr i32 %139 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %141 = getelementptr %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %142 = load %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %143 = getelementptr %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %144 = load %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
+ store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
+ %145 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %146 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2]
+ store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
+ store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
+ %147 = and i32 %92, 63 ; [#uses=1]
+ %148 = and i32 %91, -64 ; [#uses=1]
+ %149 = or i32 %147, %148 ; [#uses=1]
+ %150 = inttoptr i32 %149 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %151 = getelementptr %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %152 = load %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %153 = ptrtoint %struct.edge_rec* %152 to i32 ; [#uses=2]
+ %154 = add i32 %153, 16 ; [#uses=1]
+ %155 = and i32 %154, 63 ; [#uses=1]
+ %156 = and i32 %153, -64 ; [#uses=1]
+ %157 = or i32 %155, %156 ; [#uses=1]
+ %158 = inttoptr i32 %157 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %159 = load %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %160 = getelementptr %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %161 = load %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %162 = getelementptr %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %163 = load %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %164 = icmp eq %struct.VERTEX* %163, %159 ; [#uses=1]
+ %rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 ; <%struct.edge_rec*> [#uses=3]
+ %165 = getelementptr %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %166 = load %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %167 = icmp eq %struct.VERTEX* %166, %161 ; [#uses=1]
+ %ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 ; <%struct.edge_rec*> [#uses=3]
+ br label %bb9.i
+
+bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i
+ %lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
+ %basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] ; <%struct.edge_rec*> [#uses=2]
+ %168 = getelementptr %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %169 = load %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %170 = getelementptr %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %171 = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 ; [#uses=3]
+ %173 = xor i32 %172, 32 ; [#uses=1]
+ %174 = inttoptr i32 %173 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %175 = getelementptr %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %176 = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %177 = ptrtoint %struct.edge_rec* %169 to i32 ; [#uses=1]
+ %178 = xor i32 %177, 32 ; [#uses=1]
+ %179 = inttoptr i32 %178 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %180 = getelementptr %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %181 = load %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %182 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; [#uses=2]
+ %183 = load double* %182, align 4 ; [#uses=2]
+ %184 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; [#uses=2]
+ %185 = load double* %184, align 4 ; [#uses=2]
+ %186 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; [#uses=1]
+ %187 = load double* %186, align 4 ; [#uses=1]
+ %188 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; [#uses=1]
+ %189 = load double* %188, align 4 ; [#uses=1]
+ %190 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; [#uses=1]
+ %191 = load double* %190, align 4 ; [#uses=2]
+ %192 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; [#uses=1]
+ %193 = load double* %192, align 4 ; [#uses=2]
+ %194 = fsub double %183, %191 ; [#uses=1]
+ %195 = fsub double %189, %193 ; [#uses=1]
+ %196 = fmul double %194, %195 ; [#uses=1]
+ %197 = fsub double %187, %191 ; [#uses=1]
+ %198 = fsub double %185, %193 ; [#uses=1]
+ %199 = fmul double %197, %198 ; [#uses=1]
+ %200 = fsub double %196, %199 ; [#uses=1]
+ %201 = fcmp ogt double %200, 0.000000e+00 ; [#uses=1]
+ br i1 %201, label %bb10.i, label %bb13.i
+
+bb10.i: ; preds = %bb9.i
+ %202 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; [#uses=1]
+ %avail_edge.promoted25 = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb12.i
+
+bb11.i: ; preds = %bb12.i
+ %203 = ptrtoint %struct.edge_rec* %lcand.0.i to i32 ; [#uses=3]
+ %204 = add i32 %203, 16 ; [#uses=1]
+ %205 = and i32 %204, 63 ; [#uses=1]
+ %206 = and i32 %203, -64 ; [#uses=3]
+ %207 = or i32 %205, %206 ; [#uses=1]
+ %208 = inttoptr i32 %207 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %209 = getelementptr %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %210 = load %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %211 = ptrtoint %struct.edge_rec* %210 to i32 ; [#uses=2]
+ %212 = add i32 %211, 16 ; [#uses=1]
+ %213 = and i32 %212, 63 ; [#uses=1]
+ %214 = and i32 %211, -64 ; [#uses=1]
+ %215 = or i32 %213, %214 ; [#uses=1]
+ %216 = inttoptr i32 %215 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %217 = getelementptr %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %218 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %219 = ptrtoint %struct.edge_rec* %218 to i32 ; [#uses=2]
+ %220 = add i32 %219, 16 ; [#uses=1]
+ %221 = and i32 %220, 63 ; [#uses=1]
+ %222 = and i32 %219, -64 ; [#uses=1]
+ %223 = or i32 %221, %222 ; [#uses=1]
+ %224 = inttoptr i32 %223 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %225 = getelementptr %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %226 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %227 = ptrtoint %struct.edge_rec* %226 to i32 ; [#uses=2]
+ %228 = add i32 %227, 16 ; [#uses=1]
+ %229 = and i32 %228, 63 ; [#uses=1]
+ %230 = and i32 %227, -64 ; [#uses=1]
+ %231 = or i32 %229, %230 ; [#uses=1]
+ %232 = inttoptr i32 %231 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %233 = getelementptr %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %234 = load %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %235 = getelementptr %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %236 = load %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
+ store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
+ %237 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %238 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
+ store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
+ %239 = xor i32 %203, 32 ; [#uses=2]
+ %240 = add i32 %239, 16 ; [#uses=1]
+ %241 = and i32 %240, 63 ; [#uses=1]
+ %242 = or i32 %241, %206 ; [#uses=1]
+ %243 = inttoptr i32 %242 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %244 = getelementptr %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %245 = load %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %246 = ptrtoint %struct.edge_rec* %245 to i32 ; [#uses=2]
+ %247 = add i32 %246, 16 ; [#uses=1]
+ %248 = and i32 %247, 63 ; [#uses=1]
+ %249 = and i32 %246, -64 ; [#uses=1]
+ %250 = or i32 %248, %249 ; [#uses=1]
+ %251 = inttoptr i32 %250 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %252 = inttoptr i32 %239 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %253 = getelementptr %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %254 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %255 = ptrtoint %struct.edge_rec* %254 to i32 ; [#uses=2]
+ %256 = add i32 %255, 16 ; [#uses=1]
+ %257 = and i32 %256, 63 ; [#uses=1]
+ %258 = and i32 %255, -64 ; [#uses=1]
+ %259 = or i32 %257, %258 ; [#uses=1]
+ %260 = inttoptr i32 %259 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %261 = getelementptr %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %262 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %263 = ptrtoint %struct.edge_rec* %262 to i32 ; [#uses=2]
+ %264 = add i32 %263, 16 ; [#uses=1]
+ %265 = and i32 %264, 63 ; [#uses=1]
+ %266 = and i32 %263, -64 ; [#uses=1]
+ %267 = or i32 %265, %266 ; [#uses=1]
+ %268 = inttoptr i32 %267 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %269 = getelementptr %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %270 = load %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %271 = getelementptr %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %272 = load %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
+ store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
+ %273 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %274 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
+ store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
+ %275 = inttoptr i32 %206 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %276 = getelementptr %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
+ %277 = getelementptr %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %278 = load %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2]
+ %.pre.i = load double* %182, align 4 ; [#uses=1]
+ %.pre22.i = load double* %184, align 4 ; [#uses=1]
+ br label %bb12.i
+
+bb12.i: ; preds = %bb11.i, %bb10.i
+ %avail_edge.tmp.026 = phi %struct.edge_rec* [ %avail_edge.promoted25, %bb10.i ], [ %275, %bb11.i ] ; <%struct.edge_rec*> [#uses=2]
+ %279 = phi double [ %.pre22.i, %bb11.i ], [ %185, %bb10.i ] ; [#uses=3]
+ %280 = phi double [ %.pre.i, %bb11.i ], [ %183, %bb10.i ] ; [#uses=3]
+ %lcand.0.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.0.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=4]
+ %.pn5.in.in.in.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.in.in.in.i = phi %struct.edge_rec* [ %169, %bb10.i ], [ %278, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %lcand.2.pn.i = phi %struct.edge_rec* [ %lcand.2.i, %bb10.i ], [ %t.0.i, %bb11.i ] ; <%struct.edge_rec*> [#uses=1]
+ %.pn5.in.in.i = ptrtoint %struct.edge_rec* %.pn5.in.in.in.i to i32 ; [#uses=1]
+ %.pn4.in.in.i = ptrtoint %struct.edge_rec* %.pn4.in.in.in.i to i32 ; [#uses=1]
+ %.pn5.in.i = xor i32 %.pn5.in.in.i, 32 ; [#uses=1]
+ %.pn4.in.i = xor i32 %.pn4.in.in.i, 32 ; [#uses=1]
+ %.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.0.in.i = getelementptr %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.0.in.i = getelementptr %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.0.in.i = getelementptr %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.0.i = load %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.0.i = load %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.0.i = load %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %281 = load double* %202, align 4 ; [#uses=3]
+ %282 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %283 = load double* %282, align 4 ; [#uses=1]
+ %284 = fsub double %283, %280 ; [#uses=2]
+ %285 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %286 = load double* %285, align 4 ; [#uses=1]
+ %287 = fsub double %286, %279 ; [#uses=2]
+ %288 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; [#uses=1]
+ %289 = load double* %288, align 4 ; [#uses=1]
+ %290 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %291 = load double* %290, align 4 ; [#uses=1]
+ %292 = fsub double %291, %280 ; [#uses=2]
+ %293 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %294 = load double* %293, align 4 ; [#uses=1]
+ %295 = fsub double %294, %279 ; [#uses=2]
+ %296 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; [#uses=1]
+ %297 = load double* %296, align 4 ; [#uses=1]
+ %298 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %299 = load double* %298, align 4 ; [#uses=1]
+ %300 = fsub double %299, %280 ; [#uses=2]
+ %301 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %302 = load double* %301, align 4 ; [#uses=1]
+ %303 = fsub double %302, %279 ; [#uses=2]
+ %304 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; [#uses=1]
+ %305 = load double* %304, align 4 ; [#uses=1]
+ %306 = fsub double %289, %281 ; [#uses=1]
+ %307 = fmul double %292, %303 ; [#uses=1]
+ %308 = fmul double %295, %300 ; [#uses=1]
+ %309 = fsub double %307, %308 ; [#uses=1]
+ %310 = fmul double %306, %309 ; [#uses=1]
+ %311 = fsub double %297, %281 ; [#uses=1]
+ %312 = fmul double %300, %287 ; [#uses=1]
+ %313 = fmul double %303, %284 ; [#uses=1]
+ %314 = fsub double %312, %313 ; [#uses=1]
+ %315 = fmul double %311, %314 ; [#uses=1]
+ %316 = fadd double %315, %310 ; [#uses=1]
+ %317 = fsub double %305, %281 ; [#uses=1]
+ %318 = fmul double %284, %295 ; [#uses=1]
+ %319 = fmul double %287, %292 ; [#uses=1]
+ %320 = fsub double %318, %319 ; [#uses=1]
+ %321 = fmul double %317, %320 ; [#uses=1]
+ %322 = fadd double %321, %316 ; [#uses=1]
+ %323 = fcmp ogt double %322, 0.000000e+00 ; [#uses=1]
+ br i1 %323, label %bb11.i, label %bb13.loopexit.i
+
+bb13.loopexit.i: ; preds = %bb12.i
+ store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
+ %.pre23.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre24.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb13.i
+
+bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
+ %324 = phi %struct.VERTEX* [ %.pre24.i, %bb13.loopexit.i ], [ %176, %bb9.i ] ; <%struct.VERTEX*> [#uses=4]
+ %325 = phi %struct.VERTEX* [ %.pre23.i, %bb13.loopexit.i ], [ %171, %bb9.i ] ; <%struct.VERTEX*> [#uses=3]
+ %lcand.1.i = phi %struct.edge_rec* [ %lcand.0.i, %bb13.loopexit.i ], [ %lcand.2.i, %bb9.i ] ; <%struct.edge_rec*> [#uses=3]
+ %326 = ptrtoint %struct.edge_rec* %rcand.2.i to i32 ; [#uses=2]
+ %327 = add i32 %326, 16 ; [#uses=1]
+ %328 = and i32 %327, 63 ; [#uses=1]
+ %329 = and i32 %326, -64 ; [#uses=1]
+ %330 = or i32 %328, %329 ; [#uses=1]
+ %331 = inttoptr i32 %330 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %332 = getelementptr %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %333 = load %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %334 = ptrtoint %struct.edge_rec* %333 to i32 ; [#uses=2]
+ %335 = add i32 %334, 16 ; [#uses=1]
+ %336 = and i32 %335, 63 ; [#uses=1]
+ %337 = and i32 %334, -64 ; [#uses=1]
+ %338 = or i32 %336, %337 ; [#uses=3]
+ %339 = xor i32 %338, 32 ; [#uses=1]
+ %340 = inttoptr i32 %339 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %341 = getelementptr %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %342 = load %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %343 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; [#uses=1]
+ %344 = load double* %343, align 4 ; [#uses=1]
+ %345 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; [#uses=1]
+ %346 = load double* %345, align 4 ; [#uses=1]
+ %347 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; [#uses=1]
+ %348 = load double* %347, align 4 ; [#uses=1]
+ %349 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; [#uses=1]
+ %350 = load double* %349, align 4 ; [#uses=1]
+ %351 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; [#uses=2]
+ %352 = load double* %351, align 4 ; [#uses=3]
+ %353 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; [#uses=2]
+ %354 = load double* %353, align 4 ; [#uses=3]
+ %355 = fsub double %344, %352 ; [#uses=1]
+ %356 = fsub double %350, %354 ; [#uses=1]
+ %357 = fmul double %355, %356 ; [#uses=1]
+ %358 = fsub double %348, %352 ; [#uses=1]
+ %359 = fsub double %346, %354 ; [#uses=1]
+ %360 = fmul double %358, %359 ; [#uses=1]
+ %361 = fsub double %357, %360 ; [#uses=1]
+ %362 = fcmp ogt double %361, 0.000000e+00 ; [#uses=1]
+ br i1 %362, label %bb14.i, label %bb17.i
+
+bb14.i: ; preds = %bb13.i
+ %363 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; [#uses=1]
+ %avail_edge.promoted = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ br label %bb16.i
+
+bb15.i: ; preds = %bb16.i
+ %364 = ptrtoint %struct.edge_rec* %rcand.0.i to i32 ; [#uses=3]
+ %365 = add i32 %364, 16 ; [#uses=1]
+ %366 = and i32 %365, 63 ; [#uses=1]
+ %367 = and i32 %364, -64 ; [#uses=3]
+ %368 = or i32 %366, %367 ; [#uses=1]
+ %369 = inttoptr i32 %368 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %370 = getelementptr %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %371 = load %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %372 = ptrtoint %struct.edge_rec* %371 to i32 ; [#uses=2]
+ %373 = add i32 %372, 16 ; [#uses=1]
+ %374 = and i32 %373, 63 ; [#uses=1]
+ %375 = and i32 %372, -64 ; [#uses=1]
+ %376 = or i32 %374, %375 ; [#uses=1]
+ %377 = inttoptr i32 %376 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %378 = getelementptr %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %379 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %380 = ptrtoint %struct.edge_rec* %379 to i32 ; [#uses=2]
+ %381 = add i32 %380, 16 ; [#uses=1]
+ %382 = and i32 %381, 63 ; [#uses=1]
+ %383 = and i32 %380, -64 ; [#uses=1]
+ %384 = or i32 %382, %383 ; [#uses=1]
+ %385 = inttoptr i32 %384 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %386 = getelementptr %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %387 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %388 = ptrtoint %struct.edge_rec* %387 to i32 ; [#uses=2]
+ %389 = add i32 %388, 16 ; [#uses=1]
+ %390 = and i32 %389, 63 ; [#uses=1]
+ %391 = and i32 %388, -64 ; [#uses=1]
+ %392 = or i32 %390, %391 ; [#uses=1]
+ %393 = inttoptr i32 %392 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %394 = getelementptr %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %395 = load %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %396 = getelementptr %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %397 = load %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
+ store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
+ %398 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %399 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
+ store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
+ %400 = xor i32 %364, 32 ; [#uses=2]
+ %401 = add i32 %400, 16 ; [#uses=1]
+ %402 = and i32 %401, 63 ; [#uses=1]
+ %403 = or i32 %402, %367 ; [#uses=1]
+ %404 = inttoptr i32 %403 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %405 = getelementptr %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %406 = load %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %407 = ptrtoint %struct.edge_rec* %406 to i32 ; [#uses=2]
+ %408 = add i32 %407, 16 ; [#uses=1]
+ %409 = and i32 %408, 63 ; [#uses=1]
+ %410 = and i32 %407, -64 ; [#uses=1]
+ %411 = or i32 %409, %410 ; [#uses=1]
+ %412 = inttoptr i32 %411 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %413 = inttoptr i32 %400 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %414 = getelementptr %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %415 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %416 = ptrtoint %struct.edge_rec* %415 to i32 ; [#uses=2]
+ %417 = add i32 %416, 16 ; [#uses=1]
+ %418 = and i32 %417, 63 ; [#uses=1]
+ %419 = and i32 %416, -64 ; [#uses=1]
+ %420 = or i32 %418, %419 ; [#uses=1]
+ %421 = inttoptr i32 %420 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %422 = getelementptr %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %423 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %424 = ptrtoint %struct.edge_rec* %423 to i32 ; [#uses=2]
+ %425 = add i32 %424, 16 ; [#uses=1]
+ %426 = and i32 %425, 63 ; [#uses=1]
+ %427 = and i32 %424, -64 ; [#uses=1]
+ %428 = or i32 %426, %427 ; [#uses=1]
+ %429 = inttoptr i32 %428 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %430 = getelementptr %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %431 = load %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %432 = getelementptr %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %433 = load %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
+ store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
+ %434 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %435 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
+ store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
+ %436 = inttoptr i32 %367 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %437 = getelementptr %struct.edge_rec* %436, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4
+ %438 = add i32 %t.1.in.i, 16 ; [#uses=1]
+ %439 = and i32 %438, 63 ; [#uses=1]
+ %440 = and i32 %t.1.in.i, -64 ; [#uses=1]
+ %441 = or i32 %439, %440 ; [#uses=1]
+ %442 = inttoptr i32 %441 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %443 = getelementptr %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %444 = load %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %445 = ptrtoint %struct.edge_rec* %444 to i32 ; [#uses=2]
+ %446 = add i32 %445, 16 ; [#uses=1]
+ %447 = and i32 %446, 63 ; [#uses=1]
+ %448 = and i32 %445, -64 ; [#uses=1]
+ %449 = or i32 %447, %448 ; [#uses=2]
+ %.pre25.i = load double* %351, align 4 ; [#uses=1]
+ %.pre26.i = load double* %353, align 4 ; [#uses=1]
+ br label %bb16.i
+
+bb16.i: ; preds = %bb15.i, %bb14.i
+ %avail_edge.tmp.0 = phi %struct.edge_rec* [ %avail_edge.promoted, %bb14.i ], [ %436, %bb15.i ] ; <%struct.edge_rec*> [#uses=2]
+ %450 = phi double [ %.pre26.i, %bb15.i ], [ %354, %bb14.i ] ; [#uses=3]
+ %451 = phi double [ %.pre25.i, %bb15.i ], [ %352, %bb14.i ] ; [#uses=3]
+ %rcand.0.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=3]
+ %t.1.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; [#uses=3]
+ %.pn3.in.in.i = phi i32 [ %338, %bb14.i ], [ %449, %bb15.i ] ; [#uses=1]
+ %.pn.in.in.in.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %rcand.2.pn.i = phi %struct.edge_rec* [ %rcand.2.i, %bb14.i ], [ %t.1.i, %bb15.i ] ; <%struct.edge_rec*> [#uses=1]
+ %t.1.i = inttoptr i32 %t.1.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
+ %.pn.in.in.i = ptrtoint %struct.edge_rec* %.pn.in.in.in.i to i32 ; [#uses=1]
+ %.pn3.in.i = xor i32 %.pn3.in.in.i, 32 ; [#uses=1]
+ %.pn.in.i = xor i32 %.pn.in.in.i, 32 ; [#uses=1]
+ %.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %v1.1.in.i = getelementptr %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.1.in.i = getelementptr %struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.1.in.i = getelementptr %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.1.i = load %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.1.i = load %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.1.i = load %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %452 = load double* %363, align 4 ; [#uses=3]
+ %453 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %454 = load double* %453, align 4 ; [#uses=1]
+ %455 = fsub double %454, %451 ; [#uses=2]
+ %456 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %457 = load double* %456, align 4 ; [#uses=1]
+ %458 = fsub double %457, %450 ; [#uses=2]
+ %459 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; [#uses=1]
+ %460 = load double* %459, align 4 ; [#uses=1]
+ %461 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %462 = load double* %461, align 4 ; [#uses=1]
+ %463 = fsub double %462, %451 ; [#uses=2]
+ %464 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %465 = load double* %464, align 4 ; [#uses=1]
+ %466 = fsub double %465, %450 ; [#uses=2]
+ %467 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; [#uses=1]
+ %468 = load double* %467, align 4 ; [#uses=1]
+ %469 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; [#uses=1]
+ %470 = load double* %469, align 4 ; [#uses=1]
+ %471 = fsub double %470, %451 ; [#uses=2]
+ %472 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; [#uses=1]
+ %473 = load double* %472, align 4 ; [#uses=1]
+ %474 = fsub double %473, %450 ; [#uses=2]
+ %475 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 ; [#uses=1]
+ %476 = load double* %475, align 4 ; [#uses=1]
+ %477 = fsub double %460, %452 ; [#uses=1]
+ %478 = fmul double %463, %474 ; [#uses=1]
+ %479 = fmul double %466, %471 ; [#uses=1]
+ %480 = fsub double %478, %479 ; [#uses=1]
+ %481 = fmul double %477, %480 ; [#uses=1]
+ %482 = fsub double %468, %452 ; [#uses=1]
+ %483 = fmul double %471, %458 ; [#uses=1]
+ %484 = fmul double %474, %455 ; [#uses=1]
+ %485 = fsub double %483, %484 ; [#uses=1]
+ %486 = fmul double %482, %485 ; [#uses=1]
+ %487 = fadd double %486, %481 ; [#uses=1]
+ %488 = fsub double %476, %452 ; [#uses=1]
+ %489 = fmul double %455, %466 ; [#uses=1]
+ %490 = fmul double %458, %463 ; [#uses=1]
+ %491 = fsub double %489, %490 ; [#uses=1]
+ %492 = fmul double %488, %491 ; [#uses=1]
+ %493 = fadd double %492, %487 ; [#uses=1]
+ %494 = fcmp ogt double %493, 0.000000e+00 ; [#uses=1]
+ br i1 %494, label %bb15.i, label %bb17.loopexit.i
+
+bb17.loopexit.i: ; preds = %bb16.i
+ store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
+ %.pre27.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre28.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ br label %bb17.i
+
+bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
+ %495 = phi %struct.VERTEX* [ %.pre28.i, %bb17.loopexit.i ], [ %324, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %496 = phi %struct.VERTEX* [ %.pre27.i, %bb17.loopexit.i ], [ %325, %bb13.i ] ; <%struct.VERTEX*> [#uses=3]
+ %rcand.1.i = phi %struct.edge_rec* [ %rcand.0.i, %bb17.loopexit.i ], [ %rcand.2.i, %bb13.i ] ; <%struct.edge_rec*> [#uses=3]
+ %497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32 ; [#uses=1]
+ %498 = xor i32 %497,
32 ; [#uses=1] + %499 = inttoptr i32 %498 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %500 = getelementptr %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %501 = load %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4] + %502 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; [#uses=1] + %503 = load double* %502, align 4 ; [#uses=1] + %504 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; [#uses=1] + %505 = load double* %504, align 4 ; [#uses=1] + %506 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; [#uses=1] + %507 = load double* %506, align 4 ; [#uses=2] + %508 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; [#uses=1] + %509 = load double* %508, align 4 ; [#uses=2] + %510 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; [#uses=1] + %511 = load double* %510, align 4 ; [#uses=3] + %512 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; [#uses=1] + %513 = load double* %512, align 4 ; [#uses=3] + %514 = fsub double %503, %511 ; [#uses=2] + %515 = fsub double %509, %513 ; [#uses=1] + %516 = fmul double %514, %515 ; [#uses=1] + %517 = fsub double %507, %511 ; [#uses=1] + %518 = fsub double %505, %513 ; [#uses=2] + %519 = fmul double %517, %518 ; [#uses=1] + %520 = fsub double %516, %519 ; [#uses=1] + %521 = fcmp ogt double %520, 0.000000e+00 ; [#uses=2] + %522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32 ; [#uses=3] + %523 = xor i32 %522, 32 ; [#uses=1] + %524 = inttoptr i32 %523 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %525 = getelementptr %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %526 = load %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4] + %527 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; [#uses=1] + %528 = load double* %527, align 4 ; [#uses=4] + %529 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 1 ; [#uses=1] + %530 = load double* %529, align 4 ; [#uses=4] + %531 = fsub double %530, %513 ; [#uses=1] + %532 = fmul double %514, %531 ; [#uses=1] + %533 = fsub double %528, %511 ; [#uses=1] + %534 = fmul double %533, %518 ; [#uses=1] + %535 = fsub double %532, %534 ; [#uses=1] + %536 = fcmp ogt double %535, 0.000000e+00 ; [#uses=2] + %537 = or i1 %536, %521 ; [#uses=1] + br i1 %537, label %bb21.i, label %do_merge.exit + +bb21.i: ; preds = %bb17.i + %538 = getelementptr %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %539 = load %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3] + %540 = getelementptr %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %541 = load %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3] + br i1 %521, label %bb22.i, label %bb24.i + +bb22.i: ; preds = %bb21.i + br i1 %536, label %bb23.i, label %bb25.i + +bb23.i: ; preds = %bb22.i + %542 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; [#uses=1] + %543 = load double* %542, align 4 ; [#uses=3] + %544 = fsub double %507, %528 ; [#uses=2] + %545 = fsub double %509, %530 ; [#uses=2] + %546 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; [#uses=1] + %547 = load double* %546, align 4 ; [#uses=1] + %548 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; [#uses=1] + %549 = load double* %548, align 4 ; [#uses=1] + %550 = fsub double %549, %528 ; [#uses=2] + %551 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; [#uses=1] + %552 = load double* %551, align 4 ; [#uses=1] + %553 = fsub double %552, %530 ; [#uses=2] + %554 = getelementptr 
%struct.VERTEX* %539, i32 0, i32 0, i32 2 ; [#uses=1] + %555 = load double* %554, align 4 ; [#uses=1] + %556 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; [#uses=1] + %557 = load double* %556, align 4 ; [#uses=1] + %558 = fsub double %557, %528 ; [#uses=2] + %559 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; [#uses=1] + %560 = load double* %559, align 4 ; [#uses=1] + %561 = fsub double %560, %530 ; [#uses=2] + %562 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; [#uses=1] + %563 = load double* %562, align 4 ; [#uses=1] + %564 = fsub double %547, %543 ; [#uses=1] + %565 = fmul double %550, %561 ; [#uses=1] + %566 = fmul double %553, %558 ; [#uses=1] + %567 = fsub double %565, %566 ; [#uses=1] + %568 = fmul double %564, %567 ; [#uses=1] + %569 = fsub double %555, %543 ; [#uses=1] + %570 = fmul double %558, %545 ; [#uses=1] + %571 = fmul double %561, %544 ; [#uses=1] + %572 = fsub double %570, %571 ; [#uses=1] + %573 = fmul double %569, %572 ; [#uses=1] + %574 = fadd double %573, %568 ; [#uses=1] + %575 = fsub double %563, %543 ; [#uses=1] + %576 = fmul double %544, %553 ; [#uses=1] + %577 = fmul double %545, %550 ; [#uses=1] + %578 = fsub double %576, %577 ; [#uses=1] + %579 = fmul double %575, %578 ; [#uses=1] + %580 = fadd double %579, %574 ; [#uses=1] + %581 = fcmp ogt double %580, 0.000000e+00 ; [#uses=1] + br i1 %581, label %bb24.i, label %bb25.i + +bb24.i: ; preds = %bb23.i, %bb21.i + %582 = add i32 %522, 48 ; [#uses=1] + %583 = and i32 %582, 63 ; [#uses=1] + %584 = and i32 %522, -64 ; [#uses=1] + %585 = or i32 %583, %584 ; [#uses=1] + %586 = inttoptr i32 %585 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %587 = getelementptr %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %588 = load %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1] + %589 = ptrtoint %struct.edge_rec* %588 to i32 ; [#uses=2] + %590 = add i32 %589, 16 ; [#uses=1] + %591 = and i32 %590, 63 ; [#uses=1] + %592 = and i32 %589, -64 ; [#uses=1] + %593 = or i32 %591, %592 ; [#uses=1] + %594 = inttoptr i32 %593 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %595 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] + %596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] + store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4 + %597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4 + %598 = ptrtoint %struct.edge_rec* %595 to i32 ; [#uses=5] + %599 = add i32 %598, 16 ; [#uses=1] + %600 = inttoptr i32 %599 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %601 = add i32 %598, 48 ; [#uses=1] + %602 = inttoptr i32 %601 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %603 = getelementptr %struct.edge_rec* %600, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4 + %604 = add i32 %598, 32 ; [#uses=1] + %605 = inttoptr i32 %604 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %606 = getelementptr %struct.edge_rec* %605, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4 + %607 = getelementptr %struct.edge_rec* %605, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4 + %608 = getelementptr %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store 
%struct.edge_rec* %600, %struct.edge_rec** %608, align 4 + %609 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] + %610 = ptrtoint %struct.edge_rec* %609 to i32 ; [#uses=2] + %611 = add i32 %610, 16 ; [#uses=1] + %612 = and i32 %611, 63 ; [#uses=1] + %613 = and i32 %610, -64 ; [#uses=1] + %614 = or i32 %612, %613 ; [#uses=1] + %615 = inttoptr i32 %614 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %616 = getelementptr %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %617 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] + %618 = ptrtoint %struct.edge_rec* %617 to i32 ; [#uses=2] + %619 = add i32 %618, 16 ; [#uses=1] + %620 = and i32 %619, 63 ; [#uses=1] + %621 = and i32 %618, -64 ; [#uses=1] + %622 = or i32 %620, %621 ; [#uses=1] + %623 = inttoptr i32 %622 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %624 = getelementptr %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %625 = load %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1] + %626 = getelementptr %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %627 = load %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4 + store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4 + %628 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1] + %629 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4 + store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4 + %630 = xor i32 %598, 32 ; [#uses=2] + %631 = inttoptr i32 %630 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %632 = getelementptr %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %633 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] + %634 = ptrtoint %struct.edge_rec* %633 to i32 ; [#uses=2] + %635 = add i32 %634, 16 ; [#uses=1] + %636 = and i32 %635, 63 ; [#uses=1] + %637 = and i32 %634, -64 ; [#uses=1] + %638 = or i32 %636, %637 ; [#uses=1] + %639 = inttoptr i32 %638 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %640 = getelementptr %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %641 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] + %642 = ptrtoint %struct.edge_rec* %641 to i32 ; [#uses=2] + %643 = add i32 %642, 16 ; [#uses=1] + %644 = and i32 %643, 63 ; [#uses=1] + %645 = and i32 %642, -64 ; [#uses=1] + %646 = or i32 %644, %645 ; [#uses=1] + %647 = inttoptr i32 %646 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %648 = getelementptr %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %649 = load %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1] + %650 = getelementptr %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %651 = load %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4 + store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4 + %652 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1] + %653 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4 + store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4 + %654 = add i32 %630, 48 ; [#uses=1] + %655 = and i32 %654, 63 ; [#uses=1] + %656 = 
and i32 %598, -64 ; [#uses=1] + %657 = or i32 %655, %656 ; [#uses=1] + %658 = inttoptr i32 %657 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %659 = getelementptr %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %660 = load %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1] + %661 = ptrtoint %struct.edge_rec* %660 to i32 ; [#uses=2] + %662 = add i32 %661, 16 ; [#uses=1] + %663 = and i32 %662, 63 ; [#uses=1] + %664 = and i32 %661, -64 ; [#uses=1] + %665 = or i32 %663, %664 ; [#uses=1] + %666 = inttoptr i32 %665 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + br label %bb9.i + +bb25.i: ; preds = %bb23.i, %bb22.i + %667 = add i32 %172, 16 ; [#uses=1] + %668 = and i32 %667, 63 ; [#uses=1] + %669 = and i32 %172, -64 ; [#uses=1] + %670 = or i32 %668, %669 ; [#uses=1] + %671 = inttoptr i32 %670 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %672 = getelementptr %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %673 = load %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1] + %674 = ptrtoint %struct.edge_rec* %673 to i32 ; [#uses=2] + %675 = add i32 %674, 16 ; [#uses=1] + %676 = and i32 %675, 63 ; [#uses=1] + %677 = and i32 %674, -64 ; [#uses=1] + %678 = or i32 %676, %677 ; [#uses=1] + %679 = inttoptr i32 %678 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %680 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5] + store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4 + %682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4 + %683 = ptrtoint %struct.edge_rec* %680 to i32 ; [#uses=4] + %684 = add i32 %683, 16 ; [#uses=1] + %685 = inttoptr i32 %684 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %686 = add i32 %683, 48 ; [#uses=1] + %687 = inttoptr i32 %686 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %688 = getelementptr %struct.edge_rec* %685, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4 + %689 = add i32 %683, 32 ; [#uses=1] + %690 = inttoptr i32 %689 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %691 = getelementptr %struct.edge_rec* %690, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4 + %692 = getelementptr %struct.edge_rec* %690, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4 + %693 = getelementptr %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4 + %694 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + %695 = ptrtoint %struct.edge_rec* %694 to i32 ; [#uses=2] + %696 = add i32 %695, 16 ; [#uses=1] + %697 = and i32 %696, 63 ; [#uses=1] + %698 = and i32 %695, -64 ; [#uses=1] + %699 = or i32 %697, %698 ; [#uses=1] + %700 = inttoptr i32 %699 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %701 = getelementptr %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %702 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] + %703 = ptrtoint %struct.edge_rec* %702 to i32 ; [#uses=2] + %704 = add i32 %703, 16 ; [#uses=1] + %705 = and i32 %704, 63 ; [#uses=1] + %706 = and i32 %703, -64 ; [#uses=1] + %707 = or i32 
%705, %706 ; [#uses=1] + %708 = inttoptr i32 %707 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %709 = getelementptr %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %710 = load %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1] + %711 = getelementptr %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %712 = load %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4 + store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4 + %713 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + %714 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4 + store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4 + %715 = xor i32 %683, 32 ; [#uses=1] + %716 = inttoptr i32 %715 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %717 = getelementptr %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %718 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] + %719 = ptrtoint %struct.edge_rec* %718 to i32 ; [#uses=2] + %720 = add i32 %719, 16 ; [#uses=1] + %721 = and i32 %720, 63 ; [#uses=1] + %722 = and i32 %719, -64 ; [#uses=1] + %723 = or i32 %721, %722 ; [#uses=1] + %724 = inttoptr i32 %723 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %725 = getelementptr %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %726 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] + %727 = ptrtoint %struct.edge_rec* %726 to i32 ; [#uses=2] + %728 = add i32 %727, 16 ; [#uses=1] + %729 = and i32 %728, 63 ; [#uses=1] + %730 = and i32 %727, -64 ; [#uses=1] + %731 = or i32 %729, %730 ; [#uses=1] + %732 = inttoptr i32 %731 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %733 = getelementptr %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %734 = load %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1] + %735 = getelementptr %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %736 = load %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4 + store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4 + %737 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1] + %738 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4 + store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4 + %739 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1] + br label %bb9.i + +do_merge.exit: ; preds = %bb17.i + %740 = getelementptr %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %741 = load %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1] + %742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i ; [#uses=1] + br i1 %742, label %bb5.loopexit, label %bb2 + +bb2: ; preds = %bb2, %do_merge.exit + %ldo.07 = phi %struct.edge_rec* [ %747, %bb2 ], [ %ldo_addr.0.ph.i, %do_merge.exit ] ; <%struct.edge_rec*> [#uses=1] + %743 = ptrtoint %struct.edge_rec* %ldo.07 to i32 ; [#uses=1] + %744 = xor i32 %743, 32 ; [#uses=1] + %745 = inttoptr i32 %744 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %746 = getelementptr %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %747 = load 
%struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3] + %748 = getelementptr %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %749 = load %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1] + %750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i ; [#uses=1] + br i1 %750, label %bb5.loopexit, label %bb2 + +bb4: ; preds = %bb5.loopexit, %bb4 + %rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] ; <%struct.edge_rec*> [#uses=1] + %751 = getelementptr %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %752 = load %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1] + %753 = ptrtoint %struct.edge_rec* %752 to i32 ; [#uses=1] + %754 = xor i32 %753, 32 ; [#uses=1] + %755 = inttoptr i32 %754 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %756 = getelementptr %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %757 = load %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1] + %758 = icmp eq %struct.VERTEX* %757, %extra ; [#uses=1] + br i1 %758, label %bb6, label %bb4 + +bb5.loopexit: ; preds = %bb2, %do_merge.exit + %ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] ; <%struct.edge_rec*> [#uses=1] + %759 = getelementptr %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %760 = load %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1] + %761 = icmp eq %struct.VERTEX* %760, %extra ; [#uses=1] + br i1 %761, label %bb6, label %bb4 + +bb6: ; preds = %bb5.loopexit, %bb4 + %rdo.0.lcssa = phi %struct.edge_rec* [ %rdo_addr.0.i, %bb5.loopexit ], [ %755, %bb4 ] ; <%struct.edge_rec*> [#uses=1] + %tmp16 = ptrtoint %struct.edge_rec* %ldo.0.lcssa to i32 ; [#uses=1] + %tmp4 = ptrtoint %struct.edge_rec* %rdo.0.lcssa to i32 ; [#uses=1] + br label %bb15 + +bb7: ; preds = %bb + %762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] + %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4] + %764 = icmp eq %struct.VERTEX* %763, null ; [#uses=1] + %765 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] + %766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] + store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4 + %767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3] + br i1 %764, label %bb10, label %bb11 + +bb8: ; preds = %entry + %768 = call arm_apcscc i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; [#uses=0] + call arm_apcscc void @exit(i32 -1) noreturn nounwind + unreachable + +bb10: ; preds = %bb7 + store %struct.VERTEX* %tree, %struct.VERTEX** %767, align 4 + %769 = ptrtoint %struct.edge_rec* %765 to i32 ; [#uses=5] + %770 = add i32 %769, 16 ; [#uses=1] + %771 = inttoptr i32 %770 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %772 = add i32 %769, 48 ; [#uses=1] + %773 = inttoptr i32 %772 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %774 = getelementptr %struct.edge_rec* %771, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4 + %775 = add i32 %769, 32 ; [#uses=1] + %776 = inttoptr i32 %775 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %777 = getelementptr %struct.edge_rec* %776, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4 + %778 = getelementptr 
%struct.edge_rec* %776, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4 + %779 = getelementptr %struct.edge_rec* %773, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4 + %780 = xor i32 %769, 32 ; [#uses=1] + br label %bb15 + +bb11: ; preds = %bb7 + store %struct.VERTEX* %763, %struct.VERTEX** %767, align 4 + %781 = ptrtoint %struct.edge_rec* %765 to i32 ; [#uses=6] + %782 = add i32 %781, 16 ; [#uses=1] + %783 = inttoptr i32 %782 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %784 = add i32 %781, 48 ; [#uses=1] + %785 = inttoptr i32 %784 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %786 = getelementptr %struct.edge_rec* %783, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4 + %787 = add i32 %781, 32 ; [#uses=1] + %788 = inttoptr i32 %787 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %789 = getelementptr %struct.edge_rec* %788, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4 + %790 = getelementptr %struct.edge_rec* %788, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4 + %791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4 + %792 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %793 = getelementptr %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] + store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4 + %794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4 + %795 = ptrtoint %struct.edge_rec* %792 to i32 ; [#uses=5] + %796 = add i32 %795, 16 ; [#uses=1] + %797 = inttoptr i32 %796 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %798 = add i32 %795, 48 ; [#uses=2] + %799 = inttoptr i32 %798 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %800 = getelementptr %struct.edge_rec* %797, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4 + %801 = add i32 %795, 32 ; [#uses=1] + %802 = inttoptr i32 %801 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %803 = getelementptr %struct.edge_rec* %802, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4 + %804 = getelementptr %struct.edge_rec* %802, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4 + %805 = getelementptr %struct.edge_rec* %799, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4 + %806 = xor i32 %781, 32 ; [#uses=1] + %807 = inttoptr i32 %806 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %808 = getelementptr %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %809 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] + %810 = ptrtoint %struct.edge_rec* %809 to i32 ; [#uses=2] + %811 = add i32 %810, 16 ; [#uses=1] + %812 = and i32 %811, 63 ; [#uses=1] + %813 = and i32 %810, -64 ; [#uses=1] + %814 = or i32 %812, %813 ; [#uses=1] + %815 = inttoptr i32 %814 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %816 = load %struct.edge_rec** %793, align 4 ; 
<%struct.edge_rec*> [#uses=1] + %817 = ptrtoint %struct.edge_rec* %816 to i32 ; [#uses=2] + %818 = add i32 %817, 16 ; [#uses=1] + %819 = and i32 %818, 63 ; [#uses=1] + %820 = and i32 %817, -64 ; [#uses=1] + %821 = or i32 %819, %820 ; [#uses=1] + %822 = inttoptr i32 %821 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %823 = getelementptr %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %824 = load %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1] + %825 = getelementptr %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %826 = load %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4 + store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4 + %827 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1] + %828 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4 + store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4 + %829 = xor i32 %795, 32 ; [#uses=3] + %830 = inttoptr i32 %829 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %831 = getelementptr %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + %832 = load %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1] + %833 = and i32 %798, 63 ; [#uses=1] + %834 = and i32 %795, -64 ; [#uses=1] + %835 = or i32 %833, %834 ; [#uses=1] + %836 = inttoptr i32 %835 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %837 = getelementptr %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %838 = load %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1] + %839 = ptrtoint %struct.edge_rec* %838 to i32 ; [#uses=2] + %840 = add i32 %839, 16 ; [#uses=1] + %841 = and i32 %840, 63 ; [#uses=1] + %842 = and i32 %839, -64 ; [#uses=1] + %843 = or i32 %841, %842 ; [#uses=1] + %844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1] + %846 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7] + store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4 + %848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4 + %849 = ptrtoint %struct.edge_rec* %846 to i32 ; [#uses=6] + %850 = add i32 %849, 16 ; [#uses=2] + %851 = inttoptr i32 %850 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %852 = add i32 %849, 48 ; [#uses=1] + %853 = inttoptr i32 %852 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2] + %854 = getelementptr %struct.edge_rec* %851, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4 + %855 = add i32 %849, 32 ; [#uses=1] + %856 = inttoptr i32 %855 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3] + %857 = getelementptr %struct.edge_rec* %856, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4 + %858 = getelementptr %struct.edge_rec* %856, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] + store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4 + %859 = getelementptr %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4 
+ %860 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %861 = ptrtoint %struct.edge_rec* %860 to i32 ; [#uses=2] + %862 = add i32 %861, 16 ; [#uses=1] + %863 = and i32 %862, 63 ; [#uses=1] + %864 = and i32 %861, -64 ; [#uses=1] + %865 = or i32 %863, %864 ; [#uses=1] + %866 = inttoptr i32 %865 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %867 = getelementptr %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %868 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] + %869 = ptrtoint %struct.edge_rec* %868 to i32 ; [#uses=2] + %870 = add i32 %869, 16 ; [#uses=1] + %871 = and i32 %870, 63 ; [#uses=1] + %872 = and i32 %869, -64 ; [#uses=1] + %873 = or i32 %871, %872 ; [#uses=1] + %874 = inttoptr i32 %873 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %875 = getelementptr %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %876 = load %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1] + %877 = getelementptr %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %878 = load %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4 + store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4 + %879 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %880 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4 + store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4 + %881 = xor i32 %849, 32 ; [#uses=3] + %882 = inttoptr i32 %881 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %883 = getelementptr %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6] + %884 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %885 = ptrtoint %struct.edge_rec* %884 to i32 ; [#uses=2] + %886 = add i32 %885, 16 ; [#uses=1] + %887 = and i32 %886, 63 ; [#uses=1] + %888 = and i32 %885, -64 ; [#uses=1] + %889 = or i32 %887, %888 ; [#uses=1] + %890 = inttoptr i32 %889 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %891 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] + %892 = ptrtoint %struct.edge_rec* %891 to i32 ; [#uses=2] + %893 = add i32 %892, 16 ; [#uses=1] + %894 = and i32 %893, 63 ; [#uses=1] + %895 = and i32 %892, -64 ; [#uses=1] + %896 = or i32 %894, %895 ; [#uses=1] + %897 = inttoptr i32 %896 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %898 = getelementptr %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %899 = load %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1] + %900 = getelementptr %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %901 = load %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4 + store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4 + %902 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1] + %903 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4 + store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4 + %904 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; [#uses=1] + %905 = load double* %904, align 4 ; [#uses=2] + %906 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; [#uses=1] + %907 = load double* %906, 
align 4 ; [#uses=2] + %908 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; [#uses=1] + %909 = load double* %908, align 4 ; [#uses=3] + %910 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; [#uses=1] + %911 = load double* %910, align 4 ; [#uses=3] + %912 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; [#uses=1] + %913 = load double* %912, align 4 ; [#uses=3] + %914 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; [#uses=1] + %915 = load double* %914, align 4 ; [#uses=3] + %916 = fsub double %905, %913 ; [#uses=1] + %917 = fsub double %911, %915 ; [#uses=1] + %918 = fmul double %916, %917 ; [#uses=1] + %919 = fsub double %909, %913 ; [#uses=1] + %920 = fsub double %907, %915 ; [#uses=1] + %921 = fmul double %919, %920 ; [#uses=1] + %922 = fsub double %918, %921 ; [#uses=1] + %923 = fcmp ogt double %922, 0.000000e+00 ; [#uses=1] + br i1 %923, label %bb15, label %bb13 + +bb13: ; preds = %bb11 + %924 = fsub double %905, %909 ; [#uses=1] + %925 = fsub double %915, %911 ; [#uses=1] + %926 = fmul double %924, %925 ; [#uses=1] + %927 = fsub double %913, %909 ; [#uses=1] + %928 = fsub double %907, %911 ; [#uses=1] + %929 = fmul double %927, %928 ; [#uses=1] + %930 = fsub double %926, %929 ; [#uses=1] + %931 = fcmp ogt double %930, 0.000000e+00 ; [#uses=1] + br i1 %931, label %bb15, label %bb14 + +bb14: ; preds = %bb13 + %932 = and i32 %850, 63 ; [#uses=1] + %933 = and i32 %849, -64 ; [#uses=3] + %934 = or i32 %932, %933 ; [#uses=1] + %935 = inttoptr i32 %934 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %936 = getelementptr %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] + %937 = load %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1] + %938 = ptrtoint %struct.edge_rec* %937 to i32 ; [#uses=2] + %939 = add i32 %938, 16 ; [#uses=1] + %940 = and i32 %939, 63 ; [#uses=1] + %941 = and i32 %938, -64 ; [#uses=1] + %942 = or i32 %940, %941 ; [#uses=1] + %943 = inttoptr i32 %942 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %944 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %945 = ptrtoint %struct.edge_rec* %944 to i32 ; [#uses=2] + %946 = add i32 %945, 16 ; [#uses=1] + %947 = and i32 %946, 63 ; [#uses=1] + %948 = and i32 %945, -64 ; [#uses=1] + %949 = or i32 %947, %948 ; [#uses=1] + %950 = inttoptr i32 %949 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %951 = getelementptr %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3] + %952 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] + %953 = ptrtoint %struct.edge_rec* %952 to i32 ; [#uses=2] + %954 = add i32 %953, 16 ; [#uses=1] + %955 = and i32 %954, 63 ; [#uses=1] + %956 = and i32 %953, -64 ; [#uses=1] + %957 = or i32 %955, %956 ; [#uses=1] + %958 = inttoptr i32 %957 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] + %959 = getelementptr %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %960 = load %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1] + %961 = getelementptr %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2] + %962 = load %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4 + store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4 + %963 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1] + %964 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1] + store %struct.edge_rec* %963, 
%struct.edge_rec** %951, align 4
+ store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
+ %965 = add i32 %881, 16 ; [#uses=1]
+ %966 = and i32 %965, 63 ; [#uses=1]
+ %967 = or i32 %966, %933 ; [#uses=1]
+ %968 = inttoptr i32 %967 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %969 = getelementptr %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %970 = load %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %971 = ptrtoint %struct.edge_rec* %970 to i32 ; [#uses=2]
+ %972 = add i32 %971, 16 ; [#uses=1]
+ %973 = and i32 %972, 63 ; [#uses=1]
+ %974 = and i32 %971, -64 ; [#uses=1]
+ %975 = or i32 %973, %974 ; [#uses=1]
+ %976 = inttoptr i32 %975 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %977 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %978 = ptrtoint %struct.edge_rec* %977 to i32 ; [#uses=2]
+ %979 = add i32 %978, 16 ; [#uses=1]
+ %980 = and i32 %979, 63 ; [#uses=1]
+ %981 = and i32 %978, -64 ; [#uses=1]
+ %982 = or i32 %980, %981 ; [#uses=1]
+ %983 = inttoptr i32 %982 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %984 = getelementptr %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %985 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %986 = ptrtoint %struct.edge_rec* %985 to i32 ; [#uses=2]
+ %987 = add i32 %986, 16 ; [#uses=1]
+ %988 = and i32 %987, 63 ; [#uses=1]
+ %989 = and i32 %986, -64 ; [#uses=1]
+ %990 = or i32 %988, %989 ; [#uses=1]
+ %991 = inttoptr i32 %990 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
+ %992 = getelementptr %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %993 = load %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %994 = getelementptr %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %995 = load %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
+ store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
+ %996 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %997 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
+ store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
+ %998 = inttoptr i32 %933 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
+ %999 = load %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %1000 = getelementptr %struct.edge_rec* %998, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
+ store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
+ br label %bb15
+
+bb15: ; preds = %bb14, %bb13, %bb11, %bb10, %bb6
+ %retval.1.0 = phi i32 [ %780, %bb10 ], [ %829, %bb13 ], [ %829, %bb14 ], [ %tmp4, %bb6 ], [ %849, %bb11 ] ; [#uses=1]
+ %retval.0.0 = phi i32 [ %769, %bb10 ], [ %781, %bb13 ], [ %781, %bb14 ], [ %tmp16, %bb6 ], [ %881, %bb11 ] ; [#uses=1]
+ %agg.result162 = bitcast %struct.EDGE_PAIR* %agg.result to i64* ; [#uses=1]
+ %1001 = zext i32 %retval.0.0 to i64 ; [#uses=1]
+ %1002 = zext i32 %retval.1.0 to i64 ; [#uses=1]
+ %1003 = shl i64 %1002, 32 ; [#uses=1]
+ %1004 = or i64 %1003, %1001 ; [#uses=1]
+ store i64 %1004, i64* %agg.result162, align 4
+ ret void
+}
+
+declare arm_apcscc i32 @puts(i8* nocapture) nounwind
+
+declare arm_apcscc void @exit(i32) noreturn nounwind
+
+declare arm_apcscc %struct.edge_rec* @alloc_edge() nounwind
diff --git a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
new file mode 100644
index 0000000000000..b4b989bf38a45
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -0,0 +1,94 @@
+; RUN: llc < %s -mtriple=armv6-apple-darwin10
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ store i16 %parts, i16* undef, align 4
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ br i1 undef, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ %0 = load i16* undef, align 4 ; [#uses=1]
+ %1 = icmp eq i16 %0, 0 ; [#uses=1]
+ %iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; [#uses=1]
+ %2 = tail call arm_apcscc i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; [#uses=0]
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
new file mode 100644
index 0000000000000..24f499036ce42
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -march=arm
+
+ %struct.cli_ac_alt = type { i8, i8*, i16, i16, %struct.cli_ac_alt* }
+ %struct.cli_ac_node = type { i8, i8, %struct.cli_ac_patt*, %struct.cli_ac_node**, %struct.cli_ac_node* }
+ %struct.cli_ac_patt = type { i16*, i16*, i16, i16, i8, i32, i32, i8*, i8*, i32, i16, i16, i16, i16, %struct.cli_ac_alt**, i8, i16, %struct.cli_ac_patt*, %struct.cli_ac_patt* }
+ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 }
+ %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 }
+
+define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind {
+entry:
+ br i1 undef, label %bb126, label %bb1
+
+bb1: ; preds = %entry
+ br i1 undef, label %cli_calloc.exit.thread, label %cli_calloc.exit
+
+cli_calloc.exit.thread: ; preds = %bb1
+ ret i32 -114
+
+cli_calloc.exit: ; preds = %bb1
+ br i1 undef, label %bb52, label %bb4
+
+bb4: ; preds = %cli_calloc.exit
+ br i1 undef, label %bb.i, label %bb1.i3
+
+bb.i: ; preds = %bb4
+ unreachable
+
+bb1.i3: ; preds = %bb4
+ br i1 undef, label %bb2.i4, label %cli_strdup.exit
+
+bb2.i4: ; preds = %bb1.i3
+ ret i32 -114
+
+cli_strdup.exit: ; preds = %bb1.i3
+ br i1 undef, label %cli_calloc.exit54.thread, label %cli_calloc.exit54
+
+cli_calloc.exit54.thread: ; preds = %cli_strdup.exit
+ ret i32 -114
+
+cli_calloc.exit54: ; preds = %cli_strdup.exit
+ br label %bb45
+
+cli_calloc.exit70.thread: ; preds = %bb45
+ unreachable
+
+cli_calloc.exit70: ; preds = %bb45
+ br i1 undef, label %bb.i83, label %bb1.i84
+
+bb.i83: ; preds = %cli_calloc.exit70
+ unreachable
+
+bb1.i84: ; preds = %cli_calloc.exit70
+ br i1 undef, label %bb2.i85, label %bb17
+
+bb2.i85: ; preds = %bb1.i84
+ unreachable
+
+bb17: ; preds = %bb1.i84
+ br i1 undef, label %bb22, label %bb.nph
+
+bb.nph: ; preds = %bb17
+ br label %bb18
+
+bb18: ; preds = %bb18, %bb.nph
+ br i1 undef, label %bb18, label %bb22
+
+bb22: ; preds = %bb18, %bb17
+ %0 = getelementptr i8* null, i32 10 ; [#uses=1]
+ %1 = bitcast i8* %0 to i16* ; [#uses=1]
+ %2 = load i16* %1, align 2 ; [#uses=1]
+ %3 = add i16 %2, 1 ; [#uses=1]
+ %4 = zext i16 %3 to i32 ; [#uses=1]
+ %5 = mul i32 %4, 3 ; [#uses=1]
+ %6 = add i32 %5, -1 ; [#uses=1]
+ %7 = icmp eq i32 %6, undef ; [#uses=1]
+ br i1 %7, label %bb25, label %bb43.preheader
+
+bb43.preheader: ; preds = %bb22
+ br i1 undef, label %bb28, label %bb45
+
+bb25: ; preds = %bb22
+ unreachable
+
+bb28: ; preds = %bb43.preheader
+ unreachable
+
+bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
+ br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
+
+bb52: ; preds = %cli_calloc.exit
+ unreachable
+
+bb126: ; preds = %entry
+ ret i32 -117
+}
diff --git a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
new file mode 100644
index 0000000000000..e1d19d1ac2ffe
--- /dev/null
+++ b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin10 -mattr=+vfp3
+
+@a = external global double ; [#uses=1]
+
+declare double @llvm.exp.f64(double) nounwind readonly
+
+define arm_apcscc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ br i1 undef, label %bb28, label %bb
+
+bb28: ; preds = %bb
+ %0 = load double* @a, align 4 ; [#uses=2]
+ %1 = fadd double %0, undef ; [#uses=2]
+ br i1 undef, label %bb59, label %bb60
+
+bb59: ; preds = %bb28
+ %2 = fsub double -0.000000e+00, undef ; [#uses=2]
+ br label %bb61
+
+bb60: ; preds = %bb28
+ %3 = tail call double @llvm.exp.f64(double undef) nounwind ; [#uses=1]
+ %4 = fsub double -0.000000e+00, %3 ; [#uses=2]
+ %5 = fsub double -0.000000e+00, undef ; [#uses=1]
+ %6 = fsub double -0.000000e+00, undef ; [#uses=1]
+ br label %bb61
+
+bb61: ; preds = %bb60, %bb59
+ %.pn201 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn111 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn452 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn85 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn238 = phi double [ 0.000000e+00, %bb59 ], [ 0.000000e+00, %bb60 ] ; [#uses=1]
+ %.pn39 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn230 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn228 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn224 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn222 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn218 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; [#uses=1]
+ %.pn214 = phi double [ 0.000000e+00, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn212 = phi double [ %2, %bb59 ], [ %4, %bb60 ] ; [#uses=1]
+ %.pn213 = phi double [ undef, %bb59 ], [ undef, %bb60 ] ; [#uses=1]
+ %.pn210 = phi double [ undef, %bb59 ], [ %5, %bb60 ] ; [#uses=1]
+ %.pn202 = phi double [ undef, %bb59 ], [ %6, %bb60 ] ; [#uses=0]
+ %.pn390 = fdiv double %.pn452, undef ; [#uses=0]
+ %.pn145 = fdiv double %.pn238, %1 ; [#uses=0]
+ %.pn138 = fdiv double %.pn230, undef ; [#uses=1]
+ %.pn139 = fdiv double %.pn228, undef ; [#uses=1]
+ %.pn134 = fdiv double %.pn224, %0 ; [#uses=1]
+ %.pn135 = fdiv double %.pn222, %1 ; [#uses=1]
+ %.pn133 = fdiv double %.pn218, undef ; [#uses=0]
+ %.pn128 = fdiv double %.pn214, undef ; [#uses=1]
+ %.pn129 = fdiv double %.pn212, %.pn213 ; [#uses=1]
+ %.pn126 = fdiv double %.pn210, undef ; [#uses=0]
+ %.pn54.in = fmul double undef, %.pn201 ; [#uses=1]
+ %.pn42.in = fmul double undef, undef ; [#uses=1]
+ %.pn76 = fsub double %.pn138, %.pn139 ; [#uses=1]
+ %.pn74 = fsub double %.pn134, %.pn135 ; [#uses=1]
+ %.pn70 = fsub double %.pn128, %.pn129 ; [#uses=1]
+ %.pn54 = fdiv double %.pn54.in, 6.000000e+00 ; [#uses=1]
+ %.pn64 = fmul double undef, 0x3FE5555555555555 ; [#uses=1]
+ %.pn65 = fmul double undef, undef ; [#uses=1]
+ %.pn50 = fmul double undef, %.pn111 ; [#uses=0]
+ %.pn42 = fdiv double %.pn42.in, 6.000000e+00 ; [#uses=1]
+ %.pn40 = fmul double undef, %.pn85 ; [#uses=0]
+ %.pn56 = fadd double %.pn76, undef ; [#uses=1]
+ %.pn57 = fmul double %.pn74, undef ; [#uses=1]
+ %.pn36 = fadd double undef, undef ; [#uses=1]
+ %.pn37 = fmul double %.pn70, undef ; [#uses=1]
+ %.pn33 = fmul double undef, 0x3FC5555555555555 ; [#uses=1]
+ %.pn29 = fsub double %.pn64, %.pn65 ; [#uses=1]
+ %.pn21 = fadd double undef, undef ; [#uses=1]
+ %.pn27 = fmul double undef, 0x3FC5555555555555 ; [#uses=1]
+ %.pn11 = fadd double %.pn56, %.pn57 ; [#uses=1]
+ %.pn32 = fmul double %.pn54, undef ; [#uses=1]
+ %.pn26 = fmul double %.pn42, undef ; [#uses=1]
+ %.pn15 = fmul double 0.000000e+00, %.pn39 ; [#uses=1]
+ %.pn7 = fadd double %.pn36, %.pn37 ; [#uses=1]
+ %.pn30 = fsub double %.pn32, %.pn33 ; [#uses=1]
+ %.pn28 = fadd double %.pn30, 0.000000e+00 ; [#uses=1]
+ %.pn24 = fsub double %.pn28, %.pn29 ; [#uses=1]
+ %.pn22 = fsub double %.pn26, %.pn27 ; [#uses=1]
+ %.pn20 = fadd double %.pn24, undef ; [#uses=1]
+ %.pn18 = fadd double %.pn22, 0.000000e+00 ; [#uses=1]
+ %.pn16 = fsub double %.pn20, %.pn21 ; [#uses=1]
+ %.pn14 = fsub double %.pn18, undef ; [#uses=1]
+ %.pn12 = fadd double %.pn16, undef ; [#uses=1]
+ %.pn10 = fadd double %.pn14, %.pn15 ; [#uses=1]
+ %.pn8 = fsub double %.pn12, undef ; [#uses=1]
+ %.pn6 = fsub double %.pn10, %.pn11 ; [#uses=1]
+ %.pn4 = fadd double %.pn8, undef ; [#uses=1]
+ %.pn2 = fadd double %.pn6, %.pn7 ; [#uses=1]
+ %N1.0 = fsub double %.pn4, undef ; [#uses=1]
+ %D1.0 = fsub double %.pn2, undef ; [#uses=2]
+ br i1 undef, label %bb62, label %bb64
+
+bb62: ; preds = %bb61
+ %7 = fadd double %D1.0, undef ; [#uses=1]
+ br label %bb64
+
+bb64: ; preds = %bb62, %bb61
+ %.pn = phi double [ undef, %bb62 ], [ %N1.0, %bb61 ] ; [#uses=1]
+ %.pn1 = phi double [ %7, %bb62 ], [ %D1.0, %bb61 ] ; [#uses=1]
+ %x.1 = fdiv double %.pn, %.pn1 ; [#uses=0]
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
new file mode 100644
index 0000000000000..2d4e58d63603a
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+; PR4657
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+define arm_apcscc <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind {
+entry:
+ %v_addr = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %f_addr = alloca i32 ; [#uses=2]
+ %retval = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %0 = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
+ store <4 x i32> %v, <4 x i32>* %v_addr
+ store i32 %f, i32* %f_addr
+ %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1]
+ %2 = load i32* %f_addr, align 4 ; [#uses=1]
+ %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1]
+ %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1]
+ %5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %5, <4 x i32>* %0, align 16
+ %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
+ store <4 x i32> %6, <4 x i32>* %retval, align 16
+ br label %return
+
+return: ; preds = %entry
+ %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %retval1
+}
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
new file mode 100644
index 0000000000000..65ffed2b80a0b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv6-elf"
+
+define arm_aapcscc i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; [#uses=1]
+ %0 = icmp eq i32 %asmtmp.i, 0 ; [#uses=1]
+ br i1 %0, label %bb6.i, label %fault_in_pages_writeable.exit
+
+bb6.i: ; preds = %bb5.i
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb7.i
+
+bb7.i: ; preds = %bb6.i
+ unreachable
+
+fault_in_pages_writeable.exit: ; preds = %bb6.i, %bb5.i, %entry
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %1 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
new file mode 100644
index 0000000000000..9e5372a793522
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=armv6-elf
+; PR4528
+
+define arm_aapcscc i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize {
+entry:
+ br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
+
+bb5.i: ; preds = %entry
+ %asmtmp.i = tail call i32 asm sideeffect "1:\09strbt\09$1,[$2]\0A2:\0A\09.section .fixup,\22ax\22\0A\09.align\092\0A3:\09mov\09$0, $3\0A\09b\092b\0A\09.previous\0A\09.section __ex_table,\22a\22\0A\09.align\093\0A\09.long\091b, 3b\0A\09.previous", "=r,r,r,i,0,~{cc}"(i8 0, i32 undef, i32 -14, i32 0) nounwind ; [#uses=1]
+ br label %fault_in_pages_writeable.exit
+
+fault_in_pages_writeable.exit: ; preds = %bb5.i, %entry
+ %0 = phi i32 [ 0, %entry ], [ %asmtmp.i, %bb5.i ] ; [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; [#uses=1]
+ br i1 %1, label %bb2, label %bb3
+
+bb2: ; preds = %fault_in_pages_writeable.exit
+ unreachable
+
+bb3: ; preds = %fault_in_pages_writeable.exit
+ %2 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; [#uses=0]
+ unreachable
+}
+
+declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
new file mode 100644
index 0000000000000..18d68f79370ce
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=arm
+; PR4528
+
+; Inline asm is allowed to contain operands "=&r", "0".
+
+%struct.device_dma_parameters = type { i32, i32 }
+%struct.iovec = type { i8*, i32 }
+
+define arm_aapcscc i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize {
+entry:
+ br label %bb8
+
+bb: ; preds = %bb8
+ br i1 undef, label %bb10, label %bb2
+
+bb2: ; preds = %bb
+ %asmtmp = tail call %struct.device_dma_parameters asm "adds $1, $2, $3; sbcccs $1, $1, $0; movcc $0, #0", "=&r,=&r,r,Ir,0,~{cc}"(i8* undef, i32 undef, i32 0) nounwind; <%struct.device_dma_parameters> [#uses=1]
+ %asmresult = extractvalue %struct.device_dma_parameters %asmtmp, 0; [#uses=1]
+ %0 = icmp eq i32 %asmresult, 0 ; [#uses=1]
+ br i1 %0, label %bb7, label %bb4
+
+bb4: ; preds = %bb2
+ br i1 undef, label %bb10, label %bb9
+
+bb7: ; preds = %bb2
+ %1 = add i32 %2, 1 ; [#uses=1]
+ br label %bb8
+
+bb8: ; preds = %bb7, %entry
+ %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ] ; [#uses=3]
+ %scevgep22 = getelementptr %struct.iovec* %iov, i32 %2, i32 0; [#uses=0]
+ %3 = load i32* %nr_segs, align 4 ; [#uses=1]
+ %4 = icmp ult i32 %2, %3 ; [#uses=1]
+ br i1 %4, label %bb, label %bb9
+
+bb9: ; preds = %bb8, %bb4
+ store i32 undef, i32* %count, align 4
+ ret i32 0
+
+bb10: ; preds = %bb4, %bb
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
new file mode 100644
index 0000000000000..a46482cc73175
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm
+; PR4716
+
+define arm_aapcscc void @_start() nounwind naked {
+entry:
+ tail call arm_aapcscc void @exit(i32 undef) noreturn nounwind
+ unreachable
+}
+
+declare arm_aapcscc void @exit(i32) noreturn nounwind
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
new file mode 100644
index 0000000000000..84915c48824a4
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 -post-RA-scheduler -mcpu=cortex-a8
+
+; ModuleID = ''
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* }
+@g = common global %struct.tree* null
+
+define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
+entry:
+ %t.idx51.val.i = load double* null ; [#uses=1]
+ br i1 undef, label %bb4.i, label %bb.i
+
+bb.i: ; preds = %entry
+ unreachable
+
+bb4.i: ; preds = %entry
+ %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
+ %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; [#uses=1]
+ %.idx45.val.i = load double* %.idx45.i ; [#uses=1]
+ %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; [#uses=1]
+ %.idx46.val.i = load double* %.idx46.i ; [#uses=1]
+ %1 = fsub double 0.000000e+00, %.idx45.val.i ; [#uses=2]
+ %2 = fmul double %1, %1 ; [#uses=1]
+ %3 = fsub double %t.idx51.val.i, %.idx46.val.i ; [#uses=2]
+ %4 = fmul double %3, %3 ; [#uses=1]
+ %5 = fadd double %2, %4 ; [#uses=1]
+ %6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; [#uses=1]
+ br i1 undef, label %bb7.i4, label %bb6.i
+
+bb6.i: ; preds = %bb4.i
+ br label %bb7.i4
+
+bb7.i4: ; preds = %bb6.i, %bb4.i
+ %tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; [#uses=0]
+ unreachable
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
new file mode 100644
index 0000000000000..a21ffc38d09e6
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = ''
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
+%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
+%struct.icstruct = type { [3 x i32], i16 }
+%struct.node = type { i16, double, [3 x double], i32, i32 }
+
+declare arm_apcscc double @floor(double) nounwind readnone
+
+define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) {
+entry:
+ br i1 %a, label %bb3, label %bb1
+
+bb1: ; preds = %entry
+ unreachable
+
+bb3: ; preds = %entry
+ br i1 %a, label %bb7, label %bb5
+
+bb5: ; preds = %bb3
+ unreachable
+
+bb7: ; preds = %bb3
+ br i1 %a, label %bb11, label %bb9
+
+bb9: ; preds = %bb7
+ %0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+ br label %bb11
+
+bb11: ; preds = %bb9, %bb7
+ %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ store i32 0, i32* %1
+ ret void
+}
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
new file mode 100644
index 0000000000000..e3d8ea60f9927
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = ''
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
+%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
+%struct.Patient = type { i32, i32, i32, %struct.Village* }
+%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
+
+define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind {
+entry:
+ br i1 %p, label %bb8, label %bb1
+
+bb1: ; preds = %entry
+ %0 = malloc %struct.Village ; <%struct.Village*> [#uses=3]
+ %exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
+ %.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
+ store i32 %.c, i32* null
+ %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %1
+ %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
+ store %struct.List* null, %struct.List** %2
+ ret %struct.Village* %0
+
+bb8: ; preds = %entry
+ ret %struct.Village* null
+}
+
+declare double @ldexp(double, i32)
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
new file mode 100644
index 0000000000000..9123377e7151b
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler
+
+; ModuleID = ''
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-apple-darwin9"
+
+@.str = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=0]
+@.str1 = external constant [31 x i8], align 1 ; <[31 x i8]*> [#uses=1]
+@.str2 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1]
+
+declare arm_apcscc i32 @getUnknown(i32, ...) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind
+
+define arm_apcscc i32 @main() nounwind {
+entry:
+ %0 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+ %1 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+ %2 = tail call arm_apcscc i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+ %3 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
new file mode 100644
index 0000000000000..0fad533b6c595
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | FileCheck %s
+
+; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm'
+
+@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; <i32*> [#uses=0]
+
+; CHECK: .globl l_objc_msgSend_fixup_alloc
+; CHECK: .weak_definition l_objc_msgSend_fixup_alloc
diff --git a/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
new file mode 100644
index 0000000000000..c6ef2561490cb
--- /dev/null
+++ b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mattr=+neon | not grep fldmfdd
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+define arm_apcscc void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* %this, i8* %block) {
+entry:
+ %0 = lshr <4 x i32> zeroinitializer, ; <<4 x i32>> [#uses=1]
+ %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1]
+ %2 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> undef, <2 x i32> %1) nounwind ; <<2 x i32>> [#uses=1]
+ %3 = extractelement <2 x i32> %2, i32 0 ; <i32> [#uses=1]
+ %not..i = icmp eq i32 %3, undef ; <i1> [#uses=1]
+ br i1 %not..i, label %return, label %bb221
+
+bb221: ; preds = %bb221, %entry
+ br label %bb221
+
+return: ; preds = %entry
+ ret void
+}
diff --git
a/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll new file mode 100644 index 0000000000000..bc5bfe9f60983 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -mattr=+neon | not grep fldmfdd +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +%bar = type { float, float, float } +%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 } +%foo = type { <4 x float> } +%quux = type { i32 (...)**, %baz*, i32 } +%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } + +define arm_apcscc void @aaaa(%quuz* %this, i8* %block) { +entry: + br i1 undef, label %bb.nph269, label %bb201 + +bb.nph269: ; preds = %entry + br label %bb12 + +bb12: ; preds = %bb194, %bb.nph269 + %0 = fmul <4 x float> undef, undef ; <<4 x float>> [#uses=1] + %1 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] + %2 = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %3 = fadd <4 x float> undef, %2 ; <<4 x float>> [#uses=1] + br i1 undef, label %bb194, label %bb186 + +bb186: ; preds = %bb12 + br label %bb194 + +bb194: ; preds = %bb186, %bb12 + %besterror.0.0 = phi <4 x float> [ %3, %bb186 ], [ undef, %bb12 ] ; <<4 x float>> [#uses=0] + %indvar.next294 = add i32 undef, 1 ; [#uses=0] + br label %bb12 + +bb201: ; preds = %entry + ret void +} diff --git a/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll new file mode 100644 index 0000000000000..d5178b4bfb3f3 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mattr=+neon +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define arm_apcscc void @foo() nounwind { +entry: + %0 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> undef, <2 x float> undef) nounwind ; <<2 x float>> [#uses=1] + %tmp28 = extractelement <2 x float> %0, i32 0 ; [#uses=1] + %1 = fcmp une float %tmp28, 4.900000e+01 ; [#uses=1] + br i1 %1, label %bb, label %bb7 + +bb: ; preds = %entry + unreachable + +bb7: ; preds = %entry + br i1 undef, label %bb8, label %bb9 + +bb8: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb7 + ret void +} + +declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone diff --git a/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll new file mode 100644 index 0000000000000..266fce6e0c5e2 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mattr=+neon +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define arm_apcscc void @aaa() nounwind { +entry: + %0 = fmul <4 x float> undef, ; <<4 x float>> [#uses=1] + %tmp31 = extractelement <4 x float> %0, i32 0 ; [#uses=1] + %1 = fpext float %tmp31 to double ; [#uses=1] + %2 = fsub double 1.000000e+00, %1 ; [#uses=1] + %3 = fdiv double %2, 1.000000e+00 ; [#uses=1] + %4 = tail call double @fabs(double %3) nounwind readnone ; [#uses=1] + %5 = fcmp ogt double %4, 1.000000e-05 ; [#uses=1] + br i1 %5, label %bb, label %bb7 + +bb: ; preds = %entry + unreachable + +bb7: ; preds = %entry + 
unreachable +} + +declare double @fabs(double) diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll new file mode 100644 index 0000000000000..b6cf880a30015 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll @@ -0,0 +1,103 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s + +%struct.A = type { i32* } + +define arm_apcscc void @"\01-[MyFunction Name:]"() { +entry: + %save_filt.1 = alloca i32 ; [#uses=2] + %save_eptr.0 = alloca i8* ; [#uses=2] + %a = alloca %struct.A ; <%struct.A*> [#uses=3] + %eh_exception = alloca i8* ; [#uses=5] + %eh_selector = alloca i32 ; [#uses=3] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + call arm_apcscc void @_ZN1AC1Ev(%struct.A* %a) + invoke arm_apcscc void @_Z3barv() + to label %invcont unwind label %lpad + +invcont: ; preds = %entry + call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind + br label %return + +bb: ; preds = %ppad + %eh_select = load i32* %eh_selector ; [#uses=1] + store i32 %eh_select, i32* %save_filt.1, align 4 + %eh_value = load i8** %eh_exception ; [#uses=1] + store i8* %eh_value, i8** %save_eptr.0, align 4 + call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind + %0 = load i8** %save_eptr.0, align 4 ; [#uses=1] + store i8* %0, i8** %eh_exception, align 4 + %1 = load i32* %save_filt.1, align 4 ; [#uses=1] + store i32 %1, i32* %eh_selector, align 4 + br label %Unwind + +return: ; preds = %invcont + ret void + +lpad: ; preds = %entry + %eh_ptr = call i8* @llvm.eh.exception() ; [#uses=1] + store i8* %eh_ptr, i8** %eh_exception + %eh_ptr1 = load i8** %eh_exception ; [#uses=1] + %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; [#uses=1] + store i32 %eh_select2, i32* %eh_selector + br label %ppad + +ppad: ; preds = %lpad + br label %bb + +Unwind: ; preds = %bb + %eh_ptr3 = load i8** %eh_exception ; [#uses=1] + call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr3) + unreachable +} + +define linkonce_odr arm_apcscc void @_ZN1AC1Ev(%struct.A* %this) { +entry: + %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store %struct.A* %this, %struct.A** %this_addr + %0 = call arm_apcscc i8* @_Znwm(i32 4) ; [#uses=1] + %1 = bitcast i8* %0 to i32* ; [#uses=1] + %2 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1] + %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; [#uses=1] + store i32* %1, i32** %3, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +declare arm_apcscc i8* @_Znwm(i32) + +define linkonce_odr arm_apcscc void @_ZN1AD1Ev(%struct.A* %this) nounwind { +entry: + %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store %struct.A* %this, %struct.A** %this_addr + %0 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1] + %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; [#uses=1] + %2 = load i32** %1, align 4 ; [#uses=1] + %3 = bitcast i32* %2 to i8* ; [#uses=1] + call arm_apcscc void @_ZdlPv(i8* %3) nounwind + br label %bb + +bb: ; preds = %entry + br label %return + +return: ; preds = %bb + ret void +} +;CHECK: L_LSDA_1: + +declare arm_apcscc void @_ZdlPv(i8*) nounwind + +declare arm_apcscc void @_Z3barv() + +declare i8* @llvm.eh.exception() nounwind + +declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) 
nounwind + +declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind + +declare arm_apcscc i32 @__gxx_personality_sj0(...) + +declare arm_apcscc void @_Unwind_SjLj_Resume(i8*) diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll new file mode 100644 index 0000000000000..e1e60e6317a69 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; pr4843 +define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind { +;CHECK: v2regbug: +;CHECK: vzip.16 + %tmp1 = load <4 x i16>* %B + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> + ret <4 x i16> %tmp2 +} diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll new file mode 100644 index 0000000000000..bf91fe099e6b6 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll @@ -0,0 +1,106 @@ +; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-apple-darwin9" + +@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3] +@nodes = internal global i64 0 ; [#uses=4] +@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2] +@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1] +@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1] +@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1] +@.str5 = private constant [7 x i8] c" . . 
.\00", align 1 ; <[7 x i8]*> [#uses=1] +@.str6 = private constant [28 x i8] c"score = %d (%c) work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1] +@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1] +@plycnt = internal global i32 0 ; [#uses=21] +@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43] +@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18] +@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21] +@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20] +@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5] +@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9] +@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1] +@he = internal global i8* null ; [#uses=9] +@hits = internal global i64 0 ; [#uses=8] +@posed = internal global i64 0 ; [#uses=7] +@ht = internal global i32* null ; [#uses=5] +@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1] +@.str117 = private constant [45 x i8] c"- %5.3f < %5.3f = %5.3f > %5.3f + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1] +@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1] + +declare arm_apcscc i32 @puts(i8* nocapture) nounwind + +declare arm_apcscc i32 @getchar() nounwind + +define internal arm_apcscc i32 @transpose() nounwind readonly { +; CHECK: push +entry: + %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; [#uses=1] + %1 = shl i32 %0, 7 ; [#uses=1] + %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; [#uses=1] + %3 = or i32 %1, %2 ; [#uses=1] + %4 = shl i32 %3, 7 ; [#uses=1] + %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; [#uses=1] + %6 = or i32 %4, %5 ; [#uses=3] + %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; [#uses=1] + %8 = shl i32 %7, 7 ; [#uses=1] + %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; [#uses=1] + %10 = or i32 %8, %9 ; [#uses=1] + %11 = shl i32 %10, 7 ; [#uses=1] + %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; [#uses=1] + %13 = or i32 %11, %12 ; [#uses=3] + %14 = icmp ugt i32 %6, %13 ; [#uses=2] + %.pn2.in.i = select i1 %14, i32 %6, i32 %13 ; [#uses=1] + %.pn1.in.i = select i1 %14, i32 %13, i32 %6 ; [#uses=1] + %.pn2.i = shl i32 %.pn2.in.i, 7 ; [#uses=1] + %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; [#uses=1] + %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i ; [#uses=1] + %.pn.in.i = zext i32 %.pn.in.in.i to i64 ; [#uses=1] + %.pn.i = shl i64 %.pn.in.i, 21 ; [#uses=1] + %.pn1.i = zext i32 %.pn1.in.i to i64 ; [#uses=1] + %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i ; [#uses=2] + %15 = lshr i64 %iftmp.22.0.i, 17 ; [#uses=1] + %16 = trunc i64 %15 to i32 ; [#uses=2] + %17 = urem i64 %iftmp.22.0.i, 1050011 ; [#uses=1] + %18 = trunc i64 %17 to i32 ; [#uses=1] + %19 = urem i32 %16, 179 ; [#uses=1] + %20 = or i32 %19, 131072 ; [#uses=1] + %21 = load i32** @ht, align 4 ; [#uses=1] + br label %bb5 + +bb: ; preds = %bb5 + %22 = getelementptr inbounds i32* %21, i32 %x.0 ; [#uses=1] + %23 = load i32* %22, align 4 ; [#uses=1] + %24 = icmp eq i32 %23, %16 ; 
[#uses=1] + br i1 %24, label %bb1, label %bb2 + +bb1: ; preds = %bb + %25 = load i8** @he, align 4 ; [#uses=1] + %26 = getelementptr inbounds i8* %25, i32 %x.0 ; [#uses=1] + %27 = load i8* %26, align 1 ; [#uses=1] + %28 = sext i8 %27 to i32 ; [#uses=1] + ret i32 %28 + +bb2: ; preds = %bb + %29 = add nsw i32 %20, %x.0 ; [#uses=3] + %30 = add i32 %29, -1050011 ; [#uses=1] + %31 = icmp sgt i32 %29, 1050010 ; [#uses=1] + %. = select i1 %31, i32 %30, i32 %29 ; [#uses=1] + %32 = add i32 %33, 1 ; [#uses=1] + br label %bb5 + +bb5: ; preds = %bb2, %entry + %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ] ; [#uses=2] + %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ] ; [#uses=3] + %34 = icmp sgt i32 %33, 7 ; [#uses=1] + br i1 %34, label %bb7, label %bb + +bb7: ; preds = %bb5 + ret i32 -128 +} + +declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind + +declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind diff --git a/test/CodeGen/ARM/2009-09-09-AllOnes.ll b/test/CodeGen/ARM/2009-09-09-AllOnes.ll new file mode 100644 index 0000000000000..f654a1664c8b4 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-09-AllOnes.ll @@ -0,0 +1,10 @@ +; RUN: llc -mattr=+neon < %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-elf" + +define arm_apcscc void @foo() { +entry: + %0 = insertelement <4 x i32> undef, i32 -1, i32 3 + store <4 x i32> %0, <4 x i32>* undef, align 16 + unreachable +} diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll new file mode 100644 index 0000000000000..98cab9a9149ea --- /dev/null +++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll @@ -0,0 +1,18 @@ +; RUN: llc -O1 -march=arm -mattr=+vfp2 < %s | FileCheck %s +; pr4939 + +define void @test(double* %x, double* %y) nounwind { + %1 = load double* %x, align 4 + %2 = load double* %y, align 4 + %3 = fsub double -0.000000e+00, %1 + %4 = fcmp ugt double %2, %3 + br i1 %4, label %bb1, label %bb2 + +bb1: +;CHECK: fstdhi + store double %1, double* %y, align 4 + br label %bb2 + +bb2: + ret void +} diff --git a/test/CodeGen/ARM/2009-09-10-postdec.ll b/test/CodeGen/ARM/2009-09-10-postdec.ll new file mode 100644 index 0000000000000..10653b51c1463 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-10-postdec.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=arm < %s | FileCheck %s +; Radar 7213850 + +define i32 @test(i8* %d, i32 %x, i32 %y) nounwind { + %1 = ptrtoint i8* %d to i32 +;CHECK: sub + %2 = sub i32 %x, %1 + %3 = add nsw i32 %2, %y + store i8 0, i8* %d, align 1 + ret i32 %3 +} diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll new file mode 100644 index 0000000000000..13adb24e2f6fd --- /dev/null +++ b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll @@ -0,0 +1,61 @@ +; RUN: llc -mattr=+neon < %s +; PR4965 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +%struct.fr = type { [6 x %struct.pl] } +%struct.obb = type { %"struct.m4", %"struct.p3" } +%struct.pl = type { %"struct.p3" } +%"struct.m4" = type { %"struct.p3", %"struct.p3", %"struct.p3", %"struct.p3" } +%"struct.p3" = type { <4 x float> } + +declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone + +define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind { +entry: + %val.i.i = load <4 x float>* undef ; <<4 x float>> [#uses=1] + 
%val2.i.i = load <4 x float>* null ; <<4 x float>> [#uses=1] + %elt3.i.i = getelementptr inbounds %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1] + %val4.i.i = load <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1] + %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> ; <<4 x float>> [#uses=1] + %1 = fadd <4 x float> undef, zeroinitializer ; <<4 x float>> [#uses=1] + br label %bb33 + +bb: ; preds = %bb33 + %2 = fmul <4 x float> %val.i.i, undef ; <<4 x float>> [#uses=1] + %3 = fmul <4 x float> %val2.i.i, undef ; <<4 x float>> [#uses=1] + %4 = fadd <4 x float> %3, %2 ; <<4 x float>> [#uses=1] + %5 = fmul <4 x float> %val4.i.i, undef ; <<4 x float>> [#uses=1] + %6 = fadd <4 x float> %5, %4 ; <<4 x float>> [#uses=1] + %7 = bitcast <4 x float> %6 to <4 x i32> ; <<4 x i32>> [#uses=1] + %8 = and <4 x i32> %7, ; <<4 x i32>> [#uses=1] + %9 = or <4 x i32> %8, undef ; <<4 x i32>> [#uses=1] + %10 = bitcast <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1] + %11 = shufflevector <4 x float> %10, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] + %12 = shufflevector <2 x float> %11, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %13 = fmul <4 x float> undef, %12 ; <<4 x float>> [#uses=1] + %14 = fmul <4 x float> %0, undef ; <<4 x float>> [#uses=1] + %15 = fadd <4 x float> %14, %13 ; <<4 x float>> [#uses=1] + %16 = fadd <4 x float> undef, %15 ; <<4 x float>> [#uses=1] + %17 = fadd <4 x float> %1, %16 ; <<4 x float>> [#uses=1] + %18 = fmul <4 x float> zeroinitializer, %17 ; <<4 x float>> [#uses=1] + %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=2] + %20 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] + %21 = shufflevector <4 x float> %19, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] + %22 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %20, <2 x float> %21) nounwind ; <<2 x float>> [#uses=2] + %23 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %22, <2 x float> %22) nounwind ; <<2 x float>> [#uses=2] + %24 = shufflevector <2 x float> %23, <2 x float> %23, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %25 = fadd <4 x float> %24, zeroinitializer ; <<4 x float>> [#uses=1] + %tmp46 = extractelement <4 x float> %25, i32 0 ; [#uses=1] + %26 = fcmp olt float %tmp46, 0.000000e+00 ; [#uses=1] + br i1 %26, label %bb41, label %bb33 + +bb33: ; preds = %bb, %entry + br i1 undef, label %bb34, label %bb + +bb34: ; preds = %bb33 + ret i8 undef + +bb41: ; preds = %bb + ret i8 1 +} diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll new file mode 100644 index 0000000000000..758b59a4638d5 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -march=arm -mattr=+neon -mcpu=cortex-a9 + +define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind { + %1 = ptrtoint i8* %pBuffer to i32 + + %lsr.iv2641 = inttoptr i32 %1 to float* + %tmp29 = add i32 %1, 4 + %tmp2930 = inttoptr i32 %tmp29 to float* + %tmp31 = add i32 %1, 8 + %tmp3132 = inttoptr i32 %tmp31 to float* + %tmp33 = add i32 %1, 12 + %tmp3334 = inttoptr i32 %tmp33 to float* + %tmp35 = add i32 %1, 16 + %tmp3536 = inttoptr i32 %tmp35 to float* + %tmp37 = add i32 %1, 20 + %tmp3738 = inttoptr i32 %tmp37 to float* + %tmp39 = add i32 %1, 24 + %tmp3940 = inttoptr i32 %tmp39 to float* + %2 = load float* %lsr.iv2641, align 4 + %3 = load float* 
%tmp2930, align 4 + %4 = load float* %tmp3132, align 4 + %5 = load float* %tmp3334, align 4 + %6 = load float* %tmp3536, align 4 + %7 = load float* %tmp3738, align 4 + %8 = load float* %tmp3940, align 4 + %9 = insertelement <4 x float> undef, float %6, i32 0 + %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer + %11 = insertelement <4 x float> %10, float %7, i32 1 + %12 = insertelement <4 x float> %11, float %8, i32 2 + %13 = insertelement <4 x float> undef, float %2, i32 0 + %14 = shufflevector <4 x float> %13, <4 x float> undef, <4 x i32> zeroinitializer + %15 = insertelement <4 x float> %14, float %3, i32 1 + %16 = insertelement <4 x float> %15, float %4, i32 2 + %17 = insertelement <4 x float> %16, float %5, i32 3 + %18 = fsub <4 x float> zeroinitializer, %12 + %19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32> zeroinitializer + %20 = shufflevector <4 x float> %17, <4 x float> undef, <2 x i32> + %21 = shufflevector <2 x float> %20, <2 x float> undef, <4 x i32> + + ret <4 x float> %21 +} diff --git a/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll new file mode 100644 index 0000000000000..980f8ce6fa1b2 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=arm-eabi -mattr=+neon -mcpu=cortex-a9 + +; PR4986 + +define arm_aapcs_vfpcc void @foo(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb.preheader + +bb.preheader: ; preds = %entry + br label %bb + +bb: ; preds = %bb, %bb.preheader + %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] + %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float undef, i32 3 ; <<4 x float>> [#uses=1] + %4 = fmul <4 x float> undef, %3 ; <<4 x float>> [#uses=1] + %5 = extractelement <4 x float> %4, i32 3 ; [#uses=1] + store float %5, float* undef, align 4 + br i1 undef, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +define arm_aapcs_vfpcc <4 x float> @bar(i8* nocapture %pBuffer, i32 %numItems) nounwind { + %1 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] + %2 = insertelement <4 x float> %1, float undef, i32 1 ; <<4 x float>> [#uses=1] + %3 = insertelement <4 x float> %2, float undef, i32 2 ; <<4 x float>> [#uses=1] + %4 = insertelement <4 x float> %3, float undef, i32 3 ; <<4 x float>> [#uses=1] + %5 = shufflevector <4 x float> %4, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] + %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1] + ret <4 x float> %6 +} diff --git a/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll new file mode 100644 index 0000000000000..aace4751915d1 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon + +; PR5024 + +%bar = type { <4 x float> } +%foo = type { %bar, %bar, %bar, %bar } + +declare arm_aapcs_vfpcc <4 x float> @bbb(%bar*) nounwind + +define arm_aapcs_vfpcc void @aaa(%foo* noalias sret %agg.result, %foo* %tfrm) nounwind { +entry: + %0 = call arm_aapcs_vfpcc <4 x float> @bbb(%bar* undef) nounwind ; <<4 x float>> [#uses=0] + ret void +} 
diff --git a/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll new file mode 100644 index 0000000000000..30931a2ffb66d --- /dev/null +++ b/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon + +; PR5024 + +%bar = type { %foo, %foo } +%foo = type { <4 x float> } + +declare arm_aapcs_vfpcc float @aaa(%foo* nocapture) nounwind readonly + +declare arm_aapcs_vfpcc %bar* @bbb(%bar*, <4 x float>, <4 x float>) nounwind + +define arm_aapcs_vfpcc void @ccc(i8* nocapture %pBuffer, i32 %numItems) nounwind { +entry: + br i1 undef, label %return, label %bb.nph + +bb.nph: ; preds = %entry + %0 = call arm_aapcs_vfpcc %bar* @bbb(%bar* undef, <4 x float> undef, <4 x float> undef) nounwind ; <%bar*> [#uses=0] + %1 = call arm_aapcs_vfpcc float @aaa(%foo* undef) nounwind ; [#uses=0] + unreachable + +return: ; preds = %entry + ret void +} diff --git a/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll new file mode 100644 index 0000000000000..2ff479b217818 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+neon + +; PR5024 + +%struct.1 = type { %struct.4, %struct.4 } +%struct.4 = type { <4 x float> } + +define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation.0, <4 x float> %legalation.0) nounwind { +entry: + %0 = call arm_aapcs_vfpcc %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0] + %1 = call arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0] + %val92 = load <4 x float>* null ; <<4 x float>> [#uses=1] + %2 = call arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0] + ret %struct.1* %this +} + +declare arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4*, float) nounwind + +declare arm_aapcs_vfpcc %struct.4* @sss1(%struct.4*, float) nounwind + +declare arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4*, <4 x float>) nounwind diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll new file mode 100644 index 0000000000000..6281775d0616f --- /dev/null +++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; pr4926 + +define arm_apcscc void @test_vget_lanep16() nounwind { +entry: + %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] + %out_poly16_t = alloca i16 ; [#uses=1] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] +; CHECK: fldd + %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1] + %1 = extractelement <4 x i16> %0, i32 1 ; [#uses=1] + store i16 %1, i16* %out_poly16_t, align 2 + br label %return + +return: ; preds = %entry + ret void +} diff --git a/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll new file mode 100644 index 0000000000000..ea2693ac2e408 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=armv7-eabi -mcpu=cortex-a8 +; PR5055 + +module asm ".globl\09__aeabi_f2lz" +module asm ".set\09__aeabi_f2lz, __fixsfdi" +module asm "" + +define arm_aapcs_vfpcc i64 @__fixsfdi(float %a) nounwind { +entry: + %0 = fcmp olt float %a, 0.000000e+00 ; [#uses=1] + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry + %1 = fsub float -0.000000e+00, %a ; [#uses=1] + 
%2 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %1) nounwind ; [#uses=1] + %3 = sub i64 0, %2 ; [#uses=1] + ret i64 %3 + +bb1: ; preds = %entry + %4 = tail call arm_aapcs_vfpcc i64 @__fixunssfdi(float %a) nounwind ; [#uses=1] + ret i64 %4 +} + +declare arm_aapcs_vfpcc i64 @__fixunssfdi(float) diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll new file mode 100644 index 0000000000000..53bd668259538 --- /dev/null +++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -mcpu=arm10tdmi | FileCheck %s +; PR4687 + +%0 = type { double, double } + +define arm_aapcscc void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind { +; CHECK: foo: +; CHECK: bl __adddf3 +; CHECK-NOT: strd +; CHECK: mov + %x76 = fmul double %y.0, 0.000000e+00 ; [#uses=1] + %x77 = fadd double %y.0, 0.000000e+00 ; [#uses=1] + %tmpr = fadd double %x.0, %x76 ; [#uses=1] + %agg.result.0 = getelementptr %0* %agg.result, i32 0, i32 0 ; [#uses=1] + store double %tmpr, double* %agg.result.0, align 8 + %agg.result.1 = getelementptr %0* %agg.result, i32 0, i32 1 ; [#uses=1] + store double %x77, double* %agg.result.1, align 8 + ret void +} diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll index a3832c0ea3dba..9ccff07d456bb 100644 --- a/test/CodeGen/ARM/addrmode.ll +++ b/test/CodeGen/ARM/addrmode.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -stats |& grep asm-printer | grep 4 +; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4 define i32 @t1(i32 %a) { %b = mul i32 %a, 9 diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll index 70b2c4d4195bf..b2c03147740ba 100644 --- a/test/CodeGen/ARM/aliases.ll +++ b/test/CodeGen/ARM/aliases.ll @@ -1,5 +1,4 @@ -; RUN: llvm-as < %s | \ -; RUN: llc -mtriple=arm-linux-gnueabi -o %t -f +; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t ; RUN: grep set %t | count 5 ; RUN: grep globl %t | count 4 ; RUN: grep weak %t | count 1 diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll index bb336ceebbabd..d73abe6a560c3 100644 --- a/test/CodeGen/ARM/align.ll +++ b/test/CodeGen/ARM/align.ll @@ -1,9 +1,9 @@ -; RUN: llvm-as < %s | llc -march=arm | grep align.*1 | count 1 -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \ +; RUN: llc < %s -march=arm | grep align.*1 | count 1 +; RUN: llc < %s -mtriple=arm-linux-gnueabi | \ ; RUN: grep align.*2 | count 2 -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | \ ; RUN: grep align.*3 | count 2 -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \ +; RUN: llc < %s -mtriple=arm-apple-darwin | \ ; RUN: grep align.*2 | count 4 @a = global i1 true diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll index f7e450f593242..15cf67734cb2d 100644 --- a/test/CodeGen/ARM/alloca.ll +++ b/test/CodeGen/ARM/alloca.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \ ; RUN: grep {mov r11, sp} -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | \ ; RUN: grep {mov sp, r11} define void @f(i32 %a) { diff --git a/test/CodeGen/ARM/argaddr.ll b/test/CodeGen/ARM/argaddr.ll index 080827d7f42e0..116a32f9c74d3 100644 --- a/test/CodeGen/ARM/argaddr.ll +++ b/test/CodeGen/ARM/argaddr.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s 
-march=arm define void @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { entry: diff --git a/test/CodeGen/ARM/arguments-nosplit-double.ll b/test/CodeGen/ARM/arguments-nosplit-double.ll index 57ff95c0cb6d0..770e41df2c246 100644 --- a/test/CodeGen/ARM/arguments-nosplit-double.ll +++ b/test/CodeGen/ARM/arguments-nosplit-double.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3 +; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3 ; PR4059 define i32 @f(i64 %z, i32 %a, double %b) { diff --git a/test/CodeGen/ARM/arguments-nosplit-i64.ll b/test/CodeGen/ARM/arguments-nosplit-i64.ll index 5464674dbca5f..815edfd845ad5 100644 --- a/test/CodeGen/ARM/arguments-nosplit-i64.ll +++ b/test/CodeGen/ARM/arguments-nosplit-i64.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | not grep r3 +; RUN: llc < %s -mtriple=arm-linux-gnueabi | not grep r3 ; PR4058 define i32 @f(i64 %z, i32 %a, i64 %b) { diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll index 833e22dc269d1..ad5b2d69fab92 100644 --- a/test/CodeGen/ARM/arguments.ll +++ b/test/CodeGen/ARM/arguments.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \ +; RUN: llc < %s -mtriple=arm-linux-gnueabi | \ ; RUN: grep {mov r0, r2} | count 1 -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \ +; RUN: llc < %s -mtriple=arm-apple-darwin | \ ; RUN: grep {mov r0, r1} | count 1 define i32 @f(i32 %a, i64 %b) { diff --git a/test/CodeGen/ARM/arguments2.ll b/test/CodeGen/ARM/arguments2.ll index eb7e45b4f3664..a515ad75a6694 100644 --- a/test/CodeGen/ARM/arguments2.ll +++ b/test/CodeGen/ARM/arguments2.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin define i32 @f(i32 %a, i128 %b) { %tmp = call i32 @g(i128 %b) diff --git a/test/CodeGen/ARM/arguments3.ll b/test/CodeGen/ARM/arguments3.ll index 97c040521d8b6..58f64c6c2f108 100644 --- a/test/CodeGen/ARM/arguments3.ll +++ b/test/CodeGen/ARM/arguments3.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin define i64 @f(i32 %a, i128 %b) { %tmp = call i64 @g(i128 %b) diff --git a/test/CodeGen/ARM/arguments4.ll b/test/CodeGen/ARM/arguments4.ll index 63ba64b27f1f2..f5f4207b7b372 100644 --- a/test/CodeGen/ARM/arguments4.ll +++ b/test/CodeGen/ARM/arguments4.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin define float @f(i32 %a, i128 %b) { %tmp = call float @g(i128 %b) diff --git a/test/CodeGen/ARM/arguments5.ll b/test/CodeGen/ARM/arguments5.ll index 2000ff7b4a857..388a8ebee670d 100644 --- a/test/CodeGen/ARM/arguments5.ll +++ b/test/CodeGen/ARM/arguments5.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin define double @f(i32 %a, i128 %b) { %tmp = call double @g(i128 %b) diff --git a/test/CodeGen/ARM/arguments6.ll b/test/CodeGen/ARM/arguments6.ll index a18c621d14374..3f757fee45e46 100644 --- a/test/CodeGen/ARM/arguments6.ll +++ b/test/CodeGen/ARM/arguments6.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as 
< %s | llc -mtriple=arm-linux-gnueabi -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin define i128 @f(i32 %a, i128 %b) { %tmp = call i128 @g(i128 %b) diff --git a/test/CodeGen/ARM/arguments7.ll b/test/CodeGen/ARM/arguments7.ll index 489ffd41604d7..038e417b333ae 100644 --- a/test/CodeGen/ARM/arguments7.ll +++ b/test/CodeGen/ARM/arguments7.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin define double @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) { %tmp = call double @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, double %b) diff --git a/test/CodeGen/ARM/arguments8.ll b/test/CodeGen/ARM/arguments8.ll index 5ff7e09548ea8..6999a4d4f6566 100644 --- a/test/CodeGen/ARM/arguments8.ll +++ b/test/CodeGen/ARM/arguments8.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-linux-gnueabi +; RUN: llc < %s -mtriple=arm-apple-darwin define i64 @f(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) { %tmp = call i64 @g(i32 %a2, i32 %a3, i32 %a4, i32 %a5, i64 %b) diff --git a/test/CodeGen/ARM/arguments_f64_backfill.ll b/test/CodeGen/ARM/arguments_f64_backfill.ll index 07d928abe81f9..690f488d8483d 100644 --- a/test/CodeGen/ARM/arguments_f64_backfill.ll +++ b/test/CodeGen/ARM/arguments_f64_backfill.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1} +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 -float-abi=hard | grep {fcpys s0, s1} define float @f(float %z, double %a, float %b) { %tmp = call float @g(float %b) diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll index b260b1312daff..2e35e3953f7e9 100644 --- a/test/CodeGen/ARM/arm-asm.ll +++ b/test/CodeGen/ARM/arm-asm.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define void @frame_dummy() { entry: diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll index f1e4c2aeb7fbe..273986034c9b7 100644 --- a/test/CodeGen/ARM/arm-frameaddr.ll +++ b/test/CodeGen/ARM/arm-frameaddr.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep mov | grep r7 -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | grep r11 +; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | grep r7 +; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | grep r11 ; PR4344 ; PR4416 diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll index 553c2fb646710..c4b4ec613ee55 100644 --- a/test/CodeGen/ARM/arm-negative-stride.ll +++ b/test/CodeGen/ARM/arm-negative-stride.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\} +; RUN: llc < %s -march=arm | grep {str r1, \\\[r.*, -r.*, lsl #2\} define void @test(i32* %P, i32 %A, i32 %i) nounwind { entry: diff --git a/test/CodeGen/ARM/bfc.ll b/test/CodeGen/ARM/bfc.ll new file mode 100644 index 0000000000000..53392de73fcf8 --- /dev/null +++ b/test/CodeGen/ARM/bfc.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=arm -mattr=+v6t2 | grep "bfc " | count 3 + +; 4278190095 = 0xff00000f +define i32 @f1(i32 %a) { + %tmp = and i32 %a, 4278190095 + ret i32 %tmp +} + +; 4286578688 = 0xff800000 +define i32 @f2(i32 %a) { + %tmp = and i32 
%a, 4286578688 + ret i32 %tmp +} + +; 4095 = 0x00000fff +define i32 @f3(i32 %a) { + %tmp = and i32 %a, 4095 + ret i32 %tmp +} diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll index b4ea433c40cc0..b16dcc6755b1f 100644 --- a/test/CodeGen/ARM/bic.ll +++ b/test/CodeGen/ARM/bic.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2 +; RUN: llc < %s -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2 define i32 @f1(i32 %a, i32 %b) { %tmp = xor i32 %b, 4294967295 diff --git a/test/CodeGen/ARM/bits.ll b/test/CodeGen/ARM/bits.ll index 0ac4f9a3833dc..9e94efe3f9dbb 100644 --- a/test/CodeGen/ARM/bits.ll +++ b/test/CodeGen/ARM/bits.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm > %t +; RUN: llc < %s -march=arm > %t ; RUN: grep and %t | count 1 ; RUN: grep orr %t | count 1 ; RUN: grep eor %t | count 1 diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll index 437b3189141dc..0e3e070a818fa 100644 --- a/test/CodeGen/ARM/bx_fold.ll +++ b/test/CodeGen/ARM/bx_fold.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm -; RUN: llvm-as < %s | llc -march=arm | not grep bx +; RUN: llc < %s -march=arm +; RUN: llc < %s -march=arm | not grep bx define void @test(i32 %Ptr, i8* %L) { entry: diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll index 6b196653e05ac..52246c3f0cd77 100644 --- a/test/CodeGen/ARM/call.ll +++ b/test/CodeGen/ARM/call.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm | grep {mov lr, pc} -; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep blx -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi\ +; RUN: llc < %s -march=arm | grep {mov lr, pc} +; RUN: llc < %s -march=arm -mattr=+v5t | grep blx +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\ ; RUN: -relocation-model=pic | grep {PLT} @t = weak global i32 ()* null ; [#uses=1] diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll index 1af6fad099b4e..efe29d857d235 100644 --- a/test/CodeGen/ARM/call_nolink.ll +++ b/test/CodeGen/ARM/call_nolink.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \ +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ ; RUN: not grep {bx lr} %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index 3bf2dc0b4f03a..294de5ff72780 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm | grep "subs r" | count 2 -; RUN: llvm-as < %s | llc -march=arm | grep "adc r" -; RUN: llvm-as < %s | llc -march=arm | grep "sbc r" | count 2 +; RUN: llc < %s -march=arm | grep "subs r" | count 2 +; RUN: llc < %s -march=arm | grep "adc r" +; RUN: llc < %s -march=arm | grep "sbc r" | count 2 define i64 @f1(i64 %a, i64 %b) { entry: diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll index 389fb2ce1ee8f..d2235c9221cef 100644 --- a/test/CodeGen/ARM/clz.ll +++ b/test/CodeGen/ARM/clz.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v5t | grep clz +; RUN: llc < %s -march=arm -mattr=+v5t | grep clz declare i32 @llvm.ctlz.i32(i32) diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll index fcb8b179c803d..5f3ed1d2743c0 100644 --- a/test/CodeGen/ARM/compare-call.ll +++ b/test/CodeGen/ARM/compare-call.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \ +; RUN: llc < %s 
-march=arm -mattr=+v6,+vfp2 | \
 ; RUN: grep fcmpes
 
 define void @test3(float* %glob, i32 %X) {
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 095157b592bf7..e2d8ddc63fcf4 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,13 +1,13 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN: grep {mov r0, #0} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN: grep {mov r0, #255$} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \
+; RUN: llc < %s -march=arm -asm-verbose | \
 ; RUN: grep {mov r0.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {orr.*256} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
-; RUN: llvm-as < %s | llc -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep {cmp r0, #1, 16} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {orr.*256} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*-1073741761} | count 1
+; RUN: llc < %s -march=arm -asm-verbose | grep {mov r0, .*1008} | count 1
+; RUN: llc < %s -march=arm | grep {cmp r0, #1, 16} | count 1
 
 define i32 @f1() {
 ret i32 0
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 4f4091af4837c..0dcf9ddc0bb17 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep {bl.\*__ltdf} | count 1
+; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/ARM/ctors_dtors.ll b/test/CodeGen/ARM/ctors_dtors.ll
index 5caa5b1266dac..fb94626ab7dd0 100644
--- a/test/CodeGen/ARM/ctors_dtors.ll
+++ b/test/CodeGen/ARM/ctors_dtors.ll
@@ -1,15 +1,15 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep {\\.mod_init_func}
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \
-; RUN: grep {\\.mod_term_func}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | \
-; RUN: grep {\\.section \\.ctors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | \
-; RUN: grep {\\.section \\.dtors,"aw",.progbits}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN: grep {\\.section \\.init_array,"aw",.init_array}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | \
-; RUN: grep {\\.section \\.fini_array,"aw",.fini_array}
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
+; DARWIN: .section __DATA,__mod_term_func,mod_term_funcs
+
+; ELF: .section .ctors,"aw",%progbits
+; ELF: .section .dtors,"aw",%progbits
+
+; GNUEABI: .section .init_array,"aw",%init_array
+; GNUEABI: .section .fini_array,"aw",%fini_array
 
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_init } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
 @llvm.global_dtors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @__mf_fini } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
diff --git a/test/CodeGen/ARM/div.ll
b/test/CodeGen/ARM/div.ll index 1085ec7fa624e..2f724e79f104e 100644 --- a/test/CodeGen/ARM/div.ll +++ b/test/CodeGen/ARM/div.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm > %t +; RUN: llc < %s -march=arm > %t ; RUN: grep __divsi3 %t ; RUN: grep __udivsi3 %t ; RUN: grep __modsi3 %t diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll index e0cd4e15f4e3e..92e2d136af68d 100644 --- a/test/CodeGen/ARM/dyn-stackalloc.ll +++ b/test/CodeGen/ARM/dyn-stackalloc.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll index 2e9041c6ecab1..dc45ce705f444 100644 --- a/test/CodeGen/ARM/extloadi1.ll +++ b/test/CodeGen/ARM/extloadi1.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm @handler_installed.6144.b = external global i1 ; [#uses=1] define void @__mf_sigusr1_respond() { diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll new file mode 100644 index 0000000000000..5690a01d750b4 --- /dev/null +++ b/test/CodeGen/ARM/fabss.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fabss\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %dum = fadd float %a, %b + %0 = tail call float @fabsf(float %dum) + %dum1 = fadd float %0, %b + ret float %dum1 +} + +declare float @fabsf(float) diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll new file mode 100644 index 0000000000000..a01f868d18b24 --- /dev/null +++ b/test/CodeGen/ARM/fadds.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fadds\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fadd float %a, %b + ret float %0 +} + diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index 66acda9c9b913..bf7c305c89599 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm | grep bic | count 2 -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | \ +; RUN: llc < %s -march=arm | grep bic | count 2 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | \ ; RUN: grep fneg | count 2 define float @test1(float %x, double %y) { diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll new file mode 100644 index 
0000000000000..2af250d121d14 --- /dev/null +++ b/test/CodeGen/ARM/fdivs.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fdivs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fdiv float %a, %b + ret float %0 +} + diff --git a/test/CodeGen/ARM/fixunsdfdi.ll b/test/CodeGen/ARM/fixunsdfdi.ll index 777a3d69a1913..ebf1d84536e3b 100644 --- a/test/CodeGen/ARM/fixunsdfdi.ll +++ b/test/CodeGen/ARM/fixunsdfdi.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 -; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fstd +; RUN: llc < %s -march=arm -mattr=+vfp2 +; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fstd define hidden i64 @__fixunsdfdi(double %x) nounwind readnone { entry: diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll new file mode 100644 index 0000000000000..1a1cd0747b498 --- /dev/null +++ b/test/CodeGen/ARM/fmacs.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmla.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %acc, float %a, float %b) { +entry: + %0 = fmul float %a, %b + %1 = fadd float %acc, %0 + ret float %1 +} + diff --git a/test/CodeGen/ARM/fmdrr-fmrrd.ll b/test/CodeGen/ARM/fmdrr-fmrrd.ll index 315e6238732fc..eb72faf8d811b 100644 --- a/test/CodeGen/ARM/fmdrr-fmrrd.ll +++ b/test/CodeGen/ARM/fmdrr-fmrrd.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmdrr -; RUN: llvm-as < %s | llc -march=arm -mattr=vfp2 | not grep fmrrd +; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmdrr +; RUN: llc < %s -march=arm -mattr=vfp2 | not grep fmrrd ; naive codegen for this is: ; _i: diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll new file mode 100644 index 0000000000000..c6e6d40604028 --- /dev/null +++ b/test/CodeGen/ARM/fmscs.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmscs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %acc, float %a, float %b) { +entry: + %0 = fmul 
float %a, %b + %1 = fsub float %0, %acc + ret float %1 +} + diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll new file mode 100644 index 0000000000000..cb5dadeb21044 --- /dev/null +++ b/test/CodeGen/ARM/fmuls.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fmuls\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fmul float %a, %b + ret float %0 +} + diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll new file mode 100644 index 0000000000000..7da443dd93f50 --- /dev/null +++ b/test/CodeGen/ARM/fnegs.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnegs\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 + +define float @test1(float* %a) { +entry: + %0 = load float* %a, align 4 ; [#uses=2] + %1 = fsub float -0.000000e+00, %0 ; [#uses=2] + %2 = fpext float %1 to double ; [#uses=1] + %3 = fcmp olt double %2, 1.234000e+00 ; [#uses=1] + %retval = select i1 %3, float %1, float %0 ; [#uses=1] + ret float %retval +} + +define float @test2(float* %a) { +entry: + %0 = load float* %a, align 4 ; [#uses=2] + %1 = fmul float -1.000000e+00, %0 ; [#uses=2] + %2 = fpext float %1 to double ; [#uses=1] + %3 = fcmp olt double %2, 1.234000e+00 ; [#uses=1] + %retval = select i1 %3, float %1, float %0 ; [#uses=1] + ret float %retval +} diff --git a/test/CodeGen/ARM/fnmacs.ll b/test/CodeGen/ARM/fnmacs.ll new file mode 100644 index 0000000000000..e57bbbba3b384 --- /dev/null +++ b/test/CodeGen/ARM/fnmacs.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmls.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {fnmacs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %acc, float %a, float %b) { +entry: + %0 = fmul float %a, %b + %1 = fsub float %acc, %0 + ret float %1 +} + diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll new file mode 100644 index 0000000000000..3ae437d69db18 --- /dev/null +++ b/test/CodeGen/ARM/fnmscs.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s +; RUN: llc < %s 
-march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s + +define float @test1(float %acc, float %a, float %b) nounwind { +; CHECK: fnmscs s2, s1, s0 +entry: + %0 = fmul float %a, %b + %1 = fsub float -0.0, %0 + %2 = fsub float %1, %acc + ret float %2 +} + +define float @test2(float %acc, float %a, float %b) nounwind { +; CHECK: fnmscs s2, s1, s0 +entry: + %0 = fmul float %a, %b + %1 = fmul float -1.0, %0 + %2 = fsub float %1, %acc + ret float %2 +} + diff --git a/test/CodeGen/ARM/fnmul.ll b/test/CodeGen/ARM/fnmul.ll index 7bbda2d76d5d2..613b347cdbf20 100644 --- a/test/CodeGen/ARM/fnmul.ll +++ b/test/CodeGen/ARM/fnmul.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fnmuld -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fnmuld +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -enable-sign-dependent-rounding-fp-math | grep fmul define double @t1(double %a, double %b) { diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll new file mode 100644 index 0000000000000..efd87d2dcb896 --- /dev/null +++ b/test/CodeGen/ARM/fnmuls.ll @@ -0,0 +1,23 @@ +; XFAIL: * +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s + +define float @test1(float %a, float %b) nounwind { +; CHECK: fnmscs s2, s1, s0 +entry: + %0 = fmul float %a, %b + %1 = fsub float -0.0, %0 + ret float %1 +} + +define float @test2(float %a, float %b) nounwind { +; CHECK: fnmscs s2, s1, s0 +entry: + %0 = fmul float %a, %b + %1 = fmul float -1.0, %0 + ret float %1 +} + diff --git a/test/CodeGen/ARM/formal.ll b/test/CodeGen/ARM/formal.ll index 6d6d108f32835..4ac10badea976 100644 --- a/test/CodeGen/ARM/formal.ll +++ b/test/CodeGen/ARM/formal.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 +; RUN: llc < %s -march=arm -mattr=+vfp2 declare void @bar(i64 %x, i64 %y) diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll index ba199dbf56080..4e4ef722f97e6 100644 --- a/test/CodeGen/ARM/fp.ll +++ b/test/CodeGen/ARM/fp.ll @@ -1,55 +1,71 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t -; RUN: grep fmsr %t | count 4 -; RUN: grep fsitos %t -; RUN: grep fmrs %t | count 2 -; RUN: grep fsitod %t -; RUN: grep fmrrd %t | count 3 -; RUN: not grep fmdrr %t -; RUN: grep fldd %t -; RUN: grep fuitod %t -; RUN: grep fuitos %t -; RUN: grep 1065353216 %t +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s define float @f(i32 %a) { +;CHECK: f: +;CHECK: fmsr +;CHECK-NEXT: fsitos +;CHECK-NEXT: fmrs entry: %tmp = sitofp i32 %a to float ; [#uses=1] ret float %tmp } define double @g(i32 %a) { +;CHECK: g: +;CHECK: fmsr +;CHECK-NEXT: fsitod +;CHECK-NEXT: fmrrd entry: %tmp = sitofp i32 %a to double ; [#uses=1] ret double %tmp } define double @uint_to_double(i32 %a) { +;CHECK: uint_to_double: +;CHECK: fmsr +;CHECK-NEXT: fuitod +;CHECK-NEXT: fmrrd entry: %tmp = uitofp i32 %a to double ; [#uses=1] ret double %tmp } define float @uint_to_float(i32 %a) { +;CHECK: uint_to_float: +;CHECK: fmsr +;CHECK-NEXT: fuitos +;CHECK-NEXT: fmrs entry: %tmp = uitofp i32 %a to float ; [#uses=1] ret float %tmp } define 
double @h(double* %v) { +;CHECK: h: +;CHECK: fldd +;CHECK-NEXT: fmrrd entry: %tmp = load double* %v ; [#uses=1] ret double %tmp } define float @h2() { +;CHECK: h2: +;CHECK: 1065353216 entry: ret float 1.000000e+00 } define double @f2(double %a) { +;CHECK: f2: +;CHECK-NOT: fmdrr ret double %a } define void @f3() { +;CHECK: f3: +;CHECK-NOT: fmdrr +;CHECK: f4 entry: %tmp = call double @f5( ) ; [#uses=1] call void @f4( double %tmp ) diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll new file mode 100644 index 0000000000000..9ce2ac549b571 --- /dev/null +++ b/test/CodeGen/ARM/fp_convert.ll @@ -0,0 +1,49 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2 + +define i32 @test1(float %a, float %b) { +; VFP2: test1: +; VFP2: ftosizs s0, s0 +; NEON: test1: +; NEON: vcvt.s32.f32 d0, d0 +entry: + %0 = fadd float %a, %b + %1 = fptosi float %0 to i32 + ret i32 %1 +} + +define i32 @test2(float %a, float %b) { +; VFP2: test2: +; VFP2: ftouizs s0, s0 +; NEON: test2: +; NEON: vcvt.u32.f32 d0, d0 +entry: + %0 = fadd float %a, %b + %1 = fptoui float %0 to i32 + ret i32 %1 +} + +define float @test3(i32 %a, i32 %b) { +; VFP2: test3: +; VFP2: fuitos s0, s0 +; NEON: test3: +; NEON: vcvt.f32.u32 d0, d0 +entry: + %0 = add i32 %a, %b + %1 = uitofp i32 %0 to float + ret float %1 +} + +define float @test4(i32 %a, i32 %b) { +; VFP2: test4: +; VFP2: fsitos s0, s0 +; NEON: test4: +; NEON: vcvt.f32.s32 d0, d0 +entry: + %0 = add i32 %a, %b + %1 = sitofp i32 %0 to float + ret float %1 +} diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll index 568a6c41a0dde..ebeeb184121b9 100644 --- a/test/CodeGen/ARM/fparith.ll +++ b/test/CodeGen/ARM/fparith.ll @@ -1,74 +1,88 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t -; RUN: grep fadds %t -; RUN: grep faddd %t -; RUN: grep fmuls %t -; RUN: grep fmuld %t -; RUN: grep eor %t -; RUN: grep fnegd %t -; RUN: grep fdivs %t -; RUN: grep fdivd %t +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s define float @f1(float %a, float %b) { +;CHECK: f1: +;CHECK: fadds entry: %tmp = fadd float %a, %b ; [#uses=1] ret float %tmp } define double @f2(double %a, double %b) { +;CHECK: f2: +;CHECK: faddd entry: %tmp = fadd double %a, %b ; [#uses=1] ret double %tmp } define float @f3(float %a, float %b) { +;CHECK: f3: +;CHECK: fmuls entry: %tmp = fmul float %a, %b ; [#uses=1] ret float %tmp } define double @f4(double %a, double %b) { +;CHECK: f4: +;CHECK: fmuld entry: %tmp = fmul double %a, %b ; [#uses=1] ret double %tmp } define float @f5(float %a, float %b) { +;CHECK: f5: +;CHECK: fsubs entry: %tmp = fsub float %a, %b ; [#uses=1] ret float %tmp } define double @f6(double %a, double %b) { +;CHECK: f6: +;CHECK: fsubd entry: %tmp = fsub double %a, %b ; [#uses=1] ret double %tmp } define float @f7(float %a) { +;CHECK: f7: +;CHECK: eor entry: %tmp1 = fsub float -0.000000e+00, %a ; [#uses=1] ret float %tmp1 } define double @f8(double %a) { +;CHECK: f8: +;CHECK: fnegd entry: %tmp1 = fsub double -0.000000e+00, %a ; [#uses=1] ret double %tmp1 } define float @f9(float %a, float %b) { +;CHECK: f9: +;CHECK: fdivs entry: %tmp1 = fdiv float %a, %b ; [#uses=1] ret float %tmp1 } define double 
@f10(double %a, double %b) { +;CHECK: f10: +;CHECK: fdivd entry: %tmp1 = fdiv double %a, %b ; [#uses=1] ret double %tmp1 } define float @f11(float %a) { +;CHECK: f11: +;CHECK: bic entry: %tmp1 = call float @fabsf( float %a ) ; [#uses=1] ret float %tmp1 @@ -77,6 +91,8 @@ entry: declare float @fabsf(float) define double @f12(double %a) { +;CHECK: f12: +;CHECK: fabsd entry: %tmp1 = call double @fabs( double %a ) ; [#uses=1] ret double %tmp1 diff --git a/test/CodeGen/ARM/fpcmp.ll b/test/CodeGen/ARM/fpcmp.ll index ce0f4029589d5..2c9591ca5429c 100644 --- a/test/CodeGen/ARM/fpcmp.ll +++ b/test/CodeGen/ARM/fpcmp.ll @@ -1,13 +1,9 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t -; RUN: grep movmi %t -; RUN: grep moveq %t -; RUN: grep movgt %t -; RUN: grep movge %t -; RUN: grep movne %t -; RUN: grep fcmped %t | count 1 -; RUN: grep fcmpes %t | count 6 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s define i32 @f1(float %a) { +;CHECK: f1: +;CHECK: fcmpes +;CHECK: movmi entry: %tmp = fcmp olt float %a, 1.000000e+00 ; [#uses=1] %tmp1 = zext i1 %tmp to i32 ; [#uses=1] @@ -15,6 +11,9 @@ entry: } define i32 @f2(float %a) { +;CHECK: f2: +;CHECK: fcmpes +;CHECK: moveq entry: %tmp = fcmp oeq float %a, 1.000000e+00 ; [#uses=1] %tmp2 = zext i1 %tmp to i32 ; [#uses=1] @@ -22,6 +21,9 @@ entry: } define i32 @f3(float %a) { +;CHECK: f3: +;CHECK: fcmpes +;CHECK: movgt entry: %tmp = fcmp ogt float %a, 1.000000e+00 ; [#uses=1] %tmp3 = zext i1 %tmp to i32 ; [#uses=1] @@ -29,6 +31,9 @@ entry: } define i32 @f4(float %a) { +;CHECK: f4: +;CHECK: fcmpes +;CHECK: movge entry: %tmp = fcmp oge float %a, 1.000000e+00 ; [#uses=1] %tmp4 = zext i1 %tmp to i32 ; [#uses=1] @@ -36,6 +41,9 @@ entry: } define i32 @f5(float %a) { +;CHECK: f5: +;CHECK: fcmpes +;CHECK: movls entry: %tmp = fcmp ole float %a, 1.000000e+00 ; [#uses=1] %tmp5 = zext i1 %tmp to i32 ; [#uses=1] @@ -43,6 +51,9 @@ entry: } define i32 @f6(float %a) { +;CHECK: f6: +;CHECK: fcmpes +;CHECK: movne entry: %tmp = fcmp une float %a, 1.000000e+00 ; [#uses=1] %tmp6 = zext i1 %tmp to i32 ; [#uses=1] @@ -50,6 +61,9 @@ entry: } define i32 @g1(double %a) { +;CHECK: g1: +;CHECK: fcmped +;CHECK: movmi entry: %tmp = fcmp olt double %a, 1.000000e+00 ; [#uses=1] %tmp7 = zext i1 %tmp to i32 ; [#uses=1] diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll index 3e749afb400cb..67f70e9eb5ed2 100644 --- a/test/CodeGen/ARM/fpcmp_ueq.ll +++ b/test/CodeGen/ARM/fpcmp_ueq.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm | grep moveq -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | grep movvs +; RUN: llc < %s -march=arm | grep moveq +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep movvs define i32 @f7(float %a, float %b) { entry: diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll index 218b25f9c1b1b..ee3c338e3b301 100644 --- a/test/CodeGen/ARM/fpconv.ll +++ b/test/CodeGen/ARM/fpconv.ll @@ -1,81 +1,101 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 > %t -; RUN: grep fcvtsd %t -; RUN: grep fcvtds %t -; RUN: grep ftosizs %t -; RUN: grep ftouizs %t -; RUN: grep ftosizd %t -; RUN: grep ftouizd %t -; RUN: grep fsitos %t -; RUN: grep fsitod %t -; RUN: grep fuitos %t -; RUN: grep fuitod %t -; RUN: llvm-as < %s | llc -march=arm > %t -; RUN: grep truncdfsf2 %t -; RUN: grep extendsfdf2 %t -; RUN: grep fixsfsi %t -; RUN: grep fixunssfsi %t -; RUN: grep fixdfsi %t -; RUN: grep fixunsdfsi %t -; RUN: grep floatsisf %t -; RUN: grep floatsidf %t -; RUN: grep floatunsisf %t -; RUN: grep floatunsidf %t +; RUN: llc < %s -march=arm 
-mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc < %s -march=arm | FileCheck %s define float @f1(double %x) { +;CHECK-VFP: f1: +;CHECK-VFP: fcvtsd +;CHECK: f1: +;CHECK: truncdfsf2 entry: %tmp1 = fptrunc double %x to float ; [#uses=1] ret float %tmp1 } define double @f2(float %x) { +;CHECK-VFP: f2: +;CHECK-VFP: fcvtds +;CHECK: f2: +;CHECK: extendsfdf2 entry: %tmp1 = fpext float %x to double ; [#uses=1] ret double %tmp1 } define i32 @f3(float %x) { +;CHECK-VFP: f3: +;CHECK-VFP: ftosizs +;CHECK: f3: +;CHECK: fixsfsi entry: %tmp = fptosi float %x to i32 ; [#uses=1] ret i32 %tmp } define i32 @f4(float %x) { +;CHECK-VFP: f4: +;CHECK-VFP: ftouizs +;CHECK: f4: +;CHECK: fixunssfsi entry: %tmp = fptoui float %x to i32 ; [#uses=1] ret i32 %tmp } define i32 @f5(double %x) { +;CHECK-VFP: f5: +;CHECK-VFP: ftosizd +;CHECK: f5: +;CHECK: fixdfsi entry: %tmp = fptosi double %x to i32 ; [#uses=1] ret i32 %tmp } define i32 @f6(double %x) { +;CHECK-VFP: f6: +;CHECK-VFP: ftouizd +;CHECK: f6: +;CHECK: fixunsdfsi entry: %tmp = fptoui double %x to i32 ; [#uses=1] ret i32 %tmp } define float @f7(i32 %a) { +;CHECK-VFP: f7: +;CHECK-VFP: fsitos +;CHECK: f7: +;CHECK: floatsisf entry: %tmp = sitofp i32 %a to float ; [#uses=1] ret float %tmp } define double @f8(i32 %a) { +;CHECK-VFP: f8: +;CHECK-VFP: fsitod +;CHECK: f8: +;CHECK: floatsidf entry: %tmp = sitofp i32 %a to double ; [#uses=1] ret double %tmp } define float @f9(i32 %a) { +;CHECK-VFP: f9: +;CHECK-VFP: fuitos +;CHECK: f9: +;CHECK: floatunsisf entry: %tmp = uitofp i32 %a to float ; [#uses=1] ret float %tmp } define double @f10(i32 %a) { +;CHECK-VFP: f10: +;CHECK-VFP: fuitod +;CHECK: f10: +;CHECK: floatunsidf entry: %tmp = uitofp i32 %a to double ; [#uses=1] ret double %tmp diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll index 13653bbe6aa02..fa897bf83f3a4 100644 --- a/test/CodeGen/ARM/fpmem.ll +++ b/test/CodeGen/ARM/fpmem.ll @@ -1,8 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm | \ +; RUN: llc < %s -march=arm | \ ; RUN: grep {mov r0, #0} | count 1 -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \ +; RUN: llc < %s -march=arm -mattr=+vfp2 | \ ; RUN: grep {flds.*\\\[} | count 1 -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \ +; RUN: llc < %s -march=arm -mattr=+vfp2 | \ ; RUN: grep {fsts.*\\\[} | count 1 define float @f1(float %a) { diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll index 461a2c966ec49..6d487927ee616 100644 --- a/test/CodeGen/ARM/fpow.ll +++ b/test/CodeGen/ARM/fpow.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define double @t(double %x, double %y) nounwind optsize { entry: diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll index ab09ffff6b36a..174106bf4fafb 100644 --- a/test/CodeGen/ARM/fpowi.ll +++ b/test/CodeGen/ARM/fpowi.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep powidf2 +; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep powidf2 ; PR1287 ; ModuleID = '' diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll index 41168acc42a55..0d270b0c0568b 100644 --- a/test/CodeGen/ARM/fptoint.ll +++ b/test/CodeGen/ARM/fptoint.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1 -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6,+vfp2 | not grep fmrrd +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep fmrs | count 1 +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd @i = weak global i32 0 ; [#uses=2] @u = weak global i32 0 ; [#uses=2] diff 
--git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll new file mode 100644 index 0000000000000..060dd464f1b8d --- /dev/null +++ b/test/CodeGen/ARM/fsubs.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {fsubs\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 + +define float @test(float %a, float %b) { +entry: + %0 = fsub float %a, %b + ret float %0 +} + diff --git a/test/CodeGen/ARM/hardfloat_neon.ll b/test/CodeGen/ARM/hardfloat_neon.ll new file mode 100644 index 0000000000000..4abf04b0a4b6c --- /dev/null +++ b/test/CodeGen/ARM/hardfloat_neon.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+neon -float-abi=hard + +define <16 x i8> @vmulQi8_reg(<16 x i8> %A, <16 x i8> %B) nounwind { + %tmp1 = mul <16 x i8> %A, %B + ret <16 x i8> %tmp1 +} + +define <16 x i8> @f(<16 x i8> %a, <16 x i8> %b) { + %tmp = call <16 x i8> @g(<16 x i8> %b) + ret <16 x i8> %tmp +} + +declare <16 x i8> @g(<16 x i8>) diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll index 16231da39b7cf..ccdc7bf4c1408 100644 --- a/test/CodeGen/ARM/hello.ll +++ b/test/CodeGen/ARM/hello.ll @@ -1,8 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi | grep mov | count 1 -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu --disable-fp-elim | \ +; RUN: llc < %s -march=arm +; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1 +; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \ ; RUN: grep mov | count 3 -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep mov | count 2 +; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2 @str = internal constant [12 x i8] c"Hello World\00" diff --git a/test/CodeGen/ARM/hidden-vis-2.ll b/test/CodeGen/ARM/hidden-vis-2.ll index 6cf69aa486d5c..90f5308d5ff01 100644 --- a/test/CodeGen/ARM/hidden-vis-2.ll +++ b/test/CodeGen/ARM/hidden-vis-2.ll @@ -1,9 +1,12 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 2 +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s @x = weak hidden global i32 0 ; [#uses=1] define i32 @t() nounwind readonly { entry: +; CHECK: t: +; CHECK: ldr +; CHECK-NEXT: ldr %0 = load i32* @x, align 4 ; [#uses=1] ret i32 %0 } diff --git a/test/CodeGen/ARM/hidden-vis-3.ll b/test/CodeGen/ARM/hidden-vis-3.ll index 4477f2a441a15..3bd710ae949fa 100644 --- a/test/CodeGen/ARM/hidden-vis-3.ll +++ b/test/CodeGen/ARM/hidden-vis-3.ll @@ -1,12 +1,15 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldr | count 6 -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep non_lazy_ptr -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep long | count 4 +; RUN: llc < %s -mtriple=arm-apple-darwin9 | FileCheck %s @x = external hidden global i32 ; [#uses=1] @y = extern_weak hidden global i32 ; [#uses=1] define i32 @t() nounwind readonly { entry: +; CHECK: LCPI1_0: +; CHECK-NEXT: .long _x +; CHECK: LCPI1_1: +; CHECK-NEXT: .long _y + %0 = load i32* @x, align 4 ; [#uses=1] %1 = load i32* @y, align 4 ; [#uses=1] %2 = add i32 %1, %0 ; [#uses=1] diff --git a/test/CodeGen/ARM/hidden-vis.ll b/test/CodeGen/ARM/hidden-vis.ll index 93f81ecdae053..3544ae81a0a45 100644 --- a/test/CodeGen/ARM/hidden-vis.ll +++ b/test/CodeGen/ARM/hidden-vis.ll @@ -1,18 +1,23 @@ -; RUN: llvm-as 
< %s | llc -mtriple=arm-apple-darwin | \ -; RUN: grep .private_extern | count 2 +; RUN: llc < %s -mtriple=arm-linux | FileCheck %s -check-prefix=LINUX +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN -%struct.Person = type { i32 } @a = hidden global i32 0 @b = external global i32 +define weak hidden void @t1() nounwind { +; LINUX: .hidden t1 +; LINUX: t1: -define weak hidden void @_ZN6Person13privateMethodEv(%struct.Person* %this) { +; DARWIN: .private_extern _t1 +; DARWIN: t1: ret void } -declare void @function(i32) +define weak void @t2() nounwind { +; LINUX: t2: +; LINUX: .hidden a -define weak void @_ZN6PersonC1Ei(%struct.Person* %this, i32 %_c) { +; DARWIN: t2: +; DARWIN: .private_extern _a ret void } - diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll index ede6d74553309..1054f27dbe302 100644 --- a/test/CodeGen/ARM/iabs.ll +++ b/test/CodeGen/ARM/iabs.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -stats |& \ +; RUN: llc < %s -march=arm -stats |& \ ; RUN: grep {3 .*Number of machine instrs printed} ;; Integer absolute value, should produce something as good as: ARM: diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll index 7d429550b3add..e6aa044564a2b 100644 --- a/test/CodeGen/ARM/ifcvt1.ll +++ b/test/CodeGen/ARM/ifcvt1.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm -; RUN: llvm-as < %s | llc -march=arm | grep bx | count 1 +; RUN: llc < %s -march=arm +; RUN: llc < %s -march=arm | grep bx | count 1 define i32 @t1(i32 %a, i32 %b) { %tmp2 = icmp eq i32 %a, 0 diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll index 3942061212182..ce57d736c1677 100644 --- a/test/CodeGen/ARM/ifcvt2.ll +++ b/test/CodeGen/ARM/ifcvt2.ll @@ -1,7 +1,7 @@ -; RUN: llvm-as < %s | llc -march=arm -; RUN: llvm-as < %s | llc -march=arm | grep bxlt | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep bxgt | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep bxge | count 1 +; RUN: llc < %s -march=arm +; RUN: llc < %s -march=arm | grep bxlt | count 1 +; RUN: llc < %s -march=arm | grep bxgt | count 1 +; RUN: llc < %s -march=arm | grep bxge | count 1 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { %tmp2 = icmp sgt i32 %c, 10 diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll index 620bcbea1f27e..f7ebac6f2bac9 100644 --- a/test/CodeGen/ARM/ifcvt3.ll +++ b/test/CodeGen/ARM/ifcvt3.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm -; RUN: llvm-as < %s | llc -march=arm | grep cmpne | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep bx | count 2 +; RUN: llc < %s -march=arm +; RUN: llc < %s -march=arm | grep cmpne | count 1 +; RUN: llc < %s -march=arm | grep bx | count 2 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) { switch i32 %c, label %cond_next [ diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll index ce5a679196c59..f28c61b9787ff 100644 --- a/test/CodeGen/ARM/ifcvt4.ll +++ b/test/CodeGen/ARM/ifcvt4.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm -; RUN: llvm-as < %s | llc -march=arm | grep subgt | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep suble | count 1 +; RUN: llc < %s -march=arm +; RUN: llc < %s -march=arm | grep subgt | count 1 +; RUN: llc < %s -march=arm | grep suble | count 1 ; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt. 
 define i32 @t(i32 %a, i32 %b) {
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index f8d4f82bbe28b..e9145ac36ddfb 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,5 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep blge | count 1
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s

 @x = external global i32* ; [#uses=1]

@@ -11,6 +10,8 @@ entry:
 }

 define void @t1(i32 %a, i32 %b) {
+; CHECK: t1:
+; CHECK: ldmltfd sp!, {r7, pc}
 entry:
 %tmp1 = icmp sgt i32 %a, 10 ; [#uses=1]
 br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 63c4a0819dbf3..58241157580cc 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,10 +1,6 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN: grep cmpne | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN: grep ldmhi | count 1

 define void @foo(i32 %X, i32 %Y) {
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index 6bb4b5609a580..f9cf88f7292e5 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN: grep cmpeq | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN: grep moveq | count 1
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN: grep ldmeq | count 1
 ; FIXME: Need post-ifcvt branch folding to get rid of the extra br at end of BB1.
diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll
index 85bd8c7bf1fc8..6cb8e7bb69fd6 100644
--- a/test/CodeGen/ARM/ifcvt8.ll
+++ b/test/CodeGen/ARM/ifcvt8.ll
@@ -1,7 +1,4 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -mtriple=arm-apple-darwin | \
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin | \
 ; RUN: grep ldmne | count 1

 %struct.SString = type { i8*, i32, i32 }
diff --git a/test/CodeGen/ARM/ifcvt9.ll b/test/CodeGen/ARM/ifcvt9.ll
index bbd2f2ed62139..05bdc459c83fe 100644
--- a/test/CodeGen/ARM/ifcvt9.ll
+++ b/test/CodeGen/ARM/ifcvt9.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 define fastcc void @t() nounwind {
 entry:
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index ad24eb5dad716..febe6f56b66cd 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -mtriple=arm-linux

 define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
 {
diff --git a/test/CodeGen/ARM/imm.ll b/test/CodeGen/ARM/imm.ll
index 998adbae5c94f..6f25f9dcb323d 100644
--- a/test/CodeGen/ARM/imm.ll
+++ b/test/CodeGen/ARM/imm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep CPI
+; RUN: llc < %s -march=arm | not grep CPI

 define i32 @test1(i32 %A) {
 %B = add i32 %A, -268435441 ; [#uses=1]
diff --git a/test/CodeGen/ARM/inlineasm-imm-arm.ll b/test/CodeGen/ARM/inlineasm-imm-arm.ll
index 2ceceae0d9d17..45dfcf0b82a5a 100644
--- a/test/CodeGen/ARM/inlineasm-imm-arm.ll
+++ b/test/CodeGen/ARM/inlineasm-imm-arm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 ; Test ARM-mode "I" constraint, for any Data Processing immediate.
define i32 @testI(i32 %x) { diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll index 2f7332a5f4805..d522348ba9993 100644 --- a/test/CodeGen/ARM/inlineasm.ll +++ b/test/CodeGen/ARM/inlineasm.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 +; RUN: llc < %s -march=arm -mattr=+v6 define i32 @test1(i32 %tmp54) { %tmp56 = tail call i32 asm "uxtb16 $0,$1", "=r,r"( i32 %tmp54 ) ; [#uses=1] diff --git a/test/CodeGen/ARM/inlineasm2.ll b/test/CodeGen/ARM/inlineasm2.ll index 69394eb5bd494..a99bccf5a6541 100644 --- a/test/CodeGen/ARM/inlineasm2.ll +++ b/test/CodeGen/ARM/inlineasm2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 +; RUN: llc < %s -march=arm -mattr=+vfp2 define double @__ieee754_sqrt(double %x) { %tmp2 = tail call double asm "fsqrtd ${0:P}, ${1:P}", "=w,w"( double %x ) diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll index f20344301e997..59f0d538d47cc 100644 --- a/test/CodeGen/ARM/insn-sched1.ll +++ b/test/CodeGen/ARM/insn-sched1.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6 |\ +; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\ ; RUN: grep mov | count 3 define i32 @test(i32 %x) { diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll index 7e8eb42b690fe..5116ac82862a4 100644 --- a/test/CodeGen/ARM/ispositive.ll +++ b/test/CodeGen/ARM/ispositive.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | grep {mov r0, r0, lsr #31} +; RUN: llc < %s -march=arm | grep {mov r0, r0, lsr #31} define i32 @test1(i32 %X) { entry: diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll index b1738a4a38a69..ddf0f0ec7cc0f 100644 --- a/test/CodeGen/ARM/large-stack.ll +++ b/test/CodeGen/ARM/large-stack.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define void @test1() { %tmp = alloca [ 64 x i32 ] , align 4 diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll index 6a054577fc8bf..774b3c09bed42 100644 --- a/test/CodeGen/ARM/ldm.ll +++ b/test/CodeGen/ARM/ldm.ll @@ -1,8 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm | \ +; RUN: llc < %s -march=arm | \ ; RUN: grep ldmia | count 2 -; RUN: llvm-as < %s | llc -march=arm | \ +; RUN: llc < %s -march=arm | \ ; RUN: grep ldmib | count 1 -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | \ +; RUN: llc < %s -mtriple=arm-apple-darwin | \ ; RUN: grep {ldmfd sp\!} | count 3 @X = external global [0 x i32] ; <[0 x i32]*> [#uses=5] diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll index ea99655723190..954fb5b8ad311 100644 --- a/test/CodeGen/ARM/ldr.ll +++ b/test/CodeGen/ARM/ldr.ll @@ -1,8 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm | grep {ldr r0} | count 7 -; RUN: llvm-as < %s | llc -march=arm | grep mov | grep 1 -; RUN: llvm-as < %s | llc -march=arm | not grep mvn -; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsl -; RUN: llvm-as < %s | llc -march=arm | grep ldr | grep lsr +; RUN: llc < %s -march=arm | grep {ldr r0} | count 7 +; RUN: llc < %s -march=arm | grep mov | grep 1 +; RUN: llc < %s -march=arm | not grep mvn +; RUN: llc < %s -march=arm | grep ldr | grep lsl +; RUN: llc < %s -march=arm | grep ldr | grep lsr define i32 @f1(i32* %v) { entry: diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll index b99c72197740e..d29eb022bacee 100644 --- a/test/CodeGen/ARM/ldr_ext.ll +++ b/test/CodeGen/ARM/ldr_ext.ll @@ 
-1,27 +1,36 @@ -; RUN: llvm-as < %s | llc -march=arm | grep ldrb | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep ldrh | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep ldrsb | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep ldrsh | count 1 +; RUN: llc < %s -march=arm | FileCheck %s -define i32 @test1(i8* %v.pntr.s0.u1) { - %tmp.u = load i8* %v.pntr.s0.u1 +define i32 @test1(i8* %t1) nounwind { +; CHECK: ldrb + %tmp.u = load i8* %t1 %tmp1.s = zext i8 %tmp.u to i32 ret i32 %tmp1.s } -define i32 @test2(i16* %v.pntr.s0.u1) { - %tmp.u = load i16* %v.pntr.s0.u1 +define i32 @test2(i16* %t1) nounwind { +; CHECK: ldrh + %tmp.u = load i16* %t1 %tmp1.s = zext i16 %tmp.u to i32 ret i32 %tmp1.s } -define i32 @test3(i8* %v.pntr.s1.u0) { - %tmp.s = load i8* %v.pntr.s1.u0 +define i32 @test3(i8* %t0) nounwind { +; CHECK: ldrsb + %tmp.s = load i8* %t0 %tmp1.s = sext i8 %tmp.s to i32 ret i32 %tmp1.s } -define i32 @test4() { +define i32 @test4(i16* %t0) nounwind { +; CHECK: ldrsh + %tmp.s = load i16* %t0 + %tmp1.s = sext i16 %tmp.s to i32 + ret i32 %tmp1.s +} + +define i32 @test5() nounwind { +; CHECK: mov r0, #0 +; CHECK: ldrsh %tmp.s = load i16* null %tmp1.s = sext i16 %tmp.s to i32 ret i32 %tmp1.s diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll index 44315066c4c00..a3abdb603fa79 100644 --- a/test/CodeGen/ARM/ldr_frame.ll +++ b/test/CodeGen/ARM/ldr_frame.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | not grep mov +; RUN: llc < %s -march=arm | not grep mov define i32 @f1() { %buf = alloca [32 x i32], align 4 diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll index 0491563fc6a69..97a48e1377e55 100644 --- a/test/CodeGen/ARM/ldr_post.ll +++ b/test/CodeGen/ARM/ldr_post.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | \ +; RUN: llc < %s -march=arm | \ ; RUN: grep {ldr.*\\\[.*\],} | count 1 define i32 @test(i32 %a, i32 %b, i32 %c) { diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll index 7e447422361ea..7c442845682ec 100644 --- a/test/CodeGen/ARM/ldr_pre.ll +++ b/test/CodeGen/ARM/ldr_pre.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | \ +; RUN: llc < %s -march=arm | \ ; RUN: grep {ldr.*\\!} | count 2 define i32* @test1(i32* %X, i32* %dest) { diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index f1bee058a0fc7..8f7ae55c6eaf1 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,12 +1,20 @@ -; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | grep ldrd -; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin | not grep ldrd -; RUN: llvm-as < %s | llc -mtriple=armv6-eabi | not grep ldrd +; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5 +; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI ; rdar://r6949835 @b = external global i64* define i64 @t(i64 %a) nounwind readonly { entry: +;V6: ldrd r2, [r2] + +;V5: ldr r3, [r2] +;V5-NEXT: ldr r2, [r2, #+4] + +;EABI: ldr r3, [r2] +;EABI-NEXT: ldr r2, [r2, #+4] + %0 = load i64** @b, align 4 %1 = load i64* %0, align 4 %2 = mul i64 %1, %a diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll index 8896ead5a51cd..56a4a477f510c 100644 --- a/test/CodeGen/ARM/load-global.ll +++ b/test/CodeGen/ARM/load-global.ll @@ -1,14 +1,10 @@ -; RUN: llvm-as < %s | \ -; RUN: llc -mtriple=arm-apple-darwin -relocation-model=static | \ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static 
| \ ; RUN: not grep {L_G\$non_lazy_ptr} -; RUN: llvm-as < %s | \ -; RUN: llc -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \ ; RUN: grep {L_G\$non_lazy_ptr} | count 2 -; RUN: llvm-as < %s | \ -; RUN: llc -mtriple=arm-apple-darwin -relocation-model=pic | \ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \ ; RUN: grep {ldr.*pc} | count 1 -; RUN: llvm-as < %s | \ -; RUN: llc -mtriple=arm-linux-gnueabi -relocation-model=pic | \ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -relocation-model=pic | \ ; RUN: grep {GOT} | count 1 @G = external global i32 diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll index 05097328102c1..253b0e145f811 100644 --- a/test/CodeGen/ARM/load.ll +++ b/test/CodeGen/ARM/load.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm > %t +; RUN: llc < %s -march=arm > %t ; RUN: grep ldrsb %t ; RUN: grep ldrb %t ; RUN: grep ldrsh %t diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll index 4bab330c73604..c76a5e4d4d1f8 100644 --- a/test/CodeGen/ARM/long-setcc.ll +++ b/test/CodeGen/ARM/long-setcc.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | grep cmp | count 1 +; RUN: llc < %s -march=arm | grep cmp | count 1 define i1 @t1(i64 %x) { diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll index fe0ee5473305b..2fcaac0d9c982 100644 --- a/test/CodeGen/ARM/long.ll +++ b/test/CodeGen/ARM/long.ll @@ -1,13 +1,13 @@ -; RUN: llvm-as < %s | llc -march=arm -asm-verbose | \ +; RUN: llc < %s -march=arm -asm-verbose | \ ; RUN: grep -- {-2147483648} | count 3 -; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 3 -; RUN: llvm-as < %s | llc -march=arm | grep adds | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep adc | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep {subs } | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep sbc | count 1 -; RUN: llvm-as < %s | llc -march=arm | \ +; RUN: llc < %s -march=arm | grep mvn | count 3 +; RUN: llc < %s -march=arm | grep adds | count 1 +; RUN: llc < %s -march=arm | grep adc | count 1 +; RUN: llc < %s -march=arm | grep {subs } | count 1 +; RUN: llc < %s -march=arm | grep sbc | count 1 +; RUN: llc < %s -march=arm | \ ; RUN: grep smull | count 1 -; RUN: llvm-as < %s | llc -march=arm | \ +; RUN: llc < %s -march=arm | \ ; RUN: grep umull | count 1 define i64 @f1() { diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 55d0cdc54151c..057b5f067f803 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm > %t +; RUN: llc < %s -march=arm > %t ; RUN: grep rrx %t | count 1 ; RUN: grep __ashldi3 %t ; RUN: grep __ashrdi3 %t diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll index 3881e91453b47..507ec2c7bd3e2 100644 --- a/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/test/CodeGen/ARM/lsr-code-insertion.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -stats |& grep {39.*Number of machine instrs printed} -; RUN: llvm-as < %s | llc -stats |& grep {.*Number of re-materialization} +; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed} +; RUN: llc < %s -stats |& grep {.*Number of re-materialization} ; This test really wants to check that the resultant "cond_true" block only ; has a single store in it, and that cond_true55 only has code to materialize ; the constant and do a store. 
We do *not* want something like this: diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll index 02902f2debd39..8130019cbfd95 100644 --- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll +++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | grep lsl | grep -F {lsl #2\]} +; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]} ; Should use scaled addressing mode. define void @sintzero(i32* %a) nounwind { diff --git a/test/CodeGen/ARM/mem.ll b/test/CodeGen/ARM/mem.ll index e98316576d8a1..f46c7a5857ab9 100644 --- a/test/CodeGen/ARM/mem.ll +++ b/test/CodeGen/ARM/mem.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm | grep strb -; RUN: llvm-as < %s | llc -march=arm | grep strh +; RUN: llc < %s -march=arm | grep strb +; RUN: llc < %s -march=arm | grep strh define void @f1() { entry: diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index 4bf0b4f6f3b19..ed20c32dc0d59 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -1,7 +1,7 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldmia -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep stmia -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrb -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin | grep ldrh +; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia +; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia +; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb +; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } @src = external global %struct.x diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index 0b58bf680157a..41d5944cb83e5 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define void @f() { entry: diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll new file mode 100644 index 0000000000000..85407fa254b08 --- /dev/null +++ b/test/CodeGen/ARM/mls.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=arm -mattr=+v6t2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1 + +define i32 @f1(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = sub i32 %c, %tmp1 + ret i32 %tmp2 +} + +; sub doesn't commute, so no mls for this one +define i32 @f2(i32 %a, i32 %b, i32 %c) { + %tmp1 = mul i32 %a, %b + %tmp2 = sub i32 %tmp1, %c + ret i32 %tmp2 +} diff --git a/test/CodeGen/ARM/mul.ll b/test/CodeGen/ARM/mul.ll index 3543b5de55db7..466a8020accec 100644 --- a/test/CodeGen/ARM/mul.ll +++ b/test/CodeGen/ARM/mul.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=arm | grep mul | count 2 -; RUN: llvm-as < %s | llc -march=arm | grep lsl | count 2 +; RUN: llc < %s -march=arm | grep mul | count 2 +; RUN: llc < %s -march=arm | grep lsl | count 2 define i32 @f1(i32 %u) { %tmp = mul i32 %u, %u diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll new file mode 100644 index 0000000000000..93188cdd883f6 --- /dev/null +++ b/test/CodeGen/ARM/mul_const.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=arm | FileCheck %s + +define i32 @t1(i32 %v) nounwind readnone { +entry: +; CHECK: t1: +; CHECK: add r0, r0, r0, lsl #3 + %0 = mul i32 %v, 9 + ret i32 %0 +} + +define i32 @t2(i32 %v) nounwind readnone { +entry: +; CHECK: t2: +; CHECK: rsb r0, r0, r0, lsl #3 + %0 = mul i32 %v, 7 + ret i32 %0 +} diff --git a/test/CodeGen/ARM/mulhi.ll 
b/test/CodeGen/ARM/mulhi.ll index de75e96b87046..148f291e551d7 100644 --- a/test/CodeGen/ARM/mulhi.ll +++ b/test/CodeGen/ARM/mulhi.ll @@ -1,7 +1,7 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \ +; RUN: llc < %s -march=arm -mattr=+v6 +; RUN: llc < %s -march=arm -mattr=+v6 | \ ; RUN: grep smmul | count 1 -; RUN: llvm-as < %s | llc -march=arm | grep umull | count 1 +; RUN: llc < %s -march=arm | grep umull | count 1 define i32 @smulhi(i32 %x, i32 %y) { %tmp = sext i32 %x to i64 ; [#uses=1] diff --git a/test/CodeGen/ARM/mvn.ll b/test/CodeGen/ARM/mvn.ll index a7ef907033de3..571c21a833ecd 100644 --- a/test/CodeGen/ARM/mvn.ll +++ b/test/CodeGen/ARM/mvn.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm | grep mvn | count 8 +; RUN: llc < %s -march=arm | grep mvn | count 8 define i32 @f1() { entry: diff --git a/test/CodeGen/ARM/neon_arith1.ll b/test/CodeGen/ARM/neon_arith1.ll index 18b516fc1a8c6..58927374177a0 100644 --- a/test/CodeGen/ARM/neon_arith1.ll +++ b/test/CodeGen/ARM/neon_arith1.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vadd +; RUN: llc < %s -march=arm -mattr=+neon | grep vadd define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind { entry: diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll index 8901ba177dac6..2796dec5b9705 100644 --- a/test/CodeGen/ARM/neon_ld1.ll +++ b/test/CodeGen/ARM/neon_ld1.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fldd | count 4 -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fstd -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd +; RUN: llc < %s -march=arm -mattr=+neon | grep fldd | count 4 +; RUN: llc < %s -march=arm -mattr=+neon | grep fstd +; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind { entry: diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll index a26904afca30b..547bab76356b9 100644 --- a/test/CodeGen/ARM/neon_ld2.ll +++ b/test/CodeGen/ARM/neon_ld2.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vldmia | count 4 -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep vstmia | count 1 -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | grep fmrrd | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4 +; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | grep fmrrd | count 2 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind { entry: diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll index 151beac3efce0..1e2e7aa0c8ff1 100644 --- a/test/CodeGen/ARM/pack.ll +++ b/test/CodeGen/ARM/pack.ll @@ -1,6 +1,6 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \ +; RUN: llc < %s -march=arm -mattr=+v6 | \ ; RUN: grep pkhbt | count 5 -; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \ +; RUN: llc < %s -march=arm -mattr=+v6 | \ ; RUN: grep pkhtb | count 4 define i32 @test1(i32 %X, i32 %Y) { diff --git a/test/CodeGen/ARM/pr3502.ll b/test/CodeGen/ARM/pr3502.ll index dee3fc43f9733..606d9698b977d 100644 --- a/test/CodeGen/ARM/pr3502.ll +++ b/test/CodeGen/ARM/pr3502.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-none-linux-gnueabi +; RUN: llc < %s -mtriple=arm-none-linux-gnueabi ;pr3502 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" diff --git 
a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll index e5eeccb356a5b..03376a4c61b7c 100644 --- a/test/CodeGen/ARM/private.ll +++ b/test/CodeGen/ARM/private.ll @@ -1,6 +1,6 @@ ; Test to make sure that the 'private' is used correctly. ; -; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnueabi > %t +; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t ; RUN: grep .Lfoo: %t ; RUN: egrep bl.*\.Lfoo %t ; RUN: grep .Lbaz: %t diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 454d36b46f29d..ba9699efd5973 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2 +; RUN: llc < %s -mtriple=arm-apple-darwin +; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 4 %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } %struct.LOCBOX = type { i32, i32, i32, i32 } diff --git a/test/CodeGen/ARM/ret0.ll b/test/CodeGen/ARM/ret0.ll index 792b1690add28..5c312eb98a327 100644 --- a/test/CodeGen/ARM/ret0.ll +++ b/test/CodeGen/ARM/ret0.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define i32 @test() { ret i32 0 diff --git a/test/CodeGen/ARM/ret_arg1.ll b/test/CodeGen/ARM/ret_arg1.ll index 48a1fda35b331..1ab947b1e20d0 100644 --- a/test/CodeGen/ARM/ret_arg1.ll +++ b/test/CodeGen/ARM/ret_arg1.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define i32 @test(i32 %a1) { ret i32 %a1 diff --git a/test/CodeGen/ARM/ret_arg2.ll b/test/CodeGen/ARM/ret_arg2.ll index a74870f85870a..84477d042c749 100644 --- a/test/CodeGen/ARM/ret_arg2.ll +++ b/test/CodeGen/ARM/ret_arg2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define i32 @test(i32 %a1, i32 %a2) { ret i32 %a2 diff --git a/test/CodeGen/ARM/ret_arg3.ll b/test/CodeGen/ARM/ret_arg3.ll index 9210e7b09f58c..f7f9057432d10 100644 --- a/test/CodeGen/ARM/ret_arg3.ll +++ b/test/CodeGen/ARM/ret_arg3.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define i32 @test(i32 %a1, i32 %a2, i32 %a3) { ret i32 %a3 } diff --git a/test/CodeGen/ARM/ret_arg4.ll b/test/CodeGen/ARM/ret_arg4.ll index a9c66e9e98d1a..f7b3e4a282b2b 100644 --- a/test/CodeGen/ARM/ret_arg4.ll +++ b/test/CodeGen/ARM/ret_arg4.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { ret i32 %a4 diff --git a/test/CodeGen/ARM/ret_arg5.ll b/test/CodeGen/ARM/ret_arg5.ll index 620a0175e0728..c4f9fb5e0a9b7 100644 --- a/test/CodeGen/ARM/ret_arg5.ll +++ b/test/CodeGen/ARM/ret_arg5.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm +; RUN: llc < %s -march=arm define i32 @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) { ret i32 %a5 diff --git a/test/CodeGen/ARM/ret_f32_arg2.ll b/test/CodeGen/ARM/ret_f32_arg2.ll index 287d92b9eb6e3..2bafea6755318 100644 --- a/test/CodeGen/ARM/ret_f32_arg2.ll +++ b/test/CodeGen/ARM/ret_f32_arg2.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 +; RUN: llc < %s -march=arm -mattr=+vfp2 define float @test_f32(float %a1, float %a2) { ret float %a2 diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll index 3418be93e1e87..c6ce60ecb9c86 100644 --- a/test/CodeGen/ARM/ret_f32_arg5.ll +++ b/test/CodeGen/ARM/ret_f32_arg5.ll @@ -1,4 +1,4 @@ -; 
diff --git a/test/CodeGen/ARM/ret_f32_arg5.ll b/test/CodeGen/ARM/ret_f32_arg5.ll
index 3418be93e1e87..c6ce60ecb9c86 100644
--- a/test/CodeGen/ARM/ret_f32_arg5.ll
+++ b/test/CodeGen/ARM/ret_f32_arg5.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define float @test_f32_arg5(float %a1, float %a2, float %a3, float %a4, float %a5) {
 ret float %a5
diff --git a/test/CodeGen/ARM/ret_f64_arg2.ll b/test/CodeGen/ARM/ret_f64_arg2.ll
index 66848d5fb49b1..386e85f4b9a55 100644
--- a/test/CodeGen/ARM/ret_f64_arg2.ll
+++ b/test/CodeGen/ARM/ret_f64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define double @test_f64(double %a1, double %a2) {
 ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
index 626ee6fb13749..bdb0a606227b6 100644
--- a/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mcpu=arm8 -mattr=+vfp2
+; RUN: llc < %s -march=arm -mcpu=arm8 -mattr=+vfp2

 define double @test_double_arg_reg_split(i32 %a1, double %a2) {
 ret double %a2
diff --git a/test/CodeGen/ARM/ret_f64_arg_split.ll b/test/CodeGen/ARM/ret_f64_arg_split.ll
index b03b604beee75..4f841a3cde7b6 100644
--- a/test/CodeGen/ARM/ret_f64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define double @test_double_arg_split(i64 %a1, i32 %a2, double %a3) {
 ret double %a3
diff --git a/test/CodeGen/ARM/ret_f64_arg_stack.ll b/test/CodeGen/ARM/ret_f64_arg_stack.ll
index ba3ec7fb75173..21443177d3de6 100644
--- a/test/CodeGen/ARM/ret_f64_arg_stack.ll
+++ b/test/CodeGen/ARM/ret_f64_arg_stack.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define double @test_double_arg_stack(i64 %a1, i32 %a2, i32 %a3, double %a4) {
 ret double %a4
diff --git a/test/CodeGen/ARM/ret_i128_arg2.ll b/test/CodeGen/ARM/ret_i128_arg2.ll
index 0fe98e6b70fc4..908c34f8cda68 100644
--- a/test/CodeGen/ARM/ret_i128_arg2.ll
+++ b/test/CodeGen/ARM/ret_i128_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define i128 @test_i128(i128 %a1, i128 %a2, i128 %a3) {
 ret i128 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg2.ll b/test/CodeGen/ARM/ret_i64_arg2.ll
index b015a96e0bf0a..b1a1024acaf1c 100644
--- a/test/CodeGen/ARM/ret_i64_arg2.ll
+++ b/test/CodeGen/ARM/ret_i64_arg2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define i64 @test_i64(i64 %a1, i64 %a2) {
 ret i64 %a2
diff --git a/test/CodeGen/ARM/ret_i64_arg3.ll b/test/CodeGen/ARM/ret_i64_arg3.ll
index 5dfecca319a17..ffc1d2f4b52af 100644
--- a/test/CodeGen/ARM/ret_i64_arg3.ll
+++ b/test/CodeGen/ARM/ret_i64_arg3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define i64 @test_i64_arg3(i64 %a1, i64 %a2, i64 %a3) {
 ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_i64_arg_split.ll b/test/CodeGen/ARM/ret_i64_arg_split.ll
index 5bd5cb2a230be..956bce558fc5e 100644
--- a/test/CodeGen/ARM/ret_i64_arg_split.ll
+++ b/test/CodeGen/ARM/ret_i64_arg_split.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2
+; RUN: llc < %s -march=arm -mattr=+vfp2

 define i64 @test_i64_arg_split(i64 %a1, i32 %a2, i64 %a3) {
 ret i64 %a3
diff --git a/test/CodeGen/ARM/ret_void.ll b/test/CodeGen/ARM/ret_void.ll
index 68db8c423461e..2b7ae05628843 100644
--- a/test/CodeGen/ARM/ret_void.ll
+++ b/test/CodeGen/ARM/ret_void.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 define void @test() {
 ret void
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 68f6264e8a063..1c12268ef86c8 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep rev16
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep revsh
+; RUN: llc < %s -march=arm -mattr=+v6 | grep rev16
+; RUN: llc < %s -march=arm -mattr=+v6 | grep revsh

 define i32 @test1(i32 %X) {
 %tmp1 = lshr i32 %X, 8 ; [#uses=3]
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
new file mode 100644
index 0000000000000..923f52a868626
--- /dev/null
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
+
+define i32 @f1(i32 %a) {
+entry:
+; CHECK: f1:
+; CHECK: sbfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = ashr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f2(i32 %a) {
+entry:
+; CHECK: f2:
+; CHECK: ubfx r0, r0, #0, #20
+ %tmp = shl i32 %a, 12
+ %tmp2 = lshr i32 %tmp, 12
+ ret i32 %tmp2
+}
+
+define i32 @f3(i32 %a) {
+entry:
+; CHECK: f3:
+; CHECK: sbfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = ashr i32 %tmp, 29
+ ret i32 %tmp2
+}
+
+define i32 @f4(i32 %a) {
+entry:
+; CHECK: f4:
+; CHECK: ubfx r0, r0, #5, #3
+ %tmp = shl i32 %a, 24
+ %tmp2 = lshr i32 %tmp, 29
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/section.ll b/test/CodeGen/ARM/section.ll
index aa658451675bc..7a566d49d322d 100644
--- a/test/CodeGen/ARM/section.ll
+++ b/test/CodeGen/ARM/section.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
 ; RUN: grep {__DTOR_END__:}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux | \
+; RUN: llc < %s -mtriple=arm-linux | \
 ; RUN: grep {\\.section.\\.dtors,"aw",.progbits}

 @__DTOR_END__ = internal global [1 x i32] zeroinitializer, section ".dtors" ; <[1 x i32]*> [#uses=0]
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 5148a5b86998c..85c8b5b8477f8 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,13 +1,9 @@
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movgt | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movlt | count 3
-; RUN: llvm-as < %s | llc -march=arm | grep movle | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movls | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep movhi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN: grep fcpydmi | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP

 define i32 @f1(i32 %a.s) {
+;CHECK: f1:
+;CHECK: moveq
 entry:
 %tmp = icmp eq i32 %a.s, 4
 %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -15,6 +11,8 @@ entry:
 }

 define i32 @f2(i32 %a.s) {
+;CHECK: f2:
+;CHECK: movgt
 entry:
 %tmp = icmp sgt i32 %a.s, 4
 %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -22,6 +20,8 @@ entry:
 }

 define i32 @f3(i32 %a.s, i32 %b.s) {
+;CHECK: f3:
+;CHECK: movlt
 entry:
 %tmp = icmp slt i32 %a.s, %b.s
 %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -29,6 +29,8 @@ entry:
 }

 define i32 @f4(i32 %a.s, i32 %b.s) {
+;CHECK: f4:
+;CHECK: movle
 entry:
 %tmp = icmp sle i32 %a.s, %b.s
 %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -36,6 +38,8 @@ entry:
 }

 define i32 @f5(i32 %a.u, i32 %b.u) {
+;CHECK: f5:
+;CHECK: movls
 entry:
 %tmp = icmp ule i32 %a.u, %b.u
 %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -43,6 +47,8 @@ entry:
 }

 define i32 @f6(i32 %a.u, i32 %b.u) {
+;CHECK: f6:
+;CHECK: movhi
 entry:
 %tmp = icmp ugt i32 %a.u, %b.u
 %tmp1.s = select i1 %tmp, i32 2, i32 3
@@ -50,6 +56,11 @@ entry:
 }

 define double @f7(double %a, double %b) {
+;CHECK: f7:
+;CHECK: movlt
+;CHECK: movlt
+;CHECK-VFP: f7:
+;CHECK-VFP: fcpydmi
 %tmp = fcmp olt double %a, 1.234e+00
 %tmp1 = select i1 %tmp, double -1.000e+00, double %b
 ret double %tmp1
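The select.ll rewrite above is the template for the FileCheck conversions in the rest of the patch: eight separate grep | count pipelines collapse into two llc invocations, and each assertion moves next to the function it tests. A stripped-down sketch of the two-prefix idiom (hypothetical function name, not from the patch):

; RUN: llc < %s -march=arm | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP

define i32 @sel_slt(i32 %a, i32 %b) {
;CHECK: sel_slt:
;CHECK: movlt
;CHECK-VFP: sel_slt:
entry:
  %tmp = icmp slt i32 %a, %b
  %tmp1 = select i1 %tmp, i32 2, i32 3
  ret i32 %tmp1
}

The plain FileCheck run matches the ;CHECK: lines; passing --check-prefix=CHECK-VFP makes the second run match only the ;CHECK-VFP: lines, so one file can assert different code for different subtarget configurations.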
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 6855e3227b991..7fd91ceea5ad7 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep mov | count 2
+; RUN: llc < %s -march=arm | grep mov | count 2

 define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
 %tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index cae1c44a729d2..2bbe9fd2602c2 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep add | grep lsl
-; RUN: llvm-as < %s | llc -march=arm | grep bic | grep asr
+; RUN: llc < %s -march=arm | grep add | grep lsl
+; RUN: llc < %s -march=arm | grep bic | grep asr


 define i32 @test1(i32 %X, i32 %Y, i8 %sh) {
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index 7a4e4887cc7b6..b7ab2e796f8a4 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -1,10 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm -mattr=+v5TE
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
 ; RUN: grep smulbt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
 ; RUN: grep smultt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v5TE | \
+; RUN: llc < %s -march=arm -mattr=+v5TE | \
 ; RUN: grep smlabt | count 1

 @x = weak global i16 0 ; [#uses=1]
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
new file mode 100644
index 0000000000000..f4b27a7603e52
--- /dev/null
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=armv7-elf -mattr=+neon | FileCheck %s
+; PR4789
+
+%bar = type { float, float, float }
+%baz = type { i32, [16 x %bar], [16 x float], [16 x i32], i8 }
+%foo = type { <4 x float> }
+%quux = type { i32 (...)**, %baz*, i32 }
+%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+
+define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
+; CHECK: aaa:
+; CHECK: vstmia sp
+; CHECK: vldmia sp
+entry:
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 6.300000e+01, float* undef, align 4
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ store float 0.000000e+00, float* undef, align 4
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ br label %bb4
+
+bb4: ; preds = %bb193, %entry
+ %besterror.0.2264 = phi <4 x float> [ undef, %entry ], [ %besterror.0.0, %bb193 ] ; <<4 x float>> [#uses=2]
+ %part0.0.0261 = phi <4 x float> [ zeroinitializer, %entry ], [ %23, %bb193 ] ; <<4 x float>> [#uses=2]
+ %3 = fmul <4 x float> zeroinitializer, %0 ; <<4 x float>> [#uses=2]
+ %4 = fadd <4 x float> %3, %part0.0.0261 ; <<4 x float>> [#uses=1]
+ %5 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1]
+ %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1]
+ %7 = fmul <4 x float> %1, undef ; <<4 x float>> [#uses=1]
+ %8 = fadd <4 x float> %7, ; <<4 x float>> [#uses=1]
+ %9 = fptosi <4 x float> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %10 = sitofp <4 x i32> %9 to <4 x float> ; <<4 x float>> [#uses=1]
+ %11 = fmul <4 x float> %10, %2 ; <<4 x float>> [#uses=1]
+ %12 = fmul <4 x float> undef, %6 ; <<4 x float>> [#uses=1]
+ %13 = fmul <4 x float> %11, %4 ; <<4 x float>> [#uses=1]
+ %14 = fsub <4 x float> %12, %13 ; <<4 x float>> [#uses=1]
+ %15 = fsub <4 x float> %14, undef ; <<4 x float>> [#uses=1]
+ %16 = fmul <4 x float> %15, ; <<4 x float>> [#uses=1]
+ %17 = fadd <4 x float> %16, undef ; <<4 x float>> [#uses=1]
+ %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1]
+ %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1]
+ %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
+ %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
+ %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
+ br i1 undef, label %bb193, label %bb186
+
+bb186: ; preds = %bb4
+ br label %bb193
+
+bb193: ; preds = %bb186, %bb4
+ %besterror.0.0 = phi <4 x float> [ %21, %bb186 ], [ %besterror.0.2264, %bb4 ] ; <<4 x float>> [#uses=1]
+ %23 = fadd <4 x float> %part0.0.0261, zeroinitializer ; <<4 x float>> [#uses=1]
+ br label %bb4
+}
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
index c3dd65a594d67..1dd57ddb9f2fc 100644
--- a/test/CodeGen/ARM/stack-frame.ll
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=arm | grep add | count 1
+; RUN: llc < %s -march=arm
+; RUN: llc < %s -march=arm | grep add | count 1

 define void @f1() {
 %c = alloca i8, align 1
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index ed5e4c5f59433..22a7ecb4aa283 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | grep stm | count 2

 @"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
 @"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index ba813805bacc1..801b9cee37d69 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN: grep {strh .*\\\[.*\], #-4} | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN: grep {str .*\\\[.*\],} | count 1

 define i16 @test1(i32* %X, i16* %A) {
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index e9f194574e43e..f8d3df29c4089 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {str.*\\!}
-; RUN: llvm-as < %s | llc -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {str.*\\!}
+; RUN: llc < %s -mtriple=arm-linux-gnu | grep {ldr.*\\\[.*\], #+4}

 @b = external global i64*
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index c02663fa4040e..e56e3f253e63a 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN: grep {str.*\\!} | count 2

 define void @test1(i32* %X, i32* %A, i32** %dest) {
diff --git a/test/CodeGen/ARM/str_trunc.ll b/test/CodeGen/ARM/str_trunc.ll
index 77c66ec2c7e0c..2f1166b64b59b 100644
--- a/test/CodeGen/ARM/str_trunc.ll
+++ b/test/CodeGen/ARM/str_trunc.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN: grep strb | count 1
-; RUN: llvm-as < %s | llc -march=arm | \
+; RUN: llc < %s -march=arm | \
 ; RUN: grep strh | count 1

 define void @test1(i32 %v, i16* %ptr) {
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
index e9f302c88d1c0..4752f17f1e1c3 100644
--- a/test/CodeGen/ARM/sxt_rot.ll
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN: grep sxtb | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN: grep sxtb | grep ror | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | \
+; RUN: llc < %s -march=arm -mattr=+v6 | \
 ; RUN: grep sxtab | count 1

 define i32 @test0(i8 %A) {
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
new file mode 100644
index 0000000000000..848a4dfed0542
--- /dev/null
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+
+define i32 @f6(i32 %a) {
+; CHECK:f6
+; CHECK: movw r0, #:lower16:65537123
+; CHECK: movt r0, #:upper16:65537123
+ %tmp = add i32 0, 65537123
+ ret i32 %tmp
+}
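The expectations in the new t2-imm.ll are just the 32-bit constant split into halves: movw writes the low 16 bits of the register and movt the high 16. For reference, 65537123 = 0x03E80463, so #:lower16:65537123 is 0x0463 = 1123 and #:upper16:65537123 is 0x03E8 = 1000, and 1000 * 65536 + 1123 = 65537123.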
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
index 6476b483d7d9e..3143387ead657 100644
--- a/test/CodeGen/ARM/thread_pointer.ll
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN: grep {__aeabi_read_tp}

 define i8* @test() {
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index 6866a42db4951..1087094e5798a 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN: grep {i(tpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN: grep {__aeabi_read_tp}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
 ; RUN: -relocation-model=pic | grep {__tls_get_addr}


diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index 90e3bcf9040b5..328472081e197 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN: grep {i(gottpoff)}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN: grep {ldr r., \[pc, r.\]}
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
 ; RUN: -relocation-model=pic | grep {__tls_get_addr}

 @i = external thread_local global i32 ; [#uses=2]
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index df2913b61cda9..df7a4ca02db8e 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
 ; RUN: grep {tbss}

 %struct.anon = type { i32, i32 }
diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll
index 6111ec9d2f48b..3033c2ba3e252 100644
--- a/test/CodeGen/ARM/trunc_ldr.ll
+++ b/test/CodeGen/ARM/trunc_ldr.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep ldrb.*7 | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep ldrsb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrb.*7 | count 1
+; RUN: llc < %s -march=arm | grep ldrsb.*7 | count 1

 %struct.A = type { i8, i8, i8, i8, i16, i8, i8, %struct.B** }
 %struct.B = type { float, float, i32, i32, i32, [0 x i8] }
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
index 0e85fb69eb3ae..2da08b60e86cd 100644
--- a/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | not grep orr
-; RUN: llvm-as < %s | llc -march=arm | not grep mov
+; RUN: llc < %s -march=arm | not grep orr
+; RUN: llc < %s -march=arm | not grep mov

 define void @bar(i8* %P, i16* %Q) {
 entry:
diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll
index bdeee3fa43fee..c83111e69937f 100644
--- a/test/CodeGen/ARM/tst_teq.ll
+++ b/test/CodeGen/ARM/tst_teq.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep tst
-; RUN: llvm-as < %s | llc -march=arm | grep teq
+; RUN: llc < %s -march=arm | grep tst
+; RUN: llc < %s -march=arm | grep teq

 define i32 @f(i32 %a) {
 entry:
diff --git a/test/CodeGen/ARM/uint64tof64.ll b/test/CodeGen/ARM/uint64tof64.ll
index 055c3c370ee69..32eb225a2ad6b 100644
--- a/test/CodeGen/ARM/uint64tof64.ll
+++ b/test/CodeGen/ARM/uint64tof64.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+vfp2
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2

 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 %struct.__sFILEX = type opaque
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index dad1897463a68..fcaa2b3103e93 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -1,16 +1,31 @@
-; RUN: llvm-as < %s | \
-; RUN: llc -march=arm -o %t -f
-; RUN: grep ldrb %t | count 4
-; RUN: grep strb %t | count 4
+; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6
+; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s -check-prefix=V7
+; rdar://7113725

- %struct.p = type <{ i8, i32 }>
-@t = global %struct.p <{ i8 1, i32 10 }> ; <%struct.p*> [#uses=1]
-@u = weak global %struct.p zeroinitializer ; <%struct.p*> [#uses=1]
-
-define i32 @main() {
+define arm_apcscc void @t(i8* nocapture %a, i8* nocapture %b) nounwind {
 entry:
- %tmp3 = load i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1 ; [#uses=2]
- store i32 %tmp3, i32* getelementptr (%struct.p* @u, i32 0, i32 1), align 1
- ret i32 %tmp3
+; GENERIC: t:
+; GENERIC: ldrb r2
+; GENERIC: ldrb r3
+; GENERIC: ldrb r12
+; GENERIC: ldrb r1
+; GENERIC: strb r1
+; GENERIC: strb r12
+; GENERIC: strb r3
+; GENERIC: strb r2
+
+; DARWIN_V6: t:
+; DARWIN_V6: ldr r1
+; DARWIN_V6: str r1
+
+; V7: t:
+; V7: ldr r1
+; V7: str r1
+ %__src1.i = bitcast i8* %b to i32* ; [#uses=1]
+ %__dest2.i = bitcast i8* %a to i32* ; [#uses=1]
+ %tmp.i = load i32* %__src1.i, align 1 ; [#uses=1]
+ store i32 %tmp.i, i32* %__dest2.i, align 1
+ ret void
 }
diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll
index 149afc4abafe9..bd28034b3adb9 100644
--- a/test/CodeGen/ARM/unord.ll
+++ b/test/CodeGen/ARM/unord.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep movne | count 1
-; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
+; RUN: llc < %s -march=arm | grep movne | count 1
+; RUN: llc < %s -march=arm | grep moveq | count 1

 define i32 @f1(float %X, float %Y) {
 %tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll
index 09c74ebbb7765..6307795499bf4 100644
--- a/test/CodeGen/ARM/uxt_rot.ll
+++ b/test/CodeGen/ARM/uxt_rot.ll
@@ -1,6 +1,6 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtb | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxtab | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+v6 | grep uxth | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtb | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1
+; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1

 define i8 @test1(i32 %A.u) zeroext {
 %B.u = trunc i32 %A.u to i8
diff --git a/test/CodeGen/ARM/uxtb.ll b/test/CodeGen/ARM/uxtb.ll
index 73e918b7a5d3b..9d6e4bd4dfce1 100644
--- a/test/CodeGen/ARM/uxtb.ll
+++ b/test/CodeGen/ARM/uxtb.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin | \
+; RUN: llc < %s -mtriple=armv6-apple-darwin | \
 ; RUN: grep uxt | count 10

 define i32 @test1(i32 %x) {
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index 98ee1e155ba81..e2dca4647bce6 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vaba\\.s8} %t | count 2
-; RUN: grep {vaba\\.s16} %t | count 2
-; RUN: grep {vaba\\.s32} %t | count 2
-; RUN: grep {vaba\\.u8} %t | count 2
-; RUN: grep {vaba\\.u16} %t | count 2
-; RUN: grep {vaba\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabas8:
+;CHECK: vaba.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = load <8 x i8>* %C
@@ -15,6 +11,8 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }

 define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabas16:
+;CHECK: vaba.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = load <4 x i16>* %C
@@ -23,6 +21,8 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }

 define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabas32:
+;CHECK: vaba.s32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = load <2 x i32>* %C
@@ -31,6 +31,8 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }

 define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabau8:
+;CHECK: vaba.u8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = load <8 x i8>* %C
@@ -39,6 +41,8 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }

 define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabau16:
+;CHECK: vaba.u16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = load <4 x i16>* %C
@@ -47,6 +51,8 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }

 define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabau32:
+;CHECK: vaba.u32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = load <2 x i32>* %C
@@ -55,6 +61,8 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }

 define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQs8:
+;CHECK: vaba.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = load <16 x i8>* %C
@@ -63,6 +71,8 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }

 define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQs16:
+;CHECK: vaba.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = load <8 x i16>* %C
@@ -71,6 +81,8 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }

 define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQs32:
+;CHECK: vaba.s32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = load <4 x i32>* %C
@@ -79,6 +91,8 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }

 define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: vabaQu8:
+;CHECK: vaba.u8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = load <16 x i8>* %C
@@ -87,6 +101,8 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }

 define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vabaQu16:
+;CHECK: vaba.u16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = load <8 x i16>* %C
@@ -95,6 +111,8 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }

 define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vabaQu32:
+;CHECK: vaba.u32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = load <4 x i32>* %C
@@ -117,3 +135,71 @@ declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) no
 declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabals8:
+;CHECK: vabal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabals16:
+;CHECK: vabal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabals32:
+;CHECK: vabal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vabalu8:
+;CHECK: vabal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vabalu16:
+;CHECK: vabal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vabalu32:
+;CHECK: vabal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
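The vabal tests added above mirror the existing vaba ones one step wider: vaba accumulates an absolute difference into lanes of the same width, while vabal widens each difference first, so <8 x i8> inputs feed an <8 x i16> accumulator. One worked lane as a reference point (my arithmetic, not from the patch): for vabal.u8 with an accumulator lane of 100, a = 3 and b = 250, the result lane is 100 + |3 - 250| = 100 + 247 = 347, which would overflow an i8 lane but fits the widened i16 lane.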
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 0fe5ddb94ba49..2b4539361459b 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -1,13 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabd\\.s8} %t | count 2
-; RUN: grep {vabd\\.s16} %t | count 2
-; RUN: grep {vabd\\.s32} %t | count 2
-; RUN: grep {vabd\\.u8} %t | count 2
-; RUN: grep {vabd\\.u16} %t | count 2
-; RUN: grep {vabd\\.u32} %t | count 2
-; RUN: grep {vabd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabds8:
+;CHECK: vabd.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -15,6 +10,8 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }

 define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabds16:
+;CHECK: vabd.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -22,6 +19,8 @@ define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }

 define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabds32:
+;CHECK: vabd.s32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -29,6 +28,8 @@ define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }

 define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdu8:
+;CHECK: vabd.u8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -36,6 +37,8 @@ define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }

 define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdu16:
+;CHECK: vabd.u16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -43,6 +46,8 @@ define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }

 define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdu32:
+;CHECK: vabd.u32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -50,13 +55,17 @@ define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }

 define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vabdf32:
+;CHECK: vabd.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 ret <2 x float> %tmp3
 }

 define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQs8:
+;CHECK: vabd.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -64,6 +73,8 @@ define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }

 define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQs16:
+;CHECK: vabd.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -71,6 +82,8 @@ define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }

 define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQs32:
+;CHECK: vabd.s32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -78,6 +91,8 @@ define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }

 define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vabdQu8:
+;CHECK: vabd.u8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -85,6 +100,8 @@ define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }

 define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vabdQu16:
+;CHECK: vabd.u16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -92,6 +109,8 @@ define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }

 define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vabdQu32:
+;CHECK: vabd.u32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -99,9 +118,11 @@ define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }

 define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vabdQf32:
+;CHECK: vabd.f32
 %tmp1 = load <4 x float>* %A
 %tmp2 = load <4 x float>* %B
- %tmp3 = call <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ %tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 ret <4 x float> %tmp3
 }

@@ -113,7 +134,7 @@ declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
 declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

-declare <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone

 declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +144,66 @@ declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind read
 declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

-declare <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdls8:
+;CHECK: vabdl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdls16:
+;CHECK: vabdl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdls32:
+;CHECK: vabdl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vabdlu8:
+;CHECK: vabdl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vabdlu16:
+;CHECK: vabdl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vabdlu32:
+;CHECK: vabdl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index 629baa762a00c..18ba61f81e658 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -1,64 +1,131 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vabs\\.s8} %t | count 2
-; RUN: grep {vabs\\.s16} %t | count 2
-; RUN: grep {vabs\\.s32} %t | count 2
-; RUN: grep {vabs\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
+;CHECK: vabss8:
+;CHECK: vabs.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
 ret <8 x i8> %tmp2
 }

 define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
+;CHECK: vabss16:
+;CHECK: vabs.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
 ret <4 x i16> %tmp2
 }

 define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
+;CHECK: vabss32:
+;CHECK: vabs.s32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
 ret <2 x i32> %tmp2
 }

 define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
+;CHECK: vabsf32:
+;CHECK: vabs.f32
 %tmp1 = load <2 x float>* %A
- %tmp2 = call <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float> %tmp1)
+ %tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
 ret <2 x float> %tmp2
 }

 define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vabsQs8:
+;CHECK: vabs.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
 ret <16 x i8> %tmp2
 }

 define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vabsQs16:
+;CHECK: vabs.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
 ret <8 x i16> %tmp2
 }

 define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vabsQs32:
+;CHECK: vabs.s32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
 ret <4 x i32> %tmp2
 }

 define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
+;CHECK: vabsQf32:
+;CHECK: vabs.f32
 %tmp1 = load <4 x float>* %A
- %tmp2 = call <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float> %tmp1)
+ %tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
 ret <4 x float> %tmp2
 }

 declare <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone

 declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
+
+define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
+;CHECK: vqabss8:
+;CHECK: vqabs.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
+;CHECK: vqabss16:
+;CHECK: vqabs.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
+;CHECK: vqabss32:
+;CHECK: vqabs.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqabsQs8:
+;CHECK: vqabs.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqabsQs16:
+;CHECK: vqabs.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqabsQs32:
+;CHECK: vqabs.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index b2b0e2397c72f..9fa530750aa16 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vadd\\.i8} %t | count 2
-; RUN: grep {vadd\\.i16} %t | count 2
-; RUN: grep {vadd\\.i32} %t | count 2
-; RUN: grep {vadd\\.i64} %t | count 2
-; RUN: grep {vadd\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

 define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddi8:
+;CHECK: vadd.i8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = add <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }

 define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddi16:
+;CHECK: vadd.i16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = add <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }

 define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddi32:
+;CHECK: vadd.i32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = add <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }

 define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vaddi64:
+;CHECK: vadd.i64
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <1 x i64>* %B
 %tmp3 = add <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }

 define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vaddf32:
+;CHECK: vadd.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = add <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }

 define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vaddQi8:
+;CHECK: vadd.i8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = add <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }

 define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddQi16:
+;CHECK: vadd.i16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = add <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }

 define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddQi32:
+;CHECK: vadd.i32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = add <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }

 define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddQi64:
+;CHECK: vadd.i64
 %tmp1 = load <2 x i64>* %A
 %tmp2 = load <2 x i64>* %B
 %tmp3 = add <2 x i64> %tmp1, %tmp2
@@ -69,8 +82,196 @@ define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }

 define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vaddQf32:
+;CHECK: vadd.f32
 %tmp1 = load <4 x float>* %A
 %tmp2 = load <4 x float>* %B
 %tmp3 = add <4 x float> %tmp1, %tmp2
 ret <4 x float> %tmp3
 }
+
+define <8 x i8> @vaddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vaddhni16:
+;CHECK: vaddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vaddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vaddhni32:
+;CHECK: vaddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vaddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vaddhni64:
+;CHECK: vaddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vaddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vaddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vaddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vraddhni16:
+;CHECK: vraddhn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vraddhni32:
+;CHECK: vraddhn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vraddhni64:
+;CHECK: vraddhn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddls8:
+;CHECK: vaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddls16:
+;CHECK: vaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddls32:
+;CHECK: vaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddlu8:
+;CHECK: vaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddlu16:
+;CHECK: vaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddlu32:
+;CHECK: vaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddws8:
+;CHECK: vaddw.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddws16:
+;CHECK: vaddw.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddws32:
+;CHECK: vaddw.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vaddwu8:
+;CHECK: vaddw.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vaddwu16:
+;CHECK: vaddw.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vaddwu32:
+;CHECK: vaddw.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
index 4bf79c0419225..5f3536cbb9a39 100644
--- a/test/CodeGen/ARM/vargs.ll
+++ b/test/CodeGen/ARM/vargs.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm
+; RUN: llc < %s -march=arm

 @str = internal constant [43 x i8] c"Hello World %d %d %d %d %d %d %d %d %d %d\0A\00" ; <[43 x i8]*> [#uses=1]
 define i32 @main() {
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index 1f2f05bd6086f..e4ef9e3c36c12 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -1,7 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {add sp, sp, #16} | count 1
-; RUN: llvm-as < %s | llc -march=arm -mtriple=arm-linux-gnu | \
-; RUN: grep {add sp, sp, #12} | count 2
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI

 define i32 @f(i32 %a, ...) {
 entry:
@@ -18,4 +16,8 @@
 return: ; preds = %entry
 %retval2 = load i32* %retval ; [#uses=1]
 ret i32 %retval2
+; EABI: add sp, sp, #12
+; EABI: add sp, sp, #16
+; OABI: add sp, sp, #12
+; OABI: add sp, sp, #12
 }
+ %tmp3 = and <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_bici8: +;CHECK: vbic + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp4 = and <8 x i8> %tmp1, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_bici16: +;CHECK: vbic + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp4 = and <4 x i16> %tmp1, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_bici32: +;CHECK: vbic + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 > + %tmp4 = and <2 x i32> %tmp1, %tmp3 + ret <2 x i32> %tmp4 +} + +define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_bici64: +;CHECK: vbic + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = xor <1 x i64> %tmp2, < i64 -1 > + %tmp4 = and <1 x i64> %tmp1, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_bicQi8: +;CHECK: vbic + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + %tmp4 = and <16 x i8> %tmp1, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_bicQi16: +;CHECK: vbic + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > + %tmp4 = and <8 x i16> %tmp1, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_bicQi32: +;CHECK: vbic + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 > + %tmp4 = and <4 x i32> %tmp1, %tmp3 + ret <4 x i32> %tmp4 +} + +define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_bicQi64: +;CHECK: vbic + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 > + %tmp4 = and <2 x i64> %tmp1, %tmp3 + ret <2 x i64> %tmp4 +} + +define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_eori8: +;CHECK: veor + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = xor <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_eori16: +;CHECK: veor + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = xor <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_eori32: +;CHECK: veor + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = xor <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_eori64: +;CHECK: veor + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = xor <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_eorQi8: +;CHECK: veor + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = xor <16 x i8> %tmp1, %tmp2 + ret <16 
x i8> %tmp3 +} + +define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: v_eorQi16: +;CHECK: veor + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = xor <8 x i16> %tmp1, %tmp2 + ret <8 x i16> %tmp3 +} + +define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: v_eorQi32: +;CHECK: veor + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = xor <4 x i32> %tmp1, %tmp2 + ret <4 x i32> %tmp3 +} + +define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: v_eorQi64: +;CHECK: veor + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = xor <2 x i64> %tmp1, %tmp2 + ret <2 x i64> %tmp3 +} + +define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind { +;CHECK: v_mvni8: +;CHECK: vmvn + %tmp1 = load <8 x i8>* %A + %tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + ret <8 x i8> %tmp2 +} + +define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind { +;CHECK: v_mvni16: +;CHECK: vmvn + %tmp1 = load <4 x i16>* %A + %tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 > + ret <4 x i16> %tmp2 +} + +define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind { +;CHECK: v_mvni32: +;CHECK: vmvn + %tmp1 = load <2 x i32>* %A + %tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 > + ret <2 x i32> %tmp2 +} + +define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind { +;CHECK: v_mvni64: +;CHECK: vmvn + %tmp1 = load <1 x i64>* %A + %tmp2 = xor <1 x i64> %tmp1, < i64 -1 > + ret <1 x i64> %tmp2 +} + +define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind { +;CHECK: v_mvnQi8: +;CHECK: vmvn + %tmp1 = load <16 x i8>* %A + %tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > + ret <16 x i8> %tmp2 +} + +define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind { +;CHECK: v_mvnQi16: +;CHECK: vmvn + %tmp1 = load <8 x i16>* %A + %tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 > + ret <8 x i16> %tmp2 +} + +define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind { +;CHECK: v_mvnQi32: +;CHECK: vmvn + %tmp1 = load <4 x i32>* %A + %tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 > + ret <4 x i32> %tmp2 +} + +define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind { +;CHECK: v_mvnQi64: +;CHECK: vmvn + %tmp1 = load <2 x i64>* %A + %tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 > + ret <2 x i64> %tmp2 +} + +define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: v_orri8: +;CHECK: vorr + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = or <8 x i8> %tmp1, %tmp2 + ret <8 x i8> %tmp3 +} + +define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: v_orri16: +;CHECK: vorr + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = or <4 x i16> %tmp1, %tmp2 + ret <4 x i16> %tmp3 +} + +define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: v_orri32: +;CHECK: vorr + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = or <2 x i32> %tmp1, %tmp2 + ret <2 x i32> %tmp3 +} + +define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: v_orri64: +;CHECK: vorr + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = or <1 x i64> %tmp1, %tmp2 + ret <1 x i64> %tmp3 +} + +define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: v_orrQi8: +;CHECK: vorr + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = or <16 x i8> %tmp1, %tmp2 + ret <16 x i8> %tmp3 +} + 
+define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_orrQi16:
+;CHECK: vorr
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = or <8 x i16> %tmp1, %tmp2
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_orrQi32:
+;CHECK: vorr
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = or <4 x i32> %tmp1, %tmp2
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_orrQi64:
+;CHECK: vorr
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = or <2 x i64> %tmp1, %tmp2
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: v_orni8:
+;CHECK: vorn
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <8 x i8> %tmp1, %tmp3
+ ret <8 x i8> %tmp4
+}
+
+define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: v_orni16:
+;CHECK: vorn
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <4 x i16> %tmp1, %tmp3
+ ret <4 x i16> %tmp4
+}
+
+define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: v_orni32:
+;CHECK: vorn
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
+ %tmp4 = or <2 x i32> %tmp1, %tmp3
+ ret <2 x i32> %tmp4
+}
+
+define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: v_orni64:
+;CHECK: vorn
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
+ %tmp4 = or <1 x i64> %tmp1, %tmp3
+ ret <1 x i64> %tmp4
+}
+
+define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: v_ornQi8:
+;CHECK: vorn
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
+ %tmp4 = or <16 x i8> %tmp1, %tmp3
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: v_ornQi16:
+;CHECK: vorn
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
+ %tmp4 = or <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: v_ornQi32:
+;CHECK: vorn
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
+ %tmp4 = or <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: v_ornQi64:
+;CHECK: vorn
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i64>* %B
+ %tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
+ %tmp4 = or <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtsti8:
+;CHECK: vtst.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = and <8 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtsti16:
+;CHECK: vtst.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = and <4 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtsti32:
+;CHECK: vtst.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = and <2 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
+ ret <2 x i32> %tmp5
+}
+
+define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtstQi8:
+;CHECK: vtst.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = and <16 x i8> %tmp1, %tmp2
+ %tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
+ %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtstQi16:
+;CHECK: vtst.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = and <8 x i16> %tmp1, %tmp2
+ %tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
+ %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtstQi32:
+;CHECK: vtst.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = and <4 x i32> %tmp1, %tmp2
+ %tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
+ %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
+ ret <4 x i32> %tmp5
+}
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index 37ddf4de6d329..9f3bb4e1030c7 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -1,8 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vbsl %t | count 8
-; Note: function names do not include "vbsl" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: v_bsli8:
+;CHECK: vbsl
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = load <8 x i8>* %C
@@ -14,6 +14,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: v_bsli16:
+;CHECK: vbsl
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = load <4 x i16>* %C
@@ -25,6 +27,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: v_bsli32:
+;CHECK: vbsl
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = load <2 x i32>* %C
@@ -36,6 +40,8 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
+;CHECK: v_bsli64:
+;CHECK: vbsl
  %tmp1 = load <1 x i64>* %A
  %tmp2 = load <1 x i64>* %B
  %tmp3 = load <1 x i64>* %C
@@ -47,6 +53,8 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
 }
 
 define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
+;CHECK: v_bslQi8:
+;CHECK: vbsl
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = load <16 x i8>* %C
@@ -58,6 +66,8 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: v_bslQi16:
+;CHECK: vbsl
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = load <8 x i16>* %C
@@ -69,6 +79,8 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
 }
 
 define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: v_bslQi32:
+;CHECK: vbsl
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = load <4 x i32>* %C
@@ -80,6 +92,8 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
 }
 
 define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
+;CHECK: v_bslQi64:
+;CHECK: vbsl
  %tmp1 = load <2 x i64>* %A
  %tmp2 = load <2 x i64>* %B
  %tmp3 = load <2 x i64>* %C
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index 77f1890d08650..e4787518e731c 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -1,61 +1,81 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.i8} %t | count 2
-; RUN: grep {vceq\\.i16} %t | count 2
-; RUN: grep {vceq\\.i32} %t | count 2
-; RUN: grep {vceq\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vceqi8:
+;CHECK: vceq.i8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp eq <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp eq <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vceqi16:
+;CHECK: vceq.i16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp eq <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp eq <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vceqi32:
+;CHECK: vceq.i32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp eq <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vceqf32:
+;CHECK: vceq.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp oeq <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vceqQi8:
+;CHECK: vceq.i8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp eq <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp eq <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vceqQi16:
+;CHECK: vceq.i16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp eq <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp eq <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vceqQi32:
+;CHECK: vceq.i32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp eq <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp eq <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
 }
 
 define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vceqQf32:
+;CHECK: vceq.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp oeq <4 x float> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
 }
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 14c623ea082f2..2c161113c1130 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcge\\.s8} %t | count 2
-; RUN: grep {vcge\\.s16} %t | count 2
-; RUN: grep {vcge\\.s32} %t | count 2
-; RUN: grep {vcge\\.u8} %t | count 2
-; RUN: grep {vcge\\.u16} %t | count 2
-; RUN: grep {vcge\\.u32} %t | count 2
-; RUN: grep {vcge\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcges8:
+;CHECK: vcge.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp sge <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcges16:
+;CHECK: vcge.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sge <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcges32:
+;CHECK: vcge.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp sge <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgeu8:
+;CHECK: vcge.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp uge <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgeu16:
+;CHECK: vcge.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp uge <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgeu32:
+;CHECK: vcge.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp uge <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgef32:
+;CHECK: vcge.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp oge <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQs8:
+;CHECK: vcge.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp sge <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQs16:
+;CHECK: vcge.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp sge <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQs32:
+;CHECK: vcge.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp sge <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
 }
 
 define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgeQu8:
+;CHECK: vcge.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp uge <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgeQu16:
+;CHECK: vcge.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp uge <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgeQu32:
+;CHECK: vcge.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp uge <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
 }
 
 define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgeQf32:
+;CHECK: vcge.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp oge <4 x float> %tmp1, %tmp2
+ %tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgef32:
+;CHECK: vacge.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacged(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgeQf32:
+;CHECK: vacge.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgeq(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x i32> %tmp3
 }
+
+declare <2 x i32> @llvm.arm.neon.vacged(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgeq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 3f7e55078733c..6b11ba5ce6933 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,106 +1,162 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcgt\\.s8} %t | count 2
-; RUN: grep {vcgt\\.s16} %t | count 2
-; RUN: grep {vcgt\\.s32} %t | count 2
-; RUN: grep {vcgt\\.u8} %t | count 2
-; RUN: grep {vcgt\\.u16} %t | count 2
-; RUN: grep {vcgt\\.u32} %t | count 2
-; RUN: grep {vcgt\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgts8:
+;CHECK: vcgt.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp sgt <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgts16:
+;CHECK: vcgt.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp sgt <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgts32:
+;CHECK: vcgt.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp sgt <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcgtu8:
+;CHECK: vcgt.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
- %tmp3 = vicmp ugt <8 x i8> %tmp1, %tmp2
- ret <8 x i8> %tmp3
+ %tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+ ret <8 x i8> %tmp4
 }
 
 define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcgtu16:
+;CHECK: vcgt.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
- %tmp3 = vicmp ugt <4 x i16> %tmp1, %tmp2
- ret <4 x i16> %tmp3
+ %tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+ ret <4 x i16> %tmp4
 }
 
 define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcgtu32:
+;CHECK: vcgt.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
- %tmp3 = vicmp ugt <2 x i32> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcgtf32:
+;CHECK: vcgt.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ogt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQs8:
+;CHECK: vcgt.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp sgt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQs16:
+;CHECK: vcgt.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp sgt <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQs32:
+;CHECK: vcgt.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp sgt <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
 }
 
 define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcgtQu8:
+;CHECK: vcgt.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
- %tmp3 = vicmp ugt <16 x i8> %tmp1, %tmp2
- ret <16 x i8> %tmp3
+ %tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
+ %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+ ret <16 x i8> %tmp4
 }
 
 define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcgtQu16:
+;CHECK: vcgt.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
- %tmp3 = vicmp ugt <8 x i16> %tmp1, %tmp2
- ret <8 x i16> %tmp3
+ %tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
+ %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
 }
 
 define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcgtQu32:
+;CHECK: vcgt.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
- %tmp3 = vicmp ugt <4 x i32> %tmp1, %tmp2
- ret <4 x i32> %tmp3
+ %tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
 }
 
 define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vcgtQf32:
+;CHECK: vcgt.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = load <4 x float>* %B
- %tmp3 = vfcmp ogt <4 x float> %tmp1, %tmp2
+ %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vacgtf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vacgtd(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vacgtQf32:
+;CHECK: vacgt.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vacgtq(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x i32> %tmp3
 }
+
+declare <2 x i32> @llvm.arm.neon.vacgtd(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vacgtq(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 981716895894e..450f90d03dfe7 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,13 +1,16 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcnt\\.8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: vcnt8:
+;CHECK: vcnt.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp2
 }
 
 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
+;CHECK: vcntQ8:
+;CHECK: vcnt.8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp2
@@ -15,3 +18,115 @@ define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
 
 declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
 declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
new file mode 100644
index 0000000000000..e6733051f269c
--- /dev/null
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+
+define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32>
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32>
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32>
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32>
+ ret <4 x float> %tmp3
+}
+
+define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+ %tmp1 = load <1 x i64>* %A
+ %tmp2 = load <1 x i64>* %B
+ %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32>
+ ret <2 x i64> %tmp3
+}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index 1cb42bf155cb7..f4cc5368d9aaa 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -1,53 +1,140 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vcvt\\.s32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.u32\\.f32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.s32} %t | count 2
-; RUN: grep {vcvt\\.f32\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tos32:
+;CHECK: vcvt.s32.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
  ret <2 x i32> %tmp2
 }
 
 define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_f32tou32:
+;CHECK: vcvt.u32.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
  ret <2 x i32> %tmp2
 }
 
 define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_s32tof32:
+;CHECK: vcvt.f32.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
  ret <2 x float> %tmp2
 }
 
 define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_u32tof32:
+;CHECK: vcvt.f32.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
  ret <2 x float> %tmp2
 }
 
 define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tos32:
+;CHECK: vcvt.s32.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
 }
 
 define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_f32tou32:
+;CHECK: vcvt.u32.f32
  %tmp1 = load <4 x float>* %A
  %tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
  ret <4 x i32> %tmp2
 }
 
 define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_s32tof32:
+;CHECK: vcvt.f32.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
  ret <4 x float> %tmp2
 }
 
 define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_u32tof32:
+;CHECK: vcvt.f32.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
  ret <4 x float> %tmp2
 }
+
+define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
+;CHECK: vcvt_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
+;CHECK: vcvt_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
+ ret <2 x float> %tmp2
+}
+
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwind readnone
+
+define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tos32:
+;CHECK: vcvt.s32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
+;CHECK: vcvtQ_n_f32tou32:
+;CHECK: vcvt.u32.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_s32tof32:
+;CHECK: vcvt.f32.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
+;CHECK: vcvtQ_n_u32tof32:
+;CHECK: vcvt.f32.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
+ ret <4 x float> %tmp2
+}
+
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwind readnone
+
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 1c0887a2492df..c9a68cabbc42b 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -1,9 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vdup.8 %t | count 4
-; RUN: grep vdup.16 %t | count 4
-; RUN: grep vdup.32 %t | count 8
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_dup8(i8 %A) nounwind {
+;CHECK: v_dup8:
+;CHECK: vdup.8
  %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
@@ -16,6 +15,8 @@ define <8 x i8> @v_dup8(i8 %A) nounwind {
 }
 
 define <4 x i16> @v_dup16(i16 %A) nounwind {
+;CHECK: v_dup16:
+;CHECK: vdup.16
  %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
@@ -24,18 +25,24 @@ define <4 x i16> @v_dup16(i16 %A) nounwind {
 }
 
 define <2 x i32> @v_dup32(i32 %A) nounwind {
+;CHECK: v_dup32:
+;CHECK: vdup.32
  %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
  ret <2 x i32> %tmp2
 }
 
 define <2 x float> @v_dupfloat(float %A) nounwind {
+;CHECK: v_dupfloat:
+;CHECK: vdup.32
  %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
  ret <2 x float> %tmp2
 }
 
 define <16 x i8> @v_dupQ8(i8 %A) nounwind {
+;CHECK: v_dupQ8:
+;CHECK: vdup.8
  %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
@@ -56,6 +63,8 @@ define <16 x i8> @v_dupQ8(i8 %A) nounwind {
 }
 
 define <8 x i16> @v_dupQ16(i16 %A) nounwind {
+;CHECK: v_dupQ16:
+;CHECK: vdup.16
  %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
@@ -68,6 +77,8 @@ define <8 x i16> @v_dupQ16(i16 %A) nounwind {
 }
 
 define <4 x i32> @v_dupQ32(i32 %A) nounwind {
+;CHECK: v_dupQ32:
+;CHECK: vdup.32
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
@@ -76,6 +87,8 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind {
 }
 
 define <4 x float> @v_dupQfloat(float %A) nounwind {
+;CHECK: v_dupQfloat:
+;CHECK: vdup.32
  %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
  %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
@@ -86,49 +99,171 @@ define <4 x float> @v_dupQfloat(float %A) nounwind {
 
 ; Check to make sure it works with shuffles, too.
 
 define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
+;CHECK: v_shuffledup8:
+;CHECK: vdup.8
  %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
+;CHECK: v_shuffledup16:
+;CHECK: vdup.16
  %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
+;CHECK: v_shuffledup32:
+;CHECK: vdup.32
  %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %tmp2
 }
 
 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
+;CHECK: v_shuffledupfloat:
+;CHECK: vdup.32
  %tmp1 = insertelement <2 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %tmp2
 }
 
 define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
+;CHECK: v_shuffledupQ8:
+;CHECK: vdup.8
  %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
+;CHECK: v_shuffledupQ16:
+;CHECK: vdup.16
  %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
+;CHECK: v_shuffledupQ32:
+;CHECK: vdup.32
  %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %tmp2
 }
 
 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
+;CHECK: v_shuffledupQfloat:
+;CHECK: vdup.32
  %tmp1 = insertelement <4 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %tmp2
 }
+
+define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
+ ret <2 x float> %tmp2
+}
+
+define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
+;CHECK: v_shuffledupQfloat2:
+;CHECK: vdup.32
+ %tmp0 = load float* %A
+ %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
+ ret <4 x float> %tmp2
+}
+
+define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
+;CHECK: vduplane8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
+;CHECK: vduplane16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
+;CHECK: vduplane32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x i32> %tmp2
+}
+
+define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
+;CHECK: vduplanefloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
+ ret <2 x float> %tmp2
+}
+
+define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
+;CHECK: vduplaneQ8:
+;CHECK: vdup.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
+;CHECK: vduplaneQ16:
+;CHECK: vdup.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
+;CHECK: vduplaneQ32:
+;CHECK: vdup.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x i32> %tmp2
+}
+
+define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
+;CHECK: vduplaneQfloat:
+;CHECK: vdup.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32>
+ ret <2 x i64> %0
+}
+
+define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32>
+ ret <2 x double> %0
+}
+
+define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32>
+ ret <2 x double> %0
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
new file mode 100644
index 0000000000000..20d953bfb4a07
--- /dev/null
+++ b/test/CodeGen/ARM/vext.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextRd:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32>
+ ret <8 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32>
+ ret <16 x i8> %tmp3
+}
+
+define arm_apcscc <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: test_vextd16:
+;CHECK: vext
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32>
+ ret <4 x i16> %tmp3
+}
+
+define arm_apcscc <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: test_vextq32:
+;CHECK: vext
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32>
+ ret <4 x i32> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index 58c2068bc8f4a..6946d02637ea8 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -1,96 +1,139 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vceq\\.f32} %t | count 1
-; RUN: grep {vcgt\\.f32} %t | count 9
-; RUN: grep {vcge\\.f32} %t | count 5
-; RUN: grep vorr %t | count 4
-; RUN: grep vmvn %t | count 7
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
-; This tests vfcmp operations that do not map directly to NEON instructions.
+; This tests fcmp operations that do not map directly to NEON instructions.
 
 ; une is implemented with VCEQ/VMVN
 define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunef32:
+;CHECK: vceq.f32
+;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp une <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp une <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; olt is implemented with VCGT
 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcoltf32:
+;CHECK: vcgt.f32
  %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp olt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; ole is implemented with VCGE
 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcolef32:
+;CHECK: vcge.f32
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ole <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; uge is implemented with VCGT/VMVN
 define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp uge <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; ule is implemented with VCGT/VMVN
 define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vculef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ule <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
}
 
 ; ugt is implemented with VCGE/VMVN
 define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcugtf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ugt <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; ult is implemented with VCGE/VMVN
 define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcultf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ult <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
 }
 
 ; ueq is implemented with VCGT/VCGT/VORR/VMVN
 define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcueqf32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ueq <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; one is implemented with VCGT/VCGT/VORR
 define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vconef32:
+;CHECK: vcgt.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp one <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp one <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; uno is implemented with VCGT/VCGE/VORR/VMVN
 define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcunof32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
+;CHECK-NEXT: vmvn
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp uno <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
 
 ; ord is implemented with VCGT/VCGE/VORR
 define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vcordf32:
+;CHECK: vcge.f32
+;CHECK-NEXT: vcgt.f32
+;CHECK-NEXT: vorr
  %tmp1 = load <2 x float>* %A
  %tmp2 = load <2 x float>* %B
- %tmp3 = vfcmp ord <2 x float> %tmp1, %tmp2
- ret <2 x i32> %tmp3
+ %tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
 }
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index f58da44093565..50000e31e1129 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,19 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fabs | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fmscs | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fcvt | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fuito | count 2
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fto.i | count 4
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep bmi | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
-; RUN:   grep fcmpezs | count 1
+; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
 
 define void @test(float* %P, double* %D) {
  %A = load float* %P ; [#uses=1]
@@ -28,16 +13,20 @@ declare float @fabsf(float)
 declare double @fabs(double)
 
 define void @test_abs(float* %P, double* %D) {
+;CHECK: test_abs:
  %a = load float* %P ; [#uses=1]
+;CHECK: fabss
  %b = call float @fabsf( float %a ) ; [#uses=1]
  store float %b, float* %P
  %A = load double* %D ; [#uses=1]
+;CHECK: fabsd
  %B = call double @fabs( double %A ) ; [#uses=1]
  store double %B, double* %D
  ret void
 }
 
 define void @test_add(float* %P, double* %D) {
+;CHECK: test_add:
  %a = load float* %P ; [#uses=2]
  %b = fadd float %a, %a ; [#uses=1]
  store float %b, float* %P
@@ -48,9 +37,12 @@ define void @test_add(float* %P, double* %D) {
 }
 
 define void @test_ext_round(float* %P, double* %D) {
+;CHECK: test_ext_round:
  %a = load float* %P ; [#uses=1]
+;CHECK: fcvtds
  %b = fpext float %a to double ; [#uses=1]
  %A = load double* %D ; [#uses=1]
+;CHECK: fcvtsd
  %B = fptrunc double %A to float ; [#uses=1]
  store double %b, double* %D
  store float %B, float* %P
@@ -58,9 +50,11 @@ define void @test_ext_round(float* %P, double* %D) {
 }
 
 define void @test_fma(float* %P1, float* %P2, float* %P3) {
+;CHECK: test_fma:
  %a1 = load float* %P1 ; [#uses=1]
  %a2 = load float* %P2 ; [#uses=1]
  %a3 = load float* %P3 ; [#uses=1]
+;CHECK: fmscs
  %X = fmul float %a1, %a2 ; [#uses=1]
  %Y = fsub float %X, %a3 ; [#uses=1]
  store float %Y, float* %P1
@@ -68,42 +62,55 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
 }
 
 define i32 @test_ftoi(float* %P1) {
+;CHECK: test_ftoi:
  %a1 = load float* %P1 ; [#uses=1]
+;CHECK: ftosizs
  %b1 = fptosi float %a1 to i32 ; [#uses=1]
  ret i32 %b1
 }
 
 define i32 @test_ftou(float* %P1) {
+;CHECK: test_ftou:
  %a1 = load float* %P1 ; [#uses=1]
+;CHECK: ftouizs
  %b1 = fptoui float %a1 to i32 ; [#uses=1]
  ret i32 %b1
 }
 
 define i32 @test_dtoi(double* %P1) {
+;CHECK: test_dtoi:
  %a1 = load double* %P1 ; [#uses=1]
+;CHECK: ftosizd
  %b1 = fptosi double %a1 to i32 ; [#uses=1]
  ret i32 %b1
 }
 
 define i32 @test_dtou(double* %P1) {
+;CHECK: test_dtou:
  %a1 = load double* %P1 ; [#uses=1]
+;CHECK: ftouizd
  %b1 = fptoui double %a1 to i32 ; [#uses=1]
  ret i32 %b1
 }
 
 define void @test_utod(double* %P1, i32 %X) {
+;CHECK: test_utod:
+;CHECK: fuitod
  %b1 = uitofp i32 %X to double ; [#uses=1]
  store double %b1, double* %P1
  ret void
 }
 
 define void @test_utod2(double* %P1, i8 %X) {
+;CHECK: test_utod2:
+;CHECK: fuitod
  %b1 = uitofp i8 %X to double ; [#uses=1]
  store double %b1, double* %P1
  ret void
 }
 
 define void @test_cmp(float* %glob, i32 %X) {
+;CHECK: test_cmp:
 entry:
  %tmp = load float* %glob ; [#uses=2]
  %tmp3 = getelementptr float* %glob, i32 2 ; [#uses=1]
@@ -111,6 +118,8 @@ entry:
  %tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; [#uses=1]
  %tmp5 = fcmp uno float %tmp, %tmp4 ; [#uses=1]
  %tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; [#uses=1]
+;CHECK: bmi
+;CHECK-NEXT: bgt
  br i1 %tmp6, label %cond_true, label %cond_false
 
 cond_true: ; preds = %entry
@@ -129,8 +138,10 @@ declare i32 @bar(...)
 
 declare i32 @baz(...)
 
 define void @test_cmpfp0(float* %glob, i32 %X) {
+;CHECK: test_cmpfp0:
 entry:
  %tmp = load float* %glob ; [#uses=1]
+;CHECK: fcmpezs
  %tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; [#uses=1]
  br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index a361ba2ba97f5..f0df7982ef425 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -1,11 +1,10 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmov\\.s8} %t | count 2
-; RUN: grep {vmov\\.s16} %t | count 2
-; RUN: grep {vmov\\.u8} %t | count 2
-; RUN: grep {vmov\\.u16} %t | count 2
-; RUN: grep {vmov\\.32} %t | count 2
+; RUN: llc < %s -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
 
 define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+;CHECK: vget_lanes8:
+;CHECK: vmov.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = extractelement <8 x i8> %tmp1, i32 1
  %tmp3 = sext i8 %tmp2 to i32
@@ -13,6 +12,8 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
 }
 
 define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+;CHECK: vget_lanes16:
+;CHECK: vmov.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = extractelement <4 x i16> %tmp1, i32 1
  %tmp3 = sext i16 %tmp2 to i32
@@ -20,6 +21,8 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
 }
 
 define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+;CHECK: vget_laneu8:
+;CHECK: vmov.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = extractelement <8 x i8> %tmp1, i32 1
  %tmp3 = zext i8 %tmp2 to i32
@@ -27,6 +30,8 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
 }
 
 define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+;CHECK: vget_laneu16:
+;CHECK: vmov.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = extractelement <4 x i16> %tmp1, i32 1
  %tmp3 = zext i16 %tmp2 to i32
@@ -35,6 +40,8 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
 
 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+;CHECK: vget_lanei32:
+;CHECK: vmov.32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = add <2 x i32> %tmp1, %tmp1
  %tmp3 = extractelement <2 x i32> %tmp2, i32 1
@@ -42,6 +49,8 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
 }
 
 define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_lanes8:
+;CHECK: vmov.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = extractelement <16 x i8> %tmp1, i32 1
  %tmp3 = sext i8 %tmp2 to i32
@@ -49,6 +58,8 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
 }
 
 define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_lanes16:
+;CHECK: vmov.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = extractelement <8 x i16> %tmp1, i32 1
  %tmp3 = sext i16 %tmp2 to i32
@@ -56,6 +67,8 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
 }
 
 define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+;CHECK: vgetQ_laneu8:
+;CHECK: vmov.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = extractelement <16 x i8> %tmp1, i32 1
  %tmp3 = zext i8 %tmp2 to i32
@@ -63,6 +76,8 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
 }
 
 define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+;CHECK: vgetQ_laneu16:
+;CHECK: vmov.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = extractelement <8 x i16> %tmp1, i32 1
  %tmp3 = zext i16 %tmp2 to i32
@@ -71,8 +86,127 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
 
 ; Do a vector add to keep the extraction from being done directly from memory.
 define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+;CHECK: vgetQ_lanei32:
+;CHECK: vmov.32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = add <4 x i32> %tmp1, %tmp1
  %tmp3 = extractelement <4 x i32> %tmp2, i32 1
  ret i32 %tmp3
 }
+
+define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
+ %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = extractelement <4 x i16> %0, i32 1 ; [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
+ %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
+ %1 = extractelement <8 x i8> %0, i32 1 ; [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+ %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
+ %out_uint16_t = alloca i16 ; [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
+ %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+ %1 = extractelement <8 x i16> %0, i32 1 ; [#uses=1]
+ store i16 %1, i16* %out_uint16_t, align 2
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+ %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
+ %out_uint8_t = alloca i8 ; [#uses=1]
+ %"alloca point" = bitcast i32 0 to i32 ; [#uses=0]
+ %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = extractelement <16 x i8> %0, i32 1 ; [#uses=1]
+ store i8 %1, i8* %out_uint8_t, align 1
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+;CHECK: vset_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+;CHECK: vset_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+;CHECK: vset_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+;CHECK: vsetQ_lane8:
+;CHECK: vmov.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+;CHECK: vsetQ_lane16:
+;CHECK: vmov.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+;CHECK: vsetQ_lane32:
+;CHECK: vmov.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
+ ret <4 x i32> %tmp2
+}
+
+define arm_aapcs_vfpcc <2 x float> @test_vset_lanef32(float %arg0_float32_t, <2 x float> %arg1_float32x2_t) nounwind {
+;CHECK: test_vset_lanef32:
+;CHECK: fcpys
+;CHECK: fcpys
+entry:
+ %0 = insertelement <2 x float> %arg1_float32x2_t, float %arg0_float32_t, i32 1 ; <<2 x float>> [#uses=1]
+ ret <2 x float> %0
+}
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 5e7503dc71cf3..379e062838f61 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vhadd\\.s8} %t | count 2
-; RUN: grep {vhadd\\.s16} %t | count 2
-; RUN: grep {vhadd\\.s32} %t | count 2
-; RUN: grep {vhadd\\.u8} %t | count 2
-; RUN: grep {vhadd\\.u16} %t | count 2
-; RUN: grep {vhadd\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhadds8:
+;CHECK: vhadd.s8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhadds16:
+;CHECK: vhadd.s16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhadds32:
+;CHECK: vhadd.s32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vhaddu8:
+;CHECK: vhadd.u8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = load <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vhaddu16:
+;CHECK: vhadd.u16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = load <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vhaddu32:
+;CHECK: vhadd.u32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = load <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQs8:
+;CHECK: vhadd.s8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQs16:
+;CHECK: vhadd.s16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQs32:
+;CHECK: vhadd.s32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vhaddQu8:
+;CHECK: vhadd.u8
  %tmp1 = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vhaddQu16:
+;CHECK: vhadd.u16
  %tmp1 = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vhaddQu32:
+;CHECK: vhadd.u32
  %tmp1 = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -105,3 +123,127 @@ declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind rea
 declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhadds8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhadds16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhadds32:
+;CHECK: vrhadd.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vrhaddu8:
+;CHECK: vrhadd.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vrhaddu16:
+;CHECK: vrhadd.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vrhaddu32:
+;CHECK: vrhadd.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vrhaddQs8:
+;CHECK: vrhadd.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vrhaddQs16:
+;CHECK: vrhadd.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load
<8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrhaddQs32: +;CHECK: vrhadd.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrhaddQu8: +;CHECK: vrhadd.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrhaddQu16: +;CHECK: vrhadd.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrhaddQu32: +;CHECK: vrhadd.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll index 32a66e5479454..0f0d0279a5217 100644 --- a/test/CodeGen/ARM/vhsub.ll +++ b/test/CodeGen/ARM/vhsub.ll @@ -1,12 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vhsub\\.s8} %t | count 2 -; RUN: grep {vhsub\\.s16} %t | count 2 -; RUN: grep {vhsub\\.s32} %t | count 2 -; RUN: grep {vhsub\\.u8} %t | count 2 -; RUN: grep {vhsub\\.u16} %t | count 2 -; RUN: grep {vhsub\\.u32} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vhsubs8: +;CHECK: vhsub.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -14,6 +10,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vhsubs16: +;CHECK: vhsub.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -21,6 +19,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 
x i32>* %B) nounwind { +;CHECK: vhsubs32: +;CHECK: vhsub.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -28,6 +28,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vhsubu8: +;CHECK: vhsub.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -35,6 +37,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vhsubu16: +;CHECK: vhsub.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -42,6 +46,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vhsubu32: +;CHECK: vhsub.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -49,6 +55,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vhsubQs8: +;CHECK: vhsub.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -56,6 +64,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vhsubQs16: +;CHECK: vhsub.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -63,6 +73,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vhsubQs32: +;CHECK: vhsub.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -70,6 +82,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vhsubQu8: +;CHECK: vhsub.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -77,6 +91,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vhsubQu16: +;CHECK: vhsub.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -84,6 +100,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vhsubQu32: +;CHECK: vhsub.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll index 86858f9293487..2d8cb893bd867 100644 --- a/test/CodeGen/ARM/vicmp.ll +++ b/test/CodeGen/ARM/vicmp.ll @@ -1,85 +1,113 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vceq\\.i8} %t | count 2 -; RUN: grep {vceq\\.i16} %t | count 2 -; RUN: grep {vceq\\.i32} %t | count 2 
-; RUN: grep vmvn %t | count 6
-; RUN: grep {vcgt\\.s8} %t | count 1
-; RUN: grep {vcge\\.s16} %t | count 1
-; RUN: grep {vcgt\\.u16} %t | count 1
-; RUN: grep {vcge\\.u32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s

-; This tests vicmp operations that do not map directly to NEON instructions.
+; This tests icmp operations that do not map directly to NEON instructions.
 ; Not-equal (ne) operations are implemented by VCEQ/VMVN. Less-than (lt/ult)
 ; and less-than-or-equal (le/ule) are implemented by swapping the arguments
 ; to VCGT and VCGE. Test all the operand types for not-equal but only sample
 ; the other operations.

 define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vcnei8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = vicmp ne <8 x i8> %tmp1, %tmp2
-	ret <8 x i8> %tmp3
+	%tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
+	%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
+	ret <8 x i8> %tmp4
 }

 define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcnei16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp ne <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
+	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }

 define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vcnei32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = vicmp ne <2 x i32> %tmp1, %tmp2
-	ret <2 x i32> %tmp3
+	%tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
+	%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+	ret <2 x i32> %tmp4
 }

 define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcneQi8:
+;CHECK: vceq.i8
+;CHECK-NEXT: vmvn
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp ne <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
+	%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }

 define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vcneQi16:
+;CHECK: vceq.i16
+;CHECK-NEXT: vmvn
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = vicmp ne <8 x i16> %tmp1, %tmp2
-	ret <8 x i16> %tmp3
+	%tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
+	%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
+	ret <8 x i16> %tmp4
 }

 define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vcneQi32:
+;CHECK: vceq.i32
+;CHECK-NEXT: vmvn
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = vicmp ne <4 x i32> %tmp1, %tmp2
-	ret <4 x i32> %tmp3
+	%tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
+	%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+	ret <4 x i32> %tmp4
 }

 define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vcltQs8:
+;CHECK: vcgt.s8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = vicmp slt <16 x i8> %tmp1, %tmp2
-	ret <16 x i8> %tmp3
+	%tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
+	%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
+	ret <16 x i8> %tmp4
 }

 define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcles16:
+;CHECK: vcge.s16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = vicmp sle <4 x i16> %tmp1, %tmp2
-	ret <4 x i16> %tmp3
+	%tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
+	%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
+	ret <4 x i16> %tmp4
 }

 define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vcltu16:
+;CHECK: vcgt.u16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-
%tmp3 = vicmp ult <4 x i16> %tmp1, %tmp2 - ret <4 x i16> %tmp3 + %tmp3 = icmp ult <4 x i16> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i16> + ret <4 x i16> %tmp4 } define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vcleQu32: +;CHECK: vcge.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B - %tmp3 = vicmp ule <4 x i32> %tmp1, %tmp2 - ret <4 x i32> %tmp3 + %tmp3 = icmp ule <4 x i32> %tmp1, %tmp2 + %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> + ret <4 x i32> %tmp4 } diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll new file mode 100644 index 0000000000000..f5383aafb2bb0 --- /dev/null +++ b/test/CodeGen/ARM/vld1.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vld1i8(i8* %A) nounwind { +;CHECK: vld1i8: +;CHECK: vld1.8 + %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A) + ret <8 x i8> %tmp1 +} + +define <4 x i16> @vld1i16(i16* %A) nounwind { +;CHECK: vld1i16: +;CHECK: vld1.16 + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A) + ret <4 x i16> %tmp1 +} + +define <2 x i32> @vld1i32(i32* %A) nounwind { +;CHECK: vld1i32: +;CHECK: vld1.32 + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A) + ret <2 x i32> %tmp1 +} + +define <2 x float> @vld1f(float* %A) nounwind { +;CHECK: vld1f: +;CHECK: vld1.32 + %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A) + ret <2 x float> %tmp1 +} + +define <1 x i64> @vld1i64(i64* %A) nounwind { +;CHECK: vld1i64: +;CHECK: vld1.64 + %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A) + ret <1 x i64> %tmp1 +} + +define <16 x i8> @vld1Qi8(i8* %A) nounwind { +;CHECK: vld1Qi8: +;CHECK: vld1.8 + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A) + ret <16 x i8> %tmp1 +} + +define <8 x i16> @vld1Qi16(i16* %A) nounwind { +;CHECK: vld1Qi16: +;CHECK: vld1.16 + %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A) + ret <8 x i16> %tmp1 +} + +define <4 x i32> @vld1Qi32(i32* %A) nounwind { +;CHECK: vld1Qi32: +;CHECK: vld1.32 + %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A) + ret <4 x i32> %tmp1 +} + +define <4 x float> @vld1Qf(float* %A) nounwind { +;CHECK: vld1Qf: +;CHECK: vld1.32 + %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A) + ret <4 x float> %tmp1 +} + +define <2 x i64> @vld1Qi64(i64* %A) nounwind { +;CHECK: vld1Qi64: +;CHECK: vld1.64 + %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A) + ret <2 x i64> %tmp1 +} + +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll new file mode 100644 index 0000000000000..23f7d2ca0cd3e --- /dev/null +++ b/test/CodeGen/ARM/vld2.ll @@ -0,0 +1,113 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } 
+%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +define <8 x i8> @vld2i8(i8* %A) nounwind { +;CHECK: vld2i8: +;CHECK: vld2.8 + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld2i16(i16* %A) nounwind { +;CHECK: vld2i16: +;CHECK: vld2.16 + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld2i32(i32* %A) nounwind { +;CHECK: vld2i32: +;CHECK: vld2.32 + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 + %tmp4 = add <2 x i32> %tmp2, %tmp3 + ret <2 x i32> %tmp4 +} + +define <2 x float> @vld2f(float* %A) nounwind { +;CHECK: vld2f: +;CHECK: vld2.32 + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 + %tmp4 = add <2 x float> %tmp2, %tmp3 + ret <2 x float> %tmp4 +} + +define <1 x i64> @vld2i64(i64* %A) nounwind { +;CHECK: vld2i64: +;CHECK: vld1.64 + %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i64* %A) + %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld2Qi8(i8* %A) nounwind { +;CHECK: vld2Qi8: +;CHECK: vld2.8 + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld2Qi16(i16* %A) nounwind { +;CHECK: vld2Qi16: +;CHECK: vld2.16 + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld2Qi32(i32* %A) nounwind { +;CHECK: vld2Qi32: +;CHECK: vld2.32 + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld2Qf(float* %A) nounwind { +;CHECK: vld2Qf: +;CHECK: vld2.32 + %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1 + %tmp4 = add <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly +declare 
%struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly + +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll new file mode 100644 index 0000000000000..207dc6a22e459 --- /dev/null +++ b/test/CodeGen/ARM/vld3.ll @@ -0,0 +1,117 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld3i8(i8* %A) nounwind { +;CHECK: vld3i8: +;CHECK: vld3.8 + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld3i16(i16* %A) nounwind { +;CHECK: vld3i16: +;CHECK: vld3.16 + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld3i32(i32* %A) nounwind { +;CHECK: vld3i32: +;CHECK: vld3.32 + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 + ret <2 x i32> %tmp4 +} + +define <2 x float> @vld3f(float* %A) nounwind { +;CHECK: vld3f: +;CHECK: vld3.32 + %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2 + %tmp4 = add <2 x float> %tmp2, %tmp3 + ret <2 x float> %tmp4 +} + +define <1 x i64> @vld3i64(i64* %A) nounwind { +;CHECK: vld3i64: +;CHECK: vld1.64 + %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i64* %A) + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld3Qi8(i8* %A) nounwind { +;CHECK: vld3Qi8: +;CHECK: vld3.8 +;CHECK: vld3.8 + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 + %tmp3 = extractvalue 
%struct.__neon_int8x16x3_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld3Qi16(i16* %A) nounwind { +;CHECK: vld3Qi16: +;CHECK: vld3.16 +;CHECK: vld3.16 + %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld3Qi32(i32* %A) nounwind { +;CHECK: vld3Qi32: +;CHECK: vld3.32 +;CHECK: vld3.32 + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld3Qf(float* %A) nounwind { +;CHECK: vld3Qf: +;CHECK: vld3.32 +;CHECK: vld3.32 + %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2 + %tmp4 = add <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly + +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll new file mode 100644 index 0000000000000..0624f2977ea46 --- /dev/null +++ b/test/CodeGen/ARM/vld4.ll @@ -0,0 +1,117 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld4i8(i8* %A) nounwind { +;CHECK: vld4i8: +;CHECK: vld4.8 + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld4i16(i16* %A) nounwind { +;CHECK: vld4i16: +;CHECK: vld4.16 + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 + %tmp3 = extractvalue 
%struct.__neon_int16x4x4_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld4i32(i32* %A) nounwind { +;CHECK: vld4i32: +;CHECK: vld4.32 + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 + ret <2 x i32> %tmp4 +} + +define <2 x float> @vld4f(float* %A) nounwind { +;CHECK: vld4f: +;CHECK: vld4.32 + %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2 + %tmp4 = add <2 x float> %tmp2, %tmp3 + ret <2 x float> %tmp4 +} + +define <1 x i64> @vld4i64(i64* %A) nounwind { +;CHECK: vld4i64: +;CHECK: vld1.64 + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i64* %A) + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld4Qi8(i8* %A) nounwind { +;CHECK: vld4Qi8: +;CHECK: vld4.8 +;CHECK: vld4.8 + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A) + %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld4Qi16(i16* %A) nounwind { +;CHECK: vld4Qi16: +;CHECK: vld4.16 +;CHECK: vld4.16 + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i16* %A) + %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld4Qi32(i32* %A) nounwind { +;CHECK: vld4Qi32: +;CHECK: vld4.32 +;CHECK: vld4.32 + %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i32* %A) + %tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + ret <4 x i32> %tmp4 +} + +define <4 x float> @vld4Qf(float* %A) nounwind { +;CHECK: vld4Qf: +;CHECK: vld4.32 +;CHECK: vld4.32 + %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(float* %A) + %tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2 + %tmp4 = add <4 x float> %tmp2, %tmp3 + ret <4 x float> %tmp4 +} + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly + +declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll new file mode 100644 index 0000000000000..53881a3f924e2 --- /dev/null +++ b/test/CodeGen/ARM/vldlane.ll @@ -0,0 +1,328 @@ +; RUN: llc 
< %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } + +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vld2lanei8: +;CHECK: vld2.8 + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vld2lanei16: +;CHECK: vld2.16 + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld2lanei32: +;CHECK: vld2.32 + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + ret <2 x i32> %tmp5 +} + +define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld2lanef: +;CHECK: vld2.32 + %tmp1 = load <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1 + %tmp5 = add <2 x float> %tmp3, %tmp4 + ret <2 x float> %tmp5 +} + +define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld2laneQi16: +;CHECK: vld2.16 + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld2laneQi32: +;CHECK: vld2.32 + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2) + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 +} + +define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld2laneQf: +;CHECK: vld2.32 + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1 + %tmp5 = add <4 x float> %tmp3, %tmp4 + ret <4 
x float> %tmp5 +} + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly + +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vld3lanei8: +;CHECK: vld3.8 + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = add <8 x i8> %tmp3, %tmp4 + %tmp7 = add <8 x i8> %tmp5, %tmp6 + ret <8 x i8> %tmp7 +} + +define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vld3lanei16: +;CHECK: vld3.16 + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 + %tmp6 = add <4 x i16> %tmp3, %tmp4 + %tmp7 = add <4 x i16> %tmp5, %tmp6 + ret <4 x i16> %tmp7 +} + +define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld3lanei32: +;CHECK: vld3.32 + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 + %tmp6 = add <2 x i32> %tmp3, %tmp4 + %tmp7 = add <2 x i32> %tmp5, %tmp6 + ret <2 x i32> %tmp7 +} + +define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld3lanef: +;CHECK: vld3.32 + %tmp1 = load <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2 + %tmp6 = add <2 x float> %tmp3, %tmp4 + %tmp7 = add <2 x float> %tmp5, %tmp6 + ret <2 x 
float> %tmp7 +} + +define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld3laneQi16: +;CHECK: vld3.16 + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld3laneQi32: +;CHECK: vld3.32 + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3) + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 + %tmp6 = add <4 x i32> %tmp3, %tmp4 + %tmp7 = add <4 x i32> %tmp5, %tmp6 + ret <4 x i32> %tmp7 +} + +define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld3laneQf: +;CHECK: vld3.32 + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2 + %tmp6 = add <4 x float> %tmp3, %tmp4 + %tmp7 = add <4 x float> %tmp5, %tmp6 + ret <4 x float> %tmp7 +} + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly + +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vld4lanei8: +;CHECK: vld4.8 + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = 
extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = add <8 x i8> %tmp3, %tmp4 + %tmp8 = add <8 x i8> %tmp5, %tmp6 + %tmp9 = add <8 x i8> %tmp7, %tmp8 + ret <8 x i8> %tmp9 +} + +define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vld4lanei16: +;CHECK: vld4.16 + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3 + %tmp7 = add <4 x i16> %tmp3, %tmp4 + %tmp8 = add <4 x i16> %tmp5, %tmp6 + %tmp9 = add <4 x i16> %tmp7, %tmp8 + ret <4 x i16> %tmp9 +} + +define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vld4lanei32: +;CHECK: vld4.32 + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3 + %tmp7 = add <2 x i32> %tmp3, %tmp4 + %tmp8 = add <2 x i32> %tmp5, %tmp6 + %tmp9 = add <2 x i32> %tmp7, %tmp8 + ret <2 x i32> %tmp9 +} + +define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind { +;CHECK: vld4lanef: +;CHECK: vld4.32 + %tmp1 = load <2 x float>* %B + %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 3 + %tmp7 = add <2 x float> %tmp3, %tmp4 + %tmp8 = add <2 x float> %tmp5, %tmp6 + %tmp9 = add <2 x float> %tmp7, %tmp8 + ret <2 x float> %tmp9 +} + +define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vld4laneQi16: +;CHECK: vld4.16 + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3 + %tmp7 = add <8 x i16> %tmp3, %tmp4 + %tmp8 = add <8 x i16> %tmp5, %tmp6 + %tmp9 = add <8 x i16> %tmp7, %tmp8 + ret <8 x i16> %tmp9 +} + +define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vld4laneQi32: +;CHECK: vld4.32 + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3 + %tmp7 = add <4 x i32> %tmp3, %tmp4 + 
%tmp8 = add <4 x i32> %tmp5, %tmp6 + %tmp9 = add <4 x i32> %tmp7, %tmp8 + ret <4 x i32> %tmp9 +} + +define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind { +;CHECK: vld4laneQf: +;CHECK: vld4.32 + %tmp1 = load <4 x float>* %B + %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) + %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 3 + %tmp7 = add <4 x float> %tmp3, %tmp4 + %tmp8 = add <4 x float> %tmp5, %tmp6 + %tmp9 = add <4 x float> %tmp7, %tmp8 + ret <4 x float> %tmp9 +} + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly + +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll new file mode 100644 index 0000000000000..e3527c1a4d9ba --- /dev/null +++ b/test/CodeGen/ARM/vminmax.ll @@ -0,0 +1,293 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmins8: +;CHECK: vmin.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmins16: +;CHECK: vmin.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmins32: +;CHECK: vmin.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vminu8: +;CHECK: vmin.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vminu16: +;CHECK: vmin.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vminu32: +;CHECK: vmin.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = 
call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vminf32: +;CHECK: vmin.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vminQs8: +;CHECK: vmin.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vminQs16: +;CHECK: vmin.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vminQs32: +;CHECK: vmin.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vminQu8: +;CHECK: vmin.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vminQu16: +;CHECK: vmin.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vminQu32: +;CHECK: vmin.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vminQf32: +;CHECK: vmin.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x float> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone + +declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) 
nounwind readnone + +define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmaxs8: +;CHECK: vmax.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmaxs16: +;CHECK: vmax.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmaxs32: +;CHECK: vmax.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmaxu8: +;CHECK: vmax.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmaxu16: +;CHECK: vmax.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmaxu32: +;CHECK: vmax.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vmaxf32: +;CHECK: vmax.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmaxQs8: +;CHECK: vmax.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmaxQs16: +;CHECK: vmax.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmaxQs32: +;CHECK: vmax.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vmaxQu8: +;CHECK: vmax.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vmaxQu16: +;CHECK: vmax.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vmaxQu32: +;CHECK: vmax.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x 
+define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmaxQu32:
+;CHECK: vmax.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmaxQf32:
+;CHECK: vmax.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
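The conversion above is the pattern for the rest of this change: the grep-and-count RUN lines are replaced by a single FileCheck invocation, with a label check and an opcode check per function so a failure points at the exact test. A minimal sketch of the idiom, reusing one of the intrinsics declared above (the function name @example is illustrative, not part of the patch):

  ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
  define <8 x i8> @example(<8 x i8>* %A, <8 x i8>* %B) nounwind {
  ;CHECK: example:
  ;CHECK: vmax.s8
    %tmp1 = load <8 x i8>* %A
    %tmp2 = load <8 x i8>* %B
    %tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
    ret <8 x i8> %tmp3
  }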
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index ed77e11a7c47e..840521827413a 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmla\\.i8} %t | count 2
-; RUN: grep {vmla\\.i16} %t | count 2
-; RUN: grep {vmla\\.i32} %t | count 2
-; RUN: grep {vmla\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlai8:
+;CHECK: vmla.i8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 }
 
 define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlai16:
+;CHECK: vmla.i16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlai32:
+;CHECK: vmla.i32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlaf32:
+;CHECK: vmla.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
 }
 
 define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlaQi8:
+;CHECK: vmla.i8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
 }
 
 define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlaQi16:
+;CHECK: vmla.i16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlaQi32:
+;CHECK: vmla.i32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlaQf32:
+;CHECK: vmla.f32
 %tmp1 = load <4 x float>* %A
 %tmp2 = load <4 x float>* %B
 %tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
 %tmp5 = add <4 x float> %tmp1, %tmp4
 ret <4 x float> %tmp5
 }
+
+define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlals8:
+;CHECK: vmlal.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlals16:
+;CHECK: vmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlals32:
+;CHECK: vmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlalu8:
+;CHECK: vmlal.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlalu16:
+;CHECK: vmlal.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlalu32:
+;CHECK: vmlal.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes16
+; CHECK: vmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_lanes32
+; CHECK: vmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu16
+; CHECK: vmlal.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlal_laneu32
+; CHECK: vmlal.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
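For reference, the vmlal intrinsics exercised above implement a widening multiply-accumulate: each element of the two narrow operands is sign- or zero-extended, the products are formed at double width, and the result is added to the wide accumulator. A sketch of the signed <4 x i16> case in generic IR, assuming operands %acc, %a and %b (equivalent semantics only; the tests call the intrinsic directly so the backend selects vmlal):

  %a.wide = sext <4 x i16> %a to <4 x i32>
  %b.wide = sext <4 x i16> %b to <4 x i32>
  %prod = mul <4 x i32> %a.wide, %b.wide
  %sum = add <4 x i32> %acc, %prod ; same value as @llvm.arm.neon.vmlals.v4i32(%acc, %a, %b)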
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index d519b7e70e1e3..c89552e6f9eaa 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -1,10 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmls\\.i8} %t | count 2
-; RUN: grep {vmls\\.i16} %t | count 2
-; RUN: grep {vmls\\.i32} %t | count 2
-; RUN: grep {vmls\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
+;CHECK: vmlsi8:
+;CHECK: vmls.i8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = load <8 x i8>* %C
@@ -14,6 +12,8 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 }
 
 define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsi16:
+;CHECK: vmls.i16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = load <4 x i16>* %C
@@ -23,6 +23,8 @@ define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsi32:
+;CHECK: vmls.i32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = load <2 x i32>* %C
@@ -32,6 +34,8 @@ define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
+;CHECK: vmlsf32:
+;CHECK: vmls.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = load <2 x float>* %C
@@ -41,6 +45,8 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
 }
 
 define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
+;CHECK: vmlsQi8:
+;CHECK: vmls.i8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = load <16 x i8>* %C
@@ -50,6 +56,8 @@ define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
 }
 
 define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
+;CHECK: vmlsQi16:
+;CHECK: vmls.i16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = load <8 x i16>* %C
@@ -59,6 +67,8 @@ define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
 }
 
 define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
+;CHECK: vmlsQi32:
+;CHECK: vmls.i32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = load <4 x i32>* %C
@@ -68,6 +78,8 @@ define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
 }
 
 define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+;CHECK: vmlsQf32:
+;CHECK: vmls.f32
 %tmp1 = load <4 x float>* %A
 %tmp2 = load <4 x float>* %B
 %tmp3 = load <4 x float>* %C
@@ -75,3 +87,107 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
 %tmp5 = sub <4 x float> %tmp1, %tmp4
 ret <4 x float> %tmp5
 }
+
+define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlsls8:
+;CHECK: vmlsl.s8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlsls16:
+;CHECK: vmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlsls32:
+;CHECK: vmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+;CHECK: vmlslu8:
+;CHECK: vmlsl.u8
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = load <8 x i8>* %C
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vmlslu16:
+;CHECK: vmlsl.u16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vmlslu32:
+;CHECK: vmlsl.u32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes16
+; CHECK: vmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_lanes32
+; CHECK: vmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu16
+; CHECK: vmlsl.u16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmlsl_laneu32
+; CHECK: vmlsl.u32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
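The test_*_lane* functions above all use the same idiom: a constant shufflevector splats one lane of the final operand, and instruction selection is expected to fold that splat into the by-lane form of the instruction (for example vmlsl.s16 q0, d2, d3[1]). A minimal sketch of the idiom using the declarations above, where %acc, %a and %b are illustrative names:

  ; splat lane 1 of %b, then widening multiply-subtract from the accumulator;
  ; the backend should select: vmlsl.s16 q0, d2, d3[1]
  %lane = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %r = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %acc, <4 x i16> %a, <4 x i16> %lane)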
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index af9c8e25989c9..ed69f970c611c 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -1,101 +1,303 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep vmov.i8 %t | count 2
-; RUN: grep vmov.i16 %t | count 4
-; RUN: grep vmov.i32 %t | count 12
-; RUN: grep vmov.i64 %t | count 2
-; Note: function names do not include "vmov" to allow simple grep for opcodes
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_movi8() nounwind {
+;CHECK: v_movi8:
+;CHECK: vmov.i8
 ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <4 x i16> @v_movi16a() nounwind {
+;CHECK: v_movi16a:
+;CHECK: vmov.i16
 ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 >
 }
 
 ; 0x1000 = 4096
 define <4 x i16> @v_movi16b() nounwind {
+;CHECK: v_movi16b:
+;CHECK: vmov.i16
 ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <2 x i32> @v_movi32a() nounwind {
+;CHECK: v_movi32a:
+;CHECK: vmov.i32
 ret <2 x i32> < i32 32, i32 32 >
 }
 
 ; 0x2000 = 8192
 define <2 x i32> @v_movi32b() nounwind {
+;CHECK: v_movi32b:
+;CHECK: vmov.i32
 ret <2 x i32> < i32 8192, i32 8192 >
 }
 
 ; 0x200000 = 2097152
 define <2 x i32> @v_movi32c() nounwind {
+;CHECK: v_movi32c:
+;CHECK: vmov.i32
 ret <2 x i32> < i32 2097152, i32 2097152 >
 }
 
 ; 0x20000000 = 536870912
 define <2 x i32> @v_movi32d() nounwind {
+;CHECK: v_movi32d:
+;CHECK: vmov.i32
 ret <2 x i32> < i32 536870912, i32 536870912 >
 }
 
 ; 0x20ff = 8447
 define <2 x i32> @v_movi32e() nounwind {
+;CHECK: v_movi32e:
+;CHECK: vmov.i32
 ret <2 x i32> < i32 8447, i32 8447 >
 }
 
 ; 0x20ffff = 2162687
 define <2 x i32> @v_movi32f() nounwind {
+;CHECK: v_movi32f:
+;CHECK: vmov.i32
 ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
 ; 0xff0000ff0000ffff = 18374687574888349695
 define <1 x i64> @v_movi64() nounwind {
+;CHECK: v_movi64:
+;CHECK: vmov.i64
 ret <1 x i64> < i64 18374687574888349695 >
 }
 
 define <16 x i8> @v_movQi8() nounwind {
+;CHECK: v_movQi8:
+;CHECK: vmov.i8
 ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >
 }
 
 define <8 x i16> @v_movQi16a() nounwind {
+;CHECK: v_movQi16a:
+;CHECK: vmov.i16
 ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 >
 }
 
 ; 0x1000 = 4096
 define <8 x i16> @v_movQi16b() nounwind {
+;CHECK: v_movQi16b:
+;CHECK: vmov.i16
 ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
 define <4 x i32> @v_movQi32a() nounwind {
+;CHECK: v_movQi32a:
+;CHECK: vmov.i32
 ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 >
 }
 
 ; 0x2000 = 8192
 define <4 x i32> @v_movQi32b() nounwind {
+;CHECK: v_movQi32b:
+;CHECK: vmov.i32
 ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 >
 }
 
 ; 0x200000 = 2097152
 define <4 x i32> @v_movQi32c() nounwind {
+;CHECK: v_movQi32c:
+;CHECK: vmov.i32
 ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 >
 }
 
 ; 0x20000000 = 536870912
 define <4 x i32> @v_movQi32d() nounwind {
+;CHECK: v_movQi32d:
+;CHECK: vmov.i32
 ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 >
 }
 
 ; 0x20ff = 8447
 define <4 x i32> @v_movQi32e() nounwind {
+;CHECK: v_movQi32e:
+;CHECK: vmov.i32
 ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
 }
 
 ; 0x20ffff = 2162687
 define <4 x i32> @v_movQi32f() nounwind {
+;CHECK: v_movQi32f:
+;CHECK: vmov.i32
 ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
 }
 
 ; 0xff0000ff0000ffff = 18374687574888349695
 define <2 x i64> @v_movQi64() nounwind {
+;CHECK: v_movQi64:
+;CHECK: vmov.i64
 ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
 }
 
+define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
+;CHECK: vmovls8:
+;CHECK: vmovl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
+;CHECK: vmovls16:
+;CHECK: vmovl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
+;CHECK: vmovls32:
+;CHECK: vmovl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
+;CHECK: vmovlu8:
+;CHECK: vmovl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
+;CHECK: vmovlu16:
+;CHECK: vmovl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
+;CHECK: vmovlu32:
+;CHECK: vmovl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
+
+define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
+;CHECK: vmovni16:
+;CHECK: vmovn.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
+;CHECK: vmovni32:
+;CHECK: vmovn.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
+;CHECK: vmovni64:
+;CHECK: vmovn.i64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
+
+define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovns16:
+;CHECK: vqmovn.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovns32:
+;CHECK: vqmovn.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovns64:
+;CHECK: vqmovn.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovnu16:
+;CHECK: vqmovn.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovnu32:
+;CHECK: vqmovn.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovnu64:
+;CHECK: vqmovn.u64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
+;CHECK: vqmovuns16:
+;CHECK: vqmovun.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
+;CHECK: vqmovuns32:
+;CHECK: vqmovun.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
+;CHECK: vqmovuns64:
+;CHECK: vqmovun.s64
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
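Of the narrowing moves tested above, only vmovn corresponds to a plain IR operation: it keeps the low half of each element, i.e. an ordinary truncate such as

  %narrow = trunc <8 x i16> %x to <8 x i8>

The vqmovn.s, vqmovn.u and vqmovun forms saturate instead of discarding high bits (signed-to-signed, unsigned-to-unsigned, and signed-to-unsigned respectively), which has no generic IR equivalent, hence the dedicated intrinsics.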
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index eb9ae7b95c2d8..325da5deabe51 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vmul\\.i8} %t | count 2
-; RUN: grep {vmul\\.i16} %t | count 2
-; RUN: grep {vmul\\.i32} %t | count 2
-; RUN: grep {vmul\\.f32} %t | count 2
-; RUN: grep {vmul\\.p8} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmuli8:
+;CHECK: vmul.i8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = mul <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmuli16:
+;CHECK: vmul.i16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = mul <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmuli32:
+;CHECK: vmul.i32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = mul <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vmulf32:
+;CHECK: vmul.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = mul <2 x float> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulp8:
+;CHECK: vmul.p8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -41,6 +46,8 @@ define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQi8:
+;CHECK: vmul.i8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = mul <16 x i8> %tmp1, %tmp2
@@ -48,6 +55,8 @@ define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vmulQi16:
+;CHECK: vmul.i16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = mul <8 x i16> %tmp1, %tmp2
@@ -55,6 +64,8 @@ define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vmulQi32:
+;CHECK: vmul.i32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = mul <4 x i32> %tmp1, %tmp2
@@ -62,6 +73,8 @@ define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vmulQf32:
+;CHECK: vmul.f32
 %tmp1 = load <4 x float>* %A
 %tmp2 = load <4 x float>* %B
 %tmp3 = mul <4 x float> %tmp1, %tmp2
@@ -69,6 +82,8 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vmulQp8:
+;CHECK: vmul.p8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -77,3 +92,166 @@ define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 
 declare <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 declare <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+
+define arm_aapcs_vfpcc <2 x float> @test_vmul_lanef32(<2 x float> %arg0_float32x2_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanef32:
+; CHECK: vmul.f32 d0, d0, d1[0]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <2 x i32> zeroinitializer ; <<2 x float>> [#uses=1]
+ %1 = fmul <2 x float> %0, %arg0_float32x2_t ; <<2 x float>> [#uses=1]
+ ret <2 x float> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vmul_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes16:
+; CHECK: vmul.i16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = mul <4 x i16> %0, %arg0_int16x4_t ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vmul_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmul_lanes32:
+; CHECK: vmul.i32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = mul <2 x i32> %0, %arg0_int32x2_t ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vmulQ_lanef32(<4 x float> %arg0_float32x4_t, <2 x float> %arg1_float32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanef32:
+; CHECK: vmul.f32 q0, q0, d2[1]
+ %0 = shufflevector <2 x float> %arg1_float32x2_t, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x float>> [#uses=1]
+ %1 = fmul <4 x float> %0, %arg0_float32x4_t ; <<4 x float>> [#uses=1]
+ ret <4 x float> %1
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmulQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes16:
+; CHECK: vmul.i16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %1 = mul <8 x i16> %0, %arg0_int16x8_t ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmulQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmulQ_lanes32:
+; CHECK: vmul.i32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = mul <4 x i32> %0, %arg0_int32x4_t ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmulls8:
+;CHECK: vmull.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmulls16:
+;CHECK: vmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmulls32:
+;CHECK: vmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullu8:
+;CHECK: vmull.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vmullu16:
+;CHECK: vmull.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vmullu32:
+;CHECK: vmull.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vmullp8:
+;CHECK: vmull.p8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes16
+; CHECK: vmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_lanes32
+; CHECK: vmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu16
+; CHECK: vmull.u16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vmull_laneu32
+; CHECK: vmull.u32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 9fa527f52fcc6..7764e87c6ac6b 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -1,53 +1,121 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vneg\\.s8} %t | count 2
-; RUN: grep {vneg\\.s16} %t | count 2
-; RUN: grep {vneg\\.s32} %t | count 2
-; RUN: grep {vneg\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vnegs8:
+;CHECK: vneg.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = sub <8 x i8> zeroinitializer, %tmp1
 ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vnegs16:
+;CHECK: vneg.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = sub <4 x i16> zeroinitializer, %tmp1
 ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vnegs32:
+;CHECK: vneg.s32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = sub <2 x i32> zeroinitializer, %tmp1
 ret <2 x i32> %tmp2
 }
 
 define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
+;CHECK: vnegf32:
+;CHECK: vneg.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = sub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
 ret <2 x float> %tmp2
 }
 
 define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vnegQs8:
+;CHECK: vneg.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = sub <16 x i8> zeroinitializer, %tmp1
 ret <16 x i8> %tmp2
 }
 
 define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vnegQs16:
+;CHECK: vneg.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = sub <8 x i16> zeroinitializer, %tmp1
 ret <8 x i16> %tmp2
 }
 
 define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vnegQs32:
+;CHECK: vneg.s32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = sub <4 x i32> zeroinitializer, %tmp1
 ret <4 x i32> %tmp2
 }
 
 define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
+;CHECK: vnegQf32:
+;CHECK: vneg.f32
 %tmp1 = load <4 x float>* %A
 %tmp2 = sub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
 ret <4 x float> %tmp2
 }
+
+define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
+;CHECK: vqnegs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
+ ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
+;CHECK: vqnegs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
+;CHECK: vqnegs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
+;CHECK: vqnegQs8:
+;CHECK: vqneg.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
+;CHECK: vqnegQs16:
+;CHECK: vqneg.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
+;CHECK: vqnegQs32:
+;CHECK: vqneg.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+declare <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
index c41c532988e8f..7296e936cd73e 100644
--- a/test/CodeGen/ARM/vpadal.ll
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -1,12 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadal\\.s8} %t | count 2
-; RUN: grep {vpadal\\.s16} %t | count 2
-; RUN: grep {vpadal\\.s32} %t | count 2
-; RUN: grep {vpadal\\.u8} %t | count 2
-; RUN: grep {vpadal\\.u16} %t | count 2
-; RUN: grep {vpadal\\.u32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadals8:
+;CHECK: vpadal.s8
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -14,6 +10,8 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadals16:
+;CHECK: vpadal.s16
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -21,6 +19,8 @@ define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadals32:
+;CHECK: vpadal.s32
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -28,6 +28,8 @@ define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpadalu8:
+;CHECK: vpadal.u8
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
@@ -35,6 +37,8 @@ define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpadalu16:
+;CHECK: vpadal.u16
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
@@ -42,6 +46,8 @@ define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpadalu32:
+;CHECK: vpadal.u32
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
@@ -49,6 +55,8 @@ define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQs8:
+;CHECK: vpadal.s8
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -56,6 +64,8 @@ define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQs16:
+;CHECK: vpadal.s16
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -63,6 +73,8 @@ define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQs32:
+;CHECK: vpadal.s32
 %tmp1 = load <2 x i64>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
@@ -70,6 +82,8 @@ define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vpadalQu8:
+;CHECK: vpadal.u8
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
@@ -77,6 +91,8 @@ define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vpadalQu16:
+;CHECK: vpadal.u16
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
@@ -84,6 +100,8 @@ define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vpadalQu32:
+;CHECK: vpadal.u32
 %tmp1 = load <2 x i64>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index baff49227e64e..212557394518a 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -1,39 +1,155 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vpadd\\.i8} %t | count 1
-; RUN: grep {vpadd\\.i16} %t | count 1
-; RUN: grep {vpadd\\.i32} %t | count 1
-; RUN: grep {vpadd\\.f32} %t | count 1
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpaddi8:
+;CHECK: vpadd.i8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 ret <8 x i8> %tmp3
 }
 
 define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpaddi16:
+;CHECK: vpadd.i16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 ret <4 x i16> %tmp3
 }
 
 define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpaddi32:
+;CHECK: vpadd.i32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 ret <2 x i32> %tmp3
 }
 
 define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpaddf32:
+;CHECK: vpadd.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
- %tmp3 = call <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 ret <2 x float> %tmp3
 }
 
-declare <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddls8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddls16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddls32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
+;CHECK: vpaddlu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
+ ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
+;CHECK: vpaddlu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
+ ret <2 x i32> %tmp2
+}
+
+define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
+;CHECK: vpaddlu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
+ ret <1 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQs8:
+;CHECK: vpaddl.s8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQs16:
+;CHECK: vpaddl.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQs32:
+;CHECK: vpaddl.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
+;CHECK: vpaddlQu8:
+;CHECK: vpaddl.u8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
+ ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
+;CHECK: vpaddlQu16:
+;CHECK: vpaddl.u16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
+ ret <4 x i32> %tmp2
+}
+
+define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
+;CHECK: vpaddlQu32:
+;CHECK: vpaddl.u32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
+ ret <2 x i64> %tmp2
+}
+
+declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16>) nounwind readnone
+declare <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32>) nounwind readnone
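The vpaddl tests above cover pairwise add-long: adjacent elements are extended and summed, halving the element count and doubling the element width, i.e. out[i] = ext(in[2*i]) + ext(in[2*i+1]). As a worked example, vpaddl.s8 applied to <8 x i8> <1, 2, 3, 4, 5, 6, 7, 8> produces <4 x i16> <3, 7, 11, 15>. The vpadal tests earlier are the same operation with the wide result accumulated into the first operand.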
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
new file mode 100644
index 0000000000000..b75bcc99f6b67
--- /dev/null
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmins8:
+;CHECK: vpmin.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmins16:
+;CHECK: vpmin.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmins32:
+;CHECK: vpmin.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpminu8:
+;CHECK: vpmin.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpminu16:
+;CHECK: vpmin.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpminu32:
+;CHECK: vpmin.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpminf32:
+;CHECK: vpmin.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
+
+define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxs8:
+;CHECK: vpmax.s8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxs16:
+;CHECK: vpmax.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxs32:
+;CHECK: vpmax.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vpmaxu8:
+;CHECK: vpmax.u8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vpmaxu16:
+;CHECK: vpmax.u16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vpmaxu32:
+;CHECK: vpmax.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vpmaxf32:
+;CHECK: vpmax.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+declare <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
index c9e235995360a..a1669b60ab564 100644
--- a/test/CodeGen/ARM/vqadd.ll
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -1,14 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vqadd\\.s8} %t | count 2
-; RUN: grep {vqadd\\.s16} %t | count 2
-; RUN: grep {vqadd\\.s32} %t | count 2
-; RUN: grep {vqadd\\.s64} %t | count 2
-; RUN: grep {vqadd\\.u8} %t | count 2
-; RUN: grep {vqadd\\.u16} %t | count 2
-; RUN: grep {vqadd\\.u32} %t | count 2
-; RUN: grep {vqadd\\.u64} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqadds8:
+;CHECK: vqadd.s8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -16,6 +10,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqadds16:
+;CHECK: vqadd.s16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -23,6 +19,8 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqadds32:
+;CHECK: vqadd.s32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -30,6 +28,8 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqadds64:
+;CHECK: vqadd.s64
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <1 x i64>* %B
 %tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -37,6 +37,8 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vqaddu8:
+;CHECK: vqadd.u8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -44,6 +46,8 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqaddu16:
+;CHECK: vqadd.u16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -51,6 +55,8 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqaddu32:
+;CHECK: vqadd.u32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -58,6 +64,8 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vqaddu64:
+;CHECK: vqadd.u64
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <1 x i64>* %B
 %tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
@@ -65,6 +73,8 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQs8:
+;CHECK: vqadd.s8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -72,6 +82,8 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQs16:
+;CHECK: vqadd.s16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -79,6 +91,8 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQs32:
+;CHECK: vqadd.s32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -86,6 +100,8 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQs64:
+;CHECK: vqadd.s64
 %tmp1 = load <2 x i64>* %A
 %tmp2 = load <2 x i64>* %B
 %tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
i64> %tmp1, <2 x i64> %tmp2)
@@ -93,6 +109,8 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 }
 
 define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vqaddQu8:
+;CHECK: vqadd.u8
 %tmp1 = load <16 x i8>* %A
 %tmp2 = load <16 x i8>* %B
 %tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
@@ -100,6 +118,8 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 }
 
 define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqaddQu16:
+;CHECK: vqadd.u16
 %tmp1 = load <8 x i16>* %A
 %tmp2 = load <8 x i16>* %B
 %tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
@@ -107,6 +127,8 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 }
 
 define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqaddQu32:
+;CHECK: vqadd.u32
 %tmp1 = load <4 x i32>* %A
 %tmp2 = load <4 x i32>* %B
 %tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
@@ -114,6 +136,8 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 }
 
 define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK: vqaddQu64:
+;CHECK: vqadd.u64
 %tmp1 = load <2 x i64>* %A
 %tmp2 = load <2 x i64>* %B
 %tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
new file mode 100644
index 0000000000000..8dcc7f73633c3
--- /dev/null
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -0,0 +1,281 @@
+; RUN: llc -mattr=+neon < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulhs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulhs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqdmulhQs16:
+;CHECK: vqdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqdmulhQs32:
+;CHECK: vqdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes16
+; CHECK: vqdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulhQ_lanes32
+; CHECK: vqdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes16
+; CHECK: vqdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmulh_lanes32
+; CHECK: vqdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqrdmulhs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqrdmulhs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vqrdmulhQs16:
+;CHECK: vqrdmulh.s16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vqrdmulhQs32:
+;CHECK: vqrdmulh.s32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqRdmulhQ_lanes16(<8 x i16> %arg0_int16x8_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes16
+; CHECK: vqrdmulh.s16 q0, q0, d2[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> ; <<8 x i16>> [#uses=1]
+ %1 = tail call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %arg0_int16x8_t, <8 x i16> %0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %1
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqRdmulhQ_lanes32(<4 x i32> %arg0_int32x4_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulhQ_lanes32
+; CHECK: vqrdmulh.s32 q0, q0, d2[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i32>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i32> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <4 x i16> @test_vqRdmulh_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes16
+; CHECK: vqrdmulh.s16 d0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i16>> [#uses=1]
+ ret <4 x i16> %1
+}
+
+define arm_aapcs_vfpcc <2 x i32> @test_vqRdmulh_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqRdmulh_lanes32
+; CHECK: vqrdmulh.s32 d0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i32>> [#uses=1]
+ ret <2 x i32> %1
+}
+
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vqdmulls16:
+;CHECK: vqdmull.s16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vqdmulls32:
+;CHECK: vqdmull.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmull_lanes16(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes16
+; CHECK: vqdmull.s16 q0, d0, d1[1]
+ %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmull_lanes32
+; CHECK: vqdmull.s32 q0, d0, d1[1]
+ %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlals16:
+;CHECK: vqdmlal.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlals32:
+;CHECK: vqdmlal.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes16
+; CHECK: vqdmlal.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlal_lanes32
+; CHECK: vqdmlal.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlal.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlal.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
+
+define <4 x i32> @vqdmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+;CHECK: vqdmlsls16:
+;CHECK: vqdmlsl.s16
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = load <4 x i16>* %C
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
+ ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @vqdmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+;CHECK: vqdmlsls32:
+;CHECK: vqdmlsl.s32
+ %tmp1 = load <2 x i64>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = load <2 x i32>* %C
+ %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
+ ret <2 x i64> %tmp4
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqdmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes16
+; CHECK: vqdmlsl.s16 q0, d2, d3[1]
+ %0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
+ %1 = tail call <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %1
+}
+
+define arm_aapcs_vfpcc <2 x i64> @test_vqdmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
+entry:
+; CHECK: test_vqdmlsl_lanes32
+; CHECK: vqdmlsl.s32 q0, d2, d3[1]
+ %0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
+ %1 = tail call <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %1
+}
+
+declare <4 x i32> @llvm.arm.neon.vqdmlsl.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i64> @llvm.arm.neon.vqdmlsl.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
index 60b04bd5830ef..e4d29a337cf06 100644
--- a/test/CodeGen/ARM/vqshl.ll
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -1,26 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-;
RUN: grep {vqshl\\.s8} %t | count 4 -; RUN: grep {vqshl\\.s16} %t | count 4 -; RUN: grep {vqshl\\.s32} %t | count 4 -; RUN: grep {vqshl\\.s64} %t | count 4 -; RUN: grep {vqshl\\.u8} %t | count 4 -; RUN: grep {vqshl\\.u16} %t | count 4 -; RUN: grep {vqshl\\.u32} %t | count 4 -; RUN: grep {vqshl\\.u64} %t | count 4 -; RUN: grep {vqshl\\.s8.*#7} %t | count 2 -; RUN: grep {vqshl\\.s16.*#15} %t | count 2 -; RUN: grep {vqshl\\.s32.*#31} %t | count 2 -; RUN: grep {vqshl\\.s64.*#63} %t | count 2 -; RUN: grep {vqshl\\.u8.*#7} %t | count 2 -; RUN: grep {vqshl\\.u16.*#15} %t | count 2 -; RUN: grep {vqshl\\.u32.*#31} %t | count 2 -; RUN: grep {vqshl\\.u64.*#63} %t | count 2 -; RUN: grep {vqshlu\\.s8} %t | count 2 -; RUN: grep {vqshlu\\.s16} %t | count 2 -; RUN: grep {vqshlu\\.s32} %t | count 2 -; RUN: grep {vqshlu\\.s64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqshls8: +;CHECK: vqshl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -28,6 +10,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqshls16: +;CHECK: vqshl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -35,6 +19,8 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqshls32: +;CHECK: vqshl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -42,6 +28,8 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqshls64: +;CHECK: vqshl.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -49,6 +37,8 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqshlu8: +;CHECK: vqshl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -56,6 +46,8 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqshlu16: +;CHECK: vqshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -63,6 +55,8 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqshlu32: +;CHECK: vqshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -70,6 +64,8 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqshlu64: +;CHECK: vqshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -77,6 +73,8 @@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> 
@vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqshlQs8: +;CHECK: vqshl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -84,6 +82,8 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqshlQs16: +;CHECK: vqshl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -91,6 +91,8 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqshlQs32: +;CHECK: vqshl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -98,6 +100,8 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqshlQs64: +;CHECK: vqshl.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -105,6 +109,8 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqshlQu8: +;CHECK: vqshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -112,6 +118,8 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqshlQu16: +;CHECK: vqshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -119,6 +127,8 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqshlQu32: +;CHECK: vqshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -126,6 +136,8 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqshlQu64: +;CHECK: vqshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -133,144 +145,192 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshls_n8: +;CHECK: vqshl.s8{{.*#7}} %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshls_n16: +;CHECK: vqshl.s16{{.*#15}} %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshls_n32: +;CHECK: vqshl.s32{{.*#31}} %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind { +;CHECK: 
vqshls_n64: +;CHECK: vqshl.s64{{.*#63}} %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshlu_n8: +;CHECK: vqshl.u8{{.*#7}} %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshlu_n16: +;CHECK: vqshl.u16{{.*#15}} %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshlu_n32: +;CHECK: vqshl.u32{{.*#31}} %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshlu_n64: +;CHECK: vqshl.u64{{.*#63}} %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind { +;CHECK: vqshlsu_n8: +;CHECK: vqshlu.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind { +;CHECK: vqshlsu_n16: +;CHECK: vqshlu.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind { +;CHECK: vqshlsu_n32: +;CHECK: vqshlu.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind { +;CHECK: vqshlsu_n64: +;CHECK: vqshlu.s64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQs_n8: +;CHECK: vqshl.s8{{.*#7}} %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQs_n16: +;CHECK: vqshl.s16{{.*#15}} %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQs_n32: +;CHECK: vqshl.s32{{.*#31}} %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQs_n64: +;CHECK: vqshl.s64{{.*#63}} %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQu_n8: +;CHECK: vqshl.u8{{.*#7}} %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> 
@llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQu_n16: +;CHECK: vqshl.u16{{.*#15}} %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQu_n32: +;CHECK: vqshl.u32{{.*#31}} %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQu_n64: +;CHECK: vqshl.u64{{.*#63}} %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 } define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind { +;CHECK: vqshlQsu_n8: +;CHECK: vqshlu.s8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind { +;CHECK: vqshlQsu_n16: +;CHECK: vqshlu.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind { +;CHECK: vqshlQsu_n32: +;CHECK: vqshlu.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind { +;CHECK: vqshlQsu_n64: +;CHECK: vqshlu.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 @@ -305,3 +365,167 @@ declare <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8>, <16 x i8>) nounwind declare <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqrshls8: +;CHECK: vqrshl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrshls16: +;CHECK: vqrshl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqrshls32: +;CHECK: vqrshl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqrshls64: +;CHECK: vqrshl.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> 
@llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqrshlu8: +;CHECK: vqrshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqrshlu16: +;CHECK: vqrshl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqrshlu32: +;CHECK: vqrshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqrshlu64: +;CHECK: vqrshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqrshlQs8: +;CHECK: vqrshl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrshlQs16: +;CHECK: vqrshl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrshlQs32: +;CHECK: vqrshl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqrshlQs64: +;CHECK: vqrshl.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqrshlQu8: +;CHECK: vqrshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqrshlQu16: +;CHECK: vqrshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqrshlQu32: +;CHECK: vqrshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqrshlQu64: +;CHECK: vqrshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i8> 
@llvm.arm.neon.vqrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll index 6bd607abb4d07..5da79432bb42e 100644 --- a/test/CodeGen/ARM/vqshrn.ll +++ b/test/CodeGen/ARM/vqshrn.ll @@ -1,63 +1,72 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vqshrn\\.s16} %t | count 1 -; RUN: grep {vqshrn\\.s32} %t | count 1 -; RUN: grep {vqshrn\\.s64} %t | count 1 -; RUN: grep {vqshrn\\.u16} %t | count 1 -; RUN: grep {vqshrn\\.u32} %t | count 1 -; RUN: grep {vqshrn\\.u64} %t | count 1 -; RUN: grep {vqshrun\\.s16} %t | count 1 -; RUN: grep {vqshrun\\.s32} %t | count 1 -; RUN: grep {vqshrun\\.s64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind { +;CHECK: vqshrns8: +;CHECK: vqshrn.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind { +;CHECK: vqshrns16: +;CHECK: vqshrn.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind { +;CHECK: vqshrns32: +;CHECK: vqshrn.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind { +;CHECK: vqshrnu8: +;CHECK: vqshrn.u16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind { +;CHECK: vqshrnu16: +;CHECK: vqshrn.u32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind { +;CHECK: vqshrnu32: +;CHECK: vqshrn.u64 %tmp1 = load <2 
x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 } define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind { +;CHECK: vqshruns8: +;CHECK: vqshrun.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind { +;CHECK: vqshruns16: +;CHECK: vqshrun.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind { +;CHECK: vqshruns32: +;CHECK: vqshrun.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 @@ -74,3 +83,87 @@ declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind declare <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind { +;CHECK: vqrshrns8: +;CHECK: vqrshrn.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind { +;CHECK: vqrshrns16: +;CHECK: vqrshrn.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind { +;CHECK: vqrshrns32: +;CHECK: vqrshrn.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind { +;CHECK: vqrshrnu8: +;CHECK: vqrshrn.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind { +;CHECK: vqrshrnu16: +;CHECK: vqrshrn.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind { +;CHECK: vqrshrnu32: +;CHECK: vqrshrn.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind { +;CHECK: vqrshruns8: +;CHECK: vqrshrun.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind { +;CHECK: vqrshruns16: +;CHECK: vqrshrun.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 
x i16> %tmp2 +} + +define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind { +;CHECK: vqrshruns32: +;CHECK: vqrshrun.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll index 07052f78dbea2..4231fca37e370 100644 --- a/test/CodeGen/ARM/vqsub.ll +++ b/test/CodeGen/ARM/vqsub.ll @@ -1,14 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vqsub\\.s8} %t | count 2 -; RUN: grep {vqsub\\.s16} %t | count 2 -; RUN: grep {vqsub\\.s32} %t | count 2 -; RUN: grep {vqsub\\.s64} %t | count 2 -; RUN: grep {vqsub\\.u8} %t | count 2 -; RUN: grep {vqsub\\.u16} %t | count 2 -; RUN: grep {vqsub\\.u32} %t | count 2 -; RUN: grep {vqsub\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqsubs8: +;CHECK: vqsub.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -16,6 +10,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqsubs16: +;CHECK: vqsub.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -23,6 +19,8 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqsubs32: +;CHECK: vqsub.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -30,6 +28,8 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqsubs64: +;CHECK: vqsub.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -37,6 +37,8 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vqsubu8: +;CHECK: vqsub.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -44,6 +46,8 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vqsubu16: +;CHECK: vqsub.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> 
@llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -51,6 +55,8 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vqsubu32: +;CHECK: vqsub.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -58,6 +64,8 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vqsubu64: +;CHECK: vqsub.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -65,6 +73,8 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqsubQs8: +;CHECK: vqsub.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -72,6 +82,8 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqsubQs16: +;CHECK: vqsub.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -79,6 +91,8 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqsubQs32: +;CHECK: vqsub.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -86,6 +100,8 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqsubQs64: +;CHECK: vqsub.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -93,6 +109,8 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vqsubQu8: +;CHECK: vqsub.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -100,6 +118,8 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vqsubQu16: +;CHECK: vqsub.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -107,6 +127,8 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vqsubQu32: +;CHECK: vqsub.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -114,6 +136,8 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vqsubQu64: +;CHECK: vqsub.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll new file mode 100644 index 0000000000000..99989e9d61448 --- /dev/null +++ 
b/test/CodeGen/ARM/vrec.ll @@ -0,0 +1,119 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind { +;CHECK: vrecpei32: +;CHECK: vrecpe.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind { +;CHECK: vrecpeQi32: +;CHECK: vrecpe.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +define <2 x float> @vrecpef32(<2 x float>* %A) nounwind { +;CHECK: vrecpef32: +;CHECK: vrecpe.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1) + ret <2 x float> %tmp2 +} + +define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind { +;CHECK: vrecpeQf32: +;CHECK: vrecpe.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1) + ret <4 x float> %tmp2 +} + +declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone + +define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vrecpsf32: +;CHECK: vrecps.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + %tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2) + ret <2 x float> %tmp3 +} + +define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vrecpsQf32: +;CHECK: vrecps.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = load <4 x float>* %B + %tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2) + ret <4 x float> %tmp3 +} + +declare <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone + +define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind { +;CHECK: vrsqrtei32: +;CHECK: vrsqrte.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1) + ret <2 x i32> %tmp2 +} + +define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind { +;CHECK: vrsqrteQi32: +;CHECK: vrsqrte.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1) + ret <4 x i32> %tmp2 +} + +define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind { +;CHECK: vrsqrtef32: +;CHECK: vrsqrte.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1) + ret <2 x float> %tmp2 +} + +define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind { +;CHECK: vrsqrteQf32: +;CHECK: vrsqrte.f32 + %tmp1 = load <4 x float>* %A + %tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1) + ret <4 x float> %tmp2 +} + +declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone + +declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone +declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone + +define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind { +;CHECK: vrsqrtsf32: +;CHECK: vrsqrts.f32 + %tmp1 = load <2 x float>* %A + %tmp2 = load <2 x float>* %B + 
%tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+ ret <2 x float> %tmp3
+}
+
+define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vrsqrtsQf32:
+;CHECK: vrsqrts.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+ ret <4 x float> %tmp3
+}
+
+declare <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
new file mode 100644
index 0000000000000..f0a04a4416452
--- /dev/null
+++ b/test/CodeGen/ARM/vrev.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define arm_apcscc <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8:
+;CHECK: vrev64.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev64D16:
+;CHECK: vrev64.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
+;CHECK: test_vrev64D32:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x i32> %tmp2
+}
+
+define arm_apcscc <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
+;CHECK: test_vrev64Df:
+;CHECK: vrev64.32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x float> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev64Q8:
+;CHECK: vrev64.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev64Q16:
+;CHECK: vrev64.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
+;CHECK: test_vrev64Q32:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %tmp2
+}
+
+define arm_apcscc <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
+;CHECK: test_vrev64Qf:
+;CHECK: vrev64.32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x float> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev32D8:
+;CHECK: vrev32.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
+;CHECK: test_vrev32D16:
+;CHECK: vrev32.16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i16> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev32Q8:
+;CHECK: vrev32.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+ ret <16 x i8> %tmp2
+}
+
+define arm_apcscc <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16:
+;CHECK: vrev32.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i16> %tmp2
+}
+
+define arm_apcscc <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev16D8:
+;CHECK: vrev16.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ ret <8 x i8> %tmp2
+}
+
+define arm_apcscc <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
+;CHECK: test_vrev16Q8:
+;CHECK: vrev16.8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+ ret <16 x i8> %tmp2
+}
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index 8c5c4aad18d83..f3cbec7457d06 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -1,30 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vshl\\.s8} %t | count 2
-; RUN: grep {vshl\\.s16} %t | count 2
-; RUN: grep {vshl\\.s32} %t | count 2
-; RUN: grep {vshl\\.s64} %t | count 2
-; RUN: grep {vshl\\.u8} %t | count 4
-; RUN: grep {vshl\\.u16} %t | count 4
-; RUN: grep {vshl\\.u32} %t | count 4
-; RUN: grep {vshl\\.u64} %t | count 4
-; RUN: grep {vshl\\.i8} %t | count 2
-; RUN: grep {vshl\\.i16} %t | count 2
-; RUN: grep {vshl\\.i32} %t | count 2
-; RUN: grep {vshl\\.i64} %t | count 2
-; RUN: grep {vshr\\.u8} %t | count 2
-; RUN: grep {vshr\\.u16} %t | count 2
-; RUN: grep {vshr\\.u32} %t | count 2
-; RUN: grep {vshr\\.u64} %t | count 2
-; RUN: grep {vshr\\.s8} %t | count 2
-; RUN: grep {vshr\\.s16} %t | count 2
-; RUN: grep {vshr\\.s32} %t | count 2
-; RUN: grep {vshr\\.s64} %t | count 2
-; RUN: grep {vneg\\.s8} %t | count 4
-; RUN: grep {vneg\\.s16} %t | count 4
-; RUN: grep {vneg\\.s32} %t | count 4
-; RUN: grep {vsub\\.i64} %t | count 4
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vshls8:
+;CHECK: vshl.u8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = shl <8 x i8> %tmp1, %tmp2
@@ -32,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vshls16:
+;CHECK: vshl.u16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = shl <4 x i16> %tmp1, %tmp2
@@ -39,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vshls32:
+;CHECK: vshl.u32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = shl <2 x i32> %tmp1, %tmp2
@@ -46,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vshls64:
+;CHECK: vshl.u64
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <1 x i64>* %B
 %tmp3 = shl <1 x i64> %tmp1, %tmp2
@@ -53,30 +37,40 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
+;CHECK: vshli8:
+;CHECK: vshl.i8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 ret <8 x i8> %tmp2
 }
 
 define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
+;CHECK: vshli16:
+;CHECK: vshl.i16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
 ret <4 x i16> %tmp2
 }
 
 define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
+;CHECK: vshli32:
+;CHECK: vshl.i32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
 ret <2
x i32> %tmp2 } define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { +;CHECK: vshli64: +;CHECK: vshl.i64 %tmp1 = load <1 x i64>* %A %tmp2 = shl <1 x i64> %tmp1, < i64 63 > ret <1 x i64> %tmp2 } define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQs8: +;CHECK: vshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = shl <16 x i8> %tmp1, %tmp2 @@ -84,6 +78,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQs16: +;CHECK: vshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = shl <8 x i16> %tmp1, %tmp2 @@ -91,6 +87,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQs32: +;CHECK: vshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = shl <4 x i32> %tmp1, %tmp2 @@ -98,6 +96,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQs64: +;CHECK: vshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = shl <2 x i64> %tmp1, %tmp2 @@ -105,30 +105,41 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { +;CHECK: vshlQi8: +;CHECK: vshl.i8 %tmp1 = load <16 x i8>* %A %tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 > ret <16 x i8> %tmp2 } define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { +;CHECK: vshlQi16: +;CHECK: vshl.i16 %tmp1 = load <8 x i16>* %A %tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 > ret <8 x i16> %tmp2 } define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { +;CHECK: vshlQi32: +;CHECK: vshl.i32 %tmp1 = load <4 x i32>* %A %tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 > ret <4 x i32> %tmp2 } define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { +;CHECK: vshlQi64: +;CHECK: vshl.i64 %tmp1 = load <2 x i64>* %A %tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 > ret <2 x i64> %tmp2 } define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vlshru8: +;CHECK: vneg.s8 +;CHECK: vshl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = lshr <8 x i8> %tmp1, %tmp2 @@ -136,6 +147,9 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vlshru16: +;CHECK: vneg.s16 +;CHECK: vshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = lshr <4 x i16> %tmp1, %tmp2 @@ -143,6 +157,9 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vlshru32: +;CHECK: vneg.s32 +;CHECK: vshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = lshr <2 x i32> %tmp1, %tmp2 @@ -150,6 +167,9 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vlshru64: +;CHECK: vsub.i64 +;CHECK: vshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = lshr <1 x i64> %tmp1, %tmp2 @@ -157,30 +177,41 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind { +;CHECK: vlshri8: +;CHECK: vshr.u8 %tmp1 = load <8 x i8>* %A %tmp2 = lshr <8 x i8> %tmp1, < i8 
8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <8 x i8> %tmp2 } define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind { +;CHECK: vlshri16: +;CHECK: vshr.u16 %tmp1 = load <4 x i16>* %A %tmp2 = lshr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > ret <4 x i16> %tmp2 } define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind { +;CHECK: vlshri32: +;CHECK: vshr.u32 %tmp1 = load <2 x i32>* %A %tmp2 = lshr <2 x i32> %tmp1, < i32 32, i32 32 > ret <2 x i32> %tmp2 } define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind { +;CHECK: vlshri64: +;CHECK: vshr.u64 %tmp1 = load <1 x i64>* %A %tmp2 = lshr <1 x i64> %tmp1, < i64 64 > ret <1 x i64> %tmp2 } define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vlshrQu8: +;CHECK: vneg.s8 +;CHECK: vshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = lshr <16 x i8> %tmp1, %tmp2 @@ -188,6 +219,9 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vlshrQu16: +;CHECK: vneg.s16 +;CHECK: vshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = lshr <8 x i16> %tmp1, %tmp2 @@ -195,6 +229,9 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vlshrQu32: +;CHECK: vneg.s32 +;CHECK: vshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = lshr <4 x i32> %tmp1, %tmp2 @@ -202,6 +239,9 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vlshrQu64: +;CHECK: vsub.i64 +;CHECK: vshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = lshr <2 x i64> %tmp1, %tmp2 @@ -209,30 +249,48 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind { +;CHECK: vlshrQi8: +;CHECK: vshr.u8 %tmp1 = load <16 x i8>* %A %tmp2 = lshr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <16 x i8> %tmp2 } define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind { +;CHECK: vlshrQi16: +;CHECK: vshr.u16 %tmp1 = load <8 x i16>* %A %tmp2 = lshr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > ret <8 x i16> %tmp2 } define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind { +;CHECK: vlshrQi32: +;CHECK: vshr.u32 %tmp1 = load <4 x i32>* %A %tmp2 = lshr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > ret <4 x i32> %tmp2 } define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind { +;CHECK: vlshrQi64: +;CHECK: vshr.u64 %tmp1 = load <2 x i64>* %A %tmp2 = lshr <2 x i64> %tmp1, < i64 64, i64 64 > ret <2 x i64> %tmp2 } +; Example that requires splitting and expanding a vector shift. 
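+; (Presumably a compile-only regression test: no CHECK lines follow, so the
+; point is that type legalization splits the <2 x i64> operand and expands the
+; shift without crashing, not that a particular instruction is emitted.)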
+define <2 x i64> @update(<2 x i64> %val) nounwind readnone { +entry: + %shr = lshr <2 x i64> %val, < i64 2, i64 2 > ; <<2 x i64>> [#uses=1] + ret <2 x i64> %shr +} + define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vashrs8: +;CHECK: vneg.s8 +;CHECK: vshl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = ashr <8 x i8> %tmp1, %tmp2 @@ -240,6 +298,9 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vashrs16: +;CHECK: vneg.s16 +;CHECK: vshl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = ashr <4 x i16> %tmp1, %tmp2 @@ -247,6 +308,9 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vashrs32: +;CHECK: vneg.s32 +;CHECK: vshl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = ashr <2 x i32> %tmp1, %tmp2 @@ -254,6 +318,9 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vashrs64: +;CHECK: vsub.i64 +;CHECK: vshl.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = ashr <1 x i64> %tmp1, %tmp2 @@ -261,30 +328,41 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vashri8(<8 x i8>* %A) nounwind { +;CHECK: vashri8: +;CHECK: vshr.s8 %tmp1 = load <8 x i8>* %A %tmp2 = ashr <8 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <8 x i8> %tmp2 } define <4 x i16> @vashri16(<4 x i16>* %A) nounwind { +;CHECK: vashri16: +;CHECK: vshr.s16 %tmp1 = load <4 x i16>* %A %tmp2 = ashr <4 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16 > ret <4 x i16> %tmp2 } define <2 x i32> @vashri32(<2 x i32>* %A) nounwind { +;CHECK: vashri32: +;CHECK: vshr.s32 %tmp1 = load <2 x i32>* %A %tmp2 = ashr <2 x i32> %tmp1, < i32 32, i32 32 > ret <2 x i32> %tmp2 } define <1 x i64> @vashri64(<1 x i64>* %A) nounwind { +;CHECK: vashri64: +;CHECK: vshr.s64 %tmp1 = load <1 x i64>* %A %tmp2 = ashr <1 x i64> %tmp1, < i64 64 > ret <1 x i64> %tmp2 } define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vashrQs8: +;CHECK: vneg.s8 +;CHECK: vshl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = ashr <16 x i8> %tmp1, %tmp2 @@ -292,6 +370,9 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vashrQs16: +;CHECK: vneg.s16 +;CHECK: vshl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = ashr <8 x i16> %tmp1, %tmp2 @@ -299,6 +380,9 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vashrQs32: +;CHECK: vneg.s32 +;CHECK: vshl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = ashr <4 x i32> %tmp1, %tmp2 @@ -306,6 +390,9 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vashrQs64: +;CHECK: vsub.i64 +;CHECK: vshl.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = ashr <2 x i64> %tmp1, %tmp2 @@ -313,24 +400,32 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind { +;CHECK: vashrQi8: +;CHECK: vshr.s8 %tmp1 = load <16 x i8>* %A %tmp2 = ashr <16 x i8> %tmp1, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 
8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > ret <16 x i8> %tmp2 } define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind { +;CHECK: vashrQi16: +;CHECK: vshr.s16 %tmp1 = load <8 x i16>* %A %tmp2 = ashr <8 x i16> %tmp1, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > ret <8 x i16> %tmp2 } define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind { +;CHECK: vashrQi32: +;CHECK: vshr.s32 %tmp1 = load <4 x i32>* %A %tmp2 = ashr <4 x i32> %tmp1, < i32 32, i32 32, i32 32, i32 32 > ret <4 x i32> %tmp2 } define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind { +;CHECK: vashrQi64: +;CHECK: vshr.s64 %tmp1 = load <2 x i64>* %A %tmp2 = ashr <2 x i64> %tmp1, < i64 64, i64 64 > ret <2 x i64> %tmp2 diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll index cb7cbb89ecdb3..3a4f8574e3977 100644 --- a/test/CodeGen/ARM/vshiftins.ll +++ b/test/CodeGen/ARM/vshiftins.ll @@ -1,14 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vsli\\.8} %t | count 2 -; RUN: grep {vsli\\.16} %t | count 2 -; RUN: grep {vsli\\.32} %t | count 2 -; RUN: grep {vsli\\.64} %t | count 2 -; RUN: grep {vsri\\.8} %t | count 2 -; RUN: grep {vsri\\.16} %t | count 2 -; RUN: grep {vsri\\.32} %t | count 2 -; RUN: grep {vsri\\.64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsli8: +;CHECK: vsli.8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) @@ -16,6 +10,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsli16: +;CHECK: vsli.16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) @@ -23,6 +19,8 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsli32: +;CHECK: vsli.32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >) @@ -30,6 +28,8 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsli64: +;CHECK: vsli.64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >) @@ -37,6 +37,8 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsliQ8: +;CHECK: vsli.8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) @@ -44,6 +46,8 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsliQ16: +;CHECK: vsli.16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) @@ 
-51,6 +55,8 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsliQ32: +;CHECK: vsli.32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) @@ -58,6 +64,8 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsliQ64: +;CHECK: vsli.64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >) @@ -65,6 +73,8 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsri8: +;CHECK: vsri.8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -72,6 +82,8 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsri16: +;CHECK: vsri.16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -79,6 +91,8 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsri32: +;CHECK: vsri.32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) @@ -86,6 +100,8 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsri64: +;CHECK: vsri.64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >) @@ -93,6 +109,8 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsriQ8: +;CHECK: vsri.8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -100,6 +118,8 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsriQ16: +;CHECK: vsri.16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -107,6 +127,8 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsriQ32: +;CHECK: vsri.32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) @@ -114,6 +136,8 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } 
define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsriQ64: +;CHECK: vsri.64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll index 993126ea578c0..818e71b8ff89b 100644 --- a/test/CodeGen/ARM/vshl.ll +++ b/test/CodeGen/ARM/vshl.ll @@ -1,26 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vshl\\.s8} %t | count 2 -; RUN: grep {vshl\\.s16} %t | count 2 -; RUN: grep {vshl\\.s32} %t | count 2 -; RUN: grep {vshl\\.s64} %t | count 2 -; RUN: grep {vshl\\.u8} %t | count 2 -; RUN: grep {vshl\\.u16} %t | count 2 -; RUN: grep {vshl\\.u32} %t | count 2 -; RUN: grep {vshl\\.u64} %t | count 2 -; RUN: grep {vshl\\.i8} %t | count 2 -; RUN: grep {vshl\\.i16} %t | count 2 -; RUN: grep {vshl\\.i32} %t | count 2 -; RUN: grep {vshl\\.i64} %t | count 2 -; RUN: grep {vshr\\.s8} %t | count 2 -; RUN: grep {vshr\\.s16} %t | count 2 -; RUN: grep {vshr\\.s32} %t | count 2 -; RUN: grep {vshr\\.s64} %t | count 2 -; RUN: grep {vshr\\.u8} %t | count 2 -; RUN: grep {vshr\\.u16} %t | count 2 -; RUN: grep {vshr\\.u32} %t | count 2 -; RUN: grep {vshr\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshls8: +;CHECK: vshl.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -28,6 +10,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshls16: +;CHECK: vshl.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -35,6 +19,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshls32: +;CHECK: vshl.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -42,6 +28,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshls64: +;CHECK: vshl.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -49,6 +37,8 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vshlu8: +;CHECK: vshl.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -56,6 +46,8 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vshlu16: +;CHECK: vshl.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -63,6 +55,8 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vshlu32: +;CHECK: vshl.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -70,6 +64,8 
@@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vshlu64: +;CHECK: vshl.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) @@ -77,6 +73,8 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQs8: +;CHECK: vshl.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -84,6 +82,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQs16: +;CHECK: vshl.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -91,6 +91,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQs32: +;CHECK: vshl.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -98,6 +100,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQs64: +;CHECK: vshl.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -105,6 +109,8 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vshlQu8: +;CHECK: vshl.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -112,6 +118,8 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vshlQu16: +;CHECK: vshl.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -119,6 +127,8 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vshlQu32: +;CHECK: vshl.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -126,6 +136,8 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vshlQu64: +;CHECK: vshl.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) @@ -136,48 +148,64 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { ; Test a mix of both signed and unsigned intrinsics. 
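+; With a non-negative constant splat for the shift amount, the immediate form
+; is selected, so the signed and unsigned intrinsics below should print the
+; same vshl.i mnemonic. As a sketch only (the @vshli8_neg name is hypothetical
+; and not part of this test), a negative splat instead encodes a right shift
+; by immediate, as the vshr tests later in this file exercise:
+;
+; define <8 x i8> @vshli8_neg(<8 x i8>* %A) nounwind {
+;   %tmp1 = load <8 x i8>* %A
+;   ; a splat of -8 means "shift right by 8" and selects vshr.s8
+;   %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
+;   ret <8 x i8> %tmp2
+; }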
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind { +;CHECK: vshli8: +;CHECK: vshl.i8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshli16(<4 x i16>* %A) nounwind { +;CHECK: vshli16: +;CHECK: vshl.i16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshli32(<2 x i32>* %A) nounwind { +;CHECK: vshli32: +;CHECK: vshl.i32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i32> %tmp2 } define <1 x i64> @vshli64(<1 x i64>* %A) nounwind { +;CHECK: vshli64: +;CHECK: vshl.i64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >) ret <1 x i64> %tmp2 } define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind { +;CHECK: vshlQi8: +;CHECK: vshl.i8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <16 x i8> %tmp2 } define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind { +;CHECK: vshlQi16: +;CHECK: vshl.i16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind { +;CHECK: vshlQi32: +;CHECK: vshl.i32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { +;CHECK: vshlQi64: +;CHECK: vshl.i64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >) ret <2 x i64> %tmp2 @@ -186,96 +214,128 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind { ; Right shift by immediate: define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind { +;CHECK: vshrs8: +;CHECK: vshr.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind { +;CHECK: vshrs16: +;CHECK: vshr.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind { +;CHECK: vshrs32: +;CHECK: vshr.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind { +;CHECK: vshrs64: +;CHECK: vshr.s64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } define <8 x i8> @vshru8(<8 x i8>* %A) nounwind { +;CHECK: vshru8: +;CHECK: vshr.u8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshru16(<4 x i16>* %A) nounwind { +;CHECK: vshru16: +;CHECK: vshr.u16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x 
i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshru32(<2 x i32>* %A) nounwind { +;CHECK: vshru32: +;CHECK: vshr.u32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) ret <2 x i32> %tmp2 } define <1 x i64> @vshru64(<1 x i64>* %A) nounwind { +;CHECK: vshru64: +;CHECK: vshr.u64 %tmp1 = load <1 x i64>* %A %tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) ret <1 x i64> %tmp2 } define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind { +;CHECK: vshrQs8: +;CHECK: vshr.s8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind { +;CHECK: vshrQs16: +;CHECK: vshr.s16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind { +;CHECK: vshrQs32: +;CHECK: vshr.s32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind { +;CHECK: vshrQs64: +;CHECK: vshr.s64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 } define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind { +;CHECK: vshrQu8: +;CHECK: vshr.u8 %tmp1 = load <16 x i8>* %A %tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) ret <16 x i8> %tmp2 } define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind { +;CHECK: vshrQu16: +;CHECK: vshr.u16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind { +;CHECK: vshrQu32: +;CHECK: vshr.u32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind { +;CHECK: vshrQu64: +;CHECK: vshr.u64 %tmp1 = load <2 x i64>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) ret <2 x i64> %tmp2 @@ -300,3 +360,295 @@ declare <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind re declare <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrshls8: +;CHECK: vrshl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: 
vrshls16: +;CHECK: vrshl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrshls32: +;CHECK: vrshl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrshls64: +;CHECK: vrshl.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrshlu8: +;CHECK: vrshl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrshlu16: +;CHECK: vrshl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrshlu32: +;CHECK: vrshl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i32> %tmp3 +} + +define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrshlu64: +;CHECK: vrshl.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) + ret <1 x i64> %tmp3 +} + +define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrshlQs8: +;CHECK: vrshl.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrshlQs16: +;CHECK: vrshl.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrshlQs32: +;CHECK: vrshl.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrshlQs64: +;CHECK: vrshl.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrshlQu8: +;CHECK: vrshl.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrshlQu16: +;CHECK: vrshl.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x 
i16> %tmp3 +} + +define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrshlQu32: +;CHECK: vrshl.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrshlQu64: +;CHECK: vrshl.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind { +;CHECK: vrshrs8: +;CHECK: vrshr.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind { +;CHECK: vrshrs16: +;CHECK: vrshr.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind { +;CHECK: vrshrs32: +;CHECK: vrshr.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind { +;CHECK: vrshrs64: +;CHECK: vrshr.s64 + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) + ret <1 x i64> %tmp2 +} + +define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind { +;CHECK: vrshru8: +;CHECK: vrshr.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind { +;CHECK: vrshru16: +;CHECK: vrshr.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind { +;CHECK: vrshru32: +;CHECK: vrshr.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >) + ret <2 x i32> %tmp2 +} + +define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind { +;CHECK: vrshru64: +;CHECK: vrshr.u64 + %tmp1 = load <1 x i64>* %A + %tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >) + ret <1 x i64> %tmp2 +} + +define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind { +;CHECK: vrshrQs8: +;CHECK: vrshr.s8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind { +;CHECK: vrshrQs16: +;CHECK: vrshr.s16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind { +;CHECK: vrshrQs32: +;CHECK: vrshr.s32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 
-32, i32 -32 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind { +;CHECK: vrshrQs64: +;CHECK: vrshr.s64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) + ret <2 x i64> %tmp2 +} + +define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind { +;CHECK: vrshrQu8: +;CHECK: vrshr.u8 + %tmp1 = load <16 x i8>* %A + %tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) + ret <16 x i8> %tmp2 +} + +define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind { +;CHECK: vrshrQu16: +;CHECK: vrshr.u16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) + ret <8 x i16> %tmp2 +} + +define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind { +;CHECK: vrshrQu32: +;CHECK: vrshr.u32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) + ret <4 x i32> %tmp2 +} + +define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind { +;CHECK: vrshrQu64: +;CHECK: vrshr.u64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >) + ret <2 x i64> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +declare <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll index f81c09a7b9d31..8e85b98f49b10 100644 --- a/test/CodeGen/ARM/vshll.ll +++ b/test/CodeGen/ARM/vshll.ll @@ -1,45 +1,48 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vshll\\.s8} %t | count 1 -; RUN: grep {vshll\\.s16} %t | count 1 -; RUN: grep {vshll\\.s32} %t | count 1 -; RUN: grep {vshll\\.u8} %t | count 1 -; RUN: grep {vshll\\.u16} %t | count 1 -; RUN: grep {vshll\\.u32} %t | count 1 -; RUN: grep {vshll\\.i8} %t | count 1 -; RUN: grep {vshll\\.i16} %t | count 1 -; RUN: grep {vshll\\.i32} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck 
%s define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind { +;CHECK: vshlls8: +;CHECK: vshll.s8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind { +;CHECK: vshlls16: +;CHECK: vshll.s16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftls.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind { +;CHECK: vshlls32: +;CHECK: vshll.s32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i64> %tmp2 } define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind { +;CHECK: vshllu8: +;CHECK: vshll.u8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftlu.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind { +;CHECK: vshllu16: +;CHECK: vshll.u16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { +;CHECK: vshllu32: +;CHECK: vshll.u32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftlu.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >) ret <2 x i64> %tmp2 @@ -48,18 +51,24 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind { ; The following tests use the maximum shift count, so the signedness is ; irrelevant. Test both signed and unsigned versions. define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind { +;CHECK: vshlli8: +;CHECK: vshll.i8 %tmp1 = load <8 x i8>* %A %tmp2 = call <8 x i16> @llvm.arm.neon.vshiftls.v8i16(<8 x i8> %tmp1, <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 >) ret <8 x i16> %tmp2 } define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind { +;CHECK: vshlli16: +;CHECK: vshll.i16 %tmp1 = load <4 x i16>* %A %tmp2 = call <4 x i32> @llvm.arm.neon.vshiftlu.v4i32(<4 x i16> %tmp1, <4 x i16> < i16 16, i16 16, i16 16, i16 16 >) ret <4 x i32> %tmp2 } define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind { +;CHECK: vshlli32: +;CHECK: vshll.i32 %tmp1 = load <2 x i32>* %A %tmp2 = call <2 x i64> @llvm.arm.neon.vshiftls.v2i64(<2 x i32> %tmp1, <2 x i32> < i32 32, i32 32 >) ret <2 x i64> %tmp2 diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll index bc640cbbca61c..e2544f424a2c3 100644 --- a/test/CodeGen/ARM/vshrn.ll +++ b/test/CodeGen/ARM/vshrn.ll @@ -1,21 +1,24 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vshrn\\.i16} %t | count 1 -; RUN: grep {vshrn\\.i32} %t | count 1 -; RUN: grep {vshrn\\.i64} %t | count 1 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind { +;CHECK: vshrns8: +;CHECK: vshrn.i16 %tmp1 = load <8 x i16>* %A %tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) ret <8 x i8> %tmp2 } define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind { +;CHECK: vshrns16: +;CHECK: vshrn.i32 %tmp1 = load <4 x i32>* %A %tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) ret <4 x i16> %tmp2 } define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind { +;CHECK: vshrns32: +;CHECK: vshrn.i64 %tmp1 = load <2 x 
i64>* %A %tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) ret <2 x i32> %tmp2 @@ -24,3 +27,31 @@ define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind { declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind { +;CHECK: vrshrns8: +;CHECK: vrshrn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >) + ret <8 x i8> %tmp2 +} + +define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind { +;CHECK: vrshrns16: +;CHECK: vrshrn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >) + ret <4 x i16> %tmp2 +} + +define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind { +;CHECK: vrshrns32: +;CHECK: vrshrn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >) + ret <2 x i32> %tmp2 +} + +declare <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll index e2829dcddae61..acb672d00fa26 100644 --- a/test/CodeGen/ARM/vsra.ll +++ b/test/CodeGen/ARM/vsra.ll @@ -1,22 +1,8 @@ -; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t -; RUN: grep {vsra\\.s8} %t | count 2 -; RUN: grep {vsra\\.s16} %t | count 2 -; RUN: grep {vsra\\.s32} %t | count 2 -; RUN: grep {vsra\\.s64} %t | count 2 -; RUN: grep {vsra\\.u8} %t | count 2 -; RUN: grep {vsra\\.u16} %t | count 2 -; RUN: grep {vsra\\.u32} %t | count 2 -; RUN: grep {vsra\\.u64} %t | count 2 -; RUN: grep {vrsra\\.s8} %t | count 2 -; RUN: grep {vrsra\\.s16} %t | count 2 -; RUN: grep {vrsra\\.s32} %t | count 2 -; RUN: grep {vrsra\\.s64} %t | count 2 -; RUN: grep {vrsra\\.u8} %t | count 2 -; RUN: grep {vrsra\\.u16} %t | count 2 -; RUN: grep {vrsra\\.u32} %t | count 2 -; RUN: grep {vrsra\\.u64} %t | count 2 +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsras8: +;CHECK: vsra.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = ashr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -25,6 +11,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsras16: +;CHECK: vsra.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = ashr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > @@ -33,6 +21,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsras32: +;CHECK: vsra.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = ashr <2 x i32> %tmp2, < i32 32, i32 32 > @@ -41,6 +31,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsras64: +;CHECK: vsra.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 
= ashr <1 x i64> %tmp2, < i64 64 > @@ -49,6 +41,8 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsraQs8: +;CHECK: vsra.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = ashr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -57,6 +51,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsraQs16: +;CHECK: vsra.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = ashr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > @@ -65,6 +61,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsraQs32: +;CHECK: vsra.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = ashr <4 x i32> %tmp2, < i32 32, i32 32, i32 32, i32 32 > @@ -73,6 +71,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsraQs64: +;CHECK: vsra.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = ashr <2 x i64> %tmp2, < i64 64, i64 64 > @@ -81,6 +81,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsrau8: +;CHECK: vsra.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = lshr <8 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -89,6 +91,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsrau16: +;CHECK: vsra.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = lshr <4 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16 > @@ -97,6 +101,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsrau32: +;CHECK: vsra.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = lshr <2 x i32> %tmp2, < i32 32, i32 32 > @@ -105,6 +111,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vsrau64: +;CHECK: vsra.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = lshr <1 x i64> %tmp2, < i64 64 > @@ -113,6 +121,8 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsraQu8: +;CHECK: vsra.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = lshr <16 x i8> %tmp2, < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > @@ -121,6 +131,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsraQu16: +;CHECK: vsra.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = lshr <8 x i16> %tmp2, < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > @@ -129,6 +141,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsraQu32: +;CHECK: vsra.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = lshr <4 x i32> 
%tmp2, < i32 32, i32 32, i32 32, i32 32 > @@ -137,6 +151,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsraQu64: +;CHECK: vsra.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = lshr <2 x i64> %tmp2, < i64 64, i64 64 > @@ -145,6 +161,8 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrsras8: +;CHECK: vrsra.s8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -153,6 +171,8 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrsras16: +;CHECK: vrsra.s16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -161,6 +181,8 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrsras32: +;CHECK: vrsra.s32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) @@ -169,6 +191,8 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrsras64: +;CHECK: vrsra.s64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) @@ -177,6 +201,8 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vrsrau8: +;CHECK: vrsra.u8 %tmp1 = load <8 x i8>* %A %tmp2 = load <8 x i8>* %B %tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -185,6 +211,8 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind { } define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vrsrau16: +;CHECK: vrsra.u16 %tmp1 = load <4 x i16>* %A %tmp2 = load <4 x i16>* %B %tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -193,6 +221,8 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind { } define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vrsrau32: +;CHECK: vrsra.u32 %tmp1 = load <2 x i32>* %A %tmp2 = load <2 x i32>* %B %tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >) @@ -201,6 +231,8 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind { } define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { +;CHECK: vrsrau64: +;CHECK: vrsra.u64 %tmp1 = load <1 x i64>* %A %tmp2 = load <1 x i64>* %B %tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >) @@ -209,6 +241,8 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind { } define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrsraQs8: +;CHECK: vrsra.s8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 
-8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -217,6 +251,8 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsraQs16: +;CHECK: vrsra.s16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -225,6 +261,8 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrsraQs32: +;CHECK: vrsra.s32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) @@ -233,6 +271,8 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsraQs64: +;CHECK: vrsra.s64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) @@ -241,6 +281,8 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vrsraQu8: +;CHECK: vrsra.u8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >) @@ -249,6 +291,8 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsraQu16: +;CHECK: vrsra.u16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >) @@ -257,6 +301,8 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrsraQu32: +;CHECK: vrsra.u32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >) @@ -265,6 +311,8 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsraQu64: +;CHECK: vrsra.u64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >) diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll new file mode 100644 index 0000000000000..602b124ffad9a --- /dev/null +++ b/test/CodeGen/ARM/vst1.ll @@ -0,0 +1,93 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst1i8: +;CHECK: vst1.8 + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1) + ret void +} + +define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst1i16: +;CHECK: vst1.16 + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1) + ret void +} + +define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst1i32: +;CHECK: vst1.32 
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst1f:
+;CHECK: vst1.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst1i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst1Qi8:
+;CHECK: vst1.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst1Qi16:
+;CHECK: vst1.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst1Qi32:
+;CHECK: vst1.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst1Qf:
+;CHECK: vst1.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
+ ret void
+}
+
+define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
+;CHECK: vst1Qi64:
+;CHECK: vst1.64
+ %tmp1 = load <2 x i64>* %B
+ call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i16*, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i32*, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(float*, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i64*, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i16*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i32*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(float*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i64*, <2 x i64>) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
new file mode 100644
index 0000000000000..17d6bee0f56c4
--- /dev/null
+++ b/test/CodeGen/ARM/vst2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2i8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2i16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2i32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2f:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst2i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst2.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst2Qi8:
+;CHECK: vst2.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2Qi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2Qi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2Qf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i16*, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i32*, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(float*, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v1i64(i64*, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v8i16(i16*, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v4i32(i32*, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(float*, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
new file mode 100644
index 0000000000000..a831a0c08ce9a
--- /dev/null
+++ b/test/CodeGen/ARM/vst3.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3i8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3i16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3i32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3f:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst3i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst3.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst3Qi8:
+;CHECK: vst3.8
+;CHECK: vst3.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3Qi16:
+;CHECK: vst3.16
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3Qi32:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3Qf:
+;CHECK: vst3.32
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(float*, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v1i64(i64*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v8i16(i16*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v4i32(i32*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v4f32(float*, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
new file mode 100644
index 0000000000000..d92c017c30b29
--- /dev/null
+++ b/test/CodeGen/ARM/vst4.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4i8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4i16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4i32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4f(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4f:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ ret void
+}
+
+define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
+;CHECK: vst4i64:
+;CHECK: vst1.64
+ %tmp1 = load <1 x i64>* %B
+ call void @llvm.arm.neon.vst4.v1i64(i64* %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ ret void
+}
+
+define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
+;CHECK: vst4Qi8:
+;CHECK: vst4.8
+;CHECK: vst4.8
+ %tmp1 = load <16 x i8>* %B
+ call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ ret void
+}
+
+define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4Qi16:
+;CHECK: vst4.16
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ ret void
+}
+
+define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4Qi32:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ ret void
+}
+
+define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4Qf:
+;CHECK: vst4.32
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(float*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v1i64(i64*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v8i16(i16*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i32*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v4f32(float*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
new file mode 100644
index 0000000000000..3bfb14f17b775
--- /dev/null
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -0,0 +1,197 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst2lanei8:
+;CHECK: vst2.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst2lanei16:
+;CHECK: vst2.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst2lanei32:
+;CHECK: vst2.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst2lanef:
+;CHECK: vst2.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst2laneQi16:
+;CHECK: vst2.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst2lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst2laneQi32:
+;CHECK: vst2.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst2lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst2laneQf:
+;CHECK: vst2.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst2lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2f32(float*, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst2lane.v8i16(i16*, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i32(i32*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4f32(float*, <4 x float>, <4 x float>, i32) nounwind
+
+define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst3lanei8:
+;CHECK: vst3.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst3lanei16:
+;CHECK: vst3.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst3lanei32:
+;CHECK: vst3.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst3lanef:
+;CHECK: vst3.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst3laneQi16:
+;CHECK: vst3.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst3lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
+ ret void
+}
+
+define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst3laneQi32:
+;CHECK: vst3.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst3lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
+ ret void
+}
+
+define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst3laneQf:
+;CHECK: vst3.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst3lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2f32(float*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst3lane.v8i16(i16*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i32(i32*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4f32(float*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+
+
+define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
+;CHECK: vst4lanei8:
+;CHECK: vst4.8
+ %tmp1 = load <8 x i8>* %B
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
+;CHECK: vst4lanei16:
+;CHECK: vst4.16
+ %tmp1 = load <4 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vst4lanei32:
+;CHECK: vst4.32
+ %tmp1 = load <2 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
+;CHECK: vst4lanef:
+;CHECK: vst4.32
+ %tmp1 = load <2 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ ret void
+}
+
+define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
+;CHECK: vst4laneQi16:
+;CHECK: vst4.16
+ %tmp1 = load <8 x i16>* %B
+ call void @llvm.arm.neon.vst4lane.v8i16(i16* %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
+ ret void
+}
+
+define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
+;CHECK: vst4laneQi32:
+;CHECK: vst4.32
+ %tmp1 = load <4 x i32>* %B
+ call void @llvm.arm.neon.vst4lane.v4i32(i32* %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ ret void
+}
+
+define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
+;CHECK: vst4laneQf:
+;CHECK: vst4.32
+ %tmp1 = load <4 x float>* %B
+ call void @llvm.arm.neon.vst4lane.v4f32(float* %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2f32(float*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+
+declare void @llvm.arm.neon.vst4lane.v8i16(i16*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i32(i32*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4f32(float*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 85dea41835f85..8f0055fd41037 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -1,11 +1,8 @@
-; RUN: llvm-as < %s | llc -march=arm -mattr=+neon > %t
-; RUN: grep {vsub\\.i8} %t | count 2
-; RUN: grep {vsub\\.i16} %t | count 2
-; RUN: grep {vsub\\.i32} %t | count 2
-; RUN: grep {vsub\\.i64} %t | count 2
-; RUN: grep {vsub\\.f32} %t | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
 
 define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vsubi8:
+;CHECK: vsub.i8
 %tmp1 = load <8 x i8>* %A
 %tmp2 = load <8 x i8>* %B
 %tmp3 = sub <8 x i8> %tmp1, %tmp2
@@ -13,6 +10,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 }
 
 define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vsubi16:
+;CHECK: vsub.i16
 %tmp1 = load <4 x i16>* %A
 %tmp2 = load <4 x i16>* %B
 %tmp3 = sub <4 x i16> %tmp1, %tmp2
@@ -20,6 +19,8 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 }
 
 define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vsubi32:
+;CHECK: vsub.i32
 %tmp1 = load <2 x i32>* %A
 %tmp2 = load <2 x i32>* %B
 %tmp3 = sub <2 x i32> %tmp1, %tmp2
@@ -27,6 +28,8 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 }
 
 define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
+;CHECK: vsubi64:
+;CHECK: vsub.i64
 %tmp1 = load <1 x i64>* %A
 %tmp2 = load <1 x i64>* %B
 %tmp3 = sub <1 x i64> %tmp1, %tmp2
@@ -34,6 +37,8 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 }
 
 define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vsubf32:
+;CHECK: vsub.f32
 %tmp1 = load <2 x float>* %A
 %tmp2 = load <2 x float>* %B
 %tmp3 = sub <2 x float> %tmp1, %tmp2
@@ -41,6 +46,8 @@
define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind { } define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { +;CHECK: vsubQi8: +;CHECK: vsub.i8 %tmp1 = load <16 x i8>* %A %tmp2 = load <16 x i8>* %B %tmp3 = sub <16 x i8> %tmp1, %tmp2 @@ -48,6 +55,8 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind { } define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsubQi16: +;CHECK: vsub.i16 %tmp1 = load <8 x i16>* %A %tmp2 = load <8 x i16>* %B %tmp3 = sub <8 x i16> %tmp1, %tmp2 @@ -55,6 +64,8 @@ define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind { } define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsubQi32: +;CHECK: vsub.i32 %tmp1 = load <4 x i32>* %A %tmp2 = load <4 x i32>* %B %tmp3 = sub <4 x i32> %tmp1, %tmp2 @@ -62,6 +73,8 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { } define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsubQi64: +;CHECK: vsub.i64 %tmp1 = load <2 x i64>* %A %tmp2 = load <2 x i64>* %B %tmp3 = sub <2 x i64> %tmp1, %tmp2 @@ -69,8 +82,196 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind { } define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind { +;CHECK: vsubQf32: +;CHECK: vsub.f32 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = sub <4 x float> %tmp1, %tmp2 ret <4 x float> %tmp3 } + +define <8 x i8> @vsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vsubhni16: +;CHECK: vsubhn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vsubhni32: +;CHECK: vsubhn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vsubhni64: +;CHECK: vsubhn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind { +;CHECK: vrsubhni16: +;CHECK: vrsubhn.i16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2) + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind { +;CHECK: vrsubhni32: +;CHECK: vrsubhn.i32 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2) + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind { +;CHECK: vrsubhni64: +;CHECK: vrsubhn.i64 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2) + ret <2 x i32> %tmp3 +} + +declare <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone +declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind 
readnone +declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone + +define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubls8: +;CHECK: vsubl.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubls16: +;CHECK: vsubl.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubls32: +;CHECK: vsubl.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsublu8: +;CHECK: vsubl.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsublu16: +;CHECK: vsubl.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsublu32: +;CHECK: vsubl.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubws8: +;CHECK: vsubw.s8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubws16: +;CHECK: vsubw.s16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubws32: +;CHECK: vsubw.s32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind { +;CHECK: vsubwu8: +;CHECK: vsubw.u8 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind { +;CHECK: vsubwu16: +;CHECK: vsubw.u16 + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 
= call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + +define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind { +;CHECK: vsubwu32: +;CHECK: vsubw.u32 + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + +declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll new file mode 100644 index 0000000000000..926498739e8ab --- /dev/null +++ b/test/CodeGen/ARM/vtbl.ll @@ -0,0 +1,109 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } + +define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vtbl1: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i8> %tmp3 +} + +define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind { +;CHECK: vtbl2: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x2_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4) + ret <8 x i8> %tmp5 +} + +define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind { +;CHECK: vtbl3: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x3_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5) + ret <8 x i8> %tmp6 +} + +define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind { +;CHECK: vtbl4: +;CHECK: vtbl.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load %struct.__neon_int8x8x4_t* %B + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6) + ret <8 x i8> %tmp7 +} + +define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: vtbx1: +;CHECK: vtbx.8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3) + ret <8 x i8> %tmp4 +} + +define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind { +;CHECK: vtbx2: +;CHECK: vtbx.8 + %tmp1 = load <8 x i8>* %A 
+ %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
+ %tmp5 = load <8 x i8>* %C
+ %tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
+ ret <8 x i8> %tmp6
+}
+
+define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx3:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
+ %tmp6 = load <8 x i8>* %C
+ %tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
+ ret <8 x i8> %tmp7
+}
+
+define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
+;CHECK: vtbx4:
+;CHECK: vtbx.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
+ %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
+ %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
+ %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
+ %tmp7 = load <8 x i8>* %C
+ %tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+ ret <8 x i8> %tmp8
+}
+
+declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+
+declare <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
new file mode 100644
index 0000000000000..5122b0981e961
--- /dev/null
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -0,0 +1,97 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vtrni16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK: vtrni32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = load <2 x i32>* %B
+ %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
+;CHECK: vtrnf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <2 x float>* %A
+ %tmp2 = load <2 x float>* %B
+ %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
+ %tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
+ %tmp5 = add <2 x float> %tmp3, %tmp4
+ ret <2 x float> %tmp5
+}
+
+define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vtrnQi8:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vtrnQi32:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vtrnQf:
+;CHECK: vtrn.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
new file mode 100644
index 0000000000000..e531718d94aa1
--- /dev/null
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vuzpi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VUZP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vuzpQi8:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vuzpQi32:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vuzpQf:
+;CHECK: vuzp.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
new file mode 100644
index 0000000000000..32f7e0d02c446
--- /dev/null
+++ b/test/CodeGen/ARM/vzip.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK: vzipi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <4 x i16>* %A
+ %tmp2 = load <4 x i16>* %B
+ %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.
+
+define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vzipQi16:
+;CHECK: vzip.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK: vzipQi32:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.i32
+ %tmp1 = load <4 x i32>* %A
+ %tmp2 = load <4 x i32>* %B
+ %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
+;CHECK: vzipQf:
+;CHECK: vzip.32
+;CHECK-NEXT: vadd.f32
+ %tmp1 = load <4 x float>* %A
+ %tmp2 = load <4 x float>* %B
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %tmp5 = add <4 x float> %tmp3, %tmp4
+ ret <4 x float> %tmp5
+}
diff --git a/test/CodeGen/ARM/weak.ll b/test/CodeGen/ARM/weak.ll
index dadd1b9767985..5ac4b8c061d8a 100644
--- a/test/CodeGen/ARM/weak.ll
+++ b/test/CodeGen/ARM/weak.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*f
-; RUN: llvm-as < %s | llc -march=arm | grep .weak.*h
+; RUN: llc < %s -march=arm | grep .weak.*f
+; RUN: llc < %s -march=arm | grep .weak.*h
 
 define weak i32 @f() {
 entry:
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
index a57a76707ce69..cf327bbf5c876 100644
--- a/test/CodeGen/ARM/weak2.ll
+++ b/test/CodeGen/ARM/weak2.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=arm | grep .weak
+; RUN: llc < %s -march=arm | grep .weak
 
 define i32 @f(i32 %a) {
 entry:
diff --git a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
index c96b14ac97e59..87d992836bc3c 100644
--- a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
+++ b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
@@ -1,5 +1,5 @@
 ; There should be exactly two calls here (memset and malloc), no more.
-; RUN: llvm-as < %s | llc -march=alpha | grep jsr | count 2 +; RUN: llc < %s -march=alpha | grep jsr | count 2 %typedef.bc_struct = type opaque declare void @llvm.memset.i64(i8*, i8, i64, i32) diff --git a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll index b45c2a44388e1..4b3d022c1d8d4 100644 --- a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll +++ b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll @@ -1,5 +1,5 @@ ; This shouldn't crash -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha @.str_4 = external global [44 x i8] ; <[44 x i8]*> [#uses=0] diff --git a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll index f89997e0bf6b5..65d2a8d02ac8d 100644 --- a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll +++ b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll @@ -1,5 +1,5 @@ ; The global symbol should be legalized -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha target datalayout = "e-p:64:64" %struct.LIST_HELP = type { %struct.LIST_HELP*, i8* } diff --git a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll index 05ebe1eb888b2..45587f08fd6cf 100644 --- a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll +++ b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll @@ -1,5 +1,5 @@ ; This shouldn't crash -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha target datalayout = "e-p:64:64" target triple = "alphaev6-unknown-linux-gnu" diff --git a/test/CodeGen/Alpha/2006-04-04-zextload.ll b/test/CodeGen/Alpha/2006-04-04-zextload.ll index f3ff5b1750fe3..2b28903c50148 100644 --- a/test/CodeGen/Alpha/2006-04-04-zextload.ll +++ b/test/CodeGen/Alpha/2006-04-04-zextload.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha target datalayout = "e-p:64:64" target triple = "alphaev67-unknown-linux-gnu" diff --git a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll index 6b55047579133..5d31bc3798dc1 100644 --- a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll +++ b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha target datalayout = "e-p:64:64" target triple = "alphaev67-unknown-linux-gnu" diff --git a/test/CodeGen/Alpha/2006-11-01-vastart.ll b/test/CodeGen/Alpha/2006-11-01-vastart.ll index 3f42eda4beb5b..14e0bccc8482c 100644 --- a/test/CodeGen/Alpha/2006-11-01-vastart.ll +++ b/test/CodeGen/Alpha/2006-11-01-vastart.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha target datalayout = "e-p:64:64" target triple = "alphaev67-unknown-linux-gnu" diff --git a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll index 3eac13d2b7ac1..b537e250ad869 100644 --- a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll +++ b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha ;FIXME: this should produce no mul inst. 
But not crashing will have to do for now diff --git a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll index 9d814da982d46..1a4b40e2da2c8 100644 --- a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll +++ b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" target triple = "alphaev6-unknown-linux-gnu" diff --git a/test/CodeGen/Alpha/2008-11-12-Add128.ll b/test/CodeGen/Alpha/2008-11-12-Add128.ll index e6e57464cb216..8b9b603fe6feb 100644 --- a/test/CodeGen/Alpha/2008-11-12-Add128.ll +++ b/test/CodeGen/Alpha/2008-11-12-Add128.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc +; RUN: llc < %s ; PR3044 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128" target triple = "alphaev6-unknown-linux-gnu" diff --git a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll new file mode 100644 index 0000000000000..cfbf7fcdfd900 --- /dev/null +++ b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll @@ -0,0 +1,6 @@ +; RUN: llc < %s -march=alpha + +define i1 @a(float %x) { + %r = fcmp ult float %x, 1.0 + ret i1 %r +} diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll index 260584b79048d..24a74188f8c09 100644 --- a/test/CodeGen/Alpha/add.ll +++ b/test/CodeGen/Alpha/add.ll @@ -1,6 +1,6 @@ ;test all the shifted and signextending adds and subs with and without consts ; -; RUN: llvm-as < %s | llc -march=alpha -o %t.s -f +; RUN: llc < %s -march=alpha -o %t.s ; RUN: grep { addl} %t.s | count 2 ; RUN: grep { addq} %t.s | count 2 ; RUN: grep { subl} %t.s | count 2 diff --git a/test/CodeGen/Alpha/add128.ll b/test/CodeGen/Alpha/add128.ll index 61d020890e89b..fa3b949fc7b8c 100644 --- a/test/CodeGen/Alpha/add128.ll +++ b/test/CodeGen/Alpha/add128.ll @@ -1,6 +1,6 @@ ;test for ADDC and ADDE expansion ; -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha define i128 @add128(i128 %x, i128 %y) { entry: diff --git a/test/CodeGen/Alpha/bic.ll b/test/CodeGen/Alpha/bic.ll index 6e635119e569e..9f0035097b0ef 100644 --- a/test/CodeGen/Alpha/bic.ll +++ b/test/CodeGen/Alpha/bic.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the bic instruction -; RUN: llvm-as < %s | llc -march=alpha | grep {bic} +; RUN: llc < %s -march=alpha | grep {bic} define i64 @bar(i64 %x, i64 %y) { entry: diff --git a/test/CodeGen/Alpha/bsr.ll b/test/CodeGen/Alpha/bsr.ll index d4618577a0444..14f6b46c54907 100644 --- a/test/CodeGen/Alpha/bsr.ll +++ b/test/CodeGen/Alpha/bsr.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens the bsr instruction -; RUN: llvm-as < %s | llc -march=alpha | grep bsr +; RUN: llc < %s -march=alpha | grep bsr define internal i64 @abc(i32 %x) { %tmp.2 = add i32 %x, -1 ; [#uses=1] diff --git a/test/CodeGen/Alpha/call_adj.ll b/test/CodeGen/Alpha/call_adj.ll index ee8cda840e0ac..24e97a92b86b1 100644 --- a/test/CodeGen/Alpha/call_adj.ll +++ b/test/CodeGen/Alpha/call_adj.ll @@ -1,5 +1,5 @@ ;All this should do is not crash -;RUN: llvm-as < %s | llc -march=alpha +;RUN: llc < %s -march=alpha target datalayout = "e-p:64:64" target triple = "alphaev67-unknown-linux-gnu" diff --git a/test/CodeGen/Alpha/cmov.ll b/test/CodeGen/Alpha/cmov.ll index 08e1dad2c0e77..9b655f03efdcf 100644 --- a/test/CodeGen/Alpha/cmov.ll +++ 
b/test/CodeGen/Alpha/cmov.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=alpha | not grep cmovlt -; RUN: llvm-as < %s | llc -march=alpha | grep cmoveq +; RUN: llc < %s -march=alpha | not grep cmovlt +; RUN: llc < %s -march=alpha | grep cmoveq define i64 @cmov_lt(i64 %a, i64 %c) { entry: diff --git a/test/CodeGen/Alpha/cmpbge.ll b/test/CodeGen/Alpha/cmpbge.ll index 9b83215181c96..e88d2eec75e10 100644 --- a/test/CodeGen/Alpha/cmpbge.ll +++ b/test/CodeGen/Alpha/cmpbge.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha | grep cmpbge | count 2 +; RUN: llc < %s -march=alpha | grep cmpbge | count 2 define i1 @test1(i64 %A, i64 %B) { %C = and i64 %A, 255 ; [#uses=1] diff --git a/test/CodeGen/Alpha/ctlz.ll b/test/CodeGen/Alpha/ctlz.ll index 83d97b5833c46..aa1588aa39e8f 100644 --- a/test/CodeGen/Alpha/ctlz.ll +++ b/test/CodeGen/Alpha/ctlz.ll @@ -1,8 +1,8 @@ ; Make sure this testcase codegens to the ctlz instruction -; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctlz -; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | grep -i ctlz -; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | not grep -i ctlz -; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | not grep -i ctlz +; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz +; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz +; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz +; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz declare i8 @llvm.ctlz.i8(i8) diff --git a/test/CodeGen/Alpha/ctlz_e.ll b/test/CodeGen/Alpha/ctlz_e.ll index 56027dd3ea7b9..230e096b08d2b 100644 --- a/test/CodeGen/Alpha/ctlz_e.ll +++ b/test/CodeGen/Alpha/ctlz_e.ll @@ -1,5 +1,5 @@ ; Make sure this testcase does not use ctpop -; RUN: llvm-as < %s | llc -march=alpha | not grep -i ctpop +; RUN: llc < %s -march=alpha | not grep -i ctpop declare i64 @llvm.ctlz.i64(i64) diff --git a/test/CodeGen/Alpha/ctpop.ll b/test/CodeGen/Alpha/ctpop.ll index a528d728be066..f887882cec2fb 100644 --- a/test/CodeGen/Alpha/ctpop.ll +++ b/test/CodeGen/Alpha/ctpop.ll @@ -1,10 +1,10 @@ ; Make sure this testcase codegens to the ctpop instruction -; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev67 | grep -i ctpop -; RUN: llvm-as < %s | llc -march=alpha -mattr=+CIX | \ +; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop +; RUN: llc < %s -march=alpha -mattr=+CIX | \ ; RUN: grep -i ctpop -; RUN: llvm-as < %s | llc -march=alpha -mcpu=ev6 | \ +; RUN: llc < %s -march=alpha -mcpu=ev6 | \ ; RUN: not grep -i ctpop -; RUN: llvm-as < %s | llc -march=alpha -mattr=-CIX | \ +; RUN: llc < %s -march=alpha -mattr=-CIX | \ ; RUN: not grep -i ctpop declare i64 @llvm.ctpop.i64(i64) diff --git a/test/CodeGen/Alpha/eqv.ll b/test/CodeGen/Alpha/eqv.ll index 2539d72474487..b3413d6b5dce5 100644 --- a/test/CodeGen/Alpha/eqv.ll +++ b/test/CodeGen/Alpha/eqv.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the eqv instruction -; RUN: llvm-as < %s | llc -march=alpha | grep eqv +; RUN: llc < %s -march=alpha | grep eqv define i64 @bar(i64 %x, i64 %y) { entry: diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll index 7af813454072d..ffeafbd75938d 100644 --- a/test/CodeGen/Alpha/i32_sub_1.ll +++ b/test/CodeGen/Alpha/i32_sub_1.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the ctpop instruction -; RUN: llvm-as < %s | llc -march=alpha | grep -i {subl \$16,1,\$0} +; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0} define i32 @foo(i32 signext %x) signext { diff --git a/test/CodeGen/Alpha/illegal-element-type.ll 
b/test/CodeGen/Alpha/illegal-element-type.ll index c95d57153db2c..4cf80dee57b78 100644 --- a/test/CodeGen/Alpha/illegal-element-type.ll +++ b/test/CodeGen/Alpha/illegal-element-type.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -mtriple=alphaev6-unknown-linux-gnu +; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu define void @foo() { entry: diff --git a/test/CodeGen/Alpha/jmp_table.ll b/test/CodeGen/Alpha/jmp_table.ll index d7b61163e7b80..917c9327dc16f 100644 --- a/test/CodeGen/Alpha/jmp_table.ll +++ b/test/CodeGen/Alpha/jmp_table.ll @@ -1,9 +1,9 @@ ; try to check that we have the most important instructions, which shouldn't ; appear otherwise -; RUN: llvm-as < %s | llc -march=alpha | grep jmp -; RUN: llvm-as < %s | llc -march=alpha | grep gprel32 -; RUN: llvm-as < %s | llc -march=alpha | grep ldl -; RUN: llvm-as < %s | llc -march=alpha | grep rodata +; RUN: llc < %s -march=alpha | grep jmp +; RUN: llc < %s -march=alpha | grep gprel32 +; RUN: llc < %s -march=alpha | grep ldl +; RUN: llc < %s -march=alpha | grep rodata ; END. target datalayout = "e-p:64:64" diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll index 50c245ff3d9e0..93e8b1b04465a 100644 --- a/test/CodeGen/Alpha/mb.ll +++ b/test/CodeGen/Alpha/mb.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha | grep mb +; RUN: llc < %s -march=alpha | grep mb declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) diff --git a/test/CodeGen/Alpha/mul128.ll b/test/CodeGen/Alpha/mul128.ll index b069fea4a5ca9..daf8409409dd5 100644 --- a/test/CodeGen/Alpha/mul128.ll +++ b/test/CodeGen/Alpha/mul128.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha define i128 @__mulvdi3(i128 %a, i128 %b) nounwind { entry: diff --git a/test/CodeGen/Alpha/mul5.ll b/test/CodeGen/Alpha/mul5.ll index 5af73a1cc774c..4075dd6289ebc 100644 --- a/test/CodeGen/Alpha/mul5.ll +++ b/test/CodeGen/Alpha/mul5.ll @@ -1,5 +1,5 @@ ; Make sure this testcase does not use mulq -; RUN: llvm-as < %s | llc -march=alpha | not grep -i mul +; RUN: llc < %s -march=alpha | not grep -i mul define i64 @foo1(i64 %x) { entry: diff --git a/test/CodeGen/Alpha/neg1.ll b/test/CodeGen/Alpha/neg1.ll index ddaed4a0c6e24..0db767f68e517 100644 --- a/test/CodeGen/Alpha/neg1.ll +++ b/test/CodeGen/Alpha/neg1.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the lda -1 instruction -; RUN: llvm-as < %s | llc -march=alpha | grep {\\-1} +; RUN: llc < %s -march=alpha | grep {\\-1} define i64 @bar() { entry: diff --git a/test/CodeGen/Alpha/not.ll b/test/CodeGen/Alpha/not.ll index cea9f6bc95f58..4f0a5c2946ef3 100644 --- a/test/CodeGen/Alpha/not.ll +++ b/test/CodeGen/Alpha/not.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the ornot instruction -; RUN: llvm-as < %s | llc -march=alpha | grep eqv +; RUN: llc < %s -march=alpha | grep eqv define i64 @bar(i64 %x) { entry: diff --git a/test/CodeGen/Alpha/ornot.ll b/test/CodeGen/Alpha/ornot.ll index b8d350dc100e1..f930e345ce426 100644 --- a/test/CodeGen/Alpha/ornot.ll +++ b/test/CodeGen/Alpha/ornot.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the ornot instruction -; RUN: llvm-as < %s | llc -march=alpha | grep ornot +; RUN: llc < %s -march=alpha | grep ornot define i64 @bar(i64 %x, i64 %y) { entry: diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll index 2d9ed1e413db5..96ab4eb400ea1 100644 --- a/test/CodeGen/Alpha/private.ll +++ b/test/CodeGen/Alpha/private.ll @@ -1,6 +1,6 @@ ; Test to make sure that the 'private' is used correctly. 
; -; RUN: llvm-as < %s | llc -march=alpha > %t +; RUN: llc < %s -march=alpha > %t ; RUN: grep \\\$foo: %t ; RUN: grep bsr.*\\\$\\\$foo %t ; RUN: grep \\\$baz: %t diff --git a/test/CodeGen/Alpha/rpcc.ll b/test/CodeGen/Alpha/rpcc.ll index 193a47f7ce3f1..d6665b5d8d6f8 100644 --- a/test/CodeGen/Alpha/rpcc.ll +++ b/test/CodeGen/Alpha/rpcc.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha | grep rpcc +; RUN: llc < %s -march=alpha | grep rpcc declare i64 @llvm.readcyclecounter() diff --git a/test/CodeGen/Alpha/srl_and.ll b/test/CodeGen/Alpha/srl_and.ll index 2344833dc5b36..3042ef3d0237e 100644 --- a/test/CodeGen/Alpha/srl_and.ll +++ b/test/CodeGen/Alpha/srl_and.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the zapnot instruction -; RUN: llvm-as < %s | llc -march=alpha | grep zapnot +; RUN: llc < %s -march=alpha | grep zapnot define i64 @foo(i64 %y) { entry: diff --git a/test/CodeGen/Alpha/sub128.ll b/test/CodeGen/Alpha/sub128.ll index cb18559e532c0..d26404bfe024b 100644 --- a/test/CodeGen/Alpha/sub128.ll +++ b/test/CodeGen/Alpha/sub128.ll @@ -1,6 +1,6 @@ ;test for SUBC and SUBE expansion ; -; RUN: llvm-as < %s | llc -march=alpha +; RUN: llc < %s -march=alpha define i128 @sub128(i128 %x, i128 %y) { entry: diff --git a/test/CodeGen/Alpha/weak.ll b/test/CodeGen/Alpha/weak.ll index e00e6d7bfe260..ff04de9ef4679 100644 --- a/test/CodeGen/Alpha/weak.ll +++ b/test/CodeGen/Alpha/weak.ll @@ -1,5 +1,5 @@ -; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*f -; RUN: llvm-as < %s | llc -march=alpha | grep .weak.*h +; RUN: llc < %s -march=alpha | grep .weak.*f +; RUN: llc < %s -march=alpha | grep .weak.*h define weak i32 @f() { entry: diff --git a/test/CodeGen/Alpha/wmb.ll b/test/CodeGen/Alpha/wmb.ll index f745cd52ba3d8..a3e2ccf57256c 100644 --- a/test/CodeGen/Alpha/wmb.ll +++ b/test/CodeGen/Alpha/wmb.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha | grep wmb +; RUN: llc < %s -march=alpha | grep wmb declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1) diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll index 7fec19bdf3f57..d00984acf7f32 100644 --- a/test/CodeGen/Alpha/zapnot.ll +++ b/test/CodeGen/Alpha/zapnot.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the bic instruction -; RUN: llvm-as < %s | llc -march=alpha | grep zapnot +; RUN: llc < %s -march=alpha | grep zapnot define i16 @foo(i64 %y) zeroext { diff --git a/test/CodeGen/Alpha/zapnot2.ll b/test/CodeGen/Alpha/zapnot2.ll index 6a33ca2ab21f2..cd3caae41d5aa 100644 --- a/test/CodeGen/Alpha/zapnot2.ll +++ b/test/CodeGen/Alpha/zapnot2.ll @@ -1,5 +1,5 @@ ; Make sure this testcase codegens to the zapnot instruction -; RUN: llvm-as < %s | llc -march=alpha | grep zapnot +; RUN: llc < %s -march=alpha | grep zapnot define i64 @bar(i64 %x) { entry: diff --git a/test/CodeGen/Alpha/zapnot3.ll b/test/CodeGen/Alpha/zapnot3.ll index 26aab37d7bb9f..f02961f1eaec8 100644 --- a/test/CodeGen/Alpha/zapnot3.ll +++ b/test/CodeGen/Alpha/zapnot3.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha | grep zapnot +; RUN: llc < %s -march=alpha | grep zapnot ;demanded bits mess up this mask in a hard to fix way ;define i64 @foo(i64 %y) { diff --git a/test/CodeGen/Alpha/zapnot4.ll b/test/CodeGen/Alpha/zapnot4.ll index 1be3ca2e3c72c..89beeef2d8100 100644 --- a/test/CodeGen/Alpha/zapnot4.ll +++ b/test/CodeGen/Alpha/zapnot4.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=alpha | grep zapnot +; RUN: llc < %s -march=alpha | grep zapnot define i64 @foo(i64 %y) { %tmp = shl i64 %y, 3 ; [#uses=1] diff --git 
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
new file mode 100644
index 0000000000000..3ee5e8df9972f
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
+
+; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
+; super-register is illegally extended.
+
+define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) {
+  %y1 = add i16 %x1, 1
+  %y2 = add i16 %x2, 2
+  %y3 = add i16 %x3, 3
+  %y4 = add i16 %x4, 4
+  %z12 = add i16 %y1, %y2
+  %z34 = add i16 %y3, %y4
+  %p = add i16 %z12, %z34
+  ret i16 %p
+}
diff --git a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
new file mode 100644
index 0000000000000..e5d1637a50cb5
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+declare i64 @llvm.cttz.i64(i64) nounwind readnone
+
+declare i16 @llvm.cttz.i16(i16) nounwind readnone
+
+declare i8 @llvm.cttz.i8(i8) nounwind readnone
+
+define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
+  %a = call i8 @llvm.cttz.i8(i8 %A)             ; <i8> [#uses=1]
+  %b = call i16 @llvm.cttz.i16(i16 %B)          ; <i16> [#uses=1]
+  %d = call i64 @llvm.cttz.i64(i64 %D)          ; <i64> [#uses=1]
+  store i8 %a, i8* %AP
+  store i16 %b, i16* %BP
+  store i64 %d, i64* %DP
+  ret void
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
new file mode 100644
index 0000000000000..0b731dccd19f9
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; When joining live intervals of sub-registers, an MBB live-in list is not
+; updated properly. The register scavenger asserts on an undefined register.
+
+define i32 @foo(i8 %bar) {
+entry:
+  switch i8 %bar, label %bb1203 [
+    i8 117, label %bb1204
+    i8 85, label %bb1204
+    i8 106, label %bb1204
+  ]
+
+bb1203:         ; preds = %entry
+  ret i32 1
+
+bb1204:         ; preds = %entry, %entry, %entry
+  ret i32 2
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
new file mode 100644
index 0000000000000..dcc3ea0dec883
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; LocalRewriter can forget to transfer a flag when setting up call
+; argument registers. This then causes register scavenger asserts.
+
+declare i32 @printf(i8*, i32, float)
+
+define i32 @testissue(i32 %i, float %x, float %y) {
+  br label %bb1
+
+bb1:            ; preds = %bb1, %0
+  %x2 = fmul float %x, 5.000000e-01             ; <float> [#uses=1]
+  %y2 = fmul float %y, 0x3FECCCCCC0000000       ; <float> [#uses=1]
+  %z2 = fadd float %x2, %y2                     ; <float> [#uses=1]
+  %z3 = fadd float undef, %z2                   ; <float> [#uses=1]
+  %i1 = shl i32 %i, 3                           ; <i32> [#uses=1]
+  %j1 = add i32 %i, 7                           ; <i32> [#uses=1]
+  %m1 = add i32 %i1, %j1                        ; <i32> [#uses=2]
+  %b = icmp sle i32 %m1, 6                      ; <i1> [#uses=1]
+  br i1 %b, label %bb1, label %bb2
+
+bb2:            ; preds = %bb1
+  %1 = call i32 @printf(i8* undef, i32 %m1, float %z3)          ; <i32> [#uses=0]
+  ret i32 0
+}
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
new file mode 100644
index 0000000000000..f21da52315faf
--- /dev/null
+++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+; XFAIL: *
+
+; An undef argument causes a setugt node to escape instruction selection.
+
+define void @bugt() {
+cond_next305:
+  %tmp306307 = trunc i32 undef to i8            ; <i8> [#uses=1]
+  %tmp308 = icmp ugt i8 %tmp306307, 6           ; <i1> [#uses=1]
+  br i1 %tmp308, label %bb311, label %bb314
+
+bb311:          ; preds = %cond_next305
+  unreachable
+
+bb314:          ; preds = %cond_next305
+  ret void
+}
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
new file mode 100644
index 0000000000000..e982e437d6871
--- /dev/null
+++ b/test/CodeGen/Blackfin/add-overflow.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
+
+        type { i24, i1 }                ; type %0
+
+define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
+entry:
+  %t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2)   ; <%0> [#uses=1]
+  %obit = extractvalue %0 %t, 1                 ; <i1> [#uses=1]
+  br i1 %obit, label %carry, label %normal
+
+normal:         ; preds = %entry
+  ret i1 true
+
+carry:          ; preds = %entry
+  ret i1 false
+}
+
+declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/test/CodeGen/Blackfin/add.ll b/test/CodeGen/Blackfin/add.ll
new file mode 100644
index 0000000000000..3311c03199ee9
--- /dev/null
+++ b/test/CodeGen/Blackfin/add.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+define i32 @add(i32 %A, i32 %B) {
+  %R = add i32 %A, %B           ; <i32> [#uses=1]
+  ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/addsub-i128.ll b/test/CodeGen/Blackfin/addsub-i128.ll
new file mode 100644
index 0000000000000..dd5610120b4d9
--- /dev/null
+++ b/test/CodeGen/Blackfin/addsub-i128.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+; These functions have just the right size to annoy the register scavenger: They
+; use all the scratch registers, but not all the callee-saved registers.
+
+define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+  %tmp1 = zext i64 %AL to i128                  ; <i128> [#uses=1]
+  %tmp23 = zext i64 %AH to i128                 ; <i128> [#uses=1]
+  %tmp4 = shl i128 %tmp23, 64                   ; <i128> [#uses=1]
+  %tmp5 = or i128 %tmp4, %tmp1                  ; <i128> [#uses=1]
+  %tmp67 = zext i64 %BL to i128                 ; <i128> [#uses=1]
+  %tmp89 = zext i64 %BH to i128                 ; <i128> [#uses=1]
+  %tmp11 = shl i128 %tmp89, 64                  ; <i128> [#uses=1]
+  %tmp12 = or i128 %tmp11, %tmp67               ; <i128> [#uses=1]
+  %tmp15 = add i128 %tmp12, %tmp5               ; <i128> [#uses=2]
+  %tmp1617 = trunc i128 %tmp15 to i64           ; <i64> [#uses=1]
+  store i64 %tmp1617, i64* %RL
+  %tmp21 = lshr i128 %tmp15, 64                 ; <i128> [#uses=1]
+  %tmp2122 = trunc i128 %tmp21 to i64           ; <i64> [#uses=1]
+  store i64 %tmp2122, i64* %RH
+  ret void
+}
+
+define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+  %tmp1 = zext i64 %AL to i128                  ; <i128> [#uses=1]
+  %tmp23 = zext i64 %AH to i128                 ; <i128> [#uses=1]
+  %tmp4 = shl i128 %tmp23, 64                   ; <i128> [#uses=1]
+  %tmp5 = or i128 %tmp4, %tmp1                  ; <i128> [#uses=1]
+  %tmp67 = zext i64 %BL to i128                 ; <i128> [#uses=1]
+  %tmp89 = zext i64 %BH to i128                 ; <i128> [#uses=1]
+  %tmp11 = shl i128 %tmp89, 64                  ; <i128> [#uses=1]
+  %tmp12 = or i128 %tmp11, %tmp67               ; <i128> [#uses=1]
+  %tmp15 = sub i128 %tmp5, %tmp12               ; <i128> [#uses=2]
+  %tmp1617 = trunc i128 %tmp15 to i64           ; <i64> [#uses=1]
+  store i64 %tmp1617, i64* %RL
+  %tmp21 = lshr i128 %tmp15, 64                 ; <i128> [#uses=1]
+  %tmp2122 = trunc i128 %tmp21 to i64           ; <i64> [#uses=1]
+  store i64 %tmp2122, i64* %RH
+  ret void
+}
diff --git a/test/CodeGen/Blackfin/basic-i1.ll b/test/CodeGen/Blackfin/basic-i1.ll
new file mode 100644
index 0000000000000..c63adaba06cfa
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i1.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin > %t
+
+define i1 @add(i1 %A, i1 %B) {
+  %R = add i1 %A, %B            ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @sub(i1 %A, i1 %B) {
+  %R = sub i1 %A, %B            ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @mul(i1 %A, i1 %B) {
+  %R = mul i1 %A, %B            ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @sdiv(i1 %A, i1 %B) {
+  %R = sdiv i1 %A, %B           ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @udiv(i1 %A, i1 %B) {
+  %R = udiv i1 %A, %B           ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @srem(i1 %A, i1 %B) {
+  %R = srem i1 %A, %B           ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @urem(i1 %A, i1 %B) {
+  %R = urem i1 %A, %B           ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @and(i1 %A, i1 %B) {
+  %R = and i1 %A, %B            ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @or(i1 %A, i1 %B) {
+  %R = or i1 %A, %B             ; <i1> [#uses=1]
+  ret i1 %R
+}
+
+define i1 @xor(i1 %A, i1 %B) {
+  %R = xor i1 %A, %B            ; <i1> [#uses=1]
+  ret i1 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i16.ll b/test/CodeGen/Blackfin/basic-i16.ll
new file mode 100644
index 0000000000000..541e9a8dc948c
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i16.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=bfin
+
+define i16 @add(i16 %A, i16 %B) {
+  %R = add i16 %A, %B           ; <i16> [#uses=1]
+  ret i16 %R
+}
+
+define i16 @sub(i16 %A, i16 %B) {
+  %R = sub i16 %A, %B           ; <i16> [#uses=1]
+  ret i16 %R
+}
+
+define i16 @mul(i16 %A, i16 %B) {
+  %R = mul i16 %A, %B           ; <i16> [#uses=1]
+  ret i16 %R
+}
+
+define i16 @sdiv(i16 %A, i16 %B) {
+  %R = sdiv i16 %A, %B          ; <i16> [#uses=1]
+  ret i16 %R
+}
+
+define i16 @udiv(i16 %A, i16 %B) {
+  %R = udiv i16 %A, %B          ; <i16> [#uses=1]
+  ret i16 %R
+}
+
+define i16 @srem(i16 %A, i16 %B) {
+  %R = srem i16 %A, %B          ; <i16> [#uses=1]
+  ret i16 %R
+}
+
+define i16 @urem(i16 %A, i16 %B) {
+  %R = urem i16 %A, %B          ; <i16> [#uses=1]
+  ret i16 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i32.ll b/test/CodeGen/Blackfin/basic-i32.ll
new file mode 100644
index 0000000000000..4b5dbfcb957ec
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i32.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i32 @add(i32 %A, i32 %B) {
+  %R = add i32 %A, %B           ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @sub(i32 %A, i32 %B) {
+  %R = sub i32 %A, %B           ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @mul(i32 %A, i32 %B) {
+  %R = mul i32 %A, %B           ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @sdiv(i32 %A, i32 %B) {
+  %R = sdiv i32 %A, %B          ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @udiv(i32 %A, i32 %B) {
+  %R = udiv i32 %A, %B          ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @srem(i32 %A, i32 %B) {
+  %R = srem i32 %A, %B          ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @urem(i32 %A, i32 %B) {
+  %R = urem i32 %A, %B          ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @and(i32 %A, i32 %B) {
+  %R = and i32 %A, %B           ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @or(i32 %A, i32 %B) {
+  %R = or i32 %A, %B            ; <i32> [#uses=1]
+  ret i32 %R
+}
+
+define i32 @xor(i32 %A, i32 %B) {
+  %R = xor i32 %A, %B           ; <i32> [#uses=1]
+  ret i32 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i64.ll b/test/CodeGen/Blackfin/basic-i64.ll
new file mode 100644
index 0000000000000..d4dd8e2703bf8
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i64.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define i64 @add(i64 %A, i64 %B) {
+  %R = add i64 %A, %B           ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @sub(i64 %A, i64 %B) {
+  %R = sub i64 %A, %B           ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @mul(i64 %A, i64 %B) {
+  %R = mul i64 %A, %B           ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @sdiv(i64 %A, i64 %B) {
+  %R = sdiv i64 %A, %B          ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @udiv(i64 %A, i64 %B) {
+  %R = udiv i64 %A, %B          ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @srem(i64 %A, i64 %B) {
+  %R = srem i64 %A, %B          ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @urem(i64 %A, i64 %B) {
+  %R = urem i64 %A, %B          ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @and(i64 %A, i64 %B) {
+  %R = and i64 %A, %B           ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @or(i64 %A, i64 %B) {
+  %R = or i64 %A, %B            ; <i64> [#uses=1]
+  ret i64 %R
+}
+
+define i64 @xor(i64 %A, i64 %B) {
+  %R = xor i64 %A, %B           ; <i64> [#uses=1]
+  ret i64 %R
+}
diff --git a/test/CodeGen/Blackfin/basic-i8.ll b/test/CodeGen/Blackfin/basic-i8.ll
new file mode 100644
index 0000000000000..2c7ce9d1015ae
--- /dev/null
+++ b/test/CodeGen/Blackfin/basic-i8.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=bfin
+
+define i8 @add(i8 %A, i8 %B) {
+  %R = add i8 %A, %B            ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @sub(i8 %A, i8 %B) {
+  %R = sub i8 %A, %B            ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @mul(i8 %A, i8 %B) {
+  %R = mul i8 %A, %B            ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @sdiv(i8 %A, i8 %B) {
+  %R = sdiv i8 %A, %B           ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @udiv(i8 %A, i8 %B) {
+  %R = udiv i8 %A, %B           ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @srem(i8 %A, i8 %B) {
+  %R = srem i8 %A, %B           ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @urem(i8 %A, i8 %B) {
+  %R = urem i8 %A, %B           ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @and(i8 %A, i8 %B) {
+  %R = and i8 %A, %B            ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @or(i8 %A, i8 %B) {
+  %R = or i8 %A, %B             ; <i8> [#uses=1]
+  ret i8 %R
+}
+
+define i8 @xor(i8 %A, i8 %B) {
+  %R = xor i8 %A, %B            ; <i8> [#uses=1]
+  ret i8 %R
+}
diff --git a/test/CodeGen/Blackfin/basictest.ll b/test/CodeGen/Blackfin/basictest.ll
new file mode 100644
index 0000000000000..85040df0fde5d
--- /dev/null
+++ b/test/CodeGen/Blackfin/basictest.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=bfin -verify-machineinstrs
+
+define void @void(i32, i32) {
+  add i32 0, 0                  ; <i32>:3 [#uses=2]
+  sub i32 0, 4                  ; <i32>:4 [#uses=2]
+  br label %5
+
+;